/*
 * bsd/vfs/vfs_syscalls.c — Apple XNU (xnu-4903.241.1)
 */
1/*
2 * Copyright (c) 1995-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/namei.h>
77#include <sys/filedesc.h>
78#include <sys/kernel.h>
79#include <sys/file_internal.h>
80#include <sys/stat.h>
81#include <sys/vnode_internal.h>
82#include <sys/mount_internal.h>
83#include <sys/proc_internal.h>
84#include <sys/kauth.h>
85#include <sys/uio_internal.h>
86#include <sys/malloc.h>
87#include <sys/mman.h>
88#include <sys/dirent.h>
89#include <sys/attr.h>
90#include <sys/sysctl.h>
91#include <sys/ubc.h>
92#include <sys/quota.h>
93#include <sys/kdebug.h>
94#include <sys/fsevents.h>
95#include <sys/imgsrc.h>
96#include <sys/sysproto.h>
97#include <sys/xattr.h>
98#include <sys/fcntl.h>
99#include <sys/fsctl.h>
100#include <sys/ubc_internal.h>
101#include <sys/disk.h>
102#include <sys/content_protection.h>
103#include <sys/clonefile.h>
104#include <sys/snapshot.h>
105#include <sys/priv.h>
106#include <machine/cons.h>
107#include <machine/limits.h>
108#include <miscfs/specfs/specdev.h>
109
110#include <vfs/vfs_disk_conditioner.h>
111
112#include <security/audit/audit.h>
113#include <bsm/audit_kevents.h>
114
115#include <mach/mach_types.h>
116#include <kern/kern_types.h>
117#include <kern/kalloc.h>
118#include <kern/task.h>
119
120#include <vm/vm_pageout.h>
121#include <vm/vm_protos.h>
122
123#include <libkern/OSAtomic.h>
124#include <pexpert/pexpert.h>
125#include <IOKit/IOBSD.h>
126
127#if ROUTEFS
128#include <miscfs/routefs/routefs.h>
129#endif /* ROUTEFS */
130
131#if CONFIG_MACF
132#include <security/mac.h>
133#include <security/mac_framework.h>
134#endif
135
136#if CONFIG_FSE
137#define GET_PATH(x) \
138 (x) = get_pathbuff();
139#define RELEASE_PATH(x) \
140 release_pathbuff(x);
141#else
142#define GET_PATH(x) \
143 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
144#define RELEASE_PATH(x) \
145 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
146#endif /* CONFIG_FSE */
147
148#ifndef HFS_GET_BOOT_INFO
149#define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
150#endif
151
152#ifndef HFS_SET_BOOT_INFO
153#define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
154#endif
155
156#ifndef APFSIOC_REVERT_TO_SNAPSHOT
157#define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
158#endif
159
160extern void disk_conditioner_unmount(mount_t mp);
161
162/* struct for checkdirs iteration */
163struct cdirargs {
164 vnode_t olddp;
165 vnode_t newdp;
166};
167/* callback for checkdirs iteration */
168static int checkdirs_callback(proc_t p, void * arg);
169
170static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
171static int checkdirs(vnode_t olddp, vfs_context_t ctx);
172void enablequotas(struct mount *mp, vfs_context_t ctx);
173static int getfsstat_callback(mount_t mp, void * arg);
174static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
175static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
176static int sync_callback(mount_t, void *);
177static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
178 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
179 boolean_t partial_copy);
180static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
181 user_addr_t bufp);
182static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
183static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
184 struct componentname *cnp, user_addr_t fsmountargs,
185 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
186 vfs_context_t ctx);
187void vfs_notify_mount(vnode_t pdvp);
188
189int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
190
191struct fd_vn_data * fg_vn_data_alloc(void);
192
193/*
194 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
195 * Concurrent lookups (or lookups by ids) on hard links can cause the
196 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
197 * does) to return ENOENT as the path cannot be returned from the name cache
198 * alone. We have no option but to retry and hope to get one namei->reverse path
199 * generation done without an intervening lookup, lookup by id on the hard link
200 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
201 * which currently are the MAC hooks for rename, unlink and rmdir.
202 */
203#define MAX_AUTHORIZE_ENOENT_RETRIES 1024
204
205static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
206
207static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
208
209#ifdef CONFIG_IMGSRC_ACCESS
210static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
211static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
212static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
213static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
214static void mount_end_update(mount_t mp);
215static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
216#endif /* CONFIG_IMGSRC_ACCESS */
217
218//snapshot functions
219#if CONFIG_MNT_ROOTSNAP
220static int snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx);
221#else
222static int snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx) __attribute__((unused));
223#endif
224
225int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
226
227__private_extern__
228int sync_internal(void);
229
230__private_extern__
231int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
232
233extern lck_grp_t *fd_vn_lck_grp;
234extern lck_grp_attr_t *fd_vn_lck_grp_attr;
235extern lck_attr_t *fd_vn_lck_attr;
236
237/*
238 * incremented each time a mount or unmount operation occurs
239 * used to invalidate the cached value of the rootvp in the
240 * mount structure utilized by cache_lookup_path
241 */
242uint32_t mount_generation = 0;
243
244/* counts number of mount and unmount operations */
245unsigned int vfs_nummntops=0;
246
247extern const struct fileops vnops;
248#if CONFIG_APPLEDOUBLE
249extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
250#endif /* CONFIG_APPLEDOUBLE */
251
252/*
253 * Virtual File System System Calls
254 */
255
256#if NFSCLIENT || DEVFS || ROUTEFS
257/*
258 * Private in-kernel mounting spi (NFS only, not exported)
259 */
260 __private_extern__
261boolean_t
262vfs_iskernelmount(mount_t mp)
263{
264 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
265}
266
/*
 * kernel_mount:
 *	Private in-kernel mount entry point (NFS/DEVFS/ROUTEFS only; not
 *	exported).  Mounts 'fstype' over the vnode named by 'path', or over
 *	the caller-supplied vp/pvp pair when vp is provided.
 *
 * Parameters:
 *	fstype		file system type name (vfs name)
 *	pvp		parent of the vnode to be covered, or NULLVP
 *	vp		vnode to be covered, or NULLVP to look it up from 'path'
 *	path		kernel-space path of the mount point
 *	data		file-system-specific mount arguments
 *	syscall_flags	generic MNT_* mount flags
 *	kern_flags	internal KERNEL_MOUNT_* flags
 *	ctx		caller's vfs context
 *
 * Returns:	0 on success, errno on failure.
 *
 * Note: when vp is supplied by the caller, the caller retains ownership of
 * the iocounts on vp/pvp; when this function performs the namei() itself,
 * it releases both iocounts and tears down the nameidata before returning.
 */
__private_extern__
int
kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
             void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
{
	struct nameidata nd;
	boolean_t did_namei;
	int error;

	/* Path is kernel memory, hence UIO_SYSSPACE. */
	NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
	       UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);

	/*
	 * Get the vnode to be covered if it's not supplied
	 */
	if (vp == NULLVP) {
		error = namei(&nd);
		if (error)
			return (error);
		/* namei() returned iocounts on both; released below. */
		vp = nd.ni_vp;
		pvp = nd.ni_dvp;
		did_namei = TRUE;
	} else {
		/*
		 * Caller supplied the vnodes; mount_common() still needs the
		 * component name, so hand it the kernel path buffer directly.
		 */
		char *pnbuf = CAST_DOWN(char *, path);

		nd.ni_cnd.cn_pnbuf = pnbuf;
		nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
		did_namei = FALSE;
	}

	/* kernelmount == TRUE marks this as an in-kernel mount. */
	error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
	                     syscall_flags, kern_flags, NULL, TRUE, ctx);

	if (did_namei) {
		vnode_put(vp);
		vnode_put(pvp);
		nameidone(&nd);
	}

	return (error);
}
308#endif /* NFSCLIENT || DEVFS */
309
310/*
311 * Mount a file system.
312 */
313/* ARGSUSED */
314int
315mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
316{
317 struct __mac_mount_args muap;
318
319 muap.type = uap->type;
320 muap.path = uap->path;
321 muap.flags = uap->flags;
322 muap.data = uap->data;
323 muap.mac_p = USER_ADDR_NULL;
324 return (__mac_mount(p, &muap, retval));
325}
326
/*
 * fmount:
 *	Mount a file system on the directory referenced by the open file
 *	descriptor uap->fd, rather than by path (cf. mount(2)).
 *
 * Returns:	0 on success, errno on failure.
 *	ENOTSUP		imgsrc-by-index or rootfs mounts not allowed here
 *	EPERM		union mounts not allowed via fmount
 *	EINVAL		the fd's vnode has no resolvable parent
 *
 * Resources acquired in order (released in reverse on every error path):
 * fd reference (file_vnode), vnode iocount (vnode_getwithref), parent
 * vnode iocount (vnode_getparent), and the MAXPATHLEN path buffer.
 */
int
fmount(__unused proc_t p, struct fmount_args *uap, __unused int32_t *retval)
{
	struct componentname cn;
	vfs_context_t ctx = vfs_context_current();
	size_t dummy = 0;
	int error;
	int flags = uap->flags;
	char fstypename[MFSNAMELEN];
	char *labelstr = NULL; /* regular mount call always sets it to NULL for __mac_mount() */
	vnode_t pvp;
	vnode_t vp;

	AUDIT_ARG(fd, uap->fd);
	AUDIT_ARG(fflags, flags);
	/* fstypename will get audited by mount_common */

	/* Sanity check the flags */
	if (flags & (MNT_IMGSRC_BY_INDEX|MNT_ROOTFS)) {
		return (ENOTSUP);
	}

	if (flags & MNT_UNION) {
		return (EPERM);
	}

	/* Copy the fs type name in from user space. */
	error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
	if (error) {
		return (error);
	}

	/* Takes a reference on the fd's fileproc; dropped via file_drop(). */
	if ((error = file_vnode(uap->fd, &vp)) != 0) {
		return (error);
	}

	/* Take an iocount on the vnode to be covered. */
	if ((error = vnode_getwithref(vp)) != 0) {
		file_drop(uap->fd);
		return (error);
	}

	/* mount_common() needs the parent (covered-vp's directory) as well. */
	pvp = vnode_getparent(vp);
	if (pvp == NULL) {
		vnode_put(vp);
		file_drop(uap->fd);
		return (EINVAL);
	}

	/*
	 * Synthesize the componentname that a path-based mount would have
	 * gotten from namei(): reconstruct the vnode's path into cn_pnbuf.
	 */
	memset(&cn, 0, sizeof(struct componentname));
	MALLOC(cn.cn_pnbuf, char *, MAXPATHLEN, M_TEMP, M_WAITOK);
	cn.cn_pnlen = MAXPATHLEN;

	if((error = vn_getpath(vp, cn.cn_pnbuf, &cn.cn_pnlen)) != 0) {
		FREE(cn.cn_pnbuf, M_TEMP);
		vnode_put(pvp);
		vnode_put(vp);
		file_drop(uap->fd);
		return (error);
	}

	error = mount_common(fstypename, pvp, vp, &cn, uap->data, flags, 0, labelstr, FALSE, ctx);

	/* Release everything acquired above, in reverse order. */
	FREE(cn.cn_pnbuf, M_TEMP);
	vnode_put(pvp);
	vnode_put(vp);
	file_drop(uap->fd);

	return (error);
}
395
/*
 * vfs_notify_mount:
 *	Notify interested parties that a mount has occurred:
 *	signal a VQ_MOUNT vfs event, then post a NOTE_WRITE knote on the
 *	parent directory of the new mount point.
 */
void
vfs_notify_mount(vnode_t pdvp)
{
	/* NULL mount, NULL data: a system-wide VQ_MOUNT broadcast. */
	vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
	lock_vnode_and_post(pdvp, NOTE_WRITE);
}
402
403/*
404 * __mac_mount:
405 * Mount a file system taking into account MAC label behavior.
406 * See mount(2) man page for more information
407 *
408 * Parameters: p Process requesting the mount
409 * uap User argument descriptor (see below)
410 * retval (ignored)
411 *
412 * Indirect: uap->type Filesystem type
413 * uap->path Path to mount
414 * uap->data Mount arguments
415 * uap->mac_p MAC info
416 * uap->flags Mount flags
417 *
418 *
419 * Returns: 0 Success
420 * !0 Not success
421 */
422boolean_t root_fs_upgrade_try = FALSE;
423
int
__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
{
	vnode_t pvp = NULL;	/* parent of covered vnode; iocount held once namei succeeds */
	vnode_t vp = NULL;	/* vnode to be covered; iocount held once namei succeeds */
	int need_nameidone = 0;	/* tracks whether nameidone(&nd) is owed at 'out' */
	vfs_context_t ctx = vfs_context_current();
	char fstypename[MFSNAMELEN];
	struct nameidata nd;
	size_t dummy=0;
	char *labelstr = NULL;	/* MAC label copied in from user space, if any */
	int flags = uap->flags;
	int error;
#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
	boolean_t is_64bit = IS_64BIT_PROCESS(p);
#else
#pragma unused(p)
#endif
	/*
	 * Get the fs type name from user space
	 */
	error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
	if (error)
		return (error);

	/*
	 * Get the vnode to be covered
	 */
	NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
	       UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error) {
		goto out;
	}
	/* namei succeeded: vp, pvp and nd all need cleanup from here on. */
	need_nameidone = 1;
	vp = nd.ni_vp;
	pvp = nd.ni_dvp;

#ifdef CONFIG_IMGSRC_ACCESS
	/* Mounting image source cannot be batched with other operations */
	/*
	 * NOTE(review): this is an exact-equality test, so it only fires
	 * when MNT_IMGSRC_BY_INDEX is the sole flag set; the by_index
	 * argument below is therefore always true here.
	 */
	if (flags == MNT_IMGSRC_BY_INDEX) {
		error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
		    ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
		goto out;
	}
#endif /* CONFIG_IMGSRC_ACCESS */

#if CONFIG_MACF
	/*
	 * Get the label string (if any) from user space
	 */
	if (uap->mac_p != USER_ADDR_NULL) {
		struct user_mac mac;
		size_t ulen = 0;

		/* Normalize the 32/64-bit user_mac layouts into 'mac'. */
		if (is_64bit) {
			struct user64_mac mac64;
			error = copyin(uap->mac_p, &mac64, sizeof(mac64));
			mac.m_buflen = mac64.m_buflen;
			mac.m_string = mac64.m_string;
		} else {
			struct user32_mac mac32;
			error = copyin(uap->mac_p, &mac32, sizeof(mac32));
			mac.m_buflen = mac32.m_buflen;
			mac.m_string = mac32.m_string;
		}
		if (error)
			goto out;
		/* Bound the user-supplied length before allocating. */
		if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
		    (mac.m_buflen < 2)) {
			error = EINVAL;
			goto out;
		}
		/* labelstr is freed at 'out' on every path. */
		MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
		error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
		if (error) {
			goto out;
		}
		AUDIT_ARG(mac_string, labelstr);
	}
#endif /* CONFIG_MACF */

	AUDIT_ARG(fflags, flags);

#if SECURE_KERNEL
	if (flags & MNT_UNION) {
		/* No union mounts on release kernels */
		error = EPERM;
		goto out;
	}
#endif

	/* Special handling when the target is the root of the root file system. */
	if ((vp->v_flag & VROOT) &&
	    (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
		if (!(flags & MNT_UNION)) {
			/* Mounting over '/' is implicitly an update mount. */
			flags |= MNT_UPDATE;
		}
		else {
			/*
			 * For a union mount on '/', treat it as fresh
			 * mount instead of update.
			 * Otherwise, union mouting on '/' used to panic the
			 * system before, since mnt_vnodecovered was found to
			 * be NULL for '/' which is required for unionlookup
			 * after it gets ENOENT on union mount.
			 */
			flags = (flags & ~(MNT_UPDATE));
		}

#if SECURE_KERNEL
		if ((flags & MNT_RDONLY) == 0) {
			/* Release kernels are not allowed to mount "/" as rw */
			error = EPERM;
			goto out;
		}
#endif
		/*
		 * See 7392553 for more details on why this check exists.
		 * Suffice to say: If this check is ON and something tries
		 * to mount the rootFS RW, we'll turn off the codesign
		 * bitmap optimization.
		 */
#if CHECK_CS_VALIDATION_BITMAP
		if ((flags & MNT_RDONLY) == 0 ) {
			root_fs_upgrade_try = TRUE;
		}
#endif
	}

	error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
	                     labelstr, FALSE, ctx);

out:

#if CONFIG_MACF
	if (labelstr)
		FREE(labelstr, M_MACTEMP);
#endif /* CONFIG_MACF */

	/* Drop the iocounts taken by namei() and tear down the nameidata. */
	if (vp) {
		vnode_put(vp);
	}
	if (pvp) {
		vnode_put(pvp);
	}
	if (need_nameidone) {
		nameidone(&nd);
	}

	return (error);
}
575
576/*
577 * common mount implementation (final stage of mounting)
578
579 * Arguments:
580 * fstypename file system type (ie it's vfs name)
581 * pvp parent of covered vnode
582 * vp covered vnode
583 * cnp component name (ie path) of covered vnode
584 * flags generic mount flags
585 * fsmountargs file system specific data
586 * labelstr optional MAC label
587 * kernelmount TRUE for mounts initiated from inside the kernel
588 * ctx caller's context
589 */
590static int
591mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
592 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
593 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
594{
595#if !CONFIG_MACF
596#pragma unused(labelstr)
597#endif
598 struct vnode *devvp = NULLVP;
599 struct vnode *device_vnode = NULLVP;
600#if CONFIG_MACF
601 struct vnode *rvp;
602#endif
603 struct mount *mp;
604 struct vfstable *vfsp = (struct vfstable *)0;
605 struct proc *p = vfs_context_proc(ctx);
606 int error, flag = 0;
607 user_addr_t devpath = USER_ADDR_NULL;
608 int ronly = 0;
609 int mntalloc = 0;
610 boolean_t vfsp_ref = FALSE;
611 boolean_t is_rwlock_locked = FALSE;
612 boolean_t did_rele = FALSE;
613 boolean_t have_usecount = FALSE;
614
615 /*
616 * Process an update for an existing mount
617 */
618 if (flags & MNT_UPDATE) {
619 if ((vp->v_flag & VROOT) == 0) {
620 error = EINVAL;
621 goto out1;
622 }
623 mp = vp->v_mount;
624
625 /* unmount in progress return error */
626 mount_lock_spin(mp);
627 if (mp->mnt_lflag & MNT_LUNMOUNT) {
628 mount_unlock(mp);
629 error = EBUSY;
630 goto out1;
631 }
632 mount_unlock(mp);
633 lck_rw_lock_exclusive(&mp->mnt_rwlock);
634 is_rwlock_locked = TRUE;
635 /*
636 * We only allow the filesystem to be reloaded if it
637 * is currently mounted read-only.
638 */
639 if ((flags & MNT_RELOAD) &&
640 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
641 error = ENOTSUP;
642 goto out1;
643 }
644
645 /*
646 * If content protection is enabled, update mounts are not
647 * allowed to turn it off.
648 */
649 if ((mp->mnt_flag & MNT_CPROTECT) &&
650 ((flags & MNT_CPROTECT) == 0)) {
651 error = EINVAL;
652 goto out1;
653 }
654
655#ifdef CONFIG_IMGSRC_ACCESS
656 /* Can't downgrade the backer of the root FS */
657 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
658 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
659 error = ENOTSUP;
660 goto out1;
661 }
662#endif /* CONFIG_IMGSRC_ACCESS */
663
664 /*
665 * Only root, or the user that did the original mount is
666 * permitted to update it.
667 */
668 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
669 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
670 goto out1;
671 }
672#if CONFIG_MACF
673 error = mac_mount_check_remount(ctx, mp);
674 if (error != 0) {
675 goto out1;
676 }
677#endif
678 /*
679 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
680 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
681 */
682 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
683 flags |= MNT_NOSUID | MNT_NODEV;
684 if (mp->mnt_flag & MNT_NOEXEC)
685 flags |= MNT_NOEXEC;
686 }
687 flag = mp->mnt_flag;
688
689
690
691 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
692
693 vfsp = mp->mnt_vtable;
694 goto update;
695 }
696
697 /*
698 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
699 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
700 */
701 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
702 flags |= MNT_NOSUID | MNT_NODEV;
703 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
704 flags |= MNT_NOEXEC;
705 }
706
707 /* XXXAUDIT: Should we capture the type on the error path as well? */
708 AUDIT_ARG(text, fstypename);
709 mount_list_lock();
710 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
711 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
712 vfsp->vfc_refcount++;
713 vfsp_ref = TRUE;
714 break;
715 }
716 mount_list_unlock();
717 if (vfsp == NULL) {
718 error = ENODEV;
719 goto out1;
720 }
721
722 /*
723 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
724 */
725 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
726 error = EINVAL; /* unsupported request */
727 goto out1;
728 }
729
730 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
731 if (error != 0) {
732 goto out1;
733 }
734
735 /*
736 * Allocate and initialize the filesystem (mount_t)
737 */
738 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
739 M_MOUNT, M_WAITOK);
740 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
741 mntalloc = 1;
742
743 /* Initialize the default IO constraints */
744 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
745 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
746 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
747 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
748 mp->mnt_devblocksize = DEV_BSIZE;
749 mp->mnt_alignmentmask = PAGE_MASK;
750 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
751 mp->mnt_ioscale = 1;
752 mp->mnt_ioflags = 0;
753 mp->mnt_realrootvp = NULLVP;
754 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
755
756 TAILQ_INIT(&mp->mnt_vnodelist);
757 TAILQ_INIT(&mp->mnt_workerqueue);
758 TAILQ_INIT(&mp->mnt_newvnodes);
759 mount_lock_init(mp);
760 lck_rw_lock_exclusive(&mp->mnt_rwlock);
761 is_rwlock_locked = TRUE;
762 mp->mnt_op = vfsp->vfc_vfsops;
763 mp->mnt_vtable = vfsp;
764 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
765 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
766 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
767 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
768 mp->mnt_vnodecovered = vp;
769 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
770 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
771 mp->mnt_devbsdunit = 0;
772
773 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
774 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
775
776#if NFSCLIENT || DEVFS || ROUTEFS
777 if (kernelmount)
778 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
779 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
780 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
781#endif /* NFSCLIENT || DEVFS */
782
783update:
784
785 /*
786 * Set the mount level flags.
787 */
788 if (flags & MNT_RDONLY)
789 mp->mnt_flag |= MNT_RDONLY;
790 else if (mp->mnt_flag & MNT_RDONLY) {
791 // disallow read/write upgrades of file systems that
792 // had the TYPENAME_OVERRIDE feature set.
793 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
794 error = EPERM;
795 goto out1;
796 }
797 mp->mnt_kern_flag |= MNTK_WANTRDWR;
798 }
799 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
800 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
801 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
802 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
803 MNT_QUARANTINE | MNT_CPROTECT);
804
805#if SECURE_KERNEL
806#if !CONFIG_MNT_SUID
807 /*
808 * On release builds of iOS based platforms, always enforce NOSUID on
809 * all mounts. We do this here because we can catch update mounts as well as
810 * non-update mounts in this case.
811 */
812 mp->mnt_flag |= (MNT_NOSUID);
813#endif
814#endif
815
816 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
817 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
818 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
819 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
820 MNT_QUARANTINE | MNT_CPROTECT);
821
822#if CONFIG_MACF
823 if (flags & MNT_MULTILABEL) {
824 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
825 error = EINVAL;
826 goto out1;
827 }
828 mp->mnt_flag |= MNT_MULTILABEL;
829 }
830#endif
831 /*
832 * Process device path for local file systems if requested
833 */
834 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS &&
835 !(internal_flags & KERNEL_MOUNT_SNAPSHOT)) {
836 if (vfs_context_is64bit(ctx)) {
837 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
838 goto out1;
839 fsmountargs += sizeof(devpath);
840 } else {
841 user32_addr_t tmp;
842 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
843 goto out1;
844 /* munge into LP64 addr */
845 devpath = CAST_USER_ADDR_T(tmp);
846 fsmountargs += sizeof(tmp);
847 }
848
849 /* Lookup device and authorize access to it */
850 if ((devpath)) {
851 struct nameidata nd;
852
853 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
854 if ( (error = namei(&nd)) )
855 goto out1;
856
857 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
858 devvp = nd.ni_vp;
859
860 nameidone(&nd);
861
862 if (devvp->v_type != VBLK) {
863 error = ENOTBLK;
864 goto out2;
865 }
866 if (major(devvp->v_rdev) >= nblkdev) {
867 error = ENXIO;
868 goto out2;
869 }
870 /*
871 * If mount by non-root, then verify that user has necessary
872 * permissions on the device.
873 */
874 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
875 mode_t accessmode = KAUTH_VNODE_READ_DATA;
876
877 if ((mp->mnt_flag & MNT_RDONLY) == 0)
878 accessmode |= KAUTH_VNODE_WRITE_DATA;
879 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
880 goto out2;
881 }
882 }
883 /* On first mount, preflight and open device */
884 if (devpath && ((flags & MNT_UPDATE) == 0)) {
885 if ( (error = vnode_ref(devvp)) )
886 goto out2;
887 /*
888 * Disallow multiple mounts of the same device.
889 * Disallow mounting of a device that is currently in use
890 * (except for root, which might share swap device for miniroot).
891 * Flush out any old buffers remaining from a previous use.
892 */
893 if ( (error = vfs_mountedon(devvp)) )
894 goto out3;
895
896 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
897 error = EBUSY;
898 goto out3;
899 }
900 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
901 error = ENOTBLK;
902 goto out3;
903 }
904 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
905 goto out3;
906
907 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
908#if CONFIG_MACF
909 error = mac_vnode_check_open(ctx,
910 devvp,
911 ronly ? FREAD : FREAD|FWRITE);
912 if (error)
913 goto out3;
914#endif /* MAC */
915 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
916 goto out3;
917
918 mp->mnt_devvp = devvp;
919 device_vnode = devvp;
920
921 } else if ((mp->mnt_flag & MNT_RDONLY) &&
922 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
923 (device_vnode = mp->mnt_devvp)) {
924 dev_t dev;
925 int maj;
926 /*
927 * If upgrade to read-write by non-root, then verify
928 * that user has necessary permissions on the device.
929 */
930 vnode_getalways(device_vnode);
931
932 if (suser(vfs_context_ucred(ctx), NULL) &&
933 (error = vnode_authorize(device_vnode, NULL,
934 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
935 ctx)) != 0) {
936 vnode_put(device_vnode);
937 goto out2;
938 }
939
940 /* Tell the device that we're upgrading */
941 dev = (dev_t)device_vnode->v_rdev;
942 maj = major(dev);
943
944 if ((u_int)maj >= (u_int)nblkdev)
945 panic("Volume mounted on a device with invalid major number.");
946
947 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
948 vnode_put(device_vnode);
949 device_vnode = NULLVP;
950 if (error != 0) {
951 goto out2;
952 }
953 }
954 }
955#if CONFIG_MACF
956 if ((flags & MNT_UPDATE) == 0) {
957 mac_mount_label_init(mp);
958 mac_mount_label_associate(ctx, mp);
959 }
960 if (labelstr) {
961 if ((flags & MNT_UPDATE) != 0) {
962 error = mac_mount_check_label_update(ctx, mp);
963 if (error != 0)
964 goto out3;
965 }
966 }
967#endif
968 /*
969 * Mount the filesystem.
970 */
971 if (internal_flags & KERNEL_MOUNT_SNAPSHOT) {
972 error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT,
973 (caddr_t)fsmountargs, 0, ctx);
974 } else {
975 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
976 }
977
978 if (flags & MNT_UPDATE) {
979 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
980 mp->mnt_flag &= ~MNT_RDONLY;
981 mp->mnt_flag &=~
982 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
983 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
984 if (error)
985 mp->mnt_flag = flag; /* restore flag value */
986 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
987 lck_rw_done(&mp->mnt_rwlock);
988 is_rwlock_locked = FALSE;
989 if (!error)
990 enablequotas(mp, ctx);
991 goto exit;
992 }
993
994 /*
995 * Put the new filesystem on the mount list after root.
996 */
997 if (error == 0) {
998 struct vfs_attr vfsattr;
999#if CONFIG_MACF
1000 if (vfs_flags(mp) & MNT_MULTILABEL) {
1001 error = VFS_ROOT(mp, &rvp, ctx);
1002 if (error) {
1003 printf("%s() VFS_ROOT returned %d\n", __func__, error);
1004 goto out3;
1005 }
1006 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
1007 /*
1008 * drop reference provided by VFS_ROOT
1009 */
1010 vnode_put(rvp);
1011
1012 if (error)
1013 goto out3;
1014 }
1015#endif /* MAC */
1016
1017 vnode_lock_spin(vp);
1018 CLR(vp->v_flag, VMOUNT);
1019 vp->v_mountedhere = mp;
1020 vnode_unlock(vp);
1021
1022 /*
1023 * taking the name_cache_lock exclusively will
1024 * insure that everyone is out of the fast path who
1025 * might be trying to use a now stale copy of
1026 * vp->v_mountedhere->mnt_realrootvp
1027 * bumping mount_generation causes the cached values
1028 * to be invalidated
1029 */
1030 name_cache_lock();
1031 mount_generation++;
1032 name_cache_unlock();
1033
1034 error = vnode_ref(vp);
1035 if (error != 0) {
1036 goto out4;
1037 }
1038
1039 have_usecount = TRUE;
1040
1041 error = checkdirs(vp, ctx);
1042 if (error != 0) {
1043 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1044 goto out4;
1045 }
1046 /*
1047 * there is no cleanup code here so I have made it void
1048 * we need to revisit this
1049 */
1050 (void)VFS_START(mp, 0, ctx);
1051
1052 if (mount_list_add(mp) != 0) {
1053 /*
1054 * The system is shutting down trying to umount
1055 * everything, so fail with a plausible errno.
1056 */
1057 error = EBUSY;
1058 goto out4;
1059 }
1060 lck_rw_done(&mp->mnt_rwlock);
1061 is_rwlock_locked = FALSE;
1062
1063 /* Check if this mounted file system supports EAs or named streams. */
1064 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1065 VFSATTR_INIT(&vfsattr);
1066 VFSATTR_WANTED(&vfsattr, f_capabilities);
1067 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
1068 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
1069 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
1070 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
1071 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
1072 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1073 }
1074#if NAMEDSTREAMS
1075 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
1076 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
1077 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1078 }
1079#endif
1080 /* Check if this file system supports path from id lookups. */
1081 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
1082 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
1083 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1084 } else if (mp->mnt_flag & MNT_DOVOLFS) {
1085 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1086 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1087 }
1088
1089 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) &&
1090 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) {
1091 mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS;
1092 }
1093 }
1094 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
1095 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1096 }
1097 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
1098 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
1099 }
1100 /* increment the operations count */
1101 OSAddAtomic(1, &vfs_nummntops);
1102 enablequotas(mp, ctx);
1103
1104 if (device_vnode) {
1105 device_vnode->v_specflags |= SI_MOUNTEDON;
1106
1107 /*
1108 * cache the IO attributes for the underlying physical media...
1109 * an error return indicates the underlying driver doesn't
1110 * support all the queries necessary... however, reasonable
1111 * defaults will have been set, so no reason to bail or care
1112 */
1113 vfs_init_io_attributes(device_vnode, mp);
1114 }
1115
1116 /* Now that mount is setup, notify the listeners */
1117 vfs_notify_mount(pvp);
1118 IOBSDMountChange(mp, kIOMountChangeMount);
1119
1120 } else {
1121 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1122 if (mp->mnt_vnodelist.tqh_first != NULL) {
1123 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1124 mp->mnt_vtable->vfc_name, error);
1125 }
1126
1127 vnode_lock_spin(vp);
1128 CLR(vp->v_flag, VMOUNT);
1129 vnode_unlock(vp);
1130 mount_list_lock();
1131 mp->mnt_vtable->vfc_refcount--;
1132 mount_list_unlock();
1133
1134 if (device_vnode ) {
1135 vnode_rele(device_vnode);
1136 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
1137 }
1138 lck_rw_done(&mp->mnt_rwlock);
1139 is_rwlock_locked = FALSE;
1140
1141 /*
1142 * if we get here, we have a mount structure that needs to be freed,
1143 * but since the coveredvp hasn't yet been updated to point at it,
1144 * no need to worry about other threads holding a crossref on this mp
1145 * so it's ok to just free it
1146 */
1147 mount_lock_destroy(mp);
1148#if CONFIG_MACF
1149 mac_mount_label_destroy(mp);
1150#endif
1151 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1152 }
1153exit:
1154 /*
1155 * drop I/O count on the device vp if there was one
1156 */
1157 if (devpath && devvp)
1158 vnode_put(devvp);
1159
1160 return(error);
1161
1162/* Error condition exits */
1163out4:
1164 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
1165
1166 /*
1167 * If the mount has been placed on the covered vp,
1168 * it may have been discovered by now, so we have
1169 * to treat this just like an unmount
1170 */
1171 mount_lock_spin(mp);
1172 mp->mnt_lflag |= MNT_LDEAD;
1173 mount_unlock(mp);
1174
1175 if (device_vnode != NULLVP) {
1176 vnode_rele(device_vnode);
1177 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1178 ctx);
1179 did_rele = TRUE;
1180 }
1181
1182 vnode_lock_spin(vp);
1183
1184 mp->mnt_crossref++;
1185 vp->v_mountedhere = (mount_t) 0;
1186
1187 vnode_unlock(vp);
1188
1189 if (have_usecount) {
1190 vnode_rele(vp);
1191 }
1192out3:
1193 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
1194 vnode_rele(devvp);
1195out2:
1196 if (devpath && devvp)
1197 vnode_put(devvp);
1198out1:
1199 /* Release mnt_rwlock only when it was taken */
1200 if (is_rwlock_locked == TRUE) {
1201 lck_rw_done(&mp->mnt_rwlock);
1202 }
1203
1204 if (mntalloc) {
1205 if (mp->mnt_crossref)
1206 mount_dropcrossref(mp, vp, 0);
1207 else {
1208 mount_lock_destroy(mp);
1209#if CONFIG_MACF
1210 mac_mount_label_destroy(mp);
1211#endif
1212 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1213 }
1214 }
1215 if (vfsp_ref) {
1216 mount_list_lock();
1217 vfsp->vfc_refcount--;
1218 mount_list_unlock();
1219 }
1220
1221 return(error);
1222}
1223
1224/*
1225 * Flush in-core data, check for competing mount attempts,
1226 * and set VMOUNT
1227 */
1228int
1229prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
1230{
1231#if !CONFIG_MACF
1232#pragma unused(cnp,fsname)
1233#endif
1234 struct vnode_attr va;
1235 int error;
1236
1237 if (!skip_auth) {
1238 /*
1239 * If the user is not root, ensure that they own the directory
1240 * onto which we are attempting to mount.
1241 */
1242 VATTR_INIT(&va);
1243 VATTR_WANTED(&va, va_uid);
1244 if ((error = vnode_getattr(vp, &va, ctx)) ||
1245 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1246 (!vfs_context_issuser(ctx)))) {
1247 error = EPERM;
1248 goto out;
1249 }
1250 }
1251
1252 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1253 goto out;
1254
1255 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1256 goto out;
1257
1258 if (vp->v_type != VDIR) {
1259 error = ENOTDIR;
1260 goto out;
1261 }
1262
1263 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1264 error = EBUSY;
1265 goto out;
1266 }
1267
1268#if CONFIG_MACF
1269 error = mac_mount_check_mount(ctx, vp,
1270 cnp, fsname);
1271 if (error != 0)
1272 goto out;
1273#endif
1274
1275 vnode_lock_spin(vp);
1276 SET(vp->v_flag, VMOUNT);
1277 vnode_unlock(vp);
1278
1279out:
1280 return error;
1281}
1282
1283#if CONFIG_IMGSRC_ACCESS
1284
1285#if DEBUG
1286#define IMGSRC_DEBUG(args...) printf(args)
1287#else
1288#define IMGSRC_DEBUG(args...) do { } while(0)
1289#endif
1290
1291static int
1292authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1293{
1294 struct nameidata nd;
1295 vnode_t vp, realdevvp;
1296 mode_t accessmode;
1297 int error;
1298
1299 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1300 if ( (error = namei(&nd)) ) {
1301 IMGSRC_DEBUG("namei() failed with %d\n", error);
1302 return error;
1303 }
1304
1305 vp = nd.ni_vp;
1306
1307 if (!vnode_isblk(vp)) {
1308 IMGSRC_DEBUG("Not block device.\n");
1309 error = ENOTBLK;
1310 goto out;
1311 }
1312
1313 realdevvp = mp->mnt_devvp;
1314 if (realdevvp == NULLVP) {
1315 IMGSRC_DEBUG("No device backs the mount.\n");
1316 error = ENXIO;
1317 goto out;
1318 }
1319
1320 error = vnode_getwithref(realdevvp);
1321 if (error != 0) {
1322 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1323 goto out;
1324 }
1325
1326 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1327 IMGSRC_DEBUG("Wrong dev_t.\n");
1328 error = ENXIO;
1329 goto out1;
1330 }
1331
1332 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1333
1334 /*
1335 * If mount by non-root, then verify that user has necessary
1336 * permissions on the device.
1337 */
1338 if (!vfs_context_issuser(ctx)) {
1339 accessmode = KAUTH_VNODE_READ_DATA;
1340 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1341 accessmode |= KAUTH_VNODE_WRITE_DATA;
1342 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1343 IMGSRC_DEBUG("Access denied.\n");
1344 goto out1;
1345 }
1346 }
1347
1348 *devvpp = vp;
1349
1350out1:
1351 vnode_put(realdevvp);
1352out:
1353 nameidone(&nd);
1354 if (error) {
1355 vnode_put(vp);
1356 }
1357
1358 return error;
1359}
1360
/*
 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
 * and call checkdirs()
 *
 * On success the covered vnode vp carries a usecount taken here.
 * On failure the usecount (if taken) has been dropped and
 * mnt_vnodecovered has been cleared.
 */
static int
place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
{
	int error;

	mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */

	/* Publish the mount on the covered vnode under the vnode lock */
	vnode_lock_spin(vp);
	CLR(vp->v_flag, VMOUNT);
	vp->v_mountedhere = mp;
	vnode_unlock(vp);

	/*
	 * taking the name_cache_lock exclusively will
	 * insure that everyone is out of the fast path who
	 * might be trying to use a now stale copy of
	 * vp->v_mountedhere->mnt_realrootvp
	 * bumping mount_generation causes the cached values
	 * to be invalidated
	 */
	name_cache_lock();
	mount_generation++;
	name_cache_unlock();

	/* Hold a usecount on vp for as long as the mount covers it */
	error = vnode_ref(vp);
	if (error != 0) {
		goto out;
	}

	error = checkdirs(vp, ctx);
	if (error != 0) {
		/* Unmount the filesystem as cdir/rdirs cannot be updated */
		vnode_rele(vp);
		goto out;
	}

out:
	/*
	 * NOTE(review): on failure only mnt_vnodecovered is cleared;
	 * vp->v_mountedhere is left pointing at mp -- confirm callers
	 * undo that before mp is torn down.
	 */
	if (error != 0) {
		mp->mnt_vnodecovered = NULLVP;
	}
	return error;
}
1407
/*
 * Reverse the effects of place_mount_and_checkdirs(): drop the
 * usecount it took on vp, detach the mount from the covered vnode,
 * and clear the mount's record of the covered vnode.
 */
static void
undo_place_on_covered_vp(mount_t mp, vnode_t vp)
{
	/* Drop the usecount taken when the mount was placed on vp */
	vnode_rele(vp);

	/* Detach the mount from vp under the vnode lock */
	vnode_lock_spin(vp);
	vp->v_mountedhere = (mount_t)NULL;
	vnode_unlock(vp);

	mp->mnt_vnodecovered = NULLVP;
}
1418
1419static int
1420mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1421{
1422 int error;
1423
1424 /* unmount in progress return error */
1425 mount_lock_spin(mp);
1426 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1427 mount_unlock(mp);
1428 return EBUSY;
1429 }
1430 mount_unlock(mp);
1431 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1432
1433 /*
1434 * We only allow the filesystem to be reloaded if it
1435 * is currently mounted read-only.
1436 */
1437 if ((flags & MNT_RELOAD) &&
1438 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1439 error = ENOTSUP;
1440 goto out;
1441 }
1442
1443 /*
1444 * Only root, or the user that did the original mount is
1445 * permitted to update it.
1446 */
1447 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1448 (!vfs_context_issuser(ctx))) {
1449 error = EPERM;
1450 goto out;
1451 }
1452#if CONFIG_MACF
1453 error = mac_mount_check_remount(ctx, mp);
1454 if (error != 0) {
1455 goto out;
1456 }
1457#endif
1458
1459out:
1460 if (error) {
1461 lck_rw_done(&mp->mnt_rwlock);
1462 }
1463
1464 return error;
1465}
1466
/*
 * Release the exclusive hold on the mount rwlock taken by a
 * successful mount_begin_update().  Must be called exactly once per
 * successful begin.
 */
static void
mount_end_update(mount_t mp)
{
	lck_rw_done(&mp->mnt_rwlock);
}
1472
1473static int
1474get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1475{
1476 vnode_t vp;
1477
1478 if (height >= MAX_IMAGEBOOT_NESTING) {
1479 return EINVAL;
1480 }
1481
1482 vp = imgsrc_rootvnodes[height];
1483 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1484 *rvpp = vp;
1485 return 0;
1486 } else {
1487 return ENOENT;
1488 }
1489}
1490
1491static int
1492relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
1493 const char *fsname, vfs_context_t ctx,
1494 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
1495{
1496 int error;
1497 mount_t mp;
1498 boolean_t placed = FALSE;
1499 vnode_t devvp = NULLVP;
1500 struct vfstable *vfsp;
1501 user_addr_t devpath;
1502 char *old_mntonname;
1503 vnode_t rvp;
1504 uint32_t height;
1505 uint32_t flags;
1506
1507 /* If we didn't imageboot, nothing to move */
1508 if (imgsrc_rootvnodes[0] == NULLVP) {
1509 return EINVAL;
1510 }
1511
1512 /* Only root can do this */
1513 if (!vfs_context_issuser(ctx)) {
1514 return EPERM;
1515 }
1516
1517 IMGSRC_DEBUG("looking for root vnode.\n");
1518
1519 /*
1520 * Get root vnode of filesystem we're moving.
1521 */
1522 if (by_index) {
1523 if (is64bit) {
1524 struct user64_mnt_imgsrc_args mia64;
1525 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1526 if (error != 0) {
1527 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1528 return error;
1529 }
1530
1531 height = mia64.mi_height;
1532 flags = mia64.mi_flags;
1533 devpath = mia64.mi_devpath;
1534 } else {
1535 struct user32_mnt_imgsrc_args mia32;
1536 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1537 if (error != 0) {
1538 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1539 return error;
1540 }
1541
1542 height = mia32.mi_height;
1543 flags = mia32.mi_flags;
1544 devpath = mia32.mi_devpath;
1545 }
1546 } else {
1547 /*
1548 * For binary compatibility--assumes one level of nesting.
1549 */
1550 if (is64bit) {
1551 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1552 return error;
1553 } else {
1554 user32_addr_t tmp;
1555 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1556 return error;
1557
1558 /* munge into LP64 addr */
1559 devpath = CAST_USER_ADDR_T(tmp);
1560 }
1561
1562 height = 0;
1563 flags = 0;
1564 }
1565
1566 if (flags != 0) {
1567 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1568 return EINVAL;
1569 }
1570
1571 error = get_imgsrc_rootvnode(height, &rvp);
1572 if (error != 0) {
1573 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
1574 return error;
1575 }
1576
1577 IMGSRC_DEBUG("got root vnode.\n");
1578
1579 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1580
1581 /* Can only move once */
1582 mp = vnode_mount(rvp);
1583 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1584 IMGSRC_DEBUG("Already moved.\n");
1585 error = EBUSY;
1586 goto out0;
1587 }
1588
1589 IMGSRC_DEBUG("Starting updated.\n");
1590
1591 /* Get exclusive rwlock on mount, authorize update on mp */
1592 error = mount_begin_update(mp , ctx, 0);
1593 if (error != 0) {
1594 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
1595 goto out0;
1596 }
1597
1598 /*
1599 * It can only be moved once. Flag is set under the rwlock,
1600 * so we're now safe to proceed.
1601 */
1602 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1603 IMGSRC_DEBUG("Already moved [2]\n");
1604 goto out1;
1605 }
1606
1607
1608 IMGSRC_DEBUG("Preparing coveredvp.\n");
1609
1610 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1611 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
1612 if (error != 0) {
1613 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
1614 goto out1;
1615 }
1616
1617 IMGSRC_DEBUG("Covered vp OK.\n");
1618
1619 /* Sanity check the name caller has provided */
1620 vfsp = mp->mnt_vtable;
1621 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
1622 IMGSRC_DEBUG("Wrong fs name.\n");
1623 error = EINVAL;
1624 goto out2;
1625 }
1626
1627 /* Check the device vnode and update mount-from name, for local filesystems */
1628 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
1629 IMGSRC_DEBUG("Local, doing device validation.\n");
1630
1631 if (devpath != USER_ADDR_NULL) {
1632 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1633 if (error) {
1634 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1635 goto out2;
1636 }
1637
1638 vnode_put(devvp);
1639 }
1640 }
1641
1642 /*
1643 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1644 * and increment the name cache's mount generation
1645 */
1646
1647 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1648 error = place_mount_and_checkdirs(mp, vp, ctx);
1649 if (error != 0) {
1650 goto out2;
1651 }
1652
1653 placed = TRUE;
1654
1655 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1656 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1657
1658 /* Forbid future moves */
1659 mount_lock(mp);
1660 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1661 mount_unlock(mp);
1662
1663 /* Finally, add to mount list, completely ready to go */
1664 if (mount_list_add(mp) != 0) {
1665 /*
1666 * The system is shutting down trying to umount
1667 * everything, so fail with a plausible errno.
1668 */
1669 error = EBUSY;
1670 goto out3;
1671 }
1672
1673 mount_end_update(mp);
1674 vnode_put(rvp);
1675 FREE(old_mntonname, M_TEMP);
1676
1677 vfs_notify_mount(pvp);
1678
1679 return 0;
1680out3:
1681 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1682
1683 mount_lock(mp);
1684 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1685 mount_unlock(mp);
1686
1687out2:
1688 /*
1689 * Placing the mp on the vnode clears VMOUNT,
1690 * so cleanup is different after that point
1691 */
1692 if (placed) {
1693 /* Rele the vp, clear VMOUNT and v_mountedhere */
1694 undo_place_on_covered_vp(mp, vp);
1695 } else {
1696 vnode_lock_spin(vp);
1697 CLR(vp->v_flag, VMOUNT);
1698 vnode_unlock(vp);
1699 }
1700out1:
1701 mount_end_update(mp);
1702
1703out0:
1704 vnode_put(rvp);
1705 FREE(old_mntonname, M_TEMP);
1706 return error;
1707}
1708
1709#endif /* CONFIG_IMGSRC_ACCESS */
1710
/*
 * Enable disk quotas on a freshly mounted (or updated) HFS filesystem
 * when the per-type quota trigger files are present.  Errors are
 * deliberately ignored: quota setup must not interfere with the mount.
 */
void
enablequotas(struct mount *mp, vfs_context_t ctx)
{
	struct nameidata qnd;
	int type;
	char qfpath[MAXPATHLEN];
	const char *qfname = QUOTAFILENAME;
	const char *qfopsname = QUOTAOPSNAME;
	const char *qfextension[] = INITQFNAMES;

	/* XXX Should be an MNTK_ flag, instead of strncmp()'s */
	if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
		return;
	}
	/*
	 * Enable filesystem disk quotas if necessary.
	 * We ignore errors as this should not interfere with final mount
	 */
	for (type=0; type < MAXQUOTAS; type++) {
		/* Look for the per-type trigger ("ops") file in the mount root */
		snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
		NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
		       CAST_USER_ADDR_T(qfpath), ctx);
		if (namei(&qnd) != 0)
			continue;	    /* option file to trigger quotas is not present */
		/* drop the iocount taken by namei() */
		vnode_put(qnd.ni_vp);
		nameidone(&qnd);
		/* Trigger present: turn quotas on using the actual quota file */
		snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);

		(void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
	}
	return;
}
1743
1744
/*
 * Per-process callback for checkdirs(): if the process's current or
 * root directory is the covered vnode (olddp), retarget it to the
 * root of the newly mounted filesystem (newdp), moving the usecount
 * from the old vnode to the new one.  Always returns PROC_RETURNED so
 * the proc iteration continues.
 */
static int
checkdirs_callback(proc_t p, void * arg)
{
	struct cdirargs * cdrp = (struct cdirargs * )arg;
	vnode_t olddp = cdrp->olddp;
	vnode_t newdp = cdrp->newdp;
	struct filedesc *fdp;
	vnode_t tvp;
	vnode_t fdp_cvp;
	vnode_t fdp_rvp;
	int cdir_changed = 0;
	int rdir_changed = 0;

	/*
	 * XXX Also needs to iterate each thread in the process to see if it
	 * XXX is using a per-thread current working directory, and, if so,
	 * XXX update that as well.
	 */

	/* Snapshot the cwd/root pointers under the fd lock */
	proc_fdlock(p);
	fdp = p->p_fd;
	if (fdp == (struct filedesc *)0) {
		proc_fdunlock(p);
		return(PROC_RETURNED);
	}
	fdp_cvp = fdp->fd_cdir;
	fdp_rvp = fdp->fd_rdir;
	proc_fdunlock(p);

	/*
	 * NOTE(review): fdp->fd_cdir / fd_rdir are re-read below after
	 * the fd lock has been dropped, while the comparisons use the
	 * snapshots taken above -- confirm this window is benign.
	 */
	if (fdp_cvp == olddp) {
		/* take a usecount on newdp before publishing it as the cwd */
		vnode_ref(newdp);
		tvp = fdp->fd_cdir;
		fdp_cvp = newdp;
		cdir_changed = 1;
		vnode_rele(tvp);
	}
	if (fdp_rvp == olddp) {
		vnode_ref(newdp);
		tvp = fdp->fd_rdir;
		fdp_rvp = newdp;
		rdir_changed = 1;
		vnode_rele(tvp);
	}
	/* Re-acquire the fd lock only if something actually changed */
	if (cdir_changed || rdir_changed) {
		proc_fdlock(p);
		fdp->fd_cdir = fdp_cvp;
		fdp->fd_rdir = fdp_rvp;
		proc_fdunlock(p);
	}
	return(PROC_RETURNED);
}
1796
1797
1798
/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted. If so, replace them with the new mount point.
 *
 * olddp is the covered vnode; the new root is obtained via VFS_ROOT()
 * of olddp->v_mountedhere.  The system rootvnode is updated too when
 * it was the covered vnode.
 */
static int
checkdirs(vnode_t olddp, vfs_context_t ctx)
{
	vnode_t newdp;
	vnode_t tvp;
	int err;
	struct cdirargs cdr;

	/* Only our caller's usecount exists: no cwd/root references olddp */
	if (olddp->v_usecount == 1)
		return(0);
	/* Get the root of the covering filesystem (returned with an iocount) */
	err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);

	if (err != 0) {
#if DIAGNOSTIC
		panic("mount: lost mount: error %d", err);
#endif
		return(err);
	}

	cdr.olddp = olddp;
	cdr.newdp = newdp;
	/* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
	proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);

	/* Swap the global root vnode as well, moving the usecount */
	if (rootvnode == olddp) {
		vnode_ref(newdp);
		tvp = rootvnode;
		rootvnode = newdp;
		vnode_rele(tvp);
	}

	/* drop the iocount taken by VFS_ROOT */
	vnode_put(newdp);
	return(0);
}
1838
1839/*
1840 * Unmount a file system.
1841 *
1842 * Note: unmount takes a path to the vnode mounted on as argument,
1843 * not special file (as before).
1844 */
1845/* ARGSUSED */
1846int
1847unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1848{
1849 vnode_t vp;
1850 struct mount *mp;
1851 int error;
1852 struct nameidata nd;
1853 vfs_context_t ctx = vfs_context_current();
1854
1855 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
1856 UIO_USERSPACE, uap->path, ctx);
1857 error = namei(&nd);
1858 if (error)
1859 return (error);
1860 vp = nd.ni_vp;
1861 mp = vp->v_mount;
1862 nameidone(&nd);
1863
1864#if CONFIG_MACF
1865 error = mac_mount_check_umount(ctx, mp);
1866 if (error != 0) {
1867 vnode_put(vp);
1868 return (error);
1869 }
1870#endif
1871 /*
1872 * Must be the root of the filesystem
1873 */
1874 if ((vp->v_flag & VROOT) == 0) {
1875 vnode_put(vp);
1876 return (EINVAL);
1877 }
1878 mount_ref(mp, 0);
1879 vnode_put(vp);
1880 /* safedounmount consumes the mount ref */
1881 return (safedounmount(mp, uap->flags, ctx));
1882}
1883
1884int
1885vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx)
1886{
1887 mount_t mp;
1888
1889 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1890 if (mp == (mount_t)0) {
1891 return(ENOENT);
1892 }
1893 mount_ref(mp, 0);
1894 mount_iterdrop(mp);
1895 /* safedounmount consumes the mount ref */
1896 return(safedounmount(mp, flags, ctx));
1897}
1898
1899
/*
 * The mount struct comes with a mount ref which will be consumed.
 * Do the actual file system unmount, prevent some common foot shooting.
 *
 * Rejects: non-responsive filesystems under MNT_NOBLOCK (unless
 * forced), callers that are neither root nor the original mounter,
 * the root filesystem, and mounts backing an imageboot root.
 */
int
safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
{
	int error;
	proc_t p = vfs_context_proc(ctx);

	/*
	 * If the file system is not responding and MNT_NOBLOCK
	 * is set and not a forced unmount then return EBUSY.
	 *
	 * NOTE(review): MNT_LNOTRESP is tested against mnt_kern_flag
	 * here, while MNT_L* flags elsewhere in this file live in
	 * mnt_lflag -- confirm the intended flag/field pairing.
	 */
	if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
	    (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
		error = EBUSY;
		goto out;
	}

	/*
	 * Skip authorization if the mount is tagged as permissive and
	 * this is not a forced-unmount attempt.
	 */
	if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to unmount this filesystem.
		 */
		if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
		    (error = suser(kauth_cred_get(), &p->p_acflag)))
			goto out;
	}
	/*
	 * Don't allow unmounting the root file system.
	 */
	if (mp->mnt_flag & MNT_ROOTFS) {
		error = EBUSY; /* the root is always busy */
		goto out;
	}

/*
 * NOTE(review): the rest of this file gates imgsrc code with
 * "#if CONFIG_IMGSRC_ACCESS"; "#ifdef" here is also true when the
 * option is defined to 0 -- confirm this difference is intended.
 */
#ifdef CONFIG_IMGSRC_ACCESS
	/* A mount backing the imageboot root can never be unmounted */
	if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
		error = EBUSY;
		goto out;
	}
#endif /* CONFIG_IMGSRC_ACCESS */

	/* dounmount() consumes the mount ref on success and failure */
	return (dounmount(mp, flags, 1, ctx));

out:
	mount_drop(mp, 0);
	return(error);
}
1954
/*
 * Do the actual file system unmount.
 *
 * mp carries a mount ref iff withref != 0; that ref is dropped here.
 * flags are the MNT_* unmount flags (MNT_FORCE, MNT_NOBLOCK,
 * MNT_LNOSUB).  Returns 0 on success or an errno; on failure the
 * mount is returned to service (MNTK_UNMOUNT / MNT_LUNMOUNT /
 * MNT_LFORCE are cleared again).
 */
int
dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
{
	vnode_t coveredvp = (vnode_t)0;
	int error;
	int needwakeup = 0;
	int forcedunmount = 0;
	int lflags = 0;
	struct vnode *devvp = NULLVP;
#if CONFIG_TRIGGERS
	proc_t p = vfs_context_proc(ctx);
	int did_vflush = 0;
	int pflags_save = 0;
#endif /* CONFIG_TRIGGERS */

#if CONFIG_FSE
	if (!(flags & MNT_FORCE)) {
		fsevent_unmount(mp, ctx); /* has to come first! */
	}
#endif

	mount_lock(mp);

	/*
	 * If already an unmount in progress just return EBUSY.
	 * Even a forced unmount cannot override.
	 */
	if (mp->mnt_lflag & MNT_LUNMOUNT) {
		if (withref != 0)
			mount_drop(mp, 1);
		mount_unlock(mp);
		return (EBUSY);
	}

	if (flags & MNT_FORCE) {
		forcedunmount = 1;
		mp->mnt_lflag |= MNT_LFORCE;
	}

#if CONFIG_TRIGGERS
	/* Keep this process from hanging on unresponsive remote filesystems */
	if (flags & MNT_NOBLOCK && p != kernproc)
		pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
#endif

	/* Publish unmount-in-progress state under the mount lock */
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	mp->mnt_lflag |= MNT_LUNMOUNT;
	mp->mnt_flag &=~ MNT_ASYNC;
	/*
	 * anyone currently in the fast path that
	 * trips over the cached rootvp will be
	 * dumped out and forced into the slow path
	 * to regenerate a new cached value
	 */
	mp->mnt_realrootvp = NULLVP;
	mount_unlock(mp);

	if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
		/*
		 * Force unmount any mounts in this filesystem.
		 * If any unmounts fail - just leave them dangling.
		 * Avoids recursion.
		 */
		(void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
	}

	/*
	 * taking the name_cache_lock exclusively will
	 * insure that everyone is out of the fast path who
	 * might be trying to use a now stale copy of
	 * vp->v_mountedhere->mnt_realrootvp
	 * bumping mount_generation causes the cached values
	 * to be invalidated
	 */
	name_cache_lock();
	mount_generation++;
	name_cache_unlock();


	lck_rw_lock_exclusive(&mp->mnt_rwlock);
	if (withref != 0)
		mount_drop(mp, 0);
	error = 0;
	if (forcedunmount == 0) {
		/* Non-forced: flush everything first; a sync failure aborts */
		ubc_umount(mp);	/* release cached vnodes */
		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
			error = VFS_SYNC(mp, MNT_WAIT, ctx);
			if (error) {
				/* Back out the in-progress state and fail */
				mount_lock(mp);
				mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
				mp->mnt_lflag &= ~MNT_LUNMOUNT;
				mp->mnt_lflag &= ~MNT_LFORCE;
				goto out;
			}
		}
	}

	/* free disk_conditioner_info structure for this mount */
	disk_conditioner_unmount(mp);

	IOBSDMountChange(mp, kIOMountChangeUnmount);

#if CONFIG_TRIGGERS
	vfs_nested_trigger_unmounts(mp, flags, ctx);
	did_vflush = 1;
#endif
	if (forcedunmount)
		lflags |= FORCECLOSE;
	error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
	if ((forcedunmount == 0) && error) {
		/* Busy vnodes remain on a non-forced unmount: back out */
		mount_lock(mp);
		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
		mp->mnt_lflag &= ~MNT_LUNMOUNT;
		mp->mnt_lflag &= ~MNT_LFORCE;
		goto out;
	}

	/* make sure there are no one in the mount iterations or lookup */
	mount_iterdrain(mp);

	error = VFS_UNMOUNT(mp, flags, ctx);
	if (error) {
		/* Filesystem refused the unmount: back out */
		mount_iterreset(mp);
		mount_lock(mp);
		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
		mp->mnt_lflag &= ~MNT_LUNMOUNT;
		mp->mnt_lflag &= ~MNT_LFORCE;
		goto out;
	}

	/* increment the operations count */
	if (!error)
		OSAddAtomic(1, &vfs_nummntops);

	if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
		/* hold an io reference and drop the usecount before close */
		devvp = mp->mnt_devvp;
		vnode_getalways(devvp);
		vnode_rele(devvp);
		VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
		    ctx);
		vnode_clearmountedon(devvp);
		vnode_put(devvp);
	}
	/* Drop the rwlock across mount_list_remove, then retake it */
	lck_rw_done(&mp->mnt_rwlock);
	mount_list_remove(mp);
	lck_rw_lock_exclusive(&mp->mnt_rwlock);

	/* mark the mount point hook in the vp but not drop the ref yet */
	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
		/*
		 * The covered vnode needs special handling. Trying to get an
		 * iocount must not block here as this may lead to deadlocks
		 * if the Filesystem to which the covered vnode belongs is
		 * undergoing forced unmounts. Since we hold a usecount, the
		 * vnode cannot be reused (it can, however, still be terminated)
		 */
		vnode_getalways(coveredvp);
		vnode_lock_spin(coveredvp);

		mp->mnt_crossref++;
		coveredvp->v_mountedhere = (struct mount *)0;
		CLR(coveredvp->v_flag, VMOUNT);

		vnode_unlock(coveredvp);
		vnode_put(coveredvp);
	}

	mount_list_lock();
	mp->mnt_vtable->vfc_refcount--;
	mount_list_unlock();

	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
	mount_lock(mp);
	mp->mnt_lflag |= MNT_LDEAD;

	if (mp->mnt_lflag & MNT_LWAIT) {
		/*
		 * do the wakeup here
		 * in case we block in mount_refdrain
		 * which will drop the mount lock
		 * and allow anyone blocked in vfs_busy
		 * to wakeup and see the LDEAD state
		 */
		mp->mnt_lflag &= ~MNT_LWAIT;
		wakeup((caddr_t)mp);
	}
	mount_refdrain(mp);
out:
	/* Common exit: mount lock is held here on every path */
	if (mp->mnt_lflag & MNT_LWAIT) {
		mp->mnt_lflag &= ~MNT_LWAIT;
		needwakeup = 1;
	}

#if CONFIG_TRIGGERS
	if (flags & MNT_NOBLOCK && p != kernproc) {
		// Restore P_NOREMOTEHANG bit to its previous value
		if ((pflags_save & P_NOREMOTEHANG) == 0)
			OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
	}

	/*
	 * Callback and context are set together under the mount lock, and
	 * never cleared, so we're safe to examine them here, drop the lock,
	 * and call out.
	 */
	if (mp->mnt_triggercallback != NULL) {
		mount_unlock(mp);
		if (error == 0) {
			mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
		} else if (did_vflush) {
			mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
		}
	} else {
		mount_unlock(mp);
	}
#else
	mount_unlock(mp);
#endif /* CONFIG_TRIGGERS */

	lck_rw_done(&mp->mnt_rwlock);

	if (needwakeup)
		wakeup((caddr_t)mp);

	if (!error) {
		if ((coveredvp != NULLVP)) {
			vnode_t pvp = NULLVP;

			/*
			 * The covered vnode needs special handling. Trying to
			 * get an iocount must not block here as this may lead
			 * to deadlocks if the Filesystem to which the covered
			 * vnode belongs is undergoing forced unmounts. Since we
			 * hold a usecount, the vnode cannot be reused
			 * (it can, however, still be terminated).
			 */
			vnode_getalways(coveredvp);

			mount_dropcrossref(mp, coveredvp, 0);
			/*
			 * We'll _try_ to detect if this really needs to be
			 * done. The coveredvp can only be in termination (or
			 * terminated) if the coveredvp's mount point is in a
			 * forced unmount (or has been) since we still hold the
			 * ref.
			 */
			if (!vnode_isrecycled(coveredvp)) {
				pvp = vnode_getparent(coveredvp);
#if CONFIG_TRIGGERS
				if (coveredvp->v_resolve) {
					vnode_trigger_rearm(coveredvp, ctx);
				}
#endif
			}

			/* Drop the usecount the mount held on its cover */
			vnode_rele(coveredvp);
			vnode_put(coveredvp);
			coveredvp = NULLVP;

			/* Notify watchers of the parent directory */
			if (pvp) {
				lock_vnode_and_post(pvp, NOTE_WRITE);
				vnode_put(pvp);
			}
		} else if (mp->mnt_flag & MNT_ROOTFS) {
			/* Root mount has no cover: destroy the mount outright */
			mount_lock_destroy(mp);
#if CONFIG_MACF
			mac_mount_label_destroy(mp);
#endif
			FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
		} else
			panic("dounmount: no coveredvp");
	}
	return (error);
}
2233
/*
 * Unmount any mounts in this filesystem.
 *
 * Collects the fsids of all mounts transitively covered by mp, then
 * unmounts them deepest-first.  Individual unmount failures are
 * deliberately ignored (the submounts are left dangling).
 */
void
dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
{
	mount_t smp;
	fsid_t *fsids, fsid;
	int fsids_sz;
	int count = 0, i, m = 0;
	vnode_t vp;

	mount_list_lock();

	// Get an array to hold the submounts fsids.
	TAILQ_FOREACH(smp, &mountlist, mnt_list)
		count++;
	fsids_sz = count * sizeof(fsid_t);
	/* M_NOWAIT: we must not sleep while holding the mount list lock */
	MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
	if (fsids == NULL) {
		mount_list_unlock();
		goto out;
	}
	fsids[0] = mp->mnt_vfsstat.f_fsid;	// Prime the pump

	/*
	 * Fill the array with submount fsids.
	 * Since mounts are always added to the tail of the mount list, the
	 * list is always in mount order.
	 * For each mount check if the mounted-on vnode belongs to a
	 * mount that's already added to our array of mounts to be unmounted.
	 */
	for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
		vp = smp->mnt_vnodecovered;
		if (vp == NULL)
			continue;
		fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid;	// Underlying fsid
		for (i = 0; i <= m; i++) {
			if (fsids[i].val[0] == fsid.val[0] &&
			    fsids[i].val[1] == fsid.val[1]) {
				/* Covered by a known mount: record this submount */
				fsids[++m] = smp->mnt_vfsstat.f_fsid;
				break;
			}
		}
	}
	mount_list_unlock();

	// Unmount the submounts in reverse order. Ignore errors.
	for (i = m; i > 0; i--) {
		smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
		if (smp) {
			/* dounmount() consumes this mount ref */
			mount_ref(smp, 0);
			mount_iterdrop(smp);
			(void) dounmount(smp, flags, 1, ctx);
		}
	}
out:
	if (fsids)
		FREE(fsids, M_TEMP);
}
2294
/*
 * Drop one cross reference on 'mp' held against its covered vnode 'dp'.
 * If that was the last cross ref and 'mp' is no longer the mount covering
 * 'dp' (the unmount already disassociated them), destroy the mount
 * structure here.  'need_put' additionally releases the caller's iocount
 * on 'dp' (while the vnode lock is still held).
 */
void
mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
{
	vnode_lock(dp);
	mp->mnt_crossref--;

	if (mp->mnt_crossref < 0)
		panic("mount cross refs -ve");

	if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {

		if (need_put)
			vnode_put_locked(dp);
		vnode_unlock(dp);

		/* Last cross ref and no longer mounted here: free the mount */
		mount_lock_destroy(mp);
#if CONFIG_MACF
		mac_mount_label_destroy(mp);
#endif
		FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
		return;
	}
	if (need_put)
		vnode_put_locked(dp);
	vnode_unlock(dp);
}
2321
2322
2323/*
2324 * Sync each mounted filesystem.
2325 */
#if DIAGNOSTIC
int syncprt = 0;	/* when set, sync paths call vfs_bufstats() */
#endif

int print_vmpage_stat=0;	/* when set, sync paths call vm_countdirtypages() */
2331
2332static int
2333sync_callback(mount_t mp, __unused void *arg)
2334{
2335 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2336 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2337
2338 mp->mnt_flag &= ~MNT_ASYNC;
2339 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
2340 if (asyncflag)
2341 mp->mnt_flag |= MNT_ASYNC;
2342 }
2343
2344 return (VFS_RETURNED);
2345}
2346
/* ARGSUSED */
/*
 * sync(2): trigger a non-blocking flush of every mounted filesystem.
 * Always returns success.
 */
int
sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
{
	/* NULL arg selects MNT_NOWAIT in sync_callback() */
	vfs_iterate(LK_NOWAIT, sync_callback, NULL);

	if (print_vmpage_stat) {
		vm_countdirtypages();
	}

#if DIAGNOSTIC
	if (syncprt)
		vfs_bufstats();
#endif /* DIAGNOSTIC */
	return 0;
}
2363
/*
 * Media selector for sync_internal_callback(): sync everything, only
 * "reliable" media (local mounts not on a virtual device), or only
 * the remaining (unreliable) media.
 */
typedef enum {
	SYNC_ALL = 0,
	SYNC_ONLY_RELIABLE_MEDIA = 1,
	SYNC_ONLY_UNRELIABLE_MEDIA = 2
} sync_type_t;
2369
2370static int
2371sync_internal_callback(mount_t mp, void *arg)
2372{
2373 if (arg) {
2374 int is_reliable = !(mp->mnt_kern_flag & MNTK_VIRTUALDEV) &&
2375 (mp->mnt_flag & MNT_LOCAL);
2376 sync_type_t sync_type = *((sync_type_t *)arg);
2377
2378 if ((sync_type == SYNC_ONLY_RELIABLE_MEDIA) && !is_reliable)
2379 return (VFS_RETURNED);
2380 else if ((sync_type = SYNC_ONLY_UNRELIABLE_MEDIA) && is_reliable)
2381 return (VFS_RETURNED);
2382 }
2383
2384 (void)sync_callback(mp, NULL);
2385
2386 return (VFS_RETURNED);
2387}
2388
/* Both variables are protected by sync_mtx_lck */
int sync_thread_state = 0;
int sync_timeout_seconds = 5;

#define SYNC_THREAD_RUN		0x0001	/* a(nother) sync pass has been requested */
#define SYNC_THREAD_RUNNING	0x0002	/* sync_thread is currently alive */
2394
/*
 * Kernel thread doing the actual flushing for sync_internal().
 * Loops as long as new passes are requested (SYNC_THREAD_RUN),
 * syncing reliable media first, then unreliable media.  All state
 * transitions happen under sync_mtx_lck.
 */
static void
sync_thread(__unused void *arg, __unused wait_result_t wr)
{
	sync_type_t sync_type;

	lck_mtx_lock(sync_mtx_lck);
	while (sync_thread_state & SYNC_THREAD_RUN) {
		sync_thread_state &= ~SYNC_THREAD_RUN;
		lck_mtx_unlock(sync_mtx_lck);

		/* Reliable (local, non-virtual) media first, then the rest */
		sync_type = SYNC_ONLY_RELIABLE_MEDIA;
		vfs_iterate(LK_NOWAIT, sync_internal_callback, &sync_type);
		sync_type = SYNC_ONLY_UNRELIABLE_MEDIA;
		vfs_iterate(LK_NOWAIT, sync_internal_callback, &sync_type);

		lck_mtx_lock(sync_mtx_lck);
	}
	/*
	 * This wakeup _has_ to be issued before the lock is released otherwise
	 * we may end up waking up a thread in sync_internal which is
	 * expecting a wakeup from a thread it just created and not from this
	 * thread which is about to exit.
	 */
	wakeup(&sync_thread_state);
	sync_thread_state &= ~SYNC_THREAD_RUNNING;
	lck_mtx_unlock(sync_mtx_lck);

	if (print_vmpage_stat) {
		vm_countdirtypages();
	}

#if DIAGNOSTIC
	if (syncprt)
		vfs_bufstats();
#endif /* DIAGNOSTIC */
}
2431
/* Last time the sync-timeout message was printed (rate limits the printf) */
struct timeval sync_timeout_last_print = {0, 0};

/*
 * An in-kernel sync for power management to call.
 * This function always returns within sync_timeout seconds.
 *
 * Requests a pass from sync_thread (spawning it if necessary) and waits
 * at most sync_timeout_seconds for its completion wakeup.  Always
 * returns 0; a timed-out sync simply keeps running in the thread.
 */
__private_extern__ int
sync_internal(void)
{
	thread_t thd;
	int error;
	int thread_created = FALSE;
	struct timespec ts = {sync_timeout_seconds, 0};

	lck_mtx_lock(sync_mtx_lck);
	sync_thread_state |= SYNC_THREAD_RUN;
	if (!(sync_thread_state & SYNC_THREAD_RUNNING)) {
		int kr;

		sync_thread_state |= SYNC_THREAD_RUNNING;
		kr = kernel_thread_start(sync_thread, NULL, &thd);
		if (kr != KERN_SUCCESS) {
			sync_thread_state &= ~SYNC_THREAD_RUNNING;
			lck_mtx_unlock(sync_mtx_lck);
			printf("sync_thread failed\n");
			return (0);
		}
		thread_created = TRUE;
	}

	/* PDROP releases sync_mtx_lck when msleep returns */
	error = msleep((caddr_t)&sync_thread_state, sync_mtx_lck,
	    (PVFS | PDROP | PCATCH), "sync_thread", &ts);
	if (error) {
		struct timeval now;

		microtime(&now);
		if (now.tv_sec - sync_timeout_last_print.tv_sec > 120) {
			printf("sync timed out: %d sec\n", sync_timeout_seconds);
			sync_timeout_last_print.tv_sec = now.tv_sec;
		}
	}

	if (thread_created)
		thread_deallocate(thd);

	return (0);
} /* end of sync_internal call */
2479
2480/*
2481 * Change filesystem quotas.
2482 */
2483#if QUOTA
/*
 * quotactl(2): manipulate filesystem quotas.  The path identifies the
 * mount to operate on; the subcommand determines how uap->arg is
 * interpreted (quota file path, dqblk pointer, or status integer).
 */
int
quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
{
	struct mount *mp;
	int error, quota_cmd, quota_status = 0;
	caddr_t datap;
	size_t fnamelen;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();
	struct dqblk my_dqblk = {};

	AUDIT_ARG(uid, uap->uid);
	AUDIT_ARG(cmd, uap->cmd);
	/* Resolve the path only to locate the mount it lives on */
	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	    uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	mp = nd.ni_vp->v_mount;
	vnode_put(nd.ni_vp);
	nameidone(&nd);

	/* copyin any data we will need for downstream code */
	quota_cmd = uap->cmd >> SUBCMDSHIFT;

	switch (quota_cmd) {
	case Q_QUOTAON:
		/* uap->arg specifies a file from which to take the quotas */
		fnamelen = MAXPATHLEN;
		datap = kalloc(MAXPATHLEN);
		error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
		break;
	case Q_GETQUOTA:
		/* uap->arg is a pointer to a dqblk structure. */
		datap = (caddr_t) &my_dqblk;
		break;
	case Q_SETQUOTA:
	case Q_SETUSE:
		/* uap->arg is a pointer to a dqblk structure. */
		datap = (caddr_t) &my_dqblk;
		if (proc_is64bit(p)) {
			/* 64-bit user layout differs; munge into kernel dqblk */
			struct user_dqblk my_dqblk64;
			error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
			if (error == 0) {
				munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
			}
		}
		else {
			error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
		}
		break;
	case Q_QUOTASTAT:
		/* uap->arg is a pointer to an integer */
		datap = (caddr_t) &quota_status;
		break;
	default:
		datap = NULL;
		break;
	} /* switch */

	if (error == 0) {
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
	}

	/* Copy results back out and/or release temporary storage */
	switch (quota_cmd) {
	case Q_QUOTAON:
		if (datap != NULL)
			kfree(datap, MAXPATHLEN);
		break;
	case Q_GETQUOTA:
		/* uap->arg is a pointer to a dqblk structure we need to copy out to */
		if (error == 0) {
			if (proc_is64bit(p)) {
				struct user_dqblk my_dqblk64;

				memset(&my_dqblk64, 0, sizeof(my_dqblk64));
				munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
				error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
			}
			else {
				error = copyout(datap, uap->arg, sizeof (struct dqblk));
			}
		}
		break;
	case Q_QUOTASTAT:
		/* uap->arg is a pointer to an integer */
		if (error == 0) {
			error = copyout(datap, uap->arg, sizeof(quota_status));
		}
		break;
	default:
		break;
	} /* switch */

	return (error);
}
2580#else
/* Quota support not compiled in: quotactl(2) always fails */
int
quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
{
	return (EOPNOTSUPP);
}
2586#endif /* QUOTA */
2587
2588/*
2589 * Get filesystem statistics.
2590 *
2591 * Returns: 0 Success
2592 * namei:???
2593 * vfs_update_vfsstat:???
2594 * munge_statfs:EFAULT
2595 */
2596/* ARGSUSED */
2597int
2598statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
2599{
2600 struct mount *mp;
2601 struct vfsstatfs *sp;
2602 int error;
2603 struct nameidata nd;
2604 vfs_context_t ctx = vfs_context_current();
2605 vnode_t vp;
2606
2607 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2608 UIO_USERSPACE, uap->path, ctx);
2609 error = namei(&nd);
2610 if (error != 0)
2611 return (error);
2612 vp = nd.ni_vp;
2613 mp = vp->v_mount;
2614 sp = &mp->mnt_vfsstat;
2615 nameidone(&nd);
2616
2617#if CONFIG_MACF
2618 error = mac_mount_check_stat(ctx, mp);
2619 if (error != 0)
2620 return (error);
2621#endif
2622
2623 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
2624 if (error != 0) {
2625 vnode_put(vp);
2626 return (error);
2627 }
2628
2629 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2630 vnode_put(vp);
2631 return (error);
2632}
2633
2634/*
2635 * Get filesystem statistics.
2636 */
/* ARGSUSED */
/*
 * fstatfs(2): statistics for the filesystem containing the vnode
 * referenced by uap->fd.
 */
int
fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;

	AUDIT_ARG(fd, uap->fd);

	/* file_drop() must balance this reference on every exit path */
	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	error = vnode_getwithref(vp);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);

	mp = vp->v_mount;
	if (!mp) {
		error = EBADF;
		goto out;
	}

#if CONFIG_MACF
	error = mac_mount_check_stat(vfs_context_current(), mp);
	if (error != 0)
		goto out;
#endif

	sp = &mp->mnt_vfsstat;
	if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
		goto out;
	}

	/* Copy out, handling 32/64-bit user processes */
	error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);

out:
	file_drop(uap->fd);
	vnode_put(vp);

	return (error);
}
2684
2685/*
2686 * Common routine to handle copying of statfs64 data to user space
2687 */
static int
statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
{
	int error;
	struct statfs64 sfs;

	/* Zero-fill so padding/unused fields never leak kernel stack */
	bzero(&sfs, sizeof(sfs));

	sfs.f_bsize = sfsp->f_bsize;
	sfs.f_iosize = (int32_t)sfsp->f_iosize;
	sfs.f_blocks = sfsp->f_blocks;
	sfs.f_bfree = sfsp->f_bfree;
	sfs.f_bavail = sfsp->f_bavail;
	sfs.f_files = sfsp->f_files;
	sfs.f_ffree = sfsp->f_ffree;
	sfs.f_fsid = sfsp->f_fsid;
	sfs.f_owner = sfsp->f_owner;
	sfs.f_type = mp->mnt_vtable->vfc_typenum;
	/* Only expose user-visible mount flags */
	sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	sfs.f_fssubtype = sfsp->f_fssubtype;
	/* An override (e.g. for compatibility) wins over the real fs type name */
	if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
		strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
	} else {
		strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
	}
	strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
	strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);

	error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));

	return(error);
}
2720
2721/*
2722 * Get file system statistics in 64-bit mode
2723 */
2724int
2725statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2726{
2727 struct mount *mp;
2728 struct vfsstatfs *sp;
2729 int error;
2730 struct nameidata nd;
2731 vfs_context_t ctxp = vfs_context_current();
2732 vnode_t vp;
2733
2734 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2735 UIO_USERSPACE, uap->path, ctxp);
2736 error = namei(&nd);
2737 if (error != 0)
2738 return (error);
2739 vp = nd.ni_vp;
2740 mp = vp->v_mount;
2741 sp = &mp->mnt_vfsstat;
2742 nameidone(&nd);
2743
2744#if CONFIG_MACF
2745 error = mac_mount_check_stat(ctxp, mp);
2746 if (error != 0)
2747 return (error);
2748#endif
2749
2750 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
2751 if (error != 0) {
2752 vnode_put(vp);
2753 return (error);
2754 }
2755
2756 error = statfs64_common(mp, sp, uap->buf);
2757 vnode_put(vp);
2758
2759 return (error);
2760}
2761
2762/*
2763 * Get file system statistics in 64-bit mode
2764 */
/*
 * fstatfs64(2): statistics (statfs64 layout) for the filesystem
 * containing the vnode referenced by uap->fd.
 */
int
fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
{
	struct vnode *vp;
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;

	AUDIT_ARG(fd, uap->fd);

	/* file_drop() must balance this reference on every exit path */
	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	error = vnode_getwithref(vp);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);

	mp = vp->v_mount;
	if (!mp) {
		error = EBADF;
		goto out;
	}

#if CONFIG_MACF
	error = mac_mount_check_stat(vfs_context_current(), mp);
	if (error != 0)
		goto out;
#endif

	sp = &mp->mnt_vfsstat;
	if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
		goto out;
	}

	error = statfs64_common(mp, sp, uap->buf);

out:
	file_drop(uap->fd);
	vnode_put(vp);

	return (error);
}
2811
/* Shared iterator state for the getfsstat / getfsstat64 callbacks */
struct getfsstat_struct {
	user_addr_t sfsp;	/* user buffer cursor for statfs records */
	user_addr_t *mp;	/* optional array of user MAC label buffers */
	int count;		/* number of mounts visited so far */
	int maxcount;		/* capacity of the user buffer, in records */
	int flags;		/* MNT_NOWAIT / MNT_WAIT / MNT_DWAIT */
	int error;		/* first error encountered, if any */
};
2820
2821
/*
 * vfs_iterate() callback for getfsstat(): copy one statfs record (and
 * optionally its MAC label) to the user buffer, refreshing cached
 * statistics unless MNT_NOWAIT was requested.
 */
static int
getfsstat_callback(mount_t mp, void * arg)
{

	struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
	struct vfsstatfs *sp;
	int error, my_size;
	vfs_context_t ctx = vfs_context_current();

	if (fstp->sfsp && fstp->count < fstp->maxcount) {
#if CONFIG_MACF
		error = mac_mount_check_stat(ctx, mp);
		if (error != 0) {
			fstp->error = error;
			return(VFS_RETURNED_DONE);
		}
#endif
		sp = &mp->mnt_vfsstat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh the
		 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
		 */
		if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
		    (error = vfs_update_vfsstat(mp, ctx,
		    VFS_USER_EVENT))) {
			KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
			return(VFS_RETURNED);
		}

		/*
		 * Need to handle LP64 version of struct statfs
		 */
		error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
		if (error) {
			fstp->error = error;
			return(VFS_RETURNED_DONE);
		}
		/* Advance past the record just written (size varies by ABI) */
		fstp->sfsp += my_size;

		if (fstp->mp) {
#if CONFIG_MACF
			error = mac_mount_label_get(mp, *fstp->mp);
			if (error) {
				fstp->error = error;
				return(VFS_RETURNED_DONE);
			}
#endif
			fstp->mp++;
		}
	}
	/* Count every mount, even those beyond the buffer's capacity */
	fstp->count++;
	return(VFS_RETURNED);
}
2875
2876/*
2877 * Get statistics on all filesystems.
2878 */
2879int
2880getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2881{
2882 struct __mac_getfsstat_args muap;
2883
2884 muap.buf = uap->buf;
2885 muap.bufsize = uap->bufsize;
2886 muap.mac = USER_ADDR_NULL;
2887 muap.macsize = 0;
2888 muap.flags = uap->flags;
2889
2890 return (__mac_getfsstat(p, &muap, retval));
2891}
2892
2893/*
2894 * __mac_getfsstat: Get MAC-related file system statistics
2895 *
2896 * Parameters: p (ignored)
2897 * uap User argument descriptor (see below)
2898 * retval Count of file system statistics (N stats)
2899 *
2900 * Indirect: uap->bufsize Buffer size
2901 * uap->macsize MAC info size
2902 * uap->buf Buffer where information will be returned
2903 * uap->mac MAC info
2904 * uap->flags File system flags
2905 *
2906 *
2907 * Returns: 0 Success
2908 * !0 Not success
2909 *
2910 */
int
__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
{
	user_addr_t sfsp;
	user_addr_t *mp;
	size_t count, maxcount, bufsize, macsize;
	struct getfsstat_struct fst;

	bufsize = (size_t) uap->bufsize;
	macsize = (size_t) uap->macsize;

	/* Record size depends on the caller's ABI */
	if (IS_64BIT_PROCESS(p)) {
		maxcount = bufsize / sizeof(struct user64_statfs);
	}
	else {
		maxcount = bufsize / sizeof(struct user32_statfs);
	}
	sfsp = uap->buf;
	count = 0;

	mp = NULL;

#if CONFIG_MACF
	if (uap->mac != USER_ADDR_NULL) {
		u_int32_t *mp0;
		int error;
		unsigned int i;

		/* The MAC label array must match the statfs buffer 1:1 */
		count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
		if (count != maxcount)
			return (EINVAL);

		/* Copy in the array */
		MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
		if (mp0 == NULL) {
			return (ENOMEM);
		}

		error = copyin(uap->mac, mp0, macsize);
		if (error) {
			FREE(mp0, M_MACTEMP);
			return (error);
		}

		/* Normalize to an array of user_addr_t */
		MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
		if (mp == NULL) {
			FREE(mp0, M_MACTEMP);
			return (ENOMEM);
		}

		for (i = 0; i < count; i++) {
			if (IS_64BIT_PROCESS(p))
				mp[i] = ((user_addr_t *)mp0)[i];
			else
				mp[i] = (user_addr_t)mp0[i];
		}
		FREE(mp0, M_MACTEMP);
	}
#endif


	/*
	 * NOTE(review): maxcount (size_t) is narrowed into fst.maxcount
	 * (int) — presumably bufsize is bounded by the syscall layer;
	 * verify no truncation is possible for huge user buffers.
	 */
	fst.sfsp = sfsp;
	fst.mp = mp;
	fst.flags = uap->flags;
	fst.count = 0;
	fst.error = 0;
	fst.maxcount = maxcount;


	vfs_iterate(0, getfsstat_callback, &fst);

	if (mp)
		FREE(mp, M_MACTEMP);

	if (fst.error ) {
		KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
		return(fst.error);
	}

	/* Buffer exhausted: report capacity; otherwise the true mount count */
	if (fst.sfsp && fst.count > fst.maxcount)
		*retval = fst.maxcount;
	else
		*retval = fst.count;
	return (0);
}
2997
/*
 * vfs_iterate() callback for getfsstat64(): copy one fixed-size
 * statfs64 record to the user buffer, refreshing cached statistics
 * unless MNT_NOWAIT was requested.
 */
static int
getfsstat64_callback(mount_t mp, void * arg)
{
	struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
	struct vfsstatfs *sp;
	int error;

	if (fstp->sfsp && fstp->count < fstp->maxcount) {
#if CONFIG_MACF
		error = mac_mount_check_stat(vfs_context_current(), mp);
		if (error != 0) {
			fstp->error = error;
			return(VFS_RETURNED_DONE);
		}
#endif
		sp = &mp->mnt_vfsstat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh the fsstat
		 * cache. MNT_WAIT overrides MNT_NOWAIT.
		 *
		 * We treat MNT_DWAIT as MNT_WAIT for all instances of
		 * getfsstat, since the constants are out of the same
		 * namespace.
		 */
		if (((fstp->flags & MNT_NOWAIT) == 0 ||
		    (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
		    (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
			KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
			return(VFS_RETURNED);
		}

		error = statfs64_common(mp, sp, fstp->sfsp);
		if (error) {
			fstp->error = error;
			return(VFS_RETURNED_DONE);
		}
		/* Fixed record size: always one struct statfs64 */
		fstp->sfsp += sizeof(struct statfs64);
	}
	/* Count every mount, even those beyond the buffer's capacity */
	fstp->count++;
	return(VFS_RETURNED);
}
3039
3040/*
3041 * Get statistics on all file systems in 64 bit mode.
3042 */
3043int
3044getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
3045{
3046 user_addr_t sfsp;
3047 int count, maxcount;
3048 struct getfsstat_struct fst;
3049
3050 maxcount = uap->bufsize / sizeof(struct statfs64);
3051
3052 sfsp = uap->buf;
3053 count = 0;
3054
3055 fst.sfsp = sfsp;
3056 fst.flags = uap->flags;
3057 fst.count = 0;
3058 fst.error = 0;
3059 fst.maxcount = maxcount;
3060
3061 vfs_iterate(0, getfsstat64_callback, &fst);
3062
3063 if (fst.error ) {
3064 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
3065 return(fst.error);
3066 }
3067
3068 if (fst.sfsp && fst.count > fst.maxcount)
3069 *retval = fst.maxcount;
3070 else
3071 *retval = fst.count;
3072
3073 return (0);
3074}
3075
3076/*
3077 * gets the associated vnode with the file descriptor passed.
3078 * as input
3079 *
3080 * INPUT
3081 * ctx - vfs context of caller
3082 * fd - file descriptor for which vnode is required.
3083 * vpp - Pointer to pointer to vnode to be returned.
3084 *
3085 * The vnode is returned with an iocount so any vnode obtained
3086 * by this call needs a vnode_put
3087 *
3088 */
3089int
3090vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
3091{
3092 int error;
3093 vnode_t vp;
3094 struct fileproc *fp;
3095 proc_t p = vfs_context_proc(ctx);
3096
3097 *vpp = NULLVP;
3098
3099 error = fp_getfvp(p, fd, &fp, &vp);
3100 if (error)
3101 return (error);
3102
3103 error = vnode_getwithref(vp);
3104 if (error) {
3105 (void)fp_drop(p, fd, fp, 0);
3106 return (error);
3107 }
3108
3109 (void)fp_drop(p, fd, fp, 0);
3110 *vpp = vp;
3111 return (error);
3112}
3113
3114/*
3115 * Wrapper function around namei to start lookup from a directory
3116 * specified by a file descriptor ni_dirfd.
3117 *
3118 * In addition to all the errors returned by namei, this call can
3119 * return ENOTDIR if the file descriptor does not refer to a directory.
3120 * and EBADF if the file descriptor is not valid.
3121 */
/*
 * namei() wrapper: for a relative path, start the lookup at the
 * directory referenced by 'dirfd' instead of the CWD.  Absolute paths,
 * AT_FDCWD, continued lookups, and callers that already supplied a
 * starting dvp fall straight through to namei().
 */
int
nameiat(struct nameidata *ndp, int dirfd)
{
	if ((dirfd != AT_FDCWD) &&
	    !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
	    !(ndp->ni_cnd.cn_flags & USEDVP)) {
		int error = 0;
		char c;

		/* Peek at the first path byte to detect an absolute path */
		if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
			error = copyin(ndp->ni_dirp, &c, sizeof(char));
			if (error)
				return (error);
		} else {
			c = *((char *)(ndp->ni_dirp));
		}

		if (c != '/') {
			vnode_t dvp_at;

			/* Relative path: anchor the lookup at dirfd's vnode */
			error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
			    &dvp_at);
			if (error)
				return (error);

			if (vnode_vtype(dvp_at) != VDIR) {
				vnode_put(dvp_at);
				return (ENOTDIR);
			}

			ndp->ni_dvp = dvp_at;
			ndp->ni_cnd.cn_flags |= USEDVP;
			error = namei(ndp);
			ndp->ni_cnd.cn_flags &= ~USEDVP;
			vnode_put(dvp_at);
			return (error);
		}
	}

	return (namei(ndp));
}
3163
3164/*
3165 * Change current working directory to a given file descriptor.
3166 */
3167/* ARGSUSED */
3168static int
3169common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
3170{
3171 struct filedesc *fdp = p->p_fd;
3172 vnode_t vp;
3173 vnode_t tdp;
3174 vnode_t tvp;
3175 struct mount *mp;
3176 int error;
3177 vfs_context_t ctx = vfs_context_current();
3178
3179 AUDIT_ARG(fd, uap->fd);
3180 if (per_thread && uap->fd == -1) {
3181 /*
3182 * Switching back from per-thread to per process CWD; verify we
3183 * in fact have one before proceeding. The only success case
3184 * for this code path is to return 0 preemptively after zapping
3185 * the thread structure contents.
3186 */
3187 thread_t th = vfs_context_thread(ctx);
3188 if (th) {
3189 uthread_t uth = get_bsdthread_info(th);
3190 tvp = uth->uu_cdir;
3191 uth->uu_cdir = NULLVP;
3192 if (tvp != NULLVP) {
3193 vnode_rele(tvp);
3194 return (0);
3195 }
3196 }
3197 return (EBADF);
3198 }
3199
3200 if ( (error = file_vnode(uap->fd, &vp)) )
3201 return(error);
3202 if ( (error = vnode_getwithref(vp)) ) {
3203 file_drop(uap->fd);
3204 return(error);
3205 }
3206
3207 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3208
3209 if (vp->v_type != VDIR) {
3210 error = ENOTDIR;
3211 goto out;
3212 }
3213
3214#if CONFIG_MACF
3215 error = mac_vnode_check_chdir(ctx, vp);
3216 if (error)
3217 goto out;
3218#endif
3219 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3220 if (error)
3221 goto out;
3222
3223 while (!error && (mp = vp->v_mountedhere) != NULL) {
3224 if (vfs_busy(mp, LK_NOWAIT)) {
3225 error = EACCES;
3226 goto out;
3227 }
3228 error = VFS_ROOT(mp, &tdp, ctx);
3229 vfs_unbusy(mp);
3230 if (error)
3231 break;
3232 vnode_put(vp);
3233 vp = tdp;
3234 }
3235 if (error)
3236 goto out;
3237 if ( (error = vnode_ref(vp)) )
3238 goto out;
3239 vnode_put(vp);
3240
3241 if (per_thread) {
3242 thread_t th = vfs_context_thread(ctx);
3243 if (th) {
3244 uthread_t uth = get_bsdthread_info(th);
3245 tvp = uth->uu_cdir;
3246 uth->uu_cdir = vp;
3247 OSBitOrAtomic(P_THCWD, &p->p_flag);
3248 } else {
3249 vnode_rele(vp);
3250 return (ENOENT);
3251 }
3252 } else {
3253 proc_fdlock(p);
3254 tvp = fdp->fd_cdir;
3255 fdp->fd_cdir = vp;
3256 proc_fdunlock(p);
3257 }
3258
3259 if (tvp)
3260 vnode_rele(tvp);
3261 file_drop(uap->fd);
3262
3263 return (0);
3264out:
3265 vnode_put(vp);
3266 file_drop(uap->fd);
3267
3268 return(error);
3269}
3270
/* fchdir(2): change the per-process working directory to fd */
int
fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
{
	return common_fchdir(p, uap, 0);
}
3276
/* Per-thread fchdir variant; fd == -1 reverts to the process CWD */
int
__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
{
	/* cast presumably relies on layout compatibility with fchdir_args — verify */
	return common_fchdir(p, (void *)uap, 1);
}
3282
3283/*
3284 * Change current working directory (".").
3285 *
3286 * Returns: 0 Success
3287 * change_dir:ENOTDIR
3288 * change_dir:???
3289 * vnode_ref:ENOENT No such file or directory
3290 */
3291/* ARGSUSED */
static int
common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
{
	struct filedesc *fdp = p->p_fd;
	int error;
	struct nameidata nd;
	vnode_t tvp;
	vfs_context_t ctx = vfs_context_current();

	NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	/* change_dir() returns the directory with an iocount on success */
	error = change_dir(&nd, ctx);
	if (error)
		return (error);
	/* Trade the iocount for a long-lived usecount for CWD storage */
	if ( (error = vnode_ref(nd.ni_vp)) ) {
		vnode_put(nd.ni_vp);
		return (error);
	}
	/*
	 * drop the iocount we picked up in change_dir
	 */
	vnode_put(nd.ni_vp);

	if (per_thread) {
		thread_t th = vfs_context_thread(ctx);
		if (th) {
			uthread_t uth = get_bsdthread_info(th);
			tvp = uth->uu_cdir;
			uth->uu_cdir = nd.ni_vp;
			OSBitOrAtomic(P_THCWD, &p->p_flag);
		} else {
			vnode_rele(nd.ni_vp);
			return (ENOENT);
		}
	} else {
		proc_fdlock(p);
		tvp = fdp->fd_cdir;
		fdp->fd_cdir = nd.ni_vp;
		proc_fdunlock(p);
	}

	/* Release the usecount on the previous CWD, if any */
	if (tvp)
		vnode_rele(tvp);

	return (0);
}
3338
3339
3340/*
3341 * chdir
3342 *
3343 * Change current working directory (".") for the entire process
3344 *
3345 * Parameters: p Process requesting the call
3346 * uap User argument descriptor (see below)
3347 * retval (ignored)
3348 *
3349 * Indirect parameters: uap->path Directory path
3350 *
3351 * Returns: 0 Success
3352 * common_chdir: ENOTDIR
3353 * common_chdir: ENOENT No such file or directory
3354 * common_chdir: ???
3355 *
3356 */
3357int
3358chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
3359{
3360 return common_chdir(p, (void *)uap, 0);
3361}
3362
3363/*
3364 * __pthread_chdir
3365 *
3366 * Change current working directory (".") for a single thread
3367 *
3368 * Parameters: p Process requesting the call
3369 * uap User argument descriptor (see below)
3370 * retval (ignored)
3371 *
3372 * Indirect parameters: uap->path Directory path
3373 *
3374 * Returns: 0 Success
3375 * common_chdir: ENOTDIR
3376 * common_chdir: ENOENT No such file or directory
3377 * common_chdir: ???
3378 *
3379 */
int
__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
{
	/* cast presumably relies on layout compatibility with chdir_args — verify */
	return common_chdir(p, (void *)uap, 1);
}
3385
3386
3387/*
3388 * Change notion of root (``/'') directory.
3389 */
/* ARGSUSED */
/*
 * chroot(2): change the process's notion of the filesystem root.
 * Requires superuser; the new root is held with a usecount and the
 * previous one (if any) is released.
 */
int
chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	int error;
	struct nameidata nd;
	vnode_t tvp;
	vfs_context_t ctx = vfs_context_current();

	/* Superuser only */
	if ((error = suser(kauth_cred_get(), &p->p_acflag)))
		return (error);

	NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	/* change_dir() validates VDIR + search access, returns an iocount */
	error = change_dir(&nd, ctx);
	if (error)
		return (error);

#if CONFIG_MACF
	error = mac_vnode_check_chroot(ctx, nd.ni_vp,
	    &nd.ni_cnd);
	if (error) {
		vnode_put(nd.ni_vp);
		return (error);
	}
#endif

	/* Hold a usecount for as long as this vnode is the root dir */
	if ( (error = vnode_ref(nd.ni_vp)) ) {
		vnode_put(nd.ni_vp);
		return (error);
	}
	vnode_put(nd.ni_vp);

	proc_fdlock(p);
	tvp = fdp->fd_rdir;
	fdp->fd_rdir = nd.ni_vp;
	fdp->fd_flags |= FD_CHROOT;
	proc_fdunlock(p);

	if (tvp != NULL)
		vnode_rele(tvp);

	return (0);
}
3435
3436/*
3437 * Common routine for chroot and chdir.
3438 *
3439 * Returns: 0 Success
3440 * ENOTDIR Not a directory
3441 * namei:??? [anything namei can return]
3442 * vnode_authorize:??? [anything vnode_authorize can return]
3443 */
3444static int
3445change_dir(struct nameidata *ndp, vfs_context_t ctx)
3446{
3447 vnode_t vp;
3448 int error;
3449
3450 if ((error = namei(ndp)))
3451 return (error);
3452 nameidone(ndp);
3453 vp = ndp->ni_vp;
3454
3455 if (vp->v_type != VDIR) {
3456 vnode_put(vp);
3457 return (ENOTDIR);
3458 }
3459
3460#if CONFIG_MACF
3461 error = mac_vnode_check_chdir(ctx, vp);
3462 if (error) {
3463 vnode_put(vp);
3464 return (error);
3465 }
3466#endif
3467
3468 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3469 if (error) {
3470 vnode_put(vp);
3471 return (error);
3472 }
3473
3474 return (error);
3475}
3476
/*
 * Allocate (zero-filled) per-file-descriptor vnode data used for
 * directories, with its lock initialized.  Released by
 * fg_vn_data_free().  (The previous header comment incorrectly said
 * "Free"; this function allocates.)
 */
struct fd_vn_data *
fg_vn_data_alloc(void)
{
	struct fd_vn_data *fvdata;

	/* Allocate per fd vnode data */
	MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
	    M_FD_VN_DATA, M_WAITOK | M_ZERO);
	lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
	return fvdata;
}
3491
/*
 * Free the vnode data (for directories) associated with the file glob.
 * Releases the directory-read buffer (if one was allocated), destroys
 * the embedded lock, then frees the structure itself.
 */
void
fg_vn_data_free(void *fgvndata)
{
	struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;

	if (fvdata->fv_buf)
		FREE(fvdata->fv_buf, M_FD_DIRBUF);
	lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
	FREE(fvdata, M_FD_VN_DATA);
}
3505
3506/*
3507 * Check permissions, allocate an open file structure,
3508 * and call the device open routine if any.
3509 *
3510 * Returns: 0 Success
3511 * EINVAL
3512 * EINTR
3513 * falloc:ENFILE
3514 * falloc:EMFILE
3515 * falloc:ENOMEM
3516 * vn_open_auth:???
3517 * dupfdopen:???
3518 * VNOP_ADVLOCK:???
3519 * vnode_setsize:???
3520 *
3521 * XXX Need to implement uid, gid
3522 */
int
open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
    struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
    int32_t *retval)
{
	proc_t p = vfs_context_proc(ctx);
	uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
	struct fileproc *fp;
	vnode_t vp;
	int flags, oflags;
	int type, indx, error;
	struct flock lf;
	struct vfs_context context;

	oflags = uflags;

	/* All of O_ACCMODE set at once (O_RDONLY|O_WRONLY|O_RDWR) is invalid. */
	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return(EINVAL);

	/*
	 * Convert open(2) flags to in-kernel F* flags.  The raw-crypto bits
	 * may only be requested through the vnode_attr path
	 * (open_dprotected_np), so strip them from caller-supplied flags.
	 */
	flags = FFLAGS(uflags);
	CLR(flags, FENCRYPTED);
	CLR(flags, FUNENCRYPTED);

	AUDIT_ARG(fflags, oflags);
	AUDIT_ARG(mode, vap->va_mode);

	/* Reserve a descriptor slot and fileproc before touching the vnode. */
	if ((error = falloc_withalloc(p,
	    &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
		return (error);
	}
	/*
	 * Encode the reserved fd so an fdopen-style device open can redirect
	 * to an existing descriptor via dupfdopen() below.
	 */
	uu->uu_dupfd = -indx - 1;

	if ((error = vn_open_auth(ndp, &flags, vap))) {
		if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){	/* XXX from fdopen */
			if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
				fp_drop(p, indx, NULL, 0);
				*retval = indx;
				return (0);
			}
		}
		/* A restartable open surfaces to userspace as EINTR. */
		if (error == ERESTART)
			error = EINTR;
		fp_free(p, indx, fp);
		return (error);
	}
	uu->uu_dupfd = 0;
	vp = ndp->ni_vp;

	/* Wire the open vnode into the fileglob. */
	fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
	fp->f_fglob->fg_ops = &vnops;
	fp->f_fglob->fg_data = (caddr_t)vp;

	/* Apply an advisory flock-style lock for O_EXLOCK/O_SHLOCK. */
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		/* Block waiting for the lock unless the open is non-blocking. */
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
#if CONFIG_MACF
		error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
		    F_SETLK, &lf);
		if (error)
			goto bad;
#endif
		if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
			goto bad;
		fp->f_fglob->fg_flag |= FHASLOCK;
	}

#if DEVELOPMENT || DEBUG
	/*
	 * XXX VSWAP: Check for entitlements or special flag here
	 * so we can restrict access appropriately.
	 */
#else /* DEVELOPMENT || DEBUG */

	if (vnode_isswap(vp) && (flags & (FWRITE | O_TRUNC)) && (ctx != vfs_context_kernel())) {
		/* block attempt to write/truncate swapfile */
		error = EPERM;
		goto bad;
	}
#endif /* DEVELOPMENT || DEBUG */

	/* try to truncate by setting the size attribute */
	if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
		goto bad;

	/*
	 * For directories we hold some additional information in the fd.
	 */
	if (vnode_vtype(vp) == VDIR) {
		fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
	} else {
		fp->f_fglob->fg_vn_data = NULL;
	}

	/*
	 * NOTE(review): the iocount is dropped here, yet vp is still consulted
	 * below (vnode_istty) -- presumably safe because the fileglob holds its
	 * own reference from vn_open_auth; confirm before reordering.
	 */
	vnode_put(vp);

	/*
	 * The first terminal open (without a O_NOCTTY) by a session leader
	 * results in it being set as the controlling terminal.
	 */
	if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
	    !(flags & O_NOCTTY)) {
		int tmp = 0;

		(void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
		    (caddr_t)&tmp, ctx);
	}

	/* Publish the descriptor: set close-on-exec/fork flags, then release. */
	proc_fdlock(p);
	if (flags & O_CLOEXEC)
		*fdflags(p, indx) |= UF_EXCLOSE;
	if (flags & O_CLOFORK)
		*fdflags(p, indx) |= UF_FORKCLOSE;
	procfdtbl_releasefd(p, indx, NULL);

#if CONFIG_SECLUDED_MEMORY
	if (secluded_for_filecache &&
	    FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE &&
	    vnode_vtype(vp) == VREG) {
		memory_object_control_t moc;

		moc = ubc_getobject(vp, UBC_FLAGS_NONE);

		if (moc == MEMORY_OBJECT_CONTROL_NULL) {
			/* nothing to do... */
		} else if (fp->f_fglob->fg_flag & FWRITE) {
			/* writable -> no longer eligible for secluded pages */
			memory_object_mark_eligible_for_secluded(moc,
								 FALSE);
		} else if (secluded_for_filecache == 1) {
			char pathname[32] = { 0, };
			size_t copied;
			/* XXX FBDP: better way to detect /Applications/ ? */
			if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
				copyinstr(ndp->ni_dirp,
					  pathname,
					  sizeof (pathname),
					  &copied);
			} else {
				copystr(CAST_DOWN(void *, ndp->ni_dirp),
					pathname,
					sizeof (pathname),
					&copied);
			}
			pathname[sizeof (pathname) - 1] = '\0';
			if (strncmp(pathname,
				    "/Applications/",
				    strlen("/Applications/")) == 0 &&
			    strncmp(pathname,
				    "/Applications/Camera.app/",
				    strlen("/Applications/Camera.app/")) != 0) {
				/*
				 * not writable
				 * AND from "/Applications/"
				 * AND not from "/Applications/Camera.app/"
				 * ==> eligible for secluded
				 */
				memory_object_mark_eligible_for_secluded(moc,
									 TRUE);
			}
		} else if (secluded_for_filecache == 2) {
#if __arm64__
#define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
#elif __arm__
#define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
#else
/* not implemented... */
#endif
			if (!strncmp(vp->v_name,
				     DYLD_SHARED_CACHE_NAME,
				     strlen(DYLD_SHARED_CACHE_NAME)) ||
			    !strncmp(vp->v_name,
				     "dyld",
				     strlen(vp->v_name)) ||
			    !strncmp(vp->v_name,
				     "launchd",
				     strlen(vp->v_name)) ||
			    !strncmp(vp->v_name,
				     "Camera",
				     strlen(vp->v_name)) ||
			    !strncmp(vp->v_name,
				     "mediaserverd",
				     strlen(vp->v_name)) ||
			    !strncmp(vp->v_name,
				     "SpringBoard",
				     strlen(vp->v_name)) ||
			    !strncmp(vp->v_name,
				     "backboardd",
				     strlen(vp->v_name))) {
				/*
				 * This file matters when launching Camera:
				 * do not store its contents in the secluded
				 * pool that will be drained on Camera launch.
				 */
				memory_object_mark_eligible_for_secluded(moc,
									 FALSE);
			}
		}
	}
#endif /* CONFIG_SECLUDED_MEMORY */

	fp_drop(p, indx, fp, 1);
	proc_fdunlock(p);

	*retval = indx;

	return (0);
bad:
	/* Undo everything: drop the lock (if taken), close, free the fd slot. */
	context = *vfs_context_current();
	context.vc_ucred = fp->f_fglob->fg_cred;

	if ((fp->f_fglob->fg_flag & FHASLOCK) &&
	    (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;

		(void)VNOP_ADVLOCK(
			vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
	}

	vn_close(vp, fp->f_fglob->fg_flag, &context);
	vnode_put(vp);
	fp_free(p, indx, fp);

	return (error);
}
3758
3759/*
3760 * While most of the *at syscall handlers can call nameiat() which
3761 * is a wrapper around namei, the use of namei and initialisation
3762 * of nameidata are far removed and in different functions - namei
3763 * gets called in vn_open_auth for open1. So we'll just do here what
3764 * nameiat() does.
3765 */
static int
open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
    struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
    int dirfd)
{
	/*
	 * Only a relative path needs the dirfd; an absolute path, or a
	 * caller that already supplied a starting dvp (USEDVP), goes
	 * straight to open1().
	 */
	if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
		int error;
		char c;

		/* Peek at the first byte of the path to classify it. */
		if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
			error = copyin(ndp->ni_dirp, &c, sizeof(char));
			if (error)
				return (error);
		} else {
			c = *((char *)(ndp->ni_dirp));
		}

		if (c != '/') {
			vnode_t dvp_at;

			/* Resolve dirfd to a vnode; it must be a directory. */
			error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
			    &dvp_at);
			if (error)
				return (error);

			if (vnode_vtype(dvp_at) != VDIR) {
				vnode_put(dvp_at);
				return (ENOTDIR);
			}

			/* Hand the starting directory to open1() via USEDVP. */
			ndp->ni_dvp = dvp_at;
			ndp->ni_cnd.cn_flags |= USEDVP;
			error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
			    retval);
			vnode_put(dvp_at);
			return (error);
		}
	}

	return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
}
3807
/*
 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
 *
 * Parameters:	p	Process requesting the open
 *		uap	User argument descriptor (see below)
 *		retval	Pointer to an area to receive the
 *			return value from the system call
 *
 * Indirect:	uap->path	Path to open (same as 'open')
 *		uap->flags	Flags to open (same as 'open')
 *		uap->uid	UID to set, if creating
 *		uap->gid	GID to set, if creating
 *		uap->mode	File mode, if creating (same as 'open')
 *		uap->xsecurity	ACL to set, if creating
 *
 * Returns:	0	Success
 *		!0	errno value
 *
 * Notes:	The kauth_filesec_t in 'va', if any, is in host byte order.
 *
 * XXX:	We should enumerate the possible errno values here, and where
 *	in the code they originated.
 */
int
open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	int ciferror;
	kauth_filesec_t xsecdst;
	struct vnode_attr va;
	struct nameidata nd;
	int cmode;

	AUDIT_ARG(owner, uap->uid, uap->gid);

	/* Copy in the caller-supplied ACL, if any. */
	xsecdst = NULL;
	if ((uap->xsecurity != USER_ADDR_NULL) &&
	    ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
		return ciferror;

	VATTR_INIT(&va);
	/* Apply the process umask and drop the sticky bit from the mode. */
	cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
	VATTR_SET(&va, va_mode, cmode);
	/* Owner/group/ACL are only applied when explicitly requested. */
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);
	if (xsecdst != NULL)
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);

	NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	    uap->path, vfs_context_current());

	ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
	    fileproc_alloc_init, NULL, retval);
	if (xsecdst != NULL)
		kauth_filesec_free(xsecdst);

	return ciferror;
}
3868
3869/*
3870 * Go through the data-protected atomically controlled open (2)
3871 *
3872 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3873 */
3874int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3875 int flags = uap->flags;
3876 int class = uap->class;
3877 int dpflags = uap->dpflags;
3878
3879 /*
3880 * Follow the same path as normal open(2)
3881 * Look up the item if it exists, and acquire the vnode.
3882 */
3883 struct filedesc *fdp = p->p_fd;
3884 struct vnode_attr va;
3885 struct nameidata nd;
3886 int cmode;
3887 int error;
3888
3889 VATTR_INIT(&va);
3890 /* Mask off all but regular access permissions */
3891 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3892 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3893
3894 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3895 uap->path, vfs_context_current());
3896
3897 /*
3898 * Initialize the extra fields in vnode_attr to pass down our
3899 * extra fields.
3900 * 1. target cprotect class.
3901 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3902 */
3903 if (flags & O_CREAT) {
3904 /* lower level kernel code validates that the class is valid before applying it. */
3905 if (class != PROTECTION_CLASS_DEFAULT) {
3906 /*
3907 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3908 * file behave the same as open (2)
3909 */
3910 VATTR_SET(&va, va_dataprotect_class, class);
3911 }
3912 }
3913
3914 if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
3915 if ( flags & (O_RDWR | O_WRONLY)) {
3916 /* Not allowed to write raw encrypted bytes */
3917 return EINVAL;
3918 }
3919 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
3920 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3921 }
3922 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
3923 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
3924 }
3925 }
3926
3927 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3928 fileproc_alloc_init, NULL, retval);
3929
3930 return error;
3931}
3932
3933static int
3934openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
3935 int fd, enum uio_seg segflg, int *retval)
3936{
3937 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
3938 struct vnode_attr va;
3939 struct nameidata nd;
3940 int cmode;
3941
3942 VATTR_INIT(&va);
3943 /* Mask off all but regular access permissions */
3944 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3945 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3946
3947 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
3948 segflg, path, ctx);
3949
3950 return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
3951 retval, fd));
3952}
3953
3954int
3955open(proc_t p, struct open_args *uap, int32_t *retval)
3956{
3957 __pthread_testcancel(1);
3958 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3959}
3960
3961int
3962open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3963 int32_t *retval)
3964{
3965 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3966 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3967}
3968
3969int
3970openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3971 int32_t *retval)
3972{
3973 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3974 uap->mode, uap->fd, UIO_USERSPACE, retval));
3975}
3976
3977int
3978openat(proc_t p, struct openat_args *uap, int32_t *retval)
3979{
3980 __pthread_testcancel(1);
3981 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3982}
3983
/*
 * openbyid_np: open a file given a file system id and a file system object id.
 *	The hfs file system object id is an fsobj_id_t {uint32, uint32};
 *	for file systems that don't support object ids it is a node id (uint64_t).
 *
 * Parameters:	p	Process requesting the open
 *		uap	User argument descriptor (see below)
 *		retval	Pointer to an area to receive the
 *			return value from the system call
 *
 * Indirect:	uap->path	Path to open (same as 'open')
 *
 *		uap->fsid	id of target file system
 *		uap->objid	id of target file system object
 *		uap->flags	Flags to open (same as 'open')
 *
 * Returns:	0	Success
 *		!0	errno value
 *
 *
 * XXX:	We should enumerate the possible errno values here, and where
 *	in the code they originated.
 */
int
openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
{
	fsid_t fsid;
	uint64_t objid;
	int error;
	char *buf = NULL;
	int buflen = MAXPATHLEN;
	int pathlen = 0;
	vfs_context_t ctx = vfs_context_current();

	/* Opening by raw fs/object id is a privileged operation. */
	if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
		return (error);
	}

	if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
		return (error);
	}

	/*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
	if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
		return (error);
	}

	AUDIT_ARG(value32, fsid.val[0]);
	AUDIT_ARG(value64, objid);

	/* resolve path from fsid, objid */
	/*
	 * NOTE(review): buflen grows by MAXPATHLEN on every ENOSPC with no
	 * explicit upper bound; this relies on fsgetpath_internal eventually
	 * succeeding or failing with a different error -- confirm.
	 */
	do {
		MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
		if (buf == NULL) {
			return (ENOMEM);
		}

		error = fsgetpath_internal(
			ctx, fsid.val[0], objid,
			buflen, buf, &pathlen);

		if (error) {
			FREE(buf, M_TEMP);
			buf = NULL;
		}
	} while (error == ENOSPC && (buflen += MAXPATHLEN));

	if (error) {
		return error;
	}

	buf[pathlen] = 0;

	/* Open the resolved path from kernel space (no copyin of the path). */
	error = openat_internal(
		ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);

	FREE(buf, M_TEMP);

	return error;
}
4064
4065
4066/*
4067 * Create a special file.
4068 */
4069static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
4070
int
mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
{
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;
	vnode_t vp, dvp;

	VATTR_INIT(&va);
	/* Requested mode, filtered through the process umask. */
	VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
	VATTR_SET(&va, va_rdev, uap->dev);

	/* If it's a mknod() of a FIFO, call mkfifo1() instead */
	if ((uap->mode & S_IFMT) == S_IFIFO)
		return(mkfifo1(ctx, uap->path, &va));

	AUDIT_ARG(mode, uap->mode);
	AUDIT_ARG(value32, uap->dev);

	/* Creating device special files requires superuser privilege. */
	if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
		return (error);
	NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* The target must not already exist. */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}

	/* Only character and block special files are valid here. */
	switch (uap->mode & S_IFMT) {
	case S_IFCHR:
		VATTR_SET(&va, va_type, VCHR);
		break;
	case S_IFBLK:
		VATTR_SET(&va, va_type, VBLK);
		break;
	default:
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);
	if (error)
		goto out;
#endif

	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out;

	if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
		goto out;

	if (vp) {
		int update_flags = 0;

		// Make sure the name & parent pointers are hooked up
		if (vp->v_name == NULL)
			update_flags |= VNODE_UPDATE_NAME;
		if (vp->v_parent == NULLVP)
			update_flags |= VNODE_UPDATE_PARENT;

		if (update_flags)
			vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
		add_fsevent(FSE_CREATE_FILE, ctx,
		    FSE_ARG_VNODE, vp,
		    FSE_ARG_DONE);
#endif
	}

out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	vnode_put(dvp);

	return (error);
}
4163
4164/*
4165 * Create a named pipe.
4166 *
4167 * Returns: 0 Success
4168 * EEXIST
4169 * namei:???
4170 * vnode_authorize:???
4171 * vn_create:???
4172 */
static int
mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
{
	vnode_t vp, dvp;
	int error;
	struct nameidata nd;

	NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
	    UIO_USERSPACE, upath, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* check that this is a new file and authorize addition */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}
	/* Force the created node's type to FIFO regardless of caller's vap. */
	VATTR_SET(vap, va_type, VFIFO);

	if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
		goto out;

	error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	vnode_put(dvp);

	return error;
}
4212
4213
/*
 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
 *
 * Parameters:	p	Process requesting the create
 *		uap	User argument descriptor (see below)
 *		retval	(Ignored)
 *
 * Indirect:	uap->path	Path to fifo (same as 'mkfifo')
 *		uap->uid	UID to set
 *		uap->gid	GID to set
 *		uap->mode	File mode to set (same as 'mkfifo')
 *		uap->xsecurity	ACL to set, if creating
 *
 * Returns:	0	Success
 *		!0	errno value
 *
 * Notes:	The kauth_filesec_t in 'va', if any, is in host byte order.
 *
 * XXX:	We should enumerate the possible errno values here, and where
 *	in the code they originated.
 */
int
mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
{
	int ciferror;
	kauth_filesec_t xsecdst;
	struct vnode_attr va;

	AUDIT_ARG(owner, uap->uid, uap->gid);

	/* Copy in the caller-supplied ACL, if any. */
	xsecdst = KAUTH_FILESEC_NONE;
	if (uap->xsecurity != USER_ADDR_NULL) {
		if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
			return ciferror;
	}

	VATTR_INIT(&va);
	/* Requested mode, filtered through the process umask. */
	VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
	/* Owner/group/ACL are only applied when explicitly requested. */
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);
	if (xsecdst != KAUTH_FILESEC_NONE)
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);

	ciferror = mkfifo1(vfs_context_current(), uap->path, &va);

	if (xsecdst != KAUTH_FILESEC_NONE)
		kauth_filesec_free(xsecdst);
	return ciferror;
}
4265
4266/* ARGSUSED */
4267int
4268mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
4269{
4270 struct vnode_attr va;
4271
4272 VATTR_INIT(&va);
4273 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4274
4275 return(mkfifo1(vfs_context_current(), uap->path, &va));
4276}
4277
4278
/*
 * Return a pointer to the last occurrence of 'ch' in the string 'p',
 * or NULL if 'ch' does not appear.  As with strrchr(3), the terminating
 * NUL is considered part of the string, so searching for '\0' yields a
 * pointer to the terminator.
 */
static char *
my_strrchr(char *p, int ch)
{
	char *last = NULL;

	do {
		if (*p == ch)
			last = p;
	} while (*p++ != '\0');

	return last;
}
4292
4293extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4294
int
safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
{
	int ret, len = _len;

	*truncated_path = 0;
	/* len comes back as the path length including the terminating NUL. */
	ret = vn_getpath(dvp, path, &len);
	if (ret == 0 && len < (MAXPATHLEN - 1)) {
		if (leafname) {
			/* Overwrite the NUL with a separator and append the leaf. */
			path[len-1] = '/';
			/* strlcpy returns strlen(leafname); +1 re-counts the NUL. */
			len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
			if (len > MAXPATHLEN) {
				char *ptr;

				// the string got truncated!
				*truncated_path = 1;
				ptr = my_strrchr(path, '/');
				if (ptr) {
					*ptr = '\0';   // chop off the string at the last directory component
				}
				len = strlen(path) + 1;
			}
		}
	} else if (ret == 0) {
		/* Path fit but is at the limit; treat as truncated. */
		*truncated_path = 1;
	} else if (ret != 0) {
		struct vnode *mydvp=dvp;

		if (ret != ENOSPC) {
			printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
			       dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
		}
		*truncated_path = 1;

		/*
		 * Fall back to walking up the parent chain until some
		 * ancestor (or the mount point) yields a path that fits.
		 */
		do {
			if (mydvp->v_parent != NULL) {
				mydvp = mydvp->v_parent;
			} else if (mydvp->v_mount) {
				strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
				break;
			} else {
				// no parent and no mount point?  only thing is to punt and say "/" changed
				strlcpy(path, "/", _len);
				len = 2;
				mydvp = NULL;
			}

			if (mydvp == NULL) {
				break;
			}

			len = _len;
			ret = vn_getpath(mydvp, path, &len);
		} while (ret == ENOSPC);
	}

	/* Returns the length of the string placed in 'path', including NUL. */
	return len;
}
4353
4354
4355/*
4356 * Make a hard file link.
4357 *
4358 * Returns: 0 Success
4359 * EPERM
4360 * EEXIST
4361 * EXDEV
4362 * namei:???
4363 * vnode_authorize:???
4364 * VNOP_LINK:???
4365 */
4366/* ARGSUSED */
static int
linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
    user_addr_t link, int flag, enum uio_seg segflg)
{
	vnode_t vp, dvp, lvp;
	struct nameidata nd;
	int follow;
	int error;
#if CONFIG_FSE
	fse_info finfo;
#endif
	int need_event, has_listeners, need_kpath2;
	char *target_path = NULL;
	int truncated=0;

	vp = dvp = lvp = NULLVP;

	/* look up the object we are linking to */
	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
	NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
	    segflg, path, ctx);

	error = nameiat(&nd, fd1);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/*
	 * Normally, linking to directories is not supported.
	 * However, some file systems may have limited support.
	 */
	if (vp->v_type == VDIR) {
		if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
			error = EPERM;   /* POSIX */
			goto out;
		}

		/* Linking to a directory requires ownership. */
		if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
			struct vnode_attr dva;

			VATTR_INIT(&dva);
			VATTR_WANTED(&dva, va_uid);
			if (vnode_getattr(vp, &dva, ctx) != 0 ||
			    !VATTR_IS_SUPPORTED(&dva, va_uid) ||
			    (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
				error = EACCES;
				goto out;
			}
		}
	}

	/* lookup the target node */
	/* The nameidata is reused here for the second (CREATE) lookup. */
#if CONFIG_TRIGGERS
	nd.ni_op = OP_LINK;
#endif
	nd.ni_cnd.cn_nameiop = CREATE;
	nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
	nd.ni_dirp = link;
	error = nameiat(&nd, fd2);
	if (error != 0)
		goto out;
	dvp = nd.ni_dvp;
	lvp = nd.ni_vp;

#if CONFIG_MACF
	if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
		goto out2;
#endif

	/* or to anything that kauth doesn't want us to (eg. immutable items) */
	if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
		goto out2;

	/* target node must not exist */
	if (lvp != NULLVP) {
		error = EEXIST;
		goto out2;
	}
	/* cannot link across mountpoints */
	if (vnode_mount(vp) != vnode_mount(dvp)) {
		error = EXDEV;
		goto out2;
	}

	/* authorize creation of the target node */
	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out2;

	/* and finally make the link */
	error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
	if (error)
		goto out2;

#if CONFIG_MACF
	(void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
#endif

	/* Post-link notifications: fsevents, kauth listeners, audit path. */
#if CONFIG_FSE
	need_event = need_fsevent(FSE_CREATE_FILE, dvp);
#else
	need_event = 0;
#endif
	has_listeners = kauth_authorize_fileop_has_listeners();

	need_kpath2 = 0;
#if CONFIG_AUDIT
	if (AUDIT_RECORD_EXISTS()) {
		need_kpath2 = 1;
	}
#endif

	if (need_event || has_listeners || need_kpath2) {
		char *link_to_path = NULL;
		int len, link_name_len;

		/* build the path to the new link file */
		GET_PATH(target_path);
		if (target_path == NULL) {
			error = ENOMEM;
			goto out2;
		}

		len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);

		AUDIT_ARG(kpath, target_path, ARG_KPATH2);

		if (has_listeners) {
			/* build the path to file we are linking to */
			GET_PATH(link_to_path);
			if (link_to_path == NULL) {
				error = ENOMEM;
				goto out2;
			}

			link_name_len = MAXPATHLEN;
			if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
				/*
				 * Call out to allow 3rd party notification of rename.
				 * Ignore result of kauth_authorize_fileop call.
				 */
				kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
						       (uintptr_t)link_to_path,
						       (uintptr_t)target_path);
			}
			if (link_to_path != NULL) {
				RELEASE_PATH(link_to_path);
			}
		}
#if CONFIG_FSE
		if (need_event) {
			/* construct fsevent */
			if (get_fse_info(vp, &finfo, ctx) == 0) {
				if (truncated) {
					finfo.mode |= FSE_TRUNCATED_PATH;
				}

				// build the path to the destination of the link
				add_fsevent(FSE_CREATE_FILE, ctx,
					    FSE_ARG_STRING, len, target_path,
					    FSE_ARG_FINFO, &finfo,
					    FSE_ARG_DONE);
			}
			if (vp->v_parent) {
				add_fsevent(FSE_STAT_CHANGED, ctx,
					    FSE_ARG_VNODE, vp->v_parent,
					    FSE_ARG_DONE);
			}
		}
#endif
	}
out2:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);
	if (target_path != NULL) {
		RELEASE_PATH(target_path);
	}
out:
	if (lvp)
		vnode_put(lvp);
	if (dvp)
		vnode_put(dvp);
	vnode_put(vp);
	return (error);
}
4557
4558int
4559link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4560{
4561 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4562 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4563}
4564
4565int
4566linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4567{
4568 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4569 return (EINVAL);
4570
4571 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4572 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4573}
4574
4575/*
4576 * Make a symbolic link.
4577 *
4578 * We could add support for ACLs here too...
4579 */
4580/* ARGSUSED */
static int
symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
    user_addr_t link, enum uio_seg segflg)
{
	struct vnode_attr va;
	char *path;
	int error;
	struct nameidata nd;
	vnode_t vp, dvp;
	size_t dummy=0;
	proc_t p;

	error = 0;
	/* Bring the link-target string into the kernel if it is a user pointer. */
	if (UIO_SEG_IS_USER_SPACE(segflg)) {
		MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
		error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
	} else {
		path = (char *)path_data;
	}
	if (error)
		goto out;
	AUDIT_ARG(text, path);	/* This is the link string */

	NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
	    segflg, link, ctx);

	error = nameiat(&nd, fd);
	if (error)
		goto out;
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	p = vfs_context_proc(ctx);
	VATTR_INIT(&va);
	VATTR_SET(&va, va_type, VLNK);
	/* Symlink permissions start from ACCESSPERMS filtered by the umask. */
	VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);

#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    dvp, &nd.ni_cnd, &va);
#endif
	if (error != 0) {
		goto skipit;
	}

	/* The link name must not already exist. */
	if (vp != NULL) {
		error = EEXIST;
		goto skipit;
	}

	/* authorize */
	if (error == 0)
		error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
	/* get default ownership, etc. */
	if (error == 0)
		error = vnode_authattr_new(dvp, &va, 0, ctx);
	if (error == 0)
		error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);

#if CONFIG_MACF
	if (error == 0 && vp)
		error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
#endif

	/* do fallback attribute handling */
	if (error == 0 && vp)
		error = vnode_setattr_fallback(vp, &va, ctx);

	if (error == 0) {
		int update_flags = 0;

		/*check if a new vnode was created, else try to get one*/
		/* (some filesystems do not return the new vnode from VNOP_SYMLINK) */
		if (vp == NULL) {
			nd.ni_cnd.cn_nameiop = LOOKUP;
#if CONFIG_TRIGGERS
			nd.ni_op = OP_LOOKUP;
#endif
			nd.ni_cnd.cn_flags = 0;
			error = nameiat(&nd, fd);
			vp = nd.ni_vp;

			if (vp == NULL)
				goto skipit;
		}

#if 0  /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
		/* call out to allow 3rd party notification of rename.
		 * Ignore result of kauth_authorize_fileop call.
		 */
		if (kauth_authorize_fileop_has_listeners() &&
		    namei(&nd) == 0) {
			char *new_link_path = NULL;
			int len;

			/* build the path to the new link file */
			new_link_path = get_pathbuff();
			len = MAXPATHLEN;
			vn_getpath(dvp, new_link_path, &len);
			if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
				new_link_path[len - 1] = '/';
				strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
			}

			kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
					       (uintptr_t)path, (uintptr_t)new_link_path);
			if (new_link_path != NULL)
				release_pathbuff(new_link_path);
		}
#endif
		// Make sure the name & parent pointers are hooked up
		if (vp->v_name == NULL)
			update_flags |= VNODE_UPDATE_NAME;
		if (vp->v_parent == NULLVP)
			update_flags |= VNODE_UPDATE_PARENT;

		if (update_flags)
			vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
		add_fsevent(FSE_CREATE_FILE, ctx,
		    FSE_ARG_VNODE, vp,
		    FSE_ARG_DONE);
#endif
	}

skipit:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	vnode_put(dvp);
out:
	/* Only free the buffer if we allocated it (user-space source). */
	if (path && (path != (char *)path_data))
		FREE_ZONE(path, MAXPATHLEN, M_NAMEI);

	return (error);
}
4722
4723int
4724symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4725{
4726 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4727 uap->link, UIO_USERSPACE));
4728}
4729
4730int
4731symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4732 __unused int32_t *retval)
4733{
4734 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4735 uap->path2, UIO_USERSPACE));
4736}
4737
4738/*
4739 * Delete a whiteout from the filesystem.
4740 * No longer supported.
4741 */
4742int
4743undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
4744{
4745 return (ENOTSUP);
4746}
4747
4748/*
4749 * Delete a name from the filesystem.
4750 */
4751/* ARGSUSED */
4752static int
4753unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
4754 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
4755{
4756 struct nameidata nd;
4757 vnode_t vp, dvp;
4758 int error;
4759 struct componentname *cnp;
4760 char *path = NULL;
4761 int len=0;
4762#if CONFIG_FSE
4763 fse_info finfo;
4764 struct vnode_attr va;
4765#endif
4766 int flags;
4767 int need_event;
4768 int has_listeners;
4769 int truncated_path;
4770 int batched;
4771 struct vnode_attr *vap;
4772 int do_retry;
4773 int retry_count = 0;
4774 int cn_flags;
4775
4776 cn_flags = LOCKPARENT;
4777 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
4778 cn_flags |= AUDITVNPATH1;
4779 /* If a starting dvp is passed, it trumps any fd passed. */
4780 if (start_dvp)
4781 cn_flags |= USEDVP;
4782
4783#if NAMEDRSRCFORK
4784 /* unlink or delete is allowed on rsrc forks and named streams */
4785 cn_flags |= CN_ALLOWRSRCFORK;
4786#endif
4787
4788retry:
4789 do_retry = 0;
4790 flags = 0;
4791 need_event = 0;
4792 has_listeners = 0;
4793 truncated_path = 0;
4794 vap = NULL;
4795
4796 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
4797
4798 nd.ni_dvp = start_dvp;
4799 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
4800 cnp = &nd.ni_cnd;
4801
4802continue_lookup:
4803 error = nameiat(&nd, fd);
4804 if (error)
4805 return (error);
4806
4807 dvp = nd.ni_dvp;
4808 vp = nd.ni_vp;
4809
4810
4811 /* With Carbon delete semantics, busy files cannot be deleted */
4812 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
4813 flags |= VNODE_REMOVE_NODELETEBUSY;
4814 }
4815
4816 /* Skip any potential upcalls if told to. */
4817 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4818 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4819 }
4820
4821 if (vp) {
4822 batched = vnode_compound_remove_available(vp);
4823 /*
4824 * The root of a mounted filesystem cannot be deleted.
4825 */
4826 if (vp->v_flag & VROOT) {
4827 error = EBUSY;
4828 }
4829
4830#if DEVELOPMENT || DEBUG
4831 /*
4832 * XXX VSWAP: Check for entitlements or special flag here
4833 * so we can restrict access appropriately.
4834 */
4835#else /* DEVELOPMENT || DEBUG */
4836
4837 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
4838 error = EPERM;
4839 goto out;
4840 }
4841#endif /* DEVELOPMENT || DEBUG */
4842
4843 if (!batched) {
4844 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4845 if (error) {
4846 if (error == ENOENT) {
4847 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4848 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4849 do_retry = 1;
4850 retry_count++;
4851 }
4852 }
4853 goto out;
4854 }
4855 }
4856 } else {
4857 batched = 1;
4858
4859 if (!vnode_compound_remove_available(dvp)) {
4860 panic("No vp, but no compound remove?");
4861 }
4862 }
4863
4864#if CONFIG_FSE
4865 need_event = need_fsevent(FSE_DELETE, dvp);
4866 if (need_event) {
4867 if (!batched) {
4868 if ((vp->v_flag & VISHARDLINK) == 0) {
4869 /* XXX need to get these data in batched VNOP */
4870 get_fse_info(vp, &finfo, ctx);
4871 }
4872 } else {
4873 error = vfs_get_notify_attributes(&va);
4874 if (error) {
4875 goto out;
4876 }
4877
4878 vap = &va;
4879 }
4880 }
4881#endif
4882 has_listeners = kauth_authorize_fileop_has_listeners();
4883 if (need_event || has_listeners) {
4884 if (path == NULL) {
4885 GET_PATH(path);
4886 if (path == NULL) {
4887 error = ENOMEM;
4888 goto out;
4889 }
4890 }
4891 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
4892 }
4893
4894#if NAMEDRSRCFORK
4895 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
4896 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4897 else
4898#endif
4899 {
4900 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
4901 vp = nd.ni_vp;
4902 if (error == EKEEPLOOKING) {
4903 if (!batched) {
4904 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4905 }
4906
4907 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
4908 panic("EKEEPLOOKING, but continue flag not set?");
4909 }
4910
4911 if (vnode_isdir(vp)) {
4912 error = EISDIR;
4913 goto out;
4914 }
4915 goto continue_lookup;
4916 } else if (error == ENOENT && batched) {
4917 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4918 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4919 /*
4920 * For compound VNOPs, the authorization callback may
4921 * return ENOENT in case of racing hardlink lookups
4922 * hitting the name cache, redrive the lookup.
4923 */
4924 do_retry = 1;
4925 retry_count += 1;
4926 goto out;
4927 }
4928 }
4929 }
4930
4931 /*
4932 * Call out to allow 3rd party notification of delete.
4933 * Ignore result of kauth_authorize_fileop call.
4934 */
4935 if (!error) {
4936 if (has_listeners) {
4937 kauth_authorize_fileop(vfs_context_ucred(ctx),
4938 KAUTH_FILEOP_DELETE,
4939 (uintptr_t)vp,
4940 (uintptr_t)path);
4941 }
4942
4943 if (vp->v_flag & VISHARDLINK) {
4944 //
4945 // if a hardlink gets deleted we want to blow away the
4946 // v_parent link because the path that got us to this
4947 // instance of the link is no longer valid. this will
4948 // force the next call to get the path to ask the file
4949 // system instead of just following the v_parent link.
4950 //
4951 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
4952 }
4953
4954#if CONFIG_FSE
4955 if (need_event) {
4956 if (vp->v_flag & VISHARDLINK) {
4957 get_fse_info(vp, &finfo, ctx);
4958 } else if (vap) {
4959 vnode_get_fse_info_from_vap(vp, &finfo, vap);
4960 }
4961 if (truncated_path) {
4962 finfo.mode |= FSE_TRUNCATED_PATH;
4963 }
4964 add_fsevent(FSE_DELETE, ctx,
4965 FSE_ARG_STRING, len, path,
4966 FSE_ARG_FINFO, &finfo,
4967 FSE_ARG_DONE);
4968 }
4969#endif
4970 }
4971
4972out:
4973 if (path != NULL)
4974 RELEASE_PATH(path);
4975
4976#if NAMEDRSRCFORK
4977 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4978 * will cause its shadow file to go away if necessary.
4979 */
4980 if (vp && (vnode_isnamedstream(vp)) &&
4981 (vp->v_parent != NULLVP) &&
4982 vnode_isshadow(vp)) {
4983 vnode_recycle(vp);
4984 }
4985#endif
4986 /*
4987 * nameidone has to happen before we vnode_put(dvp)
4988 * since it may need to release the fs_nodelock on the dvp
4989 */
4990 nameidone(&nd);
4991 vnode_put(dvp);
4992 if (vp) {
4993 vnode_put(vp);
4994 }
4995
4996 if (do_retry) {
4997 goto retry;
4998 }
4999
5000 return (error);
5001}
5002
5003int
5004unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
5005 enum uio_seg segflg, int unlink_flags)
5006{
5007 return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
5008 unlink_flags));
5009}
5010
5011/*
5012 * Delete a name from the filesystem using Carbon semantics.
5013 */
5014int
5015delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
5016{
5017 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
5018 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
5019}
5020
5021/*
5022 * Delete a name from the filesystem using POSIX semantics.
5023 */
5024int
5025unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
5026{
5027 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
5028 uap->path, UIO_USERSPACE, 0));
5029}
5030
5031int
5032unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
5033{
5034 if (uap->flag & ~AT_REMOVEDIR)
5035 return (EINVAL);
5036
5037 if (uap->flag & AT_REMOVEDIR)
5038 return (rmdirat_internal(vfs_context_current(), uap->fd,
5039 uap->path, UIO_USERSPACE));
5040 else
5041 return (unlinkat_internal(vfs_context_current(), uap->fd,
5042 NULLVP, uap->path, UIO_USERSPACE, 0));
5043}
5044
5045/*
5046 * Reposition read/write file offset.
5047 */
5048int
5049lseek(proc_t p, struct lseek_args *uap, off_t *retval)
5050{
5051 struct fileproc *fp;
5052 vnode_t vp;
5053 struct vfs_context *ctx;
5054 off_t offset = uap->offset, file_size;
5055 int error;
5056
5057 if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
5058 if (error == ENOTSUP)
5059 return (ESPIPE);
5060 return (error);
5061 }
5062 if (vnode_isfifo(vp)) {
5063 file_drop(uap->fd);
5064 return(ESPIPE);
5065 }
5066
5067
5068 ctx = vfs_context_current();
5069#if CONFIG_MACF
5070 if (uap->whence == L_INCR && uap->offset == 0)
5071 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
5072 fp->f_fglob);
5073 else
5074 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
5075 fp->f_fglob);
5076 if (error) {
5077 file_drop(uap->fd);
5078 return (error);
5079 }
5080#endif
5081 if ( (error = vnode_getwithref(vp)) ) {
5082 file_drop(uap->fd);
5083 return(error);
5084 }
5085
5086 switch (uap->whence) {
5087 case L_INCR:
5088 offset += fp->f_fglob->fg_offset;
5089 break;
5090 case L_XTND:
5091 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
5092 break;
5093 offset += file_size;
5094 break;
5095 case L_SET:
5096 break;
5097 case SEEK_HOLE:
5098 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKHOLE, (caddr_t)&offset, 0, ctx);
5099 break;
5100 case SEEK_DATA:
5101 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKDATA, (caddr_t)&offset, 0, ctx);
5102 break;
5103 default:
5104 error = EINVAL;
5105 }
5106 if (error == 0) {
5107 if (uap->offset > 0 && offset < 0) {
5108 /* Incremented/relative move past max size */
5109 error = EOVERFLOW;
5110 } else {
5111 /*
5112 * Allow negative offsets on character devices, per
5113 * POSIX 1003.1-2001. Most likely for writing disk
5114 * labels.
5115 */
5116 if (offset < 0 && vp->v_type != VCHR) {
5117 /* Decremented/relative move before start */
5118 error = EINVAL;
5119 } else {
5120 /* Success */
5121 fp->f_fglob->fg_offset = offset;
5122 *retval = fp->f_fglob->fg_offset;
5123 }
5124 }
5125 }
5126
5127 /*
5128 * An lseek can affect whether data is "available to read." Use
5129 * hint of NOTE_NONE so no EVFILT_VNODE events fire
5130 */
5131 post_event_if_success(vp, error, NOTE_NONE);
5132 (void)vnode_put(vp);
5133 file_drop(uap->fd);
5134 return (error);
5135}
5136
5137
5138/*
5139 * Check access permissions.
5140 *
5141 * Returns: 0 Success
5142 * vnode_authorize:???
5143 */
5144static int
5145access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
5146{
5147 kauth_action_t action;
5148 int error;
5149
5150 /*
5151 * If just the regular access bits, convert them to something
5152 * that vnode_authorize will understand.
5153 */
5154 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
5155 action = 0;
5156 if (uflags & R_OK)
5157 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5158 if (uflags & W_OK) {
5159 if (vnode_isdir(vp)) {
5160 action |= KAUTH_VNODE_ADD_FILE |
5161 KAUTH_VNODE_ADD_SUBDIRECTORY;
5162 /* might want delete rights here too */
5163 } else {
5164 action |= KAUTH_VNODE_WRITE_DATA;
5165 }
5166 }
5167 if (uflags & X_OK) {
5168 if (vnode_isdir(vp)) {
5169 action |= KAUTH_VNODE_SEARCH;
5170 } else {
5171 action |= KAUTH_VNODE_EXECUTE;
5172 }
5173 }
5174 } else {
5175 /* take advantage of definition of uflags */
5176 action = uflags >> 8;
5177 }
5178
5179#if CONFIG_MACF
5180 error = mac_vnode_check_access(ctx, vp, uflags);
5181 if (error)
5182 return (error);
5183#endif /* MAC */
5184
5185 /* action == 0 means only check for existence */
5186 if (action != 0) {
5187 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
5188 } else {
5189 error = 0;
5190 }
5191
5192 return(error);
5193}
5194
5195
5196
5197/*
5198 * access_extended: Check access permissions in bulk.
5199 *
5200 * Description: uap->entries Pointer to an array of accessx
5201 * descriptor structs, plus one or
5202 * more NULL terminated strings (see
5203 * "Notes" section below).
5204 * uap->size Size of the area pointed to by
5205 * uap->entries.
5206 * uap->results Pointer to the results array.
5207 *
5208 * Returns: 0 Success
5209 * ENOMEM Insufficient memory
5210 * EINVAL Invalid arguments
5211 * namei:EFAULT Bad address
5212 * namei:ENAMETOOLONG Filename too long
5213 * namei:ENOENT No such file or directory
5214 * namei:ELOOP Too many levels of symbolic links
5215 * namei:EBADF Bad file descriptor
5216 * namei:ENOTDIR Not a directory
5217 * namei:???
5218 * access1:
5219 *
5220 * Implicit returns:
5221 * uap->results Array contents modified
5222 *
5223 * Notes: The uap->entries are structured as an arbitrary length array
5224 * of accessx descriptors, followed by one or more NULL terminated
5225 * strings
5226 *
5227 * struct accessx_descriptor[0]
5228 * ...
5229 * struct accessx_descriptor[n]
5230 * char name_data[0];
5231 *
5232 * We determine the entry count by walking the buffer containing
5233 * the uap->entries argument descriptor. For each descriptor we
5234 * see, the valid values for the offset ad_name_offset will be
5235 * in the byte range:
5236 *
5237 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5238 * to
5239 * [ uap->entries + uap->size - 2 ]
5240 *
5241 * since we must have at least one string, and the string must
5242 * be at least one character plus the NULL terminator in length.
5243 *
5244 * XXX: Need to support the check-as uid argument
5245 */
int
access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
{
	struct accessx_descriptor *input = NULL;
	errno_t *result = NULL;
	errno_t error = 0;
	int wantdelete = 0;
	unsigned int desc_max, desc_actual, i, j;
	struct vfs_context context;
	struct nameidata nd;
	int niopts;
	vnode_t vp = NULL;
	vnode_t dvp = NULL;
#define ACCESSX_MAX_DESCR_ON_STACK 10
	struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];

	/* must be NULL-safe for the cleanup path below */
	context.vc_ucred = NULL;

	/*
	 * Validate parameters; if valid, copy the descriptor array and string
	 * arguments into local memory. Before proceeding, the following
	 * conditions must have been met:
	 *
	 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
	 * o There must be sufficient room in the request for at least one
	 *   descriptor and a one byte NUL terminated string.
	 * o The allocation of local storage must not fail.
	 */
	if (uap->size > ACCESSX_MAX_TABLESIZE)
		return(ENOMEM);
	if (uap->size < (sizeof(struct accessx_descriptor) + 2))
		return(EINVAL);
	if (uap->size <= sizeof (stack_input)) {
		/* small requests are served from the on-stack buffer */
		input = stack_input;
	} else {
		MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
		if (input == NULL) {
			error = ENOMEM;
			goto out;
		}
	}
	error = copyin(uap->entries, input, uap->size);
	if (error)
		goto out;

	AUDIT_ARG(opaque, input, uap->size);

	/*
	 * Force NUL termination of the copyin buffer to avoid namei() running
	 * off the end.  If the caller passes us bogus data, they may get a
	 * bogus result.
	 */
	((char *)input)[uap->size - 1] = 0;

	/*
	 * Access is defined as checking against the process' real identity,
	 * even if operations are checking the effective identity.  This
	 * requires that we use a local vfs context.
	 */
	context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
	context.vc_thread = current_thread();

	/*
	 * Find out how many entries we have, so we can allocate the result
	 * array by walking the list and adjusting the count downward by the
	 * earliest string offset we see.
	 */
	desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
	desc_actual = desc_max;
	for (i = 0; i < desc_actual; i++) {
		/*
		 * Take the offset to the name string for this entry and
		 * convert to an input array index, which would be one off
		 * the end of the array if this entry was the lowest-addressed
		 * name string.
		 */
		j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);

		/*
		 * An offset greater than the max allowable offset is an error.
		 * It is also an error for any valid entry to point
		 * to a location prior to the end of the current entry, if
		 * it's not a reference to the string of the previous entry.
		 */
		if (j > desc_max || (j != 0 && j <= i)) {
			error = EINVAL;
			goto out;
		}

		/* Also do not let ad_name_offset point to something beyond the size of the input */
		if (input[i].ad_name_offset >= uap->size) {
			error = EINVAL;
			goto out;
		}

		/*
		 * An offset of 0 means use the previous descriptor's offset;
		 * this is used to chain multiple requests for the same file
		 * to avoid multiple lookups.
		 */
		if (j == 0) {
			/* This is not valid for the first entry */
			if (i == 0) {
				error = EINVAL;
				goto out;
			}
			continue;
		}

		/*
		 * If the offset of the string for this descriptor is before
		 * what we believe is the current actual last descriptor,
		 * then we need to adjust our estimate downward; this permits
		 * the string table following the last descriptor to be out
		 * of order relative to the descriptor list.
		 */
		if (j < desc_actual)
			desc_actual = j;
	}

	/*
	 * We limit the actual number of descriptors we are willing to process
	 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS.  If the number being
	 * requested does not exceed this limit,
	 */
	if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
		error = ENOMEM;
		goto out;
	}
	MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK | M_ZERO);
	if (result == NULL) {
		error = ENOMEM;
		goto out;
	}

	/*
	 * Do the work by iterating over the descriptor entries we know to
	 * at least appear to contain valid data.
	 */
	error = 0;
	for (i = 0; i < desc_actual; i++) {
		/*
		 * If the ad_name_offset is 0, then we use the previous
		 * results to make the check; otherwise, we are looking up
		 * a new file name.
		 */
		if (input[i].ad_name_offset != 0) {
			/* discard old vnodes */
			if (vp) {
				vnode_put(vp);
				vp = NULL;
			}
			if (dvp) {
				vnode_put(dvp);
				dvp = NULL;
			}

			/*
			 * Scan forward in the descriptor list to see if we
			 * need the parent vnode.  We will need it if we are
			 * deleting, since we must have rights to remove
			 * entries in the parent directory, as well as the
			 * rights to delete the object itself.
			 */
			wantdelete = input[i].ad_flags & _DELETE_OK;
			for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
				if (input[j].ad_flags & _DELETE_OK)
					wantdelete = 1;

			niopts = FOLLOW | AUDITVNPATH1;

			/* need parent for vnode_authorize for deletion test */
			if (wantdelete)
				niopts |= WANTPARENT;

			/* do the lookup */
			NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
			       CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
			       &context);
			error = namei(&nd);
			if (!error) {
				vp = nd.ni_vp;
				if (wantdelete)
					dvp = nd.ni_dvp;
			}
			nameidone(&nd);
		}

		/*
		 * Handle lookup errors.  The "expected" lookup failures are
		 * reported per-descriptor; anything else aborts the call.
		 */
		switch(error) {
		case ENOENT:
		case EACCES:
		case EPERM:
		case ENOTDIR:
			result[i] = error;
			break;
		case 0:
			/* run this access check */
			result[i] = access1(vp, dvp, input[i].ad_flags, &context);
			break;
		default:
			/* fatal lookup error */

			goto out;
		}
	}

	AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);

	/* copy out results */
	error = copyout(result, uap->results, desc_actual * sizeof(errno_t));

out:
	if (input && input != stack_input)
		FREE(input, M_TEMP);
	if (result)
		FREE(result, M_TEMP);
	if (vp)
		vnode_put(vp);
	if (dvp)
		vnode_put(dvp);
	if (IS_VALID_CRED(context.vc_ucred))
		kauth_cred_unref(&context.vc_ucred);
	return(error);
}
5473
5474
5475/*
5476 * Returns: 0 Success
5477 * namei:EFAULT Bad address
5478 * namei:ENAMETOOLONG Filename too long
5479 * namei:ENOENT No such file or directory
5480 * namei:ELOOP Too many levels of symbolic links
5481 * namei:EBADF Bad file descriptor
5482 * namei:ENOTDIR Not a directory
5483 * namei:???
5484 * access1:
5485 */
static int
faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
    int flag, enum uio_seg segflg)
{
	/*
	 * Common implementation for access() and faccessat(): check access
	 * permissions for 'path' (relative to fd) using the process' real
	 * identity unless AT_EACCESS was specified.
	 */
	int error;
	struct nameidata nd;
	int niopts;
	struct vfs_context context;
#if NAMEDRSRCFORK
	int is_namedstream = 0;
#endif

	/*
	 * Unless the AT_EACCESS option is used, Access is defined as checking
	 * against the process' real identity, even if operations are checking
	 * the effective identity.  So we need to tweak the credential
	 * in the context for that case.
	 */
	if (!(flag & AT_EACCESS))
		context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
	else
		context.vc_ucred = ctx->vc_ucred;
	context.vc_thread = ctx->vc_thread;


	niopts = FOLLOW | AUDITVNPATH1;
	/* need parent for vnode_authorize for deletion test */
	if (amode & _DELETE_OK)
		niopts |= WANTPARENT;
	NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
	       path, &context);

#if NAMEDRSRCFORK
	/* access(F_OK) calls are allowed for resource forks. */
	if (amode == F_OK)
		nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
#endif
	error = nameiat(&nd, fd);
	if (error)
		goto out;

#if NAMEDRSRCFORK
	/* Grab reference on the shadow stream file vnode to
	 * force an inactive on release which will mark it
	 * for recycle.
	 */
	if (vnode_isnamedstream(nd.ni_vp) &&
	    (nd.ni_vp->v_parent != NULLVP) &&
	    vnode_isshadow(nd.ni_vp)) {
		is_namedstream = 1;
		vnode_ref(nd.ni_vp);
	}
#endif

	/* the actual permission check */
	error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);

#if NAMEDRSRCFORK
	if (is_namedstream) {
		vnode_rele(nd.ni_vp);
	}
#endif

	vnode_put(nd.ni_vp);
	/* dvp was only taken when the deletion test needed the parent */
	if (amode & _DELETE_OK)
		vnode_put(nd.ni_dvp);
	nameidone(&nd);

out:
	/* drop the real-identity credential we copied above */
	if (!(flag & AT_EACCESS))
		kauth_cred_unref(&context.vc_ucred);
	return (error);
}
5558
5559int
5560access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5561{
5562 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5563 uap->path, uap->flags, 0, UIO_USERSPACE));
5564}
5565
5566int
5567faccessat(__unused proc_t p, struct faccessat_args *uap,
5568 __unused int32_t *retval)
5569{
5570 if (uap->flag & ~AT_EACCESS)
5571 return (EINVAL);
5572
5573 return (faccessat_internal(vfs_context_current(), uap->fd,
5574 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5575}
5576
5577/*
5578 * Returns: 0 Success
5579 * EFAULT
5580 * copyout:EFAULT
5581 * namei:???
5582 * vn_stat:???
5583 */
5584static int
5585fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5586 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5587 enum uio_seg segflg, int fd, int flag)
5588{
5589 struct nameidata nd;
5590 int follow;
5591 union {
5592 struct stat sb;
5593 struct stat64 sb64;
5594 } source = {};
5595 union {
5596 struct user64_stat user64_sb;
5597 struct user32_stat user32_sb;
5598 struct user64_stat64 user64_sb64;
5599 struct user32_stat64 user32_sb64;
5600 } dest = {};
5601 caddr_t sbp;
5602 int error, my_size;
5603 kauth_filesec_t fsec;
5604 size_t xsecurity_bufsize;
5605 void * statptr;
5606
5607 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5608 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5609 segflg, path, ctx);
5610
5611#if NAMEDRSRCFORK
5612 int is_namedstream = 0;
5613 /* stat calls are allowed for resource forks. */
5614 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5615#endif
5616 error = nameiat(&nd, fd);
5617 if (error)
5618 return (error);
5619 fsec = KAUTH_FILESEC_NONE;
5620
5621 statptr = (void *)&source;
5622
5623#if NAMEDRSRCFORK
5624 /* Grab reference on the shadow stream file vnode to
5625 * force an inactive on release which will mark it
5626 * for recycle.
5627 */
5628 if (vnode_isnamedstream(nd.ni_vp) &&
5629 (nd.ni_vp->v_parent != NULLVP) &&
5630 vnode_isshadow(nd.ni_vp)) {
5631 is_namedstream = 1;
5632 vnode_ref(nd.ni_vp);
5633 }
5634#endif
5635
5636 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
5637
5638#if NAMEDRSRCFORK
5639 if (is_namedstream) {
5640 vnode_rele(nd.ni_vp);
5641 }
5642#endif
5643 vnode_put(nd.ni_vp);
5644 nameidone(&nd);
5645
5646 if (error)
5647 return (error);
5648 /* Zap spare fields */
5649 if (isstat64 != 0) {
5650 source.sb64.st_lspare = 0;
5651 source.sb64.st_qspare[0] = 0LL;
5652 source.sb64.st_qspare[1] = 0LL;
5653 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5654 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
5655 my_size = sizeof(dest.user64_sb64);
5656 sbp = (caddr_t)&dest.user64_sb64;
5657 } else {
5658 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
5659 my_size = sizeof(dest.user32_sb64);
5660 sbp = (caddr_t)&dest.user32_sb64;
5661 }
5662 /*
5663 * Check if we raced (post lookup) against the last unlink of a file.
5664 */
5665 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5666 source.sb64.st_nlink = 1;
5667 }
5668 } else {
5669 source.sb.st_lspare = 0;
5670 source.sb.st_qspare[0] = 0LL;
5671 source.sb.st_qspare[1] = 0LL;
5672 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5673 munge_user64_stat(&source.sb, &dest.user64_sb);
5674 my_size = sizeof(dest.user64_sb);
5675 sbp = (caddr_t)&dest.user64_sb;
5676 } else {
5677 munge_user32_stat(&source.sb, &dest.user32_sb);
5678 my_size = sizeof(dest.user32_sb);
5679 sbp = (caddr_t)&dest.user32_sb;
5680 }
5681
5682 /*
5683 * Check if we raced (post lookup) against the last unlink of a file.
5684 */
5685 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5686 source.sb.st_nlink = 1;
5687 }
5688 }
5689 if ((error = copyout(sbp, ub, my_size)) != 0)
5690 goto out;
5691
5692 /* caller wants extended security information? */
5693 if (xsecurity != USER_ADDR_NULL) {
5694
5695 /* did we get any? */
5696 if (fsec == KAUTH_FILESEC_NONE) {
5697 if (susize(xsecurity_size, 0) != 0) {
5698 error = EFAULT;
5699 goto out;
5700 }
5701 } else {
5702 /* find the user buffer size */
5703 xsecurity_bufsize = fusize(xsecurity_size);
5704
5705 /* copy out the actual data size */
5706 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5707 error = EFAULT;
5708 goto out;
5709 }
5710
5711 /* if the caller supplied enough room, copy out to it */
5712 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5713 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5714 }
5715 }
5716out:
5717 if (fsec != KAUTH_FILESEC_NONE)
5718 kauth_filesec_free(fsec);
5719 return (error);
5720}
5721
5722/*
5723 * stat_extended: Get file status; with extended security (ACL).
5724 *
5725 * Parameters: p (ignored)
5726 * uap User argument descriptor (see below)
5727 * retval (ignored)
5728 *
5729 * Indirect: uap->path Path of file to get status from
5730 * uap->ub User buffer (holds file status info)
5731 * uap->xsecurity ACL to get (extended security)
5732 * uap->xsecurity_size Size of ACL
5733 *
5734 * Returns: 0 Success
5735 * !0 errno value
5736 *
5737 */
5738int
5739stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5740 __unused int32_t *retval)
5741{
5742 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5743 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5744 0));
5745}
5746
5747/*
5748 * Returns: 0 Success
5749 * fstatat_internal:??? [see fstatat_internal() in this file]
5750 */
5751int
5752stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
5753{
5754 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5755 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
5756}
5757
5758int
5759stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
5760{
5761 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5762 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
5763}
5764
5765/*
5766 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5767 *
5768 * Parameters: p (ignored)
5769 * uap User argument descriptor (see below)
5770 * retval (ignored)
5771 *
5772 * Indirect: uap->path Path of file to get status from
5773 * uap->ub User buffer (holds file status info)
5774 * uap->xsecurity ACL to get (extended security)
5775 * uap->xsecurity_size Size of ACL
5776 *
5777 * Returns: 0 Success
5778 * !0 errno value
5779 *
5780 */
5781int
5782stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
5783{
5784 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5785 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5786 0));
5787}
5788
5789/*
5790 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5791 *
5792 * Parameters: p (ignored)
5793 * uap User argument descriptor (see below)
5794 * retval (ignored)
5795 *
5796 * Indirect: uap->path Path of file to get status from
5797 * uap->ub User buffer (holds file status info)
5798 * uap->xsecurity ACL to get (extended security)
5799 * uap->xsecurity_size Size of ACL
5800 *
5801 * Returns: 0 Success
5802 * !0 errno value
5803 *
5804 */
5805int
5806lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
5807{
5808 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5809 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5810 AT_SYMLINK_NOFOLLOW));
5811}
5812
5813/*
5814 * Get file status; this version does not follow links.
5815 */
5816int
5817lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
5818{
5819 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5820 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5821}
5822
5823int
5824lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
5825{
5826 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5827 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5828}
5829
5830/*
5831 * lstat64_extended: Get file status; can handle large inode numbers; does not
5832 * follow links; with extended security (ACL).
5833 *
5834 * Parameters: p (ignored)
5835 * uap User argument descriptor (see below)
5836 * retval (ignored)
5837 *
5838 * Indirect: uap->path Path of file to get status from
5839 * uap->ub User buffer (holds file status info)
5840 * uap->xsecurity ACL to get (extended security)
5841 * uap->xsecurity_size Size of ACL
5842 *
5843 * Returns: 0 Success
5844 * !0 errno value
5845 *
5846 */
5847int
5848lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
5849{
5850 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5851 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5852 AT_SYMLINK_NOFOLLOW));
5853}
5854
5855int
5856fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5857{
5858 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5859 return (EINVAL);
5860
5861 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5862 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5863}
5864
5865int
5866fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5867 __unused int32_t *retval)
5868{
5869 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5870 return (EINVAL);
5871
5872 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5873 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
5874}
5875
5876/*
5877 * Get configurable pathname variables.
5878 *
5879 * Returns: 0 Success
5880 * namei:???
5881 * vn_pathconf:???
5882 *
5883 * Notes: Global implementation constants are intended to be
5884 * implemented in this function directly; all other constants
5885 * are per-FS implementation, and therefore must be handled in
5886 * each respective FS, instead.
5887 *
5888 * XXX We implement some things globally right now that should actually be
5889 * XXX per-FS; we will need to deal with this at some point.
5890 */
5891/* ARGSUSED */
5892int
5893pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
5894{
5895 int error;
5896 struct nameidata nd;
5897 vfs_context_t ctx = vfs_context_current();
5898
5899 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
5900 UIO_USERSPACE, uap->path, ctx);
5901 error = namei(&nd);
5902 if (error)
5903 return (error);
5904
5905 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
5906
5907 vnode_put(nd.ni_vp);
5908 nameidone(&nd);
5909 return (error);
5910}
5911
5912/*
5913 * Return target name of a symbolic link.
5914 */
5915/* ARGSUSED */
static int
readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
    enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
    int *retval)
{
	vnode_t vp;
	uio_t auio;
	int error;
	struct nameidata nd;
	char uio_buf[ UIO_SIZEOF(1) ];

	/* NOFOLLOW: we want the symlink itself, not its target. */
	NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
	    seg, path, ctx);

	error = nameiat(&nd, fd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* Stack-backed single-iovec uio describing the caller's buffer. */
	auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
	    &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, buf, bufsize);
	if (vp->v_type != VLNK) {
		error = EINVAL;
	} else {
		/*
		 * 'error' is 0 here (nameiat succeeded), so when CONFIG_MACF
		 * is disabled the authorize call below still runs.
		 */
#if CONFIG_MACF
		error = mac_vnode_check_readlink(ctx, vp);
#endif
		if (error == 0)
			error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
			    ctx);
		if (error == 0)
			error = VNOP_READLINK(vp, auio, ctx);
	}
	vnode_put(vp);

	/* Bytes actually copied = requested size minus what is left in the uio. */
	*retval = bufsize - (int)uio_resid(auio);
	return (error);
}
5957
5958int
5959readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5960{
5961 enum uio_seg procseg;
5962
5963 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5964 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5965 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5966 uap->count, procseg, retval));
5967}
5968
5969int
5970readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5971{
5972 enum uio_seg procseg;
5973
5974 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5975 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5976 procseg, uap->buf, uap->bufsize, procseg, retval));
5977}
5978
5979/*
5980 * Change file flags.
5981 *
5982 * NOTE: this will vnode_put() `vp'
5983 */
static int
chflags1(vnode_t vp, int flags, vfs_context_t ctx)
{
	struct vnode_attr va;
	kauth_action_t action;
	int error;

	VATTR_INIT(&va);
	VATTR_SET(&va, va_flags, flags);

#if CONFIG_MACF
	error = mac_vnode_check_setflags(ctx, vp, flags);
	if (error)
		goto out;
#endif

	/* request authorization, disregard immutability */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	/*
	 * Request that the auth layer disregard those file flags it's allowed to when
	 * authorizing this operation; we need to do this in order to be able to
	 * clear immutable flags.
	 */
	if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
		goto out;
	error = vnode_setattr(vp, &va, ctx);

#if CONFIG_MACF
	if (error == 0)
		mac_vnode_notify_setflags(ctx, vp, flags);
#endif

	/* If the FS silently ignored va_flags, report lack of support. */
	if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
		error = ENOTSUP;
	}
out:
	/* NOTE: this function consumes the caller's iocount on vp. */
	vnode_put(vp);
	return(error);
}
6024
6025/*
6026 * Change flags of a file given a path name.
6027 */
6028/* ARGSUSED */
6029int
6030chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
6031{
6032 vnode_t vp;
6033 vfs_context_t ctx = vfs_context_current();
6034 int error;
6035 struct nameidata nd;
6036
6037 AUDIT_ARG(fflags, uap->flags);
6038 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
6039 UIO_USERSPACE, uap->path, ctx);
6040 error = namei(&nd);
6041 if (error)
6042 return (error);
6043 vp = nd.ni_vp;
6044 nameidone(&nd);
6045
6046 /* we don't vnode_put() here because chflags1 does internally */
6047 error = chflags1(vp, uap->flags, ctx);
6048
6049 return(error);
6050}
6051
6052/*
6053 * Change flags of a file given a file descriptor.
6054 */
6055/* ARGSUSED */
6056int
6057fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
6058{
6059 vnode_t vp;
6060 int error;
6061
6062 AUDIT_ARG(fd, uap->fd);
6063 AUDIT_ARG(fflags, uap->flags);
6064 if ( (error = file_vnode(uap->fd, &vp)) )
6065 return (error);
6066
6067 if ((error = vnode_getwithref(vp))) {
6068 file_drop(uap->fd);
6069 return(error);
6070 }
6071
6072 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6073
6074 /* we don't vnode_put() here because chflags1 does internally */
6075 error = chflags1(vp, uap->flags, vfs_context_current());
6076
6077 file_drop(uap->fd);
6078 return (error);
6079}
6080
6081/*
6082 * Change security information on a filesystem object.
6083 *
6084 * Returns: 0 Success
6085 * EPERM Operation not permitted
6086 * vnode_authattr:??? [anything vnode_authattr can return]
6087 * vnode_authorize:??? [anything vnode_authorize can return]
6088 * vnode_setattr:??? [anything vnode_setattr can return]
6089 *
6090 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
6091 * translated to EPERM before being returned.
6092 */
static int
chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
{
	kauth_action_t action;
	int error;

	AUDIT_ARG(mode, vap->va_mode);
	/* XXX audit new args */

#if NAMEDSTREAMS
	/* chmod calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
		return (EPERM);
	}
#endif

#if CONFIG_MACF
	/* Run the MAC check matching whichever attributes are being changed. */
	if (VATTR_IS_ACTIVE(vap, va_mode) &&
	    (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
		return (error);

	if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
		if ((error = mac_vnode_check_setowner(ctx, vp,
		    VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
		    VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1)))
			return (error);
	}

	if (VATTR_IS_ACTIVE(vap, va_acl) &&
	    (error = mac_vnode_check_setacl(ctx, vp, vap->va_acl)))
		return (error);
#endif

	/* make sure that the caller is allowed to set this security information */
	if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
	    ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		/* permission failures on chmod report EPERM, not EACCES */
		if (error == EACCES)
			error = EPERM;
		return(error);
	}

	if ((error = vnode_setattr(vp, vap, ctx)) != 0)
		return (error);

#if CONFIG_MACF
	/* Notify MAC policies of the successful changes. */
	if (VATTR_IS_ACTIVE(vap, va_mode))
		mac_vnode_notify_setmode(ctx, vp, (mode_t)vap->va_mode);

	if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))
		mac_vnode_notify_setowner(ctx, vp,
		    VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
		    VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1);

	if (VATTR_IS_ACTIVE(vap, va_acl))
		mac_vnode_notify_setacl(ctx, vp, vap->va_acl);
#endif

	return (error);
}
6152
6153
6154/*
6155 * Change mode of a file given a path name.
6156 *
6157 * Returns: 0 Success
6158 * namei:??? [anything namei can return]
6159 * chmod_vnode:??? [anything chmod_vnode can return]
6160 */
6161static int
6162chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
6163 int fd, int flag, enum uio_seg segflg)
6164{
6165 struct nameidata nd;
6166 int follow, error;
6167
6168 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6169 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
6170 segflg, path, ctx);
6171 if ((error = nameiat(&nd, fd)))
6172 return (error);
6173 error = chmod_vnode(ctx, nd.ni_vp, vap);
6174 vnode_put(nd.ni_vp);
6175 nameidone(&nd);
6176 return(error);
6177}
6178
6179/*
6180 * chmod_extended: Change the mode of a file given a path name; with extended
6181 * argument list (including extended security (ACL)).
6182 *
6183 * Parameters: p Process requesting the open
6184 * uap User argument descriptor (see below)
6185 * retval (ignored)
6186 *
6187 * Indirect: uap->path Path to object (same as 'chmod')
6188 * uap->uid UID to set
6189 * uap->gid GID to set
6190 * uap->mode File mode to set (same as 'chmod')
6191 * uap->xsecurity ACL to set (or delete)
6192 *
6193 * Returns: 0 Success
6194 * !0 errno value
6195 *
6196 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6197 *
 * XXX: We should enumerate the possible errno values here, and where
 * in the code they originated.
6200 */
int
chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
{
	int error;
	struct vnode_attr va;
	kauth_filesec_t xsecdst;

	AUDIT_ARG(owner, uap->uid, uap->gid);

	/* -1 / KAUTH_*_NONE act as "leave this attribute unchanged". */
	VATTR_INIT(&va);
	if (uap->mode != -1)
		VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);

	/* uap->xsecurity is a user address doubling as a sentinel. */
	xsecdst = NULL;
	switch(uap->xsecurity) {
	/* explicit remove request */
	case CAST_USER_ADDR_T((void *)1):	/* _FILESEC_REMOVE_ACL */
		VATTR_SET(&va, va_acl, NULL);
		break;
	/* not being set */
	case USER_ADDR_NULL:
		break;
	default:
		/* real user pointer: copy the filesec in and set its ACL */
		if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
			return(error);
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
		KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
	}

	error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
	    UIO_USERSPACE);

	if (xsecdst != NULL)
		kauth_filesec_free(xsecdst);
	return(error);
}
6241
6242/*
6243 * Returns: 0 Success
6244 * chmodat:??? [anything chmodat can return]
6245 */
6246static int
6247fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
6248 int flag, enum uio_seg segflg)
6249{
6250 struct vnode_attr va;
6251
6252 VATTR_INIT(&va);
6253 VATTR_SET(&va, va_mode, mode & ALLPERMS);
6254
6255 return (chmodat(ctx, path, &va, fd, flag, segflg));
6256}
6257
6258int
6259chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
6260{
6261 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6262 AT_FDCWD, 0, UIO_USERSPACE));
6263}
6264
6265int
6266fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
6267{
6268 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6269 return (EINVAL);
6270
6271 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6272 uap->fd, uap->flag, UIO_USERSPACE));
6273}
6274
6275/*
6276 * Change mode of a file given a file descriptor.
6277 */
6278static int
6279fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
6280{
6281 vnode_t vp;
6282 int error;
6283
6284 AUDIT_ARG(fd, fd);
6285
6286 if ((error = file_vnode(fd, &vp)) != 0)
6287 return (error);
6288 if ((error = vnode_getwithref(vp)) != 0) {
6289 file_drop(fd);
6290 return(error);
6291 }
6292 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6293
6294 error = chmod_vnode(vfs_context_current(), vp, vap);
6295 (void)vnode_put(vp);
6296 file_drop(fd);
6297
6298 return (error);
6299}
6300
6301/*
6302 * fchmod_extended: Change mode of a file given a file descriptor; with
6303 * extended argument list (including extended security (ACL)).
6304 *
6305 * Parameters: p Process requesting to change file mode
6306 * uap User argument descriptor (see below)
6307 * retval (ignored)
6308 *
6309 * Indirect: uap->mode File mode to set (same as 'chmod')
6310 * uap->uid UID to set
6311 * uap->gid GID to set
6312 * uap->xsecurity ACL to set (or delete)
6313 * uap->fd File descriptor of file to change mode
6314 *
6315 * Returns: 0 Success
6316 * !0 errno value
6317 *
6318 */
int
fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
{
	int error;
	struct vnode_attr va;
	kauth_filesec_t xsecdst;

	AUDIT_ARG(owner, uap->uid, uap->gid);

	/* -1 / KAUTH_*_NONE act as "leave this attribute unchanged". */
	VATTR_INIT(&va);
	if (uap->mode != -1)
		VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);

	/*
	 * uap->xsecurity is a user address doubling as a sentinel.
	 * NOTE(review): here USER_ADDR_NULL clears the ACL while -1 means
	 * "not being set" — the opposite mapping from chmod_extended above;
	 * presumably a historical difference in the two ABIs — confirm
	 * against the libc _FILESEC wrappers before unifying.
	 */
	xsecdst = NULL;
	switch(uap->xsecurity) {
	case USER_ADDR_NULL:
		VATTR_SET(&va, va_acl, NULL);
		break;
	case CAST_USER_ADDR_T((void *)1):	/* _FILESEC_REMOVE_ACL */
		VATTR_SET(&va, va_acl, NULL);
		break;
	/* not being set */
	case CAST_USER_ADDR_T(-1):
		break;
	default:
		if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
			return(error);
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
	}

	error = fchmod1(p, uap->fd, &va);


	/* xsecdst was only allocated in the 'default' case above. */
	switch(uap->xsecurity) {
	case USER_ADDR_NULL:
	case CAST_USER_ADDR_T(-1):
		break;
	default:
		if (xsecdst != NULL)
			kauth_filesec_free(xsecdst);
	}
	return(error);
}
6366
6367int
6368fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
6369{
6370 struct vnode_attr va;
6371
6372 VATTR_INIT(&va);
6373 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6374
6375 return(fchmod1(p, uap->fd, &va));
6376}
6377
6378
6379/*
6380 * Set ownership given a path name.
6381 */
6382/* ARGSUSED */
static int
fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
    gid_t gid, int flag, enum uio_seg segflg)
{
	vnode_t vp;
	struct vnode_attr va;
	int error;
	struct nameidata nd;
	int follow;
	kauth_action_t action;

	AUDIT_ARG(owner, uid, gid);

	/* AT_SYMLINK_NOFOLLOW decides whether a trailing symlink is chased. */
	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
	NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
	    path, ctx);
	error = nameiat(&nd, fd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* VNOVAL means "leave this id unchanged". */
	VATTR_INIT(&va);
	if (uid != (uid_t)VNOVAL)
		VATTR_SET(&va, va_uid, uid);
	if (gid != (gid_t)VNOVAL)
		VATTR_SET(&va, va_gid, gid);

#if CONFIG_MACF
	error = mac_vnode_check_setowner(ctx, vp, uid, gid);
	if (error)
		goto out;
#endif

	/* preflight and authorize attribute changes */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
		goto out;
	error = vnode_setattr(vp, &va, ctx);

#if CONFIG_MACF
	if (error == 0)
		mac_vnode_notify_setowner(ctx, vp, uid, gid);
#endif

out:
	/*
	 * EACCES is only allowed from namei(); permissions failure should
	 * return EPERM, so we need to translate the error code.
	 */
	if (error == EACCES)
		error = EPERM;

	vnode_put(vp);
	return (error);
}
6441
6442int
6443chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
6444{
6445 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6446 uap->uid, uap->gid, 0, UIO_USERSPACE));
6447}
6448
6449int
6450lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
6451{
6452 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6453 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6454}
6455
6456int
6457fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6458{
6459 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6460 return (EINVAL);
6461
6462 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6463 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
6464}
6465
6466/*
6467 * Set ownership given a file descriptor.
6468 */
6469/* ARGSUSED */
int
fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
{
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	vnode_t vp;
	int error;
	kauth_action_t action;

	AUDIT_ARG(owner, uap->uid, uap->gid);
	AUDIT_ARG(fd, uap->fd);

	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* VNOVAL means "leave this id unchanged". */
	VATTR_INIT(&va);
	if (uap->uid != VNOVAL)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != VNOVAL)
		VATTR_SET(&va, va_gid, uap->gid);

#if NAMEDSTREAMS
	/* chown calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
		error = EPERM;
		goto out;
	}
#endif

#if CONFIG_MACF
	error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
	if (error)
		goto out;
#endif

	/* preflight and authorize attribute changes */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		/* permission failures on chown report EPERM, not EACCES */
		if (error == EACCES)
			error = EPERM;
		goto out;
	}
	error = vnode_setattr(vp, &va, ctx);

#if CONFIG_MACF
	if (error == 0)
		mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid);
#endif

out:
	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}
6531
6532static int
6533getutimes(user_addr_t usrtvp, struct timespec *tsp)
6534{
6535 int error;
6536
6537 if (usrtvp == USER_ADDR_NULL) {
6538 struct timeval old_tv;
6539 /* XXX Y2038 bug because of microtime argument */
6540 microtime(&old_tv);
6541 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
6542 tsp[1] = tsp[0];
6543 } else {
6544 if (IS_64BIT_PROCESS(current_proc())) {
6545 struct user64_timeval tv[2];
6546 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6547 if (error)
6548 return (error);
6549 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6550 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6551 } else {
6552 struct user32_timeval tv[2];
6553 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6554 if (error)
6555 return (error);
6556 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6557 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6558 }
6559 }
6560 return 0;
6561}
6562
static int
setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
    int nullflag)
{
	int error;
	struct vnode_attr va;
	kauth_action_t action;

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* ts[0] = access time, ts[1] = modification time. */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_access_time, ts[0]);
	VATTR_SET(&va, va_modify_time, ts[1]);
	/* nullflag: caller passed a NULL timeptr, i.e. "set to now". */
	if (nullflag)
		va.va_vaflags |= VA_UTIMES_NULL;

#if NAMEDSTREAMS
	/* utimes calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
		error = EPERM;
		goto out;
	}
#endif

#if CONFIG_MACF
	error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
	if (error)
		goto out;
#endif
	/* Explicit times require ownership; report EPERM, not EACCES. */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
		if (!nullflag && error == EACCES)
			error = EPERM;
		goto out;
	}

	/* since we may not need to auth anything, check here */
	if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		if (!nullflag && error == EACCES)
			error = EPERM;
		goto out;
	}
	error = vnode_setattr(vp, &va, ctx);

#if CONFIG_MACF
	if (error == 0)
		mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]);
#endif

out:
	return error;
}
6614
6615/*
6616 * Set the access and modification times of a file.
6617 */
6618/* ARGSUSED */
int
utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
{
	struct timespec ts[2];
	user_addr_t usrtvp;
	int error;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();

	/*
	 * AUDIT: Needed to change the order of operations to do the
	 * name lookup first because auditing wants the path.
	 */
	NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	nameidone(&nd);

	/*
	 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
	 * the current time instead.
	 */
	usrtvp = uap->tptr;
	if ((error = getutimes(usrtvp, ts)) != 0)
		goto out;

	error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);

out:
	/* Drop the iocount taken by namei() on all paths. */
	vnode_put(nd.ni_vp);
	return (error);
}
6653
6654/*
6655 * Set the access and modification times of a file.
6656 */
6657/* ARGSUSED */
6658int
6659futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
6660{
6661 struct timespec ts[2];
6662 vnode_t vp;
6663 user_addr_t usrtvp;
6664 int error;
6665
6666 AUDIT_ARG(fd, uap->fd);
6667 usrtvp = uap->tptr;
6668 if ((error = getutimes(usrtvp, ts)) != 0)
6669 return (error);
6670 if ((error = file_vnode(uap->fd, &vp)) != 0)
6671 return (error);
6672 if((error = vnode_getwithref(vp))) {
6673 file_drop(uap->fd);
6674 return(error);
6675 }
6676
6677 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
6678 vnode_put(vp);
6679 file_drop(uap->fd);
6680 return(error);
6681}
6682
6683/*
6684 * Truncate a file given its path name.
6685 */
6686/* ARGSUSED */
int
truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;
	kauth_action_t action;

	/* Negative lengths are always invalid. */
	if (uap->length < 0)
		return(EINVAL);
	NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	if ((error = namei(&nd)))
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* Truncation is expressed as setting the data-size attribute. */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_data_size, uap->length);

#if CONFIG_MACF
	/* NOCRED: path-based truncate has no file credential to check. */
	error = mac_vnode_check_truncate(ctx, NOCRED, vp);
	if (error)
		goto out;
#endif

	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
		goto out;
	error = vnode_setattr(vp, &va, ctx);

#if CONFIG_MACF
	if (error == 0)
		mac_vnode_notify_truncate(ctx, NOCRED, vp);
#endif

out:
	vnode_put(vp);
	return (error);
}
6731
6732/*
6733 * Truncate a file given a file descriptor.
6734 */
6735/* ARGSUSED */
int
ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
{
	vfs_context_t ctx = vfs_context_current();
	struct vnode_attr va;
	vnode_t vp;
	struct fileproc *fp;
	int error ;
	int fd = uap->fd;

	AUDIT_ARG(fd, uap->fd);
	/* Negative lengths are always invalid. */
	if (uap->length < 0)
		return(EINVAL);

	if ( (error = fp_lookup(p,fd,&fp,0)) ) {
		return(error);
	}

	/* Only POSIX shared memory objects and vnodes may be truncated. */
	switch (FILEGLOB_DTYPE(fp->f_fglob)) {
	case DTYPE_PSXSHM:
		error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
		goto out;
	case DTYPE_VNODE:
		break;
	default:
		error = EINVAL;
		goto out;
	}

	vp = (vnode_t)fp->f_fglob->fg_data;

	/* The descriptor must have been opened for writing. */
	if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
		error = EINVAL;
		goto out;
	}

	if ((error = vnode_getwithref(vp)) != 0) {
		goto out;
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

#if CONFIG_MACF
	error = mac_vnode_check_truncate(ctx,
	    fp->f_fglob->fg_cred, vp);
	if (error) {
		(void)vnode_put(vp);
		goto out;
	}
#endif
	/* Truncation is expressed as setting the data-size attribute. */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_data_size, uap->length);
	error = vnode_setattr(vp, &va, ctx);

#if CONFIG_MACF
	if (error == 0)
		mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp);
#endif

	(void)vnode_put(vp);
out:
	file_drop(fd);
	return (error);
}
6801
6802
6803/*
6804 * Sync an open file with synchronized I/O _file_ integrity completion
6805 */
6806/* ARGSUSED */
6807int
6808fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
6809{
6810 __pthread_testcancel(1);
6811 return(fsync_common(p, uap, MNT_WAIT));
6812}
6813
6814
6815/*
6816 * Sync an open file with synchronized I/O _file_ integrity completion
6817 *
6818 * Notes: This is a legacy support function that does not test for
6819 * thread cancellation points.
6820 */
6821/* ARGSUSED */
6822int
6823fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6824{
6825 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
6826}
6827
6828
6829/*
6830 * Sync an open file with synchronized I/O _data_ integrity completion
6831 */
6832/* ARGSUSED */
6833int
6834fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6835{
6836 __pthread_testcancel(1);
6837 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6838}
6839
6840
6841/*
6842 * fsync_common
6843 *
6844 * Common fsync code to support both synchronized I/O file integrity completion
6845 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6846 *
6847 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6848 * will only guarantee that the file data contents are retrievable. If
 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
6850 * includes additional metadata unnecessary for retrieving the file data
6851 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6852 * storage.
6853 *
6854 * Parameters: p The process
6855 * uap->fd The descriptor to synchronize
6856 * flags The data integrity flags
6857 *
6858 * Returns: int Success
6859 * fp_getfvp:EBADF Bad file descriptor
6860 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6861 * VNOP_FSYNC:??? unspecified
6862 *
6863 * Notes: We use struct fsync_args because it is a short name, and all
6864 * caller argument structures are otherwise identical.
6865 */
static int
fsync_common(proc_t p, struct fsync_args *uap, int flags)
{
	vnode_t vp;
	struct fileproc *fp;
	vfs_context_t ctx = vfs_context_current();
	int error;

	AUDIT_ARG(fd, uap->fd);

	if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
		return (error);
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* flags is MNT_WAIT (file integrity) or MNT_DWAIT (data integrity). */
	error = VNOP_FSYNC(vp, flags, ctx);

#if NAMEDRSRCFORK
	/* Sync resource fork shadow file if necessary. */
	if ((error == 0) &&
	    (vp->v_flag & VISNAMEDSTREAM) &&
	    (vp->v_parent != NULLVP) &&
	    vnode_isshadow(vp) &&
	    (fp->f_flags & FP_WRITTEN)) {
		(void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
	}
#endif

	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}
6902
6903/*
6904 * Duplicate files. Source must be a file, target must be a file or
6905 * must not exist.
6906 *
6907 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6908 * perform inheritance correctly.
6909 */
6910/* ARGSUSED */
int
copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
{
	vnode_t tvp, fvp, tdvp, sdvp;
	struct nameidata fromnd, tond;
	int error;
	vfs_context_t ctx = vfs_context_current();
#if CONFIG_MACF
	struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
	struct vnode_attr va;
#endif

	/* Check that the flags are valid. */

	if (uap->flags & ~CPF_MASK) {
		return(EINVAL);
	}

	/* Look up the source; on success we hold an iocount on fvp. */
	NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
	    UIO_USERSPACE, uap->from, ctx);
	if ((error = namei(&fromnd)))
		return (error);
	fvp = fromnd.ni_vp;

	/*
	 * Look up the target for creation; SAVESTART keeps the start
	 * directory referenced (released below as sdvp).
	 */
	NDINIT(&tond, CREATE, OP_LINK,
	    LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
	    UIO_USERSPACE, uap->to, ctx);
	if ((error = namei(&tond))) {
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;

	/* An existing target may only be replaced with CPF_OVERWRITE. */
	if (tvp != NULL) {
		if (!(uap->flags & CPF_OVERWRITE)) {
			error = EEXIST;
			goto out;
		}
	}

	/* Directories may be neither the source nor the target. */
	if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
		error = EISDIR;
		goto out;
	}

	/* This calls existing MAC hooks for open */
	if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx,
	    NULL))) {
		goto out;
	}

	if (tvp) {
		/*
		 * See unlinkat_internal for an explanation of the potential
		 * ENOENT from the MAC hook but the gist is that the MAC hook
		 * can fail because vn_getpath isn't able to return the full
		 * path. We choose to ignore this failure.
		 */
		error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL);
		if (error && error != ENOENT)
			goto out;
		error = 0;
	}

#if CONFIG_MACF
	VATTR_INIT(&va);
	VATTR_SET(&va, va_type, fvp->v_type);
	/* Mask off all but regular access permissions */
	VATTR_SET(&va, va_mode,
	    ((((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
	error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
	if (error)
		goto out;
#endif /* CONFIG_MACF */

	/* Caller must be allowed to add an entry to the target directory. */
	if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out;

	/* Source identical to the target's parent directory is nonsense. */
	if (fvp == tdvp)
		error = EINVAL;
	/*
	 * If source is the same as the destination (that is the
	 * same inode number) then there is nothing to do.
	 * (fixed to have POSIX semantics - CSM 3/2/98)
	 */
	if (fvp == tvp)
		error = -1;
	if (!error)
		error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
out:
	sdvp = tond.ni_startdir;
	/*
	 * nameidone has to happen before we vnode_put(tdvp)
	 * since it may need to release the fs_nodelock on the tdvp
	 */
	nameidone(&tond);

	if (tvp)
		vnode_put(tvp);
	vnode_put(tdvp);
	vnode_put(sdvp);
out1:
	vnode_put(fvp);

	nameidone(&fromnd);

	/* -1 is the internal "source == target" marker; report success. */
	if (error == -1)
		return (0);
	return (error);
}
7021
7022#define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
7023
7024/*
7025 * Helper function for doing clones. The caller is expected to provide an
7026 * iocounted source vnode and release it.
7027 */
/*
 * Common implementation for clonefileat(2) and fclonefileat(2).
 *
 * fvp                  - iocounted source vnode, provided and released by
 *                        the caller
 * data_read_authorised - TRUE if read access to the source data has already
 *                        been established (fd-based path with FREAD), so the
 *                        KAUTH_VNODE_READ_DATA check on fvp is skipped
 * dst_dirfd / dst      - directory fd and user path naming the destination,
 *                        which must not already exist
 * flags                - CLONE_NOFOLLOW / CLONE_NOOWNERCOPY
 *
 * Returns 0 on success or an errno value.
 */
static int
clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
    user_addr_t dst, uint32_t flags, vfs_context_t ctx)
{
	vnode_t tvp, tdvp;
	struct nameidata tond;
	int error;
	int follow;
	boolean_t free_src_acl;
	boolean_t attr_cleanup;
	enum vtype v_type;
	kauth_action_t action;
	struct componentname *cnp;
	uint32_t defaulted;
	struct vnode_attr va;
	struct vnode_attr nva;
	uint32_t vnop_flags;

	/*
	 * Only regular files, symlinks and (non-root, non-mount-point)
	 * directories may be cloned; pick the matching "add" right on the
	 * destination directory.
	 */
	v_type = vnode_vtype(fvp);
	switch (v_type) {
	case VLNK:
		/* FALLTHRU */
	case VREG:
		action = KAUTH_VNODE_ADD_FILE;
		break;
	case VDIR:
		if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
		    fvp->v_mountedhere) {
			return (EINVAL);
		}
		action = KAUTH_VNODE_ADD_SUBDIRECTORY;
		break;
	default:
		return (EINVAL);
	}

	AUDIT_ARG(fd2, dst_dirfd);
	AUDIT_ARG(value32, flags);

	/* Look up the destination; WANTPARENT keeps an iocount on tdvp. */
	follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
	NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
	    UIO_USERSPACE, dst, ctx);
	if ((error = nameiat(&tond, dst_dirfd)))
		return (error);
	cnp = &tond.ni_cnd;
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;

	free_src_acl = FALSE;
	attr_cleanup = FALSE;

	/* Destination must not exist. */
	if (tvp != NULL) {
		error = EEXIST;
		goto out;
	}

	/* Clones cannot cross mount boundaries. */
	if (vnode_mount(tdvp) != vnode_mount(fvp)) {
		error = EXDEV;
		goto out;
	}

#if CONFIG_MACF
	if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp)))
		goto out;
#endif
	if ((error = vnode_authorize(tdvp, NULL, action, ctx)))
		goto out;

	/*
	 * Require generic read rights on the source; drop READ_DATA when the
	 * caller already proved it (fd opened FREAD).
	 */
	action = KAUTH_VNODE_GENERIC_READ_BITS;
	if (data_read_authorised)
		action &= ~KAUTH_VNODE_READ_DATA;
	if ((error = vnode_authorize(fvp, NULL, action, ctx)))
		goto out;

	/*
	 * certain attributes may need to be changed from the source, we ask for
	 * those here.
	 */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	VATTR_WANTED(&va, va_gid);
	VATTR_WANTED(&va, va_mode);
	VATTR_WANTED(&va, va_flags);
	VATTR_WANTED(&va, va_acl);

	if ((error = vnode_getattr(fvp, &va, ctx)) != 0)
		goto out;

	VATTR_INIT(&nva);
	VATTR_SET(&nva, va_type, v_type);
	/* Carry the source ACL over; remember to free it on the way out. */
	if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) {
		VATTR_SET(&nva, va_acl, va.va_acl);
		free_src_acl = TRUE;
	}

	/* Handle ACL inheritance, initialize vap. */
	if (v_type == VLNK) {
		error = vnode_authattr_new(tdvp, &nva, 0, ctx);
	} else {
		error = vn_attribute_prepare(tdvp, &nva, &defaulted, ctx);
		if (error)
			goto out;
		attr_cleanup = TRUE;
	}

	vnop_flags = VNODE_CLONEFILE_DEFAULT;
	/*
	 * We've got initial values for all security parameters,
	 * If we are superuser, then we can change owners to be the
	 * same as the source. Both superuser and the owner have default
	 * WRITE_SECURITY privileges so all other fields can be taken
	 * from source as well.
	 */
	if (!(flags & CLONE_NOOWNERCOPY) && vfs_context_issuser(ctx)) {
		if (VATTR_IS_SUPPORTED(&va, va_uid))
			VATTR_SET(&nva, va_uid, va.va_uid);
		if (VATTR_IS_SUPPORTED(&va, va_gid))
			VATTR_SET(&nva, va_gid, va.va_gid);
	} else {
		vnop_flags |= VNODE_CLONEFILE_NOOWNERCOPY;
	}

	if (VATTR_IS_SUPPORTED(&va, va_mode))
		VATTR_SET(&nva, va_mode, va.va_mode);
	if (VATTR_IS_SUPPORTED(&va, va_flags)) {
		/*
		 * Keep the destination's inherited DATAVAULT/RESTRICTED
		 * protection flags rather than the source's.
		 */
		VATTR_SET(&nva, va_flags,
		    ((va.va_flags & ~(UF_DATAVAULT | SF_RESTRICTED)) | /* Turn off from source */
		    (nva.va_flags & (UF_DATAVAULT | SF_RESTRICTED))));
	}

	error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &nva, vnop_flags, ctx);

	/* Post-creation fixups: MAC label, attribute fallback, identity, events. */
	if (!error && tvp) {
		int update_flags = 0;
#if CONFIG_FSE
		int fsevent;
#endif /* CONFIG_FSE */

#if CONFIG_MACF
		(void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
		    VNODE_LABEL_CREATE, ctx);
#endif
		/*
		 * If some of the requested attributes weren't handled by the
		 * VNOP, use our fallback code.
		 */
		if (!VATTR_ALL_SUPPORTED(&va))
			(void)vnode_setattr_fallback(tvp, &nva, ctx);

		// Make sure the name & parent pointers are hooked up
		if (tvp->v_name == NULL)
			update_flags |= VNODE_UPDATE_NAME;
		if (tvp->v_parent == NULLVP)
			update_flags |= VNODE_UPDATE_PARENT;

		if (update_flags) {
			(void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_hash, update_flags);
		}

#if CONFIG_FSE
		switch (vnode_vtype(tvp)) {
		case VLNK:
			/* FALLTHRU */
		case VREG:
			fsevent = FSE_CREATE_FILE;
			break;
		case VDIR:
			fsevent = FSE_CREATE_DIR;
			break;
		default:
			goto out;
		}

		if (need_fsevent(fsevent, tvp)) {
			/*
			 * The following is a sequence of three explicit events.
			 * A pair of FSE_CLONE events representing the source and destination
			 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
			 * fseventsd may coalesce the destination clone and create events
			 * into a single event resulting in the following sequence for a client
			 * FSE_CLONE (src)
			 * FSE_CLONE | FSE_CREATE (dst)
			 */
			add_fsevent(FSE_CLONE, ctx, FSE_ARG_VNODE, fvp, FSE_ARG_VNODE, tvp,
			    FSE_ARG_DONE);
			add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
			    FSE_ARG_DONE);
		}
#endif /* CONFIG_FSE */
	}

out:
	/* Unified cleanup: attribute state, borrowed ACL, namei state, iocounts. */
	if (attr_cleanup)
		vn_attribute_cleanup(&nva, defaulted);
	if (free_src_acl && va.va_acl)
		kauth_acl_free(va.va_acl);
	nameidone(&tond);
	if (tvp)
		vnode_put(tvp);
	vnode_put(tdvp);
	return (error);
}
7231
7232/*
7233 * clone files or directories, target must not exist.
7234 */
7235/* ARGSUSED */
7236int
7237clonefileat(__unused proc_t p, struct clonefileat_args *uap,
7238 __unused int32_t *retval)
7239{
7240 vnode_t fvp;
7241 struct nameidata fromnd;
7242 int follow;
7243 int error;
7244 vfs_context_t ctx = vfs_context_current();
7245
7246 /* Check that the flags are valid. */
7247 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY))
7248 return (EINVAL);
7249
7250 AUDIT_ARG(fd, uap->src_dirfd);
7251
7252 follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7253 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
7254 UIO_USERSPACE, uap->src, ctx);
7255 if ((error = nameiat(&fromnd, uap->src_dirfd)))
7256 return (error);
7257
7258 fvp = fromnd.ni_vp;
7259 nameidone(&fromnd);
7260
7261 error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
7262 uap->flags, ctx);
7263
7264 vnode_put(fvp);
7265 return (error);
7266}
7267
7268int
7269fclonefileat(__unused proc_t p, struct fclonefileat_args *uap,
7270 __unused int32_t *retval)
7271{
7272 vnode_t fvp;
7273 struct fileproc *fp;
7274 int error;
7275 vfs_context_t ctx = vfs_context_current();
7276
7277 /* Check that the flags are valid. */
7278 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY))
7279 return (EINVAL);
7280
7281 AUDIT_ARG(fd, uap->src_fd);
7282 error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
7283 if (error)
7284 return (error);
7285
7286 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7287 AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
7288 error = EBADF;
7289 goto out;
7290 }
7291
7292 if ((error = vnode_getwithref(fvp)))
7293 goto out;
7294
7295 AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
7296
7297 error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
7298 uap->flags, ctx);
7299
7300 vnode_put(fvp);
7301out:
7302 file_drop(uap->src_fd);
7303 return (error);
7304}
7305
7306/*
7307 * Rename files. Source and destination must either both be directories,
7308 * or both not be directories. If target is a directory, it must be empty.
7309 */
7310/* ARGSUSED */
/*
 * Common implementation for rename(2), renameat(2) and renameatx_np(2).
 *
 * fromfd/from, tofd/to - directory fds and paths (segflg gives the
 *                        address space of the path strings)
 * flags                - VFS_RENAME_SWAP / VFS_RENAME_EXCL (mutually
 *                        exclusive)
 *
 * The function may restart itself from the top (goto retry) when it must
 * take the per-mount rename serialization lock, or when an authorization
 * or VNOP race (ENOENT/ERECYCLE) is detected; it may also re-drive a
 * partially completed compound VNOP lookup (goto continue_lookup).
 */
static int
renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
    int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
{
	if (flags & ~VFS_RENAME_FLAGS_MASK)
		return EINVAL;

	/* SWAP and EXCL are mutually exclusive. */
	if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL))
		return EINVAL;

	vnode_t tvp, tdvp;
	vnode_t fvp, fdvp;
	struct nameidata *fromnd, *tond;
	int error;
	int do_retry;
	int retry_count;
	int mntrename;
	int need_event;
	int need_kpath2;
	int has_listeners;
	const char *oname = NULL;
	char *from_name = NULL, *to_name = NULL;
	int from_len=0, to_len=0;
	int holding_mntlock;
	mount_t locked_mp = NULL;
	vnode_t oparent = NULLVP;
#if CONFIG_FSE
	fse_info from_finfo, to_finfo;
#endif
	int from_truncated=0, to_truncated;
	int batched = 0;
	struct vnode_attr *fvap, *tvap;
	int continuing = 0;
	/* carving out a chunk for structs that are too big to be on stack. */
	struct {
		struct nameidata from_node, to_node;
		struct vnode_attr fv_attr, tv_attr;
	} * __rename_data;
	MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
	fromnd = &__rename_data->from_node;
	tond = &__rename_data->to_node;

	holding_mntlock = 0;
	do_retry = 0;
	retry_count = 0;
retry:
	/* Reset per-attempt state; each retry re-runs both lookups. */
	fvp = tvp = NULL;
	fdvp = tdvp = NULL;
	fvap = tvap = NULL;
	mntrename = FALSE;

	NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
	    segflg, from, ctx);
	fromnd->ni_flag = NAMEI_COMPOUNDRENAME;

	NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
	    segflg, to, ctx);
	tond->ni_flag = NAMEI_COMPOUNDRENAME;

continue_lookup:
	/* (Re-)run the source lookup unless a compound VNOP already finished it. */
	if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
		if ( (error = nameiat(fromnd, fromfd)) )
			goto out1;
		fdvp = fromnd->ni_dvp;
		fvp = fromnd->ni_vp;

		if (fvp && fvp->v_type == VDIR)
			tond->ni_cnd.cn_flags |= WILLBEDIR;
	}

	if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
		if ( (error = nameiat(tond, tofd)) ) {
			/*
			 * Translate error code for rename("dir1", "dir2/.").
			 */
			if (error == EISDIR && fvp->v_type == VDIR)
				error = EINVAL;
			goto out1;
		}
		tdvp = tond->ni_dvp;
		tvp = tond->ni_vp;
	}

#if DEVELOPMENT || DEBUG
	/*
	 * XXX VSWAP: Check for entitlements or special flag here
	 * so we can restrict access appropriately.
	 */
#else /* DEVELOPMENT || DEBUG */

	/* Only the kernel may rename swap files on release kernels. */
	if (fromnd->ni_vp && vnode_isswap(fromnd->ni_vp) && (ctx != vfs_context_kernel())) {
		error = EPERM;
		goto out1;
	}

	if (tond->ni_vp && vnode_isswap(tond->ni_vp) && (ctx != vfs_context_kernel())) {
		error = EPERM;
		goto out1;
	}
#endif /* DEVELOPMENT || DEBUG */

	/* RENAME_SWAP requires both endpoints to exist. */
	if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
		error = ENOENT;
		goto out1;
	}

	/* RENAME_EXCL requires the destination not to exist. */
	if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
		error = EEXIST;
		goto out1;
	}

	batched = vnode_compound_rename_available(fdvp);

#if CONFIG_FSE
	need_event = need_fsevent(FSE_RENAME, fdvp);
	if (need_event) {
		if (fvp) {
			get_fse_info(fvp, &from_finfo, ctx);
		} else {
			error = vfs_get_notify_attributes(&__rename_data->fv_attr);
			if (error) {
				goto out1;
			}

			fvap = &__rename_data->fv_attr;
		}

		if (tvp) {
			get_fse_info(tvp, &to_finfo, ctx);
		} else if (batched) {
			error = vfs_get_notify_attributes(&__rename_data->tv_attr);
			if (error) {
				goto out1;
			}

			tvap = &__rename_data->tv_attr;
		}
	}
#else
	need_event = 0;
#endif /* CONFIG_FSE */

	has_listeners = kauth_authorize_fileop_has_listeners();

	need_kpath2 = 0;
#if CONFIG_AUDIT
	if (AUDIT_RECORD_EXISTS()) {
		need_kpath2 = 1;
	}
#endif

	/* Build the textual paths only if somebody (fsevents/kauth/audit) wants them. */
	if (need_event || has_listeners) {
		if (from_name == NULL) {
			GET_PATH(from_name);
			if (from_name == NULL) {
				error = ENOMEM;
				goto out1;
			}
		}

		from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
	}

	if (need_event || need_kpath2 || has_listeners) {
		if (to_name == NULL) {
			GET_PATH(to_name);
			if (to_name == NULL) {
				error = ENOMEM;
				goto out1;
			}
		}

		to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
		if (to_name && need_kpath2) {
			AUDIT_ARG(kpath, to_name, ARG_KPATH2);
		}
	}
	if (!fvp) {
		/*
		 * Claim: this check will never reject a valid rename.
		 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
		 * Suppose fdvp and tdvp are not on the same mount.
		 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem.  If fvp is the root,
		 * 	then you can't move it to within another dir on the same mountpoint.
		 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
		 *
		 * If this check passes, then we are safe to pass these vnodes to the same FS.
		 */
		if (fdvp->v_mount != tdvp->v_mount) {
			error = EXDEV;
			goto out1;
		}
		goto skipped_lookup;
	}

	if (!batched) {
		error = vn_authorize_renamex_with_paths(fdvp, fvp, &fromnd->ni_cnd, from_name, tdvp, tvp, &tond->ni_cnd, to_name, ctx, flags, NULL);
		if (error) {
			if (error == ENOENT) {
				assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
				if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
					/*
					 * We encountered a race where after doing the namei, tvp stops
					 * being valid. If so, simply re-drive the rename call from the
					 * top.
					 */
					do_retry = 1;
					retry_count += 1;
				}
			}
			goto out1;
		}
	}

	/*
	 * If the source and destination are the same (i.e. they're
	 * links to the same vnode) and the target file system is
	 * case sensitive, then there is nothing to do.
	 *
	 * XXX Come back to this.
	 */
	if (fvp == tvp) {
		int pathconf_val;

		/*
		 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
		 * then assume that this file system is case sensitive.
		 */
		if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
		    pathconf_val != 0) {
			goto out1;
		}
	}

	/*
	 * Allow the renaming of mount points.
	 * - target must not exist
	 * - target must reside in the same directory as source
	 * - union mounts cannot be renamed
	 * - "/" cannot be renamed
	 *
	 * XXX Handle this in VFS after a continued lookup (if we missed
	 * in the cache to start off)
	 *
	 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
	 * we'll skip past here.  The file system is responsible for
	 * checking that @tvp is not a descendent of @fvp and vice versa
	 * so it should always return EINVAL if either @tvp or @fvp is the
	 * root of a volume.
	 */
	if ((fvp->v_flag & VROOT) &&
	    (fvp->v_type == VDIR) &&
	    (tvp == NULL)  &&
	    (fvp->v_mountedhere == NULL)  &&
	    (fdvp == tdvp)  &&
	    ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0)  &&
	    (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
		vnode_t coveredvp;

		/* switch fvp to the covered vnode */
		coveredvp = fvp->v_mount->mnt_vnodecovered;
		if ( (vnode_getwithref(coveredvp)) ) {
			error = ENOENT;
			goto out1;
		}
		vnode_put(fvp);

		fvp = coveredvp;
		mntrename = TRUE;
	}
	/*
	 * Check for cross-device rename.
	 */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		goto out1;
	}

	/*
	 * If source is the same as the destination (that is the
	 * same inode number) then there is nothing to do...
	 * EXCEPT if the underlying file system supports case
	 * insensitivity and is case preserving. In this case
	 * the file system needs to handle the special case of
	 * getting the same vnode as target (fvp) and source (tvp).
	 *
	 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
	 * and _PC_CASE_PRESERVING can have this exception, and they need to
	 * handle the special case of getting the same vnode as target and
	 * source.  NOTE: Then the target is unlocked going into vnop_rename,
	 * so not to cause locking problems. There is a single reference on tvp.
	 *
	 * NOTE - that fvp == tvp also occurs if they are hard linked and
	 * that correct behaviour then is just to return success without doing
	 * anything.
	 *
	 * XXX filesystem should take care of this itself, perhaps...
	 */
	if (fvp == tvp && fdvp == tdvp) {
		if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
		    !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
			  fromnd->ni_cnd.cn_namelen)) {
			goto out1;
		}
	}

	if (holding_mntlock && fvp->v_mount != locked_mp) {
		/*
		 * we're holding a reference and lock
		 * on locked_mp, but it no longer matches
		 * what we want to do... so drop our hold
		 */
		mount_unlock_renames(locked_mp);
		mount_drop(locked_mp, 0);
		holding_mntlock = 0;
	}
	if (tdvp != fdvp && fvp->v_type == VDIR) {
		/*
		 * serialize renames that re-shape
		 * the tree... if holding_mntlock is
		 * set, then we're ready to go...
		 * otherwise we
		 * first need to drop the iocounts
		 * we picked up, second take the
		 * lock to serialize the access,
		 * then finally start the lookup
		 * process over with the lock held
		 */
		if (!holding_mntlock) {
			/*
			 * need to grab a reference on
			 * the mount point before we
			 * drop all the iocounts... once
			 * the iocounts are gone, the mount
			 * could follow
			 */
			locked_mp = fvp->v_mount;
			mount_ref(locked_mp, 0);

			/*
			 * nameidone has to happen before we vnode_put(tvp)
			 * since it may need to release the fs_nodelock on the tvp
			 */
			nameidone(tond);

			if (tvp)
				vnode_put(tvp);
			vnode_put(tdvp);

			/*
			 * nameidone has to happen before we vnode_put(fdvp)
			 * since it may need to release the fs_nodelock on the fvp
			 */
			nameidone(fromnd);

			vnode_put(fvp);
			vnode_put(fdvp);

			mount_lock_renames(locked_mp);
			holding_mntlock = 1;

			goto retry;
		}
	} else {
		/*
		 * when we dropped the iocounts to take
		 * the lock, we allowed the identity of
		 * the various vnodes to change... if they did,
		 * we may no longer be dealing with a rename
		 * that reshapes the tree... once we're holding
		 * the iocounts, the vnodes can't change type
		 * so we're free to drop the lock at this point
		 * and continue on
		 */
		if (holding_mntlock) {
			mount_unlock_renames(locked_mp);
			mount_drop(locked_mp, 0);
			holding_mntlock = 0;
		}
	}

	// save these off so we can later verify that fvp is the same
	oname = fvp->v_name;
	oparent = fvp->v_parent;

skipped_lookup:
	error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
	    tdvp, &tvp, &tond->ni_cnd, tvap,
	    flags, ctx);

	if (holding_mntlock) {
		/*
		 * we can drop our serialization
		 * lock now
		 */
		mount_unlock_renames(locked_mp);
		mount_drop(locked_mp, 0);
		holding_mntlock = 0;
	}
	if (error) {
		/* EKEEPLOOKING: the compound VNOP needs the lookup continued. */
		if (error == EKEEPLOOKING) {
			if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
				if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
					panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
				}
			}

			fromnd->ni_vp = fvp;
			tond->ni_vp = tvp;

			goto continue_lookup;
		}

		/*
		 * We may encounter a race in the VNOP where the destination didn't
		 * exist when we did the namei, but it does by the time we go and
		 * try to create the entry. In this case, we should re-drive this rename
		 * call from the top again.  Currently, only HFS bubbles out ERECYCLE,
		 * but other filesystems susceptible to this race could return it, too.
		 */
		if (error == ERECYCLE) {
			do_retry = 1;
		}

		/*
		 * For compound VNOPs, the authorization callback may return
		 * ENOENT in case of racing hardlink lookups hitting the name
		 * cache, redrive the lookup.
		 */
		if (batched && error == ENOENT) {
			assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
			if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
				do_retry = 1;
				retry_count += 1;
			}
		}

		goto out1;
	}

	/* call out to allow 3rd party notification of rename.
	 * Ignore result of kauth_authorize_fileop call.
	 */
	kauth_authorize_fileop(vfs_context_ucred(ctx),
	    KAUTH_FILEOP_RENAME,
	    (uintptr_t)from_name, (uintptr_t)to_name);
	if (flags & VFS_RENAME_SWAP) {
		kauth_authorize_fileop(vfs_context_ucred(ctx),
		    KAUTH_FILEOP_RENAME,
		    (uintptr_t)to_name, (uintptr_t)from_name);
	}

#if CONFIG_FSE
	if (from_name != NULL && to_name != NULL) {
		if (from_truncated || to_truncated) {
			// set it here since only the from_finfo gets reported up to user space
			from_finfo.mode |= FSE_TRUNCATED_PATH;
		}

		if (tvap && tvp) {
			vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
		}
		if (fvap) {
			vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
		}

		if (tvp) {
			add_fsevent(FSE_RENAME, ctx,
			    FSE_ARG_STRING, from_len, from_name,
			    FSE_ARG_FINFO, &from_finfo,
			    FSE_ARG_STRING, to_len, to_name,
			    FSE_ARG_FINFO, &to_finfo,
			    FSE_ARG_DONE);
			if (flags & VFS_RENAME_SWAP) {
				/*
				 * Strictly speaking, swap is the equivalent of
				 * *three* renames.  FSEvents clients should only take
				 * the events as a hint, so we only bother reporting
				 * two.
				 */
				add_fsevent(FSE_RENAME, ctx,
				    FSE_ARG_STRING, to_len, to_name,
				    FSE_ARG_FINFO, &to_finfo,
				    FSE_ARG_STRING, from_len, from_name,
				    FSE_ARG_FINFO, &from_finfo,
				    FSE_ARG_DONE);
			}
		} else {
			add_fsevent(FSE_RENAME, ctx,
			    FSE_ARG_STRING, from_len, from_name,
			    FSE_ARG_FINFO, &from_finfo,
			    FSE_ARG_STRING, to_len, to_name,
			    FSE_ARG_DONE);
		}
	}
#endif /* CONFIG_FSE */

	/*
	 * update filesystem's mount point data
	 */
	if (mntrename) {
		char *cp, *pathend, *mpname;
		char * tobuf;
		struct mount *mp;
		int maxlen;
		size_t len = 0;

		mp = fvp->v_mountedhere;

		if (vfs_busy(mp, LK_NOWAIT)) {
			error = EBUSY;
			goto out1;
		}
		MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);

		if (UIO_SEG_IS_USER_SPACE(segflg))
			error = copyinstr(to, tobuf, MAXPATHLEN, &len);
		else
			error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
		if (!error) {
			/* find current mount point prefix */
			pathend = &mp->mnt_vfsstat.f_mntonname[0];
			for (cp = pathend; *cp != '\0'; ++cp) {
				if (*cp == '/')
					pathend = cp + 1;
			}
			/* find last component of target name */
			for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
				if (*cp == '/')
					mpname = cp + 1;
			}
			/* append name to prefix */
			maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
			bzero(pathend, maxlen);
			strlcpy(pathend, mpname, maxlen);
		}
		FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);

		vfs_unbusy(mp);
	}
	/*
	 * fix up name & parent pointers.  note that we first
	 * check that fvp has the same name/parent pointers it
	 * had before the rename call... this is a 'weak' check
	 * at best...
	 *
	 * XXX oparent and oname may not be set in the compound vnop case
	 */
	if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
		int update_flags;

		update_flags = VNODE_UPDATE_NAME;

		if (fdvp != tdvp)
			update_flags |= VNODE_UPDATE_PARENT;

		vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
	}
out1:
	/* Common teardown; also reached on every error path. */
	if (to_name != NULL) {
		RELEASE_PATH(to_name);
		to_name = NULL;
	}
	if (from_name != NULL) {
		RELEASE_PATH(from_name);
		from_name = NULL;
	}
	if (holding_mntlock) {
		mount_unlock_renames(locked_mp);
		mount_drop(locked_mp, 0);
		holding_mntlock = 0;
	}
	if (tdvp) {
		/*
		 * nameidone has to happen before we vnode_put(tdvp)
		 * since it may need to release the fs_nodelock on the tdvp
		 */
		nameidone(tond);

		if (tvp)
			vnode_put(tvp);
		vnode_put(tdvp);
	}
	if (fdvp) {
		/*
		 * nameidone has to happen before we vnode_put(fdvp)
		 * since it may need to release the fs_nodelock on the fdvp
		 */
		nameidone(fromnd);

		if (fvp)
			vnode_put(fvp);
		vnode_put(fdvp);
	}

	/*
	 * If things changed after we did the namei, then we will re-drive
	 * this rename call from the top.
	 */
	if (do_retry) {
		do_retry = 0;
		goto retry;
	}

	FREE(__rename_data, M_TEMP);
	return (error);
}
7919
7920int
7921rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7922{
7923 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7924 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7925}
7926
7927int renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
7928{
7929 return renameat_internal(
7930 vfs_context_current(),
7931 uap->fromfd, uap->from,
7932 uap->tofd, uap->to,
7933 UIO_USERSPACE, uap->flags);
7934}
7935
7936int
7937renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7938{
7939 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7940 uap->tofd, uap->to, UIO_USERSPACE, 0));
7941}
7942
7943/*
7944 * Make a directory file.
7945 *
7946 * Returns: 0 Success
7947 * EEXIST
7948 * namei:???
7949 * vnode_authorize:???
7950 * vn_create:???
7951 */
7952/* ARGSUSED */
/*
 * Common implementation for mkdir(2), mkdirat(2) and mkdir_extended(2).
 *
 * path/segflg - directory name and the address space it lives in
 * vap         - caller-initialized attributes (mode, optionally ACL);
 *               va_type is forced to VDIR here
 * fd          - directory fd the path is resolved against (AT_FDCWD for
 *               the plain variants)
 *
 * Uses the compound-mkdir namei protocol: vn_create() may return
 * EKEEPLOOKING, in which case the lookup is continued via the
 * continue_lookup label.
 */
static int
mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
    enum uio_seg segflg)
{
	vnode_t vp, dvp;
	int error;
	int update_flags = 0;
	int batched;
	struct nameidata nd;

	AUDIT_ARG(mode, vap->va_mode);
	NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
	    path, ctx);
	nd.ni_cnd.cn_flags |= WILLBEDIR;
	nd.ni_flag = NAMEI_COMPOUNDMKDIR;

continue_lookup:
	error = nameiat(&nd, fd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* Target already exists. */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}

	batched = vnode_compound_mkdir_available(dvp);

	VATTR_SET(vap, va_type, VDIR);

	/*
	 * XXX
	 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
	 * only get EXISTS or EISDIR for existing path components, and not that it could see
	 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
	 * it will fail in a spurious  manner.  Need to figure out if this is valid behavior.
	 */
	if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
		if (error == EACCES || error == EPERM) {
			int error2;

			/* Release the CREATE lookup before re-driving a plain one. */
			nameidone(&nd);
			vnode_put(dvp);
			dvp = NULLVP;

			/*
			 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
			 * rather than EACCESS if the target exists.
			 */
			NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
			    path, ctx);
			error2 = nameiat(&nd, fd);
			if (error2) {
				goto out;
			} else {
				vp = nd.ni_vp;
				error = EEXIST;
				goto out;
			}
		}

		goto out;
	}

	/*
	 * make the directory
	 */
	if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
		/* EKEEPLOOKING: the compound VNOP wants the lookup continued. */
		if (error == EKEEPLOOKING) {
			nd.ni_vp = vp;
			goto continue_lookup;
		}

		goto out;
	}

	// Make sure the name & parent pointers are hooked up
	if (vp->v_name == NULL)
		update_flags |= VNODE_UPDATE_NAME;
	if (vp->v_parent == NULLVP)
		update_flags |= VNODE_UPDATE_PARENT;

	if (update_flags)
		vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
	add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
#endif

out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	if (dvp)
		vnode_put(dvp);

	return (error);
}
8058
8059/*
8060 * mkdir_extended: Create a directory; with extended security (ACL).
8061 *
8062 * Parameters: p Process requesting to create the directory
8063 * uap User argument descriptor (see below)
8064 * retval (ignored)
8065 *
8066 * Indirect: uap->path Path of directory to create
8067 * uap->mode Access permissions to set
8068 * uap->xsecurity ACL to set
8069 *
8070 * Returns: 0 Success
8071 * !0 Not success
8072 *
8073 */
8074int
8075mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
8076{
8077 int ciferror;
8078 kauth_filesec_t xsecdst;
8079 struct vnode_attr va;
8080
8081 AUDIT_ARG(owner, uap->uid, uap->gid);
8082
8083 xsecdst = NULL;
8084 if ((uap->xsecurity != USER_ADDR_NULL) &&
8085 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
8086 return ciferror;
8087
8088 VATTR_INIT(&va);
8089 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8090 if (xsecdst != NULL)
8091 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
8092
8093 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
8094 UIO_USERSPACE);
8095 if (xsecdst != NULL)
8096 kauth_filesec_free(xsecdst);
8097 return ciferror;
8098}
8099
8100int
8101mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
8102{
8103 struct vnode_attr va;
8104
8105 VATTR_INIT(&va);
8106 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8107
8108 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
8109 UIO_USERSPACE));
8110}
8111
8112int
8113mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
8114{
8115 struct vnode_attr va;
8116
8117 VATTR_INIT(&va);
8118 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8119
8120 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
8121 UIO_USERSPACE));
8122}
8123
/*
 * rmdirat_internal: remove the directory named by dirpath, resolved
 * relative to the directory descriptor fd (AT_FDCWD for the CWD).
 *
 * Parameters:	ctx	vfs context for authorization and the VNOP calls
 *		fd	directory fd used as the base of relative lookups
 *		dirpath	address of the pathname to remove
 *		segflg	address space dirpath lives in (user vs. system)
 *
 * Returns:	0	Success
 *		!0	errno from lookup, authorization, or vn_rmdir
 */
static int
rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
		enum uio_seg segflg)
{
	vnode_t vp, dvp;
	int error;
	struct nameidata nd;
	char     *path = NULL;
	int       len=0;
	int has_listeners = 0;
	int need_event = 0;
	int truncated = 0;
#if CONFIG_FSE
	struct vnode_attr va;
#endif /* CONFIG_FSE */
	struct vnode_attr *vap = NULL;
	int restart_count = 0;	/* bounds ENOENT-driven lookup retries */
	int batched;		/* nonzero when the FS supports compound rmdir */

	int restart_flag;

	/*
	 * This loop exists to restart rmdir in the unlikely case that two
	 * processes are simultaneously trying to remove the same directory
	 * containing orphaned appleDouble files.
	 */
	do {
		NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
		    segflg, dirpath, ctx);
		nd.ni_flag = NAMEI_COMPOUNDRMDIR;
continue_lookup:
		restart_flag = 0;
		vap = NULL;

		error = nameiat(&nd, fd);
		if (error)
			return (error);

		dvp = nd.ni_dvp;
		vp = nd.ni_vp;

		if (vp) {
			batched = vnode_compound_rmdir_available(vp);

			if (vp->v_flag & VROOT) {
				/*
				 * The root of a mounted filesystem cannot be deleted.
				 */
				error = EBUSY;
				goto out;
			}

#if DEVELOPMENT || DEBUG
			/*
			 * XXX VSWAP: Check for entitlements or special flag here
			 * so we can restrict access appropriately.
			 */
#else /* DEVELOPMENT || DEBUG */

			/* Swap-backing files may only be removed by the kernel itself. */
			if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
				error = EPERM;
				goto out;
			}
#endif /* DEVELOPMENT || DEBUG */

			/*
			 * Removed a check here; we used to abort if vp's vid
			 * was not the same as what we'd seen the last time around.
			 * I do not think that check was valid, because if we retry
			 * and all dirents are gone, the directory could legitimately
			 * be recycled but still be present in a situation where we would
			 * have had permission to delete. Therefore, we won't make
			 * an effort to preserve that check now that we may not have a
			 * vp here.
			 */

			if (!batched) {
				error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
				if (error) {
					if (error == ENOENT) {
						/* Racing lookup; redrive a bounded number of times. */
						assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
						if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
							restart_flag = 1;
							restart_count += 1;
						}
					}
					goto out;
				}
			}
		} else {
			/* No vp: lookup deferred resolution to a compound rmdir VNOP. */
			batched = 1;

			if (!vnode_compound_rmdir_available(dvp)) {
				panic("No error, but no compound rmdir?");
			}
		}

#if CONFIG_FSE
		fse_info  finfo;

		need_event = need_fsevent(FSE_DELETE, dvp);
		if (need_event) {
			if (!batched) {
				get_fse_info(vp, &finfo, ctx);
			} else {
				/* Compound path: collect attrs via vap for fse after the VNOP. */
				error = vfs_get_notify_attributes(&va);
				if (error) {
					goto out;
				}

				vap = &va;
			}
		}
#endif
		has_listeners = kauth_authorize_fileop_has_listeners();
		if (need_event || has_listeners) {
			if (path == NULL) {
				GET_PATH(path);
				if (path == NULL) {
					error = ENOMEM;
					goto out;
				}
			}

			len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
#if CONFIG_FSE
			if (truncated) {
				finfo.mode |= FSE_TRUNCATED_PATH;
			}
#endif
		}

		error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
		nd.ni_vp = vp;
		if (vp == NULLVP) {
			/* Couldn't find a vnode */
			goto out;
		}

		if (error == EKEEPLOOKING) {
			/* Filesystem wants namei to continue the lookup. */
			goto continue_lookup;
		} else if (batched && error == ENOENT) {
			assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
			if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
				/*
				 * For compound VNOPs, the authorization callback
				 * may return ENOENT in case of racing hard link lookups
				 * redrive the lookup.
				 */
				restart_flag = 1;
				restart_count += 1;
				goto out;
			}
		}
#if CONFIG_APPLEDOUBLE
		/*
		 * Special case to remove orphaned AppleDouble
		 * files. I don't like putting this in the kernel,
		 * but carbon does not like putting this in carbon either,
		 * so here we are.
		 */
		if (error == ENOTEMPTY) {
			error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
			if (error == EBUSY) {
				goto out;
			}


			/*
			 * Assuming everything went well, we will try the RMDIR again
			 */
			if (!error)
				error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
		}
#endif /* CONFIG_APPLEDOUBLE */
		/*
		 * Call out to allow 3rd party notification of delete.
		 * Ignore result of kauth_authorize_fileop call.
		 */
		if (!error) {
			if (has_listeners) {
				kauth_authorize_fileop(vfs_context_ucred(ctx),
				    KAUTH_FILEOP_DELETE,
				    (uintptr_t)vp,
				    (uintptr_t)path);
			}

			if (vp->v_flag & VISHARDLINK) {
				// see the comment in unlink1() about why we update
				// the parent of a hard link when it is removed
				vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
			}

#if CONFIG_FSE
			if (need_event) {
				if (vap) {
					vnode_get_fse_info_from_vap(vp, &finfo, vap);
				}
				add_fsevent(FSE_DELETE, ctx,
				    FSE_ARG_STRING, len, path,
				    FSE_ARG_FINFO, &finfo,
				    FSE_ARG_DONE);
			}
#endif
		}

out:
		if (path != NULL) {
			RELEASE_PATH(path);
			path = NULL;
		}
		/*
		 * nameidone has to happen before we vnode_put(dvp)
		 * since it may need to release the fs_nodelock on the dvp
		 */
		nameidone(&nd);
		vnode_put(dvp);

		if (vp)
			vnode_put(vp);

		if (restart_flag == 0) {
			/*
			 * NOTE(review): vp is used only as a wait-channel address
			 * here (not dereferenced) — it was already vnode_put above.
			 */
			wakeup_one((caddr_t)vp);
			return (error);
		}
		tsleep(vp, PVFS, "rm AD", 1);

	} while (restart_flag != 0);

	return (error);

}
8356
8357/*
8358 * Remove a directory file.
8359 */
8360/* ARGSUSED */
8361int
8362rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
8363{
8364 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
8365 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
8366}
8367
/*
 * Get direntry length padded to 8 byte alignment.
 * (Size of a struct direntry carrying a name of length namlen, with the
 * MAXPATHLEN-sized name field shrunk to fit, rounded up to 8 bytes.)
 */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)

/*
 * Get dirent length padded to 4 byte alignment.
 * (Same idea for the legacy struct dirent, whose name field is
 * __DARWIN_MAXNAMLEN + 1 bytes; +1 accounts for the NUL terminator.)
 */
#define DIRENT_LEN(namelen) \
	((sizeof(struct dirent) + (namelen + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)

/* Get the end of this dirent (address of its last byte, per d_reclen). */
#define DIRENT_END(dep) \
	(((char *)(dep)) + (dep)->d_reclen - 1)
8379
8380errno_t
8381vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
8382 int *numdirent, vfs_context_t ctxp)
8383{
8384 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
8385 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
8386 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
8387 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
8388 } else {
8389 size_t bufsize;
8390 void * bufptr;
8391 uio_t auio;
8392 struct direntry *entry64;
8393 struct dirent *dep;
8394 int bytesread;
8395 int error;
8396
8397 /*
8398 * We're here because the underlying file system does not
8399 * support direnties or we mounted denying support so we must
8400 * fall back to dirents and convert them to direntries.
8401 *
8402 * Our kernel buffer needs to be smaller since re-packing will
8403 * expand each dirent. The worse case (when the name length
8404 * is 3 or less) corresponds to a struct direntry size of 32
8405 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
8406 * (4-byte aligned). So having a buffer that is 3/8 the size
8407 * will prevent us from reading more than we can pack.
8408 *
8409 * Since this buffer is wired memory, we will limit the
8410 * buffer size to a maximum of 32K. We would really like to
8411 * use 32K in the MIN(), but we use magic number 87371 to
8412 * prevent uio_resid() * 3 / 8 from overflowing.
8413 */
8414 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
8415 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
8416 if (bufptr == NULL) {
8417 return ENOMEM;
8418 }
8419
8420 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
8421 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
8422 auio->uio_offset = uio->uio_offset;
8423
8424 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
8425
8426 dep = (struct dirent *)bufptr;
8427 bytesread = bufsize - uio_resid(auio);
8428
8429 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
8430 M_TEMP, M_WAITOK);
8431 /*
8432 * Convert all the entries and copy them out to user's buffer.
8433 */
8434 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
8435 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
8436
8437 if (DIRENT_END(dep) > ((char *)bufptr + bytesread) ||
8438 DIRENT_LEN(dep->d_namlen) > dep->d_reclen) {
8439 printf("%s: %s: Bad dirent recived from directory %s\n", __func__,
8440 vp->v_mount->mnt_vfsstat.f_mntonname,
8441 vp->v_name ? vp->v_name : "<unknown>");
8442 error = EIO;
8443 break;
8444 }
8445
8446 bzero(entry64, enbufsize);
8447 /* Convert a dirent to a dirent64. */
8448 entry64->d_ino = dep->d_ino;
8449 entry64->d_seekoff = 0;
8450 entry64->d_reclen = enbufsize;
8451 entry64->d_namlen = dep->d_namlen;
8452 entry64->d_type = dep->d_type;
8453 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
8454
8455 /* Move to next entry. */
8456 dep = (struct dirent *)((char *)dep + dep->d_reclen);
8457
8458 /* Copy entry64 to user's buffer. */
8459 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
8460 }
8461
8462 /* Update the real offset using the offset we got from VNOP_READDIR. */
8463 if (error == 0) {
8464 uio->uio_offset = auio->uio_offset;
8465 }
8466 uio_free(auio);
8467 FREE(bufptr, M_TEMP);
8468 FREE(entry64, M_TEMP);
8469 return (error);
8470 }
8471}
8472
8473#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
8474
/*
 * Read a block of directory entries in a file system independent format.
 *
 * Shared backend for getdirentries() and getdirentries64().
 *
 * Parameters:	fd		open directory file descriptor
 *		bufp		user buffer for the entries
 *		bufsize		size of bufp (clamped to GETDIRENTRIES_MAXBUFSIZE)
 *		bytesread	out: number of bytes placed in bufp
 *		offset		out: directory offset before the read
 *		flags		VNODE_READDIR_EXTENDED selects 64-bit entries
 *
 * Returns:	0		Success
 *		EBADF		fd not open for reading
 *		EINVAL		fd does not reference a directory
 *		!0		errno from MACF, vnode, or readdir layers
 */
static int
getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
                     off_t *offset, int flags)
{
	vnode_t vp;
	/*
	 * NOTE(review): a local copy of the current vfs context is taken;
	 * presumably to snapshot the credential for the whole call — confirm.
	 */
	struct vfs_context context = *vfs_context_current();	/* local copy */
	struct fileproc *fp;
	uio_t auio;
	int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	off_t loff;
	int error, eofflag, numdirent;
	char uio_buf[ UIO_SIZEOF(1) ];

	error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
	if (error) {
		return (error);
	}
	if ((fp->f_fglob->fg_flag & FREAD) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
		error = EBADF;
		goto out;
	}

	if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
		bufsize = GETDIRENTRIES_MAXBUFSIZE;

#if CONFIG_MACF
	error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
	if (error)
		goto out;
#endif
	if ( (error = vnode_getwithref(vp)) ) {
		goto out;
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

unionread:
	if (vp->v_type != VDIR) {
		(void)vnode_put(vp);
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_readdir(&context, vp);
	if (error != 0) {
		(void)vnode_put(vp);
		goto out;
	}
#endif /* MAC */

	/* Read from the file's current offset; report it back via *offset. */
	loff = fp->f_fglob->fg_offset;
	auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, bufp, bufsize);

	if (flags & VNODE_READDIR_EXTENDED) {
		error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
		fp->f_fglob->fg_offset = uio_offset(auio);
	} else {
		error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
		fp->f_fglob->fg_offset = uio_offset(auio);
	}
	if (error) {
		(void)vnode_put(vp);
		goto out;
	}

	/*
	 * Nothing was read: for union mounts, descend to the covered
	 * filesystem and retry the read there.
	 */
	if ((user_ssize_t)bufsize == uio_resid(auio)){
		if (union_dircheckp) {
			error = union_dircheckp(&vp, fp, &context);
			if (error == -1)
				goto unionread;
			if (error) {
				(void)vnode_put(vp);
				goto out;
			}
		}

		if ((vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *tvp = vp;
			if (lookup_traverse_union(tvp, &vp, &context) == 0) {
				vnode_ref(vp);
				fp->f_fglob->fg_data = (caddr_t) vp;
				fp->f_fglob->fg_offset = 0;	/* fresh offset in the lower dir */
				vnode_rele(tvp);
				vnode_put(tvp);
				goto unionread;
			}
			vp = tvp;
		}
	}

	vnode_put(vp);
	if (offset) {
		*offset = loff;
	}

	*bytesread = bufsize - uio_resid(auio);
out:
	file_drop(fd);
	return (error);
}
8580
8581
8582int
8583getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
8584{
8585 off_t offset;
8586 ssize_t bytesread;
8587 int error;
8588
8589 AUDIT_ARG(fd, uap->fd);
8590 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
8591
8592 if (error == 0) {
8593 if (proc_is64bit(p)) {
8594 user64_long_t base = (user64_long_t)offset;
8595 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
8596 } else {
8597 user32_long_t base = (user32_long_t)offset;
8598 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
8599 }
8600 *retval = bytesread;
8601 }
8602 return (error);
8603}
8604
8605int
8606getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
8607{
8608 off_t offset;
8609 ssize_t bytesread;
8610 int error;
8611
8612 AUDIT_ARG(fd, uap->fd);
8613 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
8614
8615 if (error == 0) {
8616 *retval = bytesread;
8617 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
8618 }
8619 return (error);
8620}
8621
8622
8623/*
8624 * Set the mode mask for creation of filesystem nodes.
8625 * XXX implement xsecurity
8626 */
8627#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
8628static int
8629umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
8630{
8631 struct filedesc *fdp;
8632
8633 AUDIT_ARG(mask, newmask);
8634 proc_fdlock(p);
8635 fdp = p->p_fd;
8636 *retval = fdp->fd_cmask;
8637 fdp->fd_cmask = newmask & ALLPERMS;
8638 proc_fdunlock(p);
8639 return (0);
8640}
8641
8642/*
8643 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
8644 *
8645 * Parameters: p Process requesting to set the umask
8646 * uap User argument descriptor (see below)
8647 * retval umask of the process (parameter p)
8648 *
8649 * Indirect: uap->newmask umask to set
8650 * uap->xsecurity ACL to set
8651 *
8652 * Returns: 0 Success
8653 * !0 Not success
8654 *
8655 */
8656int
8657umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
8658{
8659 int ciferror;
8660 kauth_filesec_t xsecdst;
8661
8662 xsecdst = KAUTH_FILESEC_NONE;
8663 if (uap->xsecurity != USER_ADDR_NULL) {
8664 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
8665 return ciferror;
8666 } else {
8667 xsecdst = KAUTH_FILESEC_NONE;
8668 }
8669
8670 ciferror = umask1(p, uap->newmask, xsecdst, retval);
8671
8672 if (xsecdst != KAUTH_FILESEC_NONE)
8673 kauth_filesec_free(xsecdst);
8674 return ciferror;
8675}
8676
8677int
8678umask(proc_t p, struct umask_args *uap, int32_t *retval)
8679{
8680 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
8681}
8682
/*
 * Void all references to file by ripping underlying filesystem
 * away from vnode.
 *
 * Only character and block special files may be revoked; the caller
 * must own the node or pass the superuser check.
 */
/* ARGSUSED */
int
revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;

	NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	       uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* revoke() is only supported on character and block devices. */
	if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
		error = ENOTSUP;
		goto out;
	}

	/* A block device with a mounted filesystem cannot be revoked. */
	if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
		error = EBUSY;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_revoke(ctx, vp);
	if (error)
		goto out;
#endif

	/* Caller must own the node, or be the superuser. */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	if ((error = vnode_getattr(vp, &va, ctx)))
		goto out;
	if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
	    (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
		goto out;
	/* Only bother revoking if someone actually holds a reference. */
	if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
		VNOP_REVOKE(vp, REVOKEALL, ctx);
out:
	vnode_put(vp);
	return (error);
}
8735
8736
8737/*
 * HFS/HFS Plus SPECIFIC SYSTEM CALLS
8739 * The following system calls are designed to support features
8740 * which are specific to the HFS & HFS Plus volume formats
8741 */
8742
8743
/*
 * Obtain attribute information on objects in a directory while enumerating
 * the directory.
 *
 * Reads up to uap->count entries with the attributes named by uap->alist
 * into uap->buffer, updating uap->count with the number of entries
 * actually returned.  *retval is set to 1 at end-of-directory, 0
 * otherwise.  Transparently descends through union mounts when the top
 * layer is exhausted.
 */
/* ARGSUSED */
int
getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
{
	vnode_t vp;
	struct fileproc *fp;
	uio_t auio = NULL;
	int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	uint32_t count = 0, savecount = 0;
	uint32_t newstate = 0;
	int error, eofflag;
	uint32_t loff = 0;
	struct attrlist attributelist;
	vfs_context_t ctx = vfs_context_current();
	int fd = uap->fd;
	char uio_buf[ UIO_SIZEOF(1) ];
	kauth_action_t action;

	AUDIT_ARG(fd, fd);

	/* Get the attributes into kernel space */
	if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
		return(error);
	}
	if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
		return(error);
	}
	/* Remember the requested count so it can be reset per union layer. */
	savecount = count;
	if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
		return (error);
	}
	if ((fp->f_fglob->fg_flag & FREAD) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
		error = EBADF;
		goto out;
	}


#if CONFIG_MACF
	error = mac_file_check_change_offset(vfs_context_ucred(ctx),
	    fp->f_fglob);
	if (error)
		goto out;
#endif


	if ( (error = vnode_getwithref(vp)) )
		goto out;

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

unionread:
	if (vp->v_type != VDIR) {
		(void)vnode_put(vp);
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_readdir(ctx, vp);
	if (error != 0) {
		(void)vnode_put(vp);
		goto out;
	}
#endif /* MAC */

	/* set up the uio structure which will contain the users return buffer */
	loff = fp->f_fglob->fg_offset;
	auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, uap->buffer, uap->buffersize);

	/*
	 * If the only item requested is file names, we can let that past with
	 * just LIST_DIRECTORY.  If they want any other attributes, that means
	 * they need SEARCH as well.
	 */
	action = KAUTH_VNODE_LIST_DIRECTORY;
	if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
	    attributelist.fileattr || attributelist.dirattr)
		action |= KAUTH_VNODE_SEARCH;

	if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {

		/* Believe it or not, uap->options only has 32-bits of valid
		 * info, so truncate before extending again */

		error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
		        (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
	}

	if (error) {
		(void) vnode_put(vp);
		goto out;
	}

	/*
	 * If we've got the last entry of a directory in a union mount
	 * then reset the eofflag and pretend there's still more to come.
	 * The next call will again set eofflag and the buffer will be empty,
	 * so traverse to the underlying directory and do the directory
	 * read there.
	 */
	if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
		if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
			eofflag = 0;
		} else { // Empty buffer
			struct vnode *tvp = vp;
			if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
				vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
				fp->f_fglob->fg_data = (caddr_t) vp;
				fp->f_fglob->fg_offset = 0; // reset index for new dir
				count = savecount;
				vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
				vnode_put(tvp);
				goto unionread;
			}
			vp = tvp;
		}
	}

	(void)vnode_put(vp);

	if (error)
		goto out;
	fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */

	/* Copy results back out: entry count, dir state, and base offset. */
	if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
		goto out;
	if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
		goto out;
	if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
		goto out;

	*retval = eofflag;  /* similar to getdirentries */
	error = 0;
out:
	file_drop(fd);
	return (error); /* return error earlier, a retval of 0 or 1 now */

} /* end of getdirentriesattr system call */
8888
8889/*
8890* Exchange data between two files
8891*/
8892
8893/* ARGSUSED */
/*
 * exchangedata: atomically exchange the data of two regular files on the
 * same volume via VNOP_EXCHANGE, then swap their name-cache identities
 * (v_name and v_parent) so each vnode keeps tracking its original path.
 *
 * Returns:	0	Success
 *		EINVAL	same file twice, or a non-regular file
 *		EXDEV	files live on different volumes
 *		!0	errno from lookup, MACF, authorization, or the VNOP
 */
/* ARGSUSED */
int
exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
{

	struct nameidata fnd, snd;
	vfs_context_t ctx = vfs_context_current();
	vnode_t fvp;
	vnode_t svp;
	int error;
	u_int32_t nameiflags;
	char *fpath = NULL;
	char *spath = NULL;
	int   flen=0, slen=0;
	int from_truncated=0, to_truncated=0;
#if CONFIG_FSE
	fse_info f_finfo, s_finfo;
#endif

	nameiflags = 0;
	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;

	NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
	       UIO_USERSPACE, uap->path1, ctx);

	error = namei(&fnd);
	if (error)
		goto out2;

	nameidone(&fnd);
	fvp = fnd.ni_vp;

	NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
	       UIO_USERSPACE, uap->path2, ctx);

	error = namei(&snd);
	if (error) {
		vnode_put(fvp);
		goto out2;
	}
	nameidone(&snd);
	svp = snd.ni_vp;

	/*
	 * if the files are the same, return an inval error
	 */
	if (svp == fvp) {
		error = EINVAL;
		goto out;
	}

	/*
	 * if the files are on different volumes, return an error
	 */
	if (svp->v_mount != fvp->v_mount) {
		error = EXDEV;
		goto out;
	}

	/* If they're not files, return an error */
	if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_exchangedata(ctx,
	    fvp, svp);
	if (error)
		goto out;
#endif
	/* Both files must be readable and writable by the caller. */
	if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
	    ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
		goto out;

	/* Collect paths and fse info up front if anyone is listening. */
	if (
#if CONFIG_FSE
	need_fsevent(FSE_EXCHANGE, fvp) ||
#endif
	kauth_authorize_fileop_has_listeners()) {
		GET_PATH(fpath);
		GET_PATH(spath);
		if (fpath == NULL || spath == NULL) {
			error = ENOMEM;
			goto out;
		}

		flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
		slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);

#if CONFIG_FSE
		get_fse_info(fvp, &f_finfo, ctx);
		get_fse_info(svp, &s_finfo, ctx);
		if (from_truncated || to_truncated) {
			// set it here since only the f_finfo gets reported up to user space
			f_finfo.mode |= FSE_TRUNCATED_PATH;
		}
#endif
	}
	/* Ok, make the call */
	error = VNOP_EXCHANGE(fvp, svp, 0, ctx);

	if (error == 0) {
		const char *tmpname;

		if (fpath != NULL && spath != NULL) {
			/* call out to allow 3rd party notification of exchangedata.
			 * Ignore result of kauth_authorize_fileop call.
			 */
			kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
					       (uintptr_t)fpath, (uintptr_t)spath);
		}
		/*
		 * Swap v_name/v_parent under the name cache lock so each
		 * vnode continues to correspond to its original pathname.
		 */
		name_cache_lock();

		tmpname = fvp->v_name;
		fvp->v_name = svp->v_name;
		svp->v_name = tmpname;

		if (fvp->v_parent != svp->v_parent) {
			vnode_t tmp;

			tmp = fvp->v_parent;
			fvp->v_parent = svp->v_parent;
			svp->v_parent = tmp;
		}
		name_cache_unlock();

#if CONFIG_FSE
		if (fpath != NULL && spath != NULL) {
			add_fsevent(FSE_EXCHANGE, ctx,
				    FSE_ARG_STRING, flen, fpath,
				    FSE_ARG_FINFO, &f_finfo,
				    FSE_ARG_STRING, slen, spath,
				    FSE_ARG_FINFO, &s_finfo,
				    FSE_ARG_DONE);
		}
#endif
	}

out:
	if (fpath != NULL)
		RELEASE_PATH(fpath);
	if (spath != NULL)
		RELEASE_PATH(spath);
	vnode_put(svp);
	vnode_put(fvp);
out2:
	return (error);
}
9042
9043/*
9044 * Return (in MB) the amount of freespace on the given vnode's volume.
9045 */
9046uint32_t freespace_mb(vnode_t vp);
9047
9048uint32_t
9049freespace_mb(vnode_t vp)
9050{
9051 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
9052 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
9053 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
9054}
9055
9056#if CONFIG_SEARCHFS
9057
9058/* ARGSUSED */
9059
/*
 * searchfs: search a volume for objects matching the supplied criteria,
 * delegating the actual matching to the filesystem via VNOP_SEARCHFS.
 *
 * Copies in the fssearchblock (32- or 64-bit layout), validates the
 * search parameter buffers, switches to the volume root, descends union
 * mount layers as tracked in the opaque searchstate, and copies the
 * match count and updated state back out.
 */
/* ARGSUSED */

int
searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
{
	vnode_t vp, tvp;
	int i, error=0;
	int fserror = 0;
	struct nameidata nd;
	struct user64_fssearchblock searchblock;
	struct searchstate *state;
	struct attrlist *returnattrs;
	struct timeval timelimit;
	void *searchparams1,*searchparams2;
	uio_t auio = NULL;
	int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	uint32_t nummatches;
	int mallocsize;
	uint32_t nameiflags;
	vfs_context_t ctx = vfs_context_current();
	char uio_buf[ UIO_SIZEOF(1) ];

	/* Start by copying in fsearchblock parameter list */
	if (IS_64BIT_PROCESS(p)) {
		error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
		timelimit.tv_sec = searchblock.timelimit.tv_sec;
		timelimit.tv_usec = searchblock.timelimit.tv_usec;
	}
	else {
		struct user32_fssearchblock tmp_searchblock;

		error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
		// munge into 64-bit version
		searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
		searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
		searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
		searchblock.maxmatches = tmp_searchblock.maxmatches;
		/*
		 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
		 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
		 */
		timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
		timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
		searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
		searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
		searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
		searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
		searchblock.searchattrs = tmp_searchblock.searchattrs;
	}
	if (error)
		return(error);

	/* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
	 */
	if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
	    searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
		return(EINVAL);

	/* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
	/* It all has to go into local memory and it's not that big so we might as well put it all together. */
	/* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
	/* block.                                                                                             */
	/*                                                                                                    */
	/* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate        */
	/*       due to the changes in rdar://problem/12438273.  That way if a 3rd party file system          */
	/*       assumes the size is still 556 bytes it will continue to work                                 */

	mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
		      sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));

	MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);

	/* Now set up the various pointers to the correct place in our newly allocated memory */

	searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
	returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
	state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));

	/* Now copy in the stuff given our local variables. */

	if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
		goto freeandexit;

	if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
		goto freeandexit;

	if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
		goto freeandexit;

	if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
		goto freeandexit;

	/*
	 * When searching a union mount, need to set the
	 * start flag at the first call on each layer to
	 * reset state for the new volume.
	 */
	if (uap->options & SRCHFS_START)
		state->ss_union_layer = 0;
	else
		uap->options |= state->ss_union_flags;
	state->ss_union_flags = 0;

	/*
	 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
	 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
	 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
	 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
	 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
	 */

	if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
		attrreference_t* string_ref;
		u_int32_t* start_length;
		user64_size_t param_length;

		/* validate searchparams1 */
		param_length = searchblock.sizeofsearchparams1;
		/* skip the word that specifies length of the buffer */
		start_length= (u_int32_t*) searchparams1;
		start_length= start_length+1;
		string_ref= (attrreference_t*) start_length;

		/* ensure no negative offsets or too big offsets */
		if (string_ref->attr_dataoffset < 0 ) {
			error = EINVAL;
			goto freeandexit;
		}
		if (string_ref->attr_length > MAXPATHLEN) {
			error = EINVAL;
			goto freeandexit;
		}

		/* Check for pointer overflow in the string ref */
		if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
			error = EINVAL;
			goto freeandexit;
		}

		if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
			error = EINVAL;
			goto freeandexit;
		}
		if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
			error = EINVAL;
			goto freeandexit;
		}
	}

	/* set up the uio structure which will contain the users return buffer */
	auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);

	nameiflags = 0;
	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
	NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
	       UIO_USERSPACE, uap->path, ctx);

	error = namei(&nd);
	if (error)
		goto freeandexit;
	vp = nd.ni_vp;
	nameidone(&nd);

	/*
	 * Switch to the root vnode for the volume
	 */
	error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
	vnode_put(vp);
	if (error)
		goto freeandexit;
	vp = tvp;

	/*
	 * If it's a union mount, the path lookup takes
	 * us to the top layer. But we may need to descend
	 * to a lower layer. For non-union mounts the layer
	 * is always zero.
	 */
	for (i = 0; i < (int) state->ss_union_layer; i++) {
		if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
			break;
		tvp = vp;
		vp = vp->v_mount->mnt_vnodecovered;
		if (vp == NULL) {
			vnode_put(tvp);
			error = ENOENT;
			goto freeandexit;
		}
		error = vnode_getwithref(vp);
		vnode_put(tvp);
		if (error)
			goto freeandexit;
	}

#if CONFIG_MACF
	error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
	if (error) {
		vnode_put(vp);
		goto freeandexit;
	}
#endif


	/*
	 * If searchblock.maxmatches == 0, then skip the search. This has happened
	 * before and sometimes the underlying code doesn't deal with it well.
	 */
	 if (searchblock.maxmatches == 0) {
		nummatches = 0;
		goto saveandexit;
	 }

	/*
	 * All right, we have everything we need, so lets make that call.
	 *
	 * We keep special track of the return value from the file system:
	 * EAGAIN is an acceptable error condition that shouldn't keep us
	 * from copying out any results...
	 */

	fserror = VNOP_SEARCHFS(vp,
	    searchparams1,
	    searchparams2,
	    &searchblock.searchattrs,
	    (u_long)searchblock.maxmatches,
	    &timelimit,
	    returnattrs,
	    &nummatches,
	    (u_long)uap->scriptcode,
	    (u_long)uap->options,
	    auio,
	    (struct searchstate *) &state->ss_fsstate,
	    ctx);

	/*
	 * If it's a union mount we need to be called again
	 * to search the mounted-on filesystem.
	 */
	if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
		state->ss_union_flags = SRCHFS_START;
		state->ss_union_layer++;	// search next layer down
		fserror = EAGAIN;
	}

saveandexit:

	vnode_put(vp);

	/* Now copy out the stuff that needs copying out. That means the number of matches, the
	   search state.  Everything was already put into the return buffer by the vop call. */

	if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
		goto freeandexit;

	if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
		goto freeandexit;

	error = fserror;

freeandexit:

	FREE(searchparams1,M_TEMP);

	return(error);


} /* end of searchfs system call */
9326
9327#else /* CONFIG_SEARCHFS */
9328
9329int
9330searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
9331{
9332 return (ENOTSUP);
9333}
9334
9335#endif /* CONFIG_SEARCHFS */
9336
9337
/*
 * Namespace-handler ("nspace") state.  These globals back the fsctl-based
 * namespace/snapshot handler mechanism implemented below: lock bootstrap
 * objects, the table of in-flight events, and its bookkeeping counters.
 */

/* Lock group/attribute bootstrap objects, set up in nspace_handler_init() */
lck_grp_attr_t * nspace_group_attr;
lck_attr_t * nspace_lock_attr;
lck_grp_t * nspace_mutex_group;

/* Protects nspace_items[] and the counters below */
lck_mtx_t nspace_handler_lock;
/* Serializes handler_busy updates in wait_for_namespace_event() */
lck_mtx_t nspace_handler_exclusion_lock;

/* Snapshot epoch; 0 (or ~0) means no snapshot handler is active */
time_t snapshot_timestamp=0;
/* Non-zero allows snapshot events on virtual (disk-image) devices */
int nspace_allow_virtual_devs=0;

void nspace_handler_init(void);

/* One in-flight namespace event awaiting service by a handler process */
typedef struct nspace_item_info {
	struct vnode *vp;    /* vnode the event refers to; NULL when slot is free/done */
	void *arg;           /* optional per-event argument (e.g. a uio for snapshot events) */
	uint64_t op;         /* NAMESPACE_HANDLER_* operation bits */
	uint32_t vid;        /* vnode vid captured when the item was enqueued */
	uint32_t flags;      /* NSPACE_ITEM_* state bits (see below) */
	uint32_t token;      /* id handed to userspace to identify this item */
	uint32_t refcount;   /* number of threads waiting on this item */
} nspace_item_info;

#define MAX_NSPACE_ITEMS 128
nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
uint32_t nspace_token_id=0;
uint32_t nspace_handler_timeout = 15; // seconds

/* nspace_item_info.flags: item life-cycle state bits */
#define NSPACE_ITEM_NEW 0x0001
#define NSPACE_ITEM_PROCESSING 0x0002
#define NSPACE_ITEM_DEAD 0x0004
#define NSPACE_ITEM_CANCELLED 0x0008
#define NSPACE_ITEM_DONE 0x0010
#define NSPACE_ITEM_RESET_TIMER 0x0020

/* nspace_item_info.flags: which handler type services the item */
#define NSPACE_ITEM_NSPACE_EVENT 0x0040
#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080

#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)

//#pragma optimization_level 0

typedef enum {
	NSPACE_HANDLER_NSPACE = 0,
	NSPACE_HANDLER_SNAPSHOT = 1,

	NSPACE_HANDLER_COUNT,
} nspace_type_t;

/* Registration record for one handler type */
typedef struct {
	uint64_t handler_tid;       /* thread id of the registered handler thread */
	struct proc *handler_proc;  /* process acting as the handler; NULL when none */
	int handler_busy;           /* non-zero while a handler call is in flight */
} nspace_handler_t;

nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];

/* namespace fsctl functions */
static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
static int nspace_item_flags_for_type(nspace_type_t nspace_type);
static int nspace_open_flags_for_type(nspace_type_t nspace_type);
static nspace_type_t nspace_type_for_op(uint64_t op);
static int nspace_is_special_process(struct proc *proc);
static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
static int validate_namespace_args (int is64bit, int size);
static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
9405
9406
9407static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
9408{
9409 switch(nspace_type) {
9410 case NSPACE_HANDLER_NSPACE:
9411 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
9412 case NSPACE_HANDLER_SNAPSHOT:
9413 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
9414 default:
9415 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
9416 return 0;
9417 }
9418}
9419
9420static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
9421{
9422 switch(nspace_type) {
9423 case NSPACE_HANDLER_NSPACE:
9424 return NSPACE_ITEM_NSPACE_EVENT;
9425 case NSPACE_HANDLER_SNAPSHOT:
9426 return NSPACE_ITEM_SNAPSHOT_EVENT;
9427 default:
9428 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
9429 return 0;
9430 }
9431}
9432
9433static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
9434{
9435 switch(nspace_type) {
9436 case NSPACE_HANDLER_NSPACE:
9437 return FREAD | FWRITE | O_EVTONLY;
9438 case NSPACE_HANDLER_SNAPSHOT:
9439 return FREAD | O_EVTONLY;
9440 default:
9441 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
9442 return 0;
9443 }
9444}
9445
9446static inline nspace_type_t nspace_type_for_op(uint64_t op)
9447{
9448 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
9449 case NAMESPACE_HANDLER_NSPACE_EVENT:
9450 return NSPACE_HANDLER_NSPACE;
9451 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
9452 return NSPACE_HANDLER_SNAPSHOT;
9453 default:
9454 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
9455 return NSPACE_HANDLER_NSPACE;
9456 }
9457}
9458
9459static inline int nspace_is_special_process(struct proc *proc)
9460{
9461 int i;
9462 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9463 if (proc == nspace_handlers[i].handler_proc)
9464 return 1;
9465 }
9466 return 0;
9467}
9468
9469void
9470nspace_handler_init(void)
9471{
9472 nspace_lock_attr = lck_attr_alloc_init();
9473 nspace_group_attr = lck_grp_attr_alloc_init();
9474 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
9475 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
9476 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
9477 memset(&nspace_items[0], 0, sizeof(nspace_items));
9478}
9479
/*
 * Called when a process exits.  If the exiting process was registered as
 * a namespace and/or snapshot handler, deregister it and wake every
 * thread currently blocked on an event of that handler's type, marking
 * those items done so the waiters don't hang forever.
 */
void
nspace_proc_exit(struct proc *p)
{
	int i, event_mask = 0;

	/* collect the event-type bits for every handler slot this process held */
	for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
		if (p == nspace_handlers[i].handler_proc) {
			event_mask |= nspace_item_flags_for_type(i);
			nspace_handlers[i].handler_tid = 0;
			nspace_handlers[i].handler_proc = NULL;
		}
	}

	if (event_mask == 0) {
		/* not a handler process: nothing to clean up */
		return;
	}

	lck_mtx_lock(&nspace_handler_lock);
	if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
		// if this process was the snapshot handler, zero snapshot_timeout
		snapshot_timestamp = 0;
	}

	//
	// unblock anyone that's waiting for the handler that died
	//
	for(i=0; i < MAX_NSPACE_ITEMS; i++) {
		if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {

			if ( nspace_items[i].flags & event_mask ) {

				/* drop the pending-snapshot mark before detaching the vnode */
				if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
					vnode_lock_spin(nspace_items[i].vp);
					nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
					vnode_unlock(nspace_items[i].vp);
				}
				nspace_items[i].vp = NULL;
				nspace_items[i].vid = 0;
				nspace_items[i].flags = NSPACE_ITEM_DONE;
				nspace_items[i].token = 0;

				/* waiters in resolve_nspace_item_ext() sleep on &nspace_items[i].vp */
				wakeup((caddr_t)&(nspace_items[i].vp));
			}
		}
	}

	/* also wake any handler thread parked in wait_for_namespace_event() */
	wakeup((caddr_t)&nspace_item_idx);
	lck_mtx_unlock(&nspace_handler_lock);
}
9529
9530
/*
 * Convenience wrapper around resolve_nspace_item_ext() for callers with
 * no per-event argument to hand to the handler.
 */
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	return resolve_nspace_item_ext(vp, op, NULL);
}
9536
/*
 * Queue a namespace/snapshot event for 'vp' and block until the
 * userspace handler finishes it (or until the wait times out or is
 * interrupted).  If an identical (vp, op) event is already queued, the
 * caller piggy-backs on that slot instead of enqueueing a duplicate.
 *
 * Returns 0 on success (or when the event is simply not applicable),
 * EDEADLK when called from a handler process itself, ETIMEDOUT when
 * the handler never responded, or another errno from msleep()/the
 * handler's cancel token.
 */
int
resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
{
	int i, error, keep_waiting;
	struct timespec ts;
	nspace_type_t nspace_type = nspace_type_for_op(op);

	// only allow namespace events on regular files, directories and symlinks.
	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
		return 0;
	}

	//
	// if this is a snapshot event and the vnode is on a
	// disk image just pretend nothing happened since any
	// change to the disk image will cause the disk image
	// itself to get backed up and this avoids multi-way
	// deadlocks between the snapshot handler and the ever
	// popular diskimages-helper process. the variable
	// nspace_allow_virtual_devs allows this behavior to
	// be overridden (for use by the Mobile TimeMachine
	// testing infrastructure which uses disk images)
	//
	if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
	    && (vp->v_mount != NULL)
	    && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
	    && !nspace_allow_virtual_devs) {

		return 0;
	}

	// if (thread_tid(current_thread()) == namespace_handler_tid) {
	/* no handler registered for this event type: nothing to wait for */
	if (nspace_handlers[nspace_type].handler_proc == NULL) {
		return 0;
	}

	/* a handler process blocking on itself would deadlock */
	if (nspace_is_special_process(current_proc())) {
		return EDEADLK;
	}

	lck_mtx_lock(&nspace_handler_lock);

retry:
	/* first look for an already-queued event for this same (vp, op) */
	for(i=0; i < MAX_NSPACE_ITEMS; i++) {
		if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
			break;
		}
	}

	if (i >= MAX_NSPACE_ITEMS) {
		/* no match: find a free slot (flags == 0 means unused) */
		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].flags == 0) {
				break;
			}
		}
	} else {
		/* piggy-back on the existing item */
		nspace_items[i].refcount++;
	}

	if (i >= MAX_NSPACE_ITEMS) {
		/* table full: wait (bounded) for a slot to be released, then retry */
		ts.tv_sec = nspace_handler_timeout;
		ts.tv_nsec = 0;

		error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
		if (error == 0) {
			// an entry got free'd up, go see if we can get a slot
			goto retry;
		} else {
			lck_mtx_unlock(&nspace_handler_lock);
			return error;
		}
	}

	//
	// if it didn't already exist, add it. if it did exist
	// we'll get woken up when someone does a wakeup() on
	// the slot in the nspace_items table.
	//
	if (vp != nspace_items[i].vp) {
		nspace_items[i].vp = vp;
		nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg;  // arg is {NULL, true, uio *} - only pass uio thru to the user
		nspace_items[i].op = op;
		nspace_items[i].vid = vnode_vid(vp);
		nspace_items[i].flags = NSPACE_ITEM_NEW;
		nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
		if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
			if (arg) {
				/* mark the vnode so pending-snapshot state is visible elsewhere */
				vnode_lock_spin(vp);
				vp->v_flag |= VNEEDSSNAPSHOT;
				vnode_unlock(vp);
			}
		}

		nspace_items[i].token = 0;
		nspace_items[i].refcount = 1;

		/* tell a parked handler thread there is a new item to service */
		wakeup((caddr_t)&nspace_item_idx);
	}

	//
	// Now go to sleep until the handler does a wakeup on this
	// slot in the nspace_items table (or we timeout).
	//
	keep_waiting = 1;
	while(keep_waiting) {
		ts.tv_sec = nspace_handler_timeout;
		ts.tv_nsec = 0;
		error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);

		if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
			error = 0;
		} else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
			/* on cancel the handler stores the errno to return in .token */
			error = nspace_items[i].token;
		} else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
			if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
				/* handler asked for more time: re-arm the timeout and keep waiting */
				nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
				continue;
			} else {
				error = ETIMEDOUT;
			}
		} else if (error == 0) {
			// hmmm, why did we get woken up?
			printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
			       nspace_items[i].token);
		}

		/* last waiter out recycles the slot */
		if (--nspace_items[i].refcount == 0) {
			nspace_items[i].vp = NULL;      // clear this so that no one will match on it again
			nspace_items[i].arg = NULL;
			nspace_items[i].token = 0;      // clear this so that the handler will not find it anymore
			nspace_items[i].flags = 0;      // this clears it for re-use
		}
		/* wake threads stalled in the "nspace-no-space" wait above */
		wakeup(&nspace_token_id);
		keep_waiting = 0;
	}

	lck_mtx_unlock(&nspace_handler_lock);

	return error;
}
9677
9678int nspace_snapshot_event(vnode_t vp, time_t ctime, uint64_t op_type, void *arg)
9679{
9680 int snapshot_error = 0;
9681
9682 if (vp == NULL) {
9683 return 0;
9684 }
9685
9686 /* Swap files are special; skip them */
9687 if (vnode_isswap(vp)) {
9688 return 0;
9689 }
9690
9691 if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
9692 // the change time is within this epoch
9693 int error;
9694
9695 error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
9696 if (error == EDEADLK) {
9697 snapshot_error = 0;
9698 } else if (error) {
9699 if (error == EAGAIN) {
9700 printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
9701 } else if (error == EINTR) {
9702 // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
9703 snapshot_error = EINTR;
9704 }
9705 }
9706 }
9707
9708 return snapshot_error;
9709}
9710
9711int
9712get_nspace_item_status(struct vnode *vp, int32_t *status)
9713{
9714 int i;
9715
9716 lck_mtx_lock(&nspace_handler_lock);
9717 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9718 if (nspace_items[i].vp == vp) {
9719 break;
9720 }
9721 }
9722
9723 if (i >= MAX_NSPACE_ITEMS) {
9724 lck_mtx_unlock(&nspace_handler_lock);
9725 return ENOENT;
9726 }
9727
9728 *status = nspace_items[i].flags;
9729 lck_mtx_unlock(&nspace_handler_lock);
9730 return 0;
9731}
9732
9733
#if 0
/*
 * NOTE: compiled out (#if 0) — retained for reference.
 *
 * Build a volfs-style path ("/.vol/<fsid>/<fileid>") for 'vp' into
 * 'path'.  On entry *len is the buffer size; on exit it is the length
 * of the generated string including the NUL.  Returns 0 on success or
 * -1 when the vnode attributes could not be fetched (a placeholder
 * path is emitted in that case).
 */
static int
build_volfs_path(struct vnode *vp, char *path, int *len)
{
	struct vnode_attr va;
	int ret;

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_fileid);

	if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
		*len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
		ret = -1;
	} else {
		*len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
		ret = 0;
	}

	return ret;
}
#endif
9756
9757//
9758// Note: this function does NOT check permissions on all of the
9759// parent directories leading to this vnode. It should only be
9760// called on behalf of a root process. Otherwise a process may
9761// get access to a file because the file itself is readable even
9762// though its parent directories would prevent access.
9763//
/*
 * Open an already-resolved vnode on behalf of a (root-only) caller,
 * taking a usecount reference on success.  The sequence is: superuser
 * check, MAC open check, kauth authorization of the computed action,
 * VNOP_OPEN, then vnode_ref_ext — with the open undone if the ref
 * fails.  Returns 0 on success or an errno.
 */
static int
vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
{
	int error, action;

	/* root only — see the warning above about unchecked parent directories */
	if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
		return error;
	}

#if CONFIG_MACF
	error = mac_vnode_check_open(ctx, vp, fmode);
	if (error)
		return error;
#endif

	/* compute action to be authorized */
	action = 0;
	if (fmode & FREAD) {
		action |= KAUTH_VNODE_READ_DATA;
	}
	if (fmode & (FWRITE | O_TRUNC)) {
		/*
		 * If we are writing, appending, and not truncating,
		 * indicate that we are appending so that if the
		 * UF_APPEND or SF_APPEND bits are set, we do not deny
		 * the open.
		 */
		if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
			action |= KAUTH_VNODE_APPEND_DATA;
		} else {
			action |= KAUTH_VNODE_WRITE_DATA;
		}
	}

	if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
		return error;


	//
	// if the vnode is tagged VOPENEVT and the current process
	// has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
	// flag to the open mode so that this open won't count against
	// the vnode when carbon delete() does a vnode_isinuse() to see
	// if a file is currently in use.  this allows spotlight
	// importers to not interfere with carbon apps that depend on
	// the no-delete-if-busy semantics of carbon delete().
	//
	if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
		fmode |= O_EVTONLY;
	}

	if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
		return error;
	}
	/* take a usecount ref; undo the open if that fails */
	if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
		VNOP_CLOSE(vp, fmode, ctx);
		return error;
	}

	/* Call out to allow 3rd party notification of open.
	 * Ignore result of kauth_authorize_fileop call.
	 */
#if CONFIG_MACF
	mac_vnode_notify_open(ctx, vp, fmode);
#endif
	kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
						   (uintptr_t)vp, 0);


	return 0;
}
9835
/*
 * Core of the FSIOC_*_HANDLER_GET fsctls: register the calling process
 * as the handler of 'nspace_type', block until a matching NSPACE_ITEM_NEW
 * item appears, open that item's vnode, wire it into the caller's fd
 * table, and copy the token/op/fd (plus optional info and objid fields)
 * out through 'nhd'.  On any failure past claiming the item, the item is
 * unblocked so its enqueuing waiter is released.  Returns 0 on success,
 * EBUSY if a handler call of this type is already in flight, EINVAL when
 * the snapshot epoch is invalid, or another errno.
 */
static int
wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
{
	int i;
	int error = 0;
	int unblock = 0;
	task_t curtask;

	/* only one in-flight handler call per type */
	lck_mtx_lock(&nspace_handler_exclusion_lock);
	if (nspace_handlers[nspace_type].handler_busy) {
		lck_mtx_unlock(&nspace_handler_exclusion_lock);
		return EBUSY;
	}

	nspace_handlers[nspace_type].handler_busy = 1;
	lck_mtx_unlock(&nspace_handler_exclusion_lock);

	/*
	 * Any process that gets here will be one of the namespace handlers.
	 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
	 * as we can cause deadlocks to occur, because the namespace handler may prevent
	 * VNOP_INACTIVE from proceeding.  Mark the current task as a P_DEPENDENCY_CAPABLE
	 * process.
	 */
	curtask = current_task();
	bsd_set_dependency_capable (curtask);

	lck_mtx_lock(&nspace_handler_lock);
	/* first caller of this type becomes the registered handler */
	if (nspace_handlers[nspace_type].handler_proc == NULL) {
		nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
		nspace_handlers[nspace_type].handler_proc = current_proc();
	}

	/* snapshot handlers need a valid epoch (0 and ~0 mean "none") */
	if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
	    (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
		error = EINVAL;
	}

	while (error == 0) {

		/* Try to find matching namespace item */
		for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
				if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
					break;
				}
			}
		}

		if (i >= MAX_NSPACE_ITEMS) {
			/* Nothing is there yet. Wait for wake up and retry */
			error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
			if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
				/* Prevent infinite loop if snapshot handler exited */
				error = EINVAL;
				break;
			}
			continue;
		}

		/* claim the item: NEW -> PROCESSING, and stamp a fresh token */
		nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
		nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
		nspace_items[i].token = ++nspace_token_id;

		assert(nspace_items[i].vp);
		struct fileproc *fp;
		int32_t indx;
		int32_t fmode;
		struct proc *p = current_proc();
		vfs_context_t ctx = vfs_context_current();
		struct vnode_attr va;
		bool vn_get_succsessful = false;
		bool vn_open_successful = false;
		bool fp_alloc_successful = false;

		/*
		 * Use vnode pointer to acquire a file descriptor for
		 * hand-off to userland
		 */
		fmode = nspace_open_flags_for_type(nspace_type);
		/* validate against the vid captured at enqueue time */
		error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
		if (error) goto cleanup;
		vn_get_succsessful = true;

		error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
		if (error) goto cleanup;
		vn_open_successful = true;

		error = falloc(p, &fp, &indx, ctx);
		if (error) goto cleanup;
		fp_alloc_successful = true;

		fp->f_fglob->fg_flag = fmode;
		fp->f_fglob->fg_ops = &vnops;
		fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;

		/* publish the fd into the process's table */
		proc_fdlock(p);
		procfdtbl_releasefd(p, indx, NULL);
		fp_drop(p, indx, fp, 1);
		proc_fdunlock(p);

		/*
		 * All variants of the namespace handler struct support these three fields:
		 * token, flags, and the FD pointer
		 */
		error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
		if (error) goto cleanup;
		error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
		if (error) goto cleanup;
		error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
		if (error) goto cleanup;

		/*
		 * Handle optional fields:
		 * extended version support an info ptr (offset, length), and the
		 *
		 * namedata version supports a unique per-link object ID
		 *
		 */
		if (nhd->infoptr) {
			uio_t uio = (uio_t)nspace_items[i].arg;
			uint64_t u_offset, u_length;

			if (uio) {
				u_offset = uio_offset(uio);
				u_length = uio_resid(uio);
			} else {
				u_offset = 0;
				u_length = 0;
			}
			error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
			if (error) goto cleanup;
			error = copyout(&u_length, nhd->infoptr + sizeof(uint64_t), sizeof(uint64_t));
			if (error) goto cleanup;
		}

		if (nhd->objid) {
			VATTR_INIT(&va);
			VATTR_WANTED(&va, va_linkid);
			error = vnode_getattr(nspace_items[i].vp, &va, ctx);
			if (error) goto cleanup;

			uint64_t linkid = 0;
			if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
				linkid = (uint64_t)va.va_linkid;
			}
			error = copyout(&linkid, nhd->objid, sizeof(uint64_t));
		}
cleanup:
		/* unwind in reverse order of acquisition; flag the item for unblock */
		if (error) {
			if (fp_alloc_successful) fp_free(p, indx, fp);
			if (vn_open_successful) vn_close(nspace_items[i].vp, fmode, ctx);
			unblock = 1;
		}

		if (vn_get_succsessful) vnode_put(nspace_items[i].vp);

		break;
	}

	if (unblock) {
		/* hand-off failed: finish the item so its waiter is released */
		if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
			vnode_lock_spin(nspace_items[i].vp);
			nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
			vnode_unlock(nspace_items[i].vp);
		}
		nspace_items[i].vp = NULL;
		nspace_items[i].vid = 0;
		nspace_items[i].flags = NSPACE_ITEM_DONE;
		nspace_items[i].token = 0;

		wakeup((caddr_t)&(nspace_items[i].vp));
	}

	if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
		// just go through every snapshot event and unblock it immediately.
		if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
			for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
				if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
					if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
						nspace_items[i].vp = NULL;
						nspace_items[i].vid = 0;
						nspace_items[i].flags = NSPACE_ITEM_DONE;
						nspace_items[i].token = 0;

						wakeup((caddr_t)&(nspace_items[i].vp));
					}
				}
			}
		}
	}

	lck_mtx_unlock(&nspace_handler_lock);

	/* allow the next handler call of this type */
	lck_mtx_lock(&nspace_handler_exclusion_lock);
	nspace_handlers[nspace_type].handler_busy = 0;
	lck_mtx_unlock(&nspace_handler_exclusion_lock);

	return error;
}
10036
10037static inline int validate_namespace_args (int is64bit, int size) {
10038
10039 if (is64bit) {
10040 /* Must be one of these */
10041 if (size == sizeof(user64_namespace_handler_info)) {
10042 goto sizeok;
10043 }
10044 if (size == sizeof(user64_namespace_handler_info_ext)) {
10045 goto sizeok;
10046 }
10047 if (size == sizeof(user64_namespace_handler_data)) {
10048 goto sizeok;
10049 }
10050 return EINVAL;
10051 }
10052 else {
10053 /* 32 bit -- must be one of these */
10054 if (size == sizeof(user32_namespace_handler_info)) {
10055 goto sizeok;
10056 }
10057 if (size == sizeof(user32_namespace_handler_info_ext)) {
10058 goto sizeok;
10059 }
10060 if (size == sizeof(user32_namespace_handler_data)) {
10061 goto sizeok;
10062 }
10063 return EINVAL;
10064 }
10065
10066sizeok:
10067
10068 return 0;
10069
10070}
10071
/*
 * Common entry point for the namespace/snapshot handler fsctls.
 * Validates the (root-only) caller and the argument size, unpacks the
 * 32/64-bit userland struct in 'data' into a kernel-only
 * namespace_handler_data of user addresses, then blocks in
 * wait_for_namespace_event() until an event is handed to the caller.
 */
static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
{
	int error = 0;
	namespace_handler_data nhd;

	/* pre-zero so optional fields default to 0 when the caller's struct omits them */
	bzero (&nhd, sizeof(namespace_handler_data));

	if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
		return error;
	}

	error = validate_namespace_args (is64bit, size);
	if (error) {
		return error;
	}

	/* Copy in the userland pointers into our kernel-only struct */

	if (is64bit) {
		/* 64 bit userland structures */
		nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
		nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
		nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;

		/* If the size is greater than the standard info struct, add in extra fields */
		if (size > (sizeof(user64_namespace_handler_info))) {
			if (size >= (sizeof(user64_namespace_handler_info_ext))) {
				nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
			}
			if (size == (sizeof(user64_namespace_handler_data))) {
				nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
			}
			/* Otherwise the fields were pre-zeroed when we did the bzero above. */
		}
	}
	else {
		/* 32 bit userland structures */
		nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
		nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
		nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);

		if (size > (sizeof(user32_namespace_handler_info))) {
			if (size >= (sizeof(user32_namespace_handler_info_ext))) {
				nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
			}
			if (size == (sizeof(user32_namespace_handler_data))) {
				nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
			}
			/* Otherwise the fields were pre-zeroed when we did the bzero above. */
		}
	}

	return wait_for_namespace_event(&nhd, nspace_type);
}
10126
10127static unsigned long
10128fsctl_bogus_command_compat(unsigned long cmd)
10129{
10130
10131 switch (cmd) {
10132 case IOCBASECMD(FSIOC_SYNC_VOLUME):
10133 return (FSIOC_SYNC_VOLUME);
10134 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID):
10135 return (FSIOC_ROUTEFS_SETROUTEID);
10136 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS):
10137 return (FSIOC_SET_PACKAGE_EXTS);
10138 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_GET):
10139 return (FSIOC_NAMESPACE_HANDLER_GET);
10140 case IOCBASECMD(FSIOC_OLD_SNAPSHOT_HANDLER_GET):
10141 return (FSIOC_OLD_SNAPSHOT_HANDLER_GET);
10142 case IOCBASECMD(FSIOC_SNAPSHOT_HANDLER_GET_EXT):
10143 return (FSIOC_SNAPSHOT_HANDLER_GET_EXT);
10144 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UPDATE):
10145 return (FSIOC_NAMESPACE_HANDLER_UPDATE);
10146 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UNBLOCK):
10147 return (FSIOC_NAMESPACE_HANDLER_UNBLOCK);
10148 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_CANCEL):
10149 return (FSIOC_NAMESPACE_HANDLER_CANCEL);
10150 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME):
10151 return (FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME);
10152 case IOCBASECMD(FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS):
10153 return (FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS);
10154 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE):
10155 return (FSIOC_SET_FSTYPENAME_OVERRIDE);
10156 case IOCBASECMD(DISK_CONDITIONER_IOC_GET):
10157 return (DISK_CONDITIONER_IOC_GET);
10158 case IOCBASECMD(DISK_CONDITIONER_IOC_SET):
10159 return (DISK_CONDITIONER_IOC_SET);
10160 case IOCBASECMD(FSIOC_FIOSEEKHOLE):
10161 return (FSIOC_FIOSEEKHOLE);
10162 case IOCBASECMD(FSIOC_FIOSEEKDATA):
10163 return (FSIOC_FIOSEEKDATA);
10164 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME):
10165 return (SPOTLIGHT_IOC_GET_MOUNT_TIME);
10166 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME):
10167 return (SPOTLIGHT_IOC_GET_LAST_MTIME);
10168 }
10169
10170 return (cmd);
10171}
10172
10173/*
10174 * Make a filesystem-specific control call:
10175 */
10176/* ARGSUSED */
10177static int
10178fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
10179{
10180 int error=0;
10181 boolean_t is64bit;
10182 u_int size;
10183#define STK_PARAMS 128
10184 char stkbuf[STK_PARAMS] = {0};
10185 caddr_t data, memp;
10186 vnode_t vp = *arg_vp;
10187
10188 cmd = fsctl_bogus_command_compat(cmd);
10189
10190 size = IOCPARM_LEN(cmd);
10191 if (size > IOCPARM_MAX) return (EINVAL);
10192
10193 is64bit = proc_is64bit(p);
10194
10195 memp = NULL;
10196
10197 if (size > sizeof (stkbuf)) {
10198 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
10199 data = memp;
10200 } else {
10201 data = &stkbuf[0];
10202 };
10203
10204 if (cmd & IOC_IN) {
10205 if (size) {
10206 error = copyin(udata, data, size);
10207 if (error) {
10208 if (memp) {
10209 kfree (memp, size);
10210 }
10211 return error;
10212 }
10213 } else {
10214 if (is64bit) {
10215 *(user_addr_t *)data = udata;
10216 }
10217 else {
10218 *(uint32_t *)data = (uint32_t)udata;
10219 }
10220 };
10221 } else if ((cmd & IOC_OUT) && size) {
10222 /*
10223 * Zero the buffer so the user always
10224 * gets back something deterministic.
10225 */
10226 bzero(data, size);
10227 } else if (cmd & IOC_VOID) {
10228 if (is64bit) {
10229 *(user_addr_t *)data = udata;
10230 }
10231 else {
10232 *(uint32_t *)data = (uint32_t)udata;
10233 }
10234 }
10235
10236 /* Check to see if it's a generic command */
10237 switch (cmd) {
10238
10239 case FSIOC_SYNC_VOLUME: {
10240 mount_t mp = vp->v_mount;
10241 int arg = *(uint32_t*)data;
10242
10243 /* record vid of vp so we can drop it below. */
10244 uint32_t vvid = vp->v_id;
10245
10246 /*
10247 * Then grab mount_iterref so that we can release the vnode.
10248 * Without this, a thread may call vnode_iterate_prepare then
10249 * get into a deadlock because we've never released the root vp
10250 */
10251 error = mount_iterref (mp, 0);
10252 if (error) {
10253 break;
10254 }
10255 vnode_put(vp);
10256
10257 /* issue the sync for this volume */
10258 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
10259
10260 /*
10261 * Then release the mount_iterref once we're done syncing; it's not
10262 * needed for the VNOP_IOCTL below
10263 */
10264 mount_iterdrop(mp);
10265
10266 if (arg & FSCTL_SYNC_FULLSYNC) {
10267 /* re-obtain vnode iocount on the root vp, if possible */
10268 error = vnode_getwithvid (vp, vvid);
10269 if (error == 0) {
10270 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
10271 vnode_put (vp);
10272 }
10273 }
10274 /* mark the argument VP as having been released */
10275 *arg_vp = NULL;
10276 }
10277 break;
10278
10279 case FSIOC_ROUTEFS_SETROUTEID: {
10280#if ROUTEFS
10281 char routepath[MAXPATHLEN];
10282 size_t len = 0;
10283
10284 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10285 break;
10286 }
10287 bzero(routepath, MAXPATHLEN);
10288 error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
10289 if (error) {
10290 break;
10291 }
10292 error = routefs_kernel_mount(routepath);
10293 if (error) {
10294 break;
10295 }
10296#endif
10297 }
10298 break;
10299
10300 case FSIOC_SET_PACKAGE_EXTS: {
10301 user_addr_t ext_strings;
10302 uint32_t num_entries;
10303 uint32_t max_width;
10304
10305 if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0)))
10306 break;
10307
10308 if ( (is64bit && size != sizeof(user64_package_ext_info))
10309 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
10310
10311 // either you're 64-bit and passed a 64-bit struct or
10312 // you're 32-bit and passed a 32-bit struct. otherwise
10313 // it's not ok.
10314 error = EINVAL;
10315 break;
10316 }
10317
10318 if (is64bit) {
10319 ext_strings = ((user64_package_ext_info *)data)->strings;
10320 num_entries = ((user64_package_ext_info *)data)->num_entries;
10321 max_width = ((user64_package_ext_info *)data)->max_width;
10322 } else {
10323 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
10324 num_entries = ((user32_package_ext_info *)data)->num_entries;
10325 max_width = ((user32_package_ext_info *)data)->max_width;
10326 }
10327 error = set_package_extensions_table(ext_strings, num_entries, max_width);
10328 }
10329 break;
10330
10331 /* namespace handlers */
10332 case FSIOC_NAMESPACE_HANDLER_GET: {
10333 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
10334 }
10335 break;
10336
10337 /* Snapshot handlers */
10338 case FSIOC_OLD_SNAPSHOT_HANDLER_GET: {
10339 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
10340 }
10341 break;
10342
10343 case FSIOC_SNAPSHOT_HANDLER_GET_EXT: {
10344 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
10345 }
10346 break;
10347
10348 case FSIOC_NAMESPACE_HANDLER_UPDATE: {
10349 uint32_t token, val;
10350 int i;
10351
10352 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10353 break;
10354 }
10355
10356 if (!nspace_is_special_process(p)) {
10357 error = EINVAL;
10358 break;
10359 }
10360
10361 token = ((uint32_t *)data)[0];
10362 val = ((uint32_t *)data)[1];
10363
10364 lck_mtx_lock(&nspace_handler_lock);
10365
10366 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10367 if (nspace_items[i].token == token) {
10368 break; /* exit for loop, not case stmt */
10369 }
10370 }
10371
10372 if (i >= MAX_NSPACE_ITEMS) {
10373 error = ENOENT;
10374 } else {
10375 //
10376 // if this bit is set, when resolve_nspace_item() times out
10377 // it will loop and go back to sleep.
10378 //
10379 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
10380 }
10381
10382 lck_mtx_unlock(&nspace_handler_lock);
10383
10384 if (error) {
10385 printf("nspace-handler-update: did not find token %u\n", token);
10386 }
10387 }
10388 break;
10389
10390 case FSIOC_NAMESPACE_HANDLER_UNBLOCK: {
10391 uint32_t token, val;
10392 int i;
10393
10394 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10395 break;
10396 }
10397
10398 if (!nspace_is_special_process(p)) {
10399 error = EINVAL;
10400 break;
10401 }
10402
10403 token = ((uint32_t *)data)[0];
10404 val = ((uint32_t *)data)[1];
10405
10406 lck_mtx_lock(&nspace_handler_lock);
10407
10408 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10409 if (nspace_items[i].token == token) {
10410 break; /* exit for loop, not case statement */
10411 }
10412 }
10413
10414 if (i >= MAX_NSPACE_ITEMS) {
10415 printf("nspace-handler-unblock: did not find token %u\n", token);
10416 error = ENOENT;
10417 } else {
10418 if (val == 0 && nspace_items[i].vp) {
10419 vnode_lock_spin(nspace_items[i].vp);
10420 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10421 vnode_unlock(nspace_items[i].vp);
10422 }
10423
10424 nspace_items[i].vp = NULL;
10425 nspace_items[i].arg = NULL;
10426 nspace_items[i].op = 0;
10427 nspace_items[i].vid = 0;
10428 nspace_items[i].flags = NSPACE_ITEM_DONE;
10429 nspace_items[i].token = 0;
10430
10431 wakeup((caddr_t)&(nspace_items[i].vp));
10432 }
10433
10434 lck_mtx_unlock(&nspace_handler_lock);
10435 }
10436 break;
10437
10438 case FSIOC_NAMESPACE_HANDLER_CANCEL: {
10439 uint32_t token, val;
10440 int i;
10441
10442 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10443 break;
10444 }
10445
10446 if (!nspace_is_special_process(p)) {
10447 error = EINVAL;
10448 break;
10449 }
10450
10451 token = ((uint32_t *)data)[0];
10452 val = ((uint32_t *)data)[1];
10453
10454 lck_mtx_lock(&nspace_handler_lock);
10455
10456 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10457 if (nspace_items[i].token == token) {
10458 break; /* exit for loop, not case stmt */
10459 }
10460 }
10461
10462 if (i >= MAX_NSPACE_ITEMS) {
10463 printf("nspace-handler-cancel: did not find token %u\n", token);
10464 error = ENOENT;
10465 } else {
10466 if (nspace_items[i].vp) {
10467 vnode_lock_spin(nspace_items[i].vp);
10468 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10469 vnode_unlock(nspace_items[i].vp);
10470 }
10471
10472 nspace_items[i].vp = NULL;
10473 nspace_items[i].arg = NULL;
10474 nspace_items[i].vid = 0;
10475 nspace_items[i].token = val;
10476 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
10477 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
10478
10479 wakeup((caddr_t)&(nspace_items[i].vp));
10480 }
10481
10482 lck_mtx_unlock(&nspace_handler_lock);
10483 }
10484 break;
10485
10486 case FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
10487 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10488 break;
10489 }
10490
10491 // we explicitly do not do the namespace_handler_proc check here
10492
10493 lck_mtx_lock(&nspace_handler_lock);
10494 snapshot_timestamp = ((uint32_t *)data)[0];
10495 wakeup(&nspace_item_idx);
10496 lck_mtx_unlock(&nspace_handler_lock);
10497 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
10498
10499 }
10500 break;
10501
10502 case FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
10503 {
10504 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10505 break;
10506 }
10507
10508 lck_mtx_lock(&nspace_handler_lock);
10509 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
10510 lck_mtx_unlock(&nspace_handler_lock);
10511 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
10512 nspace_allow_virtual_devs ? "" : " NOT");
10513 error = 0;
10514
10515 }
10516 break;
10517
10518 case FSIOC_SET_FSTYPENAME_OVERRIDE:
10519 {
10520 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10521 break;
10522 }
10523 if (vp->v_mount) {
10524 mount_lock(vp->v_mount);
10525 if (data[0] != 0) {
10526 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
10527 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
10528 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10529 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
10530 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
10531 }
10532 } else {
10533 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10534 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
10535 }
10536 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
10537 vp->v_mount->fstypename_override[0] = '\0';
10538 }
10539 mount_unlock(vp->v_mount);
10540 }
10541 }
10542 break;
10543
10544 case DISK_CONDITIONER_IOC_GET: {
10545 error = disk_conditioner_get_info(vp->v_mount, (disk_conditioner_info *)data);
10546 }
10547 break;
10548
10549 case DISK_CONDITIONER_IOC_SET: {
10550 error = disk_conditioner_set_info(vp->v_mount, (disk_conditioner_info *)data);
10551 }
10552 break;
10553
10554 default: {
10555 /* other, known commands shouldn't be passed down here */
10556 switch (cmd) {
10557 case F_PUNCHHOLE:
10558 case F_TRIM_ACTIVE_FILE:
10559 case F_RDADVISE:
10560 case F_TRANSCODEKEY:
10561 case F_GETPROTECTIONLEVEL:
10562 case F_GETDEFAULTPROTLEVEL:
10563 case F_MAKECOMPRESSED:
10564 case F_SET_GREEDY_MODE:
10565 case F_SETSTATICCONTENT:
10566 case F_SETIOTYPE:
10567 case F_SETBACKINGSTORE:
10568 case F_GETPATH_MTMINFO:
10569 case APFSIOC_REVERT_TO_SNAPSHOT:
10570 case FSIOC_FIOSEEKHOLE:
10571 case FSIOC_FIOSEEKDATA:
10572 case HFS_GET_BOOT_INFO:
10573 case HFS_SET_BOOT_INFO:
10574 case FIOPINSWAP:
10575 case F_CHKCLEAN:
10576 case F_FULLFSYNC:
10577 case F_BARRIERFSYNC:
10578 case F_FREEZE_FS:
10579 case F_THAW_FS:
10580 error = EINVAL;
10581 goto outdrop;
10582 }
10583 /* Invoke the filesystem-specific code */
10584 error = VNOP_IOCTL(vp, cmd, data, options, ctx);
10585 }
10586
10587 } /* end switch stmt */
10588
10589 /*
10590 * if no errors, copy any data to user. Size was
10591 * already set and checked above.
10592 */
10593 if (error == 0 && (cmd & IOC_OUT) && size)
10594 error = copyout(data, udata, size);
10595
10596outdrop:
10597 if (memp) {
10598 kfree(memp, size);
10599 }
10600
10601 return error;
10602}
10603
10604/* ARGSUSED */
/*
 * fsctl(2): path-based filesystem control operation.
 *
 * Looks up the vnode named by uap->path (following symlinks unless
 * FSOPT_NOFOLLOW is set in uap->options), runs the MAC policy check,
 * and hands the command off to fsctl_internal().
 *
 * Returns: 0 on success, or an errno from namei()/MACF/fsctl_internal().
 */
int
fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
{
	int error;
	struct nameidata nd;
	u_long nameiflags;
	vnode_t vp = NULL;
	vfs_context_t ctx = vfs_context_current();

	AUDIT_ARG(cmd, uap->cmd);
	AUDIT_ARG(value32, uap->options);
	/* Get the vnode for the file we are getting info on: */
	nameiflags = 0;
	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
	NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
	       UIO_USERSPACE, uap->path, ctx);
	if ((error = namei(&nd))) goto done;
	vp = nd.ni_vp;
	nameidone(&nd);

#if CONFIG_MACF
	/* Let MAC policies veto this command on the vnode's mount. */
	error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
	if (error) {
		goto done;
	}
#endif

	error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);

done:
	/* fsctl_internal() may drop the iocount and NULL out vp; only put it if still held */
	if (vp)
		vnode_put(vp);
	return error;
}
10639/* ARGSUSED */
/*
 * ffsctl(2): fd-based filesystem control operation.
 *
 * Like fsctl(2) but operates on the vnode underlying an already-open
 * file descriptor instead of resolving a path.
 *
 * Returns: 0 on success, or an errno from file_vnode()/vnode_getwithref()/
 * MACF/fsctl_internal().
 */
int
ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
{
	int error;
	vnode_t vp = NULL;
	vfs_context_t ctx = vfs_context_current();
	int fd = -1;

	AUDIT_ARG(fd, uap->fd);
	AUDIT_ARG(cmd, uap->cmd);
	AUDIT_ARG(value32, uap->options);

	/* Get the vnode for the file we are getting info on: */
	if ((error = file_vnode(uap->fd, &vp)))
		return error;
	fd = uap->fd;
	if ((error = vnode_getwithref(vp))) {
		/* could not take an iocount; release the fd reference taken above */
		file_drop(fd);
		return error;
	}

#if CONFIG_MACF
	/* Let MAC policies veto this command on the vnode's mount. */
	if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
		file_drop(fd);
		vnode_put(vp);
		return error;
	}
#endif

	error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);

	file_drop(fd);

	/*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
	if (vp) {
		vnode_put(vp);
	}

	return error;
}
10680/* end of fsctl system call */
10681
10682/*
10683 * Retrieve the data of an extended attribute.
10684 */
/*
 * getxattr(2): copy the value of the named extended attribute on the
 * file at uap->path out to the user buffer uap->value (or, when no
 * data uio is built, just report the attribute's size in *retval).
 */
int
getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
{
	vnode_t vp;
	struct nameidata nd;
	char attrname[XATTR_MAXNAMELEN+1];
	vfs_context_t ctx = vfs_context_current();
	uio_t auio = NULL;
	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	size_t attrsize = 0;
	size_t namelen;
	u_int32_t nameiflags;
	int error;
	char uio_buf[ UIO_SIZEOF(1) ];

	/* XATTR_NOSECURITY and XATTR_NODEFAULT are kernel-internal options */
	if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
		return (EINVAL);

	nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
	NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
	if ((error = namei(&nd))) {
		return (error);
	}
	vp = nd.ni_vp;
	nameidone(&nd);

	error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
	if (error != 0) {
		goto out;
	}
	if (xattr_protected(attrname)) {
		/* only the superuser may read com.apple.system.Security */
		if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
			error = EPERM;
			goto out;
		}
	}
	/*
	 * the specific check for 0xffffffff is a hack to preserve
	 * binary compatibility in K64 with applications that discovered
	 * that passing in a buf pointer and a size of -1 resulted in
	 * just the size of the indicated extended attribute being returned.
	 * this isn't part of the documented behavior, but because of the
	 * original implementation's check for "uap->size > 0", this behavior
	 * was allowed. In K32 that check turned into a signed comparison
	 * even though uap->size is unsigned... in K64, we blow by that
	 * check because uap->size is unsigned and doesn't get sign smeared
	 * in the munger for a 32 bit user app. we also need to add a
	 * check to limit the maximum size of the buffer being passed in...
	 * unfortunately, the underlying filesystems seem to just malloc
	 * the requested size even if the actual extended attribute is tiny.
	 * because that malloc is for kernel wired memory, we have to put a
	 * sane limit on it.
	 *
	 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
	 * U64 running on K64 will yield -1 (64 bits wide)
	 * U32/U64 running on K32 will yield -1 (32 bits wide)
	 */
	if (uap->size == 0xffffffff || uap->size == (size_t)-1)
		goto no_uio;

	if (uap->value) {
		/* clamp the request so filesystems don't wire huge buffers */
		if (uap->size > (size_t)XATTR_MAXSIZE)
			uap->size = XATTR_MAXSIZE;

		auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
		                            &uio_buf[0], sizeof(uio_buf));
		uio_addiov(auio, uap->value, uap->size);
	}
no_uio:
	/* with a NULL uio, vn_getxattr() only reports the attribute size */
	error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
out:
	vnode_put(vp);

	if (auio) {
		*retval = uap->size - uio_resid(auio);
	} else {
		*retval = (user_ssize_t)attrsize;
	}

	return (error);
}
10766
10767/*
10768 * Retrieve the data of an extended attribute.
10769 */
10770int
10771fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
10772{
10773 vnode_t vp;
10774 char attrname[XATTR_MAXNAMELEN+1];
10775 uio_t auio = NULL;
10776 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10777 size_t attrsize = 0;
10778 size_t namelen;
10779 int error;
10780 char uio_buf[ UIO_SIZEOF(1) ];
10781
10782 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10783 return (EINVAL);
10784
10785 if ( (error = file_vnode(uap->fd, &vp)) ) {
10786 return (error);
10787 }
10788 if ( (error = vnode_getwithref(vp)) ) {
10789 file_drop(uap->fd);
10790 return(error);
10791 }
10792 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10793 if (error != 0) {
10794 goto out;
10795 }
10796 if (xattr_protected(attrname)) {
10797 error = EPERM;
10798 goto out;
10799 }
10800 if (uap->value && uap->size > 0) {
10801 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10802 &uio_buf[0], sizeof(uio_buf));
10803 uio_addiov(auio, uap->value, uap->size);
10804 }
10805
10806 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
10807out:
10808 (void)vnode_put(vp);
10809 file_drop(uap->fd);
10810
10811 if (auio) {
10812 *retval = uap->size - uio_resid(auio);
10813 } else {
10814 *retval = (user_ssize_t)attrsize;
10815 }
10816 return (error);
10817}
10818
10819/*
10820 * Set the data of an extended attribute.
10821 */
10822int
10823setxattr(proc_t p, struct setxattr_args *uap, int *retval)
10824{
10825 vnode_t vp;
10826 struct nameidata nd;
10827 char attrname[XATTR_MAXNAMELEN+1];
10828 vfs_context_t ctx = vfs_context_current();
10829 uio_t auio = NULL;
10830 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10831 size_t namelen;
10832 u_int32_t nameiflags;
10833 int error;
10834 char uio_buf[ UIO_SIZEOF(1) ];
10835
10836 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10837 return (EINVAL);
10838
10839 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10840 if (error != 0) {
10841 if (error == EPERM) {
10842 /* if the string won't fit in attrname, copyinstr emits EPERM */
10843 return (ENAMETOOLONG);
10844 }
10845 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10846 return error;
10847 }
10848 if (xattr_protected(attrname))
10849 return(EPERM);
10850 if (uap->size != 0 && uap->value == 0) {
10851 return (EINVAL);
10852 }
10853
10854 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10855 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
10856 if ((error = namei(&nd))) {
10857 return (error);
10858 }
10859 vp = nd.ni_vp;
10860 nameidone(&nd);
10861
10862 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10863 &uio_buf[0], sizeof(uio_buf));
10864 uio_addiov(auio, uap->value, uap->size);
10865
10866 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
10867#if CONFIG_FSE
10868 if (error == 0) {
10869 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10870 FSE_ARG_VNODE, vp,
10871 FSE_ARG_DONE);
10872 }
10873#endif
10874 vnode_put(vp);
10875 *retval = 0;
10876 return (error);
10877}
10878
10879/*
10880 * Set the data of an extended attribute.
10881 */
/*
 * fsetxattr(2): write the value of the named extended attribute on the
 * vnode backing an open file descriptor.  Posts an FSE_XATTR_MODIFIED
 * fsevent on success.
 */
int
fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
{
	vnode_t vp;
	char attrname[XATTR_MAXNAMELEN+1];
	uio_t auio = NULL;
	int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	size_t namelen;
	int error;
	char uio_buf[ UIO_SIZEOF(1) ];
#if CONFIG_FSE
	/* ctx is only needed for the fsevent; the xattr call fetches its own */
	vfs_context_t ctx = vfs_context_current();
#endif

	/* XATTR_NOFOLLOW is meaningless on an already-open file */
	if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
		return (EINVAL);

	error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
	if (error != 0) {
		if (error == EPERM) {
			/* if the string won't fit in attrname, copyinstr emits EPERM */
			return (ENAMETOOLONG);
		}
		/* Otherwise return the default error from copyinstr to detect ERANGE, etc */
		return error;
	}
	if (xattr_protected(attrname))
		return(EPERM);
	/* a non-zero size requires a source buffer */
	if (uap->size != 0 && uap->value == 0) {
		return (EINVAL);
	}
	if ( (error = file_vnode(uap->fd, &vp)) ) {
		return (error);
	}
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}
	auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
	                            &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, uap->value, uap->size);

	error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
#if CONFIG_FSE
	if (error == 0) {
		add_fsevent(FSE_XATTR_MODIFIED, ctx,
		            FSE_ARG_VNODE, vp,
		            FSE_ARG_DONE);
	}
#endif
	vnode_put(vp);
	file_drop(uap->fd);
	*retval = 0;
	return (error);
}
10937
10938/*
10939 * Remove an extended attribute.
10940 * XXX Code duplication here.
10941 */
10942int
10943removexattr(proc_t p, struct removexattr_args *uap, int *retval)
10944{
10945 vnode_t vp;
10946 struct nameidata nd;
10947 char attrname[XATTR_MAXNAMELEN+1];
10948 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10949 vfs_context_t ctx = vfs_context_current();
10950 size_t namelen;
10951 u_int32_t nameiflags;
10952 int error;
10953
10954 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10955 return (EINVAL);
10956
10957 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10958 if (error != 0) {
10959 return (error);
10960 }
10961 if (xattr_protected(attrname))
10962 return(EPERM);
10963 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10964 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
10965 if ((error = namei(&nd))) {
10966 return (error);
10967 }
10968 vp = nd.ni_vp;
10969 nameidone(&nd);
10970
10971 error = vn_removexattr(vp, attrname, uap->options, ctx);
10972#if CONFIG_FSE
10973 if (error == 0) {
10974 add_fsevent(FSE_XATTR_REMOVED, ctx,
10975 FSE_ARG_VNODE, vp,
10976 FSE_ARG_DONE);
10977 }
10978#endif
10979 vnode_put(vp);
10980 *retval = 0;
10981 return (error);
10982}
10983
10984/*
10985 * Remove an extended attribute.
10986 * XXX Code duplication here.
10987 */
/*
 * fremovexattr(2): delete the named extended attribute from the vnode
 * backing an open file descriptor.  Posts an FSE_XATTR_REMOVED fsevent
 * on success.
 */
int
fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
{
	vnode_t vp;
	char attrname[XATTR_MAXNAMELEN+1];
	size_t namelen;
	int error;
#if CONFIG_FSE
	/* ctx is only needed for the fsevent; the xattr call fetches its own */
	vfs_context_t ctx = vfs_context_current();
#endif

	/* XATTR_NOFOLLOW is meaningless on an already-open file */
	if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
		return (EINVAL);

	error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
	if (error != 0) {
		return (error);
	}
	if (xattr_protected(attrname))
		return(EPERM);
	if ( (error = file_vnode(uap->fd, &vp)) ) {
		return (error);
	}
	if ( (error = vnode_getwithref(vp)) ) {
		/* could not take an iocount; release the fd reference */
		file_drop(uap->fd);
		return(error);
	}

	error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
#if CONFIG_FSE
	if (error == 0) {
		add_fsevent(FSE_XATTR_REMOVED, ctx,
		            FSE_ARG_VNODE, vp,
		            FSE_ARG_DONE);
	}
#endif
	vnode_put(vp);
	file_drop(uap->fd);
	*retval = 0;
	return (error);
}
11029
11030/*
11031 * Retrieve the list of extended attribute names.
11032 * XXX Code duplication here.
11033 */
11034int
11035listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
11036{
11037 vnode_t vp;
11038 struct nameidata nd;
11039 vfs_context_t ctx = vfs_context_current();
11040 uio_t auio = NULL;
11041 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11042 size_t attrsize = 0;
11043 u_int32_t nameiflags;
11044 int error;
11045 char uio_buf[ UIO_SIZEOF(1) ];
11046
11047 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
11048 return (EINVAL);
11049
11050 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
11051 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
11052 if ((error = namei(&nd))) {
11053 return (error);
11054 }
11055 vp = nd.ni_vp;
11056 nameidone(&nd);
11057 if (uap->namebuf != 0 && uap->bufsize > 0) {
11058 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
11059 &uio_buf[0], sizeof(uio_buf));
11060 uio_addiov(auio, uap->namebuf, uap->bufsize);
11061 }
11062
11063 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
11064
11065 vnode_put(vp);
11066 if (auio) {
11067 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
11068 } else {
11069 *retval = (user_ssize_t)attrsize;
11070 }
11071 return (error);
11072}
11073
11074/*
11075 * Retrieve the list of extended attribute names.
11076 * XXX Code duplication here.
11077 */
/*
 * flistxattr(2): copy the list of extended attribute names on the vnode
 * backing an open file descriptor into the caller's buffer.  With no
 * buffer supplied, *retval reports the total size of the name list.
 */
int
flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
{
	vnode_t vp;
	uio_t auio = NULL;
	int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	size_t attrsize = 0;
	int error;
	char uio_buf[ UIO_SIZEOF(1) ];

	/* XATTR_NOFOLLOW is meaningless on an already-open file */
	if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
		return (EINVAL);

	if ( (error = file_vnode(uap->fd, &vp)) ) {
		return (error);
	}
	if ( (error = vnode_getwithref(vp)) ) {
		/* could not take an iocount; release the fd reference */
		file_drop(uap->fd);
		return(error);
	}
	/* only build a uio when the caller supplied a destination buffer */
	if (uap->namebuf != 0 && uap->bufsize > 0) {
		auio = uio_createwithbuffer(1, 0, spacetype,
		                            UIO_READ, &uio_buf[0], sizeof(uio_buf));
		uio_addiov(auio, uap->namebuf, uap->bufsize);
	}

	error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());

	vnode_put(vp);
	file_drop(uap->fd);
	if (auio) {
		*retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
	} else {
		*retval = (user_ssize_t)attrsize;
	}
	return (error);
}
11115
11116static int fsgetpath_internal(
11117 vfs_context_t ctx, int volfs_id, uint64_t objid,
11118 vm_size_t bufsize, caddr_t buf, int *pathlen)
11119{
11120 int error;
11121 struct mount *mp = NULL;
11122 vnode_t vp;
11123 int length;
11124 int bpflags;
11125 /* maximum number of times to retry build_path */
11126 unsigned int retries = 0x10;
11127
11128 if (bufsize > PAGE_SIZE) {
11129 return (EINVAL);
11130 }
11131
11132 if (buf == NULL) {
11133 return (ENOMEM);
11134 }
11135
11136retry:
11137 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
11138 error = ENOTSUP; /* unexpected failure */
11139 return ENOTSUP;
11140 }
11141
11142unionget:
11143 if (objid == 2) {
11144 error = VFS_ROOT(mp, &vp, ctx);
11145 } else {
11146 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
11147 }
11148
11149 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
11150 /*
11151 * If the fileid isn't found and we're in a union
11152 * mount volume, then see if the fileid is in the
11153 * mounted-on volume.
11154 */
11155 struct mount *tmp = mp;
11156 mp = vnode_mount(tmp->mnt_vnodecovered);
11157 vfs_unbusy(tmp);
11158 if (vfs_busy(mp, LK_NOWAIT) == 0)
11159 goto unionget;
11160 } else {
11161 vfs_unbusy(mp);
11162 }
11163
11164 if (error) {
11165 return error;
11166 }
11167
11168#if CONFIG_MACF
11169 error = mac_vnode_check_fsgetpath(ctx, vp);
11170 if (error) {
11171 vnode_put(vp);
11172 return error;
11173 }
11174#endif
11175
11176 /* Obtain the absolute path to this vnode. */
11177 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
11178 bpflags |= BUILDPATH_CHECK_MOVED;
11179 error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
11180 vnode_put(vp);
11181
11182 if (error) {
11183 /* there was a race building the path, try a few more times */
11184 if (error == EAGAIN) {
11185 --retries;
11186 if (retries > 0)
11187 goto retry;
11188
11189 error = ENOENT;
11190 }
11191 goto out;
11192 }
11193
11194 AUDIT_ARG(text, buf);
11195
11196 if (kdebug_enable) {
11197 long dbg_parms[NUMPARMS];
11198 int dbg_namelen;
11199
11200 dbg_namelen = (int)sizeof(dbg_parms);
11201
11202 if (length < dbg_namelen) {
11203 memcpy((char *)dbg_parms, buf, length);
11204 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
11205
11206 dbg_namelen = length;
11207 } else {
11208 memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
11209 }
11210
11211 kdebug_vfs_lookup(dbg_parms, dbg_namelen, (void *)vp,
11212 KDBG_VFS_LOOKUP_FLAG_LOOKUP);
11213 }
11214
11215 *pathlen = (user_ssize_t)length; /* may be superseded by error */
11216
11217out:
11218 return (error);
11219}
11220
11221/*
11222 * Obtain the full pathname of a file system object by id.
11223 */
11224int
11225fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
11226{
11227 vfs_context_t ctx = vfs_context_current();
11228 fsid_t fsid;
11229 char *realpath;
11230 int length;
11231 int error;
11232
11233 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
11234 return (error);
11235 }
11236 AUDIT_ARG(value32, fsid.val[0]);
11237 AUDIT_ARG(value64, uap->objid);
11238 /* Restrict output buffer size for now. */
11239
11240 if (uap->bufsize > PAGE_SIZE) {
11241 return (EINVAL);
11242 }
11243 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK | M_ZERO);
11244 if (realpath == NULL) {
11245 return (ENOMEM);
11246 }
11247
11248 error = fsgetpath_internal(
11249 ctx, fsid.val[0], uap->objid,
11250 uap->bufsize, realpath, &length);
11251
11252 if (error) {
11253 goto out;
11254 }
11255
11256 error = copyout((caddr_t)realpath, uap->buf, length);
11257
11258 *retval = (user_ssize_t)length; /* may be superseded by error */
11259out:
11260 if (realpath) {
11261 FREE(realpath, M_TEMP);
11262 }
11263 return (error);
11264}
11265
11266/*
11267 * Common routine to handle various flavors of statfs data heading out
11268 * to user space.
11269 *
11270 * Returns: 0 Success
11271 * EFAULT
11272 */
static int
munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
    user_addr_t bufp, int *sizep, boolean_t is_64_bit,
    boolean_t partial_copy)
{
	int error;
	int my_size, copy_size;

	if (is_64_bit) {
		struct user64_statfs sfs;
		my_size = copy_size = sizeof(sfs);
		bzero(&sfs, my_size);
		sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
		sfs.f_type = mp->mnt_vtable->vfc_typenum;
		sfs.f_reserved1 = (short)sfsp->f_fssubtype;
		sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
		sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
		sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
		sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
		sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
		sfs.f_files = (user64_long_t)sfsp->f_files;
		sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
		sfs.f_fsid = sfsp->f_fsid;
		sfs.f_owner = sfsp->f_owner;
		/* honor an FSIOC_SET_FSTYPENAME_OVERRIDE if one is in effect */
		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
			strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
		} else {
			strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
		}
		strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
		strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);

		/* a partial copy omits the trailing reserved fields */
		if (partial_copy) {
			copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
		}
		error = copyout((caddr_t)&sfs, bufp, copy_size);
	}
	else {
		struct user32_statfs sfs;

		my_size = copy_size = sizeof(sfs);
		bzero(&sfs, my_size);

		sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
		sfs.f_type = mp->mnt_vtable->vfc_typenum;
		sfs.f_reserved1 = (short)sfsp->f_fssubtype;

		/*
		 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
		 * have to fudge the numbers here in that case. We inflate the blocksize in order
		 * to reflect the filesystem size as best we can.
		 */
		if ((sfsp->f_blocks > INT_MAX)
			/* Hack for 4061702 . I think the real fix is for Carbon to
			 * look for some volume capability and not depend on hidden
			 * semantics agreed between a FS and carbon.
			 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
			 * for Carbon to set bNoVolumeSizes volume attribute.
			 * Without this the webdavfs files cannot be copied onto
			 * disk as they look huge. This change should not affect
			 * XSAN as they should not setting these to -1..
			 */
			&& (sfsp->f_blocks != 0xffffffffffffffffULL)
			&& (sfsp->f_bfree != 0xffffffffffffffffULL)
			&& (sfsp->f_bavail != 0xffffffffffffffffULL)) {
			int shift;

			/*
			 * Work out how far we have to shift the block count down to make it fit.
			 * Note that it's possible to have to shift so far that the resulting
			 * blocksize would be unreportably large.  At that point, we will clip
			 * any values that don't fit.
			 *
			 * For safety's sake, we also ensure that f_iosize is never reported as
			 * being smaller than f_bsize.
			 */
			for (shift = 0; shift < 32; shift++) {
				if ((sfsp->f_blocks >> shift) <= INT_MAX)
					break;
				if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
					break;
			}
#define __SHIFT_OR_CLIP(x, s)	((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
			sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
			sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
			sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
#undef __SHIFT_OR_CLIP
			/* block size was inflated by the same shift to preserve total size */
			sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
			sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
		} else {
			/* filesystem is small enough to be reported honestly */
			sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
			sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
			sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
			sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
			sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
		}
		sfs.f_files = (user32_long_t)sfsp->f_files;
		sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
		sfs.f_fsid = sfsp->f_fsid;
		sfs.f_owner = sfsp->f_owner;
		/* honor an FSIOC_SET_FSTYPENAME_OVERRIDE if one is in effect */
		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
			strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
		} else {
			strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
		}
		strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
		strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);

		/* a partial copy omits the trailing reserved fields */
		if (partial_copy) {
			copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
		}
		error = copyout((caddr_t)&sfs, bufp, copy_size);
	}

	/* report the full (untruncated) structure size to the caller */
	if (sizep != NULL) {
		*sizep = my_size;
	}
	return(error);
}
11393
11394/*
11395 * copy stat structure into user_stat structure.
11396 */
11397void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
11398{
11399 bzero(usbp, sizeof(*usbp));
11400
11401 usbp->st_dev = sbp->st_dev;
11402 usbp->st_ino = sbp->st_ino;
11403 usbp->st_mode = sbp->st_mode;
11404 usbp->st_nlink = sbp->st_nlink;
11405 usbp->st_uid = sbp->st_uid;
11406 usbp->st_gid = sbp->st_gid;
11407 usbp->st_rdev = sbp->st_rdev;
11408#ifndef _POSIX_C_SOURCE
11409 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11410 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11411 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11412 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11413 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11414 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11415#else
11416 usbp->st_atime = sbp->st_atime;
11417 usbp->st_atimensec = sbp->st_atimensec;
11418 usbp->st_mtime = sbp->st_mtime;
11419 usbp->st_mtimensec = sbp->st_mtimensec;
11420 usbp->st_ctime = sbp->st_ctime;
11421 usbp->st_ctimensec = sbp->st_ctimensec;
11422#endif
11423 usbp->st_size = sbp->st_size;
11424 usbp->st_blocks = sbp->st_blocks;
11425 usbp->st_blksize = sbp->st_blksize;
11426 usbp->st_flags = sbp->st_flags;
11427 usbp->st_gen = sbp->st_gen;
11428 usbp->st_lspare = sbp->st_lspare;
11429 usbp->st_qspare[0] = sbp->st_qspare[0];
11430 usbp->st_qspare[1] = sbp->st_qspare[1];
11431}
11432
/*
 * Copy a stat structure into the user32_stat ABI structure for a
 * 32-bit user process.  Assignments to narrower user32 fields may
 * implicitly truncate wide kernel values — presumably acceptable for
 * the legacy 32-bit ABI; verify against the user32_stat definition.
 */
void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
{
	/* Zero first so padding/unassigned fields don't leak to user space */
	bzero(usbp, sizeof(*usbp));

	usbp->st_dev = sbp->st_dev;
	usbp->st_ino = sbp->st_ino;
	usbp->st_mode = sbp->st_mode;
	usbp->st_nlink = sbp->st_nlink;
	usbp->st_uid = sbp->st_uid;
	usbp->st_gid = sbp->st_gid;
	usbp->st_rdev = sbp->st_rdev;
#ifndef _POSIX_C_SOURCE
	/* Timestamp member names differ with _POSIX_C_SOURCE visibility */
	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
#else
	usbp->st_atime = sbp->st_atime;
	usbp->st_atimensec = sbp->st_atimensec;
	usbp->st_mtime = sbp->st_mtime;
	usbp->st_mtimensec = sbp->st_mtimensec;
	usbp->st_ctime = sbp->st_ctime;
	usbp->st_ctimensec = sbp->st_ctimensec;
#endif
	usbp->st_size = sbp->st_size;
	usbp->st_blocks = sbp->st_blocks;
	usbp->st_blksize = sbp->st_blksize;
	usbp->st_flags = sbp->st_flags;
	usbp->st_gen = sbp->st_gen;
	usbp->st_lspare = sbp->st_lspare;
	usbp->st_qspare[0] = sbp->st_qspare[0];
	usbp->st_qspare[1] = sbp->st_qspare[1];
}
11468
11469/*
11470 * copy stat64 structure into user_stat64 structure.
11471 */
11472void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
11473{
11474 bzero(usbp, sizeof(*usbp));
11475
11476 usbp->st_dev = sbp->st_dev;
11477 usbp->st_ino = sbp->st_ino;
11478 usbp->st_mode = sbp->st_mode;
11479 usbp->st_nlink = sbp->st_nlink;
11480 usbp->st_uid = sbp->st_uid;
11481 usbp->st_gid = sbp->st_gid;
11482 usbp->st_rdev = sbp->st_rdev;
11483#ifndef _POSIX_C_SOURCE
11484 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11485 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11486 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11487 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11488 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11489 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11490 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11491 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
11492#else
11493 usbp->st_atime = sbp->st_atime;
11494 usbp->st_atimensec = sbp->st_atimensec;
11495 usbp->st_mtime = sbp->st_mtime;
11496 usbp->st_mtimensec = sbp->st_mtimensec;
11497 usbp->st_ctime = sbp->st_ctime;
11498 usbp->st_ctimensec = sbp->st_ctimensec;
11499 usbp->st_birthtime = sbp->st_birthtime;
11500 usbp->st_birthtimensec = sbp->st_birthtimensec;
11501#endif
11502 usbp->st_size = sbp->st_size;
11503 usbp->st_blocks = sbp->st_blocks;
11504 usbp->st_blksize = sbp->st_blksize;
11505 usbp->st_flags = sbp->st_flags;
11506 usbp->st_gen = sbp->st_gen;
11507 usbp->st_lspare = sbp->st_lspare;
11508 usbp->st_qspare[0] = sbp->st_qspare[0];
11509 usbp->st_qspare[1] = sbp->st_qspare[1];
11510}
11511
/*
 * Copy a stat64 structure into the user32_stat64 ABI structure for a
 * 32-bit user process (includes birthtime fields).  Assignments to
 * narrower user32 fields may implicitly truncate — presumably
 * acceptable for the legacy 32-bit ABI; verify against the
 * user32_stat64 definition.
 */
void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
{
	/* Zero first so padding/unassigned fields don't leak to user space */
	bzero(usbp, sizeof(*usbp));

	usbp->st_dev = sbp->st_dev;
	usbp->st_ino = sbp->st_ino;
	usbp->st_mode = sbp->st_mode;
	usbp->st_nlink = sbp->st_nlink;
	usbp->st_uid = sbp->st_uid;
	usbp->st_gid = sbp->st_gid;
	usbp->st_rdev = sbp->st_rdev;
#ifndef _POSIX_C_SOURCE
	/* Timestamp member names differ with _POSIX_C_SOURCE visibility */
	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
	usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
	usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
#else
	usbp->st_atime = sbp->st_atime;
	usbp->st_atimensec = sbp->st_atimensec;
	usbp->st_mtime = sbp->st_mtime;
	usbp->st_mtimensec = sbp->st_mtimensec;
	usbp->st_ctime = sbp->st_ctime;
	usbp->st_ctimensec = sbp->st_ctimensec;
	usbp->st_birthtime = sbp->st_birthtime;
	usbp->st_birthtimensec = sbp->st_birthtimensec;
#endif
	usbp->st_size = sbp->st_size;
	usbp->st_blocks = sbp->st_blocks;
	usbp->st_blksize = sbp->st_blksize;
	usbp->st_flags = sbp->st_flags;
	usbp->st_gen = sbp->st_gen;
	usbp->st_lspare = sbp->st_lspare;
	usbp->st_qspare[0] = sbp->st_qspare[0];
	usbp->st_qspare[1] = sbp->st_qspare[1];
}
11551
11552/*
11553 * Purge buffer cache for simulating cold starts
11554 */
11555static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
11556{
11557 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
11558
11559 return VNODE_RETURNED;
11560}
11561
11562static int vfs_purge_callback(mount_t mp, __unused void * arg)
11563{
11564 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
11565
11566 return VFS_RETURNED;
11567}
11568
11569int
11570vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
11571{
11572 if (!kauth_cred_issuser(kauth_cred_get()))
11573 return EPERM;
11574
11575 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
11576
11577 return 0;
11578}
11579
11580/*
11581 * gets the vnode associated with the (unnamed) snapshot directory
11582 * for a Filesystem. The snapshot directory vnode is returned with
11583 * an iocount on it.
11584 */
11585int
11586vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx)
11587{
11588 return (VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx));
11589}
11590
11591/*
11592 * Get the snapshot vnode.
11593 *
11594 * If successful, the call returns with an iocount on *rvpp ,*sdvpp and
11595 * needs nameidone() on ndp.
11596 *
11597 * If the snapshot vnode exists it is returned in ndp->ni_vp.
11598 *
11599 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
11600 * not needed.
11601 */
11602static int
11603vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp,
11604 user_addr_t name, struct nameidata *ndp, int32_t op,
11605#if !CONFIG_TRIGGERS
11606 __unused
11607#endif
11608 enum path_operation pathop,
11609 vfs_context_t ctx)
11610{
11611 int error, i;
11612 caddr_t name_buf;
11613 size_t name_len;
11614 struct vfs_attr vfa;
11615
11616 *sdvpp = NULLVP;
11617 *rvpp = NULLVP;
11618
11619 error = vnode_getfromfd(ctx, dirfd, rvpp);
11620 if (error)
11621 return (error);
11622
11623 if (!vnode_isvroot(*rvpp)) {
11624 error = EINVAL;
11625 goto out;
11626 }
11627
11628 /* Make sure the filesystem supports snapshots */
11629 VFSATTR_INIT(&vfa);
11630 VFSATTR_WANTED(&vfa, f_capabilities);
11631 if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) ||
11632 !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) ||
11633 !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] &
11634 VOL_CAP_INT_SNAPSHOT)) ||
11635 !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &
11636 VOL_CAP_INT_SNAPSHOT))) {
11637 error = ENOTSUP;
11638 goto out;
11639 }
11640
11641 error = vnode_get_snapdir(*rvpp, sdvpp, ctx);
11642 if (error)
11643 goto out;
11644
11645 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11646 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11647 if (error)
11648 goto out1;
11649
11650 /*
11651 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
11652 * (the length returned by copyinstr includes the terminating NUL)
11653 */
11654 if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
11655 (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
11656 error = EINVAL;
11657 goto out1;
11658 }
11659 for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++);
11660 if (i < (int)name_len) {
11661 error = EINVAL;
11662 goto out1;
11663 }
11664
11665#if CONFIG_MACF
11666 if (op == CREATE) {
11667 error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp),
11668 name_buf);
11669 } else if (op == DELETE) {
11670 error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp),
11671 name_buf);
11672 }
11673 if (error)
11674 goto out1;
11675#endif
11676
11677 /* Check if the snapshot already exists ... */
11678 NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1,
11679 UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx);
11680 ndp->ni_dvp = *sdvpp;
11681
11682 error = namei(ndp);
11683out1:
11684 FREE(name_buf, M_TEMP);
11685out:
11686 if (error) {
11687 if (*sdvpp) {
11688 vnode_put(*sdvpp);
11689 *sdvpp = NULLVP;
11690 }
11691 if (*rvpp) {
11692 vnode_put(*rvpp);
11693 *rvpp = NULLVP;
11694 }
11695 }
11696 return (error);
11697}
11698
11699/*
11700 * create a filesystem snapshot (for supporting filesystems)
11701 *
11702 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
11703 * We get to the (unnamed) snapshot directory vnode and create the vnode
11704 * for the snapshot in it.
11705 *
11706 * Restrictions:
11707 *
11708 * a) Passed in name for snapshot cannot have slashes.
11709 * b) name can't be "." or ".."
11710 *
11711 * Since this requires superuser privileges, vnode_authorize calls are not
11712 * made.
11713 */
11714static int
11715snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
11716 vfs_context_t ctx)
11717{
11718 vnode_t rvp, snapdvp;
11719 int error;
11720 struct nameidata namend;
11721
11722 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE,
11723 OP_LINK, ctx);
11724 if (error)
11725 return (error);
11726
11727 if (namend.ni_vp) {
11728 vnode_put(namend.ni_vp);
11729 error = EEXIST;
11730 } else {
11731 struct vnode_attr va;
11732 vnode_t vp = NULLVP;
11733
11734 VATTR_INIT(&va);
11735 VATTR_SET(&va, va_type, VREG);
11736 VATTR_SET(&va, va_mode, 0);
11737
11738 error = vn_create(snapdvp, &vp, &namend, &va,
11739 VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
11740 if (!error && vp)
11741 vnode_put(vp);
11742 }
11743
11744 nameidone(&namend);
11745 vnode_put(snapdvp);
11746 vnode_put(rvp);
11747 return (error);
11748}
11749
11750/*
11751 * Delete a Filesystem snapshot
11752 *
11753 * get the vnode for the unnamed snapshot directory and the snapshot and
11754 * delete the snapshot.
11755 */
11756static int
11757snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
11758 vfs_context_t ctx)
11759{
11760 vnode_t rvp, snapdvp;
11761 int error;
11762 struct nameidata namend;
11763
11764 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE,
11765 OP_UNLINK, ctx);
11766 if (error)
11767 goto out;
11768
11769 error = VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd,
11770 VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
11771
11772 vnode_put(namend.ni_vp);
11773 nameidone(&namend);
11774 vnode_put(snapdvp);
11775 vnode_put(rvp);
11776out:
11777 return (error);
11778}
11779
11780/*
11781 * Revert a filesystem to a snapshot
11782 *
11783 * Marks the filesystem to revert to the given snapshot on next mount.
11784 */
11785static int
11786snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
11787 vfs_context_t ctx)
11788{
11789 int error;
11790 vnode_t rvp;
11791 mount_t mp;
11792 struct fs_snapshot_revert_args revert_data;
11793 struct componentname cnp;
11794 caddr_t name_buf;
11795 size_t name_len;
11796
11797 error = vnode_getfromfd(ctx, dirfd, &rvp);
11798 if (error) {
11799 return (error);
11800 }
11801 mp = vnode_mount(rvp);
11802
11803 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11804 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11805 if (error) {
11806 FREE(name_buf, M_TEMP);
11807 vnode_put(rvp);
11808 return (error);
11809 }
11810
11811#if CONFIG_MACF
11812 error = mac_mount_check_snapshot_revert(ctx, mp, name_buf);
11813 if (error) {
11814 FREE(name_buf, M_TEMP);
11815 vnode_put(rvp);
11816 return (error);
11817 }
11818#endif
11819
11820 /*
11821 * Grab mount_iterref so that we can release the vnode,
11822 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
11823 */
11824 error = mount_iterref (mp, 0);
11825 vnode_put(rvp);
11826 if (error) {
11827 FREE(name_buf, M_TEMP);
11828 return (error);
11829 }
11830
11831 memset(&cnp, 0, sizeof(cnp));
11832 cnp.cn_pnbuf = (char *)name_buf;
11833 cnp.cn_nameiop = LOOKUP;
11834 cnp.cn_flags = ISLASTCN | HASBUF;
11835 cnp.cn_pnlen = MAXPATHLEN;
11836 cnp.cn_nameptr = cnp.cn_pnbuf;
11837 cnp.cn_namelen = (int)name_len;
11838 revert_data.sr_cnp = &cnp;
11839
11840 error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
11841 mount_iterdrop(mp);
11842 FREE(name_buf, M_TEMP);
11843
11844 if (error) {
11845 /* If there was any error, try again using VNOP_IOCTL */
11846
11847 vnode_t snapdvp;
11848 struct nameidata namend;
11849
11850 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP,
11851 OP_LOOKUP, ctx);
11852 if (error) {
11853 return (error);
11854 }
11855
11856
11857 error = VNOP_IOCTL(namend.ni_vp, APFSIOC_REVERT_TO_SNAPSHOT, (caddr_t) NULL,
11858 0, ctx);
11859
11860 vnode_put(namend.ni_vp);
11861 nameidone(&namend);
11862 vnode_put(snapdvp);
11863 vnode_put(rvp);
11864 }
11865
11866 return (error);
11867}
11868
11869/*
11870 * rename a Filesystem snapshot
11871 *
11872 * get the vnode for the unnamed snapshot directory and the snapshot and
11873 * rename the snapshot. This is a very specialised (and simple) case of
11874 * rename(2) (which has to deal with a lot more complications). It differs
11875 * slightly from rename(2) in that EEXIST is returned if the new name exists.
11876 */
11877static int
11878snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
11879 __unused uint32_t flags, vfs_context_t ctx)
11880{
11881 vnode_t rvp, snapdvp;
11882 int error, i;
11883 caddr_t newname_buf;
11884 size_t name_len;
11885 vnode_t fvp;
11886 struct nameidata *fromnd, *tond;
11887 /* carving out a chunk for structs that are too big to be on stack. */
11888 struct {
11889 struct nameidata from_node;
11890 struct nameidata to_node;
11891 } * __rename_data;
11892
11893 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
11894 fromnd = &__rename_data->from_node;
11895 tond = &__rename_data->to_node;
11896
11897 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE,
11898 OP_UNLINK, ctx);
11899 if (error)
11900 goto out;
11901 fvp = fromnd->ni_vp;
11902
11903 MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11904 error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
11905 if (error)
11906 goto out1;
11907
11908 /*
11909 * Some sanity checks- new name can't be empty, "." or ".." or have
11910 * slashes.
11911 * (the length returned by copyinstr includes the terminating NUL)
11912 *
11913 * The FS rename VNOP is suppossed to handle this but we'll pick it
11914 * off here itself.
11915 */
11916 if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') ||
11917 (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) {
11918 error = EINVAL;
11919 goto out1;
11920 }
11921 for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++);
11922 if (i < (int)name_len) {
11923 error = EINVAL;
11924 goto out1;
11925 }
11926
11927#if CONFIG_MACF
11928 error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp),
11929 newname_buf);
11930 if (error)
11931 goto out1;
11932#endif
11933
11934 NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
11935 UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx);
11936 tond->ni_dvp = snapdvp;
11937
11938 error = namei(tond);
11939 if (error) {
11940 goto out2;
11941 } else if (tond->ni_vp) {
11942 /*
11943 * snapshot rename behaves differently than rename(2) - if the
11944 * new name exists, EEXIST is returned.
11945 */
11946 vnode_put(tond->ni_vp);
11947 error = EEXIST;
11948 goto out2;
11949 }
11950
11951 error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
11952 &tond->ni_cnd, ctx);
11953
11954out2:
11955 nameidone(tond);
11956out1:
11957 FREE(newname_buf, M_TEMP);
11958 vnode_put(fvp);
11959 vnode_put(snapdvp);
11960 vnode_put(rvp);
11961 nameidone(fromnd);
11962out:
11963 FREE(__rename_data, M_TEMP);
11964 return (error);
11965}
11966
11967/*
11968 * Mount a Filesystem snapshot
11969 *
11970 * get the vnode for the unnamed snapshot directory and the snapshot and
11971 * mount the snapshot.
11972 */
11973static int
11974snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
11975 __unused user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
11976{
11977 vnode_t rvp, snapdvp, snapvp, vp, pvp;
11978 int error;
11979 struct nameidata *snapndp, *dirndp;
11980 /* carving out a chunk for structs that are too big to be on stack. */
11981 struct {
11982 struct nameidata snapnd;
11983 struct nameidata dirnd;
11984 } * __snapshot_mount_data;
11985
11986 MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data),
11987 M_TEMP, M_WAITOK);
11988 snapndp = &__snapshot_mount_data->snapnd;
11989 dirndp = &__snapshot_mount_data->dirnd;
11990
11991 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP,
11992 OP_LOOKUP, ctx);
11993 if (error)
11994 goto out;
11995
11996 snapvp = snapndp->ni_vp;
11997 if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) {
11998 error = EIO;
11999 goto out1;
12000 }
12001
12002 /* Get the vnode to be covered */
12003 NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
12004 UIO_USERSPACE, directory, ctx);
12005 error = namei(dirndp);
12006 if (error)
12007 goto out1;
12008
12009 vp = dirndp->ni_vp;
12010 pvp = dirndp->ni_dvp;
12011
12012 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
12013 error = EINVAL;
12014 } else {
12015 mount_t mp = vnode_mount(rvp);
12016 struct fs_snapshot_mount_args smnt_data;
12017
12018 smnt_data.sm_mp = mp;
12019 smnt_data.sm_cnp = &snapndp->ni_cnd;
12020 error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
12021 &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), flags & MNT_DONTBROWSE,
12022 KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
12023 }
12024
12025 vnode_put(vp);
12026 vnode_put(pvp);
12027 nameidone(dirndp);
12028out1:
12029 vnode_put(snapvp);
12030 vnode_put(snapdvp);
12031 vnode_put(rvp);
12032 nameidone(snapndp);
12033out:
12034 FREE(__snapshot_mount_data, M_TEMP);
12035 return (error);
12036}
12037
12038/*
12039 * Root from a snapshot of the filesystem
12040 *
12041 * Marks the filesystem to root from the given snapshot on next boot.
12042 */
12043static int
12044snapshot_root(int dirfd, user_addr_t name, __unused uint32_t flags,
12045 vfs_context_t ctx)
12046{
12047 int error;
12048 vnode_t rvp;
12049 mount_t mp;
12050 struct fs_snapshot_root_args root_data;
12051 struct componentname cnp;
12052 caddr_t name_buf;
12053 size_t name_len;
12054
12055 error = vnode_getfromfd(ctx, dirfd, &rvp);
12056 if (error) {
12057 return (error);
12058 }
12059 mp = vnode_mount(rvp);
12060
12061 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12062 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
12063 if (error) {
12064 FREE(name_buf, M_TEMP);
12065 vnode_put(rvp);
12066 return (error);
12067 }
12068
12069 // XXX MAC checks ?
12070
12071 /*
12072 * Grab mount_iterref so that we can release the vnode,
12073 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
12074 */
12075 error = mount_iterref (mp, 0);
12076 vnode_put(rvp);
12077 if (error) {
12078 FREE(name_buf, M_TEMP);
12079 return (error);
12080 }
12081
12082 memset(&cnp, 0, sizeof(cnp));
12083 cnp.cn_pnbuf = (char *)name_buf;
12084 cnp.cn_nameiop = LOOKUP;
12085 cnp.cn_flags = ISLASTCN | HASBUF;
12086 cnp.cn_pnlen = MAXPATHLEN;
12087 cnp.cn_nameptr = cnp.cn_pnbuf;
12088 cnp.cn_namelen = (int)name_len;
12089 root_data.sr_cnp = &cnp;
12090
12091 error = VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, (caddr_t)&root_data, 0, ctx);
12092
12093 mount_iterdrop(mp);
12094 FREE(name_buf, M_TEMP);
12095
12096 return (error);
12097}
12098
12099/*
12100 * FS snapshot operations dispatcher
12101 */
12102int
12103fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap,
12104 __unused int32_t *retval)
12105{
12106 int error;
12107 vfs_context_t ctx = vfs_context_current();
12108
12109 AUDIT_ARG(fd, uap->dirfd);
12110 AUDIT_ARG(value32, uap->op);
12111
12112 error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0);
12113 if (error)
12114 return (error);
12115
12116 switch (uap->op) {
12117 case SNAPSHOT_OP_CREATE:
12118 error = snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx);
12119 break;
12120 case SNAPSHOT_OP_DELETE:
12121 error = snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx);
12122 break;
12123 case SNAPSHOT_OP_RENAME:
12124 error = snapshot_rename(uap->dirfd, uap->name1, uap->name2,
12125 uap->flags, ctx);
12126 break;
12127 case SNAPSHOT_OP_MOUNT:
12128 error = snapshot_mount(uap->dirfd, uap->name1, uap->name2,
12129 uap->data, uap->flags, ctx);
12130 break;
12131 case SNAPSHOT_OP_REVERT:
12132 error = snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx);
12133 break;
12134#if CONFIG_MNT_ROOTSNAP
12135 case SNAPSHOT_OP_ROOT:
12136 error = snapshot_root(uap->dirfd, uap->name1, uap->flags, ctx);
12137 break;
12138#endif /* CONFIG_MNT_ROOTSNAP */
12139 default:
12140 error = ENOSYS;
12141 }
12142
12143 return (error);
12144}