]> git.saurik.com Git - apple/xnu.git/blame - bsd/vfs/vfs_syscalls.c
xnu-2782.20.48.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
CommitLineData
1c79356b 1/*
fe8ab488 2 * Copyright (c) 1995-2014 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
2d21ac55
A
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
1c79356b
A
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/namei.h>
77#include <sys/filedesc.h>
78#include <sys/kernel.h>
91447636 79#include <sys/file_internal.h>
1c79356b 80#include <sys/stat.h>
91447636
A
81#include <sys/vnode_internal.h>
82#include <sys/mount_internal.h>
83#include <sys/proc_internal.h>
84#include <sys/kauth.h>
85#include <sys/uio_internal.h>
1c79356b 86#include <sys/malloc.h>
91447636 87#include <sys/mman.h>
1c79356b
A
88#include <sys/dirent.h>
89#include <sys/attr.h>
90#include <sys/sysctl.h>
91#include <sys/ubc.h>
9bccf70c 92#include <sys/quota.h>
91447636
A
93#include <sys/kdebug.h>
94#include <sys/fsevents.h>
6d2010ae 95#include <sys/imgsrc.h>
91447636
A
96#include <sys/sysproto.h>
97#include <sys/xattr.h>
b0d623f7
A
98#include <sys/fcntl.h>
99#include <sys/fsctl.h>
91447636 100#include <sys/ubc_internal.h>
593a1d5f 101#include <sys/disk.h>
91447636
A
102#include <machine/cons.h>
103#include <machine/limits.h>
104#include <miscfs/specfs/specdev.h>
e5568f75 105
b0d623f7 106#include <security/audit/audit.h>
e5568f75
A
107#include <bsm/audit_kevents.h>
108
91447636
A
109#include <mach/mach_types.h>
110#include <kern/kern_types.h>
111#include <kern/kalloc.h>
6d2010ae 112#include <kern/task.h>
91447636
A
113
114#include <vm/vm_pageout.h>
1c79356b 115
91447636 116#include <libkern/OSAtomic.h>
b0d623f7 117#include <pexpert/pexpert.h>
55e303ae 118
2d21ac55
A
119#if CONFIG_MACF
120#include <security/mac.h>
121#include <security/mac_framework.h>
122#endif
1c79356b 123
2d21ac55
A
124#if CONFIG_FSE
125#define GET_PATH(x) \
126 (x) = get_pathbuff();
127#define RELEASE_PATH(x) \
128 release_pathbuff(x);
129#else
130#define GET_PATH(x) \
131 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
132#define RELEASE_PATH(x) \
133 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
134#endif /* CONFIG_FSE */
135
136/* struct for checkdirs iteration */
137struct cdirargs {
138 vnode_t olddp;
139 vnode_t newdp;
140};
141/* callback for checkdirs iteration */
142static int checkdirs_callback(proc_t p, void * arg);
1c79356b 143
91447636 144static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
6601e61a 145static int checkdirs(vnode_t olddp, vfs_context_t ctx);
91447636
A
146void enablequotas(struct mount *mp, vfs_context_t ctx);
147static int getfsstat_callback(mount_t mp, void * arg);
148static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
2d21ac55 149static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
91447636 150static int sync_callback(mount_t, void *);
fe8ab488
A
151static void sync_thread(void *, __unused wait_result_t);
152static int sync_async(int);
91447636
A
153static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
154 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
155 boolean_t partial_copy);
b0d623f7
A
156static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
157 user_addr_t bufp);
158static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
6d2010ae
A
159static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
160 struct componentname *cnp, user_addr_t fsmountargs,
161 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
162 vfs_context_t ctx);
163void vfs_notify_mount(vnode_t pdvp);
164
165int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
b7266188 166
fe8ab488
A
167struct fd_vn_data * fg_vn_data_alloc(void);
168
169static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
170
171static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
172
b7266188 173#ifdef CONFIG_IMGSRC_ACCESS
b7266188
A
174static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
175static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
176static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
177static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
178static void mount_end_update(mount_t mp);
6d2010ae 179static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
b7266188
A
180#endif /* CONFIG_IMGSRC_ACCESS */
181
2d21ac55
A
182int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
183
184__private_extern__
185int sync_internal(void);
186
2d21ac55
A
187__private_extern__
188int unlink1(vfs_context_t, struct nameidata *, int);
91447636 189
fe8ab488
A
190extern lck_grp_t *fd_vn_lck_grp;
191extern lck_grp_attr_t *fd_vn_lck_grp_attr;
192extern lck_attr_t *fd_vn_lck_attr;
193
2d21ac55
A
194/*
195 * incremented each time a mount or unmount operation occurs
196 * used to invalidate the cached value of the rootvp in the
197 * mount structure utilized by cache_lookup_path
198 */
b0d623f7 199uint32_t mount_generation = 0;
1c79356b
A
200
201/* counts number of mount and unmount operations */
202unsigned int vfs_nummntops=0;
203
39236c6e
A
204extern const struct fileops vnops;
205#if CONFIG_APPLEDOUBLE
2d21ac55 206extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
39236c6e 207#endif /* CONFIG_APPLEDOUBLE */
91447636 208
fe8ab488
A
209typedef uint32_t vfs_rename_flags_t;
210#if CONFIG_SECLUDED_RENAME
211enum {
212 VFS_SECLUDE_RENAME = 0x00000001
213};
214#endif
215
1c79356b
A
216/*
217 * Virtual File System System Calls
218 */
219
fe8ab488 220#if NFSCLIENT || DEVFS
6d2010ae
A
221/*
222 * Private in-kernel mounting SPI (built only for NFSCLIENT or DEVFS configurations; not exported)
223 */
224 __private_extern__
225boolean_t
226vfs_iskernelmount(mount_t mp)
227{
228 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
229}
230
231 __private_extern__
232int
233kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
234 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
235{
236 struct nameidata nd;
237 boolean_t did_namei;
238 int error;
239
240 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
241 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
242
243 /*
244 * Get the vnode to be covered if it's not supplied
245 */
246 if (vp == NULLVP) {
247 error = namei(&nd);
248 if (error)
249 return (error);
250 vp = nd.ni_vp;
251 pvp = nd.ni_dvp;
252 did_namei = TRUE;
253 } else {
254 char *pnbuf = CAST_DOWN(char *, path);
255
256 nd.ni_cnd.cn_pnbuf = pnbuf;
257 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
258 did_namei = FALSE;
259 }
260
261 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
262 syscall_flags, kern_flags, NULL, TRUE, ctx);
263
264 if (did_namei) {
265 vnode_put(vp);
266 vnode_put(pvp);
267 nameidone(&nd);
268 }
269
270 return (error);
271}
fe8ab488 272#endif /* NFSCLIENT || DEVFS */
6d2010ae 273
1c79356b
A
274/*
275 * Mount a file system.
276 */
1c79356b
A
277/* ARGSUSED */
278int
b0d623f7 279mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
2d21ac55
A
280{
281 struct __mac_mount_args muap;
282
283 muap.type = uap->type;
284 muap.path = uap->path;
285 muap.flags = uap->flags;
286 muap.data = uap->data;
287 muap.mac_p = USER_ADDR_NULL;
288 return (__mac_mount(p, &muap, retval));
289}
290
6d2010ae
A
291void
292vfs_notify_mount(vnode_t pdvp)
293{
294 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
295 lock_vnode_and_post(pdvp, NOTE_WRITE);
296}
297
b0d623f7
A
298/*
299 * __mac_mount:
300 * Mount a file system taking into account MAC label behavior.
301 * See mount(2) man page for more information
302 *
303 * Parameters: p Process requesting the mount
304 * uap User argument descriptor (see below)
305 * retval (ignored)
306 *
307 * Indirect: uap->type Filesystem type
308 * uap->path Path to mount
309 * uap->data Mount arguments
310 * uap->mac_p MAC info
311 * uap->flags Mount flags
312 *
313 *
314 * Returns: 0 Success
315 * !0 Not success
316 */
6d2010ae
A
317boolean_t root_fs_upgrade_try = FALSE;
318
2d21ac55 319int
b0d623f7 320__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
1c79356b 321{
39236c6e
A
322 vnode_t pvp = NULL;
323 vnode_t vp = NULL;
324 int need_nameidone = 0;
6d2010ae
A
325 vfs_context_t ctx = vfs_context_current();
326 char fstypename[MFSNAMELEN];
327 struct nameidata nd;
328 size_t dummy=0;
329 char *labelstr = NULL;
330 int flags = uap->flags;
331 int error;
39236c6e 332#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
6d2010ae 333 boolean_t is_64bit = IS_64BIT_PROCESS(p);
39236c6e
A
334#else
335#pragma unused(p)
336#endif
6d2010ae
A
337 /*
338 * Get the fs type name from user space
339 */
340 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
341 if (error)
342 return (error);
343
344 /*
345 * Get the vnode to be covered
346 */
fe8ab488 347 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
6d2010ae
A
348 UIO_USERSPACE, uap->path, ctx);
349 error = namei(&nd);
39236c6e
A
350 if (error) {
351 goto out;
352 }
353 need_nameidone = 1;
6d2010ae
A
354 vp = nd.ni_vp;
355 pvp = nd.ni_dvp;
356
357#ifdef CONFIG_IMGSRC_ACCESS
358 /* Mounting image source cannot be batched with other operations */
359 if (flags == MNT_IMGSRC_BY_INDEX) {
360 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
361 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
362 goto out;
363 }
364#endif /* CONFIG_IMGSRC_ACCESS */
365
366#if CONFIG_MACF
367 /*
368 * Get the label string (if any) from user space
369 */
370 if (uap->mac_p != USER_ADDR_NULL) {
371 struct user_mac mac;
372 size_t ulen = 0;
373
374 if (is_64bit) {
375 struct user64_mac mac64;
376 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
377 mac.m_buflen = mac64.m_buflen;
378 mac.m_string = mac64.m_string;
379 } else {
380 struct user32_mac mac32;
381 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
382 mac.m_buflen = mac32.m_buflen;
383 mac.m_string = mac32.m_string;
384 }
385 if (error)
386 goto out;
387 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
388 (mac.m_buflen < 2)) {
389 error = EINVAL;
390 goto out;
391 }
392 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
393 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
394 if (error) {
395 goto out;
396 }
397 AUDIT_ARG(mac_string, labelstr);
398 }
399#endif /* CONFIG_MACF */
400
401 AUDIT_ARG(fflags, flags);
402
403 if ((vp->v_flag & VROOT) &&
39236c6e
A
404 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
405 if (!(flags & MNT_UNION)) {
6d2010ae 406 flags |= MNT_UPDATE;
39236c6e
A
407 }
408 else {
409 /*
410 * For a union mount on '/', treat it as fresh
411 * mount instead of update.
412 * Otherwise, union mouting on '/' used to panic the
413 * system before, since mnt_vnodecovered was found to
414 * be NULL for '/' which is required for unionlookup
415 * after it gets ENOENT on union mount.
416 */
417 flags = (flags & ~(MNT_UPDATE));
418 }
419
fe8ab488 420#ifdef SECURE_KERNEL
39236c6e
A
421 if ((flags & MNT_RDONLY) == 0) {
422 /* Release kernels are not allowed to mount "/" as rw */
423 error = EPERM;
424 goto out;
425 }
39236c6e
A
426#endif
427 /*
428 * See 7392553 for more details on why this check exists.
429 * Suffice to say: If this check is ON and something tries
430 * to mount the rootFS RW, we'll turn off the codesign
431 * bitmap optimization.
432 */
6d2010ae 433#if CHECK_CS_VALIDATION_BITMAP
39236c6e 434 if ((flags & MNT_RDONLY) == 0 ) {
6d2010ae
A
435 root_fs_upgrade_try = TRUE;
436 }
437#endif
438 }
439
440 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
441 labelstr, FALSE, ctx);
39236c6e 442
6d2010ae 443out:
39236c6e 444
6d2010ae
A
445#if CONFIG_MACF
446 if (labelstr)
447 FREE(labelstr, M_MACTEMP);
448#endif /* CONFIG_MACF */
449
39236c6e
A
450 if (vp) {
451 vnode_put(vp);
452 }
453 if (pvp) {
454 vnode_put(pvp);
455 }
456 if (need_nameidone) {
457 nameidone(&nd);
458 }
6d2010ae
A
459
460 return (error);
461}
462
463/*
464 * common mount implementation (final stage of mounting)
465
466 * Arguments:
467 * fstypename file system type (i.e. its vfs name)
468 * pvp parent of covered vnode
469 * vp covered vnode
470 * cnp component name (i.e. path) of covered vnode
471 * flags generic mount flags
472 * fsmountargs file system specific data
473 * labelstr optional MAC label
474 * kernelmount TRUE for mounts initiated from inside the kernel
475 * ctx caller's context
476 */
477static int
478mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
479 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
480 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
481{
39236c6e
A
482#if !CONFIG_MACF
483#pragma unused(labelstr)
484#endif
91447636
A
485 struct vnode *devvp = NULLVP;
486 struct vnode *device_vnode = NULLVP;
2d21ac55
A
487#if CONFIG_MACF
488 struct vnode *rvp;
489#endif
1c79356b 490 struct mount *mp;
6601e61a 491 struct vfstable *vfsp = (struct vfstable *)0;
6d2010ae 492 struct proc *p = vfs_context_proc(ctx);
91447636 493 int error, flag = 0;
91447636 494 user_addr_t devpath = USER_ADDR_NULL;
91447636
A
495 int ronly = 0;
496 int mntalloc = 0;
b0d623f7 497 boolean_t vfsp_ref = FALSE;
743b1565 498 boolean_t is_rwlock_locked = FALSE;
b0d623f7
A
499 boolean_t did_rele = FALSE;
500 boolean_t have_usecount = FALSE;
9bccf70c 501
1c79356b 502 /*
6d2010ae 503 * Process an update for an existing mount
1c79356b 504 */
6d2010ae 505 if (flags & MNT_UPDATE) {
1c79356b 506 if ((vp->v_flag & VROOT) == 0) {
91447636
A
507 error = EINVAL;
508 goto out1;
1c79356b
A
509 }
510 mp = vp->v_mount;
d12e1678 511
91447636 512 /* unmount in progress return error */
b0d623f7 513 mount_lock_spin(mp);
91447636
A
514 if (mp->mnt_lflag & MNT_LUNMOUNT) {
515 mount_unlock(mp);
516 error = EBUSY;
517 goto out1;
d12e1678 518 }
91447636
A
519 mount_unlock(mp);
520 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 521 is_rwlock_locked = TRUE;
1c79356b
A
522 /*
523 * We only allow the filesystem to be reloaded if it
524 * is currently mounted read-only.
525 */
6d2010ae 526 if ((flags & MNT_RELOAD) &&
1c79356b 527 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
91447636
A
528 error = ENOTSUP;
529 goto out1;
1c79356b 530 }
b7266188 531
316670eb
A
532 /*
533 * If content protection is enabled, update mounts are not
534 * allowed to turn it off.
535 */
536 if ((mp->mnt_flag & MNT_CPROTECT) &&
537 ((flags & MNT_CPROTECT) == 0)) {
538 error = EINVAL;
539 goto out1;
540 }
541
b7266188
A
542#ifdef CONFIG_IMGSRC_ACCESS
543 /* Can't downgrade the backer of the root FS */
544 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
6d2010ae 545 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
b7266188
A
546 error = ENOTSUP;
547 goto out1;
548 }
549#endif /* CONFIG_IMGSRC_ACCESS */
550
1c79356b
A
551 /*
552 * Only root, or the user that did the original mount is
553 * permitted to update it.
554 */
2d21ac55
A
555 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
556 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
557 goto out1;
558 }
559#if CONFIG_MACF
560 error = mac_mount_check_remount(ctx, mp);
561 if (error != 0) {
91447636 562 goto out1;
1c79356b 563 }
2d21ac55 564#endif
1c79356b 565 /*
91447636
A
566 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
567 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
1c79356b 568 */
6d2010ae
A
569 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
570 flags |= MNT_NOSUID | MNT_NODEV;
d12e1678 571 if (mp->mnt_flag & MNT_NOEXEC)
6d2010ae 572 flags |= MNT_NOEXEC;
1c79356b 573 }
d12e1678
A
574 flag = mp->mnt_flag;
575
316670eb
A
576
577
6d2010ae 578 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
d12e1678 579
91447636 580 vfsp = mp->mnt_vtable;
1c79356b
A
581 goto update;
582 }
1c79356b 583 /*
91447636 584 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
1c79356b
A
585 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
586 */
6d2010ae
A
587 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
588 flags |= MNT_NOSUID | MNT_NODEV;
1c79356b 589 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
6d2010ae 590 flags |= MNT_NOEXEC;
1c79356b 591 }
91447636 592
55e303ae
A
593 /* XXXAUDIT: Should we capture the type on the error path as well? */
594 AUDIT_ARG(text, fstypename);
91447636 595 mount_list_lock();
1c79356b 596 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
b0d623f7
A
597 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
598 vfsp->vfc_refcount++;
599 vfsp_ref = TRUE;
1c79356b 600 break;
b0d623f7 601 }
91447636 602 mount_list_unlock();
1c79356b 603 if (vfsp == NULL) {
91447636
A
604 error = ENODEV;
605 goto out1;
1c79356b 606 }
6d2010ae
A
607
608 /*
609 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
610 */
611 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
612 error = EINVAL; /* unsupported request */
2d21ac55 613 goto out1;
6d2010ae
A
614 }
615
616 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
617 if (error != 0) {
91447636 618 goto out1;
1c79356b 619 }
1c79356b
A
620
621 /*
6d2010ae 622 * Allocate and initialize the filesystem (mount_t)
1c79356b 623 */
b0d623f7 624 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
1c79356b 625 M_MOUNT, M_WAITOK);
b0d623f7 626 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
91447636 627 mntalloc = 1;
0b4e3aa0
A
628
629 /* Initialize the default IO constraints */
630 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
631 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
91447636
A
632 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
633 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
634 mp->mnt_devblocksize = DEV_BSIZE;
2d21ac55 635 mp->mnt_alignmentmask = PAGE_MASK;
b0d623f7
A
636 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
637 mp->mnt_ioscale = 1;
2d21ac55
A
638 mp->mnt_ioflags = 0;
639 mp->mnt_realrootvp = NULLVP;
640 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
91447636
A
641
642 TAILQ_INIT(&mp->mnt_vnodelist);
643 TAILQ_INIT(&mp->mnt_workerqueue);
644 TAILQ_INIT(&mp->mnt_newvnodes);
645 mount_lock_init(mp);
646 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 647 is_rwlock_locked = TRUE;
1c79356b 648 mp->mnt_op = vfsp->vfc_vfsops;
91447636 649 mp->mnt_vtable = vfsp;
91447636 650 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
1c79356b 651 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
fe8ab488
A
652 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
653 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1c79356b 654 mp->mnt_vnodecovered = vp;
2d21ac55 655 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
6d2010ae
A
656 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
657 mp->mnt_devbsdunit = 0;
1c79356b 658
91447636
A
659 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
660 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
6d2010ae 661
fe8ab488 662#if NFSCLIENT || DEVFS
6d2010ae
A
663 if (kernelmount)
664 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
665 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
666 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
fe8ab488 667#endif /* NFSCLIENT || DEVFS */
6d2010ae 668
1c79356b
A
669update:
670 /*
671 * Set the mount level flags.
672 */
6d2010ae 673 if (flags & MNT_RDONLY)
1c79356b 674 mp->mnt_flag |= MNT_RDONLY;
6d2010ae
A
675 else if (mp->mnt_flag & MNT_RDONLY) {
676 // disallow read/write upgrades of file systems that
677 // had the TYPENAME_OVERRIDE feature set.
678 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
679 error = EPERM;
680 goto out1;
681 }
1c79356b 682 mp->mnt_kern_flag |= MNTK_WANTRDWR;
6d2010ae 683 }
0b4e3aa0
A
684 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
685 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
6d2010ae
A
686 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
687 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
688 MNT_QUARANTINE | MNT_CPROTECT);
689 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
690 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
691 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
692 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
693 MNT_QUARANTINE | MNT_CPROTECT);
2d21ac55
A
694
695#if CONFIG_MACF
6d2010ae 696 if (flags & MNT_MULTILABEL) {
2d21ac55
A
697 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
698 error = EINVAL;
699 goto out1;
700 }
701 mp->mnt_flag |= MNT_MULTILABEL;
702 }
703#endif
6d2010ae
A
704 /*
705 * Process device path for local file systems if requested
706 */
91447636 707 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
6d2010ae 708 if (vfs_context_is64bit(ctx)) {
91447636
A
709 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
710 goto out1;
711 fsmountargs += sizeof(devpath);
712 } else {
b0d623f7 713 user32_addr_t tmp;
91447636
A
714 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
715 goto out1;
716 /* munge into LP64 addr */
717 devpath = CAST_USER_ADDR_T(tmp);
718 fsmountargs += sizeof(tmp);
719 }
720
6d2010ae 721 /* Lookup device and authorize access to it */
91447636 722 if ((devpath)) {
6d2010ae
A
723 struct nameidata nd;
724
725 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
726 if ( (error = namei(&nd)) )
91447636
A
727 goto out1;
728
6d2010ae
A
729 strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
730 devvp = nd.ni_vp;
91447636 731
6d2010ae 732 nameidone(&nd);
91447636
A
733
734 if (devvp->v_type != VBLK) {
735 error = ENOTBLK;
736 goto out2;
737 }
738 if (major(devvp->v_rdev) >= nblkdev) {
739 error = ENXIO;
740 goto out2;
741 }
742 /*
743 * If mount by non-root, then verify that user has necessary
744 * permissions on the device.
745 */
2d21ac55 746 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
6d2010ae
A
747 mode_t accessmode = KAUTH_VNODE_READ_DATA;
748
91447636
A
749 if ((mp->mnt_flag & MNT_RDONLY) == 0)
750 accessmode |= KAUTH_VNODE_WRITE_DATA;
2d21ac55 751 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
91447636
A
752 goto out2;
753 }
754 }
6d2010ae
A
755 /* On first mount, preflight and open device */
756 if (devpath && ((flags & MNT_UPDATE) == 0)) {
91447636
A
757 if ( (error = vnode_ref(devvp)) )
758 goto out2;
759 /*
760 * Disallow multiple mounts of the same device.
761 * Disallow mounting of a device that is currently in use
762 * (except for root, which might share swap device for miniroot).
763 * Flush out any old buffers remaining from a previous use.
764 */
765 if ( (error = vfs_mountedon(devvp)) )
766 goto out3;
767
768 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
769 error = EBUSY;
770 goto out3;
771 }
2d21ac55 772 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
91447636
A
773 error = ENOTBLK;
774 goto out3;
775 }
776 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
777 goto out3;
778
779 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
2d21ac55
A
780#if CONFIG_MACF
781 error = mac_vnode_check_open(ctx,
782 devvp,
783 ronly ? FREAD : FREAD|FWRITE);
784 if (error)
785 goto out3;
786#endif /* MAC */
787 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
91447636
A
788 goto out3;
789
790 mp->mnt_devvp = devvp;
791 device_vnode = devvp;
b0d623f7 792
6d2010ae
A
793 } else if ((mp->mnt_flag & MNT_RDONLY) &&
794 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
795 (device_vnode = mp->mnt_devvp)) {
796 dev_t dev;
797 int maj;
798 /*
799 * If upgrade to read-write by non-root, then verify
800 * that user has necessary permissions on the device.
801 */
802 vnode_getalways(device_vnode);
b0d623f7 803
6d2010ae
A
804 if (suser(vfs_context_ucred(ctx), NULL) &&
805 (error = vnode_authorize(device_vnode, NULL,
806 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
807 ctx)) != 0) {
808 vnode_put(device_vnode);
809 goto out2;
810 }
b0d623f7 811
6d2010ae
A
812 /* Tell the device that we're upgrading */
813 dev = (dev_t)device_vnode->v_rdev;
814 maj = major(dev);
b0d623f7 815
6d2010ae
A
816 if ((u_int)maj >= (u_int)nblkdev)
817 panic("Volume mounted on a device with invalid major number.");
b0d623f7 818
6d2010ae
A
819 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
820 vnode_put(device_vnode);
91447636 821 device_vnode = NULLVP;
6d2010ae
A
822 if (error != 0) {
823 goto out2;
824 }
91447636
A
825 }
826 }
2d21ac55 827#if CONFIG_MACF
6d2010ae 828 if ((flags & MNT_UPDATE) == 0) {
2d21ac55
A
829 mac_mount_label_init(mp);
830 mac_mount_label_associate(ctx, mp);
831 }
6d2010ae
A
832 if (labelstr) {
833 if ((flags & MNT_UPDATE) != 0) {
834 error = mac_mount_check_label_update(ctx, mp);
2d21ac55
A
835 if (error != 0)
836 goto out3;
837 }
2d21ac55
A
838 }
839#endif
1c79356b
A
840 /*
841 * Mount the filesystem.
842 */
2d21ac55 843 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
d12e1678 844
6d2010ae 845 if (flags & MNT_UPDATE) {
1c79356b
A
846 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
847 mp->mnt_flag &= ~MNT_RDONLY;
848 mp->mnt_flag &=~
849 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
850 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
851 if (error)
6d2010ae 852 mp->mnt_flag = flag; /* restore flag value */
91447636
A
853 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
854 lck_rw_done(&mp->mnt_rwlock);
743b1565 855 is_rwlock_locked = FALSE;
9bccf70c 856 if (!error)
2d21ac55 857 enablequotas(mp, ctx);
6d2010ae 858 goto exit;
1c79356b 859 }
6d2010ae 860
1c79356b
A
861 /*
862 * Put the new filesystem on the mount list after root.
863 */
6601e61a 864 if (error == 0) {
2d21ac55
A
865 struct vfs_attr vfsattr;
866#if CONFIG_MACF
867 if (vfs_flags(mp) & MNT_MULTILABEL) {
868 error = VFS_ROOT(mp, &rvp, ctx);
869 if (error) {
870 printf("%s() VFS_ROOT returned %d\n", __func__, error);
871 goto out3;
872 }
2d21ac55 873 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
b0d623f7
A
874 /*
875 * drop reference provided by VFS_ROOT
876 */
877 vnode_put(rvp);
878
2d21ac55
A
879 if (error)
880 goto out3;
881 }
882#endif /* MAC */
883
884 vnode_lock_spin(vp);
885 CLR(vp->v_flag, VMOUNT);
91447636
A
886 vp->v_mountedhere = mp;
887 vnode_unlock(vp);
888
2d21ac55
A
889 /*
890 * taking the name_cache_lock exclusively will
891 * insure that everyone is out of the fast path who
892 * might be trying to use a now stale copy of
893 * vp->v_mountedhere->mnt_realrootvp
894 * bumping mount_generation causes the cached values
895 * to be invalidated
896 */
897 name_cache_lock();
898 mount_generation++;
899 name_cache_unlock();
900
b0d623f7
A
901 error = vnode_ref(vp);
902 if (error != 0) {
903 goto out4;
904 }
905
906 have_usecount = TRUE;
91447636 907
2d21ac55 908 error = checkdirs(vp, ctx);
6601e61a
A
909 if (error != 0) {
910 /* Unmount the filesystem as cdir/rdirs cannot be updated */
911 goto out4;
912 }
91447636
A
913 /*
914 * there is no cleanup code here so I have made it void
915 * we need to revisit this
916 */
2d21ac55 917 (void)VFS_START(mp, 0, ctx);
1c79356b 918
6d2010ae
A
919 if (mount_list_add(mp) != 0) {
920 /*
921 * The system is shutting down trying to umount
922 * everything, so fail with a plausible errno.
923 */
924 error = EBUSY;
b0d623f7
A
925 goto out4;
926 }
6601e61a
A
927 lck_rw_done(&mp->mnt_rwlock);
928 is_rwlock_locked = FALSE;
929
2d21ac55
A
930 /* Check if this mounted file system supports EAs or named streams. */
931 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
932 VFSATTR_INIT(&vfsattr);
933 VFSATTR_WANTED(&vfsattr, f_capabilities);
934 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
935 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
936 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
937 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
938 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
939 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
940 }
941#if NAMEDSTREAMS
942 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
943 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
944 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
945 }
946#endif
947 /* Check if this file system supports path from id lookups. */
948 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
949 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
950 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
951 } else if (mp->mnt_flag & MNT_DOVOLFS) {
952 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
953 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
954 }
955 }
956 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
957 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
958 }
959 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
960 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
961 }
1c79356b 962 /* increment the operations count */
b0d623f7 963 OSAddAtomic(1, &vfs_nummntops);
2d21ac55 964 enablequotas(mp, ctx);
91447636
A
965
966 if (device_vnode) {
967 device_vnode->v_specflags |= SI_MOUNTEDON;
968
969 /*
970 * cache the IO attributes for the underlying physical media...
971 * an error return indicates the underlying driver doesn't
972 * support all the queries necessary... however, reasonable
973 * defaults will have been set, so no reason to bail or care
974 */
975 vfs_init_io_attributes(device_vnode, mp);
976 }
6601e61a
A
977
978 /* Now that mount is setup, notify the listeners */
6d2010ae 979 vfs_notify_mount(pvp);
1c79356b 980 } else {
6d2010ae
A
981 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
982 if (mp->mnt_vnodelist.tqh_first != NULL) {
983 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
984 mp->mnt_vtable->vfc_name, error);
985 }
986
2d21ac55 987 vnode_lock_spin(vp);
1c79356b 988 CLR(vp->v_flag, VMOUNT);
6601e61a 989 vnode_unlock(vp);
91447636
A
990 mount_list_lock();
991 mp->mnt_vtable->vfc_refcount--;
992 mount_list_unlock();
55e303ae 993
91447636 994 if (device_vnode ) {
91447636 995 vnode_rele(device_vnode);
b0d623f7 996 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
91447636
A
997 }
998 lck_rw_done(&mp->mnt_rwlock);
743b1565 999 is_rwlock_locked = FALSE;
6d2010ae
A
1000
1001 /*
1002 * if we get here, we have a mount structure that needs to be freed,
1003 * but since the coveredvp hasn't yet been updated to point at it,
1004 * no need to worry about other threads holding a crossref on this mp
1005 * so it's ok to just free it
1006 */
91447636 1007 mount_lock_destroy(mp);
2d21ac55
A
1008#if CONFIG_MACF
1009 mac_mount_label_destroy(mp);
1010#endif
55e303ae 1011 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1c79356b 1012 }
6d2010ae 1013exit:
91447636 1014 /*
6d2010ae 1015 * drop I/O count on the device vp if there was one
91447636
A
1016 */
1017 if (devpath && devvp)
1018 vnode_put(devvp);
b0d623f7 1019
91447636 1020 return(error);
b0d623f7 1021
6d2010ae 1022/* Error condition exits */
6601e61a 1023out4:
2d21ac55 1024 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
6d2010ae
A
1025
1026 /*
1027 * If the mount has been placed on the covered vp,
1028 * it may have been discovered by now, so we have
1029 * to treat this just like an unmount
1030 */
1031 mount_lock_spin(mp);
1032 mp->mnt_lflag |= MNT_LDEAD;
1033 mount_unlock(mp);
1034
6601e61a 1035 if (device_vnode != NULLVP) {
b0d623f7 1036 vnode_rele(device_vnode);
2d21ac55
A
1037 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1038 ctx);
b0d623f7 1039 did_rele = TRUE;
6601e61a 1040 }
6d2010ae 1041
2d21ac55 1042 vnode_lock_spin(vp);
6d2010ae
A
1043
1044 mp->mnt_crossref++;
6601e61a 1045 vp->v_mountedhere = (mount_t) 0;
6d2010ae 1046
6601e61a 1047 vnode_unlock(vp);
6d2010ae 1048
b0d623f7
A
1049 if (have_usecount) {
1050 vnode_rele(vp);
1051 }
91447636 1052out3:
6d2010ae 1053 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
2d21ac55 1054 vnode_rele(devvp);
91447636
A
1055out2:
1056 if (devpath && devvp)
1057 vnode_put(devvp);
1058out1:
743b1565
A
1059 /* Release mnt_rwlock only when it was taken */
1060 if (is_rwlock_locked == TRUE) {
1061 lck_rw_done(&mp->mnt_rwlock);
1062 }
6d2010ae 1063
6601e61a 1064 if (mntalloc) {
6d2010ae
A
1065 if (mp->mnt_crossref)
1066 mount_dropcrossref(mp, vp, 0);
1067 else {
1068 mount_lock_destroy(mp);
2d21ac55 1069#if CONFIG_MACF
6d2010ae 1070 mac_mount_label_destroy(mp);
2d21ac55 1071#endif
6d2010ae
A
1072 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1073 }
b0d623f7 1074 }
b0d623f7 1075 if (vfsp_ref) {
6601e61a
A
1076 mount_list_lock();
1077 vfsp->vfc_refcount--;
1078 mount_list_unlock();
6601e61a 1079 }
91447636
A
1080
1081 return(error);
1c79356b
A
1082}
1083
b7266188
A
1084/*
1085 * Flush in-core data, check for competing mount attempts,
1086 * and set VMOUNT
1087 */
6d2010ae
A
1088int
1089prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
b7266188 1090{
39236c6e
A
1091#if !CONFIG_MACF
1092#pragma unused(cnp,fsname)
1093#endif
b7266188
A
1094 struct vnode_attr va;
1095 int error;
1096
6d2010ae
A
1097 if (!skip_auth) {
1098 /*
1099 * If the user is not root, ensure that they own the directory
1100 * onto which we are attempting to mount.
1101 */
1102 VATTR_INIT(&va);
1103 VATTR_WANTED(&va, va_uid);
1104 if ((error = vnode_getattr(vp, &va, ctx)) ||
1105 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1106 (!vfs_context_issuser(ctx)))) {
1107 error = EPERM;
1108 goto out;
1109 }
b7266188
A
1110 }
1111
1112 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1113 goto out;
1114
1115 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1116 goto out;
1117
1118 if (vp->v_type != VDIR) {
1119 error = ENOTDIR;
1120 goto out;
1121 }
1122
1123 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1124 error = EBUSY;
1125 goto out;
1126 }
1127
1128#if CONFIG_MACF
1129 error = mac_mount_check_mount(ctx, vp,
1130 cnp, fsname);
1131 if (error != 0)
1132 goto out;
1133#endif
1134
1135 vnode_lock_spin(vp);
1136 SET(vp->v_flag, VMOUNT);
1137 vnode_unlock(vp);
1138
1139out:
1140 return error;
1141}
1142
6d2010ae
A
1143#if CONFIG_IMGSRC_ACCESS
1144
/*
 * Tracing for the imageboot-source relocation code: forwards to printf()
 * when DEBUG is non-zero and expands to an empty statement otherwise.
 */
#if DEBUG
#define IMGSRC_DEBUG(...) printf(__VA_ARGS__)
#else
#define IMGSRC_DEBUG(...) do { } while(0)
#endif
1150
b7266188
A
1151static int
1152authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1153{
1154 struct nameidata nd;
6d2010ae 1155 vnode_t vp, realdevvp;
b7266188
A
1156 mode_t accessmode;
1157 int error;
1158
6d2010ae
A
1159 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1160 if ( (error = namei(&nd)) ) {
1161 IMGSRC_DEBUG("namei() failed with %d\n", error);
b7266188 1162 return error;
6d2010ae 1163 }
b7266188 1164
b7266188 1165 vp = nd.ni_vp;
b7266188 1166
6d2010ae
A
1167 if (!vnode_isblk(vp)) {
1168 IMGSRC_DEBUG("Not block device.\n");
b7266188
A
1169 error = ENOTBLK;
1170 goto out;
1171 }
6d2010ae
A
1172
1173 realdevvp = mp->mnt_devvp;
1174 if (realdevvp == NULLVP) {
1175 IMGSRC_DEBUG("No device backs the mount.\n");
b7266188
A
1176 error = ENXIO;
1177 goto out;
1178 }
6d2010ae
A
1179
1180 error = vnode_getwithref(realdevvp);
1181 if (error != 0) {
1182 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1183 goto out;
1184 }
1185
1186 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1187 IMGSRC_DEBUG("Wrong dev_t.\n");
1188 error = ENXIO;
1189 goto out1;
1190 }
1191
1192 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1193
b7266188
A
1194 /*
1195 * If mount by non-root, then verify that user has necessary
1196 * permissions on the device.
1197 */
1198 if (!vfs_context_issuser(ctx)) {
1199 accessmode = KAUTH_VNODE_READ_DATA;
1200 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1201 accessmode |= KAUTH_VNODE_WRITE_DATA;
6d2010ae
A
1202 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1203 IMGSRC_DEBUG("Access denied.\n");
1204 goto out1;
1205 }
b7266188
A
1206 }
1207
1208 *devvpp = vp;
6d2010ae
A
1209
1210out1:
1211 vnode_put(realdevvp);
b7266188 1212out:
6d2010ae 1213 nameidone(&nd);
b7266188
A
1214 if (error) {
1215 vnode_put(vp);
1216 }
1217
1218 return error;
1219}
1220
1221/*
1222 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1223 * and call checkdirs()
1224 */
1225static int
1226place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1227{
1228 int error;
1229
1230 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1231
1232 vnode_lock_spin(vp);
1233 CLR(vp->v_flag, VMOUNT);
1234 vp->v_mountedhere = mp;
1235 vnode_unlock(vp);
1236
1237 /*
1238 * taking the name_cache_lock exclusively will
1239 * insure that everyone is out of the fast path who
1240 * might be trying to use a now stale copy of
1241 * vp->v_mountedhere->mnt_realrootvp
1242 * bumping mount_generation causes the cached values
1243 * to be invalidated
1244 */
1245 name_cache_lock();
1246 mount_generation++;
1247 name_cache_unlock();
1248
1249 error = vnode_ref(vp);
1250 if (error != 0) {
1251 goto out;
1252 }
1253
1254 error = checkdirs(vp, ctx);
1255 if (error != 0) {
1256 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1257 vnode_rele(vp);
1258 goto out;
1259 }
1260
1261out:
1262 if (error != 0) {
1263 mp->mnt_vnodecovered = NULLVP;
1264 }
1265 return error;
1266}
1267
1268static void
1269undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1270{
1271 vnode_rele(vp);
1272 vnode_lock_spin(vp);
1273 vp->v_mountedhere = (mount_t)NULL;
1274 vnode_unlock(vp);
1275
1276 mp->mnt_vnodecovered = NULLVP;
1277}
1278
1279static int
1280mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1281{
1282 int error;
1283
1284 /* unmount in progress return error */
1285 mount_lock_spin(mp);
1286 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1287 mount_unlock(mp);
1288 return EBUSY;
1289 }
1290 mount_unlock(mp);
1291 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1292
1293 /*
1294 * We only allow the filesystem to be reloaded if it
1295 * is currently mounted read-only.
1296 */
1297 if ((flags & MNT_RELOAD) &&
1298 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1299 error = ENOTSUP;
1300 goto out;
1301 }
1302
1303 /*
1304 * Only root, or the user that did the original mount is
1305 * permitted to update it.
1306 */
1307 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1308 (!vfs_context_issuser(ctx))) {
1309 error = EPERM;
1310 goto out;
1311 }
1312#if CONFIG_MACF
1313 error = mac_mount_check_remount(ctx, mp);
1314 if (error != 0) {
1315 goto out;
1316 }
1317#endif
1318
1319out:
1320 if (error) {
1321 lck_rw_done(&mp->mnt_rwlock);
1322 }
1323
1324 return error;
1325}
1326
1327static void
1328mount_end_update(mount_t mp)
1329{
1330 lck_rw_done(&mp->mnt_rwlock);
1331}
1332
1333static int
6d2010ae
A
1334get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1335{
1336 vnode_t vp;
1337
1338 if (height >= MAX_IMAGEBOOT_NESTING) {
1339 return EINVAL;
1340 }
1341
1342 vp = imgsrc_rootvnodes[height];
1343 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1344 *rvpp = vp;
1345 return 0;
1346 } else {
1347 return ENOENT;
1348 }
1349}
1350
1351static int
1352relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
b7266188 1353 const char *fsname, vfs_context_t ctx,
6d2010ae 1354 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
b7266188
A
1355{
1356 int error;
1357 mount_t mp;
1358 boolean_t placed = FALSE;
6d2010ae 1359 vnode_t devvp = NULLVP;
b7266188
A
1360 struct vfstable *vfsp;
1361 user_addr_t devpath;
1362 char *old_mntonname;
6d2010ae
A
1363 vnode_t rvp;
1364 uint32_t height;
1365 uint32_t flags;
b7266188
A
1366
1367 /* If we didn't imageboot, nothing to move */
6d2010ae 1368 if (imgsrc_rootvnodes[0] == NULLVP) {
b7266188
A
1369 return EINVAL;
1370 }
1371
1372 /* Only root can do this */
1373 if (!vfs_context_issuser(ctx)) {
1374 return EPERM;
1375 }
1376
6d2010ae
A
1377 IMGSRC_DEBUG("looking for root vnode.\n");
1378
1379 /*
1380 * Get root vnode of filesystem we're moving.
1381 */
1382 if (by_index) {
1383 if (is64bit) {
1384 struct user64_mnt_imgsrc_args mia64;
1385 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1386 if (error != 0) {
1387 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1388 return error;
1389 }
1390
1391 height = mia64.mi_height;
1392 flags = mia64.mi_flags;
1393 devpath = mia64.mi_devpath;
1394 } else {
1395 struct user32_mnt_imgsrc_args mia32;
1396 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1397 if (error != 0) {
1398 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1399 return error;
1400 }
1401
1402 height = mia32.mi_height;
1403 flags = mia32.mi_flags;
1404 devpath = mia32.mi_devpath;
1405 }
1406 } else {
1407 /*
1408 * For binary compatibility--assumes one level of nesting.
1409 */
1410 if (is64bit) {
1411 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1412 return error;
1413 } else {
1414 user32_addr_t tmp;
1415 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1416 return error;
1417
1418 /* munge into LP64 addr */
1419 devpath = CAST_USER_ADDR_T(tmp);
1420 }
1421
1422 height = 0;
1423 flags = 0;
1424 }
1425
1426 if (flags != 0) {
1427 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1428 return EINVAL;
1429 }
1430
1431 error = get_imgsrc_rootvnode(height, &rvp);
b7266188 1432 if (error != 0) {
6d2010ae 1433 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
b7266188
A
1434 return error;
1435 }
1436
6d2010ae
A
1437 IMGSRC_DEBUG("got root vnode.\n");
1438
b7266188
A
1439 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1440
1441 /* Can only move once */
6d2010ae 1442 mp = vnode_mount(rvp);
b7266188 1443 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1444 IMGSRC_DEBUG("Already moved.\n");
b7266188
A
1445 error = EBUSY;
1446 goto out0;
1447 }
1448
6d2010ae
A
1449 IMGSRC_DEBUG("Starting updated.\n");
1450
b7266188
A
1451 /* Get exclusive rwlock on mount, authorize update on mp */
1452 error = mount_begin_update(mp , ctx, 0);
1453 if (error != 0) {
6d2010ae 1454 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
b7266188
A
1455 goto out0;
1456 }
1457
1458 /*
1459 * It can only be moved once. Flag is set under the rwlock,
1460 * so we're now safe to proceed.
1461 */
1462 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1463 IMGSRC_DEBUG("Already moved [2]\n");
b7266188
A
1464 goto out1;
1465 }
6d2010ae
A
1466
1467
1468 IMGSRC_DEBUG("Preparing coveredvp.\n");
b7266188
A
1469
1470 /* Mark covered vnode as mount in progress, authorize placing mount on top */
6d2010ae 1471 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
b7266188 1472 if (error != 0) {
6d2010ae 1473 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
b7266188
A
1474 goto out1;
1475 }
1476
6d2010ae
A
1477 IMGSRC_DEBUG("Covered vp OK.\n");
1478
b7266188
A
1479 /* Sanity check the name caller has provided */
1480 vfsp = mp->mnt_vtable;
1481 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
6d2010ae 1482 IMGSRC_DEBUG("Wrong fs name.\n");
b7266188
A
1483 error = EINVAL;
1484 goto out2;
1485 }
1486
1487 /* Check the device vnode and update mount-from name, for local filesystems */
1488 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
6d2010ae 1489 IMGSRC_DEBUG("Local, doing device validation.\n");
b7266188
A
1490
1491 if (devpath != USER_ADDR_NULL) {
1492 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1493 if (error) {
6d2010ae 1494 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
b7266188
A
1495 goto out2;
1496 }
1497
1498 vnode_put(devvp);
1499 }
1500 }
1501
1502 /*
1503 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1504 * and increment the name cache's mount generation
1505 */
6d2010ae
A
1506
1507 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
b7266188
A
1508 error = place_mount_and_checkdirs(mp, vp, ctx);
1509 if (error != 0) {
1510 goto out2;
1511 }
1512
1513 placed = TRUE;
1514
1515 strncpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1516 strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1517
1518 /* Forbid future moves */
1519 mount_lock(mp);
1520 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1521 mount_unlock(mp);
1522
1523 /* Finally, add to mount list, completely ready to go */
6d2010ae
A
1524 if (mount_list_add(mp) != 0) {
1525 /*
1526 * The system is shutting down trying to umount
1527 * everything, so fail with a plausible errno.
1528 */
1529 error = EBUSY;
b7266188
A
1530 goto out3;
1531 }
1532
1533 mount_end_update(mp);
6d2010ae 1534 vnode_put(rvp);
b7266188
A
1535 FREE(old_mntonname, M_TEMP);
1536
6d2010ae
A
1537 vfs_notify_mount(pvp);
1538
b7266188
A
1539 return 0;
1540out3:
1541 strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1542
1543 mount_lock(mp);
1544 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1545 mount_unlock(mp);
1546
1547out2:
1548 /*
1549 * Placing the mp on the vnode clears VMOUNT,
1550 * so cleanup is different after that point
1551 */
1552 if (placed) {
1553 /* Rele the vp, clear VMOUNT and v_mountedhere */
1554 undo_place_on_covered_vp(mp, vp);
1555 } else {
1556 vnode_lock_spin(vp);
1557 CLR(vp->v_flag, VMOUNT);
1558 vnode_unlock(vp);
1559 }
1560out1:
1561 mount_end_update(mp);
1562
1563out0:
6d2010ae 1564 vnode_put(rvp);
b7266188
A
1565 FREE(old_mntonname, M_TEMP);
1566 return error;
1567}
1568
1569#endif /* CONFIG_IMGSRC_ACCESS */
1570
91447636 1571void
2d21ac55 1572enablequotas(struct mount *mp, vfs_context_t ctx)
9bccf70c 1573{
9bccf70c
A
1574 struct nameidata qnd;
1575 int type;
1576 char qfpath[MAXPATHLEN];
91447636
A
1577 const char *qfname = QUOTAFILENAME;
1578 const char *qfopsname = QUOTAOPSNAME;
1579 const char *qfextension[] = INITQFNAMES;
9bccf70c 1580
2d21ac55 1581 /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
b0d623f7
A
1582 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1583 return;
1584 }
9bccf70c
A
1585 /*
1586 * Enable filesystem disk quotas if necessary.
1587 * We ignore errors as this should not interfere with final mount
1588 */
1589 for (type=0; type < MAXQUOTAS; type++) {
2d21ac55 1590 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
6d2010ae
A
1591 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1592 CAST_USER_ADDR_T(qfpath), ctx);
91447636
A
1593 if (namei(&qnd) != 0)
1594 continue; /* option file to trigger quotas is not present */
1595 vnode_put(qnd.ni_vp);
1596 nameidone(&qnd);
2d21ac55 1597 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
91447636 1598
2d21ac55 1599 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
9bccf70c
A
1600 }
1601 return;
1602}
1603
2d21ac55
A
1604
1605static int
1606checkdirs_callback(proc_t p, void * arg)
1607{
1608 struct cdirargs * cdrp = (struct cdirargs * )arg;
1609 vnode_t olddp = cdrp->olddp;
1610 vnode_t newdp = cdrp->newdp;
1611 struct filedesc *fdp;
1612 vnode_t tvp;
1613 vnode_t fdp_cvp;
1614 vnode_t fdp_rvp;
1615 int cdir_changed = 0;
1616 int rdir_changed = 0;
1617
1618 /*
1619 * XXX Also needs to iterate each thread in the process to see if it
1620 * XXX is using a per-thread current working directory, and, if so,
1621 * XXX update that as well.
1622 */
1623
1624 proc_fdlock(p);
1625 fdp = p->p_fd;
1626 if (fdp == (struct filedesc *)0) {
1627 proc_fdunlock(p);
1628 return(PROC_RETURNED);
1629 }
1630 fdp_cvp = fdp->fd_cdir;
1631 fdp_rvp = fdp->fd_rdir;
1632 proc_fdunlock(p);
1633
1634 if (fdp_cvp == olddp) {
1635 vnode_ref(newdp);
1636 tvp = fdp->fd_cdir;
1637 fdp_cvp = newdp;
1638 cdir_changed = 1;
1639 vnode_rele(tvp);
1640 }
1641 if (fdp_rvp == olddp) {
1642 vnode_ref(newdp);
1643 tvp = fdp->fd_rdir;
1644 fdp_rvp = newdp;
1645 rdir_changed = 1;
1646 vnode_rele(tvp);
1647 }
1648 if (cdir_changed || rdir_changed) {
1649 proc_fdlock(p);
1650 fdp->fd_cdir = fdp_cvp;
1651 fdp->fd_rdir = fdp_rvp;
1652 proc_fdunlock(p);
1653 }
1654 return(PROC_RETURNED);
1655}
1656
1657
1658
1c79356b
A
1659/*
1660 * Scan all active processes to see if any of them have a current
1661 * or root directory onto which the new filesystem has just been
1662 * mounted. If so, replace them with the new mount point.
1663 */
6601e61a 1664static int
2d21ac55 1665checkdirs(vnode_t olddp, vfs_context_t ctx)
1c79356b 1666{
2d21ac55
A
1667 vnode_t newdp;
1668 vnode_t tvp;
6601e61a 1669 int err;
2d21ac55 1670 struct cdirargs cdr;
1c79356b
A
1671
1672 if (olddp->v_usecount == 1)
6601e61a 1673 return(0);
2d21ac55 1674 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
2d21ac55
A
1675
1676 if (err != 0) {
6601e61a 1677#if DIAGNOSTIC
2d21ac55 1678 panic("mount: lost mount: error %d", err);
6601e61a
A
1679#endif
1680 return(err);
1681 }
91447636 1682
2d21ac55
A
1683 cdr.olddp = olddp;
1684 cdr.newdp = newdp;
1685 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1686 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
91447636 1687
1c79356b 1688 if (rootvnode == olddp) {
91447636 1689 vnode_ref(newdp);
fa4905b1 1690 tvp = rootvnode;
1c79356b 1691 rootvnode = newdp;
91447636 1692 vnode_rele(tvp);
1c79356b 1693 }
91447636
A
1694
1695 vnode_put(newdp);
6601e61a 1696 return(0);
1c79356b
A
1697}
1698
1699/*
1700 * Unmount a file system.
1701 *
1702 * Note: unmount takes a path to the vnode mounted on as argument,
1703 * not special file (as before).
1704 */
1c79356b
A
1705/* ARGSUSED */
1706int
b0d623f7 1707unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1c79356b 1708{
2d21ac55 1709 vnode_t vp;
1c79356b
A
1710 struct mount *mp;
1711 int error;
1712 struct nameidata nd;
2d21ac55 1713 vfs_context_t ctx = vfs_context_current();
91447636 1714
fe8ab488 1715 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
2d21ac55 1716 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
1717 error = namei(&nd);
1718 if (error)
1c79356b
A
1719 return (error);
1720 vp = nd.ni_vp;
1721 mp = vp->v_mount;
91447636 1722 nameidone(&nd);
1c79356b 1723
2d21ac55
A
1724#if CONFIG_MACF
1725 error = mac_mount_check_umount(ctx, mp);
1726 if (error != 0) {
1727 vnode_put(vp);
1728 return (error);
1729 }
1730#endif
55e303ae
A
1731 /*
1732 * Must be the root of the filesystem
1733 */
1734 if ((vp->v_flag & VROOT) == 0) {
91447636 1735 vnode_put(vp);
55e303ae
A
1736 return (EINVAL);
1737 }
6601e61a 1738 mount_ref(mp, 0);
91447636 1739 vnode_put(vp);
6601e61a 1740 /* safedounmount consumes the mount ref */
2d21ac55
A
1741 return (safedounmount(mp, uap->flags, ctx));
1742}
1743
1744int
1745vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
1746{
1747 mount_t mp;
1748
1749 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1750 if (mp == (mount_t)0) {
1751 return(ENOENT);
1752 }
1753 mount_ref(mp, 0);
1754 mount_iterdrop(mp);
1755 /* safedounmount consumes the mount ref */
1756 return(safedounmount(mp, flags, ctx));
55e303ae
A
1757}
1758
2d21ac55 1759
55e303ae 1760/*
6601e61a 1761 * The mount struct comes with a mount ref which will be consumed.
55e303ae
A
1762 * Do the actual file system unmount, prevent some common foot shooting.
1763 */
1764int
2d21ac55 1765safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
55e303ae
A
1766{
1767 int error;
2d21ac55 1768 proc_t p = vfs_context_proc(ctx);
55e303ae 1769
316670eb
A
1770 /*
1771 * If the file system is not responding and MNT_NOBLOCK
1772 * is set and not a forced unmount then return EBUSY.
1773 */
1774 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1775 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1776 error = EBUSY;
1777 goto out;
1778 }
1779
1c79356b 1780 /*
6d2010ae
A
1781 * Skip authorization if the mount is tagged as permissive and
1782 * this is not a forced-unmount attempt.
1c79356b 1783 */
6d2010ae
A
1784 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1785 /*
1786 * Only root, or the user that did the original mount is
1787 * permitted to unmount this filesystem.
1788 */
1789 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1790 (error = suser(kauth_cred_get(), &p->p_acflag)))
1791 goto out;
1792 }
1c79356b
A
1793 /*
1794 * Don't allow unmounting the root file system.
1795 */
6601e61a 1796 if (mp->mnt_flag & MNT_ROOTFS) {
2d21ac55 1797 error = EBUSY; /* the root is always busy */
6601e61a
A
1798 goto out;
1799 }
1c79356b 1800
b7266188
A
1801#ifdef CONFIG_IMGSRC_ACCESS
1802 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1803 error = EBUSY;
1804 goto out;
1805 }
1806#endif /* CONFIG_IMGSRC_ACCESS */
1807
2d21ac55
A
1808 return (dounmount(mp, flags, 1, ctx));
1809
6601e61a
A
1810out:
1811 mount_drop(mp, 0);
1812 return(error);
1c79356b
A
1813}
1814
1815/*
1816 * Do the actual file system unmount.
 *
 * Parameters:
 *	mp	mount to unmount
 *	flags	unmount flags; MNT_FORCE, MNT_NOBLOCK and MNT_LNOSUB are
 *		examined here and the full set is passed to VFS_UNMOUNT()
 *	withref	non-zero when the caller holds a mount ref on mp; that ref
 *		is released here with mount_drop() on every path
 *	ctx	caller's VFS context
 *
 * Returns 0 on success; EBUSY when an unmount of mp is already in
 * progress; otherwise the error from VFS_SYNC(), from vflush() (non-forced
 * unmounts only) or from VFS_UNMOUNT().  On those failures the
 * MNTK_UNMOUNT, MNT_LUNMOUNT and MNT_LFORCE marks are cleared again.
1817 */
1818int
2d21ac55 1819dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1c79356b 1820{
2d21ac55 1821 vnode_t coveredvp = (vnode_t)0;
1c79356b 1822 int error;
91447636 1823 int needwakeup = 0;
91447636
A
1824 int forcedunmount = 0;
1825 int lflags = 0;
593a1d5f 1826 struct vnode *devvp = NULLVP;
6d2010ae 1827#if CONFIG_TRIGGERS
39236c6e 1828 proc_t p = vfs_context_proc(ctx);
6d2010ae 1829 int did_vflush = 0;
39236c6e 1830 int pflags_save = 0;
6d2010ae 1831#endif /* CONFIG_TRIGGERS */
91447636 1832
91447636 1833 mount_lock(mp);
fe8ab488
A
1834
1835 /*
1836 * If already an unmount in progress just return EBUSY.
1837 * Even a forced unmount cannot override.
1838 */
91447636 1839 if (mp->mnt_lflag & MNT_LUNMOUNT) {
fe8ab488 1840 if (withref != 0)
6601e61a 1841 mount_drop(mp, 1);
fe8ab488 1842 mount_unlock(mp);
9bccf70c
A
1843 return (EBUSY);
1844 }
39236c6e 1845
fe8ab488
A
1846 if (flags & MNT_FORCE) {
1847 forcedunmount = 1;
1848 mp->mnt_lflag |= MNT_LFORCE;
1849 }
1850
39236c6e
A
1851#if CONFIG_TRIGGERS
/* Remember the previous p_flag value so P_NOREMOTEHANG can be restored at "out". */
1852 if (flags & MNT_NOBLOCK && p != kernproc)
1853 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
1854#endif
1855
/* Mark the unmount as in progress while the mount lock is still held. */
1c79356b 1856 mp->mnt_kern_flag |= MNTK_UNMOUNT;
91447636
A
1857 mp->mnt_lflag |= MNT_LUNMOUNT;
1858 mp->mnt_flag &=~ MNT_ASYNC;
2d21ac55
A
1859 /*
1860 * anyone currently in the fast path that
1861 * trips over the cached rootvp will be
1862 * dumped out and forced into the slow path
1863 * to regenerate a new cached value
1864 */
1865 mp->mnt_realrootvp = NULLVP;
91447636 1866 mount_unlock(mp);
2d21ac55 1867
fe8ab488
A
1868 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
1869 /*
1870 * Force unmount any mounts in this filesystem.
1871 * If any unmounts fail - just leave them dangling.
1872 * Avoids recursion.
1873 */
1874 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
1875 }
1876
2d21ac55
A
1877 /*
1878 * taking the name_cache_lock exclusively will
1879 * ensure that everyone is out of the fast path who
1880 * might be trying to use a now stale copy of
1881 * vp->v_mountedhere->mnt_realrootvp
1882 * bumping mount_generation causes the cached values
1883 * to be invalidated
1884 */
1885 name_cache_lock();
1886 mount_generation++;
1887 name_cache_unlock();
1888
1889
91447636 1890 lck_rw_lock_exclusive(&mp->mnt_rwlock);
/* The caller's mount ref, if any, is released once the rwlock is held. */
6601e61a
A
1891 if (withref != 0)
1892 mount_drop(mp, 0);
2d21ac55 1893#if CONFIG_FSE
91447636 1894 fsevent_unmount(mp); /* has to come first! */
2d21ac55 1895#endif
91447636
A
1896 error = 0;
/* Only a non-forced unmount syncs first, and a sync failure aborts it. */
1897 if (forcedunmount == 0) {
1898 ubc_umount(mp); /* release cached vnodes */
1899 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2d21ac55 1900 error = VFS_SYNC(mp, MNT_WAIT, ctx);
91447636
A
1901 if (error) {
/* The mount lock is taken here and released on the "out" path. */
1902 mount_lock(mp);
1903 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1904 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1905 mp->mnt_lflag &= ~MNT_LFORCE;
1906 goto out;
1907 }
1908 }
1909 }
6d2010ae
A
1910
1911#if CONFIG_TRIGGERS
1912 vfs_nested_trigger_unmounts(mp, flags, ctx);
/* Note: did_vflush is set here, ahead of the vflush() call below. */
1913 did_vflush = 1;
1914#endif
91447636
A
/*
 * For a forced unmount FORCECLOSE is added and a vflush() failure is
 * not fatal: error is simply overwritten by VFS_UNMOUNT() below.
 */
1915 if (forcedunmount)
1916 lflags |= FORCECLOSE;
1917 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
1918 if ((forcedunmount == 0) && error) {
1919 mount_lock(mp);
9bccf70c 1920 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
1921 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1922 mp->mnt_lflag &= ~MNT_LFORCE;
9bccf70c
A
1923 goto out;
1924 }
91447636
A
1925
1926 /* make sure no one is in the mount iterations or lookup */
1927 mount_iterdrain(mp);
1928
2d21ac55 1929 error = VFS_UNMOUNT(mp, flags, ctx);
1c79356b 1930 if (error) {
91447636
A
1931 mount_iterreset(mp);
1932 mount_lock(mp);
1c79356b 1933 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
1934 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1935 mp->mnt_lflag &= ~MNT_LFORCE;
1c79356b
A
1936 goto out;
1937 }
1938
1939 /* increment the operations count */
/* error is necessarily 0 here (the failure case jumped to "out" above). */
1940 if (!error)
b0d623f7 1941 OSAddAtomic(1, &vfs_nummntops);
91447636
A
1942
1943 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
593a1d5f
A
1944 /* hold an io reference and drop the usecount before close */
1945 devvp = mp->mnt_devvp;
593a1d5f
A
1946 vnode_getalways(devvp);
1947 vnode_rele(devvp);
1948 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
2d21ac55 1949 ctx);
b0d623f7 1950 vnode_clearmountedon(devvp);
593a1d5f 1951 vnode_put(devvp);
91447636
A
1952 }
/* mnt_rwlock is released around mount_list_remove() and re-taken afterwards. */
1953 lck_rw_done(&mp->mnt_rwlock);
1954 mount_list_remove(mp);
1955 lck_rw_lock_exclusive(&mp->mnt_rwlock);
6d2010ae 1956
91447636 1957 /* mark the mount point hook in the vp but not drop the ref yet */
1c79356b 1958 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
fe8ab488
A
1959 /*
1960 * The covered vnode needs special handling. Trying to get an
1961 * iocount must not block here as this may lead to deadlocks
1962 * if the Filesystem to which the covered vnode belongs is
1963 * undergoing forced unmounts. Since we hold a usecount, the
1964 * vnode cannot be reused (it can, however, still be terminated)
1965 */
1966 vnode_getalways(coveredvp);
6d2010ae
A
1967 vnode_lock_spin(coveredvp);
1968
/* This crossref is given back by mount_dropcrossref() near the end of the function. */
1969 mp->mnt_crossref++;
1970 coveredvp->v_mountedhere = (struct mount *)0;
fe8ab488 1971 CLR(coveredvp->v_flag, VMOUNT);
6d2010ae
A
1972
1973 vnode_unlock(coveredvp);
1974 vnode_put(coveredvp);
1c79356b 1975 }
91447636
A
1976
1977 mount_list_lock();
1978 mp->mnt_vtable->vfc_refcount--;
1979 mount_list_unlock();
1980
1981 cache_purgevfs(mp); /* remove cache entries for this file sys */
1982 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
1983 mount_lock(mp);
1984 mp->mnt_lflag |= MNT_LDEAD;
1985
1986 if (mp->mnt_lflag & MNT_LWAIT) {
1987 /*
1988 * do the wakeup here
1989 * in case we block in mount_refdrain
1990 * which will drop the mount lock
1991 * and allow anyone blocked in vfs_busy
1992 * to wakeup and see the LDEAD state
1993 */
1994 mp->mnt_lflag &= ~MNT_LWAIT;
1995 wakeup((caddr_t)mp);
1c79356b 1996 }
91447636 1997 mount_refdrain(mp);
/*
 * Common exit.  Reached with the mount lock held, both on success and
 * from every failure goto above.
 */
1c79356b 1998out:
91447636
A
1999 if (mp->mnt_lflag & MNT_LWAIT) {
2000 mp->mnt_lflag &= ~MNT_LWAIT;
/* The wakeup itself is deferred until the locks have been released below. */
2001 needwakeup = 1;
2002 }
6d2010ae 2003
6d2010ae 2004#if CONFIG_TRIGGERS
39236c6e
A
2005 if (flags & MNT_NOBLOCK && p != kernproc) {
2006 // Restore P_NOREMOTEHANG bit to its previous value
2007 if ((pflags_save & P_NOREMOTEHANG) == 0)
2008 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2009 }
2010
6d2010ae
A
2011 /*
2012 * Callback and context are set together under the mount lock, and
2013 * never cleared, so we're safe to examine them here, drop the lock,
2014 * and call out.
2015 */
2016 if (mp->mnt_triggercallback != NULL) {
2017 mount_unlock(mp);
/* VTC_RELEASE on success; VTC_REPLACE on a failure that happened after did_vflush was set. */
2018 if (error == 0) {
2019 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2020 } else if (did_vflush) {
2021 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2022 }
2023 } else {
2024 mount_unlock(mp);
2025 }
2026#else
91447636 2027 mount_unlock(mp);
6d2010ae
A
2028#endif /* CONFIG_TRIGGERS */
2029
91447636
A
2030 lck_rw_done(&mp->mnt_rwlock);
2031
2032 if (needwakeup)
1c79356b 2033 wakeup((caddr_t)mp);
6d2010ae 2034
/* Post-unmount cleanup happens only on success. */
55e303ae 2035 if (!error) {
91447636 2036 if ((coveredvp != NULLVP)) {
fe8ab488 2037 vnode_t pvp = NULLVP;
b0d623f7 2038
fe8ab488
A
2039 /*
2040 * The covered vnode needs special handling. Trying to
2041 * get an iocount must not block here as this may lead
2042 * to deadlocks if the Filesystem to which the covered
2043 * vnode belongs is undergoing forced unmounts. Since we
2044 * hold a usecount, the vnode cannot be reused
2045 * (it can, however, still be terminated).
2046 */
2047 vnode_getalways(coveredvp);
6d2010ae
A
2048
2049 mount_dropcrossref(mp, coveredvp, 0);
fe8ab488
A
2050 /*
2051 * We'll _try_ to detect if this really needs to be
2052 * done. The coveredvp can only be in termination (or
2053 * terminated) if the coveredvp's mount point is in a
2054 * forced unmount (or has been) since we still hold the
2055 * ref.
2056 */
2057 if (!vnode_isrecycled(coveredvp)) {
2058 pvp = vnode_getparent(coveredvp);
6d2010ae 2059#if CONFIG_TRIGGERS
fe8ab488
A
2060 if (coveredvp->v_resolve) {
2061 vnode_trigger_rearm(coveredvp, ctx);
2062 }
2063#endif
2064 }
2065
/* Drop the usecount held on the covered vnode, then the iocount taken just above. */
2066 vnode_rele(coveredvp);
91447636 2067 vnode_put(coveredvp);
fe8ab488 2068 coveredvp = NULLVP;
b0d623f7
A
2069
2070 if (pvp) {
/* Post NOTE_WRITE on the covered vnode's parent directory. */
2071 lock_vnode_and_post(pvp, NOTE_WRITE);
2072 vnode_put(pvp);
2073 }
91447636
A
2074 } else if (mp->mnt_flag & MNT_ROOTFS) {
/* No covered vnode: the mount structure is freed directly here. */
2075 mount_lock_destroy(mp);
2d21ac55
A
2076#if CONFIG_MACF
2077 mac_mount_label_destroy(mp);
2078#endif
91447636
A
2079 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2080 } else
2081 panic("dounmount: no coveredvp");
55e303ae 2082 }
1c79356b
A
2083 return (error);
2084}
2085
fe8ab488
A
2086/*
2087 * Unmount any mounts in this filesystem.
2088 */
2089void
2090dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2091{
2092 mount_t smp;
2093 fsid_t *fsids, fsid;
2094 int fsids_sz;
2095 int count = 0, i, m = 0;
2096 vnode_t vp;
2097
2098 mount_list_lock();
2099
2100 // Get an array to hold the submounts fsids.
2101 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2102 count++;
2103 fsids_sz = count * sizeof(fsid_t);
2104 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2105 if (fsids == NULL) {
2106 mount_list_unlock();
2107 goto out;
2108 }
2109 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2110
2111 /*
2112 * Fill the array with submount fsids.
2113 * Since mounts are always added to the tail of the mount list, the
2114 * list is always in mount order.
2115 * For each mount check if the mounted-on vnode belongs to a
2116 * mount that's already added to our array of mounts to be unmounted.
2117 */
2118 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2119 vp = smp->mnt_vnodecovered;
2120 if (vp == NULL)
2121 continue;
2122 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2123 for (i = 0; i <= m; i++) {
2124 if (fsids[i].val[0] == fsid.val[0] &&
2125 fsids[i].val[1] == fsid.val[1]) {
2126 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2127 break;
2128 }
2129 }
2130 }
2131 mount_list_unlock();
2132
2133 // Unmount the submounts in reverse order. Ignore errors.
2134 for (i = m; i > 0; i--) {
2135 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2136 if (smp) {
2137 mount_ref(smp, 0);
2138 mount_iterdrop(smp);
2139 (void) dounmount(smp, flags, 1, ctx);
2140 }
2141 }
2142out:
2143 if (fsids)
2144 FREE(fsids, M_TEMP);
2145}
2146
91447636
A
2147void
2148mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2149{
6d2010ae
A
2150 vnode_lock(dp);
2151 mp->mnt_crossref--;
2152
2153 if (mp->mnt_crossref < 0)
2154 panic("mount cross refs -ve");
2155
2156 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
2157
91447636 2158 if (need_put)
6d2010ae 2159 vnode_put_locked(dp);
91447636 2160 vnode_unlock(dp);
6d2010ae
A
2161
2162 mount_lock_destroy(mp);
2163#if CONFIG_MACF
2164 mac_mount_label_destroy(mp);
2165#endif
2166 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2167 return;
2168 }
2169 if (need_put)
2170 vnode_put_locked(dp);
2171 vnode_unlock(dp);
91447636
A
2172}
2173
2174
1c79356b
A
2175/*
2176 * Sync each mounted filesystem.
2177 */
2178#if DIAGNOSTIC
2179int syncprt = 0;
1c79356b
A
2180#endif
2181
1c79356b 2182int print_vmpage_stat=0;
fe8ab488 2183int sync_timeout = 60; // Sync time limit (sec)
1c79356b 2184
91447636 2185static int
fe8ab488 2186sync_callback(mount_t mp, __unused void *arg)
1c79356b 2187{
91447636 2188 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
fe8ab488
A
2189 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2190
2191 mp->mnt_flag &= ~MNT_ASYNC;
2192 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
2193 if (asyncflag)
2194 mp->mnt_flag |= MNT_ASYNC;
1c79356b 2195 }
1c79356b 2196
fe8ab488
A
2197 return (VFS_RETURNED);
2198}
91447636 2199
91447636
A
2200/* ARGSUSED */
2201int
b0d623f7 2202sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
91447636 2203{
fe8ab488 2204 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
b0d623f7 2205
fe8ab488
A
2206 if (print_vmpage_stat) {
2207 vm_countdirtypages();
2208 }
2209
2210#if DIAGNOSTIC
2211 if (syncprt)
2212 vfs_bufstats();
2213#endif /* DIAGNOSTIC */
2214 return 0;
2215}
2216
2217static void
2218sync_thread(void *arg, __unused wait_result_t wr)
2219{
2220 int *timeout = (int *) arg;
2221
2222 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2223
2224 if (timeout)
2225 wakeup((caddr_t) timeout);
2226 if (print_vmpage_stat) {
1c79356b 2227 vm_countdirtypages();
1c79356b 2228 }
39236c6e 2229
1c79356b
A
2230#if DIAGNOSTIC
2231 if (syncprt)
2232 vfs_bufstats();
2233#endif /* DIAGNOSTIC */
1c79356b
A
2234}
2235
2236/*
fe8ab488 2237 * Sync in a separate thread so we can time out if it blocks.
1c79356b 2238 */
fe8ab488
A
2239static int
2240sync_async(int timeout)
2d21ac55 2241{
fe8ab488 2242 thread_t thd;
2d21ac55 2243 int error;
fe8ab488
A
2244 struct timespec ts = {timeout, 0};
2245
2246 lck_mtx_lock(sync_mtx_lck);
2247 if (kernel_thread_start(sync_thread, &timeout, &thd) != KERN_SUCCESS) {
2248 printf("sync_thread failed\n");
2249 lck_mtx_unlock(sync_mtx_lck);
2250 return (0);
2251 }
2252
2253 error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "sync_thread", &ts);
2254 if (error) {
2255 printf("sync timed out: %d sec\n", timeout);
2256 }
2257 thread_deallocate(thd);
2258
2259 return (0);
2d21ac55
A
2260}
2261
fe8ab488
A
2262/*
2263 * An in-kernel sync for power management to call.
2264 */
2265__private_extern__ int
2266sync_internal(void)
2267{
2268 (void) sync_async(sync_timeout);
2269
2270 return 0;
2271} /* end of sync_internal call */
2272
2273/*
2274 * Change filesystem quotas.
2275 */
2276#if QUOTA
2277int
2278quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
1c79356b 2279{
2d21ac55 2280 struct mount *mp;
91447636
A
2281 int error, quota_cmd, quota_status;
2282 caddr_t datap;
2283 size_t fnamelen;
1c79356b 2284 struct nameidata nd;
2d21ac55 2285 vfs_context_t ctx = vfs_context_current();
91447636
A
2286 struct dqblk my_dqblk;
2287
b0d623f7 2288 AUDIT_ARG(uid, uap->uid);
55e303ae 2289 AUDIT_ARG(cmd, uap->cmd);
6d2010ae
A
2290 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2291 uap->path, ctx);
55e303ae
A
2292 error = namei(&nd);
2293 if (error)
1c79356b
A
2294 return (error);
2295 mp = nd.ni_vp->v_mount;
91447636
A
2296 vnode_put(nd.ni_vp);
2297 nameidone(&nd);
2298
2299 /* copyin any data we will need for downstream code */
2300 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2301
2302 switch (quota_cmd) {
2303 case Q_QUOTAON:
2304 /* uap->arg specifies a file from which to take the quotas */
2305 fnamelen = MAXPATHLEN;
2306 datap = kalloc(MAXPATHLEN);
2307 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2308 break;
2309 case Q_GETQUOTA:
2310 /* uap->arg is a pointer to a dqblk structure. */
2311 datap = (caddr_t) &my_dqblk;
2312 break;
2313 case Q_SETQUOTA:
2314 case Q_SETUSE:
2315 /* uap->arg is a pointer to a dqblk structure. */
2316 datap = (caddr_t) &my_dqblk;
2317 if (proc_is64bit(p)) {
2318 struct user_dqblk my_dqblk64;
2319 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
2320 if (error == 0) {
2321 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2322 }
2323 }
2324 else {
2325 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
2326 }
2327 break;
2328 case Q_QUOTASTAT:
2329 /* uap->arg is a pointer to an integer */
2330 datap = (caddr_t) &quota_status;
2331 break;
2332 default:
2333 datap = NULL;
2334 break;
2335 } /* switch */
2336
2337 if (error == 0) {
2d21ac55 2338 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
91447636
A
2339 }
2340
2341 switch (quota_cmd) {
2342 case Q_QUOTAON:
2343 if (datap != NULL)
2344 kfree(datap, MAXPATHLEN);
2345 break;
2346 case Q_GETQUOTA:
2347 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2348 if (error == 0) {
2349 if (proc_is64bit(p)) {
fe8ab488 2350 struct user_dqblk my_dqblk64 = {.dqb_bhardlimit = 0};
91447636
A
2351 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2352 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
2353 }
2354 else {
2355 error = copyout(datap, uap->arg, sizeof (struct dqblk));
2356 }
2357 }
2358 break;
2359 case Q_QUOTASTAT:
2360 /* uap->arg is a pointer to an integer */
2361 if (error == 0) {
2362 error = copyout(datap, uap->arg, sizeof(quota_status));
2363 }
2364 break;
2365 default:
2366 break;
2367 } /* switch */
2368
2369 return (error);
1c79356b 2370}
2d21ac55
A
2371#else
2372int
b0d623f7 2373quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2d21ac55
A
2374{
2375 return (EOPNOTSUPP);
2376}
2377#endif /* QUOTA */
1c79356b
A
2378
2379/*
2380 * Get filesystem statistics.
2d21ac55
A
2381 *
2382 * Returns: 0 Success
2383 * namei:???
2384 * vfs_update_vfsstat:???
2385 * munge_statfs:EFAULT
1c79356b 2386 */
1c79356b
A
2387/* ARGSUSED */
2388int
b0d623f7 2389statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
1c79356b 2390{
91447636
A
2391 struct mount *mp;
2392 struct vfsstatfs *sp;
1c79356b
A
2393 int error;
2394 struct nameidata nd;
2d21ac55 2395 vfs_context_t ctx = vfs_context_current();
91447636 2396 vnode_t vp;
1c79356b 2397
fe8ab488 2398 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2d21ac55 2399 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
2400 error = namei(&nd);
2401 if (error)
1c79356b 2402 return (error);
91447636
A
2403 vp = nd.ni_vp;
2404 mp = vp->v_mount;
2405 sp = &mp->mnt_vfsstat;
2406 nameidone(&nd);
2407
2d21ac55 2408 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
39236c6e
A
2409 if (error != 0) {
2410 vnode_put(vp);
1c79356b 2411 return (error);
39236c6e 2412 }
91447636
A
2413
2414 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
39236c6e 2415 vnode_put(vp);
91447636 2416 return (error);
1c79356b
A
2417}
2418
2419/*
2420 * Get filesystem statistics.
2421 */
1c79356b
A
2422/* ARGSUSED */
2423int
b0d623f7 2424fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
1c79356b 2425{
2d21ac55 2426 vnode_t vp;
1c79356b 2427 struct mount *mp;
91447636 2428 struct vfsstatfs *sp;
1c79356b
A
2429 int error;
2430
55e303ae
A
2431 AUDIT_ARG(fd, uap->fd);
2432
91447636 2433 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 2434 return (error);
55e303ae 2435
d1ecb069
A
2436 error = vnode_getwithref(vp);
2437 if (error) {
2438 file_drop(uap->fd);
2439 return (error);
2440 }
2441
91447636 2442 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
55e303ae 2443
91447636
A
2444 mp = vp->v_mount;
2445 if (!mp) {
d1ecb069
A
2446 error = EBADF;
2447 goto out;
91447636
A
2448 }
2449 sp = &mp->mnt_vfsstat;
2d21ac55 2450 if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
d1ecb069 2451 goto out;
91447636 2452 }
91447636
A
2453
2454 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2455
d1ecb069
A
2456out:
2457 file_drop(uap->fd);
2458 vnode_put(vp);
2459
91447636 2460 return (error);
1c79356b
A
2461}
2462
2d21ac55
A
2463/*
2464 * Common routine to handle copying of statfs64 data to user space
2465 */
2466static int
2467statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2468{
2469 int error;
2470 struct statfs64 sfs;
2471
2472 bzero(&sfs, sizeof(sfs));
2473
2474 sfs.f_bsize = sfsp->f_bsize;
2475 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2476 sfs.f_blocks = sfsp->f_blocks;
2477 sfs.f_bfree = sfsp->f_bfree;
2478 sfs.f_bavail = sfsp->f_bavail;
2479 sfs.f_files = sfsp->f_files;
2480 sfs.f_ffree = sfsp->f_ffree;
2481 sfs.f_fsid = sfsp->f_fsid;
2482 sfs.f_owner = sfsp->f_owner;
2483 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2484 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2485 sfs.f_fssubtype = sfsp->f_fssubtype;
6d2010ae
A
2486 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2487 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2488 } else {
2489 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2490 }
2d21ac55
A
2491 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2492 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2493
2494 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2495
2496 return(error);
2497}
2498
2499/*
2500 * Get file system statistics in 64-bit mode
2501 */
2502int
b0d623f7 2503statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
2504{
2505 struct mount *mp;
2506 struct vfsstatfs *sp;
2507 int error;
2508 struct nameidata nd;
2509 vfs_context_t ctxp = vfs_context_current();
2510 vnode_t vp;
2511
fe8ab488 2512 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2d21ac55
A
2513 UIO_USERSPACE, uap->path, ctxp);
2514 error = namei(&nd);
2515 if (error)
2516 return (error);
2517 vp = nd.ni_vp;
2518 mp = vp->v_mount;
2519 sp = &mp->mnt_vfsstat;
2520 nameidone(&nd);
2521
2522 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
39236c6e
A
2523 if (error != 0) {
2524 vnode_put(vp);
2d21ac55 2525 return (error);
39236c6e 2526 }
2d21ac55
A
2527
2528 error = statfs64_common(mp, sp, uap->buf);
39236c6e 2529 vnode_put(vp);
2d21ac55
A
2530
2531 return (error);
2532}
2533
2534/*
2535 * Get file system statistics in 64-bit mode
2536 */
2537int
b0d623f7 2538fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
2539{
2540 struct vnode *vp;
2541 struct mount *mp;
2542 struct vfsstatfs *sp;
2543 int error;
2544
2545 AUDIT_ARG(fd, uap->fd);
2546
2547 if ( (error = file_vnode(uap->fd, &vp)) )
2548 return (error);
2549
d1ecb069
A
2550 error = vnode_getwithref(vp);
2551 if (error) {
2552 file_drop(uap->fd);
2553 return (error);
2554 }
2555
2d21ac55
A
2556 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2557
2558 mp = vp->v_mount;
2559 if (!mp) {
316670eb 2560 error = EBADF;
d1ecb069 2561 goto out;
2d21ac55
A
2562 }
2563 sp = &mp->mnt_vfsstat;
2564 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
d1ecb069 2565 goto out;
2d21ac55 2566 }
2d21ac55
A
2567
2568 error = statfs64_common(mp, sp, uap->buf);
2569
d1ecb069
A
2570out:
2571 file_drop(uap->fd);
2572 vnode_put(vp);
2573
2d21ac55
A
2574 return (error);
2575}
91447636
A
2576
2577struct getfsstat_struct {
2578 user_addr_t sfsp;
2d21ac55 2579 user_addr_t *mp;
91447636
A
2580 int count;
2581 int maxcount;
2582 int flags;
2583 int error;
1c79356b 2584};
1c79356b 2585
91447636
A
2586
2587static int
2588getfsstat_callback(mount_t mp, void * arg)
2589{
2590
2591 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2592 struct vfsstatfs *sp;
91447636 2593 int error, my_size;
2d21ac55 2594 vfs_context_t ctx = vfs_context_current();
91447636
A
2595
2596 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2597 sp = &mp->mnt_vfsstat;
2598 /*
2599 * If MNT_NOWAIT is specified, do not refresh the
b0d623f7 2600 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
91447636 2601 */
b0d623f7 2602 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2d21ac55
A
2603 (error = vfs_update_vfsstat(mp, ctx,
2604 VFS_USER_EVENT))) {
91447636
A
2605 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2606 return(VFS_RETURNED);
1c79356b 2607 }
91447636
A
2608
2609 /*
2610 * Need to handle LP64 version of struct statfs
2611 */
2d21ac55 2612 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
91447636
A
2613 if (error) {
2614 fstp->error = error;
2615 return(VFS_RETURNED_DONE);
1c79356b 2616 }
91447636 2617 fstp->sfsp += my_size;
2d21ac55
A
2618
2619 if (fstp->mp) {
39236c6e 2620#if CONFIG_MACF
2d21ac55
A
2621 error = mac_mount_label_get(mp, *fstp->mp);
2622 if (error) {
2623 fstp->error = error;
2624 return(VFS_RETURNED_DONE);
2625 }
39236c6e 2626#endif
2d21ac55
A
2627 fstp->mp++;
2628 }
2629 }
91447636
A
2630 fstp->count++;
2631 return(VFS_RETURNED);
2632}
2633
2634/*
2635 * Get statistics on all filesystems.
2636 */
2637int
2638getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2d21ac55
A
2639{
2640 struct __mac_getfsstat_args muap;
2641
2642 muap.buf = uap->buf;
2643 muap.bufsize = uap->bufsize;
2644 muap.mac = USER_ADDR_NULL;
2645 muap.macsize = 0;
2646 muap.flags = uap->flags;
2647
2648 return (__mac_getfsstat(p, &muap, retval));
2649}
2650
b0d623f7
A
2651/*
2652 * __mac_getfsstat: Get MAC-related file system statistics
2653 *
2654 * Parameters: p (ignored)
2655 * uap User argument descriptor (see below)
2656 * retval Count of file system statistics (N stats)
2657 *
2658 * Indirect: uap->bufsize Buffer size
2659 * uap->macsize MAC info size
2660 * uap->buf Buffer where information will be returned
2661 * uap->mac MAC info
2662 * uap->flags File system flags
2663 *
2664 *
2665 * Returns: 0 Success
2666 * !0 Not success
2667 *
2668 */
2d21ac55
A
2669int
2670__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
91447636
A
2671{
2672 user_addr_t sfsp;
2d21ac55 2673 user_addr_t *mp;
b0d623f7 2674 size_t count, maxcount, bufsize, macsize;
91447636
A
2675 struct getfsstat_struct fst;
2676
b0d623f7
A
2677 bufsize = (size_t) uap->bufsize;
2678 macsize = (size_t) uap->macsize;
2679
91447636 2680 if (IS_64BIT_PROCESS(p)) {
b0d623f7 2681 maxcount = bufsize / sizeof(struct user64_statfs);
91447636
A
2682 }
2683 else {
b0d623f7 2684 maxcount = bufsize / sizeof(struct user32_statfs);
91447636
A
2685 }
2686 sfsp = uap->buf;
2687 count = 0;
2688
2d21ac55
A
2689 mp = NULL;
2690
2691#if CONFIG_MACF
2692 if (uap->mac != USER_ADDR_NULL) {
2693 u_int32_t *mp0;
2694 int error;
b0d623f7 2695 unsigned int i;
2d21ac55 2696
b0d623f7 2697 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2d21ac55
A
2698 if (count != maxcount)
2699 return (EINVAL);
2700
2701 /* Copy in the array */
b0d623f7
A
2702 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2703 if (mp0 == NULL) {
2704 return (ENOMEM);
2705 }
2706
2707 error = copyin(uap->mac, mp0, macsize);
2708 if (error) {
2709 FREE(mp0, M_MACTEMP);
2d21ac55 2710 return (error);
b0d623f7 2711 }
2d21ac55
A
2712
2713 /* Normalize to an array of user_addr_t */
2714 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
b0d623f7
A
2715 if (mp == NULL) {
2716 FREE(mp0, M_MACTEMP);
2717 return (ENOMEM);
2718 }
2719
2d21ac55
A
2720 for (i = 0; i < count; i++) {
2721 if (IS_64BIT_PROCESS(p))
2722 mp[i] = ((user_addr_t *)mp0)[i];
2723 else
2724 mp[i] = (user_addr_t)mp0[i];
2725 }
2726 FREE(mp0, M_MACTEMP);
2727 }
2728#endif
2729
2730
91447636 2731 fst.sfsp = sfsp;
2d21ac55 2732 fst.mp = mp;
91447636
A
2733 fst.flags = uap->flags;
2734 fst.count = 0;
2735 fst.error = 0;
2736 fst.maxcount = maxcount;
2737
2738
2739 vfs_iterate(0, getfsstat_callback, &fst);
2740
2d21ac55
A
2741 if (mp)
2742 FREE(mp, M_MACTEMP);
2743
91447636
A
2744 if (fst.error ) {
2745 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2746 return(fst.error);
2747 }
2748
2749 if (fst.sfsp && fst.count > fst.maxcount)
2750 *retval = fst.maxcount;
1c79356b 2751 else
91447636 2752 *retval = fst.count;
1c79356b
A
2753 return (0);
2754}
2755
2d21ac55
A
2756static int
2757getfsstat64_callback(mount_t mp, void * arg)
2758{
2759 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2760 struct vfsstatfs *sp;
2761 int error;
2762
2763 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2764 sp = &mp->mnt_vfsstat;
2765 /*
b0d623f7
A
2766 * If MNT_NOWAIT is specified, do not refresh the fsstat
2767 * cache. MNT_WAIT overrides MNT_NOWAIT.
2768 *
2769 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2770 * getfsstat, since the constants are out of the same
2771 * namespace.
2d21ac55 2772 */
b0d623f7
A
2773 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2774 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2d21ac55
A
2775 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2776 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2777 return(VFS_RETURNED);
2778 }
2779
2780 error = statfs64_common(mp, sp, fstp->sfsp);
2781 if (error) {
2782 fstp->error = error;
2783 return(VFS_RETURNED_DONE);
2784 }
2785 fstp->sfsp += sizeof(struct statfs64);
2786 }
2787 fstp->count++;
2788 return(VFS_RETURNED);
2789}
2790
2791/*
2792 * Get statistics on all file systems in 64 bit mode.
2793 */
2794int
2795getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2796{
2797 user_addr_t sfsp;
2798 int count, maxcount;
2799 struct getfsstat_struct fst;
2800
2801 maxcount = uap->bufsize / sizeof(struct statfs64);
2802
2803 sfsp = uap->buf;
2804 count = 0;
2805
2806 fst.sfsp = sfsp;
2807 fst.flags = uap->flags;
2808 fst.count = 0;
2809 fst.error = 0;
2810 fst.maxcount = maxcount;
2811
2812 vfs_iterate(0, getfsstat64_callback, &fst);
2813
2814 if (fst.error ) {
2815 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2816 return(fst.error);
2817 }
2818
2819 if (fst.sfsp && fst.count > fst.maxcount)
2820 *retval = fst.maxcount;
2821 else
2822 *retval = fst.count;
2823
2824 return (0);
2825}
2826
fe8ab488
A
2827/*
2828 * gets the associated vnode with the file descriptor passed.
2829 * as input
2830 *
2831 * INPUT
2832 * ctx - vfs context of caller
2833 * fd - file descriptor for which vnode is required.
2834 * vpp - Pointer to pointer to vnode to be returned.
2835 *
2836 * The vnode is returned with an iocount so any vnode obtained
2837 * by this call needs a vnode_put
2838 *
2839 */
2840static int
2841vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
2842{
2843 int error;
2844 vnode_t vp;
2845 struct fileproc *fp;
2846 proc_t p = vfs_context_proc(ctx);
2847
2848 *vpp = NULLVP;
2849
2850 error = fp_getfvp(p, fd, &fp, &vp);
2851 if (error)
2852 return (error);
2853
2854 error = vnode_getwithref(vp);
2855 if (error) {
2856 (void)fp_drop(p, fd, fp, 0);
2857 return (error);
2858 }
2859
2860 (void)fp_drop(p, fd, fp, 0);
2861 *vpp = vp;
2862 return (error);
2863}
2864
2865/*
2866 * Wrapper function around namei to start lookup from a directory
2867 * specified by a file descriptor ni_dirfd.
2868 *
2869 * In addition to all the errors returned by namei, this call can
2870 * return ENOTDIR if the file descriptor does not refer to a directory.
2871 * and EBADF if the file descriptor is not valid.
2872 */
2873int
2874nameiat(struct nameidata *ndp, int dirfd)
2875{
2876 if ((dirfd != AT_FDCWD) &&
2877 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
2878 !(ndp->ni_cnd.cn_flags & USEDVP)) {
2879 int error = 0;
2880 char c;
2881
2882 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
2883 error = copyin(ndp->ni_dirp, &c, sizeof(char));
2884 if (error)
2885 return (error);
2886 } else {
2887 c = *((char *)(ndp->ni_dirp));
2888 }
2889
2890 if (c != '/') {
2891 vnode_t dvp_at;
2892
2893 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
2894 &dvp_at);
2895 if (error)
2896 return (error);
2897
2898 if (vnode_vtype(dvp_at) != VDIR) {
2899 vnode_put(dvp_at);
2900 return (ENOTDIR);
2901 }
2902
2903 ndp->ni_dvp = dvp_at;
2904 ndp->ni_cnd.cn_flags |= USEDVP;
2905 error = namei(ndp);
2906 ndp->ni_cnd.cn_flags &= ~USEDVP;
2907 vnode_put(dvp_at);
2908 return (error);
2909 }
2910 }
2911
2912 return (namei(ndp));
2913}
2914
1c79356b
A
2915/*
2916 * Change current working directory to a given file descriptor.
2917 */
1c79356b 2918/* ARGSUSED */
2d21ac55
A
2919static int
2920common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
1c79356b 2921{
2d21ac55
A
2922 struct filedesc *fdp = p->p_fd;
2923 vnode_t vp;
2924 vnode_t tdp;
2925 vnode_t tvp;
1c79356b 2926 struct mount *mp;
1c79356b 2927 int error;
2d21ac55 2928 vfs_context_t ctx = vfs_context_current();
1c79356b 2929
b0d623f7 2930 AUDIT_ARG(fd, uap->fd);
2d21ac55
A
2931 if (per_thread && uap->fd == -1) {
2932 /*
2933 * Switching back from per-thread to per process CWD; verify we
2934 * in fact have one before proceeding. The only success case
2935 * for this code path is to return 0 preemptively after zapping
2936 * the thread structure contents.
2937 */
2938 thread_t th = vfs_context_thread(ctx);
2939 if (th) {
2940 uthread_t uth = get_bsdthread_info(th);
2941 tvp = uth->uu_cdir;
2942 uth->uu_cdir = NULLVP;
2943 if (tvp != NULLVP) {
2944 vnode_rele(tvp);
2945 return (0);
2946 }
2947 }
2948 return (EBADF);
2949 }
91447636
A
2950
2951 if ( (error = file_vnode(uap->fd, &vp)) )
2952 return(error);
2953 if ( (error = vnode_getwithref(vp)) ) {
2954 file_drop(uap->fd);
2955 return(error);
2956 }
55e303ae
A
2957
2958 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
2959
2d21ac55 2960 if (vp->v_type != VDIR) {
1c79356b 2961 error = ENOTDIR;
2d21ac55
A
2962 goto out;
2963 }
2964
2965#if CONFIG_MACF
2966 error = mac_vnode_check_chdir(ctx, vp);
2967 if (error)
2968 goto out;
2969#endif
2970 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2971 if (error)
2972 goto out;
2973
1c79356b 2974 while (!error && (mp = vp->v_mountedhere) != NULL) {
91447636
A
2975 if (vfs_busy(mp, LK_NOWAIT)) {
2976 error = EACCES;
2977 goto out;
55e303ae 2978 }
2d21ac55 2979 error = VFS_ROOT(mp, &tdp, ctx);
91447636 2980 vfs_unbusy(mp);
1c79356b
A
2981 if (error)
2982 break;
91447636 2983 vnode_put(vp);
1c79356b
A
2984 vp = tdp;
2985 }
91447636
A
2986 if (error)
2987 goto out;
2988 if ( (error = vnode_ref(vp)) )
2989 goto out;
2990 vnode_put(vp);
2991
2d21ac55
A
2992 if (per_thread) {
2993 thread_t th = vfs_context_thread(ctx);
2994 if (th) {
2995 uthread_t uth = get_bsdthread_info(th);
2996 tvp = uth->uu_cdir;
2997 uth->uu_cdir = vp;
b0d623f7 2998 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
2999 } else {
3000 vnode_rele(vp);
3001 return (ENOENT);
3002 }
3003 } else {
3004 proc_fdlock(p);
3005 tvp = fdp->fd_cdir;
3006 fdp->fd_cdir = vp;
3007 proc_fdunlock(p);
3008 }
91447636
A
3009
3010 if (tvp)
3011 vnode_rele(tvp);
3012 file_drop(uap->fd);
3013
1c79356b 3014 return (0);
91447636
A
3015out:
3016 vnode_put(vp);
3017 file_drop(uap->fd);
3018
3019 return(error);
1c79356b
A
3020}
3021
2d21ac55 3022int
b0d623f7 3023fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3024{
3025 return common_fchdir(p, uap, 0);
3026}
3027
3028int
b0d623f7 3029__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3030{
3031 return common_fchdir(p, (void *)uap, 1);
3032}
3033
1c79356b 3034/*
b0d623f7 3035 * Change current working directory (".").
2d21ac55
A
3036 *
3037 * Returns: 0 Success
3038 * change_dir:ENOTDIR
3039 * change_dir:???
3040 * vnode_ref:ENOENT No such file or directory
1c79356b 3041 */
1c79356b 3042/* ARGSUSED */
2d21ac55
A
3043static int
3044common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
1c79356b 3045{
2d21ac55 3046 struct filedesc *fdp = p->p_fd;
1c79356b
A
3047 int error;
3048 struct nameidata nd;
2d21ac55
A
3049 vnode_t tvp;
3050 vfs_context_t ctx = vfs_context_current();
91447636 3051
6d2010ae 3052 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
2d21ac55
A
3053 UIO_USERSPACE, uap->path, ctx);
3054 error = change_dir(&nd, ctx);
55e303ae 3055 if (error)
1c79356b 3056 return (error);
91447636
A
3057 if ( (error = vnode_ref(nd.ni_vp)) ) {
3058 vnode_put(nd.ni_vp);
3059 return (error);
3060 }
3061 /*
3062 * drop the iocount we picked up in change_dir
3063 */
3064 vnode_put(nd.ni_vp);
3065
2d21ac55
A
3066 if (per_thread) {
3067 thread_t th = vfs_context_thread(ctx);
3068 if (th) {
3069 uthread_t uth = get_bsdthread_info(th);
3070 tvp = uth->uu_cdir;
3071 uth->uu_cdir = nd.ni_vp;
b0d623f7 3072 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
3073 } else {
3074 vnode_rele(nd.ni_vp);
3075 return (ENOENT);
3076 }
3077 } else {
3078 proc_fdlock(p);
3079 tvp = fdp->fd_cdir;
3080 fdp->fd_cdir = nd.ni_vp;
3081 proc_fdunlock(p);
3082 }
91447636
A
3083
3084 if (tvp)
3085 vnode_rele(tvp);
3086
1c79356b
A
3087 return (0);
3088}
3089
b0d623f7
A
3090
3091/*
3092 * chdir
3093 *
3094 * Change current working directory (".") for the entire process
3095 *
3096 * Parameters: p Process requesting the call
3097 * uap User argument descriptor (see below)
3098 * retval (ignored)
3099 *
3100 * Indirect parameters: uap->path Directory path
3101 *
3102 * Returns: 0 Success
3103 * common_chdir: ENOTDIR
3104 * common_chdir: ENOENT No such file or directory
3105 * common_chdir: ???
3106 *
3107 */
2d21ac55 3108int
b0d623f7 3109chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3110{
3111 return common_chdir(p, (void *)uap, 0);
3112}
3113
b0d623f7
A
3114/*
3115 * __pthread_chdir
3116 *
3117 * Change current working directory (".") for a single thread
3118 *
3119 * Parameters: p Process requesting the call
3120 * uap User argument descriptor (see below)
3121 * retval (ignored)
3122 *
3123 * Indirect parameters: uap->path Directory path
3124 *
3125 * Returns: 0 Success
3126 * common_chdir: ENOTDIR
3127 * common_chdir: ENOENT No such file or directory
3128 * common_chdir: ???
3129 *
3130 */
2d21ac55 3131int
b0d623f7 3132__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3133{
3134 return common_chdir(p, (void *)uap, 1);
3135}
3136
3137
1c79356b
A
3138/*
3139 * Change notion of root (``/'') directory.
3140 */
1c79356b
A
3141/* ARGSUSED */
3142int
b0d623f7 3143chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
1c79356b 3144{
2d21ac55 3145 struct filedesc *fdp = p->p_fd;
1c79356b
A
3146 int error;
3147 struct nameidata nd;
2d21ac55
A
3148 vnode_t tvp;
3149 vfs_context_t ctx = vfs_context_current();
1c79356b 3150
91447636 3151 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
1c79356b
A
3152 return (error);
3153
6d2010ae 3154 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
2d21ac55
A
3155 UIO_USERSPACE, uap->path, ctx);
3156 error = change_dir(&nd, ctx);
55e303ae 3157 if (error)
1c79356b
A
3158 return (error);
3159
2d21ac55
A
3160#if CONFIG_MACF
3161 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3162 &nd.ni_cnd);
3163 if (error) {
91447636
A
3164 vnode_put(nd.ni_vp);
3165 return (error);
3166 }
2d21ac55
A
3167#endif
3168
91447636
A
3169 if ( (error = vnode_ref(nd.ni_vp)) ) {
3170 vnode_put(nd.ni_vp);
1c79356b
A
3171 return (error);
3172 }
91447636 3173 vnode_put(nd.ni_vp);
1c79356b 3174
91447636 3175 proc_fdlock(p);
fa4905b1 3176 tvp = fdp->fd_rdir;
1c79356b 3177 fdp->fd_rdir = nd.ni_vp;
91447636
A
3178 fdp->fd_flags |= FD_CHROOT;
3179 proc_fdunlock(p);
3180
fa4905b1 3181 if (tvp != NULL)
91447636
A
3182 vnode_rele(tvp);
3183
1c79356b
A
3184 return (0);
3185}
3186
3187/*
3188 * Common routine for chroot and chdir.
2d21ac55
A
3189 *
3190 * Returns: 0 Success
3191 * ENOTDIR Not a directory
3192 * namei:??? [anything namei can return]
3193 * vnode_authorize:??? [anything vnode_authorize can return]
1c79356b
A
3194 */
3195static int
91447636 3196change_dir(struct nameidata *ndp, vfs_context_t ctx)
1c79356b 3197{
2d21ac55 3198 vnode_t vp;
1c79356b
A
3199 int error;
3200
91447636 3201 if ((error = namei(ndp)))
1c79356b 3202 return (error);
91447636 3203 nameidone(ndp);
1c79356b 3204 vp = ndp->ni_vp;
2d21ac55
A
3205
3206 if (vp->v_type != VDIR) {
91447636 3207 vnode_put(vp);
2d21ac55
A
3208 return (ENOTDIR);
3209 }
3210
3211#if CONFIG_MACF
3212 error = mac_vnode_check_chdir(ctx, vp);
3213 if (error) {
3214 vnode_put(vp);
3215 return (error);
3216 }
3217#endif
3218
3219 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3220 if (error) {
3221 vnode_put(vp);
3222 return (error);
3223 }
91447636 3224
1c79356b
A
3225 return (error);
3226}
3227
fe8ab488
A
3228/*
3229 * Free the vnode data (for directories) associated with the file glob.
3230 */
3231struct fd_vn_data *
3232fg_vn_data_alloc(void)
3233{
3234 struct fd_vn_data *fvdata;
3235
3236 /* Allocate per fd vnode data */
3237 MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
3238 M_FD_VN_DATA, M_WAITOK | M_ZERO);
3239 lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
3240 return fvdata;
3241}
3242
3243/*
3244 * Free the vnode data (for directories) associated with the file glob.
3245 */
3246void
3247fg_vn_data_free(void *fgvndata)
3248{
3249 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3250
3251 if (fvdata->fv_buf)
3252 FREE(fvdata->fv_buf, M_FD_DIRBUF);
3253 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3254 FREE(fvdata, M_FD_VN_DATA);
3255}
3256
1c79356b
A
3257/*
3258 * Check permissions, allocate an open file structure,
3259 * and call the device open routine if any.
2d21ac55
A
3260 *
3261 * Returns: 0 Success
3262 * EINVAL
3263 * EINTR
3264 * falloc:ENFILE
3265 * falloc:EMFILE
3266 * falloc:ENOMEM
3267 * vn_open_auth:???
3268 * dupfdopen:???
3269 * VNOP_ADVLOCK:???
3270 * vnode_setsize:???
b0d623f7
A
3271 *
3272 * XXX Need to implement uid, gid
1c79356b 3273 */
2d21ac55 3274int
39236c6e
A
3275open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3276 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3277 int32_t *retval)
1c79356b 3278{
2d21ac55
A
3279 proc_t p = vfs_context_proc(ctx);
3280 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2d21ac55
A
3281 struct fileproc *fp;
3282 vnode_t vp;
91447636 3283 int flags, oflags;
1c79356b
A
3284 int type, indx, error;
3285 struct flock lf;
2d21ac55
A
3286 int no_controlling_tty = 0;
3287 int deny_controlling_tty = 0;
3288 struct session *sessp = SESSION_NULL;
ccc36f2f 3289
91447636 3290 oflags = uflags;
ccc36f2f
A
3291
3292 if ((oflags & O_ACCMODE) == O_ACCMODE)
3293 return(EINVAL);
91447636
A
3294 flags = FFLAGS(uflags);
3295
3296 AUDIT_ARG(fflags, oflags);
3297 AUDIT_ARG(mode, vap->va_mode);
3298
39236c6e
A
3299 if ((error = falloc_withalloc(p,
3300 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
1c79356b 3301 return (error);
91447636 3302 }
2d21ac55 3303 uu->uu_dupfd = -indx - 1;
91447636 3304
2d21ac55
A
3305 if (!(p->p_flag & P_CONTROLT)) {
3306 sessp = proc_session(p);
3307 no_controlling_tty = 1;
3308 /*
3309 * If conditions would warrant getting a controlling tty if
3310 * the device being opened is a tty (see ttyopen in tty.c),
3311 * but the open flags deny it, set a flag in the session to
3312 * prevent it.
3313 */
3314 if (SESS_LEADER(p, sessp) &&
3315 sessp->s_ttyvp == NULL &&
3316 (flags & O_NOCTTY)) {
3317 session_lock(sessp);
3318 sessp->s_flags |= S_NOCTTY;
3319 session_unlock(sessp);
3320 deny_controlling_tty = 1;
3321 }
3322 }
3323
3324 if ((error = vn_open_auth(ndp, &flags, vap))) {
3325 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
39236c6e 3326 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
2d21ac55 3327 fp_drop(p, indx, NULL, 0);
91447636 3328 *retval = indx;
2d21ac55
A
3329 if (deny_controlling_tty) {
3330 session_lock(sessp);
3331 sessp->s_flags &= ~S_NOCTTY;
3332 session_unlock(sessp);
3333 }
3334 if (sessp != SESSION_NULL)
3335 session_rele(sessp);
91447636
A
3336 return (0);
3337 }
1c79356b
A
3338 }
3339 if (error == ERESTART)
91447636
A
3340 error = EINTR;
3341 fp_free(p, indx, fp);
3342
2d21ac55
A
3343 if (deny_controlling_tty) {
3344 session_lock(sessp);
3345 sessp->s_flags &= ~S_NOCTTY;
3346 session_unlock(sessp);
3347 }
3348 if (sessp != SESSION_NULL)
3349 session_rele(sessp);
1c79356b
A
3350 return (error);
3351 }
2d21ac55
A
3352 uu->uu_dupfd = 0;
3353 vp = ndp->ni_vp;
55e303ae 3354
91447636 3355 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
91447636
A
3356 fp->f_fglob->fg_ops = &vnops;
3357 fp->f_fglob->fg_data = (caddr_t)vp;
3358
316670eb
A
3359#if CONFIG_PROTECT
3360 if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) {
3361 if (vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) {
3362 fp->f_fglob->fg_flag |= FENCRYPTED;
3363 }
3364 }
3365#endif
3366
1c79356b
A
3367 if (flags & (O_EXLOCK | O_SHLOCK)) {
3368 lf.l_whence = SEEK_SET;
3369 lf.l_start = 0;
3370 lf.l_len = 0;
3371 if (flags & O_EXLOCK)
3372 lf.l_type = F_WRLCK;
3373 else
3374 lf.l_type = F_RDLCK;
3375 type = F_FLOCK;
3376 if ((flags & FNONBLOCK) == 0)
3377 type |= F_WAIT;
2d21ac55
A
3378#if CONFIG_MACF
3379 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3380 F_SETLK, &lf);
3381 if (error)
3382 goto bad;
3383#endif
39236c6e 3384 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
55e303ae 3385 goto bad;
91447636 3386 fp->f_fglob->fg_flag |= FHASLOCK;
1c79356b 3387 }
55e303ae 3388
91447636
A
3389 /* try to truncate by setting the size attribute */
3390 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3391 goto bad;
55e303ae 3392
2d21ac55
A
3393 /*
3394 * If the open flags denied the acquisition of a controlling tty,
3395 * clear the flag in the session structure that prevented the lower
3396 * level code from assigning one.
3397 */
3398 if (deny_controlling_tty) {
3399 session_lock(sessp);
3400 sessp->s_flags &= ~S_NOCTTY;
3401 session_unlock(sessp);
3402 }
3403
3404 /*
3405 * If a controlling tty was set by the tty line discipline, then we
3406 * want to set the vp of the tty into the session structure. We have
3407 * a race here because we can't get to the vp for the tp in ttyopen,
3408 * because it's not passed as a parameter in the open path.
3409 */
3410 if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
3411 vnode_t ttyvp;
6d2010ae 3412
2d21ac55
A
3413 session_lock(sessp);
3414 ttyvp = sessp->s_ttyvp;
3415 sessp->s_ttyvp = vp;
3416 sessp->s_ttyvid = vnode_vid(vp);
3417 session_unlock(sessp);
fe8ab488
A
3418 }
3419
3420 /*
3421 * For directories we hold some additional information in the fd.
3422 */
3423 if (vnode_vtype(vp) == VDIR) {
3424 fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
3425 } else {
3426 fp->f_fglob->fg_vn_data = NULL;
2d21ac55
A
3427 }
3428
91447636 3429 vnode_put(vp);
55e303ae 3430
91447636 3431 proc_fdlock(p);
6d2010ae
A
3432 if (flags & O_CLOEXEC)
3433 *fdflags(p, indx) |= UF_EXCLOSE;
39236c6e
A
3434 if (flags & O_CLOFORK)
3435 *fdflags(p, indx) |= UF_FORKCLOSE;
6601e61a 3436 procfdtbl_releasefd(p, indx, NULL);
91447636
A
3437 fp_drop(p, indx, fp, 1);
3438 proc_fdunlock(p);
3439
1c79356b 3440 *retval = indx;
91447636 3441
2d21ac55
A
3442 if (sessp != SESSION_NULL)
3443 session_rele(sessp);
1c79356b 3444 return (0);
55e303ae 3445bad:
2d21ac55
A
3446 if (deny_controlling_tty) {
3447 session_lock(sessp);
3448 sessp->s_flags &= ~S_NOCTTY;
3449 session_unlock(sessp);
3450 }
3451 if (sessp != SESSION_NULL)
3452 session_rele(sessp);
3453
39236c6e 3454 struct vfs_context context = *vfs_context_current();
2d21ac55 3455 context.vc_ucred = fp->f_fglob->fg_cred;
fe8ab488
A
3456
3457 if ((fp->f_fglob->fg_flag & FHASLOCK) &&
3458 (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
3459 lf.l_whence = SEEK_SET;
3460 lf.l_start = 0;
3461 lf.l_len = 0;
3462 lf.l_type = F_UNLCK;
3463
3464 (void)VNOP_ADVLOCK(
3465 vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
3466 }
2d21ac55
A
3467
3468 vn_close(vp, fp->f_fglob->fg_flag, &context);
91447636
A
3469 vnode_put(vp);
3470 fp_free(p, indx, fp);
3471
55e303ae 3472 return (error);
1c79356b
A
3473}
3474
fe8ab488
A
3475/*
3476 * While most of the *at syscall handlers can call nameiat() which
3477 * is a wrapper around namei, the use of namei and initialisation
3478 * of nameidata are far removed and in different functions - namei
3479 * gets called in vn_open_auth for open1. So we'll just do here what
3480 * nameiat() does.
3481 */
3482static int
3483open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3484 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
3485 int dirfd)
3486{
3487 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
3488 int error;
3489 char c;
3490
3491 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3492 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3493 if (error)
3494 return (error);
3495 } else {
3496 c = *((char *)(ndp->ni_dirp));
3497 }
3498
3499 if (c != '/') {
3500 vnode_t dvp_at;
3501
3502 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3503 &dvp_at);
3504 if (error)
3505 return (error);
3506
3507 if (vnode_vtype(dvp_at) != VDIR) {
3508 vnode_put(dvp_at);
3509 return (ENOTDIR);
3510 }
3511
3512 ndp->ni_dvp = dvp_at;
3513 ndp->ni_cnd.cn_flags |= USEDVP;
3514 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
3515 retval);
3516 vnode_put(dvp_at);
3517 return (error);
3518 }
3519 }
3520
3521 return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
3522}
3523
0c530ab8 3524/*
b0d623f7 3525 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
0c530ab8
A
3526 *
3527 * Parameters: p Process requesting the open
3528 * uap User argument descriptor (see below)
3529 * retval Pointer to an area to receive the
3530 * return calue from the system call
3531 *
3532 * Indirect: uap->path Path to open (same as 'open')
3533 * uap->flags Flags to open (same as 'open'
3534 * uap->uid UID to set, if creating
3535 * uap->gid GID to set, if creating
3536 * uap->mode File mode, if creating (same as 'open')
3537 * uap->xsecurity ACL to set, if creating
3538 *
3539 * Returns: 0 Success
3540 * !0 errno value
3541 *
3542 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3543 *
3544 * XXX: We should enummerate the possible errno values here, and where
3545 * in the code they originated.
3546 */
1c79356b 3547int
b0d623f7 3548open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
91447636 3549{
2d21ac55 3550 struct filedesc *fdp = p->p_fd;
91447636
A
3551 int ciferror;
3552 kauth_filesec_t xsecdst;
3553 struct vnode_attr va;
2d21ac55 3554 struct nameidata nd;
91447636
A
3555 int cmode;
3556
b0d623f7
A
3557 AUDIT_ARG(owner, uap->uid, uap->gid);
3558
91447636
A
3559 xsecdst = NULL;
3560 if ((uap->xsecurity != USER_ADDR_NULL) &&
3561 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3562 return ciferror;
3563
91447636
A
3564 VATTR_INIT(&va);
3565 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3566 VATTR_SET(&va, va_mode, cmode);
3567 if (uap->uid != KAUTH_UID_NONE)
3568 VATTR_SET(&va, va_uid, uap->uid);
3569 if (uap->gid != KAUTH_GID_NONE)
3570 VATTR_SET(&va, va_gid, uap->gid);
3571 if (xsecdst != NULL)
3572 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3573
6d2010ae
A
3574 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3575 uap->path, vfs_context_current());
2d21ac55 3576
39236c6e
A
3577 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3578 fileproc_alloc_init, NULL, retval);
91447636
A
3579 if (xsecdst != NULL)
3580 kauth_filesec_free(xsecdst);
3581
3582 return ciferror;
3583}
3584
316670eb
A
3585/*
3586 * Go through the data-protected atomically controlled open (2)
3587 *
3588 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3589 */
3590int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3591 int flags = uap->flags;
3592 int class = uap->class;
3593 int dpflags = uap->dpflags;
3594
3595 /*
3596 * Follow the same path as normal open(2)
3597 * Look up the item if it exists, and acquire the vnode.
3598 */
3599 struct filedesc *fdp = p->p_fd;
3600 struct vnode_attr va;
3601 struct nameidata nd;
3602 int cmode;
3603 int error;
3604
3605 VATTR_INIT(&va);
3606 /* Mask off all but regular access permissions */
3607 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3608 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3609
3610 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3611 uap->path, vfs_context_current());
3612
3613 /*
3614 * Initialize the extra fields in vnode_attr to pass down our
3615 * extra fields.
3616 * 1. target cprotect class.
3617 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3618 */
3619 if (flags & O_CREAT) {
3620 VATTR_SET(&va, va_dataprotect_class, class);
3621 }
3622
3623 if (dpflags & O_DP_GETRAWENCRYPTED) {
3624 if ( flags & (O_RDWR | O_WRONLY)) {
3625 /* Not allowed to write raw encrypted bytes */
3626 return EINVAL;
3627 }
3628 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3629 }
3630
39236c6e
A
3631 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3632 fileproc_alloc_init, NULL, retval);
316670eb
A
3633
3634 return error;
3635}
3636
fe8ab488
A
3637static int
3638openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
3639 int fd, enum uio_seg segflg, int *retval)
2d21ac55 3640{
fe8ab488 3641 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
91447636 3642 struct vnode_attr va;
2d21ac55 3643 struct nameidata nd;
91447636 3644 int cmode;
1c79356b 3645
91447636
A
3646 VATTR_INIT(&va);
3647 /* Mask off all but regular access permissions */
fe8ab488 3648 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
91447636
A
3649 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3650
fe8ab488
A
3651 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
3652 segflg, path, ctx);
2d21ac55 3653
fe8ab488
A
3654 return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
3655 retval, fd));
1c79356b 3656}
91447636 3657
fe8ab488
A
3658int
3659open(proc_t p, struct open_args *uap, int32_t *retval)
3660{
3661 __pthread_testcancel(1);
3662 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3663}
1c79356b 3664
fe8ab488
A
3665int
3666open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3667 int32_t *retval)
3668{
3669 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3670 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3671}
91447636 3672
1c79356b 3673int
fe8ab488
A
3674openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3675 int32_t *retval)
1c79356b 3676{
fe8ab488
A
3677 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3678 uap->mode, uap->fd, UIO_USERSPACE, retval));
3679}
91447636 3680
fe8ab488
A
3681int
3682openat(proc_t p, struct openat_args *uap, int32_t *retval)
3683{
3684 __pthread_testcancel(1);
3685 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3686}
3687
3688/*
3689 * openbyid_np: open a file given a file system id and a file system object id
3690 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3691 * file systems that don't support object ids it is a node id (uint64_t).
3692 *
3693 * Parameters: p Process requesting the open
3694 * uap User argument descriptor (see below)
3695 * retval Pointer to an area to receive the
3696 * return calue from the system call
3697 *
3698 * Indirect: uap->path Path to open (same as 'open')
3699 *
3700 * uap->fsid id of target file system
3701 * uap->objid id of target file system object
3702 * uap->flags Flags to open (same as 'open')
3703 *
3704 * Returns: 0 Success
3705 * !0 errno value
3706 *
3707 *
3708 * XXX: We should enummerate the possible errno values here, and where
3709 * in the code they originated.
3710 */
3711int
3712openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
3713{
3714 fsid_t fsid;
3715 uint64_t objid;
3716 int error;
3717 char *buf = NULL;
3718 int buflen = MAXPATHLEN;
3719 int pathlen = 0;
3720 vfs_context_t ctx = vfs_context_current();
3721
3722 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
3723 return (error);
3724 }
3725
3726 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3727 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
3728 return (error);
3729 }
3730
3731 AUDIT_ARG(value32, fsid.val[0]);
3732 AUDIT_ARG(value64, objid);
3733
3734 /*resolve path from fsis, objid*/
3735 do {
3736 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
3737 if (buf == NULL) {
3738 return (ENOMEM);
3739 }
3740
3741 error = fsgetpath_internal(
3742 ctx, fsid.val[0], objid,
3743 buflen, buf, &pathlen);
3744
3745 if (error) {
3746 FREE(buf, M_TEMP);
3747 buf = NULL;
3748 }
3749 } while (error == ENOSPC && (buflen += MAXPATHLEN));
3750
3751 if (error) {
3752 return error;
3753 }
3754
3755 buf[pathlen] = 0;
3756
3757 error = openat_internal(
3758 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
3759
3760 FREE(buf, M_TEMP);
3761
3762 return error;
3763}
3764
3765
3766/*
3767 * Create a special file.
3768 */
3769static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3770
3771int
3772mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
3773{
3774 struct vnode_attr va;
3775 vfs_context_t ctx = vfs_context_current();
3776 int error;
3777 struct nameidata nd;
3778 vnode_t vp, dvp;
3779
3780 VATTR_INIT(&va);
3781 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3782 VATTR_SET(&va, va_rdev, uap->dev);
91447636
A
3783
3784 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3785 if ((uap->mode & S_IFMT) == S_IFIFO)
2d21ac55 3786 return(mkfifo1(ctx, uap->path, &va));
1c79356b 3787
55e303ae 3788 AUDIT_ARG(mode, uap->mode);
b0d623f7 3789 AUDIT_ARG(value32, uap->dev);
91447636 3790
2d21ac55 3791 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
1c79356b 3792 return (error);
6d2010ae 3793 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
2d21ac55 3794 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
3795 error = namei(&nd);
3796 if (error)
1c79356b 3797 return (error);
91447636 3798 dvp = nd.ni_dvp;
1c79356b 3799 vp = nd.ni_vp;
91447636
A
3800
3801 if (vp != NULL) {
1c79356b 3802 error = EEXIST;
91447636 3803 goto out;
1c79356b 3804 }
55e303ae 3805
91447636
A
3806 switch (uap->mode & S_IFMT) {
3807 case S_IFMT: /* used by badsect to flag bad sectors */
3808 VATTR_SET(&va, va_type, VBAD);
3809 break;
3810 case S_IFCHR:
3811 VATTR_SET(&va, va_type, VCHR);
3812 break;
3813 case S_IFBLK:
3814 VATTR_SET(&va, va_type, VBLK);
3815 break;
91447636
A
3816 default:
3817 error = EINVAL;
3818 goto out;
3819 }
2d21ac55
A
3820
3821#if CONFIG_MACF
6d2010ae
A
3822 error = mac_vnode_check_create(ctx,
3823 nd.ni_dvp, &nd.ni_cnd, &va);
3824 if (error)
3825 goto out;
2d21ac55
A
3826#endif
3827
3828 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3829 goto out;
3830
6d2010ae 3831 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
91447636
A
3832 goto out;
3833
3834 if (vp) {
3835 int update_flags = 0;
3836
3837 // Make sure the name & parent pointers are hooked up
3838 if (vp->v_name == NULL)
3839 update_flags |= VNODE_UPDATE_NAME;
3840 if (vp->v_parent == NULLVP)
3841 update_flags |= VNODE_UPDATE_PARENT;
3842
3843 if (update_flags)
3844 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3845
2d21ac55
A
3846#if CONFIG_FSE
3847 add_fsevent(FSE_CREATE_FILE, ctx,
91447636
A
3848 FSE_ARG_VNODE, vp,
3849 FSE_ARG_DONE);
2d21ac55 3850#endif
1c79356b 3851 }
91447636
A
3852
3853out:
3854 /*
3855 * nameidone has to happen before we vnode_put(dvp)
3856 * since it may need to release the fs_nodelock on the dvp
3857 */
3858 nameidone(&nd);
3859
3860 if (vp)
3861 vnode_put(vp);
3862 vnode_put(dvp);
3863
1c79356b
A
3864 return (error);
3865}
3866
3867/*
3868 * Create a named pipe.
2d21ac55
A
3869 *
3870 * Returns: 0 Success
3871 * EEXIST
3872 * namei:???
3873 * vnode_authorize:???
3874 * vn_create:???
1c79356b 3875 */
91447636
A
3876static int
3877mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
1c79356b 3878{
91447636 3879 vnode_t vp, dvp;
1c79356b
A
3880 int error;
3881 struct nameidata nd;
55e303ae 3882
6d2010ae 3883 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
91447636 3884 UIO_USERSPACE, upath, ctx);
55e303ae
A
3885 error = namei(&nd);
3886 if (error)
1c79356b 3887 return (error);
91447636
A
3888 dvp = nd.ni_dvp;
3889 vp = nd.ni_vp;
3890
3891 /* check that this is a new file and authorize addition */
3892 if (vp != NULL) {
3893 error = EEXIST;
3894 goto out;
3895 }
2d21ac55
A
3896 VATTR_SET(vap, va_type, VFIFO);
3897
6d2010ae 3898 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
2d21ac55 3899 goto out;
2d21ac55 3900
6d2010ae 3901 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
91447636
A
3902out:
3903 /*
3904 * nameidone has to happen before we vnode_put(dvp)
3905 * since it may need to release the fs_nodelock on the dvp
3906 */
3907 nameidone(&nd);
3908
3909 if (vp)
3910 vnode_put(vp);
3911 vnode_put(dvp);
3912
55e303ae 3913 return error;
91447636
A
3914}
3915
0c530ab8
A
3916
3917/*
b0d623f7 3918 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
0c530ab8
A
3919 *
3920 * Parameters: p Process requesting the open
3921 * uap User argument descriptor (see below)
3922 * retval (Ignored)
3923 *
3924 * Indirect: uap->path Path to fifo (same as 'mkfifo')
3925 * uap->uid UID to set
3926 * uap->gid GID to set
3927 * uap->mode File mode to set (same as 'mkfifo')
3928 * uap->xsecurity ACL to set, if creating
3929 *
3930 * Returns: 0 Success
3931 * !0 errno value
3932 *
3933 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3934 *
3935 * XXX: We should enummerate the possible errno values here, and where
3936 * in the code they originated.
3937 */
91447636 3938int
b0d623f7 3939mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
91447636
A
3940{
3941 int ciferror;
3942 kauth_filesec_t xsecdst;
91447636
A
3943 struct vnode_attr va;
3944
b0d623f7
A
3945 AUDIT_ARG(owner, uap->uid, uap->gid);
3946
91447636
A
3947 xsecdst = KAUTH_FILESEC_NONE;
3948 if (uap->xsecurity != USER_ADDR_NULL) {
3949 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
3950 return ciferror;
3951 }
3952
91447636
A
3953 VATTR_INIT(&va);
3954 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3955 if (uap->uid != KAUTH_UID_NONE)
3956 VATTR_SET(&va, va_uid, uap->uid);
3957 if (uap->gid != KAUTH_GID_NONE)
3958 VATTR_SET(&va, va_gid, uap->gid);
3959 if (xsecdst != KAUTH_FILESEC_NONE)
3960 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3961
2d21ac55 3962 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
91447636
A
3963
3964 if (xsecdst != KAUTH_FILESEC_NONE)
3965 kauth_filesec_free(xsecdst);
3966 return ciferror;
3967}
3968
3969/* ARGSUSED */
3970int
b0d623f7 3971mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
91447636 3972{
91447636
A
3973 struct vnode_attr va;
3974
91447636
A
3975 VATTR_INIT(&va);
3976 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3977
2d21ac55 3978 return(mkfifo1(vfs_context_current(), uap->path, &va));
1c79356b
A
3979}
3980
b0d623f7
A
3981
/*
 * my_strrchr: return a pointer to the last occurrence of 'ch' in the
 * NUL-terminated string 'p', or NULL if it does not occur.  The terminator
 * takes part in the search, so ch == '\0' yields a pointer to the
 * terminating NUL.
 */
static char *
my_strrchr(char *p, int ch)
{
	char *last = NULL;

	do {
		if (*p == ch) {
			last = p;
		}
	} while (*p++ != '\0');

	return (last);
}
3995
3996extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
3997
3998int
3999safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4000{
4001 int ret, len = _len;
4002
4003 *truncated_path = 0;
4004 ret = vn_getpath(dvp, path, &len);
4005 if (ret == 0 && len < (MAXPATHLEN - 1)) {
4006 if (leafname) {
4007 path[len-1] = '/';
4008 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
4009 if (len > MAXPATHLEN) {
4010 char *ptr;
4011
4012 // the string got truncated!
4013 *truncated_path = 1;
4014 ptr = my_strrchr(path, '/');
4015 if (ptr) {
4016 *ptr = '\0'; // chop off the string at the last directory component
4017 }
4018 len = strlen(path) + 1;
4019 }
4020 }
4021 } else if (ret == 0) {
4022 *truncated_path = 1;
4023 } else if (ret != 0) {
4024 struct vnode *mydvp=dvp;
4025
4026 if (ret != ENOSPC) {
4027 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4028 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
4029 }
4030 *truncated_path = 1;
4031
4032 do {
4033 if (mydvp->v_parent != NULL) {
4034 mydvp = mydvp->v_parent;
4035 } else if (mydvp->v_mount) {
4036 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4037 break;
4038 } else {
4039 // no parent and no mount point? only thing is to punt and say "/" changed
4040 strlcpy(path, "/", _len);
4041 len = 2;
4042 mydvp = NULL;
4043 }
4044
4045 if (mydvp == NULL) {
4046 break;
4047 }
4048
4049 len = _len;
4050 ret = vn_getpath(mydvp, path, &len);
4051 } while (ret == ENOSPC);
4052 }
4053
4054 return len;
4055}
4056
4057
1c79356b
A
4058/*
4059 * Make a hard file link.
2d21ac55
A
4060 *
4061 * Returns: 0 Success
4062 * EPERM
4063 * EEXIST
4064 * EXDEV
4065 * namei:???
4066 * vnode_authorize:???
4067 * VNOP_LINK:???
1c79356b 4068 */
1c79356b 4069/* ARGSUSED */
fe8ab488
A
4070static int
4071linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
4072 user_addr_t link, int flag, enum uio_seg segflg)
1c79356b 4073{
91447636 4074 vnode_t vp, dvp, lvp;
1c79356b 4075 struct nameidata nd;
fe8ab488 4076 int follow;
1c79356b 4077 int error;
b0d623f7 4078#if CONFIG_FSE
91447636 4079 fse_info finfo;
b0d623f7 4080#endif
91447636 4081 int need_event, has_listeners;
2d21ac55 4082 char *target_path = NULL;
b0d623f7 4083 int truncated=0;
1c79356b 4084
91447636
A
4085 vp = dvp = lvp = NULLVP;
4086
4087 /* look up the object we are linking to */
fe8ab488
A
4088 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
4089 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
4090 segflg, path, ctx);
4091
4092 error = nameiat(&nd, fd1);
55e303ae 4093 if (error)
1c79356b
A
4094 return (error);
4095 vp = nd.ni_vp;
91447636
A
4096
4097 nameidone(&nd);
4098
2d21ac55
A
4099 /*
4100 * Normally, linking to directories is not supported.
4101 * However, some file systems may have limited support.
4102 */
91447636 4103 if (vp->v_type == VDIR) {
2d21ac55
A
4104 if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
4105 error = EPERM; /* POSIX */
4106 goto out;
4107 }
4108 /* Linking to a directory requires ownership. */
4109 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
4110 struct vnode_attr dva;
4111
4112 VATTR_INIT(&dva);
4113 VATTR_WANTED(&dva, va_uid);
4114 if (vnode_getattr(vp, &dva, ctx) != 0 ||
4115 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
4116 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
4117 error = EACCES;
4118 goto out;
4119 }
4120 }
91447636
A
4121 }
4122
91447636 4123 /* lookup the target node */
6d2010ae
A
4124#if CONFIG_TRIGGERS
4125 nd.ni_op = OP_LINK;
4126#endif
91447636 4127 nd.ni_cnd.cn_nameiop = CREATE;
2d21ac55 4128 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
fe8ab488
A
4129 nd.ni_dirp = link;
4130 error = nameiat(&nd, fd2);
91447636
A
4131 if (error != 0)
4132 goto out;
4133 dvp = nd.ni_dvp;
4134 lvp = nd.ni_vp;
2d21ac55
A
4135
4136#if CONFIG_MACF
4137 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
4138 goto out2;
4139#endif
4140
4141 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4142 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
4143 goto out2;
4144
91447636
A
4145 /* target node must not exist */
4146 if (lvp != NULLVP) {
4147 error = EEXIST;
4148 goto out2;
4149 }
4150 /* cannot link across mountpoints */
4151 if (vnode_mount(vp) != vnode_mount(dvp)) {
4152 error = EXDEV;
4153 goto out2;
4154 }
4155
4156 /* authorize creation of the target note */
2d21ac55 4157 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
91447636
A
4158 goto out2;
4159
4160 /* and finally make the link */
2d21ac55 4161 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
91447636
A
4162 if (error)
4163 goto out2;
4164
39236c6e
A
4165#if CONFIG_MACF
4166 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
4167#endif
4168
2d21ac55 4169#if CONFIG_FSE
91447636 4170 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
2d21ac55
A
4171#else
4172 need_event = 0;
4173#endif
91447636
A
4174 has_listeners = kauth_authorize_fileop_has_listeners();
4175
4176 if (need_event || has_listeners) {
91447636
A
4177 char *link_to_path = NULL;
4178 int len, link_name_len;
4179
4180 /* build the path to the new link file */
2d21ac55
A
4181 GET_PATH(target_path);
4182 if (target_path == NULL) {
4183 error = ENOMEM;
4184 goto out2;
4185 }
4186
b0d623f7 4187 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
91447636
A
4188
4189 if (has_listeners) {
4190 /* build the path to file we are linking to */
2d21ac55
A
4191 GET_PATH(link_to_path);
4192 if (link_to_path == NULL) {
4193 error = ENOMEM;
4194 goto out2;
4195 }
4196
91447636 4197 link_name_len = MAXPATHLEN;
fe8ab488
A
4198 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
4199 /*
4200 * Call out to allow 3rd party notification of rename.
4201 * Ignore result of kauth_authorize_fileop call.
4202 */
4203 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
4204 (uintptr_t)link_to_path,
4205 (uintptr_t)target_path);
4206 }
2d21ac55
A
4207 if (link_to_path != NULL) {
4208 RELEASE_PATH(link_to_path);
4209 }
91447636 4210 }
2d21ac55 4211#if CONFIG_FSE
91447636
A
4212 if (need_event) {
4213 /* construct fsevent */
2d21ac55 4214 if (get_fse_info(vp, &finfo, ctx) == 0) {
b0d623f7
A
4215 if (truncated) {
4216 finfo.mode |= FSE_TRUNCATED_PATH;
4217 }
4218
91447636 4219 // build the path to the destination of the link
2d21ac55 4220 add_fsevent(FSE_CREATE_FILE, ctx,
91447636
A
4221 FSE_ARG_STRING, len, target_path,
4222 FSE_ARG_FINFO, &finfo,
4223 FSE_ARG_DONE);
1c79356b 4224 }
b0d623f7
A
4225 if (vp->v_parent) {
4226 add_fsevent(FSE_STAT_CHANGED, ctx,
4227 FSE_ARG_VNODE, vp->v_parent,
4228 FSE_ARG_DONE);
4229 }
1c79356b 4230 }
2d21ac55 4231#endif
1c79356b 4232 }
91447636
A
4233out2:
4234 /*
4235 * nameidone has to happen before we vnode_put(dvp)
4236 * since it may need to release the fs_nodelock on the dvp
4237 */
4238 nameidone(&nd);
2d21ac55
A
4239 if (target_path != NULL) {
4240 RELEASE_PATH(target_path);
4241 }
91447636
A
4242out:
4243 if (lvp)
4244 vnode_put(lvp);
4245 if (dvp)
4246 vnode_put(dvp);
4247 vnode_put(vp);
4248 return (error);
4249}
1c79356b 4250
fe8ab488
A
4251int
4252link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4253{
4254 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4255 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4256}
4257
4258int
4259linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4260{
4261 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4262 return (EINVAL);
4263
4264 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4265 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4266}
4267
1c79356b
A
4268/*
4269 * Make a symbolic link.
91447636
A
4270 *
4271 * We could add support for ACLs here too...
1c79356b 4272 */
1c79356b 4273/* ARGSUSED */
fe8ab488
A
4274static int
4275symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
4276 user_addr_t link, enum uio_seg segflg)
1c79356b 4277{
91447636
A
4278 struct vnode_attr va;
4279 char *path;
1c79356b
A
4280 int error;
4281 struct nameidata nd;
91447636 4282 vnode_t vp, dvp;
fe8ab488 4283 uint32_t dfflags; // Directory file flags
1c79356b 4284 size_t dummy=0;
fe8ab488
A
4285 proc_t p;
4286
4287 error = 0;
4288 if (UIO_SEG_IS_USER_SPACE(segflg)) {
4289 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
4290 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
4291 } else {
4292 path = (char *)path_data;
4293 }
91447636 4294 if (error)
1c79356b 4295 goto out;
55e303ae 4296 AUDIT_ARG(text, path); /* This is the link string */
91447636 4297
fe8ab488
A
4298 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
4299 segflg, link, ctx);
4300
4301 error = nameiat(&nd, fd);
55e303ae 4302 if (error)
1c79356b 4303 goto out;
91447636
A
4304 dvp = nd.ni_dvp;
4305 vp = nd.ni_vp;
55e303ae 4306
fe8ab488 4307 p = vfs_context_proc(ctx);
2d21ac55
A
4308 VATTR_INIT(&va);
4309 VATTR_SET(&va, va_type, VLNK);
4310 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
fe8ab488
A
4311
4312 /*
4313 * Handle inheritance of restricted flag
4314 */
4315 error = vnode_flags(dvp, &dfflags, ctx);
4316 if (error)
4317 goto skipit;
4318 if (dfflags & SF_RESTRICTED)
4319 VATTR_SET(&va, va_flags, SF_RESTRICTED);
4320
2d21ac55
A
4321#if CONFIG_MACF
4322 error = mac_vnode_check_create(ctx,
4323 dvp, &nd.ni_cnd, &va);
4324#endif
4325 if (error != 0) {
4326 goto skipit;
4327 }
91447636 4328
2d21ac55
A
4329 if (vp != NULL) {
4330 error = EEXIST;
4331 goto skipit;
4332 }
4333
4334 /* authorize */
4335 if (error == 0)
4336 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
4337 /* get default ownership, etc. */
4338 if (error == 0)
4339 error = vnode_authattr_new(dvp, &va, 0, ctx);
4340 if (error == 0)
4341 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
4342
39236c6e
A
4343#if CONFIG_MACF
4344 if (error == 0)
4345 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
4346#endif
4347
2d21ac55
A
4348 /* do fallback attribute handling */
4349 if (error == 0)
4350 error = vnode_setattr_fallback(vp, &va, ctx);
39236c6e 4351
2d21ac55
A
4352 if (error == 0) {
4353 int update_flags = 0;
55e303ae 4354
2d21ac55
A
4355 if (vp == NULL) {
4356 nd.ni_cnd.cn_nameiop = LOOKUP;
6d2010ae
A
4357#if CONFIG_TRIGGERS
4358 nd.ni_op = OP_LOOKUP;
4359#endif
2d21ac55 4360 nd.ni_cnd.cn_flags = 0;
fe8ab488 4361 error = nameiat(&nd, fd);
2d21ac55 4362 vp = nd.ni_vp;
55e303ae 4363
2d21ac55
A
4364 if (vp == NULL)
4365 goto skipit;
4366 }
fe8ab488 4367
91447636 4368#if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
fe8ab488 4369 /* call out to allow 3rd party notification of rename.
2d21ac55
A
4370 * Ignore result of kauth_authorize_fileop call.
4371 */
4372 if (kauth_authorize_fileop_has_listeners() &&
4373 namei(&nd) == 0) {
4374 char *new_link_path = NULL;
4375 int len;
fe8ab488 4376
2d21ac55
A
4377 /* build the path to the new link file */
4378 new_link_path = get_pathbuff();
4379 len = MAXPATHLEN;
4380 vn_getpath(dvp, new_link_path, &len);
4381 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
91447636 4382 new_link_path[len - 1] = '/';
2d21ac55 4383 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
91447636 4384 }
fe8ab488
A
4385
4386 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
2d21ac55
A
4387 (uintptr_t)path, (uintptr_t)new_link_path);
4388 if (new_link_path != NULL)
4389 release_pathbuff(new_link_path);
4390 }
fe8ab488 4391#endif
2d21ac55
A
4392 // Make sure the name & parent pointers are hooked up
4393 if (vp->v_name == NULL)
4394 update_flags |= VNODE_UPDATE_NAME;
4395 if (vp->v_parent == NULLVP)
4396 update_flags |= VNODE_UPDATE_PARENT;
fe8ab488 4397
2d21ac55
A
4398 if (update_flags)
4399 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
91447636 4400
2d21ac55
A
4401#if CONFIG_FSE
4402 add_fsevent(FSE_CREATE_FILE, ctx,
4403 FSE_ARG_VNODE, vp,
4404 FSE_ARG_DONE);
4405#endif
4406 }
91447636
A
4407
4408skipit:
4409 /*
4410 * nameidone has to happen before we vnode_put(dvp)
4411 * since it may need to release the fs_nodelock on the dvp
4412 */
4413 nameidone(&nd);
4414
4415 if (vp)
4416 vnode_put(vp);
4417 vnode_put(dvp);
1c79356b 4418out:
fe8ab488
A
4419 if (path && (path != (char *)path_data))
4420 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
91447636 4421
1c79356b
A
4422 return (error);
4423}
4424
fe8ab488
A
4425int
4426symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4427{
4428 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4429 uap->link, UIO_USERSPACE));
4430}
4431
4432int
4433symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4434 __unused int32_t *retval)
4435{
4436 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4437 uap->path2, UIO_USERSPACE));
4438}
4439
1c79356b
A
4440/*
4441 * Delete a whiteout from the filesystem.
fe8ab488 4442 * No longer supported.
1c79356b 4443 */
1c79356b 4444int
fe8ab488 4445undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
1c79356b 4446{
fe8ab488 4447 return (ENOTSUP);
1c79356b
A
4448}
4449
4450/*
4451 * Delete a name from the filesystem.
4452 */
1c79356b 4453/* ARGSUSED */
fe8ab488
A
4454static int
4455unlink1at(vfs_context_t ctx, struct nameidata *ndp, int unlink_flags, int fd)
1c79356b 4456{
91447636 4457 vnode_t vp, dvp;
1c79356b 4458 int error;
91447636 4459 struct componentname *cnp;
2d21ac55 4460 char *path = NULL;
b0d623f7
A
4461 int len=0;
4462#if CONFIG_FSE
2d21ac55 4463 fse_info finfo;
6d2010ae 4464 struct vnode_attr va;
b0d623f7 4465#endif
91447636 4466 int flags = 0;
2d21ac55
A
4467 int need_event = 0;
4468 int has_listeners = 0;
b0d623f7 4469 int truncated_path=0;
6d2010ae
A
4470 int batched;
4471 struct vnode_attr *vap = NULL;
4472
c910b4d9
A
4473#if NAMEDRSRCFORK
4474 /* unlink or delete is allowed on rsrc forks and named streams */
4475 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4476#endif
4477
2d21ac55 4478 ndp->ni_cnd.cn_flags |= LOCKPARENT;
6d2010ae 4479 ndp->ni_flag |= NAMEI_COMPOUNDREMOVE;
2d21ac55 4480 cnp = &ndp->ni_cnd;
91447636 4481
6d2010ae 4482lookup_continue:
fe8ab488 4483 error = nameiat(ndp, fd);
2d21ac55
A
4484 if (error)
4485 return (error);
b0d623f7 4486
2d21ac55
A
4487 dvp = ndp->ni_dvp;
4488 vp = ndp->ni_vp;
91447636 4489
6d2010ae 4490
91447636 4491 /* With Carbon delete semantics, busy files cannot be deleted */
316670eb 4492 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
91447636 4493 flags |= VNODE_REMOVE_NODELETEBUSY;
2d21ac55 4494 }
316670eb 4495
39236c6e 4496 /* Skip any potential upcalls if told to. */
316670eb
A
4497 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4498 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4499 }
4500
6d2010ae
A
4501 if (vp) {
4502 batched = vnode_compound_remove_available(vp);
4503 /*
4504 * The root of a mounted filesystem cannot be deleted.
4505 */
4506 if (vp->v_flag & VROOT) {
4507 error = EBUSY;
4508 }
2d21ac55 4509
6d2010ae
A
4510 if (!batched) {
4511 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4512 if (error) {
4513 goto out;
4514 }
4515 }
4516 } else {
4517 batched = 1;
2d21ac55 4518
6d2010ae
A
4519 if (!vnode_compound_remove_available(dvp)) {
4520 panic("No vp, but no compound remove?");
4521 }
4522 }
2d21ac55 4523
2d21ac55
A
4524#if CONFIG_FSE
4525 need_event = need_fsevent(FSE_DELETE, dvp);
4526 if (need_event) {
6d2010ae
A
4527 if (!batched) {
4528 if ((vp->v_flag & VISHARDLINK) == 0) {
4529 /* XXX need to get these data in batched VNOP */
4530 get_fse_info(vp, &finfo, ctx);
4531 }
4532 } else {
4533 error = vfs_get_notify_attributes(&va);
4534 if (error) {
4535 goto out;
4536 }
4537
4538 vap = &va;
2d21ac55
A
4539 }
4540 }
4541#endif
4542 has_listeners = kauth_authorize_fileop_has_listeners();
4543 if (need_event || has_listeners) {
2d21ac55 4544 if (path == NULL) {
6d2010ae
A
4545 GET_PATH(path);
4546 if (path == NULL) {
4547 error = ENOMEM;
4548 goto out;
4549 }
2d21ac55 4550 }
b0d623f7 4551 len = safe_getpath(dvp, ndp->ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
2d21ac55
A
4552 }
4553
4554#if NAMEDRSRCFORK
4555 if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK)
4556 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4557 else
4558#endif
6d2010ae
A
4559 {
4560 error = vn_remove(dvp, &ndp->ni_vp, ndp, flags, vap, ctx);
4561 vp = ndp->ni_vp;
4562 if (error == EKEEPLOOKING) {
4563 if (!batched) {
4564 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4565 }
4566
4567 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
4568 panic("EKEEPLOOKING, but continue flag not set?");
4569 }
4570
4571 if (vnode_isdir(vp)) {
4572 error = EISDIR;
4573 goto out;
4574 }
4575 goto lookup_continue;
4576 }
4577 }
2d21ac55
A
4578
4579 /*
4580 * Call out to allow 3rd party notification of delete.
4581 * Ignore result of kauth_authorize_fileop call.
4582 */
1c79356b 4583 if (!error) {
2d21ac55
A
4584 if (has_listeners) {
4585 kauth_authorize_fileop(vfs_context_ucred(ctx),
4586 KAUTH_FILEOP_DELETE,
4587 (uintptr_t)vp,
4588 (uintptr_t)path);
4589 }
91447636 4590
2d21ac55
A
4591 if (vp->v_flag & VISHARDLINK) {
4592 //
4593 // if a hardlink gets deleted we want to blow away the
4594 // v_parent link because the path that got us to this
4595 // instance of the link is no longer valid. this will
4596 // force the next call to get the path to ask the file
4597 // system instead of just following the v_parent link.
4598 //
4599 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
91447636 4600 }
91447636 4601
2d21ac55
A
4602#if CONFIG_FSE
4603 if (need_event) {
4604 if (vp->v_flag & VISHARDLINK) {
4605 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
4606 } else if (vap) {
4607 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 4608 }
b0d623f7
A
4609 if (truncated_path) {
4610 finfo.mode |= FSE_TRUNCATED_PATH;
4611 }
2d21ac55
A
4612 add_fsevent(FSE_DELETE, ctx,
4613 FSE_ARG_STRING, len, path,
4614 FSE_ARG_FINFO, &finfo,
4615 FSE_ARG_DONE);
4616 }
4617#endif
1c79356b 4618 }
6d2010ae
A
4619
4620out:
2d21ac55
A
4621 if (path != NULL)
4622 RELEASE_PATH(path);
4623
c910b4d9 4624#if NAMEDRSRCFORK
b0d623f7
A
4625 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4626 * will cause its shadow file to go away if necessary.
4627 */
6d2010ae
A
4628 if (vp && (vnode_isnamedstream(vp)) &&
4629 (vp->v_parent != NULLVP) &&
4630 vnode_isshadow(vp)) {
4631 vnode_recycle(vp);
b0d623f7 4632 }
c910b4d9 4633#endif
6d2010ae
A
4634 /*
4635 * nameidone has to happen before we vnode_put(dvp)
4636 * since it may need to release the fs_nodelock on the dvp
4637 */
2d21ac55 4638 nameidone(ndp);
91447636 4639 vnode_put(dvp);
6d2010ae
A
4640 if (vp) {
4641 vnode_put(vp);
4642 }
1c79356b
A
4643 return (error);
4644}
4645
fe8ab488
A
4646int
4647unlink1(vfs_context_t ctx, struct nameidata *ndp, int unlink_flags)
4648{
4649 return (unlink1at(ctx, ndp, unlink_flags, AT_FDCWD));
4650}
4651
1c79356b
A
4652/*
4653 * Delete a name from the filesystem using POSIX semantics.
4654 */
fe8ab488
A
4655static int
4656unlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
4657 enum uio_seg segflg)
4658{
4659 struct nameidata nd;
4660
4661 NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, segflg,
4662 path, ctx);
4663 return (unlink1at(ctx, &nd, 0, fd));
4664}
4665
1c79356b 4666int
b0d623f7 4667unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
1c79356b 4668{
fe8ab488
A
4669 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4670 UIO_USERSPACE));
4671}
2d21ac55 4672
fe8ab488
A
4673int
4674unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
4675{
4676 if (uap->flag & ~AT_REMOVEDIR)
4677 return (EINVAL);
4678
4679 if (uap->flag & AT_REMOVEDIR)
4680 return (rmdirat_internal(vfs_context_current(), uap->fd,
4681 uap->path, UIO_USERSPACE));
4682 else
4683 return (unlinkat_internal(vfs_context_current(), uap->fd,
4684 uap->path, UIO_USERSPACE));
1c79356b
A
4685}
4686
4687/*
0b4e3aa0 4688 * Delete a name from the filesystem using Carbon semantics.
1c79356b
A
4689 */
4690int
b0d623f7 4691delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
1c79356b 4692{
2d21ac55
A
4693 struct nameidata nd;
4694 vfs_context_t ctx = vfs_context_current();
4695
6d2010ae
A
4696 NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
4697 uap->path, ctx);
316670eb 4698 return unlink1(ctx, &nd, VNODE_REMOVE_NODELETEBUSY);
1c79356b
A
4699}
4700
4701/*
4702 * Reposition read/write file offset.
4703 */
1c79356b 4704int
2d21ac55 4705lseek(proc_t p, struct lseek_args *uap, off_t *retval)
1c79356b 4706{
91447636 4707 struct fileproc *fp;
2d21ac55
A
4708 vnode_t vp;
4709 struct vfs_context *ctx;
91447636 4710 off_t offset = uap->offset, file_size;
1c79356b
A
4711 int error;
4712
91447636
A
4713 if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
4714 if (error == ENOTSUP)
4715 return (ESPIPE);
1c79356b 4716 return (error);
55e303ae 4717 }
91447636
A
4718 if (vnode_isfifo(vp)) {
4719 file_drop(uap->fd);
4720 return(ESPIPE);
4721 }
2d21ac55
A
4722
4723
4724 ctx = vfs_context_current();
4725#if CONFIG_MACF
4726 if (uap->whence == L_INCR && uap->offset == 0)
4727 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
4728 fp->f_fglob);
4729 else
4730 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
4731 fp->f_fglob);
4732 if (error) {
4733 file_drop(uap->fd);
4734 return (error);
4735 }
4736#endif
91447636
A
4737 if ( (error = vnode_getwithref(vp)) ) {
4738 file_drop(uap->fd);
4739 return(error);
4740 }
4741
1c79356b
A
4742 switch (uap->whence) {
4743 case L_INCR:
91447636 4744 offset += fp->f_fglob->fg_offset;
1c79356b
A
4745 break;
4746 case L_XTND:
2d21ac55 4747 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
55e303ae 4748 break;
91447636 4749 offset += file_size;
1c79356b
A
4750 break;
4751 case L_SET:
1c79356b
A
4752 break;
4753 default:
55e303ae 4754 error = EINVAL;
1c79356b 4755 }
55e303ae
A
4756 if (error == 0) {
4757 if (uap->offset > 0 && offset < 0) {
4758 /* Incremented/relative move past max size */
4759 error = EOVERFLOW;
4760 } else {
4761 /*
4762 * Allow negative offsets on character devices, per
4763 * POSIX 1003.1-2001. Most likely for writing disk
4764 * labels.
4765 */
4766 if (offset < 0 && vp->v_type != VCHR) {
4767 /* Decremented/relative move before start */
4768 error = EINVAL;
4769 } else {
4770 /* Success */
91447636
A
4771 fp->f_fglob->fg_offset = offset;
4772 *retval = fp->f_fglob->fg_offset;
55e303ae
A
4773 }
4774 }
4775 }
b0d623f7
A
4776
4777 /*
4778 * An lseek can affect whether data is "available to read." Use
4779 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4780 */
4781 post_event_if_success(vp, error, NOTE_NONE);
91447636
A
4782 (void)vnode_put(vp);
4783 file_drop(uap->fd);
55e303ae 4784 return (error);
1c79356b
A
4785}
4786
91447636 4787
1c79356b 4788/*
91447636 4789 * Check access permissions.
2d21ac55
A
4790 *
4791 * Returns: 0 Success
4792 * vnode_authorize:???
1c79356b 4793 */
91447636
A
4794static int
4795access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
1c79356b 4796{
91447636 4797 kauth_action_t action;
1c79356b
A
4798 int error;
4799
91447636
A
4800 /*
4801 * If just the regular access bits, convert them to something
4802 * that vnode_authorize will understand.
4803 */
4804 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4805 action = 0;
4806 if (uflags & R_OK)
4807 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4808 if (uflags & W_OK) {
4809 if (vnode_isdir(vp)) {
4810 action |= KAUTH_VNODE_ADD_FILE |
4811 KAUTH_VNODE_ADD_SUBDIRECTORY;
4812 /* might want delete rights here too */
4813 } else {
4814 action |= KAUTH_VNODE_WRITE_DATA;
4815 }
4816 }
4817 if (uflags & X_OK) {
4818 if (vnode_isdir(vp)) {
4819 action |= KAUTH_VNODE_SEARCH;
4820 } else {
4821 action |= KAUTH_VNODE_EXECUTE;
4822 }
4823 }
4824 } else {
4825 /* take advantage of definition of uflags */
4826 action = uflags >> 8;
4827 }
4828
2d21ac55
A
4829#if CONFIG_MACF
4830 error = mac_vnode_check_access(ctx, vp, uflags);
4831 if (error)
4832 return (error);
4833#endif /* MAC */
4834
91447636
A
4835 /* action == 0 means only check for existence */
4836 if (action != 0) {
4837 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
4838 } else {
4839 error = 0;
4840 }
4841
4842 return(error);
1c79356b 4843}
1c79356b 4844
91447636
A
4845
4846
2d21ac55 4847/*
b0d623f7 4848 * access_extended: Check access permissions in bulk.
2d21ac55 4849 *
b0d623f7
A
4850 * Description: uap->entries Pointer to an array of accessx
4851 * descriptor structs, plus one or
4852 * more NULL terminated strings (see
4853 * "Notes" section below).
4854 * uap->size Size of the area pointed to by
4855 * uap->entries.
4856 * uap->results Pointer to the results array.
2d21ac55
A
4857 *
4858 * Returns: 0 Success
4859 * ENOMEM Insufficient memory
4860 * EINVAL Invalid arguments
4861 * namei:EFAULT Bad address
4862 * namei:ENAMETOOLONG Filename too long
4863 * namei:ENOENT No such file or directory
4864 * namei:ELOOP Too many levels of symbolic links
4865 * namei:EBADF Bad file descriptor
4866 * namei:ENOTDIR Not a directory
4867 * namei:???
4868 * access1:
4869 *
4870 * Implicit returns:
4871 * uap->results Array contents modified
4872 *
4873 * Notes: The uap->entries are structured as an arbitrary length array
b0d623f7 4874 * of accessx descriptors, followed by one or more NULL terminated
2d21ac55
A
4875 * strings
4876 *
4877 * struct accessx_descriptor[0]
4878 * ...
4879 * struct accessx_descriptor[n]
4880 * char name_data[0];
4881 *
4882 * We determine the entry count by walking the buffer containing
b0d623f7 4883 * the uap->entries argument descriptor. For each descriptor we
2d21ac55
A
4884 * see, the valid values for the offset ad_name_offset will be
4885 * in the byte range:
4886 *
4887 * [ uap->entries + sizeof(struct accessx_descriptor) ]
4888 * to
4889 * [ uap->entries + uap->size - 2 ]
4890 *
4891 * since we must have at least one string, and the string must
b0d623f7 4892 * be at least one character plus the NULL terminator in length.
2d21ac55
A
4893 *
4894 * XXX: Need to support the check-as uid argument
4895 */
1c79356b 4896int
b0d623f7 4897access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
1c79356b 4898{
2d21ac55
A
4899 struct accessx_descriptor *input = NULL;
4900 errno_t *result = NULL;
4901 errno_t error = 0;
4902 int wantdelete = 0;
4903 unsigned int desc_max, desc_actual, i, j;
91447636 4904 struct vfs_context context;
1c79356b 4905 struct nameidata nd;
91447636 4906 int niopts;
2d21ac55
A
4907 vnode_t vp = NULL;
4908 vnode_t dvp = NULL;
4909#define ACCESSX_MAX_DESCR_ON_STACK 10
4910 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
91447636 4911
91447636
A
4912 context.vc_ucred = NULL;
4913
2d21ac55
A
4914 /*
4915 * Validate parameters; if valid, copy the descriptor array and string
4916 * arguments into local memory. Before proceeding, the following
4917 * conditions must have been met:
4918 *
4919 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
4920 * o There must be sufficient room in the request for at least one
4921 * descriptor and a one yte NUL terminated string.
4922 * o The allocation of local storage must not fail.
4923 */
91447636
A
4924 if (uap->size > ACCESSX_MAX_TABLESIZE)
4925 return(ENOMEM);
2d21ac55 4926 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
91447636 4927 return(EINVAL);
2d21ac55
A
4928 if (uap->size <= sizeof (stack_input)) {
4929 input = stack_input;
4930 } else {
91447636
A
4931 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
4932 if (input == NULL) {
4933 error = ENOMEM;
4934 goto out;
4935 }
2d21ac55 4936 }
91447636 4937 error = copyin(uap->entries, input, uap->size);
55e303ae 4938 if (error)
91447636 4939 goto out;
1c79356b 4940
b0d623f7
A
4941 AUDIT_ARG(opaque, input, uap->size);
4942
91447636 4943 /*
2d21ac55
A
4944 * Force NUL termination of the copyin buffer to avoid nami() running
4945 * off the end. If the caller passes us bogus data, they may get a
4946 * bogus result.
4947 */
4948 ((char *)input)[uap->size - 1] = 0;
4949
4950 /*
4951 * Access is defined as checking against the process' real identity,
4952 * even if operations are checking the effective identity. This
4953 * requires that we use a local vfs context.
91447636
A
4954 */
4955 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
2d21ac55 4956 context.vc_thread = current_thread();
91447636
A
4957
4958 /*
2d21ac55
A
4959 * Find out how many entries we have, so we can allocate the result
4960 * array by walking the list and adjusting the count downward by the
4961 * earliest string offset we see.
91447636 4962 */
2d21ac55
A
4963 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
4964 desc_actual = desc_max;
4965 for (i = 0; i < desc_actual; i++) {
91447636 4966 /*
2d21ac55
A
4967 * Take the offset to the name string for this entry and
4968 * convert to an input array index, which would be one off
4969 * the end of the array if this entry was the lowest-addressed
4970 * name string.
91447636
A
4971 */
4972 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
2d21ac55
A
4973
4974 /*
4975 * An offset greater than the max allowable offset is an error.
4976 * It is also an error for any valid entry to point
4977 * to a location prior to the end of the current entry, if
4978 * it's not a reference to the string of the previous entry.
4979 */
4980 if (j > desc_max || (j != 0 && j <= i)) {
91447636
A
4981 error = EINVAL;
4982 goto out;
4983 }
2d21ac55
A
4984
4985 /*
4986 * An offset of 0 means use the previous descriptor's offset;
4987 * this is used to chain multiple requests for the same file
4988 * to avoid multiple lookups.
4989 */
91447636 4990 if (j == 0) {
2d21ac55 4991 /* This is not valid for the first entry */
91447636
A
4992 if (i == 0) {
4993 error = EINVAL;
4994 goto out;
4995 }
4996 continue;
4997 }
2d21ac55
A
4998
4999 /*
5000 * If the offset of the string for this descriptor is before
5001 * what we believe is the current actual last descriptor,
5002 * then we need to adjust our estimate downward; this permits
5003 * the string table following the last descriptor to be out
5004 * of order relative to the descriptor list.
5005 */
5006 if (j < desc_actual)
5007 desc_actual = j;
91447636 5008 }
2d21ac55
A
5009
5010 /*
5011 * We limit the actual number of descriptors we are willing to process
5012 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5013 * requested does not exceed this limit,
5014 */
5015 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
91447636
A
5016 error = ENOMEM;
5017 goto out;
5018 }
2d21ac55 5019 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
91447636
A
5020 if (result == NULL) {
5021 error = ENOMEM;
5022 goto out;
5023 }
5024
5025 /*
2d21ac55
A
5026 * Do the work by iterating over the descriptor entries we know to
5027 * at least appear to contain valid data.
91447636
A
5028 */
5029 error = 0;
2d21ac55 5030 for (i = 0; i < desc_actual; i++) {
91447636 5031 /*
2d21ac55
A
5032 * If the ad_name_offset is 0, then we use the previous
5033 * results to make the check; otherwise, we are looking up
5034 * a new file name.
91447636
A
5035 */
5036 if (input[i].ad_name_offset != 0) {
5037 /* discard old vnodes */
5038 if (vp) {
5039 vnode_put(vp);
5040 vp = NULL;
5041 }
5042 if (dvp) {
5043 vnode_put(dvp);
5044 dvp = NULL;
5045 }
5046
2d21ac55
A
5047 /*
5048 * Scan forward in the descriptor list to see if we
5049 * need the parent vnode. We will need it if we are
5050 * deleting, since we must have rights to remove
5051 * entries in the parent directory, as well as the
5052 * rights to delete the object itself.
5053 */
91447636 5054 wantdelete = input[i].ad_flags & _DELETE_OK;
2d21ac55 5055 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
91447636
A
5056 if (input[j].ad_flags & _DELETE_OK)
5057 wantdelete = 1;
5058
5059 niopts = FOLLOW | AUDITVNPATH1;
2d21ac55 5060
91447636
A
5061 /* need parent for vnode_authorize for deletion test */
5062 if (wantdelete)
5063 niopts |= WANTPARENT;
5064
5065 /* do the lookup */
6d2010ae
A
5066 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
5067 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
5068 &context);
91447636
A
5069 error = namei(&nd);
5070 if (!error) {
5071 vp = nd.ni_vp;
5072 if (wantdelete)
5073 dvp = nd.ni_dvp;
5074 }
5075 nameidone(&nd);
5076 }
5077
5078 /*
5079 * Handle lookup errors.
5080 */
5081 switch(error) {
5082 case ENOENT:
5083 case EACCES:
5084 case EPERM:
5085 case ENOTDIR:
5086 result[i] = error;
5087 break;
5088 case 0:
5089 /* run this access check */
5090 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
5091 break;
5092 default:
5093 /* fatal lookup error */
5094
5095 goto out;
5096 }
5097 }
5098
b0d623f7
A
5099 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
5100
91447636 5101 /* copy out results */
2d21ac55 5102 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
91447636
A
5103
5104out:
2d21ac55 5105 if (input && input != stack_input)
91447636
A
5106 FREE(input, M_TEMP);
5107 if (result)
5108 FREE(result, M_TEMP);
5109 if (vp)
5110 vnode_put(vp);
5111 if (dvp)
5112 vnode_put(dvp);
0c530ab8
A
5113 if (IS_VALID_CRED(context.vc_ucred))
5114 kauth_cred_unref(&context.vc_ucred);
91447636 5115 return(error);
1c79356b
A
5116}
5117
2d21ac55
A
5118
5119/*
5120 * Returns: 0 Success
5121 * namei:EFAULT Bad address
5122 * namei:ENAMETOOLONG Filename too long
5123 * namei:ENOENT No such file or directory
5124 * namei:ELOOP Too many levels of symbolic links
5125 * namei:EBADF Bad file descriptor
5126 * namei:ENOTDIR Not a directory
5127 * namei:???
5128 * access1:
5129 */
fe8ab488
A
5130static int
5131faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5132 int flag, enum uio_seg segflg)
1c79356b 5133{
1c79356b
A
5134 int error;
5135 struct nameidata nd;
91447636
A
5136 int niopts;
5137 struct vfs_context context;
cf7d32b8
A
5138#if NAMEDRSRCFORK
5139 int is_namedstream = 0;
5140#endif
5141
91447636 5142 /*
fe8ab488
A
5143 * Unless the AT_EACCESS option is used, Access is defined as checking
5144 * against the process' real identity, even if operations are checking
5145 * the effective identity. So we need to tweak the credential
5146 * in the context for that case.
91447636 5147 */
fe8ab488
A
5148 if (!(flag & AT_EACCESS))
5149 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5150 else
5151 context.vc_ucred = ctx->vc_ucred;
5152 context.vc_thread = ctx->vc_thread;
5153
91447636
A
5154
5155 niopts = FOLLOW | AUDITVNPATH1;
5156 /* need parent for vnode_authorize for deletion test */
fe8ab488 5157 if (amode & _DELETE_OK)
91447636 5158 niopts |= WANTPARENT;
fe8ab488
A
5159 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5160 path, &context);
2d21ac55
A
5161
5162#if NAMEDRSRCFORK
5163 /* access(F_OK) calls are allowed for resource forks. */
fe8ab488 5164 if (amode == F_OK)
2d21ac55
A
5165 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5166#endif
fe8ab488 5167 error = nameiat(&nd, fd);
91447636
A
5168 if (error)
5169 goto out;
5170
cf7d32b8 5171#if NAMEDRSRCFORK
b0d623f7
A
5172 /* Grab reference on the shadow stream file vnode to
5173 * force an inactive on release which will mark it
5174 * for recycle.
cf7d32b8
A
5175 */
5176 if (vnode_isnamedstream(nd.ni_vp) &&
b0d623f7
A
5177 (nd.ni_vp->v_parent != NULLVP) &&
5178 vnode_isshadow(nd.ni_vp)) {
cf7d32b8
A
5179 is_namedstream = 1;
5180 vnode_ref(nd.ni_vp);
5181 }
5182#endif
5183
fe8ab488 5184 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
b0d623f7 5185
cf7d32b8
A
5186#if NAMEDRSRCFORK
5187 if (is_namedstream) {
5188 vnode_rele(nd.ni_vp);
5189 }
5190#endif
5191
91447636 5192 vnode_put(nd.ni_vp);
fe8ab488 5193 if (amode & _DELETE_OK)
91447636
A
5194 vnode_put(nd.ni_dvp);
5195 nameidone(&nd);
5196
5197out:
fe8ab488
A
5198 if (!(flag & AT_EACCESS))
5199 kauth_cred_unref(&context.vc_ucred);
5200 return (error);
5201}
5202
5203int
5204access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5205{
5206 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5207 uap->path, uap->flags, 0, UIO_USERSPACE));
91447636
A
5208}
5209
fe8ab488
A
5210int
5211faccessat(__unused proc_t p, struct faccessat_args *uap,
5212 __unused int32_t *retval)
5213{
5214 if (uap->flag & ~AT_EACCESS)
5215 return (EINVAL);
5216
5217 return (faccessat_internal(vfs_context_current(), uap->fd,
5218 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5219}
91447636 5220
2d21ac55
A
5221/*
5222 * Returns: 0 Success
5223 * EFAULT
5224 * copyout:EFAULT
5225 * namei:???
5226 * vn_stat:???
5227 */
91447636 5228static int
fe8ab488
A
5229fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5230 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5231 enum uio_seg segflg, int fd, int flag)
91447636 5232{
fe8ab488
A
5233 struct nameidata nd;
5234 int follow;
b0d623f7
A
5235 union {
5236 struct stat sb;
5237 struct stat64 sb64;
5238 } source;
5239 union {
5240 struct user64_stat user64_sb;
5241 struct user32_stat user32_sb;
5242 struct user64_stat64 user64_sb64;
5243 struct user32_stat64 user32_sb64;
5244 } dest;
91447636
A
5245 caddr_t sbp;
5246 int error, my_size;
5247 kauth_filesec_t fsec;
5248 size_t xsecurity_bufsize;
2d21ac55 5249 void * statptr;
1c79356b 5250
fe8ab488
A
5251 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5252 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5253 segflg, path, ctx);
5254
2d21ac55 5255#if NAMEDRSRCFORK
cf7d32b8 5256 int is_namedstream = 0;
2d21ac55 5257 /* stat calls are allowed for resource forks. */
fe8ab488 5258 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
2d21ac55 5259#endif
fe8ab488 5260 error = nameiat(&nd, fd);
91447636 5261 if (error)
1c79356b 5262 return (error);
91447636 5263 fsec = KAUTH_FILESEC_NONE;
b0d623f7
A
5264
5265 statptr = (void *)&source;
cf7d32b8
A
5266
5267#if NAMEDRSRCFORK
b0d623f7
A
5268 /* Grab reference on the shadow stream file vnode to
5269 * force an inactive on release which will mark it
5270 * for recycle.
cf7d32b8 5271 */
fe8ab488
A
5272 if (vnode_isnamedstream(nd.ni_vp) &&
5273 (nd.ni_vp->v_parent != NULLVP) &&
5274 vnode_isshadow(nd.ni_vp)) {
cf7d32b8 5275 is_namedstream = 1;
fe8ab488 5276 vnode_ref(nd.ni_vp);
cf7d32b8
A
5277 }
5278#endif
5279
fe8ab488 5280 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
2d21ac55
A
5281
5282#if NAMEDRSRCFORK
cf7d32b8 5283 if (is_namedstream) {
fe8ab488 5284 vnode_rele(nd.ni_vp);
2d21ac55
A
5285 }
5286#endif
fe8ab488
A
5287 vnode_put(nd.ni_vp);
5288 nameidone(&nd);
91447636 5289
1c79356b
A
5290 if (error)
5291 return (error);
91447636 5292 /* Zap spare fields */
2d21ac55 5293 if (isstat64 != 0) {
b0d623f7
A
5294 source.sb64.st_lspare = 0;
5295 source.sb64.st_qspare[0] = 0LL;
5296 source.sb64.st_qspare[1] = 0LL;
2d21ac55 5297 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
b0d623f7
A
5298 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
5299 my_size = sizeof(dest.user64_sb64);
5300 sbp = (caddr_t)&dest.user64_sb64;
2d21ac55 5301 } else {
b0d623f7
A
5302 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
5303 my_size = sizeof(dest.user32_sb64);
5304 sbp = (caddr_t)&dest.user32_sb64;
2d21ac55
A
5305 }
5306 /*
5307 * Check if we raced (post lookup) against the last unlink of a file.
5308 */
b0d623f7
A
5309 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5310 source.sb64.st_nlink = 1;
2d21ac55
A
5311 }
5312 } else {
b0d623f7
A
5313 source.sb.st_lspare = 0;
5314 source.sb.st_qspare[0] = 0LL;
5315 source.sb.st_qspare[1] = 0LL;
2d21ac55 5316 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
b0d623f7
A
5317 munge_user64_stat(&source.sb, &dest.user64_sb);
5318 my_size = sizeof(dest.user64_sb);
5319 sbp = (caddr_t)&dest.user64_sb;
2d21ac55 5320 } else {
b0d623f7
A
5321 munge_user32_stat(&source.sb, &dest.user32_sb);
5322 my_size = sizeof(dest.user32_sb);
5323 sbp = (caddr_t)&dest.user32_sb;
2d21ac55
A
5324 }
5325
5326 /*
5327 * Check if we raced (post lookup) against the last unlink of a file.
5328 */
b0d623f7
A
5329 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5330 source.sb.st_nlink = 1;
2d21ac55 5331 }
91447636
A
5332 }
5333 if ((error = copyout(sbp, ub, my_size)) != 0)
5334 goto out;
5335
5336 /* caller wants extended security information? */
5337 if (xsecurity != USER_ADDR_NULL) {
5338
5339 /* did we get any? */
5340 if (fsec == KAUTH_FILESEC_NONE) {
5341 if (susize(xsecurity_size, 0) != 0) {
5342 error = EFAULT;
5343 goto out;
5344 }
5345 } else {
5346 /* find the user buffer size */
5347 xsecurity_bufsize = fusize(xsecurity_size);
5348
5349 /* copy out the actual data size */
5350 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5351 error = EFAULT;
5352 goto out;
5353 }
5354
5355 /* if the caller supplied enough room, copy out to it */
5356 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5357 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5358 }
5359 }
5360out:
5361 if (fsec != KAUTH_FILESEC_NONE)
5362 kauth_filesec_free(fsec);
1c79356b
A
5363 return (error);
5364}
5365
b0d623f7
A
5366/*
5367 * stat_extended: Get file status; with extended security (ACL).
5368 *
5369 * Parameters: p (ignored)
5370 * uap User argument descriptor (see below)
5371 * retval (ignored)
5372 *
5373 * Indirect: uap->path Path of file to get status from
5374 * uap->ub User buffer (holds file status info)
5375 * uap->xsecurity ACL to get (extended security)
5376 * uap->xsecurity_size Size of ACL
5377 *
5378 * Returns: 0 Success
5379 * !0 errno value
5380 *
5381 */
2d21ac55 5382int
fe8ab488
A
5383stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5384 __unused int32_t *retval)
2d21ac55 5385{
fe8ab488
A
5386 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5387 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5388 0));
1c79356b
A
5389}
5390
2d21ac55
A
5391/*
5392 * Returns: 0 Success
fe8ab488 5393 * fstatat_internal:??? [see fstatat_internal() in this file]
2d21ac55 5394 */
91447636 5395int
b0d623f7 5396stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
1c79356b 5397{
fe8ab488
A
5398 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5399 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
91447636 5400}
1c79356b 5401
91447636 5402int
b0d623f7 5403stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
91447636 5404{
fe8ab488
A
5405 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5406 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
1c79356b 5407}
1c79356b 5408
b0d623f7
A
5409/*
5410 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5411 *
5412 * Parameters: p (ignored)
5413 * uap User argument descriptor (see below)
5414 * retval (ignored)
5415 *
5416 * Indirect: uap->path Path of file to get status from
5417 * uap->ub User buffer (holds file status info)
5418 * uap->xsecurity ACL to get (extended security)
5419 * uap->xsecurity_size Size of ACL
5420 *
5421 * Returns: 0 Success
5422 * !0 errno value
5423 *
5424 */
2d21ac55 5425int
b0d623f7 5426stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
2d21ac55 5427{
fe8ab488
A
5428 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5429 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5430 0));
2d21ac55 5431}
91447636 5432
b0d623f7
A
5433/*
5434 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5435 *
5436 * Parameters: p (ignored)
5437 * uap User argument descriptor (see below)
5438 * retval (ignored)
5439 *
5440 * Indirect: uap->path Path of file to get status from
5441 * uap->ub User buffer (holds file status info)
5442 * uap->xsecurity ACL to get (extended security)
5443 * uap->xsecurity_size Size of ACL
5444 *
5445 * Returns: 0 Success
5446 * !0 errno value
5447 *
5448 */
2d21ac55 5449int
b0d623f7 5450lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
2d21ac55 5451{
fe8ab488
A
5452 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5453 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5454 AT_SYMLINK_NOFOLLOW));
91447636
A
5455}
5456
fe8ab488
A
5457/*
5458 * Get file status; this version does not follow links.
5459 */
91447636 5460int
b0d623f7 5461lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
91447636 5462{
fe8ab488
A
5463 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5464 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
2d21ac55 5465}
b0d623f7 5466
2d21ac55 5467int
b0d623f7 5468lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
2d21ac55 5469{
fe8ab488
A
5470 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5471 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
91447636
A
5472}
5473
b0d623f7
A
5474/*
5475 * lstat64_extended: Get file status; can handle large inode numbers; does not
5476 * follow links; with extended security (ACL).
5477 *
5478 * Parameters: p (ignored)
5479 * uap User argument descriptor (see below)
5480 * retval (ignored)
5481 *
5482 * Indirect: uap->path Path of file to get status from
5483 * uap->ub User buffer (holds file status info)
5484 * uap->xsecurity ACL to get (extended security)
5485 * uap->xsecurity_size Size of ACL
5486 *
5487 * Returns: 0 Success
5488 * !0 errno value
5489 *
5490 */
91447636 5491int
b0d623f7 5492lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
91447636 5493{
fe8ab488
A
5494 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5495 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5496 AT_SYMLINK_NOFOLLOW));
5497}
5498
5499int
5500fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5501{
5502 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5503 return (EINVAL);
5504
5505 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5506 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5507}
5508
5509int
5510fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5511 __unused int32_t *retval)
5512{
5513 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5514 return (EINVAL);
5515
5516 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5517 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
91447636
A
5518}
5519
1c79356b 5520/*
91447636 5521 * Get configurable pathname variables.
2d21ac55
A
5522 *
5523 * Returns: 0 Success
5524 * namei:???
5525 * vn_pathconf:???
5526 *
5527 * Notes: Global implementation constants are intended to be
5528 * implemented in this function directly; all other constants
5529 * are per-FS implementation, and therefore must be handled in
5530 * each respective FS, instead.
5531 *
5532 * XXX We implement some things globally right now that should actually be
5533 * XXX per-FS; we will need to deal with this at some point.
1c79356b 5534 */
1c79356b
A
5535/* ARGSUSED */
5536int
b0d623f7 5537pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
1c79356b 5538{
1c79356b
A
5539 int error;
5540 struct nameidata nd;
2d21ac55 5541 vfs_context_t ctx = vfs_context_current();
91447636 5542
6d2010ae 5543 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
2d21ac55 5544 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
5545 error = namei(&nd);
5546 if (error)
1c79356b 5547 return (error);
1c79356b 5548
2d21ac55 5549 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
1c79356b 5550
91447636
A
5551 vnode_put(nd.ni_vp);
5552 nameidone(&nd);
1c79356b
A
5553 return (error);
5554}
5555
5556/*
5557 * Return target name of a symbolic link.
5558 */
1c79356b 5559/* ARGSUSED */
fe8ab488
A
5560static int
5561readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
5562 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
5563 int *retval)
1c79356b 5564{
2d21ac55 5565 vnode_t vp;
91447636 5566 uio_t auio;
1c79356b
A
5567 int error;
5568 struct nameidata nd;
91447636
A
5569 char uio_buf[ UIO_SIZEOF(1) ];
5570
fe8ab488
A
5571 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5572 seg, path, ctx);
5573
5574 error = nameiat(&nd, fd);
55e303ae 5575 if (error)
1c79356b
A
5576 return (error);
5577 vp = nd.ni_vp;
91447636
A
5578
5579 nameidone(&nd);
5580
fe8ab488
A
5581 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
5582 &uio_buf[0], sizeof(uio_buf));
5583 uio_addiov(auio, buf, bufsize);
5584 if (vp->v_type != VLNK) {
1c79356b 5585 error = EINVAL;
fe8ab488 5586 } else {
2d21ac55 5587#if CONFIG_MACF
fe8ab488 5588 error = mac_vnode_check_readlink(ctx, vp);
2d21ac55
A
5589#endif
5590 if (error == 0)
fe8ab488
A
5591 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
5592 ctx);
91447636 5593 if (error == 0)
2d21ac55 5594 error = VNOP_READLINK(vp, auio, ctx);
91447636
A
5595 }
5596 vnode_put(vp);
b0d623f7 5597
fe8ab488 5598 *retval = bufsize - (int)uio_resid(auio);
1c79356b
A
5599 return (error);
5600}
5601
fe8ab488
A
5602int
5603readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5604{
5605 enum uio_seg procseg;
5606
5607 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5608 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5609 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5610 uap->count, procseg, retval));
5611}
5612
5613int
5614readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5615{
5616 enum uio_seg procseg;
5617
5618 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5619 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5620 procseg, uap->buf, uap->bufsize, procseg, retval));
5621}
5622
5623/*
5624 * Change file flags.
91447636
A
5625 */
5626static int
5627chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5628{
5629 struct vnode_attr va;
5630 kauth_action_t action;
5631 int error;
5632
5633 VATTR_INIT(&va);
5634 VATTR_SET(&va, va_flags, flags);
5635
2d21ac55
A
5636#if CONFIG_MACF
5637 error = mac_vnode_check_setflags(ctx, vp, flags);
5638 if (error)
5639 goto out;
5640#endif
5641
91447636
A
5642 /* request authorisation, disregard immutability */
5643 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5644 goto out;
5645 /*
5646 * Request that the auth layer disregard those file flags it's allowed to when
5647 * authorizing this operation; we need to do this in order to be able to
5648 * clear immutable flags.
5649 */
5650 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
5651 goto out;
5652 error = vnode_setattr(vp, &va, ctx);
5653
2d21ac55
A
5654 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5655 error = ENOTSUP;
5656 }
91447636
A
5657out:
5658 vnode_put(vp);
5659 return(error);
5660}
5661
1c79356b
A
5662/*
5663 * Change flags of a file given a path name.
5664 */
1c79356b
A
5665/* ARGSUSED */
5666int
b0d623f7 5667chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
1c79356b 5668{
2d21ac55
A
5669 vnode_t vp;
5670 vfs_context_t ctx = vfs_context_current();
1c79356b
A
5671 int error;
5672 struct nameidata nd;
5673
55e303ae 5674 AUDIT_ARG(fflags, uap->flags);
6d2010ae 5675 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
2d21ac55 5676 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
5677 error = namei(&nd);
5678 if (error)
1c79356b
A
5679 return (error);
5680 vp = nd.ni_vp;
91447636
A
5681 nameidone(&nd);
5682
2d21ac55 5683 error = chflags1(vp, uap->flags, ctx);
91447636
A
5684
5685 return(error);
1c79356b
A
5686}
5687
5688/*
5689 * Change flags of a file given a file descriptor.
5690 */
1c79356b
A
5691/* ARGSUSED */
5692int
b0d623f7 5693fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
1c79356b 5694{
2d21ac55 5695 vnode_t vp;
1c79356b
A
5696 int error;
5697
55e303ae
A
5698 AUDIT_ARG(fd, uap->fd);
5699 AUDIT_ARG(fflags, uap->flags);
91447636 5700 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 5701 return (error);
55e303ae 5702
91447636
A
5703 if ((error = vnode_getwithref(vp))) {
5704 file_drop(uap->fd);
5705 return(error);
5706 }
e5568f75
A
5707
5708 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5709
2d21ac55 5710 error = chflags1(vp, uap->flags, vfs_context_current());
91447636
A
5711
5712 file_drop(uap->fd);
5713 return (error);
5714}
5715
5716/*
5717 * Change security information on a filesystem object.
2d21ac55
A
5718 *
5719 * Returns: 0 Success
5720 * EPERM Operation not permitted
5721 * vnode_authattr:??? [anything vnode_authattr can return]
5722 * vnode_authorize:??? [anything vnode_authorize can return]
5723 * vnode_setattr:??? [anything vnode_setattr can return]
5724 *
5725 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5726 * translated to EPERM before being returned.
91447636
A
5727 */
5728static int
fe8ab488 5729chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
91447636
A
5730{
5731 kauth_action_t action;
5732 int error;
5733
b0d623f7
A
5734 AUDIT_ARG(mode, vap->va_mode);
5735 /* XXX audit new args */
91447636 5736
2d21ac55
A
5737#if NAMEDSTREAMS
5738 /* chmod calls are not allowed for resource forks. */
5739 if (vp->v_flag & VISNAMEDSTREAM) {
5740 return (EPERM);
5741 }
5742#endif
5743
5744#if CONFIG_MACF
316670eb
A
5745 if (VATTR_IS_ACTIVE(vap, va_mode) &&
5746 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
2d21ac55
A
5747 return (error);
5748#endif
5749
91447636
A
5750 /* make sure that the caller is allowed to set this security information */
5751 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
5752 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5753 if (error == EACCES)
5754 error = EPERM;
5755 return(error);
5756 }
5757
5758 error = vnode_setattr(vp, vap, ctx);
5759
1c79356b
A
5760 return (error);
5761}
5762
91447636 5763
1c79356b 5764/*
b0d623f7 5765 * Change mode of a file given a path name.
2d21ac55
A
5766 *
5767 * Returns: 0 Success
5768 * namei:??? [anything namei can return]
fe8ab488 5769 * chmod_vnode:??? [anything chmod_vnode can return]
1c79356b 5770 */
91447636 5771static int
fe8ab488
A
5772chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
5773 int fd, int flag, enum uio_seg segflg)
91447636
A
5774{
5775 struct nameidata nd;
fe8ab488 5776 int follow, error;
91447636 5777
fe8ab488
A
5778 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5779 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
5780 segflg, path, ctx);
5781 if ((error = nameiat(&nd, fd)))
91447636 5782 return (error);
fe8ab488 5783 error = chmod_vnode(ctx, nd.ni_vp, vap);
91447636
A
5784 vnode_put(nd.ni_vp);
5785 nameidone(&nd);
5786 return(error);
5787}
5788
0c530ab8 5789/*
b0d623f7
A
5790 * chmod_extended: Change the mode of a file given a path name; with extended
5791 * argument list (including extended security (ACL)).
0c530ab8
A
5792 *
5793 * Parameters: p Process requesting the open
5794 * uap User argument descriptor (see below)
5795 * retval (ignored)
5796 *
5797 * Indirect: uap->path Path to object (same as 'chmod')
5798 * uap->uid UID to set
5799 * uap->gid GID to set
5800 * uap->mode File mode to set (same as 'chmod')
5801 * uap->xsecurity ACL to set (or delete)
5802 *
5803 * Returns: 0 Success
5804 * !0 errno value
5805 *
5806 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5807 *
5808 * XXX: We should enummerate the possible errno values here, and where
5809 * in the code they originated.
5810 */
1c79356b 5811int
b0d623f7 5812chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
1c79356b 5813{
1c79356b 5814 int error;
91447636
A
5815 struct vnode_attr va;
5816 kauth_filesec_t xsecdst;
5817
b0d623f7
A
5818 AUDIT_ARG(owner, uap->uid, uap->gid);
5819
91447636
A
5820 VATTR_INIT(&va);
5821 if (uap->mode != -1)
5822 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5823 if (uap->uid != KAUTH_UID_NONE)
5824 VATTR_SET(&va, va_uid, uap->uid);
5825 if (uap->gid != KAUTH_GID_NONE)
5826 VATTR_SET(&va, va_gid, uap->gid);
5827
5828 xsecdst = NULL;
5829 switch(uap->xsecurity) {
5830 /* explicit remove request */
5831 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5832 VATTR_SET(&va, va_acl, NULL);
5833 break;
5834 /* not being set */
5835 case USER_ADDR_NULL:
5836 break;
5837 default:
5838 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5839 return(error);
5840 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5841 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
5842 }
1c79356b 5843
fe8ab488
A
5844 error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
5845 UIO_USERSPACE);
55e303ae 5846
91447636
A
5847 if (xsecdst != NULL)
5848 kauth_filesec_free(xsecdst);
5849 return(error);
5850}
4a249263 5851
2d21ac55
A
5852/*
5853 * Returns: 0 Success
fe8ab488 5854 * chmodat:??? [anything chmodat can return]
2d21ac55 5855 */
fe8ab488
A
5856static int
5857fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
5858 int flag, enum uio_seg segflg)
91447636 5859{
91447636
A
5860 struct vnode_attr va;
5861
5862 VATTR_INIT(&va);
fe8ab488
A
5863 VATTR_SET(&va, va_mode, mode & ALLPERMS);
5864
5865 return (chmodat(ctx, path, &va, fd, flag, segflg));
5866}
5867
5868int
5869chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
5870{
5871 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
5872 AT_FDCWD, 0, UIO_USERSPACE));
5873}
91447636 5874
fe8ab488
A
5875int
5876fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
5877{
5878 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5879 return (EINVAL);
5880
5881 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
5882 uap->fd, uap->flag, UIO_USERSPACE));
1c79356b
A
5883}
5884
5885/*
5886 * Change mode of a file given a file descriptor.
5887 */
91447636 5888static int
2d21ac55 5889fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
1c79356b 5890{
2d21ac55 5891 vnode_t vp;
1c79356b 5892 int error;
55e303ae 5893
91447636 5894 AUDIT_ARG(fd, fd);
55e303ae 5895
91447636
A
5896 if ((error = file_vnode(fd, &vp)) != 0)
5897 return (error);
5898 if ((error = vnode_getwithref(vp)) != 0) {
5899 file_drop(fd);
5900 return(error);
5901 }
55e303ae
A
5902 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5903
fe8ab488 5904 error = chmod_vnode(vfs_context_current(), vp, vap);
91447636
A
5905 (void)vnode_put(vp);
5906 file_drop(fd);
55e303ae 5907
1c79356b
A
5908 return (error);
5909}
5910
b0d623f7
A
5911/*
5912 * fchmod_extended: Change mode of a file given a file descriptor; with
5913 * extended argument list (including extended security (ACL)).
5914 *
5915 * Parameters: p Process requesting to change file mode
5916 * uap User argument descriptor (see below)
5917 * retval (ignored)
5918 *
5919 * Indirect: uap->mode File mode to set (same as 'chmod')
5920 * uap->uid UID to set
5921 * uap->gid GID to set
5922 * uap->xsecurity ACL to set (or delete)
5923 * uap->fd File descriptor of file to change mode
5924 *
5925 * Returns: 0 Success
5926 * !0 errno value
5927 *
5928 */
91447636 5929int
b0d623f7 5930fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
91447636
A
5931{
5932 int error;
5933 struct vnode_attr va;
5934 kauth_filesec_t xsecdst;
5935
b0d623f7
A
5936 AUDIT_ARG(owner, uap->uid, uap->gid);
5937
91447636
A
5938 VATTR_INIT(&va);
5939 if (uap->mode != -1)
5940 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5941 if (uap->uid != KAUTH_UID_NONE)
5942 VATTR_SET(&va, va_uid, uap->uid);
5943 if (uap->gid != KAUTH_GID_NONE)
5944 VATTR_SET(&va, va_gid, uap->gid);
5945
5946 xsecdst = NULL;
5947 switch(uap->xsecurity) {
5948 case USER_ADDR_NULL:
5949 VATTR_SET(&va, va_acl, NULL);
5950 break;
39236c6e
A
5951 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5952 VATTR_SET(&va, va_acl, NULL);
5953 break;
5954 /* not being set */
91447636
A
5955 case CAST_USER_ADDR_T(-1):
5956 break;
5957 default:
5958 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5959 return(error);
5960 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5961 }
5962
5963 error = fchmod1(p, uap->fd, &va);
5964
5965
5966 switch(uap->xsecurity) {
5967 case USER_ADDR_NULL:
5968 case CAST_USER_ADDR_T(-1):
5969 break;
5970 default:
5971 if (xsecdst != NULL)
5972 kauth_filesec_free(xsecdst);
5973 }
5974 return(error);
5975}
5976
5977int
b0d623f7 5978fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
91447636
A
5979{
5980 struct vnode_attr va;
5981
5982 VATTR_INIT(&va);
5983 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5984
5985 return(fchmod1(p, uap->fd, &va));
5986}
5987
5988
1c79356b
A
5989/*
5990 * Set ownership given a path name.
5991 */
1c79356b 5992/* ARGSUSED */
91447636 5993static int
fe8ab488
A
5994fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
5995 gid_t gid, int flag, enum uio_seg segflg)
1c79356b 5996{
2d21ac55 5997 vnode_t vp;
91447636 5998 struct vnode_attr va;
1c79356b
A
5999 int error;
6000 struct nameidata nd;
fe8ab488 6001 int follow;
91447636 6002 kauth_action_t action;
1c79356b 6003
fe8ab488 6004 AUDIT_ARG(owner, uid, gid);
55e303ae 6005
fe8ab488
A
6006 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6007 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
6008 path, ctx);
6009 error = nameiat(&nd, fd);
55e303ae 6010 if (error)
1c79356b
A
6011 return (error);
6012 vp = nd.ni_vp;
6013
91447636
A
6014 nameidone(&nd);
6015
91447636 6016 VATTR_INIT(&va);
fe8ab488
A
6017 if (uid != (uid_t)VNOVAL)
6018 VATTR_SET(&va, va_uid, uid);
6019 if (gid != (gid_t)VNOVAL)
6020 VATTR_SET(&va, va_gid, gid);
91447636 6021
2d21ac55 6022#if CONFIG_MACF
fe8ab488 6023 error = mac_vnode_check_setowner(ctx, vp, uid, gid);
2d21ac55
A
6024 if (error)
6025 goto out;
6026#endif
6027
91447636
A
6028 /* preflight and authorize attribute changes */
6029 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6030 goto out;
6031 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6032 goto out;
6033 error = vnode_setattr(vp, &va, ctx);
6034
6035out:
6036 /*
6037 * EACCES is only allowed from namei(); permissions failure should
6038 * return EPERM, so we need to translate the error code.
6039 */
6040 if (error == EACCES)
6041 error = EPERM;
fe8ab488 6042
91447636 6043 vnode_put(vp);
1c79356b
A
6044 return (error);
6045}
6046
91447636 6047int
fe8ab488 6048chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
91447636 6049{
fe8ab488
A
6050 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6051 uap->uid, uap->gid, 0, UIO_USERSPACE));
91447636
A
6052}
6053
6054int
fe8ab488 6055lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
91447636 6056{
fe8ab488
A
6057 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6058 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6059}
6060
6061int
6062fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6063{
6064 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6065 return (EINVAL);
6066
6067 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6068 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
91447636
A
6069}
6070
1c79356b
A
6071/*
6072 * Set ownership given a file descriptor.
6073 */
1c79356b
A
6074/* ARGSUSED */
6075int
b0d623f7 6076fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
1c79356b 6077{
91447636 6078 struct vnode_attr va;
2d21ac55
A
6079 vfs_context_t ctx = vfs_context_current();
6080 vnode_t vp;
1c79356b 6081 int error;
91447636 6082 kauth_action_t action;
1c79356b 6083
55e303ae
A
6084 AUDIT_ARG(owner, uap->uid, uap->gid);
6085 AUDIT_ARG(fd, uap->fd);
6086
91447636 6087 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 6088 return (error);
55e303ae 6089
91447636
A
6090 if ( (error = vnode_getwithref(vp)) ) {
6091 file_drop(uap->fd);
6092 return(error);
6093 }
55e303ae
A
6094 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6095
91447636
A
6096 VATTR_INIT(&va);
6097 if (uap->uid != VNOVAL)
6098 VATTR_SET(&va, va_uid, uap->uid);
6099 if (uap->gid != VNOVAL)
6100 VATTR_SET(&va, va_gid, uap->gid);
6101
2d21ac55
A
6102#if NAMEDSTREAMS
6103 /* chown calls are not allowed for resource forks. */
6104 if (vp->v_flag & VISNAMEDSTREAM) {
6105 error = EPERM;
6106 goto out;
6107 }
6108#endif
6109
6110#if CONFIG_MACF
6111 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
6112 if (error)
6113 goto out;
6114#endif
91447636
A
6115
6116 /* preflight and authorize attribute changes */
2d21ac55 6117 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
91447636 6118 goto out;
2d21ac55 6119 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
91447636
A
6120 if (error == EACCES)
6121 error = EPERM;
6122 goto out;
6123 }
2d21ac55 6124 error = vnode_setattr(vp, &va, ctx);
4a249263 6125
91447636
A
6126out:
6127 (void)vnode_put(vp);
6128 file_drop(uap->fd);
1c79356b
A
6129 return (error);
6130}
6131
9bccf70c 6132static int
2d21ac55 6133getutimes(user_addr_t usrtvp, struct timespec *tsp)
9bccf70c 6134{
9bccf70c
A
6135 int error;
6136
91447636
A
6137 if (usrtvp == USER_ADDR_NULL) {
6138 struct timeval old_tv;
6139 /* XXX Y2038 bug because of microtime argument */
6140 microtime(&old_tv);
6141 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
9bccf70c
A
6142 tsp[1] = tsp[0];
6143 } else {
91447636 6144 if (IS_64BIT_PROCESS(current_proc())) {
b0d623f7 6145 struct user64_timeval tv[2];
91447636 6146 error = copyin(usrtvp, (void *)tv, sizeof(tv));
b0d623f7
A
6147 if (error)
6148 return (error);
6149 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6150 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 6151 } else {
b0d623f7
A
6152 struct user32_timeval tv[2];
6153 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6154 if (error)
6155 return (error);
6156 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6157 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 6158 }
9bccf70c
A
6159 }
6160 return 0;
6161}
6162
6163static int
2d21ac55 6164setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
91447636 6165 int nullflag)
9bccf70c
A
6166{
6167 int error;
91447636
A
6168 struct vnode_attr va;
6169 kauth_action_t action;
e5568f75
A
6170
6171 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6172
91447636
A
6173 VATTR_INIT(&va);
6174 VATTR_SET(&va, va_access_time, ts[0]);
6175 VATTR_SET(&va, va_modify_time, ts[1]);
9bccf70c 6176 if (nullflag)
91447636
A
6177 va.va_vaflags |= VA_UTIMES_NULL;
6178
2d21ac55
A
6179#if NAMEDSTREAMS
6180 /* utimes calls are not allowed for resource forks. */
6181 if (vp->v_flag & VISNAMEDSTREAM) {
6182 error = EPERM;
6183 goto out;
6184 }
6185#endif
6186
6187#if CONFIG_MACF
6188 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
6189 if (error)
6190 goto out;
6191#endif
6192 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
6193 if (!nullflag && error == EACCES)
6194 error = EPERM;
91447636 6195 goto out;
2d21ac55
A
6196 }
6197
91447636 6198 /* since we may not need to auth anything, check here */
2d21ac55
A
6199 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6200 if (!nullflag && error == EACCES)
6201 error = EPERM;
91447636 6202 goto out;
2d21ac55 6203 }
91447636 6204 error = vnode_setattr(vp, &va, ctx);
4a249263 6205
9bccf70c
A
6206out:
6207 return error;
6208}
6209
1c79356b
A
6210/*
6211 * Set the access and modification times of a file.
6212 */
1c79356b
A
6213/* ARGSUSED */
6214int
b0d623f7 6215utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
1c79356b 6216{
9bccf70c 6217 struct timespec ts[2];
91447636 6218 user_addr_t usrtvp;
1c79356b
A
6219 int error;
6220 struct nameidata nd;
2d21ac55 6221 vfs_context_t ctx = vfs_context_current();
1c79356b 6222
2d21ac55
A
6223 /*
6224 * AUDIT: Needed to change the order of operations to do the
55e303ae
A
6225 * name lookup first because auditing wants the path.
6226 */
6d2010ae 6227 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
2d21ac55 6228 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
6229 error = namei(&nd);
6230 if (error)
9bccf70c 6231 return (error);
91447636 6232 nameidone(&nd);
55e303ae 6233
91447636
A
6234 /*
6235 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6236 * the current time instead.
6237 */
55e303ae 6238 usrtvp = uap->tptr;
91447636
A
6239 if ((error = getutimes(usrtvp, ts)) != 0)
6240 goto out;
6241
2d21ac55 6242 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
91447636
A
6243
6244out:
6245 vnode_put(nd.ni_vp);
1c79356b
A
6246 return (error);
6247}
6248
9bccf70c
A
6249/*
6250 * Set the access and modification times of a file.
6251 */
9bccf70c
A
6252/* ARGSUSED */
6253int
b0d623f7 6254futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
9bccf70c
A
6255{
6256 struct timespec ts[2];
2d21ac55 6257 vnode_t vp;
91447636 6258 user_addr_t usrtvp;
9bccf70c
A
6259 int error;
6260
55e303ae 6261 AUDIT_ARG(fd, uap->fd);
9bccf70c
A
6262 usrtvp = uap->tptr;
6263 if ((error = getutimes(usrtvp, ts)) != 0)
6264 return (error);
91447636 6265 if ((error = file_vnode(uap->fd, &vp)) != 0)
9bccf70c 6266 return (error);
91447636
A
6267 if((error = vnode_getwithref(vp))) {
6268 file_drop(uap->fd);
6269 return(error);
6270 }
55e303ae 6271
2d21ac55 6272 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
91447636
A
6273 vnode_put(vp);
6274 file_drop(uap->fd);
6275 return(error);
9bccf70c
A
6276}
6277
1c79356b
A
6278/*
6279 * Truncate a file given its path name.
6280 */
1c79356b
A
6281/* ARGSUSED */
6282int
b0d623f7 6283truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
1c79356b 6284{
2d21ac55 6285 vnode_t vp;
91447636 6286 struct vnode_attr va;
2d21ac55 6287 vfs_context_t ctx = vfs_context_current();
1c79356b
A
6288 int error;
6289 struct nameidata nd;
91447636
A
6290 kauth_action_t action;
6291
0b4e3aa0
A
6292 if (uap->length < 0)
6293 return(EINVAL);
6d2010ae 6294 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
2d21ac55 6295 UIO_USERSPACE, uap->path, ctx);
91447636 6296 if ((error = namei(&nd)))
1c79356b
A
6297 return (error);
6298 vp = nd.ni_vp;
91447636
A
6299
6300 nameidone(&nd);
6301
6302 VATTR_INIT(&va);
6303 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55
A
6304
6305#if CONFIG_MACF
6306 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
6307 if (error)
6308 goto out;
6309#endif
6310
6311 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
91447636 6312 goto out;
2d21ac55 6313 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
91447636 6314 goto out;
2d21ac55 6315 error = vnode_setattr(vp, &va, ctx);
91447636
A
6316out:
6317 vnode_put(vp);
1c79356b
A
6318 return (error);
6319}
6320
6321/*
6322 * Truncate a file given a file descriptor.
6323 */
1c79356b
A
6324/* ARGSUSED */
6325int
b0d623f7 6326ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
1c79356b 6327{
2d21ac55 6328 vfs_context_t ctx = vfs_context_current();
91447636 6329 struct vnode_attr va;
2d21ac55 6330 vnode_t vp;
91447636
A
6331 struct fileproc *fp;
6332 int error ;
6333 int fd = uap->fd;
1c79356b 6334
55e303ae 6335 AUDIT_ARG(fd, uap->fd);
0b4e3aa0
A
6336 if (uap->length < 0)
6337 return(EINVAL);
1c79356b 6338
91447636
A
6339 if ( (error = fp_lookup(p,fd,&fp,0)) ) {
6340 return(error);
6341 }
1c79356b 6342
39236c6e
A
6343 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
6344 case DTYPE_PSXSHM:
91447636
A
6345 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
6346 goto out;
39236c6e
A
6347 case DTYPE_VNODE:
6348 break;
6349 default:
91447636
A
6350 error = EINVAL;
6351 goto out;
1c79356b 6352 }
1c79356b 6353
2d21ac55 6354 vp = (vnode_t)fp->f_fglob->fg_data;
e5568f75 6355
91447636
A
6356 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
6357 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6358 error = EINVAL;
6359 goto out;
1c79356b 6360 }
1c79356b 6361
91447636
A
6362 if ((error = vnode_getwithref(vp)) != 0) {
6363 goto out;
6364 }
1c79356b 6365
91447636 6366 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 6367
2d21ac55
A
6368#if CONFIG_MACF
6369 error = mac_vnode_check_truncate(ctx,
6370 fp->f_fglob->fg_cred, vp);
6371 if (error) {
6372 (void)vnode_put(vp);
6373 goto out;
6374 }
6375#endif
91447636
A
6376 VATTR_INIT(&va);
6377 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55 6378 error = vnode_setattr(vp, &va, ctx);
91447636
A
6379 (void)vnode_put(vp);
6380out:
6381 file_drop(fd);
6382 return (error);
1c79356b 6383}
91447636 6384
1c79356b
A
6385
6386/*
b0d623f7 6387 * Sync an open file with synchronized I/O _file_ integrity completion
1c79356b 6388 */
1c79356b
A
6389/* ARGSUSED */
6390int
b0d623f7 6391fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
1c79356b 6392{
2d21ac55 6393 __pthread_testcancel(1);
b0d623f7
A
6394 return(fsync_common(p, uap, MNT_WAIT));
6395}
6396
6397
6398/*
6399 * Sync an open file with synchronized I/O _file_ integrity completion
6400 *
6401 * Notes: This is a legacy support function that does not test for
6402 * thread cancellation points.
6403 */
6404/* ARGSUSED */
6405int
6406fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6407{
6408 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
2d21ac55
A
6409}
6410
b0d623f7
A
6411
6412/*
6413 * Sync an open file with synchronized I/O _data_ integrity completion
6414 */
6415/* ARGSUSED */
2d21ac55 6416int
b0d623f7
A
6417fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6418{
6419 __pthread_testcancel(1);
6420 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6421}
6422
6423
6424/*
6425 * fsync_common
6426 *
6427 * Common fsync code to support both synchronized I/O file integrity completion
6428 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6429 *
6430 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6431 * will only guarantee that the file data contents are retrievable. If
6432 * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
6433 * includes additional metadata unnecessary for retrieving the file data
6434 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6435 * storage.
6436 *
6437 * Parameters: p The process
6438 * uap->fd The descriptor to synchronize
6439 * flags The data integrity flags
6440 *
6441 * Returns: int Success
6442 * fp_getfvp:EBADF Bad file descriptor
6443 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6444 * VNOP_FSYNC:??? unspecified
6445 *
6446 * Notes: We use struct fsync_args because it is a short name, and all
6447 * caller argument structures are otherwise identical.
6448 */
6449static int
6450fsync_common(proc_t p, struct fsync_args *uap, int flags)
2d21ac55
A
6451{
6452 vnode_t vp;
91447636 6453 struct fileproc *fp;
2d21ac55 6454 vfs_context_t ctx = vfs_context_current();
1c79356b
A
6455 int error;
6456
b0d623f7
A
6457 AUDIT_ARG(fd, uap->fd);
6458
91447636 6459 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
1c79356b 6460 return (error);
91447636
A
6461 if ( (error = vnode_getwithref(vp)) ) {
6462 file_drop(uap->fd);
6463 return(error);
6464 }
91447636 6465
b0d623f7
A
6466 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6467
6468 error = VNOP_FSYNC(vp, flags, ctx);
2d21ac55
A
6469
6470#if NAMEDRSRCFORK
6471 /* Sync resource fork shadow file if necessary. */
6472 if ((error == 0) &&
6473 (vp->v_flag & VISNAMEDSTREAM) &&
6474 (vp->v_parent != NULLVP) &&
b0d623f7 6475 vnode_isshadow(vp) &&
2d21ac55
A
6476 (fp->f_flags & FP_WRITTEN)) {
6477 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
6478 }
6479#endif
91447636
A
6480
6481 (void)vnode_put(vp);
6482 file_drop(uap->fd);
1c79356b
A
6483 return (error);
6484}
6485
6486/*
6487 * Duplicate files. Source must be a file, target must be a file or
6488 * must not exist.
91447636
A
6489 *
6490 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6491 * perform inheritance correctly.
1c79356b 6492 */
1c79356b
A
6493/* ARGSUSED */
6494int
b0d623f7 6495copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
1c79356b 6496{
91447636 6497 vnode_t tvp, fvp, tdvp, sdvp;
1c79356b
A
6498 struct nameidata fromnd, tond;
6499 int error;
2d21ac55 6500 vfs_context_t ctx = vfs_context_current();
55e303ae
A
6501
6502 /* Check that the flags are valid. */
1c79356b
A
6503
6504 if (uap->flags & ~CPF_MASK) {
55e303ae
A
6505 return(EINVAL);
6506 }
1c79356b 6507
6d2010ae 6508 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1,
2d21ac55 6509 UIO_USERSPACE, uap->from, ctx);
91447636 6510 if ((error = namei(&fromnd)))
1c79356b
A
6511 return (error);
6512 fvp = fromnd.ni_vp;
6513
6d2010ae
A
6514 NDINIT(&tond, CREATE, OP_LINK,
6515 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6516 UIO_USERSPACE, uap->to, ctx);
91447636 6517 if ((error = namei(&tond))) {
1c79356b
A
6518 goto out1;
6519 }
6520 tdvp = tond.ni_dvp;
6521 tvp = tond.ni_vp;
91447636 6522
1c79356b
A
6523 if (tvp != NULL) {
6524 if (!(uap->flags & CPF_OVERWRITE)) {
6525 error = EEXIST;
6526 goto out;
6527 }
6528 }
1c79356b
A
6529 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
6530 error = EISDIR;
6531 goto out;
6532 }
6533
2d21ac55 6534 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
1c79356b
A
6535 goto out;
6536
6537 if (fvp == tdvp)
6538 error = EINVAL;
6539 /*
6540 * If source is the same as the destination (that is the
6541 * same inode number) then there is nothing to do.
6542 * (fixed to have POSIX semantics - CSM 3/2/98)
6543 */
6544 if (fvp == tvp)
6545 error = -1;
91447636 6546 if (!error)
2d21ac55 6547 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
1c79356b 6548out:
91447636
A
6549 sdvp = tond.ni_startdir;
6550 /*
6551 * nameidone has to happen before we vnode_put(tdvp)
6552 * since it may need to release the fs_nodelock on the tdvp
6553 */
6554 nameidone(&tond);
6555
6556 if (tvp)
6557 vnode_put(tvp);
6558 vnode_put(tdvp);
6559 vnode_put(sdvp);
1c79356b 6560out1:
91447636
A
6561 vnode_put(fvp);
6562
1c79356b 6563 if (fromnd.ni_startdir)
91447636
A
6564 vnode_put(fromnd.ni_startdir);
6565 nameidone(&fromnd);
6566
1c79356b
A
6567 if (error == -1)
6568 return (0);
6569 return (error);
6570}
6571
91447636 6572
1c79356b
A
6573/*
6574 * Rename files. Source and destination must either both be directories,
6575 * or both not be directories. If target is a directory, it must be empty.
6576 */
1c79356b 6577/* ARGSUSED */
fe8ab488
A
6578static int
6579renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
6580 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
1c79356b 6581{
91447636
A
6582 vnode_t tvp, tdvp;
6583 vnode_t fvp, fdvp;
316670eb 6584 struct nameidata *fromnd, *tond;
1c79356b 6585 int error;
593a1d5f 6586 int do_retry;
1c79356b 6587 int mntrename;
2d21ac55 6588 int need_event;
6d2010ae 6589 const char *oname = NULL;
2d21ac55 6590 char *from_name = NULL, *to_name = NULL;
b0d623f7 6591 int from_len=0, to_len=0;
91447636
A
6592 int holding_mntlock;
6593 mount_t locked_mp = NULL;
6d2010ae 6594 vnode_t oparent = NULLVP;
b0d623f7 6595#if CONFIG_FSE
91447636 6596 fse_info from_finfo, to_finfo;
b0d623f7
A
6597#endif
6598 int from_truncated=0, to_truncated;
6d2010ae
A
6599 int batched = 0;
6600 struct vnode_attr *fvap, *tvap;
6601 int continuing = 0;
316670eb
A
6602 /* carving out a chunk for structs that are too big to be on stack. */
6603 struct {
6604 struct nameidata from_node, to_node;
6605 struct vnode_attr fv_attr, tv_attr;
6606 } * __rename_data;
6607 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
6608 fromnd = &__rename_data->from_node;
6609 tond = &__rename_data->to_node;
6610
91447636 6611 holding_mntlock = 0;
316670eb 6612 do_retry = 0;
91447636
A
6613retry:
6614 fvp = tvp = NULL;
6615 fdvp = tdvp = NULL;
6d2010ae 6616 fvap = tvap = NULL;
1c79356b
A
6617 mntrename = FALSE;
6618
316670eb 6619 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
fe8ab488 6620 segflg, from, ctx);
316670eb 6621 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
fe8ab488 6622
316670eb 6623 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
fe8ab488 6624 segflg, to, ctx);
316670eb 6625 tond->ni_flag = NAMEI_COMPOUNDRENAME;
fe8ab488 6626
6d2010ae 6627continue_lookup:
316670eb 6628 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
fe8ab488 6629 if ( (error = nameiat(fromnd, fromfd)) )
6d2010ae 6630 goto out1;
316670eb
A
6631 fdvp = fromnd->ni_dvp;
6632 fvp = fromnd->ni_vp;
1c79356b 6633
6d2010ae 6634 if (fvp && fvp->v_type == VDIR)
316670eb 6635 tond->ni_cnd.cn_flags |= WILLBEDIR;
6d2010ae 6636 }
2d21ac55 6637
316670eb 6638 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
fe8ab488 6639 if ( (error = nameiat(tond, tofd)) ) {
6d2010ae
A
6640 /*
6641 * Translate error code for rename("dir1", "dir2/.").
6642 */
fe8ab488 6643 if (error == EISDIR && fvp->v_type == VDIR)
6d2010ae
A
6644 error = EINVAL;
6645 goto out1;
6646 }
316670eb
A
6647 tdvp = tond->ni_dvp;
6648 tvp = tond->ni_vp;
fe8ab488 6649 }
91447636 6650
6d2010ae
A
6651 batched = vnode_compound_rename_available(fdvp);
6652 if (!fvp) {
fe8ab488 6653 /*
6d2010ae
A
6654 * Claim: this check will never reject a valid rename.
6655 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
6656 * Suppose fdvp and tdvp are not on the same mount.
fe8ab488 6657 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6d2010ae
A
6658 * then you can't move it to within another dir on the same mountpoint.
6659 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
6660 *
6661 * If this check passes, then we are safe to pass these vnodes to the same FS.
91447636 6662 */
6d2010ae
A
6663 if (fdvp->v_mount != tdvp->v_mount) {
6664 error = EXDEV;
6665 goto out1;
6666 }
6667 goto skipped_lookup;
1c79356b 6668 }
2d21ac55 6669
6d2010ae 6670 if (!batched) {
316670eb 6671 error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
6d2010ae
A
6672 if (error) {
6673 if (error == ENOENT) {
6674 /*
6675 * We encountered a race where after doing the namei, tvp stops
6676 * being valid. If so, simply re-drive the rename call from the
6677 * top.
6678 */
fe8ab488 6679 do_retry = 1;
6d2010ae 6680 }
91447636 6681 goto out1;
1c79356b
A
6682 }
6683 }
6d2010ae 6684
2d21ac55
A
6685 /*
6686 * If the source and destination are the same (i.e. they're
6687 * links to the same vnode) and the target file system is
6688 * case sensitive, then there is nothing to do.
6d2010ae
A
6689 *
6690 * XXX Come back to this.
2d21ac55
A
6691 */
6692 if (fvp == tvp) {
6693 int pathconf_val;
fe8ab488 6694
2d21ac55
A
6695 /*
6696 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
6697 * then assume that this file system is case sensitive.
6698 */
6699 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
6700 pathconf_val != 0) {
6701 goto out1;
fe8ab488 6702 }
2d21ac55 6703 }
91447636 6704
1c79356b
A
6705 /*
6706 * Allow the renaming of mount points.
6707 * - target must not exist
6708 * - target must reside in the same directory as source
6709 * - union mounts cannot be renamed
6710 * - "/" cannot be renamed
6d2010ae
A
6711 *
6712 * XXX Handle this in VFS after a continued lookup (if we missed
6713 * in the cache to start off)
1c79356b 6714 */
91447636 6715 if ((fvp->v_flag & VROOT) &&
1c79356b
A
6716 (fvp->v_type == VDIR) &&
6717 (tvp == NULL) &&
6718 (fvp->v_mountedhere == NULL) &&
91447636 6719 (fdvp == tdvp) &&
1c79356b
A
6720 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
6721 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
2d21ac55 6722 vnode_t coveredvp;
fe8ab488 6723
1c79356b 6724 /* switch fvp to the covered vnode */
91447636
A
6725 coveredvp = fvp->v_mount->mnt_vnodecovered;
6726 if ( (vnode_getwithref(coveredvp)) ) {
6727 error = ENOENT;
6728 goto out1;
6729 }
6730 vnode_put(fvp);
6731
6732 fvp = coveredvp;
1c79356b
A
6733 mntrename = TRUE;
6734 }
91447636
A
6735 /*
6736 * Check for cross-device rename.
6737 */
6738 if ((fvp->v_mount != tdvp->v_mount) ||
6739 (tvp && (fvp->v_mount != tvp->v_mount))) {
6740 error = EXDEV;
6741 goto out1;
6742 }
55e303ae 6743
91447636
A
6744 /*
6745 * If source is the same as the destination (that is the
6746 * same inode number) then there is nothing to do...
6747 * EXCEPT if the underlying file system supports case
6748 * insensitivity and is case preserving. In this case
6749 * the file system needs to handle the special case of
6750 * getting the same vnode as target (fvp) and source (tvp).
6751 *
6752 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
6753 * and _PC_CASE_PRESERVING can have this exception, and they need to
6754 * handle the special case of getting the same vnode as target and
6755 * source. NOTE: Then the target is unlocked going into vnop_rename,
6756 * so not to cause locking problems. There is a single reference on tvp.
6757 *
fe8ab488 6758 * NOTE - that fvp == tvp also occurs if they are hard linked and
b0d623f7
A
6759 * that correct behaviour then is just to return success without doing
6760 * anything.
6d2010ae
A
6761 *
6762 * XXX filesystem should take care of this itself, perhaps...
91447636
A
6763 */
6764 if (fvp == tvp && fdvp == tdvp) {
316670eb
A
6765 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
6766 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
6767 fromnd->ni_cnd.cn_namelen)) {
91447636 6768 goto out1;
55e303ae 6769 }
91447636 6770 }
55e303ae 6771
91447636
A
6772 if (holding_mntlock && fvp->v_mount != locked_mp) {
6773 /*
6774 * we're holding a reference and lock
6775 * on locked_mp, but it no longer matches
6776 * what we want to do... so drop our hold
6777 */
6778 mount_unlock_renames(locked_mp);
6779 mount_drop(locked_mp, 0);
6780 holding_mntlock = 0;
6781 }
6782 if (tdvp != fdvp && fvp->v_type == VDIR) {
6783 /*
6784 * serialize renames that re-shape
6785 * the tree... if holding_mntlock is
6786 * set, then we're ready to go...
6787 * otherwise we
6788 * first need to drop the iocounts
6789 * we picked up, second take the
6790 * lock to serialize the access,
6791 * then finally start the lookup
6792 * process over with the lock held
6793 */
6794 if (!holding_mntlock) {
6795 /*
6796 * need to grab a reference on
6797 * the mount point before we
6798 * drop all the iocounts... once
6799 * the iocounts are gone, the mount
6800 * could follow
6801 */
6802 locked_mp = fvp->v_mount;
6803 mount_ref(locked_mp, 0);
55e303ae 6804
91447636
A
6805 /*
6806 * nameidone has to happen before we vnode_put(tvp)
6807 * since it may need to release the fs_nodelock on the tvp
6808 */
316670eb 6809 nameidone(tond);
55e303ae 6810
91447636
A
6811 if (tvp)
6812 vnode_put(tvp);
6813 vnode_put(tdvp);
6814
6815 /*
6816 * nameidone has to happen before we vnode_put(fdvp)
6817 * since it may need to release the fs_nodelock on the fvp
6818 */
316670eb 6819 nameidone(fromnd);
55e303ae 6820
91447636
A
6821 vnode_put(fvp);
6822 vnode_put(fdvp);
6823
6824 mount_lock_renames(locked_mp);
6825 holding_mntlock = 1;
6826
6827 goto retry;
55e303ae 6828 }
91447636
A
6829 } else {
6830 /*
6831 * when we dropped the iocounts to take
fe8ab488 6832 * the lock, we allowed the identity of
91447636
A
6833 * the various vnodes to change... if they did,
6834 * we may no longer be dealing with a rename
6835 * that reshapes the tree... once we're holding
6836 * the iocounts, the vnodes can't change type
6837 * so we're free to drop the lock at this point
6838 * and continue on
1c79356b 6839 */
91447636
A
6840 if (holding_mntlock) {
6841 mount_unlock_renames(locked_mp);
6842 mount_drop(locked_mp, 0);
6843 holding_mntlock = 0;
1c79356b 6844 }
91447636 6845 }
6d2010ae 6846
91447636
A
6847 // save these off so we can later verify that fvp is the same
6848 oname = fvp->v_name;
6849 oparent = fvp->v_parent;
55e303ae 6850
6d2010ae 6851skipped_lookup:
2d21ac55 6852#if CONFIG_FSE
6d2010ae 6853 need_event = need_fsevent(FSE_RENAME, fdvp);
fe8ab488 6854 if (need_event) {
6d2010ae
A
6855 if (fvp) {
6856 get_fse_info(fvp, &from_finfo, ctx);
6857 } else {
316670eb 6858 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
6d2010ae
A
6859 if (error) {
6860 goto out1;
6861 }
6862
316670eb 6863 fvap = &__rename_data->fv_attr;
6d2010ae 6864 }
55e303ae 6865
91447636 6866 if (tvp) {
2d21ac55 6867 get_fse_info(tvp, &to_finfo, ctx);
6d2010ae 6868 } else if (batched) {
316670eb 6869 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
6d2010ae
A
6870 if (error) {
6871 goto out1;
6872 }
6873
316670eb 6874 tvap = &__rename_data->tv_attr;
2d21ac55
A
6875 }
6876 }
6877#else
6878 need_event = 0;
6879#endif /* CONFIG_FSE */
6880
6881 if (need_event || kauth_authorize_fileop_has_listeners()) {
2d21ac55 6882 if (from_name == NULL) {
6d2010ae
A
6883 GET_PATH(from_name);
6884 if (from_name == NULL) {
6885 error = ENOMEM;
6886 goto out1;
6887 }
91447636 6888 }
b0d623f7 6889
316670eb 6890 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
55e303ae 6891
2d21ac55 6892 if (to_name == NULL) {
6d2010ae
A
6893 GET_PATH(to_name);
6894 if (to_name == NULL) {
6895 error = ENOMEM;
6896 goto out1;
6897 }
2d21ac55 6898 }
91447636 6899
316670eb 6900 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
fe8ab488
A
6901 }
6902#if CONFIG_SECLUDED_RENAME
6903 if (flags & VFS_SECLUDE_RENAME) {
6904 fromnd->ni_cnd.cn_flags |= CN_SECLUDE_RENAME;
6905 }
6906#else
6907 #pragma unused(flags)
6908#endif
316670eb
A
6909 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
6910 tdvp, &tvp, &tond->ni_cnd, tvap,
6d2010ae 6911 0, ctx);
55e303ae 6912
91447636
A
6913 if (holding_mntlock) {
6914 /*
6915 * we can drop our serialization
6916 * lock now
6917 */
6918 mount_unlock_renames(locked_mp);
6919 mount_drop(locked_mp, 0);
6920 holding_mntlock = 0;
6921 }
6922 if (error) {
6d2010ae 6923 if (error == EKEEPLOOKING) {
316670eb
A
6924 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6925 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6d2010ae
A
6926 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
6927 }
6928 }
6929
316670eb
A
6930 fromnd->ni_vp = fvp;
6931 tond->ni_vp = tvp;
fe8ab488 6932
6d2010ae
A
6933 goto continue_lookup;
6934 }
6935
6936 /*
fe8ab488
A
6937 * We may encounter a race in the VNOP where the destination didn't
6938 * exist when we did the namei, but it does by the time we go and
6d2010ae
A
6939 * try to create the entry. In this case, we should re-drive this rename
6940 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
fe8ab488 6941 * but other filesystems susceptible to this race could return it, too.
6d2010ae
A
6942 */
6943 if (error == ERECYCLE) {
6944 do_retry = 1;
6945 }
55e303ae 6946
91447636 6947 goto out1;
fe8ab488
A
6948 }
6949
6950 /* call out to allow 3rd party notification of rename.
91447636
A
6951 * Ignore result of kauth_authorize_fileop call.
6952 */
fe8ab488
A
6953 kauth_authorize_fileop(vfs_context_ucred(ctx),
6954 KAUTH_FILEOP_RENAME,
2d21ac55 6955 (uintptr_t)from_name, (uintptr_t)to_name);
91447636 6956
2d21ac55 6957#if CONFIG_FSE
91447636 6958 if (from_name != NULL && to_name != NULL) {
b0d623f7
A
6959 if (from_truncated || to_truncated) {
6960 // set it here since only the from_finfo gets reported up to user space
6961 from_finfo.mode |= FSE_TRUNCATED_PATH;
6962 }
6d2010ae
A
6963
6964 if (tvap && tvp) {
6965 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
6966 }
6967 if (fvap) {
6968 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
6969 }
6970
91447636 6971 if (tvp) {
2d21ac55 6972 add_fsevent(FSE_RENAME, ctx,
91447636
A
6973 FSE_ARG_STRING, from_len, from_name,
6974 FSE_ARG_FINFO, &from_finfo,
6975 FSE_ARG_STRING, to_len, to_name,
6976 FSE_ARG_FINFO, &to_finfo,
6977 FSE_ARG_DONE);
55e303ae 6978 } else {
2d21ac55 6979 add_fsevent(FSE_RENAME, ctx,
91447636
A
6980 FSE_ARG_STRING, from_len, from_name,
6981 FSE_ARG_FINFO, &from_finfo,
6982 FSE_ARG_STRING, to_len, to_name,
6983 FSE_ARG_DONE);
6984 }
6985 }
2d21ac55 6986#endif /* CONFIG_FSE */
fe8ab488 6987
91447636
A
6988 /*
6989 * update filesystem's mount point data
6990 */
6991 if (mntrename) {
6992 char *cp, *pathend, *mpname;
6993 char * tobuf;
6994 struct mount *mp;
6995 int maxlen;
6996 size_t len = 0;
6997
6998 mp = fvp->v_mountedhere;
6999
7000 if (vfs_busy(mp, LK_NOWAIT)) {
7001 error = EBUSY;
7002 goto out1;
55e303ae 7003 }
91447636 7004 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
55e303ae 7005
fe8ab488
A
7006 if (UIO_SEG_IS_USER_SPACE(segflg))
7007 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
7008 else
7009 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
91447636
A
7010 if (!error) {
7011 /* find current mount point prefix */
7012 pathend = &mp->mnt_vfsstat.f_mntonname[0];
7013 for (cp = pathend; *cp != '\0'; ++cp) {
7014 if (*cp == '/')
7015 pathend = cp + 1;
7016 }
7017 /* find last component of target name */
7018 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
7019 if (*cp == '/')
7020 mpname = cp + 1;
7021 }
7022 /* append name to prefix */
7023 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
7024 bzero(pathend, maxlen);
2d21ac55 7025 strlcpy(pathend, mpname, maxlen);
91447636
A
7026 }
7027 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
7028
7029 vfs_unbusy(mp);
7030 }
7031 /*
fe8ab488 7032 * fix up name & parent pointers. note that we first
91447636
A
7033 * check that fvp has the same name/parent pointers it
7034 * had before the rename call... this is a 'weak' check
7035 * at best...
6d2010ae
A
7036 *
7037 * XXX oparent and oname may not be set in the compound vnop case
91447636 7038 */
6d2010ae 7039 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
91447636
A
7040 int update_flags;
7041
7042 update_flags = VNODE_UPDATE_NAME;
7043
7044 if (fdvp != tdvp)
7045 update_flags |= VNODE_UPDATE_PARENT;
7046
316670eb 7047 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
1c79356b
A
7048 }
7049out1:
593a1d5f
A
7050 if (to_name != NULL) {
7051 RELEASE_PATH(to_name);
7052 to_name = NULL;
7053 }
7054 if (from_name != NULL) {
7055 RELEASE_PATH(from_name);
7056 from_name = NULL;
7057 }
91447636
A
7058 if (holding_mntlock) {
7059 mount_unlock_renames(locked_mp);
7060 mount_drop(locked_mp, 0);
593a1d5f 7061 holding_mntlock = 0;
91447636
A
7062 }
7063 if (tdvp) {
7064 /*
7065 * nameidone has to happen before we vnode_put(tdvp)
7066 * since it may need to release the fs_nodelock on the tdvp
7067 */
316670eb 7068 nameidone(tond);
91447636
A
7069
7070 if (tvp)
7071 vnode_put(tvp);
7072 vnode_put(tdvp);
7073 }
7074 if (fdvp) {
7075 /*
7076 * nameidone has to happen before we vnode_put(fdvp)
7077 * since it may need to release the fs_nodelock on the fdvp
7078 */
316670eb 7079 nameidone(fromnd);
91447636
A
7080
7081 if (fvp)
7082 vnode_put(fvp);
7083 vnode_put(fdvp);
7084 }
fe8ab488 7085
6d2010ae
A
7086 /*
7087 * If things changed after we did the namei, then we will re-drive
7088 * this rename call from the top.
7089 */
316670eb 7090 if (do_retry) {
6d2010ae 7091 do_retry = 0;
593a1d5f
A
7092 goto retry;
7093 }
316670eb
A
7094
7095 FREE(__rename_data, M_TEMP);
1c79356b
A
7096 return (error);
7097}
7098
fe8ab488
A
7099int
7100rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7101{
7102 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7103 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7104}
7105
#if CONFIG_SECLUDED_RENAME
/*
 * rename_ext: like rename(), but hands the caller-supplied uap->flags
 * through to renameat_internal().
 */
int
rename_ext(__unused proc_t p, struct rename_ext_args *uap, __unused int32_t *retval)
{
	vfs_context_t ctx = vfs_context_current();

	return renameat_internal(ctx, AT_FDCWD, uap->from, AT_FDCWD, uap->to,
	    UIO_USERSPACE, uap->flags);
}
#endif
7116
7117int
7118renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7119{
7120 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7121 uap->tofd, uap->to, UIO_USERSPACE, 0));
7122}
7123
1c79356b
A
7124/*
7125 * Make a directory file.
2d21ac55
A
7126 *
7127 * Returns: 0 Success
7128 * EEXIST
7129 * namei:???
7130 * vnode_authorize:???
7131 * vn_create:???
1c79356b 7132 */
1c79356b 7133/* ARGSUSED */
91447636 7134static int
fe8ab488
A
7135mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
7136 enum uio_seg segflg)
1c79356b 7137{
91447636 7138 vnode_t vp, dvp;
1c79356b 7139 int error;
91447636 7140 int update_flags = 0;
6d2010ae 7141 int batched;
1c79356b
A
7142 struct nameidata nd;
7143
91447636 7144 AUDIT_ARG(mode, vap->va_mode);
fe8ab488 7145 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
6d2010ae 7146 path, ctx);
9bccf70c 7147 nd.ni_cnd.cn_flags |= WILLBEDIR;
6d2010ae
A
7148 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
7149
7150continue_lookup:
fe8ab488 7151 error = nameiat(&nd, fd);
55e303ae 7152 if (error)
1c79356b 7153 return (error);
91447636 7154 dvp = nd.ni_dvp;
1c79356b 7155 vp = nd.ni_vp;
55e303ae 7156
fe8ab488
A
7157 if (vp != NULL) {
7158 error = EEXIST;
7159 goto out;
7160 }
7161
6d2010ae 7162 batched = vnode_compound_mkdir_available(dvp);
2d21ac55
A
7163
7164 VATTR_SET(vap, va_type, VDIR);
fe8ab488 7165
6d2010ae
A
7166 /*
7167 * XXX
7168 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7169 * only get EXISTS or EISDIR for existing path components, and not that it could see
7170 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7171 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7172 */
fe8ab488 7173 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
6d2010ae
A
7174 if (error == EACCES || error == EPERM) {
7175 int error2;
7176
7177 nameidone(&nd);
7178 vnode_put(dvp);
7179 dvp = NULLVP;
7180
fe8ab488
A
7181 /*
7182 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
6d2010ae
A
7183 * rather than EACCESS if the target exists.
7184 */
fe8ab488
A
7185 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
7186 path, ctx);
7187 error2 = nameiat(&nd, fd);
6d2010ae
A
7188 if (error2) {
7189 goto out;
7190 } else {
7191 vp = nd.ni_vp;
7192 error = EEXIST;
7193 goto out;
7194 }
7195 }
7196
2d21ac55 7197 goto out;
6d2010ae
A
7198 }
7199
7200 /*
fe8ab488 7201 * make the directory
6d2010ae 7202 */
fe8ab488 7203 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
6d2010ae
A
7204 if (error == EKEEPLOOKING) {
7205 nd.ni_vp = vp;
7206 goto continue_lookup;
7207 }
2d21ac55 7208
fe8ab488 7209 goto out;
6d2010ae 7210 }
fe8ab488 7211
91447636
A
7212 // Make sure the name & parent pointers are hooked up
7213 if (vp->v_name == NULL)
7214 update_flags |= VNODE_UPDATE_NAME;
7215 if (vp->v_parent == NULLVP)
7216 update_flags |= VNODE_UPDATE_PARENT;
7217
7218 if (update_flags)
7219 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
55e303ae 7220
2d21ac55 7221#if CONFIG_FSE
91447636 7222 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
2d21ac55 7223#endif
91447636
A
7224
7225out:
7226 /*
7227 * nameidone has to happen before we vnode_put(dvp)
7228 * since it may need to release the fs_nodelock on the dvp
7229 */
7230 nameidone(&nd);
7231
7232 if (vp)
6d2010ae 7233 vnode_put(vp);
fe8ab488 7234 if (dvp)
6d2010ae 7235 vnode_put(dvp);
55e303ae 7236
1c79356b
A
7237 return (error);
7238}
7239
b0d623f7
A
7240/*
7241 * mkdir_extended: Create a directory; with extended security (ACL).
7242 *
7243 * Parameters: p Process requesting to create the directory
7244 * uap User argument descriptor (see below)
fe8ab488 7245 * retval (ignored)
b0d623f7
A
7246 *
7247 * Indirect: uap->path Path of directory to create
7248 * uap->mode Access permissions to set
7249 * uap->xsecurity ACL to set
fe8ab488 7250 *
b0d623f7
A
7251 * Returns: 0 Success
7252 * !0 Not success
7253 *
7254 */
1c79356b 7255int
b0d623f7 7256mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
1c79356b 7257{
91447636
A
7258 int ciferror;
7259 kauth_filesec_t xsecdst;
7260 struct vnode_attr va;
7261
b0d623f7
A
7262 AUDIT_ARG(owner, uap->uid, uap->gid);
7263
91447636
A
7264 xsecdst = NULL;
7265 if ((uap->xsecurity != USER_ADDR_NULL) &&
7266 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
7267 return ciferror;
7268
91447636 7269 VATTR_INIT(&va);
fe8ab488 7270 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
91447636
A
7271 if (xsecdst != NULL)
7272 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
7273
fe8ab488
A
7274 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7275 UIO_USERSPACE);
91447636
A
7276 if (xsecdst != NULL)
7277 kauth_filesec_free(xsecdst);
7278 return ciferror;
1c79356b
A
7279}
7280
1c79356b 7281int
b0d623f7 7282mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
1c79356b 7283{
91447636 7284 struct vnode_attr va;
1c79356b 7285
91447636 7286 VATTR_INIT(&va);
fe8ab488 7287 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
e5568f75 7288
fe8ab488
A
7289 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7290 UIO_USERSPACE));
91447636 7291}
1c79356b 7292
91447636 7293int
fe8ab488
A
7294mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
7295{
7296 struct vnode_attr va;
7297
7298 VATTR_INIT(&va);
7299 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7300
7301 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
7302 UIO_USERSPACE));
7303}
7304
7305static int
7306rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
7307 enum uio_seg segflg)
1c79356b 7308{
2d21ac55 7309 vnode_t vp, dvp;
91447636
A
7310 int error;
7311 struct nameidata nd;
6d2010ae
A
7312 char *path = NULL;
7313 int len=0;
7314 int has_listeners = 0;
7315 int need_event = 0;
7316 int truncated = 0;
6d2010ae
A
7317#if CONFIG_FSE
7318 struct vnode_attr va;
7319#endif /* CONFIG_FSE */
7320 struct vnode_attr *vap = NULL;
7321 int batched;
91447636 7322
b0d623f7 7323 int restart_flag;
91447636 7324
fe8ab488 7325 /*
2d21ac55
A
7326 * This loop exists to restart rmdir in the unlikely case that two
7327 * processes are simultaneously trying to remove the same directory
7328 * containing orphaned appleDouble files.
7329 */
7330 do {
6d2010ae 7331 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
fe8ab488 7332 segflg, dirpath, ctx);
6d2010ae
A
7333 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
7334continue_lookup:
2d21ac55 7335 restart_flag = 0;
6d2010ae 7336 vap = NULL;
2d21ac55 7337
fe8ab488 7338 error = nameiat(&nd, fd);
2d21ac55
A
7339 if (error)
7340 return (error);
7341
7342 dvp = nd.ni_dvp;
7343 vp = nd.ni_vp;
7344
6d2010ae
A
7345 if (vp) {
7346 batched = vnode_compound_rmdir_available(vp);
2d21ac55 7347
6d2010ae
A
7348 if (vp->v_flag & VROOT) {
7349 /*
7350 * The root of a mounted filesystem cannot be deleted.
7351 */
7352 error = EBUSY;
7353 goto out;
7354 }
1c79356b 7355
2d21ac55 7356 /*
6d2010ae
A
7357 * Removed a check here; we used to abort if vp's vid
7358 * was not the same as what we'd seen the last time around.
7359 * I do not think that check was valid, because if we retry
7360 * and all dirents are gone, the directory could legitimately
7361 * be recycled but still be present in a situation where we would
fe8ab488 7362 * have had permission to delete. Therefore, we won't make
6d2010ae
A
7363 * an effort to preserve that check now that we may not have a
7364 * vp here.
2d21ac55 7365 */
6d2010ae
A
7366
7367 if (!batched) {
7368 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
7369 if (error) {
7370 goto out;
7371 }
7372 }
2d21ac55 7373 } else {
6d2010ae
A
7374 batched = 1;
7375
7376 if (!vnode_compound_rmdir_available(dvp)) {
7377 panic("No error, but no compound rmdir?");
7378 }
91447636 7379 }
6d2010ae 7380
2d21ac55 7381#if CONFIG_FSE
6d2010ae 7382 fse_info finfo;
b0d623f7 7383
6d2010ae
A
7384 need_event = need_fsevent(FSE_DELETE, dvp);
7385 if (need_event) {
7386 if (!batched) {
2d21ac55 7387 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
7388 } else {
7389 error = vfs_get_notify_attributes(&va);
7390 if (error) {
7391 goto out;
7392 }
7393
7394 vap = &va;
2d21ac55 7395 }
6d2010ae 7396 }
2d21ac55 7397#endif
6d2010ae
A
7398 has_listeners = kauth_authorize_fileop_has_listeners();
7399 if (need_event || has_listeners) {
7400 if (path == NULL) {
2d21ac55
A
7401 GET_PATH(path);
7402 if (path == NULL) {
7403 error = ENOMEM;
7404 goto out;
7405 }
6d2010ae 7406 }
b0d623f7 7407
6d2010ae 7408 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
b0d623f7 7409#if CONFIG_FSE
6d2010ae
A
7410 if (truncated) {
7411 finfo.mode |= FSE_TRUNCATED_PATH;
2d21ac55 7412 }
6d2010ae
A
7413#endif
7414 }
91447636 7415
6d2010ae
A
7416 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
7417 nd.ni_vp = vp;
7418 if (vp == NULLVP) {
7419 /* Couldn't find a vnode */
7420 goto out;
7421 }
2d21ac55 7422
6d2010ae
A
7423 if (error == EKEEPLOOKING) {
7424 goto continue_lookup;
7425 }
39236c6e 7426#if CONFIG_APPLEDOUBLE
6d2010ae
A
7427 /*
7428 * Special case to remove orphaned AppleDouble
7429 * files. I don't like putting this in the kernel,
7430 * but carbon does not like putting this in carbon either,
7431 * so here we are.
7432 */
7433 if (error == ENOTEMPTY) {
7434 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
7435 if (error == EBUSY) {
7436 goto out;
2d21ac55
A
7437 }
7438
6d2010ae 7439
2d21ac55 7440 /*
fe8ab488 7441 * Assuming everything went well, we will try the RMDIR again
2d21ac55 7442 */
6d2010ae
A
7443 if (!error)
7444 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
7445 }
39236c6e 7446#endif /* CONFIG_APPLEDOUBLE */
6d2010ae 7447 /*
fe8ab488 7448 * Call out to allow 3rd party notification of delete.
6d2010ae
A
7449 * Ignore result of kauth_authorize_fileop call.
7450 */
7451 if (!error) {
7452 if (has_listeners) {
fe8ab488
A
7453 kauth_authorize_fileop(vfs_context_ucred(ctx),
7454 KAUTH_FILEOP_DELETE,
6d2010ae
A
7455 (uintptr_t)vp,
7456 (uintptr_t)path);
7457 }
7458
7459 if (vp->v_flag & VISHARDLINK) {
7460 // see the comment in unlink1() about why we update
7461 // the parent of a hard link when it is removed
7462 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
7463 }
2d21ac55
A
7464
7465#if CONFIG_FSE
6d2010ae
A
7466 if (need_event) {
7467 if (vap) {
7468 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 7469 }
6d2010ae
A
7470 add_fsevent(FSE_DELETE, ctx,
7471 FSE_ARG_STRING, len, path,
7472 FSE_ARG_FINFO, &finfo,
7473 FSE_ARG_DONE);
2d21ac55 7474 }
6d2010ae 7475#endif
2d21ac55
A
7476 }
7477
7478out:
6d2010ae
A
7479 if (path != NULL) {
7480 RELEASE_PATH(path);
7481 path = NULL;
7482 }
2d21ac55
A
7483 /*
7484 * nameidone has to happen before we vnode_put(dvp)
7485 * since it may need to release the fs_nodelock on the dvp
7486 */
7487 nameidone(&nd);
2d21ac55 7488 vnode_put(dvp);
6d2010ae 7489
fe8ab488 7490 if (vp)
6d2010ae 7491 vnode_put(vp);
2d21ac55
A
7492
7493 if (restart_flag == 0) {
7494 wakeup_one((caddr_t)vp);
7495 return (error);
7496 }
7497 tsleep(vp, PVFS, "rm AD", 1);
7498
7499 } while (restart_flag != 0);
91447636 7500
1c79356b 7501 return (error);
2d21ac55 7502
1c79356b 7503}
91447636 7504
fe8ab488
A
7505/*
7506 * Remove a directory file.
7507 */
7508/* ARGSUSED */
7509int
7510rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
7511{
7512 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
7513 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
7514}
7515
2d21ac55
A
7516/* Get direntry length padded to 8 byte alignment */
7517#define DIRENT64_LEN(namlen) \
7518 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
7519
fe8ab488 7520errno_t
2d21ac55
A
7521vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
7522 int *numdirent, vfs_context_t ctxp)
7523{
7524 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
6d2010ae
A
7525 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
7526 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
2d21ac55
A
7527 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
7528 } else {
7529 size_t bufsize;
7530 void * bufptr;
7531 uio_t auio;
15129b1c 7532 struct direntry *entry64;
2d21ac55
A
7533 struct dirent *dep;
7534 int bytesread;
7535 int error;
7536
7537 /*
7538 * Our kernel buffer needs to be smaller since re-packing
7539 * will expand each dirent. The worse case (when the name
7540 * length is 3) corresponds to a struct direntry size of 32
7541 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
7542 * (4-byte aligned). So having a buffer that is 3/8 the size
7543 * will prevent us from reading more than we can pack.
7544 *
7545 * Since this buffer is wired memory, we will limit the
7546 * buffer size to a maximum of 32K. We would really like to
7547 * use 32K in the MIN(), but we use magic number 87371 to
7548 * prevent uio_resid() * 3 / 8 from overflowing.
7549 */
316670eb 7550 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
2d21ac55 7551 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
b0d623f7
A
7552 if (bufptr == NULL) {
7553 return ENOMEM;
7554 }
2d21ac55 7555
b0d623f7 7556 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
2d21ac55
A
7557 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
7558 auio->uio_offset = uio->uio_offset;
7559
7560 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
7561
7562 dep = (struct dirent *)bufptr;
7563 bytesread = bufsize - uio_resid(auio);
7564
15129b1c
A
7565 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
7566 M_TEMP, M_WAITOK);
2d21ac55
A
7567 /*
7568 * Convert all the entries and copy them out to user's buffer.
7569 */
7570 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
15129b1c
A
7571 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
7572
7573 bzero(entry64, enbufsize);
2d21ac55 7574 /* Convert a dirent to a dirent64. */
15129b1c
A
7575 entry64->d_ino = dep->d_ino;
7576 entry64->d_seekoff = 0;
7577 entry64->d_reclen = enbufsize;
7578 entry64->d_namlen = dep->d_namlen;
7579 entry64->d_type = dep->d_type;
7580 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
2d21ac55
A
7581
7582 /* Move to next entry. */
7583 dep = (struct dirent *)((char *)dep + dep->d_reclen);
7584
7585 /* Copy entry64 to user's buffer. */
15129b1c 7586 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
2d21ac55
A
7587 }
7588
7589 /* Update the real offset using the offset we got from VNOP_READDIR. */
7590 if (error == 0) {
7591 uio->uio_offset = auio->uio_offset;
7592 }
7593 uio_free(auio);
7594 FREE(bufptr, M_TEMP);
15129b1c 7595 FREE(entry64, M_TEMP);
2d21ac55
A
7596 return (error);
7597 }
7598}
1c79356b 7599
39236c6e
A
7600#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
7601
1c79356b
A
7602/*
7603 * Read a block of directory entries in a file system independent format.
7604 */
2d21ac55
A
7605static int
7606getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
7607 off_t *offset, int flags)
1c79356b 7608{
2d21ac55
A
7609 vnode_t vp;
7610 struct vfs_context context = *vfs_context_current(); /* local copy */
91447636
A
7611 struct fileproc *fp;
7612 uio_t auio;
2d21ac55
A
7613 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7614 off_t loff;
7615 int error, eofflag, numdirent;
91447636 7616 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 7617
2d21ac55
A
7618 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
7619 if (error) {
1c79356b 7620 return (error);
2d21ac55 7621 }
91447636
A
7622 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7623 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7624 error = EBADF;
7625 goto out;
7626 }
2d21ac55 7627
39236c6e
A
7628 if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
7629 bufsize = GETDIRENTRIES_MAXBUFSIZE;
7630
2d21ac55
A
7631#if CONFIG_MACF
7632 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
7633 if (error)
7634 goto out;
7635#endif
91447636
A
7636 if ( (error = vnode_getwithref(vp)) ) {
7637 goto out;
7638 }
91447636 7639 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
55e303ae 7640
1c79356b 7641unionread:
91447636
A
7642 if (vp->v_type != VDIR) {
7643 (void)vnode_put(vp);
7644 error = EINVAL;
7645 goto out;
7646 }
2d21ac55
A
7647
7648#if CONFIG_MACF
7649 error = mac_vnode_check_readdir(&context, vp);
7650 if (error != 0) {
7651 (void)vnode_put(vp);
7652 goto out;
7653 }
7654#endif /* MAC */
91447636
A
7655
7656 loff = fp->f_fglob->fg_offset;
2d21ac55
A
7657 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
7658 uio_addiov(auio, bufp, bufsize);
91447636 7659
2d21ac55
A
7660 if (flags & VNODE_READDIR_EXTENDED) {
7661 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
7662 fp->f_fglob->fg_offset = uio_offset(auio);
7663 } else {
7664 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
7665 fp->f_fglob->fg_offset = uio_offset(auio);
7666 }
91447636
A
7667 if (error) {
7668 (void)vnode_put(vp);
7669 goto out;
7670 }
1c79356b 7671
2d21ac55
A
7672 if ((user_ssize_t)bufsize == uio_resid(auio)){
7673 if (union_dircheckp) {
7674 error = union_dircheckp(&vp, fp, &context);
7675 if (error == -1)
7676 goto unionread;
7677 if (error)
7678 goto out;
1c79356b
A
7679 }
7680
39236c6e 7681 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
2d21ac55 7682 struct vnode *tvp = vp;
39236c6e
A
7683 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
7684 vnode_ref(vp);
7685 fp->f_fglob->fg_data = (caddr_t) vp;
7686 fp->f_fglob->fg_offset = 0;
7687 vnode_rele(tvp);
7688 vnode_put(tvp);
7689 goto unionread;
7690 }
7691 vp = tvp;
1c79356b
A
7692 }
7693 }
2d21ac55 7694
91447636 7695 vnode_put(vp);
2d21ac55
A
7696 if (offset) {
7697 *offset = loff;
7698 }
b0d623f7 7699
2d21ac55 7700 *bytesread = bufsize - uio_resid(auio);
91447636
A
7701out:
7702 file_drop(fd);
1c79356b
A
7703 return (error);
7704}
7705
2d21ac55
A
7706
7707int
b0d623f7 7708getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
2d21ac55
A
7709{
7710 off_t offset;
2d21ac55
A
7711 ssize_t bytesread;
7712 int error;
7713
7714 AUDIT_ARG(fd, uap->fd);
7715 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
7716
7717 if (error == 0) {
b0d623f7
A
7718 if (proc_is64bit(p)) {
7719 user64_long_t base = (user64_long_t)offset;
7720 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
7721 } else {
7722 user32_long_t base = (user32_long_t)offset;
7723 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
7724 }
2d21ac55
A
7725 *retval = bytesread;
7726 }
7727 return (error);
7728}
7729
7730int
7731getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
7732{
7733 off_t offset;
7734 ssize_t bytesread;
7735 int error;
7736
7737 AUDIT_ARG(fd, uap->fd);
7738 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
7739
7740 if (error == 0) {
7741 *retval = bytesread;
7742 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
7743 }
7744 return (error);
7745}
7746
7747
1c79356b
A
7748/*
7749 * Set the mode mask for creation of filesystem nodes.
b0d623f7 7750 * XXX implement xsecurity
1c79356b 7751 */
91447636
A
7752#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
7753static int
b0d623f7 7754umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
1c79356b 7755{
2d21ac55 7756 struct filedesc *fdp;
1c79356b 7757
91447636 7758 AUDIT_ARG(mask, newmask);
2d21ac55 7759 proc_fdlock(p);
1c79356b
A
7760 fdp = p->p_fd;
7761 *retval = fdp->fd_cmask;
91447636 7762 fdp->fd_cmask = newmask & ALLPERMS;
2d21ac55 7763 proc_fdunlock(p);
1c79356b
A
7764 return (0);
7765}
7766
b0d623f7
A
7767/*
7768 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
7769 *
7770 * Parameters: p Process requesting to set the umask
7771 * uap User argument descriptor (see below)
7772 * retval umask of the process (parameter p)
7773 *
7774 * Indirect: uap->newmask umask to set
7775 * uap->xsecurity ACL to set
7776 *
7777 * Returns: 0 Success
7778 * !0 Not success
7779 *
7780 */
7781int
7782umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
91447636
A
7783{
7784 int ciferror;
7785 kauth_filesec_t xsecdst;
7786
7787 xsecdst = KAUTH_FILESEC_NONE;
7788 if (uap->xsecurity != USER_ADDR_NULL) {
7789 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
7790 return ciferror;
7791 } else {
7792 xsecdst = KAUTH_FILESEC_NONE;
7793 }
7794
7795 ciferror = umask1(p, uap->newmask, xsecdst, retval);
7796
7797 if (xsecdst != KAUTH_FILESEC_NONE)
7798 kauth_filesec_free(xsecdst);
7799 return ciferror;
7800}
7801
7802int
b0d623f7 7803umask(proc_t p, struct umask_args *uap, int32_t *retval)
91447636
A
7804{
7805 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
7806}
7807
1c79356b
A
7808/*
7809 * Void all references to file by ripping underlying filesystem
7810 * away from vnode.
7811 */
1c79356b
A
7812/* ARGSUSED */
7813int
b0d623f7 7814revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
1c79356b 7815{
2d21ac55 7816 vnode_t vp;
91447636 7817 struct vnode_attr va;
2d21ac55 7818 vfs_context_t ctx = vfs_context_current();
1c79356b
A
7819 int error;
7820 struct nameidata nd;
7821
6d2010ae
A
7822 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
7823 uap->path, ctx);
55e303ae
A
7824 error = namei(&nd);
7825 if (error)
1c79356b
A
7826 return (error);
7827 vp = nd.ni_vp;
91447636
A
7828
7829 nameidone(&nd);
7830
b0d623f7
A
7831 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
7832 error = ENOTSUP;
7833 goto out;
7834 }
7835
7836 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
7837 error = EBUSY;
7838 goto out;
7839 }
7840
2d21ac55
A
7841#if CONFIG_MACF
7842 error = mac_vnode_check_revoke(ctx, vp);
7843 if (error)
7844 goto out;
7845#endif
7846
91447636
A
7847 VATTR_INIT(&va);
7848 VATTR_WANTED(&va, va_uid);
2d21ac55 7849 if ((error = vnode_getattr(vp, &va, ctx)))
1c79356b 7850 goto out;
2d21ac55
A
7851 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
7852 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
1c79356b 7853 goto out;
b0d623f7 7854 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
2d21ac55 7855 VNOP_REVOKE(vp, REVOKEALL, ctx);
1c79356b 7856out:
91447636 7857 vnode_put(vp);
1c79356b
A
7858 return (error);
7859}
7860
0b4e3aa0 7861
1c79356b
A
7862/*
7863 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
9bccf70c 7864 * The following system calls are designed to support features
1c79356b
A
7865 * which are specific to the HFS & HFS Plus volume formats
7866 */
7867
9bccf70c 7868
1c79356b 7869/*
39236c6e
A
7870 * Obtain attribute information on objects in a directory while enumerating
7871 * the directory.
7872 */
1c79356b
A
7873/* ARGSUSED */
7874int
b0d623f7 7875getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
1c79356b 7876{
2d21ac55 7877 vnode_t vp;
91447636
A
7878 struct fileproc *fp;
7879 uio_t auio = NULL;
7880 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
39236c6e 7881 uint32_t count, savecount;
2d21ac55 7882 uint32_t newstate;
91447636 7883 int error, eofflag;
2d21ac55 7884 uint32_t loff;
91447636 7885 struct attrlist attributelist;
2d21ac55 7886 vfs_context_t ctx = vfs_context_current();
91447636
A
7887 int fd = uap->fd;
7888 char uio_buf[ UIO_SIZEOF(1) ];
7889 kauth_action_t action;
7890
7891 AUDIT_ARG(fd, fd);
7892
7893 /* Get the attributes into kernel space */
2d21ac55 7894 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
91447636 7895 return(error);
2d21ac55
A
7896 }
7897 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
7898 return(error);
7899 }
39236c6e 7900 savecount = count;
2d21ac55 7901 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
91447636 7902 return (error);
2d21ac55 7903 }
91447636
A
7904 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7905 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7906 error = EBADF;
7907 goto out;
7908 }
2d21ac55
A
7909
7910
7911#if CONFIG_MACF
7912 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
7913 fp->f_fglob);
7914 if (error)
7915 goto out;
7916#endif
7917
7918
91447636
A
7919 if ( (error = vnode_getwithref(vp)) )
7920 goto out;
55e303ae 7921
91447636 7922 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 7923
39236c6e 7924unionread:
91447636
A
7925 if (vp->v_type != VDIR) {
7926 (void)vnode_put(vp);
7927 error = EINVAL;
7928 goto out;
7929 }
55e303ae 7930
2d21ac55
A
7931#if CONFIG_MACF
7932 error = mac_vnode_check_readdir(ctx, vp);
7933 if (error != 0) {
7934 (void)vnode_put(vp);
7935 goto out;
7936 }
7937#endif /* MAC */
7938
91447636
A
7939 /* set up the uio structure which will contain the users return buffer */
7940 loff = fp->f_fglob->fg_offset;
39236c6e 7941 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
91447636
A
7942 uio_addiov(auio, uap->buffer, uap->buffersize);
7943
91447636
A
7944 /*
7945 * If the only item requested is file names, we can let that past with
7946 * just LIST_DIRECTORY. If they want any other attributes, that means
7947 * they need SEARCH as well.
7948 */
7949 action = KAUTH_VNODE_LIST_DIRECTORY;
7950 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
7951 attributelist.fileattr || attributelist.dirattr)
7952 action |= KAUTH_VNODE_SEARCH;
7953
2d21ac55 7954 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
2d21ac55 7955
b0d623f7
A
7956 /* Believe it or not, uap->options only has 32-bits of valid
7957 * info, so truncate before extending again */
39236c6e
A
7958
7959 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
7960 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
7961 }
7962
7963 if (error) {
7964 (void) vnode_put(vp);
7965 goto out;
7966 }
7967
7968 /*
7969 * If we've got the last entry of a directory in a union mount
7970 * then reset the eofflag and pretend there's still more to come.
7971 * The next call will again set eofflag and the buffer will be empty,
7972 * so traverse to the underlying directory and do the directory
7973 * read there.
7974 */
7975 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
7976 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
7977 eofflag = 0;
7978 } else { // Empty buffer
7979 struct vnode *tvp = vp;
7980 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
7981 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
7982 fp->f_fglob->fg_data = (caddr_t) vp;
7983 fp->f_fglob->fg_offset = 0; // reset index for new dir
7984 count = savecount;
7985 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
7986 vnode_put(tvp);
7987 goto unionread;
7988 }
7989 vp = tvp;
7990 }
2d21ac55 7991 }
39236c6e 7992
91447636 7993 (void)vnode_put(vp);
1c79356b 7994
91447636
A
7995 if (error)
7996 goto out;
7997 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
1c79356b 7998
2d21ac55 7999 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
91447636 8000 goto out;
2d21ac55 8001 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
91447636 8002 goto out;
2d21ac55 8003 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
91447636 8004 goto out;
1c79356b
A
8005
8006 *retval = eofflag; /* similar to getdirentries */
91447636 8007 error = 0;
2d21ac55 8008out:
91447636
A
8009 file_drop(fd);
8010 return (error); /* return error earlier, an retval of 0 or 1 now */
1c79356b 8011
39236c6e 8012} /* end of getdirentriesattr system call */
1c79356b
A
8013
8014/*
8015* Exchange data between two files
8016*/
8017
1c79356b
A
8018/* ARGSUSED */
8019int
b0d623f7 8020exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
1c79356b
A
8021{
8022
8023 struct nameidata fnd, snd;
2d21ac55
A
8024 vfs_context_t ctx = vfs_context_current();
8025 vnode_t fvp;
8026 vnode_t svp;
8027 int error;
b0d623f7 8028 u_int32_t nameiflags;
91447636
A
8029 char *fpath = NULL;
8030 char *spath = NULL;
b0d623f7
A
8031 int flen=0, slen=0;
8032 int from_truncated=0, to_truncated=0;
8033#if CONFIG_FSE
91447636 8034 fse_info f_finfo, s_finfo;
b0d623f7
A
8035#endif
8036
1c79356b
A
8037 nameiflags = 0;
8038 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8039
6d2010ae
A
8040 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
8041 UIO_USERSPACE, uap->path1, ctx);
1c79356b 8042
6d2010ae
A
8043 error = namei(&fnd);
8044 if (error)
8045 goto out2;
1c79356b 8046
91447636
A
8047 nameidone(&fnd);
8048 fvp = fnd.ni_vp;
1c79356b 8049
6d2010ae
A
8050 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
8051 UIO_USERSPACE, uap->path2, ctx);
1c79356b 8052
6d2010ae
A
8053 error = namei(&snd);
8054 if (error) {
91447636 8055 vnode_put(fvp);
55e303ae 8056 goto out2;
6d2010ae 8057 }
91447636 8058 nameidone(&snd);
1c79356b
A
8059 svp = snd.ni_vp;
8060
91447636
A
8061 /*
8062 * if the files are the same, return an inval error
8063 */
1c79356b 8064 if (svp == fvp) {
91447636
A
8065 error = EINVAL;
8066 goto out;
8067 }
1c79356b 8068
91447636
A
8069 /*
8070 * if the files are on different volumes, return an error
8071 */
8072 if (svp->v_mount != fvp->v_mount) {
8073 error = EXDEV;
8074 goto out;
8075 }
2d21ac55 8076
39236c6e
A
8077 /* If they're not files, return an error */
8078 if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
db609669
A
8079 error = EINVAL;
8080 goto out;
8081 }
8082
2d21ac55
A
8083#if CONFIG_MACF
8084 error = mac_vnode_check_exchangedata(ctx,
8085 fvp, svp);
8086 if (error)
8087 goto out;
8088#endif
8089 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
8090 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
91447636 8091 goto out;
1c79356b 8092
2d21ac55
A
8093 if (
8094#if CONFIG_FSE
8095 need_fsevent(FSE_EXCHANGE, fvp) ||
8096#endif
8097 kauth_authorize_fileop_has_listeners()) {
8098 GET_PATH(fpath);
8099 GET_PATH(spath);
8100 if (fpath == NULL || spath == NULL) {
8101 error = ENOMEM;
8102 goto out;
8103 }
b0d623f7
A
8104
8105 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
8106 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
8107
2d21ac55
A
8108#if CONFIG_FSE
8109 get_fse_info(fvp, &f_finfo, ctx);
8110 get_fse_info(svp, &s_finfo, ctx);
b0d623f7
A
8111 if (from_truncated || to_truncated) {
8112 // set it here since only the f_finfo gets reported up to user space
8113 f_finfo.mode |= FSE_TRUNCATED_PATH;
8114 }
2d21ac55 8115#endif
91447636 8116 }
1c79356b 8117 /* Ok, make the call */
2d21ac55 8118 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
55e303ae 8119
91447636 8120 if (error == 0) {
2d21ac55 8121 const char *tmpname;
91447636
A
8122
8123 if (fpath != NULL && spath != NULL) {
8124 /* call out to allow 3rd party notification of exchangedata.
8125 * Ignore result of kauth_authorize_fileop call.
8126 */
2d21ac55 8127 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
91447636
A
8128 (uintptr_t)fpath, (uintptr_t)spath);
8129 }
8130 name_cache_lock();
8131
8132 tmpname = fvp->v_name;
8133 fvp->v_name = svp->v_name;
8134 svp->v_name = tmpname;
8135
8136 if (fvp->v_parent != svp->v_parent) {
2d21ac55 8137 vnode_t tmp;
91447636
A
8138
8139 tmp = fvp->v_parent;
8140 fvp->v_parent = svp->v_parent;
8141 svp->v_parent = tmp;
8142 }
8143 name_cache_unlock();
8144
2d21ac55 8145#if CONFIG_FSE
91447636 8146 if (fpath != NULL && spath != NULL) {
2d21ac55 8147 add_fsevent(FSE_EXCHANGE, ctx,
91447636
A
8148 FSE_ARG_STRING, flen, fpath,
8149 FSE_ARG_FINFO, &f_finfo,
8150 FSE_ARG_STRING, slen, spath,
8151 FSE_ARG_FINFO, &s_finfo,
8152 FSE_ARG_DONE);
8153 }
2d21ac55 8154#endif
55e303ae
A
8155 }
8156
1c79356b 8157out:
2d21ac55
A
8158 if (fpath != NULL)
8159 RELEASE_PATH(fpath);
8160 if (spath != NULL)
8161 RELEASE_PATH(spath);
91447636
A
8162 vnode_put(svp);
8163 vnode_put(fvp);
1c79356b 8164out2:
1c79356b 8165 return (error);
91447636 8166}
1c79356b 8167
39236c6e
A
8168/*
8169 * Return (in MB) the amount of freespace on the given vnode's volume.
8170 */
8171uint32_t freespace_mb(vnode_t vp);
8172
8173uint32_t
8174freespace_mb(vnode_t vp)
8175{
8176 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
8177 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
8178 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
8179}
8180
316670eb 8181#if CONFIG_SEARCHFS
1c79356b 8182
1c79356b
A
8183/* ARGSUSED */
8184
8185int
b0d623f7 8186searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
1c79356b 8187{
39236c6e
A
8188 vnode_t vp, tvp;
8189 int i, error=0;
1c79356b
A
8190 int fserror = 0;
8191 struct nameidata nd;
b0d623f7 8192 struct user64_fssearchblock searchblock;
1c79356b
A
8193 struct searchstate *state;
8194 struct attrlist *returnattrs;
b0d623f7 8195 struct timeval timelimit;
1c79356b 8196 void *searchparams1,*searchparams2;
91447636
A
8197 uio_t auio = NULL;
8198 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
b0d623f7 8199 uint32_t nummatches;
1c79356b 8200 int mallocsize;
b0d623f7 8201 uint32_t nameiflags;
2d21ac55 8202 vfs_context_t ctx = vfs_context_current();
91447636 8203 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 8204
39236c6e 8205 /* Start by copying in fsearchblock parameter list */
91447636 8206 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
8207 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
8208 timelimit.tv_sec = searchblock.timelimit.tv_sec;
8209 timelimit.tv_usec = searchblock.timelimit.tv_usec;
91447636
A
8210 }
8211 else {
b0d623f7
A
8212 struct user32_fssearchblock tmp_searchblock;
8213
91447636
A
8214 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
8215 // munge into 64-bit version
8216 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
8217 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
8218 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
8219 searchblock.maxmatches = tmp_searchblock.maxmatches;
b0d623f7
A
8220 /*
8221 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8222 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8223 */
8224 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
8225 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
91447636
A
8226 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
8227 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
8228 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
8229 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
8230 searchblock.searchattrs = tmp_searchblock.searchattrs;
8231 }
8232 if (error)
1c79356b
A
8233 return(error);
8234
a3d08fcd
A
8235 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
8236 */
8237 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
8238 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
8239 return(EINVAL);
91447636 8240
1c79356b
A
8241 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8242 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
8243 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8244 /* block. */
fe8ab488
A
8245 /* */
8246 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8247 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8248 /* assumes the size is still 556 bytes it will continue to work */
8249
91447636 8250 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
fe8ab488 8251 sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
1c79356b
A
8252
8253 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
8254
8255 /* Now set up the various pointers to the correct place in our newly allocated memory */
8256
8257 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
8258 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
8259 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
8260
8261 /* Now copy in the stuff given our local variables. */
8262
91447636 8263 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
1c79356b
A
8264 goto freeandexit;
8265
91447636 8266 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
1c79356b
A
8267 goto freeandexit;
8268
91447636 8269 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
1c79356b
A
8270 goto freeandexit;
8271
91447636 8272 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
1c79356b 8273 goto freeandexit;
1c79356b 8274
39236c6e
A
8275 /*
8276 * When searching a union mount, need to set the
8277 * start flag at the first call on each layer to
8278 * reset state for the new volume.
8279 */
8280 if (uap->options & SRCHFS_START)
8281 state->ss_union_layer = 0;
8282 else
8283 uap->options |= state->ss_union_flags;
8284 state->ss_union_flags = 0;
b0d623f7
A
8285
8286 /*
8287 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8288 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
8289 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8290 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
8291 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8292 */
8293
8294 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
8295 attrreference_t* string_ref;
8296 u_int32_t* start_length;
8297 user64_size_t param_length;
8298
8299 /* validate searchparams1 */
8300 param_length = searchblock.sizeofsearchparams1;
8301 /* skip the word that specifies length of the buffer */
8302 start_length= (u_int32_t*) searchparams1;
8303 start_length= start_length+1;
8304 string_ref= (attrreference_t*) start_length;
8305
8306 /* ensure no negative offsets or too big offsets */
8307 if (string_ref->attr_dataoffset < 0 ) {
8308 error = EINVAL;
8309 goto freeandexit;
8310 }
8311 if (string_ref->attr_length > MAXPATHLEN) {
8312 error = EINVAL;
8313 goto freeandexit;
8314 }
8315
8316 /* Check for pointer overflow in the string ref */
8317 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
8318 error = EINVAL;
8319 goto freeandexit;
8320 }
8321
8322 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
8323 error = EINVAL;
8324 goto freeandexit;
8325 }
8326 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
8327 error = EINVAL;
8328 goto freeandexit;
8329 }
8330 }
8331
8332 /* set up the uio structure which will contain the users return buffer */
39236c6e
A
8333 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8334 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
1c79356b 8335
91447636 8336 nameiflags = 0;
1c79356b 8337 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6d2010ae
A
8338 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
8339 UIO_USERSPACE, uap->path, ctx);
1c79356b 8340
55e303ae
A
8341 error = namei(&nd);
8342 if (error)
1c79356b 8343 goto freeandexit;
39236c6e 8344 vp = nd.ni_vp;
91447636 8345 nameidone(&nd);
39236c6e
A
8346
8347 /*
8348 * Switch to the root vnode for the volume
8349 */
8350 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
fe8ab488 8351 vnode_put(vp);
39236c6e
A
8352 if (error)
8353 goto freeandexit;
39236c6e
A
8354 vp = tvp;
8355
8356 /*
8357 * If it's a union mount, the path lookup takes
8358 * us to the top layer. But we may need to descend
8359 * to a lower layer. For non-union mounts the layer
8360 * is always zero.
8361 */
8362 for (i = 0; i < (int) state->ss_union_layer; i++) {
8363 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
8364 break;
8365 tvp = vp;
8366 vp = vp->v_mount->mnt_vnodecovered;
8367 if (vp == NULL) {
fe8ab488 8368 vnode_put(tvp);
39236c6e
A
8369 error = ENOENT;
8370 goto freeandexit;
8371 }
8372 vnode_getwithref(vp);
8373 vnode_put(tvp);
8374 }
1c79356b 8375
6d2010ae
A
8376#if CONFIG_MACF
8377 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
8378 if (error) {
8379 vnode_put(vp);
8380 goto freeandexit;
8381 }
8382#endif
8383
1c79356b
A
8384
8385 /*
8386 * If searchblock.maxmatches == 0, then skip the search. This has happened
39236c6e 8387 * before and sometimes the underlying code doesn't deal with it well.
1c79356b
A
8388 */
8389 if (searchblock.maxmatches == 0) {
8390 nummatches = 0;
8391 goto saveandexit;
8392 }
8393
8394 /*
39236c6e
A
8395 * All right, we have everything we need, so let's make that call.
8396 *
8397 * We keep special track of the return value from the file system:
8398 * EAGAIN is an acceptable error condition that shouldn't keep us
8399 * from copying out any results...
1c79356b
A
8400 */
8401
6d2010ae 8402 fserror = VNOP_SEARCHFS(vp,
39236c6e
A
8403 searchparams1,
8404 searchparams2,
8405 &searchblock.searchattrs,
8406 (u_long)searchblock.maxmatches,
8407 &timelimit,
8408 returnattrs,
8409 &nummatches,
8410 (u_long)uap->scriptcode,
8411 (u_long)uap->options,
8412 auio,
8413 (struct searchstate *) &state->ss_fsstate,
8414 ctx);
6d2010ae 8415
39236c6e
A
8416 /*
8417 * If it's a union mount we need to be called again
8418 * to search the mounted-on filesystem.
8419 */
8420 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
8421 state->ss_union_flags = SRCHFS_START;
8422 state->ss_union_layer++; // search next layer down
8423 fserror = EAGAIN;
8424 }
8425
6d2010ae
A
8426saveandexit:
8427
8428 vnode_put(vp);
8429
8430 /* Now copy out the stuff that needs copying out. That means the number of matches and the
8431 search state. Everything else was already put into the return buffer by the vop call. */
8432
8433 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
8434 goto freeandexit;
8435
39236c6e 8436 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
6d2010ae
A
8437 goto freeandexit;
8438
8439 error = fserror;
8440
8441freeandexit:
8442
8443 FREE(searchparams1,M_TEMP);
8444
8445 return(error);
8446
8447
8448} /* end of searchfs system call */
8449
316670eb
A
8450#else /* CONFIG_SEARCHFS */
8451
8452int
8453searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
8454{
8455 return (ENOTSUP);
8456}
8457
8458#endif /* CONFIG_SEARCHFS */
6d2010ae
A
8459
8460
/*
 * State shared by the namespace/snapshot handler machinery below.
 * The lock attribute/group objects are allocated in nspace_handler_init().
 */
8461lck_grp_attr_t * nspace_group_attr;
8462lck_attr_t * nspace_lock_attr;
8463lck_grp_t * nspace_mutex_group;
8464
/* held while nspace_items[] is scanned or modified; also the msleep() interlock */
8465lck_mtx_t nspace_handler_lock;
/* guards the per-type handler_busy flag in wait_for_namespace_event() */
8466lck_mtx_t nspace_handler_exclusion_lock;
8467
/* values of 0 and ~0 are treated as "no valid snapshot time" by the snapshot handler paths */
8468time_t snapshot_timestamp=0;
/* when non-zero, snapshot events on MNTK_VIRTUALDEV mounts are not skipped */
8469int nspace_allow_virtual_devs=0;
8470
8471void nspace_handler_init(void);
8472
/* One pending event in the nspace_items[] table. */
8473typedef struct nspace_item_info {
/* vnode the event is about; also the sleep/wakeup address for waiters on this slot */
8474 struct vnode *vp;
/* NULL or the caller-supplied argument (NSPACE_REARM_NO_ARG is stored as NULL) */
8475 void *arg;
/* operation that triggered the event, as passed to resolve_nspace_item_ext() */
8476 uint64_t op;
/* vnode_vid(vp) captured when the item was posted; used with vnode_getwithvid() */
8477 uint32_t vid;
/* NSPACE_ITEM_* bits; 0 means the slot is free */
8478 uint32_t flags;
/* assigned from ++nspace_token_id when a handler picks the item up */
8479 uint32_t token;
/* number of threads waiting on this slot */
8480 uint32_t refcount;
8481} nspace_item_info;
8482
8483#define MAX_NSPACE_ITEMS 128
8484nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
8485uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
/* its address is also the sleep channel for threads waiting for a free slot */
8486uint32_t nspace_token_id=0;
8487uint32_t nspace_handler_timeout = 15; // seconds
8488
/* item state bits */
8489#define NSPACE_ITEM_NEW 0x0001
8490#define NSPACE_ITEM_PROCESSING 0x0002
8491#define NSPACE_ITEM_DEAD 0x0004
8492#define NSPACE_ITEM_CANCELLED 0x0008
8493#define NSPACE_ITEM_DONE 0x0010
8494#define NSPACE_ITEM_RESET_TIMER 0x0020
8495
/* item event-type bits (which kind of handler should service the item) */
8496#define NSPACE_ITEM_NSPACE_EVENT 0x0040
8497#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
6d2010ae 8498
fe8ab488 8499#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
6d2010ae
A
8500
8501//#pragma optimization_level 0
8502
/* Handler kinds; the values index nspace_handlers[]. */
8503typedef enum {
8504 NSPACE_HANDLER_NSPACE = 0,
8505 NSPACE_HANDLER_SNAPSHOT = 1,
6d2010ae
A
8506
8507 NSPACE_HANDLER_COUNT,
8508} nspace_type_t;
8509
/* Registration record for one handler kind. */
8510typedef struct {
8511 uint64_t handler_tid;
8512 struct proc *handler_proc;
/* non-zero while a thread is inside wait_for_namespace_event() for this kind */
8513 int handler_busy;
8514} nspace_handler_t;
8515
8516nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
8517
39236c6e
A
8518/* namespace fsctl functions */
8519static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
8520static int nspace_item_flags_for_type(nspace_type_t nspace_type);
8521static int nspace_open_flags_for_type(nspace_type_t nspace_type);
8522static nspace_type_t nspace_type_for_op(uint64_t op);
8523static int nspace_is_special_process(struct proc *proc);
8524static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
8525static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
8526static int validate_namespace_args (int is64bit, int size);
8527static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
8528
8529
6d2010ae
A
8530static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
8531{
8532 switch(nspace_type) {
8533 case NSPACE_HANDLER_NSPACE:
8534 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
8535 case NSPACE_HANDLER_SNAPSHOT:
8536 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
6d2010ae
A
8537 default:
8538 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
8539 return 0;
8540 }
8541}
8542
8543static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
8544{
8545 switch(nspace_type) {
8546 case NSPACE_HANDLER_NSPACE:
8547 return NSPACE_ITEM_NSPACE_EVENT;
8548 case NSPACE_HANDLER_SNAPSHOT:
8549 return NSPACE_ITEM_SNAPSHOT_EVENT;
6d2010ae
A
8550 default:
8551 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
8552 return 0;
8553 }
8554}
8555
8556static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
8557{
8558 switch(nspace_type) {
8559 case NSPACE_HANDLER_NSPACE:
8560 return FREAD | FWRITE | O_EVTONLY;
8561 case NSPACE_HANDLER_SNAPSHOT:
6d2010ae
A
8562 return FREAD | O_EVTONLY;
8563 default:
8564 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
8565 return 0;
8566 }
8567}
8568
8569static inline nspace_type_t nspace_type_for_op(uint64_t op)
8570{
8571 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
8572 case NAMESPACE_HANDLER_NSPACE_EVENT:
8573 return NSPACE_HANDLER_NSPACE;
8574 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
8575 return NSPACE_HANDLER_SNAPSHOT;
6d2010ae
A
8576 default:
8577 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
8578 return NSPACE_HANDLER_NSPACE;
8579 }
8580}
8581
8582static inline int nspace_is_special_process(struct proc *proc)
8583{
8584 int i;
8585 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
8586 if (proc == nspace_handlers[i].handler_proc)
8587 return 1;
8588 }
8589 return 0;
8590}
8591
8592void
8593nspace_handler_init(void)
8594{
8595 nspace_lock_attr = lck_attr_alloc_init();
8596 nspace_group_attr = lck_grp_attr_alloc_init();
8597 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
8598 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
8599 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
8600 memset(&nspace_items[0], 0, sizeof(nspace_items));
8601}
8602
8603void
8604nspace_proc_exit(struct proc *p)
8605{
8606 int i, event_mask = 0;
8607
8608 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
8609 if (p == nspace_handlers[i].handler_proc) {
8610 event_mask |= nspace_item_flags_for_type(i);
8611 nspace_handlers[i].handler_tid = 0;
8612 nspace_handlers[i].handler_proc = NULL;
8613 }
8614 }
8615
8616 if (event_mask == 0) {
8617 return;
8618 }
8619
8620 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
8621 // if this process was the snapshot handler, zero snapshot_timeout
8622 snapshot_timestamp = 0;
8623 }
8624
8625 //
8626 // unblock anyone that's waiting for the handler that died
8627 //
8628 lck_mtx_lock(&nspace_handler_lock);
8629 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8630 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
8631
8632 if ( nspace_items[i].flags & event_mask ) {
8633
8634 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
8635 vnode_lock_spin(nspace_items[i].vp);
8636 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8637 vnode_unlock(nspace_items[i].vp);
8638 }
8639 nspace_items[i].vp = NULL;
8640 nspace_items[i].vid = 0;
8641 nspace_items[i].flags = NSPACE_ITEM_DONE;
8642 nspace_items[i].token = 0;
8643
8644 wakeup((caddr_t)&(nspace_items[i].vp));
8645 }
8646 }
8647 }
8648
8649 wakeup((caddr_t)&nspace_item_idx);
8650 lck_mtx_unlock(&nspace_handler_lock);
8651}
8652
8653
/*
 * Convenience form of resolve_nspace_item_ext() for callers that have no
 * extra argument to pass along with the event.
 */
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	void *no_arg = NULL;

	return resolve_nspace_item_ext(vp, op, no_arg);
}
8659
/*
 * Post an event for (vp, op) to the matching handler and sleep until the
 * handler resolves it, cancels it, or the wait times out.
 *
 * Returns 0 immediately when the vnode is not a regular file, directory or
 * symlink, when a snapshot event targets a virtual-device mount (unless
 * nspace_allow_virtual_devs is set), or when no handler of the required
 * kind is registered.  Returns EDEADLK if the caller is itself a handler
 * process.  Otherwise returns 0 when the item is marked done, the item's
 * token value when it is marked cancelled, ETIMEDOUT on timeout, or the
 * msleep() error.
 *
 * arg is stored with the item for the handler (NSPACE_REARM_NO_ARG is
 * stored as NULL); for snapshot events any non-NULL arg also sets
 * VNEEDSSNAPSHOT on the vnode.
 */
8660int
8661resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
8662{
8663 int i, error, keep_waiting;
8664 struct timespec ts;
8665 nspace_type_t nspace_type = nspace_type_for_op(op);
8666
8667 // only allow namespace events on regular files, directories and symlinks.
8668 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
8669 return 0;
8670 }
8671
8672 //
8673 // if this is a snapshot event and the vnode is on a
8674 // disk image just pretend nothing happened since any
8675 // change to the disk image will cause the disk image
8676 // itself to get backed up and this avoids multi-way
8677 // deadlocks between the snapshot handler and the ever
8678 // popular diskimages-helper process. the variable
8679 // nspace_allow_virtual_devs allows this behavior to
8680 // be overridden (for use by the Mobile TimeMachine
8681 // testing infrastructure which uses disk images)
8682 //
8683 if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
8684 && (vp->v_mount != NULL)
8685 && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
8686 && !nspace_allow_virtual_devs) {
8687
8688 return 0;
8689 }
8690
8691 // if (thread_tid(current_thread()) == namespace_handler_tid) {
 // no handler of this kind is registered, so there is nobody to wait for
8692 if (nspace_handlers[nspace_type].handler_proc == NULL) {
8693 return 0;
8694 }
8695
 // a handler process must not block waiting on a handler
8696 if (nspace_is_special_process(current_proc())) {
8697 return EDEADLK;
8698 }
8699
8700 lck_mtx_lock(&nspace_handler_lock);
8701
8702retry:
 // first look for an existing item for the same vnode and operation
8703 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8704 if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
8705 break;
8706 }
8707 }
8708
 // none found: look for a free slot (flags == 0); otherwise join the existing item
8709 if (i >= MAX_NSPACE_ITEMS) {
8710 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8711 if (nspace_items[i].flags == 0) {
8712 break;
8713 }
8714 }
8715 } else {
8716 nspace_items[i].refcount++;
8717 }
8718
 // table is full: sleep on nspace_token_id until a waiter releases a slot
8719 if (i >= MAX_NSPACE_ITEMS) {
8720 ts.tv_sec = nspace_handler_timeout;
8721 ts.tv_nsec = 0;
8722
8723 error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
8724 if (error == 0) {
8725 // an entry got free'd up, go see if we can get a slot
8726 goto retry;
8727 } else {
8728 lck_mtx_unlock(&nspace_handler_lock);
8729 return error;
8730 }
8731 }
8732
8733 //
8734 // if it didn't already exist, add it. if it did exist
8735 // we'll get woken up when someone does a wakeup() on
8736 // the slot in the nspace_items table.
8737 //
8738 if (vp != nspace_items[i].vp) {
8739 nspace_items[i].vp = vp;
39236c6e 8740 nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
6d2010ae
A
8741 nspace_items[i].op = op;
8742 nspace_items[i].vid = vnode_vid(vp);
8743 nspace_items[i].flags = NSPACE_ITEM_NEW;
8744 nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
8745 if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
8746 if (arg) {
8747 vnode_lock_spin(vp);
8748 vp->v_flag |= VNEEDSSNAPSHOT;
8749 vnode_unlock(vp);
8750 }
8751 }
8752
8753 nspace_items[i].token = 0;
8754 nspace_items[i].refcount = 1;
8755
 // let a handler sleeping in wait_for_namespace_event() know there is work
8756 wakeup((caddr_t)&nspace_item_idx);
8757 }
8758
8759 //
8760 // Now go to sleep until the handler does a wakeup on this
8761 // slot in the nspace_items table (or we timeout).
8762 //
8763 keep_waiting = 1;
8764 while(keep_waiting) {
8765 ts.tv_sec = nspace_handler_timeout;
8766 ts.tv_nsec = 0;
8767 error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
8768
8769 if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
8770 error = 0;
8771 } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
 // for a cancelled item the token field is returned as the error value
8772 error = nspace_items[i].token;
8773 } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
 // the handler asked for more time: clear the request and sleep again
8774 if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
8775 nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
8776 continue;
8777 } else {
8778 error = ETIMEDOUT;
8779 }
8780 } else if (error == 0) {
8781 // hmmm, why did we get woken up?
8782 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
8783 nspace_items[i].token);
8784 }
8785
 // last waiter out releases the slot
8786 if (--nspace_items[i].refcount == 0) {
8787 nspace_items[i].vp = NULL; // clear this so that no one will match on it again
8788 nspace_items[i].arg = NULL;
8789 nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
8790 nspace_items[i].flags = 0; // this clears it for re-use
8791 }
 // wake any thread sleeping in the "nspace-no-space" wait above
8792 wakeup(&nspace_token_id);
8793 keep_waiting = 0;
8794 }
8795
8796 lck_mtx_unlock(&nspace_handler_lock);
8797
8798 return error;
8799}
8800
8801
8802int
8803get_nspace_item_status(struct vnode *vp, int32_t *status)
8804{
8805 int i;
8806
8807 lck_mtx_lock(&nspace_handler_lock);
8808 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8809 if (nspace_items[i].vp == vp) {
8810 break;
8811 }
8812 }
8813
8814 if (i >= MAX_NSPACE_ITEMS) {
8815 lck_mtx_unlock(&nspace_handler_lock);
8816 return ENOENT;
8817 }
8818
8819 *status = nspace_items[i].flags;
8820 lck_mtx_unlock(&nspace_handler_lock);
8821 return 0;
8822}
8823
8824
/*
 * NOTE(review): everything up to the matching #endif is compiled out.
 *
 * build_volfs_path formats a "/.vol/<fsid>/<fileid>" path for vp into
 * path, using *len as the buffer size on entry and storing the
 * snprintf() result plus one in *len on return.  Returns 0 on success;
 * if vnode_getattr() fails a placeholder path is written and -1 is
 * returned.
 */
8825#if 0
8826static int
8827build_volfs_path(struct vnode *vp, char *path, int *len)
8828{
8829 struct vnode_attr va;
8830 int ret;
8831
8832 VATTR_INIT(&va);
8833 VATTR_WANTED(&va, va_fsid);
8834 VATTR_WANTED(&va, va_fileid);
8835
8836 if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
8837 *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
8838 ret = -1;
8839 } else {
8840 *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
8841 ret = 0;
8842 }
8843
8844 return ret;
8845}
8846#endif
8847
8848//
8849// Note: this function does NOT check permissions on all of the
8850// parent directories leading to this vnode. It should only be
8851// called on behalf of a root process. Otherwise a process may
8852// get access to a file because the file itself is readable even
8853// though its parent directories would prevent access.
8854//
8855static int
8856vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
8857{
8858 int error, action;
8859
8860 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8861 return error;
8862 }
8863
8864#if CONFIG_MACF
8865 error = mac_vnode_check_open(ctx, vp, fmode);
8866 if (error)
8867 return error;
8868#endif
1c79356b 8869
6d2010ae
A
8870 /* compute action to be authorized */
8871 action = 0;
8872 if (fmode & FREAD) {
8873 action |= KAUTH_VNODE_READ_DATA;
8874 }
8875 if (fmode & (FWRITE | O_TRUNC)) {
8876 /*
8877 * If we are writing, appending, and not truncating,
8878 * indicate that we are appending so that if the
8879 * UF_APPEND or SF_APPEND bits are set, we do not deny
8880 * the open.
8881 */
8882 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
8883 action |= KAUTH_VNODE_APPEND_DATA;
8884 } else {
8885 action |= KAUTH_VNODE_WRITE_DATA;
8886 }
8887 }
1c79356b 8888
6d2010ae
A
8889 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
8890 return error;
8891
1c79356b 8892
6d2010ae
A
8893 //
8894 // if the vnode is tagged VOPENEVT and the current process
8895 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
8896 // flag to the open mode so that this open won't count against
8897 // the vnode when carbon delete() does a vnode_isinuse() to see
8898 // if a file is currently in use. this allows spotlight
8899 // importers to not interfere with carbon apps that depend on
8900 // the no-delete-if-busy semantics of carbon delete().
8901 //
8902 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
8903 fmode |= O_EVTONLY;
8904 }
1c79356b 8905
6d2010ae
A
8906 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
8907 return error;
8908 }
8909 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
8910 VNOP_CLOSE(vp, fmode, ctx);
8911 return error;
8912 }
1c79356b 8913
4b17d6b6 8914 /* Call out to allow 3rd party notification of open.
6d2010ae
A
8915 * Ignore result of kauth_authorize_fileop call.
8916 */
4b17d6b6
A
8917#if CONFIG_MACF
8918 mac_vnode_notify_open(ctx, vp, fmode);
8919#endif
6d2010ae
A
8920 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
8921 (uintptr_t)vp, 0);
1c79356b 8922
1c79356b 8923
6d2010ae
A
8924 return 0;
8925}
1c79356b 8926
6d2010ae 8927static int
39236c6e 8928wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
6d2010ae
A
8929{
8930 int i, error=0, unblock=0;
8931 task_t curtask;
8932
8933 lck_mtx_lock(&nspace_handler_exclusion_lock);
8934 if (nspace_handlers[nspace_type].handler_busy) {
8935 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8936 return EBUSY;
8937 }
8938 nspace_handlers[nspace_type].handler_busy = 1;
8939 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8940
8941 /*
8942 * Any process that gets here will be one of the namespace handlers.
8943 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
8944 * as we can cause deadlocks to occur, because the namespace handler may prevent
8945 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
8946 * process.
8947 */
8948 curtask = current_task();
8949 bsd_set_dependency_capable (curtask);
8950
8951 lck_mtx_lock(&nspace_handler_lock);
8952 if (nspace_handlers[nspace_type].handler_proc == NULL) {
8953 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
8954 nspace_handlers[nspace_type].handler_proc = current_proc();
8955 }
8956
8957 while (error == 0) {
8958
8959 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8960 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
8961 if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
8962 continue;
8963 }
8964 break;
8965 }
8966 }
8967
8968 if (i < MAX_NSPACE_ITEMS) {
8969 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
8970 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
8971 nspace_items[i].token = ++nspace_token_id;
8972
8973 if (nspace_items[i].vp) {
8974 struct fileproc *fp;
8975 int32_t indx, fmode;
8976 struct proc *p = current_proc();
8977 vfs_context_t ctx = vfs_context_current();
39236c6e
A
8978 struct vnode_attr va;
8979
8980
8981 /*
8982 * Use vnode pointer to acquire a file descriptor for
8983 * hand-off to userland
8984 */
6d2010ae 8985 fmode = nspace_open_flags_for_type(nspace_type);
6d2010ae
A
8986 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
8987 if (error) {
8988 unblock = 1;
8989 break;
8990 }
8991 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
8992 if (error) {
8993 unblock = 1;
8994 vnode_put(nspace_items[i].vp);
8995 break;
8996 }
8997
8998 if ((error = falloc(p, &fp, &indx, ctx))) {
8999 vn_close(nspace_items[i].vp, fmode, ctx);
9000 vnode_put(nspace_items[i].vp);
9001 unblock = 1;
9002 break;
9003 }
9004
9005 fp->f_fglob->fg_flag = fmode;
6d2010ae
A
9006 fp->f_fglob->fg_ops = &vnops;
9007 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
9008
9009 proc_fdlock(p);
9010 procfdtbl_releasefd(p, indx, NULL);
9011 fp_drop(p, indx, fp, 1);
39236c6e
A
9012 proc_fdunlock(p);
9013
9014 /*
9015 * All variants of the namespace handler struct support these three fields:
9016 * token, flags, and the FD pointer
9017 */
9018 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
9019 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
9020 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
9021
9022 /*
9023 * Handle optional fields:
9024 * extended version support an info ptr (offset, length), and the
9025 *
9026 * namedata version supports a unique per-link object ID
9027 *
9028 */
9029 if (nhd->infoptr) {
6d2010ae
A
9030 uio_t uio = (uio_t)nspace_items[i].arg;
9031 uint64_t u_offset, u_length;
9032
9033 if (uio) {
9034 u_offset = uio_offset(uio);
9035 u_length = uio_resid(uio);
9036 } else {
9037 u_offset = 0;
9038 u_length = 0;
9039 }
39236c6e
A
9040 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
9041 error = copyout(&u_length, nhd->infoptr+sizeof(uint64_t), sizeof(uint64_t));
6d2010ae 9042 }
39236c6e
A
9043
9044 if (nhd->objid) {
9045 VATTR_INIT(&va);
9046 VATTR_WANTED(&va, va_linkid);
9047 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
9048 if (error == 0 ) {
9049 uint64_t linkid = 0;
9050 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
9051 linkid = (uint64_t)va.va_linkid;
9052 }
9053 error = copyout (&linkid, nhd->objid, sizeof(uint64_t));
9054 }
9055 }
9056
6d2010ae
A
9057 if (error) {
9058 vn_close(nspace_items[i].vp, fmode, ctx);
9059 fp_free(p, indx, fp);
9060 unblock = 1;
9061 }
9062
9063 vnode_put(nspace_items[i].vp);
9064
9065 break;
9066 } else {
9067 printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
9068 i, nspace_items[i].vp, error, nspace_items[i].vp->v_name);
9069 }
9070
9071 } else {
9072 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
9073 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9074 error = EINVAL;
9075 break;
9076 }
9077
9078 }
9079 }
9080
9081 if (unblock) {
9082 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9083 vnode_lock_spin(nspace_items[i].vp);
9084 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9085 vnode_unlock(nspace_items[i].vp);
9086 }
9087 nspace_items[i].vp = NULL;
9088 nspace_items[i].vid = 0;
9089 nspace_items[i].flags = NSPACE_ITEM_DONE;
9090 nspace_items[i].token = 0;
9091
9092 wakeup((caddr_t)&(nspace_items[i].vp));
9093 }
9094
9095 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
9096 // just go through every snapshot event and unblock it immediately.
9097 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9098 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9099 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9100 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9101 nspace_items[i].vp = NULL;
9102 nspace_items[i].vid = 0;
9103 nspace_items[i].flags = NSPACE_ITEM_DONE;
9104 nspace_items[i].token = 0;
9105
9106 wakeup((caddr_t)&(nspace_items[i].vp));
9107 }
9108 }
9109 }
9110 }
9111 }
9112
9113 lck_mtx_unlock(&nspace_handler_lock);
9114
9115 lck_mtx_lock(&nspace_handler_exclusion_lock);
9116 nspace_handlers[nspace_type].handler_busy = 0;
9117 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9118
9119 return error;
9120}
1c79356b 9121
39236c6e
A
9122static inline int validate_namespace_args (int is64bit, int size) {
9123
9124 if (is64bit) {
9125 /* Must be one of these */
9126 if (size == sizeof(user64_namespace_handler_info)) {
9127 goto sizeok;
9128 }
9129 if (size == sizeof(user64_namespace_handler_info_ext)) {
9130 goto sizeok;
9131 }
9132 if (size == sizeof(user64_namespace_handler_data)) {
9133 goto sizeok;
9134 }
9135 return EINVAL;
9136 }
9137 else {
9138 /* 32 bit -- must be one of these */
9139 if (size == sizeof(user32_namespace_handler_info)) {
9140 goto sizeok;
9141 }
9142 if (size == sizeof(user32_namespace_handler_info_ext)) {
9143 goto sizeok;
9144 }
9145 if (size == sizeof(user32_namespace_handler_data)) {
9146 goto sizeok;
9147 }
9148 return EINVAL;
9149 }
9150
9151sizeok:
9152
9153 return 0;
9154
9155}
1c79356b 9156
6d2010ae
A
9157static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
9158{
9159 int error = 0;
39236c6e 9160 namespace_handler_data nhd;
6d2010ae 9161
39236c6e
A
9162 bzero (&nhd, sizeof(namespace_handler_data));
9163
9164 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
9165 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
6d2010ae
A
9166 return EINVAL;
9167 }
9168
9169 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9170 return error;
9171 }
9172
39236c6e
A
9173 error = validate_namespace_args (is64bit, size);
9174 if (error) {
9175 return error;
6d2010ae
A
9176 }
9177
39236c6e
A
9178 /* Copy in the userland pointers into our kernel-only struct */
9179
6d2010ae 9180 if (is64bit) {
39236c6e
A
9181 /* 64 bit userland structures */
9182 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
9183 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
9184 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
9185
9186 /* If the size is greater than the standard info struct, add in extra fields */
9187 if (size > (sizeof(user64_namespace_handler_info))) {
9188 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
9189 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
9190 }
9191 if (size == (sizeof(user64_namespace_handler_data))) {
9192 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
9193 }
9194 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
6d2010ae 9195 }
39236c6e
A
9196 }
9197 else {
9198 /* 32 bit userland structures */
9199 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
9200 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
9201 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
9202
9203 if (size > (sizeof(user32_namespace_handler_info))) {
9204 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
9205 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
9206 }
9207 if (size == (sizeof(user32_namespace_handler_data))) {
9208 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
9209 }
9210 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
6d2010ae
A
9211 }
9212 }
9213
39236c6e 9214 return wait_for_namespace_event(&nhd, nspace_type);
6d2010ae 9215}
1c79356b
A
9216
9217/*
9218 * Make a filesystem-specific control call:
9219 */
1c79356b 9220/* ARGSUSED */
b0d623f7
A
9221static int
9222fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
1c79356b 9223{
b0d623f7 9224 int error=0;
91447636 9225 boolean_t is64bit;
2d21ac55 9226 u_int size;
1c79356b
A
9227#define STK_PARAMS 128
9228 char stkbuf[STK_PARAMS];
9229 caddr_t data, memp;
b0d623f7 9230 vnode_t vp = *arg_vp;
1c79356b
A
9231
9232 size = IOCPARM_LEN(cmd);
9233 if (size > IOCPARM_MAX) return (EINVAL);
9234
6d2010ae 9235 is64bit = proc_is64bit(p);
91447636 9236
1c79356b 9237 memp = NULL;
04b8595b
A
9238
9239 /*
9240 * ensure the buffer is large enough for underlying calls
9241 */
9242#ifndef HFSIOC_GETPATH
9243typedef char pn_t[MAXPATHLEN];
9244#define HFSIOC_GETPATH _IOWR('h', 13, pn_t)
9245#endif
9246
9247#ifndef HFS_GETPATH
9248#define HFS_GETPATH IOCBASECMD(HFSIOC_GETPATH)
9249#endif
9250 if (IOCBASECMD(cmd) == HFS_GETPATH) {
9251 /* Round up to MAXPATHLEN regardless of user input */
9252 size = MAXPATHLEN;
9253 }
9254
9255
1c79356b
A
9256 if (size > sizeof (stkbuf)) {
9257 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
9258 data = memp;
9259 } else {
91447636 9260 data = &stkbuf[0];
1c79356b
A
9261 };
9262
9263 if (cmd & IOC_IN) {
9264 if (size) {
b0d623f7 9265 error = copyin(udata, data, size);
fe8ab488
A
9266 if (error) {
9267 if (memp) {
9268 kfree (memp, size);
9269 }
9270 return error;
9271 }
1c79356b 9272 } else {
6d2010ae
A
9273 if (is64bit) {
9274 *(user_addr_t *)data = udata;
9275 }
9276 else {
9277 *(uint32_t *)data = (uint32_t)udata;
9278 }
1c79356b
A
9279 };
9280 } else if ((cmd & IOC_OUT) && size) {
9281 /*
9282 * Zero the buffer so the user always
9283 * gets back something deterministic.
9284 */
9285 bzero(data, size);
91447636 9286 } else if (cmd & IOC_VOID) {
b0d623f7 9287 if (is64bit) {
6d2010ae 9288 *(user_addr_t *)data = udata;
b0d623f7
A
9289 }
9290 else {
6d2010ae 9291 *(uint32_t *)data = (uint32_t)udata;
b0d623f7 9292 }
91447636 9293 }
1c79356b 9294
b0d623f7 9295 /* Check to see if it's a generic command */
fe8ab488 9296 switch (IOCBASECMD(cmd)) {
91447636 9297
fe8ab488
A
9298 case FSCTL_SYNC_VOLUME: {
9299 mount_t mp = vp->v_mount;
9300 int arg = *(uint32_t*)data;
b0d623f7 9301
fe8ab488
A
9302 /* record vid of vp so we can drop it below. */
9303 uint32_t vvid = vp->v_id;
b0d623f7 9304
fe8ab488
A
9305 /*
9306 * Then grab mount_iterref so that we can release the vnode.
9307 * Without this, a thread may call vnode_iterate_prepare then
9308 * get into a deadlock because we've never released the root vp
9309 */
9310 error = mount_iterref (mp, 0);
9311 if (error) {
9312 break;
9313 }
9314 vnode_put(vp);
9315
9316 /* issue the sync for this volume */
9317 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
9318
9319 /*
9320 * Then release the mount_iterref once we're done syncing; it's not
9321 * needed for the VNOP_IOCTL below
9322 */
9323 mount_iterdrop(mp);
9324
9325 if (arg & FSCTL_SYNC_FULLSYNC) {
9326 /* re-obtain vnode iocount on the root vp, if possible */
9327 error = vnode_getwithvid (vp, vvid);
9328 if (error == 0) {
9329 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
9330 vnode_put (vp);
9331 }
b0d623f7 9332 }
fe8ab488
A
9333 /* mark the argument VP as having been released */
9334 *arg_vp = NULL;
b0d623f7 9335 }
fe8ab488 9336 break;
b0d623f7 9337
fe8ab488
A
9338 case FSCTL_SET_PACKAGE_EXTS: {
9339 user_addr_t ext_strings;
9340 uint32_t num_entries;
9341 uint32_t max_width;
b0d623f7 9342
fe8ab488
A
9343 if ( (is64bit && size != sizeof(user64_package_ext_info))
9344 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
9345
9346 // either you're 64-bit and passed a 64-bit struct or
9347 // you're 32-bit and passed a 32-bit struct. otherwise
9348 // it's not ok.
9349 error = EINVAL;
9350 break;
9351 }
9352
9353 if (is64bit) {
9354 ext_strings = ((user64_package_ext_info *)data)->strings;
9355 num_entries = ((user64_package_ext_info *)data)->num_entries;
9356 max_width = ((user64_package_ext_info *)data)->max_width;
9357 } else {
9358 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
9359 num_entries = ((user32_package_ext_info *)data)->num_entries;
9360 max_width = ((user32_package_ext_info *)data)->max_width;
9361 }
9362 error = set_package_extensions_table(ext_strings, num_entries, max_width);
6d2010ae 9363 }
fe8ab488 9364 break;
2d21ac55 9365
fe8ab488
A
9366 /* namespace handlers */
9367 case FSCTL_NAMESPACE_HANDLER_GET: {
9368 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
6d2010ae 9369 }
fe8ab488 9370 break;
b0d623f7 9371
fe8ab488
A
9372 /* Snapshot handlers */
9373 case FSCTL_OLD_SNAPSHOT_HANDLER_GET: {
9374 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
9375 }
9376 break;
39236c6e 9377
fe8ab488
A
9378 case FSCTL_SNAPSHOT_HANDLER_GET_EXT: {
9379 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
9380 }
9381 break;
39236c6e 9382
fe8ab488
A
9383 case FSCTL_NAMESPACE_HANDLER_UPDATE: {
9384 uint32_t token, val;
9385 int i;
39236c6e 9386
fe8ab488
A
9387 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
9388 break;
9389 }
39236c6e 9390
fe8ab488
A
9391 if (!nspace_is_special_process(p)) {
9392 error = EINVAL;
9393 break;
9394 }
6d2010ae 9395
fe8ab488
A
9396 token = ((uint32_t *)data)[0];
9397 val = ((uint32_t *)data)[1];
6d2010ae 9398
fe8ab488 9399 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 9400
fe8ab488
A
9401 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9402 if (nspace_items[i].token == token) {
9403 break; /* exit for loop, not case stmt */
9404 }
9405 }
6d2010ae 9406
fe8ab488
A
9407 if (i >= MAX_NSPACE_ITEMS) {
9408 error = ENOENT;
9409 } else {
9410 //
9411 // if this bit is set, when resolve_nspace_item() times out
9412 // it will loop and go back to sleep.
9413 //
9414 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
9415 }
6d2010ae 9416
fe8ab488
A
9417 lck_mtx_unlock(&nspace_handler_lock);
9418
9419 if (error) {
9420 printf("nspace-handler-update: did not find token %u\n", token);
9421 }
9422 }
9423 break;
9424
9425 case FSCTL_NAMESPACE_HANDLER_UNBLOCK: {
9426 uint32_t token, val;
9427 int i;
9428
9429 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
6d2010ae
A
9430 break;
9431 }
6d2010ae 9432
fe8ab488
A
9433 if (!nspace_is_special_process(p)) {
9434 error = EINVAL;
9435 break;
9436 }
6d2010ae 9437
fe8ab488
A
9438 token = ((uint32_t *)data)[0];
9439 val = ((uint32_t *)data)[1];
6d2010ae 9440
fe8ab488 9441 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 9442
fe8ab488
A
9443 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9444 if (nspace_items[i].token == token) {
9445 break; /* exit for loop, not case statement */
9446 }
9447 }
6d2010ae 9448
fe8ab488
A
9449 if (i >= MAX_NSPACE_ITEMS) {
9450 printf("nspace-handler-unblock: did not find token %u\n", token);
9451 error = ENOENT;
9452 } else {
9453 if (val == 0 && nspace_items[i].vp) {
9454 vnode_lock_spin(nspace_items[i].vp);
9455 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9456 vnode_unlock(nspace_items[i].vp);
9457 }
6d2010ae 9458
fe8ab488
A
9459 nspace_items[i].vp = NULL;
9460 nspace_items[i].arg = NULL;
9461 nspace_items[i].op = 0;
9462 nspace_items[i].vid = 0;
9463 nspace_items[i].flags = NSPACE_ITEM_DONE;
9464 nspace_items[i].token = 0;
6d2010ae 9465
fe8ab488
A
9466 wakeup((caddr_t)&(nspace_items[i].vp));
9467 }
9468
9469 lck_mtx_unlock(&nspace_handler_lock);
9470 }
9471 break;
6d2010ae 9472
fe8ab488
A
9473 case FSCTL_NAMESPACE_HANDLER_CANCEL: {
9474 uint32_t token, val;
9475 int i;
6d2010ae 9476
fe8ab488 9477 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
6d2010ae
A
9478 break;
9479 }
6d2010ae 9480
fe8ab488
A
9481 if (!nspace_is_special_process(p)) {
9482 error = EINVAL;
9483 break;
6d2010ae
A
9484 }
9485
fe8ab488
A
9486 token = ((uint32_t *)data)[0];
9487 val = ((uint32_t *)data)[1];
6d2010ae 9488
fe8ab488 9489 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 9490
fe8ab488
A
9491 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9492 if (nspace_items[i].token == token) {
9493 break; /* exit for loop, not case stmt */
9494 }
9495 }
6d2010ae 9496
fe8ab488
A
9497 if (i >= MAX_NSPACE_ITEMS) {
9498 printf("nspace-handler-cancel: did not find token %u\n", token);
9499 error = ENOENT;
9500 } else {
9501 if (nspace_items[i].vp) {
9502 vnode_lock_spin(nspace_items[i].vp);
9503 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9504 vnode_unlock(nspace_items[i].vp);
9505 }
6d2010ae 9506
fe8ab488
A
9507 nspace_items[i].vp = NULL;
9508 nspace_items[i].arg = NULL;
9509 nspace_items[i].vid = 0;
9510 nspace_items[i].token = val;
9511 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
9512 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
6d2010ae 9513
fe8ab488
A
9514 wakeup((caddr_t)&(nspace_items[i].vp));
9515 }
6d2010ae 9516
fe8ab488
A
9517 lck_mtx_unlock(&nspace_handler_lock);
9518 }
9519 break;
6d2010ae 9520
fe8ab488
A
9521 case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
9522 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
6d2010ae
A
9523 break;
9524 }
6d2010ae 9525
fe8ab488 9526 // we explicitly do not do the namespace_handler_proc check here
6d2010ae 9527
fe8ab488
A
9528 lck_mtx_lock(&nspace_handler_lock);
9529 snapshot_timestamp = ((uint32_t *)data)[0];
9530 wakeup(&nspace_item_idx);
9531 lck_mtx_unlock(&nspace_handler_lock);
9532 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
6d2010ae 9533
fe8ab488
A
9534 }
9535 break;
6d2010ae 9536
fe8ab488
A
9537 case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
9538 {
9539 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9540 break;
9541 }
6d2010ae 9542
fe8ab488
A
9543 lck_mtx_lock(&nspace_handler_lock);
9544 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
9545 lck_mtx_unlock(&nspace_handler_lock);
9546 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
9547 nspace_allow_virtual_devs ? "" : " NOT");
9548 error = 0;
6d2010ae 9549
6d2010ae 9550 }
fe8ab488 9551 break;
6d2010ae 9552
fe8ab488
A
9553 case FSCTL_SET_FSTYPENAME_OVERRIDE:
9554 {
9555 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9556 break;
9557 }
9558 if (vp->v_mount) {
9559 mount_lock(vp->v_mount);
9560 if (data[0] != 0) {
9561 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
9562 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
9563 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
9564 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
9565 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
9566 }
9567 } else {
9568 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
9569 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
9570 }
9571 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
9572 vp->v_mount->fstypename_override[0] = '\0';
6d2010ae 9573 }
fe8ab488 9574 mount_unlock(vp->v_mount);
6d2010ae 9575 }
6d2010ae 9576 }
fe8ab488
A
9577 break;
9578
9579 default: {
9580 /* Invoke the filesystem-specific code */
9581 error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
9582 }
9583
9584 } /* end switch stmt */
9585
1c79356b 9586 /*
fe8ab488 9587 * if no errors, copy any data to user. Size was
1c79356b
A
9588 * already set and checked above.
9589 */
91447636 9590 if (error == 0 && (cmd & IOC_OUT) && size)
b0d623f7 9591 error = copyout(data, udata, size);
1c79356b 9592
fe8ab488
A
9593 if (memp) {
9594 kfree(memp, size);
9595 }
1c79356b
A
9596
9597 return error;
9598}
b0d623f7
A
9599
9600/* ARGSUSED */
9601int
9602fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
9603{
9604 int error;
9605 struct nameidata nd;
9606 u_long nameiflags;
9607 vnode_t vp = NULL;
9608 vfs_context_t ctx = vfs_context_current();
9609
9610 AUDIT_ARG(cmd, uap->cmd);
9611 AUDIT_ARG(value32, uap->options);
9612 /* Get the vnode for the file we are getting info on: */
9613 nameiflags = 0;
9614 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6d2010ae
A
9615 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
9616 UIO_USERSPACE, uap->path, ctx);
b0d623f7
A
9617 if ((error = namei(&nd))) goto done;
9618 vp = nd.ni_vp;
9619 nameidone(&nd);
9620
9621#if CONFIG_MACF
9622 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
9623 if (error) {
9624 goto done;
9625 }
9626#endif
9627
9628 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
9629
9630done:
9631 if (vp)
9632 vnode_put(vp);
9633 return error;
9634}
9635/* ARGSUSED */
9636int
9637ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
9638{
9639 int error;
9640 vnode_t vp = NULL;
9641 vfs_context_t ctx = vfs_context_current();
9642 int fd = -1;
9643
9644 AUDIT_ARG(fd, uap->fd);
9645 AUDIT_ARG(cmd, uap->cmd);
9646 AUDIT_ARG(value32, uap->options);
9647
9648 /* Get the vnode for the file we are getting info on: */
9649 if ((error = file_vnode(uap->fd, &vp)))
9650 goto done;
9651 fd = uap->fd;
9652 if ((error = vnode_getwithref(vp))) {
9653 goto done;
9654 }
9655
9656#if CONFIG_MACF
9657 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
9658 if (error) {
9659 goto done;
9660 }
9661#endif
9662
9663 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
9664
9665done:
9666 if (fd != -1)
9667 file_drop(fd);
9668
9669 if (vp)
9670 vnode_put(vp);
9671 return error;
9672}
1c79356b 9673/* end of fsctl system call */
0b4e3aa0 9674
91447636
A
9675/*
9676 * Retrieve the data of an extended attribute.
9677 */
9678int
2d21ac55 9679getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
91447636 9680{
2d21ac55 9681 vnode_t vp;
91447636
A
9682 struct nameidata nd;
9683 char attrname[XATTR_MAXNAMELEN+1];
2d21ac55 9684 vfs_context_t ctx = vfs_context_current();
91447636
A
9685 uio_t auio = NULL;
9686 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9687 size_t attrsize = 0;
9688 size_t namelen;
b0d623f7 9689 u_int32_t nameiflags;
91447636
A
9690 int error;
9691 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 9692
2d21ac55 9693 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 9694 return (EINVAL);
55e303ae 9695
91447636 9696 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 9697 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
9698 if ((error = namei(&nd))) {
9699 return (error);
9700 }
9701 vp = nd.ni_vp;
9702 nameidone(&nd);
55e303ae 9703
91447636
A
9704 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9705 goto out;
9706 }
9707 if (xattr_protected(attrname)) {
6d2010ae
A
9708 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
9709 error = EPERM;
9710 goto out;
9711 }
91447636 9712 }
b0d623f7
A
9713 /*
9714 * the specific check for 0xffffffff is a hack to preserve
9715 * binaray compatibilty in K64 with applications that discovered
9716 * that passing in a buf pointer and a size of -1 resulted in
9717 * just the size of the indicated extended attribute being returned.
9718 * this isn't part of the documented behavior, but because of the
9719 * original implemtation's check for "uap->size > 0", this behavior
9720 * was allowed. In K32 that check turned into a signed comparison
9721 * even though uap->size is unsigned... in K64, we blow by that
9722 * check because uap->size is unsigned and doesn't get sign smeared
9723 * in the munger for a 32 bit user app. we also need to add a
9724 * check to limit the maximum size of the buffer being passed in...
9725 * unfortunately, the underlying fileystems seem to just malloc
9726 * the requested size even if the actual extended attribute is tiny.
9727 * because that malloc is for kernel wired memory, we have to put a
9728 * sane limit on it.
9729 *
9730 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
9731 * U64 running on K64 will yield -1 (64 bits wide)
9732 * U32/U64 running on K32 will yield -1 (32 bits wide)
9733 */
9734 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
9735 goto no_uio;
9736
b0d623f7 9737 if (uap->value) {
6d2010ae
A
9738 if (uap->size > (size_t)XATTR_MAXSIZE)
9739 uap->size = XATTR_MAXSIZE;
9740
91447636
A
9741 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9742 &uio_buf[0], sizeof(uio_buf));
9743 uio_addiov(auio, uap->value, uap->size);
9744 }
b0d623f7 9745no_uio:
2d21ac55 9746 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
91447636
A
9747out:
9748 vnode_put(vp);
55e303ae 9749
91447636
A
9750 if (auio) {
9751 *retval = uap->size - uio_resid(auio);
9752 } else {
9753 *retval = (user_ssize_t)attrsize;
55e303ae
A
9754 }
9755
91447636
A
9756 return (error);
9757}
55e303ae 9758
91447636
A
9759/*
9760 * Retrieve the data of an extended attribute.
9761 */
9762int
2d21ac55 9763fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
91447636 9764{
2d21ac55 9765 vnode_t vp;
91447636 9766 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
9767 uio_t auio = NULL;
9768 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9769 size_t attrsize = 0;
9770 size_t namelen;
9771 int error;
9772 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 9773
2d21ac55 9774 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 9775 return (EINVAL);
55e303ae 9776
91447636
A
9777 if ( (error = file_vnode(uap->fd, &vp)) ) {
9778 return (error);
9779 }
9780 if ( (error = vnode_getwithref(vp)) ) {
9781 file_drop(uap->fd);
9782 return(error);
9783 }
9784 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9785 goto out;
9786 }
9787 if (xattr_protected(attrname)) {
9788 error = EPERM;
9789 goto out;
9790 }
9791 if (uap->value && uap->size > 0) {
9792 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9793 &uio_buf[0], sizeof(uio_buf));
9794 uio_addiov(auio, uap->value, uap->size);
9795 }
55e303ae 9796
2d21ac55 9797 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
91447636
A
9798out:
9799 (void)vnode_put(vp);
9800 file_drop(uap->fd);
55e303ae 9801
91447636
A
9802 if (auio) {
9803 *retval = uap->size - uio_resid(auio);
9804 } else {
9805 *retval = (user_ssize_t)attrsize;
9806 }
9807 return (error);
9808}
55e303ae 9809
91447636
A
9810/*
9811 * Set the data of an extended attribute.
9812 */
55e303ae 9813int
2d21ac55 9814setxattr(proc_t p, struct setxattr_args *uap, int *retval)
55e303ae 9815{
2d21ac55 9816 vnode_t vp;
91447636
A
9817 struct nameidata nd;
9818 char attrname[XATTR_MAXNAMELEN+1];
2d21ac55 9819 vfs_context_t ctx = vfs_context_current();
91447636
A
9820 uio_t auio = NULL;
9821 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9822 size_t namelen;
b0d623f7 9823 u_int32_t nameiflags;
91447636
A
9824 int error;
9825 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 9826
2d21ac55 9827 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 9828 return (EINVAL);
55e303ae 9829
91447636 9830 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6d2010ae
A
9831 if (error == EPERM) {
9832 /* if the string won't fit in attrname, copyinstr emits EPERM */
9833 return (ENAMETOOLONG);
9834 }
9835 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9836 return error;
91447636
A
9837 }
9838 if (xattr_protected(attrname))
9839 return(EPERM);
2d21ac55 9840 if (uap->size != 0 && uap->value == 0) {
91447636 9841 return (EINVAL);
55e303ae 9842 }
55e303ae 9843
91447636 9844 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 9845 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
9846 if ((error = namei(&nd))) {
9847 return (error);
9848 }
9849 vp = nd.ni_vp;
9850 nameidone(&nd);
55e303ae 9851
91447636
A
9852 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9853 &uio_buf[0], sizeof(uio_buf));
9854 uio_addiov(auio, uap->value, uap->size);
55e303ae 9855
2d21ac55
A
9856 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
9857#if CONFIG_FSE
9858 if (error == 0) {
9859 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9860 FSE_ARG_VNODE, vp,
9861 FSE_ARG_DONE);
9862 }
9863#endif
91447636
A
9864 vnode_put(vp);
9865 *retval = 0;
9866 return (error);
9867}
55e303ae 9868
91447636
A
9869/*
9870 * Set the data of an extended attribute.
9871 */
9872int
2d21ac55 9873fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
91447636 9874{
2d21ac55 9875 vnode_t vp;
91447636 9876 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
9877 uio_t auio = NULL;
9878 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9879 size_t namelen;
9880 int error;
9881 char uio_buf[ UIO_SIZEOF(1) ];
6d2010ae 9882#if CONFIG_FSE
2d21ac55 9883 vfs_context_t ctx = vfs_context_current();
6d2010ae 9884#endif
55e303ae 9885
2d21ac55 9886 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 9887 return (EINVAL);
55e303ae 9888
91447636
A
9889 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9890 return (error);
55e303ae 9891 }
91447636
A
9892 if (xattr_protected(attrname))
9893 return(EPERM);
2d21ac55 9894 if (uap->size != 0 && uap->value == 0) {
91447636 9895 return (EINVAL);
55e303ae 9896 }
91447636
A
9897 if ( (error = file_vnode(uap->fd, &vp)) ) {
9898 return (error);
55e303ae 9899 }
91447636
A
9900 if ( (error = vnode_getwithref(vp)) ) {
9901 file_drop(uap->fd);
9902 return(error);
9903 }
9904 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9905 &uio_buf[0], sizeof(uio_buf));
9906 uio_addiov(auio, uap->value, uap->size);
91447636 9907
2d21ac55
A
9908 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
9909#if CONFIG_FSE
9910 if (error == 0) {
9911 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9912 FSE_ARG_VNODE, vp,
9913 FSE_ARG_DONE);
9914 }
9915#endif
91447636
A
9916 vnode_put(vp);
9917 file_drop(uap->fd);
9918 *retval = 0;
9919 return (error);
9920}
55e303ae 9921
91447636
A
9922/*
9923 * Remove an extended attribute.
b0d623f7 9924 * XXX Code duplication here.
91447636 9925 */
91447636 9926int
2d21ac55 9927removexattr(proc_t p, struct removexattr_args *uap, int *retval)
91447636 9928{
2d21ac55 9929 vnode_t vp;
91447636
A
9930 struct nameidata nd;
9931 char attrname[XATTR_MAXNAMELEN+1];
9932 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
2d21ac55 9933 vfs_context_t ctx = vfs_context_current();
91447636 9934 size_t namelen;
b0d623f7 9935 u_int32_t nameiflags;
91447636 9936 int error;
55e303ae 9937
2d21ac55 9938 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 9939 return (EINVAL);
55e303ae 9940
91447636
A
9941 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
9942 if (error != 0) {
9943 return (error);
9944 }
9945 if (xattr_protected(attrname))
9946 return(EPERM);
9947 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 9948 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
9949 if ((error = namei(&nd))) {
9950 return (error);
9951 }
9952 vp = nd.ni_vp;
9953 nameidone(&nd);
55e303ae 9954
2d21ac55
A
9955 error = vn_removexattr(vp, attrname, uap->options, ctx);
9956#if CONFIG_FSE
9957 if (error == 0) {
9958 add_fsevent(FSE_XATTR_REMOVED, ctx,
9959 FSE_ARG_VNODE, vp,
9960 FSE_ARG_DONE);
9961 }
9962#endif
91447636
A
9963 vnode_put(vp);
9964 *retval = 0;
9965 return (error);
55e303ae
A
9966}
9967
91447636
A
9968/*
9969 * Remove an extended attribute.
b0d623f7 9970 * XXX Code duplication here.
91447636 9971 */
91447636 9972int
2d21ac55 9973fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
55e303ae 9974{
2d21ac55 9975 vnode_t vp;
91447636 9976 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
9977 size_t namelen;
9978 int error;
6d2010ae 9979#if CONFIG_FSE
2d21ac55 9980 vfs_context_t ctx = vfs_context_current();
6d2010ae 9981#endif
55e303ae 9982
2d21ac55 9983 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636
A
9984 return (EINVAL);
9985
9986 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
9987 if (error != 0) {
9988 return (error);
9989 }
9990 if (xattr_protected(attrname))
9991 return(EPERM);
9992 if ( (error = file_vnode(uap->fd, &vp)) ) {
9993 return (error);
9994 }
9995 if ( (error = vnode_getwithref(vp)) ) {
9996 file_drop(uap->fd);
9997 return(error);
9998 }
4a249263 9999
2d21ac55
A
10000 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
10001#if CONFIG_FSE
10002 if (error == 0) {
10003 add_fsevent(FSE_XATTR_REMOVED, ctx,
10004 FSE_ARG_VNODE, vp,
10005 FSE_ARG_DONE);
10006 }
10007#endif
91447636
A
10008 vnode_put(vp);
10009 file_drop(uap->fd);
10010 *retval = 0;
10011 return (error);
55e303ae
A
10012}
10013
91447636
A
10014/*
10015 * Retrieve the list of extended attribute names.
b0d623f7 10016 * XXX Code duplication here.
91447636 10017 */
91447636 10018int
2d21ac55 10019listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
55e303ae 10020{
2d21ac55 10021 vnode_t vp;
91447636 10022 struct nameidata nd;
2d21ac55 10023 vfs_context_t ctx = vfs_context_current();
91447636
A
10024 uio_t auio = NULL;
10025 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10026 size_t attrsize = 0;
b0d623f7 10027 u_int32_t nameiflags;
91447636
A
10028 int error;
10029 char uio_buf[ UIO_SIZEOF(1) ];
4a249263 10030
2d21ac55 10031 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10032 return (EINVAL);
55e303ae 10033
fe8ab488 10034 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10035 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10036 if ((error = namei(&nd))) {
10037 return (error);
10038 }
10039 vp = nd.ni_vp;
10040 nameidone(&nd);
10041 if (uap->namebuf != 0 && uap->bufsize > 0) {
6d2010ae
A
10042 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
10043 &uio_buf[0], sizeof(uio_buf));
91447636
A
10044 uio_addiov(auio, uap->namebuf, uap->bufsize);
10045 }
55e303ae 10046
2d21ac55 10047 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
55e303ae 10048
91447636
A
10049 vnode_put(vp);
10050 if (auio) {
10051 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10052 } else {
10053 *retval = (user_ssize_t)attrsize;
10054 }
10055 return (error);
55e303ae
A
10056}
10057
91447636
A
10058/*
10059 * Retrieve the list of extended attribute names.
b0d623f7 10060 * XXX Code duplication here.
91447636 10061 */
55e303ae 10062int
2d21ac55 10063flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
55e303ae 10064{
2d21ac55 10065 vnode_t vp;
91447636
A
10066 uio_t auio = NULL;
10067 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10068 size_t attrsize = 0;
10069 int error;
10070 char uio_buf[ UIO_SIZEOF(1) ];
10071
2d21ac55 10072 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636
A
10073 return (EINVAL);
10074
10075 if ( (error = file_vnode(uap->fd, &vp)) ) {
10076 return (error);
10077 }
10078 if ( (error = vnode_getwithref(vp)) ) {
10079 file_drop(uap->fd);
10080 return(error);
10081 }
10082 if (uap->namebuf != 0 && uap->bufsize > 0) {
91447636
A
10083 auio = uio_createwithbuffer(1, 0, spacetype,
10084 UIO_READ, &uio_buf[0], sizeof(uio_buf));
10085 uio_addiov(auio, uap->namebuf, uap->bufsize);
10086 }
91447636 10087
2d21ac55 10088 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
55e303ae 10089
91447636
A
10090 vnode_put(vp);
10091 file_drop(uap->fd);
10092 if (auio) {
10093 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10094 } else {
10095 *retval = (user_ssize_t)attrsize;
10096 }
10097 return (error);
55e303ae 10098}
4a249263 10099
fe8ab488
A
10100static int fsgetpath_internal(
10101 vfs_context_t ctx, int volfs_id, uint64_t objid,
10102 vm_size_t bufsize, caddr_t buf, int *pathlen)
b0d623f7 10103{
fe8ab488 10104 int error;
b0d623f7 10105 struct mount *mp = NULL;
fe8ab488 10106 vnode_t vp;
b0d623f7 10107 int length;
fe8ab488 10108 int bpflags;
b0d623f7 10109
fe8ab488 10110 if (bufsize > PAGE_SIZE) {
b0d623f7 10111 return (EINVAL);
fe8ab488
A
10112 }
10113
10114 if (buf == NULL) {
b0d623f7
A
10115 return (ENOMEM);
10116 }
fe8ab488
A
10117
10118 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
b0d623f7 10119 error = ENOTSUP; /* unexpected failure */
fe8ab488 10120 return ENOTSUP;
b0d623f7 10121 }
fe8ab488 10122
39236c6e 10123unionget:
fe8ab488 10124 if (objid == 2) {
b0d623f7
A
10125 error = VFS_ROOT(mp, &vp, ctx);
10126 } else {
fe8ab488 10127 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
b0d623f7 10128 }
39236c6e
A
10129
10130 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
10131 /*
10132 * If the fileid isn't found and we're in a union
10133 * mount volume, then see if the fileid is in the
10134 * mounted-on volume.
10135 */
10136 struct mount *tmp = mp;
10137 mp = vnode_mount(tmp->mnt_vnodecovered);
10138 vfs_unbusy(tmp);
10139 if (vfs_busy(mp, LK_NOWAIT) == 0)
10140 goto unionget;
fe8ab488 10141 } else {
39236c6e 10142 vfs_unbusy(mp);
fe8ab488 10143 }
39236c6e 10144
b0d623f7 10145 if (error) {
fe8ab488 10146 return error;
b0d623f7 10147 }
fe8ab488 10148
6d2010ae
A
10149#if CONFIG_MACF
10150 error = mac_vnode_check_fsgetpath(ctx, vp);
10151 if (error) {
10152 vnode_put(vp);
fe8ab488 10153 return error;
6d2010ae
A
10154 }
10155#endif
fe8ab488 10156
b0d623f7
A
10157 /* Obtain the absolute path to this vnode. */
10158 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
316670eb 10159 bpflags |= BUILDPATH_CHECK_MOVED;
fe8ab488 10160 error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
b0d623f7 10161 vnode_put(vp);
fe8ab488 10162
b0d623f7
A
10163 if (error) {
10164 goto out;
10165 }
fe8ab488
A
10166
10167 AUDIT_ARG(text, buf);
39236c6e
A
10168
10169 if (kdebug_enable) {
10170 long dbg_parms[NUMPARMS];
10171 int dbg_namelen;
10172
10173 dbg_namelen = (int)sizeof(dbg_parms);
10174
fe8ab488
A
10175 if (length < dbg_namelen) {
10176 memcpy((char *)dbg_parms, buf, length);
39236c6e
A
10177 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
10178
10179 dbg_namelen = length;
fe8ab488
A
10180 } else {
10181 memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
10182 }
39236c6e
A
10183
10184 kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
10185 }
fe8ab488
A
10186
10187 *pathlen = (user_ssize_t)length; /* may be superseded by error */
10188
10189out:
10190 return (error);
10191}
10192
10193/*
10194 * Obtain the full pathname of a file system object by id.
10195 *
10196 * This is a private SPI used by the File Manager.
10197 */
10198__private_extern__
10199int
10200fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
10201{
10202 vfs_context_t ctx = vfs_context_current();
10203 fsid_t fsid;
10204 char *realpath;
10205 int length;
10206 int error;
10207
10208 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
10209 return (error);
10210 }
10211 AUDIT_ARG(value32, fsid.val[0]);
10212 AUDIT_ARG(value64, uap->objid);
10213 /* Restrict output buffer size for now. */
10214
10215 if (uap->bufsize > PAGE_SIZE) {
10216 return (EINVAL);
10217 }
10218 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
10219 if (realpath == NULL) {
10220 return (ENOMEM);
10221 }
10222
10223 error = fsgetpath_internal(
10224 ctx, fsid.val[0], uap->objid,
10225 uap->bufsize, realpath, &length);
10226
10227 if (error) {
10228 goto out;
10229 }
10230
b0d623f7
A
10231 error = copyout((caddr_t)realpath, uap->buf, length);
10232
10233 *retval = (user_ssize_t)length; /* may be superseded by error */
10234out:
10235 if (realpath) {
10236 FREE(realpath, M_TEMP);
10237 }
10238 return (error);
10239}
10240
91447636
A
10241/*
10242 * Common routine to handle various flavors of statfs data heading out
10243 * to user space.
2d21ac55
A
10244 *
10245 * Returns: 0 Success
10246 * EFAULT
91447636
A
10247 */
10248static int
10249munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
10250 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
10251 boolean_t partial_copy)
4a249263 10252{
91447636
A
10253 int error;
10254 int my_size, copy_size;
10255
10256 if (is_64_bit) {
b0d623f7 10257 struct user64_statfs sfs;
91447636
A
10258 my_size = copy_size = sizeof(sfs);
10259 bzero(&sfs, my_size);
10260 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
10261 sfs.f_type = mp->mnt_vtable->vfc_typenum;
10262 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
b0d623f7
A
10263 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
10264 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
10265 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
10266 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
10267 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
10268 sfs.f_files = (user64_long_t)sfsp->f_files;
10269 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
91447636
A
10270 sfs.f_fsid = sfsp->f_fsid;
10271 sfs.f_owner = sfsp->f_owner;
6d2010ae 10272 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
fe8ab488 10273 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
6d2010ae
A
10274 } else {
10275 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
10276 }
2d21ac55
A
10277 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
10278 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
10279
10280 if (partial_copy) {
10281 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
10282 }
10283 error = copyout((caddr_t)&sfs, bufp, copy_size);
10284 }
10285 else {
b0d623f7
A
10286 struct user32_statfs sfs;
10287
91447636
A
10288 my_size = copy_size = sizeof(sfs);
10289 bzero(&sfs, my_size);
10290
10291 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
10292 sfs.f_type = mp->mnt_vtable->vfc_typenum;
10293 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
10294
10295 /*
10296 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
10297 * have to fudge the numbers here in that case. We inflate the blocksize in order
10298 * to reflect the filesystem size as best we can.
10299 */
b0d623f7 10300 if ((sfsp->f_blocks > INT_MAX)
91447636
A
10301 /* Hack for 4061702 . I think the real fix is for Carbon to
10302 * look for some volume capability and not depend on hidden
10303 * semantics agreed between a FS and carbon.
10304 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
10305 * for Carbon to set bNoVolumeSizes volume attribute.
10306 * Without this the webdavfs files cannot be copied onto
10307 * disk as they look huge. This change should not affect
10308 * XSAN as they should not setting these to -1..
10309 */
2d21ac55
A
10310 && (sfsp->f_blocks != 0xffffffffffffffffULL)
10311 && (sfsp->f_bfree != 0xffffffffffffffffULL)
10312 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
91447636
A
10313 int shift;
10314
10315 /*
10316 * Work out how far we have to shift the block count down to make it fit.
10317 * Note that it's possible to have to shift so far that the resulting
10318 * blocksize would be unreportably large. At that point, we will clip
10319 * any values that don't fit.
10320 *
10321 * For safety's sake, we also ensure that f_iosize is never reported as
10322 * being smaller than f_bsize.
10323 */
10324 for (shift = 0; shift < 32; shift++) {
b0d623f7 10325 if ((sfsp->f_blocks >> shift) <= INT_MAX)
91447636 10326 break;
b0d623f7 10327 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
91447636
A
10328 break;
10329 }
b0d623f7
A
10330#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
10331 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
10332 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
10333 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
91447636 10334#undef __SHIFT_OR_CLIP
b0d623f7 10335 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
91447636
A
10336 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
10337 } else {
10338 /* filesystem is small enough to be reported honestly */
b0d623f7
A
10339 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
10340 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
10341 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
10342 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
10343 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
91447636 10344 }
b0d623f7
A
10345 sfs.f_files = (user32_long_t)sfsp->f_files;
10346 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
91447636
A
10347 sfs.f_fsid = sfsp->f_fsid;
10348 sfs.f_owner = sfsp->f_owner;
6d2010ae 10349 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
fe8ab488 10350 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
6d2010ae
A
10351 } else {
10352 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
10353 }
2d21ac55
A
10354 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
10355 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
10356
10357 if (partial_copy) {
10358 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
10359 }
10360 error = copyout((caddr_t)&sfs, bufp, copy_size);
10361 }
4a249263 10362
91447636
A
10363 if (sizep != NULL) {
10364 *sizep = my_size;
10365 }
10366 return(error);
10367}
10368
10369/*
10370 * copy stat structure into user_stat structure.
10371 */
b0d623f7 10372void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
91447636 10373{
b0d623f7
A
10374 bzero(usbp, sizeof(*usbp));
10375
10376 usbp->st_dev = sbp->st_dev;
10377 usbp->st_ino = sbp->st_ino;
10378 usbp->st_mode = sbp->st_mode;
10379 usbp->st_nlink = sbp->st_nlink;
10380 usbp->st_uid = sbp->st_uid;
10381 usbp->st_gid = sbp->st_gid;
10382 usbp->st_rdev = sbp->st_rdev;
10383#ifndef _POSIX_C_SOURCE
10384 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10385 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10386 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10387 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10388 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10389 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10390#else
10391 usbp->st_atime = sbp->st_atime;
10392 usbp->st_atimensec = sbp->st_atimensec;
10393 usbp->st_mtime = sbp->st_mtime;
10394 usbp->st_mtimensec = sbp->st_mtimensec;
10395 usbp->st_ctime = sbp->st_ctime;
10396 usbp->st_ctimensec = sbp->st_ctimensec;
10397#endif
10398 usbp->st_size = sbp->st_size;
10399 usbp->st_blocks = sbp->st_blocks;
10400 usbp->st_blksize = sbp->st_blksize;
10401 usbp->st_flags = sbp->st_flags;
10402 usbp->st_gen = sbp->st_gen;
10403 usbp->st_lspare = sbp->st_lspare;
10404 usbp->st_qspare[0] = sbp->st_qspare[0];
10405 usbp->st_qspare[1] = sbp->st_qspare[1];
10406}
10407
10408void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
10409{
10410 bzero(usbp, sizeof(*usbp));
0c530ab8 10411
91447636
A
10412 usbp->st_dev = sbp->st_dev;
10413 usbp->st_ino = sbp->st_ino;
10414 usbp->st_mode = sbp->st_mode;
10415 usbp->st_nlink = sbp->st_nlink;
10416 usbp->st_uid = sbp->st_uid;
10417 usbp->st_gid = sbp->st_gid;
10418 usbp->st_rdev = sbp->st_rdev;
2d21ac55
A
10419#ifndef _POSIX_C_SOURCE
10420 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10421 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10422 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10423 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10424 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10425 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10426#else
10427 usbp->st_atime = sbp->st_atime;
10428 usbp->st_atimensec = sbp->st_atimensec;
10429 usbp->st_mtime = sbp->st_mtime;
10430 usbp->st_mtimensec = sbp->st_mtimensec;
10431 usbp->st_ctime = sbp->st_ctime;
10432 usbp->st_ctimensec = sbp->st_ctimensec;
10433#endif
10434 usbp->st_size = sbp->st_size;
10435 usbp->st_blocks = sbp->st_blocks;
10436 usbp->st_blksize = sbp->st_blksize;
10437 usbp->st_flags = sbp->st_flags;
10438 usbp->st_gen = sbp->st_gen;
10439 usbp->st_lspare = sbp->st_lspare;
10440 usbp->st_qspare[0] = sbp->st_qspare[0];
10441 usbp->st_qspare[1] = sbp->st_qspare[1];
10442}
10443
10444/*
10445 * copy stat64 structure into user_stat64 structure.
10446 */
b0d623f7
A
10447void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
10448{
10449 bzero(usbp, sizeof(*usbp));
10450
10451 usbp->st_dev = sbp->st_dev;
10452 usbp->st_ino = sbp->st_ino;
10453 usbp->st_mode = sbp->st_mode;
10454 usbp->st_nlink = sbp->st_nlink;
10455 usbp->st_uid = sbp->st_uid;
10456 usbp->st_gid = sbp->st_gid;
10457 usbp->st_rdev = sbp->st_rdev;
10458#ifndef _POSIX_C_SOURCE
10459 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10460 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10461 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10462 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10463 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10464 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10465 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
10466 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
10467#else
10468 usbp->st_atime = sbp->st_atime;
10469 usbp->st_atimensec = sbp->st_atimensec;
10470 usbp->st_mtime = sbp->st_mtime;
10471 usbp->st_mtimensec = sbp->st_mtimensec;
10472 usbp->st_ctime = sbp->st_ctime;
10473 usbp->st_ctimensec = sbp->st_ctimensec;
10474 usbp->st_birthtime = sbp->st_birthtime;
10475 usbp->st_birthtimensec = sbp->st_birthtimensec;
10476#endif
10477 usbp->st_size = sbp->st_size;
10478 usbp->st_blocks = sbp->st_blocks;
10479 usbp->st_blksize = sbp->st_blksize;
10480 usbp->st_flags = sbp->st_flags;
10481 usbp->st_gen = sbp->st_gen;
10482 usbp->st_lspare = sbp->st_lspare;
10483 usbp->st_qspare[0] = sbp->st_qspare[0];
10484 usbp->st_qspare[1] = sbp->st_qspare[1];
10485}
10486
10487void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
2d21ac55 10488{
b0d623f7 10489 bzero(usbp, sizeof(*usbp));
2d21ac55
A
10490
10491 usbp->st_dev = sbp->st_dev;
10492 usbp->st_ino = sbp->st_ino;
10493 usbp->st_mode = sbp->st_mode;
10494 usbp->st_nlink = sbp->st_nlink;
10495 usbp->st_uid = sbp->st_uid;
10496 usbp->st_gid = sbp->st_gid;
10497 usbp->st_rdev = sbp->st_rdev;
10498#ifndef _POSIX_C_SOURCE
91447636
A
10499 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10500 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10501 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10502 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10503 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10504 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
2d21ac55
A
10505 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
10506 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
91447636
A
10507#else
10508 usbp->st_atime = sbp->st_atime;
10509 usbp->st_atimensec = sbp->st_atimensec;
10510 usbp->st_mtime = sbp->st_mtime;
10511 usbp->st_mtimensec = sbp->st_mtimensec;
10512 usbp->st_ctime = sbp->st_ctime;
10513 usbp->st_ctimensec = sbp->st_ctimensec;
2d21ac55
A
10514 usbp->st_birthtime = sbp->st_birthtime;
10515 usbp->st_birthtimensec = sbp->st_birthtimensec;
91447636
A
10516#endif
10517 usbp->st_size = sbp->st_size;
10518 usbp->st_blocks = sbp->st_blocks;
10519 usbp->st_blksize = sbp->st_blksize;
10520 usbp->st_flags = sbp->st_flags;
10521 usbp->st_gen = sbp->st_gen;
10522 usbp->st_lspare = sbp->st_lspare;
10523 usbp->st_qspare[0] = sbp->st_qspare[0];
10524 usbp->st_qspare[1] = sbp->st_qspare[1];
4a249263 10525}
39236c6e
A
10526
10527/*
10528 * Purge buffer cache for simulating cold starts
10529 */
10530static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
10531{
10532 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
10533
10534 return VNODE_RETURNED;
10535}
10536
10537static int vfs_purge_callback(mount_t mp, __unused void * arg)
10538{
10539 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
10540
10541 return VFS_RETURNED;
10542}
10543
10544int
10545vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
10546{
10547 if (!kauth_cred_issuser(kauth_cred_get()))
10548 return EPERM;
10549
10550 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
10551
10552 return 0;
10553}
10554