]> git.saurik.com Git - apple/xnu.git/blame - bsd/vfs/vfs_syscalls.c
xnu-2422.90.20.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
CommitLineData
1c79356b 1/*
316670eb 2 * Copyright (c) 1995-2012 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
2d21ac55
A
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
1c79356b
A
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/namei.h>
77#include <sys/filedesc.h>
78#include <sys/kernel.h>
91447636 79#include <sys/file_internal.h>
1c79356b 80#include <sys/stat.h>
91447636
A
81#include <sys/vnode_internal.h>
82#include <sys/mount_internal.h>
83#include <sys/proc_internal.h>
84#include <sys/kauth.h>
85#include <sys/uio_internal.h>
1c79356b 86#include <sys/malloc.h>
91447636 87#include <sys/mman.h>
1c79356b
A
88#include <sys/dirent.h>
89#include <sys/attr.h>
90#include <sys/sysctl.h>
91#include <sys/ubc.h>
9bccf70c 92#include <sys/quota.h>
91447636
A
93#include <sys/kdebug.h>
94#include <sys/fsevents.h>
6d2010ae 95#include <sys/imgsrc.h>
91447636
A
96#include <sys/sysproto.h>
97#include <sys/xattr.h>
b0d623f7
A
98#include <sys/fcntl.h>
99#include <sys/fsctl.h>
91447636 100#include <sys/ubc_internal.h>
593a1d5f 101#include <sys/disk.h>
91447636
A
102#include <machine/cons.h>
103#include <machine/limits.h>
104#include <miscfs/specfs/specdev.h>
e5568f75 105
b0d623f7 106#include <security/audit/audit.h>
e5568f75
A
107#include <bsm/audit_kevents.h>
108
91447636
A
109#include <mach/mach_types.h>
110#include <kern/kern_types.h>
111#include <kern/kalloc.h>
6d2010ae 112#include <kern/task.h>
91447636
A
113
114#include <vm/vm_pageout.h>
1c79356b 115
91447636 116#include <libkern/OSAtomic.h>
b0d623f7 117#include <pexpert/pexpert.h>
55e303ae 118
2d21ac55
A
119#if CONFIG_MACF
120#include <security/mac.h>
121#include <security/mac_framework.h>
122#endif
1c79356b 123
2d21ac55
A
124#if CONFIG_FSE
125#define GET_PATH(x) \
126 (x) = get_pathbuff();
127#define RELEASE_PATH(x) \
128 release_pathbuff(x);
129#else
130#define GET_PATH(x) \
131 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
132#define RELEASE_PATH(x) \
133 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
134#endif /* CONFIG_FSE */
135
136/* struct for checkdirs iteration */
137struct cdirargs {
138 vnode_t olddp;
139 vnode_t newdp;
140};
141/* callback for checkdirs iteration */
142static int checkdirs_callback(proc_t p, void * arg);
1c79356b 143
91447636 144static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
6601e61a 145static int checkdirs(vnode_t olddp, vfs_context_t ctx);
91447636
A
146void enablequotas(struct mount *mp, vfs_context_t ctx);
147static int getfsstat_callback(mount_t mp, void * arg);
148static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
2d21ac55 149static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
91447636
A
150static int sync_callback(mount_t, void *);
151static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
152 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
153 boolean_t partial_copy);
b0d623f7
A
154static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
155 user_addr_t bufp);
156static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
6d2010ae
A
157static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
158 struct componentname *cnp, user_addr_t fsmountargs,
159 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
160 vfs_context_t ctx);
161void vfs_notify_mount(vnode_t pdvp);
162
163int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
b7266188
A
164
165#ifdef CONFIG_IMGSRC_ACCESS
b7266188
A
166static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
167static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
168static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
169static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
170static void mount_end_update(mount_t mp);
6d2010ae 171static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
b7266188
A
172#endif /* CONFIG_IMGSRC_ACCESS */
173
2d21ac55
A
174int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
175
176__private_extern__
177int sync_internal(void);
178
2d21ac55
A
179__private_extern__
180int unlink1(vfs_context_t, struct nameidata *, int);
91447636 181
2d21ac55
A
182/*
183 * incremented each time a mount or unmount operation occurs
184 * used to invalidate the cached value of the rootvp in the
185 * mount structure utilized by cache_lookup_path
186 */
b0d623f7 187uint32_t mount_generation = 0;
1c79356b
A
188
189/* counts number of mount and unmount operations */
190unsigned int vfs_nummntops=0;
191
39236c6e
A
192extern const struct fileops vnops;
193#if CONFIG_APPLEDOUBLE
2d21ac55 194extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
39236c6e 195#endif /* CONFIG_APPLEDOUBLE */
91447636 196
1c79356b
A
197/*
198 * Virtual File System System Calls
199 */
200
6d2010ae
A
201#if NFSCLIENT
202/*
203 * Private in-kernel mounting spi (NFS only, not exported)
204 */
205 __private_extern__
206boolean_t
207vfs_iskernelmount(mount_t mp)
208{
209 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
210}
211
212 __private_extern__
213int
214kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
215 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
216{
217 struct nameidata nd;
218 boolean_t did_namei;
219 int error;
220
221 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
222 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
223
224 /*
225 * Get the vnode to be covered if it's not supplied
226 */
227 if (vp == NULLVP) {
228 error = namei(&nd);
229 if (error)
230 return (error);
231 vp = nd.ni_vp;
232 pvp = nd.ni_dvp;
233 did_namei = TRUE;
234 } else {
235 char *pnbuf = CAST_DOWN(char *, path);
236
237 nd.ni_cnd.cn_pnbuf = pnbuf;
238 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
239 did_namei = FALSE;
240 }
241
242 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
243 syscall_flags, kern_flags, NULL, TRUE, ctx);
244
245 if (did_namei) {
246 vnode_put(vp);
247 vnode_put(pvp);
248 nameidone(&nd);
249 }
250
251 return (error);
252}
253#endif /* NFSCLIENT */
254
1c79356b
A
255/*
256 * Mount a file system.
257 */
1c79356b
A
258/* ARGSUSED */
259int
b0d623f7 260mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
2d21ac55
A
261{
262 struct __mac_mount_args muap;
263
264 muap.type = uap->type;
265 muap.path = uap->path;
266 muap.flags = uap->flags;
267 muap.data = uap->data;
268 muap.mac_p = USER_ADDR_NULL;
269 return (__mac_mount(p, &muap, retval));
270}
271
6d2010ae
A
272void
273vfs_notify_mount(vnode_t pdvp)
274{
275 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
276 lock_vnode_and_post(pdvp, NOTE_WRITE);
277}
278
b0d623f7
A
279/*
280 * __mac_mount:
281 * Mount a file system taking into account MAC label behavior.
282 * See mount(2) man page for more information
283 *
284 * Parameters: p Process requesting the mount
285 * uap User argument descriptor (see below)
286 * retval (ignored)
287 *
288 * Indirect: uap->type Filesystem type
289 * uap->path Path to mount
290 * uap->data Mount arguments
291 * uap->mac_p MAC info
292 * uap->flags Mount flags
293 *
294 *
295 * Returns: 0 Success
296 * !0 Not success
297 */
6d2010ae
A
298boolean_t root_fs_upgrade_try = FALSE;
299
2d21ac55 300int
b0d623f7 301__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
1c79356b 302{
39236c6e
A
303 vnode_t pvp = NULL;
304 vnode_t vp = NULL;
305 int need_nameidone = 0;
6d2010ae
A
306 vfs_context_t ctx = vfs_context_current();
307 char fstypename[MFSNAMELEN];
308 struct nameidata nd;
309 size_t dummy=0;
310 char *labelstr = NULL;
311 int flags = uap->flags;
312 int error;
39236c6e 313#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
6d2010ae 314 boolean_t is_64bit = IS_64BIT_PROCESS(p);
39236c6e
A
315#else
316#pragma unused(p)
317#endif
6d2010ae
A
318 /*
319 * Get the fs type name from user space
320 */
321 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
322 if (error)
323 return (error);
324
325 /*
326 * Get the vnode to be covered
327 */
328 NDINIT(&nd, LOOKUP, OP_MOUNT, NOTRIGGER | FOLLOW | AUDITVNPATH1 | WANTPARENT,
329 UIO_USERSPACE, uap->path, ctx);
330 error = namei(&nd);
39236c6e
A
331 if (error) {
332 goto out;
333 }
334 need_nameidone = 1;
6d2010ae
A
335 vp = nd.ni_vp;
336 pvp = nd.ni_dvp;
337
338#ifdef CONFIG_IMGSRC_ACCESS
339 /* Mounting image source cannot be batched with other operations */
340 if (flags == MNT_IMGSRC_BY_INDEX) {
341 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
342 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
343 goto out;
344 }
345#endif /* CONFIG_IMGSRC_ACCESS */
346
347#if CONFIG_MACF
348 /*
349 * Get the label string (if any) from user space
350 */
351 if (uap->mac_p != USER_ADDR_NULL) {
352 struct user_mac mac;
353 size_t ulen = 0;
354
355 if (is_64bit) {
356 struct user64_mac mac64;
357 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
358 mac.m_buflen = mac64.m_buflen;
359 mac.m_string = mac64.m_string;
360 } else {
361 struct user32_mac mac32;
362 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
363 mac.m_buflen = mac32.m_buflen;
364 mac.m_string = mac32.m_string;
365 }
366 if (error)
367 goto out;
368 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
369 (mac.m_buflen < 2)) {
370 error = EINVAL;
371 goto out;
372 }
373 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
374 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
375 if (error) {
376 goto out;
377 }
378 AUDIT_ARG(mac_string, labelstr);
379 }
380#endif /* CONFIG_MACF */
381
382 AUDIT_ARG(fflags, flags);
383
384 if ((vp->v_flag & VROOT) &&
39236c6e
A
385 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
386 if (!(flags & MNT_UNION)) {
6d2010ae 387 flags |= MNT_UPDATE;
39236c6e
A
388 }
389 else {
390 /*
391 * For a union mount on '/', treat it as fresh
392 * mount instead of update.
393 * Otherwise, union mouting on '/' used to panic the
394 * system before, since mnt_vnodecovered was found to
395 * be NULL for '/' which is required for unionlookup
396 * after it gets ENOENT on union mount.
397 */
398 flags = (flags & ~(MNT_UPDATE));
399 }
400
401#if 0
402//#ifdef SECURE_KERNEL
403 if ((flags & MNT_RDONLY) == 0) {
404 /* Release kernels are not allowed to mount "/" as rw */
405 error = EPERM;
406 goto out;
407 }
408//#endif
409#endif
410 /*
411 * See 7392553 for more details on why this check exists.
412 * Suffice to say: If this check is ON and something tries
413 * to mount the rootFS RW, we'll turn off the codesign
414 * bitmap optimization.
415 */
6d2010ae 416#if CHECK_CS_VALIDATION_BITMAP
39236c6e 417 if ((flags & MNT_RDONLY) == 0 ) {
6d2010ae
A
418 root_fs_upgrade_try = TRUE;
419 }
420#endif
421 }
422
423 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
424 labelstr, FALSE, ctx);
39236c6e 425
6d2010ae 426out:
39236c6e 427
6d2010ae
A
428#if CONFIG_MACF
429 if (labelstr)
430 FREE(labelstr, M_MACTEMP);
431#endif /* CONFIG_MACF */
432
39236c6e
A
433 if (vp) {
434 vnode_put(vp);
435 }
436 if (pvp) {
437 vnode_put(pvp);
438 }
439 if (need_nameidone) {
440 nameidone(&nd);
441 }
6d2010ae
A
442
443 return (error);
444}
445
446/*
447 * common mount implementation (final stage of mounting)
448
449 * Arguments:
450 * fstypename file system type (i.e. its vfs name)
451 * pvp parent of covered vnode
452 * vp covered vnode
453 * cnp component name (i.e. path) of covered vnode
454 * flags generic mount flags
455 * fsmountargs file system specific data
456 * labelstr optional MAC label
457 * kernelmount TRUE for mounts initiated from inside the kernel
458 * ctx caller's context
459 */
460static int
461mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
462 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
463 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
464{
39236c6e
A
465#if !CONFIG_MACF
466#pragma unused(labelstr)
467#endif
91447636
A
468 struct vnode *devvp = NULLVP;
469 struct vnode *device_vnode = NULLVP;
2d21ac55
A
470#if CONFIG_MACF
471 struct vnode *rvp;
472#endif
1c79356b 473 struct mount *mp;
6601e61a 474 struct vfstable *vfsp = (struct vfstable *)0;
6d2010ae 475 struct proc *p = vfs_context_proc(ctx);
91447636 476 int error, flag = 0;
91447636 477 user_addr_t devpath = USER_ADDR_NULL;
91447636
A
478 int ronly = 0;
479 int mntalloc = 0;
b0d623f7 480 boolean_t vfsp_ref = FALSE;
743b1565 481 boolean_t is_rwlock_locked = FALSE;
b0d623f7
A
482 boolean_t did_rele = FALSE;
483 boolean_t have_usecount = FALSE;
9bccf70c 484
1c79356b 485 /*
6d2010ae 486 * Process an update for an existing mount
1c79356b 487 */
6d2010ae 488 if (flags & MNT_UPDATE) {
1c79356b 489 if ((vp->v_flag & VROOT) == 0) {
91447636
A
490 error = EINVAL;
491 goto out1;
1c79356b
A
492 }
493 mp = vp->v_mount;
d12e1678 494
91447636 495 /* unmount in progress return error */
b0d623f7 496 mount_lock_spin(mp);
91447636
A
497 if (mp->mnt_lflag & MNT_LUNMOUNT) {
498 mount_unlock(mp);
499 error = EBUSY;
500 goto out1;
d12e1678 501 }
91447636
A
502 mount_unlock(mp);
503 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 504 is_rwlock_locked = TRUE;
1c79356b
A
505 /*
506 * We only allow the filesystem to be reloaded if it
507 * is currently mounted read-only.
508 */
6d2010ae 509 if ((flags & MNT_RELOAD) &&
1c79356b 510 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
91447636
A
511 error = ENOTSUP;
512 goto out1;
1c79356b 513 }
b7266188 514
316670eb
A
515 /*
516 * If content protection is enabled, update mounts are not
517 * allowed to turn it off.
518 */
519 if ((mp->mnt_flag & MNT_CPROTECT) &&
520 ((flags & MNT_CPROTECT) == 0)) {
521 error = EINVAL;
522 goto out1;
523 }
524
b7266188
A
525#ifdef CONFIG_IMGSRC_ACCESS
526 /* Can't downgrade the backer of the root FS */
527 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
6d2010ae 528 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
b7266188
A
529 error = ENOTSUP;
530 goto out1;
531 }
532#endif /* CONFIG_IMGSRC_ACCESS */
533
1c79356b
A
534 /*
535 * Only root, or the user that did the original mount is
536 * permitted to update it.
537 */
2d21ac55
A
538 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
539 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
540 goto out1;
541 }
542#if CONFIG_MACF
543 error = mac_mount_check_remount(ctx, mp);
544 if (error != 0) {
91447636 545 goto out1;
1c79356b 546 }
2d21ac55 547#endif
1c79356b 548 /*
91447636
A
549 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
550 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
1c79356b 551 */
6d2010ae
A
552 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
553 flags |= MNT_NOSUID | MNT_NODEV;
d12e1678 554 if (mp->mnt_flag & MNT_NOEXEC)
6d2010ae 555 flags |= MNT_NOEXEC;
1c79356b 556 }
d12e1678
A
557 flag = mp->mnt_flag;
558
316670eb
A
559
560
6d2010ae 561 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
d12e1678 562
91447636 563 vfsp = mp->mnt_vtable;
1c79356b
A
564 goto update;
565 }
1c79356b 566 /*
91447636 567 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
1c79356b
A
568 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
569 */
6d2010ae
A
570 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
571 flags |= MNT_NOSUID | MNT_NODEV;
1c79356b 572 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
6d2010ae 573 flags |= MNT_NOEXEC;
1c79356b 574 }
91447636 575
55e303ae
A
576 /* XXXAUDIT: Should we capture the type on the error path as well? */
577 AUDIT_ARG(text, fstypename);
91447636 578 mount_list_lock();
1c79356b 579 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
b0d623f7
A
580 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
581 vfsp->vfc_refcount++;
582 vfsp_ref = TRUE;
1c79356b 583 break;
b0d623f7 584 }
91447636 585 mount_list_unlock();
1c79356b 586 if (vfsp == NULL) {
91447636
A
587 error = ENODEV;
588 goto out1;
1c79356b 589 }
6d2010ae
A
590
591 /*
592 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
593 */
594 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
595 error = EINVAL; /* unsupported request */
2d21ac55 596 goto out1;
6d2010ae
A
597 }
598
599 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
600 if (error != 0) {
91447636 601 goto out1;
1c79356b 602 }
1c79356b
A
603
604 /*
6d2010ae 605 * Allocate and initialize the filesystem (mount_t)
1c79356b 606 */
b0d623f7 607 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
1c79356b 608 M_MOUNT, M_WAITOK);
b0d623f7 609 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
91447636 610 mntalloc = 1;
0b4e3aa0
A
611
612 /* Initialize the default IO constraints */
613 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
614 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
91447636
A
615 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
616 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
617 mp->mnt_devblocksize = DEV_BSIZE;
2d21ac55 618 mp->mnt_alignmentmask = PAGE_MASK;
b0d623f7
A
619 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
620 mp->mnt_ioscale = 1;
2d21ac55
A
621 mp->mnt_ioflags = 0;
622 mp->mnt_realrootvp = NULLVP;
623 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
91447636
A
624
625 TAILQ_INIT(&mp->mnt_vnodelist);
626 TAILQ_INIT(&mp->mnt_workerqueue);
627 TAILQ_INIT(&mp->mnt_newvnodes);
628 mount_lock_init(mp);
629 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 630 is_rwlock_locked = TRUE;
1c79356b 631 mp->mnt_op = vfsp->vfc_vfsops;
91447636 632 mp->mnt_vtable = vfsp;
91447636 633 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
1c79356b 634 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
91447636 635 strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
6d2010ae 636 strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1c79356b 637 mp->mnt_vnodecovered = vp;
2d21ac55 638 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
6d2010ae
A
639 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
640 mp->mnt_devbsdunit = 0;
1c79356b 641
91447636
A
642 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
643 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
6d2010ae
A
644
645#if NFSCLIENT
646 if (kernelmount)
647 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
648 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
649 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
650#endif /* NFSCLIENT */
651
1c79356b
A
652update:
653 /*
654 * Set the mount level flags.
655 */
6d2010ae 656 if (flags & MNT_RDONLY)
1c79356b 657 mp->mnt_flag |= MNT_RDONLY;
6d2010ae
A
658 else if (mp->mnt_flag & MNT_RDONLY) {
659 // disallow read/write upgrades of file systems that
660 // had the TYPENAME_OVERRIDE feature set.
661 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
662 error = EPERM;
663 goto out1;
664 }
1c79356b 665 mp->mnt_kern_flag |= MNTK_WANTRDWR;
6d2010ae 666 }
0b4e3aa0
A
667 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
668 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
6d2010ae
A
669 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
670 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
671 MNT_QUARANTINE | MNT_CPROTECT);
672 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
673 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
674 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
675 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
676 MNT_QUARANTINE | MNT_CPROTECT);
2d21ac55
A
677
678#if CONFIG_MACF
6d2010ae 679 if (flags & MNT_MULTILABEL) {
2d21ac55
A
680 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
681 error = EINVAL;
682 goto out1;
683 }
684 mp->mnt_flag |= MNT_MULTILABEL;
685 }
686#endif
6d2010ae
A
687 /*
688 * Process device path for local file systems if requested
689 */
91447636 690 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
6d2010ae 691 if (vfs_context_is64bit(ctx)) {
91447636
A
692 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
693 goto out1;
694 fsmountargs += sizeof(devpath);
695 } else {
b0d623f7 696 user32_addr_t tmp;
91447636
A
697 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
698 goto out1;
699 /* munge into LP64 addr */
700 devpath = CAST_USER_ADDR_T(tmp);
701 fsmountargs += sizeof(tmp);
702 }
703
6d2010ae 704 /* Lookup device and authorize access to it */
91447636 705 if ((devpath)) {
6d2010ae
A
706 struct nameidata nd;
707
708 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
709 if ( (error = namei(&nd)) )
91447636
A
710 goto out1;
711
6d2010ae
A
712 strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
713 devvp = nd.ni_vp;
91447636 714
6d2010ae 715 nameidone(&nd);
91447636
A
716
717 if (devvp->v_type != VBLK) {
718 error = ENOTBLK;
719 goto out2;
720 }
721 if (major(devvp->v_rdev) >= nblkdev) {
722 error = ENXIO;
723 goto out2;
724 }
725 /*
726 * If mount by non-root, then verify that user has necessary
727 * permissions on the device.
728 */
2d21ac55 729 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
6d2010ae
A
730 mode_t accessmode = KAUTH_VNODE_READ_DATA;
731
91447636
A
732 if ((mp->mnt_flag & MNT_RDONLY) == 0)
733 accessmode |= KAUTH_VNODE_WRITE_DATA;
2d21ac55 734 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
91447636
A
735 goto out2;
736 }
737 }
6d2010ae
A
738 /* On first mount, preflight and open device */
739 if (devpath && ((flags & MNT_UPDATE) == 0)) {
91447636
A
740 if ( (error = vnode_ref(devvp)) )
741 goto out2;
742 /*
743 * Disallow multiple mounts of the same device.
744 * Disallow mounting of a device that is currently in use
745 * (except for root, which might share swap device for miniroot).
746 * Flush out any old buffers remaining from a previous use.
747 */
748 if ( (error = vfs_mountedon(devvp)) )
749 goto out3;
750
751 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
752 error = EBUSY;
753 goto out3;
754 }
2d21ac55 755 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
91447636
A
756 error = ENOTBLK;
757 goto out3;
758 }
759 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
760 goto out3;
761
762 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
2d21ac55
A
763#if CONFIG_MACF
764 error = mac_vnode_check_open(ctx,
765 devvp,
766 ronly ? FREAD : FREAD|FWRITE);
767 if (error)
768 goto out3;
769#endif /* MAC */
770 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
91447636
A
771 goto out3;
772
773 mp->mnt_devvp = devvp;
774 device_vnode = devvp;
b0d623f7 775
6d2010ae
A
776 } else if ((mp->mnt_flag & MNT_RDONLY) &&
777 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
778 (device_vnode = mp->mnt_devvp)) {
779 dev_t dev;
780 int maj;
781 /*
782 * If upgrade to read-write by non-root, then verify
783 * that user has necessary permissions on the device.
784 */
785 vnode_getalways(device_vnode);
b0d623f7 786
6d2010ae
A
787 if (suser(vfs_context_ucred(ctx), NULL) &&
788 (error = vnode_authorize(device_vnode, NULL,
789 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
790 ctx)) != 0) {
791 vnode_put(device_vnode);
792 goto out2;
793 }
b0d623f7 794
6d2010ae
A
795 /* Tell the device that we're upgrading */
796 dev = (dev_t)device_vnode->v_rdev;
797 maj = major(dev);
b0d623f7 798
6d2010ae
A
799 if ((u_int)maj >= (u_int)nblkdev)
800 panic("Volume mounted on a device with invalid major number.");
b0d623f7 801
6d2010ae
A
802 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
803 vnode_put(device_vnode);
91447636 804 device_vnode = NULLVP;
6d2010ae
A
805 if (error != 0) {
806 goto out2;
807 }
91447636
A
808 }
809 }
2d21ac55 810#if CONFIG_MACF
6d2010ae 811 if ((flags & MNT_UPDATE) == 0) {
2d21ac55
A
812 mac_mount_label_init(mp);
813 mac_mount_label_associate(ctx, mp);
814 }
6d2010ae
A
815 if (labelstr) {
816 if ((flags & MNT_UPDATE) != 0) {
817 error = mac_mount_check_label_update(ctx, mp);
2d21ac55
A
818 if (error != 0)
819 goto out3;
820 }
2d21ac55
A
821 }
822#endif
1c79356b
A
823 /*
824 * Mount the filesystem.
825 */
2d21ac55 826 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
d12e1678 827
6d2010ae 828 if (flags & MNT_UPDATE) {
1c79356b
A
829 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
830 mp->mnt_flag &= ~MNT_RDONLY;
831 mp->mnt_flag &=~
832 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
833 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
834 if (error)
6d2010ae 835 mp->mnt_flag = flag; /* restore flag value */
91447636
A
836 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
837 lck_rw_done(&mp->mnt_rwlock);
743b1565 838 is_rwlock_locked = FALSE;
9bccf70c 839 if (!error)
2d21ac55 840 enablequotas(mp, ctx);
6d2010ae 841 goto exit;
1c79356b 842 }
6d2010ae 843
1c79356b
A
844 /*
845 * Put the new filesystem on the mount list after root.
846 */
6601e61a 847 if (error == 0) {
2d21ac55
A
848 struct vfs_attr vfsattr;
849#if CONFIG_MACF
850 if (vfs_flags(mp) & MNT_MULTILABEL) {
851 error = VFS_ROOT(mp, &rvp, ctx);
852 if (error) {
853 printf("%s() VFS_ROOT returned %d\n", __func__, error);
854 goto out3;
855 }
2d21ac55 856 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
b0d623f7
A
857 /*
858 * drop reference provided by VFS_ROOT
859 */
860 vnode_put(rvp);
861
2d21ac55
A
862 if (error)
863 goto out3;
864 }
865#endif /* MAC */
866
867 vnode_lock_spin(vp);
868 CLR(vp->v_flag, VMOUNT);
91447636
A
869 vp->v_mountedhere = mp;
870 vnode_unlock(vp);
871
2d21ac55
A
872 /*
873 * taking the name_cache_lock exclusively will
874 * insure that everyone is out of the fast path who
875 * might be trying to use a now stale copy of
876 * vp->v_mountedhere->mnt_realrootvp
877 * bumping mount_generation causes the cached values
878 * to be invalidated
879 */
880 name_cache_lock();
881 mount_generation++;
882 name_cache_unlock();
883
b0d623f7
A
884 error = vnode_ref(vp);
885 if (error != 0) {
886 goto out4;
887 }
888
889 have_usecount = TRUE;
91447636 890
2d21ac55 891 error = checkdirs(vp, ctx);
6601e61a
A
892 if (error != 0) {
893 /* Unmount the filesystem as cdir/rdirs cannot be updated */
894 goto out4;
895 }
91447636
A
896 /*
897 * there is no cleanup code here so I have made it void
898 * we need to revisit this
899 */
2d21ac55 900 (void)VFS_START(mp, 0, ctx);
1c79356b 901
6d2010ae
A
902 if (mount_list_add(mp) != 0) {
903 /*
904 * The system is shutting down trying to umount
905 * everything, so fail with a plausible errno.
906 */
907 error = EBUSY;
b0d623f7
A
908 goto out4;
909 }
6601e61a
A
910 lck_rw_done(&mp->mnt_rwlock);
911 is_rwlock_locked = FALSE;
912
2d21ac55
A
913 /* Check if this mounted file system supports EAs or named streams. */
914 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
915 VFSATTR_INIT(&vfsattr);
916 VFSATTR_WANTED(&vfsattr, f_capabilities);
917 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
918 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
919 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
920 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
921 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
922 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
923 }
924#if NAMEDSTREAMS
925 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
926 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
927 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
928 }
929#endif
930 /* Check if this file system supports path from id lookups. */
931 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
932 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
933 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
934 } else if (mp->mnt_flag & MNT_DOVOLFS) {
935 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
936 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
937 }
938 }
939 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
940 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
941 }
942 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
943 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
944 }
1c79356b 945 /* increment the operations count */
b0d623f7 946 OSAddAtomic(1, &vfs_nummntops);
2d21ac55 947 enablequotas(mp, ctx);
91447636
A
948
949 if (device_vnode) {
950 device_vnode->v_specflags |= SI_MOUNTEDON;
951
952 /*
953 * cache the IO attributes for the underlying physical media...
954 * an error return indicates the underlying driver doesn't
955 * support all the queries necessary... however, reasonable
956 * defaults will have been set, so no reason to bail or care
957 */
958 vfs_init_io_attributes(device_vnode, mp);
959 }
6601e61a
A
960
961 /* Now that mount is setup, notify the listeners */
6d2010ae 962 vfs_notify_mount(pvp);
1c79356b 963 } else {
6d2010ae
A
964 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
965 if (mp->mnt_vnodelist.tqh_first != NULL) {
966 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
967 mp->mnt_vtable->vfc_name, error);
968 }
969
2d21ac55 970 vnode_lock_spin(vp);
1c79356b 971 CLR(vp->v_flag, VMOUNT);
6601e61a 972 vnode_unlock(vp);
91447636
A
973 mount_list_lock();
974 mp->mnt_vtable->vfc_refcount--;
975 mount_list_unlock();
55e303ae 976
91447636 977 if (device_vnode ) {
91447636 978 vnode_rele(device_vnode);
b0d623f7 979 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
91447636
A
980 }
981 lck_rw_done(&mp->mnt_rwlock);
743b1565 982 is_rwlock_locked = FALSE;
6d2010ae
A
983
984 /*
985 * if we get here, we have a mount structure that needs to be freed,
986 * but since the coveredvp hasn't yet been updated to point at it,
987 * no need to worry about other threads holding a crossref on this mp
988 * so it's ok to just free it
989 */
91447636 990 mount_lock_destroy(mp);
2d21ac55
A
991#if CONFIG_MACF
992 mac_mount_label_destroy(mp);
993#endif
55e303ae 994 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1c79356b 995 }
6d2010ae 996exit:
91447636 997 /*
6d2010ae 998 * drop I/O count on the device vp if there was one
91447636
A
999 */
1000 if (devpath && devvp)
1001 vnode_put(devvp);
b0d623f7 1002
91447636 1003 return(error);
b0d623f7 1004
6d2010ae 1005/* Error condition exits */
6601e61a 1006out4:
2d21ac55 1007 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
6d2010ae
A
1008
1009 /*
1010 * If the mount has been placed on the covered vp,
1011 * it may have been discovered by now, so we have
1012 * to treat this just like an unmount
1013 */
1014 mount_lock_spin(mp);
1015 mp->mnt_lflag |= MNT_LDEAD;
1016 mount_unlock(mp);
1017
6601e61a 1018 if (device_vnode != NULLVP) {
b0d623f7 1019 vnode_rele(device_vnode);
2d21ac55
A
1020 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1021 ctx);
b0d623f7 1022 did_rele = TRUE;
6601e61a 1023 }
6d2010ae 1024
2d21ac55 1025 vnode_lock_spin(vp);
6d2010ae
A
1026
1027 mp->mnt_crossref++;
6601e61a 1028 vp->v_mountedhere = (mount_t) 0;
6d2010ae 1029
6601e61a 1030 vnode_unlock(vp);
6d2010ae 1031
b0d623f7
A
1032 if (have_usecount) {
1033 vnode_rele(vp);
1034 }
91447636 1035out3:
6d2010ae 1036 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
2d21ac55 1037 vnode_rele(devvp);
91447636
A
1038out2:
1039 if (devpath && devvp)
1040 vnode_put(devvp);
1041out1:
743b1565
A
1042 /* Release mnt_rwlock only when it was taken */
1043 if (is_rwlock_locked == TRUE) {
1044 lck_rw_done(&mp->mnt_rwlock);
1045 }
6d2010ae 1046
6601e61a 1047 if (mntalloc) {
6d2010ae
A
1048 if (mp->mnt_crossref)
1049 mount_dropcrossref(mp, vp, 0);
1050 else {
1051 mount_lock_destroy(mp);
2d21ac55 1052#if CONFIG_MACF
6d2010ae 1053 mac_mount_label_destroy(mp);
2d21ac55 1054#endif
6d2010ae
A
1055 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1056 }
b0d623f7 1057 }
b0d623f7 1058 if (vfsp_ref) {
6601e61a
A
1059 mount_list_lock();
1060 vfsp->vfc_refcount--;
1061 mount_list_unlock();
6601e61a 1062 }
91447636
A
1063
1064 return(error);
1c79356b
A
1065}
1066
b7266188
A
1067/*
1068 * Flush in-core data, check for competing mount attempts,
1069 * and set VMOUNT
1070 */
6d2010ae
A
1071int
1072prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
b7266188 1073{
39236c6e
A
1074#if !CONFIG_MACF
1075#pragma unused(cnp,fsname)
1076#endif
b7266188
A
1077 struct vnode_attr va;
1078 int error;
1079
6d2010ae
A
1080 if (!skip_auth) {
1081 /*
1082 * If the user is not root, ensure that they own the directory
1083 * onto which we are attempting to mount.
1084 */
1085 VATTR_INIT(&va);
1086 VATTR_WANTED(&va, va_uid);
1087 if ((error = vnode_getattr(vp, &va, ctx)) ||
1088 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1089 (!vfs_context_issuser(ctx)))) {
1090 error = EPERM;
1091 goto out;
1092 }
b7266188
A
1093 }
1094
1095 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1096 goto out;
1097
1098 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1099 goto out;
1100
1101 if (vp->v_type != VDIR) {
1102 error = ENOTDIR;
1103 goto out;
1104 }
1105
1106 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1107 error = EBUSY;
1108 goto out;
1109 }
1110
1111#if CONFIG_MACF
1112 error = mac_mount_check_mount(ctx, vp,
1113 cnp, fsname);
1114 if (error != 0)
1115 goto out;
1116#endif
1117
1118 vnode_lock_spin(vp);
1119 SET(vp->v_flag, VMOUNT);
1120 vnode_unlock(vp);
1121
1122out:
1123 return error;
1124}
1125
6d2010ae
A
1126#if CONFIG_IMGSRC_ACCESS
1127
/*
 * IMGSRC_DEBUG: printf-style tracing for the imageboot-source code.
 * Expands to printf() on DEBUG builds and to an empty statement
 * otherwise (arguments are not evaluated in that case).
 */
#if DEBUG
#define IMGSRC_DEBUG(...) printf(__VA_ARGS__)
#else
#define IMGSRC_DEBUG(...) do { } while(0)
#endif
1133
b7266188
A
1134static int
1135authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1136{
1137 struct nameidata nd;
6d2010ae 1138 vnode_t vp, realdevvp;
b7266188
A
1139 mode_t accessmode;
1140 int error;
1141
6d2010ae
A
1142 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1143 if ( (error = namei(&nd)) ) {
1144 IMGSRC_DEBUG("namei() failed with %d\n", error);
b7266188 1145 return error;
6d2010ae 1146 }
b7266188 1147
b7266188 1148 vp = nd.ni_vp;
b7266188 1149
6d2010ae
A
1150 if (!vnode_isblk(vp)) {
1151 IMGSRC_DEBUG("Not block device.\n");
b7266188
A
1152 error = ENOTBLK;
1153 goto out;
1154 }
6d2010ae
A
1155
1156 realdevvp = mp->mnt_devvp;
1157 if (realdevvp == NULLVP) {
1158 IMGSRC_DEBUG("No device backs the mount.\n");
b7266188
A
1159 error = ENXIO;
1160 goto out;
1161 }
6d2010ae
A
1162
1163 error = vnode_getwithref(realdevvp);
1164 if (error != 0) {
1165 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1166 goto out;
1167 }
1168
1169 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1170 IMGSRC_DEBUG("Wrong dev_t.\n");
1171 error = ENXIO;
1172 goto out1;
1173 }
1174
1175 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1176
b7266188
A
1177 /*
1178 * If mount by non-root, then verify that user has necessary
1179 * permissions on the device.
1180 */
1181 if (!vfs_context_issuser(ctx)) {
1182 accessmode = KAUTH_VNODE_READ_DATA;
1183 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1184 accessmode |= KAUTH_VNODE_WRITE_DATA;
6d2010ae
A
1185 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1186 IMGSRC_DEBUG("Access denied.\n");
1187 goto out1;
1188 }
b7266188
A
1189 }
1190
1191 *devvpp = vp;
6d2010ae
A
1192
1193out1:
1194 vnode_put(realdevvp);
b7266188 1195out:
6d2010ae 1196 nameidone(&nd);
b7266188
A
1197 if (error) {
1198 vnode_put(vp);
1199 }
1200
1201 return error;
1202}
1203
1204/*
1205 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1206 * and call checkdirs()
1207 */
1208static int
1209place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1210{
1211 int error;
1212
1213 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1214
1215 vnode_lock_spin(vp);
1216 CLR(vp->v_flag, VMOUNT);
1217 vp->v_mountedhere = mp;
1218 vnode_unlock(vp);
1219
1220 /*
1221 * taking the name_cache_lock exclusively will
1222 * insure that everyone is out of the fast path who
1223 * might be trying to use a now stale copy of
1224 * vp->v_mountedhere->mnt_realrootvp
1225 * bumping mount_generation causes the cached values
1226 * to be invalidated
1227 */
1228 name_cache_lock();
1229 mount_generation++;
1230 name_cache_unlock();
1231
1232 error = vnode_ref(vp);
1233 if (error != 0) {
1234 goto out;
1235 }
1236
1237 error = checkdirs(vp, ctx);
1238 if (error != 0) {
1239 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1240 vnode_rele(vp);
1241 goto out;
1242 }
1243
1244out:
1245 if (error != 0) {
1246 mp->mnt_vnodecovered = NULLVP;
1247 }
1248 return error;
1249}
1250
1251static void
1252undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1253{
1254 vnode_rele(vp);
1255 vnode_lock_spin(vp);
1256 vp->v_mountedhere = (mount_t)NULL;
1257 vnode_unlock(vp);
1258
1259 mp->mnt_vnodecovered = NULLVP;
1260}
1261
1262static int
1263mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1264{
1265 int error;
1266
1267 /* unmount in progress return error */
1268 mount_lock_spin(mp);
1269 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1270 mount_unlock(mp);
1271 return EBUSY;
1272 }
1273 mount_unlock(mp);
1274 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1275
1276 /*
1277 * We only allow the filesystem to be reloaded if it
1278 * is currently mounted read-only.
1279 */
1280 if ((flags & MNT_RELOAD) &&
1281 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1282 error = ENOTSUP;
1283 goto out;
1284 }
1285
1286 /*
1287 * Only root, or the user that did the original mount is
1288 * permitted to update it.
1289 */
1290 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1291 (!vfs_context_issuser(ctx))) {
1292 error = EPERM;
1293 goto out;
1294 }
1295#if CONFIG_MACF
1296 error = mac_mount_check_remount(ctx, mp);
1297 if (error != 0) {
1298 goto out;
1299 }
1300#endif
1301
1302out:
1303 if (error) {
1304 lck_rw_done(&mp->mnt_rwlock);
1305 }
1306
1307 return error;
1308}
1309
1310static void
1311mount_end_update(mount_t mp)
1312{
1313 lck_rw_done(&mp->mnt_rwlock);
1314}
1315
1316static int
6d2010ae
A
1317get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1318{
1319 vnode_t vp;
1320
1321 if (height >= MAX_IMAGEBOOT_NESTING) {
1322 return EINVAL;
1323 }
1324
1325 vp = imgsrc_rootvnodes[height];
1326 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1327 *rvpp = vp;
1328 return 0;
1329 } else {
1330 return ENOENT;
1331 }
1332}
1333
1334static int
1335relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
b7266188 1336 const char *fsname, vfs_context_t ctx,
6d2010ae 1337 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
b7266188
A
1338{
1339 int error;
1340 mount_t mp;
1341 boolean_t placed = FALSE;
6d2010ae 1342 vnode_t devvp = NULLVP;
b7266188
A
1343 struct vfstable *vfsp;
1344 user_addr_t devpath;
1345 char *old_mntonname;
6d2010ae
A
1346 vnode_t rvp;
1347 uint32_t height;
1348 uint32_t flags;
b7266188
A
1349
1350 /* If we didn't imageboot, nothing to move */
6d2010ae 1351 if (imgsrc_rootvnodes[0] == NULLVP) {
b7266188
A
1352 return EINVAL;
1353 }
1354
1355 /* Only root can do this */
1356 if (!vfs_context_issuser(ctx)) {
1357 return EPERM;
1358 }
1359
6d2010ae
A
1360 IMGSRC_DEBUG("looking for root vnode.\n");
1361
1362 /*
1363 * Get root vnode of filesystem we're moving.
1364 */
1365 if (by_index) {
1366 if (is64bit) {
1367 struct user64_mnt_imgsrc_args mia64;
1368 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1369 if (error != 0) {
1370 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1371 return error;
1372 }
1373
1374 height = mia64.mi_height;
1375 flags = mia64.mi_flags;
1376 devpath = mia64.mi_devpath;
1377 } else {
1378 struct user32_mnt_imgsrc_args mia32;
1379 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1380 if (error != 0) {
1381 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1382 return error;
1383 }
1384
1385 height = mia32.mi_height;
1386 flags = mia32.mi_flags;
1387 devpath = mia32.mi_devpath;
1388 }
1389 } else {
1390 /*
1391 * For binary compatibility--assumes one level of nesting.
1392 */
1393 if (is64bit) {
1394 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1395 return error;
1396 } else {
1397 user32_addr_t tmp;
1398 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1399 return error;
1400
1401 /* munge into LP64 addr */
1402 devpath = CAST_USER_ADDR_T(tmp);
1403 }
1404
1405 height = 0;
1406 flags = 0;
1407 }
1408
1409 if (flags != 0) {
1410 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1411 return EINVAL;
1412 }
1413
1414 error = get_imgsrc_rootvnode(height, &rvp);
b7266188 1415 if (error != 0) {
6d2010ae 1416 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
b7266188
A
1417 return error;
1418 }
1419
6d2010ae
A
1420 IMGSRC_DEBUG("got root vnode.\n");
1421
b7266188
A
1422 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1423
1424 /* Can only move once */
6d2010ae 1425 mp = vnode_mount(rvp);
b7266188 1426 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1427 IMGSRC_DEBUG("Already moved.\n");
b7266188
A
1428 error = EBUSY;
1429 goto out0;
1430 }
1431
6d2010ae
A
1432 IMGSRC_DEBUG("Starting updated.\n");
1433
b7266188
A
1434 /* Get exclusive rwlock on mount, authorize update on mp */
1435 error = mount_begin_update(mp , ctx, 0);
1436 if (error != 0) {
6d2010ae 1437 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
b7266188
A
1438 goto out0;
1439 }
1440
1441 /*
1442 * It can only be moved once. Flag is set under the rwlock,
1443 * so we're now safe to proceed.
1444 */
1445 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1446 IMGSRC_DEBUG("Already moved [2]\n");
b7266188
A
1447 goto out1;
1448 }
6d2010ae
A
1449
1450
1451 IMGSRC_DEBUG("Preparing coveredvp.\n");
b7266188
A
1452
1453 /* Mark covered vnode as mount in progress, authorize placing mount on top */
6d2010ae 1454 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
b7266188 1455 if (error != 0) {
6d2010ae 1456 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
b7266188
A
1457 goto out1;
1458 }
1459
6d2010ae
A
1460 IMGSRC_DEBUG("Covered vp OK.\n");
1461
b7266188
A
1462 /* Sanity check the name caller has provided */
1463 vfsp = mp->mnt_vtable;
1464 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
6d2010ae 1465 IMGSRC_DEBUG("Wrong fs name.\n");
b7266188
A
1466 error = EINVAL;
1467 goto out2;
1468 }
1469
1470 /* Check the device vnode and update mount-from name, for local filesystems */
1471 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
6d2010ae 1472 IMGSRC_DEBUG("Local, doing device validation.\n");
b7266188
A
1473
1474 if (devpath != USER_ADDR_NULL) {
1475 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1476 if (error) {
6d2010ae 1477 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
b7266188
A
1478 goto out2;
1479 }
1480
1481 vnode_put(devvp);
1482 }
1483 }
1484
1485 /*
1486 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1487 * and increment the name cache's mount generation
1488 */
6d2010ae
A
1489
1490 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
b7266188
A
1491 error = place_mount_and_checkdirs(mp, vp, ctx);
1492 if (error != 0) {
1493 goto out2;
1494 }
1495
1496 placed = TRUE;
1497
1498 strncpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1499 strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1500
1501 /* Forbid future moves */
1502 mount_lock(mp);
1503 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1504 mount_unlock(mp);
1505
1506 /* Finally, add to mount list, completely ready to go */
6d2010ae
A
1507 if (mount_list_add(mp) != 0) {
1508 /*
1509 * The system is shutting down trying to umount
1510 * everything, so fail with a plausible errno.
1511 */
1512 error = EBUSY;
b7266188
A
1513 goto out3;
1514 }
1515
1516 mount_end_update(mp);
6d2010ae 1517 vnode_put(rvp);
b7266188
A
1518 FREE(old_mntonname, M_TEMP);
1519
6d2010ae
A
1520 vfs_notify_mount(pvp);
1521
b7266188
A
1522 return 0;
1523out3:
1524 strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1525
1526 mount_lock(mp);
1527 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1528 mount_unlock(mp);
1529
1530out2:
1531 /*
1532 * Placing the mp on the vnode clears VMOUNT,
1533 * so cleanup is different after that point
1534 */
1535 if (placed) {
1536 /* Rele the vp, clear VMOUNT and v_mountedhere */
1537 undo_place_on_covered_vp(mp, vp);
1538 } else {
1539 vnode_lock_spin(vp);
1540 CLR(vp->v_flag, VMOUNT);
1541 vnode_unlock(vp);
1542 }
1543out1:
1544 mount_end_update(mp);
1545
1546out0:
6d2010ae 1547 vnode_put(rvp);
b7266188
A
1548 FREE(old_mntonname, M_TEMP);
1549 return error;
1550}
1551
1552#endif /* CONFIG_IMGSRC_ACCESS */
1553
91447636 1554void
2d21ac55 1555enablequotas(struct mount *mp, vfs_context_t ctx)
9bccf70c 1556{
9bccf70c
A
1557 struct nameidata qnd;
1558 int type;
1559 char qfpath[MAXPATHLEN];
91447636
A
1560 const char *qfname = QUOTAFILENAME;
1561 const char *qfopsname = QUOTAOPSNAME;
1562 const char *qfextension[] = INITQFNAMES;
9bccf70c 1563
2d21ac55 1564 /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
b0d623f7
A
1565 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1566 return;
1567 }
9bccf70c
A
1568 /*
1569 * Enable filesystem disk quotas if necessary.
1570 * We ignore errors as this should not interfere with final mount
1571 */
1572 for (type=0; type < MAXQUOTAS; type++) {
2d21ac55 1573 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
6d2010ae
A
1574 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1575 CAST_USER_ADDR_T(qfpath), ctx);
91447636
A
1576 if (namei(&qnd) != 0)
1577 continue; /* option file to trigger quotas is not present */
1578 vnode_put(qnd.ni_vp);
1579 nameidone(&qnd);
2d21ac55 1580 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
91447636 1581
2d21ac55 1582 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
9bccf70c
A
1583 }
1584 return;
1585}
1586
2d21ac55
A
1587
1588static int
1589checkdirs_callback(proc_t p, void * arg)
1590{
1591 struct cdirargs * cdrp = (struct cdirargs * )arg;
1592 vnode_t olddp = cdrp->olddp;
1593 vnode_t newdp = cdrp->newdp;
1594 struct filedesc *fdp;
1595 vnode_t tvp;
1596 vnode_t fdp_cvp;
1597 vnode_t fdp_rvp;
1598 int cdir_changed = 0;
1599 int rdir_changed = 0;
1600
1601 /*
1602 * XXX Also needs to iterate each thread in the process to see if it
1603 * XXX is using a per-thread current working directory, and, if so,
1604 * XXX update that as well.
1605 */
1606
1607 proc_fdlock(p);
1608 fdp = p->p_fd;
1609 if (fdp == (struct filedesc *)0) {
1610 proc_fdunlock(p);
1611 return(PROC_RETURNED);
1612 }
1613 fdp_cvp = fdp->fd_cdir;
1614 fdp_rvp = fdp->fd_rdir;
1615 proc_fdunlock(p);
1616
1617 if (fdp_cvp == olddp) {
1618 vnode_ref(newdp);
1619 tvp = fdp->fd_cdir;
1620 fdp_cvp = newdp;
1621 cdir_changed = 1;
1622 vnode_rele(tvp);
1623 }
1624 if (fdp_rvp == olddp) {
1625 vnode_ref(newdp);
1626 tvp = fdp->fd_rdir;
1627 fdp_rvp = newdp;
1628 rdir_changed = 1;
1629 vnode_rele(tvp);
1630 }
1631 if (cdir_changed || rdir_changed) {
1632 proc_fdlock(p);
1633 fdp->fd_cdir = fdp_cvp;
1634 fdp->fd_rdir = fdp_rvp;
1635 proc_fdunlock(p);
1636 }
1637 return(PROC_RETURNED);
1638}
1639
1640
1641
1c79356b
A
1642/*
1643 * Scan all active processes to see if any of them have a current
1644 * or root directory onto which the new filesystem has just been
1645 * mounted. If so, replace them with the new mount point.
1646 */
6601e61a 1647static int
2d21ac55 1648checkdirs(vnode_t olddp, vfs_context_t ctx)
1c79356b 1649{
2d21ac55
A
1650 vnode_t newdp;
1651 vnode_t tvp;
6601e61a 1652 int err;
2d21ac55
A
1653 struct cdirargs cdr;
1654 struct uthread * uth = get_bsdthread_info(current_thread());
1c79356b
A
1655
1656 if (olddp->v_usecount == 1)
6601e61a 1657 return(0);
2d21ac55
A
1658 if (uth != (struct uthread *)0)
1659 uth->uu_notrigger = 1;
1660 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
1661 if (uth != (struct uthread *)0)
1662 uth->uu_notrigger = 0;
1663
1664 if (err != 0) {
6601e61a 1665#if DIAGNOSTIC
2d21ac55 1666 panic("mount: lost mount: error %d", err);
6601e61a
A
1667#endif
1668 return(err);
1669 }
91447636 1670
2d21ac55
A
1671 cdr.olddp = olddp;
1672 cdr.newdp = newdp;
1673 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1674 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
91447636 1675
1c79356b 1676 if (rootvnode == olddp) {
91447636 1677 vnode_ref(newdp);
fa4905b1 1678 tvp = rootvnode;
1c79356b 1679 rootvnode = newdp;
91447636 1680 vnode_rele(tvp);
1c79356b 1681 }
91447636
A
1682
1683 vnode_put(newdp);
6601e61a 1684 return(0);
1c79356b
A
1685}
1686
1687/*
1688 * Unmount a file system.
1689 *
1690 * Note: unmount takes a path to the vnode mounted on as argument,
1691 * not special file (as before).
1692 */
1c79356b
A
1693/* ARGSUSED */
1694int
b0d623f7 1695unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1c79356b 1696{
2d21ac55 1697 vnode_t vp;
1c79356b
A
1698 struct mount *mp;
1699 int error;
1700 struct nameidata nd;
2d21ac55 1701 vfs_context_t ctx = vfs_context_current();
91447636 1702
6d2010ae 1703 NDINIT(&nd, LOOKUP, OP_UNMOUNT, NOTRIGGER | FOLLOW | AUDITVNPATH1,
2d21ac55 1704 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
1705 error = namei(&nd);
1706 if (error)
1c79356b
A
1707 return (error);
1708 vp = nd.ni_vp;
1709 mp = vp->v_mount;
91447636 1710 nameidone(&nd);
1c79356b 1711
2d21ac55
A
1712#if CONFIG_MACF
1713 error = mac_mount_check_umount(ctx, mp);
1714 if (error != 0) {
1715 vnode_put(vp);
1716 return (error);
1717 }
1718#endif
55e303ae
A
1719 /*
1720 * Must be the root of the filesystem
1721 */
1722 if ((vp->v_flag & VROOT) == 0) {
91447636 1723 vnode_put(vp);
55e303ae
A
1724 return (EINVAL);
1725 }
6601e61a 1726 mount_ref(mp, 0);
91447636 1727 vnode_put(vp);
6601e61a 1728 /* safedounmount consumes the mount ref */
2d21ac55
A
1729 return (safedounmount(mp, uap->flags, ctx));
1730}
1731
1732int
1733vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
1734{
1735 mount_t mp;
1736
1737 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1738 if (mp == (mount_t)0) {
1739 return(ENOENT);
1740 }
1741 mount_ref(mp, 0);
1742 mount_iterdrop(mp);
1743 /* safedounmount consumes the mount ref */
1744 return(safedounmount(mp, flags, ctx));
55e303ae
A
1745}
1746
2d21ac55 1747
55e303ae 1748/*
6601e61a 1749 * The mount struct comes with a mount ref which will be consumed.
55e303ae
A
1750 * Do the actual file system unmount, prevent some common foot shooting.
1751 */
1752int
2d21ac55 1753safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
55e303ae
A
1754{
1755 int error;
2d21ac55 1756 proc_t p = vfs_context_proc(ctx);
55e303ae 1757
316670eb
A
1758 /*
1759 * If the file system is not responding and MNT_NOBLOCK
1760 * is set and not a forced unmount then return EBUSY.
1761 */
1762 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1763 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1764 error = EBUSY;
1765 goto out;
1766 }
1767
1c79356b 1768 /*
6d2010ae
A
1769 * Skip authorization if the mount is tagged as permissive and
1770 * this is not a forced-unmount attempt.
1c79356b 1771 */
6d2010ae
A
1772 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1773 /*
1774 * Only root, or the user that did the original mount is
1775 * permitted to unmount this filesystem.
1776 */
1777 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1778 (error = suser(kauth_cred_get(), &p->p_acflag)))
1779 goto out;
1780 }
1c79356b
A
1781 /*
1782 * Don't allow unmounting the root file system.
1783 */
6601e61a 1784 if (mp->mnt_flag & MNT_ROOTFS) {
2d21ac55 1785 error = EBUSY; /* the root is always busy */
6601e61a
A
1786 goto out;
1787 }
1c79356b 1788
b7266188
A
1789#ifdef CONFIG_IMGSRC_ACCESS
1790 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1791 error = EBUSY;
1792 goto out;
1793 }
1794#endif /* CONFIG_IMGSRC_ACCESS */
1795
2d21ac55
A
1796 return (dounmount(mp, flags, 1, ctx));
1797
6601e61a
A
1798out:
1799 mount_drop(mp, 0);
1800 return(error);
1c79356b
A
1801}
1802
1803/*
1804 * Do the actual file system unmount.
1805 */
1806int
2d21ac55 1807dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1c79356b 1808{
2d21ac55 1809 vnode_t coveredvp = (vnode_t)0;
1c79356b 1810 int error;
91447636 1811 int needwakeup = 0;
91447636
A
1812 int forcedunmount = 0;
1813 int lflags = 0;
593a1d5f 1814 struct vnode *devvp = NULLVP;
6d2010ae 1815#if CONFIG_TRIGGERS
39236c6e 1816 proc_t p = vfs_context_proc(ctx);
6d2010ae 1817 int did_vflush = 0;
39236c6e 1818 int pflags_save = 0;
6d2010ae 1819#endif /* CONFIG_TRIGGERS */
91447636 1820
91447636
A
1821 if (flags & MNT_FORCE)
1822 forcedunmount = 1;
6d2010ae 1823
91447636 1824 mount_lock(mp);
9bccf70c 1825 /* XXX post jaguar fix LK_DRAIN - then clean this up */
91447636 1826 if ((flags & MNT_FORCE)) {
55e303ae 1827 mp->mnt_kern_flag |= MNTK_FRCUNMOUNT;
91447636
A
1828 mp->mnt_lflag |= MNT_LFORCE;
1829 }
1830 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1831 mp->mnt_lflag |= MNT_LWAIT;
2d21ac55 1832 if(withref != 0)
6601e61a 1833 mount_drop(mp, 1);
2d21ac55 1834 msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "dounmount", NULL);
9bccf70c
A
1835 /*
1836 * The prior unmount attempt has probably succeeded.
1837 * Do not dereference mp here - returning EBUSY is safest.
1838 */
1839 return (EBUSY);
1840 }
39236c6e
A
1841
1842#if CONFIG_TRIGGERS
1843 if (flags & MNT_NOBLOCK && p != kernproc)
1844 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
1845#endif
1846
1c79356b 1847 mp->mnt_kern_flag |= MNTK_UNMOUNT;
91447636
A
1848 mp->mnt_lflag |= MNT_LUNMOUNT;
1849 mp->mnt_flag &=~ MNT_ASYNC;
2d21ac55
A
1850 /*
1851 * anyone currently in the fast path that
1852 * trips over the cached rootvp will be
1853 * dumped out and forced into the slow path
1854 * to regenerate a new cached value
1855 */
1856 mp->mnt_realrootvp = NULLVP;
91447636 1857 mount_unlock(mp);
2d21ac55
A
1858
1859 /*
1860 * taking the name_cache_lock exclusively will
1861 * insure that everyone is out of the fast path who
1862 * might be trying to use a now stale copy of
1863 * vp->v_mountedhere->mnt_realrootvp
1864 * bumping mount_generation causes the cached values
1865 * to be invalidated
1866 */
1867 name_cache_lock();
1868 mount_generation++;
1869 name_cache_unlock();
1870
1871
91447636 1872 lck_rw_lock_exclusive(&mp->mnt_rwlock);
6601e61a
A
1873 if (withref != 0)
1874 mount_drop(mp, 0);
2d21ac55 1875#if CONFIG_FSE
91447636 1876 fsevent_unmount(mp); /* has to come first! */
2d21ac55 1877#endif
91447636
A
1878 error = 0;
1879 if (forcedunmount == 0) {
1880 ubc_umount(mp); /* release cached vnodes */
1881 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2d21ac55 1882 error = VFS_SYNC(mp, MNT_WAIT, ctx);
91447636
A
1883 if (error) {
1884 mount_lock(mp);
1885 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1886 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1887 mp->mnt_lflag &= ~MNT_LFORCE;
1888 goto out;
1889 }
1890 }
1891 }
6d2010ae
A
1892
1893#if CONFIG_TRIGGERS
1894 vfs_nested_trigger_unmounts(mp, flags, ctx);
1895 did_vflush = 1;
1896#endif
91447636
A
1897 if (forcedunmount)
1898 lflags |= FORCECLOSE;
1899 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
1900 if ((forcedunmount == 0) && error) {
1901 mount_lock(mp);
9bccf70c 1902 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
1903 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1904 mp->mnt_lflag &= ~MNT_LFORCE;
9bccf70c
A
1905 goto out;
1906 }
91447636
A
1907
1908 /* make sure there are no one in the mount iterations or lookup */
1909 mount_iterdrain(mp);
1910
2d21ac55 1911 error = VFS_UNMOUNT(mp, flags, ctx);
1c79356b 1912 if (error) {
91447636
A
1913 mount_iterreset(mp);
1914 mount_lock(mp);
1c79356b 1915 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
1916 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1917 mp->mnt_lflag &= ~MNT_LFORCE;
1c79356b
A
1918 goto out;
1919 }
1920
1921 /* increment the operations count */
1922 if (!error)
b0d623f7 1923 OSAddAtomic(1, &vfs_nummntops);
91447636
A
1924
1925 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
593a1d5f
A
1926 /* hold an io reference and drop the usecount before close */
1927 devvp = mp->mnt_devvp;
593a1d5f
A
1928 vnode_getalways(devvp);
1929 vnode_rele(devvp);
1930 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
2d21ac55 1931 ctx);
b0d623f7 1932 vnode_clearmountedon(devvp);
593a1d5f 1933 vnode_put(devvp);
91447636
A
1934 }
1935 lck_rw_done(&mp->mnt_rwlock);
1936 mount_list_remove(mp);
1937 lck_rw_lock_exclusive(&mp->mnt_rwlock);
6d2010ae 1938
91447636 1939 /* mark the mount point hook in the vp but not drop the ref yet */
1c79356b 1940 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
6d2010ae
A
1941 vnode_getwithref(coveredvp);
1942 vnode_lock_spin(coveredvp);
1943
1944 mp->mnt_crossref++;
1945 coveredvp->v_mountedhere = (struct mount *)0;
1946
1947 vnode_unlock(coveredvp);
1948 vnode_put(coveredvp);
1c79356b 1949 }
91447636
A
1950
1951 mount_list_lock();
1952 mp->mnt_vtable->vfc_refcount--;
1953 mount_list_unlock();
1954
1955 cache_purgevfs(mp); /* remove cache entries for this file sys */
1956 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
1957 mount_lock(mp);
1958 mp->mnt_lflag |= MNT_LDEAD;
1959
1960 if (mp->mnt_lflag & MNT_LWAIT) {
1961 /*
1962 * do the wakeup here
1963 * in case we block in mount_refdrain
1964 * which will drop the mount lock
1965 * and allow anyone blocked in vfs_busy
1966 * to wakeup and see the LDEAD state
1967 */
1968 mp->mnt_lflag &= ~MNT_LWAIT;
1969 wakeup((caddr_t)mp);
1c79356b 1970 }
91447636 1971 mount_refdrain(mp);
1c79356b 1972out:
91447636
A
1973 if (mp->mnt_lflag & MNT_LWAIT) {
1974 mp->mnt_lflag &= ~MNT_LWAIT;
1975 needwakeup = 1;
1976 }
6d2010ae 1977
6d2010ae 1978#if CONFIG_TRIGGERS
39236c6e
A
1979 if (flags & MNT_NOBLOCK && p != kernproc) {
1980 // Restore P_NOREMOTEHANG bit to its previous value
1981 if ((pflags_save & P_NOREMOTEHANG) == 0)
1982 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
1983 }
1984
6d2010ae
A
1985 /*
1986 * Callback and context are set together under the mount lock, and
1987 * never cleared, so we're safe to examine them here, drop the lock,
1988 * and call out.
1989 */
1990 if (mp->mnt_triggercallback != NULL) {
1991 mount_unlock(mp);
1992 if (error == 0) {
1993 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
1994 } else if (did_vflush) {
1995 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
1996 }
1997 } else {
1998 mount_unlock(mp);
1999 }
2000#else
91447636 2001 mount_unlock(mp);
6d2010ae
A
2002#endif /* CONFIG_TRIGGERS */
2003
91447636
A
2004 lck_rw_done(&mp->mnt_rwlock);
2005
2006 if (needwakeup)
1c79356b 2007 wakeup((caddr_t)mp);
6d2010ae 2008
55e303ae 2009 if (!error) {
91447636 2010 if ((coveredvp != NULLVP)) {
b0d623f7
A
2011 vnode_t pvp;
2012
91447636 2013 vnode_getwithref(coveredvp);
b0d623f7 2014 pvp = vnode_getparent(coveredvp);
91447636 2015 vnode_rele(coveredvp);
6d2010ae
A
2016
2017 mount_dropcrossref(mp, coveredvp, 0);
2018#if CONFIG_TRIGGERS
2019 if (coveredvp->v_resolve)
2020 vnode_trigger_rearm(coveredvp, ctx);
2021#endif
91447636 2022 vnode_put(coveredvp);
b0d623f7
A
2023
2024 if (pvp) {
2025 lock_vnode_and_post(pvp, NOTE_WRITE);
2026 vnode_put(pvp);
2027 }
91447636
A
2028 } else if (mp->mnt_flag & MNT_ROOTFS) {
2029 mount_lock_destroy(mp);
2d21ac55
A
2030#if CONFIG_MACF
2031 mac_mount_label_destroy(mp);
2032#endif
91447636
A
2033 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2034 } else
2035 panic("dounmount: no coveredvp");
55e303ae 2036 }
1c79356b
A
2037 return (error);
2038}
2039
91447636
A
2040void
2041mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2042{
6d2010ae
A
2043 vnode_lock(dp);
2044 mp->mnt_crossref--;
2045
2046 if (mp->mnt_crossref < 0)
2047 panic("mount cross refs -ve");
2048
2049 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
2050
91447636 2051 if (need_put)
6d2010ae 2052 vnode_put_locked(dp);
91447636 2053 vnode_unlock(dp);
6d2010ae
A
2054
2055 mount_lock_destroy(mp);
2056#if CONFIG_MACF
2057 mac_mount_label_destroy(mp);
2058#endif
2059 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2060 return;
2061 }
2062 if (need_put)
2063 vnode_put_locked(dp);
2064 vnode_unlock(dp);
91447636
A
2065}
2066
2067
1c79356b
A
2068/*
2069 * Sync each mounted filesystem.
2070 */
#if DIAGNOSTIC
/* When non-zero, sync() calls vfs_bufstats() after iterating the mounts. */
int syncprt = 0;
struct ctldebug debug0 = { "syncprt", &syncprt };
#endif

/* When non-zero, sync() calls vm_countdirtypages() after iterating the mounts. */
int print_vmpage_stat = 0;
2077
91447636 2078static int
b0d623f7 2079sync_callback(mount_t mp, void * arg)
1c79356b 2080{
1c79356b 2081 int asyncflag;
91447636
A
2082
2083 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1c79356b
A
2084 asyncflag = mp->mnt_flag & MNT_ASYNC;
2085 mp->mnt_flag &= ~MNT_ASYNC;
b0d623f7 2086 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_current());
1c79356b
A
2087 if (asyncflag)
2088 mp->mnt_flag |= MNT_ASYNC;
1c79356b 2089 }
91447636
A
2090 return(VFS_RETURNED);
2091}
1c79356b 2092
91447636 2093
91447636
A
2094/* ARGSUSED */
2095int
b0d623f7 2096sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
91447636 2097{
91447636 2098 vfs_iterate(LK_NOWAIT, sync_callback, (void *)0);
b0d623f7 2099
1c79356b
A
2100 if(print_vmpage_stat) {
2101 vm_countdirtypages();
1c79356b 2102 }
39236c6e 2103
1c79356b
A
2104#if DIAGNOSTIC
2105 if (syncprt)
2106 vfs_bufstats();
2107#endif /* DIAGNOSTIC */
2108 return (0);
2109}
2110
2111/*
2112 * Change filesystem quotas.
2113 */
2d21ac55 2114#if QUOTA
b0d623f7 2115static int quotactl_funneled(proc_t p, struct quotactl_args *uap, int32_t *retval);
2d21ac55 2116
1c79356b 2117int
b0d623f7 2118quotactl(proc_t p, struct quotactl_args *uap, int32_t *retval)
2d21ac55
A
2119{
2120 boolean_t funnel_state;
2121 int error;
2122
2123 funnel_state = thread_funnel_set(kernel_flock, TRUE);
2124 error = quotactl_funneled(p, uap, retval);
2125 thread_funnel_set(kernel_flock, funnel_state);
2126 return(error);
2127}
2128
2129static int
b0d623f7 2130quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
1c79356b 2131{
2d21ac55 2132 struct mount *mp;
91447636
A
2133 int error, quota_cmd, quota_status;
2134 caddr_t datap;
2135 size_t fnamelen;
1c79356b 2136 struct nameidata nd;
2d21ac55 2137 vfs_context_t ctx = vfs_context_current();
91447636
A
2138 struct dqblk my_dqblk;
2139
b0d623f7 2140 AUDIT_ARG(uid, uap->uid);
55e303ae 2141 AUDIT_ARG(cmd, uap->cmd);
6d2010ae
A
2142 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2143 uap->path, ctx);
55e303ae
A
2144 error = namei(&nd);
2145 if (error)
1c79356b
A
2146 return (error);
2147 mp = nd.ni_vp->v_mount;
91447636
A
2148 vnode_put(nd.ni_vp);
2149 nameidone(&nd);
2150
2151 /* copyin any data we will need for downstream code */
2152 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2153
2154 switch (quota_cmd) {
2155 case Q_QUOTAON:
2156 /* uap->arg specifies a file from which to take the quotas */
2157 fnamelen = MAXPATHLEN;
2158 datap = kalloc(MAXPATHLEN);
2159 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2160 break;
2161 case Q_GETQUOTA:
2162 /* uap->arg is a pointer to a dqblk structure. */
2163 datap = (caddr_t) &my_dqblk;
2164 break;
2165 case Q_SETQUOTA:
2166 case Q_SETUSE:
2167 /* uap->arg is a pointer to a dqblk structure. */
2168 datap = (caddr_t) &my_dqblk;
2169 if (proc_is64bit(p)) {
2170 struct user_dqblk my_dqblk64;
2171 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
2172 if (error == 0) {
2173 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2174 }
2175 }
2176 else {
2177 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
2178 }
2179 break;
2180 case Q_QUOTASTAT:
2181 /* uap->arg is a pointer to an integer */
2182 datap = (caddr_t) &quota_status;
2183 break;
2184 default:
2185 datap = NULL;
2186 break;
2187 } /* switch */
2188
2189 if (error == 0) {
2d21ac55 2190 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
91447636
A
2191 }
2192
2193 switch (quota_cmd) {
2194 case Q_QUOTAON:
2195 if (datap != NULL)
2196 kfree(datap, MAXPATHLEN);
2197 break;
2198 case Q_GETQUOTA:
2199 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2200 if (error == 0) {
2201 if (proc_is64bit(p)) {
2202 struct user_dqblk my_dqblk64;
2203 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2204 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
2205 }
2206 else {
2207 error = copyout(datap, uap->arg, sizeof (struct dqblk));
2208 }
2209 }
2210 break;
2211 case Q_QUOTASTAT:
2212 /* uap->arg is a pointer to an integer */
2213 if (error == 0) {
2214 error = copyout(datap, uap->arg, sizeof(quota_status));
2215 }
2216 break;
2217 default:
2218 break;
2219 } /* switch */
2220
2221 return (error);
1c79356b 2222}
2d21ac55
A
2223#else
2224int
b0d623f7 2225quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2d21ac55
A
2226{
2227 return (EOPNOTSUPP);
2228}
2229#endif /* QUOTA */
1c79356b
A
2230
2231/*
2232 * Get filesystem statistics.
2d21ac55
A
2233 *
2234 * Returns: 0 Success
2235 * namei:???
2236 * vfs_update_vfsstat:???
2237 * munge_statfs:EFAULT
1c79356b 2238 */
1c79356b
A
2239/* ARGSUSED */
2240int
b0d623f7 2241statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
1c79356b 2242{
91447636
A
2243 struct mount *mp;
2244 struct vfsstatfs *sp;
1c79356b
A
2245 int error;
2246 struct nameidata nd;
2d21ac55 2247 vfs_context_t ctx = vfs_context_current();
91447636 2248 vnode_t vp;
1c79356b 2249
6d2010ae 2250 NDINIT(&nd, LOOKUP, OP_STATFS, NOTRIGGER | FOLLOW | AUDITVNPATH1,
2d21ac55 2251 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
2252 error = namei(&nd);
2253 if (error)
1c79356b 2254 return (error);
91447636
A
2255 vp = nd.ni_vp;
2256 mp = vp->v_mount;
2257 sp = &mp->mnt_vfsstat;
2258 nameidone(&nd);
2259
2d21ac55 2260 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
39236c6e
A
2261 if (error != 0) {
2262 vnode_put(vp);
1c79356b 2263 return (error);
39236c6e 2264 }
91447636
A
2265
2266 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
39236c6e 2267 vnode_put(vp);
91447636 2268 return (error);
1c79356b
A
2269}
2270
2271/*
2272 * Get filesystem statistics.
2273 */
1c79356b
A
2274/* ARGSUSED */
2275int
b0d623f7 2276fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
1c79356b 2277{
2d21ac55 2278 vnode_t vp;
1c79356b 2279 struct mount *mp;
91447636 2280 struct vfsstatfs *sp;
1c79356b
A
2281 int error;
2282
55e303ae
A
2283 AUDIT_ARG(fd, uap->fd);
2284
91447636 2285 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 2286 return (error);
55e303ae 2287
d1ecb069
A
2288 error = vnode_getwithref(vp);
2289 if (error) {
2290 file_drop(uap->fd);
2291 return (error);
2292 }
2293
91447636 2294 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
55e303ae 2295
91447636
A
2296 mp = vp->v_mount;
2297 if (!mp) {
d1ecb069
A
2298 error = EBADF;
2299 goto out;
91447636
A
2300 }
2301 sp = &mp->mnt_vfsstat;
2d21ac55 2302 if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
d1ecb069 2303 goto out;
91447636 2304 }
91447636
A
2305
2306 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2307
d1ecb069
A
2308out:
2309 file_drop(uap->fd);
2310 vnode_put(vp);
2311
91447636 2312 return (error);
1c79356b
A
2313}
2314
2d21ac55
A
2315/*
2316 * Common routine to handle copying of statfs64 data to user space
2317 */
2318static int
2319statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2320{
2321 int error;
2322 struct statfs64 sfs;
2323
2324 bzero(&sfs, sizeof(sfs));
2325
2326 sfs.f_bsize = sfsp->f_bsize;
2327 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2328 sfs.f_blocks = sfsp->f_blocks;
2329 sfs.f_bfree = sfsp->f_bfree;
2330 sfs.f_bavail = sfsp->f_bavail;
2331 sfs.f_files = sfsp->f_files;
2332 sfs.f_ffree = sfsp->f_ffree;
2333 sfs.f_fsid = sfsp->f_fsid;
2334 sfs.f_owner = sfsp->f_owner;
2335 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2336 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2337 sfs.f_fssubtype = sfsp->f_fssubtype;
6d2010ae
A
2338 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2339 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2340 } else {
2341 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2342 }
2d21ac55
A
2343 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2344 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2345
2346 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2347
2348 return(error);
2349}
2350
2351/*
2352 * Get file system statistics in 64-bit mode
2353 */
2354int
b0d623f7 2355statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
2356{
2357 struct mount *mp;
2358 struct vfsstatfs *sp;
2359 int error;
2360 struct nameidata nd;
2361 vfs_context_t ctxp = vfs_context_current();
2362 vnode_t vp;
2363
6d2010ae 2364 NDINIT(&nd, LOOKUP, OP_STATFS, NOTRIGGER | FOLLOW | AUDITVNPATH1,
2d21ac55
A
2365 UIO_USERSPACE, uap->path, ctxp);
2366 error = namei(&nd);
2367 if (error)
2368 return (error);
2369 vp = nd.ni_vp;
2370 mp = vp->v_mount;
2371 sp = &mp->mnt_vfsstat;
2372 nameidone(&nd);
2373
2374 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
39236c6e
A
2375 if (error != 0) {
2376 vnode_put(vp);
2d21ac55 2377 return (error);
39236c6e 2378 }
2d21ac55
A
2379
2380 error = statfs64_common(mp, sp, uap->buf);
39236c6e 2381 vnode_put(vp);
2d21ac55
A
2382
2383 return (error);
2384}
2385
2386/*
2387 * Get file system statistics in 64-bit mode
2388 */
2389int
b0d623f7 2390fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
2391{
2392 struct vnode *vp;
2393 struct mount *mp;
2394 struct vfsstatfs *sp;
2395 int error;
2396
2397 AUDIT_ARG(fd, uap->fd);
2398
2399 if ( (error = file_vnode(uap->fd, &vp)) )
2400 return (error);
2401
d1ecb069
A
2402 error = vnode_getwithref(vp);
2403 if (error) {
2404 file_drop(uap->fd);
2405 return (error);
2406 }
2407
2d21ac55
A
2408 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2409
2410 mp = vp->v_mount;
2411 if (!mp) {
316670eb 2412 error = EBADF;
d1ecb069 2413 goto out;
2d21ac55
A
2414 }
2415 sp = &mp->mnt_vfsstat;
2416 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
d1ecb069 2417 goto out;
2d21ac55 2418 }
2d21ac55
A
2419
2420 error = statfs64_common(mp, sp, uap->buf);
2421
d1ecb069
A
2422out:
2423 file_drop(uap->fd);
2424 vnode_put(vp);
2425
2d21ac55
A
2426 return (error);
2427}
91447636
A
2428
2429struct getfsstat_struct {
2430 user_addr_t sfsp;
2d21ac55 2431 user_addr_t *mp;
91447636
A
2432 int count;
2433 int maxcount;
2434 int flags;
2435 int error;
1c79356b 2436};
1c79356b 2437
91447636
A
2438
2439static int
2440getfsstat_callback(mount_t mp, void * arg)
2441{
2442
2443 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2444 struct vfsstatfs *sp;
91447636 2445 int error, my_size;
2d21ac55 2446 vfs_context_t ctx = vfs_context_current();
91447636
A
2447
2448 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2449 sp = &mp->mnt_vfsstat;
2450 /*
2451 * If MNT_NOWAIT is specified, do not refresh the
b0d623f7 2452 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
91447636 2453 */
b0d623f7 2454 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2d21ac55
A
2455 (error = vfs_update_vfsstat(mp, ctx,
2456 VFS_USER_EVENT))) {
91447636
A
2457 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2458 return(VFS_RETURNED);
1c79356b 2459 }
91447636
A
2460
2461 /*
2462 * Need to handle LP64 version of struct statfs
2463 */
2d21ac55 2464 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
91447636
A
2465 if (error) {
2466 fstp->error = error;
2467 return(VFS_RETURNED_DONE);
1c79356b 2468 }
91447636 2469 fstp->sfsp += my_size;
2d21ac55
A
2470
2471 if (fstp->mp) {
39236c6e 2472#if CONFIG_MACF
2d21ac55
A
2473 error = mac_mount_label_get(mp, *fstp->mp);
2474 if (error) {
2475 fstp->error = error;
2476 return(VFS_RETURNED_DONE);
2477 }
39236c6e 2478#endif
2d21ac55
A
2479 fstp->mp++;
2480 }
2481 }
91447636
A
2482 fstp->count++;
2483 return(VFS_RETURNED);
2484}
2485
2486/*
2487 * Get statistics on all filesystems.
2488 */
2489int
2490getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2d21ac55
A
2491{
2492 struct __mac_getfsstat_args muap;
2493
2494 muap.buf = uap->buf;
2495 muap.bufsize = uap->bufsize;
2496 muap.mac = USER_ADDR_NULL;
2497 muap.macsize = 0;
2498 muap.flags = uap->flags;
2499
2500 return (__mac_getfsstat(p, &muap, retval));
2501}
2502
b0d623f7
A
2503/*
2504 * __mac_getfsstat: Get MAC-related file system statistics
2505 *
2506 * Parameters: p (ignored)
2507 * uap User argument descriptor (see below)
2508 * retval Count of file system statistics (N stats)
2509 *
2510 * Indirect: uap->bufsize Buffer size
2511 * uap->macsize MAC info size
2512 * uap->buf Buffer where information will be returned
2513 * uap->mac MAC info
2514 * uap->flags File system flags
2515 *
2516 *
2517 * Returns: 0 Success
2518 * !0 Not success
2519 *
2520 */
2d21ac55
A
2521int
2522__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
91447636
A
2523{
2524 user_addr_t sfsp;
2d21ac55 2525 user_addr_t *mp;
b0d623f7 2526 size_t count, maxcount, bufsize, macsize;
91447636
A
2527 struct getfsstat_struct fst;
2528
b0d623f7
A
2529 bufsize = (size_t) uap->bufsize;
2530 macsize = (size_t) uap->macsize;
2531
91447636 2532 if (IS_64BIT_PROCESS(p)) {
b0d623f7 2533 maxcount = bufsize / sizeof(struct user64_statfs);
91447636
A
2534 }
2535 else {
b0d623f7 2536 maxcount = bufsize / sizeof(struct user32_statfs);
91447636
A
2537 }
2538 sfsp = uap->buf;
2539 count = 0;
2540
2d21ac55
A
2541 mp = NULL;
2542
2543#if CONFIG_MACF
2544 if (uap->mac != USER_ADDR_NULL) {
2545 u_int32_t *mp0;
2546 int error;
b0d623f7 2547 unsigned int i;
2d21ac55 2548
b0d623f7 2549 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2d21ac55
A
2550 if (count != maxcount)
2551 return (EINVAL);
2552
2553 /* Copy in the array */
b0d623f7
A
2554 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2555 if (mp0 == NULL) {
2556 return (ENOMEM);
2557 }
2558
2559 error = copyin(uap->mac, mp0, macsize);
2560 if (error) {
2561 FREE(mp0, M_MACTEMP);
2d21ac55 2562 return (error);
b0d623f7 2563 }
2d21ac55
A
2564
2565 /* Normalize to an array of user_addr_t */
2566 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
b0d623f7
A
2567 if (mp == NULL) {
2568 FREE(mp0, M_MACTEMP);
2569 return (ENOMEM);
2570 }
2571
2d21ac55
A
2572 for (i = 0; i < count; i++) {
2573 if (IS_64BIT_PROCESS(p))
2574 mp[i] = ((user_addr_t *)mp0)[i];
2575 else
2576 mp[i] = (user_addr_t)mp0[i];
2577 }
2578 FREE(mp0, M_MACTEMP);
2579 }
2580#endif
2581
2582
91447636 2583 fst.sfsp = sfsp;
2d21ac55 2584 fst.mp = mp;
91447636
A
2585 fst.flags = uap->flags;
2586 fst.count = 0;
2587 fst.error = 0;
2588 fst.maxcount = maxcount;
2589
2590
2591 vfs_iterate(0, getfsstat_callback, &fst);
2592
2d21ac55
A
2593 if (mp)
2594 FREE(mp, M_MACTEMP);
2595
91447636
A
2596 if (fst.error ) {
2597 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2598 return(fst.error);
2599 }
2600
2601 if (fst.sfsp && fst.count > fst.maxcount)
2602 *retval = fst.maxcount;
1c79356b 2603 else
91447636 2604 *retval = fst.count;
1c79356b
A
2605 return (0);
2606}
2607
2d21ac55
A
2608static int
2609getfsstat64_callback(mount_t mp, void * arg)
2610{
2611 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2612 struct vfsstatfs *sp;
2613 int error;
2614
2615 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2616 sp = &mp->mnt_vfsstat;
2617 /*
b0d623f7
A
2618 * If MNT_NOWAIT is specified, do not refresh the fsstat
2619 * cache. MNT_WAIT overrides MNT_NOWAIT.
2620 *
2621 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2622 * getfsstat, since the constants are out of the same
2623 * namespace.
2d21ac55 2624 */
b0d623f7
A
2625 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2626 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2d21ac55
A
2627 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2628 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2629 return(VFS_RETURNED);
2630 }
2631
2632 error = statfs64_common(mp, sp, fstp->sfsp);
2633 if (error) {
2634 fstp->error = error;
2635 return(VFS_RETURNED_DONE);
2636 }
2637 fstp->sfsp += sizeof(struct statfs64);
2638 }
2639 fstp->count++;
2640 return(VFS_RETURNED);
2641}
2642
2643/*
2644 * Get statistics on all file systems in 64 bit mode.
2645 */
2646int
2647getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2648{
2649 user_addr_t sfsp;
2650 int count, maxcount;
2651 struct getfsstat_struct fst;
2652
2653 maxcount = uap->bufsize / sizeof(struct statfs64);
2654
2655 sfsp = uap->buf;
2656 count = 0;
2657
2658 fst.sfsp = sfsp;
2659 fst.flags = uap->flags;
2660 fst.count = 0;
2661 fst.error = 0;
2662 fst.maxcount = maxcount;
2663
2664 vfs_iterate(0, getfsstat64_callback, &fst);
2665
2666 if (fst.error ) {
2667 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2668 return(fst.error);
2669 }
2670
2671 if (fst.sfsp && fst.count > fst.maxcount)
2672 *retval = fst.maxcount;
2673 else
2674 *retval = fst.count;
2675
2676 return (0);
2677}
2678
1c79356b
A
2679/*
2680 * Change current working directory to a given file descriptor.
2681 */
1c79356b 2682/* ARGSUSED */
2d21ac55
A
2683static int
2684common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
1c79356b 2685{
2d21ac55
A
2686 struct filedesc *fdp = p->p_fd;
2687 vnode_t vp;
2688 vnode_t tdp;
2689 vnode_t tvp;
1c79356b 2690 struct mount *mp;
1c79356b 2691 int error;
2d21ac55 2692 vfs_context_t ctx = vfs_context_current();
1c79356b 2693
b0d623f7 2694 AUDIT_ARG(fd, uap->fd);
2d21ac55
A
2695 if (per_thread && uap->fd == -1) {
2696 /*
2697 * Switching back from per-thread to per process CWD; verify we
2698 * in fact have one before proceeding. The only success case
2699 * for this code path is to return 0 preemptively after zapping
2700 * the thread structure contents.
2701 */
2702 thread_t th = vfs_context_thread(ctx);
2703 if (th) {
2704 uthread_t uth = get_bsdthread_info(th);
2705 tvp = uth->uu_cdir;
2706 uth->uu_cdir = NULLVP;
2707 if (tvp != NULLVP) {
2708 vnode_rele(tvp);
2709 return (0);
2710 }
2711 }
2712 return (EBADF);
2713 }
91447636
A
2714
2715 if ( (error = file_vnode(uap->fd, &vp)) )
2716 return(error);
2717 if ( (error = vnode_getwithref(vp)) ) {
2718 file_drop(uap->fd);
2719 return(error);
2720 }
55e303ae
A
2721
2722 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
2723
2d21ac55 2724 if (vp->v_type != VDIR) {
1c79356b 2725 error = ENOTDIR;
2d21ac55
A
2726 goto out;
2727 }
2728
2729#if CONFIG_MACF
2730 error = mac_vnode_check_chdir(ctx, vp);
2731 if (error)
2732 goto out;
2733#endif
2734 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2735 if (error)
2736 goto out;
2737
1c79356b 2738 while (!error && (mp = vp->v_mountedhere) != NULL) {
91447636
A
2739 if (vfs_busy(mp, LK_NOWAIT)) {
2740 error = EACCES;
2741 goto out;
55e303ae 2742 }
2d21ac55 2743 error = VFS_ROOT(mp, &tdp, ctx);
91447636 2744 vfs_unbusy(mp);
1c79356b
A
2745 if (error)
2746 break;
91447636 2747 vnode_put(vp);
1c79356b
A
2748 vp = tdp;
2749 }
91447636
A
2750 if (error)
2751 goto out;
2752 if ( (error = vnode_ref(vp)) )
2753 goto out;
2754 vnode_put(vp);
2755
2d21ac55
A
2756 if (per_thread) {
2757 thread_t th = vfs_context_thread(ctx);
2758 if (th) {
2759 uthread_t uth = get_bsdthread_info(th);
2760 tvp = uth->uu_cdir;
2761 uth->uu_cdir = vp;
b0d623f7 2762 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
2763 } else {
2764 vnode_rele(vp);
2765 return (ENOENT);
2766 }
2767 } else {
2768 proc_fdlock(p);
2769 tvp = fdp->fd_cdir;
2770 fdp->fd_cdir = vp;
2771 proc_fdunlock(p);
2772 }
91447636
A
2773
2774 if (tvp)
2775 vnode_rele(tvp);
2776 file_drop(uap->fd);
2777
1c79356b 2778 return (0);
91447636
A
2779out:
2780 vnode_put(vp);
2781 file_drop(uap->fd);
2782
2783 return(error);
1c79356b
A
2784}
2785
2d21ac55 2786int
b0d623f7 2787fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
2788{
2789 return common_fchdir(p, uap, 0);
2790}
2791
2792int
b0d623f7 2793__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
2794{
2795 return common_fchdir(p, (void *)uap, 1);
2796}
2797
1c79356b 2798/*
b0d623f7 2799 * Change current working directory (".").
2d21ac55
A
2800 *
2801 * Returns: 0 Success
2802 * change_dir:ENOTDIR
2803 * change_dir:???
2804 * vnode_ref:ENOENT No such file or directory
1c79356b 2805 */
1c79356b 2806/* ARGSUSED */
2d21ac55
A
2807static int
2808common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
1c79356b 2809{
2d21ac55 2810 struct filedesc *fdp = p->p_fd;
1c79356b
A
2811 int error;
2812 struct nameidata nd;
2d21ac55
A
2813 vnode_t tvp;
2814 vfs_context_t ctx = vfs_context_current();
91447636 2815
6d2010ae 2816 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
2d21ac55
A
2817 UIO_USERSPACE, uap->path, ctx);
2818 error = change_dir(&nd, ctx);
55e303ae 2819 if (error)
1c79356b 2820 return (error);
91447636
A
2821 if ( (error = vnode_ref(nd.ni_vp)) ) {
2822 vnode_put(nd.ni_vp);
2823 return (error);
2824 }
2825 /*
2826 * drop the iocount we picked up in change_dir
2827 */
2828 vnode_put(nd.ni_vp);
2829
2d21ac55
A
2830 if (per_thread) {
2831 thread_t th = vfs_context_thread(ctx);
2832 if (th) {
2833 uthread_t uth = get_bsdthread_info(th);
2834 tvp = uth->uu_cdir;
2835 uth->uu_cdir = nd.ni_vp;
b0d623f7 2836 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
2837 } else {
2838 vnode_rele(nd.ni_vp);
2839 return (ENOENT);
2840 }
2841 } else {
2842 proc_fdlock(p);
2843 tvp = fdp->fd_cdir;
2844 fdp->fd_cdir = nd.ni_vp;
2845 proc_fdunlock(p);
2846 }
91447636
A
2847
2848 if (tvp)
2849 vnode_rele(tvp);
2850
1c79356b
A
2851 return (0);
2852}
2853
b0d623f7
A
2854
2855/*
2856 * chdir
2857 *
2858 * Change current working directory (".") for the entire process
2859 *
2860 * Parameters: p Process requesting the call
2861 * uap User argument descriptor (see below)
2862 * retval (ignored)
2863 *
2864 * Indirect parameters: uap->path Directory path
2865 *
2866 * Returns: 0 Success
2867 * common_chdir: ENOTDIR
2868 * common_chdir: ENOENT No such file or directory
2869 * common_chdir: ???
2870 *
2871 */
2d21ac55 2872int
b0d623f7 2873chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
2874{
2875 return common_chdir(p, (void *)uap, 0);
2876}
2877
b0d623f7
A
2878/*
2879 * __pthread_chdir
2880 *
2881 * Change current working directory (".") for a single thread
2882 *
2883 * Parameters: p Process requesting the call
2884 * uap User argument descriptor (see below)
2885 * retval (ignored)
2886 *
2887 * Indirect parameters: uap->path Directory path
2888 *
2889 * Returns: 0 Success
2890 * common_chdir: ENOTDIR
2891 * common_chdir: ENOENT No such file or directory
2892 * common_chdir: ???
2893 *
2894 */
2d21ac55 2895int
b0d623f7 2896__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
2897{
2898 return common_chdir(p, (void *)uap, 1);
2899}
2900
2901
1c79356b
A
2902/*
2903 * Change notion of root (``/'') directory.
2904 */
1c79356b
A
2905/* ARGSUSED */
2906int
b0d623f7 2907chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
1c79356b 2908{
2d21ac55 2909 struct filedesc *fdp = p->p_fd;
1c79356b
A
2910 int error;
2911 struct nameidata nd;
2d21ac55
A
2912 vnode_t tvp;
2913 vfs_context_t ctx = vfs_context_current();
1c79356b 2914
91447636 2915 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
1c79356b
A
2916 return (error);
2917
6d2010ae 2918 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
2d21ac55
A
2919 UIO_USERSPACE, uap->path, ctx);
2920 error = change_dir(&nd, ctx);
55e303ae 2921 if (error)
1c79356b
A
2922 return (error);
2923
2d21ac55
A
2924#if CONFIG_MACF
2925 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
2926 &nd.ni_cnd);
2927 if (error) {
91447636
A
2928 vnode_put(nd.ni_vp);
2929 return (error);
2930 }
2d21ac55
A
2931#endif
2932
91447636
A
2933 if ( (error = vnode_ref(nd.ni_vp)) ) {
2934 vnode_put(nd.ni_vp);
1c79356b
A
2935 return (error);
2936 }
91447636 2937 vnode_put(nd.ni_vp);
1c79356b 2938
91447636 2939 proc_fdlock(p);
fa4905b1 2940 tvp = fdp->fd_rdir;
1c79356b 2941 fdp->fd_rdir = nd.ni_vp;
91447636
A
2942 fdp->fd_flags |= FD_CHROOT;
2943 proc_fdunlock(p);
2944
fa4905b1 2945 if (tvp != NULL)
91447636
A
2946 vnode_rele(tvp);
2947
1c79356b
A
2948 return (0);
2949}
2950
2951/*
2952 * Common routine for chroot and chdir.
2d21ac55
A
2953 *
2954 * Returns: 0 Success
2955 * ENOTDIR Not a directory
2956 * namei:??? [anything namei can return]
2957 * vnode_authorize:??? [anything vnode_authorize can return]
1c79356b
A
2958 */
2959static int
91447636 2960change_dir(struct nameidata *ndp, vfs_context_t ctx)
1c79356b 2961{
2d21ac55 2962 vnode_t vp;
1c79356b
A
2963 int error;
2964
91447636 2965 if ((error = namei(ndp)))
1c79356b 2966 return (error);
91447636 2967 nameidone(ndp);
1c79356b 2968 vp = ndp->ni_vp;
2d21ac55
A
2969
2970 if (vp->v_type != VDIR) {
91447636 2971 vnode_put(vp);
2d21ac55
A
2972 return (ENOTDIR);
2973 }
2974
2975#if CONFIG_MACF
2976 error = mac_vnode_check_chdir(ctx, vp);
2977 if (error) {
2978 vnode_put(vp);
2979 return (error);
2980 }
2981#endif
2982
2983 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2984 if (error) {
2985 vnode_put(vp);
2986 return (error);
2987 }
91447636 2988
1c79356b
A
2989 return (error);
2990}
2991
2992/*
2993 * Check permissions, allocate an open file structure,
2994 * and call the device open routine if any.
2d21ac55
A
2995 *
2996 * Returns: 0 Success
2997 * EINVAL
2998 * EINTR
2999 * falloc:ENFILE
3000 * falloc:EMFILE
3001 * falloc:ENOMEM
3002 * vn_open_auth:???
3003 * dupfdopen:???
3004 * VNOP_ADVLOCK:???
3005 * vnode_setsize:???
b0d623f7
A
3006 *
3007 * XXX Need to implement uid, gid
1c79356b 3008 */
2d21ac55 3009int
39236c6e
A
3010open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3011 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3012 int32_t *retval)
1c79356b 3013{
2d21ac55
A
3014 proc_t p = vfs_context_proc(ctx);
3015 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2d21ac55
A
3016 struct fileproc *fp;
3017 vnode_t vp;
91447636 3018 int flags, oflags;
1c79356b
A
3019 int type, indx, error;
3020 struct flock lf;
2d21ac55
A
3021 int no_controlling_tty = 0;
3022 int deny_controlling_tty = 0;
3023 struct session *sessp = SESSION_NULL;
ccc36f2f 3024
91447636 3025 oflags = uflags;
ccc36f2f
A
3026
3027 if ((oflags & O_ACCMODE) == O_ACCMODE)
3028 return(EINVAL);
91447636
A
3029 flags = FFLAGS(uflags);
3030
3031 AUDIT_ARG(fflags, oflags);
3032 AUDIT_ARG(mode, vap->va_mode);
3033
39236c6e
A
3034 if ((error = falloc_withalloc(p,
3035 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
1c79356b 3036 return (error);
91447636 3037 }
2d21ac55 3038 uu->uu_dupfd = -indx - 1;
91447636 3039
2d21ac55
A
3040 if (!(p->p_flag & P_CONTROLT)) {
3041 sessp = proc_session(p);
3042 no_controlling_tty = 1;
3043 /*
3044 * If conditions would warrant getting a controlling tty if
3045 * the device being opened is a tty (see ttyopen in tty.c),
3046 * but the open flags deny it, set a flag in the session to
3047 * prevent it.
3048 */
3049 if (SESS_LEADER(p, sessp) &&
3050 sessp->s_ttyvp == NULL &&
3051 (flags & O_NOCTTY)) {
3052 session_lock(sessp);
3053 sessp->s_flags |= S_NOCTTY;
3054 session_unlock(sessp);
3055 deny_controlling_tty = 1;
3056 }
3057 }
3058
3059 if ((error = vn_open_auth(ndp, &flags, vap))) {
3060 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
39236c6e 3061 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
2d21ac55 3062 fp_drop(p, indx, NULL, 0);
91447636 3063 *retval = indx;
2d21ac55
A
3064 if (deny_controlling_tty) {
3065 session_lock(sessp);
3066 sessp->s_flags &= ~S_NOCTTY;
3067 session_unlock(sessp);
3068 }
3069 if (sessp != SESSION_NULL)
3070 session_rele(sessp);
91447636
A
3071 return (0);
3072 }
1c79356b
A
3073 }
3074 if (error == ERESTART)
91447636
A
3075 error = EINTR;
3076 fp_free(p, indx, fp);
3077
2d21ac55
A
3078 if (deny_controlling_tty) {
3079 session_lock(sessp);
3080 sessp->s_flags &= ~S_NOCTTY;
3081 session_unlock(sessp);
3082 }
3083 if (sessp != SESSION_NULL)
3084 session_rele(sessp);
1c79356b
A
3085 return (error);
3086 }
2d21ac55
A
3087 uu->uu_dupfd = 0;
3088 vp = ndp->ni_vp;
55e303ae 3089
91447636 3090 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
91447636
A
3091 fp->f_fglob->fg_ops = &vnops;
3092 fp->f_fglob->fg_data = (caddr_t)vp;
3093
316670eb
A
3094#if CONFIG_PROTECT
3095 if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) {
3096 if (vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) {
3097 fp->f_fglob->fg_flag |= FENCRYPTED;
3098 }
3099 }
3100#endif
3101
1c79356b
A
3102 if (flags & (O_EXLOCK | O_SHLOCK)) {
3103 lf.l_whence = SEEK_SET;
3104 lf.l_start = 0;
3105 lf.l_len = 0;
3106 if (flags & O_EXLOCK)
3107 lf.l_type = F_WRLCK;
3108 else
3109 lf.l_type = F_RDLCK;
3110 type = F_FLOCK;
3111 if ((flags & FNONBLOCK) == 0)
3112 type |= F_WAIT;
2d21ac55
A
3113#if CONFIG_MACF
3114 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3115 F_SETLK, &lf);
3116 if (error)
3117 goto bad;
3118#endif
39236c6e 3119 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
55e303ae 3120 goto bad;
91447636 3121 fp->f_fglob->fg_flag |= FHASLOCK;
1c79356b 3122 }
55e303ae 3123
91447636
A
3124 /* try to truncate by setting the size attribute */
3125 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3126 goto bad;
55e303ae 3127
2d21ac55
A
3128 /*
3129 * If the open flags denied the acquisition of a controlling tty,
3130 * clear the flag in the session structure that prevented the lower
3131 * level code from assigning one.
3132 */
3133 if (deny_controlling_tty) {
3134 session_lock(sessp);
3135 sessp->s_flags &= ~S_NOCTTY;
3136 session_unlock(sessp);
3137 }
3138
3139 /*
3140 * If a controlling tty was set by the tty line discipline, then we
3141 * want to set the vp of the tty into the session structure. We have
3142 * a race here because we can't get to the vp for the tp in ttyopen,
3143 * because it's not passed as a parameter in the open path.
3144 */
3145 if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
3146 vnode_t ttyvp;
6d2010ae
A
3147
3148 /*
3149 * We already have a ref from vn_open_auth(), so we can demand another reference.
3150 */
3151 error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE);
3152 if (error != 0) {
3153 panic("vnode_ref_ext() with VNODE_REF_FORCE failed?!");
3154 }
3155
2d21ac55
A
3156 session_lock(sessp);
3157 ttyvp = sessp->s_ttyvp;
3158 sessp->s_ttyvp = vp;
3159 sessp->s_ttyvid = vnode_vid(vp);
3160 session_unlock(sessp);
3161 if (ttyvp != NULLVP)
3162 vnode_rele(ttyvp);
3163 }
3164
91447636 3165 vnode_put(vp);
55e303ae 3166
91447636 3167 proc_fdlock(p);
6d2010ae
A
3168 if (flags & O_CLOEXEC)
3169 *fdflags(p, indx) |= UF_EXCLOSE;
39236c6e
A
3170 if (flags & O_CLOFORK)
3171 *fdflags(p, indx) |= UF_FORKCLOSE;
6601e61a 3172 procfdtbl_releasefd(p, indx, NULL);
91447636
A
3173 fp_drop(p, indx, fp, 1);
3174 proc_fdunlock(p);
3175
1c79356b 3176 *retval = indx;
91447636 3177
2d21ac55
A
3178 if (sessp != SESSION_NULL)
3179 session_rele(sessp);
1c79356b 3180 return (0);
55e303ae 3181bad:
2d21ac55
A
3182 if (deny_controlling_tty) {
3183 session_lock(sessp);
3184 sessp->s_flags &= ~S_NOCTTY;
3185 session_unlock(sessp);
3186 }
3187 if (sessp != SESSION_NULL)
3188 session_rele(sessp);
3189
39236c6e 3190 struct vfs_context context = *vfs_context_current();
2d21ac55
A
3191 context.vc_ucred = fp->f_fglob->fg_cred;
3192
3193 vn_close(vp, fp->f_fglob->fg_flag, &context);
91447636
A
3194 vnode_put(vp);
3195 fp_free(p, indx, fp);
3196
55e303ae 3197 return (error);
1c79356b
A
3198}
3199
0c530ab8 3200/*
b0d623f7 3201 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
0c530ab8
A
3202 *
3203 * Parameters: p Process requesting the open
3204 * uap User argument descriptor (see below)
3205 * retval Pointer to an area to receive the
3206 * return calue from the system call
3207 *
3208 * Indirect: uap->path Path to open (same as 'open')
3209 * uap->flags Flags to open (same as 'open'
3210 * uap->uid UID to set, if creating
3211 * uap->gid GID to set, if creating
3212 * uap->mode File mode, if creating (same as 'open')
3213 * uap->xsecurity ACL to set, if creating
3214 *
3215 * Returns: 0 Success
3216 * !0 errno value
3217 *
3218 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3219 *
3220 * XXX: We should enummerate the possible errno values here, and where
3221 * in the code they originated.
3222 */
1c79356b 3223int
b0d623f7 3224open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
91447636 3225{
2d21ac55 3226 struct filedesc *fdp = p->p_fd;
91447636
A
3227 int ciferror;
3228 kauth_filesec_t xsecdst;
3229 struct vnode_attr va;
2d21ac55 3230 struct nameidata nd;
91447636
A
3231 int cmode;
3232
b0d623f7
A
3233 AUDIT_ARG(owner, uap->uid, uap->gid);
3234
91447636
A
3235 xsecdst = NULL;
3236 if ((uap->xsecurity != USER_ADDR_NULL) &&
3237 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3238 return ciferror;
3239
91447636
A
3240 VATTR_INIT(&va);
3241 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3242 VATTR_SET(&va, va_mode, cmode);
3243 if (uap->uid != KAUTH_UID_NONE)
3244 VATTR_SET(&va, va_uid, uap->uid);
3245 if (uap->gid != KAUTH_GID_NONE)
3246 VATTR_SET(&va, va_gid, uap->gid);
3247 if (xsecdst != NULL)
3248 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3249
6d2010ae
A
3250 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3251 uap->path, vfs_context_current());
2d21ac55 3252
39236c6e
A
3253 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3254 fileproc_alloc_init, NULL, retval);
91447636
A
3255 if (xsecdst != NULL)
3256 kauth_filesec_free(xsecdst);
3257
3258 return ciferror;
3259}
3260
316670eb
A
3261/*
3262 * Go through the data-protected atomically controlled open (2)
3263 *
3264 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3265 */
3266int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3267 int flags = uap->flags;
3268 int class = uap->class;
3269 int dpflags = uap->dpflags;
3270
3271 /*
3272 * Follow the same path as normal open(2)
3273 * Look up the item if it exists, and acquire the vnode.
3274 */
3275 struct filedesc *fdp = p->p_fd;
3276 struct vnode_attr va;
3277 struct nameidata nd;
3278 int cmode;
3279 int error;
3280
3281 VATTR_INIT(&va);
3282 /* Mask off all but regular access permissions */
3283 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3284 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3285
3286 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3287 uap->path, vfs_context_current());
3288
3289 /*
3290 * Initialize the extra fields in vnode_attr to pass down our
3291 * extra fields.
3292 * 1. target cprotect class.
3293 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3294 */
3295 if (flags & O_CREAT) {
3296 VATTR_SET(&va, va_dataprotect_class, class);
3297 }
3298
3299 if (dpflags & O_DP_GETRAWENCRYPTED) {
3300 if ( flags & (O_RDWR | O_WRONLY)) {
3301 /* Not allowed to write raw encrypted bytes */
3302 return EINVAL;
3303 }
3304 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3305 }
3306
39236c6e
A
3307 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3308 fileproc_alloc_init, NULL, retval);
316670eb
A
3309
3310 return error;
3311}
3312
3313
91447636 3314int
b0d623f7 3315open(proc_t p, struct open_args *uap, int32_t *retval)
1c79356b 3316{
2d21ac55
A
3317 __pthread_testcancel(1);
3318 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3319}
3320
2d21ac55 3321int
b0d623f7 3322open_nocancel(proc_t p, struct open_nocancel_args *uap, int32_t *retval)
2d21ac55
A
3323{
3324 struct filedesc *fdp = p->p_fd;
91447636 3325 struct vnode_attr va;
2d21ac55 3326 struct nameidata nd;
91447636 3327 int cmode;
1c79356b 3328
91447636
A
3329 VATTR_INIT(&va);
3330 /* Mask off all but regular access permissions */
3331 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3332 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3333
6d2010ae
A
3334 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3335 uap->path, vfs_context_current());
2d21ac55 3336
39236c6e
A
3337 return (open1(vfs_context_current(), &nd, uap->flags, &va,
3338 fileproc_alloc_init, NULL, retval));
1c79356b 3339}
91447636 3340
1c79356b
A
3341
3342/*
3343 * Create a special file.
3344 */
91447636
A
3345static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3346
1c79356b 3347int
b0d623f7 3348mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
1c79356b 3349{
91447636 3350 struct vnode_attr va;
2d21ac55 3351 vfs_context_t ctx = vfs_context_current();
91447636 3352 int error;
1c79356b 3353 struct nameidata nd;
91447636
A
3354 vnode_t vp, dvp;
3355
91447636
A
3356 VATTR_INIT(&va);
3357 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3358 VATTR_SET(&va, va_rdev, uap->dev);
3359
3360 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3361 if ((uap->mode & S_IFMT) == S_IFIFO)
2d21ac55 3362 return(mkfifo1(ctx, uap->path, &va));
1c79356b 3363
55e303ae 3364 AUDIT_ARG(mode, uap->mode);
b0d623f7 3365 AUDIT_ARG(value32, uap->dev);
91447636 3366
2d21ac55 3367 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
1c79356b 3368 return (error);
6d2010ae 3369 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
2d21ac55 3370 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
3371 error = namei(&nd);
3372 if (error)
1c79356b 3373 return (error);
91447636 3374 dvp = nd.ni_dvp;
1c79356b 3375 vp = nd.ni_vp;
91447636
A
3376
3377 if (vp != NULL) {
1c79356b 3378 error = EEXIST;
91447636 3379 goto out;
1c79356b 3380 }
55e303ae 3381
91447636
A
3382 switch (uap->mode & S_IFMT) {
3383 case S_IFMT: /* used by badsect to flag bad sectors */
3384 VATTR_SET(&va, va_type, VBAD);
3385 break;
3386 case S_IFCHR:
3387 VATTR_SET(&va, va_type, VCHR);
3388 break;
3389 case S_IFBLK:
3390 VATTR_SET(&va, va_type, VBLK);
3391 break;
91447636
A
3392 default:
3393 error = EINVAL;
3394 goto out;
3395 }
2d21ac55
A
3396
3397#if CONFIG_MACF
6d2010ae
A
3398 error = mac_vnode_check_create(ctx,
3399 nd.ni_dvp, &nd.ni_cnd, &va);
3400 if (error)
3401 goto out;
2d21ac55
A
3402#endif
3403
3404 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3405 goto out;
3406
6d2010ae 3407 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
91447636
A
3408 goto out;
3409
3410 if (vp) {
3411 int update_flags = 0;
3412
3413 // Make sure the name & parent pointers are hooked up
3414 if (vp->v_name == NULL)
3415 update_flags |= VNODE_UPDATE_NAME;
3416 if (vp->v_parent == NULLVP)
3417 update_flags |= VNODE_UPDATE_PARENT;
3418
3419 if (update_flags)
3420 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3421
2d21ac55
A
3422#if CONFIG_FSE
3423 add_fsevent(FSE_CREATE_FILE, ctx,
91447636
A
3424 FSE_ARG_VNODE, vp,
3425 FSE_ARG_DONE);
2d21ac55 3426#endif
1c79356b 3427 }
91447636
A
3428
3429out:
3430 /*
3431 * nameidone has to happen before we vnode_put(dvp)
3432 * since it may need to release the fs_nodelock on the dvp
3433 */
3434 nameidone(&nd);
3435
3436 if (vp)
3437 vnode_put(vp);
3438 vnode_put(dvp);
3439
1c79356b
A
3440 return (error);
3441}
3442
3443/*
3444 * Create a named pipe.
2d21ac55
A
3445 *
3446 * Returns: 0 Success
3447 * EEXIST
3448 * namei:???
3449 * vnode_authorize:???
3450 * vn_create:???
1c79356b 3451 */
91447636
A
3452static int
3453mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
1c79356b 3454{
91447636 3455 vnode_t vp, dvp;
1c79356b
A
3456 int error;
3457 struct nameidata nd;
55e303ae 3458
6d2010ae 3459 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
91447636 3460 UIO_USERSPACE, upath, ctx);
55e303ae
A
3461 error = namei(&nd);
3462 if (error)
1c79356b 3463 return (error);
91447636
A
3464 dvp = nd.ni_dvp;
3465 vp = nd.ni_vp;
3466
3467 /* check that this is a new file and authorize addition */
3468 if (vp != NULL) {
3469 error = EEXIST;
3470 goto out;
3471 }
2d21ac55
A
3472 VATTR_SET(vap, va_type, VFIFO);
3473
6d2010ae 3474 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
2d21ac55 3475 goto out;
2d21ac55 3476
6d2010ae 3477 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
91447636
A
3478out:
3479 /*
3480 * nameidone has to happen before we vnode_put(dvp)
3481 * since it may need to release the fs_nodelock on the dvp
3482 */
3483 nameidone(&nd);
3484
3485 if (vp)
3486 vnode_put(vp);
3487 vnode_put(dvp);
3488
55e303ae 3489 return error;
91447636
A
3490}
3491
0c530ab8
A
3492
3493/*
b0d623f7 3494 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
0c530ab8
A
3495 *
3496 * Parameters: p Process requesting the open
3497 * uap User argument descriptor (see below)
3498 * retval (Ignored)
3499 *
3500 * Indirect: uap->path Path to fifo (same as 'mkfifo')
3501 * uap->uid UID to set
3502 * uap->gid GID to set
3503 * uap->mode File mode to set (same as 'mkfifo')
3504 * uap->xsecurity ACL to set, if creating
3505 *
3506 * Returns: 0 Success
3507 * !0 errno value
3508 *
3509 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3510 *
3511 * XXX: We should enummerate the possible errno values here, and where
3512 * in the code they originated.
3513 */
91447636 3514int
b0d623f7 3515mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
91447636
A
3516{
3517 int ciferror;
3518 kauth_filesec_t xsecdst;
91447636
A
3519 struct vnode_attr va;
3520
b0d623f7
A
3521 AUDIT_ARG(owner, uap->uid, uap->gid);
3522
91447636
A
3523 xsecdst = KAUTH_FILESEC_NONE;
3524 if (uap->xsecurity != USER_ADDR_NULL) {
3525 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
3526 return ciferror;
3527 }
3528
91447636
A
3529 VATTR_INIT(&va);
3530 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3531 if (uap->uid != KAUTH_UID_NONE)
3532 VATTR_SET(&va, va_uid, uap->uid);
3533 if (uap->gid != KAUTH_GID_NONE)
3534 VATTR_SET(&va, va_gid, uap->gid);
3535 if (xsecdst != KAUTH_FILESEC_NONE)
3536 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3537
2d21ac55 3538 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
91447636
A
3539
3540 if (xsecdst != KAUTH_FILESEC_NONE)
3541 kauth_filesec_free(xsecdst);
3542 return ciferror;
3543}
3544
3545/* ARGSUSED */
3546int
b0d623f7 3547mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
91447636 3548{
91447636
A
3549 struct vnode_attr va;
3550
91447636
A
3551 VATTR_INIT(&va);
3552 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3553
2d21ac55 3554 return(mkfifo1(vfs_context_current(), uap->path, &va));
1c79356b
A
3555}
3556
b0d623f7
A
3557
/*
 * my_strrchr: locate the last occurrence of a character in a string.
 *
 * Parameters:	p			NUL-terminated string to scan
 *		ch			Character to look for
 *
 * Returns:	Pointer to the last byte in 'p' that compares equal to 'ch',
 *		or NULL when there is none. The terminating NUL takes part in
 *		the comparison, so a 'ch' of 0 yields a pointer to the
 *		terminator.
 */
static char *
my_strrchr(char *p, int ch)
{
	char *last_match = NULL;
	char *cursor = p;

	/* Examine every byte up to and including the terminator. */
	do {
		if (*cursor == ch) {
			last_match = cursor;
		}
	} while (*cursor++ != '\0');

	return last_match;
}
3571
3572extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
3573
3574int
3575safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
3576{
3577 int ret, len = _len;
3578
3579 *truncated_path = 0;
3580 ret = vn_getpath(dvp, path, &len);
3581 if (ret == 0 && len < (MAXPATHLEN - 1)) {
3582 if (leafname) {
3583 path[len-1] = '/';
3584 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
3585 if (len > MAXPATHLEN) {
3586 char *ptr;
3587
3588 // the string got truncated!
3589 *truncated_path = 1;
3590 ptr = my_strrchr(path, '/');
3591 if (ptr) {
3592 *ptr = '\0'; // chop off the string at the last directory component
3593 }
3594 len = strlen(path) + 1;
3595 }
3596 }
3597 } else if (ret == 0) {
3598 *truncated_path = 1;
3599 } else if (ret != 0) {
3600 struct vnode *mydvp=dvp;
3601
3602 if (ret != ENOSPC) {
3603 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
3604 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
3605 }
3606 *truncated_path = 1;
3607
3608 do {
3609 if (mydvp->v_parent != NULL) {
3610 mydvp = mydvp->v_parent;
3611 } else if (mydvp->v_mount) {
3612 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
3613 break;
3614 } else {
3615 // no parent and no mount point? only thing is to punt and say "/" changed
3616 strlcpy(path, "/", _len);
3617 len = 2;
3618 mydvp = NULL;
3619 }
3620
3621 if (mydvp == NULL) {
3622 break;
3623 }
3624
3625 len = _len;
3626 ret = vn_getpath(mydvp, path, &len);
3627 } while (ret == ENOSPC);
3628 }
3629
3630 return len;
3631}
3632
3633
1c79356b
A
3634/*
3635 * Make a hard file link.
2d21ac55
A
3636 *
3637 * Returns: 0 Success
3638 * EPERM
3639 * EEXIST
3640 * EXDEV
3641 * namei:???
3642 * vnode_authorize:???
3643 * VNOP_LINK:???
1c79356b 3644 */
1c79356b
A
3645/* ARGSUSED */
3646int
b0d623f7 3647link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
1c79356b 3648{
91447636 3649 vnode_t vp, dvp, lvp;
1c79356b 3650 struct nameidata nd;
2d21ac55 3651 vfs_context_t ctx = vfs_context_current();
1c79356b 3652 int error;
b0d623f7 3653#if CONFIG_FSE
91447636 3654 fse_info finfo;
b0d623f7 3655#endif
91447636 3656 int need_event, has_listeners;
2d21ac55 3657 char *target_path = NULL;
b0d623f7 3658 int truncated=0;
1c79356b 3659
91447636
A
3660 vp = dvp = lvp = NULLVP;
3661
3662 /* look up the object we are linking to */
6d2010ae 3663 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1,
2d21ac55 3664 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
3665 error = namei(&nd);
3666 if (error)
1c79356b
A
3667 return (error);
3668 vp = nd.ni_vp;
91447636
A
3669
3670 nameidone(&nd);
3671
2d21ac55
A
3672 /*
3673 * Normally, linking to directories is not supported.
3674 * However, some file systems may have limited support.
3675 */
91447636 3676 if (vp->v_type == VDIR) {
2d21ac55
A
3677 if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
3678 error = EPERM; /* POSIX */
3679 goto out;
3680 }
3681 /* Linking to a directory requires ownership. */
3682 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
3683 struct vnode_attr dva;
3684
3685 VATTR_INIT(&dva);
3686 VATTR_WANTED(&dva, va_uid);
3687 if (vnode_getattr(vp, &dva, ctx) != 0 ||
3688 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
3689 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
3690 error = EACCES;
3691 goto out;
3692 }
3693 }
91447636
A
3694 }
3695
91447636 3696 /* lookup the target node */
6d2010ae
A
3697#if CONFIG_TRIGGERS
3698 nd.ni_op = OP_LINK;
3699#endif
91447636 3700 nd.ni_cnd.cn_nameiop = CREATE;
2d21ac55 3701 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
91447636
A
3702 nd.ni_dirp = uap->link;
3703 error = namei(&nd);
3704 if (error != 0)
3705 goto out;
3706 dvp = nd.ni_dvp;
3707 lvp = nd.ni_vp;
2d21ac55
A
3708
3709#if CONFIG_MACF
3710 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
3711 goto out2;
3712#endif
3713
3714 /* or to anything that kauth doesn't want us to (eg. immutable items) */
3715 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
3716 goto out2;
3717
91447636
A
3718 /* target node must not exist */
3719 if (lvp != NULLVP) {
3720 error = EEXIST;
3721 goto out2;
3722 }
3723 /* cannot link across mountpoints */
3724 if (vnode_mount(vp) != vnode_mount(dvp)) {
3725 error = EXDEV;
3726 goto out2;
3727 }
3728
3729 /* authorize creation of the target note */
2d21ac55 3730 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
91447636
A
3731 goto out2;
3732
3733 /* and finally make the link */
2d21ac55 3734 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
91447636
A
3735 if (error)
3736 goto out2;
3737
39236c6e
A
3738#if CONFIG_MACF
3739 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
3740#endif
3741
2d21ac55 3742#if CONFIG_FSE
91447636 3743 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
2d21ac55
A
3744#else
3745 need_event = 0;
3746#endif
91447636
A
3747 has_listeners = kauth_authorize_fileop_has_listeners();
3748
3749 if (need_event || has_listeners) {
91447636
A
3750 char *link_to_path = NULL;
3751 int len, link_name_len;
3752
3753 /* build the path to the new link file */
2d21ac55
A
3754 GET_PATH(target_path);
3755 if (target_path == NULL) {
3756 error = ENOMEM;
3757 goto out2;
3758 }
3759
b0d623f7 3760 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
91447636
A
3761
3762 if (has_listeners) {
3763 /* build the path to file we are linking to */
2d21ac55
A
3764 GET_PATH(link_to_path);
3765 if (link_to_path == NULL) {
3766 error = ENOMEM;
3767 goto out2;
3768 }
3769
91447636
A
3770 link_name_len = MAXPATHLEN;
3771 vn_getpath(vp, link_to_path, &link_name_len);
3772
2d21ac55
A
3773 /*
3774 * Call out to allow 3rd party notification of rename.
91447636
A
3775 * Ignore result of kauth_authorize_fileop call.
3776 */
2d21ac55 3777 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
91447636 3778 (uintptr_t)link_to_path, (uintptr_t)target_path);
2d21ac55
A
3779 if (link_to_path != NULL) {
3780 RELEASE_PATH(link_to_path);
3781 }
91447636 3782 }
2d21ac55 3783#if CONFIG_FSE
91447636
A
3784 if (need_event) {
3785 /* construct fsevent */
2d21ac55 3786 if (get_fse_info(vp, &finfo, ctx) == 0) {
b0d623f7
A
3787 if (truncated) {
3788 finfo.mode |= FSE_TRUNCATED_PATH;
3789 }
3790
91447636 3791 // build the path to the destination of the link
2d21ac55 3792 add_fsevent(FSE_CREATE_FILE, ctx,
91447636
A
3793 FSE_ARG_STRING, len, target_path,
3794 FSE_ARG_FINFO, &finfo,
3795 FSE_ARG_DONE);
1c79356b 3796 }
b0d623f7
A
3797 if (vp->v_parent) {
3798 add_fsevent(FSE_STAT_CHANGED, ctx,
3799 FSE_ARG_VNODE, vp->v_parent,
3800 FSE_ARG_DONE);
3801 }
1c79356b 3802 }
2d21ac55 3803#endif
1c79356b 3804 }
91447636
A
3805out2:
3806 /*
3807 * nameidone has to happen before we vnode_put(dvp)
3808 * since it may need to release the fs_nodelock on the dvp
3809 */
3810 nameidone(&nd);
2d21ac55
A
3811 if (target_path != NULL) {
3812 RELEASE_PATH(target_path);
3813 }
91447636
A
3814out:
3815 if (lvp)
3816 vnode_put(lvp);
3817 if (dvp)
3818 vnode_put(dvp);
3819 vnode_put(vp);
3820 return (error);
3821}
1c79356b
A
3822
3823/*
3824 * Make a symbolic link.
91447636
A
3825 *
3826 * We could add support for ACLs here too...
1c79356b 3827 */
1c79356b
A
3828/* ARGSUSED */
3829int
b0d623f7 3830symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval)
1c79356b 3831{
91447636
A
3832 struct vnode_attr va;
3833 char *path;
1c79356b
A
3834 int error;
3835 struct nameidata nd;
2d21ac55 3836 vfs_context_t ctx = vfs_context_current();
91447636 3837 vnode_t vp, dvp;
1c79356b 3838 size_t dummy=0;
55e303ae 3839
1c79356b 3840 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
91447636
A
3841 error = copyinstr(uap->path, path, MAXPATHLEN, &dummy);
3842 if (error)
1c79356b 3843 goto out;
55e303ae 3844 AUDIT_ARG(text, path); /* This is the link string */
91447636 3845
6d2010ae 3846 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
2d21ac55 3847 UIO_USERSPACE, uap->link, ctx);
55e303ae
A
3848 error = namei(&nd);
3849 if (error)
1c79356b 3850 goto out;
91447636
A
3851 dvp = nd.ni_dvp;
3852 vp = nd.ni_vp;
55e303ae 3853
2d21ac55
A
3854 VATTR_INIT(&va);
3855 VATTR_SET(&va, va_type, VLNK);
3856 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
3857#if CONFIG_MACF
3858 error = mac_vnode_check_create(ctx,
3859 dvp, &nd.ni_cnd, &va);
3860#endif
3861 if (error != 0) {
3862 goto skipit;
3863 }
91447636 3864
2d21ac55
A
3865 if (vp != NULL) {
3866 error = EEXIST;
3867 goto skipit;
3868 }
3869
3870 /* authorize */
3871 if (error == 0)
3872 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
3873 /* get default ownership, etc. */
3874 if (error == 0)
3875 error = vnode_authattr_new(dvp, &va, 0, ctx);
3876 if (error == 0)
3877 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
3878
39236c6e
A
3879#if CONFIG_MACF
3880 if (error == 0)
3881 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
3882#endif
3883
2d21ac55
A
3884 /* do fallback attribute handling */
3885 if (error == 0)
3886 error = vnode_setattr_fallback(vp, &va, ctx);
39236c6e 3887
2d21ac55
A
3888 if (error == 0) {
3889 int update_flags = 0;
55e303ae 3890
2d21ac55
A
3891 if (vp == NULL) {
3892 nd.ni_cnd.cn_nameiop = LOOKUP;
6d2010ae
A
3893#if CONFIG_TRIGGERS
3894 nd.ni_op = OP_LOOKUP;
3895#endif
2d21ac55
A
3896 nd.ni_cnd.cn_flags = 0;
3897 error = namei(&nd);
3898 vp = nd.ni_vp;
55e303ae 3899
2d21ac55
A
3900 if (vp == NULL)
3901 goto skipit;
3902 }
91447636
A
3903
3904#if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
2d21ac55
A
3905 /* call out to allow 3rd party notification of rename.
3906 * Ignore result of kauth_authorize_fileop call.
3907 */
3908 if (kauth_authorize_fileop_has_listeners() &&
3909 namei(&nd) == 0) {
3910 char *new_link_path = NULL;
3911 int len;
91447636 3912
2d21ac55
A
3913 /* build the path to the new link file */
3914 new_link_path = get_pathbuff();
3915 len = MAXPATHLEN;
3916 vn_getpath(dvp, new_link_path, &len);
3917 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
91447636 3918 new_link_path[len - 1] = '/';
2d21ac55 3919 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
91447636 3920 }
2d21ac55
A
3921
3922 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
3923 (uintptr_t)path, (uintptr_t)new_link_path);
3924 if (new_link_path != NULL)
3925 release_pathbuff(new_link_path);
3926 }
91447636 3927#endif
2d21ac55
A
3928 // Make sure the name & parent pointers are hooked up
3929 if (vp->v_name == NULL)
3930 update_flags |= VNODE_UPDATE_NAME;
3931 if (vp->v_parent == NULLVP)
3932 update_flags |= VNODE_UPDATE_PARENT;
3933
3934 if (update_flags)
3935 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
91447636 3936
2d21ac55
A
3937#if CONFIG_FSE
3938 add_fsevent(FSE_CREATE_FILE, ctx,
3939 FSE_ARG_VNODE, vp,
3940 FSE_ARG_DONE);
3941#endif
3942 }
91447636
A
3943
3944skipit:
3945 /*
3946 * nameidone has to happen before we vnode_put(dvp)
3947 * since it may need to release the fs_nodelock on the dvp
3948 */
3949 nameidone(&nd);
3950
3951 if (vp)
3952 vnode_put(vp);
3953 vnode_put(dvp);
1c79356b
A
3954out:
3955 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
91447636 3956
1c79356b
A
3957 return (error);
3958}
3959
3960/*
3961 * Delete a whiteout from the filesystem.
b0d623f7 3962 * XXX authorization not implmented for whiteouts
1c79356b 3963 */
1c79356b 3964int
b0d623f7 3965undelete(__unused proc_t p, struct undelete_args *uap, __unused int32_t *retval)
1c79356b
A
3966{
3967 int error;
3968 struct nameidata nd;
2d21ac55 3969 vfs_context_t ctx = vfs_context_current();
91447636
A
3970 vnode_t vp, dvp;
3971
6d2010ae 3972 NDINIT(&nd, DELETE, OP_UNLINK, LOCKPARENT | DOWHITEOUT | AUDITVNPATH1,
2d21ac55 3973 UIO_USERSPACE, uap->path, ctx);
1c79356b
A
3974 error = namei(&nd);
3975 if (error)
3976 return (error);
91447636
A
3977 dvp = nd.ni_dvp;
3978 vp = nd.ni_vp;
1c79356b 3979
91447636 3980 if (vp == NULLVP && (nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2d21ac55 3981 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, DELETE, ctx);
91447636
A
3982 } else
3983 error = EEXIST;
3984
3985 /*
3986 * nameidone has to happen before we vnode_put(dvp)
3987 * since it may need to release the fs_nodelock on the dvp
3988 */
3989 nameidone(&nd);
3990
3991 if (vp)
3992 vnode_put(vp);
3993 vnode_put(dvp);
1c79356b 3994
1c79356b
A
3995 return (error);
3996}
3997
b0d623f7 3998
1c79356b
A
3999/*
4000 * Delete a name from the filesystem.
4001 */
1c79356b 4002/* ARGSUSED */
2d21ac55 4003int
316670eb 4004unlink1(vfs_context_t ctx, struct nameidata *ndp, int unlink_flags)
1c79356b 4005{
91447636 4006 vnode_t vp, dvp;
1c79356b 4007 int error;
91447636 4008 struct componentname *cnp;
2d21ac55 4009 char *path = NULL;
b0d623f7
A
4010 int len=0;
4011#if CONFIG_FSE
2d21ac55 4012 fse_info finfo;
6d2010ae 4013 struct vnode_attr va;
b0d623f7 4014#endif
91447636 4015 int flags = 0;
2d21ac55
A
4016 int need_event = 0;
4017 int has_listeners = 0;
b0d623f7 4018 int truncated_path=0;
6d2010ae
A
4019 int batched;
4020 struct vnode_attr *vap = NULL;
4021
c910b4d9
A
4022#if NAMEDRSRCFORK
4023 /* unlink or delete is allowed on rsrc forks and named streams */
4024 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4025#endif
4026
2d21ac55 4027 ndp->ni_cnd.cn_flags |= LOCKPARENT;
6d2010ae 4028 ndp->ni_flag |= NAMEI_COMPOUNDREMOVE;
2d21ac55 4029 cnp = &ndp->ni_cnd;
91447636 4030
6d2010ae 4031lookup_continue:
2d21ac55
A
4032 error = namei(ndp);
4033 if (error)
4034 return (error);
b0d623f7 4035
2d21ac55
A
4036 dvp = ndp->ni_dvp;
4037 vp = ndp->ni_vp;
91447636 4038
6d2010ae 4039
91447636 4040 /* With Carbon delete semantics, busy files cannot be deleted */
316670eb 4041 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
91447636 4042 flags |= VNODE_REMOVE_NODELETEBUSY;
2d21ac55 4043 }
316670eb 4044
39236c6e 4045 /* Skip any potential upcalls if told to. */
316670eb
A
4046 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4047 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4048 }
4049
6d2010ae
A
4050 if (vp) {
4051 batched = vnode_compound_remove_available(vp);
4052 /*
4053 * The root of a mounted filesystem cannot be deleted.
4054 */
4055 if (vp->v_flag & VROOT) {
4056 error = EBUSY;
4057 }
2d21ac55 4058
6d2010ae
A
4059 if (!batched) {
4060 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4061 if (error) {
4062 goto out;
4063 }
4064 }
4065 } else {
4066 batched = 1;
2d21ac55 4067
6d2010ae
A
4068 if (!vnode_compound_remove_available(dvp)) {
4069 panic("No vp, but no compound remove?");
4070 }
4071 }
2d21ac55 4072
2d21ac55
A
4073#if CONFIG_FSE
4074 need_event = need_fsevent(FSE_DELETE, dvp);
4075 if (need_event) {
6d2010ae
A
4076 if (!batched) {
4077 if ((vp->v_flag & VISHARDLINK) == 0) {
4078 /* XXX need to get these data in batched VNOP */
4079 get_fse_info(vp, &finfo, ctx);
4080 }
4081 } else {
4082 error = vfs_get_notify_attributes(&va);
4083 if (error) {
4084 goto out;
4085 }
4086
4087 vap = &va;
2d21ac55
A
4088 }
4089 }
4090#endif
4091 has_listeners = kauth_authorize_fileop_has_listeners();
4092 if (need_event || has_listeners) {
2d21ac55 4093 if (path == NULL) {
6d2010ae
A
4094 GET_PATH(path);
4095 if (path == NULL) {
4096 error = ENOMEM;
4097 goto out;
4098 }
2d21ac55 4099 }
b0d623f7 4100 len = safe_getpath(dvp, ndp->ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
2d21ac55
A
4101 }
4102
4103#if NAMEDRSRCFORK
4104 if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK)
4105 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4106 else
4107#endif
6d2010ae
A
4108 {
4109 error = vn_remove(dvp, &ndp->ni_vp, ndp, flags, vap, ctx);
4110 vp = ndp->ni_vp;
4111 if (error == EKEEPLOOKING) {
4112 if (!batched) {
4113 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4114 }
4115
4116 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
4117 panic("EKEEPLOOKING, but continue flag not set?");
4118 }
4119
4120 if (vnode_isdir(vp)) {
4121 error = EISDIR;
4122 goto out;
4123 }
4124 goto lookup_continue;
4125 }
4126 }
2d21ac55
A
4127
4128 /*
4129 * Call out to allow 3rd party notification of delete.
4130 * Ignore result of kauth_authorize_fileop call.
4131 */
1c79356b 4132 if (!error) {
2d21ac55
A
4133 if (has_listeners) {
4134 kauth_authorize_fileop(vfs_context_ucred(ctx),
4135 KAUTH_FILEOP_DELETE,
4136 (uintptr_t)vp,
4137 (uintptr_t)path);
4138 }
91447636 4139
2d21ac55
A
4140 if (vp->v_flag & VISHARDLINK) {
4141 //
4142 // if a hardlink gets deleted we want to blow away the
4143 // v_parent link because the path that got us to this
4144 // instance of the link is no longer valid. this will
4145 // force the next call to get the path to ask the file
4146 // system instead of just following the v_parent link.
4147 //
4148 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
91447636 4149 }
91447636 4150
2d21ac55
A
4151#if CONFIG_FSE
4152 if (need_event) {
4153 if (vp->v_flag & VISHARDLINK) {
4154 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
4155 } else if (vap) {
4156 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 4157 }
b0d623f7
A
4158 if (truncated_path) {
4159 finfo.mode |= FSE_TRUNCATED_PATH;
4160 }
2d21ac55
A
4161 add_fsevent(FSE_DELETE, ctx,
4162 FSE_ARG_STRING, len, path,
4163 FSE_ARG_FINFO, &finfo,
4164 FSE_ARG_DONE);
4165 }
4166#endif
1c79356b 4167 }
6d2010ae
A
4168
4169out:
2d21ac55
A
4170 if (path != NULL)
4171 RELEASE_PATH(path);
4172
c910b4d9 4173#if NAMEDRSRCFORK
b0d623f7
A
4174 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4175 * will cause its shadow file to go away if necessary.
4176 */
6d2010ae
A
4177 if (vp && (vnode_isnamedstream(vp)) &&
4178 (vp->v_parent != NULLVP) &&
4179 vnode_isshadow(vp)) {
4180 vnode_recycle(vp);
b0d623f7 4181 }
c910b4d9 4182#endif
6d2010ae
A
4183 /*
4184 * nameidone has to happen before we vnode_put(dvp)
4185 * since it may need to release the fs_nodelock on the dvp
4186 */
2d21ac55 4187 nameidone(ndp);
91447636 4188 vnode_put(dvp);
6d2010ae
A
4189 if (vp) {
4190 vnode_put(vp);
4191 }
1c79356b
A
4192 return (error);
4193}
4194
4195/*
4196 * Delete a name from the filesystem using POSIX semantics.
4197 */
4198int
b0d623f7 4199unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
1c79356b 4200{
2d21ac55
A
4201 struct nameidata nd;
4202 vfs_context_t ctx = vfs_context_current();
4203
6d2010ae
A
4204 NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
4205 uap->path, ctx);
2d21ac55 4206 return unlink1(ctx, &nd, 0);
1c79356b
A
4207}
4208
4209/*
0b4e3aa0 4210 * Delete a name from the filesystem using Carbon semantics.
1c79356b
A
4211 */
4212int
b0d623f7 4213delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
1c79356b 4214{
2d21ac55
A
4215 struct nameidata nd;
4216 vfs_context_t ctx = vfs_context_current();
4217
6d2010ae
A
4218 NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
4219 uap->path, ctx);
316670eb 4220 return unlink1(ctx, &nd, VNODE_REMOVE_NODELETEBUSY);
1c79356b
A
4221}
4222
4223/*
4224 * Reposition read/write file offset.
4225 */
1c79356b 4226int
2d21ac55 4227lseek(proc_t p, struct lseek_args *uap, off_t *retval)
1c79356b 4228{
91447636 4229 struct fileproc *fp;
2d21ac55
A
4230 vnode_t vp;
4231 struct vfs_context *ctx;
91447636 4232 off_t offset = uap->offset, file_size;
1c79356b
A
4233 int error;
4234
91447636
A
4235 if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
4236 if (error == ENOTSUP)
4237 return (ESPIPE);
1c79356b 4238 return (error);
55e303ae 4239 }
91447636
A
4240 if (vnode_isfifo(vp)) {
4241 file_drop(uap->fd);
4242 return(ESPIPE);
4243 }
2d21ac55
A
4244
4245
4246 ctx = vfs_context_current();
4247#if CONFIG_MACF
4248 if (uap->whence == L_INCR && uap->offset == 0)
4249 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
4250 fp->f_fglob);
4251 else
4252 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
4253 fp->f_fglob);
4254 if (error) {
4255 file_drop(uap->fd);
4256 return (error);
4257 }
4258#endif
91447636
A
4259 if ( (error = vnode_getwithref(vp)) ) {
4260 file_drop(uap->fd);
4261 return(error);
4262 }
4263
1c79356b
A
4264 switch (uap->whence) {
4265 case L_INCR:
91447636 4266 offset += fp->f_fglob->fg_offset;
1c79356b
A
4267 break;
4268 case L_XTND:
2d21ac55 4269 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
55e303ae 4270 break;
91447636 4271 offset += file_size;
1c79356b
A
4272 break;
4273 case L_SET:
1c79356b
A
4274 break;
4275 default:
55e303ae 4276 error = EINVAL;
1c79356b 4277 }
55e303ae
A
4278 if (error == 0) {
4279 if (uap->offset > 0 && offset < 0) {
4280 /* Incremented/relative move past max size */
4281 error = EOVERFLOW;
4282 } else {
4283 /*
4284 * Allow negative offsets on character devices, per
4285 * POSIX 1003.1-2001. Most likely for writing disk
4286 * labels.
4287 */
4288 if (offset < 0 && vp->v_type != VCHR) {
4289 /* Decremented/relative move before start */
4290 error = EINVAL;
4291 } else {
4292 /* Success */
91447636
A
4293 fp->f_fglob->fg_offset = offset;
4294 *retval = fp->f_fglob->fg_offset;
55e303ae
A
4295 }
4296 }
4297 }
b0d623f7
A
4298
4299 /*
4300 * An lseek can affect whether data is "available to read." Use
4301 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4302 */
4303 post_event_if_success(vp, error, NOTE_NONE);
91447636
A
4304 (void)vnode_put(vp);
4305 file_drop(uap->fd);
55e303ae 4306 return (error);
1c79356b
A
4307}
4308
91447636 4309
1c79356b 4310/*
91447636 4311 * Check access permissions.
2d21ac55
A
4312 *
4313 * Returns: 0 Success
4314 * vnode_authorize:???
1c79356b 4315 */
91447636
A
4316static int
4317access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
1c79356b 4318{
91447636 4319 kauth_action_t action;
1c79356b
A
4320 int error;
4321
91447636
A
4322 /*
4323 * If just the regular access bits, convert them to something
4324 * that vnode_authorize will understand.
4325 */
4326 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4327 action = 0;
4328 if (uflags & R_OK)
4329 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4330 if (uflags & W_OK) {
4331 if (vnode_isdir(vp)) {
4332 action |= KAUTH_VNODE_ADD_FILE |
4333 KAUTH_VNODE_ADD_SUBDIRECTORY;
4334 /* might want delete rights here too */
4335 } else {
4336 action |= KAUTH_VNODE_WRITE_DATA;
4337 }
4338 }
4339 if (uflags & X_OK) {
4340 if (vnode_isdir(vp)) {
4341 action |= KAUTH_VNODE_SEARCH;
4342 } else {
4343 action |= KAUTH_VNODE_EXECUTE;
4344 }
4345 }
4346 } else {
4347 /* take advantage of definition of uflags */
4348 action = uflags >> 8;
4349 }
4350
2d21ac55
A
4351#if CONFIG_MACF
4352 error = mac_vnode_check_access(ctx, vp, uflags);
4353 if (error)
4354 return (error);
4355#endif /* MAC */
4356
91447636
A
4357 /* action == 0 means only check for existence */
4358 if (action != 0) {
4359 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
4360 } else {
4361 error = 0;
4362 }
4363
4364 return(error);
1c79356b 4365}
1c79356b 4366
91447636
A
4367
4368
2d21ac55 4369/*
b0d623f7 4370 * access_extended: Check access permissions in bulk.
2d21ac55 4371 *
b0d623f7
A
4372 * Description: uap->entries Pointer to an array of accessx
4373 * descriptor structs, plus one or
4374 * more NULL terminated strings (see
4375 * "Notes" section below).
4376 * uap->size Size of the area pointed to by
4377 * uap->entries.
4378 * uap->results Pointer to the results array.
2d21ac55
A
4379 *
4380 * Returns: 0 Success
4381 * ENOMEM Insufficient memory
4382 * EINVAL Invalid arguments
4383 * namei:EFAULT Bad address
4384 * namei:ENAMETOOLONG Filename too long
4385 * namei:ENOENT No such file or directory
4386 * namei:ELOOP Too many levels of symbolic links
4387 * namei:EBADF Bad file descriptor
4388 * namei:ENOTDIR Not a directory
4389 * namei:???
4390 * access1:
4391 *
4392 * Implicit returns:
4393 * uap->results Array contents modified
4394 *
4395 * Notes: The uap->entries are structured as an arbitrary length array
b0d623f7 4396 * of accessx descriptors, followed by one or more NULL terminated
2d21ac55
A
4397 * strings
4398 *
4399 * struct accessx_descriptor[0]
4400 * ...
4401 * struct accessx_descriptor[n]
4402 * char name_data[0];
4403 *
4404 * We determine the entry count by walking the buffer containing
b0d623f7 4405 * the uap->entries argument descriptor. For each descriptor we
2d21ac55
A
4406 * see, the valid values for the offset ad_name_offset will be
4407 * in the byte range:
4408 *
4409 * [ uap->entries + sizeof(struct accessx_descriptor) ]
4410 * to
4411 * [ uap->entries + uap->size - 2 ]
4412 *
4413 * since we must have at least one string, and the string must
b0d623f7 4414 * be at least one character plus the NULL terminator in length.
2d21ac55
A
4415 *
4416 * XXX: Need to support the check-as uid argument
4417 */
1c79356b 4418int
b0d623f7 4419access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
1c79356b 4420{
2d21ac55
A
4421 struct accessx_descriptor *input = NULL;
4422 errno_t *result = NULL;
4423 errno_t error = 0;
4424 int wantdelete = 0;
4425 unsigned int desc_max, desc_actual, i, j;
91447636 4426 struct vfs_context context;
1c79356b 4427 struct nameidata nd;
91447636 4428 int niopts;
2d21ac55
A
4429 vnode_t vp = NULL;
4430 vnode_t dvp = NULL;
4431#define ACCESSX_MAX_DESCR_ON_STACK 10
4432 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
91447636 4433
91447636
A
4434 context.vc_ucred = NULL;
4435
2d21ac55
A
4436 /*
4437 * Validate parameters; if valid, copy the descriptor array and string
4438 * arguments into local memory. Before proceeding, the following
4439 * conditions must have been met:
4440 *
4441 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
4442 * o There must be sufficient room in the request for at least one
4443 * descriptor and a one yte NUL terminated string.
4444 * o The allocation of local storage must not fail.
4445 */
91447636
A
4446 if (uap->size > ACCESSX_MAX_TABLESIZE)
4447 return(ENOMEM);
2d21ac55 4448 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
91447636 4449 return(EINVAL);
2d21ac55
A
4450 if (uap->size <= sizeof (stack_input)) {
4451 input = stack_input;
4452 } else {
91447636
A
4453 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
4454 if (input == NULL) {
4455 error = ENOMEM;
4456 goto out;
4457 }
2d21ac55 4458 }
91447636 4459 error = copyin(uap->entries, input, uap->size);
55e303ae 4460 if (error)
91447636 4461 goto out;
1c79356b 4462
b0d623f7
A
4463 AUDIT_ARG(opaque, input, uap->size);
4464
91447636 4465 /*
2d21ac55
A
4466 * Force NUL termination of the copyin buffer to avoid nami() running
4467 * off the end. If the caller passes us bogus data, they may get a
4468 * bogus result.
4469 */
4470 ((char *)input)[uap->size - 1] = 0;
4471
4472 /*
4473 * Access is defined as checking against the process' real identity,
4474 * even if operations are checking the effective identity. This
4475 * requires that we use a local vfs context.
91447636
A
4476 */
4477 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
2d21ac55 4478 context.vc_thread = current_thread();
91447636
A
4479
4480 /*
2d21ac55
A
4481 * Find out how many entries we have, so we can allocate the result
4482 * array by walking the list and adjusting the count downward by the
4483 * earliest string offset we see.
91447636 4484 */
2d21ac55
A
4485 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
4486 desc_actual = desc_max;
4487 for (i = 0; i < desc_actual; i++) {
91447636 4488 /*
2d21ac55
A
4489 * Take the offset to the name string for this entry and
4490 * convert to an input array index, which would be one off
4491 * the end of the array if this entry was the lowest-addressed
4492 * name string.
91447636
A
4493 */
4494 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
2d21ac55
A
4495
4496 /*
4497 * An offset greater than the max allowable offset is an error.
4498 * It is also an error for any valid entry to point
4499 * to a location prior to the end of the current entry, if
4500 * it's not a reference to the string of the previous entry.
4501 */
4502 if (j > desc_max || (j != 0 && j <= i)) {
91447636
A
4503 error = EINVAL;
4504 goto out;
4505 }
2d21ac55
A
4506
4507 /*
4508 * An offset of 0 means use the previous descriptor's offset;
4509 * this is used to chain multiple requests for the same file
4510 * to avoid multiple lookups.
4511 */
91447636 4512 if (j == 0) {
2d21ac55 4513 /* This is not valid for the first entry */
91447636
A
4514 if (i == 0) {
4515 error = EINVAL;
4516 goto out;
4517 }
4518 continue;
4519 }
2d21ac55
A
4520
4521 /*
4522 * If the offset of the string for this descriptor is before
4523 * what we believe is the current actual last descriptor,
4524 * then we need to adjust our estimate downward; this permits
4525 * the string table following the last descriptor to be out
4526 * of order relative to the descriptor list.
4527 */
4528 if (j < desc_actual)
4529 desc_actual = j;
91447636 4530 }
2d21ac55
A
4531
4532 /*
4533 * We limit the actual number of descriptors we are willing to process
4534 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
4535 * requested does not exceed this limit,
4536 */
4537 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
91447636
A
4538 error = ENOMEM;
4539 goto out;
4540 }
2d21ac55 4541 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
91447636
A
4542 if (result == NULL) {
4543 error = ENOMEM;
4544 goto out;
4545 }
4546
4547 /*
2d21ac55
A
4548 * Do the work by iterating over the descriptor entries we know to
4549 * at least appear to contain valid data.
91447636
A
4550 */
4551 error = 0;
2d21ac55 4552 for (i = 0; i < desc_actual; i++) {
91447636 4553 /*
2d21ac55
A
4554 * If the ad_name_offset is 0, then we use the previous
4555 * results to make the check; otherwise, we are looking up
4556 * a new file name.
91447636
A
4557 */
4558 if (input[i].ad_name_offset != 0) {
4559 /* discard old vnodes */
4560 if (vp) {
4561 vnode_put(vp);
4562 vp = NULL;
4563 }
4564 if (dvp) {
4565 vnode_put(dvp);
4566 dvp = NULL;
4567 }
4568
2d21ac55
A
4569 /*
4570 * Scan forward in the descriptor list to see if we
4571 * need the parent vnode. We will need it if we are
4572 * deleting, since we must have rights to remove
4573 * entries in the parent directory, as well as the
4574 * rights to delete the object itself.
4575 */
91447636 4576 wantdelete = input[i].ad_flags & _DELETE_OK;
2d21ac55 4577 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
91447636
A
4578 if (input[j].ad_flags & _DELETE_OK)
4579 wantdelete = 1;
4580
4581 niopts = FOLLOW | AUDITVNPATH1;
2d21ac55 4582
91447636
A
4583 /* need parent for vnode_authorize for deletion test */
4584 if (wantdelete)
4585 niopts |= WANTPARENT;
4586
4587 /* do the lookup */
6d2010ae
A
4588 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
4589 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
4590 &context);
91447636
A
4591 error = namei(&nd);
4592 if (!error) {
4593 vp = nd.ni_vp;
4594 if (wantdelete)
4595 dvp = nd.ni_dvp;
4596 }
4597 nameidone(&nd);
4598 }
4599
4600 /*
4601 * Handle lookup errors.
4602 */
4603 switch(error) {
4604 case ENOENT:
4605 case EACCES:
4606 case EPERM:
4607 case ENOTDIR:
4608 result[i] = error;
4609 break;
4610 case 0:
4611 /* run this access check */
4612 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
4613 break;
4614 default:
4615 /* fatal lookup error */
4616
4617 goto out;
4618 }
4619 }
4620
b0d623f7
A
4621 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
4622
91447636 4623 /* copy out results */
2d21ac55 4624 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
91447636
A
4625
4626out:
2d21ac55 4627 if (input && input != stack_input)
91447636
A
4628 FREE(input, M_TEMP);
4629 if (result)
4630 FREE(result, M_TEMP);
4631 if (vp)
4632 vnode_put(vp);
4633 if (dvp)
4634 vnode_put(dvp);
0c530ab8
A
4635 if (IS_VALID_CRED(context.vc_ucred))
4636 kauth_cred_unref(&context.vc_ucred);
91447636 4637 return(error);
1c79356b
A
4638}
4639
2d21ac55
A
4640
4641/*
4642 * Returns: 0 Success
4643 * namei:EFAULT Bad address
4644 * namei:ENAMETOOLONG Filename too long
4645 * namei:ENOENT No such file or directory
4646 * namei:ELOOP Too many levels of symbolic links
4647 * namei:EBADF Bad file descriptor
4648 * namei:ENOTDIR Not a directory
4649 * namei:???
4650 * access1:
4651 */
1c79356b 4652int
b0d623f7 4653access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
1c79356b 4654{
1c79356b
A
4655 int error;
4656 struct nameidata nd;
91447636
A
4657 int niopts;
4658 struct vfs_context context;
cf7d32b8
A
4659#if NAMEDRSRCFORK
4660 int is_namedstream = 0;
4661#endif
4662
91447636
A
4663 /*
4664 * Access is defined as checking against the process'
4665 * real identity, even if operations are checking the
4666 * effective identity. So we need to tweak the credential
4667 * in the context.
4668 */
4669 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
2d21ac55 4670 context.vc_thread = current_thread();
91447636
A
4671
4672 niopts = FOLLOW | AUDITVNPATH1;
4673 /* need parent for vnode_authorize for deletion test */
4674 if (uap->flags & _DELETE_OK)
4675 niopts |= WANTPARENT;
6d2010ae
A
4676 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_USERSPACE,
4677 uap->path, &context);
2d21ac55
A
4678
4679#if NAMEDRSRCFORK
4680 /* access(F_OK) calls are allowed for resource forks. */
4681 if (uap->flags == F_OK)
4682 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4683#endif
91447636
A
4684 error = namei(&nd);
4685 if (error)
4686 goto out;
4687
cf7d32b8 4688#if NAMEDRSRCFORK
b0d623f7
A
4689 /* Grab reference on the shadow stream file vnode to
4690 * force an inactive on release which will mark it
4691 * for recycle.
cf7d32b8
A
4692 */
4693 if (vnode_isnamedstream(nd.ni_vp) &&
b0d623f7
A
4694 (nd.ni_vp->v_parent != NULLVP) &&
4695 vnode_isshadow(nd.ni_vp)) {
cf7d32b8
A
4696 is_namedstream = 1;
4697 vnode_ref(nd.ni_vp);
4698 }
4699#endif
4700
91447636 4701 error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context);
b0d623f7 4702
cf7d32b8
A
4703#if NAMEDRSRCFORK
4704 if (is_namedstream) {
4705 vnode_rele(nd.ni_vp);
4706 }
4707#endif
4708
91447636
A
4709 vnode_put(nd.ni_vp);
4710 if (uap->flags & _DELETE_OK)
4711 vnode_put(nd.ni_dvp);
4712 nameidone(&nd);
4713
4714out:
0c530ab8 4715 kauth_cred_unref(&context.vc_ucred);
91447636
A
4716 return(error);
4717}
4718
4719
2d21ac55
A
4720/*
4721 * Returns: 0 Success
4722 * EFAULT
4723 * copyout:EFAULT
4724 * namei:???
4725 * vn_stat:???
4726 */
91447636 4727static int
2d21ac55 4728stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
91447636 4729{
b0d623f7
A
4730 union {
4731 struct stat sb;
4732 struct stat64 sb64;
4733 } source;
4734 union {
4735 struct user64_stat user64_sb;
4736 struct user32_stat user32_sb;
4737 struct user64_stat64 user64_sb64;
4738 struct user32_stat64 user32_sb64;
4739 } dest;
91447636
A
4740 caddr_t sbp;
4741 int error, my_size;
4742 kauth_filesec_t fsec;
4743 size_t xsecurity_bufsize;
2d21ac55 4744 void * statptr;
1c79356b 4745
2d21ac55 4746#if NAMEDRSRCFORK
cf7d32b8 4747 int is_namedstream = 0;
2d21ac55
A
4748 /* stat calls are allowed for resource forks. */
4749 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4750#endif
91447636
A
4751 error = namei(ndp);
4752 if (error)
1c79356b 4753 return (error);
91447636 4754 fsec = KAUTH_FILESEC_NONE;
b0d623f7
A
4755
4756 statptr = (void *)&source;
cf7d32b8
A
4757
4758#if NAMEDRSRCFORK
b0d623f7
A
4759 /* Grab reference on the shadow stream file vnode to
4760 * force an inactive on release which will mark it
4761 * for recycle.
cf7d32b8
A
4762 */
4763 if (vnode_isnamedstream(ndp->ni_vp) &&
b0d623f7
A
4764 (ndp->ni_vp->v_parent != NULLVP) &&
4765 vnode_isshadow(ndp->ni_vp)) {
cf7d32b8 4766 is_namedstream = 1;
b0d623f7 4767 vnode_ref(ndp->ni_vp);
cf7d32b8
A
4768 }
4769#endif
4770
2d21ac55
A
4771 error = vn_stat(ndp->ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
4772
4773#if NAMEDRSRCFORK
cf7d32b8 4774 if (is_namedstream) {
b0d623f7 4775 vnode_rele(ndp->ni_vp);
2d21ac55
A
4776 }
4777#endif
91447636
A
4778 vnode_put(ndp->ni_vp);
4779 nameidone(ndp);
4780
1c79356b
A
4781 if (error)
4782 return (error);
91447636 4783 /* Zap spare fields */
2d21ac55 4784 if (isstat64 != 0) {
b0d623f7
A
4785 source.sb64.st_lspare = 0;
4786 source.sb64.st_qspare[0] = 0LL;
4787 source.sb64.st_qspare[1] = 0LL;
2d21ac55 4788 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
b0d623f7
A
4789 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
4790 my_size = sizeof(dest.user64_sb64);
4791 sbp = (caddr_t)&dest.user64_sb64;
2d21ac55 4792 } else {
b0d623f7
A
4793 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
4794 my_size = sizeof(dest.user32_sb64);
4795 sbp = (caddr_t)&dest.user32_sb64;
2d21ac55
A
4796 }
4797 /*
4798 * Check if we raced (post lookup) against the last unlink of a file.
4799 */
b0d623f7
A
4800 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
4801 source.sb64.st_nlink = 1;
2d21ac55
A
4802 }
4803 } else {
b0d623f7
A
4804 source.sb.st_lspare = 0;
4805 source.sb.st_qspare[0] = 0LL;
4806 source.sb.st_qspare[1] = 0LL;
2d21ac55 4807 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
b0d623f7
A
4808 munge_user64_stat(&source.sb, &dest.user64_sb);
4809 my_size = sizeof(dest.user64_sb);
4810 sbp = (caddr_t)&dest.user64_sb;
2d21ac55 4811 } else {
b0d623f7
A
4812 munge_user32_stat(&source.sb, &dest.user32_sb);
4813 my_size = sizeof(dest.user32_sb);
4814 sbp = (caddr_t)&dest.user32_sb;
2d21ac55
A
4815 }
4816
4817 /*
4818 * Check if we raced (post lookup) against the last unlink of a file.
4819 */
b0d623f7
A
4820 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
4821 source.sb.st_nlink = 1;
2d21ac55 4822 }
91447636
A
4823 }
4824 if ((error = copyout(sbp, ub, my_size)) != 0)
4825 goto out;
4826
4827 /* caller wants extended security information? */
4828 if (xsecurity != USER_ADDR_NULL) {
4829
4830 /* did we get any? */
4831 if (fsec == KAUTH_FILESEC_NONE) {
4832 if (susize(xsecurity_size, 0) != 0) {
4833 error = EFAULT;
4834 goto out;
4835 }
4836 } else {
4837 /* find the user buffer size */
4838 xsecurity_bufsize = fusize(xsecurity_size);
4839
4840 /* copy out the actual data size */
4841 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
4842 error = EFAULT;
4843 goto out;
4844 }
4845
4846 /* if the caller supplied enough room, copy out to it */
4847 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
4848 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
4849 }
4850 }
4851out:
4852 if (fsec != KAUTH_FILESEC_NONE)
4853 kauth_filesec_free(fsec);
1c79356b
A
4854 return (error);
4855}
4856
4857/*
91447636 4858 * Get file status; this version follows links.
2d21ac55
A
4859 *
4860 * Returns: 0 Success
4861 * stat2:??? [see stat2() in this file]
1c79356b 4862 */
91447636 4863static int
2d21ac55 4864stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
1c79356b 4865{
1c79356b 4866 struct nameidata nd;
2d21ac55 4867 vfs_context_t ctx = vfs_context_current();
1c79356b 4868
6d2010ae 4869 NDINIT(&nd, LOOKUP, OP_GETATTR, NOTRIGGER | FOLLOW | AUDITVNPATH1,
2d21ac55
A
4870 UIO_USERSPACE, path, ctx);
4871 return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
4872}
91447636 4873
b0d623f7
A
4874/*
4875 * stat_extended: Get file status; with extended security (ACL).
4876 *
4877 * Parameters: p (ignored)
4878 * uap User argument descriptor (see below)
4879 * retval (ignored)
4880 *
4881 * Indirect: uap->path Path of file to get status from
4882 * uap->ub User buffer (holds file status info)
4883 * uap->xsecurity ACL to get (extended security)
4884 * uap->xsecurity_size Size of ACL
4885 *
4886 * Returns: 0 Success
4887 * !0 errno value
4888 *
4889 */
2d21ac55 4890int
b0d623f7 4891stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused int32_t *retval)
2d21ac55
A
4892{
4893 return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
1c79356b
A
4894}
4895
2d21ac55
A
4896/*
4897 * Returns: 0 Success
4898 * stat1:??? [see stat1() in this file]
4899 */
91447636 4900int
b0d623f7 4901stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
1c79356b 4902{
2d21ac55 4903 return(stat1(uap->path, uap->ub, 0, 0, 0));
91447636 4904}
1c79356b 4905
91447636 4906int
b0d623f7 4907stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
91447636 4908{
2d21ac55 4909 return(stat1(uap->path, uap->ub, 0, 0, 1));
1c79356b 4910}
1c79356b 4911
b0d623f7
A
4912/*
4913 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
4914 *
4915 * Parameters: p (ignored)
4916 * uap User argument descriptor (see below)
4917 * retval (ignored)
4918 *
4919 * Indirect: uap->path Path of file to get status from
4920 * uap->ub User buffer (holds file status info)
4921 * uap->xsecurity ACL to get (extended security)
4922 * uap->xsecurity_size Size of ACL
4923 *
4924 * Returns: 0 Success
4925 * !0 errno value
4926 *
4927 */
2d21ac55 4928int
b0d623f7 4929stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
2d21ac55
A
4930{
4931 return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
4932}
55e303ae 4933/*
91447636 4934 * Get file status; this version does not follow links.
55e303ae 4935 */
91447636 4936static int
2d21ac55 4937lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
91447636
A
4938{
4939 struct nameidata nd;
2d21ac55 4940 vfs_context_t ctx = vfs_context_current();
91447636 4941
6d2010ae 4942 NDINIT(&nd, LOOKUP, OP_GETATTR, NOTRIGGER | NOFOLLOW | AUDITVNPATH1,
2d21ac55 4943 UIO_USERSPACE, path, ctx);
91447636 4944
2d21ac55
A
4945 return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
4946}
91447636 4947
b0d623f7
A
4948/*
4949 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
4950 *
4951 * Parameters: p (ignored)
4952 * uap User argument descriptor (see below)
4953 * retval (ignored)
4954 *
4955 * Indirect: uap->path Path of file to get status from
4956 * uap->ub User buffer (holds file status info)
4957 * uap->xsecurity ACL to get (extended security)
4958 * uap->xsecurity_size Size of ACL
4959 *
4960 * Returns: 0 Success
4961 * !0 errno value
4962 *
4963 */
2d21ac55 4964int
b0d623f7 4965lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
2d21ac55
A
4966{
4967 return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
91447636
A
4968}
4969
4970int
b0d623f7 4971lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
91447636 4972{
2d21ac55
A
4973 return(lstat1(uap->path, uap->ub, 0, 0, 0));
4974}
b0d623f7 4975
2d21ac55 4976int
b0d623f7 4977lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
2d21ac55
A
4978{
4979 return(lstat1(uap->path, uap->ub, 0, 0, 1));
91447636
A
4980}
4981
b0d623f7
A
4982/*
4983 * lstat64_extended: Get file status; can handle large inode numbers; does not
4984 * follow links; with extended security (ACL).
4985 *
4986 * Parameters: p (ignored)
4987 * uap User argument descriptor (see below)
4988 * retval (ignored)
4989 *
4990 * Indirect: uap->path Path of file to get status from
4991 * uap->ub User buffer (holds file status info)
4992 * uap->xsecurity ACL to get (extended security)
4993 * uap->xsecurity_size Size of ACL
4994 *
4995 * Returns: 0 Success
4996 * !0 errno value
4997 *
4998 */
91447636 4999int
b0d623f7 5000lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
91447636 5001{
2d21ac55 5002 return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
91447636
A
5003}
5004
1c79356b 5005/*
91447636 5006 * Get configurable pathname variables.
2d21ac55
A
5007 *
5008 * Returns: 0 Success
5009 * namei:???
5010 * vn_pathconf:???
5011 *
5012 * Notes: Global implementation constants are intended to be
5013 * implemented in this function directly; all other constants
5014 * are per-FS implementation, and therefore must be handled in
5015 * each respective FS, instead.
5016 *
5017 * XXX We implement some things globally right now that should actually be
5018 * XXX per-FS; we will need to deal with this at some point.
1c79356b 5019 */
1c79356b
A
5020/* ARGSUSED */
5021int
b0d623f7 5022pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
1c79356b 5023{
1c79356b
A
5024 int error;
5025 struct nameidata nd;
2d21ac55 5026 vfs_context_t ctx = vfs_context_current();
91447636 5027
6d2010ae 5028 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
2d21ac55 5029 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
5030 error = namei(&nd);
5031 if (error)
1c79356b 5032 return (error);
1c79356b 5033
2d21ac55 5034 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
1c79356b 5035
91447636
A
5036 vnode_put(nd.ni_vp);
5037 nameidone(&nd);
1c79356b
A
5038 return (error);
5039}
5040
5041/*
5042 * Return target name of a symbolic link.
5043 */
1c79356b
A
5044/* ARGSUSED */
5045int
b0d623f7 5046readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
1c79356b 5047{
2d21ac55 5048 vnode_t vp;
91447636
A
5049 uio_t auio;
5050 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
1c79356b
A
5051 int error;
5052 struct nameidata nd;
2d21ac55 5053 vfs_context_t ctx = vfs_context_current();
91447636
A
5054 char uio_buf[ UIO_SIZEOF(1) ];
5055
6d2010ae 5056 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
2d21ac55 5057 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
5058 error = namei(&nd);
5059 if (error)
1c79356b
A
5060 return (error);
5061 vp = nd.ni_vp;
91447636
A
5062
5063 nameidone(&nd);
5064
5065 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
5066 &uio_buf[0], sizeof(uio_buf));
5067 uio_addiov(auio, uap->buf, uap->count);
1c79356b
A
5068 if (vp->v_type != VLNK)
5069 error = EINVAL;
5070 else {
2d21ac55
A
5071#if CONFIG_MACF
5072 error = mac_vnode_check_readlink(ctx,
5073 vp);
5074#endif
5075 if (error == 0)
5076 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx);
91447636 5077 if (error == 0)
2d21ac55 5078 error = VNOP_READLINK(vp, auio, ctx);
91447636
A
5079 }
5080 vnode_put(vp);
b0d623f7
A
5081
5082 /* Safe: uio_resid() is bounded above by "count", and "count" is an int */
91447636 5083 *retval = uap->count - (int)uio_resid(auio);
1c79356b
A
5084 return (error);
5085}
5086
91447636
A
5087/*
5088 * Change file flags.
5089 */
5090static int
5091chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5092{
5093 struct vnode_attr va;
5094 kauth_action_t action;
5095 int error;
5096
5097 VATTR_INIT(&va);
5098 VATTR_SET(&va, va_flags, flags);
5099
2d21ac55
A
5100#if CONFIG_MACF
5101 error = mac_vnode_check_setflags(ctx, vp, flags);
5102 if (error)
5103 goto out;
5104#endif
5105
91447636
A
5106 /* request authorisation, disregard immutability */
5107 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5108 goto out;
5109 /*
5110 * Request that the auth layer disregard those file flags it's allowed to when
5111 * authorizing this operation; we need to do this in order to be able to
5112 * clear immutable flags.
5113 */
5114 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
5115 goto out;
5116 error = vnode_setattr(vp, &va, ctx);
5117
2d21ac55
A
5118 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5119 error = ENOTSUP;
5120 }
91447636
A
5121out:
5122 vnode_put(vp);
5123 return(error);
5124}
5125
1c79356b
A
5126/*
5127 * Change flags of a file given a path name.
5128 */
1c79356b
A
5129/* ARGSUSED */
5130int
b0d623f7 5131chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
1c79356b 5132{
2d21ac55
A
5133 vnode_t vp;
5134 vfs_context_t ctx = vfs_context_current();
1c79356b
A
5135 int error;
5136 struct nameidata nd;
5137
55e303ae 5138 AUDIT_ARG(fflags, uap->flags);
6d2010ae 5139 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
2d21ac55 5140 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
5141 error = namei(&nd);
5142 if (error)
1c79356b
A
5143 return (error);
5144 vp = nd.ni_vp;
91447636
A
5145 nameidone(&nd);
5146
2d21ac55 5147 error = chflags1(vp, uap->flags, ctx);
91447636
A
5148
5149 return(error);
1c79356b
A
5150}
5151
5152/*
5153 * Change flags of a file given a file descriptor.
5154 */
1c79356b
A
5155/* ARGSUSED */
5156int
b0d623f7 5157fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
1c79356b 5158{
2d21ac55 5159 vnode_t vp;
1c79356b
A
5160 int error;
5161
55e303ae
A
5162 AUDIT_ARG(fd, uap->fd);
5163 AUDIT_ARG(fflags, uap->flags);
91447636 5164 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 5165 return (error);
55e303ae 5166
91447636
A
5167 if ((error = vnode_getwithref(vp))) {
5168 file_drop(uap->fd);
5169 return(error);
5170 }
e5568f75
A
5171
5172 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5173
2d21ac55 5174 error = chflags1(vp, uap->flags, vfs_context_current());
91447636
A
5175
5176 file_drop(uap->fd);
5177 return (error);
5178}
5179
5180/*
5181 * Change security information on a filesystem object.
2d21ac55
A
5182 *
5183 * Returns: 0 Success
5184 * EPERM Operation not permitted
5185 * vnode_authattr:??? [anything vnode_authattr can return]
5186 * vnode_authorize:??? [anything vnode_authorize can return]
5187 * vnode_setattr:??? [anything vnode_setattr can return]
5188 *
5189 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5190 * translated to EPERM before being returned.
91447636
A
5191 */
5192static int
2d21ac55 5193chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
91447636
A
5194{
5195 kauth_action_t action;
5196 int error;
5197
b0d623f7
A
5198 AUDIT_ARG(mode, vap->va_mode);
5199 /* XXX audit new args */
91447636 5200
2d21ac55
A
5201#if NAMEDSTREAMS
5202 /* chmod calls are not allowed for resource forks. */
5203 if (vp->v_flag & VISNAMEDSTREAM) {
5204 return (EPERM);
5205 }
5206#endif
5207
5208#if CONFIG_MACF
316670eb
A
5209 if (VATTR_IS_ACTIVE(vap, va_mode) &&
5210 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
2d21ac55
A
5211 return (error);
5212#endif
5213
91447636
A
5214 /* make sure that the caller is allowed to set this security information */
5215 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
5216 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5217 if (error == EACCES)
5218 error = EPERM;
5219 return(error);
5220 }
5221
5222 error = vnode_setattr(vp, vap, ctx);
5223
1c79356b
A
5224 return (error);
5225}
5226
91447636 5227
1c79356b 5228/*
b0d623f7 5229 * Change mode of a file given a path name.
2d21ac55
A
5230 *
5231 * Returns: 0 Success
5232 * namei:??? [anything namei can return]
5233 * chmod2:??? [anything chmod2 can return]
1c79356b 5234 */
91447636
A
5235static int
5236chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
5237{
5238 struct nameidata nd;
5239 int error;
5240
6d2010ae 5241 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
91447636
A
5242 UIO_USERSPACE, path, ctx);
5243 if ((error = namei(&nd)))
5244 return (error);
5245 error = chmod2(ctx, nd.ni_vp, vap);
5246 vnode_put(nd.ni_vp);
5247 nameidone(&nd);
5248 return(error);
5249}
5250
0c530ab8 5251/*
b0d623f7
A
5252 * chmod_extended: Change the mode of a file given a path name; with extended
5253 * argument list (including extended security (ACL)).
0c530ab8
A
5254 *
5255 * Parameters: p Process requesting the open
5256 * uap User argument descriptor (see below)
5257 * retval (ignored)
5258 *
5259 * Indirect: uap->path Path to object (same as 'chmod')
5260 * uap->uid UID to set
5261 * uap->gid GID to set
5262 * uap->mode File mode to set (same as 'chmod')
5263 * uap->xsecurity ACL to set (or delete)
5264 *
5265 * Returns: 0 Success
5266 * !0 errno value
5267 *
5268 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5269 *
5270 * XXX: We should enummerate the possible errno values here, and where
5271 * in the code they originated.
5272 */
1c79356b 5273int
b0d623f7 5274chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
1c79356b 5275{
1c79356b 5276 int error;
91447636
A
5277 struct vnode_attr va;
5278 kauth_filesec_t xsecdst;
5279
b0d623f7
A
5280 AUDIT_ARG(owner, uap->uid, uap->gid);
5281
91447636
A
5282 VATTR_INIT(&va);
5283 if (uap->mode != -1)
5284 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5285 if (uap->uid != KAUTH_UID_NONE)
5286 VATTR_SET(&va, va_uid, uap->uid);
5287 if (uap->gid != KAUTH_GID_NONE)
5288 VATTR_SET(&va, va_gid, uap->gid);
5289
5290 xsecdst = NULL;
5291 switch(uap->xsecurity) {
5292 /* explicit remove request */
5293 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5294 VATTR_SET(&va, va_acl, NULL);
5295 break;
5296 /* not being set */
5297 case USER_ADDR_NULL:
5298 break;
5299 default:
5300 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5301 return(error);
5302 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5303 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
5304 }
1c79356b 5305
2d21ac55 5306 error = chmod1(vfs_context_current(), uap->path, &va);
55e303ae 5307
91447636
A
5308 if (xsecdst != NULL)
5309 kauth_filesec_free(xsecdst);
5310 return(error);
5311}
4a249263 5312
2d21ac55
A
5313/*
5314 * Returns: 0 Success
5315 * chmod1:??? [anything chmod1 can return]
5316 */
91447636 5317int
b0d623f7 5318chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
91447636 5319{
91447636
A
5320 struct vnode_attr va;
5321
5322 VATTR_INIT(&va);
5323 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5324
2d21ac55 5325 return(chmod1(vfs_context_current(), uap->path, &va));
1c79356b
A
5326}
5327
5328/*
5329 * Change mode of a file given a file descriptor.
5330 */
91447636 5331static int
2d21ac55 5332fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
1c79356b 5333{
2d21ac55 5334 vnode_t vp;
1c79356b 5335 int error;
55e303ae 5336
91447636 5337 AUDIT_ARG(fd, fd);
55e303ae 5338
91447636
A
5339 if ((error = file_vnode(fd, &vp)) != 0)
5340 return (error);
5341 if ((error = vnode_getwithref(vp)) != 0) {
5342 file_drop(fd);
5343 return(error);
5344 }
55e303ae
A
5345 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5346
2d21ac55 5347 error = chmod2(vfs_context_current(), vp, vap);
91447636
A
5348 (void)vnode_put(vp);
5349 file_drop(fd);
55e303ae 5350
1c79356b
A
5351 return (error);
5352}
5353
b0d623f7
A
5354/*
5355 * fchmod_extended: Change mode of a file given a file descriptor; with
5356 * extended argument list (including extended security (ACL)).
5357 *
5358 * Parameters: p Process requesting to change file mode
5359 * uap User argument descriptor (see below)
5360 * retval (ignored)
5361 *
5362 * Indirect: uap->mode File mode to set (same as 'chmod')
5363 * uap->uid UID to set
5364 * uap->gid GID to set
5365 * uap->xsecurity ACL to set (or delete)
5366 * uap->fd File descriptor of file to change mode
5367 *
5368 * Returns: 0 Success
5369 * !0 errno value
5370 *
5371 */
91447636 5372int
b0d623f7 5373fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
91447636
A
5374{
5375 int error;
5376 struct vnode_attr va;
5377 kauth_filesec_t xsecdst;
5378
b0d623f7
A
5379 AUDIT_ARG(owner, uap->uid, uap->gid);
5380
91447636
A
5381 VATTR_INIT(&va);
5382 if (uap->mode != -1)
5383 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5384 if (uap->uid != KAUTH_UID_NONE)
5385 VATTR_SET(&va, va_uid, uap->uid);
5386 if (uap->gid != KAUTH_GID_NONE)
5387 VATTR_SET(&va, va_gid, uap->gid);
5388
5389 xsecdst = NULL;
5390 switch(uap->xsecurity) {
5391 case USER_ADDR_NULL:
5392 VATTR_SET(&va, va_acl, NULL);
5393 break;
39236c6e
A
5394 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5395 VATTR_SET(&va, va_acl, NULL);
5396 break;
5397 /* not being set */
91447636
A
5398 case CAST_USER_ADDR_T(-1):
5399 break;
5400 default:
5401 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5402 return(error);
5403 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5404 }
5405
5406 error = fchmod1(p, uap->fd, &va);
5407
5408
5409 switch(uap->xsecurity) {
5410 case USER_ADDR_NULL:
5411 case CAST_USER_ADDR_T(-1):
5412 break;
5413 default:
5414 if (xsecdst != NULL)
5415 kauth_filesec_free(xsecdst);
5416 }
5417 return(error);
5418}
5419
5420int
b0d623f7 5421fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
91447636
A
5422{
5423 struct vnode_attr va;
5424
5425 VATTR_INIT(&va);
5426 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5427
5428 return(fchmod1(p, uap->fd, &va));
5429}
5430
5431
1c79356b
A
5432/*
5433 * Set ownership given a path name.
5434 */
1c79356b 5435/* ARGSUSED */
91447636 5436static int
b0d623f7 5437chown1(vfs_context_t ctx, struct chown_args *uap, __unused int32_t *retval, int follow)
1c79356b 5438{
2d21ac55 5439 vnode_t vp;
91447636 5440 struct vnode_attr va;
1c79356b
A
5441 int error;
5442 struct nameidata nd;
91447636 5443 kauth_action_t action;
1c79356b 5444
55e303ae
A
5445 AUDIT_ARG(owner, uap->uid, uap->gid);
5446
6d2010ae
A
5447 NDINIT(&nd, LOOKUP, OP_SETATTR,
5448 (follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1,
91447636 5449 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
5450 error = namei(&nd);
5451 if (error)
1c79356b
A
5452 return (error);
5453 vp = nd.ni_vp;
5454
91447636
A
5455 nameidone(&nd);
5456
91447636
A
5457 VATTR_INIT(&va);
5458 if (uap->uid != VNOVAL)
5459 VATTR_SET(&va, va_uid, uap->uid);
5460 if (uap->gid != VNOVAL)
5461 VATTR_SET(&va, va_gid, uap->gid);
5462
2d21ac55
A
5463#if CONFIG_MACF
5464 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
5465 if (error)
5466 goto out;
5467#endif
5468
91447636
A
5469 /* preflight and authorize attribute changes */
5470 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5471 goto out;
5472 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
5473 goto out;
5474 error = vnode_setattr(vp, &va, ctx);
5475
5476out:
5477 /*
5478 * EACCES is only allowed from namei(); permissions failure should
5479 * return EPERM, so we need to translate the error code.
5480 */
5481 if (error == EACCES)
5482 error = EPERM;
1c79356b 5483
91447636 5484 vnode_put(vp);
1c79356b
A
5485 return (error);
5486}
5487
91447636 5488int
b0d623f7 5489chown(__unused proc_t p, struct chown_args *uap, int32_t *retval)
91447636 5490{
2d21ac55 5491 return chown1(vfs_context_current(), uap, retval, 1);
91447636
A
5492}
5493
5494int
b0d623f7 5495lchown(__unused proc_t p, struct lchown_args *uap, int32_t *retval)
91447636 5496{
91447636 5497 /* Argument list identical, but machine generated; cast for chown1() */
2d21ac55 5498 return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0);
91447636
A
5499}
5500
1c79356b
A
5501/*
5502 * Set ownership given a file descriptor.
5503 */
1c79356b
A
5504/* ARGSUSED */
5505int
b0d623f7 5506fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
1c79356b 5507{
91447636 5508 struct vnode_attr va;
2d21ac55
A
5509 vfs_context_t ctx = vfs_context_current();
5510 vnode_t vp;
1c79356b 5511 int error;
91447636 5512 kauth_action_t action;
1c79356b 5513
55e303ae
A
5514 AUDIT_ARG(owner, uap->uid, uap->gid);
5515 AUDIT_ARG(fd, uap->fd);
5516
91447636 5517 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 5518 return (error);
55e303ae 5519
91447636
A
5520 if ( (error = vnode_getwithref(vp)) ) {
5521 file_drop(uap->fd);
5522 return(error);
5523 }
55e303ae
A
5524 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5525
91447636
A
5526 VATTR_INIT(&va);
5527 if (uap->uid != VNOVAL)
5528 VATTR_SET(&va, va_uid, uap->uid);
5529 if (uap->gid != VNOVAL)
5530 VATTR_SET(&va, va_gid, uap->gid);
5531
2d21ac55
A
5532#if NAMEDSTREAMS
5533 /* chown calls are not allowed for resource forks. */
5534 if (vp->v_flag & VISNAMEDSTREAM) {
5535 error = EPERM;
5536 goto out;
5537 }
5538#endif
5539
5540#if CONFIG_MACF
5541 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
5542 if (error)
5543 goto out;
5544#endif
91447636
A
5545
5546 /* preflight and authorize attribute changes */
2d21ac55 5547 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
91447636 5548 goto out;
2d21ac55 5549 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
91447636
A
5550 if (error == EACCES)
5551 error = EPERM;
5552 goto out;
5553 }
2d21ac55 5554 error = vnode_setattr(vp, &va, ctx);
4a249263 5555
91447636
A
5556out:
5557 (void)vnode_put(vp);
5558 file_drop(uap->fd);
1c79356b
A
5559 return (error);
5560}
5561
9bccf70c 5562static int
2d21ac55 5563getutimes(user_addr_t usrtvp, struct timespec *tsp)
9bccf70c 5564{
9bccf70c
A
5565 int error;
5566
91447636
A
5567 if (usrtvp == USER_ADDR_NULL) {
5568 struct timeval old_tv;
5569 /* XXX Y2038 bug because of microtime argument */
5570 microtime(&old_tv);
5571 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
9bccf70c
A
5572 tsp[1] = tsp[0];
5573 } else {
91447636 5574 if (IS_64BIT_PROCESS(current_proc())) {
b0d623f7 5575 struct user64_timeval tv[2];
91447636 5576 error = copyin(usrtvp, (void *)tv, sizeof(tv));
b0d623f7
A
5577 if (error)
5578 return (error);
5579 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
5580 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 5581 } else {
b0d623f7
A
5582 struct user32_timeval tv[2];
5583 error = copyin(usrtvp, (void *)tv, sizeof(tv));
5584 if (error)
5585 return (error);
5586 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
5587 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 5588 }
9bccf70c
A
5589 }
5590 return 0;
5591}
5592
5593static int
2d21ac55 5594setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
91447636 5595 int nullflag)
9bccf70c
A
5596{
5597 int error;
91447636
A
5598 struct vnode_attr va;
5599 kauth_action_t action;
e5568f75
A
5600
5601 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5602
91447636
A
5603 VATTR_INIT(&va);
5604 VATTR_SET(&va, va_access_time, ts[0]);
5605 VATTR_SET(&va, va_modify_time, ts[1]);
9bccf70c 5606 if (nullflag)
91447636
A
5607 va.va_vaflags |= VA_UTIMES_NULL;
5608
2d21ac55
A
5609#if NAMEDSTREAMS
5610 /* utimes calls are not allowed for resource forks. */
5611 if (vp->v_flag & VISNAMEDSTREAM) {
5612 error = EPERM;
5613 goto out;
5614 }
5615#endif
5616
5617#if CONFIG_MACF
5618 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
5619 if (error)
5620 goto out;
5621#endif
5622 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
5623 if (!nullflag && error == EACCES)
5624 error = EPERM;
91447636 5625 goto out;
2d21ac55
A
5626 }
5627
91447636 5628 /* since we may not need to auth anything, check here */
2d21ac55
A
5629 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5630 if (!nullflag && error == EACCES)
5631 error = EPERM;
91447636 5632 goto out;
2d21ac55 5633 }
91447636 5634 error = vnode_setattr(vp, &va, ctx);
4a249263 5635
9bccf70c
A
5636out:
5637 return error;
5638}
5639
1c79356b
A
5640/*
5641 * Set the access and modification times of a file.
5642 */
1c79356b
A
5643/* ARGSUSED */
5644int
b0d623f7 5645utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
1c79356b 5646{
9bccf70c 5647 struct timespec ts[2];
91447636 5648 user_addr_t usrtvp;
1c79356b
A
5649 int error;
5650 struct nameidata nd;
2d21ac55 5651 vfs_context_t ctx = vfs_context_current();
1c79356b 5652
2d21ac55
A
5653 /*
5654 * AUDIT: Needed to change the order of operations to do the
55e303ae
A
5655 * name lookup first because auditing wants the path.
5656 */
6d2010ae 5657 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
2d21ac55 5658 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
5659 error = namei(&nd);
5660 if (error)
9bccf70c 5661 return (error);
91447636 5662 nameidone(&nd);
55e303ae 5663
91447636
A
5664 /*
5665 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
5666 * the current time instead.
5667 */
55e303ae 5668 usrtvp = uap->tptr;
91447636
A
5669 if ((error = getutimes(usrtvp, ts)) != 0)
5670 goto out;
5671
2d21ac55 5672 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
91447636
A
5673
5674out:
5675 vnode_put(nd.ni_vp);
1c79356b
A
5676 return (error);
5677}
5678
9bccf70c
A
5679/*
5680 * Set the access and modification times of a file.
5681 */
9bccf70c
A
5682/* ARGSUSED */
5683int
b0d623f7 5684futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
9bccf70c
A
5685{
5686 struct timespec ts[2];
2d21ac55 5687 vnode_t vp;
91447636 5688 user_addr_t usrtvp;
9bccf70c
A
5689 int error;
5690
55e303ae 5691 AUDIT_ARG(fd, uap->fd);
9bccf70c
A
5692 usrtvp = uap->tptr;
5693 if ((error = getutimes(usrtvp, ts)) != 0)
5694 return (error);
91447636 5695 if ((error = file_vnode(uap->fd, &vp)) != 0)
9bccf70c 5696 return (error);
91447636
A
5697 if((error = vnode_getwithref(vp))) {
5698 file_drop(uap->fd);
5699 return(error);
5700 }
55e303ae 5701
2d21ac55 5702 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
91447636
A
5703 vnode_put(vp);
5704 file_drop(uap->fd);
5705 return(error);
9bccf70c
A
5706}
5707
1c79356b
A
5708/*
5709 * Truncate a file given its path name.
5710 */
1c79356b
A
5711/* ARGSUSED */
5712int
b0d623f7 5713truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
1c79356b 5714{
2d21ac55 5715 vnode_t vp;
91447636 5716 struct vnode_attr va;
2d21ac55 5717 vfs_context_t ctx = vfs_context_current();
1c79356b
A
5718 int error;
5719 struct nameidata nd;
91447636
A
5720 kauth_action_t action;
5721
0b4e3aa0
A
5722 if (uap->length < 0)
5723 return(EINVAL);
6d2010ae 5724 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
2d21ac55 5725 UIO_USERSPACE, uap->path, ctx);
91447636 5726 if ((error = namei(&nd)))
1c79356b
A
5727 return (error);
5728 vp = nd.ni_vp;
91447636
A
5729
5730 nameidone(&nd);
5731
5732 VATTR_INIT(&va);
5733 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55
A
5734
5735#if CONFIG_MACF
5736 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
5737 if (error)
5738 goto out;
5739#endif
5740
5741 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
91447636 5742 goto out;
2d21ac55 5743 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
91447636 5744 goto out;
2d21ac55 5745 error = vnode_setattr(vp, &va, ctx);
91447636
A
5746out:
5747 vnode_put(vp);
1c79356b
A
5748 return (error);
5749}
5750
5751/*
5752 * Truncate a file given a file descriptor.
5753 */
1c79356b
A
5754/* ARGSUSED */
5755int
b0d623f7 5756ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
1c79356b 5757{
2d21ac55 5758 vfs_context_t ctx = vfs_context_current();
91447636 5759 struct vnode_attr va;
2d21ac55 5760 vnode_t vp;
91447636
A
5761 struct fileproc *fp;
5762 int error ;
5763 int fd = uap->fd;
1c79356b 5764
55e303ae 5765 AUDIT_ARG(fd, uap->fd);
0b4e3aa0
A
5766 if (uap->length < 0)
5767 return(EINVAL);
1c79356b 5768
91447636
A
5769 if ( (error = fp_lookup(p,fd,&fp,0)) ) {
5770 return(error);
5771 }
1c79356b 5772
39236c6e
A
5773 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
5774 case DTYPE_PSXSHM:
91447636
A
5775 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
5776 goto out;
39236c6e
A
5777 case DTYPE_VNODE:
5778 break;
5779 default:
91447636
A
5780 error = EINVAL;
5781 goto out;
1c79356b 5782 }
1c79356b 5783
2d21ac55 5784 vp = (vnode_t)fp->f_fglob->fg_data;
e5568f75 5785
91447636
A
5786 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
5787 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
5788 error = EINVAL;
5789 goto out;
1c79356b 5790 }
1c79356b 5791
91447636
A
5792 if ((error = vnode_getwithref(vp)) != 0) {
5793 goto out;
5794 }
1c79356b 5795
91447636 5796 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 5797
2d21ac55
A
5798#if CONFIG_MACF
5799 error = mac_vnode_check_truncate(ctx,
5800 fp->f_fglob->fg_cred, vp);
5801 if (error) {
5802 (void)vnode_put(vp);
5803 goto out;
5804 }
5805#endif
91447636
A
5806 VATTR_INIT(&va);
5807 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55 5808 error = vnode_setattr(vp, &va, ctx);
91447636
A
5809 (void)vnode_put(vp);
5810out:
5811 file_drop(fd);
5812 return (error);
1c79356b 5813}
91447636 5814
1c79356b
A
5815
5816/*
b0d623f7 5817 * Sync an open file with synchronized I/O _file_ integrity completion
1c79356b 5818 */
1c79356b
A
5819/* ARGSUSED */
5820int
b0d623f7 5821fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
1c79356b 5822{
2d21ac55 5823 __pthread_testcancel(1);
b0d623f7
A
5824 return(fsync_common(p, uap, MNT_WAIT));
5825}
5826
5827
5828/*
5829 * Sync an open file with synchronized I/O _file_ integrity completion
5830 *
5831 * Notes: This is a legacy support function that does not test for
5832 * thread cancellation points.
5833 */
5834/* ARGSUSED */
5835int
5836fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
5837{
5838 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
2d21ac55
A
5839}
5840
b0d623f7
A
5841
5842/*
5843 * Sync an open file with synchronized I/O _data_ integrity completion
5844 */
5845/* ARGSUSED */
2d21ac55 5846int
b0d623f7
A
5847fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
5848{
5849 __pthread_testcancel(1);
5850 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
5851}
5852
5853
5854/*
5855 * fsync_common
5856 *
5857 * Common fsync code to support both synchronized I/O file integrity completion
5858 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
5859 *
5860 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
5861 * will only guarantee that the file data contents are retrievable. If
5862 * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
5863 * includes additional metadata unnecessary for retrieving the file data
5864 * contents, such as atime, mtime, ctime, etc., also be committed to stable
5865 * storage.
5866 *
5867 * Parameters: p The process
5868 * uap->fd The descriptor to synchronize
5869 * flags The data integrity flags
5870 *
5871 * Returns: int Success
5872 * fp_getfvp:EBADF Bad file descriptor
5873 * fp_getfvp:ENOTSUP fd does not refer to a vnode
5874 * VNOP_FSYNC:??? unspecified
5875 *
5876 * Notes: We use struct fsync_args because it is a short name, and all
5877 * caller argument structures are otherwise identical.
5878 */
5879static int
5880fsync_common(proc_t p, struct fsync_args *uap, int flags)
2d21ac55
A
5881{
5882 vnode_t vp;
91447636 5883 struct fileproc *fp;
2d21ac55 5884 vfs_context_t ctx = vfs_context_current();
1c79356b
A
5885 int error;
5886
b0d623f7
A
5887 AUDIT_ARG(fd, uap->fd);
5888
91447636 5889 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
1c79356b 5890 return (error);
91447636
A
5891 if ( (error = vnode_getwithref(vp)) ) {
5892 file_drop(uap->fd);
5893 return(error);
5894 }
91447636 5895
b0d623f7
A
5896 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5897
5898 error = VNOP_FSYNC(vp, flags, ctx);
2d21ac55
A
5899
5900#if NAMEDRSRCFORK
5901 /* Sync resource fork shadow file if necessary. */
5902 if ((error == 0) &&
5903 (vp->v_flag & VISNAMEDSTREAM) &&
5904 (vp->v_parent != NULLVP) &&
b0d623f7 5905 vnode_isshadow(vp) &&
2d21ac55
A
5906 (fp->f_flags & FP_WRITTEN)) {
5907 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
5908 }
5909#endif
91447636
A
5910
5911 (void)vnode_put(vp);
5912 file_drop(uap->fd);
1c79356b
A
5913 return (error);
5914}
5915
5916/*
5917 * Duplicate files. Source must be a file, target must be a file or
5918 * must not exist.
91447636
A
5919 *
5920 * XXX Copyfile authorisation checking is woefully inadequate, and will not
5921 * perform inheritance correctly.
1c79356b 5922 */
1c79356b
A
5923/* ARGSUSED */
5924int
b0d623f7 5925copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
1c79356b 5926{
91447636 5927 vnode_t tvp, fvp, tdvp, sdvp;
1c79356b
A
5928 struct nameidata fromnd, tond;
5929 int error;
2d21ac55 5930 vfs_context_t ctx = vfs_context_current();
55e303ae
A
5931
5932 /* Check that the flags are valid. */
1c79356b
A
5933
5934 if (uap->flags & ~CPF_MASK) {
55e303ae
A
5935 return(EINVAL);
5936 }
1c79356b 5937
6d2010ae 5938 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1,
2d21ac55 5939 UIO_USERSPACE, uap->from, ctx);
91447636 5940 if ((error = namei(&fromnd)))
1c79356b
A
5941 return (error);
5942 fvp = fromnd.ni_vp;
5943
6d2010ae
A
5944 NDINIT(&tond, CREATE, OP_LINK,
5945 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
5946 UIO_USERSPACE, uap->to, ctx);
91447636 5947 if ((error = namei(&tond))) {
1c79356b
A
5948 goto out1;
5949 }
5950 tdvp = tond.ni_dvp;
5951 tvp = tond.ni_vp;
91447636 5952
1c79356b
A
5953 if (tvp != NULL) {
5954 if (!(uap->flags & CPF_OVERWRITE)) {
5955 error = EEXIST;
5956 goto out;
5957 }
5958 }
1c79356b
A
5959 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
5960 error = EISDIR;
5961 goto out;
5962 }
5963
2d21ac55 5964 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
1c79356b
A
5965 goto out;
5966
5967 if (fvp == tdvp)
5968 error = EINVAL;
5969 /*
5970 * If source is the same as the destination (that is the
5971 * same inode number) then there is nothing to do.
5972 * (fixed to have POSIX semantics - CSM 3/2/98)
5973 */
5974 if (fvp == tvp)
5975 error = -1;
91447636 5976 if (!error)
2d21ac55 5977 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
1c79356b 5978out:
91447636
A
5979 sdvp = tond.ni_startdir;
5980 /*
5981 * nameidone has to happen before we vnode_put(tdvp)
5982 * since it may need to release the fs_nodelock on the tdvp
5983 */
5984 nameidone(&tond);
5985
5986 if (tvp)
5987 vnode_put(tvp);
5988 vnode_put(tdvp);
5989 vnode_put(sdvp);
1c79356b 5990out1:
91447636
A
5991 vnode_put(fvp);
5992
1c79356b 5993 if (fromnd.ni_startdir)
91447636
A
5994 vnode_put(fromnd.ni_startdir);
5995 nameidone(&fromnd);
5996
1c79356b
A
5997 if (error == -1)
5998 return (0);
5999 return (error);
6000}
6001
91447636 6002
1c79356b
A
6003/*
6004 * Rename files. Source and destination must either both be directories,
6005 * or both not be directories. If target is a directory, it must be empty.
6006 */
1c79356b
A
6007/* ARGSUSED */
6008int
b0d623f7 6009rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
1c79356b 6010{
91447636
A
6011 vnode_t tvp, tdvp;
6012 vnode_t fvp, fdvp;
316670eb 6013 struct nameidata *fromnd, *tond;
2d21ac55 6014 vfs_context_t ctx = vfs_context_current();
1c79356b 6015 int error;
593a1d5f 6016 int do_retry;
1c79356b 6017 int mntrename;
2d21ac55 6018 int need_event;
6d2010ae 6019 const char *oname = NULL;
2d21ac55 6020 char *from_name = NULL, *to_name = NULL;
b0d623f7 6021 int from_len=0, to_len=0;
91447636
A
6022 int holding_mntlock;
6023 mount_t locked_mp = NULL;
6d2010ae 6024 vnode_t oparent = NULLVP;
b0d623f7 6025#if CONFIG_FSE
91447636 6026 fse_info from_finfo, to_finfo;
b0d623f7
A
6027#endif
6028 int from_truncated=0, to_truncated;
6d2010ae
A
6029 int batched = 0;
6030 struct vnode_attr *fvap, *tvap;
6031 int continuing = 0;
316670eb
A
6032 /* carving out a chunk for structs that are too big to be on stack. */
6033 struct {
6034 struct nameidata from_node, to_node;
6035 struct vnode_attr fv_attr, tv_attr;
6036 } * __rename_data;
6037 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
6038 fromnd = &__rename_data->from_node;
6039 tond = &__rename_data->to_node;
6040
91447636 6041 holding_mntlock = 0;
316670eb 6042 do_retry = 0;
91447636
A
6043retry:
6044 fvp = tvp = NULL;
6045 fdvp = tdvp = NULL;
6d2010ae 6046 fvap = tvap = NULL;
1c79356b
A
6047 mntrename = FALSE;
6048
316670eb 6049 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
6d2010ae 6050 UIO_USERSPACE, uap->from, ctx);
316670eb 6051 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
91447636 6052
316670eb 6053 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6d2010ae 6054 UIO_USERSPACE, uap->to, ctx);
316670eb 6055 tond->ni_flag = NAMEI_COMPOUNDRENAME;
6d2010ae
A
6056
6057continue_lookup:
316670eb
A
6058 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6059 if ( (error = namei(fromnd)) )
6d2010ae 6060 goto out1;
316670eb
A
6061 fdvp = fromnd->ni_dvp;
6062 fvp = fromnd->ni_vp;
1c79356b 6063
6d2010ae 6064 if (fvp && fvp->v_type == VDIR)
316670eb 6065 tond->ni_cnd.cn_flags |= WILLBEDIR;
6d2010ae 6066 }
2d21ac55 6067
316670eb
A
6068 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6069 if ( (error = namei(tond)) ) {
6d2010ae
A
6070 /*
6071 * Translate error code for rename("dir1", "dir2/.").
6072 */
6073 if (error == EISDIR && fvp->v_type == VDIR)
6074 error = EINVAL;
6075 goto out1;
6076 }
316670eb
A
6077 tdvp = tond->ni_dvp;
6078 tvp = tond->ni_vp;
6d2010ae 6079 }
91447636 6080
6d2010ae
A
6081 batched = vnode_compound_rename_available(fdvp);
6082 if (!fvp) {
6083 /*
6084 * Claim: this check will never reject a valid rename.
6085 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
6086 * Suppose fdvp and tdvp are not on the same mount.
6087 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6088 * then you can't move it to within another dir on the same mountpoint.
6089 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
6090 *
6091 * If this check passes, then we are safe to pass these vnodes to the same FS.
91447636 6092 */
6d2010ae
A
6093 if (fdvp->v_mount != tdvp->v_mount) {
6094 error = EXDEV;
6095 goto out1;
6096 }
6097 goto skipped_lookup;
1c79356b 6098 }
2d21ac55 6099
6d2010ae 6100 if (!batched) {
316670eb 6101 error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
6d2010ae
A
6102 if (error) {
6103 if (error == ENOENT) {
6104 /*
6105 * We encountered a race where after doing the namei, tvp stops
6106 * being valid. If so, simply re-drive the rename call from the
6107 * top.
6108 */
6109 do_retry = 1;
6110 }
91447636 6111 goto out1;
1c79356b
A
6112 }
6113 }
6d2010ae 6114
2d21ac55
A
6115 /*
6116 * If the source and destination are the same (i.e. they're
6117 * links to the same vnode) and the target file system is
6118 * case sensitive, then there is nothing to do.
6d2010ae
A
6119 *
6120 * XXX Come back to this.
2d21ac55
A
6121 */
6122 if (fvp == tvp) {
6123 int pathconf_val;
6124
6125 /*
6126 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
6127 * then assume that this file system is case sensitive.
6128 */
6129 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
6130 pathconf_val != 0) {
6131 goto out1;
6132 }
6133 }
91447636 6134
1c79356b
A
6135 /*
6136 * Allow the renaming of mount points.
6137 * - target must not exist
6138 * - target must reside in the same directory as source
6139 * - union mounts cannot be renamed
6140 * - "/" cannot be renamed
6d2010ae
A
6141 *
6142 * XXX Handle this in VFS after a continued lookup (if we missed
6143 * in the cache to start off)
1c79356b 6144 */
91447636 6145 if ((fvp->v_flag & VROOT) &&
1c79356b
A
6146 (fvp->v_type == VDIR) &&
6147 (tvp == NULL) &&
6148 (fvp->v_mountedhere == NULL) &&
91447636 6149 (fdvp == tdvp) &&
1c79356b
A
6150 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
6151 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
2d21ac55 6152 vnode_t coveredvp;
1c79356b
A
6153
6154 /* switch fvp to the covered vnode */
91447636
A
6155 coveredvp = fvp->v_mount->mnt_vnodecovered;
6156 if ( (vnode_getwithref(coveredvp)) ) {
6157 error = ENOENT;
6158 goto out1;
6159 }
6160 vnode_put(fvp);
6161
6162 fvp = coveredvp;
1c79356b
A
6163 mntrename = TRUE;
6164 }
91447636
A
6165 /*
6166 * Check for cross-device rename.
6167 */
6168 if ((fvp->v_mount != tdvp->v_mount) ||
6169 (tvp && (fvp->v_mount != tvp->v_mount))) {
6170 error = EXDEV;
6171 goto out1;
6172 }
55e303ae 6173
91447636
A
6174 /*
6175 * If source is the same as the destination (that is the
6176 * same inode number) then there is nothing to do...
6177 * EXCEPT if the underlying file system supports case
6178 * insensitivity and is case preserving. In this case
6179 * the file system needs to handle the special case of
6180 * getting the same vnode as target (fvp) and source (tvp).
6181 *
6182 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
6183 * and _PC_CASE_PRESERVING can have this exception, and they need to
6184 * handle the special case of getting the same vnode as target and
6185 * source. NOTE: Then the target is unlocked going into vnop_rename,
6186 * so not to cause locking problems. There is a single reference on tvp.
6187 *
b0d623f7
A
6188 * NOTE - that fvp == tvp also occurs if they are hard linked and
6189 * that correct behaviour then is just to return success without doing
6190 * anything.
6d2010ae
A
6191 *
6192 * XXX filesystem should take care of this itself, perhaps...
91447636
A
6193 */
6194 if (fvp == tvp && fdvp == tdvp) {
316670eb
A
6195 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
6196 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
6197 fromnd->ni_cnd.cn_namelen)) {
91447636 6198 goto out1;
55e303ae 6199 }
91447636 6200 }
55e303ae 6201
91447636
A
6202 if (holding_mntlock && fvp->v_mount != locked_mp) {
6203 /*
6204 * we're holding a reference and lock
6205 * on locked_mp, but it no longer matches
6206 * what we want to do... so drop our hold
6207 */
6208 mount_unlock_renames(locked_mp);
6209 mount_drop(locked_mp, 0);
6210 holding_mntlock = 0;
6211 }
6212 if (tdvp != fdvp && fvp->v_type == VDIR) {
6213 /*
6214 * serialize renames that re-shape
6215 * the tree... if holding_mntlock is
6216 * set, then we're ready to go...
6217 * otherwise we
6218 * first need to drop the iocounts
6219 * we picked up, second take the
6220 * lock to serialize the access,
6221 * then finally start the lookup
6222 * process over with the lock held
6223 */
6224 if (!holding_mntlock) {
6225 /*
6226 * need to grab a reference on
6227 * the mount point before we
6228 * drop all the iocounts... once
6229 * the iocounts are gone, the mount
6230 * could follow
6231 */
6232 locked_mp = fvp->v_mount;
6233 mount_ref(locked_mp, 0);
55e303ae 6234
91447636
A
6235 /*
6236 * nameidone has to happen before we vnode_put(tvp)
6237 * since it may need to release the fs_nodelock on the tvp
6238 */
316670eb 6239 nameidone(tond);
55e303ae 6240
91447636
A
6241 if (tvp)
6242 vnode_put(tvp);
6243 vnode_put(tdvp);
6244
6245 /*
6246 * nameidone has to happen before we vnode_put(fdvp)
6247 * since it may need to release the fs_nodelock on the fvp
6248 */
316670eb 6249 nameidone(fromnd);
55e303ae 6250
91447636
A
6251 vnode_put(fvp);
6252 vnode_put(fdvp);
6253
6254 mount_lock_renames(locked_mp);
6255 holding_mntlock = 1;
6256
6257 goto retry;
55e303ae 6258 }
91447636
A
6259 } else {
6260 /*
6261 * when we dropped the iocounts to take
6262 * the lock, we allowed the identity of
6263 * the various vnodes to change... if they did,
6264 * we may no longer be dealing with a rename
6265 * that reshapes the tree... once we're holding
6266 * the iocounts, the vnodes can't change type
6267 * so we're free to drop the lock at this point
6268 * and continue on
1c79356b 6269 */
91447636
A
6270 if (holding_mntlock) {
6271 mount_unlock_renames(locked_mp);
6272 mount_drop(locked_mp, 0);
6273 holding_mntlock = 0;
1c79356b 6274 }
91447636 6275 }
6d2010ae 6276
91447636
A
6277 // save these off so we can later verify that fvp is the same
6278 oname = fvp->v_name;
6279 oparent = fvp->v_parent;
55e303ae 6280
6d2010ae 6281skipped_lookup:
2d21ac55 6282#if CONFIG_FSE
6d2010ae 6283 need_event = need_fsevent(FSE_RENAME, fdvp);
2d21ac55 6284 if (need_event) {
6d2010ae
A
6285 if (fvp) {
6286 get_fse_info(fvp, &from_finfo, ctx);
6287 } else {
316670eb 6288 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
6d2010ae
A
6289 if (error) {
6290 goto out1;
6291 }
6292
316670eb 6293 fvap = &__rename_data->fv_attr;
6d2010ae 6294 }
55e303ae 6295
91447636 6296 if (tvp) {
2d21ac55 6297 get_fse_info(tvp, &to_finfo, ctx);
6d2010ae 6298 } else if (batched) {
316670eb 6299 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
6d2010ae
A
6300 if (error) {
6301 goto out1;
6302 }
6303
316670eb 6304 tvap = &__rename_data->tv_attr;
2d21ac55
A
6305 }
6306 }
6307#else
6308 need_event = 0;
6309#endif /* CONFIG_FSE */
6310
6311 if (need_event || kauth_authorize_fileop_has_listeners()) {
2d21ac55 6312 if (from_name == NULL) {
6d2010ae
A
6313 GET_PATH(from_name);
6314 if (from_name == NULL) {
6315 error = ENOMEM;
6316 goto out1;
6317 }
91447636 6318 }
b0d623f7 6319
316670eb 6320 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
55e303ae 6321
2d21ac55 6322 if (to_name == NULL) {
6d2010ae
A
6323 GET_PATH(to_name);
6324 if (to_name == NULL) {
6325 error = ENOMEM;
6326 goto out1;
6327 }
2d21ac55 6328 }
91447636 6329
316670eb 6330 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
2d21ac55
A
6331 }
6332
316670eb
A
6333 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
6334 tdvp, &tvp, &tond->ni_cnd, tvap,
6d2010ae 6335 0, ctx);
55e303ae 6336
91447636
A
6337 if (holding_mntlock) {
6338 /*
6339 * we can drop our serialization
6340 * lock now
6341 */
6342 mount_unlock_renames(locked_mp);
6343 mount_drop(locked_mp, 0);
6344 holding_mntlock = 0;
6345 }
6346 if (error) {
6d2010ae 6347 if (error == EKEEPLOOKING) {
316670eb
A
6348 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6349 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6d2010ae
A
6350 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
6351 }
6352 }
6353
316670eb
A
6354 fromnd->ni_vp = fvp;
6355 tond->ni_vp = tvp;
6d2010ae
A
6356
6357 goto continue_lookup;
6358 }
6359
6360 /*
6361 * We may encounter a race in the VNOP where the destination didn't
6362 * exist when we did the namei, but it does by the time we go and
6363 * try to create the entry. In this case, we should re-drive this rename
6364 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
b0d623f7 6365 * but other filesystems susceptible to this race could return it, too.
6d2010ae
A
6366 */
6367 if (error == ERECYCLE) {
6368 do_retry = 1;
6369 }
55e303ae 6370
91447636
A
6371 goto out1;
6372 }
6373
6374 /* call out to allow 3rd party notification of rename.
6375 * Ignore result of kauth_authorize_fileop call.
6376 */
2d21ac55
A
6377 kauth_authorize_fileop(vfs_context_ucred(ctx),
6378 KAUTH_FILEOP_RENAME,
6379 (uintptr_t)from_name, (uintptr_t)to_name);
91447636 6380
2d21ac55 6381#if CONFIG_FSE
91447636 6382 if (from_name != NULL && to_name != NULL) {
b0d623f7
A
6383 if (from_truncated || to_truncated) {
6384 // set it here since only the from_finfo gets reported up to user space
6385 from_finfo.mode |= FSE_TRUNCATED_PATH;
6386 }
6d2010ae
A
6387
6388 if (tvap && tvp) {
6389 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
6390 }
6391 if (fvap) {
6392 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
6393 }
6394
91447636 6395 if (tvp) {
2d21ac55 6396 add_fsevent(FSE_RENAME, ctx,
91447636
A
6397 FSE_ARG_STRING, from_len, from_name,
6398 FSE_ARG_FINFO, &from_finfo,
6399 FSE_ARG_STRING, to_len, to_name,
6400 FSE_ARG_FINFO, &to_finfo,
6401 FSE_ARG_DONE);
55e303ae 6402 } else {
2d21ac55 6403 add_fsevent(FSE_RENAME, ctx,
91447636
A
6404 FSE_ARG_STRING, from_len, from_name,
6405 FSE_ARG_FINFO, &from_finfo,
6406 FSE_ARG_STRING, to_len, to_name,
6407 FSE_ARG_DONE);
6408 }
6409 }
2d21ac55 6410#endif /* CONFIG_FSE */
91447636
A
6411
6412 /*
6413 * update filesystem's mount point data
6414 */
6415 if (mntrename) {
6416 char *cp, *pathend, *mpname;
6417 char * tobuf;
6418 struct mount *mp;
6419 int maxlen;
6420 size_t len = 0;
6421
6422 mp = fvp->v_mountedhere;
6423
6424 if (vfs_busy(mp, LK_NOWAIT)) {
6425 error = EBUSY;
6426 goto out1;
55e303ae 6427 }
91447636 6428 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
55e303ae 6429
91447636
A
6430 error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len);
6431 if (!error) {
6432 /* find current mount point prefix */
6433 pathend = &mp->mnt_vfsstat.f_mntonname[0];
6434 for (cp = pathend; *cp != '\0'; ++cp) {
6435 if (*cp == '/')
6436 pathend = cp + 1;
6437 }
6438 /* find last component of target name */
6439 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
6440 if (*cp == '/')
6441 mpname = cp + 1;
6442 }
6443 /* append name to prefix */
6444 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
6445 bzero(pathend, maxlen);
2d21ac55 6446 strlcpy(pathend, mpname, maxlen);
91447636
A
6447 }
6448 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
6449
6450 vfs_unbusy(mp);
6451 }
6452 /*
6453 * fix up name & parent pointers. note that we first
6454 * check that fvp has the same name/parent pointers it
6455 * had before the rename call... this is a 'weak' check
6456 * at best...
6d2010ae
A
6457 *
6458 * XXX oparent and oname may not be set in the compound vnop case
91447636 6459 */
6d2010ae 6460 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
91447636
A
6461 int update_flags;
6462
6463 update_flags = VNODE_UPDATE_NAME;
6464
6465 if (fdvp != tdvp)
6466 update_flags |= VNODE_UPDATE_PARENT;
6467
316670eb 6468 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
1c79356b
A
6469 }
6470out1:
593a1d5f
A
6471 if (to_name != NULL) {
6472 RELEASE_PATH(to_name);
6473 to_name = NULL;
6474 }
6475 if (from_name != NULL) {
6476 RELEASE_PATH(from_name);
6477 from_name = NULL;
6478 }
91447636
A
6479 if (holding_mntlock) {
6480 mount_unlock_renames(locked_mp);
6481 mount_drop(locked_mp, 0);
593a1d5f 6482 holding_mntlock = 0;
91447636
A
6483 }
6484 if (tdvp) {
6485 /*
6486 * nameidone has to happen before we vnode_put(tdvp)
6487 * since it may need to release the fs_nodelock on the tdvp
6488 */
316670eb 6489 nameidone(tond);
91447636
A
6490
6491 if (tvp)
6492 vnode_put(tvp);
6493 vnode_put(tdvp);
6494 }
6495 if (fdvp) {
6496 /*
6497 * nameidone has to happen before we vnode_put(fdvp)
6498 * since it may need to release the fs_nodelock on the fdvp
6499 */
316670eb 6500 nameidone(fromnd);
91447636
A
6501
6502 if (fvp)
6503 vnode_put(fvp);
6504 vnode_put(fdvp);
6505 }
b0d623f7 6506
316670eb 6507
6d2010ae
A
6508 /*
6509 * If things changed after we did the namei, then we will re-drive
6510 * this rename call from the top.
6511 */
316670eb 6512 if (do_retry) {
6d2010ae 6513 do_retry = 0;
593a1d5f
A
6514 goto retry;
6515 }
316670eb
A
6516
6517 FREE(__rename_data, M_TEMP);
1c79356b
A
6518 return (error);
6519}
6520
6521/*
6522 * Make a directory file.
2d21ac55
A
6523 *
6524 * Returns: 0 Success
6525 * EEXIST
6526 * namei:???
6527 * vnode_authorize:???
6528 * vn_create:???
1c79356b 6529 */
1c79356b 6530/* ARGSUSED */
91447636
A
6531static int
6532mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
1c79356b 6533{
91447636 6534 vnode_t vp, dvp;
1c79356b 6535 int error;
91447636 6536 int update_flags = 0;
6d2010ae 6537 int batched;
1c79356b
A
6538 struct nameidata nd;
6539
91447636 6540 AUDIT_ARG(mode, vap->va_mode);
6d2010ae
A
6541 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, UIO_USERSPACE,
6542 path, ctx);
9bccf70c 6543 nd.ni_cnd.cn_flags |= WILLBEDIR;
6d2010ae
A
6544 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
6545
6546continue_lookup:
55e303ae
A
6547 error = namei(&nd);
6548 if (error)
1c79356b 6549 return (error);
91447636 6550 dvp = nd.ni_dvp;
1c79356b 6551 vp = nd.ni_vp;
55e303ae 6552
91447636
A
6553 if (vp != NULL) {
6554 error = EEXIST;
6555 goto out;
6556 }
6d2010ae
A
6557
6558 batched = vnode_compound_mkdir_available(dvp);
2d21ac55
A
6559
6560 VATTR_SET(vap, va_type, VDIR);
6d2010ae
A
6561
6562 /*
6563 * XXX
6564 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
6565 * only get EXISTS or EISDIR for existing path components, and not that it could see
6566 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
6567 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
6568 */
6569 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
6570 if (error == EACCES || error == EPERM) {
6571 int error2;
6572
6573 nameidone(&nd);
6574 vnode_put(dvp);
6575 dvp = NULLVP;
6576
6577 /*
6578 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
6579 * rather than EACCESS if the target exists.
6580 */
6581 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, UIO_USERSPACE,
6582 path, ctx);
6583 error2 = namei(&nd);
6584 if (error2) {
6585 goto out;
6586 } else {
6587 vp = nd.ni_vp;
6588 error = EEXIST;
6589 goto out;
6590 }
6591 }
6592
2d21ac55 6593 goto out;
6d2010ae
A
6594 }
6595
6596 /*
6597 * make the directory
6598 */
6599 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
6600 if (error == EKEEPLOOKING) {
6601 nd.ni_vp = vp;
6602 goto continue_lookup;
6603 }
2d21ac55 6604
91447636 6605 goto out;
6d2010ae 6606 }
91447636
A
6607
6608 // Make sure the name & parent pointers are hooked up
6609 if (vp->v_name == NULL)
6610 update_flags |= VNODE_UPDATE_NAME;
6611 if (vp->v_parent == NULLVP)
6612 update_flags |= VNODE_UPDATE_PARENT;
6613
6614 if (update_flags)
6615 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
55e303ae 6616
2d21ac55 6617#if CONFIG_FSE
91447636 6618 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
2d21ac55 6619#endif
91447636
A
6620
6621out:
6622 /*
6623 * nameidone has to happen before we vnode_put(dvp)
6624 * since it may need to release the fs_nodelock on the dvp
6625 */
6626 nameidone(&nd);
6627
6628 if (vp)
6d2010ae
A
6629 vnode_put(vp);
6630 if (dvp)
6631 vnode_put(dvp);
55e303ae 6632
1c79356b
A
6633 return (error);
6634}
6635
b0d623f7
A
6636/*
6637 * mkdir_extended: Create a directory; with extended security (ACL).
6638 *
6639 * Parameters: p Process requesting to create the directory
6640 * uap User argument descriptor (see below)
6641 * retval (ignored)
6642 *
6643 * Indirect: uap->path Path of directory to create
6644 * uap->mode Access permissions to set
6645 * uap->xsecurity ACL to set
6646 *
6647 * Returns: 0 Success
6648 * !0 Not success
6649 *
6650 */
1c79356b 6651int
b0d623f7 6652mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
1c79356b 6653{
91447636
A
6654 int ciferror;
6655 kauth_filesec_t xsecdst;
6656 struct vnode_attr va;
6657
b0d623f7
A
6658 AUDIT_ARG(owner, uap->uid, uap->gid);
6659
91447636
A
6660 xsecdst = NULL;
6661 if ((uap->xsecurity != USER_ADDR_NULL) &&
6662 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
6663 return ciferror;
6664
91447636
A
6665 VATTR_INIT(&va);
6666 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
6667 if (xsecdst != NULL)
6668 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6669
2d21ac55 6670 ciferror = mkdir1(vfs_context_current(), uap->path, &va);
91447636
A
6671 if (xsecdst != NULL)
6672 kauth_filesec_free(xsecdst);
6673 return ciferror;
1c79356b
A
6674}
6675
1c79356b 6676int
b0d623f7 6677mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
1c79356b 6678{
91447636 6679 struct vnode_attr va;
1c79356b 6680
91447636
A
6681 VATTR_INIT(&va);
6682 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
e5568f75 6683
2d21ac55 6684 return(mkdir1(vfs_context_current(), uap->path, &va));
91447636 6685}
1c79356b 6686
91447636
A
6687/*
6688 * Remove a directory file.
6689 */
6690/* ARGSUSED */
6691int
b0d623f7 6692rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
1c79356b 6693{
2d21ac55 6694 vnode_t vp, dvp;
91447636
A
6695 int error;
6696 struct nameidata nd;
6d2010ae
A
6697 char *path = NULL;
6698 int len=0;
6699 int has_listeners = 0;
6700 int need_event = 0;
6701 int truncated = 0;
2d21ac55 6702 vfs_context_t ctx = vfs_context_current();
6d2010ae
A
6703#if CONFIG_FSE
6704 struct vnode_attr va;
6705#endif /* CONFIG_FSE */
6706 struct vnode_attr *vap = NULL;
6707 int batched;
91447636 6708
b0d623f7 6709 int restart_flag;
91447636 6710
2d21ac55
A
6711 /*
6712 * This loop exists to restart rmdir in the unlikely case that two
6713 * processes are simultaneously trying to remove the same directory
6714 * containing orphaned appleDouble files.
6715 */
6716 do {
6d2010ae
A
6717 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
6718 UIO_USERSPACE, uap->path, ctx);
6719 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
6720continue_lookup:
2d21ac55 6721 restart_flag = 0;
6d2010ae 6722 vap = NULL;
2d21ac55 6723
2d21ac55
A
6724 error = namei(&nd);
6725 if (error)
6726 return (error);
6727
6728 dvp = nd.ni_dvp;
6729 vp = nd.ni_vp;
6730
6d2010ae
A
6731 if (vp) {
6732 batched = vnode_compound_rmdir_available(vp);
2d21ac55 6733
6d2010ae
A
6734 if (vp->v_flag & VROOT) {
6735 /*
6736 * The root of a mounted filesystem cannot be deleted.
6737 */
6738 error = EBUSY;
6739 goto out;
6740 }
1c79356b 6741
2d21ac55 6742 /*
6d2010ae
A
6743 * Removed a check here; we used to abort if vp's vid
6744 * was not the same as what we'd seen the last time around.
6745 * I do not think that check was valid, because if we retry
6746 * and all dirents are gone, the directory could legitimately
6747 * be recycled but still be present in a situation where we would
6748 * have had permission to delete. Therefore, we won't make
6749 * an effort to preserve that check now that we may not have a
6750 * vp here.
2d21ac55 6751 */
6d2010ae
A
6752
6753 if (!batched) {
6754 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
6755 if (error) {
6756 goto out;
6757 }
6758 }
2d21ac55 6759 } else {
6d2010ae
A
6760 batched = 1;
6761
6762 if (!vnode_compound_rmdir_available(dvp)) {
6763 panic("No error, but no compound rmdir?");
6764 }
91447636 6765 }
6d2010ae 6766
2d21ac55 6767#if CONFIG_FSE
6d2010ae 6768 fse_info finfo;
b0d623f7 6769
6d2010ae
A
6770 need_event = need_fsevent(FSE_DELETE, dvp);
6771 if (need_event) {
6772 if (!batched) {
2d21ac55 6773 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
6774 } else {
6775 error = vfs_get_notify_attributes(&va);
6776 if (error) {
6777 goto out;
6778 }
6779
6780 vap = &va;
2d21ac55 6781 }
6d2010ae 6782 }
2d21ac55 6783#endif
6d2010ae
A
6784 has_listeners = kauth_authorize_fileop_has_listeners();
6785 if (need_event || has_listeners) {
6786 if (path == NULL) {
2d21ac55
A
6787 GET_PATH(path);
6788 if (path == NULL) {
6789 error = ENOMEM;
6790 goto out;
6791 }
6d2010ae 6792 }
b0d623f7 6793
6d2010ae 6794 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
b0d623f7 6795#if CONFIG_FSE
6d2010ae
A
6796 if (truncated) {
6797 finfo.mode |= FSE_TRUNCATED_PATH;
2d21ac55 6798 }
6d2010ae
A
6799#endif
6800 }
91447636 6801
6d2010ae
A
6802 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
6803 nd.ni_vp = vp;
6804 if (vp == NULLVP) {
6805 /* Couldn't find a vnode */
6806 goto out;
6807 }
2d21ac55 6808
6d2010ae
A
6809 if (error == EKEEPLOOKING) {
6810 goto continue_lookup;
6811 }
39236c6e 6812#if CONFIG_APPLEDOUBLE
6d2010ae
A
6813 /*
6814 * Special case to remove orphaned AppleDouble
6815 * files. I don't like putting this in the kernel,
6816 * but carbon does not like putting this in carbon either,
6817 * so here we are.
6818 */
6819 if (error == ENOTEMPTY) {
6820 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
6821 if (error == EBUSY) {
6822 goto out;
2d21ac55
A
6823 }
6824
6d2010ae 6825
2d21ac55 6826 /*
6d2010ae 6827 * Assuming everything went well, we will try the RMDIR again
2d21ac55 6828 */
6d2010ae
A
6829 if (!error)
6830 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
6831 }
39236c6e 6832#endif /* CONFIG_APPLEDOUBLE */
6d2010ae
A
6833 /*
6834 * Call out to allow 3rd party notification of delete.
6835 * Ignore result of kauth_authorize_fileop call.
6836 */
6837 if (!error) {
6838 if (has_listeners) {
6839 kauth_authorize_fileop(vfs_context_ucred(ctx),
6840 KAUTH_FILEOP_DELETE,
6841 (uintptr_t)vp,
6842 (uintptr_t)path);
6843 }
6844
6845 if (vp->v_flag & VISHARDLINK) {
6846 // see the comment in unlink1() about why we update
6847 // the parent of a hard link when it is removed
6848 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
6849 }
2d21ac55
A
6850
6851#if CONFIG_FSE
6d2010ae
A
6852 if (need_event) {
6853 if (vap) {
6854 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 6855 }
6d2010ae
A
6856 add_fsevent(FSE_DELETE, ctx,
6857 FSE_ARG_STRING, len, path,
6858 FSE_ARG_FINFO, &finfo,
6859 FSE_ARG_DONE);
2d21ac55 6860 }
6d2010ae 6861#endif
2d21ac55
A
6862 }
6863
6864out:
6d2010ae
A
6865 if (path != NULL) {
6866 RELEASE_PATH(path);
6867 path = NULL;
6868 }
2d21ac55
A
6869 /*
6870 * nameidone has to happen before we vnode_put(dvp)
6871 * since it may need to release the fs_nodelock on the dvp
6872 */
6873 nameidone(&nd);
2d21ac55 6874 vnode_put(dvp);
6d2010ae
A
6875
6876 if (vp)
6877 vnode_put(vp);
2d21ac55
A
6878
6879 if (restart_flag == 0) {
6880 wakeup_one((caddr_t)vp);
6881 return (error);
6882 }
6883 tsleep(vp, PVFS, "rm AD", 1);
6884
6885 } while (restart_flag != 0);
91447636 6886
1c79356b 6887 return (error);
2d21ac55 6888
1c79356b 6889}
91447636 6890
2d21ac55
A
6891/* Get direntry length padded to 8 byte alignment */
6892#define DIRENT64_LEN(namlen) \
6893 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
6894
6895static errno_t
6896vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
6897 int *numdirent, vfs_context_t ctxp)
6898{
6899 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
6d2010ae
A
6900 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
6901 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
2d21ac55
A
6902 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
6903 } else {
6904 size_t bufsize;
6905 void * bufptr;
6906 uio_t auio;
15129b1c 6907 struct direntry *entry64;
2d21ac55
A
6908 struct dirent *dep;
6909 int bytesread;
6910 int error;
6911
6912 /*
6913 * Our kernel buffer needs to be smaller since re-packing
6914 * will expand each dirent. The worse case (when the name
6915 * length is 3) corresponds to a struct direntry size of 32
6916 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
6917 * (4-byte aligned). So having a buffer that is 3/8 the size
6918 * will prevent us from reading more than we can pack.
6919 *
6920 * Since this buffer is wired memory, we will limit the
6921 * buffer size to a maximum of 32K. We would really like to
6922 * use 32K in the MIN(), but we use magic number 87371 to
6923 * prevent uio_resid() * 3 / 8 from overflowing.
6924 */
316670eb 6925 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
2d21ac55 6926 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
b0d623f7
A
6927 if (bufptr == NULL) {
6928 return ENOMEM;
6929 }
2d21ac55 6930
b0d623f7 6931 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
2d21ac55
A
6932 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
6933 auio->uio_offset = uio->uio_offset;
6934
6935 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
6936
6937 dep = (struct dirent *)bufptr;
6938 bytesread = bufsize - uio_resid(auio);
6939
15129b1c
A
6940 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
6941 M_TEMP, M_WAITOK);
2d21ac55
A
6942 /*
6943 * Convert all the entries and copy them out to user's buffer.
6944 */
6945 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
15129b1c
A
6946 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
6947
6948 bzero(entry64, enbufsize);
2d21ac55 6949 /* Convert a dirent to a dirent64. */
15129b1c
A
6950 entry64->d_ino = dep->d_ino;
6951 entry64->d_seekoff = 0;
6952 entry64->d_reclen = enbufsize;
6953 entry64->d_namlen = dep->d_namlen;
6954 entry64->d_type = dep->d_type;
6955 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
2d21ac55
A
6956
6957 /* Move to next entry. */
6958 dep = (struct dirent *)((char *)dep + dep->d_reclen);
6959
6960 /* Copy entry64 to user's buffer. */
15129b1c 6961 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
2d21ac55
A
6962 }
6963
6964 /* Update the real offset using the offset we got from VNOP_READDIR. */
6965 if (error == 0) {
6966 uio->uio_offset = auio->uio_offset;
6967 }
6968 uio_free(auio);
6969 FREE(bufptr, M_TEMP);
15129b1c 6970 FREE(entry64, M_TEMP);
2d21ac55
A
6971 return (error);
6972 }
6973}
1c79356b 6974
39236c6e
A
6975#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
6976
1c79356b
A
6977/*
6978 * Read a block of directory entries in a file system independent format.
6979 */
2d21ac55
A
6980static int
6981getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
6982 off_t *offset, int flags)
1c79356b 6983{
2d21ac55
A
6984 vnode_t vp;
6985 struct vfs_context context = *vfs_context_current(); /* local copy */
91447636
A
6986 struct fileproc *fp;
6987 uio_t auio;
2d21ac55
A
6988 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6989 off_t loff;
6990 int error, eofflag, numdirent;
91447636 6991 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 6992
2d21ac55
A
6993 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
6994 if (error) {
1c79356b 6995 return (error);
2d21ac55 6996 }
91447636
A
6997 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
6998 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6999 error = EBADF;
7000 goto out;
7001 }
2d21ac55 7002
39236c6e
A
7003 if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
7004 bufsize = GETDIRENTRIES_MAXBUFSIZE;
7005
2d21ac55
A
7006#if CONFIG_MACF
7007 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
7008 if (error)
7009 goto out;
7010#endif
91447636
A
7011 if ( (error = vnode_getwithref(vp)) ) {
7012 goto out;
7013 }
91447636 7014 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
55e303ae 7015
1c79356b 7016unionread:
91447636
A
7017 if (vp->v_type != VDIR) {
7018 (void)vnode_put(vp);
7019 error = EINVAL;
7020 goto out;
7021 }
2d21ac55
A
7022
7023#if CONFIG_MACF
7024 error = mac_vnode_check_readdir(&context, vp);
7025 if (error != 0) {
7026 (void)vnode_put(vp);
7027 goto out;
7028 }
7029#endif /* MAC */
91447636
A
7030
7031 loff = fp->f_fglob->fg_offset;
2d21ac55
A
7032 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
7033 uio_addiov(auio, bufp, bufsize);
91447636 7034
2d21ac55
A
7035 if (flags & VNODE_READDIR_EXTENDED) {
7036 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
7037 fp->f_fglob->fg_offset = uio_offset(auio);
7038 } else {
7039 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
7040 fp->f_fglob->fg_offset = uio_offset(auio);
7041 }
91447636
A
7042 if (error) {
7043 (void)vnode_put(vp);
7044 goto out;
7045 }
1c79356b 7046
2d21ac55
A
7047 if ((user_ssize_t)bufsize == uio_resid(auio)){
7048 if (union_dircheckp) {
7049 error = union_dircheckp(&vp, fp, &context);
7050 if (error == -1)
7051 goto unionread;
7052 if (error)
7053 goto out;
1c79356b
A
7054 }
7055
39236c6e 7056 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
2d21ac55 7057 struct vnode *tvp = vp;
39236c6e
A
7058 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
7059 vnode_ref(vp);
7060 fp->f_fglob->fg_data = (caddr_t) vp;
7061 fp->f_fglob->fg_offset = 0;
7062 vnode_rele(tvp);
7063 vnode_put(tvp);
7064 goto unionread;
7065 }
7066 vp = tvp;
1c79356b
A
7067 }
7068 }
2d21ac55 7069
91447636 7070 vnode_put(vp);
2d21ac55
A
7071 if (offset) {
7072 *offset = loff;
7073 }
b0d623f7 7074
2d21ac55 7075 *bytesread = bufsize - uio_resid(auio);
91447636
A
7076out:
7077 file_drop(fd);
1c79356b
A
7078 return (error);
7079}
7080
2d21ac55
A
7081
7082int
b0d623f7 7083getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
2d21ac55
A
7084{
7085 off_t offset;
2d21ac55
A
7086 ssize_t bytesread;
7087 int error;
7088
7089 AUDIT_ARG(fd, uap->fd);
7090 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
7091
7092 if (error == 0) {
b0d623f7
A
7093 if (proc_is64bit(p)) {
7094 user64_long_t base = (user64_long_t)offset;
7095 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
7096 } else {
7097 user32_long_t base = (user32_long_t)offset;
7098 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
7099 }
2d21ac55
A
7100 *retval = bytesread;
7101 }
7102 return (error);
7103}
7104
7105int
7106getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
7107{
7108 off_t offset;
7109 ssize_t bytesread;
7110 int error;
7111
7112 AUDIT_ARG(fd, uap->fd);
7113 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
7114
7115 if (error == 0) {
7116 *retval = bytesread;
7117 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
7118 }
7119 return (error);
7120}
7121
7122
1c79356b
A
7123/*
7124 * Set the mode mask for creation of filesystem nodes.
b0d623f7 7125 * XXX implement xsecurity
1c79356b 7126 */
91447636
A
7127#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
7128static int
b0d623f7 7129umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
1c79356b 7130{
2d21ac55 7131 struct filedesc *fdp;
1c79356b 7132
91447636 7133 AUDIT_ARG(mask, newmask);
2d21ac55 7134 proc_fdlock(p);
1c79356b
A
7135 fdp = p->p_fd;
7136 *retval = fdp->fd_cmask;
91447636 7137 fdp->fd_cmask = newmask & ALLPERMS;
2d21ac55 7138 proc_fdunlock(p);
1c79356b
A
7139 return (0);
7140}
7141
b0d623f7
A
7142/*
7143 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
7144 *
7145 * Parameters: p Process requesting to set the umask
7146 * uap User argument descriptor (see below)
7147 * retval umask of the process (parameter p)
7148 *
7149 * Indirect: uap->newmask umask to set
7150 * uap->xsecurity ACL to set
7151 *
7152 * Returns: 0 Success
7153 * !0 Not success
7154 *
7155 */
7156int
7157umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
91447636
A
7158{
7159 int ciferror;
7160 kauth_filesec_t xsecdst;
7161
7162 xsecdst = KAUTH_FILESEC_NONE;
7163 if (uap->xsecurity != USER_ADDR_NULL) {
7164 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
7165 return ciferror;
7166 } else {
7167 xsecdst = KAUTH_FILESEC_NONE;
7168 }
7169
7170 ciferror = umask1(p, uap->newmask, xsecdst, retval);
7171
7172 if (xsecdst != KAUTH_FILESEC_NONE)
7173 kauth_filesec_free(xsecdst);
7174 return ciferror;
7175}
7176
7177int
b0d623f7 7178umask(proc_t p, struct umask_args *uap, int32_t *retval)
91447636
A
7179{
7180 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
7181}
7182
1c79356b
A
7183/*
7184 * Void all references to file by ripping underlying filesystem
7185 * away from vnode.
7186 */
1c79356b
A
7187/* ARGSUSED */
7188int
b0d623f7 7189revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
1c79356b 7190{
2d21ac55 7191 vnode_t vp;
91447636 7192 struct vnode_attr va;
2d21ac55 7193 vfs_context_t ctx = vfs_context_current();
1c79356b
A
7194 int error;
7195 struct nameidata nd;
7196
6d2010ae
A
7197 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
7198 uap->path, ctx);
55e303ae
A
7199 error = namei(&nd);
7200 if (error)
1c79356b
A
7201 return (error);
7202 vp = nd.ni_vp;
91447636
A
7203
7204 nameidone(&nd);
7205
b0d623f7
A
7206 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
7207 error = ENOTSUP;
7208 goto out;
7209 }
7210
7211 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
7212 error = EBUSY;
7213 goto out;
7214 }
7215
2d21ac55
A
7216#if CONFIG_MACF
7217 error = mac_vnode_check_revoke(ctx, vp);
7218 if (error)
7219 goto out;
7220#endif
7221
91447636
A
7222 VATTR_INIT(&va);
7223 VATTR_WANTED(&va, va_uid);
2d21ac55 7224 if ((error = vnode_getattr(vp, &va, ctx)))
1c79356b 7225 goto out;
2d21ac55
A
7226 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
7227 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
1c79356b 7228 goto out;
b0d623f7 7229 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
2d21ac55 7230 VNOP_REVOKE(vp, REVOKEALL, ctx);
1c79356b 7231out:
91447636 7232 vnode_put(vp);
1c79356b
A
7233 return (error);
7234}
7235
0b4e3aa0 7236
1c79356b
A
7237/*
 7238 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
9bccf70c 7239 * The following system calls are designed to support features
1c79356b
A
7240 * which are specific to the HFS & HFS Plus volume formats
7241 */
7242
9bccf70c 7243
1c79356b 7244/*
39236c6e
A
7245 * Obtain attribute information on objects in a directory while enumerating
7246 * the directory.
7247 */
1c79356b
A
7248/* ARGSUSED */
7249int
b0d623f7 7250getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
1c79356b 7251{
2d21ac55 7252 vnode_t vp;
91447636
A
7253 struct fileproc *fp;
7254 uio_t auio = NULL;
7255 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
39236c6e 7256 uint32_t count, savecount;
2d21ac55 7257 uint32_t newstate;
91447636 7258 int error, eofflag;
2d21ac55 7259 uint32_t loff;
91447636 7260 struct attrlist attributelist;
2d21ac55 7261 vfs_context_t ctx = vfs_context_current();
91447636
A
7262 int fd = uap->fd;
7263 char uio_buf[ UIO_SIZEOF(1) ];
7264 kauth_action_t action;
7265
7266 AUDIT_ARG(fd, fd);
7267
7268 /* Get the attributes into kernel space */
2d21ac55 7269 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
91447636 7270 return(error);
2d21ac55
A
7271 }
7272 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
7273 return(error);
7274 }
39236c6e 7275 savecount = count;
2d21ac55 7276 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
91447636 7277 return (error);
2d21ac55 7278 }
91447636
A
7279 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7280 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7281 error = EBADF;
7282 goto out;
7283 }
2d21ac55
A
7284
7285
7286#if CONFIG_MACF
7287 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
7288 fp->f_fglob);
7289 if (error)
7290 goto out;
7291#endif
7292
7293
91447636
A
7294 if ( (error = vnode_getwithref(vp)) )
7295 goto out;
55e303ae 7296
91447636 7297 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 7298
39236c6e 7299unionread:
91447636
A
7300 if (vp->v_type != VDIR) {
7301 (void)vnode_put(vp);
7302 error = EINVAL;
7303 goto out;
7304 }
55e303ae 7305
2d21ac55
A
7306#if CONFIG_MACF
7307 error = mac_vnode_check_readdir(ctx, vp);
7308 if (error != 0) {
7309 (void)vnode_put(vp);
7310 goto out;
7311 }
7312#endif /* MAC */
7313
91447636
A
7314 /* set up the uio structure which will contain the users return buffer */
7315 loff = fp->f_fglob->fg_offset;
39236c6e 7316 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
91447636
A
7317 uio_addiov(auio, uap->buffer, uap->buffersize);
7318
91447636
A
7319 /*
7320 * If the only item requested is file names, we can let that past with
7321 * just LIST_DIRECTORY. If they want any other attributes, that means
7322 * they need SEARCH as well.
7323 */
7324 action = KAUTH_VNODE_LIST_DIRECTORY;
7325 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
7326 attributelist.fileattr || attributelist.dirattr)
7327 action |= KAUTH_VNODE_SEARCH;
7328
2d21ac55 7329 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
2d21ac55 7330
b0d623f7
A
7331 /* Believe it or not, uap->options only has 32-bits of valid
7332 * info, so truncate before extending again */
39236c6e
A
7333
7334 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
7335 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
7336 }
7337
7338 if (error) {
7339 (void) vnode_put(vp);
7340 goto out;
7341 }
7342
7343 /*
7344 * If we've got the last entry of a directory in a union mount
7345 * then reset the eofflag and pretend there's still more to come.
7346 * The next call will again set eofflag and the buffer will be empty,
7347 * so traverse to the underlying directory and do the directory
7348 * read there.
7349 */
7350 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
7351 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
7352 eofflag = 0;
7353 } else { // Empty buffer
7354 struct vnode *tvp = vp;
7355 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
7356 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
7357 fp->f_fglob->fg_data = (caddr_t) vp;
7358 fp->f_fglob->fg_offset = 0; // reset index for new dir
7359 count = savecount;
7360 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
7361 vnode_put(tvp);
7362 goto unionread;
7363 }
7364 vp = tvp;
7365 }
2d21ac55 7366 }
39236c6e 7367
91447636 7368 (void)vnode_put(vp);
1c79356b 7369
91447636
A
7370 if (error)
7371 goto out;
7372 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
1c79356b 7373
2d21ac55 7374 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
91447636 7375 goto out;
2d21ac55 7376 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
91447636 7377 goto out;
2d21ac55 7378 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
91447636 7379 goto out;
1c79356b
A
7380
7381 *retval = eofflag; /* similar to getdirentries */
91447636 7382 error = 0;
2d21ac55 7383out:
91447636
A
7384 file_drop(fd);
7385 return (error); /* return error earlier, an retval of 0 or 1 now */
1c79356b 7386
39236c6e 7387} /* end of getdirentriesattr system call */
1c79356b
A
7388
7389/*
7390* Exchange data between two files
7391*/
7392
1c79356b
A
7393/* ARGSUSED */
7394int
b0d623f7 7395exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
1c79356b
A
7396{
7397
7398 struct nameidata fnd, snd;
2d21ac55
A
7399 vfs_context_t ctx = vfs_context_current();
7400 vnode_t fvp;
7401 vnode_t svp;
7402 int error;
b0d623f7 7403 u_int32_t nameiflags;
91447636
A
7404 char *fpath = NULL;
7405 char *spath = NULL;
b0d623f7
A
7406 int flen=0, slen=0;
7407 int from_truncated=0, to_truncated=0;
7408#if CONFIG_FSE
91447636 7409 fse_info f_finfo, s_finfo;
b0d623f7
A
7410#endif
7411
1c79356b
A
7412 nameiflags = 0;
7413 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
7414
6d2010ae
A
7415 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
7416 UIO_USERSPACE, uap->path1, ctx);
1c79356b 7417
6d2010ae
A
7418 error = namei(&fnd);
7419 if (error)
7420 goto out2;
1c79356b 7421
91447636
A
7422 nameidone(&fnd);
7423 fvp = fnd.ni_vp;
1c79356b 7424
6d2010ae
A
7425 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
7426 UIO_USERSPACE, uap->path2, ctx);
1c79356b 7427
6d2010ae
A
7428 error = namei(&snd);
7429 if (error) {
91447636 7430 vnode_put(fvp);
55e303ae 7431 goto out2;
6d2010ae 7432 }
91447636 7433 nameidone(&snd);
1c79356b
A
7434 svp = snd.ni_vp;
7435
91447636
A
7436 /*
7437 * if the files are the same, return an inval error
7438 */
1c79356b 7439 if (svp == fvp) {
91447636
A
7440 error = EINVAL;
7441 goto out;
7442 }
1c79356b 7443
91447636
A
7444 /*
7445 * if the files are on different volumes, return an error
7446 */
7447 if (svp->v_mount != fvp->v_mount) {
7448 error = EXDEV;
7449 goto out;
7450 }
2d21ac55 7451
39236c6e
A
7452 /* If they're not files, return an error */
7453 if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
db609669
A
7454 error = EINVAL;
7455 goto out;
7456 }
7457
2d21ac55
A
7458#if CONFIG_MACF
7459 error = mac_vnode_check_exchangedata(ctx,
7460 fvp, svp);
7461 if (error)
7462 goto out;
7463#endif
7464 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
7465 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
91447636 7466 goto out;
1c79356b 7467
2d21ac55
A
7468 if (
7469#if CONFIG_FSE
7470 need_fsevent(FSE_EXCHANGE, fvp) ||
7471#endif
7472 kauth_authorize_fileop_has_listeners()) {
7473 GET_PATH(fpath);
7474 GET_PATH(spath);
7475 if (fpath == NULL || spath == NULL) {
7476 error = ENOMEM;
7477 goto out;
7478 }
b0d623f7
A
7479
7480 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
7481 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
7482
2d21ac55
A
7483#if CONFIG_FSE
7484 get_fse_info(fvp, &f_finfo, ctx);
7485 get_fse_info(svp, &s_finfo, ctx);
b0d623f7
A
7486 if (from_truncated || to_truncated) {
7487 // set it here since only the f_finfo gets reported up to user space
7488 f_finfo.mode |= FSE_TRUNCATED_PATH;
7489 }
2d21ac55 7490#endif
91447636 7491 }
1c79356b 7492 /* Ok, make the call */
2d21ac55 7493 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
55e303ae 7494
91447636 7495 if (error == 0) {
2d21ac55 7496 const char *tmpname;
91447636
A
7497
7498 if (fpath != NULL && spath != NULL) {
7499 /* call out to allow 3rd party notification of exchangedata.
7500 * Ignore result of kauth_authorize_fileop call.
7501 */
2d21ac55 7502 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
91447636
A
7503 (uintptr_t)fpath, (uintptr_t)spath);
7504 }
7505 name_cache_lock();
7506
7507 tmpname = fvp->v_name;
7508 fvp->v_name = svp->v_name;
7509 svp->v_name = tmpname;
7510
7511 if (fvp->v_parent != svp->v_parent) {
2d21ac55 7512 vnode_t tmp;
91447636
A
7513
7514 tmp = fvp->v_parent;
7515 fvp->v_parent = svp->v_parent;
7516 svp->v_parent = tmp;
7517 }
7518 name_cache_unlock();
7519
2d21ac55 7520#if CONFIG_FSE
91447636 7521 if (fpath != NULL && spath != NULL) {
2d21ac55 7522 add_fsevent(FSE_EXCHANGE, ctx,
91447636
A
7523 FSE_ARG_STRING, flen, fpath,
7524 FSE_ARG_FINFO, &f_finfo,
7525 FSE_ARG_STRING, slen, spath,
7526 FSE_ARG_FINFO, &s_finfo,
7527 FSE_ARG_DONE);
7528 }
2d21ac55 7529#endif
55e303ae
A
7530 }
7531
1c79356b 7532out:
2d21ac55
A
7533 if (fpath != NULL)
7534 RELEASE_PATH(fpath);
7535 if (spath != NULL)
7536 RELEASE_PATH(spath);
91447636
A
7537 vnode_put(svp);
7538 vnode_put(fvp);
1c79356b 7539out2:
1c79356b 7540 return (error);
91447636 7541}
1c79356b 7542
39236c6e
A
7543/*
7544 * Return (in MB) the amount of freespace on the given vnode's volume.
7545 */
7546uint32_t freespace_mb(vnode_t vp);
7547
7548uint32_t
7549freespace_mb(vnode_t vp)
7550{
7551 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
7552 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
7553 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
7554}
7555
316670eb 7556#if CONFIG_SEARCHFS
1c79356b 7557
1c79356b
A
7558/* ARGSUSED */
7559
7560int
b0d623f7 7561searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
1c79356b 7562{
39236c6e
A
7563 vnode_t vp, tvp;
7564 int i, error=0;
1c79356b
A
7565 int fserror = 0;
7566 struct nameidata nd;
b0d623f7 7567 struct user64_fssearchblock searchblock;
1c79356b
A
7568 struct searchstate *state;
7569 struct attrlist *returnattrs;
b0d623f7 7570 struct timeval timelimit;
1c79356b 7571 void *searchparams1,*searchparams2;
91447636
A
7572 uio_t auio = NULL;
7573 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
b0d623f7 7574 uint32_t nummatches;
1c79356b 7575 int mallocsize;
b0d623f7 7576 uint32_t nameiflags;
2d21ac55 7577 vfs_context_t ctx = vfs_context_current();
91447636 7578 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 7579
39236c6e 7580 /* Start by copying in fsearchblock parameter list */
91447636 7581 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
7582 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
7583 timelimit.tv_sec = searchblock.timelimit.tv_sec;
7584 timelimit.tv_usec = searchblock.timelimit.tv_usec;
91447636
A
7585 }
7586 else {
b0d623f7
A
7587 struct user32_fssearchblock tmp_searchblock;
7588
91447636
A
7589 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
7590 // munge into 64-bit version
7591 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
7592 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
7593 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
7594 searchblock.maxmatches = tmp_searchblock.maxmatches;
b0d623f7
A
7595 /*
7596 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
7597 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
7598 */
7599 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
7600 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
91447636
A
7601 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
7602 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
7603 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
7604 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
7605 searchblock.searchattrs = tmp_searchblock.searchattrs;
7606 }
7607 if (error)
1c79356b
A
7608 return(error);
7609
a3d08fcd
A
7610 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
7611 */
7612 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
7613 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
7614 return(EINVAL);
91447636 7615
1c79356b
A
7616 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
7617 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
7618 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
7619 /* block. */
7620
91447636 7621 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
1c79356b
A
7622 sizeof(struct attrlist) + sizeof(struct searchstate);
7623
7624 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
7625
7626 /* Now set up the various pointers to the correct place in our newly allocated memory */
7627
7628 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
7629 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
7630 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
7631
7632 /* Now copy in the stuff given our local variables. */
7633
91447636 7634 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
1c79356b
A
7635 goto freeandexit;
7636
91447636 7637 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
1c79356b
A
7638 goto freeandexit;
7639
91447636 7640 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
1c79356b
A
7641 goto freeandexit;
7642
91447636 7643 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
1c79356b 7644 goto freeandexit;
1c79356b 7645
39236c6e
A
7646 /*
7647 * When searching a union mount, need to set the
7648 * start flag at the first call on each layer to
7649 * reset state for the new volume.
7650 */
7651 if (uap->options & SRCHFS_START)
7652 state->ss_union_layer = 0;
7653 else
7654 uap->options |= state->ss_union_flags;
7655 state->ss_union_flags = 0;
b0d623f7
A
7656
7657 /*
7658 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
7659 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
7660 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
7661 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
7662 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
7663 */
7664
7665 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
7666 attrreference_t* string_ref;
7667 u_int32_t* start_length;
7668 user64_size_t param_length;
7669
7670 /* validate searchparams1 */
7671 param_length = searchblock.sizeofsearchparams1;
7672 /* skip the word that specifies length of the buffer */
7673 start_length= (u_int32_t*) searchparams1;
7674 start_length= start_length+1;
7675 string_ref= (attrreference_t*) start_length;
7676
7677 /* ensure no negative offsets or too big offsets */
7678 if (string_ref->attr_dataoffset < 0 ) {
7679 error = EINVAL;
7680 goto freeandexit;
7681 }
7682 if (string_ref->attr_length > MAXPATHLEN) {
7683 error = EINVAL;
7684 goto freeandexit;
7685 }
7686
7687 /* Check for pointer overflow in the string ref */
7688 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
7689 error = EINVAL;
7690 goto freeandexit;
7691 }
7692
7693 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
7694 error = EINVAL;
7695 goto freeandexit;
7696 }
7697 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
7698 error = EINVAL;
7699 goto freeandexit;
7700 }
7701 }
7702
7703 /* set up the uio structure which will contain the users return buffer */
39236c6e
A
7704 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
7705 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
1c79356b 7706
91447636 7707 nameiflags = 0;
1c79356b 7708 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6d2010ae
A
7709 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
7710 UIO_USERSPACE, uap->path, ctx);
1c79356b 7711
55e303ae
A
7712 error = namei(&nd);
7713 if (error)
1c79356b 7714 goto freeandexit;
39236c6e 7715 vp = nd.ni_vp;
91447636 7716 nameidone(&nd);
39236c6e
A
7717
7718 /*
7719 * Switch to the root vnode for the volume
7720 */
7721 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
7722 if (error)
7723 goto freeandexit;
7724 vnode_put(vp);
7725 vp = tvp;
7726
7727 /*
7728 * If it's a union mount, the path lookup takes
7729 * us to the top layer. But we may need to descend
7730 * to a lower layer. For non-union mounts the layer
7731 * is always zero.
7732 */
7733 for (i = 0; i < (int) state->ss_union_layer; i++) {
7734 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
7735 break;
7736 tvp = vp;
7737 vp = vp->v_mount->mnt_vnodecovered;
7738 if (vp == NULL) {
7739 vp = tvp;
7740 error = ENOENT;
7741 goto freeandexit;
7742 }
7743 vnode_getwithref(vp);
7744 vnode_put(tvp);
7745 }
1c79356b 7746
6d2010ae
A
7747#if CONFIG_MACF
7748 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
7749 if (error) {
7750 vnode_put(vp);
7751 goto freeandexit;
7752 }
7753#endif
7754
1c79356b
A
7755
7756 /*
7757 * If searchblock.maxmatches == 0, then skip the search. This has happened
39236c6e 7758 * before and sometimes the underlying code doesnt deal with it well.
1c79356b
A
7759 */
7760 if (searchblock.maxmatches == 0) {
7761 nummatches = 0;
7762 goto saveandexit;
7763 }
7764
7765 /*
39236c6e
A
7766 * Allright, we have everything we need, so lets make that call.
7767 *
7768 * We keep special track of the return value from the file system:
7769 * EAGAIN is an acceptable error condition that shouldn't keep us
7770 * from copying out any results...
1c79356b
A
7771 */
7772
6d2010ae 7773 fserror = VNOP_SEARCHFS(vp,
39236c6e
A
7774 searchparams1,
7775 searchparams2,
7776 &searchblock.searchattrs,
7777 (u_long)searchblock.maxmatches,
7778 &timelimit,
7779 returnattrs,
7780 &nummatches,
7781 (u_long)uap->scriptcode,
7782 (u_long)uap->options,
7783 auio,
7784 (struct searchstate *) &state->ss_fsstate,
7785 ctx);
6d2010ae 7786
39236c6e
A
7787 /*
7788 * If it's a union mount we need to be called again
7789 * to search the mounted-on filesystem.
7790 */
7791 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
7792 state->ss_union_flags = SRCHFS_START;
7793 state->ss_union_layer++; // search next layer down
7794 fserror = EAGAIN;
7795 }
7796
6d2010ae
A
7797saveandexit:
7798
7799 vnode_put(vp);
7800
7801 /* Now copy out the stuff that needs copying out. That means the number of matches, the
7802 search state. Everything was already put into he return buffer by the vop call. */
7803
7804 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
7805 goto freeandexit;
7806
39236c6e 7807 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
6d2010ae
A
7808 goto freeandexit;
7809
7810 error = fserror;
7811
7812freeandexit:
7813
7814 FREE(searchparams1,M_TEMP);
7815
7816 return(error);
7817
7818
7819} /* end of searchfs system call */
7820
316670eb
A
7821#else /* CONFIG_SEARCHFS */
7822
7823int
7824searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
7825{
7826 return (ENOTSUP);
7827}
7828
7829#endif /* CONFIG_SEARCHFS */
6d2010ae
A
7830
7831
7832lck_grp_attr_t * nspace_group_attr;
7833lck_attr_t * nspace_lock_attr;
7834lck_grp_t * nspace_mutex_group;
7835
7836lck_mtx_t nspace_handler_lock;
7837lck_mtx_t nspace_handler_exclusion_lock;
7838
7839time_t snapshot_timestamp=0;
7840int nspace_allow_virtual_devs=0;
7841
7842void nspace_handler_init(void);
7843
7844typedef struct nspace_item_info {
7845 struct vnode *vp;
7846 void *arg;
7847 uint64_t op;
7848 uint32_t vid;
7849 uint32_t flags;
7850 uint32_t token;
7851 uint32_t refcount;
7852} nspace_item_info;
7853
7854#define MAX_NSPACE_ITEMS 128
7855nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
7856uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
7857uint32_t nspace_token_id=0;
7858uint32_t nspace_handler_timeout = 15; // seconds
7859
7860#define NSPACE_ITEM_NEW 0x0001
7861#define NSPACE_ITEM_PROCESSING 0x0002
7862#define NSPACE_ITEM_DEAD 0x0004
7863#define NSPACE_ITEM_CANCELLED 0x0008
7864#define NSPACE_ITEM_DONE 0x0010
7865#define NSPACE_ITEM_RESET_TIMER 0x0020
7866
7867#define NSPACE_ITEM_NSPACE_EVENT 0x0040
7868#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
7869#define NSPACE_ITEM_TRACK_EVENT 0x0100
7870
7871#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT | NSPACE_ITEM_TRACK_EVENT)
7872
7873//#pragma optimization_level 0
7874
7875typedef enum {
7876 NSPACE_HANDLER_NSPACE = 0,
7877 NSPACE_HANDLER_SNAPSHOT = 1,
7878 NSPACE_HANDLER_TRACK = 2,
7879
7880 NSPACE_HANDLER_COUNT,
7881} nspace_type_t;
7882
7883typedef struct {
7884 uint64_t handler_tid;
7885 struct proc *handler_proc;
7886 int handler_busy;
7887} nspace_handler_t;
7888
7889nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
7890
39236c6e
A
7891/* namespace fsctl functions */
7892static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
7893static int nspace_item_flags_for_type(nspace_type_t nspace_type);
7894static int nspace_open_flags_for_type(nspace_type_t nspace_type);
7895static nspace_type_t nspace_type_for_op(uint64_t op);
7896static int nspace_is_special_process(struct proc *proc);
7897static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
7898static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
7899static int validate_namespace_args (int is64bit, int size);
7900static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
7901
7902
6d2010ae
A
7903static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
7904{
7905 switch(nspace_type) {
7906 case NSPACE_HANDLER_NSPACE:
7907 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
7908 case NSPACE_HANDLER_SNAPSHOT:
7909 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
7910 case NSPACE_HANDLER_TRACK:
7911 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_TRACK_EVENT;
7912 default:
7913 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
7914 return 0;
7915 }
7916}
7917
7918static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
7919{
7920 switch(nspace_type) {
7921 case NSPACE_HANDLER_NSPACE:
7922 return NSPACE_ITEM_NSPACE_EVENT;
7923 case NSPACE_HANDLER_SNAPSHOT:
7924 return NSPACE_ITEM_SNAPSHOT_EVENT;
7925 case NSPACE_HANDLER_TRACK:
7926 return NSPACE_ITEM_TRACK_EVENT;
7927 default:
7928 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
7929 return 0;
7930 }
7931}
7932
7933static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
7934{
7935 switch(nspace_type) {
7936 case NSPACE_HANDLER_NSPACE:
7937 return FREAD | FWRITE | O_EVTONLY;
7938 case NSPACE_HANDLER_SNAPSHOT:
7939 case NSPACE_HANDLER_TRACK:
7940 return FREAD | O_EVTONLY;
7941 default:
7942 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
7943 return 0;
7944 }
7945}
7946
7947static inline nspace_type_t nspace_type_for_op(uint64_t op)
7948{
7949 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
7950 case NAMESPACE_HANDLER_NSPACE_EVENT:
7951 return NSPACE_HANDLER_NSPACE;
7952 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
7953 return NSPACE_HANDLER_SNAPSHOT;
7954 case NAMESPACE_HANDLER_TRACK_EVENT:
7955 return NSPACE_HANDLER_TRACK;
7956 default:
7957 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
7958 return NSPACE_HANDLER_NSPACE;
7959 }
7960}
7961
7962static inline int nspace_is_special_process(struct proc *proc)
7963{
7964 int i;
7965 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
7966 if (proc == nspace_handlers[i].handler_proc)
7967 return 1;
7968 }
7969 return 0;
7970}
7971
7972void
7973nspace_handler_init(void)
7974{
7975 nspace_lock_attr = lck_attr_alloc_init();
7976 nspace_group_attr = lck_grp_attr_alloc_init();
7977 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
7978 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
7979 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
7980 memset(&nspace_items[0], 0, sizeof(nspace_items));
7981}
7982
7983void
7984nspace_proc_exit(struct proc *p)
7985{
7986 int i, event_mask = 0;
7987
7988 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
7989 if (p == nspace_handlers[i].handler_proc) {
7990 event_mask |= nspace_item_flags_for_type(i);
7991 nspace_handlers[i].handler_tid = 0;
7992 nspace_handlers[i].handler_proc = NULL;
7993 }
7994 }
7995
7996 if (event_mask == 0) {
7997 return;
7998 }
7999
8000 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
8001 // if this process was the snapshot handler, zero snapshot_timeout
8002 snapshot_timestamp = 0;
8003 }
8004
8005 //
8006 // unblock anyone that's waiting for the handler that died
8007 //
8008 lck_mtx_lock(&nspace_handler_lock);
8009 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8010 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
8011
8012 if ( nspace_items[i].flags & event_mask ) {
8013
8014 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
8015 vnode_lock_spin(nspace_items[i].vp);
8016 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8017 vnode_unlock(nspace_items[i].vp);
8018 }
8019 nspace_items[i].vp = NULL;
8020 nspace_items[i].vid = 0;
8021 nspace_items[i].flags = NSPACE_ITEM_DONE;
8022 nspace_items[i].token = 0;
8023
8024 wakeup((caddr_t)&(nspace_items[i].vp));
8025 }
8026 }
8027 }
8028
8029 wakeup((caddr_t)&nspace_item_idx);
8030 lck_mtx_unlock(&nspace_handler_lock);
8031}
8032
8033
8034int
8035resolve_nspace_item(struct vnode *vp, uint64_t op)
8036{
8037 return resolve_nspace_item_ext(vp, op, NULL);
8038}
8039
8040int
8041resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
8042{
8043 int i, error, keep_waiting;
8044 struct timespec ts;
8045 nspace_type_t nspace_type = nspace_type_for_op(op);
8046
8047 // only allow namespace events on regular files, directories and symlinks.
8048 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
8049 return 0;
8050 }
8051
8052 //
8053 // if this is a snapshot event and the vnode is on a
8054 // disk image just pretend nothing happened since any
8055 // change to the disk image will cause the disk image
8056 // itself to get backed up and this avoids multi-way
8057 // deadlocks between the snapshot handler and the ever
8058 // popular diskimages-helper process. the variable
8059 // nspace_allow_virtual_devs allows this behavior to
8060 // be overridden (for use by the Mobile TimeMachine
8061 // testing infrastructure which uses disk images)
8062 //
8063 if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
8064 && (vp->v_mount != NULL)
8065 && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
8066 && !nspace_allow_virtual_devs) {
8067
8068 return 0;
8069 }
8070
8071 // if (thread_tid(current_thread()) == namespace_handler_tid) {
8072 if (nspace_handlers[nspace_type].handler_proc == NULL) {
8073 return 0;
8074 }
8075
8076 if (nspace_is_special_process(current_proc())) {
8077 return EDEADLK;
8078 }
8079
8080 lck_mtx_lock(&nspace_handler_lock);
8081
8082retry:
8083 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8084 if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
8085 break;
8086 }
8087 }
8088
8089 if (i >= MAX_NSPACE_ITEMS) {
8090 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8091 if (nspace_items[i].flags == 0) {
8092 break;
8093 }
8094 }
8095 } else {
8096 nspace_items[i].refcount++;
8097 }
8098
8099 if (i >= MAX_NSPACE_ITEMS) {
8100 ts.tv_sec = nspace_handler_timeout;
8101 ts.tv_nsec = 0;
8102
8103 error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
8104 if (error == 0) {
8105 // an entry got free'd up, go see if we can get a slot
8106 goto retry;
8107 } else {
8108 lck_mtx_unlock(&nspace_handler_lock);
8109 return error;
8110 }
8111 }
8112
8113 //
8114 // if it didn't already exist, add it. if it did exist
8115 // we'll get woken up when someone does a wakeup() on
8116 // the slot in the nspace_items table.
8117 //
8118 if (vp != nspace_items[i].vp) {
8119 nspace_items[i].vp = vp;
39236c6e 8120 nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
6d2010ae
A
8121 nspace_items[i].op = op;
8122 nspace_items[i].vid = vnode_vid(vp);
8123 nspace_items[i].flags = NSPACE_ITEM_NEW;
8124 nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
8125 if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
8126 if (arg) {
8127 vnode_lock_spin(vp);
8128 vp->v_flag |= VNEEDSSNAPSHOT;
8129 vnode_unlock(vp);
8130 }
8131 }
8132
8133 nspace_items[i].token = 0;
8134 nspace_items[i].refcount = 1;
8135
8136 wakeup((caddr_t)&nspace_item_idx);
8137 }
8138
8139 //
8140 // Now go to sleep until the handler does a wakeup on this
8141 // slot in the nspace_items table (or we timeout).
8142 //
8143 keep_waiting = 1;
8144 while(keep_waiting) {
8145 ts.tv_sec = nspace_handler_timeout;
8146 ts.tv_nsec = 0;
8147 error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
8148
8149 if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
8150 error = 0;
8151 } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
8152 error = nspace_items[i].token;
8153 } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
8154 if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
8155 nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
8156 continue;
8157 } else {
8158 error = ETIMEDOUT;
8159 }
8160 } else if (error == 0) {
8161 // hmmm, why did we get woken up?
8162 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
8163 nspace_items[i].token);
8164 }
8165
8166 if (--nspace_items[i].refcount == 0) {
8167 nspace_items[i].vp = NULL; // clear this so that no one will match on it again
8168 nspace_items[i].arg = NULL;
8169 nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
8170 nspace_items[i].flags = 0; // this clears it for re-use
8171 }
8172 wakeup(&nspace_token_id);
8173 keep_waiting = 0;
8174 }
8175
8176 lck_mtx_unlock(&nspace_handler_lock);
8177
8178 return error;
8179}
8180
8181
8182int
8183get_nspace_item_status(struct vnode *vp, int32_t *status)
8184{
8185 int i;
8186
8187 lck_mtx_lock(&nspace_handler_lock);
8188 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8189 if (nspace_items[i].vp == vp) {
8190 break;
8191 }
8192 }
8193
8194 if (i >= MAX_NSPACE_ITEMS) {
8195 lck_mtx_unlock(&nspace_handler_lock);
8196 return ENOENT;
8197 }
8198
8199 *status = nspace_items[i].flags;
8200 lck_mtx_unlock(&nspace_handler_lock);
8201 return 0;
8202}
8203
8204
#if 0
/*
 * (Compiled out.)  Format a "/.vol/<fsid>/<fileid>" path for vp into path.
 * On entry *len is the buffer size; on return it holds the snprintf result
 * plus one.  Returns 0 on success, or -1 (with a placeholder path) when
 * vnode_getattr fails.
 */
static int
build_volfs_path(struct vnode *vp, char *path, int *len)
{
	struct vnode_attr va;

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_fileid);

	if (vnode_getattr(vp, &va, vfs_context_kernel()) == 0) {
		*len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
		return 0;
	}

	*len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
	return -1;
}
#endif
8227
8228//
8229// Note: this function does NOT check permissions on all of the
8230// parent directories leading to this vnode. It should only be
8231// called on behalf of a root process. Otherwise a process may
8232// get access to a file because the file itself is readable even
8233// though its parent directories would prevent access.
8234//
8235static int
8236vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
8237{
8238 int error, action;
8239
8240 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8241 return error;
8242 }
8243
8244#if CONFIG_MACF
8245 error = mac_vnode_check_open(ctx, vp, fmode);
8246 if (error)
8247 return error;
8248#endif
1c79356b 8249
6d2010ae
A
8250 /* compute action to be authorized */
8251 action = 0;
8252 if (fmode & FREAD) {
8253 action |= KAUTH_VNODE_READ_DATA;
8254 }
8255 if (fmode & (FWRITE | O_TRUNC)) {
8256 /*
8257 * If we are writing, appending, and not truncating,
8258 * indicate that we are appending so that if the
8259 * UF_APPEND or SF_APPEND bits are set, we do not deny
8260 * the open.
8261 */
8262 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
8263 action |= KAUTH_VNODE_APPEND_DATA;
8264 } else {
8265 action |= KAUTH_VNODE_WRITE_DATA;
8266 }
8267 }
1c79356b 8268
6d2010ae
A
8269 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
8270 return error;
8271
1c79356b 8272
6d2010ae
A
8273 //
8274 // if the vnode is tagged VOPENEVT and the current process
8275 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
8276 // flag to the open mode so that this open won't count against
8277 // the vnode when carbon delete() does a vnode_isinuse() to see
8278 // if a file is currently in use. this allows spotlight
8279 // importers to not interfere with carbon apps that depend on
8280 // the no-delete-if-busy semantics of carbon delete().
8281 //
8282 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
8283 fmode |= O_EVTONLY;
8284 }
1c79356b 8285
6d2010ae
A
8286 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
8287 return error;
8288 }
8289 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
8290 VNOP_CLOSE(vp, fmode, ctx);
8291 return error;
8292 }
1c79356b 8293
4b17d6b6 8294 /* Call out to allow 3rd party notification of open.
6d2010ae
A
8295 * Ignore result of kauth_authorize_fileop call.
8296 */
4b17d6b6
A
8297#if CONFIG_MACF
8298 mac_vnode_notify_open(ctx, vp, fmode);
8299#endif
6d2010ae
A
8300 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
8301 (uintptr_t)vp, 0);
1c79356b 8302
1c79356b 8303
6d2010ae
A
8304 return 0;
8305}
1c79356b 8306
6d2010ae 8307static int
39236c6e 8308wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
6d2010ae
A
8309{
8310 int i, error=0, unblock=0;
8311 task_t curtask;
8312
8313 lck_mtx_lock(&nspace_handler_exclusion_lock);
8314 if (nspace_handlers[nspace_type].handler_busy) {
8315 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8316 return EBUSY;
8317 }
8318 nspace_handlers[nspace_type].handler_busy = 1;
8319 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8320
8321 /*
8322 * Any process that gets here will be one of the namespace handlers.
8323 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
8324 * as we can cause deadlocks to occur, because the namespace handler may prevent
8325 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
8326 * process.
8327 */
8328 curtask = current_task();
8329 bsd_set_dependency_capable (curtask);
8330
8331 lck_mtx_lock(&nspace_handler_lock);
8332 if (nspace_handlers[nspace_type].handler_proc == NULL) {
8333 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
8334 nspace_handlers[nspace_type].handler_proc = current_proc();
8335 }
8336
8337 while (error == 0) {
8338
8339 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8340 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
8341 if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
8342 continue;
8343 }
8344 break;
8345 }
8346 }
8347
8348 if (i < MAX_NSPACE_ITEMS) {
8349 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
8350 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
8351 nspace_items[i].token = ++nspace_token_id;
8352
8353 if (nspace_items[i].vp) {
8354 struct fileproc *fp;
8355 int32_t indx, fmode;
8356 struct proc *p = current_proc();
8357 vfs_context_t ctx = vfs_context_current();
39236c6e
A
8358 struct vnode_attr va;
8359
8360
8361 /*
8362 * Use vnode pointer to acquire a file descriptor for
8363 * hand-off to userland
8364 */
6d2010ae 8365 fmode = nspace_open_flags_for_type(nspace_type);
6d2010ae
A
8366 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
8367 if (error) {
8368 unblock = 1;
8369 break;
8370 }
8371 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
8372 if (error) {
8373 unblock = 1;
8374 vnode_put(nspace_items[i].vp);
8375 break;
8376 }
8377
8378 if ((error = falloc(p, &fp, &indx, ctx))) {
8379 vn_close(nspace_items[i].vp, fmode, ctx);
8380 vnode_put(nspace_items[i].vp);
8381 unblock = 1;
8382 break;
8383 }
8384
8385 fp->f_fglob->fg_flag = fmode;
6d2010ae
A
8386 fp->f_fglob->fg_ops = &vnops;
8387 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
8388
8389 proc_fdlock(p);
8390 procfdtbl_releasefd(p, indx, NULL);
8391 fp_drop(p, indx, fp, 1);
39236c6e
A
8392 proc_fdunlock(p);
8393
8394 /*
8395 * All variants of the namespace handler struct support these three fields:
8396 * token, flags, and the FD pointer
8397 */
8398 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
8399 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
8400 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
8401
8402 /*
8403 * Handle optional fields:
8404 * extended version support an info ptr (offset, length), and the
8405 *
8406 * namedata version supports a unique per-link object ID
8407 *
8408 */
8409 if (nhd->infoptr) {
6d2010ae
A
8410 uio_t uio = (uio_t)nspace_items[i].arg;
8411 uint64_t u_offset, u_length;
8412
8413 if (uio) {
8414 u_offset = uio_offset(uio);
8415 u_length = uio_resid(uio);
8416 } else {
8417 u_offset = 0;
8418 u_length = 0;
8419 }
39236c6e
A
8420 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
8421 error = copyout(&u_length, nhd->infoptr+sizeof(uint64_t), sizeof(uint64_t));
6d2010ae 8422 }
39236c6e
A
8423
8424 if (nhd->objid) {
8425 VATTR_INIT(&va);
8426 VATTR_WANTED(&va, va_linkid);
8427 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
8428 if (error == 0 ) {
8429 uint64_t linkid = 0;
8430 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
8431 linkid = (uint64_t)va.va_linkid;
8432 }
8433 error = copyout (&linkid, nhd->objid, sizeof(uint64_t));
8434 }
8435 }
8436
6d2010ae
A
8437 if (error) {
8438 vn_close(nspace_items[i].vp, fmode, ctx);
8439 fp_free(p, indx, fp);
8440 unblock = 1;
8441 }
8442
8443 vnode_put(nspace_items[i].vp);
8444
8445 break;
8446 } else {
8447 printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
8448 i, nspace_items[i].vp, error, nspace_items[i].vp->v_name);
8449 }
8450
8451 } else {
8452 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
8453 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
8454 error = EINVAL;
8455 break;
8456 }
8457
8458 }
8459 }
8460
8461 if (unblock) {
8462 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
8463 vnode_lock_spin(nspace_items[i].vp);
8464 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8465 vnode_unlock(nspace_items[i].vp);
8466 }
8467 nspace_items[i].vp = NULL;
8468 nspace_items[i].vid = 0;
8469 nspace_items[i].flags = NSPACE_ITEM_DONE;
8470 nspace_items[i].token = 0;
8471
8472 wakeup((caddr_t)&(nspace_items[i].vp));
8473 }
8474
8475 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
8476 // just go through every snapshot event and unblock it immediately.
8477 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
8478 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8479 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
8480 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
8481 nspace_items[i].vp = NULL;
8482 nspace_items[i].vid = 0;
8483 nspace_items[i].flags = NSPACE_ITEM_DONE;
8484 nspace_items[i].token = 0;
8485
8486 wakeup((caddr_t)&(nspace_items[i].vp));
8487 }
8488 }
8489 }
8490 }
8491 }
8492
8493 lck_mtx_unlock(&nspace_handler_lock);
8494
8495 lck_mtx_lock(&nspace_handler_exclusion_lock);
8496 nspace_handlers[nspace_type].handler_busy = 0;
8497 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8498
8499 return error;
8500}
1c79356b 8501
39236c6e
A
8502static inline int validate_namespace_args (int is64bit, int size) {
8503
8504 if (is64bit) {
8505 /* Must be one of these */
8506 if (size == sizeof(user64_namespace_handler_info)) {
8507 goto sizeok;
8508 }
8509 if (size == sizeof(user64_namespace_handler_info_ext)) {
8510 goto sizeok;
8511 }
8512 if (size == sizeof(user64_namespace_handler_data)) {
8513 goto sizeok;
8514 }
8515 return EINVAL;
8516 }
8517 else {
8518 /* 32 bit -- must be one of these */
8519 if (size == sizeof(user32_namespace_handler_info)) {
8520 goto sizeok;
8521 }
8522 if (size == sizeof(user32_namespace_handler_info_ext)) {
8523 goto sizeok;
8524 }
8525 if (size == sizeof(user32_namespace_handler_data)) {
8526 goto sizeok;
8527 }
8528 return EINVAL;
8529 }
8530
8531sizeok:
8532
8533 return 0;
8534
8535}
1c79356b 8536
6d2010ae
A
8537static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
8538{
8539 int error = 0;
39236c6e 8540 namespace_handler_data nhd;
6d2010ae 8541
39236c6e
A
8542 bzero (&nhd, sizeof(namespace_handler_data));
8543
8544 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
8545 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
6d2010ae
A
8546 return EINVAL;
8547 }
8548
8549 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8550 return error;
8551 }
8552
39236c6e
A
8553 error = validate_namespace_args (is64bit, size);
8554 if (error) {
8555 return error;
6d2010ae
A
8556 }
8557
39236c6e
A
8558 /* Copy in the userland pointers into our kernel-only struct */
8559
6d2010ae 8560 if (is64bit) {
39236c6e
A
8561 /* 64 bit userland structures */
8562 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
8563 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
8564 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
8565
8566 /* If the size is greater than the standard info struct, add in extra fields */
8567 if (size > (sizeof(user64_namespace_handler_info))) {
8568 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
8569 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
8570 }
8571 if (size == (sizeof(user64_namespace_handler_data))) {
8572 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
8573 }
8574 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
6d2010ae 8575 }
39236c6e
A
8576 }
8577 else {
8578 /* 32 bit userland structures */
8579 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
8580 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
8581 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
8582
8583 if (size > (sizeof(user32_namespace_handler_info))) {
8584 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
8585 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
8586 }
8587 if (size == (sizeof(user32_namespace_handler_data))) {
8588 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
8589 }
8590 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
6d2010ae
A
8591 }
8592 }
8593
39236c6e 8594 return wait_for_namespace_event(&nhd, nspace_type);
6d2010ae 8595}
1c79356b
A
8596
8597/*
8598 * Make a filesystem-specific control call:
8599 */
1c79356b 8600/* ARGSUSED */
b0d623f7
A
8601static int
8602fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
1c79356b 8603{
b0d623f7 8604 int error=0;
91447636 8605 boolean_t is64bit;
2d21ac55 8606 u_int size;
1c79356b
A
8607#define STK_PARAMS 128
8608 char stkbuf[STK_PARAMS];
8609 caddr_t data, memp;
b0d623f7 8610 vnode_t vp = *arg_vp;
1c79356b
A
8611
8612 size = IOCPARM_LEN(cmd);
8613 if (size > IOCPARM_MAX) return (EINVAL);
8614
6d2010ae 8615 is64bit = proc_is64bit(p);
91447636 8616
1c79356b
A
8617 memp = NULL;
8618 if (size > sizeof (stkbuf)) {
8619 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
8620 data = memp;
8621 } else {
91447636 8622 data = &stkbuf[0];
1c79356b
A
8623 };
8624
8625 if (cmd & IOC_IN) {
8626 if (size) {
b0d623f7 8627 error = copyin(udata, data, size);
1c79356b
A
8628 if (error) goto FSCtl_Exit;
8629 } else {
6d2010ae
A
8630 if (is64bit) {
8631 *(user_addr_t *)data = udata;
8632 }
8633 else {
8634 *(uint32_t *)data = (uint32_t)udata;
8635 }
1c79356b
A
8636 };
8637 } else if ((cmd & IOC_OUT) && size) {
8638 /*
8639 * Zero the buffer so the user always
8640 * gets back something deterministic.
8641 */
8642 bzero(data, size);
91447636 8643 } else if (cmd & IOC_VOID) {
b0d623f7 8644 if (is64bit) {
6d2010ae 8645 *(user_addr_t *)data = udata;
b0d623f7
A
8646 }
8647 else {
6d2010ae 8648 *(uint32_t *)data = (uint32_t)udata;
b0d623f7 8649 }
91447636 8650 }
1c79356b 8651
b0d623f7
A
8652 /* Check to see if it's a generic command */
8653 if (IOCBASECMD(cmd) == FSCTL_SYNC_VOLUME) {
8654 mount_t mp = vp->v_mount;
8655 int arg = *(uint32_t*)data;
8656
8657 /* record vid of vp so we can drop it below. */
8658 uint32_t vvid = vp->v_id;
91447636 8659
b0d623f7
A
8660 /*
8661 * Then grab mount_iterref so that we can release the vnode.
8662 * Without this, a thread may call vnode_iterate_prepare then
8663 * get into a deadlock because we've never released the root vp
8664 */
8665 error = mount_iterref (mp, 0);
8666 if (error) {
8667 goto FSCtl_Exit;
8668 }
8669 vnode_put(vp);
8670
8671 /* issue the sync for this volume */
8672 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
8673
8674 /*
8675 * Then release the mount_iterref once we're done syncing; it's not
8676 * needed for the VNOP_IOCTL below
8677 */
8678 mount_iterdrop(mp);
8679
8680 if (arg & FSCTL_SYNC_FULLSYNC) {
8681 /* re-obtain vnode iocount on the root vp, if possible */
8682 error = vnode_getwithvid (vp, vvid);
8683 if (error == 0) {
8684 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
8685 vnode_put (vp);
8686 }
8687 }
8688 /* mark the argument VP as having been released */
8689 *arg_vp = NULL;
8690
8691 } else if (IOCBASECMD(cmd) == FSCTL_SET_PACKAGE_EXTS) {
6d2010ae
A
8692 user_addr_t ext_strings;
8693 uint32_t num_entries;
8694 uint32_t max_width;
b0d623f7 8695
6d2010ae
A
8696 if ( (is64bit && size != sizeof(user64_package_ext_info))
8697 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
b0d623f7 8698
6d2010ae
A
8699 // either you're 64-bit and passed a 64-bit struct or
8700 // you're 32-bit and passed a 32-bit struct. otherwise
8701 // it's not ok.
8702 error = EINVAL;
8703 goto FSCtl_Exit;
8704 }
2d21ac55 8705
6d2010ae
A
8706 if (is64bit) {
8707 ext_strings = ((user64_package_ext_info *)data)->strings;
8708 num_entries = ((user64_package_ext_info *)data)->num_entries;
8709 max_width = ((user64_package_ext_info *)data)->max_width;
8710 } else {
8711 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
8712 num_entries = ((user32_package_ext_info *)data)->num_entries;
8713 max_width = ((user32_package_ext_info *)data)->max_width;
8714 }
b0d623f7 8715
6d2010ae 8716 error = set_package_extensions_table(ext_strings, num_entries, max_width);
b0d623f7 8717
39236c6e
A
8718
8719 }
8720
8721 /* namespace handlers */
8722 else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_GET) {
6d2010ae 8723 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
39236c6e
A
8724 }
8725
8726 /* Snapshot handlers */
8727 else if (IOCBASECMD(cmd) == FSCTL_OLD_SNAPSHOT_HANDLER_GET) {
6d2010ae
A
8728 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
8729 } else if (IOCBASECMD(cmd) == FSCTL_SNAPSHOT_HANDLER_GET_EXT) {
8730 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
39236c6e
A
8731 }
8732
8733 /* Tracked File Handlers */
8734 else if (IOCBASECMD(cmd) == FSCTL_TRACKED_HANDLER_GET) {
8735 error = process_namespace_fsctl(NSPACE_HANDLER_TRACK, is64bit, size, data);
8736 }
8737 else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_GETDATA) {
6d2010ae
A
8738 error = process_namespace_fsctl(NSPACE_HANDLER_TRACK, is64bit, size, data);
8739 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_UPDATE) {
8740 uint32_t token, val;
8741 int i;
8742
8743 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
8744 goto FSCtl_Exit;
8745 }
8746
8747 if (!nspace_is_special_process(p)) {
8748 error = EINVAL;
8749 goto FSCtl_Exit;
8750 }
8751
8752 token = ((uint32_t *)data)[0];
8753 val = ((uint32_t *)data)[1];
8754
8755 lck_mtx_lock(&nspace_handler_lock);
8756
8757 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8758 if (nspace_items[i].token == token) {
8759 break;
8760 }
8761 }
8762
8763 if (i >= MAX_NSPACE_ITEMS) {
8764 error = ENOENT;
8765 } else {
8766 //
8767 // if this bit is set, when resolve_nspace_item() times out
8768 // it will loop and go back to sleep.
8769 //
8770 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
8771 }
8772
8773 lck_mtx_unlock(&nspace_handler_lock);
8774
8775 if (error) {
8776 printf("nspace-handler-update: did not find token %u\n", token);
8777 }
8778
8779 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_UNBLOCK) {
8780 uint32_t token, val;
8781 int i;
8782
8783 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
8784 goto FSCtl_Exit;
8785 }
8786
8787 if (!nspace_is_special_process(p)) {
8788 error = EINVAL;
8789 goto FSCtl_Exit;
8790 }
8791
8792 token = ((uint32_t *)data)[0];
8793 val = ((uint32_t *)data)[1];
8794
8795 lck_mtx_lock(&nspace_handler_lock);
8796
8797 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8798 if (nspace_items[i].token == token) {
8799 break;
8800 }
8801 }
8802
8803 if (i >= MAX_NSPACE_ITEMS) {
8804 printf("nspace-handler-unblock: did not find token %u\n", token);
8805 error = ENOENT;
8806 } else {
8807 if (val == 0 && nspace_items[i].vp) {
8808 vnode_lock_spin(nspace_items[i].vp);
8809 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8810 vnode_unlock(nspace_items[i].vp);
8811 }
8812
8813 nspace_items[i].vp = NULL;
8814 nspace_items[i].arg = NULL;
8815 nspace_items[i].op = 0;
8816 nspace_items[i].vid = 0;
8817 nspace_items[i].flags = NSPACE_ITEM_DONE;
8818 nspace_items[i].token = 0;
8819
8820 wakeup((caddr_t)&(nspace_items[i].vp));
8821 }
8822
8823 lck_mtx_unlock(&nspace_handler_lock);
8824
8825 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_CANCEL) {
8826 uint32_t token, val;
8827 int i;
8828
8829 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
8830 goto FSCtl_Exit;
8831 }
8832
8833 if (!nspace_is_special_process(p)) {
8834 error = EINVAL;
8835 goto FSCtl_Exit;
8836 }
8837
8838 token = ((uint32_t *)data)[0];
8839 val = ((uint32_t *)data)[1];
8840
8841 lck_mtx_lock(&nspace_handler_lock);
8842
8843 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8844 if (nspace_items[i].token == token) {
8845 break;
8846 }
8847 }
8848
8849 if (i >= MAX_NSPACE_ITEMS) {
8850 printf("nspace-handler-cancel: did not find token %u\n", token);
8851 error = ENOENT;
8852 } else {
8853 if (nspace_items[i].vp) {
8854 vnode_lock_spin(nspace_items[i].vp);
8855 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8856 vnode_unlock(nspace_items[i].vp);
8857 }
8858
8859 nspace_items[i].vp = NULL;
8860 nspace_items[i].arg = NULL;
8861 nspace_items[i].vid = 0;
8862 nspace_items[i].token = val;
8863 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
8864 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
8865
8866 wakeup((caddr_t)&(nspace_items[i].vp));
8867 }
8868
8869 lck_mtx_unlock(&nspace_handler_lock);
8870 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME) {
8871 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8872 goto FSCtl_Exit;
8873 }
8874
8875 // we explicitly do not do the namespace_handler_proc check here
8876
8877 lck_mtx_lock(&nspace_handler_lock);
8878 snapshot_timestamp = ((uint32_t *)data)[0];
8879 wakeup(&nspace_item_idx);
8880 lck_mtx_unlock(&nspace_handler_lock);
8881 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
8882
8883 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS) {
8884 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8885 goto FSCtl_Exit;
8886 }
8887
8888 lck_mtx_lock(&nspace_handler_lock);
8889 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
8890 lck_mtx_unlock(&nspace_handler_lock);
8891 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
8892 nspace_allow_virtual_devs ? "" : " NOT");
8893 error = 0;
8894
8895 } else if (IOCBASECMD(cmd) == FSCTL_SET_FSTYPENAME_OVERRIDE) {
8896 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8897 goto FSCtl_Exit;
8898 }
8899 if (vp->v_mount) {
8900 mount_lock(vp->v_mount);
8901 if (data[0] != 0) {
8902 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
8903 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
8904 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
8905 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
8906 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
8907 }
8908 } else {
8909 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
8910 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
8911 }
8912 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
8913 vp->v_mount->fstypename_override[0] = '\0';
8914 }
8915 mount_unlock(vp->v_mount);
8916 }
b0d623f7
A
8917 } else {
8918 /* Invoke the filesystem-specific code */
8919 error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
8920 }
1c79356b 8921
1c79356b
A
8922
8923 /*
8924 * Copy any data to user, size was
8925 * already set and checked above.
8926 */
91447636 8927 if (error == 0 && (cmd & IOC_OUT) && size)
b0d623f7 8928 error = copyout(data, udata, size);
1c79356b
A
8929
8930FSCtl_Exit:
8931 if (memp) kfree(memp, size);
8932
8933 return error;
8934}
b0d623f7
A
8935
8936/* ARGSUSED */
8937int
8938fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
8939{
8940 int error;
8941 struct nameidata nd;
8942 u_long nameiflags;
8943 vnode_t vp = NULL;
8944 vfs_context_t ctx = vfs_context_current();
8945
8946 AUDIT_ARG(cmd, uap->cmd);
8947 AUDIT_ARG(value32, uap->options);
8948 /* Get the vnode for the file we are getting info on: */
8949 nameiflags = 0;
8950 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6d2010ae
A
8951 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
8952 UIO_USERSPACE, uap->path, ctx);
b0d623f7
A
8953 if ((error = namei(&nd))) goto done;
8954 vp = nd.ni_vp;
8955 nameidone(&nd);
8956
8957#if CONFIG_MACF
8958 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
8959 if (error) {
8960 goto done;
8961 }
8962#endif
8963
8964 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
8965
8966done:
8967 if (vp)
8968 vnode_put(vp);
8969 return error;
8970}
8971/* ARGSUSED */
8972int
8973ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
8974{
8975 int error;
8976 vnode_t vp = NULL;
8977 vfs_context_t ctx = vfs_context_current();
8978 int fd = -1;
8979
8980 AUDIT_ARG(fd, uap->fd);
8981 AUDIT_ARG(cmd, uap->cmd);
8982 AUDIT_ARG(value32, uap->options);
8983
8984 /* Get the vnode for the file we are getting info on: */
8985 if ((error = file_vnode(uap->fd, &vp)))
8986 goto done;
8987 fd = uap->fd;
8988 if ((error = vnode_getwithref(vp))) {
8989 goto done;
8990 }
8991
8992#if CONFIG_MACF
8993 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
8994 if (error) {
8995 goto done;
8996 }
8997#endif
8998
8999 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
9000
9001done:
9002 if (fd != -1)
9003 file_drop(fd);
9004
9005 if (vp)
9006 vnode_put(vp);
9007 return error;
9008}
1c79356b 9009/* end of fsctl system call */
0b4e3aa0
A
9010
9011/*
9012 * An in-kernel sync for power management to call.
9013 */
9014__private_extern__ int
9015sync_internal(void)
9016{
0b4e3aa0
A
9017 int error;
9018
9019 struct sync_args data;
9020
9021 int retval[2];
9022
0b4e3aa0 9023
91447636 9024 error = sync(current_proc(), &data, &retval[0]);
0b4e3aa0 9025
0b4e3aa0
A
9026
9027 return (error);
9028} /* end of sync_internal call */
9029
55e303ae 9030
91447636
A
9031/*
9032 * Retrieve the data of an extended attribute.
9033 */
9034int
2d21ac55 9035getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
91447636 9036{
2d21ac55 9037 vnode_t vp;
91447636
A
9038 struct nameidata nd;
9039 char attrname[XATTR_MAXNAMELEN+1];
2d21ac55 9040 vfs_context_t ctx = vfs_context_current();
91447636
A
9041 uio_t auio = NULL;
9042 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9043 size_t attrsize = 0;
9044 size_t namelen;
b0d623f7 9045 u_int32_t nameiflags;
91447636
A
9046 int error;
9047 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 9048
2d21ac55 9049 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 9050 return (EINVAL);
55e303ae 9051
91447636 9052 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 9053 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
9054 if ((error = namei(&nd))) {
9055 return (error);
9056 }
9057 vp = nd.ni_vp;
9058 nameidone(&nd);
55e303ae 9059
91447636
A
9060 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9061 goto out;
9062 }
9063 if (xattr_protected(attrname)) {
6d2010ae
A
9064 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
9065 error = EPERM;
9066 goto out;
9067 }
91447636 9068 }
b0d623f7
A
9069 /*
9070 * the specific check for 0xffffffff is a hack to preserve
9071 * binaray compatibilty in K64 with applications that discovered
9072 * that passing in a buf pointer and a size of -1 resulted in
9073 * just the size of the indicated extended attribute being returned.
9074 * this isn't part of the documented behavior, but because of the
9075 * original implemtation's check for "uap->size > 0", this behavior
9076 * was allowed. In K32 that check turned into a signed comparison
9077 * even though uap->size is unsigned... in K64, we blow by that
9078 * check because uap->size is unsigned and doesn't get sign smeared
9079 * in the munger for a 32 bit user app. we also need to add a
9080 * check to limit the maximum size of the buffer being passed in...
9081 * unfortunately, the underlying fileystems seem to just malloc
9082 * the requested size even if the actual extended attribute is tiny.
9083 * because that malloc is for kernel wired memory, we have to put a
9084 * sane limit on it.
9085 *
9086 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
9087 * U64 running on K64 will yield -1 (64 bits wide)
9088 * U32/U64 running on K32 will yield -1 (32 bits wide)
9089 */
9090 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
9091 goto no_uio;
9092
b0d623f7 9093 if (uap->value) {
6d2010ae
A
9094 if (uap->size > (size_t)XATTR_MAXSIZE)
9095 uap->size = XATTR_MAXSIZE;
9096
91447636
A
9097 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9098 &uio_buf[0], sizeof(uio_buf));
9099 uio_addiov(auio, uap->value, uap->size);
9100 }
b0d623f7 9101no_uio:
2d21ac55 9102 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
91447636
A
9103out:
9104 vnode_put(vp);
55e303ae 9105
91447636
A
9106 if (auio) {
9107 *retval = uap->size - uio_resid(auio);
9108 } else {
9109 *retval = (user_ssize_t)attrsize;
55e303ae
A
9110 }
9111
91447636
A
9112 return (error);
9113}
55e303ae 9114
91447636
A
9115/*
9116 * Retrieve the data of an extended attribute.
9117 */
9118int
2d21ac55 9119fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
91447636 9120{
2d21ac55 9121 vnode_t vp;
91447636 9122 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
9123 uio_t auio = NULL;
9124 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9125 size_t attrsize = 0;
9126 size_t namelen;
9127 int error;
9128 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 9129
2d21ac55 9130 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 9131 return (EINVAL);
55e303ae 9132
91447636
A
9133 if ( (error = file_vnode(uap->fd, &vp)) ) {
9134 return (error);
9135 }
9136 if ( (error = vnode_getwithref(vp)) ) {
9137 file_drop(uap->fd);
9138 return(error);
9139 }
9140 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9141 goto out;
9142 }
9143 if (xattr_protected(attrname)) {
9144 error = EPERM;
9145 goto out;
9146 }
9147 if (uap->value && uap->size > 0) {
9148 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9149 &uio_buf[0], sizeof(uio_buf));
9150 uio_addiov(auio, uap->value, uap->size);
9151 }
55e303ae 9152
2d21ac55 9153 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
91447636
A
9154out:
9155 (void)vnode_put(vp);
9156 file_drop(uap->fd);
55e303ae 9157
91447636
A
9158 if (auio) {
9159 *retval = uap->size - uio_resid(auio);
9160 } else {
9161 *retval = (user_ssize_t)attrsize;
9162 }
9163 return (error);
9164}
55e303ae 9165
91447636
A
9166/*
9167 * Set the data of an extended attribute.
9168 */
55e303ae 9169int
2d21ac55 9170setxattr(proc_t p, struct setxattr_args *uap, int *retval)
55e303ae 9171{
2d21ac55 9172 vnode_t vp;
91447636
A
9173 struct nameidata nd;
9174 char attrname[XATTR_MAXNAMELEN+1];
2d21ac55 9175 vfs_context_t ctx = vfs_context_current();
91447636
A
9176 uio_t auio = NULL;
9177 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9178 size_t namelen;
b0d623f7 9179 u_int32_t nameiflags;
91447636
A
9180 int error;
9181 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 9182
2d21ac55 9183 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 9184 return (EINVAL);
55e303ae 9185
91447636 9186 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6d2010ae
A
9187 if (error == EPERM) {
9188 /* if the string won't fit in attrname, copyinstr emits EPERM */
9189 return (ENAMETOOLONG);
9190 }
9191 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9192 return error;
91447636
A
9193 }
9194 if (xattr_protected(attrname))
9195 return(EPERM);
2d21ac55 9196 if (uap->size != 0 && uap->value == 0) {
91447636 9197 return (EINVAL);
55e303ae 9198 }
55e303ae 9199
91447636 9200 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 9201 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
9202 if ((error = namei(&nd))) {
9203 return (error);
9204 }
9205 vp = nd.ni_vp;
9206 nameidone(&nd);
55e303ae 9207
91447636
A
9208 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9209 &uio_buf[0], sizeof(uio_buf));
9210 uio_addiov(auio, uap->value, uap->size);
55e303ae 9211
2d21ac55
A
9212 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
9213#if CONFIG_FSE
9214 if (error == 0) {
9215 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9216 FSE_ARG_VNODE, vp,
9217 FSE_ARG_DONE);
9218 }
9219#endif
91447636
A
9220 vnode_put(vp);
9221 *retval = 0;
9222 return (error);
9223}
55e303ae 9224
91447636
A
9225/*
9226 * Set the data of an extended attribute.
9227 */
9228int
2d21ac55 9229fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
91447636 9230{
2d21ac55 9231 vnode_t vp;
91447636 9232 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
9233 uio_t auio = NULL;
9234 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9235 size_t namelen;
9236 int error;
9237 char uio_buf[ UIO_SIZEOF(1) ];
6d2010ae 9238#if CONFIG_FSE
2d21ac55 9239 vfs_context_t ctx = vfs_context_current();
6d2010ae 9240#endif
55e303ae 9241
2d21ac55 9242 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 9243 return (EINVAL);
55e303ae 9244
91447636
A
9245 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9246 return (error);
55e303ae 9247 }
91447636
A
9248 if (xattr_protected(attrname))
9249 return(EPERM);
2d21ac55 9250 if (uap->size != 0 && uap->value == 0) {
91447636 9251 return (EINVAL);
55e303ae 9252 }
91447636
A
9253 if ( (error = file_vnode(uap->fd, &vp)) ) {
9254 return (error);
55e303ae 9255 }
91447636
A
9256 if ( (error = vnode_getwithref(vp)) ) {
9257 file_drop(uap->fd);
9258 return(error);
9259 }
9260 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9261 &uio_buf[0], sizeof(uio_buf));
9262 uio_addiov(auio, uap->value, uap->size);
91447636 9263
2d21ac55
A
9264 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
9265#if CONFIG_FSE
9266 if (error == 0) {
9267 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9268 FSE_ARG_VNODE, vp,
9269 FSE_ARG_DONE);
9270 }
9271#endif
91447636
A
9272 vnode_put(vp);
9273 file_drop(uap->fd);
9274 *retval = 0;
9275 return (error);
9276}
55e303ae 9277
91447636
A
9278/*
9279 * Remove an extended attribute.
b0d623f7 9280 * XXX Code duplication here.
91447636 9281 */
91447636 9282int
2d21ac55 9283removexattr(proc_t p, struct removexattr_args *uap, int *retval)
91447636 9284{
2d21ac55 9285 vnode_t vp;
91447636
A
9286 struct nameidata nd;
9287 char attrname[XATTR_MAXNAMELEN+1];
9288 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
2d21ac55 9289 vfs_context_t ctx = vfs_context_current();
91447636 9290 size_t namelen;
b0d623f7 9291 u_int32_t nameiflags;
91447636 9292 int error;
55e303ae 9293
2d21ac55 9294 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 9295 return (EINVAL);
55e303ae 9296
91447636
A
9297 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
9298 if (error != 0) {
9299 return (error);
9300 }
9301 if (xattr_protected(attrname))
9302 return(EPERM);
9303 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 9304 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
9305 if ((error = namei(&nd))) {
9306 return (error);
9307 }
9308 vp = nd.ni_vp;
9309 nameidone(&nd);
55e303ae 9310
2d21ac55
A
9311 error = vn_removexattr(vp, attrname, uap->options, ctx);
9312#if CONFIG_FSE
9313 if (error == 0) {
9314 add_fsevent(FSE_XATTR_REMOVED, ctx,
9315 FSE_ARG_VNODE, vp,
9316 FSE_ARG_DONE);
9317 }
9318#endif
91447636
A
9319 vnode_put(vp);
9320 *retval = 0;
9321 return (error);
55e303ae
A
9322}
9323
91447636
A
9324/*
9325 * Remove an extended attribute.
b0d623f7 9326 * XXX Code duplication here.
91447636 9327 */
91447636 9328int
2d21ac55 9329fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
55e303ae 9330{
2d21ac55 9331 vnode_t vp;
91447636 9332 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
9333 size_t namelen;
9334 int error;
6d2010ae 9335#if CONFIG_FSE
2d21ac55 9336 vfs_context_t ctx = vfs_context_current();
6d2010ae 9337#endif
55e303ae 9338
2d21ac55 9339 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636
A
9340 return (EINVAL);
9341
9342 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
9343 if (error != 0) {
9344 return (error);
9345 }
9346 if (xattr_protected(attrname))
9347 return(EPERM);
9348 if ( (error = file_vnode(uap->fd, &vp)) ) {
9349 return (error);
9350 }
9351 if ( (error = vnode_getwithref(vp)) ) {
9352 file_drop(uap->fd);
9353 return(error);
9354 }
4a249263 9355
2d21ac55
A
9356 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
9357#if CONFIG_FSE
9358 if (error == 0) {
9359 add_fsevent(FSE_XATTR_REMOVED, ctx,
9360 FSE_ARG_VNODE, vp,
9361 FSE_ARG_DONE);
9362 }
9363#endif
91447636
A
9364 vnode_put(vp);
9365 file_drop(uap->fd);
9366 *retval = 0;
9367 return (error);
55e303ae
A
9368}
9369
91447636
A
9370/*
9371 * Retrieve the list of extended attribute names.
b0d623f7 9372 * XXX Code duplication here.
91447636 9373 */
91447636 9374int
2d21ac55 9375listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
55e303ae 9376{
2d21ac55 9377 vnode_t vp;
91447636 9378 struct nameidata nd;
2d21ac55 9379 vfs_context_t ctx = vfs_context_current();
91447636
A
9380 uio_t auio = NULL;
9381 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9382 size_t attrsize = 0;
b0d623f7 9383 u_int32_t nameiflags;
91447636
A
9384 int error;
9385 char uio_buf[ UIO_SIZEOF(1) ];
4a249263 9386
2d21ac55 9387 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 9388 return (EINVAL);
55e303ae 9389
2d21ac55 9390 nameiflags = ((uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW) | NOTRIGGER;
6d2010ae 9391 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
9392 if ((error = namei(&nd))) {
9393 return (error);
9394 }
9395 vp = nd.ni_vp;
9396 nameidone(&nd);
9397 if (uap->namebuf != 0 && uap->bufsize > 0) {
6d2010ae
A
9398 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
9399 &uio_buf[0], sizeof(uio_buf));
91447636
A
9400 uio_addiov(auio, uap->namebuf, uap->bufsize);
9401 }
55e303ae 9402
2d21ac55 9403 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
55e303ae 9404
91447636
A
9405 vnode_put(vp);
9406 if (auio) {
9407 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
9408 } else {
9409 *retval = (user_ssize_t)attrsize;
9410 }
9411 return (error);
55e303ae
A
9412}
9413
91447636
A
9414/*
9415 * Retrieve the list of extended attribute names.
b0d623f7 9416 * XXX Code duplication here.
91447636 9417 */
55e303ae 9418int
2d21ac55 9419flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
55e303ae 9420{
2d21ac55 9421 vnode_t vp;
91447636
A
9422 uio_t auio = NULL;
9423 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9424 size_t attrsize = 0;
9425 int error;
9426 char uio_buf[ UIO_SIZEOF(1) ];
9427
2d21ac55 9428 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636
A
9429 return (EINVAL);
9430
9431 if ( (error = file_vnode(uap->fd, &vp)) ) {
9432 return (error);
9433 }
9434 if ( (error = vnode_getwithref(vp)) ) {
9435 file_drop(uap->fd);
9436 return(error);
9437 }
9438 if (uap->namebuf != 0 && uap->bufsize > 0) {
91447636
A
9439 auio = uio_createwithbuffer(1, 0, spacetype,
9440 UIO_READ, &uio_buf[0], sizeof(uio_buf));
9441 uio_addiov(auio, uap->namebuf, uap->bufsize);
9442 }
91447636 9443
2d21ac55 9444 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
55e303ae 9445
91447636
A
9446 vnode_put(vp);
9447 file_drop(uap->fd);
9448 if (auio) {
9449 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
9450 } else {
9451 *retval = (user_ssize_t)attrsize;
9452 }
9453 return (error);
55e303ae 9454}
4a249263 9455
b0d623f7
A
9456/*
9457 * Obtain the full pathname of a file system object by id.
9458 *
9459 * This is a private SPI used by the File Manager.
9460 */
9461__private_extern__
9462int
9463fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
9464{
9465 vnode_t vp;
9466 struct mount *mp = NULL;
9467 vfs_context_t ctx = vfs_context_current();
9468 fsid_t fsid;
9469 char *realpath;
9470 int bpflags;
9471 int length;
9472 int error;
9473
9474 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
9475 return (error);
9476 }
9477 AUDIT_ARG(value32, fsid.val[0]);
9478 AUDIT_ARG(value64, uap->objid);
9479 /* Restrict output buffer size for now. */
9480 if (uap->bufsize > PAGE_SIZE) {
9481 return (EINVAL);
9482 }
9483 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
9484 if (realpath == NULL) {
9485 return (ENOMEM);
9486 }
9487 /* Find the target mountpoint. */
9488 if ((mp = mount_lookupby_volfsid(fsid.val[0], 1)) == NULL) {
9489 error = ENOTSUP; /* unexpected failure */
9490 goto out;
9491 }
39236c6e 9492unionget:
b0d623f7
A
9493 /* Find the target vnode. */
9494 if (uap->objid == 2) {
9495 error = VFS_ROOT(mp, &vp, ctx);
9496 } else {
9497 error = VFS_VGET(mp, (ino64_t)uap->objid, &vp, ctx);
9498 }
39236c6e
A
9499
9500 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
9501 /*
9502 * If the fileid isn't found and we're in a union
9503 * mount volume, then see if the fileid is in the
9504 * mounted-on volume.
9505 */
9506 struct mount *tmp = mp;
9507 mp = vnode_mount(tmp->mnt_vnodecovered);
9508 vfs_unbusy(tmp);
9509 if (vfs_busy(mp, LK_NOWAIT) == 0)
9510 goto unionget;
9511 } else
9512 vfs_unbusy(mp);
9513
b0d623f7
A
9514 if (error) {
9515 goto out;
9516 }
6d2010ae
A
9517#if CONFIG_MACF
9518 error = mac_vnode_check_fsgetpath(ctx, vp);
9519 if (error) {
9520 vnode_put(vp);
9521 goto out;
9522 }
9523#endif
b0d623f7
A
9524 /* Obtain the absolute path to this vnode. */
9525 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
316670eb 9526 bpflags |= BUILDPATH_CHECK_MOVED;
b0d623f7
A
9527 error = build_path(vp, realpath, uap->bufsize, &length, bpflags, ctx);
9528 vnode_put(vp);
9529 if (error) {
9530 goto out;
9531 }
9532 AUDIT_ARG(text, realpath);
39236c6e
A
9533
9534 if (kdebug_enable) {
9535 long dbg_parms[NUMPARMS];
9536 int dbg_namelen;
9537
9538 dbg_namelen = (int)sizeof(dbg_parms);
9539
9540 if (length < dbg_namelen) {
9541 memcpy((char *)dbg_parms, realpath, length);
9542 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
9543
9544 dbg_namelen = length;
9545 } else
9546 memcpy((char *)dbg_parms, realpath + (length - dbg_namelen), dbg_namelen);
9547
9548 kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
9549 }
b0d623f7
A
9550 error = copyout((caddr_t)realpath, uap->buf, length);
9551
9552 *retval = (user_ssize_t)length; /* may be superseded by error */
9553out:
9554 if (realpath) {
9555 FREE(realpath, M_TEMP);
9556 }
9557 return (error);
9558}
9559
91447636
A
9560/*
9561 * Common routine to handle various flavors of statfs data heading out
9562 * to user space.
2d21ac55
A
9563 *
9564 * Returns: 0 Success
9565 * EFAULT
91447636
A
9566 */
9567static int
9568munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
9569 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
9570 boolean_t partial_copy)
4a249263 9571{
91447636
A
9572 int error;
9573 int my_size, copy_size;
9574
9575 if (is_64_bit) {
b0d623f7 9576 struct user64_statfs sfs;
91447636
A
9577 my_size = copy_size = sizeof(sfs);
9578 bzero(&sfs, my_size);
9579 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
9580 sfs.f_type = mp->mnt_vtable->vfc_typenum;
9581 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
b0d623f7
A
9582 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
9583 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
9584 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
9585 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
9586 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
9587 sfs.f_files = (user64_long_t)sfsp->f_files;
9588 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
91447636
A
9589 sfs.f_fsid = sfsp->f_fsid;
9590 sfs.f_owner = sfsp->f_owner;
6d2010ae
A
9591 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
9592 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
9593 } else {
9594 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
9595 }
2d21ac55
A
9596 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
9597 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
9598
9599 if (partial_copy) {
9600 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
9601 }
9602 error = copyout((caddr_t)&sfs, bufp, copy_size);
9603 }
9604 else {
b0d623f7
A
9605 struct user32_statfs sfs;
9606
91447636
A
9607 my_size = copy_size = sizeof(sfs);
9608 bzero(&sfs, my_size);
9609
9610 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
9611 sfs.f_type = mp->mnt_vtable->vfc_typenum;
9612 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
9613
9614 /*
9615 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
9616 * have to fudge the numbers here in that case. We inflate the blocksize in order
9617 * to reflect the filesystem size as best we can.
9618 */
b0d623f7 9619 if ((sfsp->f_blocks > INT_MAX)
91447636
A
9620 /* Hack for 4061702 . I think the real fix is for Carbon to
9621 * look for some volume capability and not depend on hidden
9622 * semantics agreed between a FS and carbon.
9623 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
9624 * for Carbon to set bNoVolumeSizes volume attribute.
9625 * Without this the webdavfs files cannot be copied onto
9626 * disk as they look huge. This change should not affect
9627 * XSAN as they should not setting these to -1..
9628 */
2d21ac55
A
9629 && (sfsp->f_blocks != 0xffffffffffffffffULL)
9630 && (sfsp->f_bfree != 0xffffffffffffffffULL)
9631 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
91447636
A
9632 int shift;
9633
9634 /*
9635 * Work out how far we have to shift the block count down to make it fit.
9636 * Note that it's possible to have to shift so far that the resulting
9637 * blocksize would be unreportably large. At that point, we will clip
9638 * any values that don't fit.
9639 *
9640 * For safety's sake, we also ensure that f_iosize is never reported as
9641 * being smaller than f_bsize.
9642 */
9643 for (shift = 0; shift < 32; shift++) {
b0d623f7 9644 if ((sfsp->f_blocks >> shift) <= INT_MAX)
91447636 9645 break;
b0d623f7 9646 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
91447636
A
9647 break;
9648 }
b0d623f7
A
9649#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
9650 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
9651 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
9652 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
91447636 9653#undef __SHIFT_OR_CLIP
b0d623f7 9654 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
91447636
A
9655 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
9656 } else {
9657 /* filesystem is small enough to be reported honestly */
b0d623f7
A
9658 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
9659 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
9660 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
9661 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
9662 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
91447636 9663 }
b0d623f7
A
9664 sfs.f_files = (user32_long_t)sfsp->f_files;
9665 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
91447636
A
9666 sfs.f_fsid = sfsp->f_fsid;
9667 sfs.f_owner = sfsp->f_owner;
6d2010ae
A
9668 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
9669 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
9670 } else {
9671 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
9672 }
2d21ac55
A
9673 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
9674 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
9675
9676 if (partial_copy) {
9677 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
9678 }
9679 error = copyout((caddr_t)&sfs, bufp, copy_size);
9680 }
4a249263 9681
91447636
A
9682 if (sizep != NULL) {
9683 *sizep = my_size;
9684 }
9685 return(error);
9686}
9687
9688/*
9689 * copy stat structure into user_stat structure.
9690 */
b0d623f7 9691void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
91447636 9692{
b0d623f7
A
9693 bzero(usbp, sizeof(*usbp));
9694
9695 usbp->st_dev = sbp->st_dev;
9696 usbp->st_ino = sbp->st_ino;
9697 usbp->st_mode = sbp->st_mode;
9698 usbp->st_nlink = sbp->st_nlink;
9699 usbp->st_uid = sbp->st_uid;
9700 usbp->st_gid = sbp->st_gid;
9701 usbp->st_rdev = sbp->st_rdev;
9702#ifndef _POSIX_C_SOURCE
9703 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9704 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9705 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9706 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9707 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9708 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9709#else
9710 usbp->st_atime = sbp->st_atime;
9711 usbp->st_atimensec = sbp->st_atimensec;
9712 usbp->st_mtime = sbp->st_mtime;
9713 usbp->st_mtimensec = sbp->st_mtimensec;
9714 usbp->st_ctime = sbp->st_ctime;
9715 usbp->st_ctimensec = sbp->st_ctimensec;
9716#endif
9717 usbp->st_size = sbp->st_size;
9718 usbp->st_blocks = sbp->st_blocks;
9719 usbp->st_blksize = sbp->st_blksize;
9720 usbp->st_flags = sbp->st_flags;
9721 usbp->st_gen = sbp->st_gen;
9722 usbp->st_lspare = sbp->st_lspare;
9723 usbp->st_qspare[0] = sbp->st_qspare[0];
9724 usbp->st_qspare[1] = sbp->st_qspare[1];
9725}
9726
9727void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
9728{
9729 bzero(usbp, sizeof(*usbp));
0c530ab8 9730
91447636
A
9731 usbp->st_dev = sbp->st_dev;
9732 usbp->st_ino = sbp->st_ino;
9733 usbp->st_mode = sbp->st_mode;
9734 usbp->st_nlink = sbp->st_nlink;
9735 usbp->st_uid = sbp->st_uid;
9736 usbp->st_gid = sbp->st_gid;
9737 usbp->st_rdev = sbp->st_rdev;
2d21ac55
A
9738#ifndef _POSIX_C_SOURCE
9739 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9740 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9741 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9742 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9743 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9744 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9745#else
9746 usbp->st_atime = sbp->st_atime;
9747 usbp->st_atimensec = sbp->st_atimensec;
9748 usbp->st_mtime = sbp->st_mtime;
9749 usbp->st_mtimensec = sbp->st_mtimensec;
9750 usbp->st_ctime = sbp->st_ctime;
9751 usbp->st_ctimensec = sbp->st_ctimensec;
9752#endif
9753 usbp->st_size = sbp->st_size;
9754 usbp->st_blocks = sbp->st_blocks;
9755 usbp->st_blksize = sbp->st_blksize;
9756 usbp->st_flags = sbp->st_flags;
9757 usbp->st_gen = sbp->st_gen;
9758 usbp->st_lspare = sbp->st_lspare;
9759 usbp->st_qspare[0] = sbp->st_qspare[0];
9760 usbp->st_qspare[1] = sbp->st_qspare[1];
9761}
9762
9763/*
9764 * copy stat64 structure into user_stat64 structure.
9765 */
b0d623f7
A
9766void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
9767{
9768 bzero(usbp, sizeof(*usbp));
9769
9770 usbp->st_dev = sbp->st_dev;
9771 usbp->st_ino = sbp->st_ino;
9772 usbp->st_mode = sbp->st_mode;
9773 usbp->st_nlink = sbp->st_nlink;
9774 usbp->st_uid = sbp->st_uid;
9775 usbp->st_gid = sbp->st_gid;
9776 usbp->st_rdev = sbp->st_rdev;
9777#ifndef _POSIX_C_SOURCE
9778 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9779 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9780 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9781 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9782 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9783 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9784 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
9785 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
9786#else
9787 usbp->st_atime = sbp->st_atime;
9788 usbp->st_atimensec = sbp->st_atimensec;
9789 usbp->st_mtime = sbp->st_mtime;
9790 usbp->st_mtimensec = sbp->st_mtimensec;
9791 usbp->st_ctime = sbp->st_ctime;
9792 usbp->st_ctimensec = sbp->st_ctimensec;
9793 usbp->st_birthtime = sbp->st_birthtime;
9794 usbp->st_birthtimensec = sbp->st_birthtimensec;
9795#endif
9796 usbp->st_size = sbp->st_size;
9797 usbp->st_blocks = sbp->st_blocks;
9798 usbp->st_blksize = sbp->st_blksize;
9799 usbp->st_flags = sbp->st_flags;
9800 usbp->st_gen = sbp->st_gen;
9801 usbp->st_lspare = sbp->st_lspare;
9802 usbp->st_qspare[0] = sbp->st_qspare[0];
9803 usbp->st_qspare[1] = sbp->st_qspare[1];
9804}
9805
9806void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
2d21ac55 9807{
b0d623f7 9808 bzero(usbp, sizeof(*usbp));
2d21ac55
A
9809
9810 usbp->st_dev = sbp->st_dev;
9811 usbp->st_ino = sbp->st_ino;
9812 usbp->st_mode = sbp->st_mode;
9813 usbp->st_nlink = sbp->st_nlink;
9814 usbp->st_uid = sbp->st_uid;
9815 usbp->st_gid = sbp->st_gid;
9816 usbp->st_rdev = sbp->st_rdev;
9817#ifndef _POSIX_C_SOURCE
91447636
A
9818 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9819 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9820 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9821 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9822 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9823 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
2d21ac55
A
9824 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
9825 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
91447636
A
9826#else
9827 usbp->st_atime = sbp->st_atime;
9828 usbp->st_atimensec = sbp->st_atimensec;
9829 usbp->st_mtime = sbp->st_mtime;
9830 usbp->st_mtimensec = sbp->st_mtimensec;
9831 usbp->st_ctime = sbp->st_ctime;
9832 usbp->st_ctimensec = sbp->st_ctimensec;
2d21ac55
A
9833 usbp->st_birthtime = sbp->st_birthtime;
9834 usbp->st_birthtimensec = sbp->st_birthtimensec;
91447636
A
9835#endif
9836 usbp->st_size = sbp->st_size;
9837 usbp->st_blocks = sbp->st_blocks;
9838 usbp->st_blksize = sbp->st_blksize;
9839 usbp->st_flags = sbp->st_flags;
9840 usbp->st_gen = sbp->st_gen;
9841 usbp->st_lspare = sbp->st_lspare;
9842 usbp->st_qspare[0] = sbp->st_qspare[0];
9843 usbp->st_qspare[1] = sbp->st_qspare[1];
4a249263 9844}
39236c6e
A
9845
9846/*
9847 * Purge buffer cache for simulating cold starts
9848 */
9849static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
9850{
9851 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
9852
9853 return VNODE_RETURNED;
9854}
9855
9856static int vfs_purge_callback(mount_t mp, __unused void * arg)
9857{
9858 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
9859
9860 return VFS_RETURNED;
9861}
9862
9863int
9864vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
9865{
9866 if (!kauth_cred_issuser(kauth_cred_get()))
9867 return EPERM;
9868
9869 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
9870
9871 return 0;
9872}
9873