]> git.saurik.com Git - apple/xnu.git/blame - bsd/vfs/vfs_syscalls.c
xnu-3789.31.2.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
CommitLineData
1c79356b 1/*
39037602 2 * Copyright (c) 1995-2016 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39037602 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39037602 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39037602 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39037602 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
2d21ac55
A
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
1c79356b
A
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/namei.h>
77#include <sys/filedesc.h>
78#include <sys/kernel.h>
91447636 79#include <sys/file_internal.h>
1c79356b 80#include <sys/stat.h>
91447636
A
81#include <sys/vnode_internal.h>
82#include <sys/mount_internal.h>
83#include <sys/proc_internal.h>
84#include <sys/kauth.h>
85#include <sys/uio_internal.h>
1c79356b 86#include <sys/malloc.h>
91447636 87#include <sys/mman.h>
1c79356b
A
88#include <sys/dirent.h>
89#include <sys/attr.h>
90#include <sys/sysctl.h>
91#include <sys/ubc.h>
9bccf70c 92#include <sys/quota.h>
91447636
A
93#include <sys/kdebug.h>
94#include <sys/fsevents.h>
6d2010ae 95#include <sys/imgsrc.h>
91447636
A
96#include <sys/sysproto.h>
97#include <sys/xattr.h>
b0d623f7
A
98#include <sys/fcntl.h>
99#include <sys/fsctl.h>
91447636 100#include <sys/ubc_internal.h>
593a1d5f 101#include <sys/disk.h>
3e170ce0 102#include <sys/content_protection.h>
39037602
A
103#include <sys/clonefile.h>
104#include <sys/snapshot.h>
490019cf 105#include <sys/priv.h>
91447636
A
106#include <machine/cons.h>
107#include <machine/limits.h>
108#include <miscfs/specfs/specdev.h>
e5568f75 109
b0d623f7 110#include <security/audit/audit.h>
e5568f75
A
111#include <bsm/audit_kevents.h>
112
91447636
A
113#include <mach/mach_types.h>
114#include <kern/kern_types.h>
115#include <kern/kalloc.h>
6d2010ae 116#include <kern/task.h>
91447636
A
117
118#include <vm/vm_pageout.h>
39037602 119#include <vm/vm_protos.h>
1c79356b 120
91447636 121#include <libkern/OSAtomic.h>
b0d623f7 122#include <pexpert/pexpert.h>
3e170ce0 123#include <IOKit/IOBSD.h>
55e303ae 124
490019cf
A
125#if ROUTEFS
126#include <miscfs/routefs/routefs.h>
127#endif /* ROUTEFS */
128
2d21ac55
A
129#if CONFIG_MACF
130#include <security/mac.h>
131#include <security/mac_framework.h>
132#endif
1c79356b 133
/*
 * GET_PATH(x) / RELEASE_PATH(x): obtain a path buffer into x and give it back.
 *
 * With CONFIG_FSE the buffer comes from get_pathbuff() and is returned with
 * release_pathbuff().  Otherwise a MAXPATHLEN-byte buffer is allocated with
 * MALLOC_ZONE (type M_NAMEI, M_WAITOK) and freed with FREE_ZONE.
 *
 * Each expansion already ends in ';', so an invocation forms a complete
 * statement by itself (an extra ';' at the call site yields an empty
 * statement).
 */
39037602 134#if CONFIG_FSE
2d21ac55 135#define GET_PATH(x) \
39037602 136 (x) = get_pathbuff();
2d21ac55
A
137#define RELEASE_PATH(x) \
138 release_pathbuff(x);
39037602 139#else
2d21ac55 140#define GET_PATH(x) \
39037602 141 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
2d21ac55
A
142#define RELEASE_PATH(x) \
143 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
144#endif /* CONFIG_FSE */
145
146/*
 * struct for checkdirs iteration: an argument bundle holding two vnodes.
 * NOTE(review): presumably olddp is the directory vnode being replaced and
 * newdp the vnode that replaces it - confirm against checkdirs().
 */
147struct cdirargs {
148 vnode_t olddp;
149 vnode_t newdp;
150};
151/* callback for checkdirs iteration */
152static int checkdirs_callback(proc_t p, void * arg);
1c79356b 153
91447636 154static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
6601e61a 155static int checkdirs(vnode_t olddp, vfs_context_t ctx);
91447636
A
156void enablequotas(struct mount *mp, vfs_context_t ctx);
157static int getfsstat_callback(mount_t mp, void * arg);
158static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
2d21ac55 159static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
91447636 160static int sync_callback(mount_t, void *);
fe8ab488
A
161static void sync_thread(void *, __unused wait_result_t);
162static int sync_async(int);
39037602
A
163static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
164 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
91447636 165 boolean_t partial_copy);
b0d623f7
A
166static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
167 user_addr_t bufp);
168static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
6d2010ae
A
169static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
170 struct componentname *cnp, user_addr_t fsmountargs,
171 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
172 vfs_context_t ctx);
173void vfs_notify_mount(vnode_t pdvp);
174
175int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
b7266188 176
fe8ab488
A
177struct fd_vn_data * fg_vn_data_alloc(void);
178
c18c124e
A
179/*
180 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
181 * Concurrent lookups (or lookups by ids) on hard links can cause the
182 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
183 * does) to return ENOENT as the path cannot be returned from the name cache
184 * alone. We have no option but to retry and hope to get one namei->reverse path
185 * generation done without an intervening lookup, lookup by id on the hard link
186 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
187 * which currently are the MAC hooks for rename, unlink and rmdir.
188 */
189#define MAX_AUTHORIZE_ENOENT_RETRIES 1024
190
fe8ab488
A
191static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
192
193static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
194
b7266188 195#ifdef CONFIG_IMGSRC_ACCESS
b7266188
A
196static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
197static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
198static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
199static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
200static void mount_end_update(mount_t mp);
6d2010ae 201static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
b7266188
A
202#endif /* CONFIG_IMGSRC_ACCESS */
203
2d21ac55
A
204int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
205
206__private_extern__
207int sync_internal(void);
208
2d21ac55 209__private_extern__
c18c124e 210int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
91447636 211
fe8ab488
A
212extern lck_grp_t *fd_vn_lck_grp;
213extern lck_grp_attr_t *fd_vn_lck_grp_attr;
214extern lck_attr_t *fd_vn_lck_attr;
215
2d21ac55
A
216/*
217 * incremented each time a mount or unmount operation occurs
218 * used to invalidate the cached value of the rootvp in the
219 * mount structure utilized by cache_lookup_path
220 */
b0d623f7 221uint32_t mount_generation = 0;
1c79356b
A
222
223/* counts number of mount and unmount operations */
224unsigned int vfs_nummntops=0;
225
39236c6e
A
226extern const struct fileops vnops;
227#if CONFIG_APPLEDOUBLE
39037602 228extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
39236c6e 229#endif /* CONFIG_APPLEDOUBLE */
91447636 230
1c79356b
A
231/*
232 * Virtual File System System Calls
233 */
234
490019cf 235#if NFSCLIENT || DEVFS || ROUTEFS
6d2010ae
A
236/*
237 * Private in-kernel mounting spi (NFS only, not exported)
238 */
239 __private_extern__
240boolean_t
241vfs_iskernelmount(mount_t mp)
242{
243 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
244}
245
246 __private_extern__
247int
248kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
249 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
250{
251 struct nameidata nd;
252 boolean_t did_namei;
253 int error;
254
39037602 255 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
6d2010ae
A
256 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
257
258 /*
259 * Get the vnode to be covered if it's not supplied
260 */
261 if (vp == NULLVP) {
262 error = namei(&nd);
263 if (error)
264 return (error);
265 vp = nd.ni_vp;
266 pvp = nd.ni_dvp;
267 did_namei = TRUE;
268 } else {
269 char *pnbuf = CAST_DOWN(char *, path);
270
271 nd.ni_cnd.cn_pnbuf = pnbuf;
272 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
273 did_namei = FALSE;
274 }
275
276 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
277 syscall_flags, kern_flags, NULL, TRUE, ctx);
278
279 if (did_namei) {
280 vnode_put(vp);
281 vnode_put(pvp);
282 nameidone(&nd);
283 }
284
285 return (error);
286}
fe8ab488 287#endif /* NFSCLIENT || DEVFS || ROUTEFS */
6d2010ae 288
1c79356b
A
289/*
290 * Mount a file system.
291 */
1c79356b
A
292/* ARGSUSED */
293int
b0d623f7 294mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
2d21ac55
A
295{
296 struct __mac_mount_args muap;
297
298 muap.type = uap->type;
299 muap.path = uap->path;
300 muap.flags = uap->flags;
301 muap.data = uap->data;
302 muap.mac_p = USER_ADDR_NULL;
303 return (__mac_mount(p, &muap, retval));
304}
305
6d2010ae 306void
39037602 307vfs_notify_mount(vnode_t pdvp)
6d2010ae
A
308{
309 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
310 lock_vnode_and_post(pdvp, NOTE_WRITE);
311}
312
b0d623f7
A
313/*
314 * __mac_mount:
315 * Mount a file system taking into account MAC label behavior.
316 * See mount(2) man page for more information
317 *
318 * Parameters: p Process requesting the mount
319 * uap User argument descriptor (see below)
39037602 320 * retval (ignored)
b0d623f7
A
321 *
322 * Indirect: uap->type Filesystem type
323 * uap->path Path to mount
39037602
A
324 * uap->data Mount arguments
325 * uap->mac_p MAC info
b0d623f7 326 * uap->flags Mount flags
39037602 327 *
b0d623f7
A
328 *
329 * Returns: 0 Success
330 * !0 Not success
331 */
6d2010ae
A
332boolean_t root_fs_upgrade_try = FALSE;
333
2d21ac55 334int
b0d623f7 335__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
1c79356b 336{
39236c6e
A
337 vnode_t pvp = NULL;
338 vnode_t vp = NULL;
339 int need_nameidone = 0;
6d2010ae
A
340 vfs_context_t ctx = vfs_context_current();
341 char fstypename[MFSNAMELEN];
342 struct nameidata nd;
343 size_t dummy=0;
344 char *labelstr = NULL;
345 int flags = uap->flags;
346 int error;
39037602 347#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
6d2010ae 348 boolean_t is_64bit = IS_64BIT_PROCESS(p);
39236c6e
A
349#else
350#pragma unused(p)
351#endif
6d2010ae
A
352 /*
353 * Get the fs type name from user space
354 */
355 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
356 if (error)
357 return (error);
358
359 /*
360 * Get the vnode to be covered
361 */
39037602 362 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
6d2010ae
A
363 UIO_USERSPACE, uap->path, ctx);
364 error = namei(&nd);
39236c6e
A
365 if (error) {
366 goto out;
367 }
368 need_nameidone = 1;
6d2010ae
A
369 vp = nd.ni_vp;
370 pvp = nd.ni_dvp;
39037602 371
6d2010ae
A
372#ifdef CONFIG_IMGSRC_ACCESS
373 /* Mounting image source cannot be batched with other operations */
374 if (flags == MNT_IMGSRC_BY_INDEX) {
375 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
376 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
377 goto out;
378 }
379#endif /* CONFIG_IMGSRC_ACCESS */
380
381#if CONFIG_MACF
382 /*
383 * Get the label string (if any) from user space
384 */
385 if (uap->mac_p != USER_ADDR_NULL) {
386 struct user_mac mac;
387 size_t ulen = 0;
388
389 if (is_64bit) {
390 struct user64_mac mac64;
391 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
392 mac.m_buflen = mac64.m_buflen;
393 mac.m_string = mac64.m_string;
394 } else {
395 struct user32_mac mac32;
396 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
397 mac.m_buflen = mac32.m_buflen;
398 mac.m_string = mac32.m_string;
399 }
400 if (error)
401 goto out;
402 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
403 (mac.m_buflen < 2)) {
404 error = EINVAL;
405 goto out;
406 }
407 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
408 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
409 if (error) {
410 goto out;
411 }
412 AUDIT_ARG(mac_string, labelstr);
413 }
414#endif /* CONFIG_MACF */
415
416 AUDIT_ARG(fflags, flags);
417
4bd07ac2
A
418#if SECURE_KERNEL
419 if (flags & MNT_UNION) {
420 /* No union mounts on release kernels */
421 error = EPERM;
422 goto out;
423 }
424#endif
425
6d2010ae 426 if ((vp->v_flag & VROOT) &&
39236c6e
A
427 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
428 if (!(flags & MNT_UNION)) {
6d2010ae 429 flags |= MNT_UPDATE;
39236c6e
A
430 }
431 else {
39037602 432 /*
39236c6e 433 * For a union mount on '/', treat it as fresh
39037602
A
434 * mount instead of update.
435 * Otherwise, union mouting on '/' used to panic the
436 * system before, since mnt_vnodecovered was found to
437 * be NULL for '/' which is required for unionlookup
39236c6e
A
438 * after it gets ENOENT on union mount.
439 */
440 flags = (flags & ~(MNT_UPDATE));
441 }
442
4bd07ac2 443#if SECURE_KERNEL
39236c6e
A
444 if ((flags & MNT_RDONLY) == 0) {
445 /* Release kernels are not allowed to mount "/" as rw */
446 error = EPERM;
39037602 447 goto out;
39236c6e 448 }
39236c6e
A
449#endif
450 /*
451 * See 7392553 for more details on why this check exists.
452 * Suffice to say: If this check is ON and something tries
453 * to mount the rootFS RW, we'll turn off the codesign
39037602
A
454 * bitmap optimization.
455 */
6d2010ae 456#if CHECK_CS_VALIDATION_BITMAP
39236c6e 457 if ((flags & MNT_RDONLY) == 0 ) {
6d2010ae
A
458 root_fs_upgrade_try = TRUE;
459 }
460#endif
461 }
462
463 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
464 labelstr, FALSE, ctx);
39236c6e 465
6d2010ae 466out:
39236c6e 467
6d2010ae
A
468#if CONFIG_MACF
469 if (labelstr)
470 FREE(labelstr, M_MACTEMP);
471#endif /* CONFIG_MACF */
472
39236c6e
A
473 if (vp) {
474 vnode_put(vp);
475 }
476 if (pvp) {
477 vnode_put(pvp);
478 }
479 if (need_nameidone) {
480 nameidone(&nd);
481 }
6d2010ae
A
482
483 return (error);
484}
485
486/*
487 * common mount implementation (final stage of mounting)
39037602 488
6d2010ae
A
489 * Arguments:
490 * fstypename file system type (i.e. its vfs name)
491 * pvp parent of covered vnode
492 * vp covered vnode
493 * cnp component name (ie path) of covered vnode
494 * flags generic mount flags
495 * fsmountargs file system specific data
496 * labelstr optional MAC label
497 * kernelmount TRUE for mounts initiated from inside the kernel
498 * ctx caller's context
499 */
500static int
501mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
502 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
503 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
504{
39236c6e
A
505#if !CONFIG_MACF
506#pragma unused(labelstr)
507#endif
91447636
A
508 struct vnode *devvp = NULLVP;
509 struct vnode *device_vnode = NULLVP;
2d21ac55
A
510#if CONFIG_MACF
511 struct vnode *rvp;
512#endif
1c79356b 513 struct mount *mp;
6601e61a 514 struct vfstable *vfsp = (struct vfstable *)0;
6d2010ae 515 struct proc *p = vfs_context_proc(ctx);
91447636 516 int error, flag = 0;
91447636 517 user_addr_t devpath = USER_ADDR_NULL;
91447636
A
518 int ronly = 0;
519 int mntalloc = 0;
b0d623f7 520 boolean_t vfsp_ref = FALSE;
743b1565 521 boolean_t is_rwlock_locked = FALSE;
b0d623f7
A
522 boolean_t did_rele = FALSE;
523 boolean_t have_usecount = FALSE;
9bccf70c 524
1c79356b 525 /*
6d2010ae 526 * Process an update for an existing mount
1c79356b 527 */
6d2010ae 528 if (flags & MNT_UPDATE) {
1c79356b 529 if ((vp->v_flag & VROOT) == 0) {
91447636
A
530 error = EINVAL;
531 goto out1;
1c79356b
A
532 }
533 mp = vp->v_mount;
d12e1678 534
91447636 535 /* unmount in progress return error */
b0d623f7 536 mount_lock_spin(mp);
91447636
A
537 if (mp->mnt_lflag & MNT_LUNMOUNT) {
538 mount_unlock(mp);
539 error = EBUSY;
540 goto out1;
d12e1678 541 }
91447636
A
542 mount_unlock(mp);
543 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 544 is_rwlock_locked = TRUE;
1c79356b
A
545 /*
546 * We only allow the filesystem to be reloaded if it
547 * is currently mounted read-only.
548 */
6d2010ae 549 if ((flags & MNT_RELOAD) &&
1c79356b 550 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
91447636
A
551 error = ENOTSUP;
552 goto out1;
1c79356b 553 }
b7266188 554
316670eb
A
555 /*
556 * If content protection is enabled, update mounts are not
557 * allowed to turn it off.
558 */
39037602 559 if ((mp->mnt_flag & MNT_CPROTECT) &&
316670eb
A
560 ((flags & MNT_CPROTECT) == 0)) {
561 error = EINVAL;
562 goto out1;
563 }
564
39037602 565#ifdef CONFIG_IMGSRC_ACCESS
b7266188
A
566 /* Can't downgrade the backer of the root FS */
567 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
6d2010ae 568 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
b7266188
A
569 error = ENOTSUP;
570 goto out1;
571 }
572#endif /* CONFIG_IMGSRC_ACCESS */
573
1c79356b
A
574 /*
575 * Only root, or the user that did the original mount is
576 * permitted to update it.
577 */
2d21ac55
A
578 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
579 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
580 goto out1;
581 }
582#if CONFIG_MACF
583 error = mac_mount_check_remount(ctx, mp);
584 if (error != 0) {
91447636 585 goto out1;
1c79356b 586 }
2d21ac55 587#endif
1c79356b 588 /*
91447636
A
589 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
590 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
1c79356b 591 */
6d2010ae
A
592 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
593 flags |= MNT_NOSUID | MNT_NODEV;
d12e1678 594 if (mp->mnt_flag & MNT_NOEXEC)
6d2010ae 595 flags |= MNT_NOEXEC;
1c79356b 596 }
d12e1678
A
597 flag = mp->mnt_flag;
598
316670eb
A
599
600
6d2010ae 601 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
d12e1678 602
91447636 603 vfsp = mp->mnt_vtable;
1c79356b
A
604 goto update;
605 }
1c79356b 606 /*
91447636 607 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
1c79356b
A
608 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
609 */
6d2010ae
A
610 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
611 flags |= MNT_NOSUID | MNT_NODEV;
1c79356b 612 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
6d2010ae 613 flags |= MNT_NOEXEC;
1c79356b 614 }
91447636 615
55e303ae
A
616 /* XXXAUDIT: Should we capture the type on the error path as well? */
617 AUDIT_ARG(text, fstypename);
91447636 618 mount_list_lock();
1c79356b 619 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
b0d623f7
A
620 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
621 vfsp->vfc_refcount++;
622 vfsp_ref = TRUE;
1c79356b 623 break;
b0d623f7 624 }
91447636 625 mount_list_unlock();
1c79356b 626 if (vfsp == NULL) {
91447636
A
627 error = ENODEV;
628 goto out1;
1c79356b 629 }
6d2010ae
A
630
631 /*
632 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
633 */
634 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
635 error = EINVAL; /* unsupported request */
2d21ac55 636 goto out1;
6d2010ae
A
637 }
638
639 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
640 if (error != 0) {
91447636 641 goto out1;
1c79356b 642 }
1c79356b
A
643
644 /*
6d2010ae 645 * Allocate and initialize the filesystem (mount_t)
1c79356b 646 */
b0d623f7 647 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
1c79356b 648 M_MOUNT, M_WAITOK);
b0d623f7 649 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
91447636 650 mntalloc = 1;
0b4e3aa0
A
651
652 /* Initialize the default IO constraints */
653 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
654 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
91447636
A
655 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
656 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
657 mp->mnt_devblocksize = DEV_BSIZE;
2d21ac55 658 mp->mnt_alignmentmask = PAGE_MASK;
b0d623f7
A
659 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
660 mp->mnt_ioscale = 1;
2d21ac55
A
661 mp->mnt_ioflags = 0;
662 mp->mnt_realrootvp = NULLVP;
663 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
91447636
A
664
665 TAILQ_INIT(&mp->mnt_vnodelist);
666 TAILQ_INIT(&mp->mnt_workerqueue);
667 TAILQ_INIT(&mp->mnt_newvnodes);
668 mount_lock_init(mp);
669 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 670 is_rwlock_locked = TRUE;
1c79356b 671 mp->mnt_op = vfsp->vfc_vfsops;
91447636 672 mp->mnt_vtable = vfsp;
91447636 673 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
1c79356b 674 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
fe8ab488
A
675 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
676 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1c79356b 677 mp->mnt_vnodecovered = vp;
2d21ac55 678 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
6d2010ae
A
679 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
680 mp->mnt_devbsdunit = 0;
1c79356b 681
91447636
A
682 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
683 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
6d2010ae 684
490019cf 685#if NFSCLIENT || DEVFS || ROUTEFS
6d2010ae
A
686 if (kernelmount)
687 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
688 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
689 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
fe8ab488 690#endif /* NFSCLIENT || DEVFS || ROUTEFS */
6d2010ae 691
1c79356b
A
692update:
693 /*
694 * Set the mount level flags.
695 */
6d2010ae 696 if (flags & MNT_RDONLY)
1c79356b 697 mp->mnt_flag |= MNT_RDONLY;
6d2010ae
A
698 else if (mp->mnt_flag & MNT_RDONLY) {
699 // disallow read/write upgrades of file systems that
700 // had the TYPENAME_OVERRIDE feature set.
701 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
702 error = EPERM;
703 goto out1;
704 }
1c79356b 705 mp->mnt_kern_flag |= MNTK_WANTRDWR;
6d2010ae 706 }
0b4e3aa0
A
707 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
708 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
6d2010ae
A
709 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
710 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
711 MNT_QUARANTINE | MNT_CPROTECT);
712 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
713 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
714 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
715 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
716 MNT_QUARANTINE | MNT_CPROTECT);
2d21ac55
A
717
718#if CONFIG_MACF
6d2010ae 719 if (flags & MNT_MULTILABEL) {
2d21ac55
A
720 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
721 error = EINVAL;
722 goto out1;
723 }
724 mp->mnt_flag |= MNT_MULTILABEL;
725 }
726#endif
6d2010ae
A
727 /*
728 * Process device path for local file systems if requested
729 */
39037602
A
730 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS &&
731 !(internal_flags & KERNEL_MOUNT_SNAPSHOT)) {
6d2010ae 732 if (vfs_context_is64bit(ctx)) {
91447636 733 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
39037602 734 goto out1;
91447636
A
735 fsmountargs += sizeof(devpath);
736 } else {
b0d623f7 737 user32_addr_t tmp;
91447636 738 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
39037602 739 goto out1;
91447636
A
740 /* munge into LP64 addr */
741 devpath = CAST_USER_ADDR_T(tmp);
742 fsmountargs += sizeof(tmp);
743 }
744
6d2010ae 745 /* Lookup device and authorize access to it */
91447636 746 if ((devpath)) {
6d2010ae
A
747 struct nameidata nd;
748
749 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
750 if ( (error = namei(&nd)) )
91447636
A
751 goto out1;
752
3e170ce0 753 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
6d2010ae 754 devvp = nd.ni_vp;
91447636 755
6d2010ae 756 nameidone(&nd);
91447636
A
757
758 if (devvp->v_type != VBLK) {
759 error = ENOTBLK;
760 goto out2;
761 }
762 if (major(devvp->v_rdev) >= nblkdev) {
763 error = ENXIO;
764 goto out2;
765 }
766 /*
767 * If mount by non-root, then verify that user has necessary
768 * permissions on the device.
769 */
2d21ac55 770 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
6d2010ae
A
771 mode_t accessmode = KAUTH_VNODE_READ_DATA;
772
91447636
A
773 if ((mp->mnt_flag & MNT_RDONLY) == 0)
774 accessmode |= KAUTH_VNODE_WRITE_DATA;
2d21ac55 775 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
91447636
A
776 goto out2;
777 }
778 }
6d2010ae
A
779 /* On first mount, preflight and open device */
780 if (devpath && ((flags & MNT_UPDATE) == 0)) {
91447636
A
781 if ( (error = vnode_ref(devvp)) )
782 goto out2;
783 /*
784 * Disallow multiple mounts of the same device.
785 * Disallow mounting of a device that is currently in use
786 * (except for root, which might share swap device for miniroot).
787 * Flush out any old buffers remaining from a previous use.
788 */
789 if ( (error = vfs_mountedon(devvp)) )
790 goto out3;
39037602 791
91447636
A
792 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
793 error = EBUSY;
794 goto out3;
795 }
2d21ac55 796 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
91447636
A
797 error = ENOTBLK;
798 goto out3;
799 }
800 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
801 goto out3;
802
803 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
2d21ac55
A
804#if CONFIG_MACF
805 error = mac_vnode_check_open(ctx,
806 devvp,
807 ronly ? FREAD : FREAD|FWRITE);
808 if (error)
809 goto out3;
810#endif /* MAC */
811 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
91447636
A
812 goto out3;
813
814 mp->mnt_devvp = devvp;
815 device_vnode = devvp;
b0d623f7 816
6d2010ae
A
817 } else if ((mp->mnt_flag & MNT_RDONLY) &&
818 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
819 (device_vnode = mp->mnt_devvp)) {
820 dev_t dev;
821 int maj;
822 /*
823 * If upgrade to read-write by non-root, then verify
824 * that user has necessary permissions on the device.
825 */
826 vnode_getalways(device_vnode);
b0d623f7 827
6d2010ae 828 if (suser(vfs_context_ucred(ctx), NULL) &&
39037602 829 (error = vnode_authorize(device_vnode, NULL,
6d2010ae
A
830 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
831 ctx)) != 0) {
832 vnode_put(device_vnode);
833 goto out2;
834 }
b0d623f7 835
6d2010ae
A
836 /* Tell the device that we're upgrading */
837 dev = (dev_t)device_vnode->v_rdev;
838 maj = major(dev);
b0d623f7 839
6d2010ae
A
840 if ((u_int)maj >= (u_int)nblkdev)
841 panic("Volume mounted on a device with invalid major number.");
b0d623f7 842
6d2010ae
A
843 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
844 vnode_put(device_vnode);
91447636 845 device_vnode = NULLVP;
6d2010ae
A
846 if (error != 0) {
847 goto out2;
848 }
91447636
A
849 }
850 }
2d21ac55 851#if CONFIG_MACF
6d2010ae 852 if ((flags & MNT_UPDATE) == 0) {
2d21ac55
A
853 mac_mount_label_init(mp);
854 mac_mount_label_associate(ctx, mp);
855 }
6d2010ae
A
856 if (labelstr) {
857 if ((flags & MNT_UPDATE) != 0) {
858 error = mac_mount_check_label_update(ctx, mp);
2d21ac55
A
859 if (error != 0)
860 goto out3;
861 }
2d21ac55
A
862 }
863#endif
1c79356b
A
864 /*
865 * Mount the filesystem.
866 */
39037602
A
867 if (internal_flags & KERNEL_MOUNT_SNAPSHOT) {
868 error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT,
869 (caddr_t)fsmountargs, 0, ctx);
870 } else {
871 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
872 }
d12e1678 873
6d2010ae 874 if (flags & MNT_UPDATE) {
1c79356b
A
875 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
876 mp->mnt_flag &= ~MNT_RDONLY;
877 mp->mnt_flag &=~
878 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
879 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
880 if (error)
6d2010ae 881 mp->mnt_flag = flag; /* restore flag value */
91447636
A
882 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
883 lck_rw_done(&mp->mnt_rwlock);
743b1565 884 is_rwlock_locked = FALSE;
9bccf70c 885 if (!error)
2d21ac55 886 enablequotas(mp, ctx);
6d2010ae 887 goto exit;
1c79356b 888 }
6d2010ae 889
1c79356b
A
890 /*
891 * Put the new filesystem on the mount list after root.
892 */
6601e61a 893 if (error == 0) {
2d21ac55
A
894 struct vfs_attr vfsattr;
895#if CONFIG_MACF
896 if (vfs_flags(mp) & MNT_MULTILABEL) {
897 error = VFS_ROOT(mp, &rvp, ctx);
898 if (error) {
899 printf("%s() VFS_ROOT returned %d\n", __func__, error);
900 goto out3;
901 }
2d21ac55 902 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
b0d623f7
A
903 /*
904 * drop reference provided by VFS_ROOT
905 */
906 vnode_put(rvp);
907
2d21ac55
A
908 if (error)
909 goto out3;
910 }
911#endif /* MAC */
912
913 vnode_lock_spin(vp);
914 CLR(vp->v_flag, VMOUNT);
91447636
A
915 vp->v_mountedhere = mp;
916 vnode_unlock(vp);
917
2d21ac55
A
918 /*
919 * taking the name_cache_lock exclusively will
920 * insure that everyone is out of the fast path who
921 * might be trying to use a now stale copy of
922 * vp->v_mountedhere->mnt_realrootvp
923 * bumping mount_generation causes the cached values
924 * to be invalidated
925 */
926 name_cache_lock();
927 mount_generation++;
928 name_cache_unlock();
929
b0d623f7
A
930 error = vnode_ref(vp);
931 if (error != 0) {
932 goto out4;
933 }
934
935 have_usecount = TRUE;
91447636 936
2d21ac55 937 error = checkdirs(vp, ctx);
6601e61a
A
938 if (error != 0) {
939 /* Unmount the filesystem as cdir/rdirs cannot be updated */
940 goto out4;
941 }
39037602
A
942 /*
943 * there is no cleanup code here so I have made it void
91447636
A
944 * we need to revisit this
945 */
2d21ac55 946 (void)VFS_START(mp, 0, ctx);
1c79356b 947
6d2010ae
A
948 if (mount_list_add(mp) != 0) {
949 /*
950 * The system is shutting down trying to umount
951 * everything, so fail with a plausible errno.
952 */
953 error = EBUSY;
b0d623f7
A
954 goto out4;
955 }
6601e61a
A
956 lck_rw_done(&mp->mnt_rwlock);
957 is_rwlock_locked = FALSE;
958
2d21ac55
A
959 /* Check if this mounted file system supports EAs or named streams. */
960 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
961 VFSATTR_INIT(&vfsattr);
962 VFSATTR_WANTED(&vfsattr, f_capabilities);
963 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
39037602 964 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
2d21ac55
A
965 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
966 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
967 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
968 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
969 }
970#if NAMEDSTREAMS
971 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
972 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
973 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
974 }
975#endif
976 /* Check if this file system supports path from id lookups. */
977 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
978 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
979 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
980 } else if (mp->mnt_flag & MNT_DOVOLFS) {
981 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
982 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
983 }
39037602
A
984
985 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) &&
986 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) {
987 mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS;
988 }
2d21ac55
A
989 }
990 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
991 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
992 }
993 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
994 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
995 }
1c79356b 996 /* increment the operations count */
b0d623f7 997 OSAddAtomic(1, &vfs_nummntops);
2d21ac55 998 enablequotas(mp, ctx);
91447636
A
999
1000 if (device_vnode) {
1001 device_vnode->v_specflags |= SI_MOUNTEDON;
1002
1003 /*
1004 * cache the IO attributes for the underlying physical media...
1005 * an error return indicates the underlying driver doesn't
1006 * support all the queries necessary... however, reasonable
1007 * defaults will have been set, so no reason to bail or care
1008 */
1009 vfs_init_io_attributes(device_vnode, mp);
39037602 1010 }
6601e61a
A
1011
1012 /* Now that mount is setup, notify the listeners */
6d2010ae 1013 vfs_notify_mount(pvp);
3e170ce0
A
1014 IOBSDMountChange(mp, kIOMountChangeMount);
1015
1c79356b 1016 } else {
6d2010ae
A
1017 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1018 if (mp->mnt_vnodelist.tqh_first != NULL) {
39037602 1019 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
6d2010ae
A
1020 mp->mnt_vtable->vfc_name, error);
1021 }
1022
2d21ac55 1023 vnode_lock_spin(vp);
1c79356b 1024 CLR(vp->v_flag, VMOUNT);
6601e61a 1025 vnode_unlock(vp);
91447636
A
1026 mount_list_lock();
1027 mp->mnt_vtable->vfc_refcount--;
1028 mount_list_unlock();
55e303ae 1029
91447636 1030 if (device_vnode ) {
91447636 1031 vnode_rele(device_vnode);
b0d623f7 1032 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
91447636
A
1033 }
1034 lck_rw_done(&mp->mnt_rwlock);
743b1565 1035 is_rwlock_locked = FALSE;
39037602 1036
6d2010ae
A
1037 /*
1038 * if we get here, we have a mount structure that needs to be freed,
1039 * but since the coveredvp hasn't yet been updated to point at it,
1040 * no need to worry about other threads holding a crossref on this mp
1041 * so it's ok to just free it
1042 */
91447636 1043 mount_lock_destroy(mp);
2d21ac55
A
1044#if CONFIG_MACF
1045 mac_mount_label_destroy(mp);
1046#endif
55e303ae 1047 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1c79356b 1048 }
6d2010ae 1049exit:
91447636 1050 /*
6d2010ae 1051 * drop I/O count on the device vp if there was one
91447636
A
1052 */
1053 if (devpath && devvp)
1054 vnode_put(devvp);
b0d623f7 1055
91447636 1056 return(error);
b0d623f7 1057
6d2010ae 1058/* Error condition exits */
6601e61a 1059out4:
2d21ac55 1060 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
39037602
A
1061
1062 /*
6d2010ae
A
1063 * If the mount has been placed on the covered vp,
1064 * it may have been discovered by now, so we have
1065 * to treat this just like an unmount
1066 */
1067 mount_lock_spin(mp);
1068 mp->mnt_lflag |= MNT_LDEAD;
1069 mount_unlock(mp);
1070
6601e61a 1071 if (device_vnode != NULLVP) {
b0d623f7 1072 vnode_rele(device_vnode);
2d21ac55
A
1073 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1074 ctx);
b0d623f7 1075 did_rele = TRUE;
6601e61a 1076 }
6d2010ae 1077
2d21ac55 1078 vnode_lock_spin(vp);
6d2010ae
A
1079
1080 mp->mnt_crossref++;
6601e61a 1081 vp->v_mountedhere = (mount_t) 0;
6d2010ae 1082
6601e61a 1083 vnode_unlock(vp);
6d2010ae 1084
b0d623f7
A
1085 if (have_usecount) {
1086 vnode_rele(vp);
1087 }
91447636 1088out3:
6d2010ae 1089 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
2d21ac55 1090 vnode_rele(devvp);
91447636
A
1091out2:
1092 if (devpath && devvp)
1093 vnode_put(devvp);
1094out1:
743b1565
A
1095 /* Release mnt_rwlock only when it was taken */
1096 if (is_rwlock_locked == TRUE) {
1097 lck_rw_done(&mp->mnt_rwlock);
1098 }
39037602 1099
6601e61a 1100 if (mntalloc) {
6d2010ae
A
1101 if (mp->mnt_crossref)
1102 mount_dropcrossref(mp, vp, 0);
1103 else {
1104 mount_lock_destroy(mp);
2d21ac55 1105#if CONFIG_MACF
6d2010ae 1106 mac_mount_label_destroy(mp);
2d21ac55 1107#endif
6d2010ae
A
1108 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1109 }
b0d623f7 1110 }
b0d623f7 1111 if (vfsp_ref) {
6601e61a
A
1112 mount_list_lock();
1113 vfsp->vfc_refcount--;
1114 mount_list_unlock();
6601e61a 1115 }
91447636
A
1116
1117 return(error);
1c79356b
A
1118}
1119
39037602 1120/*
b7266188
A
1121 * Flush in-core data, check for competing mount attempts,
1122 * and set VMOUNT
1123 */
6d2010ae
A
1124int
1125prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
b7266188 1126{
39236c6e
A
1127#if !CONFIG_MACF
1128#pragma unused(cnp,fsname)
1129#endif
b7266188
A
1130 struct vnode_attr va;
1131 int error;
1132
6d2010ae
A
1133 if (!skip_auth) {
1134 /*
1135 * If the user is not root, ensure that they own the directory
1136 * onto which we are attempting to mount.
1137 */
1138 VATTR_INIT(&va);
1139 VATTR_WANTED(&va, va_uid);
1140 if ((error = vnode_getattr(vp, &va, ctx)) ||
1141 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
39037602 1142 (!vfs_context_issuser(ctx)))) {
6d2010ae
A
1143 error = EPERM;
1144 goto out;
1145 }
b7266188
A
1146 }
1147
1148 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1149 goto out;
1150
1151 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1152 goto out;
1153
1154 if (vp->v_type != VDIR) {
1155 error = ENOTDIR;
1156 goto out;
1157 }
1158
1159 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1160 error = EBUSY;
1161 goto out;
1162 }
1163
1164#if CONFIG_MACF
1165 error = mac_mount_check_mount(ctx, vp,
1166 cnp, fsname);
1167 if (error != 0)
1168 goto out;
1169#endif
1170
1171 vnode_lock_spin(vp);
1172 SET(vp->v_flag, VMOUNT);
1173 vnode_unlock(vp);
1174
1175out:
1176 return error;
1177}
1178
6d2010ae
A
1179#if CONFIG_IMGSRC_ACCESS
1180
1181#if DEBUG
1182#define IMGSRC_DEBUG(args...) printf(args)
1183#else
1184#define IMGSRC_DEBUG(args...) do { } while(0)
39037602 1185#endif
6d2010ae 1186
b7266188
A
1187static int
1188authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1189{
1190 struct nameidata nd;
6d2010ae 1191 vnode_t vp, realdevvp;
b7266188
A
1192 mode_t accessmode;
1193 int error;
1194
6d2010ae
A
1195 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1196 if ( (error = namei(&nd)) ) {
1197 IMGSRC_DEBUG("namei() failed with %d\n", error);
b7266188 1198 return error;
6d2010ae 1199 }
b7266188 1200
b7266188 1201 vp = nd.ni_vp;
b7266188 1202
6d2010ae
A
1203 if (!vnode_isblk(vp)) {
1204 IMGSRC_DEBUG("Not block device.\n");
b7266188
A
1205 error = ENOTBLK;
1206 goto out;
1207 }
6d2010ae
A
1208
1209 realdevvp = mp->mnt_devvp;
1210 if (realdevvp == NULLVP) {
1211 IMGSRC_DEBUG("No device backs the mount.\n");
b7266188
A
1212 error = ENXIO;
1213 goto out;
1214 }
6d2010ae
A
1215
1216 error = vnode_getwithref(realdevvp);
1217 if (error != 0) {
1218 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1219 goto out;
1220 }
1221
1222 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1223 IMGSRC_DEBUG("Wrong dev_t.\n");
1224 error = ENXIO;
1225 goto out1;
1226 }
1227
1228 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1229
b7266188
A
1230 /*
1231 * If mount by non-root, then verify that user has necessary
1232 * permissions on the device.
1233 */
1234 if (!vfs_context_issuser(ctx)) {
1235 accessmode = KAUTH_VNODE_READ_DATA;
1236 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1237 accessmode |= KAUTH_VNODE_WRITE_DATA;
6d2010ae
A
1238 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1239 IMGSRC_DEBUG("Access denied.\n");
1240 goto out1;
1241 }
b7266188
A
1242 }
1243
1244 *devvpp = vp;
6d2010ae
A
1245
1246out1:
1247 vnode_put(realdevvp);
b7266188 1248out:
6d2010ae 1249 nameidone(&nd);
b7266188
A
1250 if (error) {
1251 vnode_put(vp);
1252 }
1253
1254 return error;
1255}
1256
1257/*
1258 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1259 * and call checkdirs()
1260 */
1261static int
1262place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1263{
1264 int error;
1265
1266 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1267
1268 vnode_lock_spin(vp);
1269 CLR(vp->v_flag, VMOUNT);
1270 vp->v_mountedhere = mp;
1271 vnode_unlock(vp);
1272
1273 /*
1274 * taking the name_cache_lock exclusively will
1275 * insure that everyone is out of the fast path who
1276 * might be trying to use a now stale copy of
1277 * vp->v_mountedhere->mnt_realrootvp
1278 * bumping mount_generation causes the cached values
1279 * to be invalidated
1280 */
1281 name_cache_lock();
1282 mount_generation++;
1283 name_cache_unlock();
1284
1285 error = vnode_ref(vp);
1286 if (error != 0) {
1287 goto out;
1288 }
1289
1290 error = checkdirs(vp, ctx);
1291 if (error != 0) {
1292 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1293 vnode_rele(vp);
1294 goto out;
1295 }
1296
1297out:
1298 if (error != 0) {
1299 mp->mnt_vnodecovered = NULLVP;
1300 }
1301 return error;
1302}
1303
1304static void
1305undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1306{
1307 vnode_rele(vp);
1308 vnode_lock_spin(vp);
1309 vp->v_mountedhere = (mount_t)NULL;
1310 vnode_unlock(vp);
1311
1312 mp->mnt_vnodecovered = NULLVP;
1313}
1314
1315static int
1316mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1317{
1318 int error;
1319
1320 /* unmount in progress return error */
1321 mount_lock_spin(mp);
1322 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1323 mount_unlock(mp);
1324 return EBUSY;
1325 }
1326 mount_unlock(mp);
1327 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1328
1329 /*
1330 * We only allow the filesystem to be reloaded if it
1331 * is currently mounted read-only.
1332 */
1333 if ((flags & MNT_RELOAD) &&
1334 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1335 error = ENOTSUP;
1336 goto out;
1337 }
1338
1339 /*
1340 * Only root, or the user that did the original mount is
1341 * permitted to update it.
1342 */
1343 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
39037602 1344 (!vfs_context_issuser(ctx))) {
b7266188
A
1345 error = EPERM;
1346 goto out;
1347 }
1348#if CONFIG_MACF
1349 error = mac_mount_check_remount(ctx, mp);
1350 if (error != 0) {
1351 goto out;
1352 }
1353#endif
1354
1355out:
1356 if (error) {
1357 lck_rw_done(&mp->mnt_rwlock);
1358 }
1359
1360 return error;
1361}
1362
39037602 1363static void
b7266188
A
1364mount_end_update(mount_t mp)
1365{
1366 lck_rw_done(&mp->mnt_rwlock);
1367}
1368
1369static int
6d2010ae
A
1370get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1371{
1372 vnode_t vp;
1373
1374 if (height >= MAX_IMAGEBOOT_NESTING) {
1375 return EINVAL;
1376 }
1377
1378 vp = imgsrc_rootvnodes[height];
1379 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1380 *rvpp = vp;
1381 return 0;
1382 } else {
1383 return ENOENT;
1384 }
1385}
1386
1387static int
39037602
A
1388relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
1389 const char *fsname, vfs_context_t ctx,
6d2010ae 1390 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
b7266188
A
1391{
1392 int error;
1393 mount_t mp;
1394 boolean_t placed = FALSE;
6d2010ae 1395 vnode_t devvp = NULLVP;
b7266188
A
1396 struct vfstable *vfsp;
1397 user_addr_t devpath;
1398 char *old_mntonname;
6d2010ae
A
1399 vnode_t rvp;
1400 uint32_t height;
1401 uint32_t flags;
b7266188
A
1402
1403 /* If we didn't imageboot, nothing to move */
6d2010ae 1404 if (imgsrc_rootvnodes[0] == NULLVP) {
b7266188
A
1405 return EINVAL;
1406 }
1407
1408 /* Only root can do this */
1409 if (!vfs_context_issuser(ctx)) {
1410 return EPERM;
1411 }
1412
6d2010ae
A
1413 IMGSRC_DEBUG("looking for root vnode.\n");
1414
1415 /*
1416 * Get root vnode of filesystem we're moving.
1417 */
1418 if (by_index) {
1419 if (is64bit) {
1420 struct user64_mnt_imgsrc_args mia64;
1421 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1422 if (error != 0) {
1423 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1424 return error;
1425 }
1426
1427 height = mia64.mi_height;
1428 flags = mia64.mi_flags;
1429 devpath = mia64.mi_devpath;
1430 } else {
1431 struct user32_mnt_imgsrc_args mia32;
1432 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1433 if (error != 0) {
1434 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1435 return error;
1436 }
1437
1438 height = mia32.mi_height;
1439 flags = mia32.mi_flags;
1440 devpath = mia32.mi_devpath;
1441 }
1442 } else {
1443 /*
1444 * For binary compatibility--assumes one level of nesting.
1445 */
1446 if (is64bit) {
1447 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1448 return error;
1449 } else {
1450 user32_addr_t tmp;
1451 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1452 return error;
1453
1454 /* munge into LP64 addr */
1455 devpath = CAST_USER_ADDR_T(tmp);
1456 }
1457
1458 height = 0;
1459 flags = 0;
1460 }
1461
1462 if (flags != 0) {
1463 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1464 return EINVAL;
1465 }
1466
1467 error = get_imgsrc_rootvnode(height, &rvp);
b7266188 1468 if (error != 0) {
6d2010ae 1469 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
b7266188
A
1470 return error;
1471 }
1472
6d2010ae
A
1473 IMGSRC_DEBUG("got root vnode.\n");
1474
b7266188
A
1475 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1476
1477 /* Can only move once */
6d2010ae 1478 mp = vnode_mount(rvp);
b7266188 1479 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1480 IMGSRC_DEBUG("Already moved.\n");
b7266188
A
1481 error = EBUSY;
1482 goto out0;
1483 }
1484
6d2010ae
A
1485 IMGSRC_DEBUG("Starting updated.\n");
1486
b7266188
A
1487 /* Get exclusive rwlock on mount, authorize update on mp */
1488 error = mount_begin_update(mp , ctx, 0);
1489 if (error != 0) {
6d2010ae 1490 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
b7266188
A
1491 goto out0;
1492 }
1493
39037602 1494 /*
b7266188
A
1495 * It can only be moved once. Flag is set under the rwlock,
1496 * so we're now safe to proceed.
1497 */
1498 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1499 IMGSRC_DEBUG("Already moved [2]\n");
b7266188
A
1500 goto out1;
1501 }
39037602
A
1502
1503
6d2010ae 1504 IMGSRC_DEBUG("Preparing coveredvp.\n");
b7266188
A
1505
1506 /* Mark covered vnode as mount in progress, authorize placing mount on top */
6d2010ae 1507 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
b7266188 1508 if (error != 0) {
6d2010ae 1509 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
b7266188
A
1510 goto out1;
1511 }
39037602 1512
6d2010ae
A
1513 IMGSRC_DEBUG("Covered vp OK.\n");
1514
b7266188
A
1515 /* Sanity check the name caller has provided */
1516 vfsp = mp->mnt_vtable;
1517 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
6d2010ae 1518 IMGSRC_DEBUG("Wrong fs name.\n");
b7266188
A
1519 error = EINVAL;
1520 goto out2;
1521 }
1522
1523 /* Check the device vnode and update mount-from name, for local filesystems */
1524 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
6d2010ae 1525 IMGSRC_DEBUG("Local, doing device validation.\n");
b7266188
A
1526
1527 if (devpath != USER_ADDR_NULL) {
1528 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1529 if (error) {
6d2010ae 1530 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
b7266188
A
1531 goto out2;
1532 }
1533
1534 vnode_put(devvp);
1535 }
1536 }
1537
39037602 1538 /*
b7266188 1539 * Place mp on top of vnode, ref the vnode, call checkdirs(),
39037602 1540 * and increment the name cache's mount generation
b7266188 1541 */
6d2010ae
A
1542
1543 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
b7266188
A
1544 error = place_mount_and_checkdirs(mp, vp, ctx);
1545 if (error != 0) {
1546 goto out2;
1547 }
1548
1549 placed = TRUE;
1550
3e170ce0
A
1551 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1552 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
b7266188
A
1553
1554 /* Forbid future moves */
1555 mount_lock(mp);
1556 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1557 mount_unlock(mp);
1558
1559 /* Finally, add to mount list, completely ready to go */
6d2010ae
A
1560 if (mount_list_add(mp) != 0) {
1561 /*
1562 * The system is shutting down trying to umount
1563 * everything, so fail with a plausible errno.
1564 */
1565 error = EBUSY;
b7266188
A
1566 goto out3;
1567 }
1568
1569 mount_end_update(mp);
6d2010ae 1570 vnode_put(rvp);
b7266188
A
1571 FREE(old_mntonname, M_TEMP);
1572
6d2010ae
A
1573 vfs_notify_mount(pvp);
1574
b7266188
A
1575 return 0;
1576out3:
3e170ce0 1577 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
b7266188
A
1578
1579 mount_lock(mp);
1580 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1581 mount_unlock(mp);
1582
1583out2:
39037602 1584 /*
b7266188 1585 * Placing the mp on the vnode clears VMOUNT,
39037602 1586 * so cleanup is different after that point
b7266188
A
1587 */
1588 if (placed) {
1589 /* Rele the vp, clear VMOUNT and v_mountedhere */
1590 undo_place_on_covered_vp(mp, vp);
1591 } else {
1592 vnode_lock_spin(vp);
1593 CLR(vp->v_flag, VMOUNT);
1594 vnode_unlock(vp);
1595 }
1596out1:
1597 mount_end_update(mp);
1598
1599out0:
6d2010ae 1600 vnode_put(rvp);
b7266188
A
1601 FREE(old_mntonname, M_TEMP);
1602 return error;
1603}
1604
1605#endif /* CONFIG_IMGSRC_ACCESS */
1606
91447636 1607void
2d21ac55 1608enablequotas(struct mount *mp, vfs_context_t ctx)
9bccf70c 1609{
9bccf70c
A
1610 struct nameidata qnd;
1611 int type;
1612 char qfpath[MAXPATHLEN];
91447636
A
1613 const char *qfname = QUOTAFILENAME;
1614 const char *qfopsname = QUOTAOPSNAME;
1615 const char *qfextension[] = INITQFNAMES;
9bccf70c 1616
2d21ac55 1617 /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
b0d623f7
A
1618 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1619 return;
1620 }
39037602 1621 /*
9bccf70c
A
1622 * Enable filesystem disk quotas if necessary.
1623 * We ignore errors as this should not interfere with final mount
1624 */
1625 for (type=0; type < MAXQUOTAS; type++) {
2d21ac55 1626 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
6d2010ae
A
1627 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1628 CAST_USER_ADDR_T(qfpath), ctx);
91447636
A
1629 if (namei(&qnd) != 0)
1630 continue; /* option file to trigger quotas is not present */
1631 vnode_put(qnd.ni_vp);
1632 nameidone(&qnd);
2d21ac55 1633 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
91447636 1634
2d21ac55 1635 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
9bccf70c
A
1636 }
1637 return;
1638}
1639
2d21ac55
A
1640
1641static int
39037602 1642checkdirs_callback(proc_t p, void * arg)
2d21ac55
A
1643{
1644 struct cdirargs * cdrp = (struct cdirargs * )arg;
1645 vnode_t olddp = cdrp->olddp;
1646 vnode_t newdp = cdrp->newdp;
1647 struct filedesc *fdp;
1648 vnode_t tvp;
1649 vnode_t fdp_cvp;
1650 vnode_t fdp_rvp;
1651 int cdir_changed = 0;
1652 int rdir_changed = 0;
1653
1654 /*
1655 * XXX Also needs to iterate each thread in the process to see if it
1656 * XXX is using a per-thread current working directory, and, if so,
1657 * XXX update that as well.
1658 */
1659
1660 proc_fdlock(p);
1661 fdp = p->p_fd;
1662 if (fdp == (struct filedesc *)0) {
1663 proc_fdunlock(p);
1664 return(PROC_RETURNED);
1665 }
1666 fdp_cvp = fdp->fd_cdir;
1667 fdp_rvp = fdp->fd_rdir;
1668 proc_fdunlock(p);
1669
1670 if (fdp_cvp == olddp) {
1671 vnode_ref(newdp);
1672 tvp = fdp->fd_cdir;
1673 fdp_cvp = newdp;
1674 cdir_changed = 1;
1675 vnode_rele(tvp);
1676 }
1677 if (fdp_rvp == olddp) {
1678 vnode_ref(newdp);
1679 tvp = fdp->fd_rdir;
1680 fdp_rvp = newdp;
1681 rdir_changed = 1;
1682 vnode_rele(tvp);
1683 }
1684 if (cdir_changed || rdir_changed) {
1685 proc_fdlock(p);
1686 fdp->fd_cdir = fdp_cvp;
1687 fdp->fd_rdir = fdp_rvp;
1688 proc_fdunlock(p);
1689 }
1690 return(PROC_RETURNED);
1691}
1692
1693
1694
1c79356b
A
1695/*
1696 * Scan all active processes to see if any of them have a current
1697 * or root directory onto which the new filesystem has just been
1698 * mounted. If so, replace them with the new mount point.
1699 */
6601e61a 1700static int
2d21ac55 1701checkdirs(vnode_t olddp, vfs_context_t ctx)
1c79356b 1702{
2d21ac55
A
1703 vnode_t newdp;
1704 vnode_t tvp;
6601e61a 1705 int err;
2d21ac55 1706 struct cdirargs cdr;
1c79356b
A
1707
1708 if (olddp->v_usecount == 1)
6601e61a 1709 return(0);
2d21ac55 1710 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
2d21ac55
A
1711
1712 if (err != 0) {
6601e61a 1713#if DIAGNOSTIC
2d21ac55 1714 panic("mount: lost mount: error %d", err);
6601e61a
A
1715#endif
1716 return(err);
1717 }
91447636 1718
2d21ac55
A
1719 cdr.olddp = olddp;
1720 cdr.newdp = newdp;
1721 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1722 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
91447636 1723
1c79356b 1724 if (rootvnode == olddp) {
91447636 1725 vnode_ref(newdp);
fa4905b1 1726 tvp = rootvnode;
1c79356b 1727 rootvnode = newdp;
91447636 1728 vnode_rele(tvp);
1c79356b 1729 }
91447636
A
1730
1731 vnode_put(newdp);
6601e61a 1732 return(0);
1c79356b
A
1733}
1734
1735/*
1736 * Unmount a file system.
1737 *
1738 * Note: unmount takes a path to the vnode mounted on as argument,
1739 * not special file (as before).
1740 */
1c79356b
A
1741/* ARGSUSED */
1742int
b0d623f7 1743unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1c79356b 1744{
2d21ac55 1745 vnode_t vp;
1c79356b
A
1746 struct mount *mp;
1747 int error;
1748 struct nameidata nd;
2d21ac55 1749 vfs_context_t ctx = vfs_context_current();
91447636 1750
39037602 1751 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
2d21ac55 1752 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
1753 error = namei(&nd);
1754 if (error)
1c79356b
A
1755 return (error);
1756 vp = nd.ni_vp;
1757 mp = vp->v_mount;
91447636 1758 nameidone(&nd);
1c79356b 1759
2d21ac55
A
1760#if CONFIG_MACF
1761 error = mac_mount_check_umount(ctx, mp);
1762 if (error != 0) {
1763 vnode_put(vp);
1764 return (error);
1765 }
1766#endif
55e303ae
A
1767 /*
1768 * Must be the root of the filesystem
1769 */
1770 if ((vp->v_flag & VROOT) == 0) {
91447636 1771 vnode_put(vp);
55e303ae
A
1772 return (EINVAL);
1773 }
6601e61a 1774 mount_ref(mp, 0);
91447636 1775 vnode_put(vp);
6601e61a 1776 /* safedounmount consumes the mount ref */
2d21ac55
A
1777 return (safedounmount(mp, uap->flags, ctx));
1778}
1779
1780int
39037602 1781vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx)
2d21ac55
A
1782{
1783 mount_t mp;
1784
1785 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1786 if (mp == (mount_t)0) {
1787 return(ENOENT);
1788 }
1789 mount_ref(mp, 0);
1790 mount_iterdrop(mp);
1791 /* safedounmount consumes the mount ref */
1792 return(safedounmount(mp, flags, ctx));
55e303ae
A
1793}
1794
2d21ac55 1795
55e303ae 1796/*
6601e61a 1797 * The mount struct comes with a mount ref which will be consumed.
55e303ae
A
1798 * Do the actual file system unmount, prevent some common foot shooting.
1799 */
1800int
2d21ac55 1801safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
55e303ae
A
1802{
1803 int error;
2d21ac55 1804 proc_t p = vfs_context_proc(ctx);
55e303ae 1805
316670eb
A
1806 /*
1807 * If the file system is not responding and MNT_NOBLOCK
1808 * is set and not a forced unmount then return EBUSY.
1809 */
1810 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1811 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1812 error = EBUSY;
1813 goto out;
1814 }
1815
1c79356b 1816 /*
39037602 1817 * Skip authorization if the mount is tagged as permissive and
6d2010ae 1818 * this is not a forced-unmount attempt.
1c79356b 1819 */
6d2010ae
A
1820 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1821 /*
1822 * Only root, or the user that did the original mount is
1823 * permitted to unmount this filesystem.
1824 */
1825 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1826 (error = suser(kauth_cred_get(), &p->p_acflag)))
1827 goto out;
1828 }
1c79356b
A
1829 /*
1830 * Don't allow unmounting the root file system.
1831 */
6601e61a 1832 if (mp->mnt_flag & MNT_ROOTFS) {
2d21ac55 1833 error = EBUSY; /* the root is always busy */
6601e61a
A
1834 goto out;
1835 }
1c79356b 1836
b7266188
A
1837#ifdef CONFIG_IMGSRC_ACCESS
1838 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1839 error = EBUSY;
1840 goto out;
1841 }
1842#endif /* CONFIG_IMGSRC_ACCESS */
1843
2d21ac55
A
1844 return (dounmount(mp, flags, 1, ctx));
1845
6601e61a
A
1846out:
1847 mount_drop(mp, 0);
1848 return(error);
1c79356b
A
1849}
1850
1851/*
1852 * Do the actual file system unmount.
1853 */
1854int
2d21ac55 1855dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1c79356b 1856{
2d21ac55 1857 vnode_t coveredvp = (vnode_t)0;
1c79356b 1858 int error;
91447636 1859 int needwakeup = 0;
91447636
A
1860 int forcedunmount = 0;
1861 int lflags = 0;
593a1d5f 1862 struct vnode *devvp = NULLVP;
6d2010ae 1863#if CONFIG_TRIGGERS
39236c6e 1864 proc_t p = vfs_context_proc(ctx);
6d2010ae 1865 int did_vflush = 0;
39236c6e 1866 int pflags_save = 0;
6d2010ae 1867#endif /* CONFIG_TRIGGERS */
91447636 1868
91447636 1869 mount_lock(mp);
fe8ab488
A
1870
1871 /*
1872 * If already an unmount in progress just return EBUSY.
1873 * Even a forced unmount cannot override.
1874 */
91447636 1875 if (mp->mnt_lflag & MNT_LUNMOUNT) {
fe8ab488 1876 if (withref != 0)
6601e61a 1877 mount_drop(mp, 1);
fe8ab488 1878 mount_unlock(mp);
9bccf70c
A
1879 return (EBUSY);
1880 }
39236c6e 1881
fe8ab488
A
1882 if (flags & MNT_FORCE) {
1883 forcedunmount = 1;
1884 mp->mnt_lflag |= MNT_LFORCE;
1885 }
1886
39236c6e
A
1887#if CONFIG_TRIGGERS
1888 if (flags & MNT_NOBLOCK && p != kernproc)
1889 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
1890#endif
1891
1c79356b 1892 mp->mnt_kern_flag |= MNTK_UNMOUNT;
91447636
A
1893 mp->mnt_lflag |= MNT_LUNMOUNT;
1894 mp->mnt_flag &=~ MNT_ASYNC;
2d21ac55
A
1895 /*
1896 * anyone currently in the fast path that
1897 * trips over the cached rootvp will be
1898 * dumped out and forced into the slow path
1899 * to regenerate a new cached value
1900 */
1901 mp->mnt_realrootvp = NULLVP;
91447636 1902 mount_unlock(mp);
39037602 1903
fe8ab488
A
1904 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
1905 /*
1906 * Force unmount any mounts in this filesystem.
1907 * If any unmounts fail - just leave them dangling.
1908 * Avoids recursion.
1909 */
1910 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
1911 }
1912
2d21ac55
A
1913 /*
1914 * taking the name_cache_lock exclusively will
1915 * insure that everyone is out of the fast path who
1916 * might be trying to use a now stale copy of
1917 * vp->v_mountedhere->mnt_realrootvp
1918 * bumping mount_generation causes the cached values
1919 * to be invalidated
1920 */
1921 name_cache_lock();
1922 mount_generation++;
1923 name_cache_unlock();
1924
1925
91447636 1926 lck_rw_lock_exclusive(&mp->mnt_rwlock);
6601e61a
A
1927 if (withref != 0)
1928 mount_drop(mp, 0);
2d21ac55 1929#if CONFIG_FSE
91447636 1930 fsevent_unmount(mp); /* has to come first! */
2d21ac55 1931#endif
91447636
A
1932 error = 0;
1933 if (forcedunmount == 0) {
1934 ubc_umount(mp); /* release cached vnodes */
1935 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2d21ac55 1936 error = VFS_SYNC(mp, MNT_WAIT, ctx);
91447636
A
1937 if (error) {
1938 mount_lock(mp);
1939 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1940 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1941 mp->mnt_lflag &= ~MNT_LFORCE;
1942 goto out;
1943 }
1944 }
1945 }
6d2010ae 1946
3e170ce0
A
1947 IOBSDMountChange(mp, kIOMountChangeUnmount);
1948
6d2010ae
A
1949#if CONFIG_TRIGGERS
1950 vfs_nested_trigger_unmounts(mp, flags, ctx);
1951 did_vflush = 1;
39037602 1952#endif
91447636
A
1953 if (forcedunmount)
1954 lflags |= FORCECLOSE;
1955 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
1956 if ((forcedunmount == 0) && error) {
1957 mount_lock(mp);
9bccf70c 1958 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
1959 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1960 mp->mnt_lflag &= ~MNT_LFORCE;
9bccf70c
A
1961 goto out;
1962 }
91447636
A
1963
1964 /* make sure there are no one in the mount iterations or lookup */
1965 mount_iterdrain(mp);
1966
2d21ac55 1967 error = VFS_UNMOUNT(mp, flags, ctx);
1c79356b 1968 if (error) {
91447636
A
1969 mount_iterreset(mp);
1970 mount_lock(mp);
1c79356b 1971 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
1972 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1973 mp->mnt_lflag &= ~MNT_LFORCE;
1c79356b
A
1974 goto out;
1975 }
1976
1977 /* increment the operations count */
1978 if (!error)
b0d623f7 1979 OSAddAtomic(1, &vfs_nummntops);
91447636
A
1980
1981 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
593a1d5f
A
1982 /* hold an io reference and drop the usecount before close */
1983 devvp = mp->mnt_devvp;
593a1d5f
A
1984 vnode_getalways(devvp);
1985 vnode_rele(devvp);
1986 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
2d21ac55 1987 ctx);
b0d623f7 1988 vnode_clearmountedon(devvp);
593a1d5f 1989 vnode_put(devvp);
91447636
A
1990 }
1991 lck_rw_done(&mp->mnt_rwlock);
1992 mount_list_remove(mp);
1993 lck_rw_lock_exclusive(&mp->mnt_rwlock);
6d2010ae 1994
91447636 1995 /* mark the mount point hook in the vp but not drop the ref yet */
1c79356b 1996 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
fe8ab488
A
1997 /*
1998 * The covered vnode needs special handling. Trying to get an
1999 * iocount must not block here as this may lead to deadlocks
2000 * if the Filesystem to which the covered vnode belongs is
2001 * undergoing forced unmounts. Since we hold a usecount, the
2002 * vnode cannot be reused (it can, however, still be terminated)
2003 */
2004 vnode_getalways(coveredvp);
6d2010ae
A
2005 vnode_lock_spin(coveredvp);
2006
2007 mp->mnt_crossref++;
2008 coveredvp->v_mountedhere = (struct mount *)0;
fe8ab488 2009 CLR(coveredvp->v_flag, VMOUNT);
6d2010ae
A
2010
2011 vnode_unlock(coveredvp);
2012 vnode_put(coveredvp);
1c79356b 2013 }
91447636
A
2014
2015 mount_list_lock();
2016 mp->mnt_vtable->vfc_refcount--;
2017 mount_list_unlock();
2018
2019 cache_purgevfs(mp); /* remove cache entries for this file sys */
2020 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
2021 mount_lock(mp);
2022 mp->mnt_lflag |= MNT_LDEAD;
2023
2024 if (mp->mnt_lflag & MNT_LWAIT) {
2025 /*
2026 * do the wakeup here
2027 * in case we block in mount_refdrain
2028 * which will drop the mount lock
2029 * and allow anyone blocked in vfs_busy
2030 * to wakeup and see the LDEAD state
2031 */
2032 mp->mnt_lflag &= ~MNT_LWAIT;
2033 wakeup((caddr_t)mp);
1c79356b 2034 }
91447636 2035 mount_refdrain(mp);
1c79356b 2036out:
91447636
A
2037 if (mp->mnt_lflag & MNT_LWAIT) {
2038 mp->mnt_lflag &= ~MNT_LWAIT;
39037602 2039 needwakeup = 1;
91447636 2040 }
6d2010ae 2041
6d2010ae 2042#if CONFIG_TRIGGERS
39236c6e
A
2043 if (flags & MNT_NOBLOCK && p != kernproc) {
2044 // Restore P_NOREMOTEHANG bit to its previous value
2045 if ((pflags_save & P_NOREMOTEHANG) == 0)
2046 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2047 }
2048
39037602 2049 /*
6d2010ae 2050 * Callback and context are set together under the mount lock, and
39037602 2051 * never cleared, so we're safe to examine them here, drop the lock,
6d2010ae
A
2052 * and call out.
2053 */
2054 if (mp->mnt_triggercallback != NULL) {
2055 mount_unlock(mp);
2056 if (error == 0) {
2057 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2058 } else if (did_vflush) {
2059 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2060 }
2061 } else {
2062 mount_unlock(mp);
2063 }
39037602 2064#else
91447636 2065 mount_unlock(mp);
6d2010ae
A
2066#endif /* CONFIG_TRIGGERS */
2067
91447636
A
2068 lck_rw_done(&mp->mnt_rwlock);
2069
2070 if (needwakeup)
1c79356b 2071 wakeup((caddr_t)mp);
6d2010ae 2072
55e303ae 2073 if (!error) {
91447636 2074 if ((coveredvp != NULLVP)) {
fe8ab488 2075 vnode_t pvp = NULLVP;
b0d623f7 2076
fe8ab488
A
2077 /*
2078 * The covered vnode needs special handling. Trying to
2079 * get an iocount must not block here as this may lead
2080 * to deadlocks if the Filesystem to which the covered
2081 * vnode belongs is undergoing forced unmounts. Since we
2082 * hold a usecount, the vnode cannot be reused
2083 * (it can, however, still be terminated).
2084 */
2085 vnode_getalways(coveredvp);
6d2010ae
A
2086
2087 mount_dropcrossref(mp, coveredvp, 0);
fe8ab488
A
2088 /*
2089 * We'll _try_ to detect if this really needs to be
2090 * done. The coveredvp can only be in termination (or
2091 * terminated) if the coveredvp's mount point is in a
2092 * forced unmount (or has been) since we still hold the
2093 * ref.
2094 */
2095 if (!vnode_isrecycled(coveredvp)) {
2096 pvp = vnode_getparent(coveredvp);
6d2010ae 2097#if CONFIG_TRIGGERS
fe8ab488
A
2098 if (coveredvp->v_resolve) {
2099 vnode_trigger_rearm(coveredvp, ctx);
2100 }
2101#endif
2102 }
2103
2104 vnode_rele(coveredvp);
91447636 2105 vnode_put(coveredvp);
fe8ab488 2106 coveredvp = NULLVP;
b0d623f7
A
2107
2108 if (pvp) {
2109 lock_vnode_and_post(pvp, NOTE_WRITE);
2110 vnode_put(pvp);
2111 }
91447636
A
2112 } else if (mp->mnt_flag & MNT_ROOTFS) {
2113 mount_lock_destroy(mp);
2d21ac55
A
2114#if CONFIG_MACF
2115 mac_mount_label_destroy(mp);
2116#endif
91447636
A
2117 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2118 } else
2119 panic("dounmount: no coveredvp");
55e303ae 2120 }
1c79356b
A
2121 return (error);
2122}
2123
fe8ab488
A
2124/*
2125 * Unmount any mounts in this filesystem.
2126 */
2127void
2128dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2129{
2130 mount_t smp;
2131 fsid_t *fsids, fsid;
2132 int fsids_sz;
2133 int count = 0, i, m = 0;
2134 vnode_t vp;
2135
2136 mount_list_lock();
2137
2138 // Get an array to hold the submounts fsids.
2139 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2140 count++;
2141 fsids_sz = count * sizeof(fsid_t);
2142 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2143 if (fsids == NULL) {
2144 mount_list_unlock();
2145 goto out;
2146 }
2147 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2148
2149 /*
2150 * Fill the array with submount fsids.
2151 * Since mounts are always added to the tail of the mount list, the
39037602 2152 * list is always in mount order.
fe8ab488
A
2153 * For each mount check if the mounted-on vnode belongs to a
2154 * mount that's already added to our array of mounts to be unmounted.
2155 */
2156 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2157 vp = smp->mnt_vnodecovered;
2158 if (vp == NULL)
2159 continue;
2160 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2161 for (i = 0; i <= m; i++) {
2162 if (fsids[i].val[0] == fsid.val[0] &&
2163 fsids[i].val[1] == fsid.val[1]) {
2164 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2165 break;
2166 }
2167 }
2168 }
2169 mount_list_unlock();
2170
2171 // Unmount the submounts in reverse order. Ignore errors.
2172 for (i = m; i > 0; i--) {
2173 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2174 if (smp) {
2175 mount_ref(smp, 0);
2176 mount_iterdrop(smp);
2177 (void) dounmount(smp, flags, 1, ctx);
2178 }
2179 }
2180out:
2181 if (fsids)
2182 FREE(fsids, M_TEMP);
2183}
2184
91447636
A
2185void
2186mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2187{
6d2010ae
A
2188 vnode_lock(dp);
2189 mp->mnt_crossref--;
2190
2191 if (mp->mnt_crossref < 0)
2192 panic("mount cross refs -ve");
2193
2194 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
39037602 2195
91447636 2196 if (need_put)
6d2010ae 2197 vnode_put_locked(dp);
91447636 2198 vnode_unlock(dp);
6d2010ae
A
2199
2200 mount_lock_destroy(mp);
2201#if CONFIG_MACF
2202 mac_mount_label_destroy(mp);
2203#endif
2204 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2205 return;
2206 }
2207 if (need_put)
2208 vnode_put_locked(dp);
2209 vnode_unlock(dp);
91447636
A
2210}
2211
2212
1c79356b
A
2213/*
2214 * Sync each mounted filesystem.
2215 */
#if DIAGNOSTIC
/* When non-zero, sync() and sync_thread() call vfs_bufstats() before returning. */
int syncprt = 0;
#endif

/* When non-zero, sync() and sync_thread() call vm_countdirtypages() before returning. */
int print_vmpage_stat = 0;
/* Time limit, in seconds, that sync_internal() passes to sync_async(). */
int sync_timeout = 60;
1c79356b 2222
39037602 2223static int
fe8ab488 2224sync_callback(mount_t mp, __unused void *arg)
1c79356b 2225{
91447636 2226 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
fe8ab488
A
2227 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2228
2229 mp->mnt_flag &= ~MNT_ASYNC;
2230 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
2231 if (asyncflag)
2232 mp->mnt_flag |= MNT_ASYNC;
1c79356b 2233 }
1c79356b 2234
fe8ab488
A
2235 return (VFS_RETURNED);
2236}
91447636 2237
91447636
A
2238/* ARGSUSED */
2239int
b0d623f7 2240sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
91447636 2241{
fe8ab488 2242 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
b0d623f7 2243
fe8ab488
A
2244 if (print_vmpage_stat) {
2245 vm_countdirtypages();
2246 }
2247
2248#if DIAGNOSTIC
2249 if (syncprt)
2250 vfs_bufstats();
2251#endif /* DIAGNOSTIC */
2252 return 0;
2253}
2254
2255static void
2256sync_thread(void *arg, __unused wait_result_t wr)
2257{
2258 int *timeout = (int *) arg;
2259
2260 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2261
2262 if (timeout)
2263 wakeup((caddr_t) timeout);
2264 if (print_vmpage_stat) {
1c79356b 2265 vm_countdirtypages();
1c79356b 2266 }
39236c6e 2267
1c79356b
A
2268#if DIAGNOSTIC
2269 if (syncprt)
2270 vfs_bufstats();
2271#endif /* DIAGNOSTIC */
1c79356b
A
2272}
2273
2274/*
fe8ab488 2275 * Sync in a separate thread so we can time out if it blocks.
1c79356b 2276 */
fe8ab488
A
2277static int
2278sync_async(int timeout)
2d21ac55 2279{
fe8ab488 2280 thread_t thd;
2d21ac55 2281 int error;
fe8ab488
A
2282 struct timespec ts = {timeout, 0};
2283
2284 lck_mtx_lock(sync_mtx_lck);
2285 if (kernel_thread_start(sync_thread, &timeout, &thd) != KERN_SUCCESS) {
2286 printf("sync_thread failed\n");
2287 lck_mtx_unlock(sync_mtx_lck);
2288 return (0);
2289 }
2290
2291 error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "sync_thread", &ts);
2292 if (error) {
2293 printf("sync timed out: %d sec\n", timeout);
2294 }
2295 thread_deallocate(thd);
2296
2297 return (0);
2d21ac55
A
2298}
2299
fe8ab488
A
2300/*
2301 * An in-kernel sync for power management to call.
2302 */
2303__private_extern__ int
2304sync_internal(void)
2305{
2306 (void) sync_async(sync_timeout);
2307
2308 return 0;
2309} /* end of sync_internal call */
2310
2311/*
2312 * Change filesystem quotas.
2313 */
2314#if QUOTA
2315int
2316quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
1c79356b 2317{
2d21ac55 2318 struct mount *mp;
91447636
A
2319 int error, quota_cmd, quota_status;
2320 caddr_t datap;
2321 size_t fnamelen;
1c79356b 2322 struct nameidata nd;
2d21ac55 2323 vfs_context_t ctx = vfs_context_current();
91447636
A
2324 struct dqblk my_dqblk;
2325
b0d623f7 2326 AUDIT_ARG(uid, uap->uid);
55e303ae 2327 AUDIT_ARG(cmd, uap->cmd);
6d2010ae
A
2328 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2329 uap->path, ctx);
55e303ae
A
2330 error = namei(&nd);
2331 if (error)
1c79356b
A
2332 return (error);
2333 mp = nd.ni_vp->v_mount;
91447636
A
2334 vnode_put(nd.ni_vp);
2335 nameidone(&nd);
2336
2337 /* copyin any data we will need for downstream code */
2338 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2339
2340 switch (quota_cmd) {
2341 case Q_QUOTAON:
2342 /* uap->arg specifies a file from which to take the quotas */
2343 fnamelen = MAXPATHLEN;
2344 datap = kalloc(MAXPATHLEN);
2345 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2346 break;
2347 case Q_GETQUOTA:
2348 /* uap->arg is a pointer to a dqblk structure. */
2349 datap = (caddr_t) &my_dqblk;
2350 break;
2351 case Q_SETQUOTA:
2352 case Q_SETUSE:
2353 /* uap->arg is a pointer to a dqblk structure. */
2354 datap = (caddr_t) &my_dqblk;
2355 if (proc_is64bit(p)) {
2356 struct user_dqblk my_dqblk64;
2357 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
2358 if (error == 0) {
2359 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2360 }
2361 }
2362 else {
2363 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
2364 }
2365 break;
2366 case Q_QUOTASTAT:
2367 /* uap->arg is a pointer to an integer */
2368 datap = (caddr_t) &quota_status;
2369 break;
2370 default:
2371 datap = NULL;
2372 break;
2373 } /* switch */
2374
2375 if (error == 0) {
2d21ac55 2376 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
91447636
A
2377 }
2378
2379 switch (quota_cmd) {
2380 case Q_QUOTAON:
2381 if (datap != NULL)
2382 kfree(datap, MAXPATHLEN);
2383 break;
2384 case Q_GETQUOTA:
2385 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2386 if (error == 0) {
2387 if (proc_is64bit(p)) {
fe8ab488 2388 struct user_dqblk my_dqblk64 = {.dqb_bhardlimit = 0};
91447636
A
2389 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2390 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
2391 }
2392 else {
2393 error = copyout(datap, uap->arg, sizeof (struct dqblk));
2394 }
2395 }
2396 break;
2397 case Q_QUOTASTAT:
2398 /* uap->arg is a pointer to an integer */
2399 if (error == 0) {
2400 error = copyout(datap, uap->arg, sizeof(quota_status));
2401 }
2402 break;
2403 default:
2404 break;
2405 } /* switch */
2406
2407 return (error);
1c79356b 2408}
2d21ac55
A
2409#else
2410int
b0d623f7 2411quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2d21ac55
A
2412{
2413 return (EOPNOTSUPP);
2414}
2415#endif /* QUOTA */
1c79356b
A
2416
2417/*
2418 * Get filesystem statistics.
2d21ac55
A
2419 *
2420 * Returns: 0 Success
2421 * namei:???
2422 * vfs_update_vfsstat:???
2423 * munge_statfs:EFAULT
1c79356b 2424 */
1c79356b
A
2425/* ARGSUSED */
2426int
b0d623f7 2427statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
1c79356b 2428{
91447636
A
2429 struct mount *mp;
2430 struct vfsstatfs *sp;
1c79356b
A
2431 int error;
2432 struct nameidata nd;
2d21ac55 2433 vfs_context_t ctx = vfs_context_current();
91447636 2434 vnode_t vp;
1c79356b 2435
39037602 2436 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2d21ac55 2437 UIO_USERSPACE, uap->path, ctx);
55e303ae 2438 error = namei(&nd);
39037602 2439 if (error != 0)
1c79356b 2440 return (error);
91447636
A
2441 vp = nd.ni_vp;
2442 mp = vp->v_mount;
2443 sp = &mp->mnt_vfsstat;
2444 nameidone(&nd);
2445
39037602
A
2446#if CONFIG_MACF
2447 error = mac_mount_check_stat(ctx, mp);
2448 if (error != 0)
2449 return (error);
2450#endif
2451
2d21ac55 2452 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
39037602 2453 if (error != 0) {
39236c6e 2454 vnode_put(vp);
1c79356b 2455 return (error);
39236c6e 2456 }
91447636
A
2457
2458 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
39236c6e 2459 vnode_put(vp);
91447636 2460 return (error);
1c79356b
A
2461}
2462
2463/*
2464 * Get filesystem statistics.
2465 */
1c79356b
A
2466/* ARGSUSED */
2467int
b0d623f7 2468fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
1c79356b 2469{
2d21ac55 2470 vnode_t vp;
1c79356b 2471 struct mount *mp;
91447636 2472 struct vfsstatfs *sp;
1c79356b
A
2473 int error;
2474
55e303ae
A
2475 AUDIT_ARG(fd, uap->fd);
2476
91447636 2477 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 2478 return (error);
55e303ae 2479
d1ecb069
A
2480 error = vnode_getwithref(vp);
2481 if (error) {
2482 file_drop(uap->fd);
2483 return (error);
2484 }
2485
91447636 2486 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
55e303ae 2487
91447636
A
2488 mp = vp->v_mount;
2489 if (!mp) {
d1ecb069
A
2490 error = EBADF;
2491 goto out;
91447636 2492 }
39037602
A
2493
2494#if CONFIG_MACF
2495 error = mac_mount_check_stat(vfs_context_current(), mp);
2496 if (error != 0)
2497 goto out;
2498#endif
2499
91447636 2500 sp = &mp->mnt_vfsstat;
39037602 2501 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
d1ecb069 2502 goto out;
91447636 2503 }
91447636
A
2504
2505 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2506
d1ecb069
A
2507out:
2508 file_drop(uap->fd);
2509 vnode_put(vp);
2510
91447636 2511 return (error);
1c79356b
A
2512}
2513
39037602
A
2514/*
2515 * Common routine to handle copying of statfs64 data to user space
2d21ac55 2516 */
39037602 2517static int
2d21ac55
A
2518statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2519{
2520 int error;
2521 struct statfs64 sfs;
39037602 2522
2d21ac55
A
2523 bzero(&sfs, sizeof(sfs));
2524
2525 sfs.f_bsize = sfsp->f_bsize;
2526 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2527 sfs.f_blocks = sfsp->f_blocks;
2528 sfs.f_bfree = sfsp->f_bfree;
2529 sfs.f_bavail = sfsp->f_bavail;
2530 sfs.f_files = sfsp->f_files;
2531 sfs.f_ffree = sfsp->f_ffree;
2532 sfs.f_fsid = sfsp->f_fsid;
2533 sfs.f_owner = sfsp->f_owner;
2534 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2535 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2536 sfs.f_fssubtype = sfsp->f_fssubtype;
6d2010ae
A
2537 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2538 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2539 } else {
2540 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2541 }
2d21ac55
A
2542 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2543 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2544
2545 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2546
2547 return(error);
2548}
2549
39037602
A
2550/*
2551 * Get file system statistics in 64-bit mode
2d21ac55
A
2552 */
2553int
b0d623f7 2554statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
2555{
2556 struct mount *mp;
2557 struct vfsstatfs *sp;
2558 int error;
2559 struct nameidata nd;
2560 vfs_context_t ctxp = vfs_context_current();
2561 vnode_t vp;
2562
39037602 2563 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2d21ac55
A
2564 UIO_USERSPACE, uap->path, ctxp);
2565 error = namei(&nd);
39037602 2566 if (error != 0)
2d21ac55
A
2567 return (error);
2568 vp = nd.ni_vp;
2569 mp = vp->v_mount;
2570 sp = &mp->mnt_vfsstat;
2571 nameidone(&nd);
2572
39037602
A
2573#if CONFIG_MACF
2574 error = mac_mount_check_stat(ctxp, mp);
2575 if (error != 0)
2576 return (error);
2577#endif
2578
2d21ac55 2579 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
39037602 2580 if (error != 0) {
39236c6e 2581 vnode_put(vp);
2d21ac55 2582 return (error);
39236c6e 2583 }
2d21ac55
A
2584
2585 error = statfs64_common(mp, sp, uap->buf);
39236c6e 2586 vnode_put(vp);
2d21ac55
A
2587
2588 return (error);
2589}
2590
39037602
A
2591/*
2592 * Get file system statistics in 64-bit mode
2d21ac55
A
2593 */
2594int
b0d623f7 2595fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
2596{
2597 struct vnode *vp;
2598 struct mount *mp;
2599 struct vfsstatfs *sp;
2600 int error;
2601
2602 AUDIT_ARG(fd, uap->fd);
2603
2604 if ( (error = file_vnode(uap->fd, &vp)) )
2605 return (error);
2606
d1ecb069
A
2607 error = vnode_getwithref(vp);
2608 if (error) {
2609 file_drop(uap->fd);
2610 return (error);
2611 }
2612
2d21ac55
A
2613 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2614
2615 mp = vp->v_mount;
2616 if (!mp) {
316670eb 2617 error = EBADF;
d1ecb069 2618 goto out;
2d21ac55 2619 }
39037602
A
2620
2621#if CONFIG_MACF
2622 error = mac_mount_check_stat(vfs_context_current(), mp);
2623 if (error != 0)
2624 goto out;
2625#endif
2626
2d21ac55
A
2627 sp = &mp->mnt_vfsstat;
2628 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
d1ecb069 2629 goto out;
2d21ac55 2630 }
2d21ac55
A
2631
2632 error = statfs64_common(mp, sp, uap->buf);
2633
d1ecb069
A
2634out:
2635 file_drop(uap->fd);
2636 vnode_put(vp);
2637
2d21ac55
A
2638 return (error);
2639}
91447636
A
2640
2641struct getfsstat_struct {
2642 user_addr_t sfsp;
2d21ac55 2643 user_addr_t *mp;
91447636
A
2644 int count;
2645 int maxcount;
2646 int flags;
2647 int error;
1c79356b 2648};
1c79356b 2649
91447636
A
2650
2651static int
2652getfsstat_callback(mount_t mp, void * arg)
2653{
39037602 2654
91447636
A
2655 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2656 struct vfsstatfs *sp;
91447636 2657 int error, my_size;
2d21ac55 2658 vfs_context_t ctx = vfs_context_current();
91447636
A
2659
2660 if (fstp->sfsp && fstp->count < fstp->maxcount) {
39037602
A
2661#if CONFIG_MACF
2662 error = mac_mount_check_stat(ctx, mp);
2663 if (error != 0) {
2664 fstp->error = error;
2665 return(VFS_RETURNED_DONE);
2666 }
2667#endif
91447636
A
2668 sp = &mp->mnt_vfsstat;
2669 /*
2670 * If MNT_NOWAIT is specified, do not refresh the
b0d623f7 2671 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
91447636 2672 */
b0d623f7 2673 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2d21ac55
A
2674 (error = vfs_update_vfsstat(mp, ctx,
2675 VFS_USER_EVENT))) {
91447636
A
2676 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2677 return(VFS_RETURNED);
1c79356b 2678 }
91447636
A
2679
2680 /*
2681 * Need to handle LP64 version of struct statfs
2682 */
2d21ac55 2683 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
91447636
A
2684 if (error) {
2685 fstp->error = error;
2686 return(VFS_RETURNED_DONE);
1c79356b 2687 }
91447636 2688 fstp->sfsp += my_size;
2d21ac55
A
2689
2690 if (fstp->mp) {
39236c6e 2691#if CONFIG_MACF
2d21ac55
A
2692 error = mac_mount_label_get(mp, *fstp->mp);
2693 if (error) {
2694 fstp->error = error;
2695 return(VFS_RETURNED_DONE);
2696 }
39236c6e 2697#endif
2d21ac55
A
2698 fstp->mp++;
2699 }
2700 }
91447636
A
2701 fstp->count++;
2702 return(VFS_RETURNED);
2703}
2704
2705/*
2706 * Get statistics on all filesystems.
2707 */
2708int
2709getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2d21ac55
A
2710{
2711 struct __mac_getfsstat_args muap;
2712
2713 muap.buf = uap->buf;
2714 muap.bufsize = uap->bufsize;
2715 muap.mac = USER_ADDR_NULL;
2716 muap.macsize = 0;
2717 muap.flags = uap->flags;
2718
2719 return (__mac_getfsstat(p, &muap, retval));
2720}
2721
b0d623f7
A
2722/*
2723 * __mac_getfsstat: Get MAC-related file system statistics
2724 *
2725 * Parameters: p (ignored)
2726 * uap User argument descriptor (see below)
39037602 2727 * retval Count of file system statistics (N stats)
b0d623f7
A
2728 *
2729 * Indirect: uap->bufsize Buffer size
2730 * uap->macsize MAC info size
2731 * uap->buf Buffer where information will be returned
2732 * uap->mac MAC info
2733 * uap->flags File system flags
39037602 2734 *
b0d623f7
A
2735 *
2736 * Returns: 0 Success
2737 * !0 Not success
2738 *
2739 */
2d21ac55
A
2740int
2741__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
91447636
A
2742{
2743 user_addr_t sfsp;
2d21ac55 2744 user_addr_t *mp;
b0d623f7 2745 size_t count, maxcount, bufsize, macsize;
91447636
A
2746 struct getfsstat_struct fst;
2747
b0d623f7
A
2748 bufsize = (size_t) uap->bufsize;
2749 macsize = (size_t) uap->macsize;
2750
91447636 2751 if (IS_64BIT_PROCESS(p)) {
b0d623f7 2752 maxcount = bufsize / sizeof(struct user64_statfs);
91447636
A
2753 }
2754 else {
b0d623f7 2755 maxcount = bufsize / sizeof(struct user32_statfs);
91447636
A
2756 }
2757 sfsp = uap->buf;
2758 count = 0;
2759
2d21ac55
A
2760 mp = NULL;
2761
2762#if CONFIG_MACF
2763 if (uap->mac != USER_ADDR_NULL) {
2764 u_int32_t *mp0;
2765 int error;
b0d623f7 2766 unsigned int i;
2d21ac55 2767
b0d623f7 2768 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2d21ac55
A
2769 if (count != maxcount)
2770 return (EINVAL);
2771
2772 /* Copy in the array */
b0d623f7
A
2773 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2774 if (mp0 == NULL) {
2775 return (ENOMEM);
2776 }
2777
2778 error = copyin(uap->mac, mp0, macsize);
2779 if (error) {
2780 FREE(mp0, M_MACTEMP);
2d21ac55 2781 return (error);
b0d623f7 2782 }
2d21ac55
A
2783
2784 /* Normalize to an array of user_addr_t */
2785 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
b0d623f7
A
2786 if (mp == NULL) {
2787 FREE(mp0, M_MACTEMP);
2788 return (ENOMEM);
2789 }
2790
2d21ac55
A
2791 for (i = 0; i < count; i++) {
2792 if (IS_64BIT_PROCESS(p))
2793 mp[i] = ((user_addr_t *)mp0)[i];
2794 else
2795 mp[i] = (user_addr_t)mp0[i];
2796 }
2797 FREE(mp0, M_MACTEMP);
2798 }
2799#endif
2800
2801
91447636 2802 fst.sfsp = sfsp;
2d21ac55 2803 fst.mp = mp;
91447636
A
2804 fst.flags = uap->flags;
2805 fst.count = 0;
2806 fst.error = 0;
2807 fst.maxcount = maxcount;
2808
39037602 2809
91447636
A
2810 vfs_iterate(0, getfsstat_callback, &fst);
2811
2d21ac55
A
2812 if (mp)
2813 FREE(mp, M_MACTEMP);
2814
91447636
A
2815 if (fst.error ) {
2816 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2817 return(fst.error);
2818 }
2819
2820 if (fst.sfsp && fst.count > fst.maxcount)
2821 *retval = fst.maxcount;
1c79356b 2822 else
91447636 2823 *retval = fst.count;
1c79356b
A
2824 return (0);
2825}
2826
2d21ac55
A
2827static int
2828getfsstat64_callback(mount_t mp, void * arg)
2829{
2830 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2831 struct vfsstatfs *sp;
2832 int error;
2833
2834 if (fstp->sfsp && fstp->count < fstp->maxcount) {
39037602
A
2835#if CONFIG_MACF
2836 error = mac_mount_check_stat(vfs_context_current(), mp);
2837 if (error != 0) {
2838 fstp->error = error;
2839 return(VFS_RETURNED_DONE);
2840 }
2841#endif
2d21ac55
A
2842 sp = &mp->mnt_vfsstat;
2843 /*
b0d623f7
A
2844 * If MNT_NOWAIT is specified, do not refresh the fsstat
2845 * cache. MNT_WAIT overrides MNT_NOWAIT.
2846 *
2847 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2848 * getfsstat, since the constants are out of the same
2849 * namespace.
2d21ac55 2850 */
b0d623f7
A
2851 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2852 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2d21ac55
A
2853 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2854 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2855 return(VFS_RETURNED);
2856 }
2857
2858 error = statfs64_common(mp, sp, fstp->sfsp);
2859 if (error) {
2860 fstp->error = error;
2861 return(VFS_RETURNED_DONE);
2862 }
2863 fstp->sfsp += sizeof(struct statfs64);
2864 }
2865 fstp->count++;
2866 return(VFS_RETURNED);
2867}
2868
2869/*
2870 * Get statistics on all file systems in 64 bit mode.
2871 */
2872int
2873getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2874{
2875 user_addr_t sfsp;
2876 int count, maxcount;
2877 struct getfsstat_struct fst;
2878
2879 maxcount = uap->bufsize / sizeof(struct statfs64);
2880
2881 sfsp = uap->buf;
2882 count = 0;
2883
2884 fst.sfsp = sfsp;
2885 fst.flags = uap->flags;
2886 fst.count = 0;
2887 fst.error = 0;
2888 fst.maxcount = maxcount;
2889
2890 vfs_iterate(0, getfsstat64_callback, &fst);
2891
2892 if (fst.error ) {
2893 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2894 return(fst.error);
2895 }
2896
2897 if (fst.sfsp && fst.count > fst.maxcount)
2898 *retval = fst.maxcount;
2899 else
2900 *retval = fst.count;
2901
2902 return (0);
2903}
2904
fe8ab488
A
2905/*
2906 * gets the associated vnode with the file descriptor passed.
2907 * as input
2908 *
2909 * INPUT
2910 * ctx - vfs context of caller
2911 * fd - file descriptor for which vnode is required.
2912 * vpp - Pointer to pointer to vnode to be returned.
2913 *
2914 * The vnode is returned with an iocount so any vnode obtained
2915 * by this call needs a vnode_put
2916 *
2917 */
39037602 2918int
fe8ab488
A
2919vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
2920{
2921 int error;
2922 vnode_t vp;
2923 struct fileproc *fp;
2924 proc_t p = vfs_context_proc(ctx);
2925
2926 *vpp = NULLVP;
2927
2928 error = fp_getfvp(p, fd, &fp, &vp);
2929 if (error)
2930 return (error);
2931
2932 error = vnode_getwithref(vp);
2933 if (error) {
2934 (void)fp_drop(p, fd, fp, 0);
2935 return (error);
2936 }
2937
2938 (void)fp_drop(p, fd, fp, 0);
2939 *vpp = vp;
2940 return (error);
2941}
2942
2943/*
2944 * Wrapper function around namei to start lookup from a directory
2945 * specified by a file descriptor ni_dirfd.
2946 *
2947 * In addition to all the errors returned by namei, this call can
2948 * return ENOTDIR if the file descriptor does not refer to a directory.
2949 * and EBADF if the file descriptor is not valid.
2950 */
2951int
2952nameiat(struct nameidata *ndp, int dirfd)
2953{
2954 if ((dirfd != AT_FDCWD) &&
2955 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
2956 !(ndp->ni_cnd.cn_flags & USEDVP)) {
2957 int error = 0;
2958 char c;
2959
2960 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
2961 error = copyin(ndp->ni_dirp, &c, sizeof(char));
2962 if (error)
2963 return (error);
2964 } else {
2965 c = *((char *)(ndp->ni_dirp));
2966 }
2967
2968 if (c != '/') {
2969 vnode_t dvp_at;
2970
2971 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
2972 &dvp_at);
2973 if (error)
2974 return (error);
2975
2976 if (vnode_vtype(dvp_at) != VDIR) {
2977 vnode_put(dvp_at);
2978 return (ENOTDIR);
2979 }
2980
2981 ndp->ni_dvp = dvp_at;
2982 ndp->ni_cnd.cn_flags |= USEDVP;
2983 error = namei(ndp);
2984 ndp->ni_cnd.cn_flags &= ~USEDVP;
2985 vnode_put(dvp_at);
2986 return (error);
2987 }
2988 }
2989
2990 return (namei(ndp));
2991}
2992
1c79356b
A
2993/*
2994 * Change current working directory to a given file descriptor.
2995 */
1c79356b 2996/* ARGSUSED */
2d21ac55
A
2997static int
2998common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
1c79356b 2999{
2d21ac55
A
3000 struct filedesc *fdp = p->p_fd;
3001 vnode_t vp;
3002 vnode_t tdp;
3003 vnode_t tvp;
1c79356b 3004 struct mount *mp;
1c79356b 3005 int error;
2d21ac55 3006 vfs_context_t ctx = vfs_context_current();
1c79356b 3007
b0d623f7 3008 AUDIT_ARG(fd, uap->fd);
2d21ac55
A
3009 if (per_thread && uap->fd == -1) {
3010 /*
3011 * Switching back from per-thread to per process CWD; verify we
3012 * in fact have one before proceeding. The only success case
3013 * for this code path is to return 0 preemptively after zapping
3014 * the thread structure contents.
3015 */
3016 thread_t th = vfs_context_thread(ctx);
3017 if (th) {
3018 uthread_t uth = get_bsdthread_info(th);
3019 tvp = uth->uu_cdir;
3020 uth->uu_cdir = NULLVP;
3021 if (tvp != NULLVP) {
3022 vnode_rele(tvp);
3023 return (0);
3024 }
3025 }
3026 return (EBADF);
3027 }
91447636
A
3028
3029 if ( (error = file_vnode(uap->fd, &vp)) )
3030 return(error);
3031 if ( (error = vnode_getwithref(vp)) ) {
3032 file_drop(uap->fd);
3033 return(error);
3034 }
55e303ae
A
3035
3036 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3037
2d21ac55 3038 if (vp->v_type != VDIR) {
1c79356b 3039 error = ENOTDIR;
2d21ac55
A
3040 goto out;
3041 }
3042
3043#if CONFIG_MACF
3044 error = mac_vnode_check_chdir(ctx, vp);
3045 if (error)
3046 goto out;
3047#endif
3048 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3049 if (error)
3050 goto out;
3051
1c79356b 3052 while (!error && (mp = vp->v_mountedhere) != NULL) {
91447636
A
3053 if (vfs_busy(mp, LK_NOWAIT)) {
3054 error = EACCES;
3055 goto out;
55e303ae 3056 }
2d21ac55 3057 error = VFS_ROOT(mp, &tdp, ctx);
91447636 3058 vfs_unbusy(mp);
1c79356b
A
3059 if (error)
3060 break;
91447636 3061 vnode_put(vp);
1c79356b
A
3062 vp = tdp;
3063 }
91447636
A
3064 if (error)
3065 goto out;
3066 if ( (error = vnode_ref(vp)) )
3067 goto out;
3068 vnode_put(vp);
3069
2d21ac55
A
3070 if (per_thread) {
3071 thread_t th = vfs_context_thread(ctx);
3072 if (th) {
3073 uthread_t uth = get_bsdthread_info(th);
3074 tvp = uth->uu_cdir;
3075 uth->uu_cdir = vp;
b0d623f7 3076 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
3077 } else {
3078 vnode_rele(vp);
3079 return (ENOENT);
3080 }
3081 } else {
3082 proc_fdlock(p);
3083 tvp = fdp->fd_cdir;
3084 fdp->fd_cdir = vp;
3085 proc_fdunlock(p);
3086 }
91447636
A
3087
3088 if (tvp)
3089 vnode_rele(tvp);
3090 file_drop(uap->fd);
3091
1c79356b 3092 return (0);
91447636
A
3093out:
3094 vnode_put(vp);
3095 file_drop(uap->fd);
3096
3097 return(error);
1c79356b
A
3098}
3099
2d21ac55 3100int
b0d623f7 3101fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3102{
3103 return common_fchdir(p, uap, 0);
3104}
3105
3106int
b0d623f7 3107__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3108{
3109 return common_fchdir(p, (void *)uap, 1);
3110}
3111
1c79356b 3112/*
b0d623f7 3113 * Change current working directory (".").
2d21ac55
A
3114 *
3115 * Returns: 0 Success
3116 * change_dir:ENOTDIR
3117 * change_dir:???
3118 * vnode_ref:ENOENT No such file or directory
1c79356b 3119 */
1c79356b 3120/* ARGSUSED */
2d21ac55
A
3121static int
3122common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
1c79356b 3123{
2d21ac55 3124 struct filedesc *fdp = p->p_fd;
1c79356b
A
3125 int error;
3126 struct nameidata nd;
2d21ac55
A
3127 vnode_t tvp;
3128 vfs_context_t ctx = vfs_context_current();
91447636 3129
39037602 3130 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
2d21ac55
A
3131 UIO_USERSPACE, uap->path, ctx);
3132 error = change_dir(&nd, ctx);
55e303ae 3133 if (error)
1c79356b 3134 return (error);
91447636
A
3135 if ( (error = vnode_ref(nd.ni_vp)) ) {
3136 vnode_put(nd.ni_vp);
3137 return (error);
3138 }
3139 /*
3140 * drop the iocount we picked up in change_dir
3141 */
3142 vnode_put(nd.ni_vp);
3143
2d21ac55
A
3144 if (per_thread) {
3145 thread_t th = vfs_context_thread(ctx);
3146 if (th) {
3147 uthread_t uth = get_bsdthread_info(th);
3148 tvp = uth->uu_cdir;
3149 uth->uu_cdir = nd.ni_vp;
b0d623f7 3150 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
3151 } else {
3152 vnode_rele(nd.ni_vp);
3153 return (ENOENT);
3154 }
3155 } else {
3156 proc_fdlock(p);
3157 tvp = fdp->fd_cdir;
3158 fdp->fd_cdir = nd.ni_vp;
3159 proc_fdunlock(p);
3160 }
91447636
A
3161
3162 if (tvp)
3163 vnode_rele(tvp);
3164
1c79356b
A
3165 return (0);
3166}
3167
b0d623f7
A
3168
3169/*
3170 * chdir
3171 *
3172 * Change current working directory (".") for the entire process
3173 *
3174 * Parameters: p Process requesting the call
3175 * uap User argument descriptor (see below)
3176 * retval (ignored)
3177 *
3178 * Indirect parameters: uap->path Directory path
3179 *
3180 * Returns: 0 Success
3181 * common_chdir: ENOTDIR
3182 * common_chdir: ENOENT No such file or directory
3183 * common_chdir: ???
3184 *
3185 */
2d21ac55 3186int
b0d623f7 3187chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3188{
3189 return common_chdir(p, (void *)uap, 0);
3190}
3191
b0d623f7
A
3192/*
3193 * __pthread_chdir
3194 *
3195 * Change current working directory (".") for a single thread
3196 *
3197 * Parameters: p Process requesting the call
3198 * uap User argument descriptor (see below)
3199 * retval (ignored)
3200 *
3201 * Indirect parameters: uap->path Directory path
3202 *
3203 * Returns: 0 Success
3204 * common_chdir: ENOTDIR
3205 * common_chdir: ENOENT No such file or directory
3206 * common_chdir: ???
3207 *
3208 */
2d21ac55 3209int
b0d623f7 3210__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3211{
3212 return common_chdir(p, (void *)uap, 1);
3213}
3214
3215
1c79356b
A
3216/*
3217 * Change notion of root (``/'') directory.
3218 */
1c79356b
A
3219/* ARGSUSED */
3220int
b0d623f7 3221chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
1c79356b 3222{
2d21ac55 3223 struct filedesc *fdp = p->p_fd;
1c79356b
A
3224 int error;
3225 struct nameidata nd;
2d21ac55
A
3226 vnode_t tvp;
3227 vfs_context_t ctx = vfs_context_current();
1c79356b 3228
91447636 3229 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
1c79356b
A
3230 return (error);
3231
39037602 3232 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
2d21ac55
A
3233 UIO_USERSPACE, uap->path, ctx);
3234 error = change_dir(&nd, ctx);
55e303ae 3235 if (error)
1c79356b
A
3236 return (error);
3237
2d21ac55
A
3238#if CONFIG_MACF
3239 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3240 &nd.ni_cnd);
3241 if (error) {
91447636
A
3242 vnode_put(nd.ni_vp);
3243 return (error);
3244 }
2d21ac55
A
3245#endif
3246
91447636
A
3247 if ( (error = vnode_ref(nd.ni_vp)) ) {
3248 vnode_put(nd.ni_vp);
1c79356b
A
3249 return (error);
3250 }
91447636 3251 vnode_put(nd.ni_vp);
1c79356b 3252
91447636 3253 proc_fdlock(p);
fa4905b1 3254 tvp = fdp->fd_rdir;
1c79356b 3255 fdp->fd_rdir = nd.ni_vp;
91447636
A
3256 fdp->fd_flags |= FD_CHROOT;
3257 proc_fdunlock(p);
3258
fa4905b1 3259 if (tvp != NULL)
91447636
A
3260 vnode_rele(tvp);
3261
1c79356b
A
3262 return (0);
3263}
3264
3265/*
3266 * Common routine for chroot and chdir.
2d21ac55
A
3267 *
3268 * Returns: 0 Success
3269 * ENOTDIR Not a directory
3270 * namei:??? [anything namei can return]
3271 * vnode_authorize:??? [anything vnode_authorize can return]
1c79356b
A
3272 */
3273static int
91447636 3274change_dir(struct nameidata *ndp, vfs_context_t ctx)
1c79356b 3275{
2d21ac55 3276 vnode_t vp;
1c79356b
A
3277 int error;
3278
91447636 3279 if ((error = namei(ndp)))
1c79356b 3280 return (error);
91447636 3281 nameidone(ndp);
1c79356b 3282 vp = ndp->ni_vp;
2d21ac55
A
3283
3284 if (vp->v_type != VDIR) {
91447636 3285 vnode_put(vp);
2d21ac55
A
3286 return (ENOTDIR);
3287 }
3288
3289#if CONFIG_MACF
3290 error = mac_vnode_check_chdir(ctx, vp);
3291 if (error) {
3292 vnode_put(vp);
3293 return (error);
3294 }
3295#endif
3296
3297 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3298 if (error) {
3299 vnode_put(vp);
3300 return (error);
3301 }
91447636 3302
1c79356b
A
3303 return (error);
3304}
3305
fe8ab488
A
3306/*
3307 * Free the vnode data (for directories) associated with the file glob.
3308 */
3309struct fd_vn_data *
3310fg_vn_data_alloc(void)
3311{
3312 struct fd_vn_data *fvdata;
3313
3314 /* Allocate per fd vnode data */
3315 MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
3316 M_FD_VN_DATA, M_WAITOK | M_ZERO);
3317 lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
3318 return fvdata;
3319}
3320
3321/*
3322 * Free the vnode data (for directories) associated with the file glob.
3323 */
3324void
3325fg_vn_data_free(void *fgvndata)
3326{
3327 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3328
3329 if (fvdata->fv_buf)
3330 FREE(fvdata->fv_buf, M_FD_DIRBUF);
3331 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3332 FREE(fvdata, M_FD_VN_DATA);
3333}
3334
1c79356b
A
3335/*
3336 * Check permissions, allocate an open file structure,
3337 * and call the device open routine if any.
2d21ac55
A
3338 *
 * Parameters: ctx Context; the process and thread are derived from it
 * ndp Initialised nameidata handed to vn_open_auth();
 * the opened vnode is read back from ndp->ni_vp
 * uflags User open(2) flags, converted with FFLAGS()
 * vap Attributes handed to vn_open_auth()
 * fp_zalloc, cra Passed through to falloc_withalloc()
 * retval Receives the new descriptor index on success
 *
3339 * Returns: 0 Success
3340 * EINVAL
3341 * EINTR
3342 * falloc:ENFILE
3343 * falloc:EMFILE
3344 * falloc:ENOMEM
3345 * vn_open_auth:???
3346 * dupfdopen:???
3347 * VNOP_ADVLOCK:???
3348 * vnode_setsize:???
b0d623f7
A
3349 *
3350 * XXX Need to implement uid, gid
1c79356b 3351 */
2d21ac55 3352int
39236c6e
A
3353open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3354 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3355 int32_t *retval)
1c79356b 3356{
2d21ac55
A
3357 proc_t p = vfs_context_proc(ctx);
3358 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2d21ac55
A
3359 struct fileproc *fp;
3360 vnode_t vp;
91447636 3361 int flags, oflags;
1c79356b
A
3362 int type, indx, error;
3363 struct flock lf;
3e170ce0 3364 struct vfs_context context;
ccc36f2f 3365
91447636 3366 oflags = uflags;
ccc36f2f
A
3367
 /* A request with every O_ACCMODE bit set is rejected as invalid. */
3368 if ((oflags & O_ACCMODE) == O_ACCMODE)
3369 return(EINVAL);
3e170ce0 3370
91447636 3371 flags = FFLAGS(uflags);
3e170ce0
A
 /* FENCRYPTED/FUNENCRYPTED are never taken from the caller's flags. */
3372 CLR(flags, FENCRYPTED);
3373 CLR(flags, FUNENCRYPTED);
91447636
A
3374
3375 AUDIT_ARG(fflags, oflags);
3376 AUDIT_ARG(mode, vap->va_mode);
3377
 /* Obtain the descriptor index (indx) and fileproc (fp) before opening the vnode. */
39236c6e
A
3378 if ((error = falloc_withalloc(p,
3379 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
1c79356b 3380 return (error);
91447636 3381 }
 /*
  * Stash -(indx + 1) in uu_dupfd.  If the open below fails with
  * ENODEV/ENXIO and uu_dupfd is non-negative by then, the descriptor is
  * completed via dupfdopen() instead.  NOTE(review): uu_dupfd is
  * presumably updated by the device open routine -- not visible here.
  */
2d21ac55 3382 uu->uu_dupfd = -indx - 1;
91447636 3383
2d21ac55
A
3384 if ((error = vn_open_auth(ndp, &flags, vap))) {
3385 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
39236c6e 3386 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
2d21ac55 3387 fp_drop(p, indx, NULL, 0);
91447636
A
3388 *retval = indx;
3389 return (0);
3390 }
1c79356b
A
3391 }
 /* ERESTART is reported to the caller as EINTR. */
3392 if (error == ERESTART)
91447636
A
3393 error = EINTR;
3394 fp_free(p, indx, fp);
1c79356b
A
3395 return (error);
3396 }
2d21ac55
A
3397 uu->uu_dupfd = 0;
3398 vp = ndp->ni_vp;
55e303ae 3399
3e170ce0 3400 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
91447636
A
3401 fp->f_fglob->fg_ops = &vnops;
3402 fp->f_fglob->fg_data = (caddr_t)vp;
3403
 /*
  * O_EXLOCK / O_SHLOCK: request an F_FLOCK advisory lock described by
  * lf (SEEK_SET, start 0, len 0), with F_WAIT unless FNONBLOCK is set.
  */
1c79356b
A
3404 if (flags & (O_EXLOCK | O_SHLOCK)) {
3405 lf.l_whence = SEEK_SET;
3406 lf.l_start = 0;
3407 lf.l_len = 0;
3408 if (flags & O_EXLOCK)
3409 lf.l_type = F_WRLCK;
3410 else
3411 lf.l_type = F_RDLCK;
3412 type = F_FLOCK;
3413 if ((flags & FNONBLOCK) == 0)
3414 type |= F_WAIT;
2d21ac55
A
3415#if CONFIG_MACF
3416 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3417 F_SETLK, &lf);
3418 if (error)
3419 goto bad;
3420#endif
39236c6e 3421 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
55e303ae 3422 goto bad;
91447636 3423 fp->f_fglob->fg_flag |= FHASLOCK;
1c79356b 3424 }
55e303ae 3425
91447636
A
3426 /* try to truncate by setting the size attribute */
3427 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3428 goto bad;
55e303ae 3429
fe8ab488
A
3430 /*
3431 * For directories we hold some additional information in the fd.
3432 */
3433 if (vnode_vtype(vp) == VDIR) {
3434 fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
3435 } else {
3436 fp->f_fglob->fg_vn_data = NULL;
2d21ac55
A
3437 }
3438
 /*
  * Drop the iocount on vp.  NOTE(review): vp is still dereferenced
  * below; presumably the reference taken by the open keeps it valid --
  * confirm.
  */
91447636 3439 vnode_put(vp);
55e303ae 3440
3e170ce0
A
3441 /*
3442 * The first terminal open (without a O_NOCTTY) by a session leader
3443 * results in it being set as the controlling terminal.
3444 */
3445 if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
3446 !(flags & O_NOCTTY)) {
3447 int tmp = 0;
3448
3449 (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
3450 (caddr_t)&tmp, ctx);
3451 }
3452
 /*
  * With the fd lock held: apply the close-on-exec / close-on-fork
  * flags, release the descriptor slot, and (after the optional
  * secluded-memory bookkeeping) drop fp.
  */
91447636 3453 proc_fdlock(p);
6d2010ae
A
3454 if (flags & O_CLOEXEC)
3455 *fdflags(p, indx) |= UF_EXCLOSE;
39236c6e
A
3456 if (flags & O_CLOFORK)
3457 *fdflags(p, indx) |= UF_FORKCLOSE;
6601e61a 3458 procfdtbl_releasefd(p, indx, NULL);
39037602
A
3459
3460#if CONFIG_SECLUDED_MEMORY
3461 if (secluded_for_filecache &&
3462 FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE &&
3463 vnode_vtype(vp) == VREG) {
3464 memory_object_control_t moc;
3465
3466 moc = ubc_getobject(vp, UBC_FLAGS_NONE);
3467
3468 if (moc == MEMORY_OBJECT_CONTROL_NULL) {
3469 /* nothing to do... */
3470 } else if (fp->f_fglob->fg_flag & FWRITE) {
3471 /* writable -> no longer eligible for secluded pages */
3472 memory_object_mark_eligible_for_secluded(moc,
3473 FALSE);
3474 } else if (secluded_for_filecache == 1) {
3475 char pathname[32] = { 0, };
3476 size_t copied;
3477 /* XXX FBDP: better way to detect /Applications/ ? */
 /* The copy results are not checked; pathname starts zeroed and is re-terminated below. */
3478 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3479 copyinstr(ndp->ni_dirp,
3480 pathname,
3481 sizeof (pathname),
3482 &copied);
3483 } else {
3484 copystr(CAST_DOWN(void *, ndp->ni_dirp),
3485 pathname,
3486 sizeof (pathname),
3487 &copied);
3488 }
3489 pathname[sizeof (pathname) - 1] = '\0';
3490 if (strncmp(pathname,
3491 "/Applications/",
3492 strlen("/Applications/")) == 0 &&
3493 strncmp(pathname,
3494 "/Applications/Camera.app/",
3495 strlen("/Applications/Camera.app/")) != 0) {
3496 /*
3497 * not writable
3498 * AND from "/Applications/"
3499 * AND not from "/Applications/Camera.app/"
3500 * ==> eligible for secluded
3501 */
3502 memory_object_mark_eligible_for_secluded(moc,
3503 TRUE);
3504 }
3505 } else if (secluded_for_filecache == 2) {
3506/* not implemented... */
 /*
  * NOTE(review): most comparisons below are bounded by
  * strlen(vp->v_name), so they match whenever v_name is a prefix of
  * the literal; v_name is dereferenced without a NULL check.
  */
3507 if (!strncmp(vp->v_name,
3508 DYLD_SHARED_CACHE_NAME,
3509 strlen(DYLD_SHARED_CACHE_NAME)) ||
3510 !strncmp(vp->v_name,
3511 "dyld",
3512 strlen(vp->v_name)) ||
3513 !strncmp(vp->v_name,
3514 "launchd",
3515 strlen(vp->v_name)) ||
3516 !strncmp(vp->v_name,
3517 "Camera",
3518 strlen(vp->v_name)) ||
3519 !strncmp(vp->v_name,
3520 "mediaserverd",
3521 strlen(vp->v_name))) {
3522 /*
3523 * This file matters when launching Camera:
3524 * do not store its contents in the secluded
3525 * pool that will be drained on Camera launch.
3526 */
3527 memory_object_mark_eligible_for_secluded(moc,
3528 FALSE);
3529 }
3530 }
3531 }
3532#endif /* CONFIG_SECLUDED_MEMORY */
3533
91447636
A
3534 fp_drop(p, indx, fp, 1);
3535 proc_fdunlock(p);
3536
1c79356b 3537 *retval = indx;
91447636 3538
1c79356b 3539 return (0);
55e303ae 3540bad:
 /*
  * Error unwind after a successful vn_open_auth(): release any flock
  * taken above, close the vnode using a copy of the current context
  * whose credential is replaced by the fileglob's, drop the iocount and
  * free the descriptor.
  */
3e170ce0 3541 context = *vfs_context_current();
2d21ac55 3542 context.vc_ucred = fp->f_fglob->fg_cred;
39037602 3543
fe8ab488
A
3544 if ((fp->f_fglob->fg_flag & FHASLOCK) &&
3545 (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
3546 lf.l_whence = SEEK_SET;
3547 lf.l_start = 0;
3548 lf.l_len = 0;
3549 lf.l_type = F_UNLCK;
39037602 3550
fe8ab488
A
3551 (void)VNOP_ADVLOCK(
3552 vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
3553 }
2d21ac55
A
3554
3555 vn_close(vp, fp->f_fglob->fg_flag, &context);
91447636
A
3556 vnode_put(vp);
3557 fp_free(p, indx, fp);
3558
55e303ae 3559 return (error);
1c79356b
A
3560}
3561
fe8ab488
A
3562/*
3563 * While most of the *at syscall handlers can call nameiat() which
3564 * is a wrapper around namei, the use of namei and initialisation
3565 * of nameidata are far removed and in different functions - namei
3566 * gets called in vn_open_auth for open1. So we'll just do here what
3567 * nameiat() does.
3568 */
3569static int
3570open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3571 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
3572 int dirfd)
3573{
3574 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
3575 int error;
3576 char c;
3577
3578 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3579 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3580 if (error)
3581 return (error);
3582 } else {
3583 c = *((char *)(ndp->ni_dirp));
3584 }
3585
3586 if (c != '/') {
3587 vnode_t dvp_at;
3588
3589 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3590 &dvp_at);
3591 if (error)
3592 return (error);
3593
3594 if (vnode_vtype(dvp_at) != VDIR) {
3595 vnode_put(dvp_at);
3596 return (ENOTDIR);
3597 }
3598
3599 ndp->ni_dvp = dvp_at;
3600 ndp->ni_cnd.cn_flags |= USEDVP;
3601 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
3602 retval);
3603 vnode_put(dvp_at);
3604 return (error);
3605 }
3606 }
3607
3608 return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
3609}
3610
0c530ab8 3611/*
b0d623f7 3612 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
0c530ab8
A
3613 *
3614 * Parameters: p Process requesting the open
3615 * uap User argument descriptor (see below)
3616 * retval Pointer to an area to receive the
3617 * return calue from the system call
3618 *
3619 * Indirect: uap->path Path to open (same as 'open')
3620 * uap->flags Flags to open (same as 'open'
3621 * uap->uid UID to set, if creating
3622 * uap->gid GID to set, if creating
3623 * uap->mode File mode, if creating (same as 'open')
3624 * uap->xsecurity ACL to set, if creating
3625 *
3626 * Returns: 0 Success
3627 * !0 errno value
3628 *
3629 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3630 *
3631 * XXX: We should enummerate the possible errno values here, and where
3632 * in the code they originated.
3633 */
1c79356b 3634int
b0d623f7 3635open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
91447636 3636{
2d21ac55 3637 struct filedesc *fdp = p->p_fd;
91447636
A
3638 int ciferror;
3639 kauth_filesec_t xsecdst;
3640 struct vnode_attr va;
2d21ac55 3641 struct nameidata nd;
91447636
A
3642 int cmode;
3643
b0d623f7
A
3644 AUDIT_ARG(owner, uap->uid, uap->gid);
3645
91447636
A
3646 xsecdst = NULL;
3647 if ((uap->xsecurity != USER_ADDR_NULL) &&
3648 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3649 return ciferror;
3650
91447636
A
3651 VATTR_INIT(&va);
3652 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3653 VATTR_SET(&va, va_mode, cmode);
3654 if (uap->uid != KAUTH_UID_NONE)
3655 VATTR_SET(&va, va_uid, uap->uid);
3656 if (uap->gid != KAUTH_GID_NONE)
3657 VATTR_SET(&va, va_gid, uap->gid);
3658 if (xsecdst != NULL)
3659 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3660
6d2010ae
A
3661 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3662 uap->path, vfs_context_current());
2d21ac55 3663
39236c6e
A
3664 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3665 fileproc_alloc_init, NULL, retval);
91447636
A
3666 if (xsecdst != NULL)
3667 kauth_filesec_free(xsecdst);
3668
3669 return ciferror;
3670}
3671
39037602 3672/*
316670eb 3673 * Go through the data-protected atomically controlled open (2)
39037602 3674 *
316670eb
A
3675 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3676 */
3677int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3678 int flags = uap->flags;
3679 int class = uap->class;
3680 int dpflags = uap->dpflags;
3681
39037602 3682 /*
316670eb
A
3683 * Follow the same path as normal open(2)
3684 * Look up the item if it exists, and acquire the vnode.
3685 */
3686 struct filedesc *fdp = p->p_fd;
3687 struct vnode_attr va;
3688 struct nameidata nd;
3689 int cmode;
3690 int error;
39037602 3691
316670eb
A
3692 VATTR_INIT(&va);
3693 /* Mask off all but regular access permissions */
3694 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3695 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3696
3697 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3698 uap->path, vfs_context_current());
3699
39037602
A
3700 /*
3701 * Initialize the extra fields in vnode_attr to pass down our
316670eb
A
3702 * extra fields.
3703 * 1. target cprotect class.
39037602
A
3704 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3705 */
3706 if (flags & O_CREAT) {
3e170ce0
A
3707 /* lower level kernel code validates that the class is valid before applying it. */
3708 if (class != PROTECTION_CLASS_DEFAULT) {
3709 /*
3710 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3711 * file behave the same as open (2)
3712 */
3713 VATTR_SET(&va, va_dataprotect_class, class);
3714 }
316670eb 3715 }
39037602 3716
3e170ce0 3717 if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
316670eb
A
3718 if ( flags & (O_RDWR | O_WRONLY)) {
3719 /* Not allowed to write raw encrypted bytes */
39037602
A
3720 return EINVAL;
3721 }
3e170ce0
A
3722 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
3723 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3724 }
3725 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
3726 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
3727 }
316670eb
A
3728 }
3729
39236c6e
A
3730 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3731 fileproc_alloc_init, NULL, retval);
316670eb
A
3732
3733 return error;
3734}
3735
fe8ab488
A
3736static int
3737openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
3738 int fd, enum uio_seg segflg, int *retval)
2d21ac55 3739{
fe8ab488 3740 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
91447636 3741 struct vnode_attr va;
2d21ac55 3742 struct nameidata nd;
91447636 3743 int cmode;
1c79356b 3744
91447636
A
3745 VATTR_INIT(&va);
3746 /* Mask off all but regular access permissions */
fe8ab488 3747 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
91447636
A
3748 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3749
fe8ab488
A
3750 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
3751 segflg, path, ctx);
2d21ac55 3752
fe8ab488
A
3753 return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
3754 retval, fd));
1c79356b 3755}
91447636 3756
fe8ab488
A
3757int
3758open(proc_t p, struct open_args *uap, int32_t *retval)
3759{
3760 __pthread_testcancel(1);
3761 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3762}
1c79356b 3763
fe8ab488
A
3764int
3765open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3766 int32_t *retval)
3767{
3768 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3769 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3770}
91447636 3771
1c79356b 3772int
fe8ab488
A
3773openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3774 int32_t *retval)
1c79356b 3775{
fe8ab488
A
3776 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3777 uap->mode, uap->fd, UIO_USERSPACE, retval));
3778}
91447636 3779
fe8ab488
A
3780int
3781openat(proc_t p, struct openat_args *uap, int32_t *retval)
3782{
3783 __pthread_testcancel(1);
3784 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3785}
3786
3787/*
3788 * openbyid_np: open a file given a file system id and a file system object id
3789 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3790 * file systems that don't support object ids it is a node id (uint64_t).
3791 *
3792 * Parameters: p Process requesting the open
3793 * uap User argument descriptor (see below)
3794 * retval Pointer to an area to receive the
3795 * return calue from the system call
3796 *
3797 * Indirect: uap->path Path to open (same as 'open')
3798 *
3799 * uap->fsid id of target file system
3800 * uap->objid id of target file system object
3801 * uap->flags Flags to open (same as 'open')
3802 *
3803 * Returns: 0 Success
3804 * !0 errno value
3805 *
3806 *
3807 * XXX: We should enummerate the possible errno values here, and where
3808 * in the code they originated.
3809 */
3810int
3811openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
3812{
3813 fsid_t fsid;
3814 uint64_t objid;
3815 int error;
3816 char *buf = NULL;
3817 int buflen = MAXPATHLEN;
3818 int pathlen = 0;
3819 vfs_context_t ctx = vfs_context_current();
3820
490019cf
A
3821 if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
3822 return (error);
3823 }
3824
fe8ab488
A
3825 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
3826 return (error);
3827 }
3828
3829 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3830 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
3831 return (error);
3832 }
3833
3834 AUDIT_ARG(value32, fsid.val[0]);
3835 AUDIT_ARG(value64, objid);
3836
3837 /*resolve path from fsis, objid*/
3838 do {
3839 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
3840 if (buf == NULL) {
3841 return (ENOMEM);
3842 }
3843
3844 error = fsgetpath_internal(
3845 ctx, fsid.val[0], objid,
3846 buflen, buf, &pathlen);
3847
3848 if (error) {
3849 FREE(buf, M_TEMP);
3850 buf = NULL;
3851 }
3852 } while (error == ENOSPC && (buflen += MAXPATHLEN));
3853
3854 if (error) {
3855 return error;
3856 }
3857
3858 buf[pathlen] = 0;
3859
3860 error = openat_internal(
3861 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
3862
3863 FREE(buf, M_TEMP);
3864
3865 return error;
3866}
3867
3868
3869/*
3870 * Create a special file.
3871 */
3872static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3873
3874int
3875mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
3876{
3877 struct vnode_attr va;
3878 vfs_context_t ctx = vfs_context_current();
3879 int error;
3880 struct nameidata nd;
3881 vnode_t vp, dvp;
3882
3883 VATTR_INIT(&va);
3884 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3885 VATTR_SET(&va, va_rdev, uap->dev);
91447636
A
3886
3887 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3888 if ((uap->mode & S_IFMT) == S_IFIFO)
2d21ac55 3889 return(mkfifo1(ctx, uap->path, &va));
1c79356b 3890
55e303ae 3891 AUDIT_ARG(mode, uap->mode);
b0d623f7 3892 AUDIT_ARG(value32, uap->dev);
91447636 3893
2d21ac55 3894 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
1c79356b 3895 return (error);
39037602 3896 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
2d21ac55 3897 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
3898 error = namei(&nd);
3899 if (error)
1c79356b 3900 return (error);
91447636 3901 dvp = nd.ni_dvp;
1c79356b 3902 vp = nd.ni_vp;
91447636
A
3903
3904 if (vp != NULL) {
1c79356b 3905 error = EEXIST;
91447636 3906 goto out;
1c79356b 3907 }
55e303ae 3908
91447636 3909 switch (uap->mode & S_IFMT) {
91447636
A
3910 case S_IFCHR:
3911 VATTR_SET(&va, va_type, VCHR);
3912 break;
3913 case S_IFBLK:
3914 VATTR_SET(&va, va_type, VBLK);
3915 break;
91447636
A
3916 default:
3917 error = EINVAL;
3918 goto out;
3919 }
2d21ac55
A
3920
3921#if CONFIG_MACF
6d2010ae
A
3922 error = mac_vnode_check_create(ctx,
3923 nd.ni_dvp, &nd.ni_cnd, &va);
3924 if (error)
3925 goto out;
2d21ac55
A
3926#endif
3927
3928 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3929 goto out;
3930
6d2010ae 3931 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
91447636
A
3932 goto out;
3933
3934 if (vp) {
3935 int update_flags = 0;
3936
3937 // Make sure the name & parent pointers are hooked up
3938 if (vp->v_name == NULL)
3939 update_flags |= VNODE_UPDATE_NAME;
3940 if (vp->v_parent == NULLVP)
3941 update_flags |= VNODE_UPDATE_PARENT;
3942
3943 if (update_flags)
3944 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3945
2d21ac55
A
3946#if CONFIG_FSE
3947 add_fsevent(FSE_CREATE_FILE, ctx,
91447636
A
3948 FSE_ARG_VNODE, vp,
3949 FSE_ARG_DONE);
2d21ac55 3950#endif
1c79356b 3951 }
91447636
A
3952
3953out:
3954 /*
3955 * nameidone has to happen before we vnode_put(dvp)
3956 * since it may need to release the fs_nodelock on the dvp
3957 */
3958 nameidone(&nd);
3959
3960 if (vp)
3961 vnode_put(vp);
3962 vnode_put(dvp);
3963
1c79356b
A
3964 return (error);
3965}
3966
3967/*
3968 * Create a named pipe.
2d21ac55
A
3969 *
3970 * Returns: 0 Success
3971 * EEXIST
3972 * namei:???
3973 * vnode_authorize:???
3974 * vn_create:???
1c79356b 3975 */
91447636
A
3976static int
3977mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
1c79356b 3978{
91447636 3979 vnode_t vp, dvp;
1c79356b
A
3980 int error;
3981 struct nameidata nd;
55e303ae 3982
39037602 3983 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
91447636 3984 UIO_USERSPACE, upath, ctx);
55e303ae
A
3985 error = namei(&nd);
3986 if (error)
1c79356b 3987 return (error);
91447636
A
3988 dvp = nd.ni_dvp;
3989 vp = nd.ni_vp;
3990
3991 /* check that this is a new file and authorize addition */
3992 if (vp != NULL) {
3993 error = EEXIST;
3994 goto out;
3995 }
2d21ac55
A
3996 VATTR_SET(vap, va_type, VFIFO);
3997
6d2010ae 3998 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
2d21ac55 3999 goto out;
2d21ac55 4000
6d2010ae 4001 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
91447636
A
4002out:
4003 /*
4004 * nameidone has to happen before we vnode_put(dvp)
4005 * since it may need to release the fs_nodelock on the dvp
4006 */
4007 nameidone(&nd);
4008
4009 if (vp)
4010 vnode_put(vp);
4011 vnode_put(dvp);
4012
55e303ae 4013 return error;
91447636
A
4014}
4015
0c530ab8
A
4016
4017/*
b0d623f7 4018 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
0c530ab8
A
4019 *
4020 * Parameters: p Process requesting the open
4021 * uap User argument descriptor (see below)
4022 * retval (Ignored)
4023 *
4024 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4025 * uap->uid UID to set
4026 * uap->gid GID to set
4027 * uap->mode File mode to set (same as 'mkfifo')
4028 * uap->xsecurity ACL to set, if creating
4029 *
4030 * Returns: 0 Success
4031 * !0 errno value
4032 *
4033 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4034 *
4035 * XXX: We should enummerate the possible errno values here, and where
4036 * in the code they originated.
4037 */
91447636 4038int
b0d623f7 4039mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
91447636
A
4040{
4041 int ciferror;
4042 kauth_filesec_t xsecdst;
91447636
A
4043 struct vnode_attr va;
4044
b0d623f7
A
4045 AUDIT_ARG(owner, uap->uid, uap->gid);
4046
91447636
A
4047 xsecdst = KAUTH_FILESEC_NONE;
4048 if (uap->xsecurity != USER_ADDR_NULL) {
4049 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4050 return ciferror;
4051 }
4052
91447636
A
4053 VATTR_INIT(&va);
4054 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4055 if (uap->uid != KAUTH_UID_NONE)
4056 VATTR_SET(&va, va_uid, uap->uid);
4057 if (uap->gid != KAUTH_GID_NONE)
4058 VATTR_SET(&va, va_gid, uap->gid);
4059 if (xsecdst != KAUTH_FILESEC_NONE)
4060 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4061
2d21ac55 4062 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
91447636
A
4063
4064 if (xsecdst != KAUTH_FILESEC_NONE)
4065 kauth_filesec_free(xsecdst);
4066 return ciferror;
4067}
4068
4069/* ARGSUSED */
4070int
b0d623f7 4071mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
91447636 4072{
91447636
A
4073 struct vnode_attr va;
4074
91447636
A
4075 VATTR_INIT(&va);
4076 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4077
2d21ac55 4078 return(mkfifo1(vfs_context_current(), uap->path, &va));
1c79356b
A
4079}
4080
b0d623f7
A
4081
/*
 * my_strrchr
 *
 * Locate the last occurrence of 'ch' in the NUL-terminated string 'p'.
 *
 * As with the standard strrchr(), 'ch' is converted to char before it
 * is compared, so a byte value passed as an unsigned quantity (>= 0x80)
 * still matches when plain char is signed.  The terminating NUL counts
 * as part of the string: searching for '\0' yields the terminator.
 *
 * Returns:	Pointer to the last match, or NULL if 'ch' does not occur.
 */
static char *
my_strrchr(char *p, int ch)
{
	const char target = (char)ch;
	char *save;

	for (save = NULL;; ++p) {
		if (*p == target)
			save = p;
		if (!*p)
			return(save);
	}
	/* NOTREACHED */
}
4095
4096extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4097
4098int
4099safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4100{
4101 int ret, len = _len;
4102
4103 *truncated_path = 0;
4104 ret = vn_getpath(dvp, path, &len);
4105 if (ret == 0 && len < (MAXPATHLEN - 1)) {
4106 if (leafname) {
4107 path[len-1] = '/';
4108 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
4109 if (len > MAXPATHLEN) {
4110 char *ptr;
39037602 4111
b0d623f7
A
4112 // the string got truncated!
4113 *truncated_path = 1;
4114 ptr = my_strrchr(path, '/');
4115 if (ptr) {
4116 *ptr = '\0'; // chop off the string at the last directory component
4117 }
4118 len = strlen(path) + 1;
4119 }
4120 }
4121 } else if (ret == 0) {
4122 *truncated_path = 1;
4123 } else if (ret != 0) {
4124 struct vnode *mydvp=dvp;
4125
4126 if (ret != ENOSPC) {
4127 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4128 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
39037602 4129 }
b0d623f7 4130 *truncated_path = 1;
39037602 4131
b0d623f7
A
4132 do {
4133 if (mydvp->v_parent != NULL) {
4134 mydvp = mydvp->v_parent;
4135 } else if (mydvp->v_mount) {
4136 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4137 break;
4138 } else {
4139 // no parent and no mount point? only thing is to punt and say "/" changed
4140 strlcpy(path, "/", _len);
4141 len = 2;
4142 mydvp = NULL;
4143 }
39037602 4144
b0d623f7
A
4145 if (mydvp == NULL) {
4146 break;
4147 }
4148
4149 len = _len;
4150 ret = vn_getpath(mydvp, path, &len);
4151 } while (ret == ENOSPC);
4152 }
4153
4154 return len;
4155}
4156
4157
1c79356b
A
4158/*
4159 * Make a hard file link.
2d21ac55
A
4160 *
4161 * Returns: 0 Success
4162 * EPERM
4163 * EEXIST
4164 * EXDEV
4165 * namei:???
4166 * vnode_authorize:???
4167 * VNOP_LINK:???
1c79356b 4168 */
1c79356b 4169/* ARGSUSED */
fe8ab488
A
4170static int
4171linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
4172 user_addr_t link, int flag, enum uio_seg segflg)
1c79356b 4173{
91447636 4174 vnode_t vp, dvp, lvp;
1c79356b 4175 struct nameidata nd;
fe8ab488 4176 int follow;
1c79356b 4177 int error;
b0d623f7 4178#if CONFIG_FSE
91447636 4179 fse_info finfo;
b0d623f7 4180#endif
91447636 4181 int need_event, has_listeners;
2d21ac55 4182 char *target_path = NULL;
b0d623f7 4183 int truncated=0;
1c79356b 4184
91447636
A
4185 vp = dvp = lvp = NULLVP;
4186
4187 /* look up the object we are linking to */
fe8ab488
A
4188 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
4189 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
4190 segflg, path, ctx);
4191
4192 error = nameiat(&nd, fd1);
55e303ae 4193 if (error)
1c79356b
A
4194 return (error);
4195 vp = nd.ni_vp;
91447636
A
4196
4197 nameidone(&nd);
4198
2d21ac55
A
4199 /*
4200 * Normally, linking to directories is not supported.
4201 * However, some file systems may have limited support.
4202 */
91447636 4203 if (vp->v_type == VDIR) {
39037602 4204 if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
2d21ac55
A
4205 error = EPERM; /* POSIX */
4206 goto out;
4207 }
39037602 4208
2d21ac55
A
4209 /* Linking to a directory requires ownership. */
4210 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
4211 struct vnode_attr dva;
4212
4213 VATTR_INIT(&dva);
4214 VATTR_WANTED(&dva, va_uid);
4215 if (vnode_getattr(vp, &dva, ctx) != 0 ||
4216 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
4217 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
4218 error = EACCES;
4219 goto out;
4220 }
4221 }
91447636
A
4222 }
4223
91447636 4224 /* lookup the target node */
6d2010ae
A
4225#if CONFIG_TRIGGERS
4226 nd.ni_op = OP_LINK;
4227#endif
91447636 4228 nd.ni_cnd.cn_nameiop = CREATE;
2d21ac55 4229 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
fe8ab488
A
4230 nd.ni_dirp = link;
4231 error = nameiat(&nd, fd2);
91447636
A
4232 if (error != 0)
4233 goto out;
4234 dvp = nd.ni_dvp;
4235 lvp = nd.ni_vp;
2d21ac55
A
4236
4237#if CONFIG_MACF
4238 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
4239 goto out2;
4240#endif
4241
4242 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4243 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
4244 goto out2;
4245
91447636
A
4246 /* target node must not exist */
4247 if (lvp != NULLVP) {
4248 error = EEXIST;
4249 goto out2;
4250 }
4251 /* cannot link across mountpoints */
4252 if (vnode_mount(vp) != vnode_mount(dvp)) {
4253 error = EXDEV;
4254 goto out2;
4255 }
39037602 4256
91447636 4257 /* authorize creation of the target note */
2d21ac55 4258 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
91447636
A
4259 goto out2;
4260
4261 /* and finally make the link */
2d21ac55 4262 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
91447636
A
4263 if (error)
4264 goto out2;
4265
39236c6e
A
4266#if CONFIG_MACF
4267 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
4268#endif
4269
2d21ac55 4270#if CONFIG_FSE
91447636 4271 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
2d21ac55
A
4272#else
4273 need_event = 0;
4274#endif
91447636
A
4275 has_listeners = kauth_authorize_fileop_has_listeners();
4276
4277 if (need_event || has_listeners) {
91447636
A
4278 char *link_to_path = NULL;
4279 int len, link_name_len;
4280
4281 /* build the path to the new link file */
2d21ac55
A
4282 GET_PATH(target_path);
4283 if (target_path == NULL) {
4284 error = ENOMEM;
4285 goto out2;
4286 }
4287
b0d623f7 4288 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
91447636
A
4289
4290 if (has_listeners) {
4291 /* build the path to file we are linking to */
2d21ac55
A
4292 GET_PATH(link_to_path);
4293 if (link_to_path == NULL) {
4294 error = ENOMEM;
4295 goto out2;
4296 }
4297
91447636 4298 link_name_len = MAXPATHLEN;
fe8ab488
A
4299 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
4300 /*
39037602 4301 * Call out to allow 3rd party notification of rename.
fe8ab488
A
4302 * Ignore result of kauth_authorize_fileop call.
4303 */
39037602
A
4304 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
4305 (uintptr_t)link_to_path,
fe8ab488
A
4306 (uintptr_t)target_path);
4307 }
2d21ac55
A
4308 if (link_to_path != NULL) {
4309 RELEASE_PATH(link_to_path);
4310 }
91447636 4311 }
2d21ac55 4312#if CONFIG_FSE
91447636
A
4313 if (need_event) {
4314 /* construct fsevent */
2d21ac55 4315 if (get_fse_info(vp, &finfo, ctx) == 0) {
b0d623f7
A
4316 if (truncated) {
4317 finfo.mode |= FSE_TRUNCATED_PATH;
4318 }
4319
91447636 4320 // build the path to the destination of the link
2d21ac55 4321 add_fsevent(FSE_CREATE_FILE, ctx,
91447636
A
4322 FSE_ARG_STRING, len, target_path,
4323 FSE_ARG_FINFO, &finfo,
4324 FSE_ARG_DONE);
1c79356b 4325 }
b0d623f7
A
4326 if (vp->v_parent) {
4327 add_fsevent(FSE_STAT_CHANGED, ctx,
4328 FSE_ARG_VNODE, vp->v_parent,
4329 FSE_ARG_DONE);
4330 }
1c79356b 4331 }
2d21ac55 4332#endif
1c79356b 4333 }
91447636
A
4334out2:
4335 /*
4336 * nameidone has to happen before we vnode_put(dvp)
4337 * since it may need to release the fs_nodelock on the dvp
4338 */
4339 nameidone(&nd);
2d21ac55
A
4340 if (target_path != NULL) {
4341 RELEASE_PATH(target_path);
4342 }
91447636
A
4343out:
4344 if (lvp)
4345 vnode_put(lvp);
4346 if (dvp)
4347 vnode_put(dvp);
4348 vnode_put(vp);
4349 return (error);
4350}
1c79356b 4351
fe8ab488
A
4352int
4353link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4354{
4355 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4356 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4357}
4358
4359int
4360linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4361{
4362 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4363 return (EINVAL);
4364
4365 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4366 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4367}
4368
1c79356b
A
4369/*
4370 * Make a symbolic link.
91447636
A
4371 *
4372 * We could add support for ACLs here too...
1c79356b 4373 */
1c79356b 4374/* ARGSUSED */
fe8ab488
A
4375static int
4376symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
4377 user_addr_t link, enum uio_seg segflg)
1c79356b 4378{
91447636
A
4379 struct vnode_attr va;
4380 char *path;
1c79356b
A
4381 int error;
4382 struct nameidata nd;
91447636 4383 vnode_t vp, dvp;
1c79356b 4384 size_t dummy=0;
fe8ab488
A
4385 proc_t p;
4386
4387 error = 0;
4388 if (UIO_SEG_IS_USER_SPACE(segflg)) {
4389 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
4390 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
4391 } else {
4392 path = (char *)path_data;
4393 }
91447636 4394 if (error)
1c79356b 4395 goto out;
55e303ae 4396 AUDIT_ARG(text, path); /* This is the link string */
91447636 4397
fe8ab488
A
4398 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
4399 segflg, link, ctx);
4400
4401 error = nameiat(&nd, fd);
55e303ae 4402 if (error)
1c79356b 4403 goto out;
91447636
A
4404 dvp = nd.ni_dvp;
4405 vp = nd.ni_vp;
55e303ae 4406
fe8ab488 4407 p = vfs_context_proc(ctx);
2d21ac55
A
4408 VATTR_INIT(&va);
4409 VATTR_SET(&va, va_type, VLNK);
4410 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
fe8ab488 4411
2d21ac55
A
4412#if CONFIG_MACF
4413 error = mac_vnode_check_create(ctx,
4414 dvp, &nd.ni_cnd, &va);
4415#endif
4416 if (error != 0) {
4417 goto skipit;
4418 }
91447636 4419
2d21ac55
A
4420 if (vp != NULL) {
4421 error = EEXIST;
4422 goto skipit;
4423 }
4424
4425 /* authorize */
4426 if (error == 0)
4427 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
4428 /* get default ownership, etc. */
4429 if (error == 0)
4430 error = vnode_authattr_new(dvp, &va, 0, ctx);
4431 if (error == 0)
4432 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
4433
39236c6e 4434#if CONFIG_MACF
3e170ce0 4435 if (error == 0 && vp)
39236c6e
A
4436 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
4437#endif
4438
2d21ac55 4439 /* do fallback attribute handling */
3e170ce0 4440 if (error == 0 && vp)
2d21ac55 4441 error = vnode_setattr_fallback(vp, &va, ctx);
39236c6e 4442
2d21ac55
A
4443 if (error == 0) {
4444 int update_flags = 0;
55e303ae 4445
3e170ce0 4446 /*check if a new vnode was created, else try to get one*/
2d21ac55
A
4447 if (vp == NULL) {
4448 nd.ni_cnd.cn_nameiop = LOOKUP;
6d2010ae
A
4449#if CONFIG_TRIGGERS
4450 nd.ni_op = OP_LOOKUP;
4451#endif
2d21ac55 4452 nd.ni_cnd.cn_flags = 0;
fe8ab488 4453 error = nameiat(&nd, fd);
2d21ac55 4454 vp = nd.ni_vp;
55e303ae 4455
2d21ac55
A
4456 if (vp == NULL)
4457 goto skipit;
4458 }
fe8ab488 4459
91447636 4460#if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
fe8ab488 4461 /* call out to allow 3rd party notification of rename.
2d21ac55
A
4462 * Ignore result of kauth_authorize_fileop call.
4463 */
4464 if (kauth_authorize_fileop_has_listeners() &&
4465 namei(&nd) == 0) {
4466 char *new_link_path = NULL;
4467 int len;
fe8ab488 4468
2d21ac55
A
4469 /* build the path to the new link file */
4470 new_link_path = get_pathbuff();
4471 len = MAXPATHLEN;
4472 vn_getpath(dvp, new_link_path, &len);
4473 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
91447636 4474 new_link_path[len - 1] = '/';
2d21ac55 4475 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
91447636 4476 }
fe8ab488
A
4477
4478 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
2d21ac55
A
4479 (uintptr_t)path, (uintptr_t)new_link_path);
4480 if (new_link_path != NULL)
4481 release_pathbuff(new_link_path);
4482 }
fe8ab488 4483#endif
2d21ac55
A
4484 // Make sure the name & parent pointers are hooked up
4485 if (vp->v_name == NULL)
4486 update_flags |= VNODE_UPDATE_NAME;
4487 if (vp->v_parent == NULLVP)
4488 update_flags |= VNODE_UPDATE_PARENT;
fe8ab488 4489
2d21ac55
A
4490 if (update_flags)
4491 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
91447636 4492
2d21ac55
A
4493#if CONFIG_FSE
4494 add_fsevent(FSE_CREATE_FILE, ctx,
4495 FSE_ARG_VNODE, vp,
4496 FSE_ARG_DONE);
4497#endif
4498 }
91447636
A
4499
4500skipit:
4501 /*
4502 * nameidone has to happen before we vnode_put(dvp)
4503 * since it may need to release the fs_nodelock on the dvp
4504 */
4505 nameidone(&nd);
4506
4507 if (vp)
4508 vnode_put(vp);
4509 vnode_put(dvp);
1c79356b 4510out:
fe8ab488
A
4511 if (path && (path != (char *)path_data))
4512 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
91447636 4513
1c79356b
A
4514 return (error);
4515}
4516
fe8ab488
A
4517int
4518symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4519{
4520 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4521 uap->link, UIO_USERSPACE));
4522}
4523
4524int
4525symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4526 __unused int32_t *retval)
4527{
4528 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4529 uap->path2, UIO_USERSPACE));
4530}
4531
1c79356b
A
4532/*
4533 * Delete a whiteout from the filesystem.
fe8ab488 4534 * No longer supported.
1c79356b 4535 */
1c79356b 4536int
fe8ab488 4537undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
1c79356b 4538{
fe8ab488 4539 return (ENOTSUP);
1c79356b
A
4540}
4541
4542/*
4543 * Delete a name from the filesystem.
4544 */
1c79356b 4545/* ARGSUSED */
fe8ab488 4546static int
c18c124e
A
4547unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
4548 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
1c79356b 4549{
c18c124e 4550 struct nameidata nd;
91447636 4551 vnode_t vp, dvp;
1c79356b 4552 int error;
91447636 4553 struct componentname *cnp;
2d21ac55 4554 char *path = NULL;
b0d623f7
A
4555 int len=0;
4556#if CONFIG_FSE
2d21ac55 4557 fse_info finfo;
6d2010ae 4558 struct vnode_attr va;
b0d623f7 4559#endif
c18c124e
A
4560 int flags;
4561 int need_event;
4562 int has_listeners;
4563 int truncated_path;
6d2010ae 4564 int batched;
c18c124e
A
4565 struct vnode_attr *vap;
4566 int do_retry;
4567 int retry_count = 0;
4568 int cn_flags;
4569
4570 cn_flags = LOCKPARENT;
4571 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
4572 cn_flags |= AUDITVNPATH1;
4573 /* If a starting dvp is passed, it trumps any fd passed. */
4574 if (start_dvp)
4575 cn_flags |= USEDVP;
6d2010ae 4576
c910b4d9
A
4577#if NAMEDRSRCFORK
4578 /* unlink or delete is allowed on rsrc forks and named streams */
c18c124e 4579 cn_flags |= CN_ALLOWRSRCFORK;
c910b4d9
A
4580#endif
4581
c18c124e
A
4582retry:
4583 do_retry = 0;
4584 flags = 0;
4585 need_event = 0;
4586 has_listeners = 0;
4587 truncated_path = 0;
4588 vap = NULL;
4589
4590 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
4591
4592 nd.ni_dvp = start_dvp;
4593 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
4594 cnp = &nd.ni_cnd;
91447636 4595
6d2010ae 4596lookup_continue:
c18c124e 4597 error = nameiat(&nd, fd);
2d21ac55
A
4598 if (error)
4599 return (error);
b0d623f7 4600
c18c124e
A
4601 dvp = nd.ni_dvp;
4602 vp = nd.ni_vp;
91447636 4603
6d2010ae 4604
91447636 4605 /* With Carbon delete semantics, busy files cannot be deleted */
316670eb 4606 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
91447636 4607 flags |= VNODE_REMOVE_NODELETEBUSY;
2d21ac55 4608 }
39037602 4609
39236c6e 4610 /* Skip any potential upcalls if told to. */
316670eb
A
4611 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4612 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4613 }
4614
6d2010ae
A
4615 if (vp) {
4616 batched = vnode_compound_remove_available(vp);
4617 /*
4618 * The root of a mounted filesystem cannot be deleted.
4619 */
4620 if (vp->v_flag & VROOT) {
4621 error = EBUSY;
4622 }
2d21ac55 4623
6d2010ae
A
4624 if (!batched) {
4625 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4626 if (error) {
3e170ce0
A
4627 if (error == ENOENT) {
4628 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4629 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4630 do_retry = 1;
4631 retry_count++;
4632 }
c18c124e 4633 }
6d2010ae
A
4634 goto out;
4635 }
4636 }
4637 } else {
4638 batched = 1;
2d21ac55 4639
6d2010ae
A
4640 if (!vnode_compound_remove_available(dvp)) {
4641 panic("No vp, but no compound remove?");
4642 }
4643 }
2d21ac55 4644
2d21ac55
A
4645#if CONFIG_FSE
4646 need_event = need_fsevent(FSE_DELETE, dvp);
4647 if (need_event) {
6d2010ae
A
4648 if (!batched) {
4649 if ((vp->v_flag & VISHARDLINK) == 0) {
4650 /* XXX need to get these data in batched VNOP */
4651 get_fse_info(vp, &finfo, ctx);
4652 }
4653 } else {
4654 error = vfs_get_notify_attributes(&va);
4655 if (error) {
4656 goto out;
4657 }
4658
4659 vap = &va;
2d21ac55
A
4660 }
4661 }
4662#endif
4663 has_listeners = kauth_authorize_fileop_has_listeners();
4664 if (need_event || has_listeners) {
2d21ac55 4665 if (path == NULL) {
6d2010ae
A
4666 GET_PATH(path);
4667 if (path == NULL) {
4668 error = ENOMEM;
4669 goto out;
4670 }
2d21ac55 4671 }
c18c124e 4672 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
2d21ac55
A
4673 }
4674
4675#if NAMEDRSRCFORK
c18c124e 4676 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
2d21ac55
A
4677 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4678 else
4679#endif
6d2010ae 4680 {
c18c124e
A
4681 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
4682 vp = nd.ni_vp;
6d2010ae
A
4683 if (error == EKEEPLOOKING) {
4684 if (!batched) {
4685 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4686 }
4687
c18c124e 4688 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
6d2010ae
A
4689 panic("EKEEPLOOKING, but continue flag not set?");
4690 }
4691
4692 if (vnode_isdir(vp)) {
4693 error = EISDIR;
4694 goto out;
4695 }
4696 goto lookup_continue;
3e170ce0
A
4697 } else if (error == ENOENT && batched) {
4698 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4699 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4700 /*
4701 * For compound VNOPs, the authorization callback may
4702 * return ENOENT in case of racing hardlink lookups
4703 * hitting the name cache, redrive the lookup.
4704 */
4705 do_retry = 1;
4706 retry_count += 1;
4707 goto out;
4708 }
6d2010ae
A
4709 }
4710 }
2d21ac55
A
4711
4712 /*
39037602 4713 * Call out to allow 3rd party notification of delete.
2d21ac55
A
4714 * Ignore result of kauth_authorize_fileop call.
4715 */
1c79356b 4716 if (!error) {
2d21ac55 4717 if (has_listeners) {
39037602
A
4718 kauth_authorize_fileop(vfs_context_ucred(ctx),
4719 KAUTH_FILEOP_DELETE,
2d21ac55
A
4720 (uintptr_t)vp,
4721 (uintptr_t)path);
4722 }
91447636 4723
2d21ac55
A
4724 if (vp->v_flag & VISHARDLINK) {
4725 //
4726 // if a hardlink gets deleted we want to blow away the
4727 // v_parent link because the path that got us to this
4728 // instance of the link is no longer valid. this will
4729 // force the next call to get the path to ask the file
4730 // system instead of just following the v_parent link.
4731 //
4732 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
91447636 4733 }
91447636 4734
2d21ac55
A
4735#if CONFIG_FSE
4736 if (need_event) {
4737 if (vp->v_flag & VISHARDLINK) {
4738 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
4739 } else if (vap) {
4740 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 4741 }
b0d623f7
A
4742 if (truncated_path) {
4743 finfo.mode |= FSE_TRUNCATED_PATH;
4744 }
2d21ac55
A
4745 add_fsevent(FSE_DELETE, ctx,
4746 FSE_ARG_STRING, len, path,
4747 FSE_ARG_FINFO, &finfo,
4748 FSE_ARG_DONE);
4749 }
4750#endif
1c79356b 4751 }
6d2010ae
A
4752
4753out:
2d21ac55
A
4754 if (path != NULL)
4755 RELEASE_PATH(path);
4756
c910b4d9 4757#if NAMEDRSRCFORK
39037602 4758 /* recycle the deleted rsrc fork vnode to force a reclaim, which
b0d623f7
A
4759 * will cause its shadow file to go away if necessary.
4760 */
6d2010ae
A
4761 if (vp && (vnode_isnamedstream(vp)) &&
4762 (vp->v_parent != NULLVP) &&
4763 vnode_isshadow(vp)) {
4764 vnode_recycle(vp);
39037602 4765 }
c910b4d9 4766#endif
6d2010ae
A
4767 /*
4768 * nameidone has to happen before we vnode_put(dvp)
4769 * since it may need to release the fs_nodelock on the dvp
4770 */
c18c124e 4771 nameidone(&nd);
91447636 4772 vnode_put(dvp);
6d2010ae
A
4773 if (vp) {
4774 vnode_put(vp);
4775 }
c18c124e
A
4776
4777 if (do_retry) {
4778 goto retry;
4779 }
4780
1c79356b
A
4781 return (error);
4782}
4783
fe8ab488 4784int
c18c124e
A
4785unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
4786 enum uio_seg segflg, int unlink_flags)
fe8ab488 4787{
c18c124e
A
4788 return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
4789 unlink_flags));
fe8ab488
A
4790}
4791
1c79356b 4792/*
c18c124e 4793 * Delete a name from the filesystem using Carbon semantics.
1c79356b 4794 */
c18c124e
A
4795int
4796delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
fe8ab488 4797{
c18c124e
A
4798 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4799 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
fe8ab488
A
4800}
4801
c18c124e
A
4802/*
4803 * Delete a name from the filesystem using POSIX semantics.
4804 */
1c79356b 4805int
b0d623f7 4806unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
1c79356b 4807{
c18c124e
A
4808 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4809 uap->path, UIO_USERSPACE, 0));
fe8ab488 4810}
2d21ac55 4811
fe8ab488
A
4812int
4813unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
4814{
4815 if (uap->flag & ~AT_REMOVEDIR)
4816 return (EINVAL);
4817
4818 if (uap->flag & AT_REMOVEDIR)
4819 return (rmdirat_internal(vfs_context_current(), uap->fd,
4820 uap->path, UIO_USERSPACE));
4821 else
4822 return (unlinkat_internal(vfs_context_current(), uap->fd,
c18c124e 4823 NULLVP, uap->path, UIO_USERSPACE, 0));
1c79356b
A
4824}
4825
4826/*
4827 * Reposition read/write file offset.
4828 */
1c79356b 4829int
2d21ac55 4830lseek(proc_t p, struct lseek_args *uap, off_t *retval)
1c79356b 4831{
91447636 4832 struct fileproc *fp;
2d21ac55
A
4833 vnode_t vp;
4834 struct vfs_context *ctx;
91447636 4835 off_t offset = uap->offset, file_size;
1c79356b
A
4836 int error;
4837
91447636
A
4838 if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
4839 if (error == ENOTSUP)
4840 return (ESPIPE);
1c79356b 4841 return (error);
55e303ae 4842 }
91447636
A
4843 if (vnode_isfifo(vp)) {
4844 file_drop(uap->fd);
4845 return(ESPIPE);
4846 }
2d21ac55
A
4847
4848
4849 ctx = vfs_context_current();
4850#if CONFIG_MACF
4851 if (uap->whence == L_INCR && uap->offset == 0)
4852 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
4853 fp->f_fglob);
4854 else
4855 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
4856 fp->f_fglob);
4857 if (error) {
4858 file_drop(uap->fd);
4859 return (error);
4860 }
4861#endif
91447636
A
4862 if ( (error = vnode_getwithref(vp)) ) {
4863 file_drop(uap->fd);
4864 return(error);
4865 }
4866
1c79356b
A
4867 switch (uap->whence) {
4868 case L_INCR:
91447636 4869 offset += fp->f_fglob->fg_offset;
1c79356b
A
4870 break;
4871 case L_XTND:
2d21ac55 4872 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
55e303ae 4873 break;
91447636 4874 offset += file_size;
1c79356b
A
4875 break;
4876 case L_SET:
1c79356b
A
4877 break;
4878 default:
55e303ae 4879 error = EINVAL;
1c79356b 4880 }
55e303ae
A
4881 if (error == 0) {
4882 if (uap->offset > 0 && offset < 0) {
4883 /* Incremented/relative move past max size */
4884 error = EOVERFLOW;
4885 } else {
4886 /*
4887 * Allow negative offsets on character devices, per
4888 * POSIX 1003.1-2001. Most likely for writing disk
4889 * labels.
4890 */
4891 if (offset < 0 && vp->v_type != VCHR) {
4892 /* Decremented/relative move before start */
4893 error = EINVAL;
4894 } else {
4895 /* Success */
91447636
A
4896 fp->f_fglob->fg_offset = offset;
4897 *retval = fp->f_fglob->fg_offset;
55e303ae
A
4898 }
4899 }
4900 }
b0d623f7 4901
39037602 4902 /*
b0d623f7
A
4903 * An lseek can affect whether data is "available to read." Use
4904 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4905 */
4906 post_event_if_success(vp, error, NOTE_NONE);
91447636
A
4907 (void)vnode_put(vp);
4908 file_drop(uap->fd);
55e303ae 4909 return (error);
1c79356b
A
4910}
4911
91447636 4912
1c79356b 4913/*
91447636 4914 * Check access permissions.
2d21ac55
A
4915 *
4916 * Returns: 0 Success
4917 * vnode_authorize:???
1c79356b 4918 */
91447636
A
4919static int
4920access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
1c79356b 4921{
91447636 4922 kauth_action_t action;
1c79356b
A
4923 int error;
4924
91447636
A
4925 /*
4926 * If just the regular access bits, convert them to something
4927 * that vnode_authorize will understand.
4928 */
4929 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4930 action = 0;
4931 if (uflags & R_OK)
4932 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4933 if (uflags & W_OK) {
4934 if (vnode_isdir(vp)) {
4935 action |= KAUTH_VNODE_ADD_FILE |
4936 KAUTH_VNODE_ADD_SUBDIRECTORY;
4937 /* might want delete rights here too */
4938 } else {
4939 action |= KAUTH_VNODE_WRITE_DATA;
4940 }
4941 }
4942 if (uflags & X_OK) {
4943 if (vnode_isdir(vp)) {
4944 action |= KAUTH_VNODE_SEARCH;
4945 } else {
4946 action |= KAUTH_VNODE_EXECUTE;
4947 }
4948 }
4949 } else {
4950 /* take advantage of definition of uflags */
4951 action = uflags >> 8;
4952 }
39037602 4953
2d21ac55
A
4954#if CONFIG_MACF
4955 error = mac_vnode_check_access(ctx, vp, uflags);
4956 if (error)
4957 return (error);
4958#endif /* MAC */
4959
91447636
A
4960 /* action == 0 means only check for existence */
4961 if (action != 0) {
4962 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
4963 } else {
4964 error = 0;
4965 }
4966
4967 return(error);
1c79356b 4968}
1c79356b 4969
91447636
A
4970
4971
2d21ac55 4972/*
b0d623f7 4973 * access_extended: Check access permissions in bulk.
2d21ac55 4974 *
b0d623f7 4975 * Description: uap->entries Pointer to an array of accessx
39037602
A
4976 * descriptor structs, plus one or
4977 * more NULL terminated strings (see
b0d623f7
A
4978 * "Notes" section below).
4979 * uap->size Size of the area pointed to by
4980 * uap->entries.
4981 * uap->results Pointer to the results array.
2d21ac55
A
4982 *
4983 * Returns: 0 Success
4984 * ENOMEM Insufficient memory
4985 * EINVAL Invalid arguments
4986 * namei:EFAULT Bad address
4987 * namei:ENAMETOOLONG Filename too long
4988 * namei:ENOENT No such file or directory
4989 * namei:ELOOP Too many levels of symbolic links
4990 * namei:EBADF Bad file descriptor
4991 * namei:ENOTDIR Not a directory
4992 * namei:???
4993 * access1:
4994 *
4995 * Implicit returns:
4996 * uap->results Array contents modified
4997 *
4998 * Notes: The uap->entries are structured as an arbitrary length array
b0d623f7 4999 * of accessx descriptors, followed by one or more NULL terminated
2d21ac55
A
5000 * strings
5001 *
5002 * struct accessx_descriptor[0]
5003 * ...
5004 * struct accessx_descriptor[n]
5005 * char name_data[0];
5006 *
5007 * We determine the entry count by walking the buffer containing
b0d623f7 5008 * the uap->entries argument descriptor. For each descriptor we
2d21ac55
A
5009 * see, the valid values for the offset ad_name_offset will be
5010 * in the byte range:
5011 *
5012 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5013 * to
5014 * [ uap->entries + uap->size - 2 ]
5015 *
5016 * since we must have at least one string, and the string must
b0d623f7 5017 * be at least one character plus the NULL terminator in length.
39037602 5018 *
2d21ac55
A
5019 * XXX: Need to support the check-as uid argument
5020 */
1c79356b 5021int
b0d623f7 5022access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
1c79356b 5023{
2d21ac55
A
5024 struct accessx_descriptor *input = NULL;
5025 errno_t *result = NULL;
5026 errno_t error = 0;
5027 int wantdelete = 0;
5028 unsigned int desc_max, desc_actual, i, j;
91447636 5029 struct vfs_context context;
1c79356b 5030 struct nameidata nd;
91447636 5031 int niopts;
2d21ac55
A
5032 vnode_t vp = NULL;
5033 vnode_t dvp = NULL;
5034#define ACCESSX_MAX_DESCR_ON_STACK 10
5035 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
91447636 5036
91447636
A
5037 context.vc_ucred = NULL;
5038
2d21ac55
A
5039 /*
5040 * Validate parameters; if valid, copy the descriptor array and string
5041 * arguments into local memory. Before proceeding, the following
5042 * conditions must have been met:
5043 *
5044 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5045 * o There must be sufficient room in the request for at least one
5046 * descriptor and a one yte NUL terminated string.
5047 * o The allocation of local storage must not fail.
5048 */
91447636
A
5049 if (uap->size > ACCESSX_MAX_TABLESIZE)
5050 return(ENOMEM);
2d21ac55 5051 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
91447636 5052 return(EINVAL);
2d21ac55
A
5053 if (uap->size <= sizeof (stack_input)) {
5054 input = stack_input;
5055 } else {
91447636
A
5056 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
5057 if (input == NULL) {
5058 error = ENOMEM;
5059 goto out;
5060 }
2d21ac55 5061 }
91447636 5062 error = copyin(uap->entries, input, uap->size);
55e303ae 5063 if (error)
91447636 5064 goto out;
1c79356b 5065
b0d623f7
A
5066 AUDIT_ARG(opaque, input, uap->size);
5067
91447636 5068 /*
2d21ac55
A
5069 * Force NUL termination of the copyin buffer to avoid nami() running
5070 * off the end. If the caller passes us bogus data, they may get a
5071 * bogus result.
5072 */
5073 ((char *)input)[uap->size - 1] = 0;
5074
5075 /*
5076 * Access is defined as checking against the process' real identity,
5077 * even if operations are checking the effective identity. This
5078 * requires that we use a local vfs context.
91447636
A
5079 */
5080 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
2d21ac55 5081 context.vc_thread = current_thread();
91447636
A
5082
5083 /*
2d21ac55
A
5084 * Find out how many entries we have, so we can allocate the result
5085 * array by walking the list and adjusting the count downward by the
5086 * earliest string offset we see.
91447636 5087 */
2d21ac55
A
5088 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
5089 desc_actual = desc_max;
5090 for (i = 0; i < desc_actual; i++) {
91447636 5091 /*
2d21ac55
A
5092 * Take the offset to the name string for this entry and
5093 * convert to an input array index, which would be one off
5094 * the end of the array if this entry was the lowest-addressed
5095 * name string.
91447636
A
5096 */
5097 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
2d21ac55
A
5098
5099 /*
5100 * An offset greater than the max allowable offset is an error.
5101 * It is also an error for any valid entry to point
5102 * to a location prior to the end of the current entry, if
5103 * it's not a reference to the string of the previous entry.
5104 */
5105 if (j > desc_max || (j != 0 && j <= i)) {
91447636
A
5106 error = EINVAL;
5107 goto out;
5108 }
2d21ac55 5109
39037602
A
5110 /* Also do not let ad_name_offset point to something beyond the size of the input */
5111 if (input[i].ad_name_offset >= uap->size) {
5112 error = EINVAL;
5113 goto out;
5114 }
5115
2d21ac55
A
5116 /*
5117 * An offset of 0 means use the previous descriptor's offset;
5118 * this is used to chain multiple requests for the same file
5119 * to avoid multiple lookups.
5120 */
91447636 5121 if (j == 0) {
2d21ac55 5122 /* This is not valid for the first entry */
91447636
A
5123 if (i == 0) {
5124 error = EINVAL;
5125 goto out;
5126 }
5127 continue;
5128 }
2d21ac55
A
5129
5130 /*
5131 * If the offset of the string for this descriptor is before
5132 * what we believe is the current actual last descriptor,
5133 * then we need to adjust our estimate downward; this permits
5134 * the string table following the last descriptor to be out
5135 * of order relative to the descriptor list.
5136 */
5137 if (j < desc_actual)
5138 desc_actual = j;
91447636 5139 }
2d21ac55
A
5140
5141 /*
5142 * We limit the actual number of descriptors we are willing to process
5143 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5144 * requested does not exceed this limit,
5145 */
5146 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
91447636
A
5147 error = ENOMEM;
5148 goto out;
5149 }
2d21ac55 5150 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
91447636
A
5151 if (result == NULL) {
5152 error = ENOMEM;
5153 goto out;
5154 }
5155
5156 /*
2d21ac55
A
5157 * Do the work by iterating over the descriptor entries we know to
5158 * at least appear to contain valid data.
91447636
A
5159 */
5160 error = 0;
2d21ac55 5161 for (i = 0; i < desc_actual; i++) {
91447636 5162 /*
2d21ac55
A
5163 * If the ad_name_offset is 0, then we use the previous
5164 * results to make the check; otherwise, we are looking up
5165 * a new file name.
91447636
A
5166 */
5167 if (input[i].ad_name_offset != 0) {
5168 /* discard old vnodes */
5169 if (vp) {
5170 vnode_put(vp);
5171 vp = NULL;
5172 }
5173 if (dvp) {
5174 vnode_put(dvp);
5175 dvp = NULL;
5176 }
39037602 5177
2d21ac55
A
5178 /*
5179 * Scan forward in the descriptor list to see if we
5180 * need the parent vnode. We will need it if we are
5181 * deleting, since we must have rights to remove
5182 * entries in the parent directory, as well as the
5183 * rights to delete the object itself.
5184 */
91447636 5185 wantdelete = input[i].ad_flags & _DELETE_OK;
2d21ac55 5186 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
91447636
A
5187 if (input[j].ad_flags & _DELETE_OK)
5188 wantdelete = 1;
39037602 5189
91447636 5190 niopts = FOLLOW | AUDITVNPATH1;
2d21ac55 5191
91447636
A
5192 /* need parent for vnode_authorize for deletion test */
5193 if (wantdelete)
5194 niopts |= WANTPARENT;
5195
5196 /* do the lookup */
6d2010ae
A
5197 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
5198 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
5199 &context);
91447636
A
5200 error = namei(&nd);
5201 if (!error) {
5202 vp = nd.ni_vp;
5203 if (wantdelete)
5204 dvp = nd.ni_dvp;
5205 }
5206 nameidone(&nd);
5207 }
5208
5209 /*
5210 * Handle lookup errors.
5211 */
5212 switch(error) {
5213 case ENOENT:
5214 case EACCES:
5215 case EPERM:
5216 case ENOTDIR:
5217 result[i] = error;
5218 break;
5219 case 0:
5220 /* run this access check */
5221 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
5222 break;
5223 default:
5224 /* fatal lookup error */
5225
5226 goto out;
5227 }
5228 }
5229
b0d623f7
A
5230 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
5231
91447636 5232 /* copy out results */
2d21ac55 5233 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
39037602 5234
91447636 5235out:
2d21ac55 5236 if (input && input != stack_input)
91447636
A
5237 FREE(input, M_TEMP);
5238 if (result)
5239 FREE(result, M_TEMP);
5240 if (vp)
5241 vnode_put(vp);
5242 if (dvp)
5243 vnode_put(dvp);
0c530ab8
A
5244 if (IS_VALID_CRED(context.vc_ucred))
5245 kauth_cred_unref(&context.vc_ucred);
91447636 5246 return(error);
1c79356b
A
5247}
5248
2d21ac55
A
5249
5250/*
5251 * Returns: 0 Success
5252 * namei:EFAULT Bad address
5253 * namei:ENAMETOOLONG Filename too long
5254 * namei:ENOENT No such file or directory
5255 * namei:ELOOP Too many levels of symbolic links
5256 * namei:EBADF Bad file descriptor
5257 * namei:ENOTDIR Not a directory
5258 * namei:???
5259 * access1:
5260 */
fe8ab488
A
5261static int
5262faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5263 int flag, enum uio_seg segflg)
1c79356b 5264{
1c79356b
A
5265 int error;
5266 struct nameidata nd;
91447636
A
5267 int niopts;
5268 struct vfs_context context;
cf7d32b8
A
5269#if NAMEDRSRCFORK
5270 int is_namedstream = 0;
5271#endif
5272
91447636 5273 /*
fe8ab488
A
5274 * Unless the AT_EACCESS option is used, Access is defined as checking
5275 * against the process' real identity, even if operations are checking
5276 * the effective identity. So we need to tweak the credential
5277 * in the context for that case.
91447636 5278 */
fe8ab488
A
5279 if (!(flag & AT_EACCESS))
5280 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5281 else
5282 context.vc_ucred = ctx->vc_ucred;
5283 context.vc_thread = ctx->vc_thread;
5284
91447636
A
5285
5286 niopts = FOLLOW | AUDITVNPATH1;
5287 /* need parent for vnode_authorize for deletion test */
fe8ab488 5288 if (amode & _DELETE_OK)
91447636 5289 niopts |= WANTPARENT;
fe8ab488
A
5290 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5291 path, &context);
2d21ac55
A
5292
5293#if NAMEDRSRCFORK
5294 /* access(F_OK) calls are allowed for resource forks. */
fe8ab488 5295 if (amode == F_OK)
2d21ac55
A
5296 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5297#endif
fe8ab488 5298 error = nameiat(&nd, fd);
91447636
A
5299 if (error)
5300 goto out;
5301
cf7d32b8 5302#if NAMEDRSRCFORK
39037602 5303 /* Grab reference on the shadow stream file vnode to
b0d623f7
A
5304 * force an inactive on release which will mark it
5305 * for recycle.
cf7d32b8
A
5306 */
5307 if (vnode_isnamedstream(nd.ni_vp) &&
b0d623f7
A
5308 (nd.ni_vp->v_parent != NULLVP) &&
5309 vnode_isshadow(nd.ni_vp)) {
cf7d32b8
A
5310 is_namedstream = 1;
5311 vnode_ref(nd.ni_vp);
5312 }
5313#endif
5314
fe8ab488 5315 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
b0d623f7 5316
cf7d32b8
A
5317#if NAMEDRSRCFORK
5318 if (is_namedstream) {
5319 vnode_rele(nd.ni_vp);
5320 }
5321#endif
5322
91447636 5323 vnode_put(nd.ni_vp);
fe8ab488 5324 if (amode & _DELETE_OK)
91447636
A
5325 vnode_put(nd.ni_dvp);
5326 nameidone(&nd);
39037602 5327
91447636 5328out:
fe8ab488
A
5329 if (!(flag & AT_EACCESS))
5330 kauth_cred_unref(&context.vc_ucred);
5331 return (error);
5332}
5333
5334int
5335access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5336{
5337 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5338 uap->path, uap->flags, 0, UIO_USERSPACE));
91447636
A
5339}
5340
fe8ab488
A
5341int
5342faccessat(__unused proc_t p, struct faccessat_args *uap,
5343 __unused int32_t *retval)
5344{
5345 if (uap->flag & ~AT_EACCESS)
5346 return (EINVAL);
5347
5348 return (faccessat_internal(vfs_context_current(), uap->fd,
5349 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5350}
91447636 5351
2d21ac55
A
5352/*
5353 * Returns: 0 Success
5354 * EFAULT
5355 * copyout:EFAULT
5356 * namei:???
5357 * vn_stat:???
5358 */
91447636 5359static int
fe8ab488
A
5360fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5361 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5362 enum uio_seg segflg, int fd, int flag)
91447636 5363{
fe8ab488
A
5364 struct nameidata nd;
5365 int follow;
b0d623f7
A
5366 union {
5367 struct stat sb;
5368 struct stat64 sb64;
5369 } source;
5370 union {
5371 struct user64_stat user64_sb;
5372 struct user32_stat user32_sb;
5373 struct user64_stat64 user64_sb64;
5374 struct user32_stat64 user32_sb64;
5375 } dest;
91447636
A
5376 caddr_t sbp;
5377 int error, my_size;
5378 kauth_filesec_t fsec;
5379 size_t xsecurity_bufsize;
2d21ac55 5380 void * statptr;
1c79356b 5381
fe8ab488
A
5382 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5383 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5384 segflg, path, ctx);
5385
2d21ac55 5386#if NAMEDRSRCFORK
cf7d32b8 5387 int is_namedstream = 0;
2d21ac55 5388 /* stat calls are allowed for resource forks. */
fe8ab488 5389 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
2d21ac55 5390#endif
fe8ab488 5391 error = nameiat(&nd, fd);
91447636 5392 if (error)
1c79356b 5393 return (error);
91447636 5394 fsec = KAUTH_FILESEC_NONE;
b0d623f7
A
5395
5396 statptr = (void *)&source;
cf7d32b8
A
5397
5398#if NAMEDRSRCFORK
39037602
A
5399 /* Grab reference on the shadow stream file vnode to
5400 * force an inactive on release which will mark it
b0d623f7 5401 * for recycle.
cf7d32b8 5402 */
fe8ab488
A
5403 if (vnode_isnamedstream(nd.ni_vp) &&
5404 (nd.ni_vp->v_parent != NULLVP) &&
5405 vnode_isshadow(nd.ni_vp)) {
cf7d32b8 5406 is_namedstream = 1;
fe8ab488 5407 vnode_ref(nd.ni_vp);
cf7d32b8
A
5408 }
5409#endif
5410
fe8ab488 5411 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
2d21ac55
A
5412
5413#if NAMEDRSRCFORK
cf7d32b8 5414 if (is_namedstream) {
fe8ab488 5415 vnode_rele(nd.ni_vp);
2d21ac55
A
5416 }
5417#endif
fe8ab488
A
5418 vnode_put(nd.ni_vp);
5419 nameidone(&nd);
91447636 5420
1c79356b
A
5421 if (error)
5422 return (error);
91447636 5423 /* Zap spare fields */
2d21ac55 5424 if (isstat64 != 0) {
b0d623f7
A
5425 source.sb64.st_lspare = 0;
5426 source.sb64.st_qspare[0] = 0LL;
5427 source.sb64.st_qspare[1] = 0LL;
2d21ac55 5428 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
39037602 5429 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
b0d623f7
A
5430 my_size = sizeof(dest.user64_sb64);
5431 sbp = (caddr_t)&dest.user64_sb64;
2d21ac55 5432 } else {
39037602 5433 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
b0d623f7
A
5434 my_size = sizeof(dest.user32_sb64);
5435 sbp = (caddr_t)&dest.user32_sb64;
2d21ac55
A
5436 }
5437 /*
5438 * Check if we raced (post lookup) against the last unlink of a file.
5439 */
b0d623f7
A
5440 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5441 source.sb64.st_nlink = 1;
2d21ac55
A
5442 }
5443 } else {
b0d623f7
A
5444 source.sb.st_lspare = 0;
5445 source.sb.st_qspare[0] = 0LL;
5446 source.sb.st_qspare[1] = 0LL;
2d21ac55 5447 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
39037602 5448 munge_user64_stat(&source.sb, &dest.user64_sb);
b0d623f7
A
5449 my_size = sizeof(dest.user64_sb);
5450 sbp = (caddr_t)&dest.user64_sb;
2d21ac55 5451 } else {
39037602 5452 munge_user32_stat(&source.sb, &dest.user32_sb);
b0d623f7
A
5453 my_size = sizeof(dest.user32_sb);
5454 sbp = (caddr_t)&dest.user32_sb;
2d21ac55
A
5455 }
5456
5457 /*
5458 * Check if we raced (post lookup) against the last unlink of a file.
5459 */
b0d623f7
A
5460 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5461 source.sb.st_nlink = 1;
2d21ac55 5462 }
91447636
A
5463 }
5464 if ((error = copyout(sbp, ub, my_size)) != 0)
5465 goto out;
5466
5467 /* caller wants extended security information? */
5468 if (xsecurity != USER_ADDR_NULL) {
5469
5470 /* did we get any? */
5471 if (fsec == KAUTH_FILESEC_NONE) {
5472 if (susize(xsecurity_size, 0) != 0) {
5473 error = EFAULT;
5474 goto out;
5475 }
5476 } else {
5477 /* find the user buffer size */
5478 xsecurity_bufsize = fusize(xsecurity_size);
5479
5480 /* copy out the actual data size */
5481 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5482 error = EFAULT;
5483 goto out;
5484 }
5485
5486 /* if the caller supplied enough room, copy out to it */
5487 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5488 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5489 }
5490 }
5491out:
5492 if (fsec != KAUTH_FILESEC_NONE)
5493 kauth_filesec_free(fsec);
1c79356b
A
5494 return (error);
5495}
5496
b0d623f7
A
5497/*
5498 * stat_extended: Get file status; with extended security (ACL).
5499 *
5500 * Parameters: p (ignored)
5501 * uap User argument descriptor (see below)
39037602 5502 * retval (ignored)
b0d623f7
A
5503 *
5504 * Indirect: uap->path Path of file to get status from
5505 * uap->ub User buffer (holds file status info)
5506 * uap->xsecurity ACL to get (extended security)
5507 * uap->xsecurity_size Size of ACL
39037602 5508 *
b0d623f7
A
5509 * Returns: 0 Success
5510 * !0 errno value
5511 *
5512 */
2d21ac55 5513int
fe8ab488
A
5514stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5515 __unused int32_t *retval)
2d21ac55 5516{
fe8ab488
A
5517 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5518 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5519 0));
1c79356b
A
5520}
5521
2d21ac55
A
5522/*
5523 * Returns: 0 Success
fe8ab488 5524 * fstatat_internal:??? [see fstatat_internal() in this file]
2d21ac55 5525 */
91447636 5526int
b0d623f7 5527stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
1c79356b 5528{
fe8ab488
A
5529 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5530 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
91447636 5531}
1c79356b 5532
91447636 5533int
b0d623f7 5534stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
91447636 5535{
fe8ab488
A
5536 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5537 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
1c79356b 5538}
1c79356b 5539
b0d623f7
A
5540/*
5541 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5542 *
5543 * Parameters: p (ignored)
5544 * uap User argument descriptor (see below)
39037602 5545 * retval (ignored)
b0d623f7
A
5546 *
5547 * Indirect: uap->path Path of file to get status from
5548 * uap->ub User buffer (holds file status info)
5549 * uap->xsecurity ACL to get (extended security)
5550 * uap->xsecurity_size Size of ACL
39037602 5551 *
b0d623f7
A
5552 * Returns: 0 Success
5553 * !0 errno value
5554 *
5555 */
2d21ac55 5556int
b0d623f7 5557stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
2d21ac55 5558{
fe8ab488
A
5559 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5560 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5561 0));
2d21ac55 5562}
91447636 5563
b0d623f7
A
5564/*
5565 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5566 *
5567 * Parameters: p (ignored)
5568 * uap User argument descriptor (see below)
39037602 5569 * retval (ignored)
b0d623f7
A
5570 *
5571 * Indirect: uap->path Path of file to get status from
5572 * uap->ub User buffer (holds file status info)
5573 * uap->xsecurity ACL to get (extended security)
5574 * uap->xsecurity_size Size of ACL
39037602 5575 *
b0d623f7
A
5576 * Returns: 0 Success
5577 * !0 errno value
5578 *
5579 */
2d21ac55 5580int
b0d623f7 5581lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
2d21ac55 5582{
fe8ab488
A
5583 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5584 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5585 AT_SYMLINK_NOFOLLOW));
91447636
A
5586}
5587
fe8ab488
A
5588/*
5589 * Get file status; this version does not follow links.
5590 */
91447636 5591int
b0d623f7 5592lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
91447636 5593{
fe8ab488
A
5594 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5595 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
2d21ac55 5596}
b0d623f7 5597
2d21ac55 5598int
b0d623f7 5599lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
2d21ac55 5600{
fe8ab488
A
5601 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5602 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
91447636
A
5603}
5604
b0d623f7
A
5605/*
5606 * lstat64_extended: Get file status; can handle large inode numbers; does not
5607 * follow links; with extended security (ACL).
5608 *
5609 * Parameters: p (ignored)
5610 * uap User argument descriptor (see below)
39037602 5611 * retval (ignored)
b0d623f7
A
5612 *
5613 * Indirect: uap->path Path of file to get status from
5614 * uap->ub User buffer (holds file status info)
5615 * uap->xsecurity ACL to get (extended security)
5616 * uap->xsecurity_size Size of ACL
39037602 5617 *
b0d623f7
A
5618 * Returns: 0 Success
5619 * !0 errno value
5620 *
5621 */
91447636 5622int
b0d623f7 5623lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
91447636 5624{
fe8ab488
A
5625 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5626 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5627 AT_SYMLINK_NOFOLLOW));
5628}
5629
5630int
5631fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5632{
5633 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5634 return (EINVAL);
5635
5636 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5637 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5638}
5639
5640int
5641fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5642 __unused int32_t *retval)
5643{
5644 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5645 return (EINVAL);
5646
5647 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5648 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
91447636
A
5649}
5650
1c79356b 5651/*
91447636 5652 * Get configurable pathname variables.
2d21ac55
A
5653 *
5654 * Returns: 0 Success
5655 * namei:???
5656 * vn_pathconf:???
5657 *
5658 * Notes: Global implementation constants are intended to be
5659 * implemented in this function directly; all other constants
5660 * are per-FS implementation, and therefore must be handled in
5661 * each respective FS, instead.
5662 *
5663 * XXX We implement some things globally right now that should actually be
5664 * XXX per-FS; we will need to deal with this at some point.
1c79356b 5665 */
1c79356b
A
5666/* ARGSUSED */
5667int
b0d623f7 5668pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
1c79356b 5669{
1c79356b
A
5670 int error;
5671 struct nameidata nd;
2d21ac55 5672 vfs_context_t ctx = vfs_context_current();
91447636 5673
39037602 5674 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
2d21ac55 5675 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
5676 error = namei(&nd);
5677 if (error)
1c79356b 5678 return (error);
1c79356b 5679
2d21ac55 5680 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
1c79356b 5681
91447636
A
5682 vnode_put(nd.ni_vp);
5683 nameidone(&nd);
1c79356b
A
5684 return (error);
5685}
5686
5687/*
5688 * Return target name of a symbolic link.
5689 */
1c79356b 5690/* ARGSUSED */
fe8ab488
A
5691static int
5692readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
5693 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
5694 int *retval)
1c79356b 5695{
2d21ac55 5696 vnode_t vp;
91447636 5697 uio_t auio;
1c79356b
A
5698 int error;
5699 struct nameidata nd;
91447636
A
5700 char uio_buf[ UIO_SIZEOF(1) ];
5701
fe8ab488
A
5702 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5703 seg, path, ctx);
5704
5705 error = nameiat(&nd, fd);
55e303ae 5706 if (error)
1c79356b
A
5707 return (error);
5708 vp = nd.ni_vp;
91447636
A
5709
5710 nameidone(&nd);
5711
fe8ab488
A
5712 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
5713 &uio_buf[0], sizeof(uio_buf));
5714 uio_addiov(auio, buf, bufsize);
5715 if (vp->v_type != VLNK) {
1c79356b 5716 error = EINVAL;
fe8ab488 5717 } else {
2d21ac55 5718#if CONFIG_MACF
fe8ab488 5719 error = mac_vnode_check_readlink(ctx, vp);
2d21ac55
A
5720#endif
5721 if (error == 0)
fe8ab488
A
5722 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
5723 ctx);
91447636 5724 if (error == 0)
2d21ac55 5725 error = VNOP_READLINK(vp, auio, ctx);
91447636
A
5726 }
5727 vnode_put(vp);
b0d623f7 5728
fe8ab488 5729 *retval = bufsize - (int)uio_resid(auio);
1c79356b
A
5730 return (error);
5731}
5732
fe8ab488
A
5733int
5734readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5735{
5736 enum uio_seg procseg;
5737
5738 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5739 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5740 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5741 uap->count, procseg, retval));
5742}
5743
5744int
5745readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5746{
5747 enum uio_seg procseg;
5748
5749 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5750 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5751 procseg, uap->buf, uap->bufsize, procseg, retval));
5752}
5753
5754/*
5755 * Change file flags.
91447636
A
5756 */
5757static int
5758chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5759{
5760 struct vnode_attr va;
5761 kauth_action_t action;
5762 int error;
5763
5764 VATTR_INIT(&va);
5765 VATTR_SET(&va, va_flags, flags);
5766
2d21ac55
A
5767#if CONFIG_MACF
5768 error = mac_vnode_check_setflags(ctx, vp, flags);
5769 if (error)
5770 goto out;
5771#endif
5772
91447636
A
5773 /* request authorisation, disregard immutability */
5774 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5775 goto out;
5776 /*
5777 * Request that the auth layer disregard those file flags it's allowed to when
5778 * authorizing this operation; we need to do this in order to be able to
5779 * clear immutable flags.
5780 */
5781 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
5782 goto out;
5783 error = vnode_setattr(vp, &va, ctx);
5784
39037602
A
5785#if CONFIG_MACF
5786 if (error == 0)
5787 mac_vnode_notify_setflags(ctx, vp, flags);
5788#endif
5789
2d21ac55
A
5790 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5791 error = ENOTSUP;
5792 }
91447636
A
5793out:
5794 vnode_put(vp);
5795 return(error);
5796}
5797
1c79356b
A
5798/*
5799 * Change flags of a file given a path name.
5800 */
1c79356b
A
5801/* ARGSUSED */
5802int
b0d623f7 5803chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
1c79356b 5804{
2d21ac55
A
5805 vnode_t vp;
5806 vfs_context_t ctx = vfs_context_current();
1c79356b
A
5807 int error;
5808 struct nameidata nd;
5809
55e303ae 5810 AUDIT_ARG(fflags, uap->flags);
39037602 5811 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
2d21ac55 5812 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
5813 error = namei(&nd);
5814 if (error)
1c79356b
A
5815 return (error);
5816 vp = nd.ni_vp;
91447636
A
5817 nameidone(&nd);
5818
2d21ac55 5819 error = chflags1(vp, uap->flags, ctx);
91447636
A
5820
5821 return(error);
1c79356b
A
5822}
5823
5824/*
5825 * Change flags of a file given a file descriptor.
5826 */
1c79356b
A
5827/* ARGSUSED */
5828int
b0d623f7 5829fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
1c79356b 5830{
2d21ac55 5831 vnode_t vp;
1c79356b
A
5832 int error;
5833
55e303ae
A
5834 AUDIT_ARG(fd, uap->fd);
5835 AUDIT_ARG(fflags, uap->flags);
91447636 5836 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 5837 return (error);
55e303ae 5838
91447636
A
5839 if ((error = vnode_getwithref(vp))) {
5840 file_drop(uap->fd);
5841 return(error);
5842 }
e5568f75
A
5843
5844 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5845
2d21ac55 5846 error = chflags1(vp, uap->flags, vfs_context_current());
91447636
A
5847
5848 file_drop(uap->fd);
5849 return (error);
5850}
5851
5852/*
5853 * Change security information on a filesystem object.
2d21ac55
A
5854 *
5855 * Returns: 0 Success
5856 * EPERM Operation not permitted
5857 * vnode_authattr:??? [anything vnode_authattr can return]
5858 * vnode_authorize:??? [anything vnode_authorize can return]
5859 * vnode_setattr:??? [anything vnode_setattr can return]
5860 *
5861 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5862 * translated to EPERM before being returned.
91447636
A
5863 */
5864static int
fe8ab488 5865chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
91447636
A
5866{
5867 kauth_action_t action;
5868 int error;
39037602 5869
b0d623f7
A
5870 AUDIT_ARG(mode, vap->va_mode);
5871 /* XXX audit new args */
91447636 5872
2d21ac55
A
5873#if NAMEDSTREAMS
5874 /* chmod calls are not allowed for resource forks. */
5875 if (vp->v_flag & VISNAMEDSTREAM) {
5876 return (EPERM);
5877 }
5878#endif
5879
5880#if CONFIG_MACF
316670eb
A
5881 if (VATTR_IS_ACTIVE(vap, va_mode) &&
5882 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
2d21ac55 5883 return (error);
39037602
A
5884
5885 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
5886 if ((error = mac_vnode_check_setowner(ctx, vp,
5887 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
5888 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1)))
5889 return (error);
5890 }
5891
5892 if (VATTR_IS_ACTIVE(vap, va_acl) &&
5893 (error = mac_vnode_check_setacl(ctx, vp, vap->va_acl)))
5894 return (error);
2d21ac55
A
5895#endif
5896
91447636
A
5897 /* make sure that the caller is allowed to set this security information */
5898 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
5899 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5900 if (error == EACCES)
5901 error = EPERM;
5902 return(error);
5903 }
39037602
A
5904
5905 if ((error = vnode_setattr(vp, vap, ctx)) != 0)
5906 return (error);
5907
5908#if CONFIG_MACF
5909 if (VATTR_IS_ACTIVE(vap, va_mode))
5910 mac_vnode_notify_setmode(ctx, vp, (mode_t)vap->va_mode);
5911
5912 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))
5913 mac_vnode_notify_setowner(ctx, vp,
5914 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
5915 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1);
5916
5917 if (VATTR_IS_ACTIVE(vap, va_acl))
5918 mac_vnode_notify_setacl(ctx, vp, vap->va_acl);
5919#endif
91447636 5920
1c79356b
A
5921 return (error);
5922}
5923
91447636 5924
1c79356b 5925/*
b0d623f7 5926 * Change mode of a file given a path name.
2d21ac55
A
5927 *
5928 * Returns: 0 Success
5929 * namei:??? [anything namei can return]
fe8ab488 5930 * chmod_vnode:??? [anything chmod_vnode can return]
1c79356b 5931 */
91447636 5932static int
fe8ab488
A
5933chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
5934 int fd, int flag, enum uio_seg segflg)
91447636
A
5935{
5936 struct nameidata nd;
fe8ab488 5937 int follow, error;
91447636 5938
fe8ab488
A
5939 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5940 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
5941 segflg, path, ctx);
5942 if ((error = nameiat(&nd, fd)))
91447636 5943 return (error);
fe8ab488 5944 error = chmod_vnode(ctx, nd.ni_vp, vap);
91447636
A
5945 vnode_put(nd.ni_vp);
5946 nameidone(&nd);
5947 return(error);
5948}
5949
0c530ab8 5950/*
39037602 5951 * chmod_extended: Change the mode of a file given a path name; with extended
b0d623f7 5952 * argument list (including extended security (ACL)).
0c530ab8
A
5953 *
5954 * Parameters: p Process requesting the open
5955 * uap User argument descriptor (see below)
5956 * retval (ignored)
5957 *
5958 * Indirect: uap->path Path to object (same as 'chmod')
5959 * uap->uid UID to set
5960 * uap->gid GID to set
5961 * uap->mode File mode to set (same as 'chmod')
5962 * uap->xsecurity ACL to set (or delete)
5963 *
5964 * Returns: 0 Success
5965 * !0 errno value
5966 *
5967 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5968 *
5969 * XXX: We should enummerate the possible errno values here, and where
5970 * in the code they originated.
5971 */
1c79356b 5972int
b0d623f7 5973chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
1c79356b 5974{
1c79356b 5975 int error;
91447636
A
5976 struct vnode_attr va;
5977 kauth_filesec_t xsecdst;
5978
b0d623f7
A
5979 AUDIT_ARG(owner, uap->uid, uap->gid);
5980
91447636
A
5981 VATTR_INIT(&va);
5982 if (uap->mode != -1)
5983 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5984 if (uap->uid != KAUTH_UID_NONE)
5985 VATTR_SET(&va, va_uid, uap->uid);
5986 if (uap->gid != KAUTH_GID_NONE)
5987 VATTR_SET(&va, va_gid, uap->gid);
5988
5989 xsecdst = NULL;
5990 switch(uap->xsecurity) {
5991 /* explicit remove request */
5992 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5993 VATTR_SET(&va, va_acl, NULL);
5994 break;
5995 /* not being set */
5996 case USER_ADDR_NULL:
5997 break;
5998 default:
5999 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6000 return(error);
6001 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6002 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
6003 }
1c79356b 6004
fe8ab488
A
6005 error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
6006 UIO_USERSPACE);
55e303ae 6007
91447636
A
6008 if (xsecdst != NULL)
6009 kauth_filesec_free(xsecdst);
6010 return(error);
6011}
4a249263 6012
2d21ac55
A
6013/*
6014 * Returns: 0 Success
fe8ab488 6015 * chmodat:??? [anything chmodat can return]
2d21ac55 6016 */
fe8ab488
A
6017static int
6018fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
6019 int flag, enum uio_seg segflg)
91447636 6020{
91447636
A
6021 struct vnode_attr va;
6022
6023 VATTR_INIT(&va);
fe8ab488
A
6024 VATTR_SET(&va, va_mode, mode & ALLPERMS);
6025
6026 return (chmodat(ctx, path, &va, fd, flag, segflg));
6027}
6028
6029int
6030chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
6031{
6032 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6033 AT_FDCWD, 0, UIO_USERSPACE));
6034}
91447636 6035
fe8ab488
A
6036int
6037fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
6038{
6039 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6040 return (EINVAL);
6041
6042 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6043 uap->fd, uap->flag, UIO_USERSPACE));
1c79356b
A
6044}
6045
6046/*
6047 * Change mode of a file given a file descriptor.
6048 */
91447636 6049static int
2d21ac55 6050fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
1c79356b 6051{
2d21ac55 6052 vnode_t vp;
1c79356b 6053 int error;
55e303ae 6054
91447636 6055 AUDIT_ARG(fd, fd);
55e303ae 6056
91447636
A
6057 if ((error = file_vnode(fd, &vp)) != 0)
6058 return (error);
6059 if ((error = vnode_getwithref(vp)) != 0) {
6060 file_drop(fd);
6061 return(error);
6062 }
55e303ae
A
6063 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6064
fe8ab488 6065 error = chmod_vnode(vfs_context_current(), vp, vap);
91447636
A
6066 (void)vnode_put(vp);
6067 file_drop(fd);
55e303ae 6068
1c79356b
A
6069 return (error);
6070}
6071
b0d623f7
A
6072/*
6073 * fchmod_extended: Change mode of a file given a file descriptor; with
6074 * extended argument list (including extended security (ACL)).
6075 *
6076 * Parameters: p Process requesting to change file mode
6077 * uap User argument descriptor (see below)
39037602 6078 * retval (ignored)
b0d623f7
A
6079 *
6080 * Indirect: uap->mode File mode to set (same as 'chmod')
6081 * uap->uid UID to set
6082 * uap->gid GID to set
6083 * uap->xsecurity ACL to set (or delete)
6084 * uap->fd File descriptor of file to change mode
39037602 6085 *
b0d623f7
A
6086 * Returns: 0 Success
6087 * !0 errno value
6088 *
6089 */
91447636 6090int
b0d623f7 6091fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
91447636
A
6092{
6093 int error;
6094 struct vnode_attr va;
6095 kauth_filesec_t xsecdst;
6096
b0d623f7
A
6097 AUDIT_ARG(owner, uap->uid, uap->gid);
6098
91447636
A
6099 VATTR_INIT(&va);
6100 if (uap->mode != -1)
6101 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6102 if (uap->uid != KAUTH_UID_NONE)
6103 VATTR_SET(&va, va_uid, uap->uid);
6104 if (uap->gid != KAUTH_GID_NONE)
6105 VATTR_SET(&va, va_gid, uap->gid);
6106
6107 xsecdst = NULL;
6108 switch(uap->xsecurity) {
6109 case USER_ADDR_NULL:
6110 VATTR_SET(&va, va_acl, NULL);
6111 break;
39236c6e
A
6112 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6113 VATTR_SET(&va, va_acl, NULL);
6114 break;
6115 /* not being set */
91447636
A
6116 case CAST_USER_ADDR_T(-1):
6117 break;
6118 default:
6119 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6120 return(error);
6121 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6122 }
6123
6124 error = fchmod1(p, uap->fd, &va);
6125
39037602 6126
91447636
A
6127 switch(uap->xsecurity) {
6128 case USER_ADDR_NULL:
6129 case CAST_USER_ADDR_T(-1):
6130 break;
6131 default:
6132 if (xsecdst != NULL)
6133 kauth_filesec_free(xsecdst);
6134 }
6135 return(error);
6136}
6137
6138int
b0d623f7 6139fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
91447636
A
6140{
6141 struct vnode_attr va;
6142
6143 VATTR_INIT(&va);
6144 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6145
6146 return(fchmod1(p, uap->fd, &va));
6147}
6148
6149
1c79356b
A
6150/*
6151 * Set ownership given a path name.
6152 */
1c79356b 6153/* ARGSUSED */
91447636 6154static int
fe8ab488
A
6155fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
6156 gid_t gid, int flag, enum uio_seg segflg)
1c79356b 6157{
2d21ac55 6158 vnode_t vp;
91447636 6159 struct vnode_attr va;
1c79356b
A
6160 int error;
6161 struct nameidata nd;
fe8ab488 6162 int follow;
91447636 6163 kauth_action_t action;
1c79356b 6164
fe8ab488 6165 AUDIT_ARG(owner, uid, gid);
55e303ae 6166
fe8ab488
A
6167 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6168 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
6169 path, ctx);
6170 error = nameiat(&nd, fd);
55e303ae 6171 if (error)
1c79356b
A
6172 return (error);
6173 vp = nd.ni_vp;
6174
91447636
A
6175 nameidone(&nd);
6176
91447636 6177 VATTR_INIT(&va);
fe8ab488
A
6178 if (uid != (uid_t)VNOVAL)
6179 VATTR_SET(&va, va_uid, uid);
6180 if (gid != (gid_t)VNOVAL)
6181 VATTR_SET(&va, va_gid, gid);
91447636 6182
2d21ac55 6183#if CONFIG_MACF
fe8ab488 6184 error = mac_vnode_check_setowner(ctx, vp, uid, gid);
2d21ac55
A
6185 if (error)
6186 goto out;
6187#endif
6188
91447636
A
6189 /* preflight and authorize attribute changes */
6190 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6191 goto out;
6192 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6193 goto out;
6194 error = vnode_setattr(vp, &va, ctx);
39037602
A
6195
6196#if CONFIG_MACF
6197 if (error == 0)
6198 mac_vnode_notify_setowner(ctx, vp, uid, gid);
6199#endif
6200
91447636
A
6201out:
6202 /*
6203 * EACCES is only allowed from namei(); permissions failure should
6204 * return EPERM, so we need to translate the error code.
6205 */
6206 if (error == EACCES)
6207 error = EPERM;
fe8ab488 6208
91447636 6209 vnode_put(vp);
1c79356b
A
6210 return (error);
6211}
6212
91447636 6213int
fe8ab488 6214chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
91447636 6215{
fe8ab488
A
6216 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6217 uap->uid, uap->gid, 0, UIO_USERSPACE));
91447636
A
6218}
6219
6220int
fe8ab488 6221lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
91447636 6222{
fe8ab488
A
6223 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6224 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6225}
6226
6227int
6228fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6229{
6230 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6231 return (EINVAL);
6232
6233 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6234 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
91447636
A
6235}
6236
1c79356b
A
6237/*
6238 * Set ownership given a file descriptor.
6239 */
1c79356b
A
6240/* ARGSUSED */
6241int
b0d623f7 6242fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
1c79356b 6243{
91447636 6244 struct vnode_attr va;
2d21ac55
A
6245 vfs_context_t ctx = vfs_context_current();
6246 vnode_t vp;
1c79356b 6247 int error;
91447636 6248 kauth_action_t action;
1c79356b 6249
55e303ae
A
6250 AUDIT_ARG(owner, uap->uid, uap->gid);
6251 AUDIT_ARG(fd, uap->fd);
6252
91447636 6253 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 6254 return (error);
55e303ae 6255
91447636
A
6256 if ( (error = vnode_getwithref(vp)) ) {
6257 file_drop(uap->fd);
6258 return(error);
6259 }
55e303ae
A
6260 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6261
91447636
A
6262 VATTR_INIT(&va);
6263 if (uap->uid != VNOVAL)
6264 VATTR_SET(&va, va_uid, uap->uid);
6265 if (uap->gid != VNOVAL)
6266 VATTR_SET(&va, va_gid, uap->gid);
6267
2d21ac55
A
6268#if NAMEDSTREAMS
6269 /* chown calls are not allowed for resource forks. */
6270 if (vp->v_flag & VISNAMEDSTREAM) {
6271 error = EPERM;
6272 goto out;
6273 }
6274#endif
6275
6276#if CONFIG_MACF
6277 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
6278 if (error)
6279 goto out;
6280#endif
91447636
A
6281
6282 /* preflight and authorize attribute changes */
2d21ac55 6283 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
91447636 6284 goto out;
2d21ac55 6285 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
91447636
A
6286 if (error == EACCES)
6287 error = EPERM;
6288 goto out;
6289 }
2d21ac55 6290 error = vnode_setattr(vp, &va, ctx);
4a249263 6291
39037602
A
6292#if CONFIG_MACF
6293 if (error == 0)
6294 mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid);
6295#endif
6296
91447636
A
6297out:
6298 (void)vnode_put(vp);
6299 file_drop(uap->fd);
1c79356b
A
6300 return (error);
6301}
6302
9bccf70c 6303static int
2d21ac55 6304getutimes(user_addr_t usrtvp, struct timespec *tsp)
9bccf70c 6305{
9bccf70c
A
6306 int error;
6307
91447636
A
6308 if (usrtvp == USER_ADDR_NULL) {
6309 struct timeval old_tv;
6310 /* XXX Y2038 bug because of microtime argument */
6311 microtime(&old_tv);
6312 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
9bccf70c
A
6313 tsp[1] = tsp[0];
6314 } else {
91447636 6315 if (IS_64BIT_PROCESS(current_proc())) {
b0d623f7 6316 struct user64_timeval tv[2];
91447636 6317 error = copyin(usrtvp, (void *)tv, sizeof(tv));
b0d623f7
A
6318 if (error)
6319 return (error);
6320 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6321 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 6322 } else {
b0d623f7
A
6323 struct user32_timeval tv[2];
6324 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6325 if (error)
6326 return (error);
6327 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6328 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 6329 }
9bccf70c
A
6330 }
6331 return 0;
6332}
6333
6334static int
2d21ac55 6335setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
91447636 6336 int nullflag)
9bccf70c
A
6337{
6338 int error;
91447636
A
6339 struct vnode_attr va;
6340 kauth_action_t action;
e5568f75
A
6341
6342 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6343
91447636
A
6344 VATTR_INIT(&va);
6345 VATTR_SET(&va, va_access_time, ts[0]);
6346 VATTR_SET(&va, va_modify_time, ts[1]);
9bccf70c 6347 if (nullflag)
91447636
A
6348 va.va_vaflags |= VA_UTIMES_NULL;
6349
2d21ac55
A
6350#if NAMEDSTREAMS
6351 /* utimes calls are not allowed for resource forks. */
6352 if (vp->v_flag & VISNAMEDSTREAM) {
6353 error = EPERM;
6354 goto out;
6355 }
6356#endif
6357
6358#if CONFIG_MACF
6359 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
6360 if (error)
6361 goto out;
6362#endif
6363 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
6364 if (!nullflag && error == EACCES)
6365 error = EPERM;
91447636 6366 goto out;
2d21ac55
A
6367 }
6368
91447636 6369 /* since we may not need to auth anything, check here */
2d21ac55
A
6370 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6371 if (!nullflag && error == EACCES)
6372 error = EPERM;
91447636 6373 goto out;
2d21ac55 6374 }
91447636 6375 error = vnode_setattr(vp, &va, ctx);
4a249263 6376
39037602
A
6377#if CONFIG_MACF
6378 if (error == 0)
6379 mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]);
6380#endif
6381
9bccf70c
A
6382out:
6383 return error;
6384}
6385
1c79356b
A
6386/*
6387 * Set the access and modification times of a file.
6388 */
1c79356b
A
6389/* ARGSUSED */
6390int
b0d623f7 6391utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
1c79356b 6392{
9bccf70c 6393 struct timespec ts[2];
91447636 6394 user_addr_t usrtvp;
1c79356b
A
6395 int error;
6396 struct nameidata nd;
2d21ac55 6397 vfs_context_t ctx = vfs_context_current();
1c79356b 6398
2d21ac55 6399 /*
39037602 6400 * AUDIT: Needed to change the order of operations to do the
55e303ae
A
6401 * name lookup first because auditing wants the path.
6402 */
39037602 6403 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
2d21ac55 6404 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
6405 error = namei(&nd);
6406 if (error)
9bccf70c 6407 return (error);
91447636 6408 nameidone(&nd);
55e303ae 6409
91447636
A
6410 /*
6411 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6412 * the current time instead.
6413 */
55e303ae 6414 usrtvp = uap->tptr;
91447636
A
6415 if ((error = getutimes(usrtvp, ts)) != 0)
6416 goto out;
6417
2d21ac55 6418 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
91447636
A
6419
6420out:
6421 vnode_put(nd.ni_vp);
1c79356b
A
6422 return (error);
6423}
6424
9bccf70c
A
6425/*
6426 * Set the access and modification times of a file.
6427 */
9bccf70c
A
6428/* ARGSUSED */
6429int
b0d623f7 6430futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
9bccf70c
A
6431{
6432 struct timespec ts[2];
2d21ac55 6433 vnode_t vp;
91447636 6434 user_addr_t usrtvp;
9bccf70c
A
6435 int error;
6436
55e303ae 6437 AUDIT_ARG(fd, uap->fd);
9bccf70c
A
6438 usrtvp = uap->tptr;
6439 if ((error = getutimes(usrtvp, ts)) != 0)
6440 return (error);
91447636 6441 if ((error = file_vnode(uap->fd, &vp)) != 0)
9bccf70c 6442 return (error);
91447636
A
6443 if((error = vnode_getwithref(vp))) {
6444 file_drop(uap->fd);
6445 return(error);
6446 }
55e303ae 6447
2d21ac55 6448 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
91447636
A
6449 vnode_put(vp);
6450 file_drop(uap->fd);
6451 return(error);
9bccf70c
A
6452}
6453
1c79356b
A
6454/*
6455 * Truncate a file given its path name.
6456 */
1c79356b
A
6457/* ARGSUSED */
6458int
b0d623f7 6459truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
1c79356b 6460{
2d21ac55 6461 vnode_t vp;
91447636 6462 struct vnode_attr va;
2d21ac55 6463 vfs_context_t ctx = vfs_context_current();
1c79356b
A
6464 int error;
6465 struct nameidata nd;
91447636
A
6466 kauth_action_t action;
6467
0b4e3aa0
A
6468 if (uap->length < 0)
6469 return(EINVAL);
39037602 6470 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
2d21ac55 6471 UIO_USERSPACE, uap->path, ctx);
91447636 6472 if ((error = namei(&nd)))
1c79356b
A
6473 return (error);
6474 vp = nd.ni_vp;
91447636
A
6475
6476 nameidone(&nd);
6477
6478 VATTR_INIT(&va);
6479 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55
A
6480
6481#if CONFIG_MACF
6482 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
6483 if (error)
6484 goto out;
6485#endif
6486
6487 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
91447636 6488 goto out;
2d21ac55 6489 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
91447636 6490 goto out;
2d21ac55 6491 error = vnode_setattr(vp, &va, ctx);
39037602
A
6492
6493#if CONFIG_MACF
6494 if (error == 0)
6495 mac_vnode_notify_truncate(ctx, NOCRED, vp);
6496#endif
6497
91447636
A
6498out:
6499 vnode_put(vp);
1c79356b
A
6500 return (error);
6501}
6502
6503/*
6504 * Truncate a file given a file descriptor.
6505 */
1c79356b
A
6506/* ARGSUSED */
6507int
b0d623f7 6508ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
1c79356b 6509{
2d21ac55 6510 vfs_context_t ctx = vfs_context_current();
91447636 6511 struct vnode_attr va;
2d21ac55 6512 vnode_t vp;
91447636
A
6513 struct fileproc *fp;
6514 int error ;
6515 int fd = uap->fd;
1c79356b 6516
55e303ae 6517 AUDIT_ARG(fd, uap->fd);
0b4e3aa0
A
6518 if (uap->length < 0)
6519 return(EINVAL);
39037602 6520
91447636
A
6521 if ( (error = fp_lookup(p,fd,&fp,0)) ) {
6522 return(error);
6523 }
1c79356b 6524
39236c6e
A
6525 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
6526 case DTYPE_PSXSHM:
91447636
A
6527 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
6528 goto out;
39236c6e
A
6529 case DTYPE_VNODE:
6530 break;
6531 default:
91447636
A
6532 error = EINVAL;
6533 goto out;
1c79356b 6534 }
1c79356b 6535
2d21ac55 6536 vp = (vnode_t)fp->f_fglob->fg_data;
e5568f75 6537
91447636
A
6538 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
6539 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6540 error = EINVAL;
6541 goto out;
1c79356b 6542 }
1c79356b 6543
91447636
A
6544 if ((error = vnode_getwithref(vp)) != 0) {
6545 goto out;
6546 }
1c79356b 6547
91447636 6548 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 6549
2d21ac55
A
6550#if CONFIG_MACF
6551 error = mac_vnode_check_truncate(ctx,
6552 fp->f_fglob->fg_cred, vp);
6553 if (error) {
6554 (void)vnode_put(vp);
6555 goto out;
6556 }
6557#endif
91447636
A
6558 VATTR_INIT(&va);
6559 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55 6560 error = vnode_setattr(vp, &va, ctx);
39037602
A
6561
6562#if CONFIG_MACF
6563 if (error == 0)
6564 mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp);
6565#endif
6566
91447636
A
6567 (void)vnode_put(vp);
6568out:
6569 file_drop(fd);
6570 return (error);
1c79356b 6571}
91447636 6572
1c79356b
A
6573
6574/*
b0d623f7 6575 * Sync an open file with synchronized I/O _file_ integrity completion
1c79356b 6576 */
1c79356b
A
6577/* ARGSUSED */
6578int
b0d623f7 6579fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
1c79356b 6580{
2d21ac55 6581 __pthread_testcancel(1);
b0d623f7
A
6582 return(fsync_common(p, uap, MNT_WAIT));
6583}
6584
6585
6586/*
6587 * Sync an open file with synchronized I/O _file_ integrity completion
6588 *
6589 * Notes: This is a legacy support function that does not test for
6590 * thread cancellation points.
6591 */
6592/* ARGSUSED */
39037602 6593int
b0d623f7
A
6594fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6595{
6596 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
2d21ac55
A
6597}
6598
b0d623f7
A
6599
6600/*
6601 * Sync an open file with synchronized I/O _data_ integrity completion
6602 */
6603/* ARGSUSED */
2d21ac55 6604int
b0d623f7
A
6605fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6606{
6607 __pthread_testcancel(1);
6608 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6609}
6610
6611
6612/*
6613 * fsync_common
6614 *
6615 * Common fsync code to support both synchronized I/O file integrity completion
6616 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6617 *
6618 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6619 * will only guarantee that the file data contents are retrievable. If
6620 * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
6621 * includes additional metadata unnecessary for retrieving the file data
6622 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6623 * storage.
6624 *
6625 * Parameters: p The process
6626 * uap->fd The descriptor to synchronize
6627 * flags The data integrity flags
6628 *
6629 * Returns: int Success
6630 * fp_getfvp:EBADF Bad file descriptor
6631 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6632 * VNOP_FSYNC:??? unspecified
6633 *
6634 * Notes: We use struct fsync_args because it is a short name, and all
6635 * caller argument structures are otherwise identical.
6636 */
6637static int
6638fsync_common(proc_t p, struct fsync_args *uap, int flags)
2d21ac55
A
6639{
6640 vnode_t vp;
91447636 6641 struct fileproc *fp;
2d21ac55 6642 vfs_context_t ctx = vfs_context_current();
1c79356b
A
6643 int error;
6644
b0d623f7
A
6645 AUDIT_ARG(fd, uap->fd);
6646
91447636 6647 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
1c79356b 6648 return (error);
91447636
A
6649 if ( (error = vnode_getwithref(vp)) ) {
6650 file_drop(uap->fd);
6651 return(error);
6652 }
91447636 6653
b0d623f7
A
6654 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6655
6656 error = VNOP_FSYNC(vp, flags, ctx);
2d21ac55
A
6657
6658#if NAMEDRSRCFORK
6659 /* Sync resource fork shadow file if necessary. */
6660 if ((error == 0) &&
39037602 6661 (vp->v_flag & VISNAMEDSTREAM) &&
2d21ac55 6662 (vp->v_parent != NULLVP) &&
b0d623f7 6663 vnode_isshadow(vp) &&
2d21ac55
A
6664 (fp->f_flags & FP_WRITTEN)) {
6665 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
6666 }
6667#endif
91447636
A
6668
6669 (void)vnode_put(vp);
6670 file_drop(uap->fd);
1c79356b
A
6671 return (error);
6672}
6673
6674/*
39037602 6675 * Duplicate files. Source must be a file, target must be a file or
1c79356b 6676 * must not exist.
91447636
A
6677 *
6678 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6679 * perform inheritance correctly.
1c79356b 6680 */
1c79356b
A
6681/* ARGSUSED */
6682int
b0d623f7 6683copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
1c79356b 6684{
91447636 6685 vnode_t tvp, fvp, tdvp, sdvp;
1c79356b
A
6686 struct nameidata fromnd, tond;
6687 int error;
2d21ac55 6688 vfs_context_t ctx = vfs_context_current();
39037602
A
6689#if CONFIG_MACF
6690 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
6691 struct vnode_attr va;
6692#endif
55e303ae
A
6693
6694 /* Check that the flags are valid. */
1c79356b
A
6695
6696 if (uap->flags & ~CPF_MASK) {
55e303ae
A
6697 return(EINVAL);
6698 }
1c79356b 6699
4bd07ac2 6700 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
2d21ac55 6701 UIO_USERSPACE, uap->from, ctx);
91447636 6702 if ((error = namei(&fromnd)))
1c79356b
A
6703 return (error);
6704 fvp = fromnd.ni_vp;
6705
6d2010ae
A
6706 NDINIT(&tond, CREATE, OP_LINK,
6707 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6708 UIO_USERSPACE, uap->to, ctx);
91447636 6709 if ((error = namei(&tond))) {
1c79356b
A
6710 goto out1;
6711 }
6712 tdvp = tond.ni_dvp;
6713 tvp = tond.ni_vp;
91447636 6714
1c79356b
A
6715 if (tvp != NULL) {
6716 if (!(uap->flags & CPF_OVERWRITE)) {
6717 error = EEXIST;
6718 goto out;
6719 }
6720 }
39037602 6721
1c79356b
A
6722 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
6723 error = EISDIR;
6724 goto out;
6725 }
6726
39037602
A
6727 /* This calls existing MAC hooks for open */
6728 if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx,
6729 NULL))) {
6730 goto out;
6731 }
6732
6733 if (tvp) {
6734 /*
6735 * See unlinkat_internal for an explanation of the potential
6736 * ENOENT from the MAC hook but the gist is that the MAC hook
6737 * can fail because vn_getpath isn't able to return the full
6738 * path. We choose to ignore this failure.
6739 */
6740 error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL);
6741 if (error && error != ENOENT)
6742 goto out;
6743 error = 0;
6744 }
6745
6746#if CONFIG_MACF
6747 VATTR_INIT(&va);
6748 VATTR_SET(&va, va_type, fvp->v_type);
6749 /* Mask off all but regular access permissions */
6750 VATTR_SET(&va, va_mode,
6751 ((((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
6752 error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
6753 if (error)
6754 goto out;
6755#endif /* CONFIG_MACF */
6756
2d21ac55 6757 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
1c79356b
A
6758 goto out;
6759
6760 if (fvp == tdvp)
6761 error = EINVAL;
6762 /*
6763 * If source is the same as the destination (that is the
6764 * same inode number) then there is nothing to do.
6765 * (fixed to have POSIX semantics - CSM 3/2/98)
6766 */
6767 if (fvp == tvp)
6768 error = -1;
91447636 6769 if (!error)
2d21ac55 6770 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
1c79356b 6771out:
91447636
A
6772 sdvp = tond.ni_startdir;
6773 /*
6774 * nameidone has to happen before we vnode_put(tdvp)
6775 * since it may need to release the fs_nodelock on the tdvp
6776 */
6777 nameidone(&tond);
6778
6779 if (tvp)
6780 vnode_put(tvp);
6781 vnode_put(tdvp);
6782 vnode_put(sdvp);
1c79356b 6783out1:
91447636
A
6784 vnode_put(fvp);
6785
91447636
A
6786 nameidone(&fromnd);
6787
1c79356b
A
6788 if (error == -1)
6789 return (0);
6790 return (error);
6791}
6792
39037602 6793#define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
91447636 6794
1c79356b 6795/*
39037602
A
6796 * Helper function for doing clones. The caller is expected to provide an
6797 * iocounted source vnode and release it.
1c79356b 6798 */
fe8ab488 6799static int
39037602
A
6800clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
6801 user_addr_t dst, uint32_t flags, vfs_context_t ctx)
1c79356b 6802{
91447636 6803 vnode_t tvp, tdvp;
39037602 6804 struct nameidata tond;
1c79356b 6805 int error;
39037602
A
6806 int follow;
6807 boolean_t free_acl;
6808 boolean_t attr_cleanup;
6809 enum vtype v_type;
6810 kauth_action_t action;
6811 struct componentname *cnp;
6812 uint32_t defaulted;
6813 struct vnode_attr va;
316670eb 6814
39037602
A
6815 v_type = vnode_vtype(fvp);
6816 switch (v_type) {
6817 case VLNK:
6818 /* FALLTHRU */
6819 case VREG:
6820 action = KAUTH_VNODE_ADD_FILE;
6821 break;
6822 case VDIR:
6823 if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
6824 fvp->v_mountedhere) {
6825 return (EINVAL);
6826 }
6827 action = KAUTH_VNODE_ADD_SUBDIRECTORY;
6828 break;
6829 default:
6830 return (EINVAL);
6831 }
6832
6833 AUDIT_ARG(fd2, dst_dirfd);
6834 AUDIT_ARG(value32, flags);
6835
6836 follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6837 NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
6838 UIO_USERSPACE, dst, ctx);
6839 if ((error = nameiat(&tond, dst_dirfd)))
6840 return (error);
6841 cnp = &tond.ni_cnd;
6842 tdvp = tond.ni_dvp;
6843 tvp = tond.ni_vp;
6844
6845 free_acl = FALSE;
6846 attr_cleanup = FALSE;
6847
6848 if (tvp != NULL) {
6849 error = EEXIST;
6850 goto out;
6851 }
6852
6853 if (vnode_mount(tdvp) != vnode_mount(fvp)) {
6854 error = EXDEV;
6855 goto out;
6856 }
6857
6858#if CONFIG_MACF
6859 if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp)))
6860 goto out;
6861#endif
6862 if ((error = vnode_authorize(tdvp, NULL, action, ctx)))
6863 goto out;
6864
6865 action = KAUTH_VNODE_GENERIC_READ_BITS;
6866 if (data_read_authorised)
6867 action &= ~KAUTH_VNODE_READ_DATA;
6868 if ((error = vnode_authorize(fvp, NULL, action, ctx)))
6869 goto out;
6870
6871 /*
6872 * certain attributes may need to be changed from the source, we ask for
6873 * those here.
6874 */
6875 VATTR_INIT(&va);
6876 VATTR_WANTED(&va, va_type);
6877 VATTR_WANTED(&va, va_mode);
6878 VATTR_WANTED(&va, va_flags);
6879 VATTR_WANTED(&va, va_acl);
6880
6881 if ((error = vnode_getattr(fvp, &va, ctx)) != 0)
6882 goto out;
6883
6884 if (!VATTR_IS_SUPPORTED(&va, va_acl))
6885 VATTR_CLEAR_ACTIVE(&va, va_acl);
6886 else if (va.va_acl != NULL)
6887 free_acl = TRUE;
6888
6889 if (!VATTR_IS_SUPPORTED(&va, va_mode)) {
6890 VATTR_CLEAR_ACTIVE(&va, va_mode);
6891 } else {
6892 proc_t p = vfs_context_proc(ctx);
6893
6894 VATTR_SET(&va, va_mode,
6895 (va.va_mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
6896 }
6897
6898 if (!VATTR_IS_SUPPORTED(&va, va_flags)) {
6899 VATTR_CLEAR_ACTIVE(&va, va_flags);
6900 } else if (va.va_flags & SF_RESTRICTED) {
6901 /*
6902 * Turn off SF_RESTRICTED from source, if the destination needs
6903 * it, it will be handled in vnode_authattr_new.
6904 */
6905 VATTR_SET(&va, va_flags, (va.va_flags & ~SF_RESTRICTED));
6906 }
6907
6908 /* Handle ACL inheritance, initialize vap. */
6909 if (v_type == VLNK) {
6910 error = vnode_authattr_new(tdvp, &va, 0, ctx);
6911 } else {
6912 error = vn_attribute_prepare(tdvp, &va, &defaulted, ctx);
6913 attr_cleanup = TRUE;
6914 }
6915
6916 if (error) {
6917 attr_cleanup = FALSE;
6918 goto out;
6919 }
6920
6921 error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &va, flags, ctx);
6922
6923 if (!error && tvp) {
6924 int update_flags = 0;
6925#if CONFIG_FSE
6926 int fsevent;
6927#endif /* CONFIG_FSE */
6928
6929#if CONFIG_MACF
6930 (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
6931 VNODE_LABEL_CREATE, ctx);
6932#endif
6933 /*
6934 * If some of the requested attributes weren't handled by the
6935 * VNOP, use our fallback code.
6936 */
6937 if (!VATTR_ALL_SUPPORTED(&va))
6938 (void)vnode_setattr_fallback(tvp, &va, ctx);
6939
6940 // Make sure the name & parent pointers are hooked up
6941 if (tvp->v_name == NULL)
6942 update_flags |= VNODE_UPDATE_NAME;
6943 if (tvp->v_parent == NULLVP)
6944 update_flags |= VNODE_UPDATE_PARENT;
6945
6946 if (update_flags) {
6947 (void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
6948 cnp->cn_namelen, cnp->cn_hash, update_flags);
6949 }
6950
6951#if CONFIG_FSE
6952 switch (vnode_vtype(tvp)) {
6953 case VLNK:
6954 /* FALLTHRU */
6955 case VREG:
6956 fsevent = FSE_CREATE_FILE;
6957 break;
6958 case VDIR:
6959 fsevent = FSE_CREATE_DIR;
6960 break;
6961 default:
6962 goto out;
6963 }
6964
6965 if (need_fsevent(fsevent, tvp)) {
6966 add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
6967 FSE_ARG_DONE);
6968 }
6969#endif /* CONFIG_FSE */
6970 }
6971#if CLONE_SNAPSHOT_FALLBACKS_ENABLED
6972 else if (error == ENOTSUP) {
6973 struct vfs_attr vfa;
6974
6975 /*
6976 * Fallback to VNOP_COPYFILE but check first that the
6977 * filesystem supports cloning.
6978 */
6979 VFSATTR_INIT(&vfa);
6980 VFSATTR_WANTED(&vfa, f_capabilities);
6981 if ((vfs_getattr(vnode_mount(tdvp), &vfa, ctx) == 0) &&
6982 VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) &&
6983 (vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_CLONE) &&
6984 (vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_CLONE)) {
6985
6986 error = VNOP_COPYFILE(fvp, tdvp, tvp, cnp, 0,
6987 0, ctx);
6988 }
6989 }
6990#endif /* CLONE_SNAPSHOT_FALLBACKS_ENABLED */
6991
6992out:
6993 if (attr_cleanup)
6994 vn_attribute_cleanup(&va, defaulted);
6995 if (free_acl && va.va_acl)
6996 kauth_acl_free(va.va_acl);
6997 nameidone(&tond);
6998 if (tvp)
6999 vnode_put(tvp);
7000 vnode_put(tdvp);
7001 return (error);
7002}
7003
7004/*
7005 * clone files or directories, target must not exist.
7006 */
7007/* ARGSUSED */
7008int
7009clonefileat(__unused proc_t p, struct clonefileat_args *uap,
7010 __unused int32_t *retval)
7011{
7012 vnode_t fvp;
7013 struct nameidata fromnd;
7014 int follow;
7015 int error;
7016 vfs_context_t ctx = vfs_context_current();
7017
7018 /* Check that the flags are valid. */
7019 if (uap->flags & ~CLONE_NOFOLLOW)
7020 return (EINVAL);
7021
7022 AUDIT_ARG(fd, uap->src_dirfd);
7023
7024 follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7025 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
7026 UIO_USERSPACE, uap->src, ctx);
7027 if ((error = nameiat(&fromnd, uap->src_dirfd)))
7028 return (error);
7029
7030 fvp = fromnd.ni_vp;
7031 nameidone(&fromnd);
7032
7033 error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
7034 uap->flags, ctx);
7035
7036 vnode_put(fvp);
7037 return (error);
7038}
7039
7040int
7041fclonefileat(__unused proc_t p, struct fclonefileat_args *uap,
7042 __unused int32_t *retval)
7043{
7044 vnode_t fvp;
7045 struct fileproc *fp;
7046 int error;
7047 vfs_context_t ctx = vfs_context_current();
7048
7049 AUDIT_ARG(fd, uap->src_fd);
7050 error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
7051 if (error)
7052 return (error);
7053
7054 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7055 AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
7056 error = EBADF;
7057 goto out;
7058 }
7059
7060 if ((error = vnode_getwithref(fvp)))
7061 goto out;
7062
7063 AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
7064
7065 error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
7066 uap->flags, ctx);
7067
7068 vnode_put(fvp);
7069out:
7070 file_drop(uap->src_fd);
7071 return (error);
7072}
7073
7074/*
7075 * Rename files. Source and destination must either both be directories,
7076 * or both not be directories. If target is a directory, it must be empty.
7077 */
7078/* ARGSUSED */
7079static int
7080renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
7081 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
7082{
7083 if (flags & ~VFS_RENAME_FLAGS_MASK)
7084 return EINVAL;
7085
7086 if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL))
7087 return EINVAL;
7088
7089 vnode_t tvp, tdvp;
7090 vnode_t fvp, fdvp;
7091 struct nameidata *fromnd, *tond;
7092 int error;
7093 int do_retry;
7094 int retry_count;
7095 int mntrename;
7096 int need_event;
7097 const char *oname = NULL;
7098 char *from_name = NULL, *to_name = NULL;
7099 int from_len=0, to_len=0;
7100 int holding_mntlock;
7101 mount_t locked_mp = NULL;
7102 vnode_t oparent = NULLVP;
7103#if CONFIG_FSE
7104 fse_info from_finfo, to_finfo;
7105#endif
7106 int from_truncated=0, to_truncated;
7107 int batched = 0;
7108 struct vnode_attr *fvap, *tvap;
7109 int continuing = 0;
7110 /* carving out a chunk for structs that are too big to be on stack. */
7111 struct {
7112 struct nameidata from_node, to_node;
7113 struct vnode_attr fv_attr, tv_attr;
7114 } * __rename_data;
7115 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
7116 fromnd = &__rename_data->from_node;
7117 tond = &__rename_data->to_node;
7118
7119 holding_mntlock = 0;
7120 do_retry = 0;
7121 retry_count = 0;
91447636
A
7122retry:
7123 fvp = tvp = NULL;
7124 fdvp = tdvp = NULL;
6d2010ae 7125 fvap = tvap = NULL;
1c79356b
A
7126 mntrename = FALSE;
7127
316670eb 7128 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
fe8ab488 7129 segflg, from, ctx);
316670eb 7130 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
fe8ab488 7131
316670eb 7132 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
fe8ab488 7133 segflg, to, ctx);
316670eb 7134 tond->ni_flag = NAMEI_COMPOUNDRENAME;
fe8ab488 7135
6d2010ae 7136continue_lookup:
316670eb 7137 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
fe8ab488 7138 if ( (error = nameiat(fromnd, fromfd)) )
6d2010ae 7139 goto out1;
316670eb
A
7140 fdvp = fromnd->ni_dvp;
7141 fvp = fromnd->ni_vp;
1c79356b 7142
6d2010ae 7143 if (fvp && fvp->v_type == VDIR)
316670eb 7144 tond->ni_cnd.cn_flags |= WILLBEDIR;
6d2010ae 7145 }
2d21ac55 7146
316670eb 7147 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
fe8ab488 7148 if ( (error = nameiat(tond, tofd)) ) {
6d2010ae
A
7149 /*
7150 * Translate error code for rename("dir1", "dir2/.").
7151 */
fe8ab488 7152 if (error == EISDIR && fvp->v_type == VDIR)
6d2010ae
A
7153 error = EINVAL;
7154 goto out1;
7155 }
316670eb
A
7156 tdvp = tond->ni_dvp;
7157 tvp = tond->ni_vp;
fe8ab488 7158 }
91447636 7159
39037602
A
7160 if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
7161 error = ENOENT;
7162 goto out1;
7163 }
7164
7165 if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
7166 error = EEXIST;
7167 goto out1;
7168 }
7169
6d2010ae
A
7170 batched = vnode_compound_rename_available(fdvp);
7171 if (!fvp) {
fe8ab488 7172 /*
6d2010ae
A
7173 * Claim: this check will never reject a valid rename.
7174 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
7175 * Suppose fdvp and tdvp are not on the same mount.
fe8ab488 7176 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6d2010ae
A
7177 * then you can't move it to within another dir on the same mountpoint.
7178 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
7179 *
7180 * If this check passes, then we are safe to pass these vnodes to the same FS.
91447636 7181 */
6d2010ae
A
7182 if (fdvp->v_mount != tdvp->v_mount) {
7183 error = EXDEV;
7184 goto out1;
7185 }
7186 goto skipped_lookup;
1c79356b 7187 }
2d21ac55 7188
6d2010ae 7189 if (!batched) {
39037602 7190 error = vn_authorize_renamex(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, flags, NULL);
6d2010ae 7191 if (error) {
3e170ce0
A
7192 if (error == ENOENT) {
7193 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7194 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7195 /*
7196 * We encountered a race where after doing the namei, tvp stops
7197 * being valid. If so, simply re-drive the rename call from the
7198 * top.
7199 */
7200 do_retry = 1;
7201 retry_count += 1;
7202 }
6d2010ae 7203 }
91447636 7204 goto out1;
1c79356b
A
7205 }
7206 }
6d2010ae 7207
2d21ac55
A
7208 /*
7209 * If the source and destination are the same (i.e. they're
7210 * links to the same vnode) and the target file system is
7211 * case sensitive, then there is nothing to do.
6d2010ae
A
7212 *
7213 * XXX Come back to this.
2d21ac55
A
7214 */
7215 if (fvp == tvp) {
7216 int pathconf_val;
fe8ab488 7217
2d21ac55
A
7218 /*
7219 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
7220 * then assume that this file system is case sensitive.
7221 */
7222 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
7223 pathconf_val != 0) {
7224 goto out1;
fe8ab488 7225 }
2d21ac55 7226 }
91447636 7227
1c79356b
A
7228 /*
7229 * Allow the renaming of mount points.
7230 * - target must not exist
7231 * - target must reside in the same directory as source
7232 * - union mounts cannot be renamed
7233 * - "/" cannot be renamed
6d2010ae
A
7234 *
7235 * XXX Handle this in VFS after a continued lookup (if we missed
7236 * in the cache to start off)
39037602
A
7237 *
7238 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
7239 * we'll skip past here. The file system is responsible for
7240 * checking that @tvp is not a descendent of @fvp and vice versa
7241 * so it should always return EINVAL if either @tvp or @fvp is the
7242 * root of a volume.
1c79356b 7243 */
91447636 7244 if ((fvp->v_flag & VROOT) &&
1c79356b
A
7245 (fvp->v_type == VDIR) &&
7246 (tvp == NULL) &&
7247 (fvp->v_mountedhere == NULL) &&
91447636 7248 (fdvp == tdvp) &&
1c79356b
A
7249 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
7250 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
2d21ac55 7251 vnode_t coveredvp;
fe8ab488 7252
1c79356b 7253 /* switch fvp to the covered vnode */
91447636
A
7254 coveredvp = fvp->v_mount->mnt_vnodecovered;
7255 if ( (vnode_getwithref(coveredvp)) ) {
7256 error = ENOENT;
7257 goto out1;
7258 }
7259 vnode_put(fvp);
7260
7261 fvp = coveredvp;
1c79356b
A
7262 mntrename = TRUE;
7263 }
91447636
A
7264 /*
7265 * Check for cross-device rename.
7266 */
7267 if ((fvp->v_mount != tdvp->v_mount) ||
7268 (tvp && (fvp->v_mount != tvp->v_mount))) {
7269 error = EXDEV;
7270 goto out1;
7271 }
55e303ae 7272
91447636
A
7273 /*
7274 * If source is the same as the destination (that is the
7275 * same inode number) then there is nothing to do...
7276 * EXCEPT if the underlying file system supports case
7277 * insensitivity and is case preserving. In this case
7278 * the file system needs to handle the special case of
7279 * getting the same vnode as target (fvp) and source (tvp).
7280 *
7281 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
7282 * and _PC_CASE_PRESERVING can have this exception, and they need to
7283 * handle the special case of getting the same vnode as target and
7284 * source. NOTE: Then the target is unlocked going into vnop_rename,
7285 * so not to cause locking problems. There is a single reference on tvp.
7286 *
fe8ab488 7287 * NOTE - that fvp == tvp also occurs if they are hard linked and
b0d623f7
A
7288 * that correct behaviour then is just to return success without doing
7289 * anything.
6d2010ae
A
7290 *
7291 * XXX filesystem should take care of this itself, perhaps...
91447636
A
7292 */
7293 if (fvp == tvp && fdvp == tdvp) {
316670eb
A
7294 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
7295 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
7296 fromnd->ni_cnd.cn_namelen)) {
91447636 7297 goto out1;
55e303ae 7298 }
91447636 7299 }
55e303ae 7300
91447636
A
7301 if (holding_mntlock && fvp->v_mount != locked_mp) {
7302 /*
7303 * we're holding a reference and lock
7304 * on locked_mp, but it no longer matches
7305 * what we want to do... so drop our hold
7306 */
7307 mount_unlock_renames(locked_mp);
7308 mount_drop(locked_mp, 0);
7309 holding_mntlock = 0;
7310 }
7311 if (tdvp != fdvp && fvp->v_type == VDIR) {
7312 /*
7313 * serialize renames that re-shape
7314 * the tree... if holding_mntlock is
7315 * set, then we're ready to go...
7316 * otherwise we
7317 * first need to drop the iocounts
7318 * we picked up, second take the
7319 * lock to serialize the access,
7320 * then finally start the lookup
7321 * process over with the lock held
7322 */
7323 if (!holding_mntlock) {
7324 /*
7325 * need to grab a reference on
7326 * the mount point before we
7327 * drop all the iocounts... once
7328 * the iocounts are gone, the mount
7329 * could follow
7330 */
7331 locked_mp = fvp->v_mount;
7332 mount_ref(locked_mp, 0);
55e303ae 7333
91447636
A
7334 /*
7335 * nameidone has to happen before we vnode_put(tvp)
7336 * since it may need to release the fs_nodelock on the tvp
7337 */
316670eb 7338 nameidone(tond);
55e303ae 7339
91447636
A
7340 if (tvp)
7341 vnode_put(tvp);
7342 vnode_put(tdvp);
7343
7344 /*
7345 * nameidone has to happen before we vnode_put(fdvp)
7346 * since it may need to release the fs_nodelock on the fvp
7347 */
316670eb 7348 nameidone(fromnd);
55e303ae 7349
91447636
A
7350 vnode_put(fvp);
7351 vnode_put(fdvp);
7352
7353 mount_lock_renames(locked_mp);
7354 holding_mntlock = 1;
7355
7356 goto retry;
55e303ae 7357 }
91447636
A
7358 } else {
7359 /*
7360 * when we dropped the iocounts to take
fe8ab488 7361 * the lock, we allowed the identity of
91447636
A
7362 * the various vnodes to change... if they did,
7363 * we may no longer be dealing with a rename
7364 * that reshapes the tree... once we're holding
7365 * the iocounts, the vnodes can't change type
7366 * so we're free to drop the lock at this point
7367 * and continue on
1c79356b 7368 */
91447636
A
7369 if (holding_mntlock) {
7370 mount_unlock_renames(locked_mp);
7371 mount_drop(locked_mp, 0);
7372 holding_mntlock = 0;
1c79356b 7373 }
91447636 7374 }
6d2010ae 7375
91447636
A
7376 // save these off so we can later verify that fvp is the same
7377 oname = fvp->v_name;
7378 oparent = fvp->v_parent;
55e303ae 7379
6d2010ae 7380skipped_lookup:
2d21ac55 7381#if CONFIG_FSE
6d2010ae 7382 need_event = need_fsevent(FSE_RENAME, fdvp);
fe8ab488 7383 if (need_event) {
6d2010ae
A
7384 if (fvp) {
7385 get_fse_info(fvp, &from_finfo, ctx);
7386 } else {
316670eb 7387 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
6d2010ae
A
7388 if (error) {
7389 goto out1;
7390 }
7391
316670eb 7392 fvap = &__rename_data->fv_attr;
6d2010ae 7393 }
55e303ae 7394
91447636 7395 if (tvp) {
2d21ac55 7396 get_fse_info(tvp, &to_finfo, ctx);
6d2010ae 7397 } else if (batched) {
316670eb 7398 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
6d2010ae
A
7399 if (error) {
7400 goto out1;
7401 }
7402
316670eb 7403 tvap = &__rename_data->tv_attr;
2d21ac55
A
7404 }
7405 }
7406#else
7407 need_event = 0;
7408#endif /* CONFIG_FSE */
7409
7410 if (need_event || kauth_authorize_fileop_has_listeners()) {
2d21ac55 7411 if (from_name == NULL) {
6d2010ae
A
7412 GET_PATH(from_name);
7413 if (from_name == NULL) {
7414 error = ENOMEM;
7415 goto out1;
7416 }
91447636 7417 }
b0d623f7 7418
316670eb 7419 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
55e303ae 7420
2d21ac55 7421 if (to_name == NULL) {
6d2010ae
A
7422 GET_PATH(to_name);
7423 if (to_name == NULL) {
7424 error = ENOMEM;
7425 goto out1;
7426 }
2d21ac55 7427 }
91447636 7428
316670eb 7429 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
fe8ab488 7430 }
316670eb
A
7431 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
7432 tdvp, &tvp, &tond->ni_cnd, tvap,
39037602 7433 flags, ctx);
55e303ae 7434
91447636
A
7435 if (holding_mntlock) {
7436 /*
7437 * we can drop our serialization
7438 * lock now
7439 */
7440 mount_unlock_renames(locked_mp);
7441 mount_drop(locked_mp, 0);
7442 holding_mntlock = 0;
7443 }
7444 if (error) {
6d2010ae 7445 if (error == EKEEPLOOKING) {
316670eb
A
7446 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
7447 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6d2010ae
A
7448 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
7449 }
7450 }
7451
316670eb
A
7452 fromnd->ni_vp = fvp;
7453 tond->ni_vp = tvp;
fe8ab488 7454
6d2010ae
A
7455 goto continue_lookup;
7456 }
7457
7458 /*
fe8ab488
A
7459 * We may encounter a race in the VNOP where the destination didn't
7460 * exist when we did the namei, but it does by the time we go and
6d2010ae
A
7461 * try to create the entry. In this case, we should re-drive this rename
7462 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
fe8ab488 7463 * but other filesystems susceptible to this race could return it, too.
6d2010ae
A
7464 */
7465 if (error == ERECYCLE) {
7466 do_retry = 1;
7467 }
55e303ae 7468
c18c124e
A
7469 /*
7470 * For compound VNOPs, the authorization callback may return
7471 * ENOENT in case of racing hardlink lookups hitting the name
7472 * cache, redrive the lookup.
7473 */
3e170ce0
A
7474 if (batched && error == ENOENT) {
7475 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7476 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7477 do_retry = 1;
7478 retry_count += 1;
7479 }
c18c124e
A
7480 }
7481
91447636 7482 goto out1;
fe8ab488
A
7483 }
7484
7485 /* call out to allow 3rd party notification of rename.
91447636
A
7486 * Ignore result of kauth_authorize_fileop call.
7487 */
fe8ab488
A
7488 kauth_authorize_fileop(vfs_context_ucred(ctx),
7489 KAUTH_FILEOP_RENAME,
2d21ac55 7490 (uintptr_t)from_name, (uintptr_t)to_name);
39037602
A
7491 if (flags & VFS_RENAME_SWAP) {
7492 kauth_authorize_fileop(vfs_context_ucred(ctx),
7493 KAUTH_FILEOP_RENAME,
7494 (uintptr_t)to_name, (uintptr_t)from_name);
7495 }
91447636 7496
2d21ac55 7497#if CONFIG_FSE
91447636 7498 if (from_name != NULL && to_name != NULL) {
b0d623f7
A
7499 if (from_truncated || to_truncated) {
7500 // set it here since only the from_finfo gets reported up to user space
7501 from_finfo.mode |= FSE_TRUNCATED_PATH;
7502 }
6d2010ae
A
7503
7504 if (tvap && tvp) {
7505 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
7506 }
7507 if (fvap) {
7508 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
7509 }
7510
39037602
A
7511 if (tvp) {
7512 add_fsevent(FSE_RENAME, ctx,
7513 FSE_ARG_STRING, from_len, from_name,
7514 FSE_ARG_FINFO, &from_finfo,
7515 FSE_ARG_STRING, to_len, to_name,
7516 FSE_ARG_FINFO, &to_finfo,
7517 FSE_ARG_DONE);
7518 if (flags & VFS_RENAME_SWAP) {
7519 /*
7520 * Strictly speaking, swap is the equivalent of
7521 * *three* renames. FSEvents clients should only take
7522 * the events as a hint, so we only bother reporting
7523 * two.
7524 */
7525 add_fsevent(FSE_RENAME, ctx,
7526 FSE_ARG_STRING, to_len, to_name,
7527 FSE_ARG_FINFO, &to_finfo,
7528 FSE_ARG_STRING, from_len, from_name,
7529 FSE_ARG_FINFO, &from_finfo,
7530 FSE_ARG_DONE);
7531 }
55e303ae 7532 } else {
2d21ac55 7533 add_fsevent(FSE_RENAME, ctx,
91447636
A
7534 FSE_ARG_STRING, from_len, from_name,
7535 FSE_ARG_FINFO, &from_finfo,
7536 FSE_ARG_STRING, to_len, to_name,
7537 FSE_ARG_DONE);
7538 }
7539 }
2d21ac55 7540#endif /* CONFIG_FSE */
fe8ab488 7541
91447636
A
7542 /*
7543 * update filesystem's mount point data
7544 */
7545 if (mntrename) {
7546 char *cp, *pathend, *mpname;
7547 char * tobuf;
7548 struct mount *mp;
7549 int maxlen;
7550 size_t len = 0;
7551
7552 mp = fvp->v_mountedhere;
7553
7554 if (vfs_busy(mp, LK_NOWAIT)) {
7555 error = EBUSY;
7556 goto out1;
55e303ae 7557 }
91447636 7558 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
55e303ae 7559
fe8ab488
A
7560 if (UIO_SEG_IS_USER_SPACE(segflg))
7561 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
7562 else
7563 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
91447636
A
7564 if (!error) {
7565 /* find current mount point prefix */
7566 pathend = &mp->mnt_vfsstat.f_mntonname[0];
7567 for (cp = pathend; *cp != '\0'; ++cp) {
7568 if (*cp == '/')
7569 pathend = cp + 1;
7570 }
7571 /* find last component of target name */
7572 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
7573 if (*cp == '/')
7574 mpname = cp + 1;
7575 }
7576 /* append name to prefix */
7577 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
7578 bzero(pathend, maxlen);
2d21ac55 7579 strlcpy(pathend, mpname, maxlen);
91447636
A
7580 }
7581 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
7582
7583 vfs_unbusy(mp);
7584 }
7585 /*
fe8ab488 7586 * fix up name & parent pointers. note that we first
91447636
A
7587 * check that fvp has the same name/parent pointers it
7588 * had before the rename call... this is a 'weak' check
7589 * at best...
6d2010ae
A
7590 *
7591 * XXX oparent and oname may not be set in the compound vnop case
91447636 7592 */
6d2010ae 7593 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
91447636
A
7594 int update_flags;
7595
7596 update_flags = VNODE_UPDATE_NAME;
7597
7598 if (fdvp != tdvp)
7599 update_flags |= VNODE_UPDATE_PARENT;
7600
316670eb 7601 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
1c79356b
A
7602 }
7603out1:
593a1d5f
A
7604 if (to_name != NULL) {
7605 RELEASE_PATH(to_name);
7606 to_name = NULL;
7607 }
7608 if (from_name != NULL) {
7609 RELEASE_PATH(from_name);
7610 from_name = NULL;
7611 }
91447636
A
7612 if (holding_mntlock) {
7613 mount_unlock_renames(locked_mp);
7614 mount_drop(locked_mp, 0);
593a1d5f 7615 holding_mntlock = 0;
91447636
A
7616 }
7617 if (tdvp) {
7618 /*
7619 * nameidone has to happen before we vnode_put(tdvp)
7620 * since it may need to release the fs_nodelock on the tdvp
7621 */
316670eb 7622 nameidone(tond);
91447636
A
7623
7624 if (tvp)
7625 vnode_put(tvp);
7626 vnode_put(tdvp);
7627 }
7628 if (fdvp) {
7629 /*
7630 * nameidone has to happen before we vnode_put(fdvp)
7631 * since it may need to release the fs_nodelock on the fdvp
7632 */
316670eb 7633 nameidone(fromnd);
91447636
A
7634
7635 if (fvp)
7636 vnode_put(fvp);
7637 vnode_put(fdvp);
7638 }
fe8ab488 7639
6d2010ae
A
7640 /*
7641 * If things changed after we did the namei, then we will re-drive
7642 * this rename call from the top.
7643 */
316670eb 7644 if (do_retry) {
6d2010ae 7645 do_retry = 0;
593a1d5f
A
7646 goto retry;
7647 }
316670eb
A
7648
7649 FREE(__rename_data, M_TEMP);
1c79356b
A
7650 return (error);
7651}
7652
fe8ab488
A
7653int
7654rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7655{
7656 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7657 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7658}
7659
39037602 7660int renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
fe8ab488
A
7661{
7662 return renameat_internal(
39037602
A
7663 vfs_context_current(),
7664 uap->fromfd, uap->from,
7665 uap->tofd, uap->to,
fe8ab488
A
7666 UIO_USERSPACE, uap->flags);
7667}
39037602 7668
fe8ab488
A
7669int
7670renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7671{
7672 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7673 uap->tofd, uap->to, UIO_USERSPACE, 0));
7674}
7675
1c79356b
A
7676/*
7677 * Make a directory file.
2d21ac55
A
7678 *
7679 * Returns: 0 Success
7680 * EEXIST
7681 * namei:???
7682 * vnode_authorize:???
7683 * vn_create:???
1c79356b 7684 */
1c79356b 7685/* ARGSUSED */
91447636 7686static int
fe8ab488
A
7687mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
7688 enum uio_seg segflg)
1c79356b 7689{
91447636 7690 vnode_t vp, dvp;
1c79356b 7691 int error;
91447636 7692 int update_flags = 0;
6d2010ae 7693 int batched;
1c79356b
A
7694 struct nameidata nd;
7695
91447636 7696 AUDIT_ARG(mode, vap->va_mode);
fe8ab488 7697 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
6d2010ae 7698 path, ctx);
9bccf70c 7699 nd.ni_cnd.cn_flags |= WILLBEDIR;
6d2010ae
A
7700 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
7701
7702continue_lookup:
fe8ab488 7703 error = nameiat(&nd, fd);
55e303ae 7704 if (error)
1c79356b 7705 return (error);
91447636 7706 dvp = nd.ni_dvp;
1c79356b 7707 vp = nd.ni_vp;
55e303ae 7708
fe8ab488
A
7709 if (vp != NULL) {
7710 error = EEXIST;
7711 goto out;
7712 }
7713
6d2010ae 7714 batched = vnode_compound_mkdir_available(dvp);
2d21ac55
A
7715
7716 VATTR_SET(vap, va_type, VDIR);
fe8ab488 7717
6d2010ae
A
7718 /*
7719 * XXX
7720 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7721 * only get EXISTS or EISDIR for existing path components, and not that it could see
7722 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7723 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7724 */
fe8ab488 7725 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
6d2010ae
A
7726 if (error == EACCES || error == EPERM) {
7727 int error2;
7728
7729 nameidone(&nd);
7730 vnode_put(dvp);
7731 dvp = NULLVP;
7732
fe8ab488
A
7733 /*
7734 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
6d2010ae
A
7735 * rather than EACCESS if the target exists.
7736 */
fe8ab488
A
7737 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
7738 path, ctx);
7739 error2 = nameiat(&nd, fd);
6d2010ae
A
7740 if (error2) {
7741 goto out;
7742 } else {
7743 vp = nd.ni_vp;
7744 error = EEXIST;
7745 goto out;
7746 }
7747 }
7748
2d21ac55 7749 goto out;
6d2010ae
A
7750 }
7751
7752 /*
fe8ab488 7753 * make the directory
6d2010ae 7754 */
fe8ab488 7755 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
6d2010ae
A
7756 if (error == EKEEPLOOKING) {
7757 nd.ni_vp = vp;
7758 goto continue_lookup;
7759 }
2d21ac55 7760
fe8ab488 7761 goto out;
6d2010ae 7762 }
fe8ab488 7763
91447636
A
7764 // Make sure the name & parent pointers are hooked up
7765 if (vp->v_name == NULL)
7766 update_flags |= VNODE_UPDATE_NAME;
7767 if (vp->v_parent == NULLVP)
7768 update_flags |= VNODE_UPDATE_PARENT;
7769
7770 if (update_flags)
7771 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
55e303ae 7772
2d21ac55 7773#if CONFIG_FSE
91447636 7774 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
2d21ac55 7775#endif
91447636
A
7776
7777out:
7778 /*
7779 * nameidone has to happen before we vnode_put(dvp)
7780 * since it may need to release the fs_nodelock on the dvp
7781 */
7782 nameidone(&nd);
7783
7784 if (vp)
6d2010ae 7785 vnode_put(vp);
fe8ab488 7786 if (dvp)
6d2010ae 7787 vnode_put(dvp);
55e303ae 7788
1c79356b
A
7789 return (error);
7790}
7791
b0d623f7
A
7792/*
7793 * mkdir_extended: Create a directory; with extended security (ACL).
7794 *
7795 * Parameters: p Process requesting to create the directory
7796 * uap User argument descriptor (see below)
fe8ab488 7797 * retval (ignored)
b0d623f7
A
7798 *
7799 * Indirect: uap->path Path of directory to create
7800 * uap->mode Access permissions to set
7801 * uap->xsecurity ACL to set
fe8ab488 7802 *
b0d623f7
A
7803 * Returns: 0 Success
7804 * !0 Not success
7805 *
7806 */
1c79356b 7807int
b0d623f7 7808mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
1c79356b 7809{
91447636
A
7810 int ciferror;
7811 kauth_filesec_t xsecdst;
7812 struct vnode_attr va;
7813
b0d623f7
A
7814 AUDIT_ARG(owner, uap->uid, uap->gid);
7815
91447636
A
7816 xsecdst = NULL;
7817 if ((uap->xsecurity != USER_ADDR_NULL) &&
7818 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
7819 return ciferror;
7820
91447636 7821 VATTR_INIT(&va);
fe8ab488 7822 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
91447636
A
7823 if (xsecdst != NULL)
7824 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
7825
fe8ab488
A
7826 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7827 UIO_USERSPACE);
91447636
A
7828 if (xsecdst != NULL)
7829 kauth_filesec_free(xsecdst);
7830 return ciferror;
1c79356b
A
7831}
7832
1c79356b 7833int
b0d623f7 7834mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
1c79356b 7835{
91447636 7836 struct vnode_attr va;
1c79356b 7837
91447636 7838 VATTR_INIT(&va);
fe8ab488 7839 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
e5568f75 7840
fe8ab488
A
7841 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7842 UIO_USERSPACE));
91447636 7843}
1c79356b 7844
91447636 7845int
fe8ab488
A
7846mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
7847{
7848 struct vnode_attr va;
7849
7850 VATTR_INIT(&va);
7851 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7852
7853 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
7854 UIO_USERSPACE));
7855}
7856
7857static int
7858rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
7859 enum uio_seg segflg)
1c79356b 7860{
2d21ac55 7861 vnode_t vp, dvp;
91447636
A
7862 int error;
7863 struct nameidata nd;
6d2010ae
A
7864 char *path = NULL;
7865 int len=0;
7866 int has_listeners = 0;
7867 int need_event = 0;
7868 int truncated = 0;
6d2010ae
A
7869#if CONFIG_FSE
7870 struct vnode_attr va;
7871#endif /* CONFIG_FSE */
7872 struct vnode_attr *vap = NULL;
c18c124e 7873 int restart_count = 0;
6d2010ae 7874 int batched;
91447636 7875
b0d623f7 7876 int restart_flag;
91447636 7877
fe8ab488 7878 /*
2d21ac55
A
7879 * This loop exists to restart rmdir in the unlikely case that two
7880 * processes are simultaneously trying to remove the same directory
7881 * containing orphaned appleDouble files.
7882 */
7883 do {
6d2010ae 7884 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
fe8ab488 7885 segflg, dirpath, ctx);
6d2010ae
A
7886 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
7887continue_lookup:
2d21ac55 7888 restart_flag = 0;
6d2010ae 7889 vap = NULL;
2d21ac55 7890
fe8ab488 7891 error = nameiat(&nd, fd);
2d21ac55
A
7892 if (error)
7893 return (error);
7894
7895 dvp = nd.ni_dvp;
7896 vp = nd.ni_vp;
7897
6d2010ae
A
7898 if (vp) {
7899 batched = vnode_compound_rmdir_available(vp);
2d21ac55 7900
6d2010ae
A
7901 if (vp->v_flag & VROOT) {
7902 /*
7903 * The root of a mounted filesystem cannot be deleted.
7904 */
7905 error = EBUSY;
7906 goto out;
7907 }
1c79356b 7908
2d21ac55 7909 /*
6d2010ae
A
7910 * Removed a check here; we used to abort if vp's vid
7911 * was not the same as what we'd seen the last time around.
7912 * I do not think that check was valid, because if we retry
7913 * and all dirents are gone, the directory could legitimately
7914 * be recycled but still be present in a situation where we would
fe8ab488 7915 * have had permission to delete. Therefore, we won't make
6d2010ae
A
7916 * an effort to preserve that check now that we may not have a
7917 * vp here.
2d21ac55 7918 */
6d2010ae
A
7919
7920 if (!batched) {
7921 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
7922 if (error) {
3e170ce0
A
7923 if (error == ENOENT) {
7924 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7925 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7926 restart_flag = 1;
7927 restart_count += 1;
7928 }
c18c124e 7929 }
6d2010ae
A
7930 goto out;
7931 }
7932 }
2d21ac55 7933 } else {
6d2010ae
A
7934 batched = 1;
7935
7936 if (!vnode_compound_rmdir_available(dvp)) {
7937 panic("No error, but no compound rmdir?");
7938 }
91447636 7939 }
6d2010ae 7940
2d21ac55 7941#if CONFIG_FSE
6d2010ae 7942 fse_info finfo;
b0d623f7 7943
6d2010ae
A
7944 need_event = need_fsevent(FSE_DELETE, dvp);
7945 if (need_event) {
7946 if (!batched) {
2d21ac55 7947 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
7948 } else {
7949 error = vfs_get_notify_attributes(&va);
7950 if (error) {
7951 goto out;
7952 }
7953
7954 vap = &va;
2d21ac55 7955 }
6d2010ae 7956 }
2d21ac55 7957#endif
6d2010ae
A
7958 has_listeners = kauth_authorize_fileop_has_listeners();
7959 if (need_event || has_listeners) {
7960 if (path == NULL) {
2d21ac55
A
7961 GET_PATH(path);
7962 if (path == NULL) {
7963 error = ENOMEM;
7964 goto out;
7965 }
6d2010ae 7966 }
b0d623f7 7967
6d2010ae 7968 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
b0d623f7 7969#if CONFIG_FSE
6d2010ae
A
7970 if (truncated) {
7971 finfo.mode |= FSE_TRUNCATED_PATH;
2d21ac55 7972 }
6d2010ae
A
7973#endif
7974 }
91447636 7975
6d2010ae
A
7976 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
7977 nd.ni_vp = vp;
7978 if (vp == NULLVP) {
7979 /* Couldn't find a vnode */
7980 goto out;
7981 }
2d21ac55 7982
6d2010ae
A
7983 if (error == EKEEPLOOKING) {
7984 goto continue_lookup;
3e170ce0
A
7985 } else if (batched && error == ENOENT) {
7986 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7987 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7988 /*
7989 * For compound VNOPs, the authorization callback
7990 * may return ENOENT in case of racing hard link lookups
7991 * redrive the lookup.
7992 */
7993 restart_flag = 1;
7994 restart_count += 1;
7995 goto out;
7996 }
6d2010ae 7997 }
39236c6e 7998#if CONFIG_APPLEDOUBLE
6d2010ae
A
7999 /*
8000 * Special case to remove orphaned AppleDouble
8001 * files. I don't like putting this in the kernel,
8002 * but carbon does not like putting this in carbon either,
8003 * so here we are.
8004 */
8005 if (error == ENOTEMPTY) {
8006 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
8007 if (error == EBUSY) {
8008 goto out;
2d21ac55
A
8009 }
8010
6d2010ae 8011
2d21ac55 8012 /*
fe8ab488 8013 * Assuming everything went well, we will try the RMDIR again
2d21ac55 8014 */
6d2010ae
A
8015 if (!error)
8016 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
8017 }
39236c6e 8018#endif /* CONFIG_APPLEDOUBLE */
6d2010ae 8019 /*
fe8ab488 8020 * Call out to allow 3rd party notification of delete.
6d2010ae
A
8021 * Ignore result of kauth_authorize_fileop call.
8022 */
8023 if (!error) {
8024 if (has_listeners) {
fe8ab488
A
8025 kauth_authorize_fileop(vfs_context_ucred(ctx),
8026 KAUTH_FILEOP_DELETE,
6d2010ae
A
8027 (uintptr_t)vp,
8028 (uintptr_t)path);
8029 }
8030
8031 if (vp->v_flag & VISHARDLINK) {
8032 // see the comment in unlink1() about why we update
8033 // the parent of a hard link when it is removed
8034 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
8035 }
2d21ac55
A
8036
8037#if CONFIG_FSE
6d2010ae
A
8038 if (need_event) {
8039 if (vap) {
8040 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 8041 }
6d2010ae
A
8042 add_fsevent(FSE_DELETE, ctx,
8043 FSE_ARG_STRING, len, path,
8044 FSE_ARG_FINFO, &finfo,
8045 FSE_ARG_DONE);
2d21ac55 8046 }
6d2010ae 8047#endif
2d21ac55
A
8048 }
8049
8050out:
6d2010ae
A
8051 if (path != NULL) {
8052 RELEASE_PATH(path);
8053 path = NULL;
8054 }
2d21ac55
A
8055 /*
8056 * nameidone has to happen before we vnode_put(dvp)
8057 * since it may need to release the fs_nodelock on the dvp
8058 */
8059 nameidone(&nd);
2d21ac55 8060 vnode_put(dvp);
6d2010ae 8061
fe8ab488 8062 if (vp)
6d2010ae 8063 vnode_put(vp);
2d21ac55
A
8064
8065 if (restart_flag == 0) {
8066 wakeup_one((caddr_t)vp);
8067 return (error);
8068 }
8069 tsleep(vp, PVFS, "rm AD", 1);
8070
8071 } while (restart_flag != 0);
91447636 8072
1c79356b 8073 return (error);
2d21ac55 8074
1c79356b 8075}
91447636 8076
fe8ab488
A
8077/*
8078 * Remove a directory file.
8079 */
8080/* ARGSUSED */
8081int
8082rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
8083{
8084 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
8085 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
8086}
8087
2d21ac55
A
8088/* Get direntry length padded to 8 byte alignment */
8089#define DIRENT64_LEN(namlen) \
8090 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
8091
fe8ab488 8092errno_t
2d21ac55
A
8093vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
8094 int *numdirent, vfs_context_t ctxp)
8095{
8096 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
39037602 8097 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
6d2010ae 8098 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
2d21ac55
A
8099 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
8100 } else {
8101 size_t bufsize;
8102 void * bufptr;
8103 uio_t auio;
15129b1c 8104 struct direntry *entry64;
2d21ac55
A
8105 struct dirent *dep;
8106 int bytesread;
8107 int error;
8108
8109 /*
8110 * Our kernel buffer needs to be smaller since re-packing
8111 * will expand each dirent. The worse case (when the name
8112 * length is 3) corresponds to a struct direntry size of 32
8113 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
8114 * (4-byte aligned). So having a buffer that is 3/8 the size
8115 * will prevent us from reading more than we can pack.
8116 *
8117 * Since this buffer is wired memory, we will limit the
39037602 8118 * buffer size to a maximum of 32K. We would really like to
2d21ac55 8119 * use 32K in the MIN(), but we use magic number 87371 to
39037602 8120 * prevent uio_resid() * 3 / 8 from overflowing.
2d21ac55 8121 */
316670eb 8122 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
2d21ac55 8123 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
b0d623f7
A
8124 if (bufptr == NULL) {
8125 return ENOMEM;
8126 }
2d21ac55 8127
b0d623f7 8128 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
2d21ac55
A
8129 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
8130 auio->uio_offset = uio->uio_offset;
8131
8132 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
8133
8134 dep = (struct dirent *)bufptr;
8135 bytesread = bufsize - uio_resid(auio);
8136
15129b1c
A
8137 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
8138 M_TEMP, M_WAITOK);
2d21ac55
A
8139 /*
8140 * Convert all the entries and copy them out to user's buffer.
8141 */
8142 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
15129b1c
A
8143 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
8144
8145 bzero(entry64, enbufsize);
2d21ac55 8146 /* Convert a dirent to a dirent64. */
15129b1c
A
8147 entry64->d_ino = dep->d_ino;
8148 entry64->d_seekoff = 0;
8149 entry64->d_reclen = enbufsize;
8150 entry64->d_namlen = dep->d_namlen;
8151 entry64->d_type = dep->d_type;
8152 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
2d21ac55
A
8153
8154 /* Move to next entry. */
8155 dep = (struct dirent *)((char *)dep + dep->d_reclen);
8156
8157 /* Copy entry64 to user's buffer. */
15129b1c 8158 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
2d21ac55
A
8159 }
8160
8161 /* Update the real offset using the offset we got from VNOP_READDIR. */
8162 if (error == 0) {
8163 uio->uio_offset = auio->uio_offset;
8164 }
8165 uio_free(auio);
8166 FREE(bufptr, M_TEMP);
15129b1c 8167 FREE(entry64, M_TEMP);
2d21ac55
A
8168 return (error);
8169 }
8170}
1c79356b 8171
39236c6e
A
8172#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
8173
1c79356b
A
8174/*
8175 * Read a block of directory entries in a file system independent format.
8176 */
2d21ac55
A
8177static int
8178getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
8179 off_t *offset, int flags)
1c79356b 8180{
2d21ac55
A
8181 vnode_t vp;
8182 struct vfs_context context = *vfs_context_current(); /* local copy */
91447636
A
8183 struct fileproc *fp;
8184 uio_t auio;
2d21ac55
A
8185 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8186 off_t loff;
8187 int error, eofflag, numdirent;
91447636 8188 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 8189
2d21ac55
A
8190 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
8191 if (error) {
1c79356b 8192 return (error);
2d21ac55 8193 }
91447636
A
8194 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8195 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8196 error = EBADF;
8197 goto out;
8198 }
2d21ac55 8199
39236c6e
A
8200 if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
8201 bufsize = GETDIRENTRIES_MAXBUFSIZE;
8202
2d21ac55
A
8203#if CONFIG_MACF
8204 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
8205 if (error)
8206 goto out;
8207#endif
91447636
A
8208 if ( (error = vnode_getwithref(vp)) ) {
8209 goto out;
8210 }
91447636 8211 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
55e303ae 8212
1c79356b 8213unionread:
91447636
A
8214 if (vp->v_type != VDIR) {
8215 (void)vnode_put(vp);
8216 error = EINVAL;
8217 goto out;
8218 }
2d21ac55
A
8219
8220#if CONFIG_MACF
8221 error = mac_vnode_check_readdir(&context, vp);
8222 if (error != 0) {
8223 (void)vnode_put(vp);
8224 goto out;
8225 }
8226#endif /* MAC */
91447636
A
8227
8228 loff = fp->f_fglob->fg_offset;
2d21ac55
A
8229 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8230 uio_addiov(auio, bufp, bufsize);
91447636 8231
2d21ac55
A
8232 if (flags & VNODE_READDIR_EXTENDED) {
8233 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
8234 fp->f_fglob->fg_offset = uio_offset(auio);
8235 } else {
8236 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
8237 fp->f_fglob->fg_offset = uio_offset(auio);
8238 }
91447636
A
8239 if (error) {
8240 (void)vnode_put(vp);
8241 goto out;
8242 }
1c79356b 8243
2d21ac55
A
8244 if ((user_ssize_t)bufsize == uio_resid(auio)){
8245 if (union_dircheckp) {
8246 error = union_dircheckp(&vp, fp, &context);
8247 if (error == -1)
8248 goto unionread;
8249 if (error)
8250 goto out;
1c79356b
A
8251 }
8252
39236c6e 8253 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
2d21ac55 8254 struct vnode *tvp = vp;
39236c6e
A
8255 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
8256 vnode_ref(vp);
8257 fp->f_fglob->fg_data = (caddr_t) vp;
8258 fp->f_fglob->fg_offset = 0;
8259 vnode_rele(tvp);
8260 vnode_put(tvp);
8261 goto unionread;
8262 }
8263 vp = tvp;
1c79356b
A
8264 }
8265 }
2d21ac55 8266
91447636 8267 vnode_put(vp);
2d21ac55
A
8268 if (offset) {
8269 *offset = loff;
8270 }
39037602 8271
2d21ac55 8272 *bytesread = bufsize - uio_resid(auio);
91447636
A
8273out:
8274 file_drop(fd);
1c79356b
A
8275 return (error);
8276}
8277
2d21ac55
A
8278
8279int
b0d623f7 8280getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
2d21ac55
A
8281{
8282 off_t offset;
2d21ac55
A
8283 ssize_t bytesread;
8284 int error;
8285
8286 AUDIT_ARG(fd, uap->fd);
8287 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
8288
8289 if (error == 0) {
b0d623f7
A
8290 if (proc_is64bit(p)) {
8291 user64_long_t base = (user64_long_t)offset;
8292 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
8293 } else {
8294 user32_long_t base = (user32_long_t)offset;
8295 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
8296 }
2d21ac55
A
8297 *retval = bytesread;
8298 }
8299 return (error);
8300}
8301
8302int
8303getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
8304{
8305 off_t offset;
8306 ssize_t bytesread;
8307 int error;
8308
8309 AUDIT_ARG(fd, uap->fd);
8310 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
8311
8312 if (error == 0) {
8313 *retval = bytesread;
8314 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
8315 }
8316 return (error);
8317}
8318
8319
1c79356b
A
8320/*
8321 * Set the mode mask for creation of filesystem nodes.
b0d623f7 8322 * XXX implement xsecurity
1c79356b 8323 */
91447636
A
8324#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
8325static int
b0d623f7 8326umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
1c79356b 8327{
2d21ac55 8328 struct filedesc *fdp;
1c79356b 8329
91447636 8330 AUDIT_ARG(mask, newmask);
2d21ac55 8331 proc_fdlock(p);
1c79356b
A
8332 fdp = p->p_fd;
8333 *retval = fdp->fd_cmask;
91447636 8334 fdp->fd_cmask = newmask & ALLPERMS;
2d21ac55 8335 proc_fdunlock(p);
1c79356b
A
8336 return (0);
8337}
8338
b0d623f7
A
8339/*
8340 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
8341 *
8342 * Parameters: p Process requesting to set the umask
8343 * uap User argument descriptor (see below)
8344 * retval umask of the process (parameter p)
8345 *
8346 * Indirect: uap->newmask umask to set
8347 * uap->xsecurity ACL to set
39037602 8348 *
b0d623f7
A
8349 * Returns: 0 Success
8350 * !0 Not success
8351 *
8352 */
8353int
8354umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
91447636
A
8355{
8356 int ciferror;
8357 kauth_filesec_t xsecdst;
8358
8359 xsecdst = KAUTH_FILESEC_NONE;
8360 if (uap->xsecurity != USER_ADDR_NULL) {
8361 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
8362 return ciferror;
8363 } else {
8364 xsecdst = KAUTH_FILESEC_NONE;
8365 }
8366
8367 ciferror = umask1(p, uap->newmask, xsecdst, retval);
8368
8369 if (xsecdst != KAUTH_FILESEC_NONE)
8370 kauth_filesec_free(xsecdst);
8371 return ciferror;
8372}
8373
8374int
b0d623f7 8375umask(proc_t p, struct umask_args *uap, int32_t *retval)
91447636
A
8376{
8377 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
8378}
8379
1c79356b
A
8380/*
8381 * Void all references to file by ripping underlying filesystem
8382 * away from vnode.
8383 */
1c79356b
A
8384/* ARGSUSED */
8385int
b0d623f7 8386revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
1c79356b 8387{
2d21ac55 8388 vnode_t vp;
91447636 8389 struct vnode_attr va;
2d21ac55 8390 vfs_context_t ctx = vfs_context_current();
1c79356b
A
8391 int error;
8392 struct nameidata nd;
8393
6d2010ae
A
8394 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
8395 uap->path, ctx);
55e303ae
A
8396 error = namei(&nd);
8397 if (error)
1c79356b
A
8398 return (error);
8399 vp = nd.ni_vp;
91447636
A
8400
8401 nameidone(&nd);
8402
b0d623f7
A
8403 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
8404 error = ENOTSUP;
8405 goto out;
8406 }
8407
8408 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
8409 error = EBUSY;
8410 goto out;
8411 }
8412
2d21ac55
A
8413#if CONFIG_MACF
8414 error = mac_vnode_check_revoke(ctx, vp);
8415 if (error)
8416 goto out;
8417#endif
8418
91447636
A
8419 VATTR_INIT(&va);
8420 VATTR_WANTED(&va, va_uid);
2d21ac55 8421 if ((error = vnode_getattr(vp, &va, ctx)))
1c79356b 8422 goto out;
2d21ac55
A
8423 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
8424 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
1c79356b 8425 goto out;
b0d623f7 8426 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
2d21ac55 8427 VNOP_REVOKE(vp, REVOKEALL, ctx);
1c79356b 8428out:
91447636 8429 vnode_put(vp);
1c79356b
A
8430 return (error);
8431}
8432
0b4e3aa0 8433
1c79356b
A
8434/*
8435 * HFS/HFS Plus SPECIFIC SYSTEM CALLS
9bccf70c 8436 * The following system calls are designed to support features
1c79356b
A
8437 * which are specific to the HFS & HFS Plus volume formats
8438 */
8439
9bccf70c 8440
1c79356b 8441/*
39236c6e
A
8442 * Obtain attribute information on objects in a directory while enumerating
8443 * the directory.
8444 */
1c79356b
A
8445/* ARGSUSED */
8446int
b0d623f7 8447getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
1c79356b 8448{
2d21ac55 8449 vnode_t vp;
91447636
A
8450 struct fileproc *fp;
8451 uio_t auio = NULL;
8452 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
39236c6e 8453 uint32_t count, savecount;
2d21ac55 8454 uint32_t newstate;
91447636 8455 int error, eofflag;
2d21ac55 8456 uint32_t loff;
39037602 8457 struct attrlist attributelist;
2d21ac55 8458 vfs_context_t ctx = vfs_context_current();
91447636
A
8459 int fd = uap->fd;
8460 char uio_buf[ UIO_SIZEOF(1) ];
8461 kauth_action_t action;
8462
8463 AUDIT_ARG(fd, fd);
39037602 8464
91447636 8465 /* Get the attributes into kernel space */
2d21ac55 8466 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
91447636 8467 return(error);
2d21ac55
A
8468 }
8469 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
8470 return(error);
8471 }
39236c6e 8472 savecount = count;
2d21ac55 8473 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
91447636 8474 return (error);
2d21ac55 8475 }
91447636
A
8476 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8477 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8478 error = EBADF;
8479 goto out;
8480 }
2d21ac55
A
8481
8482
8483#if CONFIG_MACF
8484 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
8485 fp->f_fglob);
8486 if (error)
8487 goto out;
8488#endif
8489
8490
91447636
A
8491 if ( (error = vnode_getwithref(vp)) )
8492 goto out;
55e303ae 8493
91447636 8494 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 8495
39236c6e 8496unionread:
91447636
A
8497 if (vp->v_type != VDIR) {
8498 (void)vnode_put(vp);
8499 error = EINVAL;
8500 goto out;
8501 }
55e303ae 8502
2d21ac55
A
8503#if CONFIG_MACF
8504 error = mac_vnode_check_readdir(ctx, vp);
8505 if (error != 0) {
8506 (void)vnode_put(vp);
8507 goto out;
8508 }
8509#endif /* MAC */
8510
91447636
A
8511 /* set up the uio structure which will contain the users return buffer */
8512 loff = fp->f_fglob->fg_offset;
39236c6e 8513 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
91447636 8514 uio_addiov(auio, uap->buffer, uap->buffersize);
39037602 8515
91447636
A
8516 /*
8517 * If the only item requested is file names, we can let that past with
8518 * just LIST_DIRECTORY. If they want any other attributes, that means
8519 * they need SEARCH as well.
8520 */
8521 action = KAUTH_VNODE_LIST_DIRECTORY;
8522 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
8523 attributelist.fileattr || attributelist.dirattr)
8524 action |= KAUTH_VNODE_SEARCH;
39037602 8525
2d21ac55 8526 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
2d21ac55 8527
b0d623f7
A
8528 /* Believe it or not, uap->options only has 32-bits of valid
8529 * info, so truncate before extending again */
39236c6e
A
8530
8531 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
8532 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
8533 }
8534
8535 if (error) {
8536 (void) vnode_put(vp);
8537 goto out;
8538 }
8539
8540 /*
8541 * If we've got the last entry of a directory in a union mount
8542 * then reset the eofflag and pretend there's still more to come.
8543 * The next call will again set eofflag and the buffer will be empty,
8544 * so traverse to the underlying directory and do the directory
8545 * read there.
8546 */
8547 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
8548 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
8549 eofflag = 0;
8550 } else { // Empty buffer
8551 struct vnode *tvp = vp;
8552 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
8553 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
8554 fp->f_fglob->fg_data = (caddr_t) vp;
8555 fp->f_fglob->fg_offset = 0; // reset index for new dir
8556 count = savecount;
8557 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
8558 vnode_put(tvp);
8559 goto unionread;
8560 }
8561 vp = tvp;
8562 }
2d21ac55 8563 }
39236c6e 8564
91447636 8565 (void)vnode_put(vp);
1c79356b 8566
39037602 8567 if (error)
91447636
A
8568 goto out;
8569 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
1c79356b 8570
2d21ac55 8571 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
91447636 8572 goto out;
2d21ac55 8573 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
91447636 8574 goto out;
2d21ac55 8575 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
91447636 8576 goto out;
1c79356b
A
8577
8578 *retval = eofflag; /* similar to getdirentries */
91447636 8579 error = 0;
2d21ac55 8580out:
91447636
A
8581 file_drop(fd);
8582 return (error); /* return error earlier, an retval of 0 or 1 now */
1c79356b 8583
39236c6e 8584} /* end of getdirentriesattr system call */
1c79356b
A
8585
8586/*
8587* Exchange data between two files
8588*/
8589
1c79356b
A
8590/* ARGSUSED */
8591int
b0d623f7 8592exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
1c79356b
A
8593{
8594
8595 struct nameidata fnd, snd;
2d21ac55
A
8596 vfs_context_t ctx = vfs_context_current();
8597 vnode_t fvp;
8598 vnode_t svp;
8599 int error;
b0d623f7 8600 u_int32_t nameiflags;
91447636
A
8601 char *fpath = NULL;
8602 char *spath = NULL;
b0d623f7
A
8603 int flen=0, slen=0;
8604 int from_truncated=0, to_truncated=0;
8605#if CONFIG_FSE
91447636 8606 fse_info f_finfo, s_finfo;
b0d623f7 8607#endif
39037602 8608
1c79356b
A
8609 nameiflags = 0;
8610 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8611
6d2010ae
A
8612 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
8613 UIO_USERSPACE, uap->path1, ctx);
1c79356b 8614
6d2010ae
A
8615 error = namei(&fnd);
8616 if (error)
8617 goto out2;
1c79356b 8618
91447636
A
8619 nameidone(&fnd);
8620 fvp = fnd.ni_vp;
1c79356b 8621
39037602 8622 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
6d2010ae 8623 UIO_USERSPACE, uap->path2, ctx);
1c79356b 8624
6d2010ae
A
8625 error = namei(&snd);
8626 if (error) {
91447636 8627 vnode_put(fvp);
55e303ae 8628 goto out2;
6d2010ae 8629 }
91447636 8630 nameidone(&snd);
1c79356b
A
8631 svp = snd.ni_vp;
8632
91447636
A
8633 /*
8634 * if the files are the same, return an inval error
8635 */
1c79356b 8636 if (svp == fvp) {
91447636
A
8637 error = EINVAL;
8638 goto out;
39037602 8639 }
1c79356b 8640
91447636
A
8641 /*
8642 * if the files are on different volumes, return an error
8643 */
8644 if (svp->v_mount != fvp->v_mount) {
8645 error = EXDEV;
8646 goto out;
8647 }
2d21ac55 8648
39236c6e
A
8649 /* If they're not files, return an error */
8650 if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
db609669
A
8651 error = EINVAL;
8652 goto out;
8653 }
8654
2d21ac55
A
8655#if CONFIG_MACF
8656 error = mac_vnode_check_exchangedata(ctx,
8657 fvp, svp);
8658 if (error)
8659 goto out;
8660#endif
8661 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
8662 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
91447636 8663 goto out;
1c79356b 8664
2d21ac55
A
8665 if (
8666#if CONFIG_FSE
39037602 8667 need_fsevent(FSE_EXCHANGE, fvp) ||
2d21ac55
A
8668#endif
8669 kauth_authorize_fileop_has_listeners()) {
8670 GET_PATH(fpath);
8671 GET_PATH(spath);
8672 if (fpath == NULL || spath == NULL) {
8673 error = ENOMEM;
8674 goto out;
8675 }
b0d623f7
A
8676
8677 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
8678 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
39037602 8679
2d21ac55
A
8680#if CONFIG_FSE
8681 get_fse_info(fvp, &f_finfo, ctx);
8682 get_fse_info(svp, &s_finfo, ctx);
b0d623f7
A
8683 if (from_truncated || to_truncated) {
8684 // set it here since only the f_finfo gets reported up to user space
8685 f_finfo.mode |= FSE_TRUNCATED_PATH;
8686 }
2d21ac55 8687#endif
91447636 8688 }
1c79356b 8689 /* Ok, make the call */
2d21ac55 8690 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
55e303ae 8691
91447636 8692 if (error == 0) {
2d21ac55 8693 const char *tmpname;
91447636
A
8694
8695 if (fpath != NULL && spath != NULL) {
39037602 8696 /* call out to allow 3rd party notification of exchangedata.
91447636
A
8697 * Ignore result of kauth_authorize_fileop call.
8698 */
39037602 8699 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
91447636
A
8700 (uintptr_t)fpath, (uintptr_t)spath);
8701 }
8702 name_cache_lock();
8703
8704 tmpname = fvp->v_name;
8705 fvp->v_name = svp->v_name;
8706 svp->v_name = tmpname;
39037602 8707
91447636 8708 if (fvp->v_parent != svp->v_parent) {
2d21ac55 8709 vnode_t tmp;
91447636
A
8710
8711 tmp = fvp->v_parent;
8712 fvp->v_parent = svp->v_parent;
8713 svp->v_parent = tmp;
8714 }
8715 name_cache_unlock();
8716
2d21ac55 8717#if CONFIG_FSE
91447636 8718 if (fpath != NULL && spath != NULL) {
2d21ac55 8719 add_fsevent(FSE_EXCHANGE, ctx,
91447636
A
8720 FSE_ARG_STRING, flen, fpath,
8721 FSE_ARG_FINFO, &f_finfo,
8722 FSE_ARG_STRING, slen, spath,
8723 FSE_ARG_FINFO, &s_finfo,
8724 FSE_ARG_DONE);
8725 }
2d21ac55 8726#endif
55e303ae
A
8727 }
8728
1c79356b 8729out:
2d21ac55
A
8730 if (fpath != NULL)
8731 RELEASE_PATH(fpath);
8732 if (spath != NULL)
8733 RELEASE_PATH(spath);
91447636
A
8734 vnode_put(svp);
8735 vnode_put(fvp);
1c79356b 8736out2:
1c79356b 8737 return (error);
91447636 8738}
1c79356b 8739
39236c6e
A
8740/*
8741 * Return (in MB) the amount of freespace on the given vnode's volume.
8742 */
8743uint32_t freespace_mb(vnode_t vp);
8744
8745uint32_t
8746freespace_mb(vnode_t vp)
8747{
39037602 8748 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
39236c6e
A
8749 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
8750 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
8751}
8752
316670eb 8753#if CONFIG_SEARCHFS
1c79356b 8754
1c79356b
A
8755/* ARGSUSED */
8756
8757int
b0d623f7 8758searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
1c79356b 8759{
39236c6e
A
8760 vnode_t vp, tvp;
8761 int i, error=0;
1c79356b
A
8762 int fserror = 0;
8763 struct nameidata nd;
b0d623f7 8764 struct user64_fssearchblock searchblock;
1c79356b
A
8765 struct searchstate *state;
8766 struct attrlist *returnattrs;
b0d623f7 8767 struct timeval timelimit;
1c79356b 8768 void *searchparams1,*searchparams2;
91447636
A
8769 uio_t auio = NULL;
8770 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
b0d623f7 8771 uint32_t nummatches;
1c79356b 8772 int mallocsize;
b0d623f7 8773 uint32_t nameiflags;
2d21ac55 8774 vfs_context_t ctx = vfs_context_current();
91447636 8775 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 8776
39236c6e 8777 /* Start by copying in fsearchblock parameter list */
91447636 8778 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
8779 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
8780 timelimit.tv_sec = searchblock.timelimit.tv_sec;
8781 timelimit.tv_usec = searchblock.timelimit.tv_usec;
91447636
A
8782 }
8783 else {
b0d623f7
A
8784 struct user32_fssearchblock tmp_searchblock;
8785
91447636
A
8786 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
8787 // munge into 64-bit version
8788 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
8789 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
8790 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
8791 searchblock.maxmatches = tmp_searchblock.maxmatches;
39037602 8792 /*
b0d623f7
A
8793 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8794 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8795 */
8796 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
8797 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
91447636
A
8798 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
8799 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
8800 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
8801 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
8802 searchblock.searchattrs = tmp_searchblock.searchattrs;
8803 }
8804 if (error)
1c79356b
A
8805 return(error);
8806
39037602 8807 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
a3d08fcd 8808 */
39037602 8809 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
a3d08fcd
A
8810 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
8811 return(EINVAL);
39037602 8812
1c79356b
A
8813 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8814 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
8815 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8816 /* block. */
fe8ab488
A
8817 /* */
8818 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8819 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8820 /* assumes the size is still 556 bytes it will continue to work */
39037602 8821
91447636 8822 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
fe8ab488 8823 sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
1c79356b
A
8824
8825 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
8826
8827 /* Now set up the various pointers to the correct place in our newly allocated memory */
8828
8829 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
8830 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
8831 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
8832
8833 /* Now copy in the stuff given our local variables. */
8834
91447636 8835 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
1c79356b
A
8836 goto freeandexit;
8837
91447636 8838 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
1c79356b
A
8839 goto freeandexit;
8840
91447636 8841 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
1c79356b 8842 goto freeandexit;
39037602 8843
91447636 8844 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
1c79356b 8845 goto freeandexit;
1c79356b 8846
39236c6e
A
8847 /*
8848 * When searching a union mount, need to set the
8849 * start flag at the first call on each layer to
8850 * reset state for the new volume.
8851 */
8852 if (uap->options & SRCHFS_START)
8853 state->ss_union_layer = 0;
39037602 8854 else
39236c6e
A
8855 uap->options |= state->ss_union_flags;
8856 state->ss_union_flags = 0;
b0d623f7
A
8857
8858 /*
8859 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8860 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
39037602
A
8861 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8862 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
b0d623f7
A
8863 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8864 */
8865
8866 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
8867 attrreference_t* string_ref;
8868 u_int32_t* start_length;
39037602 8869 user64_size_t param_length;
b0d623f7
A
8870
8871 /* validate searchparams1 */
39037602 8872 param_length = searchblock.sizeofsearchparams1;
b0d623f7
A
8873 /* skip the word that specifies length of the buffer */
8874 start_length= (u_int32_t*) searchparams1;
8875 start_length= start_length+1;
8876 string_ref= (attrreference_t*) start_length;
8877
8878 /* ensure no negative offsets or too big offsets */
8879 if (string_ref->attr_dataoffset < 0 ) {
8880 error = EINVAL;
39037602 8881 goto freeandexit;
b0d623f7
A
8882 }
8883 if (string_ref->attr_length > MAXPATHLEN) {
8884 error = EINVAL;
8885 goto freeandexit;
8886 }
39037602 8887
b0d623f7
A
8888 /* Check for pointer overflow in the string ref */
8889 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
8890 error = EINVAL;
8891 goto freeandexit;
8892 }
8893
8894 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
8895 error = EINVAL;
8896 goto freeandexit;
8897 }
8898 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
8899 error = EINVAL;
8900 goto freeandexit;
8901 }
8902 }
8903
8904 /* set up the uio structure which will contain the users return buffer */
39236c6e
A
8905 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8906 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
1c79356b 8907
91447636 8908 nameiflags = 0;
1c79356b 8909 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6d2010ae
A
8910 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
8911 UIO_USERSPACE, uap->path, ctx);
1c79356b 8912
55e303ae
A
8913 error = namei(&nd);
8914 if (error)
1c79356b 8915 goto freeandexit;
39236c6e 8916 vp = nd.ni_vp;
91447636 8917 nameidone(&nd);
39236c6e
A
8918
8919 /*
8920 * Switch to the root vnode for the volume
8921 */
8922 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
fe8ab488 8923 vnode_put(vp);
39236c6e
A
8924 if (error)
8925 goto freeandexit;
39236c6e
A
8926 vp = tvp;
8927
8928 /*
8929 * If it's a union mount, the path lookup takes
8930 * us to the top layer. But we may need to descend
8931 * to a lower layer. For non-union mounts the layer
8932 * is always zero.
8933 */
8934 for (i = 0; i < (int) state->ss_union_layer; i++) {
8935 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
8936 break;
8937 tvp = vp;
8938 vp = vp->v_mount->mnt_vnodecovered;
8939 if (vp == NULL) {
fe8ab488 8940 vnode_put(tvp);
39236c6e
A
8941 error = ENOENT;
8942 goto freeandexit;
8943 }
8944 vnode_getwithref(vp);
8945 vnode_put(tvp);
8946 }
1c79356b 8947
6d2010ae
A
8948#if CONFIG_MACF
8949 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
8950 if (error) {
8951 vnode_put(vp);
8952 goto freeandexit;
8953 }
8954#endif
8955
39037602 8956
1c79356b 8957 /*
39037602 8958 * If searchblock.maxmatches == 0, then skip the search. This has happened
39236c6e 8959 * before and sometimes the underlying code doesnt deal with it well.
1c79356b
A
8960 */
8961 if (searchblock.maxmatches == 0) {
8962 nummatches = 0;
8963 goto saveandexit;
8964 }
8965
8966 /*
39236c6e 8967 * Allright, we have everything we need, so lets make that call.
39037602 8968 *
39236c6e
A
8969 * We keep special track of the return value from the file system:
8970 * EAGAIN is an acceptable error condition that shouldn't keep us
8971 * from copying out any results...
1c79356b
A
8972 */
8973
6d2010ae 8974 fserror = VNOP_SEARCHFS(vp,
39236c6e
A
8975 searchparams1,
8976 searchparams2,
8977 &searchblock.searchattrs,
8978 (u_long)searchblock.maxmatches,
8979 &timelimit,
8980 returnattrs,
8981 &nummatches,
8982 (u_long)uap->scriptcode,
8983 (u_long)uap->options,
8984 auio,
8985 (struct searchstate *) &state->ss_fsstate,
8986 ctx);
39037602 8987
39236c6e
A
8988 /*
8989 * If it's a union mount we need to be called again
8990 * to search the mounted-on filesystem.
8991 */
8992 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
8993 state->ss_union_flags = SRCHFS_START;
8994 state->ss_union_layer++; // search next layer down
8995 fserror = EAGAIN;
8996 }
8997
6d2010ae
A
8998saveandexit:
8999
9000 vnode_put(vp);
9001
9002 /* Now copy out the stuff that needs copying out. That means the number of matches, the
9003 search state. Everything was already put into he return buffer by the vop call. */
9004
9005 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
9006 goto freeandexit;
9007
39236c6e 9008 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
6d2010ae 9009 goto freeandexit;
39037602 9010
6d2010ae
A
9011 error = fserror;
9012
9013freeandexit:
9014
9015 FREE(searchparams1,M_TEMP);
9016
9017 return(error);
9018
9019
9020} /* end of searchfs system call */
9021
316670eb
A
9022#else /* CONFIG_SEARCHFS */
9023
9024int
9025searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
9026{
9027 return (ENOTSUP);
9028}
9029
9030#endif /* CONFIG_SEARCHFS */
6d2010ae
A
9031
9032
9033lck_grp_attr_t * nspace_group_attr;
9034lck_attr_t * nspace_lock_attr;
9035lck_grp_t * nspace_mutex_group;
9036
9037lck_mtx_t nspace_handler_lock;
9038lck_mtx_t nspace_handler_exclusion_lock;
9039
9040time_t snapshot_timestamp=0;
9041int nspace_allow_virtual_devs=0;
9042
9043void nspace_handler_init(void);
9044
/*
 * One slot in the nspace_items[] table, describing a vnode that a thread
 * is waiting on a namespace/snapshot handler to resolve.  A slot whose
 * flags field is 0 is free for re-use.
 */
9045typedef struct nspace_item_info {
9046 struct vnode *vp; /* vnode being resolved; its address is also the msleep/wakeup channel for waiters */
9047 void *arg; /* caller-supplied argument (stored as NULL for NSPACE_REARM_NO_ARG) */
9048 uint64_t op; /* operation that triggered the event; matched together with vp */
9049 uint32_t vid; /* vnode_vid(vp) sampled when the item was queued */
9050 uint32_t flags; /* NSPACE_ITEM_* state and event-type bits */
9051 uint32_t token; /* zeroed when queued; returned as the error value when the item is cancelled */
9052 uint32_t refcount; /* number of threads waiting on this slot; slot is cleared when it drops to 0 */
9053} nspace_item_info;
9054
9055#define MAX_NSPACE_ITEMS 128
9056nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
9057uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
9058uint32_t nspace_token_id=0;
9059uint32_t nspace_handler_timeout = 15; // seconds
9060
9061#define NSPACE_ITEM_NEW 0x0001
9062#define NSPACE_ITEM_PROCESSING 0x0002
9063#define NSPACE_ITEM_DEAD 0x0004
9064#define NSPACE_ITEM_CANCELLED 0x0008
9065#define NSPACE_ITEM_DONE 0x0010
9066#define NSPACE_ITEM_RESET_TIMER 0x0020
9067
9068#define NSPACE_ITEM_NSPACE_EVENT 0x0040
9069#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
6d2010ae 9070
fe8ab488 9071#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
6d2010ae
A
9072
9073//#pragma optimization_level 0
9074
/*
 * Kinds of namespace handler.  The values are used as indices into
 * nspace_handlers[], which is sized by NSPACE_HANDLER_COUNT.
 */
9075typedef enum {
9076 NSPACE_HANDLER_NSPACE = 0,
9077 NSPACE_HANDLER_SNAPSHOT = 1,
6d2010ae
A
9078
9079 NSPACE_HANDLER_COUNT,
9080} nspace_type_t;
9081
/*
 * Registration record for one handler type (one entry per nspace_type_t).
 */
9082typedef struct {
9083 uint64_t handler_tid; /* reset to 0 when the handler process exits; presumably the handler's thread id -- TODO confirm */
9084 struct proc *handler_proc; /* process acting as handler; NULL when none is registered */
9085 int handler_busy; /* NOTE(review): not referenced in this part of the file; meaning to be confirmed */
9086} nspace_handler_t;
9087
9088nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
9089
39236c6e
A
9090/* namespace fsctl functions */
9091static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
9092static int nspace_item_flags_for_type(nspace_type_t nspace_type);
9093static int nspace_open_flags_for_type(nspace_type_t nspace_type);
9094static nspace_type_t nspace_type_for_op(uint64_t op);
9095static int nspace_is_special_process(struct proc *proc);
9096static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
9097static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
9098static int validate_namespace_args (int is64bit, int size);
9099static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
9100
9101
6d2010ae
A
9102static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
9103{
9104 switch(nspace_type) {
9105 case NSPACE_HANDLER_NSPACE:
9106 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
9107 case NSPACE_HANDLER_SNAPSHOT:
9108 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
6d2010ae
A
9109 default:
9110 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
9111 return 0;
9112 }
9113}
9114
9115static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
9116{
9117 switch(nspace_type) {
9118 case NSPACE_HANDLER_NSPACE:
9119 return NSPACE_ITEM_NSPACE_EVENT;
9120 case NSPACE_HANDLER_SNAPSHOT:
9121 return NSPACE_ITEM_SNAPSHOT_EVENT;
6d2010ae
A
9122 default:
9123 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
9124 return 0;
9125 }
9126}
9127
9128static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
9129{
9130 switch(nspace_type) {
9131 case NSPACE_HANDLER_NSPACE:
9132 return FREAD | FWRITE | O_EVTONLY;
9133 case NSPACE_HANDLER_SNAPSHOT:
6d2010ae
A
9134 return FREAD | O_EVTONLY;
9135 default:
9136 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
9137 return 0;
9138 }
9139}
9140
9141static inline nspace_type_t nspace_type_for_op(uint64_t op)
9142{
9143 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
9144 case NAMESPACE_HANDLER_NSPACE_EVENT:
9145 return NSPACE_HANDLER_NSPACE;
9146 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
9147 return NSPACE_HANDLER_SNAPSHOT;
6d2010ae
A
9148 default:
9149 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
9150 return NSPACE_HANDLER_NSPACE;
9151 }
9152}
9153
9154static inline int nspace_is_special_process(struct proc *proc)
9155{
9156 int i;
9157 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9158 if (proc == nspace_handlers[i].handler_proc)
9159 return 1;
9160 }
9161 return 0;
9162}
9163
9164void
9165nspace_handler_init(void)
9166{
9167 nspace_lock_attr = lck_attr_alloc_init();
9168 nspace_group_attr = lck_grp_attr_alloc_init();
9169 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
9170 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
9171 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
9172 memset(&nspace_items[0], 0, sizeof(nspace_items));
9173}
9174
9175void
9176nspace_proc_exit(struct proc *p)
9177{
9178 int i, event_mask = 0;
39037602 9179
6d2010ae
A
9180 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9181 if (p == nspace_handlers[i].handler_proc) {
9182 event_mask |= nspace_item_flags_for_type(i);
9183 nspace_handlers[i].handler_tid = 0;
9184 nspace_handlers[i].handler_proc = NULL;
9185 }
9186 }
9187
9188 if (event_mask == 0) {
9189 return;
9190 }
39037602
A
9191
9192 lck_mtx_lock(&nspace_handler_lock);
6d2010ae
A
9193 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
9194 // if this process was the snapshot handler, zero snapshot_timeout
9195 snapshot_timestamp = 0;
9196 }
39037602 9197
6d2010ae
A
9198 //
9199 // unblock anyone that's waiting for the handler that died
9200 //
6d2010ae
A
9201 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9202 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
9203
9204 if ( nspace_items[i].flags & event_mask ) {
9205
9206 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9207 vnode_lock_spin(nspace_items[i].vp);
9208 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9209 vnode_unlock(nspace_items[i].vp);
9210 }
9211 nspace_items[i].vp = NULL;
9212 nspace_items[i].vid = 0;
9213 nspace_items[i].flags = NSPACE_ITEM_DONE;
9214 nspace_items[i].token = 0;
39037602 9215
6d2010ae
A
9216 wakeup((caddr_t)&(nspace_items[i].vp));
9217 }
9218 }
9219 }
39037602 9220
6d2010ae
A
9221 wakeup((caddr_t)&nspace_item_idx);
9222 lck_mtx_unlock(&nspace_handler_lock);
9223}
9224
9225
39037602 9226int
6d2010ae
A
9227resolve_nspace_item(struct vnode *vp, uint64_t op)
9228{
9229 return resolve_nspace_item_ext(vp, op, NULL);
9230}
9231
39037602 9232int
6d2010ae
A
9233resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
9234{
9235 int i, error, keep_waiting;
9236 struct timespec ts;
9237 nspace_type_t nspace_type = nspace_type_for_op(op);
9238
9239 // only allow namespace events on regular files, directories and symlinks.
9240 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
9241 return 0;
9242 }
9243
9244 //
9245 // if this is a snapshot event and the vnode is on a
9246 // disk image just pretend nothing happened since any
9247 // change to the disk image will cause the disk image
9248 // itself to get backed up and this avoids multi-way
9249 // deadlocks between the snapshot handler and the ever
9250 // popular diskimages-helper process. the variable
9251 // nspace_allow_virtual_devs allows this behavior to
9252 // be overridden (for use by the Mobile TimeMachine
9253 // testing infrastructure which uses disk images)
9254 //
9255 if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
9256 && (vp->v_mount != NULL)
9257 && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
9258 && !nspace_allow_virtual_devs) {
9259
9260 return 0;
9261 }
9262
9263 // if (thread_tid(current_thread()) == namespace_handler_tid) {
9264 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9265 return 0;
9266 }
9267
9268 if (nspace_is_special_process(current_proc())) {
9269 return EDEADLK;
9270 }
9271
9272 lck_mtx_lock(&nspace_handler_lock);
9273
9274retry:
9275 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9276 if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
9277 break;
9278 }
9279 }
9280
9281 if (i >= MAX_NSPACE_ITEMS) {
9282 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9283 if (nspace_items[i].flags == 0) {
9284 break;
9285 }
9286 }
9287 } else {
9288 nspace_items[i].refcount++;
9289 }
39037602 9290
6d2010ae
A
9291 if (i >= MAX_NSPACE_ITEMS) {
9292 ts.tv_sec = nspace_handler_timeout;
9293 ts.tv_nsec = 0;
9294
9295 error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
9296 if (error == 0) {
9297 // an entry got free'd up, go see if we can get a slot
9298 goto retry;
9299 } else {
9300 lck_mtx_unlock(&nspace_handler_lock);
9301 return error;
9302 }
9303 }
9304
9305 //
9306 // if it didn't already exist, add it. if it did exist
9307 // we'll get woken up when someone does a wakeup() on
9308 // the slot in the nspace_items table.
9309 //
9310 if (vp != nspace_items[i].vp) {
9311 nspace_items[i].vp = vp;
39236c6e 9312 nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
6d2010ae
A
9313 nspace_items[i].op = op;
9314 nspace_items[i].vid = vnode_vid(vp);
9315 nspace_items[i].flags = NSPACE_ITEM_NEW;
9316 nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
9317 if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
9318 if (arg) {
9319 vnode_lock_spin(vp);
9320 vp->v_flag |= VNEEDSSNAPSHOT;
9321 vnode_unlock(vp);
9322 }
9323 }
9324
9325 nspace_items[i].token = 0;
9326 nspace_items[i].refcount = 1;
39037602 9327
6d2010ae
A
9328 wakeup((caddr_t)&nspace_item_idx);
9329 }
9330
9331 //
9332 // Now go to sleep until the handler does a wakeup on this
9333 // slot in the nspace_items table (or we timeout).
9334 //
9335 keep_waiting = 1;
9336 while(keep_waiting) {
9337 ts.tv_sec = nspace_handler_timeout;
9338 ts.tv_nsec = 0;
9339 error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
9340
9341 if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
9342 error = 0;
9343 } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
9344 error = nspace_items[i].token;
9345 } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
9346 if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
9347 nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
9348 continue;
9349 } else {
9350 error = ETIMEDOUT;
9351 }
9352 } else if (error == 0) {
9353 // hmmm, why did we get woken up?
9354 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
9355 nspace_items[i].token);
39037602 9356 }
6d2010ae
A
9357
9358 if (--nspace_items[i].refcount == 0) {
9359 nspace_items[i].vp = NULL; // clear this so that no one will match on it again
9360 nspace_items[i].arg = NULL;
9361 nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
9362 nspace_items[i].flags = 0; // this clears it for re-use
9363 }
9364 wakeup(&nspace_token_id);
9365 keep_waiting = 0;
9366 }
9367
9368 lck_mtx_unlock(&nspace_handler_lock);
9369
9370 return error;
9371}
9372
39037602 9373int nspace_snapshot_event(vnode_t vp, time_t ctime, uint64_t op_type, void *arg)
6d2010ae 9374{
39037602 9375 int snapshot_error = 0;
6d2010ae 9376
39037602
A
9377 if (vp == NULL) {
9378 return 0;
9379 }
9380
9381 /* Swap files are special; skip them */
9382 if (vnode_isswap(vp)) {
9383 return 0;
9384 }
9385
9386 if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
9387 // the change time is within this epoch
9388 int error;
9389
9390 error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
9391 if (error == EDEADLK) {
9392 snapshot_error = 0;
9393 } else if (error) {
9394 if (error == EAGAIN) {
9395 printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
9396 } else if (error == EINTR) {
9397 // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
9398 snapshot_error = EINTR;
9399 }
9400 }
9401 }
9402
9403 return snapshot_error;
9404}
9405
9406int
9407get_nspace_item_status(struct vnode *vp, int32_t *status)
9408{
9409 int i;
9410
9411 lck_mtx_lock(&nspace_handler_lock);
9412 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9413 if (nspace_items[i].vp == vp) {
9414 break;
6d2010ae
A
9415 }
9416 }
9417
9418 if (i >= MAX_NSPACE_ITEMS) {
9419 lck_mtx_unlock(&nspace_handler_lock);
9420 return ENOENT;
9421 }
9422
9423 *status = nspace_items[i].flags;
9424 lck_mtx_unlock(&nspace_handler_lock);
9425 return 0;
9426}
39037602 9427
6d2010ae
A
9428
#if 0
/*
 * build_volfs_path (compiled out)
 *
 * Format a "/.vol/<fsid>/<fileid>" path for `vp' into `path'.  On entry
 * *len is the size of `path'; on return it holds the snprintf() result
 * plus one.  If vnode_getattr() fails, a fixed placeholder path is written
 * instead and -1 is returned.
 */
static int
build_volfs_path(struct vnode *vp, char *path, int *len)
{
	struct vnode_attr va;

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_fileid);

	if (vnode_getattr(vp, &va, vfs_context_kernel()) == 0) {
		*len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
		return 0;
	}

	*len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
	return -1;
}
#endif
9451
9452//
9453// Note: this function does NOT check permissions on all of the
9454// parent directories leading to this vnode. It should only be
9455// called on behalf of a root process. Otherwise a process may
9456// get access to a file because the file itself is readable even
9457// though its parent directories would prevent access.
9458//
9459static int
9460vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
9461{
9462 int error, action;
9463
9464 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9465 return error;
9466 }
9467
9468#if CONFIG_MACF
9469 error = mac_vnode_check_open(ctx, vp, fmode);
9470 if (error)
9471 return error;
9472#endif
1c79356b 9473
6d2010ae
A
9474 /* compute action to be authorized */
9475 action = 0;
9476 if (fmode & FREAD) {
9477 action |= KAUTH_VNODE_READ_DATA;
9478 }
9479 if (fmode & (FWRITE | O_TRUNC)) {
9480 /*
9481 * If we are writing, appending, and not truncating,
9482 * indicate that we are appending so that if the
9483 * UF_APPEND or SF_APPEND bits are set, we do not deny
9484 * the open.
9485 */
9486 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
9487 action |= KAUTH_VNODE_APPEND_DATA;
9488 } else {
9489 action |= KAUTH_VNODE_WRITE_DATA;
9490 }
9491 }
1c79356b 9492
6d2010ae
A
9493 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
9494 return error;
39037602 9495
1c79356b 9496
6d2010ae
A
9497 //
9498 // if the vnode is tagged VOPENEVT and the current process
9499 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
9500 // flag to the open mode so that this open won't count against
9501 // the vnode when carbon delete() does a vnode_isinuse() to see
9502 // if a file is currently in use. this allows spotlight
9503 // importers to not interfere with carbon apps that depend on
9504 // the no-delete-if-busy semantics of carbon delete().
9505 //
9506 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
9507 fmode |= O_EVTONLY;
9508 }
1c79356b 9509
6d2010ae
A
9510 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
9511 return error;
9512 }
9513 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
9514 VNOP_CLOSE(vp, fmode, ctx);
9515 return error;
9516 }
1c79356b 9517
39037602 9518 /* Call out to allow 3rd party notification of open.
6d2010ae
A
9519 * Ignore result of kauth_authorize_fileop call.
9520 */
4b17d6b6
A
9521#if CONFIG_MACF
9522 mac_vnode_notify_open(ctx, vp, fmode);
9523#endif
39037602 9524 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
6d2010ae 9525 (uintptr_t)vp, 0);
1c79356b 9526
1c79356b 9527
6d2010ae
A
9528 return 0;
9529}
1c79356b 9530
6d2010ae 9531static int
39236c6e 9532wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
6d2010ae 9533{
39037602
A
9534 int i;
9535 int error = 0;
9536 int unblock = 0;
6d2010ae 9537 task_t curtask;
39037602 9538
6d2010ae
A
9539 lck_mtx_lock(&nspace_handler_exclusion_lock);
9540 if (nspace_handlers[nspace_type].handler_busy) {
9541 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9542 return EBUSY;
9543 }
39037602 9544
6d2010ae
A
9545 nspace_handlers[nspace_type].handler_busy = 1;
9546 lck_mtx_unlock(&nspace_handler_exclusion_lock);
39037602
A
9547
9548 /*
6d2010ae
A
9549 * Any process that gets here will be one of the namespace handlers.
9550 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
9551 * as we can cause deadlocks to occur, because the namespace handler may prevent
39037602 9552 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
6d2010ae
A
9553 * process.
9554 */
9555 curtask = current_task();
39037602
A
9556 bsd_set_dependency_capable (curtask);
9557
6d2010ae
A
9558 lck_mtx_lock(&nspace_handler_lock);
9559 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9560 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
9561 nspace_handlers[nspace_type].handler_proc = current_proc();
9562 }
39037602
A
9563
9564 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
9565 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9566 error = EINVAL;
9567 }
9568
6d2010ae 9569 while (error == 0) {
39037602
A
9570
9571 /* Try to find matching namespace item */
9572 for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
6d2010ae 9573 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
39037602
A
9574 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9575 break;
6d2010ae 9576 }
6d2010ae
A
9577 }
9578 }
39236c6e 9579
39037602
A
9580 if (i >= MAX_NSPACE_ITEMS) {
9581 /* Nothing is there yet. Wait for wake up and retry */
6d2010ae
A
9582 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
9583 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
39037602 9584 /* Prevent infinite loop if snapshot handler exited */
6d2010ae
A
9585 error = EINVAL;
9586 break;
9587 }
39037602 9588 continue;
6d2010ae 9589 }
39037602
A
9590
9591 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
9592 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
9593 nspace_items[i].token = ++nspace_token_id;
9594
9595 assert(nspace_items[i].vp);
9596 struct fileproc *fp;
9597 int32_t indx;
9598 int32_t fmode;
9599 struct proc *p = current_proc();
9600 vfs_context_t ctx = vfs_context_current();
9601 struct vnode_attr va;
9602 bool vn_get_succsessful = false;
9603 bool vn_open_successful = false;
9604 bool fp_alloc_successful = false;
9605
9606 /*
9607 * Use vnode pointer to acquire a file descriptor for
9608 * hand-off to userland
9609 */
9610 fmode = nspace_open_flags_for_type(nspace_type);
9611 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
9612 if (error) goto cleanup;
9613 vn_get_succsessful = true;
9614
9615 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
9616 if (error) goto cleanup;
9617 vn_open_successful = true;
9618
9619 error = falloc(p, &fp, &indx, ctx);
9620 if (error) goto cleanup;
9621 fp_alloc_successful = true;
9622
9623 fp->f_fglob->fg_flag = fmode;
9624 fp->f_fglob->fg_ops = &vnops;
9625 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
9626
9627 proc_fdlock(p);
9628 procfdtbl_releasefd(p, indx, NULL);
9629 fp_drop(p, indx, fp, 1);
9630 proc_fdunlock(p);
9631
9632 /*
9633 * All variants of the namespace handler struct support these three fields:
9634 * token, flags, and the FD pointer
9635 */
9636 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
9637 if (error) goto cleanup;
9638 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
9639 if (error) goto cleanup;
9640 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
9641 if (error) goto cleanup;
9642
9643 /*
9644 * Handle optional fields:
9645 * extended version support an info ptr (offset, length), and the
9646 *
9647 * namedata version supports a unique per-link object ID
9648 *
9649 */
9650 if (nhd->infoptr) {
9651 uio_t uio = (uio_t)nspace_items[i].arg;
9652 uint64_t u_offset, u_length;
9653
9654 if (uio) {
9655 u_offset = uio_offset(uio);
9656 u_length = uio_resid(uio);
9657 } else {
9658 u_offset = 0;
9659 u_length = 0;
9660 }
9661 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
9662 if (error) goto cleanup;
9663 error = copyout(&u_length, nhd->infoptr + sizeof(uint64_t), sizeof(uint64_t));
9664 if (error) goto cleanup;
9665 }
9666
9667 if (nhd->objid) {
9668 VATTR_INIT(&va);
9669 VATTR_WANTED(&va, va_linkid);
9670 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
9671 if (error) goto cleanup;
9672
9673 uint64_t linkid = 0;
9674 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
9675 linkid = (uint64_t)va.va_linkid;
9676 }
9677 error = copyout(&linkid, nhd->objid, sizeof(uint64_t));
9678 }
9679cleanup:
9680 if (error) {
9681 if (fp_alloc_successful) fp_free(p, indx, fp);
9682 if (vn_open_successful) vn_close(nspace_items[i].vp, fmode, ctx);
9683 unblock = 1;
9684 }
9685
9686 if (vn_get_succsessful) vnode_put(nspace_items[i].vp);
9687
9688 break;
6d2010ae 9689 }
39037602 9690
6d2010ae
A
9691 if (unblock) {
9692 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9693 vnode_lock_spin(nspace_items[i].vp);
9694 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9695 vnode_unlock(nspace_items[i].vp);
9696 }
9697 nspace_items[i].vp = NULL;
9698 nspace_items[i].vid = 0;
9699 nspace_items[i].flags = NSPACE_ITEM_DONE;
9700 nspace_items[i].token = 0;
39037602 9701
6d2010ae
A
9702 wakeup((caddr_t)&(nspace_items[i].vp));
9703 }
39037602 9704
6d2010ae
A
9705 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
9706 // just go through every snapshot event and unblock it immediately.
9707 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
39037602 9708 for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
6d2010ae
A
9709 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9710 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9711 nspace_items[i].vp = NULL;
9712 nspace_items[i].vid = 0;
9713 nspace_items[i].flags = NSPACE_ITEM_DONE;
9714 nspace_items[i].token = 0;
39037602
A
9715
9716 wakeup((caddr_t)&(nspace_items[i].vp));
6d2010ae
A
9717 }
9718 }
9719 }
9720 }
9721 }
39037602 9722
6d2010ae 9723 lck_mtx_unlock(&nspace_handler_lock);
39037602 9724
6d2010ae
A
9725 lck_mtx_lock(&nspace_handler_exclusion_lock);
9726 nspace_handlers[nspace_type].handler_busy = 0;
9727 lck_mtx_unlock(&nspace_handler_exclusion_lock);
39037602 9728
6d2010ae
A
9729 return error;
9730}
1c79356b 9731
39236c6e
A
9732static inline int validate_namespace_args (int is64bit, int size) {
9733
9734 if (is64bit) {
9735 /* Must be one of these */
9736 if (size == sizeof(user64_namespace_handler_info)) {
9737 goto sizeok;
9738 }
9739 if (size == sizeof(user64_namespace_handler_info_ext)) {
9740 goto sizeok;
9741 }
9742 if (size == sizeof(user64_namespace_handler_data)) {
9743 goto sizeok;
9744 }
9745 return EINVAL;
9746 }
9747 else {
9748 /* 32 bit -- must be one of these */
9749 if (size == sizeof(user32_namespace_handler_info)) {
9750 goto sizeok;
9751 }
9752 if (size == sizeof(user32_namespace_handler_info_ext)) {
9753 goto sizeok;
9754 }
9755 if (size == sizeof(user32_namespace_handler_data)) {
9756 goto sizeok;
9757 }
9758 return EINVAL;
9759 }
9760
9761sizeok:
9762
9763 return 0;
9764
9765}
1c79356b 9766
6d2010ae
A
9767static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
9768{
9769 int error = 0;
39236c6e 9770 namespace_handler_data nhd;
39037602 9771
39236c6e
A
9772 bzero (&nhd, sizeof(namespace_handler_data));
9773
6d2010ae
A
9774 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9775 return error;
9776 }
39037602 9777
39236c6e
A
9778 error = validate_namespace_args (is64bit, size);
9779 if (error) {
9780 return error;
6d2010ae 9781 }
39037602 9782
39236c6e
A
9783 /* Copy in the userland pointers into our kernel-only struct */
9784
6d2010ae 9785 if (is64bit) {
39236c6e
A
9786 /* 64 bit userland structures */
9787 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
9788 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
9789 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
9790
9791 /* If the size is greater than the standard info struct, add in extra fields */
9792 if (size > (sizeof(user64_namespace_handler_info))) {
9793 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
9794 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
9795 }
9796 if (size == (sizeof(user64_namespace_handler_data))) {
9797 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
9798 }
9799 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
6d2010ae 9800 }
39037602 9801 }
39236c6e
A
9802 else {
9803 /* 32 bit userland structures */
9804 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
9805 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
9806 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
39037602 9807
39236c6e
A
9808 if (size > (sizeof(user32_namespace_handler_info))) {
9809 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
9810 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
9811 }
9812 if (size == (sizeof(user32_namespace_handler_data))) {
9813 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
9814 }
9815 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
6d2010ae
A
9816 }
9817 }
39037602 9818
39236c6e 9819 return wait_for_namespace_event(&nhd, nspace_type);
6d2010ae 9820}
1c79356b
A
9821
9822/*
9823 * Make a filesystem-specific control call:
9824 */
1c79356b 9825/* ARGSUSED */
b0d623f7
A
9826static int
9827fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
1c79356b 9828{
b0d623f7 9829 int error=0;
91447636 9830 boolean_t is64bit;
2d21ac55 9831 u_int size;
1c79356b 9832#define STK_PARAMS 128
39037602 9833 char stkbuf[STK_PARAMS] = {0};
1c79356b 9834 caddr_t data, memp;
b0d623f7 9835 vnode_t vp = *arg_vp;
1c79356b
A
9836
9837 size = IOCPARM_LEN(cmd);
9838 if (size > IOCPARM_MAX) return (EINVAL);
9839
6d2010ae 9840 is64bit = proc_is64bit(p);
91447636 9841
1c79356b 9842 memp = NULL;
04b8595b 9843
3e170ce0 9844
04b8595b
A
9845 /*
9846 * ensure the buffer is large enough for underlying calls
9847 */
9848#ifndef HFSIOC_GETPATH
3e170ce0 9849 typedef char pn_t[MAXPATHLEN];
04b8595b
A
9850#define HFSIOC_GETPATH _IOWR('h', 13, pn_t)
9851#endif
9852
9853#ifndef HFS_GETPATH
9854#define HFS_GETPATH IOCBASECMD(HFSIOC_GETPATH)
9855#endif
9856 if (IOCBASECMD(cmd) == HFS_GETPATH) {
9857 /* Round up to MAXPATHLEN regardless of user input */
9858 size = MAXPATHLEN;
9859 }
9860
1c79356b
A
9861 if (size > sizeof (stkbuf)) {
9862 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
9863 data = memp;
9864 } else {
91447636 9865 data = &stkbuf[0];
1c79356b 9866 };
39037602 9867
1c79356b
A
9868 if (cmd & IOC_IN) {
9869 if (size) {
b0d623f7 9870 error = copyin(udata, data, size);
39037602 9871 if (error) {
fe8ab488 9872 if (memp) {
39037602 9873 kfree (memp, size);
fe8ab488
A
9874 }
9875 return error;
9876 }
1c79356b 9877 } else {
6d2010ae
A
9878 if (is64bit) {
9879 *(user_addr_t *)data = udata;
9880 }
9881 else {
9882 *(uint32_t *)data = (uint32_t)udata;
9883 }
1c79356b
A
9884 };
9885 } else if ((cmd & IOC_OUT) && size) {
9886 /*
9887 * Zero the buffer so the user always
9888 * gets back something deterministic.
9889 */
9890 bzero(data, size);
91447636 9891 } else if (cmd & IOC_VOID) {
b0d623f7 9892 if (is64bit) {
6d2010ae 9893 *(user_addr_t *)data = udata;
b0d623f7
A
9894 }
9895 else {
6d2010ae 9896 *(uint32_t *)data = (uint32_t)udata;
b0d623f7 9897 }
91447636 9898 }
1c79356b 9899
b0d623f7 9900 /* Check to see if it's a generic command */
fe8ab488 9901 switch (IOCBASECMD(cmd)) {
91447636 9902
fe8ab488
A
9903 case FSCTL_SYNC_VOLUME: {
9904 mount_t mp = vp->v_mount;
9905 int arg = *(uint32_t*)data;
b0d623f7 9906
fe8ab488
A
9907 /* record vid of vp so we can drop it below. */
9908 uint32_t vvid = vp->v_id;
b0d623f7 9909
fe8ab488
A
9910 /*
9911 * Then grab mount_iterref so that we can release the vnode.
9912 * Without this, a thread may call vnode_iterate_prepare then
9913 * get into a deadlock because we've never released the root vp
9914 */
9915 error = mount_iterref (mp, 0);
9916 if (error) {
9917 break;
9918 }
9919 vnode_put(vp);
9920
9921 /* issue the sync for this volume */
9922 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
9923
39037602 9924 /*
fe8ab488
A
9925 * Then release the mount_iterref once we're done syncing; it's not
9926 * needed for the VNOP_IOCTL below
9927 */
9928 mount_iterdrop(mp);
9929
9930 if (arg & FSCTL_SYNC_FULLSYNC) {
9931 /* re-obtain vnode iocount on the root vp, if possible */
9932 error = vnode_getwithvid (vp, vvid);
9933 if (error == 0) {
9934 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
9935 vnode_put (vp);
9936 }
b0d623f7 9937 }
fe8ab488
A
9938 /* mark the argument VP as having been released */
9939 *arg_vp = NULL;
b0d623f7 9940 }
fe8ab488 9941 break;
b0d623f7 9942
490019cf
A
9943 case FSCTL_ROUTEFS_SETROUTEID: {
9944#if ROUTEFS
9945 char routepath[MAXPATHLEN];
9946 size_t len = 0;
39037602 9947
490019cf
A
9948 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9949 break;
9950 }
9951 bzero(routepath, MAXPATHLEN);
9952 error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
9953 if (error) {
9954 break;
9955 }
9956 error = routefs_kernel_mount(routepath);
9957 if (error) {
9958 break;
9959 }
9960#endif
9961 }
9962 break;
9963
fe8ab488
A
9964 case FSCTL_SET_PACKAGE_EXTS: {
9965 user_addr_t ext_strings;
9966 uint32_t num_entries;
9967 uint32_t max_width;
b0d623f7 9968
39037602
A
9969 if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0)))
9970 break;
9971
fe8ab488
A
9972 if ( (is64bit && size != sizeof(user64_package_ext_info))
9973 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
9974
9975 // either you're 64-bit and passed a 64-bit struct or
9976 // you're 32-bit and passed a 32-bit struct. otherwise
9977 // it's not ok.
9978 error = EINVAL;
9979 break;
9980 }
9981
9982 if (is64bit) {
9983 ext_strings = ((user64_package_ext_info *)data)->strings;
9984 num_entries = ((user64_package_ext_info *)data)->num_entries;
9985 max_width = ((user64_package_ext_info *)data)->max_width;
9986 } else {
9987 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
9988 num_entries = ((user32_package_ext_info *)data)->num_entries;
9989 max_width = ((user32_package_ext_info *)data)->max_width;
9990 }
9991 error = set_package_extensions_table(ext_strings, num_entries, max_width);
6d2010ae 9992 }
fe8ab488 9993 break;
2d21ac55 9994
39037602 9995 /* namespace handlers */
fe8ab488
A
9996 case FSCTL_NAMESPACE_HANDLER_GET: {
9997 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
6d2010ae 9998 }
fe8ab488 9999 break;
b0d623f7 10000
fe8ab488
A
10001 /* Snapshot handlers */
10002 case FSCTL_OLD_SNAPSHOT_HANDLER_GET: {
10003 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
39037602 10004 }
fe8ab488 10005 break;
39236c6e 10006
fe8ab488
A
10007 case FSCTL_SNAPSHOT_HANDLER_GET_EXT: {
10008 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
10009 }
39037602 10010 break;
39236c6e 10011
fe8ab488
A
10012 case FSCTL_NAMESPACE_HANDLER_UPDATE: {
10013 uint32_t token, val;
10014 int i;
39236c6e 10015
fe8ab488
A
10016 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10017 break;
10018 }
39236c6e 10019
fe8ab488
A
10020 if (!nspace_is_special_process(p)) {
10021 error = EINVAL;
10022 break;
10023 }
6d2010ae 10024
fe8ab488
A
10025 token = ((uint32_t *)data)[0];
10026 val = ((uint32_t *)data)[1];
6d2010ae 10027
fe8ab488 10028 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 10029
fe8ab488
A
10030 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10031 if (nspace_items[i].token == token) {
10032 break; /* exit for loop, not case stmt */
10033 }
10034 }
6d2010ae 10035
fe8ab488
A
10036 if (i >= MAX_NSPACE_ITEMS) {
10037 error = ENOENT;
10038 } else {
10039 //
10040 // if this bit is set, when resolve_nspace_item() times out
10041 // it will loop and go back to sleep.
10042 //
10043 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
10044 }
6d2010ae 10045
fe8ab488
A
10046 lck_mtx_unlock(&nspace_handler_lock);
10047
10048 if (error) {
10049 printf("nspace-handler-update: did not find token %u\n", token);
10050 }
39037602 10051 }
fe8ab488 10052 break;
39037602
A
10053
10054 case FSCTL_NAMESPACE_HANDLER_UNBLOCK: {
fe8ab488
A
10055 uint32_t token, val;
10056 int i;
10057
10058 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
6d2010ae
A
10059 break;
10060 }
6d2010ae 10061
fe8ab488
A
10062 if (!nspace_is_special_process(p)) {
10063 error = EINVAL;
10064 break;
10065 }
6d2010ae 10066
fe8ab488
A
10067 token = ((uint32_t *)data)[0];
10068 val = ((uint32_t *)data)[1];
6d2010ae 10069
fe8ab488 10070 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 10071
fe8ab488
A
10072 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10073 if (nspace_items[i].token == token) {
10074 break; /* exit for loop, not case statement */
10075 }
10076 }
6d2010ae 10077
fe8ab488
A
10078 if (i >= MAX_NSPACE_ITEMS) {
10079 printf("nspace-handler-unblock: did not find token %u\n", token);
10080 error = ENOENT;
10081 } else {
10082 if (val == 0 && nspace_items[i].vp) {
10083 vnode_lock_spin(nspace_items[i].vp);
10084 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10085 vnode_unlock(nspace_items[i].vp);
10086 }
6d2010ae 10087
fe8ab488
A
10088 nspace_items[i].vp = NULL;
10089 nspace_items[i].arg = NULL;
10090 nspace_items[i].op = 0;
10091 nspace_items[i].vid = 0;
10092 nspace_items[i].flags = NSPACE_ITEM_DONE;
10093 nspace_items[i].token = 0;
6d2010ae 10094
fe8ab488
A
10095 wakeup((caddr_t)&(nspace_items[i].vp));
10096 }
10097
10098 lck_mtx_unlock(&nspace_handler_lock);
39037602 10099 }
fe8ab488 10100 break;
6d2010ae 10101
fe8ab488
A
10102 case FSCTL_NAMESPACE_HANDLER_CANCEL: {
10103 uint32_t token, val;
10104 int i;
6d2010ae 10105
fe8ab488 10106 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
6d2010ae
A
10107 break;
10108 }
6d2010ae 10109
fe8ab488
A
10110 if (!nspace_is_special_process(p)) {
10111 error = EINVAL;
10112 break;
6d2010ae
A
10113 }
10114
fe8ab488
A
10115 token = ((uint32_t *)data)[0];
10116 val = ((uint32_t *)data)[1];
6d2010ae 10117
fe8ab488 10118 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 10119
fe8ab488
A
10120 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10121 if (nspace_items[i].token == token) {
10122 break; /* exit for loop, not case stmt */
10123 }
10124 }
6d2010ae 10125
fe8ab488
A
10126 if (i >= MAX_NSPACE_ITEMS) {
10127 printf("nspace-handler-cancel: did not find token %u\n", token);
10128 error = ENOENT;
10129 } else {
10130 if (nspace_items[i].vp) {
10131 vnode_lock_spin(nspace_items[i].vp);
10132 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10133 vnode_unlock(nspace_items[i].vp);
10134 }
6d2010ae 10135
39037602
A
10136 nspace_items[i].vp = NULL;
10137 nspace_items[i].arg = NULL;
fe8ab488
A
10138 nspace_items[i].vid = 0;
10139 nspace_items[i].token = val;
10140 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
39037602 10141 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
6d2010ae 10142
fe8ab488
A
10143 wakeup((caddr_t)&(nspace_items[i].vp));
10144 }
6d2010ae 10145
fe8ab488 10146 lck_mtx_unlock(&nspace_handler_lock);
39037602 10147 }
fe8ab488 10148 break;
6d2010ae 10149
fe8ab488
A
10150 case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
10151 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
6d2010ae
A
10152 break;
10153 }
6d2010ae 10154
fe8ab488 10155 // we explicitly do not do the namespace_handler_proc check here
6d2010ae 10156
fe8ab488
A
10157 lck_mtx_lock(&nspace_handler_lock);
10158 snapshot_timestamp = ((uint32_t *)data)[0];
10159 wakeup(&nspace_item_idx);
10160 lck_mtx_unlock(&nspace_handler_lock);
10161 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
6d2010ae 10162
39037602 10163 }
fe8ab488 10164 break;
6d2010ae 10165
fe8ab488
A
10166 case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
10167 {
10168 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10169 break;
10170 }
6d2010ae 10171
fe8ab488
A
10172 lck_mtx_lock(&nspace_handler_lock);
10173 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
10174 lck_mtx_unlock(&nspace_handler_lock);
10175 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
10176 nspace_allow_virtual_devs ? "" : " NOT");
10177 error = 0;
6d2010ae 10178
6d2010ae 10179 }
fe8ab488 10180 break;
6d2010ae 10181
39037602
A
10182 case FSCTL_SET_FSTYPENAME_OVERRIDE:
10183 {
fe8ab488
A
10184 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10185 break;
10186 }
10187 if (vp->v_mount) {
10188 mount_lock(vp->v_mount);
10189 if (data[0] != 0) {
10190 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
10191 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
10192 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10193 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
10194 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
10195 }
10196 } else {
10197 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10198 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
10199 }
10200 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
10201 vp->v_mount->fstypename_override[0] = '\0';
6d2010ae 10202 }
fe8ab488 10203 mount_unlock(vp->v_mount);
6d2010ae 10204 }
6d2010ae 10205 }
fe8ab488 10206 break;
39037602 10207
fe8ab488
A
10208 default: {
10209 /* Invoke the filesystem-specific code */
10210 error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
10211 }
10212
10213 } /* end switch stmt */
10214
1c79356b 10215 /*
fe8ab488 10216 * if no errors, copy any data to user. Size was
1c79356b
A
10217 * already set and checked above.
10218 */
39037602 10219 if (error == 0 && (cmd & IOC_OUT) && size)
b0d623f7 10220 error = copyout(data, udata, size);
39037602 10221
fe8ab488
A
10222 if (memp) {
10223 kfree(memp, size);
10224 }
39037602 10225
1c79356b
A
10226 return error;
10227}
b0d623f7
A
10228
10229/* ARGSUSED */
10230int
10231fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
10232{
10233 int error;
39037602 10234 struct nameidata nd;
b0d623f7
A
10235 u_long nameiflags;
10236 vnode_t vp = NULL;
10237 vfs_context_t ctx = vfs_context_current();
10238
10239 AUDIT_ARG(cmd, uap->cmd);
10240 AUDIT_ARG(value32, uap->options);
10241 /* Get the vnode for the file we are getting info on: */
10242 nameiflags = 0;
10243 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6d2010ae
A
10244 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
10245 UIO_USERSPACE, uap->path, ctx);
b0d623f7
A
10246 if ((error = namei(&nd))) goto done;
10247 vp = nd.ni_vp;
10248 nameidone(&nd);
10249
10250#if CONFIG_MACF
10251 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
10252 if (error) {
10253 goto done;
10254 }
10255#endif
10256
10257 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10258
10259done:
10260 if (vp)
10261 vnode_put(vp);
10262 return error;
10263}
10264/* ARGSUSED */
10265int
10266ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
10267{
10268 int error;
10269 vnode_t vp = NULL;
10270 vfs_context_t ctx = vfs_context_current();
10271 int fd = -1;
10272
10273 AUDIT_ARG(fd, uap->fd);
10274 AUDIT_ARG(cmd, uap->cmd);
10275 AUDIT_ARG(value32, uap->options);
39037602 10276
b0d623f7
A
10277 /* Get the vnode for the file we are getting info on: */
10278 if ((error = file_vnode(uap->fd, &vp)))
3e170ce0 10279 return error;
b0d623f7
A
10280 fd = uap->fd;
10281 if ((error = vnode_getwithref(vp))) {
3e170ce0
A
10282 file_drop(fd);
10283 return error;
b0d623f7
A
10284 }
10285
10286#if CONFIG_MACF
3e170ce0
A
10287 if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
10288 file_drop(fd);
10289 vnode_put(vp);
10290 return error;
b0d623f7
A
10291 }
10292#endif
10293
10294 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10295
3e170ce0 10296 file_drop(fd);
b0d623f7 10297
3e170ce0
A
10298 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
10299 if (vp) {
b0d623f7 10300 vnode_put(vp);
3e170ce0
A
10301 }
10302
b0d623f7
A
10303 return error;
10304}
1c79356b 10305/* end of fsctl system call */
0b4e3aa0 10306
91447636
A
10307/*
10308 * Retrieve the data of an extended attribute.
10309 */
10310int
2d21ac55 10311getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
91447636 10312{
2d21ac55 10313 vnode_t vp;
91447636
A
10314 struct nameidata nd;
10315 char attrname[XATTR_MAXNAMELEN+1];
2d21ac55 10316 vfs_context_t ctx = vfs_context_current();
91447636
A
10317 uio_t auio = NULL;
10318 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10319 size_t attrsize = 0;
10320 size_t namelen;
b0d623f7 10321 u_int32_t nameiflags;
91447636
A
10322 int error;
10323 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 10324
2d21ac55 10325 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10326 return (EINVAL);
55e303ae 10327
91447636 10328 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10329 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10330 if ((error = namei(&nd))) {
10331 return (error);
10332 }
10333 vp = nd.ni_vp;
10334 nameidone(&nd);
55e303ae 10335
91447636
A
10336 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10337 goto out;
10338 }
10339 if (xattr_protected(attrname)) {
6d2010ae
A
10340 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
10341 error = EPERM;
10342 goto out;
10343 }
91447636 10344 }
b0d623f7
A
10345 /*
10346 * the specific check for 0xffffffff is a hack to preserve
10347 * binaray compatibilty in K64 with applications that discovered
39037602 10348 * that passing in a buf pointer and a size of -1 resulted in
b0d623f7
A
10349 * just the size of the indicated extended attribute being returned.
10350 * this isn't part of the documented behavior, but because of the
10351 * original implemtation's check for "uap->size > 0", this behavior
10352 * was allowed. In K32 that check turned into a signed comparison
10353 * even though uap->size is unsigned... in K64, we blow by that
10354 * check because uap->size is unsigned and doesn't get sign smeared
39037602 10355 * in the munger for a 32 bit user app. we also need to add a
b0d623f7
A
10356 * check to limit the maximum size of the buffer being passed in...
10357 * unfortunately, the underlying fileystems seem to just malloc
10358 * the requested size even if the actual extended attribute is tiny.
10359 * because that malloc is for kernel wired memory, we have to put a
10360 * sane limit on it.
10361 *
10362 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
10363 * U64 running on K64 will yield -1 (64 bits wide)
10364 * U32/U64 running on K32 will yield -1 (32 bits wide)
10365 */
10366 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
10367 goto no_uio;
10368
b0d623f7 10369 if (uap->value) {
6d2010ae
A
10370 if (uap->size > (size_t)XATTR_MAXSIZE)
10371 uap->size = XATTR_MAXSIZE;
39037602 10372
91447636
A
10373 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10374 &uio_buf[0], sizeof(uio_buf));
10375 uio_addiov(auio, uap->value, uap->size);
10376 }
b0d623f7 10377no_uio:
2d21ac55 10378 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
91447636
A
10379out:
10380 vnode_put(vp);
55e303ae 10381
91447636
A
10382 if (auio) {
10383 *retval = uap->size - uio_resid(auio);
10384 } else {
10385 *retval = (user_ssize_t)attrsize;
55e303ae
A
10386 }
10387
91447636
A
10388 return (error);
10389}
55e303ae 10390
91447636
A
10391/*
10392 * Retrieve the data of an extended attribute.
10393 */
10394int
2d21ac55 10395fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
91447636 10396{
2d21ac55 10397 vnode_t vp;
91447636 10398 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
10399 uio_t auio = NULL;
10400 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10401 size_t attrsize = 0;
10402 size_t namelen;
10403 int error;
10404 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 10405
2d21ac55 10406 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10407 return (EINVAL);
55e303ae 10408
91447636
A
10409 if ( (error = file_vnode(uap->fd, &vp)) ) {
10410 return (error);
10411 }
10412 if ( (error = vnode_getwithref(vp)) ) {
10413 file_drop(uap->fd);
10414 return(error);
10415 }
10416 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10417 goto out;
10418 }
10419 if (xattr_protected(attrname)) {
10420 error = EPERM;
10421 goto out;
10422 }
10423 if (uap->value && uap->size > 0) {
10424 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10425 &uio_buf[0], sizeof(uio_buf));
10426 uio_addiov(auio, uap->value, uap->size);
10427 }
55e303ae 10428
2d21ac55 10429 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
91447636
A
10430out:
10431 (void)vnode_put(vp);
10432 file_drop(uap->fd);
55e303ae 10433
91447636
A
10434 if (auio) {
10435 *retval = uap->size - uio_resid(auio);
10436 } else {
10437 *retval = (user_ssize_t)attrsize;
10438 }
10439 return (error);
10440}
55e303ae 10441
91447636
A
10442/*
10443 * Set the data of an extended attribute.
10444 */
55e303ae 10445int
2d21ac55 10446setxattr(proc_t p, struct setxattr_args *uap, int *retval)
55e303ae 10447{
2d21ac55 10448 vnode_t vp;
91447636
A
10449 struct nameidata nd;
10450 char attrname[XATTR_MAXNAMELEN+1];
2d21ac55 10451 vfs_context_t ctx = vfs_context_current();
91447636
A
10452 uio_t auio = NULL;
10453 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10454 size_t namelen;
b0d623f7 10455 u_int32_t nameiflags;
91447636
A
10456 int error;
10457 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 10458
2d21ac55 10459 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10460 return (EINVAL);
55e303ae 10461
91447636 10462 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6d2010ae
A
10463 if (error == EPERM) {
10464 /* if the string won't fit in attrname, copyinstr emits EPERM */
10465 return (ENAMETOOLONG);
10466 }
10467 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10468 return error;
91447636
A
10469 }
10470 if (xattr_protected(attrname))
10471 return(EPERM);
2d21ac55 10472 if (uap->size != 0 && uap->value == 0) {
91447636 10473 return (EINVAL);
55e303ae 10474 }
55e303ae 10475
91447636 10476 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10477 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10478 if ((error = namei(&nd))) {
10479 return (error);
10480 }
10481 vp = nd.ni_vp;
10482 nameidone(&nd);
55e303ae 10483
91447636
A
10484 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10485 &uio_buf[0], sizeof(uio_buf));
10486 uio_addiov(auio, uap->value, uap->size);
55e303ae 10487
2d21ac55
A
10488 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
10489#if CONFIG_FSE
10490 if (error == 0) {
10491 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10492 FSE_ARG_VNODE, vp,
10493 FSE_ARG_DONE);
10494 }
10495#endif
91447636
A
10496 vnode_put(vp);
10497 *retval = 0;
10498 return (error);
10499}
55e303ae 10500
91447636
A
10501/*
10502 * Set the data of an extended attribute.
10503 */
10504int
2d21ac55 10505fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
91447636 10506{
2d21ac55 10507 vnode_t vp;
91447636 10508 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
10509 uio_t auio = NULL;
10510 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10511 size_t namelen;
10512 int error;
10513 char uio_buf[ UIO_SIZEOF(1) ];
6d2010ae 10514#if CONFIG_FSE
2d21ac55 10515 vfs_context_t ctx = vfs_context_current();
6d2010ae 10516#endif
55e303ae 10517
2d21ac55 10518 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10519 return (EINVAL);
55e303ae 10520
91447636 10521 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
3e170ce0
A
10522 if (error == EPERM) {
10523 /* if the string won't fit in attrname, copyinstr emits EPERM */
10524 return (ENAMETOOLONG);
10525 }
10526 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10527 return error;
55e303ae 10528 }
91447636
A
10529 if (xattr_protected(attrname))
10530 return(EPERM);
2d21ac55 10531 if (uap->size != 0 && uap->value == 0) {
91447636 10532 return (EINVAL);
55e303ae 10533 }
91447636
A
10534 if ( (error = file_vnode(uap->fd, &vp)) ) {
10535 return (error);
55e303ae 10536 }
91447636
A
10537 if ( (error = vnode_getwithref(vp)) ) {
10538 file_drop(uap->fd);
10539 return(error);
10540 }
10541 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10542 &uio_buf[0], sizeof(uio_buf));
10543 uio_addiov(auio, uap->value, uap->size);
91447636 10544
2d21ac55
A
10545 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
10546#if CONFIG_FSE
10547 if (error == 0) {
10548 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10549 FSE_ARG_VNODE, vp,
10550 FSE_ARG_DONE);
10551 }
10552#endif
91447636
A
10553 vnode_put(vp);
10554 file_drop(uap->fd);
10555 *retval = 0;
10556 return (error);
10557}
55e303ae 10558
91447636
A
10559/*
10560 * Remove an extended attribute.
b0d623f7 10561 * XXX Code duplication here.
91447636 10562 */
91447636 10563int
2d21ac55 10564removexattr(proc_t p, struct removexattr_args *uap, int *retval)
91447636 10565{
2d21ac55 10566 vnode_t vp;
91447636
A
10567 struct nameidata nd;
10568 char attrname[XATTR_MAXNAMELEN+1];
10569 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
2d21ac55 10570 vfs_context_t ctx = vfs_context_current();
91447636 10571 size_t namelen;
b0d623f7 10572 u_int32_t nameiflags;
91447636 10573 int error;
55e303ae 10574
2d21ac55 10575 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10576 return (EINVAL);
55e303ae 10577
91447636
A
10578 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10579 if (error != 0) {
10580 return (error);
10581 }
10582 if (xattr_protected(attrname))
10583 return(EPERM);
10584 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10585 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10586 if ((error = namei(&nd))) {
10587 return (error);
10588 }
10589 vp = nd.ni_vp;
10590 nameidone(&nd);
55e303ae 10591
2d21ac55
A
10592 error = vn_removexattr(vp, attrname, uap->options, ctx);
10593#if CONFIG_FSE
10594 if (error == 0) {
10595 add_fsevent(FSE_XATTR_REMOVED, ctx,
10596 FSE_ARG_VNODE, vp,
10597 FSE_ARG_DONE);
10598 }
10599#endif
91447636
A
10600 vnode_put(vp);
10601 *retval = 0;
10602 return (error);
55e303ae
A
10603}
10604
91447636
A
10605/*
10606 * Remove an extended attribute.
b0d623f7 10607 * XXX Code duplication here.
91447636 10608 */
91447636 10609int
2d21ac55 10610fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
55e303ae 10611{
2d21ac55 10612 vnode_t vp;
91447636 10613 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
10614 size_t namelen;
10615 int error;
6d2010ae 10616#if CONFIG_FSE
2d21ac55 10617 vfs_context_t ctx = vfs_context_current();
6d2010ae 10618#endif
55e303ae 10619
2d21ac55 10620 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636
A
10621 return (EINVAL);
10622
10623 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10624 if (error != 0) {
10625 return (error);
10626 }
10627 if (xattr_protected(attrname))
10628 return(EPERM);
10629 if ( (error = file_vnode(uap->fd, &vp)) ) {
10630 return (error);
10631 }
10632 if ( (error = vnode_getwithref(vp)) ) {
10633 file_drop(uap->fd);
10634 return(error);
10635 }
4a249263 10636
2d21ac55
A
10637 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
10638#if CONFIG_FSE
10639 if (error == 0) {
10640 add_fsevent(FSE_XATTR_REMOVED, ctx,
10641 FSE_ARG_VNODE, vp,
10642 FSE_ARG_DONE);
10643 }
10644#endif
91447636
A
10645 vnode_put(vp);
10646 file_drop(uap->fd);
10647 *retval = 0;
10648 return (error);
55e303ae
A
10649}
10650
91447636
A
10651/*
10652 * Retrieve the list of extended attribute names.
b0d623f7 10653 * XXX Code duplication here.
91447636 10654 */
91447636 10655int
2d21ac55 10656listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
55e303ae 10657{
2d21ac55 10658 vnode_t vp;
91447636 10659 struct nameidata nd;
2d21ac55 10660 vfs_context_t ctx = vfs_context_current();
91447636
A
10661 uio_t auio = NULL;
10662 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10663 size_t attrsize = 0;
b0d623f7 10664 u_int32_t nameiflags;
91447636
A
10665 int error;
10666 char uio_buf[ UIO_SIZEOF(1) ];
4a249263 10667
2d21ac55 10668 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10669 return (EINVAL);
55e303ae 10670
fe8ab488 10671 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10672 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10673 if ((error = namei(&nd))) {
10674 return (error);
10675 }
10676 vp = nd.ni_vp;
10677 nameidone(&nd);
10678 if (uap->namebuf != 0 && uap->bufsize > 0) {
6d2010ae
A
10679 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
10680 &uio_buf[0], sizeof(uio_buf));
91447636
A
10681 uio_addiov(auio, uap->namebuf, uap->bufsize);
10682 }
55e303ae 10683
2d21ac55 10684 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
55e303ae 10685
91447636
A
10686 vnode_put(vp);
10687 if (auio) {
10688 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10689 } else {
10690 *retval = (user_ssize_t)attrsize;
10691 }
10692 return (error);
55e303ae
A
10693}
10694
91447636
A
10695/*
10696 * Retrieve the list of extended attribute names.
b0d623f7 10697 * XXX Code duplication here.
91447636 10698 */
55e303ae 10699int
2d21ac55 10700flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
55e303ae 10701{
2d21ac55 10702 vnode_t vp;
91447636
A
10703 uio_t auio = NULL;
10704 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10705 size_t attrsize = 0;
10706 int error;
10707 char uio_buf[ UIO_SIZEOF(1) ];
10708
2d21ac55 10709 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636
A
10710 return (EINVAL);
10711
10712 if ( (error = file_vnode(uap->fd, &vp)) ) {
10713 return (error);
10714 }
10715 if ( (error = vnode_getwithref(vp)) ) {
10716 file_drop(uap->fd);
10717 return(error);
10718 }
10719 if (uap->namebuf != 0 && uap->bufsize > 0) {
39037602 10720 auio = uio_createwithbuffer(1, 0, spacetype,
91447636
A
10721 UIO_READ, &uio_buf[0], sizeof(uio_buf));
10722 uio_addiov(auio, uap->namebuf, uap->bufsize);
10723 }
91447636 10724
2d21ac55 10725 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
55e303ae 10726
91447636
A
10727 vnode_put(vp);
10728 file_drop(uap->fd);
10729 if (auio) {
10730 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10731 } else {
10732 *retval = (user_ssize_t)attrsize;
10733 }
10734 return (error);
55e303ae 10735}
4a249263 10736
fe8ab488
A
10737static int fsgetpath_internal(
10738 vfs_context_t ctx, int volfs_id, uint64_t objid,
10739 vm_size_t bufsize, caddr_t buf, int *pathlen)
b0d623f7 10740{
fe8ab488 10741 int error;
b0d623f7 10742 struct mount *mp = NULL;
fe8ab488 10743 vnode_t vp;
b0d623f7 10744 int length;
fe8ab488 10745 int bpflags;
b0d623f7 10746
fe8ab488 10747 if (bufsize > PAGE_SIZE) {
b0d623f7 10748 return (EINVAL);
fe8ab488
A
10749 }
10750
10751 if (buf == NULL) {
b0d623f7
A
10752 return (ENOMEM);
10753 }
fe8ab488
A
10754
10755 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
b0d623f7 10756 error = ENOTSUP; /* unexpected failure */
fe8ab488 10757 return ENOTSUP;
b0d623f7 10758 }
fe8ab488 10759
39236c6e 10760unionget:
fe8ab488 10761 if (objid == 2) {
b0d623f7
A
10762 error = VFS_ROOT(mp, &vp, ctx);
10763 } else {
fe8ab488 10764 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
b0d623f7 10765 }
39236c6e
A
10766
10767 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
10768 /*
10769 * If the fileid isn't found and we're in a union
10770 * mount volume, then see if the fileid is in the
10771 * mounted-on volume.
10772 */
10773 struct mount *tmp = mp;
10774 mp = vnode_mount(tmp->mnt_vnodecovered);
10775 vfs_unbusy(tmp);
10776 if (vfs_busy(mp, LK_NOWAIT) == 0)
10777 goto unionget;
fe8ab488 10778 } else {
39236c6e 10779 vfs_unbusy(mp);
fe8ab488 10780 }
39236c6e 10781
b0d623f7 10782 if (error) {
fe8ab488 10783 return error;
b0d623f7 10784 }
fe8ab488 10785
6d2010ae
A
10786#if CONFIG_MACF
10787 error = mac_vnode_check_fsgetpath(ctx, vp);
10788 if (error) {
10789 vnode_put(vp);
fe8ab488 10790 return error;
6d2010ae
A
10791 }
10792#endif
fe8ab488 10793
b0d623f7
A
10794 /* Obtain the absolute path to this vnode. */
10795 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
316670eb 10796 bpflags |= BUILDPATH_CHECK_MOVED;
fe8ab488 10797 error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
b0d623f7 10798 vnode_put(vp);
fe8ab488 10799
b0d623f7
A
10800 if (error) {
10801 goto out;
10802 }
fe8ab488
A
10803
10804 AUDIT_ARG(text, buf);
39236c6e
A
10805
10806 if (kdebug_enable) {
10807 long dbg_parms[NUMPARMS];
10808 int dbg_namelen;
10809
10810 dbg_namelen = (int)sizeof(dbg_parms);
10811
fe8ab488
A
10812 if (length < dbg_namelen) {
10813 memcpy((char *)dbg_parms, buf, length);
39236c6e
A
10814 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
10815
10816 dbg_namelen = length;
fe8ab488
A
10817 } else {
10818 memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
10819 }
39236c6e
A
10820
10821 kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
10822 }
fe8ab488
A
10823
10824 *pathlen = (user_ssize_t)length; /* may be superseded by error */
10825
10826out:
10827 return (error);
10828}
10829
10830/*
10831 * Obtain the full pathname of a file system object by id.
10832 *
10833 * This is a private SPI used by the File Manager.
10834 */
10835__private_extern__
10836int
10837fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
10838{
10839 vfs_context_t ctx = vfs_context_current();
10840 fsid_t fsid;
10841 char *realpath;
10842 int length;
10843 int error;
10844
10845 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
10846 return (error);
10847 }
10848 AUDIT_ARG(value32, fsid.val[0]);
10849 AUDIT_ARG(value64, uap->objid);
10850 /* Restrict output buffer size for now. */
39037602 10851
fe8ab488
A
10852 if (uap->bufsize > PAGE_SIZE) {
10853 return (EINVAL);
39037602 10854 }
fe8ab488
A
10855 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
10856 if (realpath == NULL) {
10857 return (ENOMEM);
10858 }
10859
10860 error = fsgetpath_internal(
39037602 10861 ctx, fsid.val[0], uap->objid,
fe8ab488
A
10862 uap->bufsize, realpath, &length);
10863
10864 if (error) {
10865 goto out;
10866 }
39037602 10867
b0d623f7
A
10868 error = copyout((caddr_t)realpath, uap->buf, length);
10869
10870 *retval = (user_ssize_t)length; /* may be superseded by error */
10871out:
10872 if (realpath) {
10873 FREE(realpath, M_TEMP);
10874 }
10875 return (error);
10876}
10877
91447636
A
10878/*
10879 * Common routine to handle various flavors of statfs data heading out
10880 * to user space.
2d21ac55
A
10881 *
10882 * Returns: 0 Success
10883 * EFAULT
91447636
A
10884 */
10885static int
39037602
A
10886munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
10887 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
91447636 10888 boolean_t partial_copy)
4a249263 10889{
91447636
A
10890 int error;
10891 int my_size, copy_size;
10892
10893 if (is_64_bit) {
b0d623f7 10894 struct user64_statfs sfs;
91447636
A
10895 my_size = copy_size = sizeof(sfs);
10896 bzero(&sfs, my_size);
10897 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
10898 sfs.f_type = mp->mnt_vtable->vfc_typenum;
10899 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
b0d623f7
A
10900 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
10901 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
10902 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
10903 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
10904 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
10905 sfs.f_files = (user64_long_t)sfsp->f_files;
10906 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
91447636
A
10907 sfs.f_fsid = sfsp->f_fsid;
10908 sfs.f_owner = sfsp->f_owner;
6d2010ae 10909 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
fe8ab488 10910 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
6d2010ae
A
10911 } else {
10912 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
10913 }
2d21ac55
A
10914 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
10915 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
10916
10917 if (partial_copy) {
10918 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
10919 }
10920 error = copyout((caddr_t)&sfs, bufp, copy_size);
10921 }
10922 else {
b0d623f7
A
10923 struct user32_statfs sfs;
10924
91447636
A
10925 my_size = copy_size = sizeof(sfs);
10926 bzero(&sfs, my_size);
39037602 10927
91447636
A
10928 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
10929 sfs.f_type = mp->mnt_vtable->vfc_typenum;
10930 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
39037602 10931
91447636
A
10932 /*
10933 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
10934 * have to fudge the numbers here in that case. We inflate the blocksize in order
10935 * to reflect the filesystem size as best we can.
10936 */
39037602
A
10937 if ((sfsp->f_blocks > INT_MAX)
10938 /* Hack for 4061702 . I think the real fix is for Carbon to
91447636 10939 * look for some volume capability and not depend on hidden
39037602 10940 * semantics agreed between a FS and carbon.
91447636
A
10941 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
10942 * for Carbon to set bNoVolumeSizes volume attribute.
39037602 10943 * Without this the webdavfs files cannot be copied onto
91447636
A
10944 * disk as they look huge. This change should not affect
10945 * XSAN as they should not setting these to -1..
10946 */
2d21ac55
A
10947 && (sfsp->f_blocks != 0xffffffffffffffffULL)
10948 && (sfsp->f_bfree != 0xffffffffffffffffULL)
10949 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
91447636
A
10950 int shift;
10951
10952 /*
10953 * Work out how far we have to shift the block count down to make it fit.
10954 * Note that it's possible to have to shift so far that the resulting
10955 * blocksize would be unreportably large. At that point, we will clip
10956 * any values that don't fit.
10957 *
10958 * For safety's sake, we also ensure that f_iosize is never reported as
10959 * being smaller than f_bsize.
10960 */
10961 for (shift = 0; shift < 32; shift++) {
b0d623f7 10962 if ((sfsp->f_blocks >> shift) <= INT_MAX)
91447636 10963 break;
b0d623f7 10964 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
91447636
A
10965 break;
10966 }
b0d623f7
A
10967#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
10968 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
10969 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
10970 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
91447636 10971#undef __SHIFT_OR_CLIP
b0d623f7 10972 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
91447636
A
10973 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
10974 } else {
10975 /* filesystem is small enough to be reported honestly */
b0d623f7
A
10976 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
10977 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
10978 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
10979 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
10980 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
91447636 10981 }
b0d623f7
A
10982 sfs.f_files = (user32_long_t)sfsp->f_files;
10983 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
91447636
A
10984 sfs.f_fsid = sfsp->f_fsid;
10985 sfs.f_owner = sfsp->f_owner;
6d2010ae 10986 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
fe8ab488 10987 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
6d2010ae
A
10988 } else {
10989 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
10990 }
2d21ac55
A
10991 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
10992 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
10993
10994 if (partial_copy) {
10995 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
10996 }
10997 error = copyout((caddr_t)&sfs, bufp, copy_size);
10998 }
39037602 10999
91447636
A
11000 if (sizep != NULL) {
11001 *sizep = my_size;
11002 }
11003 return(error);
11004}
11005
11006/*
11007 * copy stat structure into user_stat structure.
11008 */
b0d623f7 11009void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
91447636 11010{
b0d623f7
A
11011 bzero(usbp, sizeof(*usbp));
11012
11013 usbp->st_dev = sbp->st_dev;
11014 usbp->st_ino = sbp->st_ino;
11015 usbp->st_mode = sbp->st_mode;
11016 usbp->st_nlink = sbp->st_nlink;
11017 usbp->st_uid = sbp->st_uid;
11018 usbp->st_gid = sbp->st_gid;
11019 usbp->st_rdev = sbp->st_rdev;
11020#ifndef _POSIX_C_SOURCE
11021 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11022 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11023 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11024 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11025 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11026 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11027#else
11028 usbp->st_atime = sbp->st_atime;
11029 usbp->st_atimensec = sbp->st_atimensec;
11030 usbp->st_mtime = sbp->st_mtime;
11031 usbp->st_mtimensec = sbp->st_mtimensec;
11032 usbp->st_ctime = sbp->st_ctime;
11033 usbp->st_ctimensec = sbp->st_ctimensec;
11034#endif
11035 usbp->st_size = sbp->st_size;
11036 usbp->st_blocks = sbp->st_blocks;
11037 usbp->st_blksize = sbp->st_blksize;
11038 usbp->st_flags = sbp->st_flags;
11039 usbp->st_gen = sbp->st_gen;
11040 usbp->st_lspare = sbp->st_lspare;
11041 usbp->st_qspare[0] = sbp->st_qspare[0];
11042 usbp->st_qspare[1] = sbp->st_qspare[1];
11043}
11044
11045void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
11046{
11047 bzero(usbp, sizeof(*usbp));
0c530ab8 11048
91447636
A
11049 usbp->st_dev = sbp->st_dev;
11050 usbp->st_ino = sbp->st_ino;
11051 usbp->st_mode = sbp->st_mode;
11052 usbp->st_nlink = sbp->st_nlink;
11053 usbp->st_uid = sbp->st_uid;
11054 usbp->st_gid = sbp->st_gid;
11055 usbp->st_rdev = sbp->st_rdev;
2d21ac55
A
11056#ifndef _POSIX_C_SOURCE
11057 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11058 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11059 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11060 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11061 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11062 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11063#else
11064 usbp->st_atime = sbp->st_atime;
11065 usbp->st_atimensec = sbp->st_atimensec;
11066 usbp->st_mtime = sbp->st_mtime;
11067 usbp->st_mtimensec = sbp->st_mtimensec;
11068 usbp->st_ctime = sbp->st_ctime;
11069 usbp->st_ctimensec = sbp->st_ctimensec;
11070#endif
11071 usbp->st_size = sbp->st_size;
11072 usbp->st_blocks = sbp->st_blocks;
11073 usbp->st_blksize = sbp->st_blksize;
11074 usbp->st_flags = sbp->st_flags;
11075 usbp->st_gen = sbp->st_gen;
11076 usbp->st_lspare = sbp->st_lspare;
11077 usbp->st_qspare[0] = sbp->st_qspare[0];
11078 usbp->st_qspare[1] = sbp->st_qspare[1];
11079}
11080
11081/*
11082 * copy stat64 structure into user_stat64 structure.
11083 */
b0d623f7
A
11084void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
11085{
11086 bzero(usbp, sizeof(*usbp));
11087
11088 usbp->st_dev = sbp->st_dev;
11089 usbp->st_ino = sbp->st_ino;
11090 usbp->st_mode = sbp->st_mode;
11091 usbp->st_nlink = sbp->st_nlink;
11092 usbp->st_uid = sbp->st_uid;
11093 usbp->st_gid = sbp->st_gid;
11094 usbp->st_rdev = sbp->st_rdev;
11095#ifndef _POSIX_C_SOURCE
11096 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11097 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11098 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11099 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11100 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11101 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11102 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11103 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
11104#else
11105 usbp->st_atime = sbp->st_atime;
11106 usbp->st_atimensec = sbp->st_atimensec;
11107 usbp->st_mtime = sbp->st_mtime;
11108 usbp->st_mtimensec = sbp->st_mtimensec;
11109 usbp->st_ctime = sbp->st_ctime;
11110 usbp->st_ctimensec = sbp->st_ctimensec;
11111 usbp->st_birthtime = sbp->st_birthtime;
11112 usbp->st_birthtimensec = sbp->st_birthtimensec;
11113#endif
11114 usbp->st_size = sbp->st_size;
11115 usbp->st_blocks = sbp->st_blocks;
11116 usbp->st_blksize = sbp->st_blksize;
11117 usbp->st_flags = sbp->st_flags;
11118 usbp->st_gen = sbp->st_gen;
11119 usbp->st_lspare = sbp->st_lspare;
11120 usbp->st_qspare[0] = sbp->st_qspare[0];
11121 usbp->st_qspare[1] = sbp->st_qspare[1];
11122}
11123
11124void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
2d21ac55 11125{
b0d623f7 11126 bzero(usbp, sizeof(*usbp));
2d21ac55
A
11127
11128 usbp->st_dev = sbp->st_dev;
11129 usbp->st_ino = sbp->st_ino;
11130 usbp->st_mode = sbp->st_mode;
11131 usbp->st_nlink = sbp->st_nlink;
11132 usbp->st_uid = sbp->st_uid;
11133 usbp->st_gid = sbp->st_gid;
11134 usbp->st_rdev = sbp->st_rdev;
11135#ifndef _POSIX_C_SOURCE
91447636
A
11136 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11137 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11138 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11139 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11140 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11141 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
2d21ac55
A
11142 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11143 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
91447636
A
11144#else
11145 usbp->st_atime = sbp->st_atime;
11146 usbp->st_atimensec = sbp->st_atimensec;
11147 usbp->st_mtime = sbp->st_mtime;
11148 usbp->st_mtimensec = sbp->st_mtimensec;
11149 usbp->st_ctime = sbp->st_ctime;
11150 usbp->st_ctimensec = sbp->st_ctimensec;
2d21ac55
A
11151 usbp->st_birthtime = sbp->st_birthtime;
11152 usbp->st_birthtimensec = sbp->st_birthtimensec;
91447636
A
11153#endif
11154 usbp->st_size = sbp->st_size;
11155 usbp->st_blocks = sbp->st_blocks;
11156 usbp->st_blksize = sbp->st_blksize;
11157 usbp->st_flags = sbp->st_flags;
11158 usbp->st_gen = sbp->st_gen;
11159 usbp->st_lspare = sbp->st_lspare;
11160 usbp->st_qspare[0] = sbp->st_qspare[0];
11161 usbp->st_qspare[1] = sbp->st_qspare[1];
4a249263 11162}
39236c6e
A
11163
11164/*
11165 * Purge buffer cache for simulating cold starts
11166 */
11167static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
11168{
11169 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
11170
11171 return VNODE_RETURNED;
11172}
11173
11174static int vfs_purge_callback(mount_t mp, __unused void * arg)
11175{
11176 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
11177
11178 return VFS_RETURNED;
11179}
11180
11181int
11182vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
11183{
11184 if (!kauth_cred_issuser(kauth_cred_get()))
11185 return EPERM;
11186
11187 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
11188
11189 return 0;
11190}
11191
39037602
A
11192/*
11193 * gets the vnode associated with the (unnamed) snapshot directory
11194 * for a Filesystem. The snapshot directory vnode is returned with
11195 * an iocount on it.
11196 */
11197int
11198vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx)
11199{
11200 int error;
11201
11202 error = VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx);
11203
11204#if CLONE_SNAPSHOT_FALLBACKS_ENABLED
11205 if (error == ENOTSUP) {
11206 struct nameidata snapnd;
11207
11208 /*
11209 * Temporary fallback to <mountpoint>/.snaps lookup
11210 * XXX: To be removed.
11211 */
11212 NDINIT(&snapnd, LOOKUP, OP_LOOKUP, USEDVP,
11213 UIO_SYSSPACE, CAST_USER_ADDR_T(".snaps"), ctx);
11214 snapnd.ni_dvp = rvp;
11215
11216 if ((error = namei(&snapnd))) {
11217 error = ENOTSUP;
11218 *sdvpp = NULLVP;
11219 } else {
11220 *sdvpp = snapnd.ni_vp;
11221 nameidone(&snapnd);
11222 }
11223 }
11224#endif /* CLONE_SNAPSHOT_FALLBACKS_ENABLED */
11225 return (error);
11226}
11227
11228/*
11229 * Get the snapshot vnode.
11230 *
11231 * If successful, the call returns with an iocount on *rvpp ,*sdvpp and
11232 * needs nameidone() on ndp.
11233 *
11234 * If the snapshot vnode exists it is returned in ndp->ni_vp.
11235 *
11236 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
11237 * not needed.
11238 */
11239static int
11240vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp,
11241 user_addr_t name, struct nameidata *ndp, int32_t op,
11242#if !CONFIG_TRIGGERS
11243 __unused
11244#endif
11245 enum path_operation pathop,
11246 vfs_context_t ctx)
11247{
11248 int error, i;
11249 caddr_t name_buf;
11250 size_t name_len;
11251 struct vfs_attr vfa;
11252
11253 *sdvpp = NULLVP;
11254 *rvpp = NULLVP;
11255
11256 error = vnode_getfromfd(ctx, dirfd, rvpp);
11257 if (error)
11258 return (error);
11259
11260 if (!vnode_isvroot(*rvpp)) {
11261 error = EINVAL;
11262 goto out;
11263 }
11264
11265 /* Make sure the filesystem supports snapshots */
11266 VFSATTR_INIT(&vfa);
11267 VFSATTR_WANTED(&vfa, f_capabilities);
11268 if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) ||
11269 !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) ||
11270 !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] &
11271 VOL_CAP_INT_SNAPSHOT)) ||
11272 !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &
11273 VOL_CAP_INT_SNAPSHOT))) {
11274 error = ENOTSUP;
11275 goto out;
11276 }
11277
11278 error = vnode_get_snapdir(*rvpp, sdvpp, ctx);
11279 if (error)
11280 goto out;
11281
11282 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11283 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11284 if (error)
11285 goto out1;
11286
11287 /*
11288 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
11289 * (the length returned by copyinstr includes the terminating NUL)
11290 */
11291 if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
11292 (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
11293 error = EINVAL;
11294 goto out1;
11295 }
11296 for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++);
11297 if (i < (int)name_len) {
11298 error = EINVAL;
11299 goto out1;
11300 }
11301
11302#if CONFIG_MACF
11303 if (op == CREATE) {
11304 error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp),
11305 name_buf);
11306 } else if (op == DELETE) {
11307 error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp),
11308 name_buf);
11309 }
11310 if (error)
11311 goto out1;
11312#endif
11313
11314 /* Check if the snapshot already exists ... */
11315 NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1,
11316 UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx);
11317 ndp->ni_dvp = *sdvpp;
11318
11319 error = namei(ndp);
11320out1:
11321 FREE(name_buf, M_TEMP);
11322out:
11323 if (error) {
11324 if (*sdvpp) {
11325 vnode_put(*sdvpp);
11326 *sdvpp = NULLVP;
11327 }
11328 if (*rvpp) {
11329 vnode_put(*rvpp);
11330 *rvpp = NULLVP;
11331 }
11332 }
11333 return (error);
11334}
11335
11336/*
11337 * create a filesystem snapshot (for supporting filesystems)
11338 *
11339 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
11340 * We get to the (unnamed) snapshot directory vnode and create the vnode
11341 * for the snapshot in it.
11342 *
11343 * Restrictions:
11344 *
11345 * a) Passed in name for snapshot cannot have slashes.
11346 * b) name can't be "." or ".."
11347 *
11348 * Since this requires superuser privileges, vnode_authorize calls are not
11349 * made.
11350 */
11351static int
11352snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
11353 vfs_context_t ctx)
11354{
11355 vnode_t rvp, snapdvp;
11356 int error;
11357 struct nameidata namend;
11358
11359 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE,
11360 OP_LINK, ctx);
11361 if (error)
11362 return (error);
11363
11364 if (namend.ni_vp) {
11365 vnode_put(namend.ni_vp);
11366 error = EEXIST;
11367 } else {
11368 struct vnode_attr va;
11369 vnode_t vp = NULLVP;
11370
11371 VATTR_INIT(&va);
11372 VATTR_SET(&va, va_type, VREG);
11373 VATTR_SET(&va, va_mode, 0);
11374
11375 error = vn_create(snapdvp, &vp, &namend, &va,
11376 VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
11377 if (!error && vp)
11378 vnode_put(vp);
11379#if CLONE_SNAPSHOT_FALLBACKS_ENABLED
11380 else if (error) {
11381 error = VNOP_COPYFILE(rvp, rvp, NULLVP, &namend.ni_cnd,
11382 0, 0, ctx);
11383 }
11384#endif /* CLONE_SNAPSHOT_FALLBACKS_ENABLED */
11385 }
11386
11387 nameidone(&namend);
11388 vnode_put(snapdvp);
11389 vnode_put(rvp);
11390 return (error);
11391}
11392
11393/*
11394 * Delete a Filesystem snapshot
11395 *
11396 * get the vnode for the unnamed snapshot directory and the snapshot and
11397 * delete the snapshot.
11398 */
11399static int
11400snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
11401 vfs_context_t ctx)
11402{
11403 vnode_t rvp, snapdvp;
11404 int error;
11405 struct nameidata namend;
11406
11407 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE,
11408 OP_UNLINK, ctx);
11409 if (error)
11410 goto out;
11411
11412 error = VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd,
11413 VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
11414
11415 vnode_put(namend.ni_vp);
11416 nameidone(&namend);
11417 vnode_put(snapdvp);
11418 vnode_put(rvp);
11419out:
11420 return (error);
11421}
11422
11423/*
11424 * Revert a filesystem to a snapshot
11425 *
11426 * Marks the filesystem to revert to the given snapshot on next mount.
11427 */
11428static int
11429snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
11430 vfs_context_t ctx)
11431{
11432 int error;
11433 vnode_t rvp;
11434 mount_t mp;
11435 struct fs_snapshot_revert_args revert_data;
11436 struct componentname cnp;
11437 caddr_t name_buf;
11438 size_t name_len;
11439
11440 error = vnode_getfromfd(ctx, dirfd, &rvp);
11441 if (error) {
11442 return (error);
11443 }
11444 mp = vnode_mount(rvp);
11445
11446 /*
11447 * Grab mount_iterref so that we can release the vnode,
11448 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
11449 */
11450 error = mount_iterref (mp, 0);
11451 vnode_put(rvp);
11452 if (error) {
11453 return (error);
11454 }
11455
11456 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11457 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11458 if (error) {
11459 mount_iterdrop(mp);
11460 FREE(name_buf, M_TEMP);
11461 return (error);
11462 }
11463
11464 memset(&cnp, 0, sizeof(cnp));
11465 cnp.cn_pnbuf = (char *)name_buf;
11466 cnp.cn_nameiop = LOOKUP;
11467 cnp.cn_flags = ISLASTCN | HASBUF;
11468 cnp.cn_pnlen = MAXPATHLEN;
11469 cnp.cn_nameptr = cnp.cn_pnbuf;
11470 cnp.cn_namelen = (int)name_len;
11471 revert_data.sr_cnp = &cnp;
11472
11473 error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
11474 mount_iterdrop(mp);
11475 FREE(name_buf, M_TEMP);
11476
11477 if (error) {
11478 /* If there was any error, try again using VNOP_IOCTL */
11479
11480 vnode_t snapdvp;
11481 struct nameidata namend;
11482
11483 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP,
11484 OP_LOOKUP, ctx);
11485 if (error) {
11486 return (error);
11487 }
11488
11489
11490#ifndef APFSIOC_REVERT_TO_SNAPSHOT
11491#define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
11492#endif
11493
11494#ifndef APFS_REVERT_TO_SNAPSHOT
11495#define APFS_REVERT_TO_SNAPSHOT IOCBASECMD(APFSIOC_REVERT_TO_SNAPSHOT)
11496#endif
11497
11498 error = VNOP_IOCTL(namend.ni_vp, APFS_REVERT_TO_SNAPSHOT, (caddr_t) NULL,
11499 0, ctx);
11500
11501 vnode_put(namend.ni_vp);
11502 nameidone(&namend);
11503 vnode_put(snapdvp);
11504 vnode_put(rvp);
11505 }
11506
11507 return (error);
11508}
11509
11510/*
11511 * rename a Filesystem snapshot
11512 *
11513 * get the vnode for the unnamed snapshot directory and the snapshot and
11514 * rename the snapshot. This is a very specialised (and simple) case of
11515 * rename(2) (which has to deal with a lot more complications). It differs
11516 * slightly from rename(2) in that EEXIST is returned if the new name exists.
11517 */
11518static int
11519snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
11520 __unused uint32_t flags, vfs_context_t ctx)
11521{
11522 vnode_t rvp, snapdvp;
11523 int error, i;
11524 caddr_t newname_buf;
11525 size_t name_len;
11526 vnode_t fvp;
11527 struct nameidata *fromnd, *tond;
11528 /* carving out a chunk for structs that are too big to be on stack. */
11529 struct {
11530 struct nameidata from_node;
11531 struct nameidata to_node;
11532 } * __rename_data;
11533
11534 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
11535 fromnd = &__rename_data->from_node;
11536 tond = &__rename_data->to_node;
11537
11538 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE,
11539 OP_UNLINK, ctx);
11540 if (error)
11541 goto out;
11542 fvp = fromnd->ni_vp;
11543
11544 MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11545 error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
11546 if (error)
11547 goto out1;
11548
11549 /*
11550 * Some sanity checks- new name can't be empty, "." or ".." or have
11551 * slashes.
11552 * (the length returned by copyinstr includes the terminating NUL)
11553 *
11554 * The FS rename VNOP is suppossed to handle this but we'll pick it
11555 * off here itself.
11556 */
11557 if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') ||
11558 (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) {
11559 error = EINVAL;
11560 goto out1;
11561 }
11562 for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++);
11563 if (i < (int)name_len) {
11564 error = EINVAL;
11565 goto out1;
11566 }
11567
11568#if CONFIG_MACF
11569 error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp),
11570 newname_buf);
11571 if (error)
11572 goto out1;
11573#endif
11574
11575 NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
11576 UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx);
11577 tond->ni_dvp = snapdvp;
11578
11579 error = namei(tond);
11580 if (error) {
11581 goto out2;
11582 } else if (tond->ni_vp) {
11583 /*
11584 * snapshot rename behaves differently than rename(2) - if the
11585 * new name exists, EEXIST is returned.
11586 */
11587 vnode_put(tond->ni_vp);
11588 error = EEXIST;
11589 goto out2;
11590 }
11591
11592 error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
11593 &tond->ni_cnd, ctx);
11594
11595out2:
11596 nameidone(tond);
11597out1:
11598 FREE(newname_buf, M_TEMP);
11599 vnode_put(fvp);
11600 vnode_put(snapdvp);
11601 vnode_put(rvp);
11602 nameidone(fromnd);
11603out:
11604 FREE(__rename_data, M_TEMP);
11605 return (error);
11606}
11607
11608/*
11609 * Mount a Filesystem snapshot
11610 *
11611 * get the vnode for the unnamed snapshot directory and the snapshot and
11612 * mount the snapshot.
11613 */
11614static int
11615snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
11616 user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
11617{
11618 vnode_t rvp, snapdvp, snapvp, vp, pvp;
11619 int error;
11620 struct nameidata *snapndp, *dirndp;
11621 /* carving out a chunk for structs that are too big to be on stack. */
11622 struct {
11623 struct nameidata snapnd;
11624 struct nameidata dirnd;
11625 } * __snapshot_mount_data;
11626
11627 MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data),
11628 M_TEMP, M_WAITOK);
11629 snapndp = &__snapshot_mount_data->snapnd;
11630 dirndp = &__snapshot_mount_data->dirnd;
11631
11632 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP,
11633 OP_LOOKUP, ctx);
11634 if (error)
11635 goto out;
11636
11637 snapvp = snapndp->ni_vp;
11638 if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) {
11639 error = EIO;
11640 goto out1;
11641 }
11642
11643 /* Get the vnode to be covered */
11644 NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
11645 UIO_USERSPACE, directory, ctx);
11646 error = namei(dirndp);
11647 if (error)
11648 goto out1;
11649
11650 vp = dirndp->ni_vp;
11651 pvp = dirndp->ni_dvp;
11652
11653 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
11654 error = EINVAL;
11655 } else {
11656 mount_t mp = vnode_mount(rvp);
11657 struct fs_snapshot_mount_args smnt_data;
11658
11659 smnt_data.sm_mp = mp;
11660 smnt_data.sm_cnp = &snapndp->ni_cnd;
11661 error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
11662 &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), 0,
11663 KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
11664 if (error) {
11665 /* Retry with user passed args */
11666 error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp,
11667 vp, &dirndp->ni_cnd, CAST_USER_ADDR_T(mnt_data), 0,
11668 0, NULL, FALSE, ctx);
11669 }
11670 }
11671
11672 vnode_put(vp);
11673 vnode_put(pvp);
11674 nameidone(dirndp);
11675out1:
11676 vnode_put(snapvp);
11677 vnode_put(snapdvp);
11678 vnode_put(rvp);
11679 nameidone(snapndp);
11680out:
11681 FREE(__snapshot_mount_data, M_TEMP);
11682 return (error);
11683}
11684
11685/*
11686 * FS snapshot operations dispatcher
11687 */
11688int
11689fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap,
11690 __unused int32_t *retval)
11691{
11692 int error;
11693 vfs_context_t ctx = vfs_context_current();
11694
11695 error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0);
11696 if (error)
11697 return (error);
11698
11699 switch (uap->op) {
11700 case SNAPSHOT_OP_CREATE:
11701 error = snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx);
11702 break;
11703 case SNAPSHOT_OP_DELETE:
11704 error = snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx);
11705 break;
11706 case SNAPSHOT_OP_RENAME:
11707 error = snapshot_rename(uap->dirfd, uap->name1, uap->name2,
11708 uap->flags, ctx);
11709 break;
11710 case SNAPSHOT_OP_MOUNT:
11711 error = snapshot_mount(uap->dirfd, uap->name1, uap->name2,
11712 uap->data, uap->flags, ctx);
11713 break;
11714 case SNAPSHOT_OP_REVERT:
11715 error = snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx);
11716 break;
11717 default:
11718 error = ENOSYS;
11719 }
11720
11721 return (error);
11722}