]> git.saurik.com Git - apple/xnu.git/blame - bsd/vfs/vfs_syscalls.c
xnu-3789.60.24.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
CommitLineData
1c79356b 1/*
39037602 2 * Copyright (c) 1995-2016 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39037602 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39037602 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39037602 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39037602 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
2d21ac55
A
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
1c79356b
A
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/namei.h>
77#include <sys/filedesc.h>
78#include <sys/kernel.h>
91447636 79#include <sys/file_internal.h>
1c79356b 80#include <sys/stat.h>
91447636
A
81#include <sys/vnode_internal.h>
82#include <sys/mount_internal.h>
83#include <sys/proc_internal.h>
84#include <sys/kauth.h>
85#include <sys/uio_internal.h>
1c79356b 86#include <sys/malloc.h>
91447636 87#include <sys/mman.h>
1c79356b
A
88#include <sys/dirent.h>
89#include <sys/attr.h>
90#include <sys/sysctl.h>
91#include <sys/ubc.h>
9bccf70c 92#include <sys/quota.h>
91447636
A
93#include <sys/kdebug.h>
94#include <sys/fsevents.h>
6d2010ae 95#include <sys/imgsrc.h>
91447636
A
96#include <sys/sysproto.h>
97#include <sys/xattr.h>
b0d623f7
A
98#include <sys/fcntl.h>
99#include <sys/fsctl.h>
91447636 100#include <sys/ubc_internal.h>
593a1d5f 101#include <sys/disk.h>
3e170ce0 102#include <sys/content_protection.h>
39037602
A
103#include <sys/clonefile.h>
104#include <sys/snapshot.h>
490019cf 105#include <sys/priv.h>
91447636
A
106#include <machine/cons.h>
107#include <machine/limits.h>
108#include <miscfs/specfs/specdev.h>
e5568f75 109
b0d623f7 110#include <security/audit/audit.h>
e5568f75
A
111#include <bsm/audit_kevents.h>
112
91447636
A
113#include <mach/mach_types.h>
114#include <kern/kern_types.h>
115#include <kern/kalloc.h>
6d2010ae 116#include <kern/task.h>
91447636
A
117
118#include <vm/vm_pageout.h>
39037602 119#include <vm/vm_protos.h>
1c79356b 120
91447636 121#include <libkern/OSAtomic.h>
b0d623f7 122#include <pexpert/pexpert.h>
3e170ce0 123#include <IOKit/IOBSD.h>
55e303ae 124
490019cf
A
125#if ROUTEFS
126#include <miscfs/routefs/routefs.h>
127#endif /* ROUTEFS */
128
2d21ac55
A
129#if CONFIG_MACF
130#include <security/mac.h>
131#include <security/mac_framework.h>
132#endif
1c79356b 133
/*
 * GET_PATH(x) obtains a path buffer into x; RELEASE_PATH(x) gives it back.
 * With CONFIG_FSE the buffer comes from get_pathbuff()/release_pathbuff();
 * otherwise a MAXPATHLEN buffer is taken from / returned to the M_NAMEI zone.
 *
 * Both macros expand to a single statement (do { } while (0)) so that the
 * caller's own trailing ';' terminates them and they remain safe inside an
 * unbraced if/else.
 */
#if CONFIG_FSE
#define GET_PATH(x) \
	do { (x) = get_pathbuff(); } while (0)
#define RELEASE_PATH(x) \
	do { release_pathbuff(x); } while (0)
#else
#define GET_PATH(x) \
	do { MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK); } while (0)
#define RELEASE_PATH(x) \
	do { FREE_ZONE((x), MAXPATHLEN, M_NAMEI); } while (0)
#endif /* CONFIG_FSE */
145
146/* struct for checkdirs iteration */
147struct cdirargs {
148 vnode_t olddp;
149 vnode_t newdp;
150};
151/* callback for checkdirs iteration */
152static int checkdirs_callback(proc_t p, void * arg);
1c79356b 153
91447636 154static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
6601e61a 155static int checkdirs(vnode_t olddp, vfs_context_t ctx);
91447636
A
156void enablequotas(struct mount *mp, vfs_context_t ctx);
157static int getfsstat_callback(mount_t mp, void * arg);
158static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
2d21ac55 159static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
91447636 160static int sync_callback(mount_t, void *);
fe8ab488
A
161static void sync_thread(void *, __unused wait_result_t);
162static int sync_async(int);
39037602
A
163static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
164 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
91447636 165 boolean_t partial_copy);
b0d623f7
A
166static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
167 user_addr_t bufp);
168static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
6d2010ae
A
169static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
170 struct componentname *cnp, user_addr_t fsmountargs,
171 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
172 vfs_context_t ctx);
173void vfs_notify_mount(vnode_t pdvp);
174
175int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
b7266188 176
fe8ab488
A
177struct fd_vn_data * fg_vn_data_alloc(void);
178
c18c124e
A
179/*
180 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
181 * Concurrent lookups (or lookups by ids) on hard links can cause the
182 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
183 * does) to return ENOENT as the path cannot be returned from the name cache
184 * alone. We have no option but to retry and hope to get one namei->reverse path
185 * generation done without an intervening lookup, lookup by id on the hard link
186 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
187 * which currently are the MAC hooks for rename, unlink and rmdir.
188 */
189#define MAX_AUTHORIZE_ENOENT_RETRIES 1024
190
fe8ab488
A
191static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
192
193static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
194
b7266188 195#ifdef CONFIG_IMGSRC_ACCESS
b7266188
A
196static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
197static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
198static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
199static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
200static void mount_end_update(mount_t mp);
6d2010ae 201static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
b7266188
A
202#endif /* CONFIG_IMGSRC_ACCESS */
203
2d21ac55
A
204int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
205
206__private_extern__
207int sync_internal(void);
208
2d21ac55 209__private_extern__
c18c124e 210int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
91447636 211
fe8ab488
A
212extern lck_grp_t *fd_vn_lck_grp;
213extern lck_grp_attr_t *fd_vn_lck_grp_attr;
214extern lck_attr_t *fd_vn_lck_attr;
215
2d21ac55
A
/*
 * Incremented each time a mount or unmount operation occurs.
 * Used to invalidate the cached value of the rootvp in the
 * mount structure utilized by cache_lookup_path.
 */
uint32_t mount_generation = 0;

/* Counts the number of mount and unmount operations. */
unsigned int vfs_nummntops = 0;
225
39236c6e
A
226extern const struct fileops vnops;
227#if CONFIG_APPLEDOUBLE
39037602 228extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
39236c6e 229#endif /* CONFIG_APPLEDOUBLE */
91447636 230
1c79356b
A
231/*
232 * Virtual File System System Calls
233 */
234
490019cf 235#if NFSCLIENT || DEVFS || ROUTEFS
6d2010ae
A
/*
 * Private in-kernel mounting SPI (not exported).
 * Despite the historical "NFS only" note, this section is built whenever
 * NFSCLIENT, DEVFS or ROUTEFS is configured.
 */
239 __private_extern__
240boolean_t
241vfs_iskernelmount(mount_t mp)
242{
243 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
244}
245
246 __private_extern__
247int
248kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
249 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
250{
251 struct nameidata nd;
252 boolean_t did_namei;
253 int error;
254
39037602 255 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
6d2010ae
A
256 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
257
258 /*
259 * Get the vnode to be covered if it's not supplied
260 */
261 if (vp == NULLVP) {
262 error = namei(&nd);
263 if (error)
264 return (error);
265 vp = nd.ni_vp;
266 pvp = nd.ni_dvp;
267 did_namei = TRUE;
268 } else {
269 char *pnbuf = CAST_DOWN(char *, path);
270
271 nd.ni_cnd.cn_pnbuf = pnbuf;
272 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
273 did_namei = FALSE;
274 }
275
276 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
277 syscall_flags, kern_flags, NULL, TRUE, ctx);
278
279 if (did_namei) {
280 vnode_put(vp);
281 vnode_put(pvp);
282 nameidone(&nd);
283 }
284
285 return (error);
286}
fe8ab488 287#endif /* NFSCLIENT || DEVFS */
6d2010ae 288
1c79356b
A
289/*
290 * Mount a file system.
291 */
1c79356b
A
292/* ARGSUSED */
293int
b0d623f7 294mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
2d21ac55
A
295{
296 struct __mac_mount_args muap;
297
298 muap.type = uap->type;
299 muap.path = uap->path;
300 muap.flags = uap->flags;
301 muap.data = uap->data;
302 muap.mac_p = USER_ADDR_NULL;
303 return (__mac_mount(p, &muap, retval));
304}
305
6d2010ae 306void
39037602 307vfs_notify_mount(vnode_t pdvp)
6d2010ae
A
308{
309 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
310 lock_vnode_and_post(pdvp, NOTE_WRITE);
311}
312
b0d623f7
A
313/*
314 * __mac_mount:
315 * Mount a file system taking into account MAC label behavior.
316 * See mount(2) man page for more information
317 *
318 * Parameters: p Process requesting the mount
319 * uap User argument descriptor (see below)
39037602 320 * retval (ignored)
b0d623f7
A
321 *
322 * Indirect: uap->type Filesystem type
323 * uap->path Path to mount
39037602
A
324 * uap->data Mount arguments
325 * uap->mac_p MAC info
b0d623f7 326 * uap->flags Mount flags
39037602 327 *
b0d623f7
A
328 *
329 * Returns: 0 Success
330 * !0 Not success
331 */
6d2010ae
A
332boolean_t root_fs_upgrade_try = FALSE;
333
2d21ac55 334int
b0d623f7 335__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
1c79356b 336{
39236c6e
A
337 vnode_t pvp = NULL;
338 vnode_t vp = NULL;
339 int need_nameidone = 0;
6d2010ae
A
340 vfs_context_t ctx = vfs_context_current();
341 char fstypename[MFSNAMELEN];
342 struct nameidata nd;
343 size_t dummy=0;
344 char *labelstr = NULL;
345 int flags = uap->flags;
346 int error;
39037602 347#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
6d2010ae 348 boolean_t is_64bit = IS_64BIT_PROCESS(p);
39236c6e
A
349#else
350#pragma unused(p)
351#endif
6d2010ae
A
352 /*
353 * Get the fs type name from user space
354 */
355 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
356 if (error)
357 return (error);
358
359 /*
360 * Get the vnode to be covered
361 */
39037602 362 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
6d2010ae
A
363 UIO_USERSPACE, uap->path, ctx);
364 error = namei(&nd);
39236c6e
A
365 if (error) {
366 goto out;
367 }
368 need_nameidone = 1;
6d2010ae
A
369 vp = nd.ni_vp;
370 pvp = nd.ni_dvp;
39037602 371
6d2010ae
A
372#ifdef CONFIG_IMGSRC_ACCESS
373 /* Mounting image source cannot be batched with other operations */
374 if (flags == MNT_IMGSRC_BY_INDEX) {
375 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
376 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
377 goto out;
378 }
379#endif /* CONFIG_IMGSRC_ACCESS */
380
381#if CONFIG_MACF
382 /*
383 * Get the label string (if any) from user space
384 */
385 if (uap->mac_p != USER_ADDR_NULL) {
386 struct user_mac mac;
387 size_t ulen = 0;
388
389 if (is_64bit) {
390 struct user64_mac mac64;
391 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
392 mac.m_buflen = mac64.m_buflen;
393 mac.m_string = mac64.m_string;
394 } else {
395 struct user32_mac mac32;
396 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
397 mac.m_buflen = mac32.m_buflen;
398 mac.m_string = mac32.m_string;
399 }
400 if (error)
401 goto out;
402 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
403 (mac.m_buflen < 2)) {
404 error = EINVAL;
405 goto out;
406 }
407 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
408 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
409 if (error) {
410 goto out;
411 }
412 AUDIT_ARG(mac_string, labelstr);
413 }
414#endif /* CONFIG_MACF */
415
416 AUDIT_ARG(fflags, flags);
417
4bd07ac2
A
418#if SECURE_KERNEL
419 if (flags & MNT_UNION) {
420 /* No union mounts on release kernels */
421 error = EPERM;
422 goto out;
423 }
424#endif
425
6d2010ae 426 if ((vp->v_flag & VROOT) &&
39236c6e
A
427 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
428 if (!(flags & MNT_UNION)) {
6d2010ae 429 flags |= MNT_UPDATE;
39236c6e
A
430 }
431 else {
39037602 432 /*
39236c6e 433 * For a union mount on '/', treat it as fresh
39037602
A
434 * mount instead of update.
435 * Otherwise, union mouting on '/' used to panic the
436 * system before, since mnt_vnodecovered was found to
437 * be NULL for '/' which is required for unionlookup
39236c6e
A
438 * after it gets ENOENT on union mount.
439 */
440 flags = (flags & ~(MNT_UPDATE));
441 }
442
4bd07ac2 443#if SECURE_KERNEL
39236c6e
A
444 if ((flags & MNT_RDONLY) == 0) {
445 /* Release kernels are not allowed to mount "/" as rw */
446 error = EPERM;
39037602 447 goto out;
39236c6e 448 }
39236c6e
A
449#endif
450 /*
451 * See 7392553 for more details on why this check exists.
452 * Suffice to say: If this check is ON and something tries
453 * to mount the rootFS RW, we'll turn off the codesign
39037602
A
454 * bitmap optimization.
455 */
6d2010ae 456#if CHECK_CS_VALIDATION_BITMAP
39236c6e 457 if ((flags & MNT_RDONLY) == 0 ) {
6d2010ae
A
458 root_fs_upgrade_try = TRUE;
459 }
460#endif
461 }
462
463 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
464 labelstr, FALSE, ctx);
39236c6e 465
6d2010ae 466out:
39236c6e 467
6d2010ae
A
468#if CONFIG_MACF
469 if (labelstr)
470 FREE(labelstr, M_MACTEMP);
471#endif /* CONFIG_MACF */
472
39236c6e
A
473 if (vp) {
474 vnode_put(vp);
475 }
476 if (pvp) {
477 vnode_put(pvp);
478 }
479 if (need_nameidone) {
480 nameidone(&nd);
481 }
6d2010ae
A
482
483 return (error);
484}
485
486/*
487 * common mount implementation (final stage of mounting)
39037602 488
6d2010ae
A
489 * Arguments:
490 * fstypename file system type (ie it's vfs name)
491 * pvp parent of covered vnode
492 * vp covered vnode
493 * cnp component name (ie path) of covered vnode
494 * flags generic mount flags
495 * fsmountargs file system specific data
496 * labelstr optional MAC label
497 * kernelmount TRUE for mounts initiated from inside the kernel
498 * ctx caller's context
499 */
500static int
501mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
502 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
503 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
504{
39236c6e
A
505#if !CONFIG_MACF
506#pragma unused(labelstr)
507#endif
91447636
A
508 struct vnode *devvp = NULLVP;
509 struct vnode *device_vnode = NULLVP;
2d21ac55
A
510#if CONFIG_MACF
511 struct vnode *rvp;
512#endif
1c79356b 513 struct mount *mp;
6601e61a 514 struct vfstable *vfsp = (struct vfstable *)0;
6d2010ae 515 struct proc *p = vfs_context_proc(ctx);
91447636 516 int error, flag = 0;
91447636 517 user_addr_t devpath = USER_ADDR_NULL;
91447636
A
518 int ronly = 0;
519 int mntalloc = 0;
b0d623f7 520 boolean_t vfsp_ref = FALSE;
743b1565 521 boolean_t is_rwlock_locked = FALSE;
b0d623f7
A
522 boolean_t did_rele = FALSE;
523 boolean_t have_usecount = FALSE;
9bccf70c 524
1c79356b 525 /*
6d2010ae 526 * Process an update for an existing mount
1c79356b 527 */
6d2010ae 528 if (flags & MNT_UPDATE) {
1c79356b 529 if ((vp->v_flag & VROOT) == 0) {
91447636
A
530 error = EINVAL;
531 goto out1;
1c79356b
A
532 }
533 mp = vp->v_mount;
d12e1678 534
91447636 535 /* unmount in progress return error */
b0d623f7 536 mount_lock_spin(mp);
91447636
A
537 if (mp->mnt_lflag & MNT_LUNMOUNT) {
538 mount_unlock(mp);
539 error = EBUSY;
540 goto out1;
d12e1678 541 }
91447636
A
542 mount_unlock(mp);
543 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 544 is_rwlock_locked = TRUE;
1c79356b
A
545 /*
546 * We only allow the filesystem to be reloaded if it
547 * is currently mounted read-only.
548 */
6d2010ae 549 if ((flags & MNT_RELOAD) &&
1c79356b 550 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
91447636
A
551 error = ENOTSUP;
552 goto out1;
1c79356b 553 }
b7266188 554
316670eb
A
555 /*
556 * If content protection is enabled, update mounts are not
557 * allowed to turn it off.
558 */
39037602 559 if ((mp->mnt_flag & MNT_CPROTECT) &&
316670eb
A
560 ((flags & MNT_CPROTECT) == 0)) {
561 error = EINVAL;
562 goto out1;
563 }
564
39037602 565#ifdef CONFIG_IMGSRC_ACCESS
b7266188
A
566 /* Can't downgrade the backer of the root FS */
567 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
6d2010ae 568 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
b7266188
A
569 error = ENOTSUP;
570 goto out1;
571 }
572#endif /* CONFIG_IMGSRC_ACCESS */
573
1c79356b
A
574 /*
575 * Only root, or the user that did the original mount is
576 * permitted to update it.
577 */
2d21ac55
A
578 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
579 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
580 goto out1;
581 }
582#if CONFIG_MACF
583 error = mac_mount_check_remount(ctx, mp);
584 if (error != 0) {
91447636 585 goto out1;
1c79356b 586 }
2d21ac55 587#endif
1c79356b 588 /*
91447636
A
589 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
590 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
1c79356b 591 */
6d2010ae
A
592 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
593 flags |= MNT_NOSUID | MNT_NODEV;
d12e1678 594 if (mp->mnt_flag & MNT_NOEXEC)
6d2010ae 595 flags |= MNT_NOEXEC;
1c79356b 596 }
d12e1678
A
597 flag = mp->mnt_flag;
598
316670eb
A
599
600
6d2010ae 601 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
d12e1678 602
91447636 603 vfsp = mp->mnt_vtable;
1c79356b
A
604 goto update;
605 }
1c79356b 606 /*
91447636 607 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
1c79356b
A
608 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
609 */
6d2010ae
A
610 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
611 flags |= MNT_NOSUID | MNT_NODEV;
1c79356b 612 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
6d2010ae 613 flags |= MNT_NOEXEC;
1c79356b 614 }
91447636 615
55e303ae
A
616 /* XXXAUDIT: Should we capture the type on the error path as well? */
617 AUDIT_ARG(text, fstypename);
91447636 618 mount_list_lock();
1c79356b 619 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
b0d623f7
A
620 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
621 vfsp->vfc_refcount++;
622 vfsp_ref = TRUE;
1c79356b 623 break;
b0d623f7 624 }
91447636 625 mount_list_unlock();
1c79356b 626 if (vfsp == NULL) {
91447636
A
627 error = ENODEV;
628 goto out1;
1c79356b 629 }
6d2010ae
A
630
631 /*
632 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
633 */
634 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
635 error = EINVAL; /* unsupported request */
2d21ac55 636 goto out1;
6d2010ae
A
637 }
638
639 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
640 if (error != 0) {
91447636 641 goto out1;
1c79356b 642 }
1c79356b
A
643
644 /*
6d2010ae 645 * Allocate and initialize the filesystem (mount_t)
1c79356b 646 */
b0d623f7 647 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
1c79356b 648 M_MOUNT, M_WAITOK);
b0d623f7 649 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
91447636 650 mntalloc = 1;
0b4e3aa0
A
651
652 /* Initialize the default IO constraints */
653 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
654 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
91447636
A
655 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
656 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
657 mp->mnt_devblocksize = DEV_BSIZE;
2d21ac55 658 mp->mnt_alignmentmask = PAGE_MASK;
b0d623f7
A
659 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
660 mp->mnt_ioscale = 1;
2d21ac55
A
661 mp->mnt_ioflags = 0;
662 mp->mnt_realrootvp = NULLVP;
663 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
91447636
A
664
665 TAILQ_INIT(&mp->mnt_vnodelist);
666 TAILQ_INIT(&mp->mnt_workerqueue);
667 TAILQ_INIT(&mp->mnt_newvnodes);
668 mount_lock_init(mp);
669 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 670 is_rwlock_locked = TRUE;
1c79356b 671 mp->mnt_op = vfsp->vfc_vfsops;
91447636 672 mp->mnt_vtable = vfsp;
91447636 673 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
1c79356b 674 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
fe8ab488
A
675 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
676 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1c79356b 677 mp->mnt_vnodecovered = vp;
2d21ac55 678 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
6d2010ae
A
679 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
680 mp->mnt_devbsdunit = 0;
1c79356b 681
91447636
A
682 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
683 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
6d2010ae 684
490019cf 685#if NFSCLIENT || DEVFS || ROUTEFS
6d2010ae
A
686 if (kernelmount)
687 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
688 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
689 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
fe8ab488 690#endif /* NFSCLIENT || DEVFS */
6d2010ae 691
1c79356b
A
692update:
693 /*
694 * Set the mount level flags.
695 */
6d2010ae 696 if (flags & MNT_RDONLY)
1c79356b 697 mp->mnt_flag |= MNT_RDONLY;
6d2010ae
A
698 else if (mp->mnt_flag & MNT_RDONLY) {
699 // disallow read/write upgrades of file systems that
700 // had the TYPENAME_OVERRIDE feature set.
701 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
702 error = EPERM;
703 goto out1;
704 }
1c79356b 705 mp->mnt_kern_flag |= MNTK_WANTRDWR;
6d2010ae 706 }
0b4e3aa0
A
707 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
708 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
6d2010ae
A
709 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
710 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
711 MNT_QUARANTINE | MNT_CPROTECT);
813fb2f6
A
712
713#if SECURE_KERNEL
714#if !CONFIG_MNT_SUID
715 /*
716 * On release builds of iOS based platforms, always enforce NOSUID and NODEV on
717 * all mounts. We do this here because we can catch update mounts as well as
718 * non-update mounts in this case.
719 */
720 mp->mnt_flag |= (MNT_NOSUID);
721#endif
722#endif
723
6d2010ae
A
724 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
725 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
726 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
727 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
728 MNT_QUARANTINE | MNT_CPROTECT);
2d21ac55
A
729
730#if CONFIG_MACF
6d2010ae 731 if (flags & MNT_MULTILABEL) {
2d21ac55
A
732 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
733 error = EINVAL;
734 goto out1;
735 }
736 mp->mnt_flag |= MNT_MULTILABEL;
737 }
738#endif
6d2010ae
A
739 /*
740 * Process device path for local file systems if requested
741 */
39037602
A
742 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS &&
743 !(internal_flags & KERNEL_MOUNT_SNAPSHOT)) {
6d2010ae 744 if (vfs_context_is64bit(ctx)) {
91447636 745 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
39037602 746 goto out1;
91447636
A
747 fsmountargs += sizeof(devpath);
748 } else {
b0d623f7 749 user32_addr_t tmp;
91447636 750 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
39037602 751 goto out1;
91447636
A
752 /* munge into LP64 addr */
753 devpath = CAST_USER_ADDR_T(tmp);
754 fsmountargs += sizeof(tmp);
755 }
756
6d2010ae 757 /* Lookup device and authorize access to it */
91447636 758 if ((devpath)) {
6d2010ae
A
759 struct nameidata nd;
760
761 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
762 if ( (error = namei(&nd)) )
91447636
A
763 goto out1;
764
3e170ce0 765 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
6d2010ae 766 devvp = nd.ni_vp;
91447636 767
6d2010ae 768 nameidone(&nd);
91447636
A
769
770 if (devvp->v_type != VBLK) {
771 error = ENOTBLK;
772 goto out2;
773 }
774 if (major(devvp->v_rdev) >= nblkdev) {
775 error = ENXIO;
776 goto out2;
777 }
778 /*
779 * If mount by non-root, then verify that user has necessary
780 * permissions on the device.
781 */
2d21ac55 782 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
6d2010ae
A
783 mode_t accessmode = KAUTH_VNODE_READ_DATA;
784
91447636
A
785 if ((mp->mnt_flag & MNT_RDONLY) == 0)
786 accessmode |= KAUTH_VNODE_WRITE_DATA;
2d21ac55 787 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
91447636
A
788 goto out2;
789 }
790 }
6d2010ae
A
791 /* On first mount, preflight and open device */
792 if (devpath && ((flags & MNT_UPDATE) == 0)) {
91447636
A
793 if ( (error = vnode_ref(devvp)) )
794 goto out2;
795 /*
796 * Disallow multiple mounts of the same device.
797 * Disallow mounting of a device that is currently in use
798 * (except for root, which might share swap device for miniroot).
799 * Flush out any old buffers remaining from a previous use.
800 */
801 if ( (error = vfs_mountedon(devvp)) )
802 goto out3;
39037602 803
91447636
A
804 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
805 error = EBUSY;
806 goto out3;
807 }
2d21ac55 808 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
91447636
A
809 error = ENOTBLK;
810 goto out3;
811 }
812 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
813 goto out3;
814
815 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
2d21ac55
A
816#if CONFIG_MACF
817 error = mac_vnode_check_open(ctx,
818 devvp,
819 ronly ? FREAD : FREAD|FWRITE);
820 if (error)
821 goto out3;
822#endif /* MAC */
823 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
91447636
A
824 goto out3;
825
826 mp->mnt_devvp = devvp;
827 device_vnode = devvp;
b0d623f7 828
6d2010ae
A
829 } else if ((mp->mnt_flag & MNT_RDONLY) &&
830 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
831 (device_vnode = mp->mnt_devvp)) {
832 dev_t dev;
833 int maj;
834 /*
835 * If upgrade to read-write by non-root, then verify
836 * that user has necessary permissions on the device.
837 */
838 vnode_getalways(device_vnode);
b0d623f7 839
6d2010ae 840 if (suser(vfs_context_ucred(ctx), NULL) &&
39037602 841 (error = vnode_authorize(device_vnode, NULL,
6d2010ae
A
842 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
843 ctx)) != 0) {
844 vnode_put(device_vnode);
845 goto out2;
846 }
b0d623f7 847
6d2010ae
A
848 /* Tell the device that we're upgrading */
849 dev = (dev_t)device_vnode->v_rdev;
850 maj = major(dev);
b0d623f7 851
6d2010ae
A
852 if ((u_int)maj >= (u_int)nblkdev)
853 panic("Volume mounted on a device with invalid major number.");
b0d623f7 854
6d2010ae
A
855 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
856 vnode_put(device_vnode);
91447636 857 device_vnode = NULLVP;
6d2010ae
A
858 if (error != 0) {
859 goto out2;
860 }
91447636
A
861 }
862 }
2d21ac55 863#if CONFIG_MACF
6d2010ae 864 if ((flags & MNT_UPDATE) == 0) {
2d21ac55
A
865 mac_mount_label_init(mp);
866 mac_mount_label_associate(ctx, mp);
867 }
6d2010ae
A
868 if (labelstr) {
869 if ((flags & MNT_UPDATE) != 0) {
870 error = mac_mount_check_label_update(ctx, mp);
2d21ac55
A
871 if (error != 0)
872 goto out3;
873 }
2d21ac55
A
874 }
875#endif
1c79356b
A
876 /*
877 * Mount the filesystem.
878 */
39037602
A
879 if (internal_flags & KERNEL_MOUNT_SNAPSHOT) {
880 error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT,
881 (caddr_t)fsmountargs, 0, ctx);
882 } else {
883 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
884 }
d12e1678 885
6d2010ae 886 if (flags & MNT_UPDATE) {
1c79356b
A
887 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
888 mp->mnt_flag &= ~MNT_RDONLY;
889 mp->mnt_flag &=~
890 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
891 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
892 if (error)
6d2010ae 893 mp->mnt_flag = flag; /* restore flag value */
91447636
A
894 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
895 lck_rw_done(&mp->mnt_rwlock);
743b1565 896 is_rwlock_locked = FALSE;
9bccf70c 897 if (!error)
2d21ac55 898 enablequotas(mp, ctx);
6d2010ae 899 goto exit;
1c79356b 900 }
6d2010ae 901
1c79356b
A
902 /*
903 * Put the new filesystem on the mount list after root.
904 */
6601e61a 905 if (error == 0) {
2d21ac55
A
906 struct vfs_attr vfsattr;
907#if CONFIG_MACF
908 if (vfs_flags(mp) & MNT_MULTILABEL) {
909 error = VFS_ROOT(mp, &rvp, ctx);
910 if (error) {
911 printf("%s() VFS_ROOT returned %d\n", __func__, error);
912 goto out3;
913 }
2d21ac55 914 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
b0d623f7
A
915 /*
916 * drop reference provided by VFS_ROOT
917 */
918 vnode_put(rvp);
919
2d21ac55
A
920 if (error)
921 goto out3;
922 }
923#endif /* MAC */
924
925 vnode_lock_spin(vp);
926 CLR(vp->v_flag, VMOUNT);
91447636
A
927 vp->v_mountedhere = mp;
928 vnode_unlock(vp);
929
2d21ac55
A
930 /*
931 * taking the name_cache_lock exclusively will
932 * insure that everyone is out of the fast path who
933 * might be trying to use a now stale copy of
934 * vp->v_mountedhere->mnt_realrootvp
935 * bumping mount_generation causes the cached values
936 * to be invalidated
937 */
938 name_cache_lock();
939 mount_generation++;
940 name_cache_unlock();
941
b0d623f7
A
942 error = vnode_ref(vp);
943 if (error != 0) {
944 goto out4;
945 }
946
947 have_usecount = TRUE;
91447636 948
2d21ac55 949 error = checkdirs(vp, ctx);
6601e61a
A
950 if (error != 0) {
951 /* Unmount the filesystem as cdir/rdirs cannot be updated */
952 goto out4;
953 }
39037602
A
954 /*
955 * there is no cleanup code here so I have made it void
91447636
A
956 * we need to revisit this
957 */
2d21ac55 958 (void)VFS_START(mp, 0, ctx);
1c79356b 959
6d2010ae
A
960 if (mount_list_add(mp) != 0) {
961 /*
962 * The system is shutting down trying to umount
963 * everything, so fail with a plausible errno.
964 */
965 error = EBUSY;
b0d623f7
A
966 goto out4;
967 }
6601e61a
A
968 lck_rw_done(&mp->mnt_rwlock);
969 is_rwlock_locked = FALSE;
970
2d21ac55
A
971 /* Check if this mounted file system supports EAs or named streams. */
972 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
973 VFSATTR_INIT(&vfsattr);
974 VFSATTR_WANTED(&vfsattr, f_capabilities);
975 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
39037602 976 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
2d21ac55
A
977 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
978 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
979 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
980 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
981 }
982#if NAMEDSTREAMS
983 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
984 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
985 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
986 }
987#endif
988 /* Check if this file system supports path from id lookups. */
989 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
990 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
991 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
992 } else if (mp->mnt_flag & MNT_DOVOLFS) {
993 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
994 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
995 }
39037602
A
996
997 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) &&
998 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) {
999 mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS;
1000 }
2d21ac55
A
1001 }
1002 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
1003 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1004 }
1005 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
1006 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
1007 }
1c79356b 1008 /* increment the operations count */
b0d623f7 1009 OSAddAtomic(1, &vfs_nummntops);
2d21ac55 1010 enablequotas(mp, ctx);
91447636
A
1011
1012 if (device_vnode) {
1013 device_vnode->v_specflags |= SI_MOUNTEDON;
1014
1015 /*
1016 * cache the IO attributes for the underlying physical media...
1017 * an error return indicates the underlying driver doesn't
1018 * support all the queries necessary... however, reasonable
1019 * defaults will have been set, so no reason to bail or care
1020 */
1021 vfs_init_io_attributes(device_vnode, mp);
39037602 1022 }
6601e61a
A
1023
1024 /* Now that mount is setup, notify the listeners */
6d2010ae 1025 vfs_notify_mount(pvp);
3e170ce0
A
1026 IOBSDMountChange(mp, kIOMountChangeMount);
1027
1c79356b 1028 } else {
6d2010ae
A
1029 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1030 if (mp->mnt_vnodelist.tqh_first != NULL) {
39037602 1031 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
6d2010ae
A
1032 mp->mnt_vtable->vfc_name, error);
1033 }
1034
2d21ac55 1035 vnode_lock_spin(vp);
1c79356b 1036 CLR(vp->v_flag, VMOUNT);
6601e61a 1037 vnode_unlock(vp);
91447636
A
1038 mount_list_lock();
1039 mp->mnt_vtable->vfc_refcount--;
1040 mount_list_unlock();
55e303ae 1041
91447636 1042 if (device_vnode ) {
91447636 1043 vnode_rele(device_vnode);
b0d623f7 1044 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
91447636
A
1045 }
1046 lck_rw_done(&mp->mnt_rwlock);
743b1565 1047 is_rwlock_locked = FALSE;
39037602 1048
6d2010ae
A
1049 /*
1050 * if we get here, we have a mount structure that needs to be freed,
1051 * but since the coveredvp hasn't yet been updated to point at it,
1052 * no need to worry about other threads holding a crossref on this mp
1053 * so it's ok to just free it
1054 */
91447636 1055 mount_lock_destroy(mp);
2d21ac55
A
1056#if CONFIG_MACF
1057 mac_mount_label_destroy(mp);
1058#endif
55e303ae 1059 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1c79356b 1060 }
6d2010ae 1061exit:
91447636 1062 /*
6d2010ae 1063 * drop I/O count on the device vp if there was one
91447636
A
1064 */
1065 if (devpath && devvp)
1066 vnode_put(devvp);
b0d623f7 1067
91447636 1068 return(error);
b0d623f7 1069
6d2010ae 1070/* Error condition exits */
6601e61a 1071out4:
2d21ac55 1072 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
39037602
A
1073
1074 /*
6d2010ae
A
1075 * If the mount has been placed on the covered vp,
1076 * it may have been discovered by now, so we have
1077 * to treat this just like an unmount
1078 */
1079 mount_lock_spin(mp);
1080 mp->mnt_lflag |= MNT_LDEAD;
1081 mount_unlock(mp);
1082
6601e61a 1083 if (device_vnode != NULLVP) {
b0d623f7 1084 vnode_rele(device_vnode);
2d21ac55
A
1085 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1086 ctx);
b0d623f7 1087 did_rele = TRUE;
6601e61a 1088 }
6d2010ae 1089
2d21ac55 1090 vnode_lock_spin(vp);
6d2010ae
A
1091
1092 mp->mnt_crossref++;
6601e61a 1093 vp->v_mountedhere = (mount_t) 0;
6d2010ae 1094
6601e61a 1095 vnode_unlock(vp);
6d2010ae 1096
b0d623f7
A
1097 if (have_usecount) {
1098 vnode_rele(vp);
1099 }
91447636 1100out3:
6d2010ae 1101 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
2d21ac55 1102 vnode_rele(devvp);
91447636
A
1103out2:
1104 if (devpath && devvp)
1105 vnode_put(devvp);
1106out1:
743b1565
A
1107 /* Release mnt_rwlock only when it was taken */
1108 if (is_rwlock_locked == TRUE) {
1109 lck_rw_done(&mp->mnt_rwlock);
1110 }
39037602 1111
6601e61a 1112 if (mntalloc) {
6d2010ae
A
1113 if (mp->mnt_crossref)
1114 mount_dropcrossref(mp, vp, 0);
1115 else {
1116 mount_lock_destroy(mp);
2d21ac55 1117#if CONFIG_MACF
6d2010ae 1118 mac_mount_label_destroy(mp);
2d21ac55 1119#endif
6d2010ae
A
1120 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1121 }
b0d623f7 1122 }
b0d623f7 1123 if (vfsp_ref) {
6601e61a
A
1124 mount_list_lock();
1125 vfsp->vfc_refcount--;
1126 mount_list_unlock();
6601e61a 1127 }
91447636
A
1128
1129 return(error);
1c79356b
A
1130}
1131
39037602 1132/*
b7266188
A
1133 * Flush in-core data, check for competing mount attempts,
1134 * and set VMOUNT
1135 */
6d2010ae
A
1136int
1137prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
b7266188 1138{
39236c6e
A
1139#if !CONFIG_MACF
1140#pragma unused(cnp,fsname)
1141#endif
b7266188
A
1142 struct vnode_attr va;
1143 int error;
1144
6d2010ae
A
1145 if (!skip_auth) {
1146 /*
1147 * If the user is not root, ensure that they own the directory
1148 * onto which we are attempting to mount.
1149 */
1150 VATTR_INIT(&va);
1151 VATTR_WANTED(&va, va_uid);
1152 if ((error = vnode_getattr(vp, &va, ctx)) ||
1153 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
39037602 1154 (!vfs_context_issuser(ctx)))) {
6d2010ae
A
1155 error = EPERM;
1156 goto out;
1157 }
b7266188
A
1158 }
1159
1160 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1161 goto out;
1162
1163 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1164 goto out;
1165
1166 if (vp->v_type != VDIR) {
1167 error = ENOTDIR;
1168 goto out;
1169 }
1170
1171 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1172 error = EBUSY;
1173 goto out;
1174 }
1175
1176#if CONFIG_MACF
1177 error = mac_mount_check_mount(ctx, vp,
1178 cnp, fsname);
1179 if (error != 0)
1180 goto out;
1181#endif
1182
1183 vnode_lock_spin(vp);
1184 SET(vp->v_flag, VMOUNT);
1185 vnode_unlock(vp);
1186
1187out:
1188 return error;
1189}
1190
6d2010ae
A
1191#if CONFIG_IMGSRC_ACCESS
1192
1193#if DEBUG
1194#define IMGSRC_DEBUG(args...) printf(args)
1195#else
1196#define IMGSRC_DEBUG(args...) do { } while(0)
39037602 1197#endif
6d2010ae 1198
b7266188
A
1199static int
1200authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1201{
1202 struct nameidata nd;
6d2010ae 1203 vnode_t vp, realdevvp;
b7266188
A
1204 mode_t accessmode;
1205 int error;
1206
6d2010ae
A
1207 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1208 if ( (error = namei(&nd)) ) {
1209 IMGSRC_DEBUG("namei() failed with %d\n", error);
b7266188 1210 return error;
6d2010ae 1211 }
b7266188 1212
b7266188 1213 vp = nd.ni_vp;
b7266188 1214
6d2010ae
A
1215 if (!vnode_isblk(vp)) {
1216 IMGSRC_DEBUG("Not block device.\n");
b7266188
A
1217 error = ENOTBLK;
1218 goto out;
1219 }
6d2010ae
A
1220
1221 realdevvp = mp->mnt_devvp;
1222 if (realdevvp == NULLVP) {
1223 IMGSRC_DEBUG("No device backs the mount.\n");
b7266188
A
1224 error = ENXIO;
1225 goto out;
1226 }
6d2010ae
A
1227
1228 error = vnode_getwithref(realdevvp);
1229 if (error != 0) {
1230 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1231 goto out;
1232 }
1233
1234 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1235 IMGSRC_DEBUG("Wrong dev_t.\n");
1236 error = ENXIO;
1237 goto out1;
1238 }
1239
1240 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1241
b7266188
A
1242 /*
1243 * If mount by non-root, then verify that user has necessary
1244 * permissions on the device.
1245 */
1246 if (!vfs_context_issuser(ctx)) {
1247 accessmode = KAUTH_VNODE_READ_DATA;
1248 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1249 accessmode |= KAUTH_VNODE_WRITE_DATA;
6d2010ae
A
1250 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1251 IMGSRC_DEBUG("Access denied.\n");
1252 goto out1;
1253 }
b7266188
A
1254 }
1255
1256 *devvpp = vp;
6d2010ae
A
1257
1258out1:
1259 vnode_put(realdevvp);
b7266188 1260out:
6d2010ae 1261 nameidone(&nd);
b7266188
A
1262 if (error) {
1263 vnode_put(vp);
1264 }
1265
1266 return error;
1267}
1268
1269/*
1270 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1271 * and call checkdirs()
1272 */
1273static int
1274place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1275{
1276 int error;
1277
1278 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1279
1280 vnode_lock_spin(vp);
1281 CLR(vp->v_flag, VMOUNT);
1282 vp->v_mountedhere = mp;
1283 vnode_unlock(vp);
1284
1285 /*
1286 * taking the name_cache_lock exclusively will
1287 * insure that everyone is out of the fast path who
1288 * might be trying to use a now stale copy of
1289 * vp->v_mountedhere->mnt_realrootvp
1290 * bumping mount_generation causes the cached values
1291 * to be invalidated
1292 */
1293 name_cache_lock();
1294 mount_generation++;
1295 name_cache_unlock();
1296
1297 error = vnode_ref(vp);
1298 if (error != 0) {
1299 goto out;
1300 }
1301
1302 error = checkdirs(vp, ctx);
1303 if (error != 0) {
1304 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1305 vnode_rele(vp);
1306 goto out;
1307 }
1308
1309out:
1310 if (error != 0) {
1311 mp->mnt_vnodecovered = NULLVP;
1312 }
1313 return error;
1314}
1315
1316static void
1317undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1318{
1319 vnode_rele(vp);
1320 vnode_lock_spin(vp);
1321 vp->v_mountedhere = (mount_t)NULL;
1322 vnode_unlock(vp);
1323
1324 mp->mnt_vnodecovered = NULLVP;
1325}
1326
1327static int
1328mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1329{
1330 int error;
1331
1332 /* unmount in progress return error */
1333 mount_lock_spin(mp);
1334 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1335 mount_unlock(mp);
1336 return EBUSY;
1337 }
1338 mount_unlock(mp);
1339 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1340
1341 /*
1342 * We only allow the filesystem to be reloaded if it
1343 * is currently mounted read-only.
1344 */
1345 if ((flags & MNT_RELOAD) &&
1346 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1347 error = ENOTSUP;
1348 goto out;
1349 }
1350
1351 /*
1352 * Only root, or the user that did the original mount is
1353 * permitted to update it.
1354 */
1355 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
39037602 1356 (!vfs_context_issuser(ctx))) {
b7266188
A
1357 error = EPERM;
1358 goto out;
1359 }
1360#if CONFIG_MACF
1361 error = mac_mount_check_remount(ctx, mp);
1362 if (error != 0) {
1363 goto out;
1364 }
1365#endif
1366
1367out:
1368 if (error) {
1369 lck_rw_done(&mp->mnt_rwlock);
1370 }
1371
1372 return error;
1373}
1374
39037602 1375static void
b7266188
A
1376mount_end_update(mount_t mp)
1377{
1378 lck_rw_done(&mp->mnt_rwlock);
1379}
1380
1381static int
6d2010ae
A
1382get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1383{
1384 vnode_t vp;
1385
1386 if (height >= MAX_IMAGEBOOT_NESTING) {
1387 return EINVAL;
1388 }
1389
1390 vp = imgsrc_rootvnodes[height];
1391 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1392 *rvpp = vp;
1393 return 0;
1394 } else {
1395 return ENOENT;
1396 }
1397}
1398
1399static int
39037602
A
1400relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
1401 const char *fsname, vfs_context_t ctx,
6d2010ae 1402 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
b7266188
A
1403{
1404 int error;
1405 mount_t mp;
1406 boolean_t placed = FALSE;
6d2010ae 1407 vnode_t devvp = NULLVP;
b7266188
A
1408 struct vfstable *vfsp;
1409 user_addr_t devpath;
1410 char *old_mntonname;
6d2010ae
A
1411 vnode_t rvp;
1412 uint32_t height;
1413 uint32_t flags;
b7266188
A
1414
1415 /* If we didn't imageboot, nothing to move */
6d2010ae 1416 if (imgsrc_rootvnodes[0] == NULLVP) {
b7266188
A
1417 return EINVAL;
1418 }
1419
1420 /* Only root can do this */
1421 if (!vfs_context_issuser(ctx)) {
1422 return EPERM;
1423 }
1424
6d2010ae
A
1425 IMGSRC_DEBUG("looking for root vnode.\n");
1426
1427 /*
1428 * Get root vnode of filesystem we're moving.
1429 */
1430 if (by_index) {
1431 if (is64bit) {
1432 struct user64_mnt_imgsrc_args mia64;
1433 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1434 if (error != 0) {
1435 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1436 return error;
1437 }
1438
1439 height = mia64.mi_height;
1440 flags = mia64.mi_flags;
1441 devpath = mia64.mi_devpath;
1442 } else {
1443 struct user32_mnt_imgsrc_args mia32;
1444 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1445 if (error != 0) {
1446 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1447 return error;
1448 }
1449
1450 height = mia32.mi_height;
1451 flags = mia32.mi_flags;
1452 devpath = mia32.mi_devpath;
1453 }
1454 } else {
1455 /*
1456 * For binary compatibility--assumes one level of nesting.
1457 */
1458 if (is64bit) {
1459 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1460 return error;
1461 } else {
1462 user32_addr_t tmp;
1463 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1464 return error;
1465
1466 /* munge into LP64 addr */
1467 devpath = CAST_USER_ADDR_T(tmp);
1468 }
1469
1470 height = 0;
1471 flags = 0;
1472 }
1473
1474 if (flags != 0) {
1475 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1476 return EINVAL;
1477 }
1478
1479 error = get_imgsrc_rootvnode(height, &rvp);
b7266188 1480 if (error != 0) {
6d2010ae 1481 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
b7266188
A
1482 return error;
1483 }
1484
6d2010ae
A
1485 IMGSRC_DEBUG("got root vnode.\n");
1486
b7266188
A
1487 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1488
1489 /* Can only move once */
6d2010ae 1490 mp = vnode_mount(rvp);
b7266188 1491 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1492 IMGSRC_DEBUG("Already moved.\n");
b7266188
A
1493 error = EBUSY;
1494 goto out0;
1495 }
1496
6d2010ae
A
1497 IMGSRC_DEBUG("Starting updated.\n");
1498
b7266188
A
1499 /* Get exclusive rwlock on mount, authorize update on mp */
1500 error = mount_begin_update(mp , ctx, 0);
1501 if (error != 0) {
6d2010ae 1502 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
b7266188
A
1503 goto out0;
1504 }
1505
39037602 1506 /*
b7266188
A
1507 * It can only be moved once. Flag is set under the rwlock,
1508 * so we're now safe to proceed.
1509 */
1510 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1511 IMGSRC_DEBUG("Already moved [2]\n");
b7266188
A
1512 goto out1;
1513 }
39037602
A
1514
1515
6d2010ae 1516 IMGSRC_DEBUG("Preparing coveredvp.\n");
b7266188
A
1517
1518 /* Mark covered vnode as mount in progress, authorize placing mount on top */
6d2010ae 1519 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
b7266188 1520 if (error != 0) {
6d2010ae 1521 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
b7266188
A
1522 goto out1;
1523 }
39037602 1524
6d2010ae
A
1525 IMGSRC_DEBUG("Covered vp OK.\n");
1526
b7266188
A
1527 /* Sanity check the name caller has provided */
1528 vfsp = mp->mnt_vtable;
1529 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
6d2010ae 1530 IMGSRC_DEBUG("Wrong fs name.\n");
b7266188
A
1531 error = EINVAL;
1532 goto out2;
1533 }
1534
1535 /* Check the device vnode and update mount-from name, for local filesystems */
1536 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
6d2010ae 1537 IMGSRC_DEBUG("Local, doing device validation.\n");
b7266188
A
1538
1539 if (devpath != USER_ADDR_NULL) {
1540 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1541 if (error) {
6d2010ae 1542 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
b7266188
A
1543 goto out2;
1544 }
1545
1546 vnode_put(devvp);
1547 }
1548 }
1549
39037602 1550 /*
b7266188 1551 * Place mp on top of vnode, ref the vnode, call checkdirs(),
39037602 1552 * and increment the name cache's mount generation
b7266188 1553 */
6d2010ae
A
1554
1555 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
b7266188
A
1556 error = place_mount_and_checkdirs(mp, vp, ctx);
1557 if (error != 0) {
1558 goto out2;
1559 }
1560
1561 placed = TRUE;
1562
3e170ce0
A
1563 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1564 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
b7266188
A
1565
1566 /* Forbid future moves */
1567 mount_lock(mp);
1568 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1569 mount_unlock(mp);
1570
1571 /* Finally, add to mount list, completely ready to go */
6d2010ae
A
1572 if (mount_list_add(mp) != 0) {
1573 /*
1574 * The system is shutting down trying to umount
1575 * everything, so fail with a plausible errno.
1576 */
1577 error = EBUSY;
b7266188
A
1578 goto out3;
1579 }
1580
1581 mount_end_update(mp);
6d2010ae 1582 vnode_put(rvp);
b7266188
A
1583 FREE(old_mntonname, M_TEMP);
1584
6d2010ae
A
1585 vfs_notify_mount(pvp);
1586
b7266188
A
1587 return 0;
1588out3:
3e170ce0 1589 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
b7266188
A
1590
1591 mount_lock(mp);
1592 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1593 mount_unlock(mp);
1594
1595out2:
39037602 1596 /*
b7266188 1597 * Placing the mp on the vnode clears VMOUNT,
39037602 1598 * so cleanup is different after that point
b7266188
A
1599 */
1600 if (placed) {
1601 /* Rele the vp, clear VMOUNT and v_mountedhere */
1602 undo_place_on_covered_vp(mp, vp);
1603 } else {
1604 vnode_lock_spin(vp);
1605 CLR(vp->v_flag, VMOUNT);
1606 vnode_unlock(vp);
1607 }
1608out1:
1609 mount_end_update(mp);
1610
1611out0:
6d2010ae 1612 vnode_put(rvp);
b7266188
A
1613 FREE(old_mntonname, M_TEMP);
1614 return error;
1615}
1616
1617#endif /* CONFIG_IMGSRC_ACCESS */
1618
91447636 1619void
2d21ac55 1620enablequotas(struct mount *mp, vfs_context_t ctx)
9bccf70c 1621{
9bccf70c
A
1622 struct nameidata qnd;
1623 int type;
1624 char qfpath[MAXPATHLEN];
91447636
A
1625 const char *qfname = QUOTAFILENAME;
1626 const char *qfopsname = QUOTAOPSNAME;
1627 const char *qfextension[] = INITQFNAMES;
9bccf70c 1628
2d21ac55 1629 /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
b0d623f7
A
1630 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1631 return;
1632 }
39037602 1633 /*
9bccf70c
A
1634 * Enable filesystem disk quotas if necessary.
1635 * We ignore errors as this should not interfere with final mount
1636 */
1637 for (type=0; type < MAXQUOTAS; type++) {
2d21ac55 1638 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
6d2010ae
A
1639 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1640 CAST_USER_ADDR_T(qfpath), ctx);
91447636
A
1641 if (namei(&qnd) != 0)
1642 continue; /* option file to trigger quotas is not present */
1643 vnode_put(qnd.ni_vp);
1644 nameidone(&qnd);
2d21ac55 1645 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
91447636 1646
2d21ac55 1647 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
9bccf70c
A
1648 }
1649 return;
1650}
1651
2d21ac55
A
1652
1653static int
39037602 1654checkdirs_callback(proc_t p, void * arg)
2d21ac55
A
1655{
1656 struct cdirargs * cdrp = (struct cdirargs * )arg;
1657 vnode_t olddp = cdrp->olddp;
1658 vnode_t newdp = cdrp->newdp;
1659 struct filedesc *fdp;
1660 vnode_t tvp;
1661 vnode_t fdp_cvp;
1662 vnode_t fdp_rvp;
1663 int cdir_changed = 0;
1664 int rdir_changed = 0;
1665
1666 /*
1667 * XXX Also needs to iterate each thread in the process to see if it
1668 * XXX is using a per-thread current working directory, and, if so,
1669 * XXX update that as well.
1670 */
1671
1672 proc_fdlock(p);
1673 fdp = p->p_fd;
1674 if (fdp == (struct filedesc *)0) {
1675 proc_fdunlock(p);
1676 return(PROC_RETURNED);
1677 }
1678 fdp_cvp = fdp->fd_cdir;
1679 fdp_rvp = fdp->fd_rdir;
1680 proc_fdunlock(p);
1681
1682 if (fdp_cvp == olddp) {
1683 vnode_ref(newdp);
1684 tvp = fdp->fd_cdir;
1685 fdp_cvp = newdp;
1686 cdir_changed = 1;
1687 vnode_rele(tvp);
1688 }
1689 if (fdp_rvp == olddp) {
1690 vnode_ref(newdp);
1691 tvp = fdp->fd_rdir;
1692 fdp_rvp = newdp;
1693 rdir_changed = 1;
1694 vnode_rele(tvp);
1695 }
1696 if (cdir_changed || rdir_changed) {
1697 proc_fdlock(p);
1698 fdp->fd_cdir = fdp_cvp;
1699 fdp->fd_rdir = fdp_rvp;
1700 proc_fdunlock(p);
1701 }
1702 return(PROC_RETURNED);
1703}
1704
1705
1706
1c79356b
A
1707/*
1708 * Scan all active processes to see if any of them have a current
1709 * or root directory onto which the new filesystem has just been
1710 * mounted. If so, replace them with the new mount point.
1711 */
6601e61a 1712static int
2d21ac55 1713checkdirs(vnode_t olddp, vfs_context_t ctx)
1c79356b 1714{
2d21ac55
A
1715 vnode_t newdp;
1716 vnode_t tvp;
6601e61a 1717 int err;
2d21ac55 1718 struct cdirargs cdr;
1c79356b
A
1719
1720 if (olddp->v_usecount == 1)
6601e61a 1721 return(0);
2d21ac55 1722 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
2d21ac55
A
1723
1724 if (err != 0) {
6601e61a 1725#if DIAGNOSTIC
2d21ac55 1726 panic("mount: lost mount: error %d", err);
6601e61a
A
1727#endif
1728 return(err);
1729 }
91447636 1730
2d21ac55
A
1731 cdr.olddp = olddp;
1732 cdr.newdp = newdp;
1733 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1734 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
91447636 1735
1c79356b 1736 if (rootvnode == olddp) {
91447636 1737 vnode_ref(newdp);
fa4905b1 1738 tvp = rootvnode;
1c79356b 1739 rootvnode = newdp;
91447636 1740 vnode_rele(tvp);
1c79356b 1741 }
91447636
A
1742
1743 vnode_put(newdp);
6601e61a 1744 return(0);
1c79356b
A
1745}
1746
1747/*
1748 * Unmount a file system.
1749 *
1750 * Note: unmount takes a path to the vnode mounted on as argument,
1751 * not special file (as before).
1752 */
1c79356b
A
1753/* ARGSUSED */
1754int
b0d623f7 1755unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1c79356b 1756{
2d21ac55 1757 vnode_t vp;
1c79356b
A
1758 struct mount *mp;
1759 int error;
1760 struct nameidata nd;
2d21ac55 1761 vfs_context_t ctx = vfs_context_current();
91447636 1762
39037602 1763 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
2d21ac55 1764 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
1765 error = namei(&nd);
1766 if (error)
1c79356b
A
1767 return (error);
1768 vp = nd.ni_vp;
1769 mp = vp->v_mount;
91447636 1770 nameidone(&nd);
1c79356b 1771
2d21ac55
A
1772#if CONFIG_MACF
1773 error = mac_mount_check_umount(ctx, mp);
1774 if (error != 0) {
1775 vnode_put(vp);
1776 return (error);
1777 }
1778#endif
55e303ae
A
1779 /*
1780 * Must be the root of the filesystem
1781 */
1782 if ((vp->v_flag & VROOT) == 0) {
91447636 1783 vnode_put(vp);
55e303ae
A
1784 return (EINVAL);
1785 }
6601e61a 1786 mount_ref(mp, 0);
91447636 1787 vnode_put(vp);
6601e61a 1788 /* safedounmount consumes the mount ref */
2d21ac55
A
1789 return (safedounmount(mp, uap->flags, ctx));
1790}
1791
1792int
39037602 1793vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx)
2d21ac55
A
1794{
1795 mount_t mp;
1796
1797 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1798 if (mp == (mount_t)0) {
1799 return(ENOENT);
1800 }
1801 mount_ref(mp, 0);
1802 mount_iterdrop(mp);
1803 /* safedounmount consumes the mount ref */
1804 return(safedounmount(mp, flags, ctx));
55e303ae
A
1805}
1806
2d21ac55 1807
55e303ae 1808/*
6601e61a 1809 * The mount struct comes with a mount ref which will be consumed.
55e303ae
A
1810 * Do the actual file system unmount, prevent some common foot shooting.
1811 */
1812int
2d21ac55 1813safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
55e303ae
A
1814{
1815 int error;
2d21ac55 1816 proc_t p = vfs_context_proc(ctx);
55e303ae 1817
316670eb
A
1818 /*
1819 * If the file system is not responding and MNT_NOBLOCK
1820 * is set and not a forced unmount then return EBUSY.
1821 */
1822 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1823 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1824 error = EBUSY;
1825 goto out;
1826 }
1827
1c79356b 1828 /*
39037602 1829 * Skip authorization if the mount is tagged as permissive and
6d2010ae 1830 * this is not a forced-unmount attempt.
1c79356b 1831 */
6d2010ae
A
1832 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1833 /*
1834 * Only root, or the user that did the original mount is
1835 * permitted to unmount this filesystem.
1836 */
1837 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1838 (error = suser(kauth_cred_get(), &p->p_acflag)))
1839 goto out;
1840 }
1c79356b
A
1841 /*
1842 * Don't allow unmounting the root file system.
1843 */
6601e61a 1844 if (mp->mnt_flag & MNT_ROOTFS) {
2d21ac55 1845 error = EBUSY; /* the root is always busy */
6601e61a
A
1846 goto out;
1847 }
1c79356b 1848
b7266188
A
1849#ifdef CONFIG_IMGSRC_ACCESS
1850 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1851 error = EBUSY;
1852 goto out;
1853 }
1854#endif /* CONFIG_IMGSRC_ACCESS */
1855
2d21ac55
A
1856 return (dounmount(mp, flags, 1, ctx));
1857
6601e61a
A
1858out:
1859 mount_drop(mp, 0);
1860 return(error);
1c79356b
A
1861}
1862
1863/*
1864 * Do the actual file system unmount.
1865 */
1866int
2d21ac55 1867dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1c79356b 1868{
2d21ac55 1869 vnode_t coveredvp = (vnode_t)0;
1c79356b 1870 int error;
91447636 1871 int needwakeup = 0;
91447636
A
1872 int forcedunmount = 0;
1873 int lflags = 0;
593a1d5f 1874 struct vnode *devvp = NULLVP;
6d2010ae 1875#if CONFIG_TRIGGERS
39236c6e 1876 proc_t p = vfs_context_proc(ctx);
6d2010ae 1877 int did_vflush = 0;
39236c6e 1878 int pflags_save = 0;
6d2010ae 1879#endif /* CONFIG_TRIGGERS */
91447636 1880
813fb2f6
A
1881#if CONFIG_FSE
1882 if (!(flags & MNT_FORCE)) {
1883 fsevent_unmount(mp, ctx); /* has to come first! */
1884 }
1885#endif
1886
91447636 1887 mount_lock(mp);
fe8ab488
A
1888
1889 /*
1890 * If already an unmount in progress just return EBUSY.
1891 * Even a forced unmount cannot override.
1892 */
91447636 1893 if (mp->mnt_lflag & MNT_LUNMOUNT) {
fe8ab488 1894 if (withref != 0)
6601e61a 1895 mount_drop(mp, 1);
fe8ab488 1896 mount_unlock(mp);
9bccf70c
A
1897 return (EBUSY);
1898 }
39236c6e 1899
fe8ab488
A
1900 if (flags & MNT_FORCE) {
1901 forcedunmount = 1;
1902 mp->mnt_lflag |= MNT_LFORCE;
1903 }
1904
39236c6e
A
1905#if CONFIG_TRIGGERS
1906 if (flags & MNT_NOBLOCK && p != kernproc)
1907 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
1908#endif
1909
1c79356b 1910 mp->mnt_kern_flag |= MNTK_UNMOUNT;
91447636
A
1911 mp->mnt_lflag |= MNT_LUNMOUNT;
1912 mp->mnt_flag &=~ MNT_ASYNC;
2d21ac55
A
1913 /*
1914 * anyone currently in the fast path that
1915 * trips over the cached rootvp will be
1916 * dumped out and forced into the slow path
1917 * to regenerate a new cached value
1918 */
1919 mp->mnt_realrootvp = NULLVP;
91447636 1920 mount_unlock(mp);
39037602 1921
fe8ab488
A
1922 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
1923 /*
1924 * Force unmount any mounts in this filesystem.
1925 * If any unmounts fail - just leave them dangling.
1926 * Avoids recursion.
1927 */
1928 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
1929 }
1930
2d21ac55
A
1931 /*
1932 * taking the name_cache_lock exclusively will
1933 * insure that everyone is out of the fast path who
1934 * might be trying to use a now stale copy of
1935 * vp->v_mountedhere->mnt_realrootvp
1936 * bumping mount_generation causes the cached values
1937 * to be invalidated
1938 */
1939 name_cache_lock();
1940 mount_generation++;
1941 name_cache_unlock();
1942
1943
91447636 1944 lck_rw_lock_exclusive(&mp->mnt_rwlock);
6601e61a
A
1945 if (withref != 0)
1946 mount_drop(mp, 0);
91447636
A
1947 error = 0;
1948 if (forcedunmount == 0) {
1949 ubc_umount(mp); /* release cached vnodes */
1950 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2d21ac55 1951 error = VFS_SYNC(mp, MNT_WAIT, ctx);
91447636
A
1952 if (error) {
1953 mount_lock(mp);
1954 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1955 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1956 mp->mnt_lflag &= ~MNT_LFORCE;
1957 goto out;
1958 }
1959 }
1960 }
6d2010ae 1961
3e170ce0
A
1962 IOBSDMountChange(mp, kIOMountChangeUnmount);
1963
6d2010ae
A
1964#if CONFIG_TRIGGERS
1965 vfs_nested_trigger_unmounts(mp, flags, ctx);
1966 did_vflush = 1;
39037602 1967#endif
91447636
A
1968 if (forcedunmount)
1969 lflags |= FORCECLOSE;
1970 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
1971 if ((forcedunmount == 0) && error) {
1972 mount_lock(mp);
9bccf70c 1973 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
1974 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1975 mp->mnt_lflag &= ~MNT_LFORCE;
9bccf70c
A
1976 goto out;
1977 }
91447636
A
1978
1979 /* make sure there are no one in the mount iterations or lookup */
1980 mount_iterdrain(mp);
1981
2d21ac55 1982 error = VFS_UNMOUNT(mp, flags, ctx);
1c79356b 1983 if (error) {
91447636
A
1984 mount_iterreset(mp);
1985 mount_lock(mp);
1c79356b 1986 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
1987 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1988 mp->mnt_lflag &= ~MNT_LFORCE;
1c79356b
A
1989 goto out;
1990 }
1991
1992 /* increment the operations count */
1993 if (!error)
b0d623f7 1994 OSAddAtomic(1, &vfs_nummntops);
91447636
A
1995
1996 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
593a1d5f
A
1997 /* hold an io reference and drop the usecount before close */
1998 devvp = mp->mnt_devvp;
593a1d5f
A
1999 vnode_getalways(devvp);
2000 vnode_rele(devvp);
2001 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
2d21ac55 2002 ctx);
b0d623f7 2003 vnode_clearmountedon(devvp);
593a1d5f 2004 vnode_put(devvp);
91447636
A
2005 }
2006 lck_rw_done(&mp->mnt_rwlock);
2007 mount_list_remove(mp);
2008 lck_rw_lock_exclusive(&mp->mnt_rwlock);
6d2010ae 2009
91447636 2010 /* mark the mount point hook in the vp but not drop the ref yet */
1c79356b 2011 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
fe8ab488
A
2012 /*
2013 * The covered vnode needs special handling. Trying to get an
2014 * iocount must not block here as this may lead to deadlocks
2015 * if the Filesystem to which the covered vnode belongs is
2016 * undergoing forced unmounts. Since we hold a usecount, the
2017 * vnode cannot be reused (it can, however, still be terminated)
2018 */
2019 vnode_getalways(coveredvp);
6d2010ae
A
2020 vnode_lock_spin(coveredvp);
2021
2022 mp->mnt_crossref++;
2023 coveredvp->v_mountedhere = (struct mount *)0;
fe8ab488 2024 CLR(coveredvp->v_flag, VMOUNT);
6d2010ae
A
2025
2026 vnode_unlock(coveredvp);
2027 vnode_put(coveredvp);
1c79356b 2028 }
91447636
A
2029
2030 mount_list_lock();
2031 mp->mnt_vtable->vfc_refcount--;
2032 mount_list_unlock();
2033
2034 cache_purgevfs(mp); /* remove cache entries for this file sys */
2035 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
2036 mount_lock(mp);
2037 mp->mnt_lflag |= MNT_LDEAD;
2038
2039 if (mp->mnt_lflag & MNT_LWAIT) {
2040 /*
2041 * do the wakeup here
2042 * in case we block in mount_refdrain
2043 * which will drop the mount lock
2044 * and allow anyone blocked in vfs_busy
2045 * to wakeup and see the LDEAD state
2046 */
2047 mp->mnt_lflag &= ~MNT_LWAIT;
2048 wakeup((caddr_t)mp);
1c79356b 2049 }
91447636 2050 mount_refdrain(mp);
1c79356b 2051out:
91447636
A
2052 if (mp->mnt_lflag & MNT_LWAIT) {
2053 mp->mnt_lflag &= ~MNT_LWAIT;
39037602 2054 needwakeup = 1;
91447636 2055 }
6d2010ae 2056
6d2010ae 2057#if CONFIG_TRIGGERS
39236c6e
A
2058 if (flags & MNT_NOBLOCK && p != kernproc) {
2059 // Restore P_NOREMOTEHANG bit to its previous value
2060 if ((pflags_save & P_NOREMOTEHANG) == 0)
2061 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2062 }
2063
39037602 2064 /*
6d2010ae 2065 * Callback and context are set together under the mount lock, and
39037602 2066 * never cleared, so we're safe to examine them here, drop the lock,
6d2010ae
A
2067 * and call out.
2068 */
2069 if (mp->mnt_triggercallback != NULL) {
2070 mount_unlock(mp);
2071 if (error == 0) {
2072 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2073 } else if (did_vflush) {
2074 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2075 }
2076 } else {
2077 mount_unlock(mp);
2078 }
39037602 2079#else
91447636 2080 mount_unlock(mp);
6d2010ae
A
2081#endif /* CONFIG_TRIGGERS */
2082
91447636
A
2083 lck_rw_done(&mp->mnt_rwlock);
2084
2085 if (needwakeup)
1c79356b 2086 wakeup((caddr_t)mp);
6d2010ae 2087
55e303ae 2088 if (!error) {
91447636 2089 if ((coveredvp != NULLVP)) {
fe8ab488 2090 vnode_t pvp = NULLVP;
b0d623f7 2091
fe8ab488
A
2092 /*
2093 * The covered vnode needs special handling. Trying to
2094 * get an iocount must not block here as this may lead
2095 * to deadlocks if the Filesystem to which the covered
2096 * vnode belongs is undergoing forced unmounts. Since we
2097 * hold a usecount, the vnode cannot be reused
2098 * (it can, however, still be terminated).
2099 */
2100 vnode_getalways(coveredvp);
6d2010ae
A
2101
2102 mount_dropcrossref(mp, coveredvp, 0);
fe8ab488
A
2103 /*
2104 * We'll _try_ to detect if this really needs to be
2105 * done. The coveredvp can only be in termination (or
2106 * terminated) if the coveredvp's mount point is in a
2107 * forced unmount (or has been) since we still hold the
2108 * ref.
2109 */
2110 if (!vnode_isrecycled(coveredvp)) {
2111 pvp = vnode_getparent(coveredvp);
6d2010ae 2112#if CONFIG_TRIGGERS
fe8ab488
A
2113 if (coveredvp->v_resolve) {
2114 vnode_trigger_rearm(coveredvp, ctx);
2115 }
2116#endif
2117 }
2118
2119 vnode_rele(coveredvp);
91447636 2120 vnode_put(coveredvp);
fe8ab488 2121 coveredvp = NULLVP;
b0d623f7
A
2122
2123 if (pvp) {
2124 lock_vnode_and_post(pvp, NOTE_WRITE);
2125 vnode_put(pvp);
2126 }
91447636
A
2127 } else if (mp->mnt_flag & MNT_ROOTFS) {
2128 mount_lock_destroy(mp);
2d21ac55
A
2129#if CONFIG_MACF
2130 mac_mount_label_destroy(mp);
2131#endif
91447636
A
2132 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2133 } else
2134 panic("dounmount: no coveredvp");
55e303ae 2135 }
1c79356b
A
2136 return (error);
2137}
2138
fe8ab488
A
2139/*
2140 * Unmount any mounts in this filesystem.
2141 */
2142void
2143dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2144{
2145 mount_t smp;
2146 fsid_t *fsids, fsid;
2147 int fsids_sz;
2148 int count = 0, i, m = 0;
2149 vnode_t vp;
2150
2151 mount_list_lock();
2152
2153 // Get an array to hold the submounts fsids.
2154 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2155 count++;
2156 fsids_sz = count * sizeof(fsid_t);
2157 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2158 if (fsids == NULL) {
2159 mount_list_unlock();
2160 goto out;
2161 }
2162 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2163
2164 /*
2165 * Fill the array with submount fsids.
2166 * Since mounts are always added to the tail of the mount list, the
39037602 2167 * list is always in mount order.
fe8ab488
A
2168 * For each mount check if the mounted-on vnode belongs to a
2169 * mount that's already added to our array of mounts to be unmounted.
2170 */
2171 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2172 vp = smp->mnt_vnodecovered;
2173 if (vp == NULL)
2174 continue;
2175 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2176 for (i = 0; i <= m; i++) {
2177 if (fsids[i].val[0] == fsid.val[0] &&
2178 fsids[i].val[1] == fsid.val[1]) {
2179 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2180 break;
2181 }
2182 }
2183 }
2184 mount_list_unlock();
2185
2186 // Unmount the submounts in reverse order. Ignore errors.
2187 for (i = m; i > 0; i--) {
2188 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2189 if (smp) {
2190 mount_ref(smp, 0);
2191 mount_iterdrop(smp);
2192 (void) dounmount(smp, flags, 1, ctx);
2193 }
2194 }
2195out:
2196 if (fsids)
2197 FREE(fsids, M_TEMP);
2198}
2199
91447636
A
2200void
2201mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2202{
6d2010ae
A
2203 vnode_lock(dp);
2204 mp->mnt_crossref--;
2205
2206 if (mp->mnt_crossref < 0)
2207 panic("mount cross refs -ve");
2208
2209 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
39037602 2210
91447636 2211 if (need_put)
6d2010ae 2212 vnode_put_locked(dp);
91447636 2213 vnode_unlock(dp);
6d2010ae
A
2214
2215 mount_lock_destroy(mp);
2216#if CONFIG_MACF
2217 mac_mount_label_destroy(mp);
2218#endif
2219 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2220 return;
2221 }
2222 if (need_put)
2223 vnode_put_locked(dp);
2224 vnode_unlock(dp);
91447636
A
2225}
2226
2227
1c79356b
A
/*
 * Sync each mounted filesystem.
 *
 * Tunables consulted by the sync paths below.
 */
#if DIAGNOSTIC
int syncprt = 0;		/* non-zero: call vfs_bufstats() after a sync */
#endif

int print_vmpage_stat = 0;	/* non-zero: call vm_countdirtypages() after a sync */
int sync_timeout = 60;		/* Sync time limit (sec), passed to sync_async() */
1c79356b 2237
39037602 2238static int
fe8ab488 2239sync_callback(mount_t mp, __unused void *arg)
1c79356b 2240{
91447636 2241 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
fe8ab488
A
2242 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2243
2244 mp->mnt_flag &= ~MNT_ASYNC;
2245 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
2246 if (asyncflag)
2247 mp->mnt_flag |= MNT_ASYNC;
1c79356b 2248 }
1c79356b 2249
fe8ab488
A
2250 return (VFS_RETURNED);
2251}
91447636 2252
91447636
A
2253/* ARGSUSED */
2254int
b0d623f7 2255sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
91447636 2256{
fe8ab488 2257 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
b0d623f7 2258
fe8ab488
A
2259 if (print_vmpage_stat) {
2260 vm_countdirtypages();
2261 }
2262
2263#if DIAGNOSTIC
2264 if (syncprt)
2265 vfs_bufstats();
2266#endif /* DIAGNOSTIC */
2267 return 0;
2268}
2269
2270static void
2271sync_thread(void *arg, __unused wait_result_t wr)
2272{
2273 int *timeout = (int *) arg;
2274
2275 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2276
2277 if (timeout)
2278 wakeup((caddr_t) timeout);
2279 if (print_vmpage_stat) {
1c79356b 2280 vm_countdirtypages();
1c79356b 2281 }
39236c6e 2282
1c79356b
A
2283#if DIAGNOSTIC
2284 if (syncprt)
2285 vfs_bufstats();
2286#endif /* DIAGNOSTIC */
1c79356b
A
2287}
2288
2289/*
fe8ab488 2290 * Sync in a separate thread so we can time out if it blocks.
1c79356b 2291 */
fe8ab488
A
2292static int
2293sync_async(int timeout)
2d21ac55 2294{
fe8ab488 2295 thread_t thd;
2d21ac55 2296 int error;
fe8ab488
A
2297 struct timespec ts = {timeout, 0};
2298
2299 lck_mtx_lock(sync_mtx_lck);
2300 if (kernel_thread_start(sync_thread, &timeout, &thd) != KERN_SUCCESS) {
2301 printf("sync_thread failed\n");
2302 lck_mtx_unlock(sync_mtx_lck);
2303 return (0);
2304 }
2305
2306 error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "sync_thread", &ts);
2307 if (error) {
2308 printf("sync timed out: %d sec\n", timeout);
2309 }
2310 thread_deallocate(thd);
2311
2312 return (0);
2d21ac55
A
2313}
2314
fe8ab488
A
2315/*
2316 * An in-kernel sync for power management to call.
2317 */
2318__private_extern__ int
2319sync_internal(void)
2320{
2321 (void) sync_async(sync_timeout);
2322
2323 return 0;
2324} /* end of sync_internal call */
2325
2326/*
2327 * Change filesystem quotas.
2328 */
2329#if QUOTA
2330int
2331quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
1c79356b 2332{
2d21ac55 2333 struct mount *mp;
91447636
A
2334 int error, quota_cmd, quota_status;
2335 caddr_t datap;
2336 size_t fnamelen;
1c79356b 2337 struct nameidata nd;
2d21ac55 2338 vfs_context_t ctx = vfs_context_current();
91447636
A
2339 struct dqblk my_dqblk;
2340
b0d623f7 2341 AUDIT_ARG(uid, uap->uid);
55e303ae 2342 AUDIT_ARG(cmd, uap->cmd);
6d2010ae
A
2343 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2344 uap->path, ctx);
55e303ae
A
2345 error = namei(&nd);
2346 if (error)
1c79356b
A
2347 return (error);
2348 mp = nd.ni_vp->v_mount;
91447636
A
2349 vnode_put(nd.ni_vp);
2350 nameidone(&nd);
2351
2352 /* copyin any data we will need for downstream code */
2353 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2354
2355 switch (quota_cmd) {
2356 case Q_QUOTAON:
2357 /* uap->arg specifies a file from which to take the quotas */
2358 fnamelen = MAXPATHLEN;
2359 datap = kalloc(MAXPATHLEN);
2360 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2361 break;
2362 case Q_GETQUOTA:
2363 /* uap->arg is a pointer to a dqblk structure. */
2364 datap = (caddr_t) &my_dqblk;
2365 break;
2366 case Q_SETQUOTA:
2367 case Q_SETUSE:
2368 /* uap->arg is a pointer to a dqblk structure. */
2369 datap = (caddr_t) &my_dqblk;
2370 if (proc_is64bit(p)) {
2371 struct user_dqblk my_dqblk64;
2372 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
2373 if (error == 0) {
2374 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2375 }
2376 }
2377 else {
2378 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
2379 }
2380 break;
2381 case Q_QUOTASTAT:
2382 /* uap->arg is a pointer to an integer */
2383 datap = (caddr_t) &quota_status;
2384 break;
2385 default:
2386 datap = NULL;
2387 break;
2388 } /* switch */
2389
2390 if (error == 0) {
2d21ac55 2391 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
91447636
A
2392 }
2393
2394 switch (quota_cmd) {
2395 case Q_QUOTAON:
2396 if (datap != NULL)
2397 kfree(datap, MAXPATHLEN);
2398 break;
2399 case Q_GETQUOTA:
2400 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2401 if (error == 0) {
2402 if (proc_is64bit(p)) {
fe8ab488 2403 struct user_dqblk my_dqblk64 = {.dqb_bhardlimit = 0};
91447636
A
2404 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2405 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
2406 }
2407 else {
2408 error = copyout(datap, uap->arg, sizeof (struct dqblk));
2409 }
2410 }
2411 break;
2412 case Q_QUOTASTAT:
2413 /* uap->arg is a pointer to an integer */
2414 if (error == 0) {
2415 error = copyout(datap, uap->arg, sizeof(quota_status));
2416 }
2417 break;
2418 default:
2419 break;
2420 } /* switch */
2421
2422 return (error);
1c79356b 2423}
2d21ac55
A
2424#else
2425int
b0d623f7 2426quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2d21ac55
A
2427{
2428 return (EOPNOTSUPP);
2429}
2430#endif /* QUOTA */
1c79356b
A
2431
2432/*
2433 * Get filesystem statistics.
2d21ac55
A
2434 *
2435 * Returns: 0 Success
2436 * namei:???
2437 * vfs_update_vfsstat:???
2438 * munge_statfs:EFAULT
1c79356b 2439 */
1c79356b
A
2440/* ARGSUSED */
2441int
b0d623f7 2442statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
1c79356b 2443{
91447636
A
2444 struct mount *mp;
2445 struct vfsstatfs *sp;
1c79356b
A
2446 int error;
2447 struct nameidata nd;
2d21ac55 2448 vfs_context_t ctx = vfs_context_current();
91447636 2449 vnode_t vp;
1c79356b 2450
39037602 2451 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2d21ac55 2452 UIO_USERSPACE, uap->path, ctx);
55e303ae 2453 error = namei(&nd);
39037602 2454 if (error != 0)
1c79356b 2455 return (error);
91447636
A
2456 vp = nd.ni_vp;
2457 mp = vp->v_mount;
2458 sp = &mp->mnt_vfsstat;
2459 nameidone(&nd);
2460
39037602
A
2461#if CONFIG_MACF
2462 error = mac_mount_check_stat(ctx, mp);
2463 if (error != 0)
2464 return (error);
2465#endif
2466
2d21ac55 2467 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
39037602 2468 if (error != 0) {
39236c6e 2469 vnode_put(vp);
1c79356b 2470 return (error);
39236c6e 2471 }
91447636
A
2472
2473 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
39236c6e 2474 vnode_put(vp);
91447636 2475 return (error);
1c79356b
A
2476}
2477
2478/*
2479 * Get filesystem statistics.
2480 */
1c79356b
A
2481/* ARGSUSED */
2482int
b0d623f7 2483fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
1c79356b 2484{
2d21ac55 2485 vnode_t vp;
1c79356b 2486 struct mount *mp;
91447636 2487 struct vfsstatfs *sp;
1c79356b
A
2488 int error;
2489
55e303ae
A
2490 AUDIT_ARG(fd, uap->fd);
2491
91447636 2492 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 2493 return (error);
55e303ae 2494
d1ecb069
A
2495 error = vnode_getwithref(vp);
2496 if (error) {
2497 file_drop(uap->fd);
2498 return (error);
2499 }
2500
91447636 2501 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
55e303ae 2502
91447636
A
2503 mp = vp->v_mount;
2504 if (!mp) {
d1ecb069
A
2505 error = EBADF;
2506 goto out;
91447636 2507 }
39037602
A
2508
2509#if CONFIG_MACF
2510 error = mac_mount_check_stat(vfs_context_current(), mp);
2511 if (error != 0)
2512 goto out;
2513#endif
2514
91447636 2515 sp = &mp->mnt_vfsstat;
39037602 2516 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
d1ecb069 2517 goto out;
91447636 2518 }
91447636
A
2519
2520 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2521
d1ecb069
A
2522out:
2523 file_drop(uap->fd);
2524 vnode_put(vp);
2525
91447636 2526 return (error);
1c79356b
A
2527}
2528
39037602
A
2529/*
2530 * Common routine to handle copying of statfs64 data to user space
2d21ac55 2531 */
39037602 2532static int
2d21ac55
A
2533statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2534{
2535 int error;
2536 struct statfs64 sfs;
39037602 2537
2d21ac55
A
2538 bzero(&sfs, sizeof(sfs));
2539
2540 sfs.f_bsize = sfsp->f_bsize;
2541 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2542 sfs.f_blocks = sfsp->f_blocks;
2543 sfs.f_bfree = sfsp->f_bfree;
2544 sfs.f_bavail = sfsp->f_bavail;
2545 sfs.f_files = sfsp->f_files;
2546 sfs.f_ffree = sfsp->f_ffree;
2547 sfs.f_fsid = sfsp->f_fsid;
2548 sfs.f_owner = sfsp->f_owner;
2549 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2550 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2551 sfs.f_fssubtype = sfsp->f_fssubtype;
6d2010ae
A
2552 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2553 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2554 } else {
2555 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2556 }
2d21ac55
A
2557 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2558 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2559
2560 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2561
2562 return(error);
2563}
2564
39037602
A
2565/*
2566 * Get file system statistics in 64-bit mode
2d21ac55
A
2567 */
2568int
b0d623f7 2569statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
2570{
2571 struct mount *mp;
2572 struct vfsstatfs *sp;
2573 int error;
2574 struct nameidata nd;
2575 vfs_context_t ctxp = vfs_context_current();
2576 vnode_t vp;
2577
39037602 2578 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2d21ac55
A
2579 UIO_USERSPACE, uap->path, ctxp);
2580 error = namei(&nd);
39037602 2581 if (error != 0)
2d21ac55
A
2582 return (error);
2583 vp = nd.ni_vp;
2584 mp = vp->v_mount;
2585 sp = &mp->mnt_vfsstat;
2586 nameidone(&nd);
2587
39037602
A
2588#if CONFIG_MACF
2589 error = mac_mount_check_stat(ctxp, mp);
2590 if (error != 0)
2591 return (error);
2592#endif
2593
2d21ac55 2594 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
39037602 2595 if (error != 0) {
39236c6e 2596 vnode_put(vp);
2d21ac55 2597 return (error);
39236c6e 2598 }
2d21ac55
A
2599
2600 error = statfs64_common(mp, sp, uap->buf);
39236c6e 2601 vnode_put(vp);
2d21ac55
A
2602
2603 return (error);
2604}
2605
39037602
A
2606/*
2607 * Get file system statistics in 64-bit mode
2d21ac55
A
2608 */
2609int
b0d623f7 2610fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
2611{
2612 struct vnode *vp;
2613 struct mount *mp;
2614 struct vfsstatfs *sp;
2615 int error;
2616
2617 AUDIT_ARG(fd, uap->fd);
2618
2619 if ( (error = file_vnode(uap->fd, &vp)) )
2620 return (error);
2621
d1ecb069
A
2622 error = vnode_getwithref(vp);
2623 if (error) {
2624 file_drop(uap->fd);
2625 return (error);
2626 }
2627
2d21ac55
A
2628 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2629
2630 mp = vp->v_mount;
2631 if (!mp) {
316670eb 2632 error = EBADF;
d1ecb069 2633 goto out;
2d21ac55 2634 }
39037602
A
2635
2636#if CONFIG_MACF
2637 error = mac_mount_check_stat(vfs_context_current(), mp);
2638 if (error != 0)
2639 goto out;
2640#endif
2641
2d21ac55
A
2642 sp = &mp->mnt_vfsstat;
2643 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
d1ecb069 2644 goto out;
2d21ac55 2645 }
2d21ac55
A
2646
2647 error = statfs64_common(mp, sp, uap->buf);
2648
d1ecb069
A
2649out:
2650 file_drop(uap->fd);
2651 vnode_put(vp);
2652
2d21ac55
A
2653 return (error);
2654}
91447636
A
2655
2656struct getfsstat_struct {
2657 user_addr_t sfsp;
2d21ac55 2658 user_addr_t *mp;
91447636
A
2659 int count;
2660 int maxcount;
2661 int flags;
2662 int error;
1c79356b 2663};
1c79356b 2664
91447636
A
2665
2666static int
2667getfsstat_callback(mount_t mp, void * arg)
2668{
39037602 2669
91447636
A
2670 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2671 struct vfsstatfs *sp;
91447636 2672 int error, my_size;
2d21ac55 2673 vfs_context_t ctx = vfs_context_current();
91447636
A
2674
2675 if (fstp->sfsp && fstp->count < fstp->maxcount) {
39037602
A
2676#if CONFIG_MACF
2677 error = mac_mount_check_stat(ctx, mp);
2678 if (error != 0) {
2679 fstp->error = error;
2680 return(VFS_RETURNED_DONE);
2681 }
2682#endif
91447636
A
2683 sp = &mp->mnt_vfsstat;
2684 /*
2685 * If MNT_NOWAIT is specified, do not refresh the
b0d623f7 2686 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
91447636 2687 */
b0d623f7 2688 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2d21ac55
A
2689 (error = vfs_update_vfsstat(mp, ctx,
2690 VFS_USER_EVENT))) {
91447636
A
2691 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2692 return(VFS_RETURNED);
1c79356b 2693 }
91447636
A
2694
2695 /*
2696 * Need to handle LP64 version of struct statfs
2697 */
2d21ac55 2698 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
91447636
A
2699 if (error) {
2700 fstp->error = error;
2701 return(VFS_RETURNED_DONE);
1c79356b 2702 }
91447636 2703 fstp->sfsp += my_size;
2d21ac55
A
2704
2705 if (fstp->mp) {
39236c6e 2706#if CONFIG_MACF
2d21ac55
A
2707 error = mac_mount_label_get(mp, *fstp->mp);
2708 if (error) {
2709 fstp->error = error;
2710 return(VFS_RETURNED_DONE);
2711 }
39236c6e 2712#endif
2d21ac55
A
2713 fstp->mp++;
2714 }
2715 }
91447636
A
2716 fstp->count++;
2717 return(VFS_RETURNED);
2718}
2719
2720/*
2721 * Get statistics on all filesystems.
2722 */
2723int
2724getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2d21ac55
A
2725{
2726 struct __mac_getfsstat_args muap;
2727
2728 muap.buf = uap->buf;
2729 muap.bufsize = uap->bufsize;
2730 muap.mac = USER_ADDR_NULL;
2731 muap.macsize = 0;
2732 muap.flags = uap->flags;
2733
2734 return (__mac_getfsstat(p, &muap, retval));
2735}
2736
b0d623f7
A
2737/*
2738 * __mac_getfsstat: Get MAC-related file system statistics
2739 *
2740 * Parameters: p (ignored)
2741 * uap User argument descriptor (see below)
39037602 2742 * retval Count of file system statistics (N stats)
b0d623f7
A
2743 *
2744 * Indirect: uap->bufsize Buffer size
2745 * uap->macsize MAC info size
2746 * uap->buf Buffer where information will be returned
2747 * uap->mac MAC info
2748 * uap->flags File system flags
39037602 2749 *
b0d623f7
A
2750 *
2751 * Returns: 0 Success
2752 * !0 Not success
2753 *
2754 */
2d21ac55
A
2755int
2756__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
91447636
A
2757{
2758 user_addr_t sfsp;
2d21ac55 2759 user_addr_t *mp;
b0d623f7 2760 size_t count, maxcount, bufsize, macsize;
91447636
A
2761 struct getfsstat_struct fst;
2762
b0d623f7
A
2763 bufsize = (size_t) uap->bufsize;
2764 macsize = (size_t) uap->macsize;
2765
91447636 2766 if (IS_64BIT_PROCESS(p)) {
b0d623f7 2767 maxcount = bufsize / sizeof(struct user64_statfs);
91447636
A
2768 }
2769 else {
b0d623f7 2770 maxcount = bufsize / sizeof(struct user32_statfs);
91447636
A
2771 }
2772 sfsp = uap->buf;
2773 count = 0;
2774
2d21ac55
A
2775 mp = NULL;
2776
2777#if CONFIG_MACF
2778 if (uap->mac != USER_ADDR_NULL) {
2779 u_int32_t *mp0;
2780 int error;
b0d623f7 2781 unsigned int i;
2d21ac55 2782
b0d623f7 2783 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2d21ac55
A
2784 if (count != maxcount)
2785 return (EINVAL);
2786
2787 /* Copy in the array */
b0d623f7
A
2788 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2789 if (mp0 == NULL) {
2790 return (ENOMEM);
2791 }
2792
2793 error = copyin(uap->mac, mp0, macsize);
2794 if (error) {
2795 FREE(mp0, M_MACTEMP);
2d21ac55 2796 return (error);
b0d623f7 2797 }
2d21ac55
A
2798
2799 /* Normalize to an array of user_addr_t */
2800 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
b0d623f7
A
2801 if (mp == NULL) {
2802 FREE(mp0, M_MACTEMP);
2803 return (ENOMEM);
2804 }
2805
2d21ac55
A
2806 for (i = 0; i < count; i++) {
2807 if (IS_64BIT_PROCESS(p))
2808 mp[i] = ((user_addr_t *)mp0)[i];
2809 else
2810 mp[i] = (user_addr_t)mp0[i];
2811 }
2812 FREE(mp0, M_MACTEMP);
2813 }
2814#endif
2815
2816
91447636 2817 fst.sfsp = sfsp;
2d21ac55 2818 fst.mp = mp;
91447636
A
2819 fst.flags = uap->flags;
2820 fst.count = 0;
2821 fst.error = 0;
2822 fst.maxcount = maxcount;
2823
39037602 2824
91447636
A
2825 vfs_iterate(0, getfsstat_callback, &fst);
2826
2d21ac55
A
2827 if (mp)
2828 FREE(mp, M_MACTEMP);
2829
91447636
A
2830 if (fst.error ) {
2831 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2832 return(fst.error);
2833 }
2834
2835 if (fst.sfsp && fst.count > fst.maxcount)
2836 *retval = fst.maxcount;
1c79356b 2837 else
91447636 2838 *retval = fst.count;
1c79356b
A
2839 return (0);
2840}
2841
2d21ac55
A
2842static int
2843getfsstat64_callback(mount_t mp, void * arg)
2844{
2845 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2846 struct vfsstatfs *sp;
2847 int error;
2848
2849 if (fstp->sfsp && fstp->count < fstp->maxcount) {
39037602
A
2850#if CONFIG_MACF
2851 error = mac_mount_check_stat(vfs_context_current(), mp);
2852 if (error != 0) {
2853 fstp->error = error;
2854 return(VFS_RETURNED_DONE);
2855 }
2856#endif
2d21ac55
A
2857 sp = &mp->mnt_vfsstat;
2858 /*
b0d623f7
A
2859 * If MNT_NOWAIT is specified, do not refresh the fsstat
2860 * cache. MNT_WAIT overrides MNT_NOWAIT.
2861 *
2862 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2863 * getfsstat, since the constants are out of the same
2864 * namespace.
2d21ac55 2865 */
b0d623f7
A
2866 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2867 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2d21ac55
A
2868 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2869 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2870 return(VFS_RETURNED);
2871 }
2872
2873 error = statfs64_common(mp, sp, fstp->sfsp);
2874 if (error) {
2875 fstp->error = error;
2876 return(VFS_RETURNED_DONE);
2877 }
2878 fstp->sfsp += sizeof(struct statfs64);
2879 }
2880 fstp->count++;
2881 return(VFS_RETURNED);
2882}
2883
2884/*
2885 * Get statistics on all file systems in 64 bit mode.
2886 */
2887int
2888getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2889{
2890 user_addr_t sfsp;
2891 int count, maxcount;
2892 struct getfsstat_struct fst;
2893
2894 maxcount = uap->bufsize / sizeof(struct statfs64);
2895
2896 sfsp = uap->buf;
2897 count = 0;
2898
2899 fst.sfsp = sfsp;
2900 fst.flags = uap->flags;
2901 fst.count = 0;
2902 fst.error = 0;
2903 fst.maxcount = maxcount;
2904
2905 vfs_iterate(0, getfsstat64_callback, &fst);
2906
2907 if (fst.error ) {
2908 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2909 return(fst.error);
2910 }
2911
2912 if (fst.sfsp && fst.count > fst.maxcount)
2913 *retval = fst.maxcount;
2914 else
2915 *retval = fst.count;
2916
2917 return (0);
2918}
2919
fe8ab488
A
2920/*
2921 * gets the associated vnode with the file descriptor passed.
2922 * as input
2923 *
2924 * INPUT
2925 * ctx - vfs context of caller
2926 * fd - file descriptor for which vnode is required.
2927 * vpp - Pointer to pointer to vnode to be returned.
2928 *
2929 * The vnode is returned with an iocount so any vnode obtained
2930 * by this call needs a vnode_put
2931 *
2932 */
39037602 2933int
fe8ab488
A
2934vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
2935{
2936 int error;
2937 vnode_t vp;
2938 struct fileproc *fp;
2939 proc_t p = vfs_context_proc(ctx);
2940
2941 *vpp = NULLVP;
2942
2943 error = fp_getfvp(p, fd, &fp, &vp);
2944 if (error)
2945 return (error);
2946
2947 error = vnode_getwithref(vp);
2948 if (error) {
2949 (void)fp_drop(p, fd, fp, 0);
2950 return (error);
2951 }
2952
2953 (void)fp_drop(p, fd, fp, 0);
2954 *vpp = vp;
2955 return (error);
2956}
2957
2958/*
2959 * Wrapper function around namei to start lookup from a directory
2960 * specified by a file descriptor ni_dirfd.
2961 *
2962 * In addition to all the errors returned by namei, this call can
2963 * return ENOTDIR if the file descriptor does not refer to a directory.
2964 * and EBADF if the file descriptor is not valid.
2965 */
2966int
2967nameiat(struct nameidata *ndp, int dirfd)
2968{
2969 if ((dirfd != AT_FDCWD) &&
2970 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
2971 !(ndp->ni_cnd.cn_flags & USEDVP)) {
2972 int error = 0;
2973 char c;
2974
2975 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
2976 error = copyin(ndp->ni_dirp, &c, sizeof(char));
2977 if (error)
2978 return (error);
2979 } else {
2980 c = *((char *)(ndp->ni_dirp));
2981 }
2982
2983 if (c != '/') {
2984 vnode_t dvp_at;
2985
2986 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
2987 &dvp_at);
2988 if (error)
2989 return (error);
2990
2991 if (vnode_vtype(dvp_at) != VDIR) {
2992 vnode_put(dvp_at);
2993 return (ENOTDIR);
2994 }
2995
2996 ndp->ni_dvp = dvp_at;
2997 ndp->ni_cnd.cn_flags |= USEDVP;
2998 error = namei(ndp);
2999 ndp->ni_cnd.cn_flags &= ~USEDVP;
3000 vnode_put(dvp_at);
3001 return (error);
3002 }
3003 }
3004
3005 return (namei(ndp));
3006}
3007
1c79356b
A
3008/*
3009 * Change current working directory to a given file descriptor.
3010 */
1c79356b 3011/* ARGSUSED */
2d21ac55
A
3012static int
3013common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
1c79356b 3014{
2d21ac55
A
3015 struct filedesc *fdp = p->p_fd;
3016 vnode_t vp;
3017 vnode_t tdp;
3018 vnode_t tvp;
1c79356b 3019 struct mount *mp;
1c79356b 3020 int error;
2d21ac55 3021 vfs_context_t ctx = vfs_context_current();
1c79356b 3022
b0d623f7 3023 AUDIT_ARG(fd, uap->fd);
2d21ac55
A
3024 if (per_thread && uap->fd == -1) {
3025 /*
3026 * Switching back from per-thread to per process CWD; verify we
3027 * in fact have one before proceeding. The only success case
3028 * for this code path is to return 0 preemptively after zapping
3029 * the thread structure contents.
3030 */
3031 thread_t th = vfs_context_thread(ctx);
3032 if (th) {
3033 uthread_t uth = get_bsdthread_info(th);
3034 tvp = uth->uu_cdir;
3035 uth->uu_cdir = NULLVP;
3036 if (tvp != NULLVP) {
3037 vnode_rele(tvp);
3038 return (0);
3039 }
3040 }
3041 return (EBADF);
3042 }
91447636
A
3043
3044 if ( (error = file_vnode(uap->fd, &vp)) )
3045 return(error);
3046 if ( (error = vnode_getwithref(vp)) ) {
3047 file_drop(uap->fd);
3048 return(error);
3049 }
55e303ae
A
3050
3051 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3052
2d21ac55 3053 if (vp->v_type != VDIR) {
1c79356b 3054 error = ENOTDIR;
2d21ac55
A
3055 goto out;
3056 }
3057
3058#if CONFIG_MACF
3059 error = mac_vnode_check_chdir(ctx, vp);
3060 if (error)
3061 goto out;
3062#endif
3063 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3064 if (error)
3065 goto out;
3066
1c79356b 3067 while (!error && (mp = vp->v_mountedhere) != NULL) {
91447636
A
3068 if (vfs_busy(mp, LK_NOWAIT)) {
3069 error = EACCES;
3070 goto out;
55e303ae 3071 }
2d21ac55 3072 error = VFS_ROOT(mp, &tdp, ctx);
91447636 3073 vfs_unbusy(mp);
1c79356b
A
3074 if (error)
3075 break;
91447636 3076 vnode_put(vp);
1c79356b
A
3077 vp = tdp;
3078 }
91447636
A
3079 if (error)
3080 goto out;
3081 if ( (error = vnode_ref(vp)) )
3082 goto out;
3083 vnode_put(vp);
3084
2d21ac55
A
3085 if (per_thread) {
3086 thread_t th = vfs_context_thread(ctx);
3087 if (th) {
3088 uthread_t uth = get_bsdthread_info(th);
3089 tvp = uth->uu_cdir;
3090 uth->uu_cdir = vp;
b0d623f7 3091 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
3092 } else {
3093 vnode_rele(vp);
3094 return (ENOENT);
3095 }
3096 } else {
3097 proc_fdlock(p);
3098 tvp = fdp->fd_cdir;
3099 fdp->fd_cdir = vp;
3100 proc_fdunlock(p);
3101 }
91447636
A
3102
3103 if (tvp)
3104 vnode_rele(tvp);
3105 file_drop(uap->fd);
3106
1c79356b 3107 return (0);
91447636
A
3108out:
3109 vnode_put(vp);
3110 file_drop(uap->fd);
3111
3112 return(error);
1c79356b
A
3113}
3114
2d21ac55 3115int
b0d623f7 3116fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3117{
3118 return common_fchdir(p, uap, 0);
3119}
3120
3121int
b0d623f7 3122__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3123{
3124 return common_fchdir(p, (void *)uap, 1);
3125}
3126
1c79356b 3127/*
b0d623f7 3128 * Change current working directory (".").
2d21ac55
A
3129 *
3130 * Returns: 0 Success
3131 * change_dir:ENOTDIR
3132 * change_dir:???
3133 * vnode_ref:ENOENT No such file or directory
1c79356b 3134 */
1c79356b 3135/* ARGSUSED */
2d21ac55
A
3136static int
3137common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
1c79356b 3138{
2d21ac55 3139 struct filedesc *fdp = p->p_fd;
1c79356b
A
3140 int error;
3141 struct nameidata nd;
2d21ac55
A
3142 vnode_t tvp;
3143 vfs_context_t ctx = vfs_context_current();
91447636 3144
39037602 3145 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
2d21ac55
A
3146 UIO_USERSPACE, uap->path, ctx);
3147 error = change_dir(&nd, ctx);
55e303ae 3148 if (error)
1c79356b 3149 return (error);
91447636
A
3150 if ( (error = vnode_ref(nd.ni_vp)) ) {
3151 vnode_put(nd.ni_vp);
3152 return (error);
3153 }
3154 /*
3155 * drop the iocount we picked up in change_dir
3156 */
3157 vnode_put(nd.ni_vp);
3158
2d21ac55
A
3159 if (per_thread) {
3160 thread_t th = vfs_context_thread(ctx);
3161 if (th) {
3162 uthread_t uth = get_bsdthread_info(th);
3163 tvp = uth->uu_cdir;
3164 uth->uu_cdir = nd.ni_vp;
b0d623f7 3165 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
3166 } else {
3167 vnode_rele(nd.ni_vp);
3168 return (ENOENT);
3169 }
3170 } else {
3171 proc_fdlock(p);
3172 tvp = fdp->fd_cdir;
3173 fdp->fd_cdir = nd.ni_vp;
3174 proc_fdunlock(p);
3175 }
91447636
A
3176
3177 if (tvp)
3178 vnode_rele(tvp);
3179
1c79356b
A
3180 return (0);
3181}
3182
b0d623f7
A
3183
3184/*
3185 * chdir
3186 *
3187 * Change current working directory (".") for the entire process
3188 *
3189 * Parameters: p Process requesting the call
3190 * uap User argument descriptor (see below)
3191 * retval (ignored)
3192 *
3193 * Indirect parameters: uap->path Directory path
3194 *
3195 * Returns: 0 Success
3196 * common_chdir: ENOTDIR
3197 * common_chdir: ENOENT No such file or directory
3198 * common_chdir: ???
3199 *
3200 */
2d21ac55 3201int
b0d623f7 3202chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3203{
3204 return common_chdir(p, (void *)uap, 0);
3205}
3206
b0d623f7
A
3207/*
3208 * __pthread_chdir
3209 *
3210 * Change current working directory (".") for a single thread
3211 *
3212 * Parameters: p Process requesting the call
3213 * uap User argument descriptor (see below)
3214 * retval (ignored)
3215 *
3216 * Indirect parameters: uap->path Directory path
3217 *
3218 * Returns: 0 Success
3219 * common_chdir: ENOTDIR
3220 * common_chdir: ENOENT No such file or directory
3221 * common_chdir: ???
3222 *
3223 */
2d21ac55 3224int
b0d623f7 3225__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3226{
3227 return common_chdir(p, (void *)uap, 1);
3228}
3229
3230
1c79356b
A
3231/*
3232 * Change notion of root (``/'') directory.
3233 */
1c79356b
A
3234/* ARGSUSED */
3235int
b0d623f7 3236chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
1c79356b 3237{
2d21ac55 3238 struct filedesc *fdp = p->p_fd;
1c79356b
A
3239 int error;
3240 struct nameidata nd;
2d21ac55
A
3241 vnode_t tvp;
3242 vfs_context_t ctx = vfs_context_current();
1c79356b 3243
91447636 3244 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
1c79356b
A
3245 return (error);
3246
39037602 3247 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
2d21ac55
A
3248 UIO_USERSPACE, uap->path, ctx);
3249 error = change_dir(&nd, ctx);
55e303ae 3250 if (error)
1c79356b
A
3251 return (error);
3252
2d21ac55
A
3253#if CONFIG_MACF
3254 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3255 &nd.ni_cnd);
3256 if (error) {
91447636
A
3257 vnode_put(nd.ni_vp);
3258 return (error);
3259 }
2d21ac55
A
3260#endif
3261
91447636
A
3262 if ( (error = vnode_ref(nd.ni_vp)) ) {
3263 vnode_put(nd.ni_vp);
1c79356b
A
3264 return (error);
3265 }
91447636 3266 vnode_put(nd.ni_vp);
1c79356b 3267
91447636 3268 proc_fdlock(p);
fa4905b1 3269 tvp = fdp->fd_rdir;
1c79356b 3270 fdp->fd_rdir = nd.ni_vp;
91447636
A
3271 fdp->fd_flags |= FD_CHROOT;
3272 proc_fdunlock(p);
3273
fa4905b1 3274 if (tvp != NULL)
91447636
A
3275 vnode_rele(tvp);
3276
1c79356b
A
3277 return (0);
3278}
3279
3280/*
3281 * Common routine for chroot and chdir.
2d21ac55
A
3282 *
3283 * Returns: 0 Success
3284 * ENOTDIR Not a directory
3285 * namei:??? [anything namei can return]
3286 * vnode_authorize:??? [anything vnode_authorize can return]
1c79356b
A
3287 */
3288static int
91447636 3289change_dir(struct nameidata *ndp, vfs_context_t ctx)
1c79356b 3290{
2d21ac55 3291 vnode_t vp;
1c79356b
A
3292 int error;
3293
91447636 3294 if ((error = namei(ndp)))
1c79356b 3295 return (error);
91447636 3296 nameidone(ndp);
1c79356b 3297 vp = ndp->ni_vp;
2d21ac55
A
3298
3299 if (vp->v_type != VDIR) {
91447636 3300 vnode_put(vp);
2d21ac55
A
3301 return (ENOTDIR);
3302 }
3303
3304#if CONFIG_MACF
3305 error = mac_vnode_check_chdir(ctx, vp);
3306 if (error) {
3307 vnode_put(vp);
3308 return (error);
3309 }
3310#endif
3311
3312 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3313 if (error) {
3314 vnode_put(vp);
3315 return (error);
3316 }
91447636 3317
1c79356b
A
3318 return (error);
3319}
3320
fe8ab488
A
3321/*
3322 * Free the vnode data (for directories) associated with the file glob.
3323 */
3324struct fd_vn_data *
3325fg_vn_data_alloc(void)
3326{
3327 struct fd_vn_data *fvdata;
3328
3329 /* Allocate per fd vnode data */
3330 MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
3331 M_FD_VN_DATA, M_WAITOK | M_ZERO);
3332 lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
3333 return fvdata;
3334}
3335
3336/*
3337 * Free the vnode data (for directories) associated with the file glob.
3338 */
3339void
3340fg_vn_data_free(void *fgvndata)
3341{
3342 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3343
3344 if (fvdata->fv_buf)
3345 FREE(fvdata->fv_buf, M_FD_DIRBUF);
3346 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3347 FREE(fvdata, M_FD_VN_DATA);
3348}
3349
1c79356b
A
3350/*
3351 * Check permissions, allocate an open file structure,
3352 * and call the device open routine if any.
2d21ac55
A
3353 *
3354 * Returns: 0 Success
3355 * EINVAL
3356 * EINTR
3357 * falloc:ENFILE
3358 * falloc:EMFILE
3359 * falloc:ENOMEM
3360 * vn_open_auth:???
3361 * dupfdopen:???
3362 * VNOP_ADVLOCK:???
3363 * vnode_setsize:???
b0d623f7
A
3364 *
3365 * XXX Need to implement uid, gid
1c79356b 3366 */
2d21ac55 3367int
39236c6e
A
3368open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3369 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3370 int32_t *retval)
1c79356b 3371{
2d21ac55
A
3372 proc_t p = vfs_context_proc(ctx);
3373 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2d21ac55
A
3374 struct fileproc *fp;
3375 vnode_t vp;
91447636 3376 int flags, oflags;
1c79356b
A
3377 int type, indx, error;
3378 struct flock lf;
3e170ce0 3379 struct vfs_context context;
ccc36f2f 3380
91447636 3381 oflags = uflags;
ccc36f2f
A
3382
3383 if ((oflags & O_ACCMODE) == O_ACCMODE)
3384 return(EINVAL);
3e170ce0 3385
91447636 3386 flags = FFLAGS(uflags);
3e170ce0
A
3387 CLR(flags, FENCRYPTED);
3388 CLR(flags, FUNENCRYPTED);
91447636
A
3389
3390 AUDIT_ARG(fflags, oflags);
3391 AUDIT_ARG(mode, vap->va_mode);
3392
39236c6e
A
3393 if ((error = falloc_withalloc(p,
3394 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
1c79356b 3395 return (error);
91447636 3396 }
2d21ac55 3397 uu->uu_dupfd = -indx - 1;
91447636 3398
2d21ac55
A
3399 if ((error = vn_open_auth(ndp, &flags, vap))) {
3400 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
39236c6e 3401 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
2d21ac55 3402 fp_drop(p, indx, NULL, 0);
91447636
A
3403 *retval = indx;
3404 return (0);
3405 }
1c79356b
A
3406 }
3407 if (error == ERESTART)
91447636
A
3408 error = EINTR;
3409 fp_free(p, indx, fp);
1c79356b
A
3410 return (error);
3411 }
2d21ac55
A
3412 uu->uu_dupfd = 0;
3413 vp = ndp->ni_vp;
55e303ae 3414
3e170ce0 3415 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
91447636
A
3416 fp->f_fglob->fg_ops = &vnops;
3417 fp->f_fglob->fg_data = (caddr_t)vp;
3418
1c79356b
A
3419 if (flags & (O_EXLOCK | O_SHLOCK)) {
3420 lf.l_whence = SEEK_SET;
3421 lf.l_start = 0;
3422 lf.l_len = 0;
3423 if (flags & O_EXLOCK)
3424 lf.l_type = F_WRLCK;
3425 else
3426 lf.l_type = F_RDLCK;
3427 type = F_FLOCK;
3428 if ((flags & FNONBLOCK) == 0)
3429 type |= F_WAIT;
2d21ac55
A
3430#if CONFIG_MACF
3431 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3432 F_SETLK, &lf);
3433 if (error)
3434 goto bad;
3435#endif
39236c6e 3436 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
55e303ae 3437 goto bad;
91447636 3438 fp->f_fglob->fg_flag |= FHASLOCK;
1c79356b 3439 }
55e303ae 3440
00867663
A
3441#if DEVELOPMENT || DEBUG
3442 /*
3443 * XXX VSWAP: Check for entitlements or special flag here
3444 * so we can restrict access appropriately.
3445 */
3446#else /* DEVELOPMENT || DEBUG */
3447
3448 if (vnode_isswap(vp) && (flags & (FWRITE | O_TRUNC)) && (ctx != vfs_context_kernel())) {
3449 /* block attempt to write/truncate swapfile */
3450 error = EPERM;
3451 goto bad;
3452 }
3453#endif /* DEVELOPMENT || DEBUG */
3454
91447636
A
3455 /* try to truncate by setting the size attribute */
3456 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3457 goto bad;
55e303ae 3458
fe8ab488
A
3459 /*
3460 * For directories we hold some additional information in the fd.
3461 */
3462 if (vnode_vtype(vp) == VDIR) {
3463 fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
3464 } else {
3465 fp->f_fglob->fg_vn_data = NULL;
2d21ac55
A
3466 }
3467
91447636 3468 vnode_put(vp);
55e303ae 3469
3e170ce0
A
3470 /*
3471 * The first terminal open (without a O_NOCTTY) by a session leader
3472 * results in it being set as the controlling terminal.
3473 */
3474 if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
3475 !(flags & O_NOCTTY)) {
3476 int tmp = 0;
3477
3478 (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
3479 (caddr_t)&tmp, ctx);
3480 }
3481
91447636 3482 proc_fdlock(p);
6d2010ae
A
3483 if (flags & O_CLOEXEC)
3484 *fdflags(p, indx) |= UF_EXCLOSE;
39236c6e
A
3485 if (flags & O_CLOFORK)
3486 *fdflags(p, indx) |= UF_FORKCLOSE;
6601e61a 3487 procfdtbl_releasefd(p, indx, NULL);
39037602
A
3488
3489#if CONFIG_SECLUDED_MEMORY
3490 if (secluded_for_filecache &&
3491 FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE &&
3492 vnode_vtype(vp) == VREG) {
3493 memory_object_control_t moc;
3494
3495 moc = ubc_getobject(vp, UBC_FLAGS_NONE);
3496
3497 if (moc == MEMORY_OBJECT_CONTROL_NULL) {
3498 /* nothing to do... */
3499 } else if (fp->f_fglob->fg_flag & FWRITE) {
3500 /* writable -> no longer eligible for secluded pages */
3501 memory_object_mark_eligible_for_secluded(moc,
3502 FALSE);
3503 } else if (secluded_for_filecache == 1) {
3504 char pathname[32] = { 0, };
3505 size_t copied;
3506 /* XXX FBDP: better way to detect /Applications/ ? */
3507 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3508 copyinstr(ndp->ni_dirp,
3509 pathname,
3510 sizeof (pathname),
3511 &copied);
3512 } else {
3513 copystr(CAST_DOWN(void *, ndp->ni_dirp),
3514 pathname,
3515 sizeof (pathname),
3516 &copied);
3517 }
3518 pathname[sizeof (pathname) - 1] = '\0';
3519 if (strncmp(pathname,
3520 "/Applications/",
3521 strlen("/Applications/")) == 0 &&
3522 strncmp(pathname,
3523 "/Applications/Camera.app/",
3524 strlen("/Applications/Camera.app/")) != 0) {
3525 /*
3526 * not writable
3527 * AND from "/Applications/"
3528 * AND not from "/Applications/Camera.app/"
3529 * ==> eligible for secluded
3530 */
3531 memory_object_mark_eligible_for_secluded(moc,
3532 TRUE);
3533 }
3534 } else if (secluded_for_filecache == 2) {
3535/* not implemented... */
3536 if (!strncmp(vp->v_name,
3537 DYLD_SHARED_CACHE_NAME,
3538 strlen(DYLD_SHARED_CACHE_NAME)) ||
3539 !strncmp(vp->v_name,
3540 "dyld",
3541 strlen(vp->v_name)) ||
3542 !strncmp(vp->v_name,
3543 "launchd",
3544 strlen(vp->v_name)) ||
3545 !strncmp(vp->v_name,
3546 "Camera",
3547 strlen(vp->v_name)) ||
3548 !strncmp(vp->v_name,
3549 "mediaserverd",
3550 strlen(vp->v_name))) {
3551 /*
3552 * This file matters when launching Camera:
3553 * do not store its contents in the secluded
3554 * pool that will be drained on Camera launch.
3555 */
3556 memory_object_mark_eligible_for_secluded(moc,
3557 FALSE);
3558 }
3559 }
3560 }
3561#endif /* CONFIG_SECLUDED_MEMORY */
3562
91447636
A
3563 fp_drop(p, indx, fp, 1);
3564 proc_fdunlock(p);
3565
1c79356b 3566 *retval = indx;
91447636 3567
1c79356b 3568 return (0);
55e303ae 3569bad:
3e170ce0 3570 context = *vfs_context_current();
2d21ac55 3571 context.vc_ucred = fp->f_fglob->fg_cred;
39037602 3572
fe8ab488
A
3573 if ((fp->f_fglob->fg_flag & FHASLOCK) &&
3574 (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
3575 lf.l_whence = SEEK_SET;
3576 lf.l_start = 0;
3577 lf.l_len = 0;
3578 lf.l_type = F_UNLCK;
39037602 3579
fe8ab488
A
3580 (void)VNOP_ADVLOCK(
3581 vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
3582 }
2d21ac55
A
3583
3584 vn_close(vp, fp->f_fglob->fg_flag, &context);
91447636
A
3585 vnode_put(vp);
3586 fp_free(p, indx, fp);
3587
55e303ae 3588 return (error);
1c79356b
A
3589}
3590
fe8ab488
A
3591/*
3592 * While most of the *at syscall handlers can call nameiat() which
3593 * is a wrapper around namei, the use of namei and initialisation
3594 * of nameidata are far removed and in different functions - namei
3595 * gets called in vn_open_auth for open1. So we'll just do here what
3596 * nameiat() does.
3597 */
3598static int
3599open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3600 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
3601 int dirfd)
3602{
3603 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
3604 int error;
3605 char c;
3606
3607 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3608 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3609 if (error)
3610 return (error);
3611 } else {
3612 c = *((char *)(ndp->ni_dirp));
3613 }
3614
3615 if (c != '/') {
3616 vnode_t dvp_at;
3617
3618 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3619 &dvp_at);
3620 if (error)
3621 return (error);
3622
3623 if (vnode_vtype(dvp_at) != VDIR) {
3624 vnode_put(dvp_at);
3625 return (ENOTDIR);
3626 }
3627
3628 ndp->ni_dvp = dvp_at;
3629 ndp->ni_cnd.cn_flags |= USEDVP;
3630 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
3631 retval);
3632 vnode_put(dvp_at);
3633 return (error);
3634 }
3635 }
3636
3637 return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
3638}
3639
0c530ab8 3640/*
b0d623f7 3641 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
0c530ab8
A
3642 *
3643 * Parameters: p Process requesting the open
3644 * uap User argument descriptor (see below)
3645 * retval Pointer to an area to receive the
3646 * return calue from the system call
3647 *
3648 * Indirect: uap->path Path to open (same as 'open')
3649 * uap->flags Flags to open (same as 'open'
3650 * uap->uid UID to set, if creating
3651 * uap->gid GID to set, if creating
3652 * uap->mode File mode, if creating (same as 'open')
3653 * uap->xsecurity ACL to set, if creating
3654 *
3655 * Returns: 0 Success
3656 * !0 errno value
3657 *
3658 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3659 *
3660 * XXX: We should enummerate the possible errno values here, and where
3661 * in the code they originated.
3662 */
1c79356b 3663int
b0d623f7 3664open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
91447636 3665{
2d21ac55 3666 struct filedesc *fdp = p->p_fd;
91447636
A
3667 int ciferror;
3668 kauth_filesec_t xsecdst;
3669 struct vnode_attr va;
2d21ac55 3670 struct nameidata nd;
91447636
A
3671 int cmode;
3672
b0d623f7
A
3673 AUDIT_ARG(owner, uap->uid, uap->gid);
3674
91447636
A
3675 xsecdst = NULL;
3676 if ((uap->xsecurity != USER_ADDR_NULL) &&
3677 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3678 return ciferror;
3679
91447636
A
3680 VATTR_INIT(&va);
3681 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3682 VATTR_SET(&va, va_mode, cmode);
3683 if (uap->uid != KAUTH_UID_NONE)
3684 VATTR_SET(&va, va_uid, uap->uid);
3685 if (uap->gid != KAUTH_GID_NONE)
3686 VATTR_SET(&va, va_gid, uap->gid);
3687 if (xsecdst != NULL)
3688 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3689
6d2010ae
A
3690 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3691 uap->path, vfs_context_current());
2d21ac55 3692
39236c6e
A
3693 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3694 fileproc_alloc_init, NULL, retval);
91447636
A
3695 if (xsecdst != NULL)
3696 kauth_filesec_free(xsecdst);
3697
3698 return ciferror;
3699}
3700
39037602 3701/*
316670eb 3702 * Go through the data-protected atomically controlled open (2)
39037602 3703 *
316670eb
A
3704 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3705 */
3706int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3707 int flags = uap->flags;
3708 int class = uap->class;
3709 int dpflags = uap->dpflags;
3710
39037602 3711 /*
316670eb
A
3712 * Follow the same path as normal open(2)
3713 * Look up the item if it exists, and acquire the vnode.
3714 */
3715 struct filedesc *fdp = p->p_fd;
3716 struct vnode_attr va;
3717 struct nameidata nd;
3718 int cmode;
3719 int error;
39037602 3720
316670eb
A
3721 VATTR_INIT(&va);
3722 /* Mask off all but regular access permissions */
3723 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3724 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3725
3726 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3727 uap->path, vfs_context_current());
3728
39037602
A
3729 /*
3730 * Initialize the extra fields in vnode_attr to pass down our
316670eb
A
3731 * extra fields.
3732 * 1. target cprotect class.
39037602
A
3733 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3734 */
3735 if (flags & O_CREAT) {
3e170ce0
A
3736 /* lower level kernel code validates that the class is valid before applying it. */
3737 if (class != PROTECTION_CLASS_DEFAULT) {
3738 /*
3739 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3740 * file behave the same as open (2)
3741 */
3742 VATTR_SET(&va, va_dataprotect_class, class);
3743 }
316670eb 3744 }
39037602 3745
3e170ce0 3746 if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
316670eb
A
3747 if ( flags & (O_RDWR | O_WRONLY)) {
3748 /* Not allowed to write raw encrypted bytes */
39037602
A
3749 return EINVAL;
3750 }
3e170ce0
A
3751 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
3752 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3753 }
3754 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
3755 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
3756 }
316670eb
A
3757 }
3758
39236c6e
A
3759 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3760 fileproc_alloc_init, NULL, retval);
316670eb
A
3761
3762 return error;
3763}
3764
fe8ab488
A
3765static int
3766openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
3767 int fd, enum uio_seg segflg, int *retval)
2d21ac55 3768{
fe8ab488 3769 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
91447636 3770 struct vnode_attr va;
2d21ac55 3771 struct nameidata nd;
91447636 3772 int cmode;
1c79356b 3773
91447636
A
3774 VATTR_INIT(&va);
3775 /* Mask off all but regular access permissions */
fe8ab488 3776 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
91447636
A
3777 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3778
fe8ab488
A
3779 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
3780 segflg, path, ctx);
2d21ac55 3781
fe8ab488
A
3782 return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
3783 retval, fd));
1c79356b 3784}
91447636 3785
fe8ab488
A
3786int
3787open(proc_t p, struct open_args *uap, int32_t *retval)
3788{
3789 __pthread_testcancel(1);
3790 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3791}
1c79356b 3792
fe8ab488
A
3793int
3794open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3795 int32_t *retval)
3796{
3797 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3798 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3799}
91447636 3800
1c79356b 3801int
fe8ab488
A
3802openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3803 int32_t *retval)
1c79356b 3804{
fe8ab488
A
3805 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3806 uap->mode, uap->fd, UIO_USERSPACE, retval));
3807}
91447636 3808
fe8ab488
A
3809int
3810openat(proc_t p, struct openat_args *uap, int32_t *retval)
3811{
3812 __pthread_testcancel(1);
3813 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3814}
3815
3816/*
3817 * openbyid_np: open a file given a file system id and a file system object id
3818 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3819 * file systems that don't support object ids it is a node id (uint64_t).
3820 *
3821 * Parameters: p Process requesting the open
3822 * uap User argument descriptor (see below)
3823 * retval Pointer to an area to receive the
3824 * return calue from the system call
3825 *
3826 * Indirect: uap->path Path to open (same as 'open')
3827 *
3828 * uap->fsid id of target file system
3829 * uap->objid id of target file system object
3830 * uap->flags Flags to open (same as 'open')
3831 *
3832 * Returns: 0 Success
3833 * !0 errno value
3834 *
3835 *
3836 * XXX: We should enummerate the possible errno values here, and where
3837 * in the code they originated.
3838 */
3839int
3840openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
3841{
3842 fsid_t fsid;
3843 uint64_t objid;
3844 int error;
3845 char *buf = NULL;
3846 int buflen = MAXPATHLEN;
3847 int pathlen = 0;
3848 vfs_context_t ctx = vfs_context_current();
3849
490019cf
A
3850 if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
3851 return (error);
3852 }
3853
fe8ab488
A
3854 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
3855 return (error);
3856 }
3857
3858 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3859 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
3860 return (error);
3861 }
3862
3863 AUDIT_ARG(value32, fsid.val[0]);
3864 AUDIT_ARG(value64, objid);
3865
3866 /*resolve path from fsis, objid*/
3867 do {
3868 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
3869 if (buf == NULL) {
3870 return (ENOMEM);
3871 }
3872
3873 error = fsgetpath_internal(
3874 ctx, fsid.val[0], objid,
3875 buflen, buf, &pathlen);
3876
3877 if (error) {
3878 FREE(buf, M_TEMP);
3879 buf = NULL;
3880 }
3881 } while (error == ENOSPC && (buflen += MAXPATHLEN));
3882
3883 if (error) {
3884 return error;
3885 }
3886
3887 buf[pathlen] = 0;
3888
3889 error = openat_internal(
3890 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
3891
3892 FREE(buf, M_TEMP);
3893
3894 return error;
3895}
3896
3897
3898/*
3899 * Create a special file.
3900 */
3901static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3902
3903int
3904mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
3905{
3906 struct vnode_attr va;
3907 vfs_context_t ctx = vfs_context_current();
3908 int error;
3909 struct nameidata nd;
3910 vnode_t vp, dvp;
3911
3912 VATTR_INIT(&va);
3913 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3914 VATTR_SET(&va, va_rdev, uap->dev);
91447636
A
3915
3916 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3917 if ((uap->mode & S_IFMT) == S_IFIFO)
2d21ac55 3918 return(mkfifo1(ctx, uap->path, &va));
1c79356b 3919
55e303ae 3920 AUDIT_ARG(mode, uap->mode);
b0d623f7 3921 AUDIT_ARG(value32, uap->dev);
91447636 3922
2d21ac55 3923 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
1c79356b 3924 return (error);
39037602 3925 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
2d21ac55 3926 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
3927 error = namei(&nd);
3928 if (error)
1c79356b 3929 return (error);
91447636 3930 dvp = nd.ni_dvp;
1c79356b 3931 vp = nd.ni_vp;
91447636
A
3932
3933 if (vp != NULL) {
1c79356b 3934 error = EEXIST;
91447636 3935 goto out;
1c79356b 3936 }
55e303ae 3937
91447636 3938 switch (uap->mode & S_IFMT) {
91447636
A
3939 case S_IFCHR:
3940 VATTR_SET(&va, va_type, VCHR);
3941 break;
3942 case S_IFBLK:
3943 VATTR_SET(&va, va_type, VBLK);
3944 break;
91447636
A
3945 default:
3946 error = EINVAL;
3947 goto out;
3948 }
2d21ac55
A
3949
3950#if CONFIG_MACF
6d2010ae
A
3951 error = mac_vnode_check_create(ctx,
3952 nd.ni_dvp, &nd.ni_cnd, &va);
3953 if (error)
3954 goto out;
2d21ac55
A
3955#endif
3956
3957 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3958 goto out;
3959
6d2010ae 3960 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
91447636
A
3961 goto out;
3962
3963 if (vp) {
3964 int update_flags = 0;
3965
3966 // Make sure the name & parent pointers are hooked up
3967 if (vp->v_name == NULL)
3968 update_flags |= VNODE_UPDATE_NAME;
3969 if (vp->v_parent == NULLVP)
3970 update_flags |= VNODE_UPDATE_PARENT;
3971
3972 if (update_flags)
3973 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3974
2d21ac55
A
3975#if CONFIG_FSE
3976 add_fsevent(FSE_CREATE_FILE, ctx,
91447636
A
3977 FSE_ARG_VNODE, vp,
3978 FSE_ARG_DONE);
2d21ac55 3979#endif
1c79356b 3980 }
91447636
A
3981
3982out:
3983 /*
3984 * nameidone has to happen before we vnode_put(dvp)
3985 * since it may need to release the fs_nodelock on the dvp
3986 */
3987 nameidone(&nd);
3988
3989 if (vp)
3990 vnode_put(vp);
3991 vnode_put(dvp);
3992
1c79356b
A
3993 return (error);
3994}
3995
3996/*
3997 * Create a named pipe.
2d21ac55
A
3998 *
3999 * Returns: 0 Success
4000 * EEXIST
4001 * namei:???
4002 * vnode_authorize:???
4003 * vn_create:???
1c79356b 4004 */
91447636
A
4005static int
4006mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
1c79356b 4007{
91447636 4008 vnode_t vp, dvp;
1c79356b
A
4009 int error;
4010 struct nameidata nd;
55e303ae 4011
39037602 4012 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
91447636 4013 UIO_USERSPACE, upath, ctx);
55e303ae
A
4014 error = namei(&nd);
4015 if (error)
1c79356b 4016 return (error);
91447636
A
4017 dvp = nd.ni_dvp;
4018 vp = nd.ni_vp;
4019
4020 /* check that this is a new file and authorize addition */
4021 if (vp != NULL) {
4022 error = EEXIST;
4023 goto out;
4024 }
2d21ac55
A
4025 VATTR_SET(vap, va_type, VFIFO);
4026
6d2010ae 4027 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
2d21ac55 4028 goto out;
2d21ac55 4029
6d2010ae 4030 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
91447636
A
4031out:
4032 /*
4033 * nameidone has to happen before we vnode_put(dvp)
4034 * since it may need to release the fs_nodelock on the dvp
4035 */
4036 nameidone(&nd);
4037
4038 if (vp)
4039 vnode_put(vp);
4040 vnode_put(dvp);
4041
55e303ae 4042 return error;
91447636
A
4043}
4044
0c530ab8
A
4045
4046/*
b0d623f7 4047 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
0c530ab8
A
4048 *
4049 * Parameters: p Process requesting the open
4050 * uap User argument descriptor (see below)
4051 * retval (Ignored)
4052 *
4053 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4054 * uap->uid UID to set
4055 * uap->gid GID to set
4056 * uap->mode File mode to set (same as 'mkfifo')
4057 * uap->xsecurity ACL to set, if creating
4058 *
4059 * Returns: 0 Success
4060 * !0 errno value
4061 *
4062 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4063 *
4064 * XXX: We should enummerate the possible errno values here, and where
4065 * in the code they originated.
4066 */
91447636 4067int
b0d623f7 4068mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
91447636
A
4069{
4070 int ciferror;
4071 kauth_filesec_t xsecdst;
91447636
A
4072 struct vnode_attr va;
4073
b0d623f7
A
4074 AUDIT_ARG(owner, uap->uid, uap->gid);
4075
91447636
A
4076 xsecdst = KAUTH_FILESEC_NONE;
4077 if (uap->xsecurity != USER_ADDR_NULL) {
4078 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4079 return ciferror;
4080 }
4081
91447636
A
4082 VATTR_INIT(&va);
4083 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4084 if (uap->uid != KAUTH_UID_NONE)
4085 VATTR_SET(&va, va_uid, uap->uid);
4086 if (uap->gid != KAUTH_GID_NONE)
4087 VATTR_SET(&va, va_gid, uap->gid);
4088 if (xsecdst != KAUTH_FILESEC_NONE)
4089 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4090
2d21ac55 4091 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
91447636
A
4092
4093 if (xsecdst != KAUTH_FILESEC_NONE)
4094 kauth_filesec_free(xsecdst);
4095 return ciferror;
4096}
4097
4098/* ARGSUSED */
4099int
b0d623f7 4100mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
91447636 4101{
91447636
A
4102 struct vnode_attr va;
4103
91447636
A
4104 VATTR_INIT(&va);
4105 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4106
2d21ac55 4107 return(mkfifo1(vfs_context_current(), uap->path, &va));
1c79356b
A
4108}
4109
b0d623f7
A
4110
/*
 * my_strrchr
 *
 * Return a pointer to the last position in the NUL-terminated string 'p'
 * whose character compares equal to 'ch', or NULL when there is none.
 * The terminating NUL takes part in the search, so ch == 0 yields a
 * pointer to the terminator.
 */
static char *
my_strrchr(char *p, int ch)
{
	char *last = NULL;

	do {
		if (*p == ch)
			last = p;
	} while (*p++ != '\0');

	return (last);
}
4124
4125extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4126
4127int
4128safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4129{
4130 int ret, len = _len;
4131
4132 *truncated_path = 0;
4133 ret = vn_getpath(dvp, path, &len);
4134 if (ret == 0 && len < (MAXPATHLEN - 1)) {
4135 if (leafname) {
4136 path[len-1] = '/';
4137 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
4138 if (len > MAXPATHLEN) {
4139 char *ptr;
39037602 4140
b0d623f7
A
4141 // the string got truncated!
4142 *truncated_path = 1;
4143 ptr = my_strrchr(path, '/');
4144 if (ptr) {
4145 *ptr = '\0'; // chop off the string at the last directory component
4146 }
4147 len = strlen(path) + 1;
4148 }
4149 }
4150 } else if (ret == 0) {
4151 *truncated_path = 1;
4152 } else if (ret != 0) {
4153 struct vnode *mydvp=dvp;
4154
4155 if (ret != ENOSPC) {
4156 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4157 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
39037602 4158 }
b0d623f7 4159 *truncated_path = 1;
39037602 4160
b0d623f7
A
4161 do {
4162 if (mydvp->v_parent != NULL) {
4163 mydvp = mydvp->v_parent;
4164 } else if (mydvp->v_mount) {
4165 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4166 break;
4167 } else {
4168 // no parent and no mount point? only thing is to punt and say "/" changed
4169 strlcpy(path, "/", _len);
4170 len = 2;
4171 mydvp = NULL;
4172 }
39037602 4173
b0d623f7
A
4174 if (mydvp == NULL) {
4175 break;
4176 }
4177
4178 len = _len;
4179 ret = vn_getpath(mydvp, path, &len);
4180 } while (ret == ENOSPC);
4181 }
4182
4183 return len;
4184}
4185
4186
1c79356b
A
4187/*
4188 * Make a hard file link.
2d21ac55
A
4189 *
4190 * Returns: 0 Success
4191 * EPERM
4192 * EEXIST
4193 * EXDEV
4194 * namei:???
4195 * vnode_authorize:???
4196 * VNOP_LINK:???
1c79356b 4197 */
1c79356b 4198/* ARGSUSED */
fe8ab488
A
4199static int
4200linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
4201 user_addr_t link, int flag, enum uio_seg segflg)
1c79356b 4202{
91447636 4203 vnode_t vp, dvp, lvp;
1c79356b 4204 struct nameidata nd;
fe8ab488 4205 int follow;
1c79356b 4206 int error;
b0d623f7 4207#if CONFIG_FSE
91447636 4208 fse_info finfo;
b0d623f7 4209#endif
91447636 4210 int need_event, has_listeners;
2d21ac55 4211 char *target_path = NULL;
b0d623f7 4212 int truncated=0;
1c79356b 4213
91447636
A
4214 vp = dvp = lvp = NULLVP;
4215
4216 /* look up the object we are linking to */
fe8ab488
A
4217 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
4218 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
4219 segflg, path, ctx);
4220
4221 error = nameiat(&nd, fd1);
55e303ae 4222 if (error)
1c79356b
A
4223 return (error);
4224 vp = nd.ni_vp;
91447636
A
4225
4226 nameidone(&nd);
4227
2d21ac55
A
4228 /*
4229 * Normally, linking to directories is not supported.
4230 * However, some file systems may have limited support.
4231 */
91447636 4232 if (vp->v_type == VDIR) {
39037602 4233 if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
2d21ac55
A
4234 error = EPERM; /* POSIX */
4235 goto out;
4236 }
39037602 4237
2d21ac55
A
4238 /* Linking to a directory requires ownership. */
4239 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
4240 struct vnode_attr dva;
4241
4242 VATTR_INIT(&dva);
4243 VATTR_WANTED(&dva, va_uid);
4244 if (vnode_getattr(vp, &dva, ctx) != 0 ||
4245 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
4246 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
4247 error = EACCES;
4248 goto out;
4249 }
4250 }
91447636
A
4251 }
4252
91447636 4253 /* lookup the target node */
6d2010ae
A
4254#if CONFIG_TRIGGERS
4255 nd.ni_op = OP_LINK;
4256#endif
91447636 4257 nd.ni_cnd.cn_nameiop = CREATE;
2d21ac55 4258 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
fe8ab488
A
4259 nd.ni_dirp = link;
4260 error = nameiat(&nd, fd2);
91447636
A
4261 if (error != 0)
4262 goto out;
4263 dvp = nd.ni_dvp;
4264 lvp = nd.ni_vp;
2d21ac55
A
4265
4266#if CONFIG_MACF
4267 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
4268 goto out2;
4269#endif
4270
4271 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4272 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
4273 goto out2;
4274
91447636
A
4275 /* target node must not exist */
4276 if (lvp != NULLVP) {
4277 error = EEXIST;
4278 goto out2;
4279 }
4280 /* cannot link across mountpoints */
4281 if (vnode_mount(vp) != vnode_mount(dvp)) {
4282 error = EXDEV;
4283 goto out2;
4284 }
39037602 4285
91447636 4286 /* authorize creation of the target note */
2d21ac55 4287 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
91447636
A
4288 goto out2;
4289
4290 /* and finally make the link */
2d21ac55 4291 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
91447636
A
4292 if (error)
4293 goto out2;
4294
39236c6e
A
4295#if CONFIG_MACF
4296 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
4297#endif
4298
2d21ac55 4299#if CONFIG_FSE
91447636 4300 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
2d21ac55
A
4301#else
4302 need_event = 0;
4303#endif
91447636
A
4304 has_listeners = kauth_authorize_fileop_has_listeners();
4305
4306 if (need_event || has_listeners) {
91447636
A
4307 char *link_to_path = NULL;
4308 int len, link_name_len;
4309
4310 /* build the path to the new link file */
2d21ac55
A
4311 GET_PATH(target_path);
4312 if (target_path == NULL) {
4313 error = ENOMEM;
4314 goto out2;
4315 }
4316
b0d623f7 4317 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
91447636
A
4318
4319 if (has_listeners) {
4320 /* build the path to file we are linking to */
2d21ac55
A
4321 GET_PATH(link_to_path);
4322 if (link_to_path == NULL) {
4323 error = ENOMEM;
4324 goto out2;
4325 }
4326
91447636 4327 link_name_len = MAXPATHLEN;
fe8ab488
A
4328 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
4329 /*
39037602 4330 * Call out to allow 3rd party notification of rename.
fe8ab488
A
4331 * Ignore result of kauth_authorize_fileop call.
4332 */
39037602
A
4333 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
4334 (uintptr_t)link_to_path,
fe8ab488
A
4335 (uintptr_t)target_path);
4336 }
2d21ac55
A
4337 if (link_to_path != NULL) {
4338 RELEASE_PATH(link_to_path);
4339 }
91447636 4340 }
2d21ac55 4341#if CONFIG_FSE
91447636
A
4342 if (need_event) {
4343 /* construct fsevent */
2d21ac55 4344 if (get_fse_info(vp, &finfo, ctx) == 0) {
b0d623f7
A
4345 if (truncated) {
4346 finfo.mode |= FSE_TRUNCATED_PATH;
4347 }
4348
91447636 4349 // build the path to the destination of the link
2d21ac55 4350 add_fsevent(FSE_CREATE_FILE, ctx,
91447636
A
4351 FSE_ARG_STRING, len, target_path,
4352 FSE_ARG_FINFO, &finfo,
4353 FSE_ARG_DONE);
1c79356b 4354 }
b0d623f7
A
4355 if (vp->v_parent) {
4356 add_fsevent(FSE_STAT_CHANGED, ctx,
4357 FSE_ARG_VNODE, vp->v_parent,
4358 FSE_ARG_DONE);
4359 }
1c79356b 4360 }
2d21ac55 4361#endif
1c79356b 4362 }
91447636
A
4363out2:
4364 /*
4365 * nameidone has to happen before we vnode_put(dvp)
4366 * since it may need to release the fs_nodelock on the dvp
4367 */
4368 nameidone(&nd);
2d21ac55
A
4369 if (target_path != NULL) {
4370 RELEASE_PATH(target_path);
4371 }
91447636
A
4372out:
4373 if (lvp)
4374 vnode_put(lvp);
4375 if (dvp)
4376 vnode_put(dvp);
4377 vnode_put(vp);
4378 return (error);
4379}
1c79356b 4380
fe8ab488
A
4381int
4382link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4383{
4384 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4385 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4386}
4387
4388int
4389linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4390{
4391 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4392 return (EINVAL);
4393
4394 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4395 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4396}
4397
1c79356b
A
4398/*
4399 * Make a symbolic link.
91447636
A
4400 *
4401 * We could add support for ACLs here too...
1c79356b 4402 */
1c79356b 4403/* ARGSUSED */
fe8ab488
A
4404static int
4405symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
4406 user_addr_t link, enum uio_seg segflg)
1c79356b 4407{
91447636
A
4408 struct vnode_attr va;
4409 char *path;
1c79356b
A
4410 int error;
4411 struct nameidata nd;
91447636 4412 vnode_t vp, dvp;
1c79356b 4413 size_t dummy=0;
fe8ab488
A
4414 proc_t p;
4415
4416 error = 0;
4417 if (UIO_SEG_IS_USER_SPACE(segflg)) {
4418 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
4419 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
4420 } else {
4421 path = (char *)path_data;
4422 }
91447636 4423 if (error)
1c79356b 4424 goto out;
55e303ae 4425 AUDIT_ARG(text, path); /* This is the link string */
91447636 4426
fe8ab488
A
4427 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
4428 segflg, link, ctx);
4429
4430 error = nameiat(&nd, fd);
55e303ae 4431 if (error)
1c79356b 4432 goto out;
91447636
A
4433 dvp = nd.ni_dvp;
4434 vp = nd.ni_vp;
55e303ae 4435
fe8ab488 4436 p = vfs_context_proc(ctx);
2d21ac55
A
4437 VATTR_INIT(&va);
4438 VATTR_SET(&va, va_type, VLNK);
4439 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
fe8ab488 4440
2d21ac55
A
4441#if CONFIG_MACF
4442 error = mac_vnode_check_create(ctx,
4443 dvp, &nd.ni_cnd, &va);
4444#endif
4445 if (error != 0) {
4446 goto skipit;
4447 }
91447636 4448
2d21ac55
A
4449 if (vp != NULL) {
4450 error = EEXIST;
4451 goto skipit;
4452 }
4453
4454 /* authorize */
4455 if (error == 0)
4456 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
4457 /* get default ownership, etc. */
4458 if (error == 0)
4459 error = vnode_authattr_new(dvp, &va, 0, ctx);
4460 if (error == 0)
4461 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
4462
39236c6e 4463#if CONFIG_MACF
3e170ce0 4464 if (error == 0 && vp)
39236c6e
A
4465 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
4466#endif
4467
2d21ac55 4468 /* do fallback attribute handling */
3e170ce0 4469 if (error == 0 && vp)
2d21ac55 4470 error = vnode_setattr_fallback(vp, &va, ctx);
39236c6e 4471
2d21ac55
A
4472 if (error == 0) {
4473 int update_flags = 0;
55e303ae 4474
3e170ce0 4475 /*check if a new vnode was created, else try to get one*/
2d21ac55
A
4476 if (vp == NULL) {
4477 nd.ni_cnd.cn_nameiop = LOOKUP;
6d2010ae
A
4478#if CONFIG_TRIGGERS
4479 nd.ni_op = OP_LOOKUP;
4480#endif
2d21ac55 4481 nd.ni_cnd.cn_flags = 0;
fe8ab488 4482 error = nameiat(&nd, fd);
2d21ac55 4483 vp = nd.ni_vp;
55e303ae 4484
2d21ac55
A
4485 if (vp == NULL)
4486 goto skipit;
4487 }
fe8ab488 4488
91447636 4489#if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
fe8ab488 4490 /* call out to allow 3rd party notification of rename.
2d21ac55
A
4491 * Ignore result of kauth_authorize_fileop call.
4492 */
4493 if (kauth_authorize_fileop_has_listeners() &&
4494 namei(&nd) == 0) {
4495 char *new_link_path = NULL;
4496 int len;
fe8ab488 4497
2d21ac55
A
4498 /* build the path to the new link file */
4499 new_link_path = get_pathbuff();
4500 len = MAXPATHLEN;
4501 vn_getpath(dvp, new_link_path, &len);
4502 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
91447636 4503 new_link_path[len - 1] = '/';
2d21ac55 4504 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
91447636 4505 }
fe8ab488
A
4506
4507 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
2d21ac55
A
4508 (uintptr_t)path, (uintptr_t)new_link_path);
4509 if (new_link_path != NULL)
4510 release_pathbuff(new_link_path);
4511 }
fe8ab488 4512#endif
2d21ac55
A
4513 // Make sure the name & parent pointers are hooked up
4514 if (vp->v_name == NULL)
4515 update_flags |= VNODE_UPDATE_NAME;
4516 if (vp->v_parent == NULLVP)
4517 update_flags |= VNODE_UPDATE_PARENT;
fe8ab488 4518
2d21ac55
A
4519 if (update_flags)
4520 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
91447636 4521
2d21ac55
A
4522#if CONFIG_FSE
4523 add_fsevent(FSE_CREATE_FILE, ctx,
4524 FSE_ARG_VNODE, vp,
4525 FSE_ARG_DONE);
4526#endif
4527 }
91447636
A
4528
4529skipit:
4530 /*
4531 * nameidone has to happen before we vnode_put(dvp)
4532 * since it may need to release the fs_nodelock on the dvp
4533 */
4534 nameidone(&nd);
4535
4536 if (vp)
4537 vnode_put(vp);
4538 vnode_put(dvp);
1c79356b 4539out:
fe8ab488
A
4540 if (path && (path != (char *)path_data))
4541 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
91447636 4542
1c79356b
A
4543 return (error);
4544}
4545
fe8ab488
A
4546int
4547symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4548{
4549 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4550 uap->link, UIO_USERSPACE));
4551}
4552
4553int
4554symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4555 __unused int32_t *retval)
4556{
4557 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4558 uap->path2, UIO_USERSPACE));
4559}
4560
1c79356b
A
4561/*
4562 * Delete a whiteout from the filesystem.
fe8ab488 4563 * No longer supported.
1c79356b 4564 */
1c79356b 4565int
fe8ab488 4566undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
1c79356b 4567{
fe8ab488 4568 return (ENOTSUP);
1c79356b
A
4569}
4570
4571/*
4572 * Delete a name from the filesystem.
4573 */
1c79356b 4574/* ARGSUSED */
fe8ab488 4575static int
c18c124e
A
4576unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
4577 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
1c79356b 4578{
c18c124e 4579 struct nameidata nd;
91447636 4580 vnode_t vp, dvp;
1c79356b 4581 int error;
91447636 4582 struct componentname *cnp;
2d21ac55 4583 char *path = NULL;
b0d623f7
A
4584 int len=0;
4585#if CONFIG_FSE
2d21ac55 4586 fse_info finfo;
6d2010ae 4587 struct vnode_attr va;
b0d623f7 4588#endif
c18c124e
A
4589 int flags;
4590 int need_event;
4591 int has_listeners;
4592 int truncated_path;
6d2010ae 4593 int batched;
c18c124e
A
4594 struct vnode_attr *vap;
4595 int do_retry;
4596 int retry_count = 0;
4597 int cn_flags;
4598
4599 cn_flags = LOCKPARENT;
4600 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
4601 cn_flags |= AUDITVNPATH1;
4602 /* If a starting dvp is passed, it trumps any fd passed. */
4603 if (start_dvp)
4604 cn_flags |= USEDVP;
6d2010ae 4605
c910b4d9
A
4606#if NAMEDRSRCFORK
4607 /* unlink or delete is allowed on rsrc forks and named streams */
c18c124e 4608 cn_flags |= CN_ALLOWRSRCFORK;
c910b4d9
A
4609#endif
4610
c18c124e
A
4611retry:
4612 do_retry = 0;
4613 flags = 0;
4614 need_event = 0;
4615 has_listeners = 0;
4616 truncated_path = 0;
4617 vap = NULL;
4618
4619 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
4620
4621 nd.ni_dvp = start_dvp;
4622 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
4623 cnp = &nd.ni_cnd;
91447636 4624
813fb2f6 4625continue_lookup:
c18c124e 4626 error = nameiat(&nd, fd);
2d21ac55
A
4627 if (error)
4628 return (error);
b0d623f7 4629
c18c124e
A
4630 dvp = nd.ni_dvp;
4631 vp = nd.ni_vp;
91447636 4632
6d2010ae 4633
91447636 4634 /* With Carbon delete semantics, busy files cannot be deleted */
316670eb 4635 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
91447636 4636 flags |= VNODE_REMOVE_NODELETEBUSY;
2d21ac55 4637 }
39037602 4638
39236c6e 4639 /* Skip any potential upcalls if told to. */
316670eb
A
4640 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4641 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4642 }
4643
6d2010ae
A
4644 if (vp) {
4645 batched = vnode_compound_remove_available(vp);
4646 /*
4647 * The root of a mounted filesystem cannot be deleted.
4648 */
4649 if (vp->v_flag & VROOT) {
4650 error = EBUSY;
4651 }
2d21ac55 4652
00867663
A
4653#if DEVELOPMENT || DEBUG
4654 /*
4655 * XXX VSWAP: Check for entitlements or special flag here
4656 * so we can restrict access appropriately.
4657 */
4658#else /* DEVELOPMENT || DEBUG */
4659
4660 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
4661 error = EPERM;
4662 goto out;
4663 }
4664#endif /* DEVELOPMENT || DEBUG */
4665
6d2010ae
A
4666 if (!batched) {
4667 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4668 if (error) {
3e170ce0
A
4669 if (error == ENOENT) {
4670 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4671 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4672 do_retry = 1;
4673 retry_count++;
4674 }
c18c124e 4675 }
6d2010ae
A
4676 goto out;
4677 }
4678 }
4679 } else {
4680 batched = 1;
2d21ac55 4681
6d2010ae
A
4682 if (!vnode_compound_remove_available(dvp)) {
4683 panic("No vp, but no compound remove?");
4684 }
4685 }
2d21ac55 4686
2d21ac55
A
4687#if CONFIG_FSE
4688 need_event = need_fsevent(FSE_DELETE, dvp);
4689 if (need_event) {
6d2010ae
A
4690 if (!batched) {
4691 if ((vp->v_flag & VISHARDLINK) == 0) {
4692 /* XXX need to get these data in batched VNOP */
4693 get_fse_info(vp, &finfo, ctx);
4694 }
4695 } else {
4696 error = vfs_get_notify_attributes(&va);
4697 if (error) {
4698 goto out;
4699 }
4700
4701 vap = &va;
2d21ac55
A
4702 }
4703 }
4704#endif
4705 has_listeners = kauth_authorize_fileop_has_listeners();
4706 if (need_event || has_listeners) {
2d21ac55 4707 if (path == NULL) {
6d2010ae
A
4708 GET_PATH(path);
4709 if (path == NULL) {
4710 error = ENOMEM;
4711 goto out;
4712 }
2d21ac55 4713 }
c18c124e 4714 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
2d21ac55
A
4715 }
4716
4717#if NAMEDRSRCFORK
c18c124e 4718 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
2d21ac55
A
4719 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4720 else
4721#endif
6d2010ae 4722 {
c18c124e
A
4723 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
4724 vp = nd.ni_vp;
6d2010ae
A
4725 if (error == EKEEPLOOKING) {
4726 if (!batched) {
4727 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4728 }
4729
c18c124e 4730 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
6d2010ae
A
4731 panic("EKEEPLOOKING, but continue flag not set?");
4732 }
4733
4734 if (vnode_isdir(vp)) {
4735 error = EISDIR;
4736 goto out;
4737 }
813fb2f6 4738 goto continue_lookup;
3e170ce0
A
4739 } else if (error == ENOENT && batched) {
4740 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4741 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4742 /*
4743 * For compound VNOPs, the authorization callback may
4744 * return ENOENT in case of racing hardlink lookups
4745 * hitting the name cache, redrive the lookup.
4746 */
4747 do_retry = 1;
4748 retry_count += 1;
4749 goto out;
4750 }
6d2010ae
A
4751 }
4752 }
2d21ac55
A
4753
4754 /*
39037602 4755 * Call out to allow 3rd party notification of delete.
2d21ac55
A
4756 * Ignore result of kauth_authorize_fileop call.
4757 */
1c79356b 4758 if (!error) {
2d21ac55 4759 if (has_listeners) {
39037602
A
4760 kauth_authorize_fileop(vfs_context_ucred(ctx),
4761 KAUTH_FILEOP_DELETE,
2d21ac55
A
4762 (uintptr_t)vp,
4763 (uintptr_t)path);
4764 }
91447636 4765
2d21ac55
A
4766 if (vp->v_flag & VISHARDLINK) {
4767 //
4768 // if a hardlink gets deleted we want to blow away the
4769 // v_parent link because the path that got us to this
4770 // instance of the link is no longer valid. this will
4771 // force the next call to get the path to ask the file
4772 // system instead of just following the v_parent link.
4773 //
4774 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
91447636 4775 }
91447636 4776
2d21ac55
A
4777#if CONFIG_FSE
4778 if (need_event) {
4779 if (vp->v_flag & VISHARDLINK) {
4780 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
4781 } else if (vap) {
4782 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 4783 }
b0d623f7
A
4784 if (truncated_path) {
4785 finfo.mode |= FSE_TRUNCATED_PATH;
4786 }
2d21ac55
A
4787 add_fsevent(FSE_DELETE, ctx,
4788 FSE_ARG_STRING, len, path,
4789 FSE_ARG_FINFO, &finfo,
4790 FSE_ARG_DONE);
4791 }
4792#endif
1c79356b 4793 }
6d2010ae
A
4794
4795out:
2d21ac55
A
4796 if (path != NULL)
4797 RELEASE_PATH(path);
4798
c910b4d9 4799#if NAMEDRSRCFORK
39037602 4800 /* recycle the deleted rsrc fork vnode to force a reclaim, which
b0d623f7
A
4801 * will cause its shadow file to go away if necessary.
4802 */
6d2010ae
A
4803 if (vp && (vnode_isnamedstream(vp)) &&
4804 (vp->v_parent != NULLVP) &&
4805 vnode_isshadow(vp)) {
4806 vnode_recycle(vp);
39037602 4807 }
c910b4d9 4808#endif
6d2010ae
A
4809 /*
4810 * nameidone has to happen before we vnode_put(dvp)
4811 * since it may need to release the fs_nodelock on the dvp
4812 */
c18c124e 4813 nameidone(&nd);
91447636 4814 vnode_put(dvp);
6d2010ae
A
4815 if (vp) {
4816 vnode_put(vp);
4817 }
c18c124e
A
4818
4819 if (do_retry) {
4820 goto retry;
4821 }
4822
1c79356b
A
4823 return (error);
4824}
4825
fe8ab488 4826int
c18c124e
A
4827unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
4828 enum uio_seg segflg, int unlink_flags)
fe8ab488 4829{
c18c124e
A
4830 return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
4831 unlink_flags));
fe8ab488
A
4832}
4833
1c79356b 4834/*
c18c124e 4835 * Delete a name from the filesystem using Carbon semantics.
1c79356b 4836 */
c18c124e
A
4837int
4838delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
fe8ab488 4839{
c18c124e
A
4840 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4841 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
fe8ab488
A
4842}
4843
c18c124e
A
4844/*
4845 * Delete a name from the filesystem using POSIX semantics.
4846 */
1c79356b 4847int
b0d623f7 4848unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
1c79356b 4849{
c18c124e
A
4850 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4851 uap->path, UIO_USERSPACE, 0));
fe8ab488 4852}
2d21ac55 4853
fe8ab488
A
4854int
4855unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
4856{
4857 if (uap->flag & ~AT_REMOVEDIR)
4858 return (EINVAL);
4859
4860 if (uap->flag & AT_REMOVEDIR)
4861 return (rmdirat_internal(vfs_context_current(), uap->fd,
4862 uap->path, UIO_USERSPACE));
4863 else
4864 return (unlinkat_internal(vfs_context_current(), uap->fd,
c18c124e 4865 NULLVP, uap->path, UIO_USERSPACE, 0));
1c79356b
A
4866}
4867
4868/*
4869 * Reposition read/write file offset.
4870 */
1c79356b 4871int
2d21ac55 4872lseek(proc_t p, struct lseek_args *uap, off_t *retval)
1c79356b 4873{
91447636 4874 struct fileproc *fp;
2d21ac55
A
4875 vnode_t vp;
4876 struct vfs_context *ctx;
91447636 4877 off_t offset = uap->offset, file_size;
1c79356b
A
4878 int error;
4879
91447636
A
4880 if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
4881 if (error == ENOTSUP)
4882 return (ESPIPE);
1c79356b 4883 return (error);
55e303ae 4884 }
91447636
A
4885 if (vnode_isfifo(vp)) {
4886 file_drop(uap->fd);
4887 return(ESPIPE);
4888 }
2d21ac55
A
4889
4890
4891 ctx = vfs_context_current();
4892#if CONFIG_MACF
4893 if (uap->whence == L_INCR && uap->offset == 0)
4894 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
4895 fp->f_fglob);
4896 else
4897 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
4898 fp->f_fglob);
4899 if (error) {
4900 file_drop(uap->fd);
4901 return (error);
4902 }
4903#endif
91447636
A
4904 if ( (error = vnode_getwithref(vp)) ) {
4905 file_drop(uap->fd);
4906 return(error);
4907 }
4908
1c79356b
A
4909 switch (uap->whence) {
4910 case L_INCR:
91447636 4911 offset += fp->f_fglob->fg_offset;
1c79356b
A
4912 break;
4913 case L_XTND:
2d21ac55 4914 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
55e303ae 4915 break;
91447636 4916 offset += file_size;
1c79356b
A
4917 break;
4918 case L_SET:
1c79356b 4919 break;
813fb2f6
A
4920 case SEEK_HOLE:
4921 error = VNOP_IOCTL(vp, FSCTL_FIOSEEKHOLE, (caddr_t)&offset, 0, ctx);
4922 break;
4923 case SEEK_DATA:
4924 error = VNOP_IOCTL(vp, FSCTL_FIOSEEKDATA, (caddr_t)&offset, 0, ctx);
4925 break;
1c79356b 4926 default:
55e303ae 4927 error = EINVAL;
1c79356b 4928 }
55e303ae
A
4929 if (error == 0) {
4930 if (uap->offset > 0 && offset < 0) {
4931 /* Incremented/relative move past max size */
4932 error = EOVERFLOW;
4933 } else {
4934 /*
4935 * Allow negative offsets on character devices, per
4936 * POSIX 1003.1-2001. Most likely for writing disk
4937 * labels.
4938 */
4939 if (offset < 0 && vp->v_type != VCHR) {
4940 /* Decremented/relative move before start */
4941 error = EINVAL;
4942 } else {
4943 /* Success */
91447636
A
4944 fp->f_fglob->fg_offset = offset;
4945 *retval = fp->f_fglob->fg_offset;
55e303ae
A
4946 }
4947 }
4948 }
b0d623f7 4949
39037602 4950 /*
b0d623f7
A
4951 * An lseek can affect whether data is "available to read." Use
4952 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4953 */
4954 post_event_if_success(vp, error, NOTE_NONE);
91447636
A
4955 (void)vnode_put(vp);
4956 file_drop(uap->fd);
55e303ae 4957 return (error);
1c79356b
A
4958}
4959
91447636 4960
1c79356b 4961/*
91447636 4962 * Check access permissions.
2d21ac55
A
4963 *
4964 * Returns: 0 Success
4965 * vnode_authorize:???
1c79356b 4966 */
91447636
A
4967static int
4968access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
1c79356b 4969{
91447636 4970 kauth_action_t action;
1c79356b
A
4971 int error;
4972
91447636
A
4973 /*
4974 * If just the regular access bits, convert them to something
4975 * that vnode_authorize will understand.
4976 */
4977 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4978 action = 0;
4979 if (uflags & R_OK)
4980 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4981 if (uflags & W_OK) {
4982 if (vnode_isdir(vp)) {
4983 action |= KAUTH_VNODE_ADD_FILE |
4984 KAUTH_VNODE_ADD_SUBDIRECTORY;
4985 /* might want delete rights here too */
4986 } else {
4987 action |= KAUTH_VNODE_WRITE_DATA;
4988 }
4989 }
4990 if (uflags & X_OK) {
4991 if (vnode_isdir(vp)) {
4992 action |= KAUTH_VNODE_SEARCH;
4993 } else {
4994 action |= KAUTH_VNODE_EXECUTE;
4995 }
4996 }
4997 } else {
4998 /* take advantage of definition of uflags */
4999 action = uflags >> 8;
5000 }
39037602 5001
2d21ac55
A
5002#if CONFIG_MACF
5003 error = mac_vnode_check_access(ctx, vp, uflags);
5004 if (error)
5005 return (error);
5006#endif /* MAC */
5007
91447636
A
5008 /* action == 0 means only check for existence */
5009 if (action != 0) {
5010 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
5011 } else {
5012 error = 0;
5013 }
5014
5015 return(error);
1c79356b 5016}
1c79356b 5017
91447636
A
5018
5019
2d21ac55 5020/*
b0d623f7 5021 * access_extended: Check access permissions in bulk.
2d21ac55 5022 *
b0d623f7 5023 * Description: uap->entries Pointer to an array of accessx
39037602
A
5024 * descriptor structs, plus one or
5025 * more NULL terminated strings (see
b0d623f7
A
5026 * "Notes" section below).
5027 * uap->size Size of the area pointed to by
5028 * uap->entries.
5029 * uap->results Pointer to the results array.
2d21ac55
A
5030 *
5031 * Returns: 0 Success
5032 * ENOMEM Insufficient memory
5033 * EINVAL Invalid arguments
5034 * namei:EFAULT Bad address
5035 * namei:ENAMETOOLONG Filename too long
5036 * namei:ENOENT No such file or directory
5037 * namei:ELOOP Too many levels of symbolic links
5038 * namei:EBADF Bad file descriptor
5039 * namei:ENOTDIR Not a directory
5040 * namei:???
5041 * access1:
5042 *
5043 * Implicit returns:
5044 * uap->results Array contents modified
5045 *
5046 * Notes: The uap->entries are structured as an arbitrary length array
b0d623f7 5047 * of accessx descriptors, followed by one or more NULL terminated
2d21ac55
A
5048 * strings
5049 *
5050 * struct accessx_descriptor[0]
5051 * ...
5052 * struct accessx_descriptor[n]
5053 * char name_data[0];
5054 *
5055 * We determine the entry count by walking the buffer containing
b0d623f7 5056 * the uap->entries argument descriptor. For each descriptor we
2d21ac55
A
5057 * see, the valid values for the offset ad_name_offset will be
5058 * in the byte range:
5059 *
5060 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5061 * to
5062 * [ uap->entries + uap->size - 2 ]
5063 *
5064 * since we must have at least one string, and the string must
b0d623f7 5065 * be at least one character plus the NULL terminator in length.
39037602 5066 *
2d21ac55
A
5067 * XXX: Need to support the check-as uid argument
5068 */
1c79356b 5069int
b0d623f7 5070access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
1c79356b 5071{
2d21ac55
A
5072 struct accessx_descriptor *input = NULL;
5073 errno_t *result = NULL;
5074 errno_t error = 0;
5075 int wantdelete = 0;
5076 unsigned int desc_max, desc_actual, i, j;
91447636 5077 struct vfs_context context;
1c79356b 5078 struct nameidata nd;
91447636 5079 int niopts;
2d21ac55
A
5080 vnode_t vp = NULL;
5081 vnode_t dvp = NULL;
5082#define ACCESSX_MAX_DESCR_ON_STACK 10
5083 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
91447636 5084
91447636
A
5085 context.vc_ucred = NULL;
5086
2d21ac55
A
5087 /*
5088 * Validate parameters; if valid, copy the descriptor array and string
5089 * arguments into local memory. Before proceeding, the following
5090 * conditions must have been met:
5091 *
5092 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5093 * o There must be sufficient room in the request for at least one
5094 * descriptor and a one yte NUL terminated string.
5095 * o The allocation of local storage must not fail.
5096 */
91447636
A
5097 if (uap->size > ACCESSX_MAX_TABLESIZE)
5098 return(ENOMEM);
2d21ac55 5099 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
91447636 5100 return(EINVAL);
2d21ac55
A
5101 if (uap->size <= sizeof (stack_input)) {
5102 input = stack_input;
5103 } else {
91447636
A
5104 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
5105 if (input == NULL) {
5106 error = ENOMEM;
5107 goto out;
5108 }
2d21ac55 5109 }
91447636 5110 error = copyin(uap->entries, input, uap->size);
55e303ae 5111 if (error)
91447636 5112 goto out;
1c79356b 5113
b0d623f7
A
5114 AUDIT_ARG(opaque, input, uap->size);
5115
91447636 5116 /*
2d21ac55
A
5117 * Force NUL termination of the copyin buffer to avoid nami() running
5118 * off the end. If the caller passes us bogus data, they may get a
5119 * bogus result.
5120 */
5121 ((char *)input)[uap->size - 1] = 0;
5122
5123 /*
5124 * Access is defined as checking against the process' real identity,
5125 * even if operations are checking the effective identity. This
5126 * requires that we use a local vfs context.
91447636
A
5127 */
5128 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
2d21ac55 5129 context.vc_thread = current_thread();
91447636
A
5130
5131 /*
2d21ac55
A
5132 * Find out how many entries we have, so we can allocate the result
5133 * array by walking the list and adjusting the count downward by the
5134 * earliest string offset we see.
91447636 5135 */
2d21ac55
A
5136 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
5137 desc_actual = desc_max;
5138 for (i = 0; i < desc_actual; i++) {
91447636 5139 /*
2d21ac55
A
5140 * Take the offset to the name string for this entry and
5141 * convert to an input array index, which would be one off
5142 * the end of the array if this entry was the lowest-addressed
5143 * name string.
91447636
A
5144 */
5145 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
2d21ac55
A
5146
5147 /*
5148 * An offset greater than the max allowable offset is an error.
5149 * It is also an error for any valid entry to point
5150 * to a location prior to the end of the current entry, if
5151 * it's not a reference to the string of the previous entry.
5152 */
5153 if (j > desc_max || (j != 0 && j <= i)) {
91447636
A
5154 error = EINVAL;
5155 goto out;
5156 }
2d21ac55 5157
39037602
A
5158 /* Also do not let ad_name_offset point to something beyond the size of the input */
5159 if (input[i].ad_name_offset >= uap->size) {
5160 error = EINVAL;
5161 goto out;
5162 }
5163
2d21ac55
A
5164 /*
5165 * An offset of 0 means use the previous descriptor's offset;
5166 * this is used to chain multiple requests for the same file
5167 * to avoid multiple lookups.
5168 */
91447636 5169 if (j == 0) {
2d21ac55 5170 /* This is not valid for the first entry */
91447636
A
5171 if (i == 0) {
5172 error = EINVAL;
5173 goto out;
5174 }
5175 continue;
5176 }
2d21ac55
A
5177
5178 /*
5179 * If the offset of the string for this descriptor is before
5180 * what we believe is the current actual last descriptor,
5181 * then we need to adjust our estimate downward; this permits
5182 * the string table following the last descriptor to be out
5183 * of order relative to the descriptor list.
5184 */
5185 if (j < desc_actual)
5186 desc_actual = j;
91447636 5187 }
2d21ac55
A
5188
5189 /*
5190 * We limit the actual number of descriptors we are willing to process
5191 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5192 * requested does not exceed this limit,
5193 */
5194 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
91447636
A
5195 error = ENOMEM;
5196 goto out;
5197 }
2d21ac55 5198 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
91447636
A
5199 if (result == NULL) {
5200 error = ENOMEM;
5201 goto out;
5202 }
5203
5204 /*
2d21ac55
A
5205 * Do the work by iterating over the descriptor entries we know to
5206 * at least appear to contain valid data.
91447636
A
5207 */
5208 error = 0;
2d21ac55 5209 for (i = 0; i < desc_actual; i++) {
91447636 5210 /*
2d21ac55
A
5211 * If the ad_name_offset is 0, then we use the previous
5212 * results to make the check; otherwise, we are looking up
5213 * a new file name.
91447636
A
5214 */
5215 if (input[i].ad_name_offset != 0) {
5216 /* discard old vnodes */
5217 if (vp) {
5218 vnode_put(vp);
5219 vp = NULL;
5220 }
5221 if (dvp) {
5222 vnode_put(dvp);
5223 dvp = NULL;
5224 }
39037602 5225
2d21ac55
A
5226 /*
5227 * Scan forward in the descriptor list to see if we
5228 * need the parent vnode. We will need it if we are
5229 * deleting, since we must have rights to remove
5230 * entries in the parent directory, as well as the
5231 * rights to delete the object itself.
5232 */
91447636 5233 wantdelete = input[i].ad_flags & _DELETE_OK;
2d21ac55 5234 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
91447636
A
5235 if (input[j].ad_flags & _DELETE_OK)
5236 wantdelete = 1;
39037602 5237
91447636 5238 niopts = FOLLOW | AUDITVNPATH1;
2d21ac55 5239
91447636
A
5240 /* need parent for vnode_authorize for deletion test */
5241 if (wantdelete)
5242 niopts |= WANTPARENT;
5243
5244 /* do the lookup */
6d2010ae
A
5245 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
5246 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
5247 &context);
91447636
A
5248 error = namei(&nd);
5249 if (!error) {
5250 vp = nd.ni_vp;
5251 if (wantdelete)
5252 dvp = nd.ni_dvp;
5253 }
5254 nameidone(&nd);
5255 }
5256
5257 /*
5258 * Handle lookup errors.
5259 */
5260 switch(error) {
5261 case ENOENT:
5262 case EACCES:
5263 case EPERM:
5264 case ENOTDIR:
5265 result[i] = error;
5266 break;
5267 case 0:
5268 /* run this access check */
5269 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
5270 break;
5271 default:
5272 /* fatal lookup error */
5273
5274 goto out;
5275 }
5276 }
5277
b0d623f7
A
5278 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
5279
91447636 5280 /* copy out results */
2d21ac55 5281 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
39037602 5282
91447636 5283out:
2d21ac55 5284 if (input && input != stack_input)
91447636
A
5285 FREE(input, M_TEMP);
5286 if (result)
5287 FREE(result, M_TEMP);
5288 if (vp)
5289 vnode_put(vp);
5290 if (dvp)
5291 vnode_put(dvp);
0c530ab8
A
5292 if (IS_VALID_CRED(context.vc_ucred))
5293 kauth_cred_unref(&context.vc_ucred);
91447636 5294 return(error);
1c79356b
A
5295}
5296
2d21ac55
A
5297
5298/*
5299 * Returns: 0 Success
5300 * namei:EFAULT Bad address
5301 * namei:ENAMETOOLONG Filename too long
5302 * namei:ENOENT No such file or directory
5303 * namei:ELOOP Too many levels of symbolic links
5304 * namei:EBADF Bad file descriptor
5305 * namei:ENOTDIR Not a directory
5306 * namei:???
5307 * access1:
5308 */
fe8ab488
A
5309static int
5310faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5311 int flag, enum uio_seg segflg)
1c79356b 5312{
1c79356b
A
5313 int error;
5314 struct nameidata nd;
91447636
A
5315 int niopts;
5316 struct vfs_context context;
cf7d32b8
A
5317#if NAMEDRSRCFORK
5318 int is_namedstream = 0;
5319#endif
5320
91447636 5321 /*
fe8ab488
A
5322 * Unless the AT_EACCESS option is used, Access is defined as checking
5323 * against the process' real identity, even if operations are checking
5324 * the effective identity. So we need to tweak the credential
5325 * in the context for that case.
91447636 5326 */
fe8ab488
A
5327 if (!(flag & AT_EACCESS))
5328 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5329 else
5330 context.vc_ucred = ctx->vc_ucred;
5331 context.vc_thread = ctx->vc_thread;
5332
91447636
A
5333
5334 niopts = FOLLOW | AUDITVNPATH1;
5335 /* need parent for vnode_authorize for deletion test */
fe8ab488 5336 if (amode & _DELETE_OK)
91447636 5337 niopts |= WANTPARENT;
fe8ab488
A
5338 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5339 path, &context);
2d21ac55
A
5340
5341#if NAMEDRSRCFORK
5342 /* access(F_OK) calls are allowed for resource forks. */
fe8ab488 5343 if (amode == F_OK)
2d21ac55
A
5344 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5345#endif
fe8ab488 5346 error = nameiat(&nd, fd);
91447636
A
5347 if (error)
5348 goto out;
5349
cf7d32b8 5350#if NAMEDRSRCFORK
39037602 5351 /* Grab reference on the shadow stream file vnode to
b0d623f7
A
5352 * force an inactive on release which will mark it
5353 * for recycle.
cf7d32b8
A
5354 */
5355 if (vnode_isnamedstream(nd.ni_vp) &&
b0d623f7
A
5356 (nd.ni_vp->v_parent != NULLVP) &&
5357 vnode_isshadow(nd.ni_vp)) {
cf7d32b8
A
5358 is_namedstream = 1;
5359 vnode_ref(nd.ni_vp);
5360 }
5361#endif
5362
fe8ab488 5363 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
b0d623f7 5364
cf7d32b8
A
5365#if NAMEDRSRCFORK
5366 if (is_namedstream) {
5367 vnode_rele(nd.ni_vp);
5368 }
5369#endif
5370
91447636 5371 vnode_put(nd.ni_vp);
fe8ab488 5372 if (amode & _DELETE_OK)
91447636
A
5373 vnode_put(nd.ni_dvp);
5374 nameidone(&nd);
39037602 5375
91447636 5376out:
fe8ab488
A
5377 if (!(flag & AT_EACCESS))
5378 kauth_cred_unref(&context.vc_ucred);
5379 return (error);
5380}
5381
5382int
5383access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5384{
5385 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5386 uap->path, uap->flags, 0, UIO_USERSPACE));
91447636
A
5387}
5388
fe8ab488
A
5389int
5390faccessat(__unused proc_t p, struct faccessat_args *uap,
5391 __unused int32_t *retval)
5392{
5393 if (uap->flag & ~AT_EACCESS)
5394 return (EINVAL);
5395
5396 return (faccessat_internal(vfs_context_current(), uap->fd,
5397 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5398}
91447636 5399
2d21ac55
A
5400/*
5401 * Returns: 0 Success
5402 * EFAULT
5403 * copyout:EFAULT
5404 * namei:???
5405 * vn_stat:???
5406 */
91447636 5407static int
fe8ab488
A
5408fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5409 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5410 enum uio_seg segflg, int fd, int flag)
91447636 5411{
fe8ab488
A
5412 struct nameidata nd;
5413 int follow;
b0d623f7
A
5414 union {
5415 struct stat sb;
5416 struct stat64 sb64;
5417 } source;
5418 union {
5419 struct user64_stat user64_sb;
5420 struct user32_stat user32_sb;
5421 struct user64_stat64 user64_sb64;
5422 struct user32_stat64 user32_sb64;
5423 } dest;
91447636
A
5424 caddr_t sbp;
5425 int error, my_size;
5426 kauth_filesec_t fsec;
5427 size_t xsecurity_bufsize;
2d21ac55 5428 void * statptr;
1c79356b 5429
fe8ab488
A
5430 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5431 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5432 segflg, path, ctx);
5433
2d21ac55 5434#if NAMEDRSRCFORK
cf7d32b8 5435 int is_namedstream = 0;
2d21ac55 5436 /* stat calls are allowed for resource forks. */
fe8ab488 5437 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
2d21ac55 5438#endif
fe8ab488 5439 error = nameiat(&nd, fd);
91447636 5440 if (error)
1c79356b 5441 return (error);
91447636 5442 fsec = KAUTH_FILESEC_NONE;
b0d623f7
A
5443
5444 statptr = (void *)&source;
cf7d32b8
A
5445
5446#if NAMEDRSRCFORK
39037602
A
5447 /* Grab reference on the shadow stream file vnode to
5448 * force an inactive on release which will mark it
b0d623f7 5449 * for recycle.
cf7d32b8 5450 */
fe8ab488
A
5451 if (vnode_isnamedstream(nd.ni_vp) &&
5452 (nd.ni_vp->v_parent != NULLVP) &&
5453 vnode_isshadow(nd.ni_vp)) {
cf7d32b8 5454 is_namedstream = 1;
fe8ab488 5455 vnode_ref(nd.ni_vp);
cf7d32b8
A
5456 }
5457#endif
5458
fe8ab488 5459 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
2d21ac55
A
5460
5461#if NAMEDRSRCFORK
cf7d32b8 5462 if (is_namedstream) {
fe8ab488 5463 vnode_rele(nd.ni_vp);
2d21ac55
A
5464 }
5465#endif
fe8ab488
A
5466 vnode_put(nd.ni_vp);
5467 nameidone(&nd);
91447636 5468
1c79356b
A
5469 if (error)
5470 return (error);
91447636 5471 /* Zap spare fields */
2d21ac55 5472 if (isstat64 != 0) {
b0d623f7
A
5473 source.sb64.st_lspare = 0;
5474 source.sb64.st_qspare[0] = 0LL;
5475 source.sb64.st_qspare[1] = 0LL;
2d21ac55 5476 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
39037602 5477 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
b0d623f7
A
5478 my_size = sizeof(dest.user64_sb64);
5479 sbp = (caddr_t)&dest.user64_sb64;
2d21ac55 5480 } else {
39037602 5481 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
b0d623f7
A
5482 my_size = sizeof(dest.user32_sb64);
5483 sbp = (caddr_t)&dest.user32_sb64;
2d21ac55
A
5484 }
5485 /*
5486 * Check if we raced (post lookup) against the last unlink of a file.
5487 */
b0d623f7
A
5488 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5489 source.sb64.st_nlink = 1;
2d21ac55
A
5490 }
5491 } else {
b0d623f7
A
5492 source.sb.st_lspare = 0;
5493 source.sb.st_qspare[0] = 0LL;
5494 source.sb.st_qspare[1] = 0LL;
2d21ac55 5495 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
39037602 5496 munge_user64_stat(&source.sb, &dest.user64_sb);
b0d623f7
A
5497 my_size = sizeof(dest.user64_sb);
5498 sbp = (caddr_t)&dest.user64_sb;
2d21ac55 5499 } else {
39037602 5500 munge_user32_stat(&source.sb, &dest.user32_sb);
b0d623f7
A
5501 my_size = sizeof(dest.user32_sb);
5502 sbp = (caddr_t)&dest.user32_sb;
2d21ac55
A
5503 }
5504
5505 /*
5506 * Check if we raced (post lookup) against the last unlink of a file.
5507 */
b0d623f7
A
5508 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5509 source.sb.st_nlink = 1;
2d21ac55 5510 }
91447636
A
5511 }
5512 if ((error = copyout(sbp, ub, my_size)) != 0)
5513 goto out;
5514
5515 /* caller wants extended security information? */
5516 if (xsecurity != USER_ADDR_NULL) {
5517
5518 /* did we get any? */
5519 if (fsec == KAUTH_FILESEC_NONE) {
5520 if (susize(xsecurity_size, 0) != 0) {
5521 error = EFAULT;
5522 goto out;
5523 }
5524 } else {
5525 /* find the user buffer size */
5526 xsecurity_bufsize = fusize(xsecurity_size);
5527
5528 /* copy out the actual data size */
5529 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5530 error = EFAULT;
5531 goto out;
5532 }
5533
5534 /* if the caller supplied enough room, copy out to it */
5535 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5536 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5537 }
5538 }
5539out:
5540 if (fsec != KAUTH_FILESEC_NONE)
5541 kauth_filesec_free(fsec);
1c79356b
A
5542 return (error);
5543}
5544
b0d623f7
A
5545/*
5546 * stat_extended: Get file status; with extended security (ACL).
5547 *
5548 * Parameters: p (ignored)
5549 * uap User argument descriptor (see below)
39037602 5550 * retval (ignored)
b0d623f7
A
5551 *
5552 * Indirect: uap->path Path of file to get status from
5553 * uap->ub User buffer (holds file status info)
5554 * uap->xsecurity ACL to get (extended security)
5555 * uap->xsecurity_size Size of ACL
39037602 5556 *
b0d623f7
A
5557 * Returns: 0 Success
5558 * !0 errno value
5559 *
5560 */
2d21ac55 5561int
fe8ab488
A
5562stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5563 __unused int32_t *retval)
2d21ac55 5564{
fe8ab488
A
5565 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5566 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5567 0));
1c79356b
A
5568}
5569
2d21ac55
A
5570/*
5571 * Returns: 0 Success
fe8ab488 5572 * fstatat_internal:??? [see fstatat_internal() in this file]
2d21ac55 5573 */
91447636 5574int
b0d623f7 5575stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
1c79356b 5576{
fe8ab488
A
5577 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5578 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
91447636 5579}
1c79356b 5580
91447636 5581int
b0d623f7 5582stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
91447636 5583{
fe8ab488
A
5584 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5585 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
1c79356b 5586}
1c79356b 5587
b0d623f7
A
5588/*
5589 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5590 *
5591 * Parameters: p (ignored)
5592 * uap User argument descriptor (see below)
39037602 5593 * retval (ignored)
b0d623f7
A
5594 *
5595 * Indirect: uap->path Path of file to get status from
5596 * uap->ub User buffer (holds file status info)
5597 * uap->xsecurity ACL to get (extended security)
5598 * uap->xsecurity_size Size of ACL
39037602 5599 *
b0d623f7
A
5600 * Returns: 0 Success
5601 * !0 errno value
5602 *
5603 */
2d21ac55 5604int
b0d623f7 5605stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
2d21ac55 5606{
fe8ab488
A
5607 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5608 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5609 0));
2d21ac55 5610}
91447636 5611
b0d623f7
A
5612/*
5613 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5614 *
5615 * Parameters: p (ignored)
5616 * uap User argument descriptor (see below)
39037602 5617 * retval (ignored)
b0d623f7
A
5618 *
5619 * Indirect: uap->path Path of file to get status from
5620 * uap->ub User buffer (holds file status info)
5621 * uap->xsecurity ACL to get (extended security)
5622 * uap->xsecurity_size Size of ACL
39037602 5623 *
b0d623f7
A
5624 * Returns: 0 Success
5625 * !0 errno value
5626 *
5627 */
2d21ac55 5628int
b0d623f7 5629lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
2d21ac55 5630{
fe8ab488
A
5631 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5632 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5633 AT_SYMLINK_NOFOLLOW));
91447636
A
5634}
5635
fe8ab488
A
5636/*
5637 * Get file status; this version does not follow links.
5638 */
91447636 5639int
b0d623f7 5640lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
91447636 5641{
fe8ab488
A
5642 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5643 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
2d21ac55 5644}
b0d623f7 5645
2d21ac55 5646int
b0d623f7 5647lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
2d21ac55 5648{
fe8ab488
A
5649 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5650 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
91447636
A
5651}
5652
b0d623f7
A
5653/*
5654 * lstat64_extended: Get file status; can handle large inode numbers; does not
5655 * follow links; with extended security (ACL).
5656 *
5657 * Parameters: p (ignored)
5658 * uap User argument descriptor (see below)
39037602 5659 * retval (ignored)
b0d623f7
A
5660 *
5661 * Indirect: uap->path Path of file to get status from
5662 * uap->ub User buffer (holds file status info)
5663 * uap->xsecurity ACL to get (extended security)
5664 * uap->xsecurity_size Size of ACL
39037602 5665 *
b0d623f7
A
5666 * Returns: 0 Success
5667 * !0 errno value
5668 *
5669 */
91447636 5670int
b0d623f7 5671lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
91447636 5672{
fe8ab488
A
5673 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5674 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5675 AT_SYMLINK_NOFOLLOW));
5676}
5677
5678int
5679fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5680{
5681 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5682 return (EINVAL);
5683
5684 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5685 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5686}
5687
5688int
5689fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5690 __unused int32_t *retval)
5691{
5692 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5693 return (EINVAL);
5694
5695 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5696 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
91447636
A
5697}
5698
1c79356b 5699/*
91447636 5700 * Get configurable pathname variables.
2d21ac55
A
5701 *
5702 * Returns: 0 Success
5703 * namei:???
5704 * vn_pathconf:???
5705 *
5706 * Notes: Global implementation constants are intended to be
5707 * implemented in this function directly; all other constants
5708 * are per-FS implementation, and therefore must be handled in
5709 * each respective FS, instead.
5710 *
5711 * XXX We implement some things globally right now that should actually be
5712 * XXX per-FS; we will need to deal with this at some point.
1c79356b 5713 */
1c79356b
A
5714/* ARGSUSED */
5715int
b0d623f7 5716pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
1c79356b 5717{
1c79356b
A
5718 int error;
5719 struct nameidata nd;
2d21ac55 5720 vfs_context_t ctx = vfs_context_current();
91447636 5721
39037602 5722 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
2d21ac55 5723 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
5724 error = namei(&nd);
5725 if (error)
1c79356b 5726 return (error);
1c79356b 5727
2d21ac55 5728 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
1c79356b 5729
91447636
A
5730 vnode_put(nd.ni_vp);
5731 nameidone(&nd);
1c79356b
A
5732 return (error);
5733}
5734
5735/*
5736 * Return target name of a symbolic link.
5737 */
1c79356b 5738/* ARGSUSED */
fe8ab488
A
5739static int
5740readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
5741 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
5742 int *retval)
1c79356b 5743{
2d21ac55 5744 vnode_t vp;
91447636 5745 uio_t auio;
1c79356b
A
5746 int error;
5747 struct nameidata nd;
91447636
A
5748 char uio_buf[ UIO_SIZEOF(1) ];
5749
fe8ab488
A
5750 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5751 seg, path, ctx);
5752
5753 error = nameiat(&nd, fd);
55e303ae 5754 if (error)
1c79356b
A
5755 return (error);
5756 vp = nd.ni_vp;
91447636
A
5757
5758 nameidone(&nd);
5759
fe8ab488
A
5760 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
5761 &uio_buf[0], sizeof(uio_buf));
5762 uio_addiov(auio, buf, bufsize);
5763 if (vp->v_type != VLNK) {
1c79356b 5764 error = EINVAL;
fe8ab488 5765 } else {
2d21ac55 5766#if CONFIG_MACF
fe8ab488 5767 error = mac_vnode_check_readlink(ctx, vp);
2d21ac55
A
5768#endif
5769 if (error == 0)
fe8ab488
A
5770 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
5771 ctx);
91447636 5772 if (error == 0)
2d21ac55 5773 error = VNOP_READLINK(vp, auio, ctx);
91447636
A
5774 }
5775 vnode_put(vp);
b0d623f7 5776
fe8ab488 5777 *retval = bufsize - (int)uio_resid(auio);
1c79356b
A
5778 return (error);
5779}
5780
fe8ab488
A
5781int
5782readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5783{
5784 enum uio_seg procseg;
5785
5786 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5787 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5788 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5789 uap->count, procseg, retval));
5790}
5791
5792int
5793readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5794{
5795 enum uio_seg procseg;
5796
5797 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5798 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5799 procseg, uap->buf, uap->bufsize, procseg, retval));
5800}
5801
5802/*
5803 * Change file flags.
813fb2f6
A
5804 *
5805 * NOTE: this will vnode_put() `vp'
91447636
A
5806 */
5807static int
5808chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5809{
5810 struct vnode_attr va;
5811 kauth_action_t action;
5812 int error;
5813
5814 VATTR_INIT(&va);
5815 VATTR_SET(&va, va_flags, flags);
5816
2d21ac55
A
5817#if CONFIG_MACF
5818 error = mac_vnode_check_setflags(ctx, vp, flags);
5819 if (error)
5820 goto out;
5821#endif
5822
91447636
A
5823 /* request authorisation, disregard immutability */
5824 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5825 goto out;
5826 /*
5827 * Request that the auth layer disregard those file flags it's allowed to when
5828 * authorizing this operation; we need to do this in order to be able to
5829 * clear immutable flags.
5830 */
5831 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
5832 goto out;
5833 error = vnode_setattr(vp, &va, ctx);
5834
39037602
A
5835#if CONFIG_MACF
5836 if (error == 0)
5837 mac_vnode_notify_setflags(ctx, vp, flags);
5838#endif
5839
2d21ac55
A
5840 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5841 error = ENOTSUP;
5842 }
91447636
A
5843out:
5844 vnode_put(vp);
5845 return(error);
5846}
5847
1c79356b
A
5848/*
5849 * Change flags of a file given a path name.
5850 */
1c79356b
A
5851/* ARGSUSED */
5852int
b0d623f7 5853chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
1c79356b 5854{
2d21ac55
A
5855 vnode_t vp;
5856 vfs_context_t ctx = vfs_context_current();
1c79356b
A
5857 int error;
5858 struct nameidata nd;
5859
55e303ae 5860 AUDIT_ARG(fflags, uap->flags);
39037602 5861 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
2d21ac55 5862 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
5863 error = namei(&nd);
5864 if (error)
1c79356b
A
5865 return (error);
5866 vp = nd.ni_vp;
91447636
A
5867 nameidone(&nd);
5868
813fb2f6 5869 /* we don't vnode_put() here because chflags1 does internally */
2d21ac55 5870 error = chflags1(vp, uap->flags, ctx);
91447636
A
5871
5872 return(error);
1c79356b
A
5873}
5874
5875/*
5876 * Change flags of a file given a file descriptor.
5877 */
1c79356b
A
5878/* ARGSUSED */
5879int
b0d623f7 5880fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
1c79356b 5881{
2d21ac55 5882 vnode_t vp;
1c79356b
A
5883 int error;
5884
55e303ae
A
5885 AUDIT_ARG(fd, uap->fd);
5886 AUDIT_ARG(fflags, uap->flags);
91447636 5887 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 5888 return (error);
55e303ae 5889
91447636
A
5890 if ((error = vnode_getwithref(vp))) {
5891 file_drop(uap->fd);
5892 return(error);
5893 }
e5568f75
A
5894
5895 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5896
813fb2f6 5897 /* we don't vnode_put() here because chflags1 does internally */
2d21ac55 5898 error = chflags1(vp, uap->flags, vfs_context_current());
91447636
A
5899
5900 file_drop(uap->fd);
5901 return (error);
5902}
5903
5904/*
5905 * Change security information on a filesystem object.
2d21ac55
A
5906 *
5907 * Returns: 0 Success
5908 * EPERM Operation not permitted
5909 * vnode_authattr:??? [anything vnode_authattr can return]
5910 * vnode_authorize:??? [anything vnode_authorize can return]
5911 * vnode_setattr:??? [anything vnode_setattr can return]
5912 *
5913 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5914 * translated to EPERM before being returned.
91447636
A
5915 */
5916static int
fe8ab488 5917chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
91447636
A
5918{
5919 kauth_action_t action;
5920 int error;
39037602 5921
b0d623f7
A
5922 AUDIT_ARG(mode, vap->va_mode);
5923 /* XXX audit new args */
91447636 5924
2d21ac55
A
5925#if NAMEDSTREAMS
5926 /* chmod calls are not allowed for resource forks. */
5927 if (vp->v_flag & VISNAMEDSTREAM) {
5928 return (EPERM);
5929 }
5930#endif
5931
5932#if CONFIG_MACF
316670eb
A
5933 if (VATTR_IS_ACTIVE(vap, va_mode) &&
5934 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
2d21ac55 5935 return (error);
39037602
A
5936
5937 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
5938 if ((error = mac_vnode_check_setowner(ctx, vp,
5939 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
5940 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1)))
5941 return (error);
5942 }
5943
5944 if (VATTR_IS_ACTIVE(vap, va_acl) &&
5945 (error = mac_vnode_check_setacl(ctx, vp, vap->va_acl)))
5946 return (error);
2d21ac55
A
5947#endif
5948
91447636
A
5949 /* make sure that the caller is allowed to set this security information */
5950 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
5951 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5952 if (error == EACCES)
5953 error = EPERM;
5954 return(error);
5955 }
39037602
A
5956
5957 if ((error = vnode_setattr(vp, vap, ctx)) != 0)
5958 return (error);
5959
5960#if CONFIG_MACF
5961 if (VATTR_IS_ACTIVE(vap, va_mode))
5962 mac_vnode_notify_setmode(ctx, vp, (mode_t)vap->va_mode);
5963
5964 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))
5965 mac_vnode_notify_setowner(ctx, vp,
5966 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
5967 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1);
5968
5969 if (VATTR_IS_ACTIVE(vap, va_acl))
5970 mac_vnode_notify_setacl(ctx, vp, vap->va_acl);
5971#endif
91447636 5972
1c79356b
A
5973 return (error);
5974}
5975
91447636 5976
1c79356b 5977/*
b0d623f7 5978 * Change mode of a file given a path name.
2d21ac55
A
5979 *
5980 * Returns: 0 Success
5981 * namei:??? [anything namei can return]
fe8ab488 5982 * chmod_vnode:??? [anything chmod_vnode can return]
1c79356b 5983 */
91447636 5984static int
fe8ab488
A
5985chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
5986 int fd, int flag, enum uio_seg segflg)
91447636
A
5987{
5988 struct nameidata nd;
fe8ab488 5989 int follow, error;
91447636 5990
fe8ab488
A
5991 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5992 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
5993 segflg, path, ctx);
5994 if ((error = nameiat(&nd, fd)))
91447636 5995 return (error);
fe8ab488 5996 error = chmod_vnode(ctx, nd.ni_vp, vap);
91447636
A
5997 vnode_put(nd.ni_vp);
5998 nameidone(&nd);
5999 return(error);
6000}
6001
0c530ab8 6002/*
39037602 6003 * chmod_extended: Change the mode of a file given a path name; with extended
b0d623f7 6004 * argument list (including extended security (ACL)).
0c530ab8
A
6005 *
6006 * Parameters: p Process requesting the open
6007 * uap User argument descriptor (see below)
6008 * retval (ignored)
6009 *
6010 * Indirect: uap->path Path to object (same as 'chmod')
6011 * uap->uid UID to set
6012 * uap->gid GID to set
6013 * uap->mode File mode to set (same as 'chmod')
6014 * uap->xsecurity ACL to set (or delete)
6015 *
6016 * Returns: 0 Success
6017 * !0 errno value
6018 *
6019 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6020 *
6021 * XXX: We should enummerate the possible errno values here, and where
6022 * in the code they originated.
6023 */
1c79356b 6024int
b0d623f7 6025chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
1c79356b 6026{
1c79356b 6027 int error;
91447636
A
6028 struct vnode_attr va;
6029 kauth_filesec_t xsecdst;
6030
b0d623f7
A
6031 AUDIT_ARG(owner, uap->uid, uap->gid);
6032
91447636
A
6033 VATTR_INIT(&va);
6034 if (uap->mode != -1)
6035 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6036 if (uap->uid != KAUTH_UID_NONE)
6037 VATTR_SET(&va, va_uid, uap->uid);
6038 if (uap->gid != KAUTH_GID_NONE)
6039 VATTR_SET(&va, va_gid, uap->gid);
6040
6041 xsecdst = NULL;
6042 switch(uap->xsecurity) {
6043 /* explicit remove request */
6044 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6045 VATTR_SET(&va, va_acl, NULL);
6046 break;
6047 /* not being set */
6048 case USER_ADDR_NULL:
6049 break;
6050 default:
6051 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6052 return(error);
6053 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6054 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
6055 }
1c79356b 6056
fe8ab488
A
6057 error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
6058 UIO_USERSPACE);
55e303ae 6059
91447636
A
6060 if (xsecdst != NULL)
6061 kauth_filesec_free(xsecdst);
6062 return(error);
6063}
4a249263 6064
2d21ac55
A
6065/*
6066 * Returns: 0 Success
fe8ab488 6067 * chmodat:??? [anything chmodat can return]
2d21ac55 6068 */
fe8ab488
A
6069static int
6070fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
6071 int flag, enum uio_seg segflg)
91447636 6072{
91447636
A
6073 struct vnode_attr va;
6074
6075 VATTR_INIT(&va);
fe8ab488
A
6076 VATTR_SET(&va, va_mode, mode & ALLPERMS);
6077
6078 return (chmodat(ctx, path, &va, fd, flag, segflg));
6079}
6080
6081int
6082chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
6083{
6084 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6085 AT_FDCWD, 0, UIO_USERSPACE));
6086}
91447636 6087
fe8ab488
A
6088int
6089fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
6090{
6091 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6092 return (EINVAL);
6093
6094 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6095 uap->fd, uap->flag, UIO_USERSPACE));
1c79356b
A
6096}
6097
6098/*
6099 * Change mode of a file given a file descriptor.
6100 */
91447636 6101static int
2d21ac55 6102fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
1c79356b 6103{
2d21ac55 6104 vnode_t vp;
1c79356b 6105 int error;
55e303ae 6106
91447636 6107 AUDIT_ARG(fd, fd);
55e303ae 6108
91447636
A
6109 if ((error = file_vnode(fd, &vp)) != 0)
6110 return (error);
6111 if ((error = vnode_getwithref(vp)) != 0) {
6112 file_drop(fd);
6113 return(error);
6114 }
55e303ae
A
6115 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6116
fe8ab488 6117 error = chmod_vnode(vfs_context_current(), vp, vap);
91447636
A
6118 (void)vnode_put(vp);
6119 file_drop(fd);
55e303ae 6120
1c79356b
A
6121 return (error);
6122}
6123
b0d623f7
A
6124/*
6125 * fchmod_extended: Change mode of a file given a file descriptor; with
6126 * extended argument list (including extended security (ACL)).
6127 *
6128 * Parameters: p Process requesting to change file mode
6129 * uap User argument descriptor (see below)
39037602 6130 * retval (ignored)
b0d623f7
A
6131 *
6132 * Indirect: uap->mode File mode to set (same as 'chmod')
6133 * uap->uid UID to set
6134 * uap->gid GID to set
6135 * uap->xsecurity ACL to set (or delete)
6136 * uap->fd File descriptor of file to change mode
39037602 6137 *
b0d623f7
A
6138 * Returns: 0 Success
6139 * !0 errno value
6140 *
6141 */
91447636 6142int
b0d623f7 6143fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
91447636
A
6144{
6145 int error;
6146 struct vnode_attr va;
6147 kauth_filesec_t xsecdst;
6148
b0d623f7
A
6149 AUDIT_ARG(owner, uap->uid, uap->gid);
6150
91447636
A
6151 VATTR_INIT(&va);
6152 if (uap->mode != -1)
6153 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6154 if (uap->uid != KAUTH_UID_NONE)
6155 VATTR_SET(&va, va_uid, uap->uid);
6156 if (uap->gid != KAUTH_GID_NONE)
6157 VATTR_SET(&va, va_gid, uap->gid);
6158
6159 xsecdst = NULL;
6160 switch(uap->xsecurity) {
6161 case USER_ADDR_NULL:
6162 VATTR_SET(&va, va_acl, NULL);
6163 break;
39236c6e
A
6164 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6165 VATTR_SET(&va, va_acl, NULL);
6166 break;
6167 /* not being set */
91447636
A
6168 case CAST_USER_ADDR_T(-1):
6169 break;
6170 default:
6171 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6172 return(error);
6173 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6174 }
6175
6176 error = fchmod1(p, uap->fd, &va);
6177
39037602 6178
91447636
A
6179 switch(uap->xsecurity) {
6180 case USER_ADDR_NULL:
6181 case CAST_USER_ADDR_T(-1):
6182 break;
6183 default:
6184 if (xsecdst != NULL)
6185 kauth_filesec_free(xsecdst);
6186 }
6187 return(error);
6188}
6189
6190int
b0d623f7 6191fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
91447636
A
6192{
6193 struct vnode_attr va;
6194
6195 VATTR_INIT(&va);
6196 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6197
6198 return(fchmod1(p, uap->fd, &va));
6199}
6200
6201
1c79356b
A
6202/*
6203 * Set ownership given a path name.
6204 */
1c79356b 6205/* ARGSUSED */
91447636 6206static int
fe8ab488
A
6207fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
6208 gid_t gid, int flag, enum uio_seg segflg)
1c79356b 6209{
2d21ac55 6210 vnode_t vp;
91447636 6211 struct vnode_attr va;
1c79356b
A
6212 int error;
6213 struct nameidata nd;
fe8ab488 6214 int follow;
91447636 6215 kauth_action_t action;
1c79356b 6216
fe8ab488 6217 AUDIT_ARG(owner, uid, gid);
55e303ae 6218
fe8ab488
A
6219 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6220 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
6221 path, ctx);
6222 error = nameiat(&nd, fd);
55e303ae 6223 if (error)
1c79356b
A
6224 return (error);
6225 vp = nd.ni_vp;
6226
91447636
A
6227 nameidone(&nd);
6228
91447636 6229 VATTR_INIT(&va);
fe8ab488
A
6230 if (uid != (uid_t)VNOVAL)
6231 VATTR_SET(&va, va_uid, uid);
6232 if (gid != (gid_t)VNOVAL)
6233 VATTR_SET(&va, va_gid, gid);
91447636 6234
2d21ac55 6235#if CONFIG_MACF
fe8ab488 6236 error = mac_vnode_check_setowner(ctx, vp, uid, gid);
2d21ac55
A
6237 if (error)
6238 goto out;
6239#endif
6240
91447636
A
6241 /* preflight and authorize attribute changes */
6242 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6243 goto out;
6244 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6245 goto out;
6246 error = vnode_setattr(vp, &va, ctx);
39037602
A
6247
6248#if CONFIG_MACF
6249 if (error == 0)
6250 mac_vnode_notify_setowner(ctx, vp, uid, gid);
6251#endif
6252
91447636
A
6253out:
6254 /*
6255 * EACCES is only allowed from namei(); permissions failure should
6256 * return EPERM, so we need to translate the error code.
6257 */
6258 if (error == EACCES)
6259 error = EPERM;
fe8ab488 6260
91447636 6261 vnode_put(vp);
1c79356b
A
6262 return (error);
6263}
6264
91447636 6265int
fe8ab488 6266chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
91447636 6267{
fe8ab488
A
6268 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6269 uap->uid, uap->gid, 0, UIO_USERSPACE));
91447636
A
6270}
6271
6272int
fe8ab488 6273lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
91447636 6274{
fe8ab488
A
6275 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6276 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6277}
6278
6279int
6280fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6281{
6282 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6283 return (EINVAL);
6284
6285 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6286 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
91447636
A
6287}
6288
1c79356b
A
6289/*
6290 * Set ownership given a file descriptor.
6291 */
1c79356b
A
6292/* ARGSUSED */
6293int
b0d623f7 6294fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
1c79356b 6295{
91447636 6296 struct vnode_attr va;
2d21ac55
A
6297 vfs_context_t ctx = vfs_context_current();
6298 vnode_t vp;
1c79356b 6299 int error;
91447636 6300 kauth_action_t action;
1c79356b 6301
55e303ae
A
6302 AUDIT_ARG(owner, uap->uid, uap->gid);
6303 AUDIT_ARG(fd, uap->fd);
6304
91447636 6305 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 6306 return (error);
55e303ae 6307
91447636
A
6308 if ( (error = vnode_getwithref(vp)) ) {
6309 file_drop(uap->fd);
6310 return(error);
6311 }
55e303ae
A
6312 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6313
91447636
A
6314 VATTR_INIT(&va);
6315 if (uap->uid != VNOVAL)
6316 VATTR_SET(&va, va_uid, uap->uid);
6317 if (uap->gid != VNOVAL)
6318 VATTR_SET(&va, va_gid, uap->gid);
6319
2d21ac55
A
6320#if NAMEDSTREAMS
6321 /* chown calls are not allowed for resource forks. */
6322 if (vp->v_flag & VISNAMEDSTREAM) {
6323 error = EPERM;
6324 goto out;
6325 }
6326#endif
6327
6328#if CONFIG_MACF
6329 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
6330 if (error)
6331 goto out;
6332#endif
91447636
A
6333
6334 /* preflight and authorize attribute changes */
2d21ac55 6335 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
91447636 6336 goto out;
2d21ac55 6337 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
91447636
A
6338 if (error == EACCES)
6339 error = EPERM;
6340 goto out;
6341 }
2d21ac55 6342 error = vnode_setattr(vp, &va, ctx);
4a249263 6343
39037602
A
6344#if CONFIG_MACF
6345 if (error == 0)
6346 mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid);
6347#endif
6348
91447636
A
6349out:
6350 (void)vnode_put(vp);
6351 file_drop(uap->fd);
1c79356b
A
6352 return (error);
6353}
6354
9bccf70c 6355static int
2d21ac55 6356getutimes(user_addr_t usrtvp, struct timespec *tsp)
9bccf70c 6357{
9bccf70c
A
6358 int error;
6359
91447636
A
6360 if (usrtvp == USER_ADDR_NULL) {
6361 struct timeval old_tv;
6362 /* XXX Y2038 bug because of microtime argument */
6363 microtime(&old_tv);
6364 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
9bccf70c
A
6365 tsp[1] = tsp[0];
6366 } else {
91447636 6367 if (IS_64BIT_PROCESS(current_proc())) {
b0d623f7 6368 struct user64_timeval tv[2];
91447636 6369 error = copyin(usrtvp, (void *)tv, sizeof(tv));
b0d623f7
A
6370 if (error)
6371 return (error);
6372 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6373 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 6374 } else {
b0d623f7
A
6375 struct user32_timeval tv[2];
6376 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6377 if (error)
6378 return (error);
6379 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6380 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 6381 }
9bccf70c
A
6382 }
6383 return 0;
6384}
6385
6386static int
2d21ac55 6387setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
91447636 6388 int nullflag)
9bccf70c
A
6389{
6390 int error;
91447636
A
6391 struct vnode_attr va;
6392 kauth_action_t action;
e5568f75
A
6393
6394 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6395
91447636
A
6396 VATTR_INIT(&va);
6397 VATTR_SET(&va, va_access_time, ts[0]);
6398 VATTR_SET(&va, va_modify_time, ts[1]);
9bccf70c 6399 if (nullflag)
91447636
A
6400 va.va_vaflags |= VA_UTIMES_NULL;
6401
2d21ac55
A
6402#if NAMEDSTREAMS
6403 /* utimes calls are not allowed for resource forks. */
6404 if (vp->v_flag & VISNAMEDSTREAM) {
6405 error = EPERM;
6406 goto out;
6407 }
6408#endif
6409
6410#if CONFIG_MACF
6411 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
6412 if (error)
6413 goto out;
6414#endif
6415 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
6416 if (!nullflag && error == EACCES)
6417 error = EPERM;
91447636 6418 goto out;
2d21ac55
A
6419 }
6420
91447636 6421 /* since we may not need to auth anything, check here */
2d21ac55
A
6422 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6423 if (!nullflag && error == EACCES)
6424 error = EPERM;
91447636 6425 goto out;
2d21ac55 6426 }
91447636 6427 error = vnode_setattr(vp, &va, ctx);
4a249263 6428
39037602
A
6429#if CONFIG_MACF
6430 if (error == 0)
6431 mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]);
6432#endif
6433
9bccf70c
A
6434out:
6435 return error;
6436}
6437
1c79356b
A
6438/*
6439 * Set the access and modification times of a file.
6440 */
1c79356b
A
6441/* ARGSUSED */
6442int
b0d623f7 6443utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
1c79356b 6444{
9bccf70c 6445 struct timespec ts[2];
91447636 6446 user_addr_t usrtvp;
1c79356b
A
6447 int error;
6448 struct nameidata nd;
2d21ac55 6449 vfs_context_t ctx = vfs_context_current();
1c79356b 6450
2d21ac55 6451 /*
39037602 6452 * AUDIT: Needed to change the order of operations to do the
55e303ae
A
6453 * name lookup first because auditing wants the path.
6454 */
39037602 6455 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
2d21ac55 6456 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
6457 error = namei(&nd);
6458 if (error)
9bccf70c 6459 return (error);
91447636 6460 nameidone(&nd);
55e303ae 6461
91447636
A
6462 /*
6463 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6464 * the current time instead.
6465 */
55e303ae 6466 usrtvp = uap->tptr;
91447636
A
6467 if ((error = getutimes(usrtvp, ts)) != 0)
6468 goto out;
6469
2d21ac55 6470 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
91447636
A
6471
6472out:
6473 vnode_put(nd.ni_vp);
1c79356b
A
6474 return (error);
6475}
6476
9bccf70c
A
6477/*
6478 * Set the access and modification times of a file.
6479 */
9bccf70c
A
6480/* ARGSUSED */
6481int
b0d623f7 6482futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
9bccf70c
A
6483{
6484 struct timespec ts[2];
2d21ac55 6485 vnode_t vp;
91447636 6486 user_addr_t usrtvp;
9bccf70c
A
6487 int error;
6488
55e303ae 6489 AUDIT_ARG(fd, uap->fd);
9bccf70c
A
6490 usrtvp = uap->tptr;
6491 if ((error = getutimes(usrtvp, ts)) != 0)
6492 return (error);
91447636 6493 if ((error = file_vnode(uap->fd, &vp)) != 0)
9bccf70c 6494 return (error);
91447636
A
6495 if((error = vnode_getwithref(vp))) {
6496 file_drop(uap->fd);
6497 return(error);
6498 }
55e303ae 6499
2d21ac55 6500 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
91447636
A
6501 vnode_put(vp);
6502 file_drop(uap->fd);
6503 return(error);
9bccf70c
A
6504}
6505
1c79356b
A
6506/*
6507 * Truncate a file given its path name.
6508 */
1c79356b
A
6509/* ARGSUSED */
6510int
b0d623f7 6511truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
1c79356b 6512{
2d21ac55 6513 vnode_t vp;
91447636 6514 struct vnode_attr va;
2d21ac55 6515 vfs_context_t ctx = vfs_context_current();
1c79356b
A
6516 int error;
6517 struct nameidata nd;
91447636
A
6518 kauth_action_t action;
6519
0b4e3aa0
A
6520 if (uap->length < 0)
6521 return(EINVAL);
39037602 6522 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
2d21ac55 6523 UIO_USERSPACE, uap->path, ctx);
91447636 6524 if ((error = namei(&nd)))
1c79356b
A
6525 return (error);
6526 vp = nd.ni_vp;
91447636
A
6527
6528 nameidone(&nd);
6529
6530 VATTR_INIT(&va);
6531 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55
A
6532
6533#if CONFIG_MACF
6534 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
6535 if (error)
6536 goto out;
6537#endif
6538
6539 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
91447636 6540 goto out;
2d21ac55 6541 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
91447636 6542 goto out;
2d21ac55 6543 error = vnode_setattr(vp, &va, ctx);
39037602
A
6544
6545#if CONFIG_MACF
6546 if (error == 0)
6547 mac_vnode_notify_truncate(ctx, NOCRED, vp);
6548#endif
6549
91447636
A
6550out:
6551 vnode_put(vp);
1c79356b
A
6552 return (error);
6553}
6554
6555/*
6556 * Truncate a file given a file descriptor.
6557 */
1c79356b
A
6558/* ARGSUSED */
6559int
b0d623f7 6560ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
1c79356b 6561{
2d21ac55 6562 vfs_context_t ctx = vfs_context_current();
91447636 6563 struct vnode_attr va;
2d21ac55 6564 vnode_t vp;
91447636
A
6565 struct fileproc *fp;
6566 int error ;
6567 int fd = uap->fd;
1c79356b 6568
55e303ae 6569 AUDIT_ARG(fd, uap->fd);
0b4e3aa0
A
6570 if (uap->length < 0)
6571 return(EINVAL);
39037602 6572
91447636
A
6573 if ( (error = fp_lookup(p,fd,&fp,0)) ) {
6574 return(error);
6575 }
1c79356b 6576
39236c6e
A
6577 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
6578 case DTYPE_PSXSHM:
91447636
A
6579 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
6580 goto out;
39236c6e
A
6581 case DTYPE_VNODE:
6582 break;
6583 default:
91447636
A
6584 error = EINVAL;
6585 goto out;
1c79356b 6586 }
1c79356b 6587
2d21ac55 6588 vp = (vnode_t)fp->f_fglob->fg_data;
e5568f75 6589
91447636
A
6590 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
6591 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6592 error = EINVAL;
6593 goto out;
1c79356b 6594 }
1c79356b 6595
91447636
A
6596 if ((error = vnode_getwithref(vp)) != 0) {
6597 goto out;
6598 }
1c79356b 6599
91447636 6600 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 6601
2d21ac55
A
6602#if CONFIG_MACF
6603 error = mac_vnode_check_truncate(ctx,
6604 fp->f_fglob->fg_cred, vp);
6605 if (error) {
6606 (void)vnode_put(vp);
6607 goto out;
6608 }
6609#endif
91447636
A
6610 VATTR_INIT(&va);
6611 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55 6612 error = vnode_setattr(vp, &va, ctx);
39037602
A
6613
6614#if CONFIG_MACF
6615 if (error == 0)
6616 mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp);
6617#endif
6618
91447636
A
6619 (void)vnode_put(vp);
6620out:
6621 file_drop(fd);
6622 return (error);
1c79356b 6623}
91447636 6624
1c79356b
A
6625
6626/*
b0d623f7 6627 * Sync an open file with synchronized I/O _file_ integrity completion
1c79356b 6628 */
1c79356b
A
6629/* ARGSUSED */
6630int
b0d623f7 6631fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
1c79356b 6632{
2d21ac55 6633 __pthread_testcancel(1);
b0d623f7
A
6634 return(fsync_common(p, uap, MNT_WAIT));
6635}
6636
6637
6638/*
6639 * Sync an open file with synchronized I/O _file_ integrity completion
6640 *
6641 * Notes: This is a legacy support function that does not test for
6642 * thread cancellation points.
6643 */
6644/* ARGSUSED */
39037602 6645int
b0d623f7
A
6646fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6647{
6648 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
2d21ac55
A
6649}
6650
b0d623f7
A
6651
6652/*
6653 * Sync an open file with synchronized I/O _data_ integrity completion
6654 */
6655/* ARGSUSED */
2d21ac55 6656int
b0d623f7
A
6657fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6658{
6659 __pthread_testcancel(1);
6660 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6661}
6662
6663
6664/*
6665 * fsync_common
6666 *
6667 * Common fsync code to support both synchronized I/O file integrity completion
6668 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6669 *
6670 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6671 * will only guarantee that the file data contents are retrievable. If
6672 * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
6673 * includes additional metadata unnecessary for retrieving the file data
6674 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6675 * storage.
6676 *
6677 * Parameters: p The process
6678 * uap->fd The descriptor to synchronize
6679 * flags The data integrity flags
6680 *
6681 * Returns: int Success
6682 * fp_getfvp:EBADF Bad file descriptor
6683 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6684 * VNOP_FSYNC:??? unspecified
6685 *
6686 * Notes: We use struct fsync_args because it is a short name, and all
6687 * caller argument structures are otherwise identical.
6688 */
6689static int
6690fsync_common(proc_t p, struct fsync_args *uap, int flags)
2d21ac55
A
6691{
6692 vnode_t vp;
91447636 6693 struct fileproc *fp;
2d21ac55 6694 vfs_context_t ctx = vfs_context_current();
1c79356b
A
6695 int error;
6696
b0d623f7
A
6697 AUDIT_ARG(fd, uap->fd);
6698
91447636 6699 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
1c79356b 6700 return (error);
91447636
A
6701 if ( (error = vnode_getwithref(vp)) ) {
6702 file_drop(uap->fd);
6703 return(error);
6704 }
91447636 6705
b0d623f7
A
6706 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6707
6708 error = VNOP_FSYNC(vp, flags, ctx);
2d21ac55
A
6709
6710#if NAMEDRSRCFORK
6711 /* Sync resource fork shadow file if necessary. */
6712 if ((error == 0) &&
39037602 6713 (vp->v_flag & VISNAMEDSTREAM) &&
2d21ac55 6714 (vp->v_parent != NULLVP) &&
b0d623f7 6715 vnode_isshadow(vp) &&
2d21ac55
A
6716 (fp->f_flags & FP_WRITTEN)) {
6717 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
6718 }
6719#endif
91447636
A
6720
6721 (void)vnode_put(vp);
6722 file_drop(uap->fd);
1c79356b
A
6723 return (error);
6724}
6725
6726/*
39037602 6727 * Duplicate files. Source must be a file, target must be a file or
1c79356b 6728 * must not exist.
91447636
A
6729 *
6730 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6731 * perform inheritance correctly.
1c79356b 6732 */
1c79356b
A
6733/* ARGSUSED */
6734int
b0d623f7 6735copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
1c79356b 6736{
91447636 6737 vnode_t tvp, fvp, tdvp, sdvp;
1c79356b
A
6738 struct nameidata fromnd, tond;
6739 int error;
2d21ac55 6740 vfs_context_t ctx = vfs_context_current();
39037602
A
6741#if CONFIG_MACF
6742 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
6743 struct vnode_attr va;
6744#endif
55e303ae
A
6745
6746 /* Check that the flags are valid. */
1c79356b
A
6747
6748 if (uap->flags & ~CPF_MASK) {
55e303ae
A
6749 return(EINVAL);
6750 }
1c79356b 6751
4bd07ac2 6752 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
2d21ac55 6753 UIO_USERSPACE, uap->from, ctx);
91447636 6754 if ((error = namei(&fromnd)))
1c79356b
A
6755 return (error);
6756 fvp = fromnd.ni_vp;
6757
6d2010ae
A
6758 NDINIT(&tond, CREATE, OP_LINK,
6759 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6760 UIO_USERSPACE, uap->to, ctx);
91447636 6761 if ((error = namei(&tond))) {
1c79356b
A
6762 goto out1;
6763 }
6764 tdvp = tond.ni_dvp;
6765 tvp = tond.ni_vp;
91447636 6766
1c79356b
A
6767 if (tvp != NULL) {
6768 if (!(uap->flags & CPF_OVERWRITE)) {
6769 error = EEXIST;
6770 goto out;
6771 }
6772 }
39037602 6773
1c79356b
A
6774 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
6775 error = EISDIR;
6776 goto out;
6777 }
6778
39037602
A
6779 /* This calls existing MAC hooks for open */
6780 if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx,
6781 NULL))) {
6782 goto out;
6783 }
6784
6785 if (tvp) {
6786 /*
6787 * See unlinkat_internal for an explanation of the potential
6788 * ENOENT from the MAC hook but the gist is that the MAC hook
6789 * can fail because vn_getpath isn't able to return the full
6790 * path. We choose to ignore this failure.
6791 */
6792 error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL);
6793 if (error && error != ENOENT)
6794 goto out;
6795 error = 0;
6796 }
6797
6798#if CONFIG_MACF
6799 VATTR_INIT(&va);
6800 VATTR_SET(&va, va_type, fvp->v_type);
6801 /* Mask off all but regular access permissions */
6802 VATTR_SET(&va, va_mode,
6803 ((((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
6804 error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
6805 if (error)
6806 goto out;
6807#endif /* CONFIG_MACF */
6808
2d21ac55 6809 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
1c79356b
A
6810 goto out;
6811
6812 if (fvp == tdvp)
6813 error = EINVAL;
6814 /*
6815 * If source is the same as the destination (that is the
6816 * same inode number) then there is nothing to do.
6817 * (fixed to have POSIX semantics - CSM 3/2/98)
6818 */
6819 if (fvp == tvp)
6820 error = -1;
91447636 6821 if (!error)
2d21ac55 6822 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
1c79356b 6823out:
91447636
A
6824 sdvp = tond.ni_startdir;
6825 /*
6826 * nameidone has to happen before we vnode_put(tdvp)
6827 * since it may need to release the fs_nodelock on the tdvp
6828 */
6829 nameidone(&tond);
6830
6831 if (tvp)
6832 vnode_put(tvp);
6833 vnode_put(tdvp);
6834 vnode_put(sdvp);
1c79356b 6835out1:
91447636
A
6836 vnode_put(fvp);
6837
91447636
A
6838 nameidone(&fromnd);
6839
1c79356b
A
6840 if (error == -1)
6841 return (0);
6842 return (error);
6843}
6844
/* NOTE(review): not referenced in the code visible nearby; presumably gates fallback paths for clone/snapshot operations - confirm against its users. */
39037602 6845#define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
91447636 6846
1c79356b 6847/*
39037602
A
6848 * Helper function for doing clones. The caller is expected to provide an
6849 * iocounted source vnode and release it.
1c79356b 6850 */
fe8ab488 6851static int
39037602
A
6852clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
6853 user_addr_t dst, uint32_t flags, vfs_context_t ctx)
1c79356b 6854{
91447636 6855 vnode_t tvp, tdvp;
39037602 6856 struct nameidata tond;
1c79356b 6857 int error;
39037602 6858 int follow;
813fb2f6 6859 boolean_t free_src_acl;
39037602
A
6860 boolean_t attr_cleanup;
6861 enum vtype v_type;
6862 kauth_action_t action;
6863 struct componentname *cnp;
6864 uint32_t defaulted;
6865 struct vnode_attr va;
813fb2f6 6866 struct vnode_attr nva;
316670eb 6867
39037602
A
6868 v_type = vnode_vtype(fvp);
6869 switch (v_type) {
6870 case VLNK:
6871 /* FALLTHRU */
6872 case VREG:
6873 action = KAUTH_VNODE_ADD_FILE;
6874 break;
6875 case VDIR:
6876 if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
6877 fvp->v_mountedhere) {
6878 return (EINVAL);
6879 }
6880 action = KAUTH_VNODE_ADD_SUBDIRECTORY;
6881 break;
6882 default:
6883 return (EINVAL);
6884 }
6885
6886 AUDIT_ARG(fd2, dst_dirfd);
6887 AUDIT_ARG(value32, flags);
6888
6889 follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6890 NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
6891 UIO_USERSPACE, dst, ctx);
6892 if ((error = nameiat(&tond, dst_dirfd)))
6893 return (error);
6894 cnp = &tond.ni_cnd;
6895 tdvp = tond.ni_dvp;
6896 tvp = tond.ni_vp;
6897
813fb2f6 6898 free_src_acl = FALSE;
39037602
A
6899 attr_cleanup = FALSE;
6900
6901 if (tvp != NULL) {
6902 error = EEXIST;
6903 goto out;
6904 }
6905
6906 if (vnode_mount(tdvp) != vnode_mount(fvp)) {
6907 error = EXDEV;
6908 goto out;
6909 }
6910
6911#if CONFIG_MACF
6912 if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp)))
6913 goto out;
6914#endif
6915 if ((error = vnode_authorize(tdvp, NULL, action, ctx)))
6916 goto out;
6917
6918 action = KAUTH_VNODE_GENERIC_READ_BITS;
6919 if (data_read_authorised)
6920 action &= ~KAUTH_VNODE_READ_DATA;
6921 if ((error = vnode_authorize(fvp, NULL, action, ctx)))
6922 goto out;
6923
6924 /*
6925 * certain attributes may need to be changed from the source, we ask for
6926 * those here.
6927 */
6928 VATTR_INIT(&va);
813fb2f6
A
6929 VATTR_WANTED(&va, va_uid);
6930 VATTR_WANTED(&va, va_gid);
39037602
A
6931 VATTR_WANTED(&va, va_mode);
6932 VATTR_WANTED(&va, va_flags);
6933 VATTR_WANTED(&va, va_acl);
6934
6935 if ((error = vnode_getattr(fvp, &va, ctx)) != 0)
6936 goto out;
6937
813fb2f6
A
6938 VATTR_INIT(&nva);
6939 VATTR_SET(&nva, va_type, v_type);
6940 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) {
6941 VATTR_SET(&nva, va_acl, va.va_acl);
6942 free_src_acl = TRUE;
39037602
A
6943 }
6944
6945 /* Handle ACL inheritance, initialize vap. */
6946 if (v_type == VLNK) {
813fb2f6 6947 error = vnode_authattr_new(tdvp, &nva, 0, ctx);
39037602 6948 } else {
813fb2f6
A
6949 error = vn_attribute_prepare(tdvp, &nva, &defaulted, ctx);
6950 if (error)
6951 goto out;
39037602
A
6952 attr_cleanup = TRUE;
6953 }
6954
813fb2f6
A
6955 /*
6956 * We've got initial values for all security parameters,
6957 * If we are superuser, then we can change owners to be the
6958 * same as the source. Both superuser and the owner have default
6959 * WRITE_SECURITY privileges so all other fields can be taken
6960 * from source as well.
6961 */
6962 if (vfs_context_issuser(ctx)) {
6963 if (VATTR_IS_SUPPORTED(&va, va_uid))
6964 VATTR_SET(&nva, va_uid, va.va_uid);
6965 if (VATTR_IS_SUPPORTED(&va, va_gid))
6966 VATTR_SET(&nva, va_gid, va.va_gid);
6967 }
6968 if (VATTR_IS_SUPPORTED(&va, va_mode))
6969 VATTR_SET(&nva, va_mode, va.va_mode);
6970 if (VATTR_IS_SUPPORTED(&va, va_flags)) {
6971 VATTR_SET(&nva, va_flags,
6972 ((va.va_flags & ~SF_RESTRICTED) | /* Turn off from source */
6973 (nva.va_flags & SF_RESTRICTED)));
39037602
A
6974 }
6975
813fb2f6
A
6976 error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &nva,
6977 VNODE_CLONEFILE_DEFAULT, ctx);
39037602
A
6978
6979 if (!error && tvp) {
6980 int update_flags = 0;
6981#if CONFIG_FSE
6982 int fsevent;
6983#endif /* CONFIG_FSE */
6984
6985#if CONFIG_MACF
6986 (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
6987 VNODE_LABEL_CREATE, ctx);
6988#endif
6989 /*
6990 * If some of the requested attributes weren't handled by the
6991 * VNOP, use our fallback code.
6992 */
6993 if (!VATTR_ALL_SUPPORTED(&va))
813fb2f6 6994 (void)vnode_setattr_fallback(tvp, &nva, ctx);
39037602
A
6995
6996 // Make sure the name & parent pointers are hooked up
6997 if (tvp->v_name == NULL)
6998 update_flags |= VNODE_UPDATE_NAME;
6999 if (tvp->v_parent == NULLVP)
7000 update_flags |= VNODE_UPDATE_PARENT;
7001
7002 if (update_flags) {
7003 (void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
7004 cnp->cn_namelen, cnp->cn_hash, update_flags);
7005 }
7006
7007#if CONFIG_FSE
7008 switch (vnode_vtype(tvp)) {
7009 case VLNK:
7010 /* FALLTHRU */
7011 case VREG:
7012 fsevent = FSE_CREATE_FILE;
7013 break;
7014 case VDIR:
7015 fsevent = FSE_CREATE_DIR;
7016 break;
7017 default:
7018 goto out;
7019 }
7020
7021 if (need_fsevent(fsevent, tvp)) {
7022 add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
7023 FSE_ARG_DONE);
7024 }
7025#endif /* CONFIG_FSE */
7026 }
39037602
A
7027
7028out:
7029 if (attr_cleanup)
813fb2f6
A
7030 vn_attribute_cleanup(&nva, defaulted);
7031 if (free_src_acl && va.va_acl)
39037602
A
7032 kauth_acl_free(va.va_acl);
7033 nameidone(&tond);
7034 if (tvp)
7035 vnode_put(tvp);
7036 vnode_put(tdvp);
7037 return (error);
7038}
7039
7040/*
7041 * clone files or directories, target must not exist.
7042 */
7043/* ARGSUSED */
7044int
7045clonefileat(__unused proc_t p, struct clonefileat_args *uap,
7046 __unused int32_t *retval)
7047{
7048 vnode_t fvp;
7049 struct nameidata fromnd;
7050 int follow;
7051 int error;
7052 vfs_context_t ctx = vfs_context_current();
7053
7054 /* Check that the flags are valid. */
7055 if (uap->flags & ~CLONE_NOFOLLOW)
7056 return (EINVAL);
7057
7058 AUDIT_ARG(fd, uap->src_dirfd);
7059
7060 follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7061 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
7062 UIO_USERSPACE, uap->src, ctx);
7063 if ((error = nameiat(&fromnd, uap->src_dirfd)))
7064 return (error);
7065
7066 fvp = fromnd.ni_vp;
7067 nameidone(&fromnd);
7068
7069 error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
7070 uap->flags, ctx);
7071
7072 vnode_put(fvp);
7073 return (error);
7074}
7075
7076int
7077fclonefileat(__unused proc_t p, struct fclonefileat_args *uap,
7078 __unused int32_t *retval)
7079{
7080 vnode_t fvp;
7081 struct fileproc *fp;
7082 int error;
7083 vfs_context_t ctx = vfs_context_current();
7084
7085 AUDIT_ARG(fd, uap->src_fd);
7086 error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
7087 if (error)
7088 return (error);
7089
7090 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7091 AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
7092 error = EBADF;
7093 goto out;
7094 }
7095
7096 if ((error = vnode_getwithref(fvp)))
7097 goto out;
7098
7099 AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
7100
7101 error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
7102 uap->flags, ctx);
7103
7104 vnode_put(fvp);
7105out:
7106 file_drop(uap->src_fd);
7107 return (error);
7108}
7109
7110/*
7111 * Rename files. Source and destination must either both be directories,
7112 * or both not be directories. If target is a directory, it must be empty.
7113 */
7114/* ARGSUSED */
7115static int
7116renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
7117 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
7118{
7119 if (flags & ~VFS_RENAME_FLAGS_MASK)
7120 return EINVAL;
7121
7122 if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL))
7123 return EINVAL;
7124
7125 vnode_t tvp, tdvp;
7126 vnode_t fvp, fdvp;
7127 struct nameidata *fromnd, *tond;
7128 int error;
7129 int do_retry;
7130 int retry_count;
7131 int mntrename;
7132 int need_event;
7133 const char *oname = NULL;
7134 char *from_name = NULL, *to_name = NULL;
7135 int from_len=0, to_len=0;
7136 int holding_mntlock;
7137 mount_t locked_mp = NULL;
7138 vnode_t oparent = NULLVP;
7139#if CONFIG_FSE
7140 fse_info from_finfo, to_finfo;
7141#endif
7142 int from_truncated=0, to_truncated;
7143 int batched = 0;
7144 struct vnode_attr *fvap, *tvap;
7145 int continuing = 0;
7146 /* carving out a chunk for structs that are too big to be on stack. */
7147 struct {
7148 struct nameidata from_node, to_node;
7149 struct vnode_attr fv_attr, tv_attr;
7150 } * __rename_data;
7151 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
7152 fromnd = &__rename_data->from_node;
7153 tond = &__rename_data->to_node;
7154
7155 holding_mntlock = 0;
7156 do_retry = 0;
7157 retry_count = 0;
91447636
A
7158retry:
7159 fvp = tvp = NULL;
7160 fdvp = tdvp = NULL;
6d2010ae 7161 fvap = tvap = NULL;
1c79356b
A
7162 mntrename = FALSE;
7163
316670eb 7164 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
fe8ab488 7165 segflg, from, ctx);
316670eb 7166 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
fe8ab488 7167
316670eb 7168 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
fe8ab488 7169 segflg, to, ctx);
316670eb 7170 tond->ni_flag = NAMEI_COMPOUNDRENAME;
fe8ab488 7171
6d2010ae 7172continue_lookup:
316670eb 7173 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
fe8ab488 7174 if ( (error = nameiat(fromnd, fromfd)) )
6d2010ae 7175 goto out1;
316670eb
A
7176 fdvp = fromnd->ni_dvp;
7177 fvp = fromnd->ni_vp;
1c79356b 7178
6d2010ae 7179 if (fvp && fvp->v_type == VDIR)
316670eb 7180 tond->ni_cnd.cn_flags |= WILLBEDIR;
6d2010ae 7181 }
2d21ac55 7182
316670eb 7183 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
fe8ab488 7184 if ( (error = nameiat(tond, tofd)) ) {
6d2010ae
A
7185 /*
7186 * Translate error code for rename("dir1", "dir2/.").
7187 */
fe8ab488 7188 if (error == EISDIR && fvp->v_type == VDIR)
6d2010ae
A
7189 error = EINVAL;
7190 goto out1;
7191 }
316670eb
A
7192 tdvp = tond->ni_dvp;
7193 tvp = tond->ni_vp;
fe8ab488 7194 }
91447636 7195
00867663
A
7196#if DEVELOPMENT || DEBUG
7197 /*
7198 * XXX VSWAP: Check for entitlements or special flag here
7199 * so we can restrict access appropriately.
7200 */
7201#else /* DEVELOPMENT || DEBUG */
7202
7203 if (fromnd->ni_vp && vnode_isswap(fromnd->ni_vp) && (ctx != vfs_context_kernel())) {
7204 error = EPERM;
7205 goto out1;
7206 }
7207
7208 if (tond->ni_vp && vnode_isswap(tond->ni_vp) && (ctx != vfs_context_kernel())) {
7209 error = EPERM;
7210 goto out1;
7211 }
7212#endif /* DEVELOPMENT || DEBUG */
7213
39037602
A
7214 if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
7215 error = ENOENT;
7216 goto out1;
7217 }
7218
7219 if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
7220 error = EEXIST;
7221 goto out1;
7222 }
7223
6d2010ae
A
7224 batched = vnode_compound_rename_available(fdvp);
7225 if (!fvp) {
fe8ab488 7226 /*
6d2010ae
A
7227 * Claim: this check will never reject a valid rename.
7228 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
7229 * Suppose fdvp and tdvp are not on the same mount.
fe8ab488 7230 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6d2010ae
A
7231 * then you can't move it to within another dir on the same mountpoint.
7232 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
7233 *
7234 * If this check passes, then we are safe to pass these vnodes to the same FS.
91447636 7235 */
6d2010ae
A
7236 if (fdvp->v_mount != tdvp->v_mount) {
7237 error = EXDEV;
7238 goto out1;
7239 }
7240 goto skipped_lookup;
1c79356b 7241 }
2d21ac55 7242
6d2010ae 7243 if (!batched) {
39037602 7244 error = vn_authorize_renamex(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, flags, NULL);
6d2010ae 7245 if (error) {
3e170ce0
A
7246 if (error == ENOENT) {
7247 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7248 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7249 /*
7250 * We encountered a race where after doing the namei, tvp stops
7251 * being valid. If so, simply re-drive the rename call from the
7252 * top.
7253 */
7254 do_retry = 1;
7255 retry_count += 1;
7256 }
6d2010ae 7257 }
91447636 7258 goto out1;
1c79356b
A
7259 }
7260 }
6d2010ae 7261
2d21ac55
A
7262 /*
7263 * If the source and destination are the same (i.e. they're
7264 * links to the same vnode) and the target file system is
7265 * case sensitive, then there is nothing to do.
6d2010ae
A
7266 *
7267 * XXX Come back to this.
2d21ac55
A
7268 */
7269 if (fvp == tvp) {
7270 int pathconf_val;
fe8ab488 7271
2d21ac55
A
7272 /*
7273 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
7274 * then assume that this file system is case sensitive.
7275 */
7276 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
7277 pathconf_val != 0) {
7278 goto out1;
fe8ab488 7279 }
2d21ac55 7280 }
91447636 7281
1c79356b
A
7282 /*
7283 * Allow the renaming of mount points.
7284 * - target must not exist
7285 * - target must reside in the same directory as source
7286 * - union mounts cannot be renamed
7287 * - "/" cannot be renamed
6d2010ae
A
7288 *
7289 * XXX Handle this in VFS after a continued lookup (if we missed
7290 * in the cache to start off)
39037602
A
7291 *
7292 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
7293 * we'll skip past here. The file system is responsible for
7294 * checking that @tvp is not a descendent of @fvp and vice versa
7295 * so it should always return EINVAL if either @tvp or @fvp is the
7296 * root of a volume.
1c79356b 7297 */
91447636 7298 if ((fvp->v_flag & VROOT) &&
1c79356b
A
7299 (fvp->v_type == VDIR) &&
7300 (tvp == NULL) &&
7301 (fvp->v_mountedhere == NULL) &&
91447636 7302 (fdvp == tdvp) &&
1c79356b
A
7303 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
7304 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
2d21ac55 7305 vnode_t coveredvp;
fe8ab488 7306
1c79356b 7307 /* switch fvp to the covered vnode */
91447636
A
7308 coveredvp = fvp->v_mount->mnt_vnodecovered;
7309 if ( (vnode_getwithref(coveredvp)) ) {
7310 error = ENOENT;
7311 goto out1;
7312 }
7313 vnode_put(fvp);
7314
7315 fvp = coveredvp;
1c79356b
A
7316 mntrename = TRUE;
7317 }
91447636
A
7318 /*
7319 * Check for cross-device rename.
7320 */
7321 if ((fvp->v_mount != tdvp->v_mount) ||
7322 (tvp && (fvp->v_mount != tvp->v_mount))) {
7323 error = EXDEV;
7324 goto out1;
7325 }
55e303ae 7326
91447636
A
7327 /*
7328 * If source is the same as the destination (that is the
7329 * same inode number) then there is nothing to do...
7330 * EXCEPT if the underlying file system supports case
7331 * insensitivity and is case preserving. In this case
7332 * the file system needs to handle the special case of
7333 * getting the same vnode as target (fvp) and source (tvp).
7334 *
7335 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
7336 * and _PC_CASE_PRESERVING can have this exception, and they need to
7337 * handle the special case of getting the same vnode as target and
7338 * source. NOTE: Then the target is unlocked going into vnop_rename,
7339 * so not to cause locking problems. There is a single reference on tvp.
7340 *
fe8ab488 7341 * NOTE - that fvp == tvp also occurs if they are hard linked and
b0d623f7
A
7342 * that correct behaviour then is just to return success without doing
7343 * anything.
6d2010ae
A
7344 *
7345 * XXX filesystem should take care of this itself, perhaps...
91447636
A
7346 */
7347 if (fvp == tvp && fdvp == tdvp) {
316670eb
A
7348 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
7349 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
7350 fromnd->ni_cnd.cn_namelen)) {
91447636 7351 goto out1;
55e303ae 7352 }
91447636 7353 }
55e303ae 7354
91447636
A
7355 if (holding_mntlock && fvp->v_mount != locked_mp) {
7356 /*
7357 * we're holding a reference and lock
7358 * on locked_mp, but it no longer matches
7359 * what we want to do... so drop our hold
7360 */
7361 mount_unlock_renames(locked_mp);
7362 mount_drop(locked_mp, 0);
7363 holding_mntlock = 0;
7364 }
7365 if (tdvp != fdvp && fvp->v_type == VDIR) {
7366 /*
7367 * serialize renames that re-shape
7368 * the tree... if holding_mntlock is
7369 * set, then we're ready to go...
7370 * otherwise we
7371 * first need to drop the iocounts
7372 * we picked up, second take the
7373 * lock to serialize the access,
7374 * then finally start the lookup
7375 * process over with the lock held
7376 */
7377 if (!holding_mntlock) {
7378 /*
7379 * need to grab a reference on
7380 * the mount point before we
7381 * drop all the iocounts... once
7382 * the iocounts are gone, the mount
7383 * could follow
7384 */
7385 locked_mp = fvp->v_mount;
7386 mount_ref(locked_mp, 0);
55e303ae 7387
91447636
A
7388 /*
7389 * nameidone has to happen before we vnode_put(tvp)
7390 * since it may need to release the fs_nodelock on the tvp
7391 */
316670eb 7392 nameidone(tond);
55e303ae 7393
91447636
A
7394 if (tvp)
7395 vnode_put(tvp);
7396 vnode_put(tdvp);
7397
7398 /*
7399 * nameidone has to happen before we vnode_put(fdvp)
7400 * since it may need to release the fs_nodelock on the fvp
7401 */
316670eb 7402 nameidone(fromnd);
55e303ae 7403
91447636
A
7404 vnode_put(fvp);
7405 vnode_put(fdvp);
7406
7407 mount_lock_renames(locked_mp);
7408 holding_mntlock = 1;
7409
7410 goto retry;
55e303ae 7411 }
91447636
A
7412 } else {
7413 /*
7414 * when we dropped the iocounts to take
fe8ab488 7415 * the lock, we allowed the identity of
91447636
A
7416 * the various vnodes to change... if they did,
7417 * we may no longer be dealing with a rename
7418 * that reshapes the tree... once we're holding
7419 * the iocounts, the vnodes can't change type
7420 * so we're free to drop the lock at this point
7421 * and continue on
1c79356b 7422 */
91447636
A
7423 if (holding_mntlock) {
7424 mount_unlock_renames(locked_mp);
7425 mount_drop(locked_mp, 0);
7426 holding_mntlock = 0;
1c79356b 7427 }
91447636 7428 }
6d2010ae 7429
91447636
A
7430 // save these off so we can later verify that fvp is the same
7431 oname = fvp->v_name;
7432 oparent = fvp->v_parent;
55e303ae 7433
6d2010ae 7434skipped_lookup:
2d21ac55 7435#if CONFIG_FSE
6d2010ae 7436 need_event = need_fsevent(FSE_RENAME, fdvp);
fe8ab488 7437 if (need_event) {
6d2010ae
A
7438 if (fvp) {
7439 get_fse_info(fvp, &from_finfo, ctx);
7440 } else {
316670eb 7441 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
6d2010ae
A
7442 if (error) {
7443 goto out1;
7444 }
7445
316670eb 7446 fvap = &__rename_data->fv_attr;
6d2010ae 7447 }
55e303ae 7448
91447636 7449 if (tvp) {
2d21ac55 7450 get_fse_info(tvp, &to_finfo, ctx);
6d2010ae 7451 } else if (batched) {
316670eb 7452 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
6d2010ae
A
7453 if (error) {
7454 goto out1;
7455 }
7456
316670eb 7457 tvap = &__rename_data->tv_attr;
2d21ac55
A
7458 }
7459 }
7460#else
7461 need_event = 0;
7462#endif /* CONFIG_FSE */
7463
7464 if (need_event || kauth_authorize_fileop_has_listeners()) {
2d21ac55 7465 if (from_name == NULL) {
6d2010ae
A
7466 GET_PATH(from_name);
7467 if (from_name == NULL) {
7468 error = ENOMEM;
7469 goto out1;
7470 }
91447636 7471 }
b0d623f7 7472
316670eb 7473 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
55e303ae 7474
2d21ac55 7475 if (to_name == NULL) {
6d2010ae
A
7476 GET_PATH(to_name);
7477 if (to_name == NULL) {
7478 error = ENOMEM;
7479 goto out1;
7480 }
2d21ac55 7481 }
91447636 7482
316670eb 7483 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
fe8ab488 7484 }
316670eb
A
7485 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
7486 tdvp, &tvp, &tond->ni_cnd, tvap,
39037602 7487 flags, ctx);
55e303ae 7488
91447636
A
7489 if (holding_mntlock) {
7490 /*
7491 * we can drop our serialization
7492 * lock now
7493 */
7494 mount_unlock_renames(locked_mp);
7495 mount_drop(locked_mp, 0);
7496 holding_mntlock = 0;
7497 }
7498 if (error) {
6d2010ae 7499 if (error == EKEEPLOOKING) {
316670eb
A
7500 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
7501 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6d2010ae
A
7502 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
7503 }
7504 }
7505
316670eb
A
7506 fromnd->ni_vp = fvp;
7507 tond->ni_vp = tvp;
fe8ab488 7508
6d2010ae
A
7509 goto continue_lookup;
7510 }
7511
7512 /*
fe8ab488
A
7513 * We may encounter a race in the VNOP where the destination didn't
7514 * exist when we did the namei, but it does by the time we go and
6d2010ae
A
7515 * try to create the entry. In this case, we should re-drive this rename
7516 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
fe8ab488 7517 * but other filesystems susceptible to this race could return it, too.
6d2010ae
A
7518 */
7519 if (error == ERECYCLE) {
7520 do_retry = 1;
7521 }
55e303ae 7522
c18c124e
A
7523 /*
7524 * For compound VNOPs, the authorization callback may return
7525 * ENOENT in case of racing hardlink lookups hitting the name
7526 * cache, redrive the lookup.
7527 */
3e170ce0
A
7528 if (batched && error == ENOENT) {
7529 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7530 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7531 do_retry = 1;
7532 retry_count += 1;
7533 }
c18c124e
A
7534 }
7535
91447636 7536 goto out1;
fe8ab488
A
7537 }
7538
7539 /* call out to allow 3rd party notification of rename.
91447636
A
7540 * Ignore result of kauth_authorize_fileop call.
7541 */
fe8ab488
A
7542 kauth_authorize_fileop(vfs_context_ucred(ctx),
7543 KAUTH_FILEOP_RENAME,
2d21ac55 7544 (uintptr_t)from_name, (uintptr_t)to_name);
39037602
A
7545 if (flags & VFS_RENAME_SWAP) {
7546 kauth_authorize_fileop(vfs_context_ucred(ctx),
7547 KAUTH_FILEOP_RENAME,
7548 (uintptr_t)to_name, (uintptr_t)from_name);
7549 }
91447636 7550
2d21ac55 7551#if CONFIG_FSE
91447636 7552 if (from_name != NULL && to_name != NULL) {
b0d623f7
A
7553 if (from_truncated || to_truncated) {
7554 // set it here since only the from_finfo gets reported up to user space
7555 from_finfo.mode |= FSE_TRUNCATED_PATH;
7556 }
6d2010ae
A
7557
7558 if (tvap && tvp) {
7559 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
7560 }
7561 if (fvap) {
7562 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
7563 }
7564
39037602
A
7565 if (tvp) {
7566 add_fsevent(FSE_RENAME, ctx,
7567 FSE_ARG_STRING, from_len, from_name,
7568 FSE_ARG_FINFO, &from_finfo,
7569 FSE_ARG_STRING, to_len, to_name,
7570 FSE_ARG_FINFO, &to_finfo,
7571 FSE_ARG_DONE);
7572 if (flags & VFS_RENAME_SWAP) {
7573 /*
7574 * Strictly speaking, swap is the equivalent of
7575 * *three* renames. FSEvents clients should only take
7576 * the events as a hint, so we only bother reporting
7577 * two.
7578 */
7579 add_fsevent(FSE_RENAME, ctx,
7580 FSE_ARG_STRING, to_len, to_name,
7581 FSE_ARG_FINFO, &to_finfo,
7582 FSE_ARG_STRING, from_len, from_name,
7583 FSE_ARG_FINFO, &from_finfo,
7584 FSE_ARG_DONE);
7585 }
55e303ae 7586 } else {
2d21ac55 7587 add_fsevent(FSE_RENAME, ctx,
91447636
A
7588 FSE_ARG_STRING, from_len, from_name,
7589 FSE_ARG_FINFO, &from_finfo,
7590 FSE_ARG_STRING, to_len, to_name,
7591 FSE_ARG_DONE);
7592 }
7593 }
2d21ac55 7594#endif /* CONFIG_FSE */
fe8ab488 7595
91447636
A
7596 /*
7597 * update filesystem's mount point data
7598 */
7599 if (mntrename) {
7600 char *cp, *pathend, *mpname;
7601 char * tobuf;
7602 struct mount *mp;
7603 int maxlen;
7604 size_t len = 0;
7605
7606 mp = fvp->v_mountedhere;
7607
7608 if (vfs_busy(mp, LK_NOWAIT)) {
7609 error = EBUSY;
7610 goto out1;
55e303ae 7611 }
91447636 7612 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
55e303ae 7613
fe8ab488
A
7614 if (UIO_SEG_IS_USER_SPACE(segflg))
7615 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
7616 else
7617 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
91447636
A
7618 if (!error) {
7619 /* find current mount point prefix */
7620 pathend = &mp->mnt_vfsstat.f_mntonname[0];
7621 for (cp = pathend; *cp != '\0'; ++cp) {
7622 if (*cp == '/')
7623 pathend = cp + 1;
7624 }
7625 /* find last component of target name */
7626 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
7627 if (*cp == '/')
7628 mpname = cp + 1;
7629 }
7630 /* append name to prefix */
7631 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
7632 bzero(pathend, maxlen);
2d21ac55 7633 strlcpy(pathend, mpname, maxlen);
91447636
A
7634 }
7635 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
7636
7637 vfs_unbusy(mp);
7638 }
7639 /*
fe8ab488 7640 * fix up name & parent pointers. note that we first
91447636
A
7641 * check that fvp has the same name/parent pointers it
7642 * had before the rename call... this is a 'weak' check
7643 * at best...
6d2010ae
A
7644 *
7645 * XXX oparent and oname may not be set in the compound vnop case
91447636 7646 */
6d2010ae 7647 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
91447636
A
7648 int update_flags;
7649
7650 update_flags = VNODE_UPDATE_NAME;
7651
7652 if (fdvp != tdvp)
7653 update_flags |= VNODE_UPDATE_PARENT;
7654
316670eb 7655 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
1c79356b
A
7656 }
7657out1:
593a1d5f
A
7658 if (to_name != NULL) {
7659 RELEASE_PATH(to_name);
7660 to_name = NULL;
7661 }
7662 if (from_name != NULL) {
7663 RELEASE_PATH(from_name);
7664 from_name = NULL;
7665 }
91447636
A
7666 if (holding_mntlock) {
7667 mount_unlock_renames(locked_mp);
7668 mount_drop(locked_mp, 0);
593a1d5f 7669 holding_mntlock = 0;
91447636
A
7670 }
7671 if (tdvp) {
7672 /*
7673 * nameidone has to happen before we vnode_put(tdvp)
7674 * since it may need to release the fs_nodelock on the tdvp
7675 */
316670eb 7676 nameidone(tond);
91447636
A
7677
7678 if (tvp)
7679 vnode_put(tvp);
7680 vnode_put(tdvp);
7681 }
7682 if (fdvp) {
7683 /*
7684 * nameidone has to happen before we vnode_put(fdvp)
7685 * since it may need to release the fs_nodelock on the fdvp
7686 */
316670eb 7687 nameidone(fromnd);
91447636
A
7688
7689 if (fvp)
7690 vnode_put(fvp);
7691 vnode_put(fdvp);
7692 }
fe8ab488 7693
6d2010ae
A
7694 /*
7695 * If things changed after we did the namei, then we will re-drive
7696 * this rename call from the top.
7697 */
316670eb 7698 if (do_retry) {
6d2010ae 7699 do_retry = 0;
593a1d5f
A
7700 goto retry;
7701 }
316670eb
A
7702
7703 FREE(__rename_data, M_TEMP);
1c79356b
A
7704 return (error);
7705}
7706
fe8ab488
A
7707int
7708rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7709{
7710 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7711 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7712}
7713
39037602 7714int renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
fe8ab488
A
7715{
7716 return renameat_internal(
39037602
A
7717 vfs_context_current(),
7718 uap->fromfd, uap->from,
7719 uap->tofd, uap->to,
fe8ab488
A
7720 UIO_USERSPACE, uap->flags);
7721}
39037602 7722
fe8ab488
A
7723int
7724renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7725{
7726 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7727 uap->tofd, uap->to, UIO_USERSPACE, 0));
7728}
7729
1c79356b
A
7730/*
7731 * Make a directory file.
2d21ac55
A
7732 *
7733 * Returns: 0 Success
7734 * EEXIST
7735 * namei:???
7736 * vnode_authorize:???
7737 * vn_create:???
1c79356b 7738 */
1c79356b 7739/* ARGSUSED */
91447636 7740static int
fe8ab488
A
7741mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
7742 enum uio_seg segflg)
1c79356b 7743{
91447636 7744 vnode_t vp, dvp;
1c79356b 7745 int error;
91447636 7746 int update_flags = 0;
6d2010ae 7747 int batched;
1c79356b
A
7748 struct nameidata nd;
7749
91447636 7750 AUDIT_ARG(mode, vap->va_mode);
fe8ab488 7751 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
6d2010ae 7752 path, ctx);
9bccf70c 7753 nd.ni_cnd.cn_flags |= WILLBEDIR;
6d2010ae
A
7754 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
7755
7756continue_lookup:
fe8ab488 7757 error = nameiat(&nd, fd);
55e303ae 7758 if (error)
1c79356b 7759 return (error);
91447636 7760 dvp = nd.ni_dvp;
1c79356b 7761 vp = nd.ni_vp;
55e303ae 7762
fe8ab488
A
7763 if (vp != NULL) {
7764 error = EEXIST;
7765 goto out;
7766 }
7767
6d2010ae 7768 batched = vnode_compound_mkdir_available(dvp);
2d21ac55
A
7769
7770 VATTR_SET(vap, va_type, VDIR);
fe8ab488 7771
6d2010ae
A
7772 /*
7773 * XXX
7774 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7775 * only get EXISTS or EISDIR for existing path components, and not that it could see
7776 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7777 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7778 */
fe8ab488 7779 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
6d2010ae
A
7780 if (error == EACCES || error == EPERM) {
7781 int error2;
7782
7783 nameidone(&nd);
7784 vnode_put(dvp);
7785 dvp = NULLVP;
7786
fe8ab488
A
7787 /*
7788 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
6d2010ae
A
7789 * rather than EACCESS if the target exists.
7790 */
fe8ab488
A
7791 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
7792 path, ctx);
7793 error2 = nameiat(&nd, fd);
6d2010ae
A
7794 if (error2) {
7795 goto out;
7796 } else {
7797 vp = nd.ni_vp;
7798 error = EEXIST;
7799 goto out;
7800 }
7801 }
7802
2d21ac55 7803 goto out;
6d2010ae
A
7804 }
7805
7806 /*
fe8ab488 7807 * make the directory
6d2010ae 7808 */
fe8ab488 7809 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
6d2010ae
A
7810 if (error == EKEEPLOOKING) {
7811 nd.ni_vp = vp;
7812 goto continue_lookup;
7813 }
2d21ac55 7814
fe8ab488 7815 goto out;
6d2010ae 7816 }
fe8ab488 7817
91447636
A
7818 // Make sure the name & parent pointers are hooked up
7819 if (vp->v_name == NULL)
7820 update_flags |= VNODE_UPDATE_NAME;
7821 if (vp->v_parent == NULLVP)
7822 update_flags |= VNODE_UPDATE_PARENT;
7823
7824 if (update_flags)
7825 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
55e303ae 7826
2d21ac55 7827#if CONFIG_FSE
91447636 7828 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
2d21ac55 7829#endif
91447636
A
7830
7831out:
7832 /*
7833 * nameidone has to happen before we vnode_put(dvp)
7834 * since it may need to release the fs_nodelock on the dvp
7835 */
7836 nameidone(&nd);
7837
7838 if (vp)
6d2010ae 7839 vnode_put(vp);
fe8ab488 7840 if (dvp)
6d2010ae 7841 vnode_put(dvp);
55e303ae 7842
1c79356b
A
7843 return (error);
7844}
7845
b0d623f7
A
7846/*
7847 * mkdir_extended: Create a directory; with extended security (ACL).
7848 *
7849 * Parameters: p Process requesting to create the directory
7850 * uap User argument descriptor (see below)
fe8ab488 7851 * retval (ignored)
b0d623f7
A
7852 *
7853 * Indirect: uap->path Path of directory to create
7854 * uap->mode Access permissions to set
7855 * uap->xsecurity ACL to set
fe8ab488 7856 *
b0d623f7
A
7857 * Returns: 0 Success
7858 * !0 Not success
7859 *
7860 */
1c79356b 7861int
b0d623f7 7862mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
1c79356b 7863{
91447636
A
7864 int ciferror;
7865 kauth_filesec_t xsecdst;
7866 struct vnode_attr va;
7867
b0d623f7
A
7868 AUDIT_ARG(owner, uap->uid, uap->gid);
7869
91447636
A
7870 xsecdst = NULL;
7871 if ((uap->xsecurity != USER_ADDR_NULL) &&
7872 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
7873 return ciferror;
7874
91447636 7875 VATTR_INIT(&va);
fe8ab488 7876 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
91447636
A
7877 if (xsecdst != NULL)
7878 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
7879
fe8ab488
A
7880 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7881 UIO_USERSPACE);
91447636
A
7882 if (xsecdst != NULL)
7883 kauth_filesec_free(xsecdst);
7884 return ciferror;
1c79356b
A
7885}
7886
1c79356b 7887int
b0d623f7 7888mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
1c79356b 7889{
91447636 7890 struct vnode_attr va;
1c79356b 7891
91447636 7892 VATTR_INIT(&va);
fe8ab488 7893 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
e5568f75 7894
fe8ab488
A
7895 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7896 UIO_USERSPACE));
91447636 7897}
1c79356b 7898
91447636 7899int
fe8ab488
A
7900mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
7901{
7902 struct vnode_attr va;
7903
7904 VATTR_INIT(&va);
7905 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7906
7907 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
7908 UIO_USERSPACE));
7909}
7910
7911static int
7912rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
7913 enum uio_seg segflg)
1c79356b 7914{
2d21ac55 7915 vnode_t vp, dvp;
91447636
A
7916 int error;
7917 struct nameidata nd;
6d2010ae
A
7918 char *path = NULL;
7919 int len=0;
7920 int has_listeners = 0;
7921 int need_event = 0;
7922 int truncated = 0;
6d2010ae
A
7923#if CONFIG_FSE
7924 struct vnode_attr va;
7925#endif /* CONFIG_FSE */
7926 struct vnode_attr *vap = NULL;
c18c124e 7927 int restart_count = 0;
6d2010ae 7928 int batched;
91447636 7929
b0d623f7 7930 int restart_flag;
91447636 7931
fe8ab488 7932 /*
2d21ac55
A
7933 * This loop exists to restart rmdir in the unlikely case that two
7934 * processes are simultaneously trying to remove the same directory
7935 * containing orphaned appleDouble files.
7936 */
7937 do {
6d2010ae 7938 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
fe8ab488 7939 segflg, dirpath, ctx);
6d2010ae
A
7940 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
7941continue_lookup:
2d21ac55 7942 restart_flag = 0;
6d2010ae 7943 vap = NULL;
2d21ac55 7944
fe8ab488 7945 error = nameiat(&nd, fd);
2d21ac55
A
7946 if (error)
7947 return (error);
7948
7949 dvp = nd.ni_dvp;
7950 vp = nd.ni_vp;
7951
6d2010ae
A
7952 if (vp) {
7953 batched = vnode_compound_rmdir_available(vp);
2d21ac55 7954
6d2010ae
A
7955 if (vp->v_flag & VROOT) {
7956 /*
7957 * The root of a mounted filesystem cannot be deleted.
7958 */
7959 error = EBUSY;
7960 goto out;
7961 }
1c79356b 7962
00867663
A
7963#if DEVELOPMENT || DEBUG
7964 /*
7965 * XXX VSWAP: Check for entitlements or special flag here
7966 * so we can restrict access appropriately.
7967 */
7968#else /* DEVELOPMENT || DEBUG */
7969
7970 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
7971 error = EPERM;
7972 goto out;
7973 }
7974#endif /* DEVELOPMENT || DEBUG */
7975
2d21ac55 7976 /*
6d2010ae
A
7977 * Removed a check here; we used to abort if vp's vid
7978 * was not the same as what we'd seen the last time around.
7979 * I do not think that check was valid, because if we retry
7980 * and all dirents are gone, the directory could legitimately
7981 * be recycled but still be present in a situation where we would
fe8ab488 7982 * have had permission to delete. Therefore, we won't make
6d2010ae
A
7983 * an effort to preserve that check now that we may not have a
7984 * vp here.
2d21ac55 7985 */
6d2010ae
A
7986
7987 if (!batched) {
7988 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
7989 if (error) {
3e170ce0
A
7990 if (error == ENOENT) {
7991 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7992 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7993 restart_flag = 1;
7994 restart_count += 1;
7995 }
c18c124e 7996 }
6d2010ae
A
7997 goto out;
7998 }
7999 }
2d21ac55 8000 } else {
6d2010ae
A
8001 batched = 1;
8002
8003 if (!vnode_compound_rmdir_available(dvp)) {
8004 panic("No error, but no compound rmdir?");
8005 }
91447636 8006 }
6d2010ae 8007
2d21ac55 8008#if CONFIG_FSE
6d2010ae 8009 fse_info finfo;
b0d623f7 8010
6d2010ae
A
8011 need_event = need_fsevent(FSE_DELETE, dvp);
8012 if (need_event) {
8013 if (!batched) {
2d21ac55 8014 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
8015 } else {
8016 error = vfs_get_notify_attributes(&va);
8017 if (error) {
8018 goto out;
8019 }
8020
8021 vap = &va;
2d21ac55 8022 }
6d2010ae 8023 }
2d21ac55 8024#endif
6d2010ae
A
8025 has_listeners = kauth_authorize_fileop_has_listeners();
8026 if (need_event || has_listeners) {
8027 if (path == NULL) {
2d21ac55
A
8028 GET_PATH(path);
8029 if (path == NULL) {
8030 error = ENOMEM;
8031 goto out;
8032 }
6d2010ae 8033 }
b0d623f7 8034
6d2010ae 8035 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
b0d623f7 8036#if CONFIG_FSE
6d2010ae
A
8037 if (truncated) {
8038 finfo.mode |= FSE_TRUNCATED_PATH;
2d21ac55 8039 }
6d2010ae
A
8040#endif
8041 }
91447636 8042
6d2010ae
A
8043 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
8044 nd.ni_vp = vp;
8045 if (vp == NULLVP) {
8046 /* Couldn't find a vnode */
8047 goto out;
8048 }
2d21ac55 8049
6d2010ae
A
8050 if (error == EKEEPLOOKING) {
8051 goto continue_lookup;
3e170ce0
A
8052 } else if (batched && error == ENOENT) {
8053 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
8054 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8055 /*
8056 * For compound VNOPs, the authorization callback
8057 * may return ENOENT in case of racing hard link lookups
8058 * redrive the lookup.
8059 */
8060 restart_flag = 1;
8061 restart_count += 1;
8062 goto out;
8063 }
6d2010ae 8064 }
39236c6e 8065#if CONFIG_APPLEDOUBLE
6d2010ae
A
8066 /*
8067 * Special case to remove orphaned AppleDouble
8068 * files. I don't like putting this in the kernel,
8069 * but carbon does not like putting this in carbon either,
8070 * so here we are.
8071 */
8072 if (error == ENOTEMPTY) {
8073 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
8074 if (error == EBUSY) {
8075 goto out;
2d21ac55
A
8076 }
8077
6d2010ae 8078
2d21ac55 8079 /*
fe8ab488 8080 * Assuming everything went well, we will try the RMDIR again
2d21ac55 8081 */
6d2010ae
A
8082 if (!error)
8083 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
8084 }
39236c6e 8085#endif /* CONFIG_APPLEDOUBLE */
6d2010ae 8086 /*
fe8ab488 8087 * Call out to allow 3rd party notification of delete.
6d2010ae
A
8088 * Ignore result of kauth_authorize_fileop call.
8089 */
8090 if (!error) {
8091 if (has_listeners) {
fe8ab488
A
8092 kauth_authorize_fileop(vfs_context_ucred(ctx),
8093 KAUTH_FILEOP_DELETE,
6d2010ae
A
8094 (uintptr_t)vp,
8095 (uintptr_t)path);
8096 }
8097
8098 if (vp->v_flag & VISHARDLINK) {
8099 // see the comment in unlink1() about why we update
8100 // the parent of a hard link when it is removed
8101 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
8102 }
2d21ac55
A
8103
8104#if CONFIG_FSE
6d2010ae
A
8105 if (need_event) {
8106 if (vap) {
8107 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 8108 }
6d2010ae
A
8109 add_fsevent(FSE_DELETE, ctx,
8110 FSE_ARG_STRING, len, path,
8111 FSE_ARG_FINFO, &finfo,
8112 FSE_ARG_DONE);
2d21ac55 8113 }
6d2010ae 8114#endif
2d21ac55
A
8115 }
8116
8117out:
6d2010ae
A
8118 if (path != NULL) {
8119 RELEASE_PATH(path);
8120 path = NULL;
8121 }
2d21ac55
A
8122 /*
8123 * nameidone has to happen before we vnode_put(dvp)
8124 * since it may need to release the fs_nodelock on the dvp
8125 */
8126 nameidone(&nd);
2d21ac55 8127 vnode_put(dvp);
6d2010ae 8128
fe8ab488 8129 if (vp)
6d2010ae 8130 vnode_put(vp);
2d21ac55
A
8131
8132 if (restart_flag == 0) {
8133 wakeup_one((caddr_t)vp);
8134 return (error);
8135 }
8136 tsleep(vp, PVFS, "rm AD", 1);
8137
8138 } while (restart_flag != 0);
91447636 8139
1c79356b 8140 return (error);
2d21ac55 8141
1c79356b 8142}
91447636 8143
fe8ab488
A
8144/*
8145 * Remove a directory file.
8146 */
8147/* ARGSUSED */
8148int
8149rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
8150{
8151 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
8152 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
8153}
8154
2d21ac55
A
8155/* Get direntry length padded to 8 byte alignment */
8156#define DIRENT64_LEN(namlen) \
8157 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
8158
fe8ab488 8159errno_t
2d21ac55
A
8160vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
8161 int *numdirent, vfs_context_t ctxp)
8162{
8163 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
39037602 8164 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
6d2010ae 8165 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
2d21ac55
A
8166 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
8167 } else {
8168 size_t bufsize;
8169 void * bufptr;
8170 uio_t auio;
15129b1c 8171 struct direntry *entry64;
2d21ac55
A
8172 struct dirent *dep;
8173 int bytesread;
8174 int error;
8175
8176 /*
8177 * Our kernel buffer needs to be smaller since re-packing
8178 * will expand each dirent. The worse case (when the name
8179 * length is 3) corresponds to a struct direntry size of 32
8180 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
8181 * (4-byte aligned). So having a buffer that is 3/8 the size
8182 * will prevent us from reading more than we can pack.
8183 *
8184 * Since this buffer is wired memory, we will limit the
39037602 8185 * buffer size to a maximum of 32K. We would really like to
2d21ac55 8186 * use 32K in the MIN(), but we use magic number 87371 to
39037602 8187 * prevent uio_resid() * 3 / 8 from overflowing.
2d21ac55 8188 */
316670eb 8189 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
2d21ac55 8190 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
b0d623f7
A
8191 if (bufptr == NULL) {
8192 return ENOMEM;
8193 }
2d21ac55 8194
b0d623f7 8195 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
2d21ac55
A
8196 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
8197 auio->uio_offset = uio->uio_offset;
8198
8199 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
8200
8201 dep = (struct dirent *)bufptr;
8202 bytesread = bufsize - uio_resid(auio);
8203
15129b1c
A
8204 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
8205 M_TEMP, M_WAITOK);
2d21ac55
A
8206 /*
8207 * Convert all the entries and copy them out to user's buffer.
8208 */
8209 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
15129b1c
A
8210 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
8211
8212 bzero(entry64, enbufsize);
2d21ac55 8213 /* Convert a dirent to a dirent64. */
15129b1c
A
8214 entry64->d_ino = dep->d_ino;
8215 entry64->d_seekoff = 0;
8216 entry64->d_reclen = enbufsize;
8217 entry64->d_namlen = dep->d_namlen;
8218 entry64->d_type = dep->d_type;
8219 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
2d21ac55
A
8220
8221 /* Move to next entry. */
8222 dep = (struct dirent *)((char *)dep + dep->d_reclen);
8223
8224 /* Copy entry64 to user's buffer. */
15129b1c 8225 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
2d21ac55
A
8226 }
8227
8228 /* Update the real offset using the offset we got from VNOP_READDIR. */
8229 if (error == 0) {
8230 uio->uio_offset = auio->uio_offset;
8231 }
8232 uio_free(auio);
8233 FREE(bufptr, M_TEMP);
15129b1c 8234 FREE(entry64, M_TEMP);
2d21ac55
A
8235 return (error);
8236 }
8237}
1c79356b 8238
39236c6e
A
8239#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
8240
1c79356b
A
8241/*
8242 * Read a block of directory entries in a file system independent format.
8243 */
2d21ac55
A
8244static int
8245getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
8246 off_t *offset, int flags)
1c79356b 8247{
2d21ac55
A
8248 vnode_t vp;
8249 struct vfs_context context = *vfs_context_current(); /* local copy */
91447636
A
8250 struct fileproc *fp;
8251 uio_t auio;
2d21ac55
A
8252 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8253 off_t loff;
8254 int error, eofflag, numdirent;
91447636 8255 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 8256
2d21ac55
A
8257 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
8258 if (error) {
1c79356b 8259 return (error);
2d21ac55 8260 }
91447636
A
8261 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8262 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8263 error = EBADF;
8264 goto out;
8265 }
2d21ac55 8266
39236c6e
A
8267 if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
8268 bufsize = GETDIRENTRIES_MAXBUFSIZE;
8269
2d21ac55
A
8270#if CONFIG_MACF
8271 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
8272 if (error)
8273 goto out;
8274#endif
91447636
A
8275 if ( (error = vnode_getwithref(vp)) ) {
8276 goto out;
8277 }
91447636 8278 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
55e303ae 8279
1c79356b 8280unionread:
91447636
A
8281 if (vp->v_type != VDIR) {
8282 (void)vnode_put(vp);
8283 error = EINVAL;
8284 goto out;
8285 }
2d21ac55
A
8286
8287#if CONFIG_MACF
8288 error = mac_vnode_check_readdir(&context, vp);
8289 if (error != 0) {
8290 (void)vnode_put(vp);
8291 goto out;
8292 }
8293#endif /* MAC */
91447636
A
8294
8295 loff = fp->f_fglob->fg_offset;
2d21ac55
A
8296 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8297 uio_addiov(auio, bufp, bufsize);
91447636 8298
2d21ac55
A
8299 if (flags & VNODE_READDIR_EXTENDED) {
8300 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
8301 fp->f_fglob->fg_offset = uio_offset(auio);
8302 } else {
8303 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
8304 fp->f_fglob->fg_offset = uio_offset(auio);
8305 }
91447636
A
8306 if (error) {
8307 (void)vnode_put(vp);
8308 goto out;
8309 }
1c79356b 8310
2d21ac55
A
8311 if ((user_ssize_t)bufsize == uio_resid(auio)){
8312 if (union_dircheckp) {
8313 error = union_dircheckp(&vp, fp, &context);
8314 if (error == -1)
8315 goto unionread;
813fb2f6
A
8316 if (error) {
8317 (void)vnode_put(vp);
2d21ac55 8318 goto out;
813fb2f6 8319 }
1c79356b
A
8320 }
8321
39236c6e 8322 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
2d21ac55 8323 struct vnode *tvp = vp;
39236c6e
A
8324 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
8325 vnode_ref(vp);
8326 fp->f_fglob->fg_data = (caddr_t) vp;
8327 fp->f_fglob->fg_offset = 0;
8328 vnode_rele(tvp);
8329 vnode_put(tvp);
8330 goto unionread;
8331 }
8332 vp = tvp;
1c79356b
A
8333 }
8334 }
2d21ac55 8335
91447636 8336 vnode_put(vp);
2d21ac55
A
8337 if (offset) {
8338 *offset = loff;
8339 }
39037602 8340
2d21ac55 8341 *bytesread = bufsize - uio_resid(auio);
91447636
A
8342out:
8343 file_drop(fd);
1c79356b
A
8344 return (error);
8345}
8346
2d21ac55
A
8347
8348int
b0d623f7 8349getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
2d21ac55
A
8350{
8351 off_t offset;
2d21ac55
A
8352 ssize_t bytesread;
8353 int error;
8354
8355 AUDIT_ARG(fd, uap->fd);
8356 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
8357
8358 if (error == 0) {
b0d623f7
A
8359 if (proc_is64bit(p)) {
8360 user64_long_t base = (user64_long_t)offset;
8361 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
8362 } else {
8363 user32_long_t base = (user32_long_t)offset;
8364 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
8365 }
2d21ac55
A
8366 *retval = bytesread;
8367 }
8368 return (error);
8369}
8370
8371int
8372getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
8373{
8374 off_t offset;
8375 ssize_t bytesread;
8376 int error;
8377
8378 AUDIT_ARG(fd, uap->fd);
8379 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
8380
8381 if (error == 0) {
8382 *retval = bytesread;
8383 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
8384 }
8385 return (error);
8386}
8387
8388
1c79356b
A
8389/*
8390 * Set the mode mask for creation of filesystem nodes.
b0d623f7 8391 * XXX implement xsecurity
1c79356b 8392 */
91447636
A
8393#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
8394static int
b0d623f7 8395umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
1c79356b 8396{
2d21ac55 8397 struct filedesc *fdp;
1c79356b 8398
91447636 8399 AUDIT_ARG(mask, newmask);
2d21ac55 8400 proc_fdlock(p);
1c79356b
A
8401 fdp = p->p_fd;
8402 *retval = fdp->fd_cmask;
91447636 8403 fdp->fd_cmask = newmask & ALLPERMS;
2d21ac55 8404 proc_fdunlock(p);
1c79356b
A
8405 return (0);
8406}
8407
b0d623f7
A
8408/*
8409 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
8410 *
8411 * Parameters: p Process requesting to set the umask
8412 * uap User argument descriptor (see below)
8413 * retval umask of the process (parameter p)
8414 *
8415 * Indirect: uap->newmask umask to set
8416 * uap->xsecurity ACL to set
39037602 8417 *
b0d623f7
A
8418 * Returns: 0 Success
8419 * !0 Not success
8420 *
8421 */
8422int
8423umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
91447636
A
8424{
8425 int ciferror;
8426 kauth_filesec_t xsecdst;
8427
8428 xsecdst = KAUTH_FILESEC_NONE;
8429 if (uap->xsecurity != USER_ADDR_NULL) {
8430 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
8431 return ciferror;
8432 } else {
8433 xsecdst = KAUTH_FILESEC_NONE;
8434 }
8435
8436 ciferror = umask1(p, uap->newmask, xsecdst, retval);
8437
8438 if (xsecdst != KAUTH_FILESEC_NONE)
8439 kauth_filesec_free(xsecdst);
8440 return ciferror;
8441}
8442
8443int
b0d623f7 8444umask(proc_t p, struct umask_args *uap, int32_t *retval)
91447636
A
8445{
8446 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
8447}
8448
1c79356b
A
8449/*
8450 * Void all references to file by ripping underlying filesystem
8451 * away from vnode.
8452 */
1c79356b
A
8453/* ARGSUSED */
8454int
b0d623f7 8455revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
1c79356b 8456{
2d21ac55 8457 vnode_t vp;
91447636 8458 struct vnode_attr va;
2d21ac55 8459 vfs_context_t ctx = vfs_context_current();
1c79356b
A
8460 int error;
8461 struct nameidata nd;
8462
6d2010ae
A
8463 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
8464 uap->path, ctx);
55e303ae
A
8465 error = namei(&nd);
8466 if (error)
1c79356b
A
8467 return (error);
8468 vp = nd.ni_vp;
91447636
A
8469
8470 nameidone(&nd);
8471
b0d623f7
A
8472 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
8473 error = ENOTSUP;
8474 goto out;
8475 }
8476
8477 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
8478 error = EBUSY;
8479 goto out;
8480 }
8481
2d21ac55
A
8482#if CONFIG_MACF
8483 error = mac_vnode_check_revoke(ctx, vp);
8484 if (error)
8485 goto out;
8486#endif
8487
91447636
A
8488 VATTR_INIT(&va);
8489 VATTR_WANTED(&va, va_uid);
2d21ac55 8490 if ((error = vnode_getattr(vp, &va, ctx)))
1c79356b 8491 goto out;
2d21ac55
A
8492 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
8493 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
1c79356b 8494 goto out;
b0d623f7 8495 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
2d21ac55 8496 VNOP_REVOKE(vp, REVOKEALL, ctx);
1c79356b 8497out:
91447636 8498 vnode_put(vp);
1c79356b
A
8499 return (error);
8500}
8501
0b4e3aa0 8502
1c79356b
A
8503/*
8504 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
9bccf70c 8505 * The following system calls are designed to support features
1c79356b
A
8506 * which are specific to the HFS & HFS Plus volume formats
8507 */
8508
9bccf70c 8509
1c79356b 8510/*
39236c6e
A
8511 * Obtain attribute information on objects in a directory while enumerating
8512 * the directory.
8513 */
1c79356b
A
8514/* ARGSUSED */
8515int
b0d623f7 8516getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
1c79356b 8517{
2d21ac55 8518 vnode_t vp;
91447636
A
8519 struct fileproc *fp;
8520 uio_t auio = NULL;
8521 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
39236c6e 8522 uint32_t count, savecount;
2d21ac55 8523 uint32_t newstate;
91447636 8524 int error, eofflag;
2d21ac55 8525 uint32_t loff;
39037602 8526 struct attrlist attributelist;
2d21ac55 8527 vfs_context_t ctx = vfs_context_current();
91447636
A
8528 int fd = uap->fd;
8529 char uio_buf[ UIO_SIZEOF(1) ];
8530 kauth_action_t action;
8531
8532 AUDIT_ARG(fd, fd);
39037602 8533
91447636 8534 /* Get the attributes into kernel space */
2d21ac55 8535 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
91447636 8536 return(error);
2d21ac55
A
8537 }
8538 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
8539 return(error);
8540 }
39236c6e 8541 savecount = count;
2d21ac55 8542 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
91447636 8543 return (error);
2d21ac55 8544 }
91447636
A
8545 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8546 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8547 error = EBADF;
8548 goto out;
8549 }
2d21ac55
A
8550
8551
8552#if CONFIG_MACF
8553 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
8554 fp->f_fglob);
8555 if (error)
8556 goto out;
8557#endif
8558
8559
91447636
A
8560 if ( (error = vnode_getwithref(vp)) )
8561 goto out;
55e303ae 8562
91447636 8563 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 8564
39236c6e 8565unionread:
91447636
A
8566 if (vp->v_type != VDIR) {
8567 (void)vnode_put(vp);
8568 error = EINVAL;
8569 goto out;
8570 }
55e303ae 8571
2d21ac55
A
8572#if CONFIG_MACF
8573 error = mac_vnode_check_readdir(ctx, vp);
8574 if (error != 0) {
8575 (void)vnode_put(vp);
8576 goto out;
8577 }
8578#endif /* MAC */
8579
91447636
A
8580 /* set up the uio structure which will contain the users return buffer */
8581 loff = fp->f_fglob->fg_offset;
39236c6e 8582 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
91447636 8583 uio_addiov(auio, uap->buffer, uap->buffersize);
39037602 8584
91447636
A
8585 /*
8586 * If the only item requested is file names, we can let that past with
8587 * just LIST_DIRECTORY. If they want any other attributes, that means
8588 * they need SEARCH as well.
8589 */
8590 action = KAUTH_VNODE_LIST_DIRECTORY;
8591 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
8592 attributelist.fileattr || attributelist.dirattr)
8593 action |= KAUTH_VNODE_SEARCH;
39037602 8594
2d21ac55 8595 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
2d21ac55 8596
b0d623f7
A
8597 /* Believe it or not, uap->options only has 32-bits of valid
8598 * info, so truncate before extending again */
39236c6e
A
8599
8600 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
8601 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
8602 }
8603
8604 if (error) {
8605 (void) vnode_put(vp);
8606 goto out;
8607 }
8608
8609 /*
8610 * If we've got the last entry of a directory in a union mount
8611 * then reset the eofflag and pretend there's still more to come.
8612 * The next call will again set eofflag and the buffer will be empty,
8613 * so traverse to the underlying directory and do the directory
8614 * read there.
8615 */
8616 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
8617 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
8618 eofflag = 0;
8619 } else { // Empty buffer
8620 struct vnode *tvp = vp;
8621 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
8622 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
8623 fp->f_fglob->fg_data = (caddr_t) vp;
8624 fp->f_fglob->fg_offset = 0; // reset index for new dir
8625 count = savecount;
8626 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
8627 vnode_put(tvp);
8628 goto unionread;
8629 }
8630 vp = tvp;
8631 }
2d21ac55 8632 }
39236c6e 8633
91447636 8634 (void)vnode_put(vp);
1c79356b 8635
39037602 8636 if (error)
91447636
A
8637 goto out;
8638 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
1c79356b 8639
2d21ac55 8640 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
91447636 8641 goto out;
2d21ac55 8642 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
91447636 8643 goto out;
2d21ac55 8644 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
91447636 8645 goto out;
1c79356b
A
8646
8647 *retval = eofflag; /* similar to getdirentries */
91447636 8648 error = 0;
2d21ac55 8649out:
91447636
A
8650 file_drop(fd);
8651 return (error); /* return error earlier, an retval of 0 or 1 now */
1c79356b 8652
39236c6e 8653} /* end of getdirentriesattr system call */
1c79356b
A
8654
8655/*
8656* Exchange data between two files
8657*/
8658
1c79356b
A
8659/* ARGSUSED */
8660int
b0d623f7 8661exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
1c79356b
A
8662{
8663
8664 struct nameidata fnd, snd;
2d21ac55
A
8665 vfs_context_t ctx = vfs_context_current();
8666 vnode_t fvp;
8667 vnode_t svp;
8668 int error;
b0d623f7 8669 u_int32_t nameiflags;
91447636
A
8670 char *fpath = NULL;
8671 char *spath = NULL;
b0d623f7
A
8672 int flen=0, slen=0;
8673 int from_truncated=0, to_truncated=0;
8674#if CONFIG_FSE
91447636 8675 fse_info f_finfo, s_finfo;
b0d623f7 8676#endif
39037602 8677
1c79356b
A
8678 nameiflags = 0;
8679 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8680
6d2010ae
A
8681 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
8682 UIO_USERSPACE, uap->path1, ctx);
1c79356b 8683
6d2010ae
A
8684 error = namei(&fnd);
8685 if (error)
8686 goto out2;
1c79356b 8687
91447636
A
8688 nameidone(&fnd);
8689 fvp = fnd.ni_vp;
1c79356b 8690
39037602 8691 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
6d2010ae 8692 UIO_USERSPACE, uap->path2, ctx);
1c79356b 8693
6d2010ae
A
8694 error = namei(&snd);
8695 if (error) {
91447636 8696 vnode_put(fvp);
55e303ae 8697 goto out2;
6d2010ae 8698 }
91447636 8699 nameidone(&snd);
1c79356b
A
8700 svp = snd.ni_vp;
8701
91447636
A
8702 /*
8703 * if the files are the same, return an inval error
8704 */
1c79356b 8705 if (svp == fvp) {
91447636
A
8706 error = EINVAL;
8707 goto out;
39037602 8708 }
1c79356b 8709
91447636
A
8710 /*
8711 * if the files are on different volumes, return an error
8712 */
8713 if (svp->v_mount != fvp->v_mount) {
8714 error = EXDEV;
8715 goto out;
8716 }
2d21ac55 8717
39236c6e
A
8718 /* If they're not files, return an error */
8719 if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
db609669
A
8720 error = EINVAL;
8721 goto out;
8722 }
8723
2d21ac55
A
8724#if CONFIG_MACF
8725 error = mac_vnode_check_exchangedata(ctx,
8726 fvp, svp);
8727 if (error)
8728 goto out;
8729#endif
8730 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
8731 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
91447636 8732 goto out;
1c79356b 8733
2d21ac55
A
8734 if (
8735#if CONFIG_FSE
39037602 8736 need_fsevent(FSE_EXCHANGE, fvp) ||
2d21ac55
A
8737#endif
8738 kauth_authorize_fileop_has_listeners()) {
8739 GET_PATH(fpath);
8740 GET_PATH(spath);
8741 if (fpath == NULL || spath == NULL) {
8742 error = ENOMEM;
8743 goto out;
8744 }
b0d623f7
A
8745
8746 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
8747 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
39037602 8748
2d21ac55
A
8749#if CONFIG_FSE
8750 get_fse_info(fvp, &f_finfo, ctx);
8751 get_fse_info(svp, &s_finfo, ctx);
b0d623f7
A
8752 if (from_truncated || to_truncated) {
8753 // set it here since only the f_finfo gets reported up to user space
8754 f_finfo.mode |= FSE_TRUNCATED_PATH;
8755 }
2d21ac55 8756#endif
91447636 8757 }
1c79356b 8758 /* Ok, make the call */
2d21ac55 8759 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
55e303ae 8760
91447636 8761 if (error == 0) {
2d21ac55 8762 const char *tmpname;
91447636
A
8763
8764 if (fpath != NULL && spath != NULL) {
39037602 8765 /* call out to allow 3rd party notification of exchangedata.
91447636
A
8766 * Ignore result of kauth_authorize_fileop call.
8767 */
39037602 8768 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
91447636
A
8769 (uintptr_t)fpath, (uintptr_t)spath);
8770 }
8771 name_cache_lock();
8772
8773 tmpname = fvp->v_name;
8774 fvp->v_name = svp->v_name;
8775 svp->v_name = tmpname;
39037602 8776
91447636 8777 if (fvp->v_parent != svp->v_parent) {
2d21ac55 8778 vnode_t tmp;
91447636
A
8779
8780 tmp = fvp->v_parent;
8781 fvp->v_parent = svp->v_parent;
8782 svp->v_parent = tmp;
8783 }
8784 name_cache_unlock();
8785
2d21ac55 8786#if CONFIG_FSE
91447636 8787 if (fpath != NULL && spath != NULL) {
2d21ac55 8788 add_fsevent(FSE_EXCHANGE, ctx,
91447636
A
8789 FSE_ARG_STRING, flen, fpath,
8790 FSE_ARG_FINFO, &f_finfo,
8791 FSE_ARG_STRING, slen, spath,
8792 FSE_ARG_FINFO, &s_finfo,
8793 FSE_ARG_DONE);
8794 }
2d21ac55 8795#endif
55e303ae
A
8796 }
8797
1c79356b 8798out:
2d21ac55
A
8799 if (fpath != NULL)
8800 RELEASE_PATH(fpath);
8801 if (spath != NULL)
8802 RELEASE_PATH(spath);
91447636
A
8803 vnode_put(svp);
8804 vnode_put(fvp);
1c79356b 8805out2:
1c79356b 8806 return (error);
91447636 8807}
1c79356b 8808
39236c6e
A
8809/*
8810 * Return (in MB) the amount of freespace on the given vnode's volume.
8811 */
8812uint32_t freespace_mb(vnode_t vp);
8813
8814uint32_t
8815freespace_mb(vnode_t vp)
8816{
39037602 8817 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
39236c6e
A
8818 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
8819 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
8820}
8821
316670eb 8822#if CONFIG_SEARCHFS
1c79356b 8823
1c79356b
A
8824/* ARGSUSED */
8825
8826int
b0d623f7 8827searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
1c79356b 8828{
39236c6e
A
8829 vnode_t vp, tvp;
8830 int i, error=0;
1c79356b
A
8831 int fserror = 0;
8832 struct nameidata nd;
b0d623f7 8833 struct user64_fssearchblock searchblock;
1c79356b
A
8834 struct searchstate *state;
8835 struct attrlist *returnattrs;
b0d623f7 8836 struct timeval timelimit;
1c79356b 8837 void *searchparams1,*searchparams2;
91447636
A
8838 uio_t auio = NULL;
8839 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
b0d623f7 8840 uint32_t nummatches;
1c79356b 8841 int mallocsize;
b0d623f7 8842 uint32_t nameiflags;
2d21ac55 8843 vfs_context_t ctx = vfs_context_current();
91447636 8844 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 8845
39236c6e 8846 /* Start by copying in fsearchblock parameter list */
91447636 8847 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
8848 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
8849 timelimit.tv_sec = searchblock.timelimit.tv_sec;
8850 timelimit.tv_usec = searchblock.timelimit.tv_usec;
91447636
A
8851 }
8852 else {
b0d623f7
A
8853 struct user32_fssearchblock tmp_searchblock;
8854
91447636
A
8855 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
8856 // munge into 64-bit version
8857 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
8858 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
8859 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
8860 searchblock.maxmatches = tmp_searchblock.maxmatches;
39037602 8861 /*
b0d623f7
A
8862 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8863 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8864 */
8865 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
8866 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
91447636
A
8867 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
8868 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
8869 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
8870 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
8871 searchblock.searchattrs = tmp_searchblock.searchattrs;
8872 }
8873 if (error)
1c79356b
A
8874 return(error);
8875
39037602 8876 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
a3d08fcd 8877 */
39037602 8878 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
a3d08fcd
A
8879 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
8880 return(EINVAL);
39037602 8881
1c79356b
A
8882 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8883 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
8884 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8885 /* block. */
fe8ab488
A
8886 /* */
8887 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8888 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8889 /* assumes the size is still 556 bytes it will continue to work */
39037602 8890
91447636 8891 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
fe8ab488 8892 sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
1c79356b
A
8893
8894 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
8895
8896 /* Now set up the various pointers to the correct place in our newly allocated memory */
8897
8898 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
8899 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
8900 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
8901
8902 /* Now copy in the stuff given our local variables. */
8903
91447636 8904 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
1c79356b
A
8905 goto freeandexit;
8906
91447636 8907 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
1c79356b
A
8908 goto freeandexit;
8909
91447636 8910 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
1c79356b 8911 goto freeandexit;
39037602 8912
91447636 8913 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
1c79356b 8914 goto freeandexit;
1c79356b 8915
39236c6e
A
8916 /*
8917 * When searching a union mount, need to set the
8918 * start flag at the first call on each layer to
8919 * reset state for the new volume.
8920 */
8921 if (uap->options & SRCHFS_START)
8922 state->ss_union_layer = 0;
39037602 8923 else
39236c6e
A
8924 uap->options |= state->ss_union_flags;
8925 state->ss_union_flags = 0;
b0d623f7
A
8926
8927 /*
8928 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8929 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
39037602
A
8930 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8931 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
b0d623f7
A
8932 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8933 */
8934
8935 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
8936 attrreference_t* string_ref;
8937 u_int32_t* start_length;
39037602 8938 user64_size_t param_length;
b0d623f7
A
8939
8940 /* validate searchparams1 */
39037602 8941 param_length = searchblock.sizeofsearchparams1;
b0d623f7
A
8942 /* skip the word that specifies length of the buffer */
8943 start_length= (u_int32_t*) searchparams1;
8944 start_length= start_length+1;
8945 string_ref= (attrreference_t*) start_length;
8946
8947 /* ensure no negative offsets or too big offsets */
8948 if (string_ref->attr_dataoffset < 0 ) {
8949 error = EINVAL;
39037602 8950 goto freeandexit;
b0d623f7
A
8951 }
8952 if (string_ref->attr_length > MAXPATHLEN) {
8953 error = EINVAL;
8954 goto freeandexit;
8955 }
39037602 8956
b0d623f7
A
8957 /* Check for pointer overflow in the string ref */
8958 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
8959 error = EINVAL;
8960 goto freeandexit;
8961 }
8962
8963 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
8964 error = EINVAL;
8965 goto freeandexit;
8966 }
8967 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
8968 error = EINVAL;
8969 goto freeandexit;
8970 }
8971 }
8972
8973 /* set up the uio structure which will contain the users return buffer */
39236c6e
A
8974 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8975 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
1c79356b 8976
91447636 8977 nameiflags = 0;
1c79356b 8978 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6d2010ae
A
8979 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
8980 UIO_USERSPACE, uap->path, ctx);
1c79356b 8981
55e303ae
A
8982 error = namei(&nd);
8983 if (error)
1c79356b 8984 goto freeandexit;
39236c6e 8985 vp = nd.ni_vp;
91447636 8986 nameidone(&nd);
39236c6e
A
8987
8988 /*
8989 * Switch to the root vnode for the volume
8990 */
8991 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
fe8ab488 8992 vnode_put(vp);
39236c6e
A
8993 if (error)
8994 goto freeandexit;
39236c6e
A
8995 vp = tvp;
8996
8997 /*
8998 * If it's a union mount, the path lookup takes
8999 * us to the top layer. But we may need to descend
9000 * to a lower layer. For non-union mounts the layer
9001 * is always zero.
9002 */
9003 for (i = 0; i < (int) state->ss_union_layer; i++) {
9004 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
9005 break;
9006 tvp = vp;
9007 vp = vp->v_mount->mnt_vnodecovered;
9008 if (vp == NULL) {
fe8ab488 9009 vnode_put(tvp);
39236c6e
A
9010 error = ENOENT;
9011 goto freeandexit;
9012 }
813fb2f6 9013 error = vnode_getwithref(vp);
39236c6e 9014 vnode_put(tvp);
813fb2f6
A
9015 if (error)
9016 goto freeandexit;
39236c6e 9017 }
1c79356b 9018
6d2010ae
A
9019#if CONFIG_MACF
9020 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
9021 if (error) {
9022 vnode_put(vp);
9023 goto freeandexit;
9024 }
9025#endif
9026
39037602 9027
1c79356b 9028 /*
39037602 9029 * If searchblock.maxmatches == 0, then skip the search. This has happened
39236c6e 9030 * before and sometimes the underlying code doesnt deal with it well.
1c79356b
A
9031 */
9032 if (searchblock.maxmatches == 0) {
9033 nummatches = 0;
9034 goto saveandexit;
9035 }
9036
9037 /*
39236c6e 9038 * Allright, we have everything we need, so lets make that call.
39037602 9039 *
39236c6e
A
9040 * We keep special track of the return value from the file system:
9041 * EAGAIN is an acceptable error condition that shouldn't keep us
9042 * from copying out any results...
1c79356b
A
9043 */
9044
6d2010ae 9045 fserror = VNOP_SEARCHFS(vp,
39236c6e
A
9046 searchparams1,
9047 searchparams2,
9048 &searchblock.searchattrs,
9049 (u_long)searchblock.maxmatches,
9050 &timelimit,
9051 returnattrs,
9052 &nummatches,
9053 (u_long)uap->scriptcode,
9054 (u_long)uap->options,
9055 auio,
9056 (struct searchstate *) &state->ss_fsstate,
9057 ctx);
39037602 9058
39236c6e
A
9059 /*
9060 * If it's a union mount we need to be called again
9061 * to search the mounted-on filesystem.
9062 */
9063 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
9064 state->ss_union_flags = SRCHFS_START;
9065 state->ss_union_layer++; // search next layer down
9066 fserror = EAGAIN;
9067 }
9068
6d2010ae
A
9069saveandexit:
9070
9071 vnode_put(vp);
9072
9073 /* Now copy out the stuff that needs copying out. That means the number of matches, the
9074 search state. Everything was already put into he return buffer by the vop call. */
9075
9076 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
9077 goto freeandexit;
9078
39236c6e 9079 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
6d2010ae 9080 goto freeandexit;
39037602 9081
6d2010ae
A
9082 error = fserror;
9083
9084freeandexit:
9085
9086 FREE(searchparams1,M_TEMP);
9087
9088 return(error);
9089
9090
9091} /* end of searchfs system call */
9092
316670eb
A
9093#else /* CONFIG_SEARCHFS */
9094
9095int
9096searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
9097{
9098 return (ENOTSUP);
9099}
9100
9101#endif /* CONFIG_SEARCHFS */
6d2010ae
A
9102
9103
9104lck_grp_attr_t * nspace_group_attr;
9105lck_attr_t * nspace_lock_attr;
9106lck_grp_t * nspace_mutex_group;
9107
9108lck_mtx_t nspace_handler_lock;
9109lck_mtx_t nspace_handler_exclusion_lock;
9110
9111time_t snapshot_timestamp=0;
9112int nspace_allow_virtual_devs=0;
9113
9114void nspace_handler_init(void);
9115
9116typedef struct nspace_item_info {
9117 struct vnode *vp;
9118 void *arg;
9119 uint64_t op;
9120 uint32_t vid;
9121 uint32_t flags;
9122 uint32_t token;
9123 uint32_t refcount;
9124} nspace_item_info;
9125
9126#define MAX_NSPACE_ITEMS 128
9127nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
9128uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
9129uint32_t nspace_token_id=0;
9130uint32_t nspace_handler_timeout = 15; // seconds
9131
9132#define NSPACE_ITEM_NEW 0x0001
9133#define NSPACE_ITEM_PROCESSING 0x0002
9134#define NSPACE_ITEM_DEAD 0x0004
9135#define NSPACE_ITEM_CANCELLED 0x0008
9136#define NSPACE_ITEM_DONE 0x0010
9137#define NSPACE_ITEM_RESET_TIMER 0x0020
9138
9139#define NSPACE_ITEM_NSPACE_EVENT 0x0040
9140#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
6d2010ae 9141
fe8ab488 9142#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
6d2010ae
A
9143
9144//#pragma optimization_level 0
9145
9146typedef enum {
9147 NSPACE_HANDLER_NSPACE = 0,
9148 NSPACE_HANDLER_SNAPSHOT = 1,
6d2010ae
A
9149
9150 NSPACE_HANDLER_COUNT,
9151} nspace_type_t;
9152
9153typedef struct {
9154 uint64_t handler_tid;
9155 struct proc *handler_proc;
9156 int handler_busy;
9157} nspace_handler_t;
9158
9159nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
9160
39236c6e
A
9161/* namespace fsctl functions */
9162static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
9163static int nspace_item_flags_for_type(nspace_type_t nspace_type);
9164static int nspace_open_flags_for_type(nspace_type_t nspace_type);
9165static nspace_type_t nspace_type_for_op(uint64_t op);
9166static int nspace_is_special_process(struct proc *proc);
9167static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
9168static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
9169static int validate_namespace_args (int is64bit, int size);
9170static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
9171
9172
6d2010ae
A
9173static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
9174{
9175 switch(nspace_type) {
9176 case NSPACE_HANDLER_NSPACE:
9177 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
9178 case NSPACE_HANDLER_SNAPSHOT:
9179 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
6d2010ae
A
9180 default:
9181 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
9182 return 0;
9183 }
9184}
9185
9186static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
9187{
9188 switch(nspace_type) {
9189 case NSPACE_HANDLER_NSPACE:
9190 return NSPACE_ITEM_NSPACE_EVENT;
9191 case NSPACE_HANDLER_SNAPSHOT:
9192 return NSPACE_ITEM_SNAPSHOT_EVENT;
6d2010ae
A
9193 default:
9194 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
9195 return 0;
9196 }
9197}
9198
9199static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
9200{
9201 switch(nspace_type) {
9202 case NSPACE_HANDLER_NSPACE:
9203 return FREAD | FWRITE | O_EVTONLY;
9204 case NSPACE_HANDLER_SNAPSHOT:
6d2010ae
A
9205 return FREAD | O_EVTONLY;
9206 default:
9207 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
9208 return 0;
9209 }
9210}
9211
9212static inline nspace_type_t nspace_type_for_op(uint64_t op)
9213{
9214 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
9215 case NAMESPACE_HANDLER_NSPACE_EVENT:
9216 return NSPACE_HANDLER_NSPACE;
9217 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
9218 return NSPACE_HANDLER_SNAPSHOT;
6d2010ae
A
9219 default:
9220 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
9221 return NSPACE_HANDLER_NSPACE;
9222 }
9223}
9224
9225static inline int nspace_is_special_process(struct proc *proc)
9226{
9227 int i;
9228 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9229 if (proc == nspace_handlers[i].handler_proc)
9230 return 1;
9231 }
9232 return 0;
9233}
9234
9235void
9236nspace_handler_init(void)
9237{
9238 nspace_lock_attr = lck_attr_alloc_init();
9239 nspace_group_attr = lck_grp_attr_alloc_init();
9240 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
9241 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
9242 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
9243 memset(&nspace_items[0], 0, sizeof(nspace_items));
9244}
9245
9246void
9247nspace_proc_exit(struct proc *p)
9248{
9249 int i, event_mask = 0;
39037602 9250
6d2010ae
A
9251 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9252 if (p == nspace_handlers[i].handler_proc) {
9253 event_mask |= nspace_item_flags_for_type(i);
9254 nspace_handlers[i].handler_tid = 0;
9255 nspace_handlers[i].handler_proc = NULL;
9256 }
9257 }
9258
9259 if (event_mask == 0) {
9260 return;
9261 }
39037602
A
9262
9263 lck_mtx_lock(&nspace_handler_lock);
6d2010ae
A
9264 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
9265 // if this process was the snapshot handler, zero snapshot_timeout
9266 snapshot_timestamp = 0;
9267 }
39037602 9268
6d2010ae
A
9269 //
9270 // unblock anyone that's waiting for the handler that died
9271 //
6d2010ae
A
9272 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9273 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
9274
9275 if ( nspace_items[i].flags & event_mask ) {
9276
9277 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9278 vnode_lock_spin(nspace_items[i].vp);
9279 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9280 vnode_unlock(nspace_items[i].vp);
9281 }
9282 nspace_items[i].vp = NULL;
9283 nspace_items[i].vid = 0;
9284 nspace_items[i].flags = NSPACE_ITEM_DONE;
9285 nspace_items[i].token = 0;
39037602 9286
6d2010ae
A
9287 wakeup((caddr_t)&(nspace_items[i].vp));
9288 }
9289 }
9290 }
39037602 9291
6d2010ae
A
9292 wakeup((caddr_t)&nspace_item_idx);
9293 lck_mtx_unlock(&nspace_handler_lock);
9294}
9295
9296
39037602 9297int
6d2010ae
A
9298resolve_nspace_item(struct vnode *vp, uint64_t op)
9299{
9300 return resolve_nspace_item_ext(vp, op, NULL);
9301}
9302
39037602 9303int
6d2010ae
A
9304resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
9305{
9306 int i, error, keep_waiting;
9307 struct timespec ts;
9308 nspace_type_t nspace_type = nspace_type_for_op(op);
9309
9310 // only allow namespace events on regular files, directories and symlinks.
9311 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
9312 return 0;
9313 }
9314
9315 //
9316 // if this is a snapshot event and the vnode is on a
9317 // disk image just pretend nothing happened since any
9318 // change to the disk image will cause the disk image
9319 // itself to get backed up and this avoids multi-way
9320 // deadlocks between the snapshot handler and the ever
9321 // popular diskimages-helper process. the variable
9322 // nspace_allow_virtual_devs allows this behavior to
9323 // be overridden (for use by the Mobile TimeMachine
9324 // testing infrastructure which uses disk images)
9325 //
9326 if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
9327 && (vp->v_mount != NULL)
9328 && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
9329 && !nspace_allow_virtual_devs) {
9330
9331 return 0;
9332 }
9333
9334 // if (thread_tid(current_thread()) == namespace_handler_tid) {
9335 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9336 return 0;
9337 }
9338
9339 if (nspace_is_special_process(current_proc())) {
9340 return EDEADLK;
9341 }
9342
9343 lck_mtx_lock(&nspace_handler_lock);
9344
9345retry:
9346 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9347 if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
9348 break;
9349 }
9350 }
9351
9352 if (i >= MAX_NSPACE_ITEMS) {
9353 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9354 if (nspace_items[i].flags == 0) {
9355 break;
9356 }
9357 }
9358 } else {
9359 nspace_items[i].refcount++;
9360 }
39037602 9361
6d2010ae
A
9362 if (i >= MAX_NSPACE_ITEMS) {
9363 ts.tv_sec = nspace_handler_timeout;
9364 ts.tv_nsec = 0;
9365
9366 error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
9367 if (error == 0) {
9368 // an entry got free'd up, go see if we can get a slot
9369 goto retry;
9370 } else {
9371 lck_mtx_unlock(&nspace_handler_lock);
9372 return error;
9373 }
9374 }
9375
9376 //
9377 // if it didn't already exist, add it. if it did exist
9378 // we'll get woken up when someone does a wakeup() on
9379 // the slot in the nspace_items table.
9380 //
9381 if (vp != nspace_items[i].vp) {
9382 nspace_items[i].vp = vp;
39236c6e 9383 nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
6d2010ae
A
9384 nspace_items[i].op = op;
9385 nspace_items[i].vid = vnode_vid(vp);
9386 nspace_items[i].flags = NSPACE_ITEM_NEW;
9387 nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
9388 if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
9389 if (arg) {
9390 vnode_lock_spin(vp);
9391 vp->v_flag |= VNEEDSSNAPSHOT;
9392 vnode_unlock(vp);
9393 }
9394 }
9395
9396 nspace_items[i].token = 0;
9397 nspace_items[i].refcount = 1;
39037602 9398
6d2010ae
A
9399 wakeup((caddr_t)&nspace_item_idx);
9400 }
9401
9402 //
9403 // Now go to sleep until the handler does a wakeup on this
9404 // slot in the nspace_items table (or we timeout).
9405 //
9406 keep_waiting = 1;
9407 while(keep_waiting) {
9408 ts.tv_sec = nspace_handler_timeout;
9409 ts.tv_nsec = 0;
9410 error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
9411
9412 if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
9413 error = 0;
9414 } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
9415 error = nspace_items[i].token;
9416 } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
9417 if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
9418 nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
9419 continue;
9420 } else {
9421 error = ETIMEDOUT;
9422 }
9423 } else if (error == 0) {
9424 // hmmm, why did we get woken up?
9425 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
9426 nspace_items[i].token);
39037602 9427 }
6d2010ae
A
9428
9429 if (--nspace_items[i].refcount == 0) {
9430 nspace_items[i].vp = NULL; // clear this so that no one will match on it again
9431 nspace_items[i].arg = NULL;
9432 nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
9433 nspace_items[i].flags = 0; // this clears it for re-use
9434 }
9435 wakeup(&nspace_token_id);
9436 keep_waiting = 0;
9437 }
9438
9439 lck_mtx_unlock(&nspace_handler_lock);
9440
9441 return error;
9442}
9443
39037602 9444int nspace_snapshot_event(vnode_t vp, time_t ctime, uint64_t op_type, void *arg)
6d2010ae 9445{
39037602 9446 int snapshot_error = 0;
6d2010ae 9447
39037602
A
9448 if (vp == NULL) {
9449 return 0;
9450 }
9451
9452 /* Swap files are special; skip them */
9453 if (vnode_isswap(vp)) {
9454 return 0;
9455 }
9456
9457 if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
9458 // the change time is within this epoch
9459 int error;
9460
9461 error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
9462 if (error == EDEADLK) {
9463 snapshot_error = 0;
9464 } else if (error) {
9465 if (error == EAGAIN) {
9466 printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
9467 } else if (error == EINTR) {
9468 // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
9469 snapshot_error = EINTR;
9470 }
9471 }
9472 }
9473
9474 return snapshot_error;
9475}
9476
9477int
9478get_nspace_item_status(struct vnode *vp, int32_t *status)
9479{
9480 int i;
9481
9482 lck_mtx_lock(&nspace_handler_lock);
9483 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9484 if (nspace_items[i].vp == vp) {
9485 break;
6d2010ae
A
9486 }
9487 }
9488
9489 if (i >= MAX_NSPACE_ITEMS) {
9490 lck_mtx_unlock(&nspace_handler_lock);
9491 return ENOENT;
9492 }
9493
9494 *status = nspace_items[i].flags;
9495 lck_mtx_unlock(&nspace_handler_lock);
9496 return 0;
9497}
39037602 9498
6d2010ae
A
9499
#if 0
/*
 * (Compiled out.)  Format a "/.vol/<fsid>/<fileid>" path for 'vp' into
 * 'path'.  On entry *len is the size of 'path'; on return it holds the
 * snprintf() result plus one.  Returns 0 on success, or -1 (with a
 * placeholder path written) when the vnode attributes cannot be fetched.
 */
static int
build_volfs_path(struct vnode *vp, char *path, int *len)
{
	struct vnode_attr va;

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_fileid);

	if (vnode_getattr(vp, &va, vfs_context_kernel()) == 0) {
		*len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
		return 0;
	}

	*len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
	return -1;
}
#endif
9522
9523//
9524// Note: this function does NOT check permissions on all of the
9525// parent directories leading to this vnode. It should only be
9526// called on behalf of a root process. Otherwise a process may
9527// get access to a file because the file itself is readable even
9528// though its parent directories would prevent access.
9529//
9530static int
9531vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
9532{
9533 int error, action;
9534
9535 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9536 return error;
9537 }
9538
9539#if CONFIG_MACF
9540 error = mac_vnode_check_open(ctx, vp, fmode);
9541 if (error)
9542 return error;
9543#endif
1c79356b 9544
6d2010ae
A
9545 /* compute action to be authorized */
9546 action = 0;
9547 if (fmode & FREAD) {
9548 action |= KAUTH_VNODE_READ_DATA;
9549 }
9550 if (fmode & (FWRITE | O_TRUNC)) {
9551 /*
9552 * If we are writing, appending, and not truncating,
9553 * indicate that we are appending so that if the
9554 * UF_APPEND or SF_APPEND bits are set, we do not deny
9555 * the open.
9556 */
9557 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
9558 action |= KAUTH_VNODE_APPEND_DATA;
9559 } else {
9560 action |= KAUTH_VNODE_WRITE_DATA;
9561 }
9562 }
1c79356b 9563
6d2010ae
A
9564 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
9565 return error;
39037602 9566
1c79356b 9567
6d2010ae
A
9568 //
9569 // if the vnode is tagged VOPENEVT and the current process
9570 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
9571 // flag to the open mode so that this open won't count against
9572 // the vnode when carbon delete() does a vnode_isinuse() to see
9573 // if a file is currently in use. this allows spotlight
9574 // importers to not interfere with carbon apps that depend on
9575 // the no-delete-if-busy semantics of carbon delete().
9576 //
9577 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
9578 fmode |= O_EVTONLY;
9579 }
1c79356b 9580
6d2010ae
A
9581 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
9582 return error;
9583 }
9584 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
9585 VNOP_CLOSE(vp, fmode, ctx);
9586 return error;
9587 }
1c79356b 9588
39037602 9589 /* Call out to allow 3rd party notification of open.
6d2010ae
A
9590 * Ignore result of kauth_authorize_fileop call.
9591 */
4b17d6b6
A
9592#if CONFIG_MACF
9593 mac_vnode_notify_open(ctx, vp, fmode);
9594#endif
39037602 9595 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
6d2010ae 9596 (uintptr_t)vp, 0);
1c79356b 9597
1c79356b 9598
6d2010ae
A
9599 return 0;
9600}
1c79356b 9601
6d2010ae 9602static int
39236c6e 9603wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
6d2010ae 9604{
39037602
A
9605 int i;
9606 int error = 0;
9607 int unblock = 0;
6d2010ae 9608 task_t curtask;
39037602 9609
6d2010ae
A
9610 lck_mtx_lock(&nspace_handler_exclusion_lock);
9611 if (nspace_handlers[nspace_type].handler_busy) {
9612 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9613 return EBUSY;
9614 }
39037602 9615
6d2010ae
A
9616 nspace_handlers[nspace_type].handler_busy = 1;
9617 lck_mtx_unlock(&nspace_handler_exclusion_lock);
39037602
A
9618
9619 /*
6d2010ae
A
9620 * Any process that gets here will be one of the namespace handlers.
9621 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
9622 * as we can cause deadlocks to occur, because the namespace handler may prevent
39037602 9623 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
6d2010ae
A
9624 * process.
9625 */
9626 curtask = current_task();
39037602
A
9627 bsd_set_dependency_capable (curtask);
9628
6d2010ae
A
9629 lck_mtx_lock(&nspace_handler_lock);
9630 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9631 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
9632 nspace_handlers[nspace_type].handler_proc = current_proc();
9633 }
39037602
A
9634
9635 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
9636 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9637 error = EINVAL;
9638 }
9639
6d2010ae 9640 while (error == 0) {
39037602
A
9641
9642 /* Try to find matching namespace item */
9643 for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
6d2010ae 9644 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
39037602
A
9645 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9646 break;
6d2010ae 9647 }
6d2010ae
A
9648 }
9649 }
39236c6e 9650
39037602
A
9651 if (i >= MAX_NSPACE_ITEMS) {
9652 /* Nothing is there yet. Wait for wake up and retry */
6d2010ae
A
9653 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
9654 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
39037602 9655 /* Prevent infinite loop if snapshot handler exited */
6d2010ae
A
9656 error = EINVAL;
9657 break;
9658 }
39037602 9659 continue;
6d2010ae 9660 }
39037602
A
9661
9662 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
9663 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
9664 nspace_items[i].token = ++nspace_token_id;
9665
9666 assert(nspace_items[i].vp);
9667 struct fileproc *fp;
9668 int32_t indx;
9669 int32_t fmode;
9670 struct proc *p = current_proc();
9671 vfs_context_t ctx = vfs_context_current();
9672 struct vnode_attr va;
9673 bool vn_get_succsessful = false;
9674 bool vn_open_successful = false;
9675 bool fp_alloc_successful = false;
9676
9677 /*
9678 * Use vnode pointer to acquire a file descriptor for
9679 * hand-off to userland
9680 */
9681 fmode = nspace_open_flags_for_type(nspace_type);
9682 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
9683 if (error) goto cleanup;
9684 vn_get_succsessful = true;
9685
9686 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
9687 if (error) goto cleanup;
9688 vn_open_successful = true;
9689
9690 error = falloc(p, &fp, &indx, ctx);
9691 if (error) goto cleanup;
9692 fp_alloc_successful = true;
9693
9694 fp->f_fglob->fg_flag = fmode;
9695 fp->f_fglob->fg_ops = &vnops;
9696 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
9697
9698 proc_fdlock(p);
9699 procfdtbl_releasefd(p, indx, NULL);
9700 fp_drop(p, indx, fp, 1);
9701 proc_fdunlock(p);
9702
9703 /*
9704 * All variants of the namespace handler struct support these three fields:
9705 * token, flags, and the FD pointer
9706 */
9707 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
9708 if (error) goto cleanup;
9709 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
9710 if (error) goto cleanup;
9711 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
9712 if (error) goto cleanup;
9713
9714 /*
9715 * Handle optional fields:
9716 * extended version support an info ptr (offset, length), and the
9717 *
9718 * namedata version supports a unique per-link object ID
9719 *
9720 */
9721 if (nhd->infoptr) {
9722 uio_t uio = (uio_t)nspace_items[i].arg;
9723 uint64_t u_offset, u_length;
9724
9725 if (uio) {
9726 u_offset = uio_offset(uio);
9727 u_length = uio_resid(uio);
9728 } else {
9729 u_offset = 0;
9730 u_length = 0;
9731 }
9732 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
9733 if (error) goto cleanup;
9734 error = copyout(&u_length, nhd->infoptr + sizeof(uint64_t), sizeof(uint64_t));
9735 if (error) goto cleanup;
9736 }
9737
9738 if (nhd->objid) {
9739 VATTR_INIT(&va);
9740 VATTR_WANTED(&va, va_linkid);
9741 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
9742 if (error) goto cleanup;
9743
9744 uint64_t linkid = 0;
9745 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
9746 linkid = (uint64_t)va.va_linkid;
9747 }
9748 error = copyout(&linkid, nhd->objid, sizeof(uint64_t));
9749 }
9750cleanup:
9751 if (error) {
9752 if (fp_alloc_successful) fp_free(p, indx, fp);
9753 if (vn_open_successful) vn_close(nspace_items[i].vp, fmode, ctx);
9754 unblock = 1;
9755 }
9756
9757 if (vn_get_succsessful) vnode_put(nspace_items[i].vp);
9758
9759 break;
6d2010ae 9760 }
39037602 9761
6d2010ae
A
9762 if (unblock) {
9763 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9764 vnode_lock_spin(nspace_items[i].vp);
9765 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9766 vnode_unlock(nspace_items[i].vp);
9767 }
9768 nspace_items[i].vp = NULL;
9769 nspace_items[i].vid = 0;
9770 nspace_items[i].flags = NSPACE_ITEM_DONE;
9771 nspace_items[i].token = 0;
39037602 9772
6d2010ae
A
9773 wakeup((caddr_t)&(nspace_items[i].vp));
9774 }
39037602 9775
6d2010ae
A
9776 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
9777 // just go through every snapshot event and unblock it immediately.
9778 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
39037602 9779 for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
6d2010ae
A
9780 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9781 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9782 nspace_items[i].vp = NULL;
9783 nspace_items[i].vid = 0;
9784 nspace_items[i].flags = NSPACE_ITEM_DONE;
9785 nspace_items[i].token = 0;
39037602
A
9786
9787 wakeup((caddr_t)&(nspace_items[i].vp));
6d2010ae
A
9788 }
9789 }
9790 }
9791 }
9792 }
39037602 9793
6d2010ae 9794 lck_mtx_unlock(&nspace_handler_lock);
39037602 9795
6d2010ae
A
9796 lck_mtx_lock(&nspace_handler_exclusion_lock);
9797 nspace_handlers[nspace_type].handler_busy = 0;
9798 lck_mtx_unlock(&nspace_handler_exclusion_lock);
39037602 9799
6d2010ae
A
9800 return error;
9801}
1c79356b 9802
39236c6e
A
9803static inline int validate_namespace_args (int is64bit, int size) {
9804
9805 if (is64bit) {
9806 /* Must be one of these */
9807 if (size == sizeof(user64_namespace_handler_info)) {
9808 goto sizeok;
9809 }
9810 if (size == sizeof(user64_namespace_handler_info_ext)) {
9811 goto sizeok;
9812 }
9813 if (size == sizeof(user64_namespace_handler_data)) {
9814 goto sizeok;
9815 }
9816 return EINVAL;
9817 }
9818 else {
9819 /* 32 bit -- must be one of these */
9820 if (size == sizeof(user32_namespace_handler_info)) {
9821 goto sizeok;
9822 }
9823 if (size == sizeof(user32_namespace_handler_info_ext)) {
9824 goto sizeok;
9825 }
9826 if (size == sizeof(user32_namespace_handler_data)) {
9827 goto sizeok;
9828 }
9829 return EINVAL;
9830 }
9831
9832sizeok:
9833
9834 return 0;
9835
9836}
1c79356b 9837
6d2010ae
A
9838static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
9839{
9840 int error = 0;
39236c6e 9841 namespace_handler_data nhd;
39037602 9842
39236c6e
A
9843 bzero (&nhd, sizeof(namespace_handler_data));
9844
6d2010ae
A
9845 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9846 return error;
9847 }
39037602 9848
39236c6e
A
9849 error = validate_namespace_args (is64bit, size);
9850 if (error) {
9851 return error;
6d2010ae 9852 }
39037602 9853
39236c6e
A
9854 /* Copy in the userland pointers into our kernel-only struct */
9855
6d2010ae 9856 if (is64bit) {
39236c6e
A
9857 /* 64 bit userland structures */
9858 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
9859 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
9860 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
9861
9862 /* If the size is greater than the standard info struct, add in extra fields */
9863 if (size > (sizeof(user64_namespace_handler_info))) {
9864 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
9865 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
9866 }
9867 if (size == (sizeof(user64_namespace_handler_data))) {
9868 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
9869 }
9870 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
6d2010ae 9871 }
39037602 9872 }
39236c6e
A
9873 else {
9874 /* 32 bit userland structures */
9875 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
9876 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
9877 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
39037602 9878
39236c6e
A
9879 if (size > (sizeof(user32_namespace_handler_info))) {
9880 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
9881 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
9882 }
9883 if (size == (sizeof(user32_namespace_handler_data))) {
9884 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
9885 }
9886 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
6d2010ae
A
9887 }
9888 }
39037602 9889
39236c6e 9890 return wait_for_namespace_event(&nhd, nspace_type);
6d2010ae 9891}
1c79356b
A
9892
9893/*
9894 * Make a filesystem-specific control call:
9895 */
1c79356b 9896/* ARGSUSED */
b0d623f7
A
9897static int
9898fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
1c79356b 9899{
b0d623f7 9900 int error=0;
91447636 9901 boolean_t is64bit;
2d21ac55 9902 u_int size;
1c79356b 9903#define STK_PARAMS 128
39037602 9904 char stkbuf[STK_PARAMS] = {0};
1c79356b 9905 caddr_t data, memp;
b0d623f7 9906 vnode_t vp = *arg_vp;
1c79356b
A
9907
9908 size = IOCPARM_LEN(cmd);
9909 if (size > IOCPARM_MAX) return (EINVAL);
9910
6d2010ae 9911 is64bit = proc_is64bit(p);
91447636 9912
1c79356b 9913 memp = NULL;
04b8595b 9914
3e170ce0 9915
04b8595b
A
9916 /*
9917 * ensure the buffer is large enough for underlying calls
9918 */
9919#ifndef HFSIOC_GETPATH
3e170ce0 9920 typedef char pn_t[MAXPATHLEN];
04b8595b
A
9921#define HFSIOC_GETPATH _IOWR('h', 13, pn_t)
9922#endif
9923
9924#ifndef HFS_GETPATH
9925#define HFS_GETPATH IOCBASECMD(HFSIOC_GETPATH)
9926#endif
9927 if (IOCBASECMD(cmd) == HFS_GETPATH) {
9928 /* Round up to MAXPATHLEN regardless of user input */
9929 size = MAXPATHLEN;
9930 }
00867663
A
9931 else if (vp->v_tag == VT_CIFS) {
9932 /*
9933 * XXX Until fsctl's length encoding can be
9934 * XXX fixed properly.
9935 */
9936 if (IOCBASECMD(cmd) == _IOWR('z', 19, 0) && size < 1432) {
9937 size = 1432; /* sizeof(struct UniqueSMBShareID) */
9938 } else if (IOCBASECMD(cmd) == _IOWR('z', 28, 0) && size < 308) {
9939 size = 308; /* sizeof(struct smbDebugTestPB) */
9940 }
9941 }
04b8595b 9942
1c79356b
A
9943 if (size > sizeof (stkbuf)) {
9944 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
9945 data = memp;
9946 } else {
91447636 9947 data = &stkbuf[0];
1c79356b 9948 };
39037602 9949
1c79356b
A
9950 if (cmd & IOC_IN) {
9951 if (size) {
b0d623f7 9952 error = copyin(udata, data, size);
39037602 9953 if (error) {
fe8ab488 9954 if (memp) {
39037602 9955 kfree (memp, size);
fe8ab488
A
9956 }
9957 return error;
9958 }
1c79356b 9959 } else {
6d2010ae
A
9960 if (is64bit) {
9961 *(user_addr_t *)data = udata;
9962 }
9963 else {
9964 *(uint32_t *)data = (uint32_t)udata;
9965 }
1c79356b
A
9966 };
9967 } else if ((cmd & IOC_OUT) && size) {
9968 /*
9969 * Zero the buffer so the user always
9970 * gets back something deterministic.
9971 */
9972 bzero(data, size);
91447636 9973 } else if (cmd & IOC_VOID) {
b0d623f7 9974 if (is64bit) {
6d2010ae 9975 *(user_addr_t *)data = udata;
b0d623f7
A
9976 }
9977 else {
6d2010ae 9978 *(uint32_t *)data = (uint32_t)udata;
b0d623f7 9979 }
91447636 9980 }
1c79356b 9981
b0d623f7 9982 /* Check to see if it's a generic command */
fe8ab488 9983 switch (IOCBASECMD(cmd)) {
91447636 9984
fe8ab488
A
9985 case FSCTL_SYNC_VOLUME: {
9986 mount_t mp = vp->v_mount;
9987 int arg = *(uint32_t*)data;
b0d623f7 9988
fe8ab488
A
9989 /* record vid of vp so we can drop it below. */
9990 uint32_t vvid = vp->v_id;
b0d623f7 9991
fe8ab488
A
9992 /*
9993 * Then grab mount_iterref so that we can release the vnode.
9994 * Without this, a thread may call vnode_iterate_prepare then
9995 * get into a deadlock because we've never released the root vp
9996 */
9997 error = mount_iterref (mp, 0);
9998 if (error) {
9999 break;
10000 }
10001 vnode_put(vp);
10002
10003 /* issue the sync for this volume */
10004 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
10005
39037602 10006 /*
fe8ab488
A
10007 * Then release the mount_iterref once we're done syncing; it's not
10008 * needed for the VNOP_IOCTL below
10009 */
10010 mount_iterdrop(mp);
10011
10012 if (arg & FSCTL_SYNC_FULLSYNC) {
10013 /* re-obtain vnode iocount on the root vp, if possible */
10014 error = vnode_getwithvid (vp, vvid);
10015 if (error == 0) {
10016 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
10017 vnode_put (vp);
10018 }
b0d623f7 10019 }
fe8ab488
A
10020 /* mark the argument VP as having been released */
10021 *arg_vp = NULL;
b0d623f7 10022 }
fe8ab488 10023 break;
b0d623f7 10024
490019cf
A
10025 case FSCTL_ROUTEFS_SETROUTEID: {
10026#if ROUTEFS
10027 char routepath[MAXPATHLEN];
10028 size_t len = 0;
39037602 10029
490019cf
A
10030 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10031 break;
10032 }
10033 bzero(routepath, MAXPATHLEN);
10034 error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
10035 if (error) {
10036 break;
10037 }
10038 error = routefs_kernel_mount(routepath);
10039 if (error) {
10040 break;
10041 }
10042#endif
10043 }
10044 break;
10045
fe8ab488
A
10046 case FSCTL_SET_PACKAGE_EXTS: {
10047 user_addr_t ext_strings;
10048 uint32_t num_entries;
10049 uint32_t max_width;
b0d623f7 10050
39037602
A
10051 if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0)))
10052 break;
10053
fe8ab488
A
10054 if ( (is64bit && size != sizeof(user64_package_ext_info))
10055 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
10056
10057 // either you're 64-bit and passed a 64-bit struct or
10058 // you're 32-bit and passed a 32-bit struct. otherwise
10059 // it's not ok.
10060 error = EINVAL;
10061 break;
10062 }
10063
10064 if (is64bit) {
10065 ext_strings = ((user64_package_ext_info *)data)->strings;
10066 num_entries = ((user64_package_ext_info *)data)->num_entries;
10067 max_width = ((user64_package_ext_info *)data)->max_width;
10068 } else {
10069 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
10070 num_entries = ((user32_package_ext_info *)data)->num_entries;
10071 max_width = ((user32_package_ext_info *)data)->max_width;
10072 }
10073 error = set_package_extensions_table(ext_strings, num_entries, max_width);
6d2010ae 10074 }
fe8ab488 10075 break;
2d21ac55 10076
39037602 10077 /* namespace handlers */
fe8ab488
A
10078 case FSCTL_NAMESPACE_HANDLER_GET: {
10079 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
6d2010ae 10080 }
fe8ab488 10081 break;
b0d623f7 10082
fe8ab488
A
10083 /* Snapshot handlers */
10084 case FSCTL_OLD_SNAPSHOT_HANDLER_GET: {
10085 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
39037602 10086 }
fe8ab488 10087 break;
39236c6e 10088
fe8ab488
A
10089 case FSCTL_SNAPSHOT_HANDLER_GET_EXT: {
10090 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
10091 }
39037602 10092 break;
39236c6e 10093
fe8ab488
A
10094 case FSCTL_NAMESPACE_HANDLER_UPDATE: {
10095 uint32_t token, val;
10096 int i;
39236c6e 10097
fe8ab488
A
10098 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10099 break;
10100 }
39236c6e 10101
fe8ab488
A
10102 if (!nspace_is_special_process(p)) {
10103 error = EINVAL;
10104 break;
10105 }
6d2010ae 10106
fe8ab488
A
10107 token = ((uint32_t *)data)[0];
10108 val = ((uint32_t *)data)[1];
6d2010ae 10109
fe8ab488 10110 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 10111
fe8ab488
A
10112 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10113 if (nspace_items[i].token == token) {
10114 break; /* exit for loop, not case stmt */
10115 }
10116 }
6d2010ae 10117
fe8ab488
A
10118 if (i >= MAX_NSPACE_ITEMS) {
10119 error = ENOENT;
10120 } else {
10121 //
10122 // if this bit is set, when resolve_nspace_item() times out
10123 // it will loop and go back to sleep.
10124 //
10125 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
10126 }
6d2010ae 10127
fe8ab488
A
10128 lck_mtx_unlock(&nspace_handler_lock);
10129
10130 if (error) {
10131 printf("nspace-handler-update: did not find token %u\n", token);
10132 }
39037602 10133 }
fe8ab488 10134 break;
39037602
A
10135
10136 case FSCTL_NAMESPACE_HANDLER_UNBLOCK: {
fe8ab488
A
10137 uint32_t token, val;
10138 int i;
10139
10140 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
6d2010ae
A
10141 break;
10142 }
6d2010ae 10143
fe8ab488
A
10144 if (!nspace_is_special_process(p)) {
10145 error = EINVAL;
10146 break;
10147 }
6d2010ae 10148
fe8ab488
A
10149 token = ((uint32_t *)data)[0];
10150 val = ((uint32_t *)data)[1];
6d2010ae 10151
fe8ab488 10152 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 10153
fe8ab488
A
10154 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10155 if (nspace_items[i].token == token) {
10156 break; /* exit for loop, not case statement */
10157 }
10158 }
6d2010ae 10159
fe8ab488
A
10160 if (i >= MAX_NSPACE_ITEMS) {
10161 printf("nspace-handler-unblock: did not find token %u\n", token);
10162 error = ENOENT;
10163 } else {
10164 if (val == 0 && nspace_items[i].vp) {
10165 vnode_lock_spin(nspace_items[i].vp);
10166 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10167 vnode_unlock(nspace_items[i].vp);
10168 }
6d2010ae 10169
fe8ab488
A
10170 nspace_items[i].vp = NULL;
10171 nspace_items[i].arg = NULL;
10172 nspace_items[i].op = 0;
10173 nspace_items[i].vid = 0;
10174 nspace_items[i].flags = NSPACE_ITEM_DONE;
10175 nspace_items[i].token = 0;
6d2010ae 10176
fe8ab488
A
10177 wakeup((caddr_t)&(nspace_items[i].vp));
10178 }
10179
10180 lck_mtx_unlock(&nspace_handler_lock);
39037602 10181 }
fe8ab488 10182 break;
6d2010ae 10183
fe8ab488
A
10184 case FSCTL_NAMESPACE_HANDLER_CANCEL: {
10185 uint32_t token, val;
10186 int i;
6d2010ae 10187
fe8ab488 10188 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
6d2010ae
A
10189 break;
10190 }
6d2010ae 10191
fe8ab488
A
10192 if (!nspace_is_special_process(p)) {
10193 error = EINVAL;
10194 break;
6d2010ae
A
10195 }
10196
fe8ab488
A
10197 token = ((uint32_t *)data)[0];
10198 val = ((uint32_t *)data)[1];
6d2010ae 10199
fe8ab488 10200 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 10201
fe8ab488
A
10202 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10203 if (nspace_items[i].token == token) {
10204 break; /* exit for loop, not case stmt */
10205 }
10206 }
6d2010ae 10207
fe8ab488
A
10208 if (i >= MAX_NSPACE_ITEMS) {
10209 printf("nspace-handler-cancel: did not find token %u\n", token);
10210 error = ENOENT;
10211 } else {
10212 if (nspace_items[i].vp) {
10213 vnode_lock_spin(nspace_items[i].vp);
10214 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10215 vnode_unlock(nspace_items[i].vp);
10216 }
6d2010ae 10217
39037602
A
10218 nspace_items[i].vp = NULL;
10219 nspace_items[i].arg = NULL;
fe8ab488
A
10220 nspace_items[i].vid = 0;
10221 nspace_items[i].token = val;
10222 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
39037602 10223 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
6d2010ae 10224
fe8ab488
A
10225 wakeup((caddr_t)&(nspace_items[i].vp));
10226 }
6d2010ae 10227
fe8ab488 10228 lck_mtx_unlock(&nspace_handler_lock);
39037602 10229 }
fe8ab488 10230 break;
6d2010ae 10231
fe8ab488
A
10232 case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
10233 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
6d2010ae
A
10234 break;
10235 }
6d2010ae 10236
fe8ab488 10237 // we explicitly do not do the namespace_handler_proc check here
6d2010ae 10238
fe8ab488
A
10239 lck_mtx_lock(&nspace_handler_lock);
10240 snapshot_timestamp = ((uint32_t *)data)[0];
10241 wakeup(&nspace_item_idx);
10242 lck_mtx_unlock(&nspace_handler_lock);
10243 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
6d2010ae 10244
39037602 10245 }
fe8ab488 10246 break;
6d2010ae 10247
fe8ab488
A
10248 case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
10249 {
10250 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10251 break;
10252 }
6d2010ae 10253
fe8ab488
A
10254 lck_mtx_lock(&nspace_handler_lock);
10255 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
10256 lck_mtx_unlock(&nspace_handler_lock);
10257 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
10258 nspace_allow_virtual_devs ? "" : " NOT");
10259 error = 0;
6d2010ae 10260
6d2010ae 10261 }
fe8ab488 10262 break;
6d2010ae 10263
39037602
A
10264 case FSCTL_SET_FSTYPENAME_OVERRIDE:
10265 {
fe8ab488
A
10266 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10267 break;
10268 }
10269 if (vp->v_mount) {
10270 mount_lock(vp->v_mount);
10271 if (data[0] != 0) {
10272 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
10273 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
10274 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10275 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
10276 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
10277 }
10278 } else {
10279 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10280 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
10281 }
10282 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
10283 vp->v_mount->fstypename_override[0] = '\0';
6d2010ae 10284 }
fe8ab488 10285 mount_unlock(vp->v_mount);
6d2010ae 10286 }
6d2010ae 10287 }
fe8ab488 10288 break;
39037602 10289
fe8ab488
A
10290 default: {
10291 /* Invoke the filesystem-specific code */
10292 error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
10293 }
10294
10295 } /* end switch stmt */
10296
1c79356b 10297 /*
fe8ab488 10298 * if no errors, copy any data to user. Size was
1c79356b
A
10299 * already set and checked above.
10300 */
39037602 10301 if (error == 0 && (cmd & IOC_OUT) && size)
b0d623f7 10302 error = copyout(data, udata, size);
39037602 10303
fe8ab488
A
10304 if (memp) {
10305 kfree(memp, size);
10306 }
39037602 10307
1c79356b
A
10308 return error;
10309}
b0d623f7
A
10310
10311/* ARGSUSED */
10312int
10313fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
10314{
10315 int error;
39037602 10316 struct nameidata nd;
b0d623f7
A
10317 u_long nameiflags;
10318 vnode_t vp = NULL;
10319 vfs_context_t ctx = vfs_context_current();
10320
10321 AUDIT_ARG(cmd, uap->cmd);
10322 AUDIT_ARG(value32, uap->options);
10323 /* Get the vnode for the file we are getting info on: */
10324 nameiflags = 0;
10325 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6d2010ae
A
10326 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
10327 UIO_USERSPACE, uap->path, ctx);
b0d623f7
A
10328 if ((error = namei(&nd))) goto done;
10329 vp = nd.ni_vp;
10330 nameidone(&nd);
10331
10332#if CONFIG_MACF
10333 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
10334 if (error) {
10335 goto done;
10336 }
10337#endif
10338
10339 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10340
10341done:
10342 if (vp)
10343 vnode_put(vp);
10344 return error;
10345}
10346/* ARGSUSED */
10347int
10348ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
10349{
10350 int error;
10351 vnode_t vp = NULL;
10352 vfs_context_t ctx = vfs_context_current();
10353 int fd = -1;
10354
10355 AUDIT_ARG(fd, uap->fd);
10356 AUDIT_ARG(cmd, uap->cmd);
10357 AUDIT_ARG(value32, uap->options);
39037602 10358
b0d623f7
A
10359 /* Get the vnode for the file we are getting info on: */
10360 if ((error = file_vnode(uap->fd, &vp)))
3e170ce0 10361 return error;
b0d623f7
A
10362 fd = uap->fd;
10363 if ((error = vnode_getwithref(vp))) {
3e170ce0
A
10364 file_drop(fd);
10365 return error;
b0d623f7
A
10366 }
10367
10368#if CONFIG_MACF
3e170ce0
A
10369 if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
10370 file_drop(fd);
10371 vnode_put(vp);
10372 return error;
b0d623f7
A
10373 }
10374#endif
10375
10376 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10377
3e170ce0 10378 file_drop(fd);
b0d623f7 10379
3e170ce0
A
10380 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
10381 if (vp) {
b0d623f7 10382 vnode_put(vp);
3e170ce0
A
10383 }
10384
b0d623f7
A
10385 return error;
10386}
1c79356b 10387/* end of fsctl system call */
0b4e3aa0 10388
91447636
A
10389/*
10390 * Retrieve the data of an extended attribute.
10391 */
10392int
2d21ac55 10393getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
91447636 10394{
2d21ac55 10395 vnode_t vp;
91447636
A
10396 struct nameidata nd;
10397 char attrname[XATTR_MAXNAMELEN+1];
2d21ac55 10398 vfs_context_t ctx = vfs_context_current();
91447636
A
10399 uio_t auio = NULL;
10400 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10401 size_t attrsize = 0;
10402 size_t namelen;
b0d623f7 10403 u_int32_t nameiflags;
91447636
A
10404 int error;
10405 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 10406
2d21ac55 10407 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10408 return (EINVAL);
55e303ae 10409
91447636 10410 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10411 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10412 if ((error = namei(&nd))) {
10413 return (error);
10414 }
10415 vp = nd.ni_vp;
10416 nameidone(&nd);
55e303ae 10417
91447636
A
10418 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10419 goto out;
10420 }
10421 if (xattr_protected(attrname)) {
6d2010ae
A
10422 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
10423 error = EPERM;
10424 goto out;
10425 }
91447636 10426 }
b0d623f7
A
10427 /*
10428 * the specific check for 0xffffffff is a hack to preserve
10429 * binaray compatibilty in K64 with applications that discovered
39037602 10430 * that passing in a buf pointer and a size of -1 resulted in
b0d623f7
A
10431 * just the size of the indicated extended attribute being returned.
10432 * this isn't part of the documented behavior, but because of the
10433 * original implemtation's check for "uap->size > 0", this behavior
10434 * was allowed. In K32 that check turned into a signed comparison
10435 * even though uap->size is unsigned... in K64, we blow by that
10436 * check because uap->size is unsigned and doesn't get sign smeared
39037602 10437 * in the munger for a 32 bit user app. we also need to add a
b0d623f7
A
10438 * check to limit the maximum size of the buffer being passed in...
10439 * unfortunately, the underlying fileystems seem to just malloc
10440 * the requested size even if the actual extended attribute is tiny.
10441 * because that malloc is for kernel wired memory, we have to put a
10442 * sane limit on it.
10443 *
10444 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
10445 * U64 running on K64 will yield -1 (64 bits wide)
10446 * U32/U64 running on K32 will yield -1 (32 bits wide)
10447 */
10448 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
10449 goto no_uio;
10450
b0d623f7 10451 if (uap->value) {
6d2010ae
A
10452 if (uap->size > (size_t)XATTR_MAXSIZE)
10453 uap->size = XATTR_MAXSIZE;
39037602 10454
91447636
A
10455 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10456 &uio_buf[0], sizeof(uio_buf));
10457 uio_addiov(auio, uap->value, uap->size);
10458 }
b0d623f7 10459no_uio:
2d21ac55 10460 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
91447636
A
10461out:
10462 vnode_put(vp);
55e303ae 10463
91447636
A
10464 if (auio) {
10465 *retval = uap->size - uio_resid(auio);
10466 } else {
10467 *retval = (user_ssize_t)attrsize;
55e303ae
A
10468 }
10469
91447636
A
10470 return (error);
10471}
55e303ae 10472
91447636
A
10473/*
10474 * Retrieve the data of an extended attribute.
10475 */
10476int
2d21ac55 10477fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
91447636 10478{
2d21ac55 10479 vnode_t vp;
91447636 10480 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
10481 uio_t auio = NULL;
10482 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10483 size_t attrsize = 0;
10484 size_t namelen;
10485 int error;
10486 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 10487
2d21ac55 10488 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10489 return (EINVAL);
55e303ae 10490
91447636
A
10491 if ( (error = file_vnode(uap->fd, &vp)) ) {
10492 return (error);
10493 }
10494 if ( (error = vnode_getwithref(vp)) ) {
10495 file_drop(uap->fd);
10496 return(error);
10497 }
10498 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10499 goto out;
10500 }
10501 if (xattr_protected(attrname)) {
10502 error = EPERM;
10503 goto out;
10504 }
10505 if (uap->value && uap->size > 0) {
10506 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10507 &uio_buf[0], sizeof(uio_buf));
10508 uio_addiov(auio, uap->value, uap->size);
10509 }
55e303ae 10510
2d21ac55 10511 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
91447636
A
10512out:
10513 (void)vnode_put(vp);
10514 file_drop(uap->fd);
55e303ae 10515
91447636
A
10516 if (auio) {
10517 *retval = uap->size - uio_resid(auio);
10518 } else {
10519 *retval = (user_ssize_t)attrsize;
10520 }
10521 return (error);
10522}
55e303ae 10523
91447636
A
10524/*
10525 * Set the data of an extended attribute.
10526 */
55e303ae 10527int
2d21ac55 10528setxattr(proc_t p, struct setxattr_args *uap, int *retval)
55e303ae 10529{
2d21ac55 10530 vnode_t vp;
91447636
A
10531 struct nameidata nd;
10532 char attrname[XATTR_MAXNAMELEN+1];
2d21ac55 10533 vfs_context_t ctx = vfs_context_current();
91447636
A
10534 uio_t auio = NULL;
10535 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10536 size_t namelen;
b0d623f7 10537 u_int32_t nameiflags;
91447636
A
10538 int error;
10539 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 10540
2d21ac55 10541 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10542 return (EINVAL);
55e303ae 10543
91447636 10544 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6d2010ae
A
10545 if (error == EPERM) {
10546 /* if the string won't fit in attrname, copyinstr emits EPERM */
10547 return (ENAMETOOLONG);
10548 }
10549 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10550 return error;
91447636
A
10551 }
10552 if (xattr_protected(attrname))
10553 return(EPERM);
2d21ac55 10554 if (uap->size != 0 && uap->value == 0) {
91447636 10555 return (EINVAL);
55e303ae 10556 }
55e303ae 10557
91447636 10558 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10559 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10560 if ((error = namei(&nd))) {
10561 return (error);
10562 }
10563 vp = nd.ni_vp;
10564 nameidone(&nd);
55e303ae 10565
91447636
A
10566 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10567 &uio_buf[0], sizeof(uio_buf));
10568 uio_addiov(auio, uap->value, uap->size);
55e303ae 10569
2d21ac55
A
10570 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
10571#if CONFIG_FSE
10572 if (error == 0) {
10573 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10574 FSE_ARG_VNODE, vp,
10575 FSE_ARG_DONE);
10576 }
10577#endif
91447636
A
10578 vnode_put(vp);
10579 *retval = 0;
10580 return (error);
10581}
55e303ae 10582
91447636
A
10583/*
10584 * Set the data of an extended attribute.
10585 */
10586int
2d21ac55 10587fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
91447636 10588{
2d21ac55 10589 vnode_t vp;
91447636 10590 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
10591 uio_t auio = NULL;
10592 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10593 size_t namelen;
10594 int error;
10595 char uio_buf[ UIO_SIZEOF(1) ];
6d2010ae 10596#if CONFIG_FSE
2d21ac55 10597 vfs_context_t ctx = vfs_context_current();
6d2010ae 10598#endif
55e303ae 10599
2d21ac55 10600 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10601 return (EINVAL);
55e303ae 10602
91447636 10603 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
3e170ce0
A
10604 if (error == EPERM) {
10605 /* if the string won't fit in attrname, copyinstr emits EPERM */
10606 return (ENAMETOOLONG);
10607 }
10608 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10609 return error;
55e303ae 10610 }
91447636
A
10611 if (xattr_protected(attrname))
10612 return(EPERM);
2d21ac55 10613 if (uap->size != 0 && uap->value == 0) {
91447636 10614 return (EINVAL);
55e303ae 10615 }
91447636
A
10616 if ( (error = file_vnode(uap->fd, &vp)) ) {
10617 return (error);
55e303ae 10618 }
91447636
A
10619 if ( (error = vnode_getwithref(vp)) ) {
10620 file_drop(uap->fd);
10621 return(error);
10622 }
10623 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10624 &uio_buf[0], sizeof(uio_buf));
10625 uio_addiov(auio, uap->value, uap->size);
91447636 10626
2d21ac55
A
10627 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
10628#if CONFIG_FSE
10629 if (error == 0) {
10630 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10631 FSE_ARG_VNODE, vp,
10632 FSE_ARG_DONE);
10633 }
10634#endif
91447636
A
10635 vnode_put(vp);
10636 file_drop(uap->fd);
10637 *retval = 0;
10638 return (error);
10639}
55e303ae 10640
91447636
A
10641/*
10642 * Remove an extended attribute.
b0d623f7 10643 * XXX Code duplication here.
91447636 10644 */
91447636 10645int
2d21ac55 10646removexattr(proc_t p, struct removexattr_args *uap, int *retval)
91447636 10647{
2d21ac55 10648 vnode_t vp;
91447636
A
10649 struct nameidata nd;
10650 char attrname[XATTR_MAXNAMELEN+1];
10651 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
2d21ac55 10652 vfs_context_t ctx = vfs_context_current();
91447636 10653 size_t namelen;
b0d623f7 10654 u_int32_t nameiflags;
91447636 10655 int error;
55e303ae 10656
2d21ac55 10657 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10658 return (EINVAL);
55e303ae 10659
91447636
A
10660 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10661 if (error != 0) {
10662 return (error);
10663 }
10664 if (xattr_protected(attrname))
10665 return(EPERM);
10666 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10667 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10668 if ((error = namei(&nd))) {
10669 return (error);
10670 }
10671 vp = nd.ni_vp;
10672 nameidone(&nd);
55e303ae 10673
2d21ac55
A
10674 error = vn_removexattr(vp, attrname, uap->options, ctx);
10675#if CONFIG_FSE
10676 if (error == 0) {
10677 add_fsevent(FSE_XATTR_REMOVED, ctx,
10678 FSE_ARG_VNODE, vp,
10679 FSE_ARG_DONE);
10680 }
10681#endif
91447636
A
10682 vnode_put(vp);
10683 *retval = 0;
10684 return (error);
55e303ae
A
10685}
10686
91447636
A
10687/*
10688 * Remove an extended attribute.
b0d623f7 10689 * XXX Code duplication here.
91447636 10690 */
91447636 10691int
2d21ac55 10692fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
55e303ae 10693{
2d21ac55 10694 vnode_t vp;
91447636 10695 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
10696 size_t namelen;
10697 int error;
6d2010ae 10698#if CONFIG_FSE
2d21ac55 10699 vfs_context_t ctx = vfs_context_current();
6d2010ae 10700#endif
55e303ae 10701
2d21ac55 10702 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636
A
10703 return (EINVAL);
10704
10705 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10706 if (error != 0) {
10707 return (error);
10708 }
10709 if (xattr_protected(attrname))
10710 return(EPERM);
10711 if ( (error = file_vnode(uap->fd, &vp)) ) {
10712 return (error);
10713 }
10714 if ( (error = vnode_getwithref(vp)) ) {
10715 file_drop(uap->fd);
10716 return(error);
10717 }
4a249263 10718
2d21ac55
A
10719 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
10720#if CONFIG_FSE
10721 if (error == 0) {
10722 add_fsevent(FSE_XATTR_REMOVED, ctx,
10723 FSE_ARG_VNODE, vp,
10724 FSE_ARG_DONE);
10725 }
10726#endif
91447636
A
10727 vnode_put(vp);
10728 file_drop(uap->fd);
10729 *retval = 0;
10730 return (error);
55e303ae
A
10731}
10732
91447636
A
10733/*
10734 * Retrieve the list of extended attribute names.
b0d623f7 10735 * XXX Code duplication here.
91447636 10736 */
91447636 10737int
2d21ac55 10738listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
55e303ae 10739{
2d21ac55 10740 vnode_t vp;
91447636 10741 struct nameidata nd;
2d21ac55 10742 vfs_context_t ctx = vfs_context_current();
91447636
A
10743 uio_t auio = NULL;
10744 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10745 size_t attrsize = 0;
b0d623f7 10746 u_int32_t nameiflags;
91447636
A
10747 int error;
10748 char uio_buf[ UIO_SIZEOF(1) ];
4a249263 10749
2d21ac55 10750 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10751 return (EINVAL);
55e303ae 10752
fe8ab488 10753 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10754 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10755 if ((error = namei(&nd))) {
10756 return (error);
10757 }
10758 vp = nd.ni_vp;
10759 nameidone(&nd);
10760 if (uap->namebuf != 0 && uap->bufsize > 0) {
6d2010ae
A
10761 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
10762 &uio_buf[0], sizeof(uio_buf));
91447636
A
10763 uio_addiov(auio, uap->namebuf, uap->bufsize);
10764 }
55e303ae 10765
2d21ac55 10766 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
55e303ae 10767
91447636
A
10768 vnode_put(vp);
10769 if (auio) {
10770 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10771 } else {
10772 *retval = (user_ssize_t)attrsize;
10773 }
10774 return (error);
55e303ae
A
10775}
10776
91447636
A
10777/*
10778 * Retrieve the list of extended attribute names.
b0d623f7 10779 * XXX Code duplication here.
91447636 10780 */
55e303ae 10781int
2d21ac55 10782flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
55e303ae 10783{
2d21ac55 10784 vnode_t vp;
91447636
A
10785 uio_t auio = NULL;
10786 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10787 size_t attrsize = 0;
10788 int error;
10789 char uio_buf[ UIO_SIZEOF(1) ];
10790
2d21ac55 10791 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636
A
10792 return (EINVAL);
10793
10794 if ( (error = file_vnode(uap->fd, &vp)) ) {
10795 return (error);
10796 }
10797 if ( (error = vnode_getwithref(vp)) ) {
10798 file_drop(uap->fd);
10799 return(error);
10800 }
10801 if (uap->namebuf != 0 && uap->bufsize > 0) {
39037602 10802 auio = uio_createwithbuffer(1, 0, spacetype,
91447636
A
10803 UIO_READ, &uio_buf[0], sizeof(uio_buf));
10804 uio_addiov(auio, uap->namebuf, uap->bufsize);
10805 }
91447636 10806
2d21ac55 10807 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
55e303ae 10808
91447636
A
10809 vnode_put(vp);
10810 file_drop(uap->fd);
10811 if (auio) {
10812 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10813 } else {
10814 *retval = (user_ssize_t)attrsize;
10815 }
10816 return (error);
55e303ae 10817}
4a249263 10818
fe8ab488
A
10819static int fsgetpath_internal(
10820 vfs_context_t ctx, int volfs_id, uint64_t objid,
10821 vm_size_t bufsize, caddr_t buf, int *pathlen)
b0d623f7 10822{
fe8ab488 10823 int error;
b0d623f7 10824 struct mount *mp = NULL;
fe8ab488 10825 vnode_t vp;
b0d623f7 10826 int length;
fe8ab488 10827 int bpflags;
813fb2f6
A
10828 /* maximum number of times to retry build_path */
10829 unsigned int retries = 0x10;
b0d623f7 10830
fe8ab488 10831 if (bufsize > PAGE_SIZE) {
b0d623f7 10832 return (EINVAL);
fe8ab488
A
10833 }
10834
10835 if (buf == NULL) {
b0d623f7
A
10836 return (ENOMEM);
10837 }
fe8ab488 10838
813fb2f6 10839retry:
fe8ab488 10840 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
b0d623f7 10841 error = ENOTSUP; /* unexpected failure */
fe8ab488 10842 return ENOTSUP;
b0d623f7 10843 }
fe8ab488 10844
39236c6e 10845unionget:
fe8ab488 10846 if (objid == 2) {
b0d623f7
A
10847 error = VFS_ROOT(mp, &vp, ctx);
10848 } else {
fe8ab488 10849 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
b0d623f7 10850 }
39236c6e
A
10851
10852 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
10853 /*
10854 * If the fileid isn't found and we're in a union
10855 * mount volume, then see if the fileid is in the
10856 * mounted-on volume.
10857 */
10858 struct mount *tmp = mp;
10859 mp = vnode_mount(tmp->mnt_vnodecovered);
10860 vfs_unbusy(tmp);
10861 if (vfs_busy(mp, LK_NOWAIT) == 0)
10862 goto unionget;
fe8ab488 10863 } else {
39236c6e 10864 vfs_unbusy(mp);
fe8ab488 10865 }
39236c6e 10866
b0d623f7 10867 if (error) {
fe8ab488 10868 return error;
b0d623f7 10869 }
fe8ab488 10870
6d2010ae
A
10871#if CONFIG_MACF
10872 error = mac_vnode_check_fsgetpath(ctx, vp);
10873 if (error) {
10874 vnode_put(vp);
fe8ab488 10875 return error;
6d2010ae
A
10876 }
10877#endif
fe8ab488 10878
b0d623f7
A
10879 /* Obtain the absolute path to this vnode. */
10880 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
316670eb 10881 bpflags |= BUILDPATH_CHECK_MOVED;
fe8ab488 10882 error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
b0d623f7 10883 vnode_put(vp);
fe8ab488 10884
b0d623f7 10885 if (error) {
813fb2f6
A
10886 /* there was a race building the path, try a few more times */
10887 if (error == EAGAIN) {
10888 --retries;
10889 if (retries > 0)
10890 goto retry;
10891
10892 error = ENOENT;
10893 }
b0d623f7
A
10894 goto out;
10895 }
fe8ab488
A
10896
10897 AUDIT_ARG(text, buf);
39236c6e
A
10898
10899 if (kdebug_enable) {
10900 long dbg_parms[NUMPARMS];
10901 int dbg_namelen;
10902
10903 dbg_namelen = (int)sizeof(dbg_parms);
10904
fe8ab488
A
10905 if (length < dbg_namelen) {
10906 memcpy((char *)dbg_parms, buf, length);
39236c6e
A
10907 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
10908
10909 dbg_namelen = length;
fe8ab488
A
10910 } else {
10911 memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
10912 }
39236c6e
A
10913
10914 kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
10915 }
fe8ab488
A
10916
10917 *pathlen = (user_ssize_t)length; /* may be superseded by error */
10918
10919out:
10920 return (error);
10921}
10922
10923/*
10924 * Obtain the full pathname of a file system object by id.
10925 *
10926 * This is a private SPI used by the File Manager.
10927 */
10928__private_extern__
10929int
10930fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
10931{
10932 vfs_context_t ctx = vfs_context_current();
10933 fsid_t fsid;
10934 char *realpath;
10935 int length;
10936 int error;
10937
10938 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
10939 return (error);
10940 }
10941 AUDIT_ARG(value32, fsid.val[0]);
10942 AUDIT_ARG(value64, uap->objid);
10943 /* Restrict output buffer size for now. */
39037602 10944
fe8ab488
A
10945 if (uap->bufsize > PAGE_SIZE) {
10946 return (EINVAL);
39037602 10947 }
fe8ab488
A
10948 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
10949 if (realpath == NULL) {
10950 return (ENOMEM);
10951 }
10952
10953 error = fsgetpath_internal(
39037602 10954 ctx, fsid.val[0], uap->objid,
fe8ab488
A
10955 uap->bufsize, realpath, &length);
10956
10957 if (error) {
10958 goto out;
10959 }
39037602 10960
b0d623f7
A
10961 error = copyout((caddr_t)realpath, uap->buf, length);
10962
10963 *retval = (user_ssize_t)length; /* may be superseded by error */
10964out:
10965 if (realpath) {
10966 FREE(realpath, M_TEMP);
10967 }
10968 return (error);
10969}
10970
91447636
A
10971/*
10972 * Common routine to handle various flavors of statfs data heading out
10973 * to user space.
2d21ac55
A
10974 *
10975 * Returns: 0 Success
10976 * EFAULT
91447636
A
10977 */
10978static int
39037602
A
10979munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
10980 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
91447636 10981 boolean_t partial_copy)
4a249263 10982{
91447636
A
10983 int error;
10984 int my_size, copy_size;
10985
10986 if (is_64_bit) {
b0d623f7 10987 struct user64_statfs sfs;
91447636
A
10988 my_size = copy_size = sizeof(sfs);
10989 bzero(&sfs, my_size);
10990 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
10991 sfs.f_type = mp->mnt_vtable->vfc_typenum;
10992 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
b0d623f7
A
10993 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
10994 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
10995 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
10996 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
10997 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
10998 sfs.f_files = (user64_long_t)sfsp->f_files;
10999 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
91447636
A
11000 sfs.f_fsid = sfsp->f_fsid;
11001 sfs.f_owner = sfsp->f_owner;
6d2010ae 11002 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
fe8ab488 11003 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
6d2010ae
A
11004 } else {
11005 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
11006 }
2d21ac55
A
11007 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
11008 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
11009
11010 if (partial_copy) {
11011 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
11012 }
11013 error = copyout((caddr_t)&sfs, bufp, copy_size);
11014 }
11015 else {
b0d623f7
A
11016 struct user32_statfs sfs;
11017
91447636
A
11018 my_size = copy_size = sizeof(sfs);
11019 bzero(&sfs, my_size);
39037602 11020
91447636
A
11021 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
11022 sfs.f_type = mp->mnt_vtable->vfc_typenum;
11023 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
39037602 11024
91447636
A
11025 /*
11026 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
11027 * have to fudge the numbers here in that case. We inflate the blocksize in order
11028 * to reflect the filesystem size as best we can.
11029 */
39037602
A
11030 if ((sfsp->f_blocks > INT_MAX)
11031 /* Hack for 4061702 . I think the real fix is for Carbon to
91447636 11032 * look for some volume capability and not depend on hidden
39037602 11033 * semantics agreed between a FS and carbon.
91447636
A
11034 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
11035 * for Carbon to set bNoVolumeSizes volume attribute.
39037602 11036 * Without this the webdavfs files cannot be copied onto
91447636
A
11037 * disk as they look huge. This change should not affect
11038 * XSAN as they should not setting these to -1..
11039 */
2d21ac55
A
11040 && (sfsp->f_blocks != 0xffffffffffffffffULL)
11041 && (sfsp->f_bfree != 0xffffffffffffffffULL)
11042 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
91447636
A
11043 int shift;
11044
11045 /*
11046 * Work out how far we have to shift the block count down to make it fit.
11047 * Note that it's possible to have to shift so far that the resulting
11048 * blocksize would be unreportably large. At that point, we will clip
11049 * any values that don't fit.
11050 *
11051 * For safety's sake, we also ensure that f_iosize is never reported as
11052 * being smaller than f_bsize.
11053 */
11054 for (shift = 0; shift < 32; shift++) {
b0d623f7 11055 if ((sfsp->f_blocks >> shift) <= INT_MAX)
91447636 11056 break;
b0d623f7 11057 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
91447636
A
11058 break;
11059 }
b0d623f7
A
11060#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
11061 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
11062 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
11063 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
91447636 11064#undef __SHIFT_OR_CLIP
b0d623f7 11065 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
91447636
A
11066 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
11067 } else {
11068 /* filesystem is small enough to be reported honestly */
b0d623f7
A
11069 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
11070 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
11071 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
11072 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
11073 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
91447636 11074 }
b0d623f7
A
11075 sfs.f_files = (user32_long_t)sfsp->f_files;
11076 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
91447636
A
11077 sfs.f_fsid = sfsp->f_fsid;
11078 sfs.f_owner = sfsp->f_owner;
6d2010ae 11079 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
fe8ab488 11080 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
6d2010ae
A
11081 } else {
11082 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
11083 }
2d21ac55
A
11084 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
11085 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
11086
11087 if (partial_copy) {
11088 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
11089 }
11090 error = copyout((caddr_t)&sfs, bufp, copy_size);
11091 }
39037602 11092
91447636
A
11093 if (sizep != NULL) {
11094 *sizep = my_size;
11095 }
11096 return(error);
11097}
11098
11099/*
11100 * copy stat structure into user_stat structure.
11101 */
b0d623f7 11102void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
91447636 11103{
b0d623f7
A
11104 bzero(usbp, sizeof(*usbp));
11105
11106 usbp->st_dev = sbp->st_dev;
11107 usbp->st_ino = sbp->st_ino;
11108 usbp->st_mode = sbp->st_mode;
11109 usbp->st_nlink = sbp->st_nlink;
11110 usbp->st_uid = sbp->st_uid;
11111 usbp->st_gid = sbp->st_gid;
11112 usbp->st_rdev = sbp->st_rdev;
11113#ifndef _POSIX_C_SOURCE
11114 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11115 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11116 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11117 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11118 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11119 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11120#else
11121 usbp->st_atime = sbp->st_atime;
11122 usbp->st_atimensec = sbp->st_atimensec;
11123 usbp->st_mtime = sbp->st_mtime;
11124 usbp->st_mtimensec = sbp->st_mtimensec;
11125 usbp->st_ctime = sbp->st_ctime;
11126 usbp->st_ctimensec = sbp->st_ctimensec;
11127#endif
11128 usbp->st_size = sbp->st_size;
11129 usbp->st_blocks = sbp->st_blocks;
11130 usbp->st_blksize = sbp->st_blksize;
11131 usbp->st_flags = sbp->st_flags;
11132 usbp->st_gen = sbp->st_gen;
11133 usbp->st_lspare = sbp->st_lspare;
11134 usbp->st_qspare[0] = sbp->st_qspare[0];
11135 usbp->st_qspare[1] = sbp->st_qspare[1];
11136}
11137
11138void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
11139{
11140 bzero(usbp, sizeof(*usbp));
0c530ab8 11141
91447636
A
11142 usbp->st_dev = sbp->st_dev;
11143 usbp->st_ino = sbp->st_ino;
11144 usbp->st_mode = sbp->st_mode;
11145 usbp->st_nlink = sbp->st_nlink;
11146 usbp->st_uid = sbp->st_uid;
11147 usbp->st_gid = sbp->st_gid;
11148 usbp->st_rdev = sbp->st_rdev;
2d21ac55
A
11149#ifndef _POSIX_C_SOURCE
11150 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11151 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11152 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11153 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11154 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11155 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11156#else
11157 usbp->st_atime = sbp->st_atime;
11158 usbp->st_atimensec = sbp->st_atimensec;
11159 usbp->st_mtime = sbp->st_mtime;
11160 usbp->st_mtimensec = sbp->st_mtimensec;
11161 usbp->st_ctime = sbp->st_ctime;
11162 usbp->st_ctimensec = sbp->st_ctimensec;
11163#endif
11164 usbp->st_size = sbp->st_size;
11165 usbp->st_blocks = sbp->st_blocks;
11166 usbp->st_blksize = sbp->st_blksize;
11167 usbp->st_flags = sbp->st_flags;
11168 usbp->st_gen = sbp->st_gen;
11169 usbp->st_lspare = sbp->st_lspare;
11170 usbp->st_qspare[0] = sbp->st_qspare[0];
11171 usbp->st_qspare[1] = sbp->st_qspare[1];
11172}
11173
11174/*
11175 * copy stat64 structure into user_stat64 structure.
11176 */
b0d623f7
A
11177void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
11178{
11179 bzero(usbp, sizeof(*usbp));
11180
11181 usbp->st_dev = sbp->st_dev;
11182 usbp->st_ino = sbp->st_ino;
11183 usbp->st_mode = sbp->st_mode;
11184 usbp->st_nlink = sbp->st_nlink;
11185 usbp->st_uid = sbp->st_uid;
11186 usbp->st_gid = sbp->st_gid;
11187 usbp->st_rdev = sbp->st_rdev;
11188#ifndef _POSIX_C_SOURCE
11189 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11190 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11191 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11192 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11193 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11194 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11195 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11196 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
11197#else
11198 usbp->st_atime = sbp->st_atime;
11199 usbp->st_atimensec = sbp->st_atimensec;
11200 usbp->st_mtime = sbp->st_mtime;
11201 usbp->st_mtimensec = sbp->st_mtimensec;
11202 usbp->st_ctime = sbp->st_ctime;
11203 usbp->st_ctimensec = sbp->st_ctimensec;
11204 usbp->st_birthtime = sbp->st_birthtime;
11205 usbp->st_birthtimensec = sbp->st_birthtimensec;
11206#endif
11207 usbp->st_size = sbp->st_size;
11208 usbp->st_blocks = sbp->st_blocks;
11209 usbp->st_blksize = sbp->st_blksize;
11210 usbp->st_flags = sbp->st_flags;
11211 usbp->st_gen = sbp->st_gen;
11212 usbp->st_lspare = sbp->st_lspare;
11213 usbp->st_qspare[0] = sbp->st_qspare[0];
11214 usbp->st_qspare[1] = sbp->st_qspare[1];
11215}
11216
11217void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
2d21ac55 11218{
b0d623f7 11219 bzero(usbp, sizeof(*usbp));
2d21ac55
A
11220
11221 usbp->st_dev = sbp->st_dev;
11222 usbp->st_ino = sbp->st_ino;
11223 usbp->st_mode = sbp->st_mode;
11224 usbp->st_nlink = sbp->st_nlink;
11225 usbp->st_uid = sbp->st_uid;
11226 usbp->st_gid = sbp->st_gid;
11227 usbp->st_rdev = sbp->st_rdev;
11228#ifndef _POSIX_C_SOURCE
91447636
A
11229 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11230 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11231 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11232 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11233 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11234 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
2d21ac55
A
11235 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11236 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
91447636
A
11237#else
11238 usbp->st_atime = sbp->st_atime;
11239 usbp->st_atimensec = sbp->st_atimensec;
11240 usbp->st_mtime = sbp->st_mtime;
11241 usbp->st_mtimensec = sbp->st_mtimensec;
11242 usbp->st_ctime = sbp->st_ctime;
11243 usbp->st_ctimensec = sbp->st_ctimensec;
2d21ac55
A
11244 usbp->st_birthtime = sbp->st_birthtime;
11245 usbp->st_birthtimensec = sbp->st_birthtimensec;
91447636
A
11246#endif
11247 usbp->st_size = sbp->st_size;
11248 usbp->st_blocks = sbp->st_blocks;
11249 usbp->st_blksize = sbp->st_blksize;
11250 usbp->st_flags = sbp->st_flags;
11251 usbp->st_gen = sbp->st_gen;
11252 usbp->st_lspare = sbp->st_lspare;
11253 usbp->st_qspare[0] = sbp->st_qspare[0];
11254 usbp->st_qspare[1] = sbp->st_qspare[1];
4a249263 11255}
39236c6e
A
11256
11257/*
11258 * Purge buffer cache for simulating cold starts
11259 */
11260static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
11261{
11262 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
11263
11264 return VNODE_RETURNED;
11265}
11266
11267static int vfs_purge_callback(mount_t mp, __unused void * arg)
11268{
11269 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
11270
11271 return VFS_RETURNED;
11272}
11273
11274int
11275vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
11276{
11277 if (!kauth_cred_issuser(kauth_cred_get()))
11278 return EPERM;
11279
11280 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
11281
11282 return 0;
11283}
11284
39037602
A
11285/*
11286 * gets the vnode associated with the (unnamed) snapshot directory
11287 * for a Filesystem. The snapshot directory vnode is returned with
11288 * an iocount on it.
11289 */
11290int
11291vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx)
11292{
813fb2f6 11293 return (VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx));
39037602
A
11294}
11295
11296/*
11297 * Get the snapshot vnode.
11298 *
11299 * If successful, the call returns with an iocount on *rvpp ,*sdvpp and
11300 * needs nameidone() on ndp.
11301 *
11302 * If the snapshot vnode exists it is returned in ndp->ni_vp.
11303 *
11304 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
11305 * not needed.
11306 */
11307static int
11308vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp,
11309 user_addr_t name, struct nameidata *ndp, int32_t op,
11310#if !CONFIG_TRIGGERS
11311 __unused
11312#endif
11313 enum path_operation pathop,
11314 vfs_context_t ctx)
11315{
11316 int error, i;
11317 caddr_t name_buf;
11318 size_t name_len;
11319 struct vfs_attr vfa;
11320
11321 *sdvpp = NULLVP;
11322 *rvpp = NULLVP;
11323
11324 error = vnode_getfromfd(ctx, dirfd, rvpp);
11325 if (error)
11326 return (error);
11327
11328 if (!vnode_isvroot(*rvpp)) {
11329 error = EINVAL;
11330 goto out;
11331 }
11332
11333 /* Make sure the filesystem supports snapshots */
11334 VFSATTR_INIT(&vfa);
11335 VFSATTR_WANTED(&vfa, f_capabilities);
11336 if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) ||
11337 !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) ||
11338 !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] &
11339 VOL_CAP_INT_SNAPSHOT)) ||
11340 !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &
11341 VOL_CAP_INT_SNAPSHOT))) {
11342 error = ENOTSUP;
11343 goto out;
11344 }
11345
11346 error = vnode_get_snapdir(*rvpp, sdvpp, ctx);
11347 if (error)
11348 goto out;
11349
11350 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11351 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11352 if (error)
11353 goto out1;
11354
11355 /*
11356 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
11357 * (the length returned by copyinstr includes the terminating NUL)
11358 */
11359 if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
11360 (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
11361 error = EINVAL;
11362 goto out1;
11363 }
11364 for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++);
11365 if (i < (int)name_len) {
11366 error = EINVAL;
11367 goto out1;
11368 }
11369
11370#if CONFIG_MACF
11371 if (op == CREATE) {
11372 error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp),
11373 name_buf);
11374 } else if (op == DELETE) {
11375 error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp),
11376 name_buf);
11377 }
11378 if (error)
11379 goto out1;
11380#endif
11381
11382 /* Check if the snapshot already exists ... */
11383 NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1,
11384 UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx);
11385 ndp->ni_dvp = *sdvpp;
11386
11387 error = namei(ndp);
11388out1:
11389 FREE(name_buf, M_TEMP);
11390out:
11391 if (error) {
11392 if (*sdvpp) {
11393 vnode_put(*sdvpp);
11394 *sdvpp = NULLVP;
11395 }
11396 if (*rvpp) {
11397 vnode_put(*rvpp);
11398 *rvpp = NULLVP;
11399 }
11400 }
11401 return (error);
11402}
11403
11404/*
11405 * create a filesystem snapshot (for supporting filesystems)
11406 *
11407 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
11408 * We get to the (unnamed) snapshot directory vnode and create the vnode
11409 * for the snapshot in it.
11410 *
11411 * Restrictions:
11412 *
11413 * a) Passed in name for snapshot cannot have slashes.
11414 * b) name can't be "." or ".."
11415 *
11416 * Since this requires superuser privileges, vnode_authorize calls are not
11417 * made.
11418 */
11419static int
11420snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
11421 vfs_context_t ctx)
11422{
11423 vnode_t rvp, snapdvp;
11424 int error;
11425 struct nameidata namend;
11426
11427 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE,
11428 OP_LINK, ctx);
11429 if (error)
11430 return (error);
11431
11432 if (namend.ni_vp) {
11433 vnode_put(namend.ni_vp);
11434 error = EEXIST;
11435 } else {
11436 struct vnode_attr va;
11437 vnode_t vp = NULLVP;
11438
11439 VATTR_INIT(&va);
11440 VATTR_SET(&va, va_type, VREG);
11441 VATTR_SET(&va, va_mode, 0);
11442
11443 error = vn_create(snapdvp, &vp, &namend, &va,
11444 VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
11445 if (!error && vp)
11446 vnode_put(vp);
39037602
A
11447 }
11448
11449 nameidone(&namend);
11450 vnode_put(snapdvp);
11451 vnode_put(rvp);
11452 return (error);
11453}
11454
11455/*
11456 * Delete a Filesystem snapshot
11457 *
11458 * get the vnode for the unnamed snapshot directory and the snapshot and
11459 * delete the snapshot.
11460 */
11461static int
11462snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
11463 vfs_context_t ctx)
11464{
11465 vnode_t rvp, snapdvp;
11466 int error;
11467 struct nameidata namend;
11468
11469 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE,
11470 OP_UNLINK, ctx);
11471 if (error)
11472 goto out;
11473
11474 error = VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd,
11475 VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
11476
11477 vnode_put(namend.ni_vp);
11478 nameidone(&namend);
11479 vnode_put(snapdvp);
11480 vnode_put(rvp);
11481out:
11482 return (error);
11483}
11484
11485/*
11486 * Revert a filesystem to a snapshot
11487 *
11488 * Marks the filesystem to revert to the given snapshot on next mount.
11489 */
11490static int
11491snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
11492 vfs_context_t ctx)
11493{
11494 int error;
11495 vnode_t rvp;
11496 mount_t mp;
11497 struct fs_snapshot_revert_args revert_data;
11498 struct componentname cnp;
11499 caddr_t name_buf;
11500 size_t name_len;
11501
11502 error = vnode_getfromfd(ctx, dirfd, &rvp);
11503 if (error) {
11504 return (error);
11505 }
11506 mp = vnode_mount(rvp);
11507
813fb2f6
A
11508 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11509 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11510 if (error) {
11511 FREE(name_buf, M_TEMP);
11512 vnode_put(rvp);
11513 return (error);
11514 }
11515
11516#if CONFIG_MACF
11517 error = mac_mount_check_snapshot_revert(ctx, mp, name_buf);
11518 if (error) {
11519 FREE(name_buf, M_TEMP);
11520 vnode_put(rvp);
11521 return (error);
11522 }
11523#endif
11524
39037602
A
11525 /*
11526 * Grab mount_iterref so that we can release the vnode,
11527 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
11528 */
11529 error = mount_iterref (mp, 0);
11530 vnode_put(rvp);
11531 if (error) {
39037602
A
11532 FREE(name_buf, M_TEMP);
11533 return (error);
11534 }
11535
11536 memset(&cnp, 0, sizeof(cnp));
11537 cnp.cn_pnbuf = (char *)name_buf;
11538 cnp.cn_nameiop = LOOKUP;
11539 cnp.cn_flags = ISLASTCN | HASBUF;
11540 cnp.cn_pnlen = MAXPATHLEN;
11541 cnp.cn_nameptr = cnp.cn_pnbuf;
11542 cnp.cn_namelen = (int)name_len;
11543 revert_data.sr_cnp = &cnp;
11544
11545 error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
11546 mount_iterdrop(mp);
11547 FREE(name_buf, M_TEMP);
11548
11549 if (error) {
11550 /* If there was any error, try again using VNOP_IOCTL */
11551
11552 vnode_t snapdvp;
11553 struct nameidata namend;
11554
11555 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP,
11556 OP_LOOKUP, ctx);
11557 if (error) {
11558 return (error);
11559 }
11560
11561
11562#ifndef APFSIOC_REVERT_TO_SNAPSHOT
11563#define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
11564#endif
11565
11566#ifndef APFS_REVERT_TO_SNAPSHOT
11567#define APFS_REVERT_TO_SNAPSHOT IOCBASECMD(APFSIOC_REVERT_TO_SNAPSHOT)
11568#endif
11569
11570 error = VNOP_IOCTL(namend.ni_vp, APFS_REVERT_TO_SNAPSHOT, (caddr_t) NULL,
11571 0, ctx);
11572
11573 vnode_put(namend.ni_vp);
11574 nameidone(&namend);
11575 vnode_put(snapdvp);
11576 vnode_put(rvp);
11577 }
11578
11579 return (error);
11580}
11581
11582/*
11583 * rename a Filesystem snapshot
11584 *
11585 * get the vnode for the unnamed snapshot directory and the snapshot and
11586 * rename the snapshot. This is a very specialised (and simple) case of
11587 * rename(2) (which has to deal with a lot more complications). It differs
11588 * slightly from rename(2) in that EEXIST is returned if the new name exists.
11589 */
11590static int
11591snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
11592 __unused uint32_t flags, vfs_context_t ctx)
11593{
11594 vnode_t rvp, snapdvp;
11595 int error, i;
11596 caddr_t newname_buf;
11597 size_t name_len;
11598 vnode_t fvp;
11599 struct nameidata *fromnd, *tond;
11600 /* carving out a chunk for structs that are too big to be on stack. */
11601 struct {
11602 struct nameidata from_node;
11603 struct nameidata to_node;
11604 } * __rename_data;
11605
11606 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
11607 fromnd = &__rename_data->from_node;
11608 tond = &__rename_data->to_node;
11609
11610 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE,
11611 OP_UNLINK, ctx);
11612 if (error)
11613 goto out;
11614 fvp = fromnd->ni_vp;
11615
11616 MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11617 error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
11618 if (error)
11619 goto out1;
11620
11621 /*
11622 * Some sanity checks- new name can't be empty, "." or ".." or have
11623 * slashes.
11624 * (the length returned by copyinstr includes the terminating NUL)
11625 *
11626 * The FS rename VNOP is suppossed to handle this but we'll pick it
11627 * off here itself.
11628 */
11629 if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') ||
11630 (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) {
11631 error = EINVAL;
11632 goto out1;
11633 }
11634 for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++);
11635 if (i < (int)name_len) {
11636 error = EINVAL;
11637 goto out1;
11638 }
11639
11640#if CONFIG_MACF
11641 error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp),
11642 newname_buf);
11643 if (error)
11644 goto out1;
11645#endif
11646
11647 NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
11648 UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx);
11649 tond->ni_dvp = snapdvp;
11650
11651 error = namei(tond);
11652 if (error) {
11653 goto out2;
11654 } else if (tond->ni_vp) {
11655 /*
11656 * snapshot rename behaves differently than rename(2) - if the
11657 * new name exists, EEXIST is returned.
11658 */
11659 vnode_put(tond->ni_vp);
11660 error = EEXIST;
11661 goto out2;
11662 }
11663
11664 error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
11665 &tond->ni_cnd, ctx);
11666
11667out2:
11668 nameidone(tond);
11669out1:
11670 FREE(newname_buf, M_TEMP);
11671 vnode_put(fvp);
11672 vnode_put(snapdvp);
11673 vnode_put(rvp);
11674 nameidone(fromnd);
11675out:
11676 FREE(__rename_data, M_TEMP);
11677 return (error);
11678}
11679
11680/*
11681 * Mount a Filesystem snapshot
11682 *
11683 * get the vnode for the unnamed snapshot directory and the snapshot and
11684 * mount the snapshot.
11685 */
11686static int
11687snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
813fb2f6 11688 __unused user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
39037602
A
11689{
11690 vnode_t rvp, snapdvp, snapvp, vp, pvp;
11691 int error;
11692 struct nameidata *snapndp, *dirndp;
11693 /* carving out a chunk for structs that are too big to be on stack. */
11694 struct {
11695 struct nameidata snapnd;
11696 struct nameidata dirnd;
11697 } * __snapshot_mount_data;
11698
11699 MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data),
11700 M_TEMP, M_WAITOK);
11701 snapndp = &__snapshot_mount_data->snapnd;
11702 dirndp = &__snapshot_mount_data->dirnd;
11703
11704 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP,
11705 OP_LOOKUP, ctx);
11706 if (error)
11707 goto out;
11708
11709 snapvp = snapndp->ni_vp;
11710 if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) {
11711 error = EIO;
11712 goto out1;
11713 }
11714
11715 /* Get the vnode to be covered */
11716 NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
11717 UIO_USERSPACE, directory, ctx);
11718 error = namei(dirndp);
11719 if (error)
11720 goto out1;
11721
11722 vp = dirndp->ni_vp;
11723 pvp = dirndp->ni_dvp;
11724
11725 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
11726 error = EINVAL;
11727 } else {
11728 mount_t mp = vnode_mount(rvp);
11729 struct fs_snapshot_mount_args smnt_data;
11730
11731 smnt_data.sm_mp = mp;
11732 smnt_data.sm_cnp = &snapndp->ni_cnd;
11733 error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
11734 &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), 0,
11735 KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
39037602
A
11736 }
11737
11738 vnode_put(vp);
11739 vnode_put(pvp);
11740 nameidone(dirndp);
11741out1:
11742 vnode_put(snapvp);
11743 vnode_put(snapdvp);
11744 vnode_put(rvp);
11745 nameidone(snapndp);
11746out:
11747 FREE(__snapshot_mount_data, M_TEMP);
11748 return (error);
11749}
11750
813fb2f6
A
11751/*
11752 * Root from a snapshot of the filesystem
11753 *
11754 * Marks the filesystem to root from the given snapshot on next boot.
11755 */
11756static int
11757snapshot_root(int dirfd, user_addr_t name, __unused uint32_t flags,
11758 vfs_context_t ctx)
11759{
11760 int error;
11761 vnode_t rvp;
11762 mount_t mp;
11763 struct fs_snapshot_root_args root_data;
11764 struct componentname cnp;
11765 caddr_t name_buf;
11766 size_t name_len;
11767
11768 error = vnode_getfromfd(ctx, dirfd, &rvp);
11769 if (error) {
11770 return (error);
11771 }
11772 mp = vnode_mount(rvp);
11773
11774 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11775 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11776 if (error) {
11777 FREE(name_buf, M_TEMP);
11778 vnode_put(rvp);
11779 return (error);
11780 }
11781
11782 // XXX MAC checks ?
11783
11784 /*
11785 * Grab mount_iterref so that we can release the vnode,
11786 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
11787 */
11788 error = mount_iterref (mp, 0);
11789 vnode_put(rvp);
11790 if (error) {
11791 FREE(name_buf, M_TEMP);
11792 return (error);
11793 }
11794
11795 memset(&cnp, 0, sizeof(cnp));
11796 cnp.cn_pnbuf = (char *)name_buf;
11797 cnp.cn_nameiop = LOOKUP;
11798 cnp.cn_flags = ISLASTCN | HASBUF;
11799 cnp.cn_pnlen = MAXPATHLEN;
11800 cnp.cn_nameptr = cnp.cn_pnbuf;
11801 cnp.cn_namelen = (int)name_len;
11802 root_data.sr_cnp = &cnp;
11803
11804 error = VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, (caddr_t)&root_data, 0, ctx);
11805
11806 mount_iterdrop(mp);
11807 FREE(name_buf, M_TEMP);
11808
11809 return (error);
11810}
11811
39037602
A
11812/*
11813 * FS snapshot operations dispatcher
11814 */
11815int
11816fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap,
11817 __unused int32_t *retval)
11818{
11819 int error;
11820 vfs_context_t ctx = vfs_context_current();
11821
813fb2f6
A
11822 AUDIT_ARG(fd, uap->dirfd);
11823 AUDIT_ARG(value32, uap->op);
11824
39037602
A
11825 error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0);
11826 if (error)
11827 return (error);
11828
11829 switch (uap->op) {
11830 case SNAPSHOT_OP_CREATE:
11831 error = snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx);
11832 break;
11833 case SNAPSHOT_OP_DELETE:
11834 error = snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx);
11835 break;
11836 case SNAPSHOT_OP_RENAME:
11837 error = snapshot_rename(uap->dirfd, uap->name1, uap->name2,
11838 uap->flags, ctx);
11839 break;
11840 case SNAPSHOT_OP_MOUNT:
11841 error = snapshot_mount(uap->dirfd, uap->name1, uap->name2,
11842 uap->data, uap->flags, ctx);
11843 break;
11844 case SNAPSHOT_OP_REVERT:
11845 error = snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx);
11846 break;
813fb2f6
A
11847 case SNAPSHOT_OP_ROOT:
11848 error = snapshot_root(uap->dirfd, uap->name1, uap->flags, ctx);
11849 break;
39037602
A
11850 default:
11851 error = ENOSYS;
11852 }
11853
11854 return (error);
11855}