]> git.saurik.com Git - apple/xnu.git/blame - bsd/vfs/vfs_syscalls.c
xnu-4570.61.1.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
CommitLineData
1c79356b 1/*
5ba3f43e 2 * Copyright (c) 1995-2017 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39037602 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39037602 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39037602 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39037602 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
2d21ac55
A
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
1c79356b
A
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/namei.h>
77#include <sys/filedesc.h>
78#include <sys/kernel.h>
91447636 79#include <sys/file_internal.h>
1c79356b 80#include <sys/stat.h>
91447636
A
81#include <sys/vnode_internal.h>
82#include <sys/mount_internal.h>
83#include <sys/proc_internal.h>
84#include <sys/kauth.h>
85#include <sys/uio_internal.h>
1c79356b 86#include <sys/malloc.h>
91447636 87#include <sys/mman.h>
1c79356b
A
88#include <sys/dirent.h>
89#include <sys/attr.h>
90#include <sys/sysctl.h>
91#include <sys/ubc.h>
9bccf70c 92#include <sys/quota.h>
91447636
A
93#include <sys/kdebug.h>
94#include <sys/fsevents.h>
6d2010ae 95#include <sys/imgsrc.h>
91447636
A
96#include <sys/sysproto.h>
97#include <sys/xattr.h>
b0d623f7
A
98#include <sys/fcntl.h>
99#include <sys/fsctl.h>
91447636 100#include <sys/ubc_internal.h>
593a1d5f 101#include <sys/disk.h>
3e170ce0 102#include <sys/content_protection.h>
39037602
A
103#include <sys/clonefile.h>
104#include <sys/snapshot.h>
490019cf 105#include <sys/priv.h>
91447636
A
106#include <machine/cons.h>
107#include <machine/limits.h>
108#include <miscfs/specfs/specdev.h>
e5568f75 109
5ba3f43e
A
110#include <vfs/vfs_disk_conditioner.h>
111
b0d623f7 112#include <security/audit/audit.h>
e5568f75
A
113#include <bsm/audit_kevents.h>
114
91447636
A
115#include <mach/mach_types.h>
116#include <kern/kern_types.h>
117#include <kern/kalloc.h>
6d2010ae 118#include <kern/task.h>
91447636
A
119
120#include <vm/vm_pageout.h>
39037602 121#include <vm/vm_protos.h>
1c79356b 122
91447636 123#include <libkern/OSAtomic.h>
b0d623f7 124#include <pexpert/pexpert.h>
3e170ce0 125#include <IOKit/IOBSD.h>
55e303ae 126
490019cf
A
127#if ROUTEFS
128#include <miscfs/routefs/routefs.h>
129#endif /* ROUTEFS */
130
2d21ac55
A
131#if CONFIG_MACF
132#include <security/mac.h>
133#include <security/mac_framework.h>
134#endif
1c79356b 135
/*
 * Scratch path buffer helpers.
 *
 * GET_PATH(x) stores a path buffer in x and RELEASE_PATH(x) gives it back.
 * With CONFIG_FSE the buffer comes from get_pathbuff()/release_pathbuff();
 * otherwise a MAXPATHLEN sized buffer is taken from the M_NAMEI zone.
 *
 * Both macros expand to a complete statement, terminating semicolon
 * included, so they must not be used as the unbraced body of an if/else.
 */
#if CONFIG_FSE
#define GET_PATH(x)	(x) = get_pathbuff();
#define RELEASE_PATH(x)	release_pathbuff(x);
#else
#define GET_PATH(x)	MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
#define RELEASE_PATH(x)	FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
#endif /* CONFIG_FSE */
147
a39ff7e2
A
/*
 * Fallback definitions of file system specific request codes.  Each one is
 * only defined here when no header included above has already provided it.
 */
#ifndef HFS_GET_BOOT_INFO
#define HFS_GET_BOOT_INFO		(FCNTL_FS_SPECIFIC_BASE + 0x00004)
#endif

#ifndef HFS_SET_BOOT_INFO
#define HFS_SET_BOOT_INFO		(FCNTL_FS_SPECIFIC_BASE + 0x00005)
#endif

#ifndef APFSIOC_REVERT_TO_SNAPSHOT
#define APFSIOC_REVERT_TO_SNAPSHOT	_IOW('J', 1, u_int64_t)
#endif
159
5ba3f43e
A
160extern void disk_conditioner_unmount(mount_t mp);
161
2d21ac55
A
162/* struct for checkdirs iteration */
163struct cdirargs {
164 vnode_t olddp;
165 vnode_t newdp;
166};
167/* callback for checkdirs iteration */
168static int checkdirs_callback(proc_t p, void * arg);
1c79356b 169
91447636 170static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
6601e61a 171static int checkdirs(vnode_t olddp, vfs_context_t ctx);
91447636
A
172void enablequotas(struct mount *mp, vfs_context_t ctx);
173static int getfsstat_callback(mount_t mp, void * arg);
174static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
2d21ac55 175static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
91447636 176static int sync_callback(mount_t, void *);
a39ff7e2
A
177static void hibernate_sync_thread(void *, __unused wait_result_t);
178static int hibernate_sync_async(int);
39037602
A
179static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
180 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
91447636 181 boolean_t partial_copy);
b0d623f7
A
182static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
183 user_addr_t bufp);
184static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
6d2010ae
A
185static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
186 struct componentname *cnp, user_addr_t fsmountargs,
187 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
188 vfs_context_t ctx);
189void vfs_notify_mount(vnode_t pdvp);
190
191int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
b7266188 192
fe8ab488
A
193struct fd_vn_data * fg_vn_data_alloc(void);
194
c18c124e
A
195/*
196 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
197 * Concurrent lookups (or lookups by ids) on hard links can cause the
198 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
199 * does) to return ENOENT as the path cannot be returned from the name cache
200 * alone. We have no option but to retry and hope to get one namei->reverse path
201 * generation done without an intervening lookup, lookup by id on the hard link
202 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
203 * which currently are the MAC hooks for rename, unlink and rmdir.
204 */
205#define MAX_AUTHORIZE_ENOENT_RETRIES 1024
206
fe8ab488
A
207static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
208
209static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
210
b7266188 211#ifdef CONFIG_IMGSRC_ACCESS
b7266188
A
212static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
213static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
214static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
215static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
216static void mount_end_update(mount_t mp);
6d2010ae 217static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
b7266188
A
218#endif /* CONFIG_IMGSRC_ACCESS */
219
2d21ac55
A
220int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
221
222__private_extern__
223int sync_internal(void);
224
2d21ac55 225__private_extern__
c18c124e 226int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
91447636 227
fe8ab488
A
228extern lck_grp_t *fd_vn_lck_grp;
229extern lck_grp_attr_t *fd_vn_lck_grp_attr;
230extern lck_attr_t *fd_vn_lck_attr;
231
2d21ac55
A
/*
 * Bumped on every mount or unmount operation.  It is used to invalidate the
 * rootvp value cached in the mount structure that cache_lookup_path relies on.
 */
uint32_t mount_generation = 0;

/* Running count of mount and unmount operations */
unsigned int vfs_nummntops = 0;
241
39236c6e
A
242extern const struct fileops vnops;
243#if CONFIG_APPLEDOUBLE
39037602 244extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
39236c6e 245#endif /* CONFIG_APPLEDOUBLE */
91447636 246
1c79356b
A
247/*
248 * Virtual File System System Calls
249 */
250
490019cf 251#if NFSCLIENT || DEVFS || ROUTEFS
6d2010ae
A
252/*
253 * Private in-kernel mounting spi (built for NFS, devfs and routefs; not exported)
254 */
255 __private_extern__
256boolean_t
257vfs_iskernelmount(mount_t mp)
258{
259 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
260}
261
262 __private_extern__
263int
264kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
265 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
266{
267 struct nameidata nd;
268 boolean_t did_namei;
269 int error;
270
39037602 271 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
6d2010ae
A
272 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
273
274 /*
275 * Get the vnode to be covered if it's not supplied
276 */
277 if (vp == NULLVP) {
278 error = namei(&nd);
279 if (error)
280 return (error);
281 vp = nd.ni_vp;
282 pvp = nd.ni_dvp;
283 did_namei = TRUE;
284 } else {
285 char *pnbuf = CAST_DOWN(char *, path);
286
287 nd.ni_cnd.cn_pnbuf = pnbuf;
288 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
289 did_namei = FALSE;
290 }
291
292 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
293 syscall_flags, kern_flags, NULL, TRUE, ctx);
294
295 if (did_namei) {
296 vnode_put(vp);
297 vnode_put(pvp);
298 nameidone(&nd);
299 }
300
301 return (error);
302}
fe8ab488 303#endif /* NFSCLIENT || DEVFS || ROUTEFS */
6d2010ae 304
1c79356b
A
305/*
306 * Mount a file system.
307 */
1c79356b
A
308/* ARGSUSED */
309int
b0d623f7 310mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
2d21ac55
A
311{
312 struct __mac_mount_args muap;
313
314 muap.type = uap->type;
315 muap.path = uap->path;
316 muap.flags = uap->flags;
317 muap.data = uap->data;
318 muap.mac_p = USER_ADDR_NULL;
319 return (__mac_mount(p, &muap, retval));
320}
321
5ba3f43e
A
322int
323fmount(__unused proc_t p, struct fmount_args *uap, __unused int32_t *retval)
324{
325 struct componentname cn;
326 vfs_context_t ctx = vfs_context_current();
327 size_t dummy = 0;
328 int error;
329 int flags = uap->flags;
330 char fstypename[MFSNAMELEN];
331 char *labelstr = NULL; /* regular mount call always sets it to NULL for __mac_mount() */
332 vnode_t pvp;
333 vnode_t vp;
334
335 AUDIT_ARG(fd, uap->fd);
336 AUDIT_ARG(fflags, flags);
337 /* fstypename will get audited by mount_common */
338
339 /* Sanity check the flags */
340 if (flags & (MNT_IMGSRC_BY_INDEX|MNT_ROOTFS)) {
341 return (ENOTSUP);
342 }
343
344 if (flags & MNT_UNION) {
345 return (EPERM);
346 }
347
348 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
349 if (error) {
350 return (error);
351 }
352
353 if ((error = file_vnode(uap->fd, &vp)) != 0) {
354 return (error);
355 }
356
357 if ((error = vnode_getwithref(vp)) != 0) {
358 file_drop(uap->fd);
359 return (error);
360 }
361
362 pvp = vnode_getparent(vp);
363 if (pvp == NULL) {
364 vnode_put(vp);
365 file_drop(uap->fd);
366 return (EINVAL);
367 }
368
369 memset(&cn, 0, sizeof(struct componentname));
370 MALLOC(cn.cn_pnbuf, char *, MAXPATHLEN, M_TEMP, M_WAITOK);
371 cn.cn_pnlen = MAXPATHLEN;
372
373 if((error = vn_getpath(vp, cn.cn_pnbuf, &cn.cn_pnlen)) != 0) {
374 FREE(cn.cn_pnbuf, M_TEMP);
375 vnode_put(pvp);
376 vnode_put(vp);
377 file_drop(uap->fd);
378 return (error);
379 }
380
381 error = mount_common(fstypename, pvp, vp, &cn, uap->data, flags, 0, labelstr, FALSE, ctx);
382
383 FREE(cn.cn_pnbuf, M_TEMP);
384 vnode_put(pvp);
385 vnode_put(vp);
386 file_drop(uap->fd);
387
388 return (error);
389}
390
6d2010ae 391void
39037602 392vfs_notify_mount(vnode_t pdvp)
6d2010ae
A
393{
394 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
395 lock_vnode_and_post(pdvp, NOTE_WRITE);
396}
397
b0d623f7
A
398/*
399 * __mac_mount:
400 * Mount a file system taking into account MAC label behavior.
401 * See mount(2) man page for more information
402 *
403 * Parameters: p Process requesting the mount
404 * uap User argument descriptor (see below)
39037602 405 * retval (ignored)
b0d623f7
A
406 *
407 * Indirect: uap->type Filesystem type
408 * uap->path Path to mount
39037602
A
409 * uap->data Mount arguments
410 * uap->mac_p MAC info
b0d623f7 411 * uap->flags Mount flags
39037602 412 *
b0d623f7
A
413 *
414 * Returns: 0 Success
415 * !0 Not success
416 */
6d2010ae
A
417boolean_t root_fs_upgrade_try = FALSE;
418
2d21ac55 419int
b0d623f7 420__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
1c79356b 421{
39236c6e
A
422 vnode_t pvp = NULL;
423 vnode_t vp = NULL;
424 int need_nameidone = 0;
6d2010ae
A
425 vfs_context_t ctx = vfs_context_current();
426 char fstypename[MFSNAMELEN];
427 struct nameidata nd;
428 size_t dummy=0;
429 char *labelstr = NULL;
430 int flags = uap->flags;
431 int error;
39037602 432#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
6d2010ae 433 boolean_t is_64bit = IS_64BIT_PROCESS(p);
39236c6e
A
434#else
435#pragma unused(p)
436#endif
6d2010ae
A
437 /*
438 * Get the fs type name from user space
439 */
440 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
441 if (error)
442 return (error);
443
444 /*
445 * Get the vnode to be covered
446 */
39037602 447 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
6d2010ae
A
448 UIO_USERSPACE, uap->path, ctx);
449 error = namei(&nd);
39236c6e
A
450 if (error) {
451 goto out;
452 }
453 need_nameidone = 1;
6d2010ae
A
454 vp = nd.ni_vp;
455 pvp = nd.ni_dvp;
39037602 456
6d2010ae
A
457#ifdef CONFIG_IMGSRC_ACCESS
458 /* Mounting image source cannot be batched with other operations */
459 if (flags == MNT_IMGSRC_BY_INDEX) {
460 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
461 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
462 goto out;
463 }
464#endif /* CONFIG_IMGSRC_ACCESS */
465
466#if CONFIG_MACF
467 /*
468 * Get the label string (if any) from user space
469 */
470 if (uap->mac_p != USER_ADDR_NULL) {
471 struct user_mac mac;
472 size_t ulen = 0;
473
474 if (is_64bit) {
475 struct user64_mac mac64;
476 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
477 mac.m_buflen = mac64.m_buflen;
478 mac.m_string = mac64.m_string;
479 } else {
480 struct user32_mac mac32;
481 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
482 mac.m_buflen = mac32.m_buflen;
483 mac.m_string = mac32.m_string;
484 }
485 if (error)
486 goto out;
487 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
488 (mac.m_buflen < 2)) {
489 error = EINVAL;
490 goto out;
491 }
492 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
493 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
494 if (error) {
495 goto out;
496 }
497 AUDIT_ARG(mac_string, labelstr);
498 }
499#endif /* CONFIG_MACF */
500
501 AUDIT_ARG(fflags, flags);
502
4bd07ac2
A
503#if SECURE_KERNEL
504 if (flags & MNT_UNION) {
505 /* No union mounts on release kernels */
506 error = EPERM;
507 goto out;
508 }
509#endif
510
6d2010ae 511 if ((vp->v_flag & VROOT) &&
39236c6e
A
512 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
513 if (!(flags & MNT_UNION)) {
6d2010ae 514 flags |= MNT_UPDATE;
39236c6e
A
515 }
516 else {
39037602 517 /*
39236c6e 518 * For a union mount on '/', treat it as fresh
39037602
A
519 * mount instead of update.
520 * Otherwise, union mouting on '/' used to panic the
521 * system before, since mnt_vnodecovered was found to
522 * be NULL for '/' which is required for unionlookup
39236c6e
A
523 * after it gets ENOENT on union mount.
524 */
525 flags = (flags & ~(MNT_UPDATE));
526 }
527
4bd07ac2 528#if SECURE_KERNEL
39236c6e
A
529 if ((flags & MNT_RDONLY) == 0) {
530 /* Release kernels are not allowed to mount "/" as rw */
531 error = EPERM;
39037602 532 goto out;
39236c6e 533 }
39236c6e
A
534#endif
535 /*
536 * See 7392553 for more details on why this check exists.
537 * Suffice to say: If this check is ON and something tries
538 * to mount the rootFS RW, we'll turn off the codesign
39037602
A
539 * bitmap optimization.
540 */
6d2010ae 541#if CHECK_CS_VALIDATION_BITMAP
39236c6e 542 if ((flags & MNT_RDONLY) == 0 ) {
6d2010ae
A
543 root_fs_upgrade_try = TRUE;
544 }
545#endif
546 }
547
548 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
549 labelstr, FALSE, ctx);
39236c6e 550
6d2010ae 551out:
39236c6e 552
6d2010ae
A
553#if CONFIG_MACF
554 if (labelstr)
555 FREE(labelstr, M_MACTEMP);
556#endif /* CONFIG_MACF */
557
39236c6e
A
558 if (vp) {
559 vnode_put(vp);
560 }
561 if (pvp) {
562 vnode_put(pvp);
563 }
564 if (need_nameidone) {
565 nameidone(&nd);
566 }
6d2010ae
A
567
568 return (error);
569}
570
571/*
572 * common mount implementation (final stage of mounting)
39037602 573
6d2010ae
A
574 * Arguments:
575 * fstypename file system type (i.e. its vfs name)
576 * pvp parent of covered vnode
577 * vp covered vnode
578 * cnp component name (ie path) of covered vnode
579 * flags generic mount flags
580 * fsmountargs file system specific data
581 * labelstr optional MAC label
582 * kernelmount TRUE for mounts initiated from inside the kernel
583 * ctx caller's context
584 */
585static int
586mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
587 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
588 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
589{
39236c6e
A
590#if !CONFIG_MACF
591#pragma unused(labelstr)
592#endif
91447636
A
593 struct vnode *devvp = NULLVP;
594 struct vnode *device_vnode = NULLVP;
2d21ac55
A
595#if CONFIG_MACF
596 struct vnode *rvp;
597#endif
1c79356b 598 struct mount *mp;
6601e61a 599 struct vfstable *vfsp = (struct vfstable *)0;
6d2010ae 600 struct proc *p = vfs_context_proc(ctx);
91447636 601 int error, flag = 0;
91447636 602 user_addr_t devpath = USER_ADDR_NULL;
91447636
A
603 int ronly = 0;
604 int mntalloc = 0;
b0d623f7 605 boolean_t vfsp_ref = FALSE;
743b1565 606 boolean_t is_rwlock_locked = FALSE;
b0d623f7
A
607 boolean_t did_rele = FALSE;
608 boolean_t have_usecount = FALSE;
9bccf70c 609
1c79356b 610 /*
6d2010ae 611 * Process an update for an existing mount
1c79356b 612 */
6d2010ae 613 if (flags & MNT_UPDATE) {
1c79356b 614 if ((vp->v_flag & VROOT) == 0) {
91447636
A
615 error = EINVAL;
616 goto out1;
1c79356b
A
617 }
618 mp = vp->v_mount;
d12e1678 619
91447636 620 /* unmount in progress return error */
b0d623f7 621 mount_lock_spin(mp);
91447636
A
622 if (mp->mnt_lflag & MNT_LUNMOUNT) {
623 mount_unlock(mp);
624 error = EBUSY;
625 goto out1;
d12e1678 626 }
91447636
A
627 mount_unlock(mp);
628 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 629 is_rwlock_locked = TRUE;
1c79356b
A
630 /*
631 * We only allow the filesystem to be reloaded if it
632 * is currently mounted read-only.
633 */
6d2010ae 634 if ((flags & MNT_RELOAD) &&
1c79356b 635 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
91447636
A
636 error = ENOTSUP;
637 goto out1;
1c79356b 638 }
b7266188 639
316670eb
A
640 /*
641 * If content protection is enabled, update mounts are not
642 * allowed to turn it off.
643 */
39037602 644 if ((mp->mnt_flag & MNT_CPROTECT) &&
316670eb
A
645 ((flags & MNT_CPROTECT) == 0)) {
646 error = EINVAL;
647 goto out1;
648 }
649
39037602 650#ifdef CONFIG_IMGSRC_ACCESS
b7266188
A
651 /* Can't downgrade the backer of the root FS */
652 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
6d2010ae 653 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
b7266188
A
654 error = ENOTSUP;
655 goto out1;
656 }
657#endif /* CONFIG_IMGSRC_ACCESS */
658
1c79356b
A
659 /*
660 * Only root, or the user that did the original mount is
661 * permitted to update it.
662 */
2d21ac55
A
663 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
664 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
665 goto out1;
666 }
667#if CONFIG_MACF
668 error = mac_mount_check_remount(ctx, mp);
669 if (error != 0) {
91447636 670 goto out1;
1c79356b 671 }
2d21ac55 672#endif
1c79356b 673 /*
91447636
A
674 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
675 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
1c79356b 676 */
6d2010ae
A
677 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
678 flags |= MNT_NOSUID | MNT_NODEV;
d12e1678 679 if (mp->mnt_flag & MNT_NOEXEC)
6d2010ae 680 flags |= MNT_NOEXEC;
1c79356b 681 }
d12e1678
A
682 flag = mp->mnt_flag;
683
316670eb
A
684
685
6d2010ae 686 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
d12e1678 687
91447636 688 vfsp = mp->mnt_vtable;
1c79356b
A
689 goto update;
690 }
5ba3f43e 691
1c79356b 692 /*
91447636 693 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
1c79356b
A
694 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
695 */
6d2010ae
A
696 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
697 flags |= MNT_NOSUID | MNT_NODEV;
1c79356b 698 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
6d2010ae 699 flags |= MNT_NOEXEC;
1c79356b 700 }
91447636 701
55e303ae
A
702 /* XXXAUDIT: Should we capture the type on the error path as well? */
703 AUDIT_ARG(text, fstypename);
91447636 704 mount_list_lock();
1c79356b 705 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
b0d623f7
A
706 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
707 vfsp->vfc_refcount++;
708 vfsp_ref = TRUE;
1c79356b 709 break;
b0d623f7 710 }
91447636 711 mount_list_unlock();
1c79356b 712 if (vfsp == NULL) {
91447636
A
713 error = ENODEV;
714 goto out1;
1c79356b 715 }
6d2010ae
A
716
717 /*
718 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
719 */
720 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
721 error = EINVAL; /* unsupported request */
2d21ac55 722 goto out1;
6d2010ae
A
723 }
724
725 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
726 if (error != 0) {
91447636 727 goto out1;
1c79356b 728 }
1c79356b
A
729
730 /*
6d2010ae 731 * Allocate and initialize the filesystem (mount_t)
1c79356b 732 */
b0d623f7 733 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
1c79356b 734 M_MOUNT, M_WAITOK);
b0d623f7 735 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
91447636 736 mntalloc = 1;
0b4e3aa0
A
737
738 /* Initialize the default IO constraints */
739 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
740 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
91447636
A
741 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
742 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
743 mp->mnt_devblocksize = DEV_BSIZE;
2d21ac55 744 mp->mnt_alignmentmask = PAGE_MASK;
b0d623f7
A
745 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
746 mp->mnt_ioscale = 1;
2d21ac55
A
747 mp->mnt_ioflags = 0;
748 mp->mnt_realrootvp = NULLVP;
749 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
91447636
A
750
751 TAILQ_INIT(&mp->mnt_vnodelist);
752 TAILQ_INIT(&mp->mnt_workerqueue);
753 TAILQ_INIT(&mp->mnt_newvnodes);
754 mount_lock_init(mp);
755 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 756 is_rwlock_locked = TRUE;
1c79356b 757 mp->mnt_op = vfsp->vfc_vfsops;
91447636 758 mp->mnt_vtable = vfsp;
91447636 759 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
1c79356b 760 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
fe8ab488
A
761 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
762 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1c79356b 763 mp->mnt_vnodecovered = vp;
2d21ac55 764 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
6d2010ae
A
765 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
766 mp->mnt_devbsdunit = 0;
1c79356b 767
91447636
A
768 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
769 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
6d2010ae 770
490019cf 771#if NFSCLIENT || DEVFS || ROUTEFS
6d2010ae
A
772 if (kernelmount)
773 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
774 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
775 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
fe8ab488 776#endif /* NFSCLIENT || DEVFS */
6d2010ae 777
1c79356b 778update:
5ba3f43e 779
1c79356b
A
780 /*
781 * Set the mount level flags.
782 */
6d2010ae 783 if (flags & MNT_RDONLY)
1c79356b 784 mp->mnt_flag |= MNT_RDONLY;
6d2010ae
A
785 else if (mp->mnt_flag & MNT_RDONLY) {
786 // disallow read/write upgrades of file systems that
787 // had the TYPENAME_OVERRIDE feature set.
788 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
789 error = EPERM;
790 goto out1;
791 }
1c79356b 792 mp->mnt_kern_flag |= MNTK_WANTRDWR;
6d2010ae 793 }
0b4e3aa0
A
794 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
795 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
6d2010ae
A
796 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
797 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
798 MNT_QUARANTINE | MNT_CPROTECT);
813fb2f6
A
799
800#if SECURE_KERNEL
801#if !CONFIG_MNT_SUID
802 /*
5ba3f43e 803 * On release builds of iOS based platforms, always enforce NOSUID on
813fb2f6
A
804 * all mounts. We do this here because we can catch update mounts as well as
805 * non-update mounts in this case.
806 */
807 mp->mnt_flag |= (MNT_NOSUID);
808#endif
809#endif
810
6d2010ae
A
811 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
812 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
813 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
814 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
815 MNT_QUARANTINE | MNT_CPROTECT);
2d21ac55
A
816
817#if CONFIG_MACF
6d2010ae 818 if (flags & MNT_MULTILABEL) {
2d21ac55
A
819 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
820 error = EINVAL;
821 goto out1;
822 }
823 mp->mnt_flag |= MNT_MULTILABEL;
824 }
825#endif
6d2010ae
A
826 /*
827 * Process device path for local file systems if requested
828 */
39037602
A
829 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS &&
830 !(internal_flags & KERNEL_MOUNT_SNAPSHOT)) {
6d2010ae 831 if (vfs_context_is64bit(ctx)) {
91447636 832 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
39037602 833 goto out1;
91447636
A
834 fsmountargs += sizeof(devpath);
835 } else {
b0d623f7 836 user32_addr_t tmp;
91447636 837 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
39037602 838 goto out1;
91447636
A
839 /* munge into LP64 addr */
840 devpath = CAST_USER_ADDR_T(tmp);
841 fsmountargs += sizeof(tmp);
842 }
843
6d2010ae 844 /* Lookup device and authorize access to it */
91447636 845 if ((devpath)) {
6d2010ae
A
846 struct nameidata nd;
847
848 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
849 if ( (error = namei(&nd)) )
91447636
A
850 goto out1;
851
3e170ce0 852 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
6d2010ae 853 devvp = nd.ni_vp;
91447636 854
6d2010ae 855 nameidone(&nd);
91447636
A
856
857 if (devvp->v_type != VBLK) {
858 error = ENOTBLK;
859 goto out2;
860 }
861 if (major(devvp->v_rdev) >= nblkdev) {
862 error = ENXIO;
863 goto out2;
864 }
865 /*
866 * If mount by non-root, then verify that user has necessary
867 * permissions on the device.
868 */
2d21ac55 869 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
6d2010ae
A
870 mode_t accessmode = KAUTH_VNODE_READ_DATA;
871
91447636
A
872 if ((mp->mnt_flag & MNT_RDONLY) == 0)
873 accessmode |= KAUTH_VNODE_WRITE_DATA;
2d21ac55 874 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
91447636
A
875 goto out2;
876 }
877 }
6d2010ae
A
878 /* On first mount, preflight and open device */
879 if (devpath && ((flags & MNT_UPDATE) == 0)) {
91447636
A
880 if ( (error = vnode_ref(devvp)) )
881 goto out2;
882 /*
883 * Disallow multiple mounts of the same device.
884 * Disallow mounting of a device that is currently in use
885 * (except for root, which might share swap device for miniroot).
886 * Flush out any old buffers remaining from a previous use.
887 */
888 if ( (error = vfs_mountedon(devvp)) )
889 goto out3;
39037602 890
91447636
A
891 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
892 error = EBUSY;
893 goto out3;
894 }
2d21ac55 895 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
91447636
A
896 error = ENOTBLK;
897 goto out3;
898 }
899 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
900 goto out3;
901
902 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
2d21ac55
A
903#if CONFIG_MACF
904 error = mac_vnode_check_open(ctx,
905 devvp,
906 ronly ? FREAD : FREAD|FWRITE);
907 if (error)
908 goto out3;
909#endif /* MAC */
910 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
91447636
A
911 goto out3;
912
913 mp->mnt_devvp = devvp;
914 device_vnode = devvp;
b0d623f7 915
6d2010ae
A
916 } else if ((mp->mnt_flag & MNT_RDONLY) &&
917 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
918 (device_vnode = mp->mnt_devvp)) {
919 dev_t dev;
920 int maj;
921 /*
922 * If upgrade to read-write by non-root, then verify
923 * that user has necessary permissions on the device.
924 */
925 vnode_getalways(device_vnode);
b0d623f7 926
6d2010ae 927 if (suser(vfs_context_ucred(ctx), NULL) &&
39037602 928 (error = vnode_authorize(device_vnode, NULL,
6d2010ae
A
929 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
930 ctx)) != 0) {
931 vnode_put(device_vnode);
932 goto out2;
933 }
b0d623f7 934
6d2010ae
A
935 /* Tell the device that we're upgrading */
936 dev = (dev_t)device_vnode->v_rdev;
937 maj = major(dev);
b0d623f7 938
6d2010ae
A
939 if ((u_int)maj >= (u_int)nblkdev)
940 panic("Volume mounted on a device with invalid major number.");
b0d623f7 941
6d2010ae
A
942 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
943 vnode_put(device_vnode);
91447636 944 device_vnode = NULLVP;
6d2010ae
A
945 if (error != 0) {
946 goto out2;
947 }
91447636
A
948 }
949 }
2d21ac55 950#if CONFIG_MACF
6d2010ae 951 if ((flags & MNT_UPDATE) == 0) {
2d21ac55
A
952 mac_mount_label_init(mp);
953 mac_mount_label_associate(ctx, mp);
954 }
6d2010ae
A
955 if (labelstr) {
956 if ((flags & MNT_UPDATE) != 0) {
957 error = mac_mount_check_label_update(ctx, mp);
2d21ac55
A
958 if (error != 0)
959 goto out3;
960 }
2d21ac55
A
961 }
962#endif
1c79356b
A
963 /*
964 * Mount the filesystem.
965 */
39037602
A
966 if (internal_flags & KERNEL_MOUNT_SNAPSHOT) {
967 error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT,
968 (caddr_t)fsmountargs, 0, ctx);
969 } else {
970 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
971 }
d12e1678 972
6d2010ae 973 if (flags & MNT_UPDATE) {
1c79356b
A
974 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
975 mp->mnt_flag &= ~MNT_RDONLY;
976 mp->mnt_flag &=~
977 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
978 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
979 if (error)
6d2010ae 980 mp->mnt_flag = flag; /* restore flag value */
91447636
A
981 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
982 lck_rw_done(&mp->mnt_rwlock);
743b1565 983 is_rwlock_locked = FALSE;
9bccf70c 984 if (!error)
2d21ac55 985 enablequotas(mp, ctx);
6d2010ae 986 goto exit;
1c79356b 987 }
6d2010ae 988
1c79356b
A
989 /*
990 * Put the new filesystem on the mount list after root.
991 */
6601e61a 992 if (error == 0) {
2d21ac55
A
993 struct vfs_attr vfsattr;
994#if CONFIG_MACF
995 if (vfs_flags(mp) & MNT_MULTILABEL) {
996 error = VFS_ROOT(mp, &rvp, ctx);
997 if (error) {
998 printf("%s() VFS_ROOT returned %d\n", __func__, error);
999 goto out3;
1000 }
2d21ac55 1001 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
b0d623f7
A
1002 /*
1003 * drop reference provided by VFS_ROOT
1004 */
1005 vnode_put(rvp);
1006
2d21ac55
A
1007 if (error)
1008 goto out3;
1009 }
1010#endif /* MAC */
1011
1012 vnode_lock_spin(vp);
1013 CLR(vp->v_flag, VMOUNT);
91447636
A
1014 vp->v_mountedhere = mp;
1015 vnode_unlock(vp);
1016
2d21ac55
A
1017 /*
1018 * taking the name_cache_lock exclusively will
1019 * insure that everyone is out of the fast path who
1020 * might be trying to use a now stale copy of
1021 * vp->v_mountedhere->mnt_realrootvp
1022 * bumping mount_generation causes the cached values
1023 * to be invalidated
1024 */
1025 name_cache_lock();
1026 mount_generation++;
1027 name_cache_unlock();
1028
b0d623f7
A
1029 error = vnode_ref(vp);
1030 if (error != 0) {
1031 goto out4;
1032 }
1033
1034 have_usecount = TRUE;
91447636 1035
2d21ac55 1036 error = checkdirs(vp, ctx);
6601e61a
A
1037 if (error != 0) {
1038 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1039 goto out4;
1040 }
39037602
A
1041 /*
1042 * there is no cleanup code here so I have made it void
91447636
A
1043 * we need to revisit this
1044 */
2d21ac55 1045 (void)VFS_START(mp, 0, ctx);
1c79356b 1046
6d2010ae
A
1047 if (mount_list_add(mp) != 0) {
1048 /*
1049 * The system is shutting down trying to umount
1050 * everything, so fail with a plausible errno.
1051 */
1052 error = EBUSY;
b0d623f7
A
1053 goto out4;
1054 }
6601e61a
A
1055 lck_rw_done(&mp->mnt_rwlock);
1056 is_rwlock_locked = FALSE;
1057
2d21ac55
A
1058 /* Check if this mounted file system supports EAs or named streams. */
1059 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1060 VFSATTR_INIT(&vfsattr);
1061 VFSATTR_WANTED(&vfsattr, f_capabilities);
1062 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
39037602 1063 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
2d21ac55
A
1064 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
1065 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
1066 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
1067 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1068 }
1069#if NAMEDSTREAMS
1070 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
1071 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
1072 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1073 }
1074#endif
1075 /* Check if this file system supports path from id lookups. */
1076 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
1077 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
1078 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1079 } else if (mp->mnt_flag & MNT_DOVOLFS) {
1080 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1081 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1082 }
39037602
A
1083
1084 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) &&
1085 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) {
1086 mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS;
1087 }
2d21ac55
A
1088 }
1089 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
1090 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1091 }
1092 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
1093 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
1094 }
1c79356b 1095 /* increment the operations count */
b0d623f7 1096 OSAddAtomic(1, &vfs_nummntops);
2d21ac55 1097 enablequotas(mp, ctx);
91447636
A
1098
1099 if (device_vnode) {
1100 device_vnode->v_specflags |= SI_MOUNTEDON;
1101
1102 /*
1103 * cache the IO attributes for the underlying physical media...
1104 * an error return indicates the underlying driver doesn't
1105 * support all the queries necessary... however, reasonable
1106 * defaults will have been set, so no reason to bail or care
1107 */
1108 vfs_init_io_attributes(device_vnode, mp);
39037602 1109 }
6601e61a
A
1110
1111 /* Now that mount is setup, notify the listeners */
6d2010ae 1112 vfs_notify_mount(pvp);
3e170ce0
A
1113 IOBSDMountChange(mp, kIOMountChangeMount);
1114
1c79356b 1115 } else {
6d2010ae
A
1116 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1117 if (mp->mnt_vnodelist.tqh_first != NULL) {
39037602 1118 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
6d2010ae
A
1119 mp->mnt_vtable->vfc_name, error);
1120 }
1121
2d21ac55 1122 vnode_lock_spin(vp);
1c79356b 1123 CLR(vp->v_flag, VMOUNT);
6601e61a 1124 vnode_unlock(vp);
91447636
A
1125 mount_list_lock();
1126 mp->mnt_vtable->vfc_refcount--;
1127 mount_list_unlock();
55e303ae 1128
91447636 1129 if (device_vnode ) {
91447636 1130 vnode_rele(device_vnode);
b0d623f7 1131 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
91447636
A
1132 }
1133 lck_rw_done(&mp->mnt_rwlock);
743b1565 1134 is_rwlock_locked = FALSE;
39037602 1135
6d2010ae
A
1136 /*
1137 * if we get here, we have a mount structure that needs to be freed,
1138 * but since the coveredvp hasn't yet been updated to point at it,
1139 * no need to worry about other threads holding a crossref on this mp
1140 * so it's ok to just free it
1141 */
91447636 1142 mount_lock_destroy(mp);
2d21ac55
A
1143#if CONFIG_MACF
1144 mac_mount_label_destroy(mp);
1145#endif
55e303ae 1146 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1c79356b 1147 }
6d2010ae 1148exit:
91447636 1149 /*
6d2010ae 1150 * drop I/O count on the device vp if there was one
91447636
A
1151 */
1152 if (devpath && devvp)
1153 vnode_put(devvp);
b0d623f7 1154
91447636 1155 return(error);
b0d623f7 1156
6d2010ae 1157/* Error condition exits */
6601e61a 1158out4:
2d21ac55 1159 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
39037602
A
1160
1161 /*
6d2010ae
A
1162 * If the mount has been placed on the covered vp,
1163 * it may have been discovered by now, so we have
1164 * to treat this just like an unmount
1165 */
1166 mount_lock_spin(mp);
1167 mp->mnt_lflag |= MNT_LDEAD;
1168 mount_unlock(mp);
1169
6601e61a 1170 if (device_vnode != NULLVP) {
b0d623f7 1171 vnode_rele(device_vnode);
2d21ac55
A
1172 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1173 ctx);
b0d623f7 1174 did_rele = TRUE;
6601e61a 1175 }
6d2010ae 1176
2d21ac55 1177 vnode_lock_spin(vp);
6d2010ae
A
1178
1179 mp->mnt_crossref++;
6601e61a 1180 vp->v_mountedhere = (mount_t) 0;
6d2010ae 1181
6601e61a 1182 vnode_unlock(vp);
6d2010ae 1183
b0d623f7
A
1184 if (have_usecount) {
1185 vnode_rele(vp);
1186 }
91447636 1187out3:
6d2010ae 1188 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
2d21ac55 1189 vnode_rele(devvp);
91447636
A
1190out2:
1191 if (devpath && devvp)
1192 vnode_put(devvp);
1193out1:
743b1565
A
1194 /* Release mnt_rwlock only when it was taken */
1195 if (is_rwlock_locked == TRUE) {
1196 lck_rw_done(&mp->mnt_rwlock);
1197 }
39037602 1198
6601e61a 1199 if (mntalloc) {
6d2010ae
A
1200 if (mp->mnt_crossref)
1201 mount_dropcrossref(mp, vp, 0);
1202 else {
1203 mount_lock_destroy(mp);
2d21ac55 1204#if CONFIG_MACF
6d2010ae 1205 mac_mount_label_destroy(mp);
2d21ac55 1206#endif
6d2010ae
A
1207 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1208 }
b0d623f7 1209 }
b0d623f7 1210 if (vfsp_ref) {
6601e61a
A
1211 mount_list_lock();
1212 vfsp->vfc_refcount--;
1213 mount_list_unlock();
6601e61a 1214 }
91447636
A
1215
1216 return(error);
1c79356b
A
1217}
1218
39037602 1219/*
b7266188
A
1220 * Flush in-core data, check for competing mount attempts,
1221 * and set VMOUNT
1222 */
6d2010ae
A
1223int
1224prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
b7266188 1225{
39236c6e
A
1226#if !CONFIG_MACF
1227#pragma unused(cnp,fsname)
1228#endif
b7266188
A
1229 struct vnode_attr va;
1230 int error;
1231
6d2010ae
A
1232 if (!skip_auth) {
1233 /*
1234 * If the user is not root, ensure that they own the directory
1235 * onto which we are attempting to mount.
1236 */
1237 VATTR_INIT(&va);
1238 VATTR_WANTED(&va, va_uid);
1239 if ((error = vnode_getattr(vp, &va, ctx)) ||
1240 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
39037602 1241 (!vfs_context_issuser(ctx)))) {
6d2010ae
A
1242 error = EPERM;
1243 goto out;
1244 }
b7266188
A
1245 }
1246
1247 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1248 goto out;
1249
1250 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1251 goto out;
1252
1253 if (vp->v_type != VDIR) {
1254 error = ENOTDIR;
1255 goto out;
1256 }
1257
1258 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1259 error = EBUSY;
1260 goto out;
1261 }
1262
1263#if CONFIG_MACF
1264 error = mac_mount_check_mount(ctx, vp,
1265 cnp, fsname);
1266 if (error != 0)
1267 goto out;
1268#endif
1269
1270 vnode_lock_spin(vp);
1271 SET(vp->v_flag, VMOUNT);
1272 vnode_unlock(vp);
1273
1274out:
1275 return error;
1276}
1277
6d2010ae
A
1278#if CONFIG_IMGSRC_ACCESS
1279
/* Debug tracing for the imageboot-source code; compiles to nothing unless DEBUG. */
#if DEBUG
#define IMGSRC_DEBUG(...) printf(__VA_ARGS__)
#else
#define IMGSRC_DEBUG(...) do { } while(0)
#endif
6d2010ae 1285
b7266188
A
1286static int
1287authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1288{
1289 struct nameidata nd;
6d2010ae 1290 vnode_t vp, realdevvp;
b7266188
A
1291 mode_t accessmode;
1292 int error;
1293
6d2010ae
A
1294 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1295 if ( (error = namei(&nd)) ) {
1296 IMGSRC_DEBUG("namei() failed with %d\n", error);
b7266188 1297 return error;
6d2010ae 1298 }
b7266188 1299
b7266188 1300 vp = nd.ni_vp;
b7266188 1301
6d2010ae
A
1302 if (!vnode_isblk(vp)) {
1303 IMGSRC_DEBUG("Not block device.\n");
b7266188
A
1304 error = ENOTBLK;
1305 goto out;
1306 }
6d2010ae
A
1307
1308 realdevvp = mp->mnt_devvp;
1309 if (realdevvp == NULLVP) {
1310 IMGSRC_DEBUG("No device backs the mount.\n");
b7266188
A
1311 error = ENXIO;
1312 goto out;
1313 }
6d2010ae
A
1314
1315 error = vnode_getwithref(realdevvp);
1316 if (error != 0) {
1317 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1318 goto out;
1319 }
1320
1321 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1322 IMGSRC_DEBUG("Wrong dev_t.\n");
1323 error = ENXIO;
1324 goto out1;
1325 }
1326
1327 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1328
b7266188
A
1329 /*
1330 * If mount by non-root, then verify that user has necessary
1331 * permissions on the device.
1332 */
1333 if (!vfs_context_issuser(ctx)) {
1334 accessmode = KAUTH_VNODE_READ_DATA;
1335 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1336 accessmode |= KAUTH_VNODE_WRITE_DATA;
6d2010ae
A
1337 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1338 IMGSRC_DEBUG("Access denied.\n");
1339 goto out1;
1340 }
b7266188
A
1341 }
1342
1343 *devvpp = vp;
6d2010ae
A
1344
1345out1:
1346 vnode_put(realdevvp);
b7266188 1347out:
6d2010ae 1348 nameidone(&nd);
b7266188
A
1349 if (error) {
1350 vnode_put(vp);
1351 }
1352
1353 return error;
1354}
1355
1356/*
1357 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1358 * and call checkdirs()
1359 */
1360static int
1361place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1362{
1363 int error;
1364
1365 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1366
1367 vnode_lock_spin(vp);
1368 CLR(vp->v_flag, VMOUNT);
1369 vp->v_mountedhere = mp;
1370 vnode_unlock(vp);
1371
1372 /*
1373 * taking the name_cache_lock exclusively will
1374 * insure that everyone is out of the fast path who
1375 * might be trying to use a now stale copy of
1376 * vp->v_mountedhere->mnt_realrootvp
1377 * bumping mount_generation causes the cached values
1378 * to be invalidated
1379 */
1380 name_cache_lock();
1381 mount_generation++;
1382 name_cache_unlock();
1383
1384 error = vnode_ref(vp);
1385 if (error != 0) {
1386 goto out;
1387 }
1388
1389 error = checkdirs(vp, ctx);
1390 if (error != 0) {
1391 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1392 vnode_rele(vp);
1393 goto out;
1394 }
1395
1396out:
1397 if (error != 0) {
1398 mp->mnt_vnodecovered = NULLVP;
1399 }
1400 return error;
1401}
1402
1403static void
1404undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1405{
1406 vnode_rele(vp);
1407 vnode_lock_spin(vp);
1408 vp->v_mountedhere = (mount_t)NULL;
1409 vnode_unlock(vp);
1410
1411 mp->mnt_vnodecovered = NULLVP;
1412}
1413
1414static int
1415mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1416{
1417 int error;
1418
1419 /* unmount in progress return error */
1420 mount_lock_spin(mp);
1421 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1422 mount_unlock(mp);
1423 return EBUSY;
1424 }
1425 mount_unlock(mp);
1426 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1427
1428 /*
1429 * We only allow the filesystem to be reloaded if it
1430 * is currently mounted read-only.
1431 */
1432 if ((flags & MNT_RELOAD) &&
1433 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1434 error = ENOTSUP;
1435 goto out;
1436 }
1437
1438 /*
1439 * Only root, or the user that did the original mount is
1440 * permitted to update it.
1441 */
1442 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
39037602 1443 (!vfs_context_issuser(ctx))) {
b7266188
A
1444 error = EPERM;
1445 goto out;
1446 }
1447#if CONFIG_MACF
1448 error = mac_mount_check_remount(ctx, mp);
1449 if (error != 0) {
1450 goto out;
1451 }
1452#endif
1453
1454out:
1455 if (error) {
1456 lck_rw_done(&mp->mnt_rwlock);
1457 }
1458
1459 return error;
1460}
1461
39037602 1462static void
b7266188
A
1463mount_end_update(mount_t mp)
1464{
1465 lck_rw_done(&mp->mnt_rwlock);
1466}
1467
1468static int
6d2010ae
A
1469get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1470{
1471 vnode_t vp;
1472
1473 if (height >= MAX_IMAGEBOOT_NESTING) {
1474 return EINVAL;
1475 }
1476
1477 vp = imgsrc_rootvnodes[height];
1478 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1479 *rvpp = vp;
1480 return 0;
1481 } else {
1482 return ENOENT;
1483 }
1484}
1485
1486static int
39037602
A
1487relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
1488 const char *fsname, vfs_context_t ctx,
6d2010ae 1489 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
b7266188
A
1490{
1491 int error;
1492 mount_t mp;
1493 boolean_t placed = FALSE;
6d2010ae 1494 vnode_t devvp = NULLVP;
b7266188
A
1495 struct vfstable *vfsp;
1496 user_addr_t devpath;
1497 char *old_mntonname;
6d2010ae
A
1498 vnode_t rvp;
1499 uint32_t height;
1500 uint32_t flags;
b7266188
A
1501
1502 /* If we didn't imageboot, nothing to move */
6d2010ae 1503 if (imgsrc_rootvnodes[0] == NULLVP) {
b7266188
A
1504 return EINVAL;
1505 }
1506
1507 /* Only root can do this */
1508 if (!vfs_context_issuser(ctx)) {
1509 return EPERM;
1510 }
1511
6d2010ae
A
1512 IMGSRC_DEBUG("looking for root vnode.\n");
1513
1514 /*
1515 * Get root vnode of filesystem we're moving.
1516 */
1517 if (by_index) {
1518 if (is64bit) {
1519 struct user64_mnt_imgsrc_args mia64;
1520 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1521 if (error != 0) {
1522 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1523 return error;
1524 }
1525
1526 height = mia64.mi_height;
1527 flags = mia64.mi_flags;
1528 devpath = mia64.mi_devpath;
1529 } else {
1530 struct user32_mnt_imgsrc_args mia32;
1531 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1532 if (error != 0) {
1533 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1534 return error;
1535 }
1536
1537 height = mia32.mi_height;
1538 flags = mia32.mi_flags;
1539 devpath = mia32.mi_devpath;
1540 }
1541 } else {
1542 /*
1543 * For binary compatibility--assumes one level of nesting.
1544 */
1545 if (is64bit) {
1546 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1547 return error;
1548 } else {
1549 user32_addr_t tmp;
1550 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1551 return error;
1552
1553 /* munge into LP64 addr */
1554 devpath = CAST_USER_ADDR_T(tmp);
1555 }
1556
1557 height = 0;
1558 flags = 0;
1559 }
1560
1561 if (flags != 0) {
1562 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1563 return EINVAL;
1564 }
1565
1566 error = get_imgsrc_rootvnode(height, &rvp);
b7266188 1567 if (error != 0) {
6d2010ae 1568 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
b7266188
A
1569 return error;
1570 }
1571
6d2010ae
A
1572 IMGSRC_DEBUG("got root vnode.\n");
1573
b7266188
A
1574 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1575
1576 /* Can only move once */
6d2010ae 1577 mp = vnode_mount(rvp);
b7266188 1578 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1579 IMGSRC_DEBUG("Already moved.\n");
b7266188
A
1580 error = EBUSY;
1581 goto out0;
1582 }
1583
6d2010ae
A
1584 IMGSRC_DEBUG("Starting updated.\n");
1585
b7266188
A
1586 /* Get exclusive rwlock on mount, authorize update on mp */
1587 error = mount_begin_update(mp , ctx, 0);
1588 if (error != 0) {
6d2010ae 1589 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
b7266188
A
1590 goto out0;
1591 }
1592
39037602 1593 /*
b7266188
A
1594 * It can only be moved once. Flag is set under the rwlock,
1595 * so we're now safe to proceed.
1596 */
1597 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1598 IMGSRC_DEBUG("Already moved [2]\n");
b7266188
A
1599 goto out1;
1600 }
39037602
A
1601
1602
6d2010ae 1603 IMGSRC_DEBUG("Preparing coveredvp.\n");
b7266188
A
1604
1605 /* Mark covered vnode as mount in progress, authorize placing mount on top */
6d2010ae 1606 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
b7266188 1607 if (error != 0) {
6d2010ae 1608 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
b7266188
A
1609 goto out1;
1610 }
39037602 1611
6d2010ae
A
1612 IMGSRC_DEBUG("Covered vp OK.\n");
1613
b7266188
A
1614 /* Sanity check the name caller has provided */
1615 vfsp = mp->mnt_vtable;
1616 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
6d2010ae 1617 IMGSRC_DEBUG("Wrong fs name.\n");
b7266188
A
1618 error = EINVAL;
1619 goto out2;
1620 }
1621
1622 /* Check the device vnode and update mount-from name, for local filesystems */
1623 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
6d2010ae 1624 IMGSRC_DEBUG("Local, doing device validation.\n");
b7266188
A
1625
1626 if (devpath != USER_ADDR_NULL) {
1627 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1628 if (error) {
6d2010ae 1629 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
b7266188
A
1630 goto out2;
1631 }
1632
1633 vnode_put(devvp);
1634 }
1635 }
1636
39037602 1637 /*
b7266188 1638 * Place mp on top of vnode, ref the vnode, call checkdirs(),
39037602 1639 * and increment the name cache's mount generation
b7266188 1640 */
6d2010ae
A
1641
1642 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
b7266188
A
1643 error = place_mount_and_checkdirs(mp, vp, ctx);
1644 if (error != 0) {
1645 goto out2;
1646 }
1647
1648 placed = TRUE;
1649
3e170ce0
A
1650 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1651 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
b7266188
A
1652
1653 /* Forbid future moves */
1654 mount_lock(mp);
1655 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1656 mount_unlock(mp);
1657
1658 /* Finally, add to mount list, completely ready to go */
6d2010ae
A
1659 if (mount_list_add(mp) != 0) {
1660 /*
1661 * The system is shutting down trying to umount
1662 * everything, so fail with a plausible errno.
1663 */
1664 error = EBUSY;
b7266188
A
1665 goto out3;
1666 }
1667
1668 mount_end_update(mp);
6d2010ae 1669 vnode_put(rvp);
b7266188
A
1670 FREE(old_mntonname, M_TEMP);
1671
6d2010ae
A
1672 vfs_notify_mount(pvp);
1673
b7266188
A
1674 return 0;
1675out3:
3e170ce0 1676 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
b7266188
A
1677
1678 mount_lock(mp);
1679 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1680 mount_unlock(mp);
1681
1682out2:
39037602 1683 /*
b7266188 1684 * Placing the mp on the vnode clears VMOUNT,
39037602 1685 * so cleanup is different after that point
b7266188
A
1686 */
1687 if (placed) {
1688 /* Rele the vp, clear VMOUNT and v_mountedhere */
1689 undo_place_on_covered_vp(mp, vp);
1690 } else {
1691 vnode_lock_spin(vp);
1692 CLR(vp->v_flag, VMOUNT);
1693 vnode_unlock(vp);
1694 }
1695out1:
1696 mount_end_update(mp);
1697
1698out0:
6d2010ae 1699 vnode_put(rvp);
b7266188
A
1700 FREE(old_mntonname, M_TEMP);
1701 return error;
1702}
1703
1704#endif /* CONFIG_IMGSRC_ACCESS */
1705
91447636 1706void
2d21ac55 1707enablequotas(struct mount *mp, vfs_context_t ctx)
9bccf70c 1708{
9bccf70c
A
1709 struct nameidata qnd;
1710 int type;
1711 char qfpath[MAXPATHLEN];
91447636
A
1712 const char *qfname = QUOTAFILENAME;
1713 const char *qfopsname = QUOTAOPSNAME;
1714 const char *qfextension[] = INITQFNAMES;
9bccf70c 1715
2d21ac55 1716 /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
b0d623f7
A
1717 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1718 return;
1719 }
39037602 1720 /*
9bccf70c
A
1721 * Enable filesystem disk quotas if necessary.
1722 * We ignore errors as this should not interfere with final mount
1723 */
1724 for (type=0; type < MAXQUOTAS; type++) {
2d21ac55 1725 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
6d2010ae
A
1726 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1727 CAST_USER_ADDR_T(qfpath), ctx);
91447636
A
1728 if (namei(&qnd) != 0)
1729 continue; /* option file to trigger quotas is not present */
1730 vnode_put(qnd.ni_vp);
1731 nameidone(&qnd);
2d21ac55 1732 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
91447636 1733
2d21ac55 1734 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
9bccf70c
A
1735 }
1736 return;
1737}
1738
2d21ac55
A
1739
1740static int
39037602 1741checkdirs_callback(proc_t p, void * arg)
2d21ac55
A
1742{
1743 struct cdirargs * cdrp = (struct cdirargs * )arg;
1744 vnode_t olddp = cdrp->olddp;
1745 vnode_t newdp = cdrp->newdp;
1746 struct filedesc *fdp;
1747 vnode_t tvp;
1748 vnode_t fdp_cvp;
1749 vnode_t fdp_rvp;
1750 int cdir_changed = 0;
1751 int rdir_changed = 0;
1752
1753 /*
1754 * XXX Also needs to iterate each thread in the process to see if it
1755 * XXX is using a per-thread current working directory, and, if so,
1756 * XXX update that as well.
1757 */
1758
1759 proc_fdlock(p);
1760 fdp = p->p_fd;
1761 if (fdp == (struct filedesc *)0) {
1762 proc_fdunlock(p);
1763 return(PROC_RETURNED);
1764 }
1765 fdp_cvp = fdp->fd_cdir;
1766 fdp_rvp = fdp->fd_rdir;
1767 proc_fdunlock(p);
1768
1769 if (fdp_cvp == olddp) {
1770 vnode_ref(newdp);
1771 tvp = fdp->fd_cdir;
1772 fdp_cvp = newdp;
1773 cdir_changed = 1;
1774 vnode_rele(tvp);
1775 }
1776 if (fdp_rvp == olddp) {
1777 vnode_ref(newdp);
1778 tvp = fdp->fd_rdir;
1779 fdp_rvp = newdp;
1780 rdir_changed = 1;
1781 vnode_rele(tvp);
1782 }
1783 if (cdir_changed || rdir_changed) {
1784 proc_fdlock(p);
1785 fdp->fd_cdir = fdp_cvp;
1786 fdp->fd_rdir = fdp_rvp;
1787 proc_fdunlock(p);
1788 }
1789 return(PROC_RETURNED);
1790}
1791
1792
1793
1c79356b
A
1794/*
1795 * Scan all active processes to see if any of them have a current
1796 * or root directory onto which the new filesystem has just been
1797 * mounted. If so, replace them with the new mount point.
1798 */
6601e61a 1799static int
2d21ac55 1800checkdirs(vnode_t olddp, vfs_context_t ctx)
1c79356b 1801{
2d21ac55
A
1802 vnode_t newdp;
1803 vnode_t tvp;
6601e61a 1804 int err;
2d21ac55 1805 struct cdirargs cdr;
1c79356b
A
1806
1807 if (olddp->v_usecount == 1)
6601e61a 1808 return(0);
2d21ac55 1809 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
2d21ac55
A
1810
1811 if (err != 0) {
6601e61a 1812#if DIAGNOSTIC
2d21ac55 1813 panic("mount: lost mount: error %d", err);
6601e61a
A
1814#endif
1815 return(err);
1816 }
91447636 1817
2d21ac55
A
1818 cdr.olddp = olddp;
1819 cdr.newdp = newdp;
1820 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1821 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
91447636 1822
1c79356b 1823 if (rootvnode == olddp) {
91447636 1824 vnode_ref(newdp);
fa4905b1 1825 tvp = rootvnode;
1c79356b 1826 rootvnode = newdp;
91447636 1827 vnode_rele(tvp);
1c79356b 1828 }
91447636
A
1829
1830 vnode_put(newdp);
6601e61a 1831 return(0);
1c79356b
A
1832}
1833
1834/*
1835 * Unmount a file system.
1836 *
1837 * Note: unmount takes a path to the vnode mounted on as argument,
1838 * not special file (as before).
1839 */
1c79356b
A
1840/* ARGSUSED */
1841int
b0d623f7 1842unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1c79356b 1843{
2d21ac55 1844 vnode_t vp;
1c79356b
A
1845 struct mount *mp;
1846 int error;
1847 struct nameidata nd;
2d21ac55 1848 vfs_context_t ctx = vfs_context_current();
91447636 1849
39037602 1850 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
2d21ac55 1851 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
1852 error = namei(&nd);
1853 if (error)
1c79356b
A
1854 return (error);
1855 vp = nd.ni_vp;
1856 mp = vp->v_mount;
91447636 1857 nameidone(&nd);
1c79356b 1858
2d21ac55
A
1859#if CONFIG_MACF
1860 error = mac_mount_check_umount(ctx, mp);
1861 if (error != 0) {
1862 vnode_put(vp);
1863 return (error);
1864 }
1865#endif
55e303ae
A
1866 /*
1867 * Must be the root of the filesystem
1868 */
1869 if ((vp->v_flag & VROOT) == 0) {
91447636 1870 vnode_put(vp);
55e303ae
A
1871 return (EINVAL);
1872 }
6601e61a 1873 mount_ref(mp, 0);
91447636 1874 vnode_put(vp);
6601e61a 1875 /* safedounmount consumes the mount ref */
2d21ac55
A
1876 return (safedounmount(mp, uap->flags, ctx));
1877}
1878
1879int
39037602 1880vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx)
2d21ac55
A
1881{
1882 mount_t mp;
1883
1884 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1885 if (mp == (mount_t)0) {
1886 return(ENOENT);
1887 }
1888 mount_ref(mp, 0);
1889 mount_iterdrop(mp);
1890 /* safedounmount consumes the mount ref */
1891 return(safedounmount(mp, flags, ctx));
55e303ae
A
1892}
1893
2d21ac55 1894
55e303ae 1895/*
6601e61a 1896 * The mount struct comes with a mount ref which will be consumed.
55e303ae
A
1897 * Do the actual file system unmount, prevent some common foot shooting.
1898 */
1899int
2d21ac55 1900safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
55e303ae
A
1901{
1902 int error;
2d21ac55 1903 proc_t p = vfs_context_proc(ctx);
55e303ae 1904
316670eb
A
1905 /*
1906 * If the file system is not responding and MNT_NOBLOCK
1907 * is set and not a forced unmount then return EBUSY.
1908 */
1909 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1910 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1911 error = EBUSY;
1912 goto out;
1913 }
1914
1c79356b 1915 /*
39037602 1916 * Skip authorization if the mount is tagged as permissive and
6d2010ae 1917 * this is not a forced-unmount attempt.
1c79356b 1918 */
6d2010ae
A
1919 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1920 /*
1921 * Only root, or the user that did the original mount is
1922 * permitted to unmount this filesystem.
1923 */
1924 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1925 (error = suser(kauth_cred_get(), &p->p_acflag)))
1926 goto out;
1927 }
1c79356b
A
1928 /*
1929 * Don't allow unmounting the root file system.
1930 */
6601e61a 1931 if (mp->mnt_flag & MNT_ROOTFS) {
2d21ac55 1932 error = EBUSY; /* the root is always busy */
6601e61a
A
1933 goto out;
1934 }
1c79356b 1935
b7266188
A
1936#ifdef CONFIG_IMGSRC_ACCESS
1937 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1938 error = EBUSY;
1939 goto out;
1940 }
1941#endif /* CONFIG_IMGSRC_ACCESS */
1942
2d21ac55
A
1943 return (dounmount(mp, flags, 1, ctx));
1944
6601e61a
A
1945out:
1946 mount_drop(mp, 0);
1947 return(error);
1c79356b
A
1948}
1949
1950/*
1951 * Do the actual file system unmount.
1952 */
1953int
2d21ac55 1954dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1c79356b 1955{
2d21ac55 1956 vnode_t coveredvp = (vnode_t)0;
1c79356b 1957 int error;
91447636 1958 int needwakeup = 0;
91447636
A
1959 int forcedunmount = 0;
1960 int lflags = 0;
593a1d5f 1961 struct vnode *devvp = NULLVP;
6d2010ae 1962#if CONFIG_TRIGGERS
39236c6e 1963 proc_t p = vfs_context_proc(ctx);
6d2010ae 1964 int did_vflush = 0;
39236c6e 1965 int pflags_save = 0;
6d2010ae 1966#endif /* CONFIG_TRIGGERS */
91447636 1967
813fb2f6
A
1968#if CONFIG_FSE
1969 if (!(flags & MNT_FORCE)) {
1970 fsevent_unmount(mp, ctx); /* has to come first! */
1971 }
1972#endif
1973
91447636 1974 mount_lock(mp);
fe8ab488
A
1975
1976 /*
1977 * If already an unmount in progress just return EBUSY.
1978 * Even a forced unmount cannot override.
1979 */
91447636 1980 if (mp->mnt_lflag & MNT_LUNMOUNT) {
fe8ab488 1981 if (withref != 0)
6601e61a 1982 mount_drop(mp, 1);
fe8ab488 1983 mount_unlock(mp);
9bccf70c
A
1984 return (EBUSY);
1985 }
39236c6e 1986
fe8ab488
A
1987 if (flags & MNT_FORCE) {
1988 forcedunmount = 1;
1989 mp->mnt_lflag |= MNT_LFORCE;
1990 }
1991
39236c6e
A
1992#if CONFIG_TRIGGERS
1993 if (flags & MNT_NOBLOCK && p != kernproc)
1994 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
1995#endif
1996
1c79356b 1997 mp->mnt_kern_flag |= MNTK_UNMOUNT;
91447636
A
1998 mp->mnt_lflag |= MNT_LUNMOUNT;
1999 mp->mnt_flag &=~ MNT_ASYNC;
2d21ac55
A
2000 /*
2001 * anyone currently in the fast path that
2002 * trips over the cached rootvp will be
2003 * dumped out and forced into the slow path
2004 * to regenerate a new cached value
2005 */
2006 mp->mnt_realrootvp = NULLVP;
91447636 2007 mount_unlock(mp);
39037602 2008
fe8ab488
A
2009 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
2010 /*
2011 * Force unmount any mounts in this filesystem.
2012 * If any unmounts fail - just leave them dangling.
2013 * Avoids recursion.
2014 */
2015 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
2016 }
2017
2d21ac55
A
2018 /*
2019 * taking the name_cache_lock exclusively will
2020 * insure that everyone is out of the fast path who
2021 * might be trying to use a now stale copy of
2022 * vp->v_mountedhere->mnt_realrootvp
2023 * bumping mount_generation causes the cached values
2024 * to be invalidated
2025 */
2026 name_cache_lock();
2027 mount_generation++;
2028 name_cache_unlock();
2029
2030
91447636 2031 lck_rw_lock_exclusive(&mp->mnt_rwlock);
6601e61a
A
2032 if (withref != 0)
2033 mount_drop(mp, 0);
91447636
A
2034 error = 0;
2035 if (forcedunmount == 0) {
2036 ubc_umount(mp); /* release cached vnodes */
2037 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2d21ac55 2038 error = VFS_SYNC(mp, MNT_WAIT, ctx);
91447636
A
2039 if (error) {
2040 mount_lock(mp);
2041 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2042 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2043 mp->mnt_lflag &= ~MNT_LFORCE;
2044 goto out;
2045 }
2046 }
2047 }
6d2010ae 2048
5ba3f43e
A
2049 /* free disk_conditioner_info structure for this mount */
2050 disk_conditioner_unmount(mp);
2051
3e170ce0
A
2052 IOBSDMountChange(mp, kIOMountChangeUnmount);
2053
6d2010ae
A
2054#if CONFIG_TRIGGERS
2055 vfs_nested_trigger_unmounts(mp, flags, ctx);
2056 did_vflush = 1;
39037602 2057#endif
91447636
A
2058 if (forcedunmount)
2059 lflags |= FORCECLOSE;
2060 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
2061 if ((forcedunmount == 0) && error) {
2062 mount_lock(mp);
9bccf70c 2063 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
2064 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2065 mp->mnt_lflag &= ~MNT_LFORCE;
9bccf70c
A
2066 goto out;
2067 }
91447636
A
2068
2069 /* make sure there are no one in the mount iterations or lookup */
2070 mount_iterdrain(mp);
2071
2d21ac55 2072 error = VFS_UNMOUNT(mp, flags, ctx);
1c79356b 2073 if (error) {
91447636
A
2074 mount_iterreset(mp);
2075 mount_lock(mp);
1c79356b 2076 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
2077 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2078 mp->mnt_lflag &= ~MNT_LFORCE;
1c79356b
A
2079 goto out;
2080 }
2081
2082 /* increment the operations count */
2083 if (!error)
b0d623f7 2084 OSAddAtomic(1, &vfs_nummntops);
91447636
A
2085
2086 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
593a1d5f
A
2087 /* hold an io reference and drop the usecount before close */
2088 devvp = mp->mnt_devvp;
593a1d5f
A
2089 vnode_getalways(devvp);
2090 vnode_rele(devvp);
2091 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
2d21ac55 2092 ctx);
b0d623f7 2093 vnode_clearmountedon(devvp);
593a1d5f 2094 vnode_put(devvp);
91447636
A
2095 }
2096 lck_rw_done(&mp->mnt_rwlock);
2097 mount_list_remove(mp);
2098 lck_rw_lock_exclusive(&mp->mnt_rwlock);
6d2010ae 2099
91447636 2100 /* mark the mount point hook in the vp but not drop the ref yet */
1c79356b 2101 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
fe8ab488
A
2102 /*
2103 * The covered vnode needs special handling. Trying to get an
2104 * iocount must not block here as this may lead to deadlocks
2105 * if the Filesystem to which the covered vnode belongs is
2106 * undergoing forced unmounts. Since we hold a usecount, the
2107 * vnode cannot be reused (it can, however, still be terminated)
2108 */
2109 vnode_getalways(coveredvp);
6d2010ae
A
2110 vnode_lock_spin(coveredvp);
2111
2112 mp->mnt_crossref++;
2113 coveredvp->v_mountedhere = (struct mount *)0;
fe8ab488 2114 CLR(coveredvp->v_flag, VMOUNT);
6d2010ae
A
2115
2116 vnode_unlock(coveredvp);
2117 vnode_put(coveredvp);
1c79356b 2118 }
91447636
A
2119
2120 mount_list_lock();
2121 mp->mnt_vtable->vfc_refcount--;
2122 mount_list_unlock();
2123
2124 cache_purgevfs(mp); /* remove cache entries for this file sys */
2125 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
2126 mount_lock(mp);
2127 mp->mnt_lflag |= MNT_LDEAD;
2128
2129 if (mp->mnt_lflag & MNT_LWAIT) {
2130 /*
2131 * do the wakeup here
2132 * in case we block in mount_refdrain
2133 * which will drop the mount lock
2134 * and allow anyone blocked in vfs_busy
2135 * to wakeup and see the LDEAD state
2136 */
2137 mp->mnt_lflag &= ~MNT_LWAIT;
2138 wakeup((caddr_t)mp);
1c79356b 2139 }
91447636 2140 mount_refdrain(mp);
1c79356b 2141out:
91447636
A
2142 if (mp->mnt_lflag & MNT_LWAIT) {
2143 mp->mnt_lflag &= ~MNT_LWAIT;
39037602 2144 needwakeup = 1;
91447636 2145 }
6d2010ae 2146
6d2010ae 2147#if CONFIG_TRIGGERS
39236c6e
A
2148 if (flags & MNT_NOBLOCK && p != kernproc) {
2149 // Restore P_NOREMOTEHANG bit to its previous value
2150 if ((pflags_save & P_NOREMOTEHANG) == 0)
2151 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2152 }
2153
39037602 2154 /*
6d2010ae 2155 * Callback and context are set together under the mount lock, and
39037602 2156 * never cleared, so we're safe to examine them here, drop the lock,
6d2010ae
A
2157 * and call out.
2158 */
2159 if (mp->mnt_triggercallback != NULL) {
2160 mount_unlock(mp);
2161 if (error == 0) {
2162 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2163 } else if (did_vflush) {
2164 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2165 }
2166 } else {
2167 mount_unlock(mp);
2168 }
39037602 2169#else
91447636 2170 mount_unlock(mp);
6d2010ae
A
2171#endif /* CONFIG_TRIGGERS */
2172
91447636
A
2173 lck_rw_done(&mp->mnt_rwlock);
2174
2175 if (needwakeup)
1c79356b 2176 wakeup((caddr_t)mp);
6d2010ae 2177
55e303ae 2178 if (!error) {
91447636 2179 if ((coveredvp != NULLVP)) {
fe8ab488 2180 vnode_t pvp = NULLVP;
b0d623f7 2181
fe8ab488
A
2182 /*
2183 * The covered vnode needs special handling. Trying to
2184 * get an iocount must not block here as this may lead
2185 * to deadlocks if the Filesystem to which the covered
2186 * vnode belongs is undergoing forced unmounts. Since we
2187 * hold a usecount, the vnode cannot be reused
2188 * (it can, however, still be terminated).
2189 */
2190 vnode_getalways(coveredvp);
6d2010ae
A
2191
2192 mount_dropcrossref(mp, coveredvp, 0);
fe8ab488
A
2193 /*
2194 * We'll _try_ to detect if this really needs to be
2195 * done. The coveredvp can only be in termination (or
2196 * terminated) if the coveredvp's mount point is in a
2197 * forced unmount (or has been) since we still hold the
2198 * ref.
2199 */
2200 if (!vnode_isrecycled(coveredvp)) {
2201 pvp = vnode_getparent(coveredvp);
6d2010ae 2202#if CONFIG_TRIGGERS
fe8ab488
A
2203 if (coveredvp->v_resolve) {
2204 vnode_trigger_rearm(coveredvp, ctx);
2205 }
2206#endif
2207 }
2208
2209 vnode_rele(coveredvp);
91447636 2210 vnode_put(coveredvp);
fe8ab488 2211 coveredvp = NULLVP;
b0d623f7
A
2212
2213 if (pvp) {
2214 lock_vnode_and_post(pvp, NOTE_WRITE);
2215 vnode_put(pvp);
2216 }
91447636
A
2217 } else if (mp->mnt_flag & MNT_ROOTFS) {
2218 mount_lock_destroy(mp);
2d21ac55
A
2219#if CONFIG_MACF
2220 mac_mount_label_destroy(mp);
2221#endif
91447636
A
2222 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2223 } else
2224 panic("dounmount: no coveredvp");
55e303ae 2225 }
1c79356b
A
2226 return (error);
2227}
2228
fe8ab488
A
2229/*
2230 * Unmount any mounts in this filesystem.
2231 */
2232void
2233dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2234{
2235 mount_t smp;
2236 fsid_t *fsids, fsid;
2237 int fsids_sz;
2238 int count = 0, i, m = 0;
2239 vnode_t vp;
2240
2241 mount_list_lock();
2242
2243 // Get an array to hold the submounts fsids.
2244 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2245 count++;
2246 fsids_sz = count * sizeof(fsid_t);
2247 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2248 if (fsids == NULL) {
2249 mount_list_unlock();
2250 goto out;
2251 }
2252 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2253
2254 /*
2255 * Fill the array with submount fsids.
2256 * Since mounts are always added to the tail of the mount list, the
39037602 2257 * list is always in mount order.
fe8ab488
A
2258 * For each mount check if the mounted-on vnode belongs to a
2259 * mount that's already added to our array of mounts to be unmounted.
2260 */
2261 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2262 vp = smp->mnt_vnodecovered;
2263 if (vp == NULL)
2264 continue;
2265 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2266 for (i = 0; i <= m; i++) {
2267 if (fsids[i].val[0] == fsid.val[0] &&
2268 fsids[i].val[1] == fsid.val[1]) {
2269 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2270 break;
2271 }
2272 }
2273 }
2274 mount_list_unlock();
2275
2276 // Unmount the submounts in reverse order. Ignore errors.
2277 for (i = m; i > 0; i--) {
2278 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2279 if (smp) {
2280 mount_ref(smp, 0);
2281 mount_iterdrop(smp);
2282 (void) dounmount(smp, flags, 1, ctx);
2283 }
2284 }
2285out:
2286 if (fsids)
2287 FREE(fsids, M_TEMP);
2288}
2289
91447636
A
2290void
2291mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2292{
6d2010ae
A
2293 vnode_lock(dp);
2294 mp->mnt_crossref--;
2295
2296 if (mp->mnt_crossref < 0)
2297 panic("mount cross refs -ve");
2298
2299 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
39037602 2300
91447636 2301 if (need_put)
6d2010ae 2302 vnode_put_locked(dp);
91447636 2303 vnode_unlock(dp);
6d2010ae
A
2304
2305 mount_lock_destroy(mp);
2306#if CONFIG_MACF
2307 mac_mount_label_destroy(mp);
2308#endif
2309 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2310 return;
2311 }
2312 if (need_put)
2313 vnode_put_locked(dp);
2314 vnode_unlock(dp);
91447636
A
2315}
2316
2317
1c79356b
A
/*
 * Sync each mounted filesystem.
 *
 * Tunables consulted by the sync paths below.
 */
#if DIAGNOSTIC
/* when non-zero, the sync paths call vfs_bufstats() */
int syncprt = 0;
#endif

/* when non-zero, the sync paths call vm_countdirtypages() */
int print_vmpage_stat = 0;

/* Sync time limit (sec) */
int sync_timeout = 60;
1c79356b 2327
a39ff7e2 2328
39037602 2329static int
fe8ab488 2330sync_callback(mount_t mp, __unused void *arg)
1c79356b 2331{
91447636 2332 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
fe8ab488
A
2333 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2334
2335 mp->mnt_flag &= ~MNT_ASYNC;
2336 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
2337 if (asyncflag)
2338 mp->mnt_flag |= MNT_ASYNC;
1c79356b 2339 }
1c79356b 2340
fe8ab488
A
2341 return (VFS_RETURNED);
2342}
91447636 2343
91447636
A
2344/* ARGSUSED */
2345int
b0d623f7 2346sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
91447636 2347{
fe8ab488 2348 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
b0d623f7 2349
fe8ab488
A
2350 if (print_vmpage_stat) {
2351 vm_countdirtypages();
2352 }
2353
2354#if DIAGNOSTIC
2355 if (syncprt)
2356 vfs_bufstats();
2357#endif /* DIAGNOSTIC */
2358 return 0;
2359}
2360
2361static void
a39ff7e2 2362hibernate_sync_thread(void *arg, __unused wait_result_t wr)
fe8ab488
A
2363{
2364 int *timeout = (int *) arg;
2365
2366 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2367
2368 if (timeout)
2369 wakeup((caddr_t) timeout);
2370 if (print_vmpage_stat) {
1c79356b 2371 vm_countdirtypages();
1c79356b 2372 }
39236c6e 2373
1c79356b
A
2374#if DIAGNOSTIC
2375 if (syncprt)
2376 vfs_bufstats();
2377#endif /* DIAGNOSTIC */
1c79356b
A
2378}
2379
2380/*
fe8ab488 2381 * Sync in a separate thread so we can time out if it blocks.
1c79356b 2382 */
fe8ab488 2383static int
a39ff7e2 2384hibernate_sync_async(int timeout)
2d21ac55 2385{
fe8ab488 2386 thread_t thd;
2d21ac55 2387 int error;
fe8ab488
A
2388 struct timespec ts = {timeout, 0};
2389
2390 lck_mtx_lock(sync_mtx_lck);
a39ff7e2
A
2391 if (kernel_thread_start(hibernate_sync_thread, &timeout, &thd) != KERN_SUCCESS) {
2392 printf("hibernate_sync_thread failed\n");
fe8ab488
A
2393 lck_mtx_unlock(sync_mtx_lck);
2394 return (0);
2395 }
2396
a39ff7e2 2397 error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "hibernate_sync_thread", &ts);
fe8ab488
A
2398 if (error) {
2399 printf("sync timed out: %d sec\n", timeout);
2400 }
2401 thread_deallocate(thd);
2402
2403 return (0);
2d21ac55
A
2404}
2405
fe8ab488
A
2406/*
2407 * An in-kernel sync for power management to call.
2408 */
2409__private_extern__ int
2410sync_internal(void)
2411{
a39ff7e2 2412 (void) hibernate_sync_async(sync_timeout);
fe8ab488
A
2413
2414 return 0;
2415} /* end of sync_internal call */
2416
2417/*
2418 * Change filesystem quotas.
2419 */
2420#if QUOTA
2421int
2422quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
1c79356b 2423{
2d21ac55 2424 struct mount *mp;
91447636
A
2425 int error, quota_cmd, quota_status;
2426 caddr_t datap;
2427 size_t fnamelen;
1c79356b 2428 struct nameidata nd;
2d21ac55 2429 vfs_context_t ctx = vfs_context_current();
91447636
A
2430 struct dqblk my_dqblk;
2431
b0d623f7 2432 AUDIT_ARG(uid, uap->uid);
55e303ae 2433 AUDIT_ARG(cmd, uap->cmd);
6d2010ae
A
2434 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2435 uap->path, ctx);
55e303ae
A
2436 error = namei(&nd);
2437 if (error)
1c79356b
A
2438 return (error);
2439 mp = nd.ni_vp->v_mount;
91447636
A
2440 vnode_put(nd.ni_vp);
2441 nameidone(&nd);
2442
2443 /* copyin any data we will need for downstream code */
2444 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2445
2446 switch (quota_cmd) {
2447 case Q_QUOTAON:
2448 /* uap->arg specifies a file from which to take the quotas */
2449 fnamelen = MAXPATHLEN;
2450 datap = kalloc(MAXPATHLEN);
2451 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2452 break;
2453 case Q_GETQUOTA:
2454 /* uap->arg is a pointer to a dqblk structure. */
2455 datap = (caddr_t) &my_dqblk;
2456 break;
2457 case Q_SETQUOTA:
2458 case Q_SETUSE:
2459 /* uap->arg is a pointer to a dqblk structure. */
2460 datap = (caddr_t) &my_dqblk;
2461 if (proc_is64bit(p)) {
2462 struct user_dqblk my_dqblk64;
2463 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
2464 if (error == 0) {
2465 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2466 }
2467 }
2468 else {
2469 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
2470 }
2471 break;
2472 case Q_QUOTASTAT:
2473 /* uap->arg is a pointer to an integer */
2474 datap = (caddr_t) &quota_status;
2475 break;
2476 default:
2477 datap = NULL;
2478 break;
2479 } /* switch */
2480
2481 if (error == 0) {
2d21ac55 2482 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
91447636
A
2483 }
2484
2485 switch (quota_cmd) {
2486 case Q_QUOTAON:
2487 if (datap != NULL)
2488 kfree(datap, MAXPATHLEN);
2489 break;
2490 case Q_GETQUOTA:
2491 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2492 if (error == 0) {
2493 if (proc_is64bit(p)) {
5ba3f43e
A
2494 struct user_dqblk my_dqblk64;
2495
2496 memset(&my_dqblk64, 0, sizeof(my_dqblk64));
91447636
A
2497 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2498 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
2499 }
2500 else {
2501 error = copyout(datap, uap->arg, sizeof (struct dqblk));
2502 }
2503 }
2504 break;
2505 case Q_QUOTASTAT:
2506 /* uap->arg is a pointer to an integer */
2507 if (error == 0) {
2508 error = copyout(datap, uap->arg, sizeof(quota_status));
2509 }
2510 break;
2511 default:
2512 break;
2513 } /* switch */
2514
2515 return (error);
1c79356b 2516}
2d21ac55
A
2517#else
2518int
b0d623f7 2519quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2d21ac55
A
2520{
2521 return (EOPNOTSUPP);
2522}
2523#endif /* QUOTA */
1c79356b
A
2524
2525/*
2526 * Get filesystem statistics.
2d21ac55
A
2527 *
2528 * Returns: 0 Success
2529 * namei:???
2530 * vfs_update_vfsstat:???
2531 * munge_statfs:EFAULT
1c79356b 2532 */
1c79356b
A
2533/* ARGSUSED */
2534int
b0d623f7 2535statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
1c79356b 2536{
91447636
A
2537 struct mount *mp;
2538 struct vfsstatfs *sp;
1c79356b
A
2539 int error;
2540 struct nameidata nd;
2d21ac55 2541 vfs_context_t ctx = vfs_context_current();
91447636 2542 vnode_t vp;
1c79356b 2543
39037602 2544 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2d21ac55 2545 UIO_USERSPACE, uap->path, ctx);
55e303ae 2546 error = namei(&nd);
39037602 2547 if (error != 0)
1c79356b 2548 return (error);
91447636
A
2549 vp = nd.ni_vp;
2550 mp = vp->v_mount;
2551 sp = &mp->mnt_vfsstat;
2552 nameidone(&nd);
2553
39037602
A
2554#if CONFIG_MACF
2555 error = mac_mount_check_stat(ctx, mp);
2556 if (error != 0)
2557 return (error);
2558#endif
2559
2d21ac55 2560 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
39037602 2561 if (error != 0) {
39236c6e 2562 vnode_put(vp);
1c79356b 2563 return (error);
39236c6e 2564 }
91447636
A
2565
2566 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
39236c6e 2567 vnode_put(vp);
91447636 2568 return (error);
1c79356b
A
2569}
2570
2571/*
2572 * Get filesystem statistics.
2573 */
1c79356b
A
2574/* ARGSUSED */
2575int
b0d623f7 2576fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
1c79356b 2577{
2d21ac55 2578 vnode_t vp;
1c79356b 2579 struct mount *mp;
91447636 2580 struct vfsstatfs *sp;
1c79356b
A
2581 int error;
2582
55e303ae
A
2583 AUDIT_ARG(fd, uap->fd);
2584
91447636 2585 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 2586 return (error);
55e303ae 2587
d1ecb069
A
2588 error = vnode_getwithref(vp);
2589 if (error) {
2590 file_drop(uap->fd);
2591 return (error);
2592 }
2593
91447636 2594 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
55e303ae 2595
91447636
A
2596 mp = vp->v_mount;
2597 if (!mp) {
d1ecb069
A
2598 error = EBADF;
2599 goto out;
91447636 2600 }
39037602
A
2601
2602#if CONFIG_MACF
2603 error = mac_mount_check_stat(vfs_context_current(), mp);
2604 if (error != 0)
2605 goto out;
2606#endif
2607
91447636 2608 sp = &mp->mnt_vfsstat;
39037602 2609 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
d1ecb069 2610 goto out;
91447636 2611 }
91447636
A
2612
2613 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2614
d1ecb069
A
2615out:
2616 file_drop(uap->fd);
2617 vnode_put(vp);
2618
91447636 2619 return (error);
1c79356b
A
2620}
2621
39037602
A
2622/*
2623 * Common routine to handle copying of statfs64 data to user space
2d21ac55 2624 */
39037602 2625static int
2d21ac55
A
2626statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2627{
2628 int error;
2629 struct statfs64 sfs;
39037602 2630
2d21ac55
A
2631 bzero(&sfs, sizeof(sfs));
2632
2633 sfs.f_bsize = sfsp->f_bsize;
2634 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2635 sfs.f_blocks = sfsp->f_blocks;
2636 sfs.f_bfree = sfsp->f_bfree;
2637 sfs.f_bavail = sfsp->f_bavail;
2638 sfs.f_files = sfsp->f_files;
2639 sfs.f_ffree = sfsp->f_ffree;
2640 sfs.f_fsid = sfsp->f_fsid;
2641 sfs.f_owner = sfsp->f_owner;
2642 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2643 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2644 sfs.f_fssubtype = sfsp->f_fssubtype;
6d2010ae
A
2645 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2646 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2647 } else {
2648 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2649 }
2d21ac55
A
2650 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2651 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2652
2653 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2654
2655 return(error);
2656}
2657
39037602
A
2658/*
2659 * Get file system statistics in 64-bit mode
2d21ac55
A
2660 */
2661int
b0d623f7 2662statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
2663{
2664 struct mount *mp;
2665 struct vfsstatfs *sp;
2666 int error;
2667 struct nameidata nd;
2668 vfs_context_t ctxp = vfs_context_current();
2669 vnode_t vp;
2670
39037602 2671 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2d21ac55
A
2672 UIO_USERSPACE, uap->path, ctxp);
2673 error = namei(&nd);
39037602 2674 if (error != 0)
2d21ac55
A
2675 return (error);
2676 vp = nd.ni_vp;
2677 mp = vp->v_mount;
2678 sp = &mp->mnt_vfsstat;
2679 nameidone(&nd);
2680
39037602
A
2681#if CONFIG_MACF
2682 error = mac_mount_check_stat(ctxp, mp);
2683 if (error != 0)
2684 return (error);
2685#endif
2686
2d21ac55 2687 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
39037602 2688 if (error != 0) {
39236c6e 2689 vnode_put(vp);
2d21ac55 2690 return (error);
39236c6e 2691 }
2d21ac55
A
2692
2693 error = statfs64_common(mp, sp, uap->buf);
39236c6e 2694 vnode_put(vp);
2d21ac55
A
2695
2696 return (error);
2697}
2698
39037602
A
2699/*
2700 * Get file system statistics in 64-bit mode
2d21ac55
A
2701 */
2702int
b0d623f7 2703fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
2704{
2705 struct vnode *vp;
2706 struct mount *mp;
2707 struct vfsstatfs *sp;
2708 int error;
2709
2710 AUDIT_ARG(fd, uap->fd);
2711
2712 if ( (error = file_vnode(uap->fd, &vp)) )
2713 return (error);
2714
d1ecb069
A
2715 error = vnode_getwithref(vp);
2716 if (error) {
2717 file_drop(uap->fd);
2718 return (error);
2719 }
2720
2d21ac55
A
2721 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2722
2723 mp = vp->v_mount;
2724 if (!mp) {
316670eb 2725 error = EBADF;
d1ecb069 2726 goto out;
2d21ac55 2727 }
39037602
A
2728
2729#if CONFIG_MACF
2730 error = mac_mount_check_stat(vfs_context_current(), mp);
2731 if (error != 0)
2732 goto out;
2733#endif
2734
2d21ac55
A
2735 sp = &mp->mnt_vfsstat;
2736 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
d1ecb069 2737 goto out;
2d21ac55 2738 }
2d21ac55
A
2739
2740 error = statfs64_common(mp, sp, uap->buf);
2741
d1ecb069
A
2742out:
2743 file_drop(uap->fd);
2744 vnode_put(vp);
2745
2d21ac55
A
2746 return (error);
2747}
91447636
A
2748
2749struct getfsstat_struct {
2750 user_addr_t sfsp;
2d21ac55 2751 user_addr_t *mp;
91447636
A
2752 int count;
2753 int maxcount;
2754 int flags;
2755 int error;
1c79356b 2756};
1c79356b 2757
91447636
A
2758
2759static int
2760getfsstat_callback(mount_t mp, void * arg)
2761{
39037602 2762
91447636
A
2763 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2764 struct vfsstatfs *sp;
91447636 2765 int error, my_size;
2d21ac55 2766 vfs_context_t ctx = vfs_context_current();
91447636
A
2767
2768 if (fstp->sfsp && fstp->count < fstp->maxcount) {
39037602
A
2769#if CONFIG_MACF
2770 error = mac_mount_check_stat(ctx, mp);
2771 if (error != 0) {
2772 fstp->error = error;
2773 return(VFS_RETURNED_DONE);
2774 }
2775#endif
91447636
A
2776 sp = &mp->mnt_vfsstat;
2777 /*
2778 * If MNT_NOWAIT is specified, do not refresh the
b0d623f7 2779 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
91447636 2780 */
b0d623f7 2781 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2d21ac55
A
2782 (error = vfs_update_vfsstat(mp, ctx,
2783 VFS_USER_EVENT))) {
91447636
A
2784 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2785 return(VFS_RETURNED);
1c79356b 2786 }
91447636
A
2787
2788 /*
2789 * Need to handle LP64 version of struct statfs
2790 */
2d21ac55 2791 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
91447636
A
2792 if (error) {
2793 fstp->error = error;
2794 return(VFS_RETURNED_DONE);
1c79356b 2795 }
91447636 2796 fstp->sfsp += my_size;
2d21ac55
A
2797
2798 if (fstp->mp) {
39236c6e 2799#if CONFIG_MACF
2d21ac55
A
2800 error = mac_mount_label_get(mp, *fstp->mp);
2801 if (error) {
2802 fstp->error = error;
2803 return(VFS_RETURNED_DONE);
2804 }
39236c6e 2805#endif
2d21ac55
A
2806 fstp->mp++;
2807 }
2808 }
91447636
A
2809 fstp->count++;
2810 return(VFS_RETURNED);
2811}
2812
2813/*
2814 * Get statistics on all filesystems.
2815 */
2816int
2817getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2d21ac55
A
2818{
2819 struct __mac_getfsstat_args muap;
2820
2821 muap.buf = uap->buf;
2822 muap.bufsize = uap->bufsize;
2823 muap.mac = USER_ADDR_NULL;
2824 muap.macsize = 0;
2825 muap.flags = uap->flags;
2826
2827 return (__mac_getfsstat(p, &muap, retval));
2828}
2829
b0d623f7
A
2830/*
2831 * __mac_getfsstat: Get MAC-related file system statistics
2832 *
2833 * Parameters: p (ignored)
2834 * uap User argument descriptor (see below)
39037602 2835 * retval Count of file system statistics (N stats)
b0d623f7
A
2836 *
2837 * Indirect: uap->bufsize Buffer size
2838 * uap->macsize MAC info size
2839 * uap->buf Buffer where information will be returned
2840 * uap->mac MAC info
2841 * uap->flags File system flags
39037602 2842 *
b0d623f7
A
2843 *
2844 * Returns: 0 Success
2845 * !0 Not success
2846 *
2847 */
2d21ac55
A
2848int
2849__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
91447636
A
2850{
2851 user_addr_t sfsp;
2d21ac55 2852 user_addr_t *mp;
b0d623f7 2853 size_t count, maxcount, bufsize, macsize;
91447636
A
2854 struct getfsstat_struct fst;
2855
b0d623f7
A
2856 bufsize = (size_t) uap->bufsize;
2857 macsize = (size_t) uap->macsize;
2858
91447636 2859 if (IS_64BIT_PROCESS(p)) {
b0d623f7 2860 maxcount = bufsize / sizeof(struct user64_statfs);
91447636
A
2861 }
2862 else {
b0d623f7 2863 maxcount = bufsize / sizeof(struct user32_statfs);
91447636
A
2864 }
2865 sfsp = uap->buf;
2866 count = 0;
2867
2d21ac55
A
2868 mp = NULL;
2869
2870#if CONFIG_MACF
2871 if (uap->mac != USER_ADDR_NULL) {
2872 u_int32_t *mp0;
2873 int error;
b0d623f7 2874 unsigned int i;
2d21ac55 2875
b0d623f7 2876 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2d21ac55
A
2877 if (count != maxcount)
2878 return (EINVAL);
2879
2880 /* Copy in the array */
b0d623f7
A
2881 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2882 if (mp0 == NULL) {
2883 return (ENOMEM);
2884 }
2885
2886 error = copyin(uap->mac, mp0, macsize);
2887 if (error) {
2888 FREE(mp0, M_MACTEMP);
2d21ac55 2889 return (error);
b0d623f7 2890 }
2d21ac55
A
2891
2892 /* Normalize to an array of user_addr_t */
2893 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
b0d623f7
A
2894 if (mp == NULL) {
2895 FREE(mp0, M_MACTEMP);
2896 return (ENOMEM);
2897 }
2898
2d21ac55
A
2899 for (i = 0; i < count; i++) {
2900 if (IS_64BIT_PROCESS(p))
2901 mp[i] = ((user_addr_t *)mp0)[i];
2902 else
2903 mp[i] = (user_addr_t)mp0[i];
2904 }
2905 FREE(mp0, M_MACTEMP);
2906 }
2907#endif
2908
2909
91447636 2910 fst.sfsp = sfsp;
2d21ac55 2911 fst.mp = mp;
91447636
A
2912 fst.flags = uap->flags;
2913 fst.count = 0;
2914 fst.error = 0;
2915 fst.maxcount = maxcount;
2916
39037602 2917
91447636
A
2918 vfs_iterate(0, getfsstat_callback, &fst);
2919
2d21ac55
A
2920 if (mp)
2921 FREE(mp, M_MACTEMP);
2922
91447636
A
2923 if (fst.error ) {
2924 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2925 return(fst.error);
2926 }
2927
2928 if (fst.sfsp && fst.count > fst.maxcount)
2929 *retval = fst.maxcount;
1c79356b 2930 else
91447636 2931 *retval = fst.count;
1c79356b
A
2932 return (0);
2933}
2934
2d21ac55
A
2935static int
2936getfsstat64_callback(mount_t mp, void * arg)
2937{
2938 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2939 struct vfsstatfs *sp;
2940 int error;
2941
2942 if (fstp->sfsp && fstp->count < fstp->maxcount) {
39037602
A
2943#if CONFIG_MACF
2944 error = mac_mount_check_stat(vfs_context_current(), mp);
2945 if (error != 0) {
2946 fstp->error = error;
2947 return(VFS_RETURNED_DONE);
2948 }
2949#endif
2d21ac55
A
2950 sp = &mp->mnt_vfsstat;
2951 /*
b0d623f7
A
2952 * If MNT_NOWAIT is specified, do not refresh the fsstat
2953 * cache. MNT_WAIT overrides MNT_NOWAIT.
2954 *
2955 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2956 * getfsstat, since the constants are out of the same
2957 * namespace.
2d21ac55 2958 */
b0d623f7
A
2959 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2960 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2d21ac55
A
2961 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2962 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2963 return(VFS_RETURNED);
2964 }
2965
2966 error = statfs64_common(mp, sp, fstp->sfsp);
2967 if (error) {
2968 fstp->error = error;
2969 return(VFS_RETURNED_DONE);
2970 }
2971 fstp->sfsp += sizeof(struct statfs64);
2972 }
2973 fstp->count++;
2974 return(VFS_RETURNED);
2975}
2976
2977/*
2978 * Get statistics on all file systems in 64 bit mode.
2979 */
2980int
2981getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2982{
2983 user_addr_t sfsp;
2984 int count, maxcount;
2985 struct getfsstat_struct fst;
2986
2987 maxcount = uap->bufsize / sizeof(struct statfs64);
2988
2989 sfsp = uap->buf;
2990 count = 0;
2991
2992 fst.sfsp = sfsp;
2993 fst.flags = uap->flags;
2994 fst.count = 0;
2995 fst.error = 0;
2996 fst.maxcount = maxcount;
2997
2998 vfs_iterate(0, getfsstat64_callback, &fst);
2999
3000 if (fst.error ) {
3001 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
3002 return(fst.error);
3003 }
3004
3005 if (fst.sfsp && fst.count > fst.maxcount)
3006 *retval = fst.maxcount;
3007 else
3008 *retval = fst.count;
3009
3010 return (0);
3011}
3012
fe8ab488
A
3013/*
3014 * gets the associated vnode with the file descriptor passed.
3015 * as input
3016 *
3017 * INPUT
3018 * ctx - vfs context of caller
3019 * fd - file descriptor for which vnode is required.
3020 * vpp - Pointer to pointer to vnode to be returned.
3021 *
3022 * The vnode is returned with an iocount so any vnode obtained
3023 * by this call needs a vnode_put
3024 *
3025 */
39037602 3026int
fe8ab488
A
3027vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
3028{
3029 int error;
3030 vnode_t vp;
3031 struct fileproc *fp;
3032 proc_t p = vfs_context_proc(ctx);
3033
3034 *vpp = NULLVP;
3035
3036 error = fp_getfvp(p, fd, &fp, &vp);
3037 if (error)
3038 return (error);
3039
3040 error = vnode_getwithref(vp);
3041 if (error) {
3042 (void)fp_drop(p, fd, fp, 0);
3043 return (error);
3044 }
3045
3046 (void)fp_drop(p, fd, fp, 0);
3047 *vpp = vp;
3048 return (error);
3049}
3050
3051/*
3052 * Wrapper function around namei to start lookup from a directory
3053 * specified by a file descriptor ni_dirfd.
3054 *
3055 * In addition to all the errors returned by namei, this call can
3056 * return ENOTDIR if the file descriptor does not refer to a directory.
3057 * and EBADF if the file descriptor is not valid.
3058 */
3059int
3060nameiat(struct nameidata *ndp, int dirfd)
3061{
3062 if ((dirfd != AT_FDCWD) &&
3063 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
3064 !(ndp->ni_cnd.cn_flags & USEDVP)) {
3065 int error = 0;
3066 char c;
3067
3068 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3069 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3070 if (error)
3071 return (error);
3072 } else {
3073 c = *((char *)(ndp->ni_dirp));
3074 }
3075
3076 if (c != '/') {
3077 vnode_t dvp_at;
3078
3079 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3080 &dvp_at);
3081 if (error)
3082 return (error);
3083
3084 if (vnode_vtype(dvp_at) != VDIR) {
3085 vnode_put(dvp_at);
3086 return (ENOTDIR);
3087 }
3088
3089 ndp->ni_dvp = dvp_at;
3090 ndp->ni_cnd.cn_flags |= USEDVP;
3091 error = namei(ndp);
3092 ndp->ni_cnd.cn_flags &= ~USEDVP;
3093 vnode_put(dvp_at);
3094 return (error);
3095 }
3096 }
3097
3098 return (namei(ndp));
3099}
3100
1c79356b
A
3101/*
3102 * Change current working directory to a given file descriptor.
3103 */
1c79356b 3104/* ARGSUSED */
2d21ac55
A
3105static int
3106common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
1c79356b 3107{
2d21ac55
A
3108 struct filedesc *fdp = p->p_fd;
3109 vnode_t vp;
3110 vnode_t tdp;
3111 vnode_t tvp;
1c79356b 3112 struct mount *mp;
1c79356b 3113 int error;
2d21ac55 3114 vfs_context_t ctx = vfs_context_current();
1c79356b 3115
b0d623f7 3116 AUDIT_ARG(fd, uap->fd);
2d21ac55
A
3117 if (per_thread && uap->fd == -1) {
3118 /*
3119 * Switching back from per-thread to per process CWD; verify we
3120 * in fact have one before proceeding. The only success case
3121 * for this code path is to return 0 preemptively after zapping
3122 * the thread structure contents.
3123 */
3124 thread_t th = vfs_context_thread(ctx);
3125 if (th) {
3126 uthread_t uth = get_bsdthread_info(th);
3127 tvp = uth->uu_cdir;
3128 uth->uu_cdir = NULLVP;
3129 if (tvp != NULLVP) {
3130 vnode_rele(tvp);
3131 return (0);
3132 }
3133 }
3134 return (EBADF);
3135 }
91447636
A
3136
3137 if ( (error = file_vnode(uap->fd, &vp)) )
3138 return(error);
3139 if ( (error = vnode_getwithref(vp)) ) {
3140 file_drop(uap->fd);
3141 return(error);
3142 }
55e303ae
A
3143
3144 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3145
2d21ac55 3146 if (vp->v_type != VDIR) {
1c79356b 3147 error = ENOTDIR;
2d21ac55
A
3148 goto out;
3149 }
3150
3151#if CONFIG_MACF
3152 error = mac_vnode_check_chdir(ctx, vp);
3153 if (error)
3154 goto out;
3155#endif
3156 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3157 if (error)
3158 goto out;
3159
1c79356b 3160 while (!error && (mp = vp->v_mountedhere) != NULL) {
91447636
A
3161 if (vfs_busy(mp, LK_NOWAIT)) {
3162 error = EACCES;
3163 goto out;
55e303ae 3164 }
2d21ac55 3165 error = VFS_ROOT(mp, &tdp, ctx);
91447636 3166 vfs_unbusy(mp);
1c79356b
A
3167 if (error)
3168 break;
91447636 3169 vnode_put(vp);
1c79356b
A
3170 vp = tdp;
3171 }
91447636
A
3172 if (error)
3173 goto out;
3174 if ( (error = vnode_ref(vp)) )
3175 goto out;
3176 vnode_put(vp);
3177
2d21ac55
A
3178 if (per_thread) {
3179 thread_t th = vfs_context_thread(ctx);
3180 if (th) {
3181 uthread_t uth = get_bsdthread_info(th);
3182 tvp = uth->uu_cdir;
3183 uth->uu_cdir = vp;
b0d623f7 3184 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
3185 } else {
3186 vnode_rele(vp);
3187 return (ENOENT);
3188 }
3189 } else {
3190 proc_fdlock(p);
3191 tvp = fdp->fd_cdir;
3192 fdp->fd_cdir = vp;
3193 proc_fdunlock(p);
3194 }
91447636
A
3195
3196 if (tvp)
3197 vnode_rele(tvp);
3198 file_drop(uap->fd);
3199
1c79356b 3200 return (0);
91447636
A
3201out:
3202 vnode_put(vp);
3203 file_drop(uap->fd);
3204
3205 return(error);
1c79356b
A
3206}
3207
2d21ac55 3208int
b0d623f7 3209fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3210{
3211 return common_fchdir(p, uap, 0);
3212}
3213
3214int
b0d623f7 3215__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3216{
3217 return common_fchdir(p, (void *)uap, 1);
3218}
3219
1c79356b 3220/*
b0d623f7 3221 * Change current working directory (".").
2d21ac55
A
3222 *
3223 * Returns: 0 Success
3224 * change_dir:ENOTDIR
3225 * change_dir:???
3226 * vnode_ref:ENOENT No such file or directory
1c79356b 3227 */
1c79356b 3228/* ARGSUSED */
2d21ac55
A
3229static int
3230common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
1c79356b 3231{
2d21ac55 3232 struct filedesc *fdp = p->p_fd;
1c79356b
A
3233 int error;
3234 struct nameidata nd;
2d21ac55
A
3235 vnode_t tvp;
3236 vfs_context_t ctx = vfs_context_current();
91447636 3237
39037602 3238 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
2d21ac55
A
3239 UIO_USERSPACE, uap->path, ctx);
3240 error = change_dir(&nd, ctx);
55e303ae 3241 if (error)
1c79356b 3242 return (error);
91447636
A
3243 if ( (error = vnode_ref(nd.ni_vp)) ) {
3244 vnode_put(nd.ni_vp);
3245 return (error);
3246 }
3247 /*
3248 * drop the iocount we picked up in change_dir
3249 */
3250 vnode_put(nd.ni_vp);
3251
2d21ac55
A
3252 if (per_thread) {
3253 thread_t th = vfs_context_thread(ctx);
3254 if (th) {
3255 uthread_t uth = get_bsdthread_info(th);
3256 tvp = uth->uu_cdir;
3257 uth->uu_cdir = nd.ni_vp;
b0d623f7 3258 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
3259 } else {
3260 vnode_rele(nd.ni_vp);
3261 return (ENOENT);
3262 }
3263 } else {
3264 proc_fdlock(p);
3265 tvp = fdp->fd_cdir;
3266 fdp->fd_cdir = nd.ni_vp;
3267 proc_fdunlock(p);
3268 }
91447636
A
3269
3270 if (tvp)
3271 vnode_rele(tvp);
3272
1c79356b
A
3273 return (0);
3274}
3275
b0d623f7
A
3276
3277/*
3278 * chdir
3279 *
3280 * Change current working directory (".") for the entire process
3281 *
3282 * Parameters: p Process requesting the call
3283 * uap User argument descriptor (see below)
3284 * retval (ignored)
3285 *
3286 * Indirect parameters: uap->path Directory path
3287 *
3288 * Returns: 0 Success
3289 * common_chdir: ENOTDIR
3290 * common_chdir: ENOENT No such file or directory
3291 * common_chdir: ???
3292 *
3293 */
2d21ac55 3294int
b0d623f7 3295chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3296{
3297 return common_chdir(p, (void *)uap, 0);
3298}
3299
b0d623f7
A
3300/*
3301 * __pthread_chdir
3302 *
3303 * Change current working directory (".") for a single thread
3304 *
3305 * Parameters: p Process requesting the call
3306 * uap User argument descriptor (see below)
3307 * retval (ignored)
3308 *
3309 * Indirect parameters: uap->path Directory path
3310 *
3311 * Returns: 0 Success
3312 * common_chdir: ENOTDIR
3313 * common_chdir: ENOENT No such file or directory
3314 * common_chdir: ???
3315 *
3316 */
2d21ac55 3317int
b0d623f7 3318__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3319{
3320 return common_chdir(p, (void *)uap, 1);
3321}
3322
3323
1c79356b
A
3324/*
3325 * Change notion of root (``/'') directory.
3326 */
1c79356b
A
3327/* ARGSUSED */
3328int
b0d623f7 3329chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
1c79356b 3330{
2d21ac55 3331 struct filedesc *fdp = p->p_fd;
1c79356b
A
3332 int error;
3333 struct nameidata nd;
2d21ac55
A
3334 vnode_t tvp;
3335 vfs_context_t ctx = vfs_context_current();
1c79356b 3336
91447636 3337 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
1c79356b
A
3338 return (error);
3339
39037602 3340 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
2d21ac55
A
3341 UIO_USERSPACE, uap->path, ctx);
3342 error = change_dir(&nd, ctx);
55e303ae 3343 if (error)
1c79356b
A
3344 return (error);
3345
2d21ac55
A
3346#if CONFIG_MACF
3347 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3348 &nd.ni_cnd);
3349 if (error) {
91447636
A
3350 vnode_put(nd.ni_vp);
3351 return (error);
3352 }
2d21ac55
A
3353#endif
3354
91447636
A
3355 if ( (error = vnode_ref(nd.ni_vp)) ) {
3356 vnode_put(nd.ni_vp);
1c79356b
A
3357 return (error);
3358 }
91447636 3359 vnode_put(nd.ni_vp);
1c79356b 3360
91447636 3361 proc_fdlock(p);
fa4905b1 3362 tvp = fdp->fd_rdir;
1c79356b 3363 fdp->fd_rdir = nd.ni_vp;
91447636
A
3364 fdp->fd_flags |= FD_CHROOT;
3365 proc_fdunlock(p);
3366
fa4905b1 3367 if (tvp != NULL)
91447636
A
3368 vnode_rele(tvp);
3369
1c79356b
A
3370 return (0);
3371}
3372
3373/*
3374 * Common routine for chroot and chdir.
2d21ac55
A
3375 *
3376 * Returns: 0 Success
3377 * ENOTDIR Not a directory
3378 * namei:??? [anything namei can return]
3379 * vnode_authorize:??? [anything vnode_authorize can return]
1c79356b
A
3380 */
3381static int
91447636 3382change_dir(struct nameidata *ndp, vfs_context_t ctx)
1c79356b 3383{
2d21ac55 3384 vnode_t vp;
1c79356b
A
3385 int error;
3386
91447636 3387 if ((error = namei(ndp)))
1c79356b 3388 return (error);
91447636 3389 nameidone(ndp);
1c79356b 3390 vp = ndp->ni_vp;
2d21ac55
A
3391
3392 if (vp->v_type != VDIR) {
91447636 3393 vnode_put(vp);
2d21ac55
A
3394 return (ENOTDIR);
3395 }
3396
3397#if CONFIG_MACF
3398 error = mac_vnode_check_chdir(ctx, vp);
3399 if (error) {
3400 vnode_put(vp);
3401 return (error);
3402 }
3403#endif
3404
3405 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3406 if (error) {
3407 vnode_put(vp);
3408 return (error);
3409 }
91447636 3410
1c79356b
A
3411 return (error);
3412}
3413
fe8ab488
A
3414/*
3415 * Allocate the vnode data (for directories) associated with the file glob.
3416 */
3417struct fd_vn_data *
3418fg_vn_data_alloc(void)
3419{
3420 struct fd_vn_data *fvdata;
3421
3422 /* Allocate per fd vnode data */
3423 MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
3424 M_FD_VN_DATA, M_WAITOK | M_ZERO);
3425 lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
3426 return fvdata;
3427}
3428
3429/*
3430 * Free the vnode data (for directories) associated with the file glob.
3431 */
3432void
3433fg_vn_data_free(void *fgvndata)
3434{
3435 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3436
3437 if (fvdata->fv_buf)
3438 FREE(fvdata->fv_buf, M_FD_DIRBUF);
3439 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3440 FREE(fvdata, M_FD_VN_DATA);
3441}
3442
1c79356b
A
3443/*
3444 * Check permissions, allocate an open file structure,
3445 * and call the device open routine if any.
2d21ac55
A
3446 *
3447 * Returns: 0 Success
3448 * EINVAL
3449 * EINTR
3450 * falloc:ENFILE
3451 * falloc:EMFILE
3452 * falloc:ENOMEM
3453 * vn_open_auth:???
3454 * dupfdopen:???
3455 * VNOP_ADVLOCK:???
3456 * vnode_setsize:???
b0d623f7
A
3457 *
3458 * XXX Need to implement uid, gid
1c79356b 3459 */
2d21ac55 3460int
39236c6e
A
3461open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3462 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3463 int32_t *retval)
1c79356b 3464{
2d21ac55
A
3465 proc_t p = vfs_context_proc(ctx);
3466 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2d21ac55
A
3467 struct fileproc *fp;
3468 vnode_t vp;
91447636 3469 int flags, oflags;
1c79356b
A
3470 int type, indx, error;
3471 struct flock lf;
3e170ce0 3472 struct vfs_context context;
ccc36f2f 3473
91447636 3474 oflags = uflags;
ccc36f2f
A
3475
3476 if ((oflags & O_ACCMODE) == O_ACCMODE)
3477 return(EINVAL);
3e170ce0 3478
91447636 3479 flags = FFLAGS(uflags);
3e170ce0
A
3480 CLR(flags, FENCRYPTED);
3481 CLR(flags, FUNENCRYPTED);
91447636
A
3482
3483 AUDIT_ARG(fflags, oflags);
3484 AUDIT_ARG(mode, vap->va_mode);
3485
39236c6e
A
3486 if ((error = falloc_withalloc(p,
3487 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
1c79356b 3488 return (error);
91447636 3489 }
2d21ac55 3490 uu->uu_dupfd = -indx - 1;
91447636 3491
2d21ac55
A
3492 if ((error = vn_open_auth(ndp, &flags, vap))) {
3493 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
39236c6e 3494 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
2d21ac55 3495 fp_drop(p, indx, NULL, 0);
91447636
A
3496 *retval = indx;
3497 return (0);
3498 }
1c79356b
A
3499 }
3500 if (error == ERESTART)
91447636
A
3501 error = EINTR;
3502 fp_free(p, indx, fp);
1c79356b
A
3503 return (error);
3504 }
2d21ac55
A
3505 uu->uu_dupfd = 0;
3506 vp = ndp->ni_vp;
55e303ae 3507
3e170ce0 3508 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
91447636
A
3509 fp->f_fglob->fg_ops = &vnops;
3510 fp->f_fglob->fg_data = (caddr_t)vp;
3511
1c79356b
A
3512 if (flags & (O_EXLOCK | O_SHLOCK)) {
3513 lf.l_whence = SEEK_SET;
3514 lf.l_start = 0;
3515 lf.l_len = 0;
3516 if (flags & O_EXLOCK)
3517 lf.l_type = F_WRLCK;
3518 else
3519 lf.l_type = F_RDLCK;
3520 type = F_FLOCK;
3521 if ((flags & FNONBLOCK) == 0)
3522 type |= F_WAIT;
2d21ac55
A
3523#if CONFIG_MACF
3524 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3525 F_SETLK, &lf);
3526 if (error)
3527 goto bad;
3528#endif
39236c6e 3529 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
55e303ae 3530 goto bad;
91447636 3531 fp->f_fglob->fg_flag |= FHASLOCK;
1c79356b 3532 }
55e303ae 3533
00867663
A
3534#if DEVELOPMENT || DEBUG
3535 /*
3536 * XXX VSWAP: Check for entitlements or special flag here
3537 * so we can restrict access appropriately.
3538 */
3539#else /* DEVELOPMENT || DEBUG */
3540
3541 if (vnode_isswap(vp) && (flags & (FWRITE | O_TRUNC)) && (ctx != vfs_context_kernel())) {
3542 /* block attempt to write/truncate swapfile */
3543 error = EPERM;
3544 goto bad;
3545 }
3546#endif /* DEVELOPMENT || DEBUG */
3547
91447636
A
3548 /* try to truncate by setting the size attribute */
3549 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3550 goto bad;
55e303ae 3551
fe8ab488
A
3552 /*
3553 * For directories we hold some additional information in the fd.
3554 */
3555 if (vnode_vtype(vp) == VDIR) {
3556 fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
3557 } else {
3558 fp->f_fglob->fg_vn_data = NULL;
2d21ac55
A
3559 }
3560
91447636 3561 vnode_put(vp);
55e303ae 3562
3e170ce0
A
3563 /*
3564 * The first terminal open (without a O_NOCTTY) by a session leader
3565 * results in it being set as the controlling terminal.
3566 */
3567 if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
3568 !(flags & O_NOCTTY)) {
3569 int tmp = 0;
3570
3571 (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
3572 (caddr_t)&tmp, ctx);
3573 }
3574
91447636 3575 proc_fdlock(p);
6d2010ae
A
3576 if (flags & O_CLOEXEC)
3577 *fdflags(p, indx) |= UF_EXCLOSE;
39236c6e
A
3578 if (flags & O_CLOFORK)
3579 *fdflags(p, indx) |= UF_FORKCLOSE;
6601e61a 3580 procfdtbl_releasefd(p, indx, NULL);
39037602
A
3581
3582#if CONFIG_SECLUDED_MEMORY
3583 if (secluded_for_filecache &&
3584 FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE &&
3585 vnode_vtype(vp) == VREG) {
3586 memory_object_control_t moc;
3587
3588 moc = ubc_getobject(vp, UBC_FLAGS_NONE);
3589
3590 if (moc == MEMORY_OBJECT_CONTROL_NULL) {
3591 /* nothing to do... */
3592 } else if (fp->f_fglob->fg_flag & FWRITE) {
3593 /* writable -> no longer eligible for secluded pages */
3594 memory_object_mark_eligible_for_secluded(moc,
3595 FALSE);
3596 } else if (secluded_for_filecache == 1) {
3597 char pathname[32] = { 0, };
3598 size_t copied;
3599 /* XXX FBDP: better way to detect /Applications/ ? */
3600 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3601 copyinstr(ndp->ni_dirp,
3602 pathname,
3603 sizeof (pathname),
3604 &copied);
3605 } else {
3606 copystr(CAST_DOWN(void *, ndp->ni_dirp),
3607 pathname,
3608 sizeof (pathname),
3609 &copied);
3610 }
3611 pathname[sizeof (pathname) - 1] = '\0';
3612 if (strncmp(pathname,
3613 "/Applications/",
3614 strlen("/Applications/")) == 0 &&
3615 strncmp(pathname,
3616 "/Applications/Camera.app/",
3617 strlen("/Applications/Camera.app/")) != 0) {
3618 /*
3619 * not writable
3620 * AND from "/Applications/"
3621 * AND not from "/Applications/Camera.app/"
3622 * ==> eligible for secluded
3623 */
3624 memory_object_mark_eligible_for_secluded(moc,
3625 TRUE);
3626 }
3627 } else if (secluded_for_filecache == 2) {
5ba3f43e
A
3628#if __arm64__
3629#define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
3630#elif __arm__
3631#define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
3632#else
39037602 3633/* not implemented... */
5ba3f43e 3634#endif
39037602
A
3635 if (!strncmp(vp->v_name,
3636 DYLD_SHARED_CACHE_NAME,
3637 strlen(DYLD_SHARED_CACHE_NAME)) ||
3638 !strncmp(vp->v_name,
3639 "dyld",
3640 strlen(vp->v_name)) ||
3641 !strncmp(vp->v_name,
3642 "launchd",
3643 strlen(vp->v_name)) ||
3644 !strncmp(vp->v_name,
3645 "Camera",
3646 strlen(vp->v_name)) ||
3647 !strncmp(vp->v_name,
3648 "mediaserverd",
3649 strlen(vp->v_name))) {
3650 /*
3651 * This file matters when launching Camera:
3652 * do not store its contents in the secluded
3653 * pool that will be drained on Camera launch.
3654 */
3655 memory_object_mark_eligible_for_secluded(moc,
3656 FALSE);
3657 }
3658 }
3659 }
3660#endif /* CONFIG_SECLUDED_MEMORY */
3661
91447636
A
3662 fp_drop(p, indx, fp, 1);
3663 proc_fdunlock(p);
3664
1c79356b 3665 *retval = indx;
91447636 3666
1c79356b 3667 return (0);
55e303ae 3668bad:
3e170ce0 3669 context = *vfs_context_current();
2d21ac55 3670 context.vc_ucred = fp->f_fglob->fg_cred;
39037602 3671
fe8ab488
A
3672 if ((fp->f_fglob->fg_flag & FHASLOCK) &&
3673 (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
3674 lf.l_whence = SEEK_SET;
3675 lf.l_start = 0;
3676 lf.l_len = 0;
3677 lf.l_type = F_UNLCK;
39037602 3678
fe8ab488
A
3679 (void)VNOP_ADVLOCK(
3680 vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
3681 }
2d21ac55
A
3682
3683 vn_close(vp, fp->f_fglob->fg_flag, &context);
91447636
A
3684 vnode_put(vp);
3685 fp_free(p, indx, fp);
3686
55e303ae 3687 return (error);
1c79356b
A
3688}
3689
fe8ab488
A
3690/*
3691 * While most of the *at syscall handlers can call nameiat() which
3692 * is a wrapper around namei, the use of namei and initialisation
3693 * of nameidata are far removed and in different functions - namei
3694 * gets called in vn_open_auth for open1. So we'll just do here what
3695 * nameiat() does.
3696 */
3697static int
3698open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3699 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
3700 int dirfd)
3701{
3702 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
3703 int error;
3704 char c;
3705
3706 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3707 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3708 if (error)
3709 return (error);
3710 } else {
3711 c = *((char *)(ndp->ni_dirp));
3712 }
3713
3714 if (c != '/') {
3715 vnode_t dvp_at;
3716
3717 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3718 &dvp_at);
3719 if (error)
3720 return (error);
3721
3722 if (vnode_vtype(dvp_at) != VDIR) {
3723 vnode_put(dvp_at);
3724 return (ENOTDIR);
3725 }
3726
3727 ndp->ni_dvp = dvp_at;
3728 ndp->ni_cnd.cn_flags |= USEDVP;
3729 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
3730 retval);
3731 vnode_put(dvp_at);
3732 return (error);
3733 }
3734 }
3735
3736 return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
3737}
3738
0c530ab8 3739/*
b0d623f7 3740 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
0c530ab8
A
3741 *
3742 * Parameters: p Process requesting the open
3743 * uap User argument descriptor (see below)
3744 * retval Pointer to an area to receive the
3745 * return value from the system call
3746 *
3747 * Indirect: uap->path Path to open (same as 'open')
3748 * uap->flags Flags to open (same as 'open')
3749 * uap->uid UID to set, if creating
3750 * uap->gid GID to set, if creating
3751 * uap->mode File mode, if creating (same as 'open')
3752 * uap->xsecurity ACL to set, if creating
3753 *
3754 * Returns: 0 Success
3755 * !0 errno value
3756 *
3757 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3758 *
3759 * XXX: We should enumerate the possible errno values here, and where
3760 * in the code they originated.
3761 */
1c79356b 3762int
b0d623f7 3763open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
91447636 3764{
2d21ac55 3765 struct filedesc *fdp = p->p_fd;
91447636
A
3766 int ciferror;
3767 kauth_filesec_t xsecdst;
3768 struct vnode_attr va;
2d21ac55 3769 struct nameidata nd;
91447636
A
3770 int cmode;
3771
b0d623f7
A
3772 AUDIT_ARG(owner, uap->uid, uap->gid);
3773
91447636
A
3774 xsecdst = NULL;
3775 if ((uap->xsecurity != USER_ADDR_NULL) &&
3776 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3777 return ciferror;
3778
91447636
A
3779 VATTR_INIT(&va);
3780 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3781 VATTR_SET(&va, va_mode, cmode);
3782 if (uap->uid != KAUTH_UID_NONE)
3783 VATTR_SET(&va, va_uid, uap->uid);
3784 if (uap->gid != KAUTH_GID_NONE)
3785 VATTR_SET(&va, va_gid, uap->gid);
3786 if (xsecdst != NULL)
3787 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3788
6d2010ae
A
3789 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3790 uap->path, vfs_context_current());
2d21ac55 3791
39236c6e
A
3792 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3793 fileproc_alloc_init, NULL, retval);
91447636
A
3794 if (xsecdst != NULL)
3795 kauth_filesec_free(xsecdst);
3796
3797 return ciferror;
3798}
3799
39037602 3800/*
316670eb 3801 * Go through the data-protected atomically controlled open (2)
39037602 3802 *
316670eb
A
3803 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3804 */
3805int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3806 int flags = uap->flags;
3807 int class = uap->class;
3808 int dpflags = uap->dpflags;
3809
39037602 3810 /*
316670eb
A
3811 * Follow the same path as normal open(2)
3812 * Look up the item if it exists, and acquire the vnode.
3813 */
3814 struct filedesc *fdp = p->p_fd;
3815 struct vnode_attr va;
3816 struct nameidata nd;
3817 int cmode;
3818 int error;
39037602 3819
316670eb
A
3820 VATTR_INIT(&va);
3821 /* Mask off all but regular access permissions */
3822 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3823 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3824
3825 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3826 uap->path, vfs_context_current());
3827
39037602
A
3828 /*
3829 * Initialize the extra fields in vnode_attr to pass down our
316670eb
A
3830 * extra fields.
3831 * 1. target cprotect class.
39037602
A
3832 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3833 */
3834 if (flags & O_CREAT) {
3e170ce0
A
3835 /* lower level kernel code validates that the class is valid before applying it. */
3836 if (class != PROTECTION_CLASS_DEFAULT) {
3837 /*
3838 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3839 * file behave the same as open (2)
3840 */
3841 VATTR_SET(&va, va_dataprotect_class, class);
3842 }
316670eb 3843 }
39037602 3844
3e170ce0 3845 if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
316670eb
A
3846 if ( flags & (O_RDWR | O_WRONLY)) {
3847 /* Not allowed to write raw encrypted bytes */
39037602
A
3848 return EINVAL;
3849 }
3e170ce0
A
3850 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
3851 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3852 }
3853 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
3854 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
3855 }
316670eb
A
3856 }
3857
39236c6e
A
3858 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3859 fileproc_alloc_init, NULL, retval);
316670eb
A
3860
3861 return error;
3862}
3863
fe8ab488
A
3864static int
3865openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
3866 int fd, enum uio_seg segflg, int *retval)
2d21ac55 3867{
fe8ab488 3868 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
91447636 3869 struct vnode_attr va;
2d21ac55 3870 struct nameidata nd;
91447636 3871 int cmode;
1c79356b 3872
91447636
A
3873 VATTR_INIT(&va);
3874 /* Mask off all but regular access permissions */
fe8ab488 3875 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
91447636
A
3876 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3877
fe8ab488
A
3878 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
3879 segflg, path, ctx);
2d21ac55 3880
fe8ab488
A
3881 return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
3882 retval, fd));
1c79356b 3883}
91447636 3884
fe8ab488
A
3885int
3886open(proc_t p, struct open_args *uap, int32_t *retval)
3887{
3888 __pthread_testcancel(1);
3889 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3890}
1c79356b 3891
fe8ab488
A
3892int
3893open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3894 int32_t *retval)
3895{
3896 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3897 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3898}
91447636 3899
1c79356b 3900int
fe8ab488
A
3901openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3902 int32_t *retval)
1c79356b 3903{
fe8ab488
A
3904 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3905 uap->mode, uap->fd, UIO_USERSPACE, retval));
3906}
91447636 3907
fe8ab488
A
3908int
3909openat(proc_t p, struct openat_args *uap, int32_t *retval)
3910{
3911 __pthread_testcancel(1);
3912 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3913}
3914
3915/*
3916 * openbyid_np: open a file given a file system id and a file system object id
3917 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3918 * file systems that don't support object ids it is a node id (uint64_t).
3919 *
3920 * Parameters: p Process requesting the open
3921 * uap User argument descriptor (see below)
3922 * retval Pointer to an area to receive the
3923 * return value from the system call
3924 *
3925 * Indirect: uap->path Path to open (same as 'open')
3926 *
3927 * uap->fsid id of target file system
3928 * uap->objid id of target file system object
3929 * uap->flags Flags to open (same as 'open')
3930 *
3931 * Returns: 0 Success
3932 * !0 errno value
3933 *
3934 *
3935 * XXX: We should enumerate the possible errno values here, and where
3936 * in the code they originated.
3937 */
3938int
3939openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
3940{
3941 fsid_t fsid;
3942 uint64_t objid;
3943 int error;
3944 char *buf = NULL;
3945 int buflen = MAXPATHLEN;
3946 int pathlen = 0;
3947 vfs_context_t ctx = vfs_context_current();
3948
490019cf
A
3949 if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
3950 return (error);
3951 }
3952
fe8ab488
A
3953 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
3954 return (error);
3955 }
3956
3957 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3958 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
3959 return (error);
3960 }
3961
3962 AUDIT_ARG(value32, fsid.val[0]);
3963 AUDIT_ARG(value64, objid);
3964
3965 /*resolve path from fsis, objid*/
3966 do {
3967 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
3968 if (buf == NULL) {
3969 return (ENOMEM);
3970 }
3971
3972 error = fsgetpath_internal(
3973 ctx, fsid.val[0], objid,
3974 buflen, buf, &pathlen);
3975
3976 if (error) {
3977 FREE(buf, M_TEMP);
3978 buf = NULL;
3979 }
3980 } while (error == ENOSPC && (buflen += MAXPATHLEN));
3981
3982 if (error) {
3983 return error;
3984 }
3985
3986 buf[pathlen] = 0;
3987
3988 error = openat_internal(
3989 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
3990
3991 FREE(buf, M_TEMP);
3992
3993 return error;
3994}
3995
3996
3997/*
3998 * Create a special file.
3999 */
4000static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
4001
4002int
4003mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
4004{
4005 struct vnode_attr va;
4006 vfs_context_t ctx = vfs_context_current();
4007 int error;
4008 struct nameidata nd;
4009 vnode_t vp, dvp;
4010
4011 VATTR_INIT(&va);
4012 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4013 VATTR_SET(&va, va_rdev, uap->dev);
91447636
A
4014
4015 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
4016 if ((uap->mode & S_IFMT) == S_IFIFO)
2d21ac55 4017 return(mkfifo1(ctx, uap->path, &va));
1c79356b 4018
55e303ae 4019 AUDIT_ARG(mode, uap->mode);
b0d623f7 4020 AUDIT_ARG(value32, uap->dev);
91447636 4021
2d21ac55 4022 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
1c79356b 4023 return (error);
39037602 4024 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
2d21ac55 4025 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
4026 error = namei(&nd);
4027 if (error)
1c79356b 4028 return (error);
91447636 4029 dvp = nd.ni_dvp;
1c79356b 4030 vp = nd.ni_vp;
91447636
A
4031
4032 if (vp != NULL) {
1c79356b 4033 error = EEXIST;
91447636 4034 goto out;
1c79356b 4035 }
55e303ae 4036
91447636 4037 switch (uap->mode & S_IFMT) {
91447636
A
4038 case S_IFCHR:
4039 VATTR_SET(&va, va_type, VCHR);
4040 break;
4041 case S_IFBLK:
4042 VATTR_SET(&va, va_type, VBLK);
4043 break;
91447636
A
4044 default:
4045 error = EINVAL;
4046 goto out;
4047 }
2d21ac55
A
4048
4049#if CONFIG_MACF
6d2010ae
A
4050 error = mac_vnode_check_create(ctx,
4051 nd.ni_dvp, &nd.ni_cnd, &va);
4052 if (error)
4053 goto out;
2d21ac55
A
4054#endif
4055
4056 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4057 goto out;
4058
6d2010ae 4059 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
91447636
A
4060 goto out;
4061
4062 if (vp) {
4063 int update_flags = 0;
4064
4065 // Make sure the name & parent pointers are hooked up
4066 if (vp->v_name == NULL)
4067 update_flags |= VNODE_UPDATE_NAME;
4068 if (vp->v_parent == NULLVP)
4069 update_flags |= VNODE_UPDATE_PARENT;
4070
4071 if (update_flags)
4072 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
4073
2d21ac55
A
4074#if CONFIG_FSE
4075 add_fsevent(FSE_CREATE_FILE, ctx,
91447636
A
4076 FSE_ARG_VNODE, vp,
4077 FSE_ARG_DONE);
2d21ac55 4078#endif
1c79356b 4079 }
91447636
A
4080
4081out:
4082 /*
4083 * nameidone has to happen before we vnode_put(dvp)
4084 * since it may need to release the fs_nodelock on the dvp
4085 */
4086 nameidone(&nd);
4087
4088 if (vp)
4089 vnode_put(vp);
4090 vnode_put(dvp);
4091
1c79356b
A
4092 return (error);
4093}
4094
4095/*
4096 * Create a named pipe.
2d21ac55
A
4097 *
4098 * Returns: 0 Success
4099 * EEXIST
4100 * namei:???
4101 * vnode_authorize:???
4102 * vn_create:???
1c79356b 4103 */
91447636
A
4104static int
4105mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
1c79356b 4106{
91447636 4107 vnode_t vp, dvp;
1c79356b
A
4108 int error;
4109 struct nameidata nd;
55e303ae 4110
39037602 4111 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
91447636 4112 UIO_USERSPACE, upath, ctx);
55e303ae
A
4113 error = namei(&nd);
4114 if (error)
1c79356b 4115 return (error);
91447636
A
4116 dvp = nd.ni_dvp;
4117 vp = nd.ni_vp;
4118
4119 /* check that this is a new file and authorize addition */
4120 if (vp != NULL) {
4121 error = EEXIST;
4122 goto out;
4123 }
2d21ac55
A
4124 VATTR_SET(vap, va_type, VFIFO);
4125
6d2010ae 4126 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
2d21ac55 4127 goto out;
2d21ac55 4128
6d2010ae 4129 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
91447636
A
4130out:
4131 /*
4132 * nameidone has to happen before we vnode_put(dvp)
4133 * since it may need to release the fs_nodelock on the dvp
4134 */
4135 nameidone(&nd);
4136
4137 if (vp)
4138 vnode_put(vp);
4139 vnode_put(dvp);
4140
55e303ae 4141 return error;
91447636
A
4142}
4143
0c530ab8
A
4144
4145/*
b0d623f7 4146 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
0c530ab8
A
4147 *
4148 * Parameters: p Process requesting the open
4149 * uap User argument descriptor (see below)
4150 * retval (Ignored)
4151 *
4152 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4153 * uap->uid UID to set
4154 * uap->gid GID to set
4155 * uap->mode File mode to set (same as 'mkfifo')
4156 * uap->xsecurity ACL to set, if creating
4157 *
4158 * Returns: 0 Success
4159 * !0 errno value
4160 *
4161 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4162 *
4163 * XXX: We should enumerate the possible errno values here, and where
4164 * in the code they originated.
4165 */
91447636 4166int
b0d623f7 4167mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
91447636
A
4168{
4169 int ciferror;
4170 kauth_filesec_t xsecdst;
91447636
A
4171 struct vnode_attr va;
4172
b0d623f7
A
4173 AUDIT_ARG(owner, uap->uid, uap->gid);
4174
91447636
A
4175 xsecdst = KAUTH_FILESEC_NONE;
4176 if (uap->xsecurity != USER_ADDR_NULL) {
4177 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4178 return ciferror;
4179 }
4180
91447636
A
4181 VATTR_INIT(&va);
4182 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4183 if (uap->uid != KAUTH_UID_NONE)
4184 VATTR_SET(&va, va_uid, uap->uid);
4185 if (uap->gid != KAUTH_GID_NONE)
4186 VATTR_SET(&va, va_gid, uap->gid);
4187 if (xsecdst != KAUTH_FILESEC_NONE)
4188 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4189
2d21ac55 4190 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
91447636
A
4191
4192 if (xsecdst != KAUTH_FILESEC_NONE)
4193 kauth_filesec_free(xsecdst);
4194 return ciferror;
4195}
4196
4197/* ARGSUSED */
4198int
b0d623f7 4199mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
91447636 4200{
91447636
A
4201 struct vnode_attr va;
4202
91447636
A
4203 VATTR_INIT(&va);
4204 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4205
2d21ac55 4206 return(mkfifo1(vfs_context_current(), uap->path, &va));
1c79356b
A
4207}
4208
b0d623f7
A
4209
/*
 * Return a pointer to the last occurrence of 'ch' in the NUL-terminated
 * string 'p', or NULL if it does not occur.  The terminator itself is
 * examined, so searching for 0 yields a pointer to the end of the string.
 */
static char *
my_strrchr(char *p, int ch)
{
	char *last = NULL;

	do {
		if (*p == ch)
			last = p;
	} while (*p++ != '\0');

	return (last);
}
4223
4224extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4225
4226int
4227safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4228{
4229 int ret, len = _len;
4230
4231 *truncated_path = 0;
4232 ret = vn_getpath(dvp, path, &len);
4233 if (ret == 0 && len < (MAXPATHLEN - 1)) {
4234 if (leafname) {
4235 path[len-1] = '/';
4236 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
4237 if (len > MAXPATHLEN) {
4238 char *ptr;
39037602 4239
b0d623f7
A
4240 // the string got truncated!
4241 *truncated_path = 1;
4242 ptr = my_strrchr(path, '/');
4243 if (ptr) {
4244 *ptr = '\0'; // chop off the string at the last directory component
4245 }
4246 len = strlen(path) + 1;
4247 }
4248 }
4249 } else if (ret == 0) {
4250 *truncated_path = 1;
4251 } else if (ret != 0) {
4252 struct vnode *mydvp=dvp;
4253
4254 if (ret != ENOSPC) {
4255 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4256 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
39037602 4257 }
b0d623f7 4258 *truncated_path = 1;
39037602 4259
b0d623f7
A
4260 do {
4261 if (mydvp->v_parent != NULL) {
4262 mydvp = mydvp->v_parent;
4263 } else if (mydvp->v_mount) {
4264 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4265 break;
4266 } else {
4267 // no parent and no mount point? only thing is to punt and say "/" changed
4268 strlcpy(path, "/", _len);
4269 len = 2;
4270 mydvp = NULL;
4271 }
39037602 4272
b0d623f7
A
4273 if (mydvp == NULL) {
4274 break;
4275 }
4276
4277 len = _len;
4278 ret = vn_getpath(mydvp, path, &len);
4279 } while (ret == ENOSPC);
4280 }
4281
4282 return len;
4283}
4284
4285
1c79356b
A
4286/*
4287 * Make a hard file link.
2d21ac55
A
4288 *
4289 * Returns: 0 Success
4290 * EPERM
4291 * EEXIST
4292 * EXDEV
4293 * namei:???
4294 * vnode_authorize:???
4295 * VNOP_LINK:???
1c79356b 4296 */
1c79356b 4297/* ARGSUSED */
fe8ab488
A
4298static int
4299linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
4300 user_addr_t link, int flag, enum uio_seg segflg)
1c79356b 4301{
91447636 4302 vnode_t vp, dvp, lvp;
1c79356b 4303 struct nameidata nd;
fe8ab488 4304 int follow;
1c79356b 4305 int error;
b0d623f7 4306#if CONFIG_FSE
91447636 4307 fse_info finfo;
b0d623f7 4308#endif
91447636 4309 int need_event, has_listeners;
2d21ac55 4310 char *target_path = NULL;
b0d623f7 4311 int truncated=0;
1c79356b 4312
91447636
A
4313 vp = dvp = lvp = NULLVP;
4314
4315 /* look up the object we are linking to */
fe8ab488
A
4316 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
4317 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
4318 segflg, path, ctx);
4319
4320 error = nameiat(&nd, fd1);
55e303ae 4321 if (error)
1c79356b
A
4322 return (error);
4323 vp = nd.ni_vp;
91447636
A
4324
4325 nameidone(&nd);
4326
2d21ac55
A
4327 /*
4328 * Normally, linking to directories is not supported.
4329 * However, some file systems may have limited support.
4330 */
91447636 4331 if (vp->v_type == VDIR) {
39037602 4332 if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
2d21ac55
A
4333 error = EPERM; /* POSIX */
4334 goto out;
4335 }
39037602 4336
2d21ac55
A
4337 /* Linking to a directory requires ownership. */
4338 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
4339 struct vnode_attr dva;
4340
4341 VATTR_INIT(&dva);
4342 VATTR_WANTED(&dva, va_uid);
4343 if (vnode_getattr(vp, &dva, ctx) != 0 ||
4344 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
4345 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
4346 error = EACCES;
4347 goto out;
4348 }
4349 }
91447636
A
4350 }
4351
91447636 4352 /* lookup the target node */
6d2010ae
A
4353#if CONFIG_TRIGGERS
4354 nd.ni_op = OP_LINK;
4355#endif
91447636 4356 nd.ni_cnd.cn_nameiop = CREATE;
2d21ac55 4357 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
fe8ab488
A
4358 nd.ni_dirp = link;
4359 error = nameiat(&nd, fd2);
91447636
A
4360 if (error != 0)
4361 goto out;
4362 dvp = nd.ni_dvp;
4363 lvp = nd.ni_vp;
2d21ac55
A
4364
4365#if CONFIG_MACF
4366 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
4367 goto out2;
4368#endif
4369
4370 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4371 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
4372 goto out2;
4373
91447636
A
4374 /* target node must not exist */
4375 if (lvp != NULLVP) {
4376 error = EEXIST;
4377 goto out2;
4378 }
4379 /* cannot link across mountpoints */
4380 if (vnode_mount(vp) != vnode_mount(dvp)) {
4381 error = EXDEV;
4382 goto out2;
4383 }
39037602 4384
91447636 4385 /* authorize creation of the target note */
2d21ac55 4386 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
91447636
A
4387 goto out2;
4388
4389 /* and finally make the link */
2d21ac55 4390 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
91447636
A
4391 if (error)
4392 goto out2;
4393
39236c6e
A
4394#if CONFIG_MACF
4395 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
4396#endif
4397
2d21ac55 4398#if CONFIG_FSE
91447636 4399 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
2d21ac55
A
4400#else
4401 need_event = 0;
4402#endif
91447636
A
4403 has_listeners = kauth_authorize_fileop_has_listeners();
4404
4405 if (need_event || has_listeners) {
91447636
A
4406 char *link_to_path = NULL;
4407 int len, link_name_len;
4408
4409 /* build the path to the new link file */
2d21ac55
A
4410 GET_PATH(target_path);
4411 if (target_path == NULL) {
4412 error = ENOMEM;
4413 goto out2;
4414 }
4415
b0d623f7 4416 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
91447636
A
4417
4418 if (has_listeners) {
4419 /* build the path to file we are linking to */
2d21ac55
A
4420 GET_PATH(link_to_path);
4421 if (link_to_path == NULL) {
4422 error = ENOMEM;
4423 goto out2;
4424 }
4425
91447636 4426 link_name_len = MAXPATHLEN;
fe8ab488
A
4427 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
4428 /*
39037602 4429 * Call out to allow 3rd party notification of rename.
fe8ab488
A
4430 * Ignore result of kauth_authorize_fileop call.
4431 */
39037602
A
4432 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
4433 (uintptr_t)link_to_path,
fe8ab488
A
4434 (uintptr_t)target_path);
4435 }
2d21ac55
A
4436 if (link_to_path != NULL) {
4437 RELEASE_PATH(link_to_path);
4438 }
91447636 4439 }
2d21ac55 4440#if CONFIG_FSE
91447636
A
4441 if (need_event) {
4442 /* construct fsevent */
2d21ac55 4443 if (get_fse_info(vp, &finfo, ctx) == 0) {
b0d623f7
A
4444 if (truncated) {
4445 finfo.mode |= FSE_TRUNCATED_PATH;
4446 }
4447
91447636 4448 // build the path to the destination of the link
2d21ac55 4449 add_fsevent(FSE_CREATE_FILE, ctx,
91447636
A
4450 FSE_ARG_STRING, len, target_path,
4451 FSE_ARG_FINFO, &finfo,
4452 FSE_ARG_DONE);
1c79356b 4453 }
b0d623f7
A
4454 if (vp->v_parent) {
4455 add_fsevent(FSE_STAT_CHANGED, ctx,
4456 FSE_ARG_VNODE, vp->v_parent,
4457 FSE_ARG_DONE);
4458 }
1c79356b 4459 }
2d21ac55 4460#endif
1c79356b 4461 }
91447636
A
4462out2:
4463 /*
4464 * nameidone has to happen before we vnode_put(dvp)
4465 * since it may need to release the fs_nodelock on the dvp
4466 */
4467 nameidone(&nd);
2d21ac55
A
4468 if (target_path != NULL) {
4469 RELEASE_PATH(target_path);
4470 }
91447636
A
4471out:
4472 if (lvp)
4473 vnode_put(lvp);
4474 if (dvp)
4475 vnode_put(dvp);
4476 vnode_put(vp);
4477 return (error);
4478}
1c79356b 4479
fe8ab488
A
4480int
4481link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4482{
4483 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4484 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4485}
4486
4487int
4488linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4489{
4490 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4491 return (EINVAL);
4492
4493 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4494 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4495}
4496
1c79356b
A
4497/*
4498 * Make a symbolic link.
91447636
A
4499 *
4500 * We could add support for ACLs here too...
1c79356b 4501 */
1c79356b 4502/* ARGSUSED */
fe8ab488
A
4503static int
4504symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
4505 user_addr_t link, enum uio_seg segflg)
1c79356b 4506{
91447636
A
4507 struct vnode_attr va;
4508 char *path;
1c79356b
A
4509 int error;
4510 struct nameidata nd;
91447636 4511 vnode_t vp, dvp;
1c79356b 4512 size_t dummy=0;
fe8ab488
A
4513 proc_t p;
4514
4515 error = 0;
4516 if (UIO_SEG_IS_USER_SPACE(segflg)) {
4517 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
4518 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
4519 } else {
4520 path = (char *)path_data;
4521 }
91447636 4522 if (error)
1c79356b 4523 goto out;
55e303ae 4524 AUDIT_ARG(text, path); /* This is the link string */
91447636 4525
fe8ab488
A
4526 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
4527 segflg, link, ctx);
4528
4529 error = nameiat(&nd, fd);
55e303ae 4530 if (error)
1c79356b 4531 goto out;
91447636
A
4532 dvp = nd.ni_dvp;
4533 vp = nd.ni_vp;
55e303ae 4534
fe8ab488 4535 p = vfs_context_proc(ctx);
2d21ac55
A
4536 VATTR_INIT(&va);
4537 VATTR_SET(&va, va_type, VLNK);
4538 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
fe8ab488 4539
2d21ac55
A
4540#if CONFIG_MACF
4541 error = mac_vnode_check_create(ctx,
4542 dvp, &nd.ni_cnd, &va);
4543#endif
4544 if (error != 0) {
4545 goto skipit;
4546 }
91447636 4547
2d21ac55
A
4548 if (vp != NULL) {
4549 error = EEXIST;
4550 goto skipit;
4551 }
4552
4553 /* authorize */
4554 if (error == 0)
4555 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
4556 /* get default ownership, etc. */
4557 if (error == 0)
4558 error = vnode_authattr_new(dvp, &va, 0, ctx);
4559 if (error == 0)
4560 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
4561
39236c6e 4562#if CONFIG_MACF
3e170ce0 4563 if (error == 0 && vp)
39236c6e
A
4564 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
4565#endif
4566
2d21ac55 4567 /* do fallback attribute handling */
3e170ce0 4568 if (error == 0 && vp)
2d21ac55 4569 error = vnode_setattr_fallback(vp, &va, ctx);
39236c6e 4570
2d21ac55
A
4571 if (error == 0) {
4572 int update_flags = 0;
55e303ae 4573
3e170ce0 4574 /*check if a new vnode was created, else try to get one*/
2d21ac55
A
4575 if (vp == NULL) {
4576 nd.ni_cnd.cn_nameiop = LOOKUP;
6d2010ae
A
4577#if CONFIG_TRIGGERS
4578 nd.ni_op = OP_LOOKUP;
4579#endif
2d21ac55 4580 nd.ni_cnd.cn_flags = 0;
fe8ab488 4581 error = nameiat(&nd, fd);
2d21ac55 4582 vp = nd.ni_vp;
55e303ae 4583
2d21ac55
A
4584 if (vp == NULL)
4585 goto skipit;
4586 }
fe8ab488 4587
91447636 4588#if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
fe8ab488 4589 /* call out to allow 3rd party notification of rename.
2d21ac55
A
4590 * Ignore result of kauth_authorize_fileop call.
4591 */
4592 if (kauth_authorize_fileop_has_listeners() &&
4593 namei(&nd) == 0) {
4594 char *new_link_path = NULL;
4595 int len;
fe8ab488 4596
2d21ac55
A
4597 /* build the path to the new link file */
4598 new_link_path = get_pathbuff();
4599 len = MAXPATHLEN;
4600 vn_getpath(dvp, new_link_path, &len);
4601 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
91447636 4602 new_link_path[len - 1] = '/';
2d21ac55 4603 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
91447636 4604 }
fe8ab488
A
4605
4606 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
2d21ac55
A
4607 (uintptr_t)path, (uintptr_t)new_link_path);
4608 if (new_link_path != NULL)
4609 release_pathbuff(new_link_path);
4610 }
fe8ab488 4611#endif
2d21ac55
A
4612 // Make sure the name & parent pointers are hooked up
4613 if (vp->v_name == NULL)
4614 update_flags |= VNODE_UPDATE_NAME;
4615 if (vp->v_parent == NULLVP)
4616 update_flags |= VNODE_UPDATE_PARENT;
fe8ab488 4617
2d21ac55
A
4618 if (update_flags)
4619 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
91447636 4620
2d21ac55
A
4621#if CONFIG_FSE
4622 add_fsevent(FSE_CREATE_FILE, ctx,
4623 FSE_ARG_VNODE, vp,
4624 FSE_ARG_DONE);
4625#endif
4626 }
91447636
A
4627
4628skipit:
4629 /*
4630 * nameidone has to happen before we vnode_put(dvp)
4631 * since it may need to release the fs_nodelock on the dvp
4632 */
4633 nameidone(&nd);
4634
4635 if (vp)
4636 vnode_put(vp);
4637 vnode_put(dvp);
1c79356b 4638out:
fe8ab488
A
4639 if (path && (path != (char *)path_data))
4640 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
91447636 4641
1c79356b
A
4642 return (error);
4643}
4644
fe8ab488
A
4645int
4646symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4647{
4648 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4649 uap->link, UIO_USERSPACE));
4650}
4651
4652int
4653symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4654 __unused int32_t *retval)
4655{
4656 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4657 uap->path2, UIO_USERSPACE));
4658}
4659
1c79356b
A
4660/*
4661 * Delete a whiteout from the filesystem.
fe8ab488 4662 * No longer supported.
1c79356b 4663 */
1c79356b 4664int
fe8ab488 4665undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
1c79356b 4666{
fe8ab488 4667 return (ENOTSUP);
1c79356b
A
4668}
4669
4670/*
4671 * Delete a name from the filesystem.
4672 */
1c79356b 4673/* ARGSUSED */
fe8ab488 4674static int
c18c124e
A
4675unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
4676 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
1c79356b 4677{
c18c124e 4678 struct nameidata nd;
91447636 4679 vnode_t vp, dvp;
1c79356b 4680 int error;
91447636 4681 struct componentname *cnp;
2d21ac55 4682 char *path = NULL;
b0d623f7
A
4683 int len=0;
4684#if CONFIG_FSE
2d21ac55 4685 fse_info finfo;
6d2010ae 4686 struct vnode_attr va;
b0d623f7 4687#endif
c18c124e
A
4688 int flags;
4689 int need_event;
4690 int has_listeners;
4691 int truncated_path;
6d2010ae 4692 int batched;
c18c124e
A
4693 struct vnode_attr *vap;
4694 int do_retry;
4695 int retry_count = 0;
4696 int cn_flags;
4697
4698 cn_flags = LOCKPARENT;
4699 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
4700 cn_flags |= AUDITVNPATH1;
4701 /* If a starting dvp is passed, it trumps any fd passed. */
4702 if (start_dvp)
4703 cn_flags |= USEDVP;
6d2010ae 4704
c910b4d9
A
4705#if NAMEDRSRCFORK
4706 /* unlink or delete is allowed on rsrc forks and named streams */
c18c124e 4707 cn_flags |= CN_ALLOWRSRCFORK;
c910b4d9
A
4708#endif
4709
c18c124e
A
4710retry:
4711 do_retry = 0;
4712 flags = 0;
4713 need_event = 0;
4714 has_listeners = 0;
4715 truncated_path = 0;
4716 vap = NULL;
4717
4718 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
4719
4720 nd.ni_dvp = start_dvp;
4721 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
4722 cnp = &nd.ni_cnd;
91447636 4723
813fb2f6 4724continue_lookup:
c18c124e 4725 error = nameiat(&nd, fd);
2d21ac55
A
4726 if (error)
4727 return (error);
b0d623f7 4728
c18c124e
A
4729 dvp = nd.ni_dvp;
4730 vp = nd.ni_vp;
91447636 4731
6d2010ae 4732
91447636 4733 /* With Carbon delete semantics, busy files cannot be deleted */
316670eb 4734 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
91447636 4735 flags |= VNODE_REMOVE_NODELETEBUSY;
2d21ac55 4736 }
39037602 4737
39236c6e 4738 /* Skip any potential upcalls if told to. */
316670eb
A
4739 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4740 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4741 }
4742
6d2010ae
A
4743 if (vp) {
4744 batched = vnode_compound_remove_available(vp);
4745 /*
4746 * The root of a mounted filesystem cannot be deleted.
4747 */
4748 if (vp->v_flag & VROOT) {
4749 error = EBUSY;
4750 }
2d21ac55 4751
00867663
A
4752#if DEVELOPMENT || DEBUG
4753 /*
4754 * XXX VSWAP: Check for entitlements or special flag here
4755 * so we can restrict access appropriately.
4756 */
4757#else /* DEVELOPMENT || DEBUG */
4758
4759 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
4760 error = EPERM;
4761 goto out;
4762 }
4763#endif /* DEVELOPMENT || DEBUG */
4764
6d2010ae
A
4765 if (!batched) {
4766 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4767 if (error) {
3e170ce0
A
4768 if (error == ENOENT) {
4769 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4770 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4771 do_retry = 1;
4772 retry_count++;
4773 }
c18c124e 4774 }
6d2010ae
A
4775 goto out;
4776 }
4777 }
4778 } else {
4779 batched = 1;
2d21ac55 4780
6d2010ae
A
4781 if (!vnode_compound_remove_available(dvp)) {
4782 panic("No vp, but no compound remove?");
4783 }
4784 }
2d21ac55 4785
2d21ac55
A
4786#if CONFIG_FSE
4787 need_event = need_fsevent(FSE_DELETE, dvp);
4788 if (need_event) {
6d2010ae
A
4789 if (!batched) {
4790 if ((vp->v_flag & VISHARDLINK) == 0) {
4791 /* XXX need to get these data in batched VNOP */
4792 get_fse_info(vp, &finfo, ctx);
4793 }
4794 } else {
4795 error = vfs_get_notify_attributes(&va);
4796 if (error) {
4797 goto out;
4798 }
4799
4800 vap = &va;
2d21ac55
A
4801 }
4802 }
4803#endif
4804 has_listeners = kauth_authorize_fileop_has_listeners();
4805 if (need_event || has_listeners) {
2d21ac55 4806 if (path == NULL) {
6d2010ae
A
4807 GET_PATH(path);
4808 if (path == NULL) {
4809 error = ENOMEM;
4810 goto out;
4811 }
2d21ac55 4812 }
c18c124e 4813 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
2d21ac55
A
4814 }
4815
4816#if NAMEDRSRCFORK
c18c124e 4817 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
2d21ac55
A
4818 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4819 else
4820#endif
6d2010ae 4821 {
c18c124e
A
4822 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
4823 vp = nd.ni_vp;
6d2010ae
A
4824 if (error == EKEEPLOOKING) {
4825 if (!batched) {
4826 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4827 }
4828
c18c124e 4829 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
6d2010ae
A
4830 panic("EKEEPLOOKING, but continue flag not set?");
4831 }
4832
4833 if (vnode_isdir(vp)) {
4834 error = EISDIR;
4835 goto out;
4836 }
813fb2f6 4837 goto continue_lookup;
3e170ce0
A
4838 } else if (error == ENOENT && batched) {
4839 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4840 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4841 /*
4842 * For compound VNOPs, the authorization callback may
4843 * return ENOENT in case of racing hardlink lookups
4844 * hitting the name cache, redrive the lookup.
4845 */
4846 do_retry = 1;
4847 retry_count += 1;
4848 goto out;
4849 }
6d2010ae
A
4850 }
4851 }
2d21ac55
A
4852
4853 /*
39037602 4854 * Call out to allow 3rd party notification of delete.
2d21ac55
A
4855 * Ignore result of kauth_authorize_fileop call.
4856 */
1c79356b 4857 if (!error) {
2d21ac55 4858 if (has_listeners) {
39037602
A
4859 kauth_authorize_fileop(vfs_context_ucred(ctx),
4860 KAUTH_FILEOP_DELETE,
2d21ac55
A
4861 (uintptr_t)vp,
4862 (uintptr_t)path);
4863 }
91447636 4864
2d21ac55
A
4865 if (vp->v_flag & VISHARDLINK) {
4866 //
4867 // if a hardlink gets deleted we want to blow away the
4868 // v_parent link because the path that got us to this
4869 // instance of the link is no longer valid. this will
4870 // force the next call to get the path to ask the file
4871 // system instead of just following the v_parent link.
4872 //
4873 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
91447636 4874 }
91447636 4875
2d21ac55
A
4876#if CONFIG_FSE
4877 if (need_event) {
4878 if (vp->v_flag & VISHARDLINK) {
4879 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
4880 } else if (vap) {
4881 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 4882 }
b0d623f7
A
4883 if (truncated_path) {
4884 finfo.mode |= FSE_TRUNCATED_PATH;
4885 }
2d21ac55
A
4886 add_fsevent(FSE_DELETE, ctx,
4887 FSE_ARG_STRING, len, path,
4888 FSE_ARG_FINFO, &finfo,
4889 FSE_ARG_DONE);
4890 }
4891#endif
1c79356b 4892 }
6d2010ae
A
4893
4894out:
2d21ac55
A
4895 if (path != NULL)
4896 RELEASE_PATH(path);
4897
c910b4d9 4898#if NAMEDRSRCFORK
39037602 4899 /* recycle the deleted rsrc fork vnode to force a reclaim, which
b0d623f7
A
4900 * will cause its shadow file to go away if necessary.
4901 */
6d2010ae
A
4902 if (vp && (vnode_isnamedstream(vp)) &&
4903 (vp->v_parent != NULLVP) &&
4904 vnode_isshadow(vp)) {
4905 vnode_recycle(vp);
39037602 4906 }
c910b4d9 4907#endif
6d2010ae
A
4908 /*
4909 * nameidone has to happen before we vnode_put(dvp)
4910 * since it may need to release the fs_nodelock on the dvp
4911 */
c18c124e 4912 nameidone(&nd);
91447636 4913 vnode_put(dvp);
6d2010ae
A
4914 if (vp) {
4915 vnode_put(vp);
4916 }
c18c124e
A
4917
4918 if (do_retry) {
4919 goto retry;
4920 }
4921
1c79356b
A
4922 return (error);
4923}
4924
fe8ab488 4925int
c18c124e
A
4926unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
4927 enum uio_seg segflg, int unlink_flags)
fe8ab488 4928{
c18c124e
A
4929 return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
4930 unlink_flags));
fe8ab488
A
4931}
4932
1c79356b 4933/*
c18c124e 4934 * Delete a name from the filesystem using Carbon semantics.
1c79356b 4935 */
c18c124e
A
4936int
4937delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
fe8ab488 4938{
c18c124e
A
4939 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4940 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
fe8ab488
A
4941}
4942
c18c124e
A
4943/*
4944 * Delete a name from the filesystem using POSIX semantics.
4945 */
1c79356b 4946int
b0d623f7 4947unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
1c79356b 4948{
c18c124e
A
4949 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4950 uap->path, UIO_USERSPACE, 0));
fe8ab488 4951}
2d21ac55 4952
fe8ab488
A
4953int
4954unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
4955{
4956 if (uap->flag & ~AT_REMOVEDIR)
4957 return (EINVAL);
4958
4959 if (uap->flag & AT_REMOVEDIR)
4960 return (rmdirat_internal(vfs_context_current(), uap->fd,
4961 uap->path, UIO_USERSPACE));
4962 else
4963 return (unlinkat_internal(vfs_context_current(), uap->fd,
c18c124e 4964 NULLVP, uap->path, UIO_USERSPACE, 0));
1c79356b
A
4965}
4966
4967/*
4968 * Reposition read/write file offset.
4969 */
1c79356b 4970int
2d21ac55 4971lseek(proc_t p, struct lseek_args *uap, off_t *retval)
1c79356b 4972{
91447636 4973 struct fileproc *fp;
2d21ac55
A
4974 vnode_t vp;
4975 struct vfs_context *ctx;
91447636 4976 off_t offset = uap->offset, file_size;
1c79356b
A
4977 int error;
4978
91447636
A
4979 if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
4980 if (error == ENOTSUP)
4981 return (ESPIPE);
1c79356b 4982 return (error);
55e303ae 4983 }
91447636
A
4984 if (vnode_isfifo(vp)) {
4985 file_drop(uap->fd);
4986 return(ESPIPE);
4987 }
2d21ac55
A
4988
4989
4990 ctx = vfs_context_current();
4991#if CONFIG_MACF
4992 if (uap->whence == L_INCR && uap->offset == 0)
4993 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
4994 fp->f_fglob);
4995 else
4996 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
4997 fp->f_fglob);
4998 if (error) {
4999 file_drop(uap->fd);
5000 return (error);
5001 }
5002#endif
91447636
A
5003 if ( (error = vnode_getwithref(vp)) ) {
5004 file_drop(uap->fd);
5005 return(error);
5006 }
5007
1c79356b
A
5008 switch (uap->whence) {
5009 case L_INCR:
91447636 5010 offset += fp->f_fglob->fg_offset;
1c79356b
A
5011 break;
5012 case L_XTND:
2d21ac55 5013 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
55e303ae 5014 break;
91447636 5015 offset += file_size;
1c79356b
A
5016 break;
5017 case L_SET:
1c79356b 5018 break;
813fb2f6 5019 case SEEK_HOLE:
5ba3f43e 5020 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKHOLE, (caddr_t)&offset, 0, ctx);
813fb2f6
A
5021 break;
5022 case SEEK_DATA:
5ba3f43e 5023 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKDATA, (caddr_t)&offset, 0, ctx);
813fb2f6 5024 break;
1c79356b 5025 default:
55e303ae 5026 error = EINVAL;
1c79356b 5027 }
55e303ae
A
5028 if (error == 0) {
5029 if (uap->offset > 0 && offset < 0) {
5030 /* Incremented/relative move past max size */
5031 error = EOVERFLOW;
5032 } else {
5033 /*
5034 * Allow negative offsets on character devices, per
5035 * POSIX 1003.1-2001. Most likely for writing disk
5036 * labels.
5037 */
5038 if (offset < 0 && vp->v_type != VCHR) {
5039 /* Decremented/relative move before start */
5040 error = EINVAL;
5041 } else {
5042 /* Success */
91447636
A
5043 fp->f_fglob->fg_offset = offset;
5044 *retval = fp->f_fglob->fg_offset;
55e303ae
A
5045 }
5046 }
5047 }
b0d623f7 5048
39037602 5049 /*
b0d623f7
A
5050 * An lseek can affect whether data is "available to read." Use
5051 * hint of NOTE_NONE so no EVFILT_VNODE events fire
5052 */
5053 post_event_if_success(vp, error, NOTE_NONE);
91447636
A
5054 (void)vnode_put(vp);
5055 file_drop(uap->fd);
55e303ae 5056 return (error);
1c79356b
A
5057}
5058
91447636 5059
1c79356b 5060/*
91447636 5061 * Check access permissions.
2d21ac55
A
5062 *
5063 * Returns: 0 Success
5064 * vnode_authorize:???
1c79356b 5065 */
91447636
A
5066static int
5067access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
1c79356b 5068{
91447636 5069 kauth_action_t action;
1c79356b
A
5070 int error;
5071
91447636
A
5072 /*
5073 * If just the regular access bits, convert them to something
5074 * that vnode_authorize will understand.
5075 */
5076 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
5077 action = 0;
5078 if (uflags & R_OK)
5079 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5080 if (uflags & W_OK) {
5081 if (vnode_isdir(vp)) {
5082 action |= KAUTH_VNODE_ADD_FILE |
5083 KAUTH_VNODE_ADD_SUBDIRECTORY;
5084 /* might want delete rights here too */
5085 } else {
5086 action |= KAUTH_VNODE_WRITE_DATA;
5087 }
5088 }
5089 if (uflags & X_OK) {
5090 if (vnode_isdir(vp)) {
5091 action |= KAUTH_VNODE_SEARCH;
5092 } else {
5093 action |= KAUTH_VNODE_EXECUTE;
5094 }
5095 }
5096 } else {
5097 /* take advantage of definition of uflags */
5098 action = uflags >> 8;
5099 }
39037602 5100
2d21ac55
A
5101#if CONFIG_MACF
5102 error = mac_vnode_check_access(ctx, vp, uflags);
5103 if (error)
5104 return (error);
5105#endif /* MAC */
5106
91447636
A
5107 /* action == 0 means only check for existence */
5108 if (action != 0) {
5109 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
5110 } else {
5111 error = 0;
5112 }
5113
5114 return(error);
1c79356b 5115}
1c79356b 5116
91447636
A
5117
5118
2d21ac55 5119/*
b0d623f7 5120 * access_extended: Check access permissions in bulk.
2d21ac55 5121 *
b0d623f7 5122 * Description: uap->entries Pointer to an array of accessx
39037602
A
5123 * descriptor structs, plus one or
5124 * more NULL terminated strings (see
b0d623f7
A
5125 * "Notes" section below).
5126 * uap->size Size of the area pointed to by
5127 * uap->entries.
5128 * uap->results Pointer to the results array.
2d21ac55
A
5129 *
5130 * Returns: 0 Success
5131 * ENOMEM Insufficient memory
5132 * EINVAL Invalid arguments
5133 * namei:EFAULT Bad address
5134 * namei:ENAMETOOLONG Filename too long
5135 * namei:ENOENT No such file or directory
5136 * namei:ELOOP Too many levels of symbolic links
5137 * namei:EBADF Bad file descriptor
5138 * namei:ENOTDIR Not a directory
5139 * namei:???
5140 * access1:
5141 *
5142 * Implicit returns:
5143 * uap->results Array contents modified
5144 *
5145 * Notes: The uap->entries are structured as an arbitrary length array
b0d623f7 5146 * of accessx descriptors, followed by one or more NULL terminated
2d21ac55
A
5147 * strings
5148 *
5149 * struct accessx_descriptor[0]
5150 * ...
5151 * struct accessx_descriptor[n]
5152 * char name_data[0];
5153 *
5154 * We determine the entry count by walking the buffer containing
b0d623f7 5155 * the uap->entries argument descriptor. For each descriptor we
2d21ac55
A
5156 * see, the valid values for the offset ad_name_offset will be
5157 * in the byte range:
5158 *
5159 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5160 * to
5161 * [ uap->entries + uap->size - 2 ]
5162 *
5163 * since we must have at least one string, and the string must
b0d623f7 5164 * be at least one character plus the NULL terminator in length.
39037602 5165 *
2d21ac55
A
5166 * XXX: Need to support the check-as uid argument
5167 */
1c79356b 5168int
b0d623f7 5169access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
1c79356b 5170{
2d21ac55
A
5171 struct accessx_descriptor *input = NULL;
5172 errno_t *result = NULL;
5173 errno_t error = 0;
5174 int wantdelete = 0;
5175 unsigned int desc_max, desc_actual, i, j;
91447636 5176 struct vfs_context context;
1c79356b 5177 struct nameidata nd;
91447636 5178 int niopts;
2d21ac55
A
5179 vnode_t vp = NULL;
5180 vnode_t dvp = NULL;
5181#define ACCESSX_MAX_DESCR_ON_STACK 10
5182 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
91447636 5183
91447636
A
5184 context.vc_ucred = NULL;
5185
2d21ac55
A
5186 /*
5187 * Validate parameters; if valid, copy the descriptor array and string
5188 * arguments into local memory. Before proceeding, the following
5189 * conditions must have been met:
5190 *
5191 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5192 * o There must be sufficient room in the request for at least one
5193 * descriptor and a one yte NUL terminated string.
5194 * o The allocation of local storage must not fail.
5195 */
91447636
A
5196 if (uap->size > ACCESSX_MAX_TABLESIZE)
5197 return(ENOMEM);
2d21ac55 5198 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
91447636 5199 return(EINVAL);
2d21ac55
A
5200 if (uap->size <= sizeof (stack_input)) {
5201 input = stack_input;
5202 } else {
91447636
A
5203 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
5204 if (input == NULL) {
5205 error = ENOMEM;
5206 goto out;
5207 }
2d21ac55 5208 }
91447636 5209 error = copyin(uap->entries, input, uap->size);
55e303ae 5210 if (error)
91447636 5211 goto out;
1c79356b 5212
b0d623f7
A
5213 AUDIT_ARG(opaque, input, uap->size);
5214
91447636 5215 /*
2d21ac55
A
5216 * Force NUL termination of the copyin buffer to avoid nami() running
5217 * off the end. If the caller passes us bogus data, they may get a
5218 * bogus result.
5219 */
5220 ((char *)input)[uap->size - 1] = 0;
5221
5222 /*
5223 * Access is defined as checking against the process' real identity,
5224 * even if operations are checking the effective identity. This
5225 * requires that we use a local vfs context.
91447636
A
5226 */
5227 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
2d21ac55 5228 context.vc_thread = current_thread();
91447636
A
5229
5230 /*
2d21ac55
A
5231 * Find out how many entries we have, so we can allocate the result
5232 * array by walking the list and adjusting the count downward by the
5233 * earliest string offset we see.
91447636 5234 */
2d21ac55
A
5235 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
5236 desc_actual = desc_max;
5237 for (i = 0; i < desc_actual; i++) {
91447636 5238 /*
2d21ac55
A
5239 * Take the offset to the name string for this entry and
5240 * convert to an input array index, which would be one off
5241 * the end of the array if this entry was the lowest-addressed
5242 * name string.
91447636
A
5243 */
5244 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
2d21ac55
A
5245
5246 /*
5247 * An offset greater than the max allowable offset is an error.
5248 * It is also an error for any valid entry to point
5249 * to a location prior to the end of the current entry, if
5250 * it's not a reference to the string of the previous entry.
5251 */
5252 if (j > desc_max || (j != 0 && j <= i)) {
91447636
A
5253 error = EINVAL;
5254 goto out;
5255 }
2d21ac55 5256
39037602
A
5257 /* Also do not let ad_name_offset point to something beyond the size of the input */
5258 if (input[i].ad_name_offset >= uap->size) {
5259 error = EINVAL;
5260 goto out;
5261 }
5262
2d21ac55
A
5263 /*
5264 * An offset of 0 means use the previous descriptor's offset;
5265 * this is used to chain multiple requests for the same file
5266 * to avoid multiple lookups.
5267 */
91447636 5268 if (j == 0) {
2d21ac55 5269 /* This is not valid for the first entry */
91447636
A
5270 if (i == 0) {
5271 error = EINVAL;
5272 goto out;
5273 }
5274 continue;
5275 }
2d21ac55
A
5276
5277 /*
5278 * If the offset of the string for this descriptor is before
5279 * what we believe is the current actual last descriptor,
5280 * then we need to adjust our estimate downward; this permits
5281 * the string table following the last descriptor to be out
5282 * of order relative to the descriptor list.
5283 */
5284 if (j < desc_actual)
5285 desc_actual = j;
91447636 5286 }
2d21ac55
A
5287
5288 /*
5289 * We limit the actual number of descriptors we are willing to process
5290 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5291 * requested does not exceed this limit,
5292 */
5293 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
91447636
A
5294 error = ENOMEM;
5295 goto out;
5296 }
2d21ac55 5297 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
91447636
A
5298 if (result == NULL) {
5299 error = ENOMEM;
5300 goto out;
5301 }
5302
5303 /*
2d21ac55
A
5304 * Do the work by iterating over the descriptor entries we know to
5305 * at least appear to contain valid data.
91447636
A
5306 */
5307 error = 0;
2d21ac55 5308 for (i = 0; i < desc_actual; i++) {
91447636 5309 /*
2d21ac55
A
5310 * If the ad_name_offset is 0, then we use the previous
5311 * results to make the check; otherwise, we are looking up
5312 * a new file name.
91447636
A
5313 */
5314 if (input[i].ad_name_offset != 0) {
5315 /* discard old vnodes */
5316 if (vp) {
5317 vnode_put(vp);
5318 vp = NULL;
5319 }
5320 if (dvp) {
5321 vnode_put(dvp);
5322 dvp = NULL;
5323 }
39037602 5324
2d21ac55
A
5325 /*
5326 * Scan forward in the descriptor list to see if we
5327 * need the parent vnode. We will need it if we are
5328 * deleting, since we must have rights to remove
5329 * entries in the parent directory, as well as the
5330 * rights to delete the object itself.
5331 */
91447636 5332 wantdelete = input[i].ad_flags & _DELETE_OK;
2d21ac55 5333 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
91447636
A
5334 if (input[j].ad_flags & _DELETE_OK)
5335 wantdelete = 1;
39037602 5336
91447636 5337 niopts = FOLLOW | AUDITVNPATH1;
2d21ac55 5338
91447636
A
5339 /* need parent for vnode_authorize for deletion test */
5340 if (wantdelete)
5341 niopts |= WANTPARENT;
5342
5343 /* do the lookup */
6d2010ae
A
5344 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
5345 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
5346 &context);
91447636
A
5347 error = namei(&nd);
5348 if (!error) {
5349 vp = nd.ni_vp;
5350 if (wantdelete)
5351 dvp = nd.ni_dvp;
5352 }
5353 nameidone(&nd);
5354 }
5355
5356 /*
5357 * Handle lookup errors.
5358 */
5359 switch(error) {
5360 case ENOENT:
5361 case EACCES:
5362 case EPERM:
5363 case ENOTDIR:
5364 result[i] = error;
5365 break;
5366 case 0:
5367 /* run this access check */
5368 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
5369 break;
5370 default:
5371 /* fatal lookup error */
5372
5373 goto out;
5374 }
5375 }
5376
b0d623f7
A
5377 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
5378
91447636 5379 /* copy out results */
2d21ac55 5380 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
39037602 5381
91447636 5382out:
2d21ac55 5383 if (input && input != stack_input)
91447636
A
5384 FREE(input, M_TEMP);
5385 if (result)
5386 FREE(result, M_TEMP);
5387 if (vp)
5388 vnode_put(vp);
5389 if (dvp)
5390 vnode_put(dvp);
0c530ab8
A
5391 if (IS_VALID_CRED(context.vc_ucred))
5392 kauth_cred_unref(&context.vc_ucred);
91447636 5393 return(error);
1c79356b
A
5394}
5395
2d21ac55
A
5396
5397/*
5398 * Returns: 0 Success
5399 * namei:EFAULT Bad address
5400 * namei:ENAMETOOLONG Filename too long
5401 * namei:ENOENT No such file or directory
5402 * namei:ELOOP Too many levels of symbolic links
5403 * namei:EBADF Bad file descriptor
5404 * namei:ENOTDIR Not a directory
5405 * namei:???
5406 * access1:
5407 */
fe8ab488
A
5408static int
5409faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5410 int flag, enum uio_seg segflg)
1c79356b 5411{
1c79356b
A
5412 int error;
5413 struct nameidata nd;
91447636
A
5414 int niopts;
5415 struct vfs_context context;
cf7d32b8
A
5416#if NAMEDRSRCFORK
5417 int is_namedstream = 0;
5418#endif
5419
91447636 5420 /*
fe8ab488
A
5421 * Unless the AT_EACCESS option is used, Access is defined as checking
5422 * against the process' real identity, even if operations are checking
5423 * the effective identity. So we need to tweak the credential
5424 * in the context for that case.
91447636 5425 */
fe8ab488
A
5426 if (!(flag & AT_EACCESS))
5427 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5428 else
5429 context.vc_ucred = ctx->vc_ucred;
5430 context.vc_thread = ctx->vc_thread;
5431
91447636
A
5432
5433 niopts = FOLLOW | AUDITVNPATH1;
5434 /* need parent for vnode_authorize for deletion test */
fe8ab488 5435 if (amode & _DELETE_OK)
91447636 5436 niopts |= WANTPARENT;
fe8ab488
A
5437 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5438 path, &context);
2d21ac55
A
5439
5440#if NAMEDRSRCFORK
5441 /* access(F_OK) calls are allowed for resource forks. */
fe8ab488 5442 if (amode == F_OK)
2d21ac55
A
5443 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5444#endif
fe8ab488 5445 error = nameiat(&nd, fd);
91447636
A
5446 if (error)
5447 goto out;
5448
cf7d32b8 5449#if NAMEDRSRCFORK
39037602 5450 /* Grab reference on the shadow stream file vnode to
b0d623f7
A
5451 * force an inactive on release which will mark it
5452 * for recycle.
cf7d32b8
A
5453 */
5454 if (vnode_isnamedstream(nd.ni_vp) &&
b0d623f7
A
5455 (nd.ni_vp->v_parent != NULLVP) &&
5456 vnode_isshadow(nd.ni_vp)) {
cf7d32b8
A
5457 is_namedstream = 1;
5458 vnode_ref(nd.ni_vp);
5459 }
5460#endif
5461
fe8ab488 5462 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
b0d623f7 5463
cf7d32b8
A
5464#if NAMEDRSRCFORK
5465 if (is_namedstream) {
5466 vnode_rele(nd.ni_vp);
5467 }
5468#endif
5469
91447636 5470 vnode_put(nd.ni_vp);
fe8ab488 5471 if (amode & _DELETE_OK)
91447636
A
5472 vnode_put(nd.ni_dvp);
5473 nameidone(&nd);
39037602 5474
91447636 5475out:
fe8ab488
A
5476 if (!(flag & AT_EACCESS))
5477 kauth_cred_unref(&context.vc_ucred);
5478 return (error);
5479}
5480
5481int
5482access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5483{
5484 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5485 uap->path, uap->flags, 0, UIO_USERSPACE));
91447636
A
5486}
5487
fe8ab488
A
5488int
5489faccessat(__unused proc_t p, struct faccessat_args *uap,
5490 __unused int32_t *retval)
5491{
5492 if (uap->flag & ~AT_EACCESS)
5493 return (EINVAL);
5494
5495 return (faccessat_internal(vfs_context_current(), uap->fd,
5496 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5497}
91447636 5498
2d21ac55
A
5499/*
5500 * Returns: 0 Success
5501 * EFAULT
5502 * copyout:EFAULT
5503 * namei:???
5504 * vn_stat:???
5505 */
91447636 5506static int
fe8ab488
A
5507fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5508 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5509 enum uio_seg segflg, int fd, int flag)
91447636 5510{
fe8ab488
A
5511 struct nameidata nd;
5512 int follow;
b0d623f7
A
5513 union {
5514 struct stat sb;
5515 struct stat64 sb64;
527f9951 5516 } source = {};
b0d623f7
A
5517 union {
5518 struct user64_stat user64_sb;
5519 struct user32_stat user32_sb;
5520 struct user64_stat64 user64_sb64;
5521 struct user32_stat64 user32_sb64;
527f9951 5522 } dest = {};
91447636
A
5523 caddr_t sbp;
5524 int error, my_size;
5525 kauth_filesec_t fsec;
5526 size_t xsecurity_bufsize;
2d21ac55 5527 void * statptr;
1c79356b 5528
fe8ab488
A
5529 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5530 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5531 segflg, path, ctx);
5532
2d21ac55 5533#if NAMEDRSRCFORK
cf7d32b8 5534 int is_namedstream = 0;
2d21ac55 5535 /* stat calls are allowed for resource forks. */
fe8ab488 5536 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
2d21ac55 5537#endif
fe8ab488 5538 error = nameiat(&nd, fd);
91447636 5539 if (error)
1c79356b 5540 return (error);
91447636 5541 fsec = KAUTH_FILESEC_NONE;
b0d623f7
A
5542
5543 statptr = (void *)&source;
cf7d32b8
A
5544
5545#if NAMEDRSRCFORK
39037602
A
5546 /* Grab reference on the shadow stream file vnode to
5547 * force an inactive on release which will mark it
b0d623f7 5548 * for recycle.
cf7d32b8 5549 */
fe8ab488
A
5550 if (vnode_isnamedstream(nd.ni_vp) &&
5551 (nd.ni_vp->v_parent != NULLVP) &&
5552 vnode_isshadow(nd.ni_vp)) {
cf7d32b8 5553 is_namedstream = 1;
fe8ab488 5554 vnode_ref(nd.ni_vp);
cf7d32b8
A
5555 }
5556#endif
5557
fe8ab488 5558 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
2d21ac55
A
5559
5560#if NAMEDRSRCFORK
cf7d32b8 5561 if (is_namedstream) {
fe8ab488 5562 vnode_rele(nd.ni_vp);
2d21ac55
A
5563 }
5564#endif
fe8ab488
A
5565 vnode_put(nd.ni_vp);
5566 nameidone(&nd);
91447636 5567
1c79356b
A
5568 if (error)
5569 return (error);
91447636 5570 /* Zap spare fields */
2d21ac55 5571 if (isstat64 != 0) {
b0d623f7
A
5572 source.sb64.st_lspare = 0;
5573 source.sb64.st_qspare[0] = 0LL;
5574 source.sb64.st_qspare[1] = 0LL;
2d21ac55 5575 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
39037602 5576 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
b0d623f7
A
5577 my_size = sizeof(dest.user64_sb64);
5578 sbp = (caddr_t)&dest.user64_sb64;
2d21ac55 5579 } else {
39037602 5580 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
b0d623f7
A
5581 my_size = sizeof(dest.user32_sb64);
5582 sbp = (caddr_t)&dest.user32_sb64;
2d21ac55
A
5583 }
5584 /*
5585 * Check if we raced (post lookup) against the last unlink of a file.
5586 */
b0d623f7
A
5587 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5588 source.sb64.st_nlink = 1;
2d21ac55
A
5589 }
5590 } else {
b0d623f7
A
5591 source.sb.st_lspare = 0;
5592 source.sb.st_qspare[0] = 0LL;
5593 source.sb.st_qspare[1] = 0LL;
2d21ac55 5594 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
39037602 5595 munge_user64_stat(&source.sb, &dest.user64_sb);
b0d623f7
A
5596 my_size = sizeof(dest.user64_sb);
5597 sbp = (caddr_t)&dest.user64_sb;
2d21ac55 5598 } else {
39037602 5599 munge_user32_stat(&source.sb, &dest.user32_sb);
b0d623f7
A
5600 my_size = sizeof(dest.user32_sb);
5601 sbp = (caddr_t)&dest.user32_sb;
2d21ac55
A
5602 }
5603
5604 /*
5605 * Check if we raced (post lookup) against the last unlink of a file.
5606 */
b0d623f7
A
5607 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5608 source.sb.st_nlink = 1;
2d21ac55 5609 }
91447636
A
5610 }
5611 if ((error = copyout(sbp, ub, my_size)) != 0)
5612 goto out;
5613
5614 /* caller wants extended security information? */
5615 if (xsecurity != USER_ADDR_NULL) {
5616
5617 /* did we get any? */
5618 if (fsec == KAUTH_FILESEC_NONE) {
5619 if (susize(xsecurity_size, 0) != 0) {
5620 error = EFAULT;
5621 goto out;
5622 }
5623 } else {
5624 /* find the user buffer size */
5625 xsecurity_bufsize = fusize(xsecurity_size);
5626
5627 /* copy out the actual data size */
5628 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5629 error = EFAULT;
5630 goto out;
5631 }
5632
5633 /* if the caller supplied enough room, copy out to it */
5634 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5635 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5636 }
5637 }
5638out:
5639 if (fsec != KAUTH_FILESEC_NONE)
5640 kauth_filesec_free(fsec);
1c79356b
A
5641 return (error);
5642}
5643
b0d623f7
A
5644/*
5645 * stat_extended: Get file status; with extended security (ACL).
5646 *
5647 * Parameters: p (ignored)
5648 * uap User argument descriptor (see below)
39037602 5649 * retval (ignored)
b0d623f7
A
5650 *
5651 * Indirect: uap->path Path of file to get status from
5652 * uap->ub User buffer (holds file status info)
5653 * uap->xsecurity ACL to get (extended security)
5654 * uap->xsecurity_size Size of ACL
39037602 5655 *
b0d623f7
A
5656 * Returns: 0 Success
5657 * !0 errno value
5658 *
5659 */
2d21ac55 5660int
fe8ab488
A
5661stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5662 __unused int32_t *retval)
2d21ac55 5663{
fe8ab488
A
5664 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5665 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5666 0));
1c79356b
A
5667}
5668
2d21ac55
A
5669/*
5670 * Returns: 0 Success
fe8ab488 5671 * fstatat_internal:??? [see fstatat_internal() in this file]
2d21ac55 5672 */
91447636 5673int
b0d623f7 5674stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
1c79356b 5675{
fe8ab488
A
5676 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5677 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
91447636 5678}
1c79356b 5679
91447636 5680int
b0d623f7 5681stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
91447636 5682{
fe8ab488
A
5683 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5684 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
1c79356b 5685}
1c79356b 5686
b0d623f7
A
5687/*
5688 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5689 *
5690 * Parameters: p (ignored)
5691 * uap User argument descriptor (see below)
39037602 5692 * retval (ignored)
b0d623f7
A
5693 *
5694 * Indirect: uap->path Path of file to get status from
5695 * uap->ub User buffer (holds file status info)
5696 * uap->xsecurity ACL to get (extended security)
5697 * uap->xsecurity_size Size of ACL
39037602 5698 *
b0d623f7
A
5699 * Returns: 0 Success
5700 * !0 errno value
5701 *
5702 */
2d21ac55 5703int
b0d623f7 5704stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
2d21ac55 5705{
fe8ab488
A
5706 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5707 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5708 0));
2d21ac55 5709}
91447636 5710
b0d623f7
A
5711/*
5712 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5713 *
5714 * Parameters: p (ignored)
5715 * uap User argument descriptor (see below)
39037602 5716 * retval (ignored)
b0d623f7
A
5717 *
5718 * Indirect: uap->path Path of file to get status from
5719 * uap->ub User buffer (holds file status info)
5720 * uap->xsecurity ACL to get (extended security)
5721 * uap->xsecurity_size Size of ACL
39037602 5722 *
b0d623f7
A
5723 * Returns: 0 Success
5724 * !0 errno value
5725 *
5726 */
2d21ac55 5727int
b0d623f7 5728lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
2d21ac55 5729{
fe8ab488
A
5730 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5731 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5732 AT_SYMLINK_NOFOLLOW));
91447636
A
5733}
5734
fe8ab488
A
5735/*
5736 * Get file status; this version does not follow links.
5737 */
91447636 5738int
b0d623f7 5739lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
91447636 5740{
fe8ab488
A
5741 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5742 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
2d21ac55 5743}
b0d623f7 5744
2d21ac55 5745int
b0d623f7 5746lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
2d21ac55 5747{
fe8ab488
A
5748 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5749 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
91447636
A
5750}
5751
b0d623f7
A
5752/*
5753 * lstat64_extended: Get file status; can handle large inode numbers; does not
5754 * follow links; with extended security (ACL).
5755 *
5756 * Parameters: p (ignored)
5757 * uap User argument descriptor (see below)
39037602 5758 * retval (ignored)
b0d623f7
A
5759 *
5760 * Indirect: uap->path Path of file to get status from
5761 * uap->ub User buffer (holds file status info)
5762 * uap->xsecurity ACL to get (extended security)
5763 * uap->xsecurity_size Size of ACL
39037602 5764 *
b0d623f7
A
5765 * Returns: 0 Success
5766 * !0 errno value
5767 *
5768 */
91447636 5769int
b0d623f7 5770lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
91447636 5771{
fe8ab488
A
5772 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5773 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5774 AT_SYMLINK_NOFOLLOW));
5775}
5776
5777int
5778fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5779{
5780 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5781 return (EINVAL);
5782
5783 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5784 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5785}
5786
5787int
5788fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5789 __unused int32_t *retval)
5790{
5791 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5792 return (EINVAL);
5793
5794 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5795 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
91447636
A
5796}
5797
1c79356b 5798/*
91447636 5799 * Get configurable pathname variables.
2d21ac55
A
5800 *
5801 * Returns: 0 Success
5802 * namei:???
5803 * vn_pathconf:???
5804 *
5805 * Notes: Global implementation constants are intended to be
5806 * implemented in this function directly; all other constants
5807 * are per-FS implementation, and therefore must be handled in
5808 * each respective FS, instead.
5809 *
5810 * XXX We implement some things globally right now that should actually be
5811 * XXX per-FS; we will need to deal with this at some point.
1c79356b 5812 */
1c79356b
A
5813/* ARGSUSED */
5814int
b0d623f7 5815pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
1c79356b 5816{
1c79356b
A
5817 int error;
5818 struct nameidata nd;
2d21ac55 5819 vfs_context_t ctx = vfs_context_current();
91447636 5820
39037602 5821 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
2d21ac55 5822 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
5823 error = namei(&nd);
5824 if (error)
1c79356b 5825 return (error);
1c79356b 5826
2d21ac55 5827 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
1c79356b 5828
91447636
A
5829 vnode_put(nd.ni_vp);
5830 nameidone(&nd);
1c79356b
A
5831 return (error);
5832}
5833
5834/*
5835 * Return target name of a symbolic link.
5836 */
1c79356b 5837/* ARGSUSED */
fe8ab488
A
5838static int
5839readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
5840 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
5841 int *retval)
1c79356b 5842{
2d21ac55 5843 vnode_t vp;
91447636 5844 uio_t auio;
1c79356b
A
5845 int error;
5846 struct nameidata nd;
91447636
A
5847 char uio_buf[ UIO_SIZEOF(1) ];
5848
fe8ab488
A
5849 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5850 seg, path, ctx);
5851
5852 error = nameiat(&nd, fd);
55e303ae 5853 if (error)
1c79356b
A
5854 return (error);
5855 vp = nd.ni_vp;
91447636
A
5856
5857 nameidone(&nd);
5858
fe8ab488
A
5859 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
5860 &uio_buf[0], sizeof(uio_buf));
5861 uio_addiov(auio, buf, bufsize);
5862 if (vp->v_type != VLNK) {
1c79356b 5863 error = EINVAL;
fe8ab488 5864 } else {
2d21ac55 5865#if CONFIG_MACF
fe8ab488 5866 error = mac_vnode_check_readlink(ctx, vp);
2d21ac55
A
5867#endif
5868 if (error == 0)
fe8ab488
A
5869 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
5870 ctx);
91447636 5871 if (error == 0)
2d21ac55 5872 error = VNOP_READLINK(vp, auio, ctx);
91447636
A
5873 }
5874 vnode_put(vp);
b0d623f7 5875
fe8ab488 5876 *retval = bufsize - (int)uio_resid(auio);
1c79356b
A
5877 return (error);
5878}
5879
fe8ab488
A
5880int
5881readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5882{
5883 enum uio_seg procseg;
5884
5885 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5886 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5887 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5888 uap->count, procseg, retval));
5889}
5890
5891int
5892readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5893{
5894 enum uio_seg procseg;
5895
5896 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5897 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5898 procseg, uap->buf, uap->bufsize, procseg, retval));
5899}
5900
5901/*
5902 * Change file flags.
813fb2f6
A
5903 *
5904 * NOTE: this will vnode_put() `vp'
91447636
A
5905 */
5906static int
5907chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5908{
5909 struct vnode_attr va;
5910 kauth_action_t action;
5911 int error;
5912
5913 VATTR_INIT(&va);
5914 VATTR_SET(&va, va_flags, flags);
5915
2d21ac55
A
5916#if CONFIG_MACF
5917 error = mac_vnode_check_setflags(ctx, vp, flags);
5918 if (error)
5919 goto out;
5920#endif
5921
91447636
A
5922 /* request authorisation, disregard immutability */
5923 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5924 goto out;
5925 /*
5926 * Request that the auth layer disregard those file flags it's allowed to when
5927 * authorizing this operation; we need to do this in order to be able to
5928 * clear immutable flags.
5929 */
5930 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
5931 goto out;
5932 error = vnode_setattr(vp, &va, ctx);
5933
39037602
A
5934#if CONFIG_MACF
5935 if (error == 0)
5936 mac_vnode_notify_setflags(ctx, vp, flags);
5937#endif
5938
2d21ac55
A
5939 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5940 error = ENOTSUP;
5941 }
91447636
A
5942out:
5943 vnode_put(vp);
5944 return(error);
5945}
5946
1c79356b
A
5947/*
5948 * Change flags of a file given a path name.
5949 */
1c79356b
A
5950/* ARGSUSED */
5951int
b0d623f7 5952chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
1c79356b 5953{
2d21ac55
A
5954 vnode_t vp;
5955 vfs_context_t ctx = vfs_context_current();
1c79356b
A
5956 int error;
5957 struct nameidata nd;
5958
55e303ae 5959 AUDIT_ARG(fflags, uap->flags);
39037602 5960 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
2d21ac55 5961 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
5962 error = namei(&nd);
5963 if (error)
1c79356b
A
5964 return (error);
5965 vp = nd.ni_vp;
91447636
A
5966 nameidone(&nd);
5967
813fb2f6 5968 /* we don't vnode_put() here because chflags1 does internally */
2d21ac55 5969 error = chflags1(vp, uap->flags, ctx);
91447636
A
5970
5971 return(error);
1c79356b
A
5972}
5973
5974/*
5975 * Change flags of a file given a file descriptor.
5976 */
1c79356b
A
5977/* ARGSUSED */
5978int
b0d623f7 5979fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
1c79356b 5980{
2d21ac55 5981 vnode_t vp;
1c79356b
A
5982 int error;
5983
55e303ae
A
5984 AUDIT_ARG(fd, uap->fd);
5985 AUDIT_ARG(fflags, uap->flags);
91447636 5986 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 5987 return (error);
55e303ae 5988
91447636
A
5989 if ((error = vnode_getwithref(vp))) {
5990 file_drop(uap->fd);
5991 return(error);
5992 }
e5568f75
A
5993
5994 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5995
813fb2f6 5996 /* we don't vnode_put() here because chflags1 does internally */
2d21ac55 5997 error = chflags1(vp, uap->flags, vfs_context_current());
91447636
A
5998
5999 file_drop(uap->fd);
6000 return (error);
6001}
6002
6003/*
6004 * Change security information on a filesystem object.
2d21ac55
A
6005 *
6006 * Returns: 0 Success
6007 * EPERM Operation not permitted
6008 * vnode_authattr:??? [anything vnode_authattr can return]
6009 * vnode_authorize:??? [anything vnode_authorize can return]
6010 * vnode_setattr:??? [anything vnode_setattr can return]
6011 *
6012 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
6013 * translated to EPERM before being returned.
91447636
A
6014 */
6015static int
fe8ab488 6016chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
91447636
A
6017{
6018 kauth_action_t action;
6019 int error;
39037602 6020
b0d623f7
A
6021 AUDIT_ARG(mode, vap->va_mode);
6022 /* XXX audit new args */
91447636 6023
2d21ac55
A
6024#if NAMEDSTREAMS
6025 /* chmod calls are not allowed for resource forks. */
6026 if (vp->v_flag & VISNAMEDSTREAM) {
6027 return (EPERM);
6028 }
6029#endif
6030
6031#if CONFIG_MACF
316670eb
A
6032 if (VATTR_IS_ACTIVE(vap, va_mode) &&
6033 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
2d21ac55 6034 return (error);
39037602
A
6035
6036 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
6037 if ((error = mac_vnode_check_setowner(ctx, vp,
6038 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
6039 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1)))
6040 return (error);
6041 }
6042
6043 if (VATTR_IS_ACTIVE(vap, va_acl) &&
6044 (error = mac_vnode_check_setacl(ctx, vp, vap->va_acl)))
6045 return (error);
2d21ac55
A
6046#endif
6047
91447636
A
6048 /* make sure that the caller is allowed to set this security information */
6049 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
6050 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6051 if (error == EACCES)
6052 error = EPERM;
6053 return(error);
6054 }
39037602
A
6055
6056 if ((error = vnode_setattr(vp, vap, ctx)) != 0)
6057 return (error);
6058
6059#if CONFIG_MACF
6060 if (VATTR_IS_ACTIVE(vap, va_mode))
6061 mac_vnode_notify_setmode(ctx, vp, (mode_t)vap->va_mode);
6062
6063 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))
6064 mac_vnode_notify_setowner(ctx, vp,
6065 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
6066 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1);
6067
6068 if (VATTR_IS_ACTIVE(vap, va_acl))
6069 mac_vnode_notify_setacl(ctx, vp, vap->va_acl);
6070#endif
91447636 6071
1c79356b
A
6072 return (error);
6073}
6074
91447636 6075
1c79356b 6076/*
b0d623f7 6077 * Change mode of a file given a path name.
2d21ac55
A
6078 *
6079 * Returns: 0 Success
6080 * namei:??? [anything namei can return]
fe8ab488 6081 * chmod_vnode:??? [anything chmod_vnode can return]
1c79356b 6082 */
91447636 6083static int
fe8ab488
A
6084chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
6085 int fd, int flag, enum uio_seg segflg)
91447636
A
6086{
6087 struct nameidata nd;
fe8ab488 6088 int follow, error;
91447636 6089
fe8ab488
A
6090 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6091 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
6092 segflg, path, ctx);
6093 if ((error = nameiat(&nd, fd)))
91447636 6094 return (error);
fe8ab488 6095 error = chmod_vnode(ctx, nd.ni_vp, vap);
91447636
A
6096 vnode_put(nd.ni_vp);
6097 nameidone(&nd);
6098 return(error);
6099}
6100
0c530ab8 6101/*
39037602 6102 * chmod_extended: Change the mode of a file given a path name; with extended
b0d623f7 6103 * argument list (including extended security (ACL)).
0c530ab8
A
6104 *
6105 * Parameters: p Process requesting the open
6106 * uap User argument descriptor (see below)
6107 * retval (ignored)
6108 *
6109 * Indirect: uap->path Path to object (same as 'chmod')
6110 * uap->uid UID to set
6111 * uap->gid GID to set
6112 * uap->mode File mode to set (same as 'chmod')
6113 * uap->xsecurity ACL to set (or delete)
6114 *
6115 * Returns: 0 Success
6116 * !0 errno value
6117 *
6118 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6119 *
6120 * XXX: We should enummerate the possible errno values here, and where
6121 * in the code they originated.
6122 */
1c79356b 6123int
b0d623f7 6124chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
1c79356b 6125{
1c79356b 6126 int error;
91447636
A
6127 struct vnode_attr va;
6128 kauth_filesec_t xsecdst;
6129
b0d623f7
A
6130 AUDIT_ARG(owner, uap->uid, uap->gid);
6131
91447636
A
6132 VATTR_INIT(&va);
6133 if (uap->mode != -1)
6134 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6135 if (uap->uid != KAUTH_UID_NONE)
6136 VATTR_SET(&va, va_uid, uap->uid);
6137 if (uap->gid != KAUTH_GID_NONE)
6138 VATTR_SET(&va, va_gid, uap->gid);
6139
6140 xsecdst = NULL;
6141 switch(uap->xsecurity) {
6142 /* explicit remove request */
6143 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6144 VATTR_SET(&va, va_acl, NULL);
6145 break;
6146 /* not being set */
6147 case USER_ADDR_NULL:
6148 break;
6149 default:
6150 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6151 return(error);
6152 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6153 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
6154 }
1c79356b 6155
fe8ab488
A
6156 error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
6157 UIO_USERSPACE);
55e303ae 6158
91447636
A
6159 if (xsecdst != NULL)
6160 kauth_filesec_free(xsecdst);
6161 return(error);
6162}
4a249263 6163
2d21ac55
A
6164/*
6165 * Returns: 0 Success
fe8ab488 6166 * chmodat:??? [anything chmodat can return]
2d21ac55 6167 */
fe8ab488
A
6168static int
6169fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
6170 int flag, enum uio_seg segflg)
91447636 6171{
91447636
A
6172 struct vnode_attr va;
6173
6174 VATTR_INIT(&va);
fe8ab488
A
6175 VATTR_SET(&va, va_mode, mode & ALLPERMS);
6176
6177 return (chmodat(ctx, path, &va, fd, flag, segflg));
6178}
6179
6180int
6181chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
6182{
6183 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6184 AT_FDCWD, 0, UIO_USERSPACE));
6185}
91447636 6186
fe8ab488
A
6187int
6188fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
6189{
6190 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6191 return (EINVAL);
6192
6193 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6194 uap->fd, uap->flag, UIO_USERSPACE));
1c79356b
A
6195}
6196
6197/*
6198 * Change mode of a file given a file descriptor.
6199 */
91447636 6200static int
2d21ac55 6201fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
1c79356b 6202{
2d21ac55 6203 vnode_t vp;
1c79356b 6204 int error;
55e303ae 6205
91447636 6206 AUDIT_ARG(fd, fd);
55e303ae 6207
91447636
A
6208 if ((error = file_vnode(fd, &vp)) != 0)
6209 return (error);
6210 if ((error = vnode_getwithref(vp)) != 0) {
6211 file_drop(fd);
6212 return(error);
6213 }
55e303ae
A
6214 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6215
fe8ab488 6216 error = chmod_vnode(vfs_context_current(), vp, vap);
91447636
A
6217 (void)vnode_put(vp);
6218 file_drop(fd);
55e303ae 6219
1c79356b
A
6220 return (error);
6221}
6222
b0d623f7
A
6223/*
6224 * fchmod_extended: Change mode of a file given a file descriptor; with
6225 * extended argument list (including extended security (ACL)).
6226 *
6227 * Parameters: p Process requesting to change file mode
6228 * uap User argument descriptor (see below)
39037602 6229 * retval (ignored)
b0d623f7
A
6230 *
6231 * Indirect: uap->mode File mode to set (same as 'chmod')
6232 * uap->uid UID to set
6233 * uap->gid GID to set
6234 * uap->xsecurity ACL to set (or delete)
6235 * uap->fd File descriptor of file to change mode
39037602 6236 *
b0d623f7
A
6237 * Returns: 0 Success
6238 * !0 errno value
6239 *
6240 */
91447636 6241int
b0d623f7 6242fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
91447636
A
6243{
6244 int error;
6245 struct vnode_attr va;
6246 kauth_filesec_t xsecdst;
6247
b0d623f7
A
6248 AUDIT_ARG(owner, uap->uid, uap->gid);
6249
91447636
A
6250 VATTR_INIT(&va);
6251 if (uap->mode != -1)
6252 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6253 if (uap->uid != KAUTH_UID_NONE)
6254 VATTR_SET(&va, va_uid, uap->uid);
6255 if (uap->gid != KAUTH_GID_NONE)
6256 VATTR_SET(&va, va_gid, uap->gid);
6257
6258 xsecdst = NULL;
6259 switch(uap->xsecurity) {
6260 case USER_ADDR_NULL:
6261 VATTR_SET(&va, va_acl, NULL);
6262 break;
39236c6e
A
6263 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6264 VATTR_SET(&va, va_acl, NULL);
6265 break;
6266 /* not being set */
91447636
A
6267 case CAST_USER_ADDR_T(-1):
6268 break;
6269 default:
6270 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6271 return(error);
6272 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6273 }
6274
6275 error = fchmod1(p, uap->fd, &va);
6276
39037602 6277
91447636
A
6278 switch(uap->xsecurity) {
6279 case USER_ADDR_NULL:
6280 case CAST_USER_ADDR_T(-1):
6281 break;
6282 default:
6283 if (xsecdst != NULL)
6284 kauth_filesec_free(xsecdst);
6285 }
6286 return(error);
6287}
6288
6289int
b0d623f7 6290fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
91447636
A
6291{
6292 struct vnode_attr va;
6293
6294 VATTR_INIT(&va);
6295 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6296
6297 return(fchmod1(p, uap->fd, &va));
6298}
6299
6300
1c79356b
A
6301/*
6302 * Set ownership given a path name.
6303 */
1c79356b 6304/* ARGSUSED */
91447636 6305static int
fe8ab488
A
6306fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
6307 gid_t gid, int flag, enum uio_seg segflg)
1c79356b 6308{
2d21ac55 6309 vnode_t vp;
91447636 6310 struct vnode_attr va;
1c79356b
A
6311 int error;
6312 struct nameidata nd;
fe8ab488 6313 int follow;
91447636 6314 kauth_action_t action;
1c79356b 6315
fe8ab488 6316 AUDIT_ARG(owner, uid, gid);
55e303ae 6317
fe8ab488
A
6318 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6319 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
6320 path, ctx);
6321 error = nameiat(&nd, fd);
55e303ae 6322 if (error)
1c79356b
A
6323 return (error);
6324 vp = nd.ni_vp;
6325
91447636
A
6326 nameidone(&nd);
6327
91447636 6328 VATTR_INIT(&va);
fe8ab488
A
6329 if (uid != (uid_t)VNOVAL)
6330 VATTR_SET(&va, va_uid, uid);
6331 if (gid != (gid_t)VNOVAL)
6332 VATTR_SET(&va, va_gid, gid);
91447636 6333
2d21ac55 6334#if CONFIG_MACF
fe8ab488 6335 error = mac_vnode_check_setowner(ctx, vp, uid, gid);
2d21ac55
A
6336 if (error)
6337 goto out;
6338#endif
6339
91447636
A
6340 /* preflight and authorize attribute changes */
6341 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6342 goto out;
6343 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6344 goto out;
6345 error = vnode_setattr(vp, &va, ctx);
39037602
A
6346
6347#if CONFIG_MACF
6348 if (error == 0)
6349 mac_vnode_notify_setowner(ctx, vp, uid, gid);
6350#endif
6351
91447636
A
6352out:
6353 /*
6354 * EACCES is only allowed from namei(); permissions failure should
6355 * return EPERM, so we need to translate the error code.
6356 */
6357 if (error == EACCES)
6358 error = EPERM;
fe8ab488 6359
91447636 6360 vnode_put(vp);
1c79356b
A
6361 return (error);
6362}
6363
91447636 6364int
fe8ab488 6365chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
91447636 6366{
fe8ab488
A
6367 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6368 uap->uid, uap->gid, 0, UIO_USERSPACE));
91447636
A
6369}
6370
6371int
fe8ab488 6372lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
91447636 6373{
fe8ab488
A
6374 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6375 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6376}
6377
6378int
6379fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6380{
6381 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6382 return (EINVAL);
6383
6384 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6385 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
91447636
A
6386}
6387
1c79356b
A
6388/*
6389 * Set ownership given a file descriptor.
6390 */
1c79356b
A
6391/* ARGSUSED */
6392int
b0d623f7 6393fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
1c79356b 6394{
91447636 6395 struct vnode_attr va;
2d21ac55
A
6396 vfs_context_t ctx = vfs_context_current();
6397 vnode_t vp;
1c79356b 6398 int error;
91447636 6399 kauth_action_t action;
1c79356b 6400
55e303ae
A
6401 AUDIT_ARG(owner, uap->uid, uap->gid);
6402 AUDIT_ARG(fd, uap->fd);
6403
91447636 6404 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 6405 return (error);
55e303ae 6406
91447636
A
6407 if ( (error = vnode_getwithref(vp)) ) {
6408 file_drop(uap->fd);
6409 return(error);
6410 }
55e303ae
A
6411 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6412
91447636
A
6413 VATTR_INIT(&va);
6414 if (uap->uid != VNOVAL)
6415 VATTR_SET(&va, va_uid, uap->uid);
6416 if (uap->gid != VNOVAL)
6417 VATTR_SET(&va, va_gid, uap->gid);
6418
2d21ac55
A
6419#if NAMEDSTREAMS
6420 /* chown calls are not allowed for resource forks. */
6421 if (vp->v_flag & VISNAMEDSTREAM) {
6422 error = EPERM;
6423 goto out;
6424 }
6425#endif
6426
6427#if CONFIG_MACF
6428 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
6429 if (error)
6430 goto out;
6431#endif
91447636
A
6432
6433 /* preflight and authorize attribute changes */
2d21ac55 6434 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
91447636 6435 goto out;
2d21ac55 6436 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
91447636
A
6437 if (error == EACCES)
6438 error = EPERM;
6439 goto out;
6440 }
2d21ac55 6441 error = vnode_setattr(vp, &va, ctx);
4a249263 6442
39037602
A
6443#if CONFIG_MACF
6444 if (error == 0)
6445 mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid);
6446#endif
6447
91447636
A
6448out:
6449 (void)vnode_put(vp);
6450 file_drop(uap->fd);
1c79356b
A
6451 return (error);
6452}
6453
9bccf70c 6454static int
2d21ac55 6455getutimes(user_addr_t usrtvp, struct timespec *tsp)
9bccf70c 6456{
9bccf70c
A
6457 int error;
6458
91447636
A
6459 if (usrtvp == USER_ADDR_NULL) {
6460 struct timeval old_tv;
6461 /* XXX Y2038 bug because of microtime argument */
6462 microtime(&old_tv);
6463 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
9bccf70c
A
6464 tsp[1] = tsp[0];
6465 } else {
91447636 6466 if (IS_64BIT_PROCESS(current_proc())) {
b0d623f7 6467 struct user64_timeval tv[2];
91447636 6468 error = copyin(usrtvp, (void *)tv, sizeof(tv));
b0d623f7
A
6469 if (error)
6470 return (error);
6471 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6472 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 6473 } else {
b0d623f7
A
6474 struct user32_timeval tv[2];
6475 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6476 if (error)
6477 return (error);
6478 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6479 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 6480 }
9bccf70c
A
6481 }
6482 return 0;
6483}
6484
6485static int
2d21ac55 6486setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
91447636 6487 int nullflag)
9bccf70c
A
6488{
6489 int error;
91447636
A
6490 struct vnode_attr va;
6491 kauth_action_t action;
e5568f75
A
6492
6493 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6494
91447636
A
6495 VATTR_INIT(&va);
6496 VATTR_SET(&va, va_access_time, ts[0]);
6497 VATTR_SET(&va, va_modify_time, ts[1]);
9bccf70c 6498 if (nullflag)
91447636
A
6499 va.va_vaflags |= VA_UTIMES_NULL;
6500
2d21ac55
A
6501#if NAMEDSTREAMS
6502 /* utimes calls are not allowed for resource forks. */
6503 if (vp->v_flag & VISNAMEDSTREAM) {
6504 error = EPERM;
6505 goto out;
6506 }
6507#endif
6508
6509#if CONFIG_MACF
6510 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
6511 if (error)
6512 goto out;
6513#endif
6514 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
6515 if (!nullflag && error == EACCES)
6516 error = EPERM;
91447636 6517 goto out;
2d21ac55
A
6518 }
6519
91447636 6520 /* since we may not need to auth anything, check here */
2d21ac55
A
6521 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6522 if (!nullflag && error == EACCES)
6523 error = EPERM;
91447636 6524 goto out;
2d21ac55 6525 }
91447636 6526 error = vnode_setattr(vp, &va, ctx);
4a249263 6527
39037602
A
6528#if CONFIG_MACF
6529 if (error == 0)
6530 mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]);
6531#endif
6532
9bccf70c
A
6533out:
6534 return error;
6535}
6536
1c79356b
A
6537/*
6538 * Set the access and modification times of a file.
6539 */
1c79356b
A
6540/* ARGSUSED */
6541int
b0d623f7 6542utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
1c79356b 6543{
9bccf70c 6544 struct timespec ts[2];
91447636 6545 user_addr_t usrtvp;
1c79356b
A
6546 int error;
6547 struct nameidata nd;
2d21ac55 6548 vfs_context_t ctx = vfs_context_current();
1c79356b 6549
2d21ac55 6550 /*
39037602 6551 * AUDIT: Needed to change the order of operations to do the
55e303ae
A
6552 * name lookup first because auditing wants the path.
6553 */
39037602 6554 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
2d21ac55 6555 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
6556 error = namei(&nd);
6557 if (error)
9bccf70c 6558 return (error);
91447636 6559 nameidone(&nd);
55e303ae 6560
91447636
A
6561 /*
6562 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6563 * the current time instead.
6564 */
55e303ae 6565 usrtvp = uap->tptr;
91447636
A
6566 if ((error = getutimes(usrtvp, ts)) != 0)
6567 goto out;
6568
2d21ac55 6569 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
91447636
A
6570
6571out:
6572 vnode_put(nd.ni_vp);
1c79356b
A
6573 return (error);
6574}
6575
9bccf70c
A
6576/*
6577 * Set the access and modification times of a file.
6578 */
9bccf70c
A
6579/* ARGSUSED */
6580int
b0d623f7 6581futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
9bccf70c
A
6582{
6583 struct timespec ts[2];
2d21ac55 6584 vnode_t vp;
91447636 6585 user_addr_t usrtvp;
9bccf70c
A
6586 int error;
6587
55e303ae 6588 AUDIT_ARG(fd, uap->fd);
9bccf70c
A
6589 usrtvp = uap->tptr;
6590 if ((error = getutimes(usrtvp, ts)) != 0)
6591 return (error);
91447636 6592 if ((error = file_vnode(uap->fd, &vp)) != 0)
9bccf70c 6593 return (error);
91447636
A
6594 if((error = vnode_getwithref(vp))) {
6595 file_drop(uap->fd);
6596 return(error);
6597 }
55e303ae 6598
2d21ac55 6599 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
91447636
A
6600 vnode_put(vp);
6601 file_drop(uap->fd);
6602 return(error);
9bccf70c
A
6603}
6604
1c79356b
A
6605/*
6606 * Truncate a file given its path name.
6607 */
1c79356b
A
6608/* ARGSUSED */
6609int
b0d623f7 6610truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
1c79356b 6611{
2d21ac55 6612 vnode_t vp;
91447636 6613 struct vnode_attr va;
2d21ac55 6614 vfs_context_t ctx = vfs_context_current();
1c79356b
A
6615 int error;
6616 struct nameidata nd;
91447636
A
6617 kauth_action_t action;
6618
0b4e3aa0
A
6619 if (uap->length < 0)
6620 return(EINVAL);
39037602 6621 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
2d21ac55 6622 UIO_USERSPACE, uap->path, ctx);
91447636 6623 if ((error = namei(&nd)))
1c79356b
A
6624 return (error);
6625 vp = nd.ni_vp;
91447636
A
6626
6627 nameidone(&nd);
6628
6629 VATTR_INIT(&va);
6630 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55
A
6631
6632#if CONFIG_MACF
6633 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
6634 if (error)
6635 goto out;
6636#endif
6637
6638 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
91447636 6639 goto out;
2d21ac55 6640 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
91447636 6641 goto out;
2d21ac55 6642 error = vnode_setattr(vp, &va, ctx);
39037602
A
6643
6644#if CONFIG_MACF
6645 if (error == 0)
6646 mac_vnode_notify_truncate(ctx, NOCRED, vp);
6647#endif
6648
91447636
A
6649out:
6650 vnode_put(vp);
1c79356b
A
6651 return (error);
6652}
6653
6654/*
6655 * Truncate a file given a file descriptor.
6656 */
1c79356b
A
6657/* ARGSUSED */
6658int
b0d623f7 6659ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
1c79356b 6660{
2d21ac55 6661 vfs_context_t ctx = vfs_context_current();
91447636 6662 struct vnode_attr va;
2d21ac55 6663 vnode_t vp;
91447636
A
6664 struct fileproc *fp;
6665 int error ;
6666 int fd = uap->fd;
1c79356b 6667
55e303ae 6668 AUDIT_ARG(fd, uap->fd);
0b4e3aa0
A
6669 if (uap->length < 0)
6670 return(EINVAL);
39037602 6671
91447636
A
6672 if ( (error = fp_lookup(p,fd,&fp,0)) ) {
6673 return(error);
6674 }
1c79356b 6675
39236c6e
A
6676 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
6677 case DTYPE_PSXSHM:
91447636
A
6678 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
6679 goto out;
39236c6e
A
6680 case DTYPE_VNODE:
6681 break;
6682 default:
91447636
A
6683 error = EINVAL;
6684 goto out;
1c79356b 6685 }
1c79356b 6686
2d21ac55 6687 vp = (vnode_t)fp->f_fglob->fg_data;
e5568f75 6688
91447636
A
6689 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
6690 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6691 error = EINVAL;
6692 goto out;
1c79356b 6693 }
1c79356b 6694
91447636
A
6695 if ((error = vnode_getwithref(vp)) != 0) {
6696 goto out;
6697 }
1c79356b 6698
91447636 6699 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 6700
2d21ac55
A
6701#if CONFIG_MACF
6702 error = mac_vnode_check_truncate(ctx,
6703 fp->f_fglob->fg_cred, vp);
6704 if (error) {
6705 (void)vnode_put(vp);
6706 goto out;
6707 }
6708#endif
91447636
A
6709 VATTR_INIT(&va);
6710 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55 6711 error = vnode_setattr(vp, &va, ctx);
39037602
A
6712
6713#if CONFIG_MACF
6714 if (error == 0)
6715 mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp);
6716#endif
6717
91447636
A
6718 (void)vnode_put(vp);
6719out:
6720 file_drop(fd);
6721 return (error);
1c79356b 6722}
91447636 6723
1c79356b
A
6724
6725/*
b0d623f7 6726 * Sync an open file with synchronized I/O _file_ integrity completion
1c79356b 6727 */
1c79356b
A
6728/* ARGSUSED */
6729int
b0d623f7 6730fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
1c79356b 6731{
2d21ac55 6732 __pthread_testcancel(1);
b0d623f7
A
6733 return(fsync_common(p, uap, MNT_WAIT));
6734}
6735
6736
6737/*
6738 * Sync an open file with synchronized I/O _file_ integrity completion
6739 *
6740 * Notes: This is a legacy support function that does not test for
6741 * thread cancellation points.
6742 */
6743/* ARGSUSED */
39037602 6744int
b0d623f7
A
6745fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6746{
6747 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
2d21ac55
A
6748}
6749
b0d623f7
A
6750
6751/*
6752 * Sync an open file with synchronized I/O _data_ integrity completion
6753 */
6754/* ARGSUSED */
2d21ac55 6755int
b0d623f7
A
6756fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6757{
6758 __pthread_testcancel(1);
6759 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6760}
6761
6762
6763/*
6764 * fsync_common
6765 *
6766 * Common fsync code to support both synchronized I/O file integrity completion
6767 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6768 *
6769 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6770 * will only guarantee that the file data contents are retrievable. If
6771 * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
6772 * includes additional metadata unnecessary for retrieving the file data
6773 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6774 * storage.
6775 *
6776 * Parameters: p The process
6777 * uap->fd The descriptor to synchronize
6778 * flags The data integrity flags
6779 *
6780 * Returns: int Success
6781 * fp_getfvp:EBADF Bad file descriptor
6782 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6783 * VNOP_FSYNC:??? unspecified
6784 *
6785 * Notes: We use struct fsync_args because it is a short name, and all
6786 * caller argument structures are otherwise identical.
6787 */
6788static int
6789fsync_common(proc_t p, struct fsync_args *uap, int flags)
2d21ac55
A
6790{
6791 vnode_t vp;
91447636 6792 struct fileproc *fp;
2d21ac55 6793 vfs_context_t ctx = vfs_context_current();
1c79356b
A
6794 int error;
6795
b0d623f7
A
6796 AUDIT_ARG(fd, uap->fd);
6797
91447636 6798 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
1c79356b 6799 return (error);
91447636
A
6800 if ( (error = vnode_getwithref(vp)) ) {
6801 file_drop(uap->fd);
6802 return(error);
6803 }
91447636 6804
b0d623f7
A
6805 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6806
6807 error = VNOP_FSYNC(vp, flags, ctx);
2d21ac55
A
6808
6809#if NAMEDRSRCFORK
6810 /* Sync resource fork shadow file if necessary. */
6811 if ((error == 0) &&
39037602 6812 (vp->v_flag & VISNAMEDSTREAM) &&
2d21ac55 6813 (vp->v_parent != NULLVP) &&
b0d623f7 6814 vnode_isshadow(vp) &&
2d21ac55
A
6815 (fp->f_flags & FP_WRITTEN)) {
6816 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
6817 }
6818#endif
91447636
A
6819
6820 (void)vnode_put(vp);
6821 file_drop(uap->fd);
1c79356b
A
6822 return (error);
6823}
6824
6825/*
39037602 6826 * Duplicate files. Source must be a file, target must be a file or
1c79356b 6827 * must not exist.
91447636
A
6828 *
6829 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6830 * perform inheritance correctly.
1c79356b 6831 */
1c79356b
A
6832/* ARGSUSED */
6833int
b0d623f7 6834copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
1c79356b 6835{
91447636 6836 vnode_t tvp, fvp, tdvp, sdvp;
1c79356b
A
6837 struct nameidata fromnd, tond;
6838 int error;
2d21ac55 6839 vfs_context_t ctx = vfs_context_current();
39037602
A
6840#if CONFIG_MACF
6841 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
6842 struct vnode_attr va;
6843#endif
55e303ae
A
6844
6845 /* Check that the flags are valid. */
1c79356b
A
6846
6847 if (uap->flags & ~CPF_MASK) {
55e303ae
A
6848 return(EINVAL);
6849 }
1c79356b 6850
4bd07ac2 6851 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
2d21ac55 6852 UIO_USERSPACE, uap->from, ctx);
91447636 6853 if ((error = namei(&fromnd)))
1c79356b
A
6854 return (error);
6855 fvp = fromnd.ni_vp;
6856
6d2010ae
A
6857 NDINIT(&tond, CREATE, OP_LINK,
6858 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6859 UIO_USERSPACE, uap->to, ctx);
91447636 6860 if ((error = namei(&tond))) {
1c79356b
A
6861 goto out1;
6862 }
6863 tdvp = tond.ni_dvp;
6864 tvp = tond.ni_vp;
91447636 6865
1c79356b
A
6866 if (tvp != NULL) {
6867 if (!(uap->flags & CPF_OVERWRITE)) {
6868 error = EEXIST;
6869 goto out;
6870 }
6871 }
39037602 6872
1c79356b
A
6873 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
6874 error = EISDIR;
6875 goto out;
6876 }
6877
39037602
A
6878 /* This calls existing MAC hooks for open */
6879 if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx,
6880 NULL))) {
6881 goto out;
6882 }
6883
6884 if (tvp) {
6885 /*
6886 * See unlinkat_internal for an explanation of the potential
6887 * ENOENT from the MAC hook but the gist is that the MAC hook
6888 * can fail because vn_getpath isn't able to return the full
6889 * path. We choose to ignore this failure.
6890 */
6891 error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL);
6892 if (error && error != ENOENT)
6893 goto out;
6894 error = 0;
6895 }
6896
6897#if CONFIG_MACF
6898 VATTR_INIT(&va);
6899 VATTR_SET(&va, va_type, fvp->v_type);
6900 /* Mask off all but regular access permissions */
6901 VATTR_SET(&va, va_mode,
6902 ((((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
6903 error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
6904 if (error)
6905 goto out;
6906#endif /* CONFIG_MACF */
6907
2d21ac55 6908 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
1c79356b
A
6909 goto out;
6910
6911 if (fvp == tdvp)
6912 error = EINVAL;
6913 /*
6914 * If source is the same as the destination (that is the
6915 * same inode number) then there is nothing to do.
6916 * (fixed to have POSIX semantics - CSM 3/2/98)
6917 */
6918 if (fvp == tvp)
6919 error = -1;
91447636 6920 if (!error)
2d21ac55 6921 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
1c79356b 6922out:
91447636
A
6923 sdvp = tond.ni_startdir;
6924 /*
6925 * nameidone has to happen before we vnode_put(tdvp)
6926 * since it may need to release the fs_nodelock on the tdvp
6927 */
6928 nameidone(&tond);
6929
6930 if (tvp)
6931 vnode_put(tvp);
6932 vnode_put(tdvp);
6933 vnode_put(sdvp);
1c79356b 6934out1:
91447636
A
6935 vnode_put(fvp);
6936
91447636
A
6937 nameidone(&fromnd);
6938
1c79356b
A
6939 if (error == -1)
6940 return (0);
6941 return (error);
6942}
6943
39037602 6944#define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
91447636 6945
1c79356b 6946/*
39037602
A
6947 * Helper function for doing clones. The caller is expected to provide an
6948 * iocounted source vnode and release it.
1c79356b 6949 */
fe8ab488 6950static int
39037602
A
6951clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
6952 user_addr_t dst, uint32_t flags, vfs_context_t ctx)
1c79356b 6953{
91447636 6954 vnode_t tvp, tdvp;
39037602 6955 struct nameidata tond;
1c79356b 6956 int error;
39037602 6957 int follow;
813fb2f6 6958 boolean_t free_src_acl;
39037602
A
6959 boolean_t attr_cleanup;
6960 enum vtype v_type;
6961 kauth_action_t action;
6962 struct componentname *cnp;
6963 uint32_t defaulted;
6964 struct vnode_attr va;
813fb2f6 6965 struct vnode_attr nva;
5ba3f43e 6966 uint32_t vnop_flags;
316670eb 6967
39037602
A
6968 v_type = vnode_vtype(fvp);
6969 switch (v_type) {
6970 case VLNK:
6971 /* FALLTHRU */
6972 case VREG:
6973 action = KAUTH_VNODE_ADD_FILE;
6974 break;
6975 case VDIR:
6976 if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
6977 fvp->v_mountedhere) {
6978 return (EINVAL);
6979 }
6980 action = KAUTH_VNODE_ADD_SUBDIRECTORY;
6981 break;
6982 default:
6983 return (EINVAL);
6984 }
6985
6986 AUDIT_ARG(fd2, dst_dirfd);
6987 AUDIT_ARG(value32, flags);
6988
6989 follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6990 NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
6991 UIO_USERSPACE, dst, ctx);
6992 if ((error = nameiat(&tond, dst_dirfd)))
6993 return (error);
6994 cnp = &tond.ni_cnd;
6995 tdvp = tond.ni_dvp;
6996 tvp = tond.ni_vp;
6997
813fb2f6 6998 free_src_acl = FALSE;
39037602
A
6999 attr_cleanup = FALSE;
7000
7001 if (tvp != NULL) {
7002 error = EEXIST;
7003 goto out;
7004 }
7005
7006 if (vnode_mount(tdvp) != vnode_mount(fvp)) {
7007 error = EXDEV;
7008 goto out;
7009 }
7010
7011#if CONFIG_MACF
7012 if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp)))
7013 goto out;
7014#endif
7015 if ((error = vnode_authorize(tdvp, NULL, action, ctx)))
7016 goto out;
7017
7018 action = KAUTH_VNODE_GENERIC_READ_BITS;
7019 if (data_read_authorised)
7020 action &= ~KAUTH_VNODE_READ_DATA;
7021 if ((error = vnode_authorize(fvp, NULL, action, ctx)))
7022 goto out;
7023
7024 /*
7025 * certain attributes may need to be changed from the source, we ask for
7026 * those here.
7027 */
7028 VATTR_INIT(&va);
813fb2f6
A
7029 VATTR_WANTED(&va, va_uid);
7030 VATTR_WANTED(&va, va_gid);
39037602
A
7031 VATTR_WANTED(&va, va_mode);
7032 VATTR_WANTED(&va, va_flags);
7033 VATTR_WANTED(&va, va_acl);
7034
7035 if ((error = vnode_getattr(fvp, &va, ctx)) != 0)
7036 goto out;
7037
813fb2f6
A
7038 VATTR_INIT(&nva);
7039 VATTR_SET(&nva, va_type, v_type);
7040 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) {
7041 VATTR_SET(&nva, va_acl, va.va_acl);
7042 free_src_acl = TRUE;
39037602
A
7043 }
7044
7045 /* Handle ACL inheritance, initialize vap. */
7046 if (v_type == VLNK) {
813fb2f6 7047 error = vnode_authattr_new(tdvp, &nva, 0, ctx);
39037602 7048 } else {
813fb2f6
A
7049 error = vn_attribute_prepare(tdvp, &nva, &defaulted, ctx);
7050 if (error)
7051 goto out;
39037602
A
7052 attr_cleanup = TRUE;
7053 }
7054
5ba3f43e 7055 vnop_flags = VNODE_CLONEFILE_DEFAULT;
813fb2f6
A
7056 /*
7057 * We've got initial values for all security parameters,
7058 * If we are superuser, then we can change owners to be the
7059 * same as the source. Both superuser and the owner have default
7060 * WRITE_SECURITY privileges so all other fields can be taken
7061 * from source as well.
7062 */
5ba3f43e 7063 if (!(flags & CLONE_NOOWNERCOPY) && vfs_context_issuser(ctx)) {
813fb2f6
A
7064 if (VATTR_IS_SUPPORTED(&va, va_uid))
7065 VATTR_SET(&nva, va_uid, va.va_uid);
7066 if (VATTR_IS_SUPPORTED(&va, va_gid))
7067 VATTR_SET(&nva, va_gid, va.va_gid);
5ba3f43e
A
7068 } else {
7069 vnop_flags |= VNODE_CLONEFILE_NOOWNERCOPY;
813fb2f6 7070 }
5ba3f43e 7071
813fb2f6
A
7072 if (VATTR_IS_SUPPORTED(&va, va_mode))
7073 VATTR_SET(&nva, va_mode, va.va_mode);
7074 if (VATTR_IS_SUPPORTED(&va, va_flags)) {
7075 VATTR_SET(&nva, va_flags,
5ba3f43e
A
7076 ((va.va_flags & ~(UF_DATAVAULT | SF_RESTRICTED)) | /* Turn off from source */
7077 (nva.va_flags & (UF_DATAVAULT | SF_RESTRICTED))));
39037602
A
7078 }
7079
5ba3f43e 7080 error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &nva, vnop_flags, ctx);
39037602
A
7081
7082 if (!error && tvp) {
7083 int update_flags = 0;
7084#if CONFIG_FSE
7085 int fsevent;
7086#endif /* CONFIG_FSE */
7087
7088#if CONFIG_MACF
7089 (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
7090 VNODE_LABEL_CREATE, ctx);
7091#endif
7092 /*
7093 * If some of the requested attributes weren't handled by the
7094 * VNOP, use our fallback code.
7095 */
7096 if (!VATTR_ALL_SUPPORTED(&va))
813fb2f6 7097 (void)vnode_setattr_fallback(tvp, &nva, ctx);
39037602
A
7098
7099 // Make sure the name & parent pointers are hooked up
7100 if (tvp->v_name == NULL)
7101 update_flags |= VNODE_UPDATE_NAME;
7102 if (tvp->v_parent == NULLVP)
7103 update_flags |= VNODE_UPDATE_PARENT;
7104
7105 if (update_flags) {
7106 (void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
7107 cnp->cn_namelen, cnp->cn_hash, update_flags);
7108 }
7109
7110#if CONFIG_FSE
7111 switch (vnode_vtype(tvp)) {
7112 case VLNK:
7113 /* FALLTHRU */
7114 case VREG:
7115 fsevent = FSE_CREATE_FILE;
7116 break;
7117 case VDIR:
7118 fsevent = FSE_CREATE_DIR;
7119 break;
7120 default:
7121 goto out;
7122 }
7123
7124 if (need_fsevent(fsevent, tvp)) {
5ba3f43e
A
7125 /*
7126 * The following is a sequence of three explicit events.
7127 * A pair of FSE_CLONE events representing the source and destination
7128 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
7129 * fseventsd may coalesce the destination clone and create events
7130 * into a single event resulting in the following sequence for a client
7131 * FSE_CLONE (src)
7132 * FSE_CLONE | FSE_CREATE (dst)
7133 */
7134 add_fsevent(FSE_CLONE, ctx, FSE_ARG_VNODE, fvp, FSE_ARG_VNODE, tvp,
7135 FSE_ARG_DONE);
39037602
A
7136 add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
7137 FSE_ARG_DONE);
7138 }
7139#endif /* CONFIG_FSE */
7140 }
39037602
A
7141
7142out:
7143 if (attr_cleanup)
813fb2f6
A
7144 vn_attribute_cleanup(&nva, defaulted);
7145 if (free_src_acl && va.va_acl)
39037602
A
7146 kauth_acl_free(va.va_acl);
7147 nameidone(&tond);
7148 if (tvp)
7149 vnode_put(tvp);
7150 vnode_put(tdvp);
7151 return (error);
7152}
7153
7154/*
7155 * clone files or directories, target must not exist.
7156 */
7157/* ARGSUSED */
7158int
7159clonefileat(__unused proc_t p, struct clonefileat_args *uap,
7160 __unused int32_t *retval)
7161{
7162 vnode_t fvp;
7163 struct nameidata fromnd;
7164 int follow;
7165 int error;
7166 vfs_context_t ctx = vfs_context_current();
7167
7168 /* Check that the flags are valid. */
5ba3f43e 7169 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY))
39037602
A
7170 return (EINVAL);
7171
7172 AUDIT_ARG(fd, uap->src_dirfd);
7173
7174 follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7175 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
7176 UIO_USERSPACE, uap->src, ctx);
7177 if ((error = nameiat(&fromnd, uap->src_dirfd)))
7178 return (error);
7179
7180 fvp = fromnd.ni_vp;
7181 nameidone(&fromnd);
7182
7183 error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
7184 uap->flags, ctx);
7185
7186 vnode_put(fvp);
7187 return (error);
7188}
7189
7190int
7191fclonefileat(__unused proc_t p, struct fclonefileat_args *uap,
7192 __unused int32_t *retval)
7193{
7194 vnode_t fvp;
7195 struct fileproc *fp;
7196 int error;
7197 vfs_context_t ctx = vfs_context_current();
7198
5ba3f43e
A
7199 /* Check that the flags are valid. */
7200 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY))
7201 return (EINVAL);
7202
39037602
A
7203 AUDIT_ARG(fd, uap->src_fd);
7204 error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
7205 if (error)
7206 return (error);
7207
7208 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7209 AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
7210 error = EBADF;
7211 goto out;
7212 }
7213
7214 if ((error = vnode_getwithref(fvp)))
7215 goto out;
7216
7217 AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
7218
7219 error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
7220 uap->flags, ctx);
7221
7222 vnode_put(fvp);
7223out:
7224 file_drop(uap->src_fd);
7225 return (error);
7226}
7227
7228/*
7229 * Rename files. Source and destination must either both be directories,
7230 * or both not be directories. If target is a directory, it must be empty.
7231 */
7232/* ARGSUSED */
7233static int
7234renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
7235 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
7236{
7237 if (flags & ~VFS_RENAME_FLAGS_MASK)
7238 return EINVAL;
7239
7240 if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL))
7241 return EINVAL;
7242
7243 vnode_t tvp, tdvp;
7244 vnode_t fvp, fdvp;
7245 struct nameidata *fromnd, *tond;
7246 int error;
7247 int do_retry;
7248 int retry_count;
7249 int mntrename;
7250 int need_event;
7251 const char *oname = NULL;
7252 char *from_name = NULL, *to_name = NULL;
7253 int from_len=0, to_len=0;
7254 int holding_mntlock;
7255 mount_t locked_mp = NULL;
7256 vnode_t oparent = NULLVP;
7257#if CONFIG_FSE
7258 fse_info from_finfo, to_finfo;
7259#endif
7260 int from_truncated=0, to_truncated;
7261 int batched = 0;
7262 struct vnode_attr *fvap, *tvap;
7263 int continuing = 0;
7264 /* carving out a chunk for structs that are too big to be on stack. */
7265 struct {
7266 struct nameidata from_node, to_node;
7267 struct vnode_attr fv_attr, tv_attr;
7268 } * __rename_data;
7269 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
7270 fromnd = &__rename_data->from_node;
7271 tond = &__rename_data->to_node;
7272
7273 holding_mntlock = 0;
7274 do_retry = 0;
7275 retry_count = 0;
91447636
A
7276retry:
7277 fvp = tvp = NULL;
7278 fdvp = tdvp = NULL;
6d2010ae 7279 fvap = tvap = NULL;
1c79356b
A
7280 mntrename = FALSE;
7281
316670eb 7282 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
fe8ab488 7283 segflg, from, ctx);
316670eb 7284 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
fe8ab488 7285
316670eb 7286 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
fe8ab488 7287 segflg, to, ctx);
316670eb 7288 tond->ni_flag = NAMEI_COMPOUNDRENAME;
fe8ab488 7289
6d2010ae 7290continue_lookup:
316670eb 7291 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
fe8ab488 7292 if ( (error = nameiat(fromnd, fromfd)) )
6d2010ae 7293 goto out1;
316670eb
A
7294 fdvp = fromnd->ni_dvp;
7295 fvp = fromnd->ni_vp;
1c79356b 7296
6d2010ae 7297 if (fvp && fvp->v_type == VDIR)
316670eb 7298 tond->ni_cnd.cn_flags |= WILLBEDIR;
6d2010ae 7299 }
2d21ac55 7300
316670eb 7301 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
fe8ab488 7302 if ( (error = nameiat(tond, tofd)) ) {
6d2010ae
A
7303 /*
7304 * Translate error code for rename("dir1", "dir2/.").
7305 */
fe8ab488 7306 if (error == EISDIR && fvp->v_type == VDIR)
6d2010ae
A
7307 error = EINVAL;
7308 goto out1;
7309 }
316670eb
A
7310 tdvp = tond->ni_dvp;
7311 tvp = tond->ni_vp;
fe8ab488 7312 }
91447636 7313
00867663
A
7314#if DEVELOPMENT || DEBUG
7315 /*
7316 * XXX VSWAP: Check for entitlements or special flag here
7317 * so we can restrict access appropriately.
7318 */
7319#else /* DEVELOPMENT || DEBUG */
7320
7321 if (fromnd->ni_vp && vnode_isswap(fromnd->ni_vp) && (ctx != vfs_context_kernel())) {
7322 error = EPERM;
7323 goto out1;
7324 }
7325
7326 if (tond->ni_vp && vnode_isswap(tond->ni_vp) && (ctx != vfs_context_kernel())) {
7327 error = EPERM;
7328 goto out1;
7329 }
7330#endif /* DEVELOPMENT || DEBUG */
7331
39037602
A
7332 if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
7333 error = ENOENT;
7334 goto out1;
7335 }
7336
7337 if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
7338 error = EEXIST;
7339 goto out1;
7340 }
7341
6d2010ae
A
7342 batched = vnode_compound_rename_available(fdvp);
7343 if (!fvp) {
fe8ab488 7344 /*
6d2010ae
A
7345 * Claim: this check will never reject a valid rename.
7346 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
7347 * Suppose fdvp and tdvp are not on the same mount.
fe8ab488 7348 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6d2010ae
A
7349 * then you can't move it to within another dir on the same mountpoint.
7350 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
7351 *
7352 * If this check passes, then we are safe to pass these vnodes to the same FS.
91447636 7353 */
6d2010ae
A
7354 if (fdvp->v_mount != tdvp->v_mount) {
7355 error = EXDEV;
7356 goto out1;
7357 }
7358 goto skipped_lookup;
1c79356b 7359 }
2d21ac55 7360
6d2010ae 7361 if (!batched) {
39037602 7362 error = vn_authorize_renamex(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, flags, NULL);
6d2010ae 7363 if (error) {
3e170ce0
A
7364 if (error == ENOENT) {
7365 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7366 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7367 /*
7368 * We encountered a race where after doing the namei, tvp stops
7369 * being valid. If so, simply re-drive the rename call from the
7370 * top.
7371 */
7372 do_retry = 1;
7373 retry_count += 1;
7374 }
6d2010ae 7375 }
91447636 7376 goto out1;
1c79356b
A
7377 }
7378 }
6d2010ae 7379
2d21ac55
A
7380 /*
7381 * If the source and destination are the same (i.e. they're
7382 * links to the same vnode) and the target file system is
7383 * case sensitive, then there is nothing to do.
6d2010ae
A
7384 *
7385 * XXX Come back to this.
2d21ac55
A
7386 */
7387 if (fvp == tvp) {
7388 int pathconf_val;
fe8ab488 7389
2d21ac55
A
7390 /*
7391 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
7392 * then assume that this file system is case sensitive.
7393 */
7394 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
7395 pathconf_val != 0) {
7396 goto out1;
fe8ab488 7397 }
2d21ac55 7398 }
91447636 7399
1c79356b
A
7400 /*
7401 * Allow the renaming of mount points.
7402 * - target must not exist
7403 * - target must reside in the same directory as source
7404 * - union mounts cannot be renamed
7405 * - "/" cannot be renamed
6d2010ae
A
7406 *
7407 * XXX Handle this in VFS after a continued lookup (if we missed
7408 * in the cache to start off)
39037602
A
7409 *
7410 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
7411 * we'll skip past here. The file system is responsible for
7412 * checking that @tvp is not a descendent of @fvp and vice versa
7413 * so it should always return EINVAL if either @tvp or @fvp is the
7414 * root of a volume.
1c79356b 7415 */
91447636 7416 if ((fvp->v_flag & VROOT) &&
1c79356b
A
7417 (fvp->v_type == VDIR) &&
7418 (tvp == NULL) &&
7419 (fvp->v_mountedhere == NULL) &&
91447636 7420 (fdvp == tdvp) &&
1c79356b
A
7421 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
7422 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
2d21ac55 7423 vnode_t coveredvp;
fe8ab488 7424
1c79356b 7425 /* switch fvp to the covered vnode */
91447636
A
7426 coveredvp = fvp->v_mount->mnt_vnodecovered;
7427 if ( (vnode_getwithref(coveredvp)) ) {
7428 error = ENOENT;
7429 goto out1;
7430 }
7431 vnode_put(fvp);
7432
7433 fvp = coveredvp;
1c79356b
A
7434 mntrename = TRUE;
7435 }
91447636
A
7436 /*
7437 * Check for cross-device rename.
7438 */
7439 if ((fvp->v_mount != tdvp->v_mount) ||
7440 (tvp && (fvp->v_mount != tvp->v_mount))) {
7441 error = EXDEV;
7442 goto out1;
7443 }
55e303ae 7444
91447636
A
7445 /*
7446 * If source is the same as the destination (that is the
7447 * same inode number) then there is nothing to do...
7448 * EXCEPT if the underlying file system supports case
7449 * insensitivity and is case preserving. In this case
7450 * the file system needs to handle the special case of
7451 * getting the same vnode as source (fvp) and target (tvp).
7452 *
7453 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
7454 * and _PC_CASE_PRESERVING can have this exception, and they need to
7455 * handle the special case of getting the same vnode as target and
7456 * source. NOTE: Then the target is unlocked going into vnop_rename,
7457 * so not to cause locking problems. There is a single reference on tvp.
7458 *
fe8ab488 7459 * NOTE - that fvp == tvp also occurs if they are hard linked and
b0d623f7
A
7460 * that correct behaviour then is just to return success without doing
7461 * anything.
6d2010ae
A
7462 *
7463 * XXX filesystem should take care of this itself, perhaps...
91447636
A
7464 */
7465 if (fvp == tvp && fdvp == tdvp) {
316670eb
A
7466 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
7467 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
7468 fromnd->ni_cnd.cn_namelen)) {
91447636 7469 goto out1;
55e303ae 7470 }
91447636 7471 }
55e303ae 7472
91447636
A
7473 if (holding_mntlock && fvp->v_mount != locked_mp) {
7474 /*
7475 * we're holding a reference and lock
7476 * on locked_mp, but it no longer matches
7477 * what we want to do... so drop our hold
7478 */
7479 mount_unlock_renames(locked_mp);
7480 mount_drop(locked_mp, 0);
7481 holding_mntlock = 0;
7482 }
7483 if (tdvp != fdvp && fvp->v_type == VDIR) {
7484 /*
7485 * serialize renames that re-shape
7486 * the tree... if holding_mntlock is
7487 * set, then we're ready to go...
7488 * otherwise we
7489 * first need to drop the iocounts
7490 * we picked up, second take the
7491 * lock to serialize the access,
7492 * then finally start the lookup
7493 * process over with the lock held
7494 */
7495 if (!holding_mntlock) {
7496 /*
7497 * need to grab a reference on
7498 * the mount point before we
7499 * drop all the iocounts... once
7500 * the iocounts are gone, the mount
7501 * could follow
7502 */
7503 locked_mp = fvp->v_mount;
7504 mount_ref(locked_mp, 0);
55e303ae 7505
91447636
A
7506 /*
7507 * nameidone has to happen before we vnode_put(tvp)
7508 * since it may need to release the fs_nodelock on the tvp
7509 */
316670eb 7510 nameidone(tond);
55e303ae 7511
91447636
A
7512 if (tvp)
7513 vnode_put(tvp);
7514 vnode_put(tdvp);
7515
7516 /*
7517 * nameidone has to happen before we vnode_put(fdvp)
7518 * since it may need to release the fs_nodelock on the fvp
7519 */
316670eb 7520 nameidone(fromnd);
55e303ae 7521
91447636
A
7522 vnode_put(fvp);
7523 vnode_put(fdvp);
7524
7525 mount_lock_renames(locked_mp);
7526 holding_mntlock = 1;
7527
7528 goto retry;
55e303ae 7529 }
91447636
A
7530 } else {
7531 /*
7532 * when we dropped the iocounts to take
fe8ab488 7533 * the lock, we allowed the identity of
91447636
A
7534 * the various vnodes to change... if they did,
7535 * we may no longer be dealing with a rename
7536 * that reshapes the tree... once we're holding
7537 * the iocounts, the vnodes can't change type
7538 * so we're free to drop the lock at this point
7539 * and continue on
1c79356b 7540 */
91447636
A
7541 if (holding_mntlock) {
7542 mount_unlock_renames(locked_mp);
7543 mount_drop(locked_mp, 0);
7544 holding_mntlock = 0;
1c79356b 7545 }
91447636 7546 }
6d2010ae 7547
91447636
A
7548 // save these off so we can later verify that fvp is the same
7549 oname = fvp->v_name;
7550 oparent = fvp->v_parent;
55e303ae 7551
6d2010ae 7552skipped_lookup:
2d21ac55 7553#if CONFIG_FSE
6d2010ae 7554 need_event = need_fsevent(FSE_RENAME, fdvp);
fe8ab488 7555 if (need_event) {
6d2010ae
A
7556 if (fvp) {
7557 get_fse_info(fvp, &from_finfo, ctx);
7558 } else {
316670eb 7559 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
6d2010ae
A
7560 if (error) {
7561 goto out1;
7562 }
7563
316670eb 7564 fvap = &__rename_data->fv_attr;
6d2010ae 7565 }
55e303ae 7566
91447636 7567 if (tvp) {
2d21ac55 7568 get_fse_info(tvp, &to_finfo, ctx);
6d2010ae 7569 } else if (batched) {
316670eb 7570 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
6d2010ae
A
7571 if (error) {
7572 goto out1;
7573 }
7574
316670eb 7575 tvap = &__rename_data->tv_attr;
2d21ac55
A
7576 }
7577 }
7578#else
7579 need_event = 0;
7580#endif /* CONFIG_FSE */
7581
7582 if (need_event || kauth_authorize_fileop_has_listeners()) {
2d21ac55 7583 if (from_name == NULL) {
6d2010ae
A
7584 GET_PATH(from_name);
7585 if (from_name == NULL) {
7586 error = ENOMEM;
7587 goto out1;
7588 }
91447636 7589 }
b0d623f7 7590
316670eb 7591 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
55e303ae 7592
2d21ac55 7593 if (to_name == NULL) {
6d2010ae
A
7594 GET_PATH(to_name);
7595 if (to_name == NULL) {
7596 error = ENOMEM;
7597 goto out1;
7598 }
2d21ac55 7599 }
91447636 7600
316670eb 7601 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
fe8ab488 7602 }
316670eb
A
7603 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
7604 tdvp, &tvp, &tond->ni_cnd, tvap,
39037602 7605 flags, ctx);
55e303ae 7606
91447636
A
7607 if (holding_mntlock) {
7608 /*
7609 * we can drop our serialization
7610 * lock now
7611 */
7612 mount_unlock_renames(locked_mp);
7613 mount_drop(locked_mp, 0);
7614 holding_mntlock = 0;
7615 }
7616 if (error) {
6d2010ae 7617 if (error == EKEEPLOOKING) {
316670eb
A
7618 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
7619 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6d2010ae
A
7620 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
7621 }
7622 }
7623
316670eb
A
7624 fromnd->ni_vp = fvp;
7625 tond->ni_vp = tvp;
fe8ab488 7626
6d2010ae
A
7627 goto continue_lookup;
7628 }
7629
7630 /*
fe8ab488
A
7631 * We may encounter a race in the VNOP where the destination didn't
7632 * exist when we did the namei, but it does by the time we go and
6d2010ae
A
7633 * try to create the entry. In this case, we should re-drive this rename
7634 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
fe8ab488 7635 * but other filesystems susceptible to this race could return it, too.
6d2010ae
A
7636 */
7637 if (error == ERECYCLE) {
7638 do_retry = 1;
7639 }
55e303ae 7640
c18c124e
A
7641 /*
7642 * For compound VNOPs, the authorization callback may return
7643 * ENOENT in case of racing hardlink lookups hitting the name
7644 * cache, redrive the lookup.
7645 */
3e170ce0
A
7646 if (batched && error == ENOENT) {
7647 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7648 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7649 do_retry = 1;
7650 retry_count += 1;
7651 }
c18c124e
A
7652 }
7653
91447636 7654 goto out1;
fe8ab488
A
7655 }
7656
7657 /* call out to allow 3rd party notification of rename.
91447636
A
7658 * Ignore result of kauth_authorize_fileop call.
7659 */
fe8ab488
A
7660 kauth_authorize_fileop(vfs_context_ucred(ctx),
7661 KAUTH_FILEOP_RENAME,
2d21ac55 7662 (uintptr_t)from_name, (uintptr_t)to_name);
39037602
A
7663 if (flags & VFS_RENAME_SWAP) {
7664 kauth_authorize_fileop(vfs_context_ucred(ctx),
7665 KAUTH_FILEOP_RENAME,
7666 (uintptr_t)to_name, (uintptr_t)from_name);
7667 }
91447636 7668
2d21ac55 7669#if CONFIG_FSE
91447636 7670 if (from_name != NULL && to_name != NULL) {
b0d623f7
A
7671 if (from_truncated || to_truncated) {
7672 // set it here since only the from_finfo gets reported up to user space
7673 from_finfo.mode |= FSE_TRUNCATED_PATH;
7674 }
6d2010ae
A
7675
7676 if (tvap && tvp) {
7677 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
7678 }
7679 if (fvap) {
7680 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
7681 }
7682
39037602
A
7683 if (tvp) {
7684 add_fsevent(FSE_RENAME, ctx,
7685 FSE_ARG_STRING, from_len, from_name,
7686 FSE_ARG_FINFO, &from_finfo,
7687 FSE_ARG_STRING, to_len, to_name,
7688 FSE_ARG_FINFO, &to_finfo,
7689 FSE_ARG_DONE);
7690 if (flags & VFS_RENAME_SWAP) {
7691 /*
7692 * Strictly speaking, swap is the equivalent of
7693 * *three* renames. FSEvents clients should only take
7694 * the events as a hint, so we only bother reporting
7695 * two.
7696 */
7697 add_fsevent(FSE_RENAME, ctx,
7698 FSE_ARG_STRING, to_len, to_name,
7699 FSE_ARG_FINFO, &to_finfo,
7700 FSE_ARG_STRING, from_len, from_name,
7701 FSE_ARG_FINFO, &from_finfo,
7702 FSE_ARG_DONE);
7703 }
55e303ae 7704 } else {
2d21ac55 7705 add_fsevent(FSE_RENAME, ctx,
91447636
A
7706 FSE_ARG_STRING, from_len, from_name,
7707 FSE_ARG_FINFO, &from_finfo,
7708 FSE_ARG_STRING, to_len, to_name,
7709 FSE_ARG_DONE);
7710 }
7711 }
2d21ac55 7712#endif /* CONFIG_FSE */
fe8ab488 7713
91447636
A
7714 /*
7715 * update filesystem's mount point data
7716 */
7717 if (mntrename) {
7718 char *cp, *pathend, *mpname;
7719 char * tobuf;
7720 struct mount *mp;
7721 int maxlen;
7722 size_t len = 0;
7723
7724 mp = fvp->v_mountedhere;
7725
7726 if (vfs_busy(mp, LK_NOWAIT)) {
7727 error = EBUSY;
7728 goto out1;
55e303ae 7729 }
91447636 7730 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
55e303ae 7731
fe8ab488
A
7732 if (UIO_SEG_IS_USER_SPACE(segflg))
7733 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
7734 else
7735 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
91447636
A
7736 if (!error) {
7737 /* find current mount point prefix */
7738 pathend = &mp->mnt_vfsstat.f_mntonname[0];
7739 for (cp = pathend; *cp != '\0'; ++cp) {
7740 if (*cp == '/')
7741 pathend = cp + 1;
7742 }
7743 /* find last component of target name */
7744 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
7745 if (*cp == '/')
7746 mpname = cp + 1;
7747 }
7748 /* append name to prefix */
7749 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
7750 bzero(pathend, maxlen);
2d21ac55 7751 strlcpy(pathend, mpname, maxlen);
91447636
A
7752 }
7753 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
7754
7755 vfs_unbusy(mp);
7756 }
7757 /*
fe8ab488 7758 * fix up name & parent pointers. note that we first
91447636
A
7759 * check that fvp has the same name/parent pointers it
7760 * had before the rename call... this is a 'weak' check
7761 * at best...
6d2010ae
A
7762 *
7763 * XXX oparent and oname may not be set in the compound vnop case
91447636 7764 */
6d2010ae 7765 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
91447636
A
7766 int update_flags;
7767
7768 update_flags = VNODE_UPDATE_NAME;
7769
7770 if (fdvp != tdvp)
7771 update_flags |= VNODE_UPDATE_PARENT;
7772
316670eb 7773 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
1c79356b
A
7774 }
7775out1:
593a1d5f
A
7776 if (to_name != NULL) {
7777 RELEASE_PATH(to_name);
7778 to_name = NULL;
7779 }
7780 if (from_name != NULL) {
7781 RELEASE_PATH(from_name);
7782 from_name = NULL;
7783 }
91447636
A
7784 if (holding_mntlock) {
7785 mount_unlock_renames(locked_mp);
7786 mount_drop(locked_mp, 0);
593a1d5f 7787 holding_mntlock = 0;
91447636
A
7788 }
7789 if (tdvp) {
7790 /*
7791 * nameidone has to happen before we vnode_put(tdvp)
7792 * since it may need to release the fs_nodelock on the tdvp
7793 */
316670eb 7794 nameidone(tond);
91447636
A
7795
7796 if (tvp)
7797 vnode_put(tvp);
7798 vnode_put(tdvp);
7799 }
7800 if (fdvp) {
7801 /*
7802 * nameidone has to happen before we vnode_put(fdvp)
7803 * since it may need to release the fs_nodelock on the fdvp
7804 */
316670eb 7805 nameidone(fromnd);
91447636
A
7806
7807 if (fvp)
7808 vnode_put(fvp);
7809 vnode_put(fdvp);
7810 }
fe8ab488 7811
6d2010ae
A
7812 /*
7813 * If things changed after we did the namei, then we will re-drive
7814 * this rename call from the top.
7815 */
316670eb 7816 if (do_retry) {
6d2010ae 7817 do_retry = 0;
593a1d5f
A
7818 goto retry;
7819 }
316670eb
A
7820
7821 FREE(__rename_data, M_TEMP);
1c79356b
A
7822 return (error);
7823}
7824
fe8ab488
A
7825int
7826rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7827{
7828 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7829 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7830}
7831
39037602 7832int renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
fe8ab488
A
7833{
7834 return renameat_internal(
39037602
A
7835 vfs_context_current(),
7836 uap->fromfd, uap->from,
7837 uap->tofd, uap->to,
fe8ab488
A
7838 UIO_USERSPACE, uap->flags);
7839}
39037602 7840
fe8ab488
A
7841int
7842renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7843{
7844 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7845 uap->tofd, uap->to, UIO_USERSPACE, 0));
7846}
7847
1c79356b
A
7848/*
7849 * Make a directory file.
2d21ac55
A
7850 *
7851 * Returns: 0 Success
7852 * EEXIST
7853 * namei:???
7854 * vnode_authorize:???
7855 * vn_create:???
1c79356b 7856 */
1c79356b 7857/* ARGSUSED */
91447636 7858static int
fe8ab488
A
7859mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
7860 enum uio_seg segflg)
1c79356b 7861{
91447636 7862 vnode_t vp, dvp;
1c79356b 7863 int error;
91447636 7864 int update_flags = 0;
6d2010ae 7865 int batched;
1c79356b
A
7866 struct nameidata nd;
7867
91447636 7868 AUDIT_ARG(mode, vap->va_mode);
fe8ab488 7869 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
6d2010ae 7870 path, ctx);
9bccf70c 7871 nd.ni_cnd.cn_flags |= WILLBEDIR;
6d2010ae
A
7872 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
7873
7874continue_lookup:
fe8ab488 7875 error = nameiat(&nd, fd);
55e303ae 7876 if (error)
1c79356b 7877 return (error);
91447636 7878 dvp = nd.ni_dvp;
1c79356b 7879 vp = nd.ni_vp;
55e303ae 7880
fe8ab488
A
7881 if (vp != NULL) {
7882 error = EEXIST;
7883 goto out;
7884 }
7885
6d2010ae 7886 batched = vnode_compound_mkdir_available(dvp);
2d21ac55
A
7887
7888 VATTR_SET(vap, va_type, VDIR);
fe8ab488 7889
6d2010ae
A
7890 /*
7891 * XXX
7892 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7893 * only get EXISTS or EISDIR for existing path components, and not that it could see
7894 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7895 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7896 */
fe8ab488 7897 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
6d2010ae
A
7898 if (error == EACCES || error == EPERM) {
7899 int error2;
7900
7901 nameidone(&nd);
7902 vnode_put(dvp);
7903 dvp = NULLVP;
7904
fe8ab488
A
7905 /*
7906 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
6d2010ae
A
7907 * rather than EACCESS if the target exists.
7908 */
fe8ab488
A
7909 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
7910 path, ctx);
7911 error2 = nameiat(&nd, fd);
6d2010ae
A
7912 if (error2) {
7913 goto out;
7914 } else {
7915 vp = nd.ni_vp;
7916 error = EEXIST;
7917 goto out;
7918 }
7919 }
7920
2d21ac55 7921 goto out;
6d2010ae
A
7922 }
7923
7924 /*
fe8ab488 7925 * make the directory
6d2010ae 7926 */
fe8ab488 7927 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
6d2010ae
A
7928 if (error == EKEEPLOOKING) {
7929 nd.ni_vp = vp;
7930 goto continue_lookup;
7931 }
2d21ac55 7932
fe8ab488 7933 goto out;
6d2010ae 7934 }
fe8ab488 7935
91447636
A
7936 // Make sure the name & parent pointers are hooked up
7937 if (vp->v_name == NULL)
7938 update_flags |= VNODE_UPDATE_NAME;
7939 if (vp->v_parent == NULLVP)
7940 update_flags |= VNODE_UPDATE_PARENT;
7941
7942 if (update_flags)
7943 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
55e303ae 7944
2d21ac55 7945#if CONFIG_FSE
91447636 7946 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
2d21ac55 7947#endif
91447636
A
7948
7949out:
7950 /*
7951 * nameidone has to happen before we vnode_put(dvp)
7952 * since it may need to release the fs_nodelock on the dvp
7953 */
7954 nameidone(&nd);
7955
7956 if (vp)
6d2010ae 7957 vnode_put(vp);
fe8ab488 7958 if (dvp)
6d2010ae 7959 vnode_put(dvp);
55e303ae 7960
1c79356b
A
7961 return (error);
7962}
7963
b0d623f7
A
7964/*
7965 * mkdir_extended: Create a directory; with extended security (ACL).
7966 *
7967 * Parameters: p Process requesting to create the directory
7968 * uap User argument descriptor (see below)
fe8ab488 7969 * retval (ignored)
b0d623f7
A
7970 *
7971 * Indirect: uap->path Path of directory to create
7972 * uap->mode Access permissions to set
7973 * uap->xsecurity ACL to set
fe8ab488 7974 *
b0d623f7
A
7975 * Returns: 0 Success
7976 * !0 Not success
7977 *
7978 */
1c79356b 7979int
b0d623f7 7980mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
1c79356b 7981{
91447636
A
7982 int ciferror;
7983 kauth_filesec_t xsecdst;
7984 struct vnode_attr va;
7985
b0d623f7
A
7986 AUDIT_ARG(owner, uap->uid, uap->gid);
7987
91447636
A
7988 xsecdst = NULL;
7989 if ((uap->xsecurity != USER_ADDR_NULL) &&
7990 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
7991 return ciferror;
7992
91447636 7993 VATTR_INIT(&va);
fe8ab488 7994 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
91447636
A
7995 if (xsecdst != NULL)
7996 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
7997
fe8ab488
A
7998 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7999 UIO_USERSPACE);
91447636
A
8000 if (xsecdst != NULL)
8001 kauth_filesec_free(xsecdst);
8002 return ciferror;
1c79356b
A
8003}
8004
1c79356b 8005int
b0d623f7 8006mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
1c79356b 8007{
91447636 8008 struct vnode_attr va;
1c79356b 8009
91447636 8010 VATTR_INIT(&va);
fe8ab488 8011 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
e5568f75 8012
fe8ab488
A
8013 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
8014 UIO_USERSPACE));
91447636 8015}
1c79356b 8016
91447636 8017int
fe8ab488
A
8018mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
8019{
8020 struct vnode_attr va;
8021
8022 VATTR_INIT(&va);
8023 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8024
8025 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
8026 UIO_USERSPACE));
8027}
8028
8029static int
8030rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
8031 enum uio_seg segflg)
1c79356b 8032{
2d21ac55 8033 vnode_t vp, dvp;
91447636
A
8034 int error;
8035 struct nameidata nd;
6d2010ae
A
8036 char *path = NULL;
8037 int len=0;
8038 int has_listeners = 0;
8039 int need_event = 0;
8040 int truncated = 0;
6d2010ae
A
8041#if CONFIG_FSE
8042 struct vnode_attr va;
8043#endif /* CONFIG_FSE */
8044 struct vnode_attr *vap = NULL;
c18c124e 8045 int restart_count = 0;
6d2010ae 8046 int batched;
91447636 8047
b0d623f7 8048 int restart_flag;
91447636 8049
fe8ab488 8050 /*
2d21ac55
A
8051 * This loop exists to restart rmdir in the unlikely case that two
8052 * processes are simultaneously trying to remove the same directory
8053 * containing orphaned appleDouble files.
8054 */
8055 do {
6d2010ae 8056 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
fe8ab488 8057 segflg, dirpath, ctx);
6d2010ae
A
8058 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
8059continue_lookup:
2d21ac55 8060 restart_flag = 0;
6d2010ae 8061 vap = NULL;
2d21ac55 8062
fe8ab488 8063 error = nameiat(&nd, fd);
2d21ac55
A
8064 if (error)
8065 return (error);
8066
8067 dvp = nd.ni_dvp;
8068 vp = nd.ni_vp;
8069
6d2010ae
A
8070 if (vp) {
8071 batched = vnode_compound_rmdir_available(vp);
2d21ac55 8072
6d2010ae
A
8073 if (vp->v_flag & VROOT) {
8074 /*
8075 * The root of a mounted filesystem cannot be deleted.
8076 */
8077 error = EBUSY;
8078 goto out;
8079 }
1c79356b 8080
00867663
A
8081#if DEVELOPMENT || DEBUG
8082 /*
8083 * XXX VSWAP: Check for entitlements or special flag here
8084 * so we can restrict access appropriately.
8085 */
8086#else /* DEVELOPMENT || DEBUG */
8087
8088 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
8089 error = EPERM;
8090 goto out;
8091 }
8092#endif /* DEVELOPMENT || DEBUG */
8093
2d21ac55 8094 /*
6d2010ae
A
8095 * Removed a check here; we used to abort if vp's vid
8096 * was not the same as what we'd seen the last time around.
8097 * I do not think that check was valid, because if we retry
8098 * and all dirents are gone, the directory could legitimately
8099 * be recycled but still be present in a situation where we would
fe8ab488 8100 * have had permission to delete. Therefore, we won't make
6d2010ae
A
8101 * an effort to preserve that check now that we may not have a
8102 * vp here.
2d21ac55 8103 */
6d2010ae
A
8104
8105 if (!batched) {
8106 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
8107 if (error) {
3e170ce0
A
8108 if (error == ENOENT) {
8109 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
8110 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8111 restart_flag = 1;
8112 restart_count += 1;
8113 }
c18c124e 8114 }
6d2010ae
A
8115 goto out;
8116 }
8117 }
2d21ac55 8118 } else {
6d2010ae
A
8119 batched = 1;
8120
8121 if (!vnode_compound_rmdir_available(dvp)) {
8122 panic("No error, but no compound rmdir?");
8123 }
91447636 8124 }
6d2010ae 8125
2d21ac55 8126#if CONFIG_FSE
6d2010ae 8127 fse_info finfo;
b0d623f7 8128
6d2010ae
A
8129 need_event = need_fsevent(FSE_DELETE, dvp);
8130 if (need_event) {
8131 if (!batched) {
2d21ac55 8132 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
8133 } else {
8134 error = vfs_get_notify_attributes(&va);
8135 if (error) {
8136 goto out;
8137 }
8138
8139 vap = &va;
2d21ac55 8140 }
6d2010ae 8141 }
2d21ac55 8142#endif
6d2010ae
A
8143 has_listeners = kauth_authorize_fileop_has_listeners();
8144 if (need_event || has_listeners) {
8145 if (path == NULL) {
2d21ac55
A
8146 GET_PATH(path);
8147 if (path == NULL) {
8148 error = ENOMEM;
8149 goto out;
8150 }
6d2010ae 8151 }
b0d623f7 8152
6d2010ae 8153 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
b0d623f7 8154#if CONFIG_FSE
6d2010ae
A
8155 if (truncated) {
8156 finfo.mode |= FSE_TRUNCATED_PATH;
2d21ac55 8157 }
6d2010ae
A
8158#endif
8159 }
91447636 8160
6d2010ae
A
8161 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
8162 nd.ni_vp = vp;
8163 if (vp == NULLVP) {
8164 /* Couldn't find a vnode */
8165 goto out;
8166 }
2d21ac55 8167
6d2010ae
A
8168 if (error == EKEEPLOOKING) {
8169 goto continue_lookup;
3e170ce0
A
8170 } else if (batched && error == ENOENT) {
8171 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
8172 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8173 /*
8174 * For compound VNOPs, the authorization callback
8175 * may return ENOENT in case of racing hard link lookups
8176 * redrive the lookup.
8177 */
8178 restart_flag = 1;
8179 restart_count += 1;
8180 goto out;
8181 }
6d2010ae 8182 }
39236c6e 8183#if CONFIG_APPLEDOUBLE
6d2010ae
A
8184 /*
8185 * Special case to remove orphaned AppleDouble
8186 * files. I don't like putting this in the kernel,
8187 * but carbon does not like putting this in carbon either,
8188 * so here we are.
8189 */
8190 if (error == ENOTEMPTY) {
8191 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
8192 if (error == EBUSY) {
8193 goto out;
2d21ac55
A
8194 }
8195
6d2010ae 8196
2d21ac55 8197 /*
fe8ab488 8198 * Assuming everything went well, we will try the RMDIR again
2d21ac55 8199 */
6d2010ae
A
8200 if (!error)
8201 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
8202 }
39236c6e 8203#endif /* CONFIG_APPLEDOUBLE */
6d2010ae 8204 /*
fe8ab488 8205 * Call out to allow 3rd party notification of delete.
6d2010ae
A
8206 * Ignore result of kauth_authorize_fileop call.
8207 */
8208 if (!error) {
8209 if (has_listeners) {
fe8ab488
A
8210 kauth_authorize_fileop(vfs_context_ucred(ctx),
8211 KAUTH_FILEOP_DELETE,
6d2010ae
A
8212 (uintptr_t)vp,
8213 (uintptr_t)path);
8214 }
8215
8216 if (vp->v_flag & VISHARDLINK) {
8217 // see the comment in unlink1() about why we update
8218 // the parent of a hard link when it is removed
8219 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
8220 }
2d21ac55
A
8221
8222#if CONFIG_FSE
6d2010ae
A
8223 if (need_event) {
8224 if (vap) {
8225 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 8226 }
6d2010ae
A
8227 add_fsevent(FSE_DELETE, ctx,
8228 FSE_ARG_STRING, len, path,
8229 FSE_ARG_FINFO, &finfo,
8230 FSE_ARG_DONE);
2d21ac55 8231 }
6d2010ae 8232#endif
2d21ac55
A
8233 }
8234
8235out:
6d2010ae
A
8236 if (path != NULL) {
8237 RELEASE_PATH(path);
8238 path = NULL;
8239 }
2d21ac55
A
8240 /*
8241 * nameidone has to happen before we vnode_put(dvp)
8242 * since it may need to release the fs_nodelock on the dvp
8243 */
8244 nameidone(&nd);
2d21ac55 8245 vnode_put(dvp);
6d2010ae 8246
fe8ab488 8247 if (vp)
6d2010ae 8248 vnode_put(vp);
2d21ac55
A
8249
8250 if (restart_flag == 0) {
8251 wakeup_one((caddr_t)vp);
8252 return (error);
8253 }
8254 tsleep(vp, PVFS, "rm AD", 1);
8255
8256 } while (restart_flag != 0);
91447636 8257
1c79356b 8258 return (error);
2d21ac55 8259
1c79356b 8260}
91447636 8261
fe8ab488
A
8262/*
8263 * Remove a directory file.
8264 */
8265/* ARGSUSED */
8266int
8267rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
8268{
8269 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
8270 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
8271}
8272
2d21ac55
A
/*
 * Get direntry length padded to 8 byte alignment: the size of a
 * struct direntry with its name field shrunk from MAXPATHLEN - 1 bytes
 * to 'namlen' bytes, rounded up to a multiple of 8.
 */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)

/*
 * Get dirent length padded to 4 byte alignment: the size of a
 * struct dirent with its name field shrunk from __DARWIN_MAXNAMLEN + 1
 * bytes to 'namelen' + 1 bytes, rounded up to a multiple of 4.
 * The argument is fully parenthesized so that any expression can be passed.
 */
#define DIRENT_LEN(namelen) \
	((sizeof(struct dirent) + ((namelen) + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)

/* Get the end of this dirent (address of the last byte of the record) */
#define DIRENT_END(dep) \
	(((char *)(dep)) + (dep)->d_reclen - 1)
8284
fe8ab488 8285errno_t
2d21ac55
A
8286vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
8287 int *numdirent, vfs_context_t ctxp)
8288{
8289 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
39037602 8290 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
6d2010ae 8291 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
2d21ac55
A
8292 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
8293 } else {
8294 size_t bufsize;
8295 void * bufptr;
8296 uio_t auio;
15129b1c 8297 struct direntry *entry64;
2d21ac55
A
8298 struct dirent *dep;
8299 int bytesread;
8300 int error;
8301
8302 /*
5ba3f43e
A
8303 * We're here because the underlying file system does not
8304 * support direnties or we mounted denying support so we must
8305 * fall back to dirents and convert them to direntries.
8306 *
8307 * Our kernel buffer needs to be smaller since re-packing will
8308 * expand each dirent. The worse case (when the name length
8309 * is 3 or less) corresponds to a struct direntry size of 32
2d21ac55
A
8310 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
8311 * (4-byte aligned). So having a buffer that is 3/8 the size
8312 * will prevent us from reading more than we can pack.
8313 *
8314 * Since this buffer is wired memory, we will limit the
39037602 8315 * buffer size to a maximum of 32K. We would really like to
2d21ac55 8316 * use 32K in the MIN(), but we use magic number 87371 to
39037602 8317 * prevent uio_resid() * 3 / 8 from overflowing.
2d21ac55 8318 */
316670eb 8319 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
2d21ac55 8320 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
b0d623f7
A
8321 if (bufptr == NULL) {
8322 return ENOMEM;
8323 }
2d21ac55 8324
b0d623f7 8325 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
2d21ac55
A
8326 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
8327 auio->uio_offset = uio->uio_offset;
8328
8329 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
8330
8331 dep = (struct dirent *)bufptr;
8332 bytesread = bufsize - uio_resid(auio);
8333
15129b1c
A
8334 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
8335 M_TEMP, M_WAITOK);
2d21ac55
A
8336 /*
8337 * Convert all the entries and copy them out to user's buffer.
8338 */
8339 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
15129b1c
A
8340 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
8341
5ba3f43e
A
8342 if (DIRENT_END(dep) > ((char *)bufptr + bytesread) ||
8343 DIRENT_LEN(dep->d_namlen) > dep->d_reclen) {
8344 printf("%s: %s: Bad dirent recived from directory %s\n", __func__,
8345 vp->v_mount->mnt_vfsstat.f_mntonname,
8346 vp->v_name ? vp->v_name : "<unknown>");
8347 error = EIO;
8348 break;
8349 }
8350
15129b1c 8351 bzero(entry64, enbufsize);
2d21ac55 8352 /* Convert a dirent to a dirent64. */
15129b1c
A
8353 entry64->d_ino = dep->d_ino;
8354 entry64->d_seekoff = 0;
8355 entry64->d_reclen = enbufsize;
8356 entry64->d_namlen = dep->d_namlen;
8357 entry64->d_type = dep->d_type;
8358 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
2d21ac55
A
8359
8360 /* Move to next entry. */
8361 dep = (struct dirent *)((char *)dep + dep->d_reclen);
8362
8363 /* Copy entry64 to user's buffer. */
15129b1c 8364 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
2d21ac55
A
8365 }
8366
8367 /* Update the real offset using the offset we got from VNOP_READDIR. */
8368 if (error == 0) {
8369 uio->uio_offset = auio->uio_offset;
8370 }
8371 uio_free(auio);
8372 FREE(bufptr, M_TEMP);
15129b1c 8373 FREE(entry64, M_TEMP);
2d21ac55
A
8374 return (error);
8375 }
8376}
1c79356b 8377
39236c6e
A
8378#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
8379
1c79356b
A
8380/*
8381 * Read a block of directory entries in a file system independent format.
8382 */
2d21ac55
A
8383static int
8384getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
8385 off_t *offset, int flags)
1c79356b 8386{
2d21ac55
A
8387 vnode_t vp;
8388 struct vfs_context context = *vfs_context_current(); /* local copy */
91447636
A
8389 struct fileproc *fp;
8390 uio_t auio;
2d21ac55
A
8391 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8392 off_t loff;
8393 int error, eofflag, numdirent;
91447636 8394 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 8395
2d21ac55
A
8396 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
8397 if (error) {
1c79356b 8398 return (error);
2d21ac55 8399 }
91447636
A
8400 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8401 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8402 error = EBADF;
8403 goto out;
8404 }
2d21ac55 8405
39236c6e
A
8406 if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
8407 bufsize = GETDIRENTRIES_MAXBUFSIZE;
8408
2d21ac55
A
8409#if CONFIG_MACF
8410 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
8411 if (error)
8412 goto out;
8413#endif
91447636
A
8414 if ( (error = vnode_getwithref(vp)) ) {
8415 goto out;
8416 }
91447636 8417 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
55e303ae 8418
1c79356b 8419unionread:
91447636
A
8420 if (vp->v_type != VDIR) {
8421 (void)vnode_put(vp);
8422 error = EINVAL;
8423 goto out;
8424 }
2d21ac55
A
8425
8426#if CONFIG_MACF
8427 error = mac_vnode_check_readdir(&context, vp);
8428 if (error != 0) {
8429 (void)vnode_put(vp);
8430 goto out;
8431 }
8432#endif /* MAC */
91447636
A
8433
8434 loff = fp->f_fglob->fg_offset;
2d21ac55
A
8435 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8436 uio_addiov(auio, bufp, bufsize);
91447636 8437
2d21ac55
A
8438 if (flags & VNODE_READDIR_EXTENDED) {
8439 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
8440 fp->f_fglob->fg_offset = uio_offset(auio);
8441 } else {
8442 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
8443 fp->f_fglob->fg_offset = uio_offset(auio);
8444 }
91447636
A
8445 if (error) {
8446 (void)vnode_put(vp);
8447 goto out;
8448 }
1c79356b 8449
2d21ac55
A
8450 if ((user_ssize_t)bufsize == uio_resid(auio)){
8451 if (union_dircheckp) {
8452 error = union_dircheckp(&vp, fp, &context);
8453 if (error == -1)
8454 goto unionread;
813fb2f6
A
8455 if (error) {
8456 (void)vnode_put(vp);
2d21ac55 8457 goto out;
813fb2f6 8458 }
1c79356b
A
8459 }
8460
39236c6e 8461 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
2d21ac55 8462 struct vnode *tvp = vp;
39236c6e
A
8463 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
8464 vnode_ref(vp);
8465 fp->f_fglob->fg_data = (caddr_t) vp;
8466 fp->f_fglob->fg_offset = 0;
8467 vnode_rele(tvp);
8468 vnode_put(tvp);
8469 goto unionread;
8470 }
8471 vp = tvp;
1c79356b
A
8472 }
8473 }
2d21ac55 8474
91447636 8475 vnode_put(vp);
2d21ac55
A
8476 if (offset) {
8477 *offset = loff;
8478 }
39037602 8479
2d21ac55 8480 *bytesread = bufsize - uio_resid(auio);
91447636
A
8481out:
8482 file_drop(fd);
1c79356b
A
8483 return (error);
8484}
8485
2d21ac55
A
8486
8487int
b0d623f7 8488getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
2d21ac55
A
8489{
8490 off_t offset;
2d21ac55
A
8491 ssize_t bytesread;
8492 int error;
8493
8494 AUDIT_ARG(fd, uap->fd);
8495 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
8496
8497 if (error == 0) {
b0d623f7
A
8498 if (proc_is64bit(p)) {
8499 user64_long_t base = (user64_long_t)offset;
8500 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
8501 } else {
8502 user32_long_t base = (user32_long_t)offset;
8503 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
8504 }
2d21ac55
A
8505 *retval = bytesread;
8506 }
8507 return (error);
8508}
8509
8510int
8511getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
8512{
8513 off_t offset;
8514 ssize_t bytesread;
8515 int error;
8516
8517 AUDIT_ARG(fd, uap->fd);
8518 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
8519
8520 if (error == 0) {
8521 *retval = bytesread;
8522 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
8523 }
8524 return (error);
8525}
8526
8527
1c79356b
A
8528/*
8529 * Set the mode mask for creation of filesystem nodes.
b0d623f7 8530 * XXX implement xsecurity
1c79356b 8531 */
91447636
A
8532#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
8533static int
b0d623f7 8534umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
1c79356b 8535{
2d21ac55 8536 struct filedesc *fdp;
1c79356b 8537
91447636 8538 AUDIT_ARG(mask, newmask);
2d21ac55 8539 proc_fdlock(p);
1c79356b
A
8540 fdp = p->p_fd;
8541 *retval = fdp->fd_cmask;
91447636 8542 fdp->fd_cmask = newmask & ALLPERMS;
2d21ac55 8543 proc_fdunlock(p);
1c79356b
A
8544 return (0);
8545}
8546
b0d623f7
A
8547/*
8548 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
8549 *
8550 * Parameters: p Process requesting to set the umask
8551 * uap User argument descriptor (see below)
8552 * retval umask of the process (parameter p)
8553 *
8554 * Indirect: uap->newmask umask to set
8555 * uap->xsecurity ACL to set
39037602 8556 *
b0d623f7
A
8557 * Returns: 0 Success
8558 * !0 Not success
8559 *
8560 */
8561int
8562umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
91447636
A
8563{
8564 int ciferror;
8565 kauth_filesec_t xsecdst;
8566
8567 xsecdst = KAUTH_FILESEC_NONE;
8568 if (uap->xsecurity != USER_ADDR_NULL) {
8569 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
8570 return ciferror;
8571 } else {
8572 xsecdst = KAUTH_FILESEC_NONE;
8573 }
8574
8575 ciferror = umask1(p, uap->newmask, xsecdst, retval);
8576
8577 if (xsecdst != KAUTH_FILESEC_NONE)
8578 kauth_filesec_free(xsecdst);
8579 return ciferror;
8580}
8581
8582int
b0d623f7 8583umask(proc_t p, struct umask_args *uap, int32_t *retval)
91447636
A
8584{
8585 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
8586}
8587
1c79356b
A
8588/*
8589 * Void all references to file by ripping underlying filesystem
8590 * away from vnode.
8591 */
1c79356b
A
8592/* ARGSUSED */
8593int
b0d623f7 8594revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
1c79356b 8595{
2d21ac55 8596 vnode_t vp;
91447636 8597 struct vnode_attr va;
2d21ac55 8598 vfs_context_t ctx = vfs_context_current();
1c79356b
A
8599 int error;
8600 struct nameidata nd;
8601
6d2010ae
A
8602 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
8603 uap->path, ctx);
55e303ae
A
8604 error = namei(&nd);
8605 if (error)
1c79356b
A
8606 return (error);
8607 vp = nd.ni_vp;
91447636
A
8608
8609 nameidone(&nd);
8610
b0d623f7
A
8611 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
8612 error = ENOTSUP;
8613 goto out;
8614 }
8615
8616 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
8617 error = EBUSY;
8618 goto out;
8619 }
8620
2d21ac55
A
8621#if CONFIG_MACF
8622 error = mac_vnode_check_revoke(ctx, vp);
8623 if (error)
8624 goto out;
8625#endif
8626
91447636
A
8627 VATTR_INIT(&va);
8628 VATTR_WANTED(&va, va_uid);
2d21ac55 8629 if ((error = vnode_getattr(vp, &va, ctx)))
1c79356b 8630 goto out;
2d21ac55
A
8631 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
8632 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
1c79356b 8633 goto out;
b0d623f7 8634 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
2d21ac55 8635 VNOP_REVOKE(vp, REVOKEALL, ctx);
1c79356b 8636out:
91447636 8637 vnode_put(vp);
1c79356b
A
8638 return (error);
8639}
8640
0b4e3aa0 8641
1c79356b
A
/*
 *  HFS/HFS Plus SPECIFIC SYSTEM CALLS
 *  The following system calls are designed to support features
 *  which are specific to the HFS & HFS Plus volume formats.
 */
8647
9bccf70c 8648
1c79356b 8649/*
39236c6e
A
8650 * Obtain attribute information on objects in a directory while enumerating
8651 * the directory.
8652 */
1c79356b
A
8653/* ARGSUSED */
8654int
b0d623f7 8655getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
1c79356b 8656{
2d21ac55 8657 vnode_t vp;
91447636
A
8658 struct fileproc *fp;
8659 uio_t auio = NULL;
8660 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
39236c6e 8661 uint32_t count, savecount;
2d21ac55 8662 uint32_t newstate;
91447636 8663 int error, eofflag;
2d21ac55 8664 uint32_t loff;
39037602 8665 struct attrlist attributelist;
2d21ac55 8666 vfs_context_t ctx = vfs_context_current();
91447636
A
8667 int fd = uap->fd;
8668 char uio_buf[ UIO_SIZEOF(1) ];
8669 kauth_action_t action;
8670
8671 AUDIT_ARG(fd, fd);
39037602 8672
91447636 8673 /* Get the attributes into kernel space */
2d21ac55 8674 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
91447636 8675 return(error);
2d21ac55
A
8676 }
8677 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
8678 return(error);
8679 }
39236c6e 8680 savecount = count;
2d21ac55 8681 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
91447636 8682 return (error);
2d21ac55 8683 }
91447636
A
8684 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8685 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8686 error = EBADF;
8687 goto out;
8688 }
2d21ac55
A
8689
8690
8691#if CONFIG_MACF
8692 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
8693 fp->f_fglob);
8694 if (error)
8695 goto out;
8696#endif
8697
8698
91447636
A
8699 if ( (error = vnode_getwithref(vp)) )
8700 goto out;
55e303ae 8701
91447636 8702 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 8703
39236c6e 8704unionread:
91447636
A
8705 if (vp->v_type != VDIR) {
8706 (void)vnode_put(vp);
8707 error = EINVAL;
8708 goto out;
8709 }
55e303ae 8710
2d21ac55
A
8711#if CONFIG_MACF
8712 error = mac_vnode_check_readdir(ctx, vp);
8713 if (error != 0) {
8714 (void)vnode_put(vp);
8715 goto out;
8716 }
8717#endif /* MAC */
8718
91447636
A
8719 /* set up the uio structure which will contain the users return buffer */
8720 loff = fp->f_fglob->fg_offset;
39236c6e 8721 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
91447636 8722 uio_addiov(auio, uap->buffer, uap->buffersize);
39037602 8723
91447636
A
8724 /*
8725 * If the only item requested is file names, we can let that past with
8726 * just LIST_DIRECTORY. If they want any other attributes, that means
8727 * they need SEARCH as well.
8728 */
8729 action = KAUTH_VNODE_LIST_DIRECTORY;
8730 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
8731 attributelist.fileattr || attributelist.dirattr)
8732 action |= KAUTH_VNODE_SEARCH;
39037602 8733
2d21ac55 8734 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
2d21ac55 8735
b0d623f7
A
8736 /* Believe it or not, uap->options only has 32-bits of valid
8737 * info, so truncate before extending again */
39236c6e
A
8738
8739 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
8740 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
8741 }
8742
8743 if (error) {
8744 (void) vnode_put(vp);
8745 goto out;
8746 }
8747
8748 /*
8749 * If we've got the last entry of a directory in a union mount
8750 * then reset the eofflag and pretend there's still more to come.
8751 * The next call will again set eofflag and the buffer will be empty,
8752 * so traverse to the underlying directory and do the directory
8753 * read there.
8754 */
8755 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
8756 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
8757 eofflag = 0;
8758 } else { // Empty buffer
8759 struct vnode *tvp = vp;
8760 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
8761 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
8762 fp->f_fglob->fg_data = (caddr_t) vp;
8763 fp->f_fglob->fg_offset = 0; // reset index for new dir
8764 count = savecount;
8765 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
8766 vnode_put(tvp);
8767 goto unionread;
8768 }
8769 vp = tvp;
8770 }
2d21ac55 8771 }
39236c6e 8772
91447636 8773 (void)vnode_put(vp);
1c79356b 8774
39037602 8775 if (error)
91447636
A
8776 goto out;
8777 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
1c79356b 8778
2d21ac55 8779 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
91447636 8780 goto out;
2d21ac55 8781 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
91447636 8782 goto out;
2d21ac55 8783 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
91447636 8784 goto out;
1c79356b
A
8785
8786 *retval = eofflag; /* similar to getdirentries */
91447636 8787 error = 0;
2d21ac55 8788out:
91447636
A
8789 file_drop(fd);
8790 return (error); /* return error earlier, an retval of 0 or 1 now */
1c79356b 8791
39236c6e 8792} /* end of getdirentriesattr system call */
1c79356b
A
8793
8794/*
8795* Exchange data between two files
8796*/
8797
1c79356b
A
8798/* ARGSUSED */
8799int
b0d623f7 8800exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
1c79356b
A
8801{
8802
8803 struct nameidata fnd, snd;
2d21ac55
A
8804 vfs_context_t ctx = vfs_context_current();
8805 vnode_t fvp;
8806 vnode_t svp;
8807 int error;
b0d623f7 8808 u_int32_t nameiflags;
91447636
A
8809 char *fpath = NULL;
8810 char *spath = NULL;
b0d623f7
A
8811 int flen=0, slen=0;
8812 int from_truncated=0, to_truncated=0;
8813#if CONFIG_FSE
91447636 8814 fse_info f_finfo, s_finfo;
b0d623f7 8815#endif
39037602 8816
1c79356b
A
8817 nameiflags = 0;
8818 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8819
6d2010ae
A
8820 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
8821 UIO_USERSPACE, uap->path1, ctx);
1c79356b 8822
6d2010ae
A
8823 error = namei(&fnd);
8824 if (error)
8825 goto out2;
1c79356b 8826
91447636
A
8827 nameidone(&fnd);
8828 fvp = fnd.ni_vp;
1c79356b 8829
39037602 8830 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
6d2010ae 8831 UIO_USERSPACE, uap->path2, ctx);
1c79356b 8832
6d2010ae
A
8833 error = namei(&snd);
8834 if (error) {
91447636 8835 vnode_put(fvp);
55e303ae 8836 goto out2;
6d2010ae 8837 }
91447636 8838 nameidone(&snd);
1c79356b
A
8839 svp = snd.ni_vp;
8840
91447636
A
8841 /*
8842 * if the files are the same, return an inval error
8843 */
1c79356b 8844 if (svp == fvp) {
91447636
A
8845 error = EINVAL;
8846 goto out;
39037602 8847 }
1c79356b 8848
91447636
A
8849 /*
8850 * if the files are on different volumes, return an error
8851 */
8852 if (svp->v_mount != fvp->v_mount) {
8853 error = EXDEV;
8854 goto out;
8855 }
2d21ac55 8856
39236c6e
A
8857 /* If they're not files, return an error */
8858 if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
db609669
A
8859 error = EINVAL;
8860 goto out;
8861 }
8862
2d21ac55
A
8863#if CONFIG_MACF
8864 error = mac_vnode_check_exchangedata(ctx,
8865 fvp, svp);
8866 if (error)
8867 goto out;
8868#endif
8869 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
8870 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
91447636 8871 goto out;
1c79356b 8872
2d21ac55
A
8873 if (
8874#if CONFIG_FSE
39037602 8875 need_fsevent(FSE_EXCHANGE, fvp) ||
2d21ac55
A
8876#endif
8877 kauth_authorize_fileop_has_listeners()) {
8878 GET_PATH(fpath);
8879 GET_PATH(spath);
8880 if (fpath == NULL || spath == NULL) {
8881 error = ENOMEM;
8882 goto out;
8883 }
b0d623f7
A
8884
8885 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
8886 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
39037602 8887
2d21ac55
A
8888#if CONFIG_FSE
8889 get_fse_info(fvp, &f_finfo, ctx);
8890 get_fse_info(svp, &s_finfo, ctx);
b0d623f7
A
8891 if (from_truncated || to_truncated) {
8892 // set it here since only the f_finfo gets reported up to user space
8893 f_finfo.mode |= FSE_TRUNCATED_PATH;
8894 }
2d21ac55 8895#endif
91447636 8896 }
1c79356b 8897 /* Ok, make the call */
2d21ac55 8898 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
55e303ae 8899
91447636 8900 if (error == 0) {
2d21ac55 8901 const char *tmpname;
91447636
A
8902
8903 if (fpath != NULL && spath != NULL) {
39037602 8904 /* call out to allow 3rd party notification of exchangedata.
91447636
A
8905 * Ignore result of kauth_authorize_fileop call.
8906 */
39037602 8907 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
91447636
A
8908 (uintptr_t)fpath, (uintptr_t)spath);
8909 }
8910 name_cache_lock();
8911
8912 tmpname = fvp->v_name;
8913 fvp->v_name = svp->v_name;
8914 svp->v_name = tmpname;
39037602 8915
91447636 8916 if (fvp->v_parent != svp->v_parent) {
2d21ac55 8917 vnode_t tmp;
91447636
A
8918
8919 tmp = fvp->v_parent;
8920 fvp->v_parent = svp->v_parent;
8921 svp->v_parent = tmp;
8922 }
8923 name_cache_unlock();
8924
2d21ac55 8925#if CONFIG_FSE
91447636 8926 if (fpath != NULL && spath != NULL) {
2d21ac55 8927 add_fsevent(FSE_EXCHANGE, ctx,
91447636
A
8928 FSE_ARG_STRING, flen, fpath,
8929 FSE_ARG_FINFO, &f_finfo,
8930 FSE_ARG_STRING, slen, spath,
8931 FSE_ARG_FINFO, &s_finfo,
8932 FSE_ARG_DONE);
8933 }
2d21ac55 8934#endif
55e303ae
A
8935 }
8936
1c79356b 8937out:
2d21ac55
A
8938 if (fpath != NULL)
8939 RELEASE_PATH(fpath);
8940 if (spath != NULL)
8941 RELEASE_PATH(spath);
91447636
A
8942 vnode_put(svp);
8943 vnode_put(fvp);
1c79356b 8944out2:
1c79356b 8945 return (error);
91447636 8946}
1c79356b 8947
39236c6e
A
8948/*
8949 * Return (in MB) the amount of freespace on the given vnode's volume.
8950 */
8951uint32_t freespace_mb(vnode_t vp);
8952
8953uint32_t
8954freespace_mb(vnode_t vp)
8955{
39037602 8956 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
39236c6e
A
8957 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
8958 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
8959}
8960
316670eb 8961#if CONFIG_SEARCHFS
1c79356b 8962
1c79356b
A
8963/* ARGSUSED */
8964
8965int
b0d623f7 8966searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
1c79356b 8967{
39236c6e
A
8968 vnode_t vp, tvp;
8969 int i, error=0;
1c79356b
A
8970 int fserror = 0;
8971 struct nameidata nd;
b0d623f7 8972 struct user64_fssearchblock searchblock;
1c79356b
A
8973 struct searchstate *state;
8974 struct attrlist *returnattrs;
b0d623f7 8975 struct timeval timelimit;
1c79356b 8976 void *searchparams1,*searchparams2;
91447636
A
8977 uio_t auio = NULL;
8978 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
b0d623f7 8979 uint32_t nummatches;
1c79356b 8980 int mallocsize;
b0d623f7 8981 uint32_t nameiflags;
2d21ac55 8982 vfs_context_t ctx = vfs_context_current();
91447636 8983 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 8984
39236c6e 8985 /* Start by copying in fsearchblock parameter list */
91447636 8986 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
8987 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
8988 timelimit.tv_sec = searchblock.timelimit.tv_sec;
8989 timelimit.tv_usec = searchblock.timelimit.tv_usec;
91447636
A
8990 }
8991 else {
b0d623f7
A
8992 struct user32_fssearchblock tmp_searchblock;
8993
91447636
A
8994 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
8995 // munge into 64-bit version
8996 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
8997 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
8998 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
8999 searchblock.maxmatches = tmp_searchblock.maxmatches;
39037602 9000 /*
b0d623f7
A
9001 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
9002 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
9003 */
9004 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
9005 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
91447636
A
9006 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
9007 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
9008 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
9009 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
9010 searchblock.searchattrs = tmp_searchblock.searchattrs;
9011 }
9012 if (error)
1c79356b
A
9013 return(error);
9014
39037602 9015 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
a3d08fcd 9016 */
39037602 9017 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
a3d08fcd
A
9018 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
9019 return(EINVAL);
39037602 9020
1c79356b
A
9021 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
9022 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
9023 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
9024 /* block. */
fe8ab488
A
9025 /* */
9026 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
9027 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
9028 /* assumes the size is still 556 bytes it will continue to work */
39037602 9029
91447636 9030 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
fe8ab488 9031 sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
1c79356b
A
9032
9033 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
9034
9035 /* Now set up the various pointers to the correct place in our newly allocated memory */
9036
9037 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
9038 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
9039 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
9040
9041 /* Now copy in the stuff given our local variables. */
9042
91447636 9043 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
1c79356b
A
9044 goto freeandexit;
9045
91447636 9046 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
1c79356b
A
9047 goto freeandexit;
9048
91447636 9049 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
1c79356b 9050 goto freeandexit;
39037602 9051
91447636 9052 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
1c79356b 9053 goto freeandexit;
1c79356b 9054
39236c6e
A
9055 /*
9056 * When searching a union mount, need to set the
9057 * start flag at the first call on each layer to
9058 * reset state for the new volume.
9059 */
9060 if (uap->options & SRCHFS_START)
9061 state->ss_union_layer = 0;
39037602 9062 else
39236c6e
A
9063 uap->options |= state->ss_union_flags;
9064 state->ss_union_flags = 0;
b0d623f7
A
9065
9066 /*
9067 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
9068 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
39037602
A
9069 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
9070 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
b0d623f7
A
9071 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
9072 */
9073
9074 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
9075 attrreference_t* string_ref;
9076 u_int32_t* start_length;
39037602 9077 user64_size_t param_length;
b0d623f7
A
9078
9079 /* validate searchparams1 */
39037602 9080 param_length = searchblock.sizeofsearchparams1;
b0d623f7
A
9081 /* skip the word that specifies length of the buffer */
9082 start_length= (u_int32_t*) searchparams1;
9083 start_length= start_length+1;
9084 string_ref= (attrreference_t*) start_length;
9085
9086 /* ensure no negative offsets or too big offsets */
9087 if (string_ref->attr_dataoffset < 0 ) {
9088 error = EINVAL;
39037602 9089 goto freeandexit;
b0d623f7
A
9090 }
9091 if (string_ref->attr_length > MAXPATHLEN) {
9092 error = EINVAL;
9093 goto freeandexit;
9094 }
39037602 9095
b0d623f7
A
9096 /* Check for pointer overflow in the string ref */
9097 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
9098 error = EINVAL;
9099 goto freeandexit;
9100 }
9101
9102 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
9103 error = EINVAL;
9104 goto freeandexit;
9105 }
9106 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
9107 error = EINVAL;
9108 goto freeandexit;
9109 }
9110 }
9111
9112 /* set up the uio structure which will contain the users return buffer */
39236c6e
A
9113 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
9114 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
1c79356b 9115
91447636 9116 nameiflags = 0;
1c79356b 9117 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6d2010ae
A
9118 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
9119 UIO_USERSPACE, uap->path, ctx);
1c79356b 9120
55e303ae
A
9121 error = namei(&nd);
9122 if (error)
1c79356b 9123 goto freeandexit;
39236c6e 9124 vp = nd.ni_vp;
91447636 9125 nameidone(&nd);
39236c6e
A
9126
9127 /*
9128 * Switch to the root vnode for the volume
9129 */
9130 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
fe8ab488 9131 vnode_put(vp);
39236c6e
A
9132 if (error)
9133 goto freeandexit;
39236c6e
A
9134 vp = tvp;
9135
9136 /*
9137 * If it's a union mount, the path lookup takes
9138 * us to the top layer. But we may need to descend
9139 * to a lower layer. For non-union mounts the layer
9140 * is always zero.
9141 */
9142 for (i = 0; i < (int) state->ss_union_layer; i++) {
9143 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
9144 break;
9145 tvp = vp;
9146 vp = vp->v_mount->mnt_vnodecovered;
9147 if (vp == NULL) {
fe8ab488 9148 vnode_put(tvp);
39236c6e
A
9149 error = ENOENT;
9150 goto freeandexit;
9151 }
813fb2f6 9152 error = vnode_getwithref(vp);
39236c6e 9153 vnode_put(tvp);
813fb2f6
A
9154 if (error)
9155 goto freeandexit;
39236c6e 9156 }
1c79356b 9157
6d2010ae
A
9158#if CONFIG_MACF
9159 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
9160 if (error) {
9161 vnode_put(vp);
9162 goto freeandexit;
9163 }
9164#endif
9165
39037602 9166
1c79356b 9167 /*
39037602 9168 * If searchblock.maxmatches == 0, then skip the search. This has happened
39236c6e 9169 * before and sometimes the underlying code doesnt deal with it well.
1c79356b
A
9170 */
9171 if (searchblock.maxmatches == 0) {
9172 nummatches = 0;
9173 goto saveandexit;
9174 }
9175
9176 /*
39236c6e 9177 * Allright, we have everything we need, so lets make that call.
39037602 9178 *
39236c6e
A
9179 * We keep special track of the return value from the file system:
9180 * EAGAIN is an acceptable error condition that shouldn't keep us
9181 * from copying out any results...
1c79356b
A
9182 */
9183
6d2010ae 9184 fserror = VNOP_SEARCHFS(vp,
39236c6e
A
9185 searchparams1,
9186 searchparams2,
9187 &searchblock.searchattrs,
9188 (u_long)searchblock.maxmatches,
9189 &timelimit,
9190 returnattrs,
9191 &nummatches,
9192 (u_long)uap->scriptcode,
9193 (u_long)uap->options,
9194 auio,
9195 (struct searchstate *) &state->ss_fsstate,
9196 ctx);
39037602 9197
39236c6e
A
9198 /*
9199 * If it's a union mount we need to be called again
9200 * to search the mounted-on filesystem.
9201 */
9202 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
9203 state->ss_union_flags = SRCHFS_START;
9204 state->ss_union_layer++; // search next layer down
9205 fserror = EAGAIN;
9206 }
9207
6d2010ae
A
9208saveandexit:
9209
9210 vnode_put(vp);
9211
9212 /* Now copy out the stuff that needs copying out. That means the number of matches, the
9213 search state. Everything was already put into he return buffer by the vop call. */
9214
9215 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
9216 goto freeandexit;
9217
39236c6e 9218 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
6d2010ae 9219 goto freeandexit;
39037602 9220
6d2010ae
A
9221 error = fserror;
9222
9223freeandexit:
9224
9225 FREE(searchparams1,M_TEMP);
9226
9227 return(error);
9228
9229
9230} /* end of searchfs system call */
9231
316670eb
A
9232#else /* CONFIG_SEARCHFS */
9233
9234int
9235searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
9236{
9237 return (ENOTSUP);
9238}
9239
9240#endif /* CONFIG_SEARCHFS */
6d2010ae
A
9241
9242
9243lck_grp_attr_t * nspace_group_attr;
9244lck_attr_t * nspace_lock_attr;
9245lck_grp_t * nspace_mutex_group;
9246
9247lck_mtx_t nspace_handler_lock;
9248lck_mtx_t nspace_handler_exclusion_lock;
9249
9250time_t snapshot_timestamp=0;
9251int nspace_allow_virtual_devs=0;
9252
9253void nspace_handler_init(void);
9254
9255typedef struct nspace_item_info {
9256 struct vnode *vp;
9257 void *arg;
9258 uint64_t op;
9259 uint32_t vid;
9260 uint32_t flags;
9261 uint32_t token;
9262 uint32_t refcount;
9263} nspace_item_info;
9264
9265#define MAX_NSPACE_ITEMS 128
9266nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
9267uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
9268uint32_t nspace_token_id=0;
9269uint32_t nspace_handler_timeout = 15; // seconds
9270
9271#define NSPACE_ITEM_NEW 0x0001
9272#define NSPACE_ITEM_PROCESSING 0x0002
9273#define NSPACE_ITEM_DEAD 0x0004
9274#define NSPACE_ITEM_CANCELLED 0x0008
9275#define NSPACE_ITEM_DONE 0x0010
9276#define NSPACE_ITEM_RESET_TIMER 0x0020
9277
9278#define NSPACE_ITEM_NSPACE_EVENT 0x0040
9279#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
6d2010ae 9280
fe8ab488 9281#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
6d2010ae
A
9282
9283//#pragma optimization_level 0
9284
9285typedef enum {
9286 NSPACE_HANDLER_NSPACE = 0,
9287 NSPACE_HANDLER_SNAPSHOT = 1,
6d2010ae
A
9288
9289 NSPACE_HANDLER_COUNT,
9290} nspace_type_t;
9291
9292typedef struct {
9293 uint64_t handler_tid;
9294 struct proc *handler_proc;
9295 int handler_busy;
9296} nspace_handler_t;
9297
9298nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
9299
39236c6e
A
9300/* namespace fsctl functions */
9301static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
9302static int nspace_item_flags_for_type(nspace_type_t nspace_type);
9303static int nspace_open_flags_for_type(nspace_type_t nspace_type);
9304static nspace_type_t nspace_type_for_op(uint64_t op);
9305static int nspace_is_special_process(struct proc *proc);
9306static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
9307static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
9308static int validate_namespace_args (int is64bit, int size);
9309static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
9310
9311
6d2010ae
A
9312static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
9313{
9314 switch(nspace_type) {
9315 case NSPACE_HANDLER_NSPACE:
9316 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
9317 case NSPACE_HANDLER_SNAPSHOT:
9318 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
6d2010ae
A
9319 default:
9320 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
9321 return 0;
9322 }
9323}
9324
9325static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
9326{
9327 switch(nspace_type) {
9328 case NSPACE_HANDLER_NSPACE:
9329 return NSPACE_ITEM_NSPACE_EVENT;
9330 case NSPACE_HANDLER_SNAPSHOT:
9331 return NSPACE_ITEM_SNAPSHOT_EVENT;
6d2010ae
A
9332 default:
9333 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
9334 return 0;
9335 }
9336}
9337
9338static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
9339{
9340 switch(nspace_type) {
9341 case NSPACE_HANDLER_NSPACE:
9342 return FREAD | FWRITE | O_EVTONLY;
9343 case NSPACE_HANDLER_SNAPSHOT:
6d2010ae
A
9344 return FREAD | O_EVTONLY;
9345 default:
9346 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
9347 return 0;
9348 }
9349}
9350
9351static inline nspace_type_t nspace_type_for_op(uint64_t op)
9352{
9353 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
9354 case NAMESPACE_HANDLER_NSPACE_EVENT:
9355 return NSPACE_HANDLER_NSPACE;
9356 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
9357 return NSPACE_HANDLER_SNAPSHOT;
6d2010ae
A
9358 default:
9359 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
9360 return NSPACE_HANDLER_NSPACE;
9361 }
9362}
9363
9364static inline int nspace_is_special_process(struct proc *proc)
9365{
9366 int i;
9367 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9368 if (proc == nspace_handlers[i].handler_proc)
9369 return 1;
9370 }
9371 return 0;
9372}
9373
9374void
9375nspace_handler_init(void)
9376{
9377 nspace_lock_attr = lck_attr_alloc_init();
9378 nspace_group_attr = lck_grp_attr_alloc_init();
9379 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
9380 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
9381 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
9382 memset(&nspace_items[0], 0, sizeof(nspace_items));
9383}
9384
9385void
9386nspace_proc_exit(struct proc *p)
9387{
9388 int i, event_mask = 0;
39037602 9389
6d2010ae
A
9390 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9391 if (p == nspace_handlers[i].handler_proc) {
9392 event_mask |= nspace_item_flags_for_type(i);
9393 nspace_handlers[i].handler_tid = 0;
9394 nspace_handlers[i].handler_proc = NULL;
9395 }
9396 }
9397
9398 if (event_mask == 0) {
9399 return;
9400 }
39037602
A
9401
9402 lck_mtx_lock(&nspace_handler_lock);
6d2010ae
A
9403 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
9404 // if this process was the snapshot handler, zero snapshot_timeout
9405 snapshot_timestamp = 0;
9406 }
39037602 9407
6d2010ae
A
9408 //
9409 // unblock anyone that's waiting for the handler that died
9410 //
6d2010ae
A
9411 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9412 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
9413
9414 if ( nspace_items[i].flags & event_mask ) {
9415
9416 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9417 vnode_lock_spin(nspace_items[i].vp);
9418 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9419 vnode_unlock(nspace_items[i].vp);
9420 }
9421 nspace_items[i].vp = NULL;
9422 nspace_items[i].vid = 0;
9423 nspace_items[i].flags = NSPACE_ITEM_DONE;
9424 nspace_items[i].token = 0;
39037602 9425
6d2010ae
A
9426 wakeup((caddr_t)&(nspace_items[i].vp));
9427 }
9428 }
9429 }
39037602 9430
6d2010ae
A
9431 wakeup((caddr_t)&nspace_item_idx);
9432 lck_mtx_unlock(&nspace_handler_lock);
9433}
9434
9435
/*
 * Convenience wrapper: resolve_nspace_item_ext() with no extra argument.
 */
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	void *no_arg = NULL;

	return resolve_nspace_item_ext(vp, op, no_arg);
}
9441
39037602 9442int
6d2010ae
A
9443resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
9444{
9445 int i, error, keep_waiting;
9446 struct timespec ts;
9447 nspace_type_t nspace_type = nspace_type_for_op(op);
9448
9449 // only allow namespace events on regular files, directories and symlinks.
9450 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
9451 return 0;
9452 }
9453
9454 //
9455 // if this is a snapshot event and the vnode is on a
9456 // disk image just pretend nothing happened since any
9457 // change to the disk image will cause the disk image
9458 // itself to get backed up and this avoids multi-way
9459 // deadlocks between the snapshot handler and the ever
9460 // popular diskimages-helper process. the variable
9461 // nspace_allow_virtual_devs allows this behavior to
9462 // be overridden (for use by the Mobile TimeMachine
9463 // testing infrastructure which uses disk images)
9464 //
9465 if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
9466 && (vp->v_mount != NULL)
9467 && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
9468 && !nspace_allow_virtual_devs) {
9469
9470 return 0;
9471 }
9472
9473 // if (thread_tid(current_thread()) == namespace_handler_tid) {
9474 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9475 return 0;
9476 }
9477
9478 if (nspace_is_special_process(current_proc())) {
9479 return EDEADLK;
9480 }
9481
9482 lck_mtx_lock(&nspace_handler_lock);
9483
9484retry:
9485 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9486 if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
9487 break;
9488 }
9489 }
9490
9491 if (i >= MAX_NSPACE_ITEMS) {
9492 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9493 if (nspace_items[i].flags == 0) {
9494 break;
9495 }
9496 }
9497 } else {
9498 nspace_items[i].refcount++;
9499 }
39037602 9500
6d2010ae
A
9501 if (i >= MAX_NSPACE_ITEMS) {
9502 ts.tv_sec = nspace_handler_timeout;
9503 ts.tv_nsec = 0;
9504
9505 error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
9506 if (error == 0) {
9507 // an entry got free'd up, go see if we can get a slot
9508 goto retry;
9509 } else {
9510 lck_mtx_unlock(&nspace_handler_lock);
9511 return error;
9512 }
9513 }
9514
9515 //
9516 // if it didn't already exist, add it. if it did exist
9517 // we'll get woken up when someone does a wakeup() on
9518 // the slot in the nspace_items table.
9519 //
9520 if (vp != nspace_items[i].vp) {
9521 nspace_items[i].vp = vp;
39236c6e 9522 nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
6d2010ae
A
9523 nspace_items[i].op = op;
9524 nspace_items[i].vid = vnode_vid(vp);
9525 nspace_items[i].flags = NSPACE_ITEM_NEW;
9526 nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
9527 if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
9528 if (arg) {
9529 vnode_lock_spin(vp);
9530 vp->v_flag |= VNEEDSSNAPSHOT;
9531 vnode_unlock(vp);
9532 }
9533 }
9534
9535 nspace_items[i].token = 0;
9536 nspace_items[i].refcount = 1;
39037602 9537
6d2010ae
A
9538 wakeup((caddr_t)&nspace_item_idx);
9539 }
9540
9541 //
9542 // Now go to sleep until the handler does a wakeup on this
9543 // slot in the nspace_items table (or we timeout).
9544 //
9545 keep_waiting = 1;
9546 while(keep_waiting) {
9547 ts.tv_sec = nspace_handler_timeout;
9548 ts.tv_nsec = 0;
9549 error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
9550
9551 if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
9552 error = 0;
9553 } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
9554 error = nspace_items[i].token;
9555 } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
9556 if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
9557 nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
9558 continue;
9559 } else {
9560 error = ETIMEDOUT;
9561 }
9562 } else if (error == 0) {
9563 // hmmm, why did we get woken up?
9564 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
9565 nspace_items[i].token);
39037602 9566 }
6d2010ae
A
9567
9568 if (--nspace_items[i].refcount == 0) {
9569 nspace_items[i].vp = NULL; // clear this so that no one will match on it again
9570 nspace_items[i].arg = NULL;
9571 nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
9572 nspace_items[i].flags = 0; // this clears it for re-use
9573 }
9574 wakeup(&nspace_token_id);
9575 keep_waiting = 0;
9576 }
9577
9578 lck_mtx_unlock(&nspace_handler_lock);
9579
9580 return error;
9581}
9582
39037602 9583int nspace_snapshot_event(vnode_t vp, time_t ctime, uint64_t op_type, void *arg)
6d2010ae 9584{
39037602 9585 int snapshot_error = 0;
6d2010ae 9586
39037602
A
9587 if (vp == NULL) {
9588 return 0;
9589 }
9590
9591 /* Swap files are special; skip them */
9592 if (vnode_isswap(vp)) {
9593 return 0;
9594 }
9595
9596 if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
9597 // the change time is within this epoch
9598 int error;
9599
9600 error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
9601 if (error == EDEADLK) {
9602 snapshot_error = 0;
9603 } else if (error) {
9604 if (error == EAGAIN) {
9605 printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
9606 } else if (error == EINTR) {
9607 // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
9608 snapshot_error = EINTR;
9609 }
9610 }
9611 }
9612
9613 return snapshot_error;
9614}
9615
9616int
9617get_nspace_item_status(struct vnode *vp, int32_t *status)
9618{
9619 int i;
9620
9621 lck_mtx_lock(&nspace_handler_lock);
9622 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9623 if (nspace_items[i].vp == vp) {
9624 break;
6d2010ae
A
9625 }
9626 }
9627
9628 if (i >= MAX_NSPACE_ITEMS) {
9629 lck_mtx_unlock(&nspace_handler_lock);
9630 return ENOENT;
9631 }
9632
9633 *status = nspace_items[i].flags;
9634 lck_mtx_unlock(&nspace_handler_lock);
9635 return 0;
9636}
39037602 9637
6d2010ae
A
9638
#if 0
/*
 * Compiled out. Formats a "/.vol/<fsid>/<fileid>" path for vp into path.
 * On entry *len is the size of path; on return it is the snprintf() result
 * plus one. Returns 0 on success, or -1 (with a placeholder path) when
 * vnode_getattr() fails.
 */
static int
build_volfs_path(struct vnode *vp, char *path, int *len)
{
	struct vnode_attr va;

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_fileid);

	if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
		*len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
		return -1;
	}

	*len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
	return 0;
}
#endif
9661
9662//
9663// Note: this function does NOT check permissions on all of the
9664// parent directories leading to this vnode. It should only be
9665// called on behalf of a root process. Otherwise a process may
9666// get access to a file because the file itself is readable even
9667// though its parent directories would prevent access.
9668//
9669static int
9670vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
9671{
9672 int error, action;
9673
9674 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9675 return error;
9676 }
9677
9678#if CONFIG_MACF
9679 error = mac_vnode_check_open(ctx, vp, fmode);
9680 if (error)
9681 return error;
9682#endif
1c79356b 9683
6d2010ae
A
9684 /* compute action to be authorized */
9685 action = 0;
9686 if (fmode & FREAD) {
9687 action |= KAUTH_VNODE_READ_DATA;
9688 }
9689 if (fmode & (FWRITE | O_TRUNC)) {
9690 /*
9691 * If we are writing, appending, and not truncating,
9692 * indicate that we are appending so that if the
9693 * UF_APPEND or SF_APPEND bits are set, we do not deny
9694 * the open.
9695 */
9696 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
9697 action |= KAUTH_VNODE_APPEND_DATA;
9698 } else {
9699 action |= KAUTH_VNODE_WRITE_DATA;
9700 }
9701 }
1c79356b 9702
6d2010ae
A
9703 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
9704 return error;
39037602 9705
1c79356b 9706
6d2010ae
A
9707 //
9708 // if the vnode is tagged VOPENEVT and the current process
9709 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
9710 // flag to the open mode so that this open won't count against
9711 // the vnode when carbon delete() does a vnode_isinuse() to see
9712 // if a file is currently in use. this allows spotlight
9713 // importers to not interfere with carbon apps that depend on
9714 // the no-delete-if-busy semantics of carbon delete().
9715 //
9716 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
9717 fmode |= O_EVTONLY;
9718 }
1c79356b 9719
6d2010ae
A
9720 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
9721 return error;
9722 }
9723 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
9724 VNOP_CLOSE(vp, fmode, ctx);
9725 return error;
9726 }
1c79356b 9727
39037602 9728 /* Call out to allow 3rd party notification of open.
6d2010ae
A
9729 * Ignore result of kauth_authorize_fileop call.
9730 */
4b17d6b6
A
9731#if CONFIG_MACF
9732 mac_vnode_notify_open(ctx, vp, fmode);
9733#endif
39037602 9734 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
6d2010ae 9735 (uintptr_t)vp, 0);
1c79356b 9736
1c79356b 9737
6d2010ae
A
9738 return 0;
9739}
1c79356b 9740
6d2010ae 9741static int
39236c6e 9742wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
6d2010ae 9743{
39037602
A
9744 int i;
9745 int error = 0;
9746 int unblock = 0;
6d2010ae 9747 task_t curtask;
39037602 9748
6d2010ae
A
9749 lck_mtx_lock(&nspace_handler_exclusion_lock);
9750 if (nspace_handlers[nspace_type].handler_busy) {
9751 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9752 return EBUSY;
9753 }
39037602 9754
6d2010ae
A
9755 nspace_handlers[nspace_type].handler_busy = 1;
9756 lck_mtx_unlock(&nspace_handler_exclusion_lock);
39037602
A
9757
9758 /*
6d2010ae
A
9759 * Any process that gets here will be one of the namespace handlers.
9760 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
9761 * as we can cause deadlocks to occur, because the namespace handler may prevent
39037602 9762 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
6d2010ae
A
9763 * process.
9764 */
9765 curtask = current_task();
39037602
A
9766 bsd_set_dependency_capable (curtask);
9767
6d2010ae
A
9768 lck_mtx_lock(&nspace_handler_lock);
9769 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9770 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
9771 nspace_handlers[nspace_type].handler_proc = current_proc();
9772 }
39037602
A
9773
9774 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
9775 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9776 error = EINVAL;
9777 }
9778
6d2010ae 9779 while (error == 0) {
39037602
A
9780
9781 /* Try to find matching namespace item */
9782 for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
6d2010ae 9783 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
39037602
A
9784 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9785 break;
6d2010ae 9786 }
6d2010ae
A
9787 }
9788 }
39236c6e 9789
39037602
A
9790 if (i >= MAX_NSPACE_ITEMS) {
9791 /* Nothing is there yet. Wait for wake up and retry */
6d2010ae
A
9792 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
9793 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
39037602 9794 /* Prevent infinite loop if snapshot handler exited */
6d2010ae
A
9795 error = EINVAL;
9796 break;
9797 }
39037602 9798 continue;
6d2010ae 9799 }
39037602
A
9800
9801 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
9802 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
9803 nspace_items[i].token = ++nspace_token_id;
9804
9805 assert(nspace_items[i].vp);
9806 struct fileproc *fp;
9807 int32_t indx;
9808 int32_t fmode;
9809 struct proc *p = current_proc();
9810 vfs_context_t ctx = vfs_context_current();
9811 struct vnode_attr va;
9812 bool vn_get_succsessful = false;
9813 bool vn_open_successful = false;
9814 bool fp_alloc_successful = false;
9815
9816 /*
9817 * Use vnode pointer to acquire a file descriptor for
9818 * hand-off to userland
9819 */
9820 fmode = nspace_open_flags_for_type(nspace_type);
9821 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
9822 if (error) goto cleanup;
9823 vn_get_succsessful = true;
9824
9825 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
9826 if (error) goto cleanup;
9827 vn_open_successful = true;
9828
9829 error = falloc(p, &fp, &indx, ctx);
9830 if (error) goto cleanup;
9831 fp_alloc_successful = true;
9832
9833 fp->f_fglob->fg_flag = fmode;
9834 fp->f_fglob->fg_ops = &vnops;
9835 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
9836
9837 proc_fdlock(p);
9838 procfdtbl_releasefd(p, indx, NULL);
9839 fp_drop(p, indx, fp, 1);
9840 proc_fdunlock(p);
9841
9842 /*
9843 * All variants of the namespace handler struct support these three fields:
9844 * token, flags, and the FD pointer
9845 */
9846 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
9847 if (error) goto cleanup;
9848 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
9849 if (error) goto cleanup;
9850 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
9851 if (error) goto cleanup;
9852
9853 /*
9854 * Handle optional fields:
9855 * extended version support an info ptr (offset, length), and the
9856 *
9857 * namedata version supports a unique per-link object ID
9858 *
9859 */
9860 if (nhd->infoptr) {
9861 uio_t uio = (uio_t)nspace_items[i].arg;
9862 uint64_t u_offset, u_length;
9863
9864 if (uio) {
9865 u_offset = uio_offset(uio);
9866 u_length = uio_resid(uio);
9867 } else {
9868 u_offset = 0;
9869 u_length = 0;
9870 }
9871 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
9872 if (error) goto cleanup;
9873 error = copyout(&u_length, nhd->infoptr + sizeof(uint64_t), sizeof(uint64_t));
9874 if (error) goto cleanup;
9875 }
9876
9877 if (nhd->objid) {
9878 VATTR_INIT(&va);
9879 VATTR_WANTED(&va, va_linkid);
9880 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
9881 if (error) goto cleanup;
9882
9883 uint64_t linkid = 0;
9884 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
9885 linkid = (uint64_t)va.va_linkid;
9886 }
9887 error = copyout(&linkid, nhd->objid, sizeof(uint64_t));
9888 }
9889cleanup:
9890 if (error) {
9891 if (fp_alloc_successful) fp_free(p, indx, fp);
9892 if (vn_open_successful) vn_close(nspace_items[i].vp, fmode, ctx);
9893 unblock = 1;
9894 }
9895
9896 if (vn_get_succsessful) vnode_put(nspace_items[i].vp);
9897
9898 break;
6d2010ae 9899 }
39037602 9900
6d2010ae
A
9901 if (unblock) {
9902 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9903 vnode_lock_spin(nspace_items[i].vp);
9904 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9905 vnode_unlock(nspace_items[i].vp);
9906 }
9907 nspace_items[i].vp = NULL;
9908 nspace_items[i].vid = 0;
9909 nspace_items[i].flags = NSPACE_ITEM_DONE;
9910 nspace_items[i].token = 0;
39037602 9911
6d2010ae
A
9912 wakeup((caddr_t)&(nspace_items[i].vp));
9913 }
39037602 9914
6d2010ae
A
9915 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
9916 // just go through every snapshot event and unblock it immediately.
9917 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
39037602 9918 for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
6d2010ae
A
9919 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9920 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9921 nspace_items[i].vp = NULL;
9922 nspace_items[i].vid = 0;
9923 nspace_items[i].flags = NSPACE_ITEM_DONE;
9924 nspace_items[i].token = 0;
39037602
A
9925
9926 wakeup((caddr_t)&(nspace_items[i].vp));
6d2010ae
A
9927 }
9928 }
9929 }
9930 }
9931 }
39037602 9932
6d2010ae 9933 lck_mtx_unlock(&nspace_handler_lock);
39037602 9934
6d2010ae
A
9935 lck_mtx_lock(&nspace_handler_exclusion_lock);
9936 nspace_handlers[nspace_type].handler_busy = 0;
9937 lck_mtx_unlock(&nspace_handler_exclusion_lock);
39037602 9938
6d2010ae
A
9939 return error;
9940}
1c79356b 9941
39236c6e
A
9942static inline int validate_namespace_args (int is64bit, int size) {
9943
9944 if (is64bit) {
9945 /* Must be one of these */
9946 if (size == sizeof(user64_namespace_handler_info)) {
9947 goto sizeok;
9948 }
9949 if (size == sizeof(user64_namespace_handler_info_ext)) {
9950 goto sizeok;
9951 }
9952 if (size == sizeof(user64_namespace_handler_data)) {
9953 goto sizeok;
9954 }
9955 return EINVAL;
9956 }
9957 else {
9958 /* 32 bit -- must be one of these */
9959 if (size == sizeof(user32_namespace_handler_info)) {
9960 goto sizeok;
9961 }
9962 if (size == sizeof(user32_namespace_handler_info_ext)) {
9963 goto sizeok;
9964 }
9965 if (size == sizeof(user32_namespace_handler_data)) {
9966 goto sizeok;
9967 }
9968 return EINVAL;
9969 }
9970
9971sizeok:
9972
9973 return 0;
9974
9975}
1c79356b 9976
6d2010ae
A
9977static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
9978{
9979 int error = 0;
39236c6e 9980 namespace_handler_data nhd;
39037602 9981
39236c6e
A
9982 bzero (&nhd, sizeof(namespace_handler_data));
9983
6d2010ae
A
9984 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9985 return error;
9986 }
39037602 9987
39236c6e
A
9988 error = validate_namespace_args (is64bit, size);
9989 if (error) {
9990 return error;
6d2010ae 9991 }
39037602 9992
39236c6e
A
9993 /* Copy in the userland pointers into our kernel-only struct */
9994
6d2010ae 9995 if (is64bit) {
39236c6e
A
9996 /* 64 bit userland structures */
9997 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
9998 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
9999 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
10000
10001 /* If the size is greater than the standard info struct, add in extra fields */
10002 if (size > (sizeof(user64_namespace_handler_info))) {
10003 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
10004 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
10005 }
10006 if (size == (sizeof(user64_namespace_handler_data))) {
10007 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
10008 }
10009 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
6d2010ae 10010 }
39037602 10011 }
39236c6e
A
10012 else {
10013 /* 32 bit userland structures */
10014 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
10015 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
10016 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
39037602 10017
39236c6e
A
10018 if (size > (sizeof(user32_namespace_handler_info))) {
10019 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
10020 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
10021 }
10022 if (size == (sizeof(user32_namespace_handler_data))) {
10023 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
10024 }
10025 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
6d2010ae
A
10026 }
10027 }
39037602 10028
39236c6e 10029 return wait_for_namespace_event(&nhd, nspace_type);
6d2010ae 10030}
1c79356b 10031
5ba3f43e
A
10032static unsigned long
10033fsctl_bogus_command_compat(unsigned long cmd)
10034{
10035
10036 switch (cmd) {
10037 case IOCBASECMD(FSIOC_SYNC_VOLUME):
10038 return (FSIOC_SYNC_VOLUME);
10039 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID):
10040 return (FSIOC_ROUTEFS_SETROUTEID);
10041 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS):
10042 return (FSIOC_SET_PACKAGE_EXTS);
10043 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_GET):
10044 return (FSIOC_NAMESPACE_HANDLER_GET);
10045 case IOCBASECMD(FSIOC_OLD_SNAPSHOT_HANDLER_GET):
10046 return (FSIOC_OLD_SNAPSHOT_HANDLER_GET);
10047 case IOCBASECMD(FSIOC_SNAPSHOT_HANDLER_GET_EXT):
10048 return (FSIOC_SNAPSHOT_HANDLER_GET_EXT);
10049 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UPDATE):
10050 return (FSIOC_NAMESPACE_HANDLER_UPDATE);
10051 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UNBLOCK):
10052 return (FSIOC_NAMESPACE_HANDLER_UNBLOCK);
10053 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_CANCEL):
10054 return (FSIOC_NAMESPACE_HANDLER_CANCEL);
10055 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME):
10056 return (FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME);
10057 case IOCBASECMD(FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS):
10058 return (FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS);
10059 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE):
10060 return (FSIOC_SET_FSTYPENAME_OVERRIDE);
10061 case IOCBASECMD(DISK_CONDITIONER_IOC_GET):
10062 return (DISK_CONDITIONER_IOC_GET);
10063 case IOCBASECMD(DISK_CONDITIONER_IOC_SET):
10064 return (DISK_CONDITIONER_IOC_SET);
10065 case IOCBASECMD(FSIOC_FIOSEEKHOLE):
10066 return (FSIOC_FIOSEEKHOLE);
10067 case IOCBASECMD(FSIOC_FIOSEEKDATA):
10068 return (FSIOC_FIOSEEKDATA);
10069 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME):
10070 return (SPOTLIGHT_IOC_GET_MOUNT_TIME);
10071 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME):
10072 return (SPOTLIGHT_IOC_GET_LAST_MTIME);
10073 }
10074
10075 return (cmd);
10076}
10077
1c79356b
A
10078/*
10079 * Make a filesystem-specific control call:
10080 */
1c79356b 10081/* ARGSUSED */
b0d623f7
A
10082static int
10083fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
1c79356b 10084{
b0d623f7 10085 int error=0;
91447636 10086 boolean_t is64bit;
2d21ac55 10087 u_int size;
1c79356b 10088#define STK_PARAMS 128
39037602 10089 char stkbuf[STK_PARAMS] = {0};
1c79356b 10090 caddr_t data, memp;
b0d623f7 10091 vnode_t vp = *arg_vp;
1c79356b 10092
5ba3f43e
A
10093 cmd = fsctl_bogus_command_compat(cmd);
10094
1c79356b
A
10095 size = IOCPARM_LEN(cmd);
10096 if (size > IOCPARM_MAX) return (EINVAL);
10097
6d2010ae 10098 is64bit = proc_is64bit(p);
91447636 10099
1c79356b 10100 memp = NULL;
04b8595b 10101
1c79356b
A
10102 if (size > sizeof (stkbuf)) {
10103 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
10104 data = memp;
10105 } else {
91447636 10106 data = &stkbuf[0];
1c79356b 10107 };
39037602 10108
1c79356b
A
10109 if (cmd & IOC_IN) {
10110 if (size) {
b0d623f7 10111 error = copyin(udata, data, size);
39037602 10112 if (error) {
fe8ab488 10113 if (memp) {
39037602 10114 kfree (memp, size);
fe8ab488
A
10115 }
10116 return error;
10117 }
1c79356b 10118 } else {
6d2010ae
A
10119 if (is64bit) {
10120 *(user_addr_t *)data = udata;
10121 }
10122 else {
10123 *(uint32_t *)data = (uint32_t)udata;
10124 }
1c79356b
A
10125 };
10126 } else if ((cmd & IOC_OUT) && size) {
10127 /*
10128 * Zero the buffer so the user always
10129 * gets back something deterministic.
10130 */
10131 bzero(data, size);
91447636 10132 } else if (cmd & IOC_VOID) {
b0d623f7 10133 if (is64bit) {
6d2010ae 10134 *(user_addr_t *)data = udata;
b0d623f7
A
10135 }
10136 else {
6d2010ae 10137 *(uint32_t *)data = (uint32_t)udata;
b0d623f7 10138 }
91447636 10139 }
1c79356b 10140
b0d623f7 10141 /* Check to see if it's a generic command */
5ba3f43e 10142 switch (cmd) {
91447636 10143
5ba3f43e 10144 case FSIOC_SYNC_VOLUME: {
fe8ab488
A
10145 mount_t mp = vp->v_mount;
10146 int arg = *(uint32_t*)data;
b0d623f7 10147
fe8ab488
A
10148 /* record vid of vp so we can drop it below. */
10149 uint32_t vvid = vp->v_id;
b0d623f7 10150
fe8ab488
A
10151 /*
10152 * Then grab mount_iterref so that we can release the vnode.
10153 * Without this, a thread may call vnode_iterate_prepare then
10154 * get into a deadlock because we've never released the root vp
10155 */
10156 error = mount_iterref (mp, 0);
10157 if (error) {
10158 break;
10159 }
10160 vnode_put(vp);
10161
10162 /* issue the sync for this volume */
10163 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
10164
39037602 10165 /*
fe8ab488
A
10166 * Then release the mount_iterref once we're done syncing; it's not
10167 * needed for the VNOP_IOCTL below
10168 */
10169 mount_iterdrop(mp);
10170
10171 if (arg & FSCTL_SYNC_FULLSYNC) {
10172 /* re-obtain vnode iocount on the root vp, if possible */
10173 error = vnode_getwithvid (vp, vvid);
10174 if (error == 0) {
10175 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
10176 vnode_put (vp);
10177 }
b0d623f7 10178 }
fe8ab488
A
10179 /* mark the argument VP as having been released */
10180 *arg_vp = NULL;
b0d623f7 10181 }
fe8ab488 10182 break;
b0d623f7 10183
5ba3f43e 10184 case FSIOC_ROUTEFS_SETROUTEID: {
490019cf
A
10185#if ROUTEFS
10186 char routepath[MAXPATHLEN];
10187 size_t len = 0;
39037602 10188
490019cf
A
10189 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10190 break;
10191 }
10192 bzero(routepath, MAXPATHLEN);
10193 error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
10194 if (error) {
10195 break;
10196 }
10197 error = routefs_kernel_mount(routepath);
10198 if (error) {
10199 break;
10200 }
10201#endif
10202 }
10203 break;
10204
5ba3f43e 10205 case FSIOC_SET_PACKAGE_EXTS: {
fe8ab488
A
10206 user_addr_t ext_strings;
10207 uint32_t num_entries;
10208 uint32_t max_width;
b0d623f7 10209
39037602
A
10210 if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0)))
10211 break;
10212
fe8ab488
A
10213 if ( (is64bit && size != sizeof(user64_package_ext_info))
10214 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
10215
10216 // either you're 64-bit and passed a 64-bit struct or
10217 // you're 32-bit and passed a 32-bit struct. otherwise
10218 // it's not ok.
10219 error = EINVAL;
10220 break;
10221 }
10222
10223 if (is64bit) {
10224 ext_strings = ((user64_package_ext_info *)data)->strings;
10225 num_entries = ((user64_package_ext_info *)data)->num_entries;
10226 max_width = ((user64_package_ext_info *)data)->max_width;
10227 } else {
10228 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
10229 num_entries = ((user32_package_ext_info *)data)->num_entries;
10230 max_width = ((user32_package_ext_info *)data)->max_width;
10231 }
10232 error = set_package_extensions_table(ext_strings, num_entries, max_width);
6d2010ae 10233 }
fe8ab488 10234 break;
2d21ac55 10235
39037602 10236 /* namespace handlers */
5ba3f43e 10237 case FSIOC_NAMESPACE_HANDLER_GET: {
fe8ab488 10238 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
6d2010ae 10239 }
fe8ab488 10240 break;
b0d623f7 10241
fe8ab488 10242 /* Snapshot handlers */
5ba3f43e 10243 case FSIOC_OLD_SNAPSHOT_HANDLER_GET: {
fe8ab488 10244 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
39037602 10245 }
fe8ab488 10246 break;
39236c6e 10247
5ba3f43e 10248 case FSIOC_SNAPSHOT_HANDLER_GET_EXT: {
fe8ab488
A
10249 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
10250 }
39037602 10251 break;
39236c6e 10252
5ba3f43e 10253 case FSIOC_NAMESPACE_HANDLER_UPDATE: {
fe8ab488
A
10254 uint32_t token, val;
10255 int i;
39236c6e 10256
fe8ab488
A
10257 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10258 break;
10259 }
39236c6e 10260
fe8ab488
A
10261 if (!nspace_is_special_process(p)) {
10262 error = EINVAL;
10263 break;
10264 }
6d2010ae 10265
fe8ab488
A
10266 token = ((uint32_t *)data)[0];
10267 val = ((uint32_t *)data)[1];
6d2010ae 10268
fe8ab488 10269 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 10270
fe8ab488
A
10271 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10272 if (nspace_items[i].token == token) {
10273 break; /* exit for loop, not case stmt */
10274 }
10275 }
6d2010ae 10276
fe8ab488
A
10277 if (i >= MAX_NSPACE_ITEMS) {
10278 error = ENOENT;
10279 } else {
10280 //
10281 // if this bit is set, when resolve_nspace_item() times out
10282 // it will loop and go back to sleep.
10283 //
10284 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
10285 }
6d2010ae 10286
fe8ab488
A
10287 lck_mtx_unlock(&nspace_handler_lock);
10288
10289 if (error) {
10290 printf("nspace-handler-update: did not find token %u\n", token);
10291 }
39037602 10292 }
fe8ab488 10293 break;
39037602 10294
5ba3f43e 10295 case FSIOC_NAMESPACE_HANDLER_UNBLOCK: {
fe8ab488
A
10296 uint32_t token, val;
10297 int i;
10298
10299 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
6d2010ae
A
10300 break;
10301 }
6d2010ae 10302
fe8ab488
A
10303 if (!nspace_is_special_process(p)) {
10304 error = EINVAL;
10305 break;
10306 }
6d2010ae 10307
fe8ab488
A
10308 token = ((uint32_t *)data)[0];
10309 val = ((uint32_t *)data)[1];
6d2010ae 10310
fe8ab488 10311 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 10312
fe8ab488
A
10313 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10314 if (nspace_items[i].token == token) {
10315 break; /* exit for loop, not case statement */
10316 }
10317 }
6d2010ae 10318
fe8ab488
A
10319 if (i >= MAX_NSPACE_ITEMS) {
10320 printf("nspace-handler-unblock: did not find token %u\n", token);
10321 error = ENOENT;
10322 } else {
10323 if (val == 0 && nspace_items[i].vp) {
10324 vnode_lock_spin(nspace_items[i].vp);
10325 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10326 vnode_unlock(nspace_items[i].vp);
10327 }
6d2010ae 10328
fe8ab488
A
10329 nspace_items[i].vp = NULL;
10330 nspace_items[i].arg = NULL;
10331 nspace_items[i].op = 0;
10332 nspace_items[i].vid = 0;
10333 nspace_items[i].flags = NSPACE_ITEM_DONE;
10334 nspace_items[i].token = 0;
6d2010ae 10335
fe8ab488
A
10336 wakeup((caddr_t)&(nspace_items[i].vp));
10337 }
10338
10339 lck_mtx_unlock(&nspace_handler_lock);
39037602 10340 }
fe8ab488 10341 break;
6d2010ae 10342
5ba3f43e 10343 case FSIOC_NAMESPACE_HANDLER_CANCEL: {
fe8ab488
A
10344 uint32_t token, val;
10345 int i;
6d2010ae 10346
fe8ab488 10347 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
6d2010ae
A
10348 break;
10349 }
6d2010ae 10350
fe8ab488
A
10351 if (!nspace_is_special_process(p)) {
10352 error = EINVAL;
10353 break;
6d2010ae
A
10354 }
10355
fe8ab488
A
10356 token = ((uint32_t *)data)[0];
10357 val = ((uint32_t *)data)[1];
6d2010ae 10358
fe8ab488 10359 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 10360
fe8ab488
A
10361 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10362 if (nspace_items[i].token == token) {
10363 break; /* exit for loop, not case stmt */
10364 }
10365 }
6d2010ae 10366
fe8ab488
A
10367 if (i >= MAX_NSPACE_ITEMS) {
10368 printf("nspace-handler-cancel: did not find token %u\n", token);
10369 error = ENOENT;
10370 } else {
10371 if (nspace_items[i].vp) {
10372 vnode_lock_spin(nspace_items[i].vp);
10373 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10374 vnode_unlock(nspace_items[i].vp);
10375 }
6d2010ae 10376
39037602
A
10377 nspace_items[i].vp = NULL;
10378 nspace_items[i].arg = NULL;
fe8ab488
A
10379 nspace_items[i].vid = 0;
10380 nspace_items[i].token = val;
10381 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
39037602 10382 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
6d2010ae 10383
fe8ab488
A
10384 wakeup((caddr_t)&(nspace_items[i].vp));
10385 }
6d2010ae 10386
fe8ab488 10387 lck_mtx_unlock(&nspace_handler_lock);
39037602 10388 }
fe8ab488 10389 break;
6d2010ae 10390
5ba3f43e 10391 case FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
fe8ab488 10392 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
6d2010ae
A
10393 break;
10394 }
6d2010ae 10395
fe8ab488 10396 // we explicitly do not do the namespace_handler_proc check here
6d2010ae 10397
fe8ab488
A
10398 lck_mtx_lock(&nspace_handler_lock);
10399 snapshot_timestamp = ((uint32_t *)data)[0];
10400 wakeup(&nspace_item_idx);
10401 lck_mtx_unlock(&nspace_handler_lock);
10402 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
6d2010ae 10403
39037602 10404 }
fe8ab488 10405 break;
6d2010ae 10406
5ba3f43e 10407 case FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
fe8ab488
A
10408 {
10409 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10410 break;
10411 }
6d2010ae 10412
fe8ab488
A
10413 lck_mtx_lock(&nspace_handler_lock);
10414 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
10415 lck_mtx_unlock(&nspace_handler_lock);
10416 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
10417 nspace_allow_virtual_devs ? "" : " NOT");
10418 error = 0;
6d2010ae 10419
6d2010ae 10420 }
fe8ab488 10421 break;
6d2010ae 10422
5ba3f43e 10423 case FSIOC_SET_FSTYPENAME_OVERRIDE:
39037602 10424 {
fe8ab488
A
10425 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10426 break;
10427 }
10428 if (vp->v_mount) {
10429 mount_lock(vp->v_mount);
10430 if (data[0] != 0) {
10431 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
10432 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
10433 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10434 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
10435 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
10436 }
10437 } else {
10438 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10439 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
10440 }
10441 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
10442 vp->v_mount->fstypename_override[0] = '\0';
6d2010ae 10443 }
fe8ab488 10444 mount_unlock(vp->v_mount);
6d2010ae 10445 }
6d2010ae 10446 }
fe8ab488 10447 break;
39037602 10448
5ba3f43e
A
10449 case DISK_CONDITIONER_IOC_GET: {
10450 error = disk_conditioner_get_info(vp->v_mount, (disk_conditioner_info *)data);
10451 }
10452 break;
10453
10454 case DISK_CONDITIONER_IOC_SET: {
10455 error = disk_conditioner_set_info(vp->v_mount, (disk_conditioner_info *)data);
10456 }
10457 break;
10458
fe8ab488 10459 default: {
a39ff7e2
A
10460 /* other, known commands shouldn't be passed down here */
10461 switch (cmd) {
10462 case F_PUNCHHOLE:
10463 case F_TRIM_ACTIVE_FILE:
10464 case F_RDADVISE:
10465 case F_TRANSCODEKEY:
10466 case F_GETPROTECTIONLEVEL:
10467 case F_GETDEFAULTPROTLEVEL:
10468 case F_MAKECOMPRESSED:
10469 case F_SET_GREEDY_MODE:
10470 case F_SETSTATICCONTENT:
10471 case F_SETIOTYPE:
10472 case F_SETBACKINGSTORE:
10473 case F_GETPATH_MTMINFO:
10474 case APFSIOC_REVERT_TO_SNAPSHOT:
10475 case FSIOC_FIOSEEKHOLE:
10476 case FSIOC_FIOSEEKDATA:
10477 case HFS_GET_BOOT_INFO:
10478 case HFS_SET_BOOT_INFO:
10479 case FIOPINSWAP:
10480 case F_CHKCLEAN:
10481 case F_FULLFSYNC:
10482 case F_BARRIERFSYNC:
10483 case F_FREEZE_FS:
10484 case F_THAW_FS:
10485 error = EINVAL;
10486 goto outdrop;
10487 }
fe8ab488 10488 /* Invoke the filesystem-specific code */
5ba3f43e 10489 error = VNOP_IOCTL(vp, cmd, data, options, ctx);
fe8ab488
A
10490 }
10491
10492 } /* end switch stmt */
10493
1c79356b 10494 /*
fe8ab488 10495 * if no errors, copy any data to user. Size was
1c79356b
A
10496 * already set and checked above.
10497 */
39037602 10498 if (error == 0 && (cmd & IOC_OUT) && size)
b0d623f7 10499 error = copyout(data, udata, size);
39037602 10500
a39ff7e2 10501outdrop:
fe8ab488
A
10502 if (memp) {
10503 kfree(memp, size);
10504 }
39037602 10505
1c79356b
A
10506 return error;
10507}
b0d623f7
A
10508
10509/* ARGSUSED */
10510int
10511fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
10512{
10513 int error;
39037602 10514 struct nameidata nd;
b0d623f7
A
10515 u_long nameiflags;
10516 vnode_t vp = NULL;
10517 vfs_context_t ctx = vfs_context_current();
10518
10519 AUDIT_ARG(cmd, uap->cmd);
10520 AUDIT_ARG(value32, uap->options);
10521 /* Get the vnode for the file we are getting info on: */
10522 nameiflags = 0;
10523 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6d2010ae
A
10524 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
10525 UIO_USERSPACE, uap->path, ctx);
b0d623f7
A
10526 if ((error = namei(&nd))) goto done;
10527 vp = nd.ni_vp;
10528 nameidone(&nd);
10529
10530#if CONFIG_MACF
10531 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
10532 if (error) {
10533 goto done;
10534 }
10535#endif
10536
10537 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10538
10539done:
10540 if (vp)
10541 vnode_put(vp);
10542 return error;
10543}
10544/* ARGSUSED */
10545int
10546ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
10547{
10548 int error;
10549 vnode_t vp = NULL;
10550 vfs_context_t ctx = vfs_context_current();
10551 int fd = -1;
10552
10553 AUDIT_ARG(fd, uap->fd);
10554 AUDIT_ARG(cmd, uap->cmd);
10555 AUDIT_ARG(value32, uap->options);
39037602 10556
b0d623f7
A
10557 /* Get the vnode for the file we are getting info on: */
10558 if ((error = file_vnode(uap->fd, &vp)))
3e170ce0 10559 return error;
b0d623f7
A
10560 fd = uap->fd;
10561 if ((error = vnode_getwithref(vp))) {
3e170ce0
A
10562 file_drop(fd);
10563 return error;
b0d623f7
A
10564 }
10565
10566#if CONFIG_MACF
3e170ce0
A
10567 if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
10568 file_drop(fd);
10569 vnode_put(vp);
10570 return error;
b0d623f7
A
10571 }
10572#endif
10573
10574 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10575
3e170ce0 10576 file_drop(fd);
b0d623f7 10577
3e170ce0
A
10578 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
10579 if (vp) {
b0d623f7 10580 vnode_put(vp);
3e170ce0
A
10581 }
10582
b0d623f7
A
10583 return error;
10584}
1c79356b 10585/* end of fsctl system call */
0b4e3aa0 10586
91447636
A
10587/*
10588 * Retrieve the data of an extended attribute.
10589 */
10590int
2d21ac55 10591getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
91447636 10592{
2d21ac55 10593 vnode_t vp;
91447636
A
10594 struct nameidata nd;
10595 char attrname[XATTR_MAXNAMELEN+1];
2d21ac55 10596 vfs_context_t ctx = vfs_context_current();
91447636
A
10597 uio_t auio = NULL;
10598 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10599 size_t attrsize = 0;
10600 size_t namelen;
b0d623f7 10601 u_int32_t nameiflags;
91447636
A
10602 int error;
10603 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 10604
2d21ac55 10605 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10606 return (EINVAL);
55e303ae 10607
91447636 10608 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10609 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10610 if ((error = namei(&nd))) {
10611 return (error);
10612 }
10613 vp = nd.ni_vp;
10614 nameidone(&nd);
55e303ae 10615
91447636
A
10616 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10617 goto out;
10618 }
10619 if (xattr_protected(attrname)) {
6d2010ae
A
10620 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
10621 error = EPERM;
10622 goto out;
10623 }
91447636 10624 }
b0d623f7
A
10625 /*
10626 * the specific check for 0xffffffff is a hack to preserve
10627 * binaray compatibilty in K64 with applications that discovered
39037602 10628 * that passing in a buf pointer and a size of -1 resulted in
b0d623f7
A
10629 * just the size of the indicated extended attribute being returned.
10630 * this isn't part of the documented behavior, but because of the
10631 * original implemtation's check for "uap->size > 0", this behavior
10632 * was allowed. In K32 that check turned into a signed comparison
10633 * even though uap->size is unsigned... in K64, we blow by that
10634 * check because uap->size is unsigned and doesn't get sign smeared
39037602 10635 * in the munger for a 32 bit user app. we also need to add a
b0d623f7
A
10636 * check to limit the maximum size of the buffer being passed in...
10637 * unfortunately, the underlying fileystems seem to just malloc
10638 * the requested size even if the actual extended attribute is tiny.
10639 * because that malloc is for kernel wired memory, we have to put a
10640 * sane limit on it.
10641 *
10642 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
10643 * U64 running on K64 will yield -1 (64 bits wide)
10644 * U32/U64 running on K32 will yield -1 (32 bits wide)
10645 */
10646 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
10647 goto no_uio;
10648
b0d623f7 10649 if (uap->value) {
6d2010ae
A
10650 if (uap->size > (size_t)XATTR_MAXSIZE)
10651 uap->size = XATTR_MAXSIZE;
39037602 10652
91447636
A
10653 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10654 &uio_buf[0], sizeof(uio_buf));
10655 uio_addiov(auio, uap->value, uap->size);
10656 }
b0d623f7 10657no_uio:
2d21ac55 10658 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
91447636
A
10659out:
10660 vnode_put(vp);
55e303ae 10661
91447636
A
10662 if (auio) {
10663 *retval = uap->size - uio_resid(auio);
10664 } else {
10665 *retval = (user_ssize_t)attrsize;
55e303ae
A
10666 }
10667
91447636
A
10668 return (error);
10669}
55e303ae 10670
91447636
A
10671/*
10672 * Retrieve the data of an extended attribute.
10673 */
10674int
2d21ac55 10675fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
91447636 10676{
2d21ac55 10677 vnode_t vp;
91447636 10678 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
10679 uio_t auio = NULL;
10680 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10681 size_t attrsize = 0;
10682 size_t namelen;
10683 int error;
10684 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 10685
2d21ac55 10686 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10687 return (EINVAL);
55e303ae 10688
91447636
A
10689 if ( (error = file_vnode(uap->fd, &vp)) ) {
10690 return (error);
10691 }
10692 if ( (error = vnode_getwithref(vp)) ) {
10693 file_drop(uap->fd);
10694 return(error);
10695 }
10696 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10697 goto out;
10698 }
10699 if (xattr_protected(attrname)) {
10700 error = EPERM;
10701 goto out;
10702 }
10703 if (uap->value && uap->size > 0) {
10704 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10705 &uio_buf[0], sizeof(uio_buf));
10706 uio_addiov(auio, uap->value, uap->size);
10707 }
55e303ae 10708
2d21ac55 10709 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
91447636
A
10710out:
10711 (void)vnode_put(vp);
10712 file_drop(uap->fd);
55e303ae 10713
91447636
A
10714 if (auio) {
10715 *retval = uap->size - uio_resid(auio);
10716 } else {
10717 *retval = (user_ssize_t)attrsize;
10718 }
10719 return (error);
10720}
55e303ae 10721
91447636
A
10722/*
10723 * Set the data of an extended attribute.
10724 */
55e303ae 10725int
2d21ac55 10726setxattr(proc_t p, struct setxattr_args *uap, int *retval)
55e303ae 10727{
2d21ac55 10728 vnode_t vp;
91447636
A
10729 struct nameidata nd;
10730 char attrname[XATTR_MAXNAMELEN+1];
2d21ac55 10731 vfs_context_t ctx = vfs_context_current();
91447636
A
10732 uio_t auio = NULL;
10733 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10734 size_t namelen;
b0d623f7 10735 u_int32_t nameiflags;
91447636
A
10736 int error;
10737 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 10738
2d21ac55 10739 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10740 return (EINVAL);
55e303ae 10741
91447636 10742 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6d2010ae
A
10743 if (error == EPERM) {
10744 /* if the string won't fit in attrname, copyinstr emits EPERM */
10745 return (ENAMETOOLONG);
10746 }
10747 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10748 return error;
91447636
A
10749 }
10750 if (xattr_protected(attrname))
10751 return(EPERM);
2d21ac55 10752 if (uap->size != 0 && uap->value == 0) {
91447636 10753 return (EINVAL);
55e303ae 10754 }
55e303ae 10755
91447636 10756 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10757 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10758 if ((error = namei(&nd))) {
10759 return (error);
10760 }
10761 vp = nd.ni_vp;
10762 nameidone(&nd);
55e303ae 10763
91447636
A
10764 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10765 &uio_buf[0], sizeof(uio_buf));
10766 uio_addiov(auio, uap->value, uap->size);
55e303ae 10767
2d21ac55
A
10768 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
10769#if CONFIG_FSE
10770 if (error == 0) {
10771 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10772 FSE_ARG_VNODE, vp,
10773 FSE_ARG_DONE);
10774 }
10775#endif
91447636
A
10776 vnode_put(vp);
10777 *retval = 0;
10778 return (error);
10779}
55e303ae 10780
91447636
A
10781/*
10782 * Set the data of an extended attribute.
10783 */
10784int
2d21ac55 10785fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
91447636 10786{
2d21ac55 10787 vnode_t vp;
91447636 10788 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
10789 uio_t auio = NULL;
10790 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10791 size_t namelen;
10792 int error;
10793 char uio_buf[ UIO_SIZEOF(1) ];
6d2010ae 10794#if CONFIG_FSE
2d21ac55 10795 vfs_context_t ctx = vfs_context_current();
6d2010ae 10796#endif
55e303ae 10797
2d21ac55 10798 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10799 return (EINVAL);
55e303ae 10800
91447636 10801 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
3e170ce0
A
10802 if (error == EPERM) {
10803 /* if the string won't fit in attrname, copyinstr emits EPERM */
10804 return (ENAMETOOLONG);
10805 }
10806 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10807 return error;
55e303ae 10808 }
91447636
A
10809 if (xattr_protected(attrname))
10810 return(EPERM);
2d21ac55 10811 if (uap->size != 0 && uap->value == 0) {
91447636 10812 return (EINVAL);
55e303ae 10813 }
91447636
A
10814 if ( (error = file_vnode(uap->fd, &vp)) ) {
10815 return (error);
55e303ae 10816 }
91447636
A
10817 if ( (error = vnode_getwithref(vp)) ) {
10818 file_drop(uap->fd);
10819 return(error);
10820 }
10821 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10822 &uio_buf[0], sizeof(uio_buf));
10823 uio_addiov(auio, uap->value, uap->size);
91447636 10824
2d21ac55
A
10825 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
10826#if CONFIG_FSE
10827 if (error == 0) {
10828 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10829 FSE_ARG_VNODE, vp,
10830 FSE_ARG_DONE);
10831 }
10832#endif
91447636
A
10833 vnode_put(vp);
10834 file_drop(uap->fd);
10835 *retval = 0;
10836 return (error);
10837}
55e303ae 10838
91447636
A
10839/*
10840 * Remove an extended attribute.
b0d623f7 10841 * XXX Code duplication here.
91447636 10842 */
91447636 10843int
2d21ac55 10844removexattr(proc_t p, struct removexattr_args *uap, int *retval)
91447636 10845{
2d21ac55 10846 vnode_t vp;
91447636
A
10847 struct nameidata nd;
10848 char attrname[XATTR_MAXNAMELEN+1];
10849 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
2d21ac55 10850 vfs_context_t ctx = vfs_context_current();
91447636 10851 size_t namelen;
b0d623f7 10852 u_int32_t nameiflags;
91447636 10853 int error;
55e303ae 10854
2d21ac55 10855 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10856 return (EINVAL);
55e303ae 10857
91447636
A
10858 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10859 if (error != 0) {
10860 return (error);
10861 }
10862 if (xattr_protected(attrname))
10863 return(EPERM);
10864 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10865 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10866 if ((error = namei(&nd))) {
10867 return (error);
10868 }
10869 vp = nd.ni_vp;
10870 nameidone(&nd);
55e303ae 10871
2d21ac55
A
10872 error = vn_removexattr(vp, attrname, uap->options, ctx);
10873#if CONFIG_FSE
10874 if (error == 0) {
10875 add_fsevent(FSE_XATTR_REMOVED, ctx,
10876 FSE_ARG_VNODE, vp,
10877 FSE_ARG_DONE);
10878 }
10879#endif
91447636
A
10880 vnode_put(vp);
10881 *retval = 0;
10882 return (error);
55e303ae
A
10883}
10884
91447636
A
10885/*
10886 * Remove an extended attribute.
b0d623f7 10887 * XXX Code duplication here.
91447636 10888 */
91447636 10889int
2d21ac55 10890fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
55e303ae 10891{
2d21ac55 10892 vnode_t vp;
91447636 10893 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
10894 size_t namelen;
10895 int error;
6d2010ae 10896#if CONFIG_FSE
2d21ac55 10897 vfs_context_t ctx = vfs_context_current();
6d2010ae 10898#endif
55e303ae 10899
2d21ac55 10900 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636
A
10901 return (EINVAL);
10902
10903 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10904 if (error != 0) {
10905 return (error);
10906 }
10907 if (xattr_protected(attrname))
10908 return(EPERM);
10909 if ( (error = file_vnode(uap->fd, &vp)) ) {
10910 return (error);
10911 }
10912 if ( (error = vnode_getwithref(vp)) ) {
10913 file_drop(uap->fd);
10914 return(error);
10915 }
4a249263 10916
2d21ac55
A
10917 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
10918#if CONFIG_FSE
10919 if (error == 0) {
10920 add_fsevent(FSE_XATTR_REMOVED, ctx,
10921 FSE_ARG_VNODE, vp,
10922 FSE_ARG_DONE);
10923 }
10924#endif
91447636
A
10925 vnode_put(vp);
10926 file_drop(uap->fd);
10927 *retval = 0;
10928 return (error);
55e303ae
A
10929}
10930
91447636
A
10931/*
10932 * Retrieve the list of extended attribute names.
b0d623f7 10933 * XXX Code duplication here.
91447636 10934 */
91447636 10935int
2d21ac55 10936listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
55e303ae 10937{
2d21ac55 10938 vnode_t vp;
91447636 10939 struct nameidata nd;
2d21ac55 10940 vfs_context_t ctx = vfs_context_current();
91447636
A
10941 uio_t auio = NULL;
10942 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10943 size_t attrsize = 0;
b0d623f7 10944 u_int32_t nameiflags;
91447636
A
10945 int error;
10946 char uio_buf[ UIO_SIZEOF(1) ];
4a249263 10947
2d21ac55 10948 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10949 return (EINVAL);
55e303ae 10950
fe8ab488 10951 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10952 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10953 if ((error = namei(&nd))) {
10954 return (error);
10955 }
10956 vp = nd.ni_vp;
10957 nameidone(&nd);
10958 if (uap->namebuf != 0 && uap->bufsize > 0) {
6d2010ae
A
10959 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
10960 &uio_buf[0], sizeof(uio_buf));
91447636
A
10961 uio_addiov(auio, uap->namebuf, uap->bufsize);
10962 }
55e303ae 10963
2d21ac55 10964 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
55e303ae 10965
91447636
A
10966 vnode_put(vp);
10967 if (auio) {
10968 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10969 } else {
10970 *retval = (user_ssize_t)attrsize;
10971 }
10972 return (error);
55e303ae
A
10973}
10974
91447636
A
10975/*
10976 * Retrieve the list of extended attribute names.
b0d623f7 10977 * XXX Code duplication here.
91447636 10978 */
55e303ae 10979int
2d21ac55 10980flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
55e303ae 10981{
2d21ac55 10982 vnode_t vp;
91447636
A
10983 uio_t auio = NULL;
10984 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10985 size_t attrsize = 0;
10986 int error;
10987 char uio_buf[ UIO_SIZEOF(1) ];
10988
2d21ac55 10989 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636
A
10990 return (EINVAL);
10991
10992 if ( (error = file_vnode(uap->fd, &vp)) ) {
10993 return (error);
10994 }
10995 if ( (error = vnode_getwithref(vp)) ) {
10996 file_drop(uap->fd);
10997 return(error);
10998 }
10999 if (uap->namebuf != 0 && uap->bufsize > 0) {
39037602 11000 auio = uio_createwithbuffer(1, 0, spacetype,
91447636
A
11001 UIO_READ, &uio_buf[0], sizeof(uio_buf));
11002 uio_addiov(auio, uap->namebuf, uap->bufsize);
11003 }
91447636 11004
2d21ac55 11005 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
55e303ae 11006
91447636
A
11007 vnode_put(vp);
11008 file_drop(uap->fd);
11009 if (auio) {
11010 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
11011 } else {
11012 *retval = (user_ssize_t)attrsize;
11013 }
11014 return (error);
55e303ae 11015}
4a249263 11016
fe8ab488
A
11017static int fsgetpath_internal(
11018 vfs_context_t ctx, int volfs_id, uint64_t objid,
11019 vm_size_t bufsize, caddr_t buf, int *pathlen)
b0d623f7 11020{
fe8ab488 11021 int error;
b0d623f7 11022 struct mount *mp = NULL;
fe8ab488 11023 vnode_t vp;
b0d623f7 11024 int length;
fe8ab488 11025 int bpflags;
813fb2f6
A
11026 /* maximum number of times to retry build_path */
11027 unsigned int retries = 0x10;
b0d623f7 11028
fe8ab488 11029 if (bufsize > PAGE_SIZE) {
b0d623f7 11030 return (EINVAL);
fe8ab488
A
11031 }
11032
11033 if (buf == NULL) {
b0d623f7
A
11034 return (ENOMEM);
11035 }
fe8ab488 11036
813fb2f6 11037retry:
fe8ab488 11038 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
b0d623f7 11039 error = ENOTSUP; /* unexpected failure */
fe8ab488 11040 return ENOTSUP;
b0d623f7 11041 }
fe8ab488 11042
39236c6e 11043unionget:
fe8ab488 11044 if (objid == 2) {
b0d623f7
A
11045 error = VFS_ROOT(mp, &vp, ctx);
11046 } else {
fe8ab488 11047 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
b0d623f7 11048 }
39236c6e
A
11049
11050 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
11051 /*
11052 * If the fileid isn't found and we're in a union
11053 * mount volume, then see if the fileid is in the
11054 * mounted-on volume.
11055 */
11056 struct mount *tmp = mp;
11057 mp = vnode_mount(tmp->mnt_vnodecovered);
11058 vfs_unbusy(tmp);
11059 if (vfs_busy(mp, LK_NOWAIT) == 0)
11060 goto unionget;
fe8ab488 11061 } else {
39236c6e 11062 vfs_unbusy(mp);
fe8ab488 11063 }
39236c6e 11064
b0d623f7 11065 if (error) {
fe8ab488 11066 return error;
b0d623f7 11067 }
fe8ab488 11068
6d2010ae
A
11069#if CONFIG_MACF
11070 error = mac_vnode_check_fsgetpath(ctx, vp);
11071 if (error) {
11072 vnode_put(vp);
fe8ab488 11073 return error;
6d2010ae
A
11074 }
11075#endif
fe8ab488 11076
b0d623f7
A
11077 /* Obtain the absolute path to this vnode. */
11078 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
316670eb 11079 bpflags |= BUILDPATH_CHECK_MOVED;
fe8ab488 11080 error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
b0d623f7 11081 vnode_put(vp);
fe8ab488 11082
b0d623f7 11083 if (error) {
813fb2f6
A
11084 /* there was a race building the path, try a few more times */
11085 if (error == EAGAIN) {
11086 --retries;
11087 if (retries > 0)
11088 goto retry;
11089
11090 error = ENOENT;
11091 }
b0d623f7
A
11092 goto out;
11093 }
fe8ab488
A
11094
11095 AUDIT_ARG(text, buf);
39236c6e
A
11096
11097 if (kdebug_enable) {
11098 long dbg_parms[NUMPARMS];
11099 int dbg_namelen;
11100
11101 dbg_namelen = (int)sizeof(dbg_parms);
11102
fe8ab488
A
11103 if (length < dbg_namelen) {
11104 memcpy((char *)dbg_parms, buf, length);
39236c6e
A
11105 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
11106
11107 dbg_namelen = length;
fe8ab488
A
11108 } else {
11109 memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
11110 }
39236c6e
A
11111
11112 kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
11113 }
fe8ab488
A
11114
11115 *pathlen = (user_ssize_t)length; /* may be superseded by error */
11116
11117out:
11118 return (error);
11119}
11120
11121/*
11122 * Obtain the full pathname of a file system object by id.
fe8ab488 11123 */
fe8ab488
A
11124int
11125fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
11126{
11127 vfs_context_t ctx = vfs_context_current();
11128 fsid_t fsid;
11129 char *realpath;
11130 int length;
11131 int error;
11132
11133 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
11134 return (error);
11135 }
11136 AUDIT_ARG(value32, fsid.val[0]);
11137 AUDIT_ARG(value64, uap->objid);
11138 /* Restrict output buffer size for now. */
39037602 11139
fe8ab488
A
11140 if (uap->bufsize > PAGE_SIZE) {
11141 return (EINVAL);
39037602 11142 }
fe8ab488
A
11143 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
11144 if (realpath == NULL) {
11145 return (ENOMEM);
11146 }
11147
11148 error = fsgetpath_internal(
39037602 11149 ctx, fsid.val[0], uap->objid,
fe8ab488
A
11150 uap->bufsize, realpath, &length);
11151
11152 if (error) {
11153 goto out;
11154 }
39037602 11155
b0d623f7
A
11156 error = copyout((caddr_t)realpath, uap->buf, length);
11157
11158 *retval = (user_ssize_t)length; /* may be superseded by error */
11159out:
11160 if (realpath) {
11161 FREE(realpath, M_TEMP);
11162 }
11163 return (error);
11164}
11165
91447636
A
11166/*
11167 * Common routine to handle various flavors of statfs data heading out
11168 * to user space.
2d21ac55
A
11169 *
11170 * Returns: 0 Success
11171 * EFAULT
91447636
A
11172 */
11173static int
39037602
A
11174munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
11175 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
91447636 11176 boolean_t partial_copy)
4a249263 11177{
91447636
A
11178 int error;
11179 int my_size, copy_size;
11180
11181 if (is_64_bit) {
b0d623f7 11182 struct user64_statfs sfs;
91447636
A
11183 my_size = copy_size = sizeof(sfs);
11184 bzero(&sfs, my_size);
11185 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
11186 sfs.f_type = mp->mnt_vtable->vfc_typenum;
11187 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
b0d623f7
A
11188 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
11189 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
11190 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
11191 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
11192 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
11193 sfs.f_files = (user64_long_t)sfsp->f_files;
11194 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
91447636
A
11195 sfs.f_fsid = sfsp->f_fsid;
11196 sfs.f_owner = sfsp->f_owner;
6d2010ae 11197 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
fe8ab488 11198 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
6d2010ae
A
11199 } else {
11200 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
11201 }
2d21ac55
A
11202 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
11203 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
11204
11205 if (partial_copy) {
11206 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
11207 }
11208 error = copyout((caddr_t)&sfs, bufp, copy_size);
11209 }
11210 else {
b0d623f7
A
11211 struct user32_statfs sfs;
11212
91447636
A
11213 my_size = copy_size = sizeof(sfs);
11214 bzero(&sfs, my_size);
39037602 11215
91447636
A
11216 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
11217 sfs.f_type = mp->mnt_vtable->vfc_typenum;
11218 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
39037602 11219
91447636
A
11220 /*
11221 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
11222 * have to fudge the numbers here in that case. We inflate the blocksize in order
11223 * to reflect the filesystem size as best we can.
11224 */
39037602
A
11225 if ((sfsp->f_blocks > INT_MAX)
11226 /* Hack for 4061702 . I think the real fix is for Carbon to
91447636 11227 * look for some volume capability and not depend on hidden
39037602 11228 * semantics agreed between a FS and carbon.
91447636
A
11229 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
11230 * for Carbon to set bNoVolumeSizes volume attribute.
39037602 11231 * Without this the webdavfs files cannot be copied onto
91447636
A
11232 * disk as they look huge. This change should not affect
11233 * XSAN as they should not setting these to -1..
11234 */
2d21ac55
A
11235 && (sfsp->f_blocks != 0xffffffffffffffffULL)
11236 && (sfsp->f_bfree != 0xffffffffffffffffULL)
11237 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
91447636
A
11238 int shift;
11239
11240 /*
11241 * Work out how far we have to shift the block count down to make it fit.
11242 * Note that it's possible to have to shift so far that the resulting
11243 * blocksize would be unreportably large. At that point, we will clip
11244 * any values that don't fit.
11245 *
11246 * For safety's sake, we also ensure that f_iosize is never reported as
11247 * being smaller than f_bsize.
11248 */
11249 for (shift = 0; shift < 32; shift++) {
b0d623f7 11250 if ((sfsp->f_blocks >> shift) <= INT_MAX)
91447636 11251 break;
b0d623f7 11252 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
91447636
A
11253 break;
11254 }
b0d623f7
A
11255#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
11256 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
11257 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
11258 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
91447636 11259#undef __SHIFT_OR_CLIP
b0d623f7 11260 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
91447636
A
11261 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
11262 } else {
11263 /* filesystem is small enough to be reported honestly */
b0d623f7
A
11264 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
11265 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
11266 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
11267 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
11268 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
91447636 11269 }
b0d623f7
A
11270 sfs.f_files = (user32_long_t)sfsp->f_files;
11271 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
91447636
A
11272 sfs.f_fsid = sfsp->f_fsid;
11273 sfs.f_owner = sfsp->f_owner;
6d2010ae 11274 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
fe8ab488 11275 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
6d2010ae
A
11276 } else {
11277 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
11278 }
2d21ac55
A
11279 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
11280 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
11281
11282 if (partial_copy) {
11283 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
11284 }
11285 error = copyout((caddr_t)&sfs, bufp, copy_size);
11286 }
39037602 11287
91447636
A
11288 if (sizep != NULL) {
11289 *sizep = my_size;
11290 }
11291 return(error);
11292}
11293
11294/*
11295 * copy stat structure into user_stat structure.
11296 */
b0d623f7 11297void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
91447636 11298{
b0d623f7
A
11299 bzero(usbp, sizeof(*usbp));
11300
11301 usbp->st_dev = sbp->st_dev;
11302 usbp->st_ino = sbp->st_ino;
11303 usbp->st_mode = sbp->st_mode;
11304 usbp->st_nlink = sbp->st_nlink;
11305 usbp->st_uid = sbp->st_uid;
11306 usbp->st_gid = sbp->st_gid;
11307 usbp->st_rdev = sbp->st_rdev;
11308#ifndef _POSIX_C_SOURCE
11309 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11310 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11311 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11312 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11313 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11314 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11315#else
11316 usbp->st_atime = sbp->st_atime;
11317 usbp->st_atimensec = sbp->st_atimensec;
11318 usbp->st_mtime = sbp->st_mtime;
11319 usbp->st_mtimensec = sbp->st_mtimensec;
11320 usbp->st_ctime = sbp->st_ctime;
11321 usbp->st_ctimensec = sbp->st_ctimensec;
11322#endif
11323 usbp->st_size = sbp->st_size;
11324 usbp->st_blocks = sbp->st_blocks;
11325 usbp->st_blksize = sbp->st_blksize;
11326 usbp->st_flags = sbp->st_flags;
11327 usbp->st_gen = sbp->st_gen;
11328 usbp->st_lspare = sbp->st_lspare;
11329 usbp->st_qspare[0] = sbp->st_qspare[0];
11330 usbp->st_qspare[1] = sbp->st_qspare[1];
11331}
11332
11333void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
11334{
11335 bzero(usbp, sizeof(*usbp));
0c530ab8 11336
91447636
A
11337 usbp->st_dev = sbp->st_dev;
11338 usbp->st_ino = sbp->st_ino;
11339 usbp->st_mode = sbp->st_mode;
11340 usbp->st_nlink = sbp->st_nlink;
11341 usbp->st_uid = sbp->st_uid;
11342 usbp->st_gid = sbp->st_gid;
11343 usbp->st_rdev = sbp->st_rdev;
2d21ac55
A
11344#ifndef _POSIX_C_SOURCE
11345 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11346 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11347 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11348 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11349 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11350 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11351#else
11352 usbp->st_atime = sbp->st_atime;
11353 usbp->st_atimensec = sbp->st_atimensec;
11354 usbp->st_mtime = sbp->st_mtime;
11355 usbp->st_mtimensec = sbp->st_mtimensec;
11356 usbp->st_ctime = sbp->st_ctime;
11357 usbp->st_ctimensec = sbp->st_ctimensec;
11358#endif
11359 usbp->st_size = sbp->st_size;
11360 usbp->st_blocks = sbp->st_blocks;
11361 usbp->st_blksize = sbp->st_blksize;
11362 usbp->st_flags = sbp->st_flags;
11363 usbp->st_gen = sbp->st_gen;
11364 usbp->st_lspare = sbp->st_lspare;
11365 usbp->st_qspare[0] = sbp->st_qspare[0];
11366 usbp->st_qspare[1] = sbp->st_qspare[1];
11367}
11368
11369/*
11370 * copy stat64 structure into user_stat64 structure.
11371 */
b0d623f7
A
11372void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
11373{
11374 bzero(usbp, sizeof(*usbp));
11375
11376 usbp->st_dev = sbp->st_dev;
11377 usbp->st_ino = sbp->st_ino;
11378 usbp->st_mode = sbp->st_mode;
11379 usbp->st_nlink = sbp->st_nlink;
11380 usbp->st_uid = sbp->st_uid;
11381 usbp->st_gid = sbp->st_gid;
11382 usbp->st_rdev = sbp->st_rdev;
11383#ifndef _POSIX_C_SOURCE
11384 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11385 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11386 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11387 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11388 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11389 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11390 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11391 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
11392#else
11393 usbp->st_atime = sbp->st_atime;
11394 usbp->st_atimensec = sbp->st_atimensec;
11395 usbp->st_mtime = sbp->st_mtime;
11396 usbp->st_mtimensec = sbp->st_mtimensec;
11397 usbp->st_ctime = sbp->st_ctime;
11398 usbp->st_ctimensec = sbp->st_ctimensec;
11399 usbp->st_birthtime = sbp->st_birthtime;
11400 usbp->st_birthtimensec = sbp->st_birthtimensec;
11401#endif
11402 usbp->st_size = sbp->st_size;
11403 usbp->st_blocks = sbp->st_blocks;
11404 usbp->st_blksize = sbp->st_blksize;
11405 usbp->st_flags = sbp->st_flags;
11406 usbp->st_gen = sbp->st_gen;
11407 usbp->st_lspare = sbp->st_lspare;
11408 usbp->st_qspare[0] = sbp->st_qspare[0];
11409 usbp->st_qspare[1] = sbp->st_qspare[1];
11410}
11411
11412void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
2d21ac55 11413{
b0d623f7 11414 bzero(usbp, sizeof(*usbp));
2d21ac55
A
11415
11416 usbp->st_dev = sbp->st_dev;
11417 usbp->st_ino = sbp->st_ino;
11418 usbp->st_mode = sbp->st_mode;
11419 usbp->st_nlink = sbp->st_nlink;
11420 usbp->st_uid = sbp->st_uid;
11421 usbp->st_gid = sbp->st_gid;
11422 usbp->st_rdev = sbp->st_rdev;
11423#ifndef _POSIX_C_SOURCE
91447636
A
11424 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11425 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11426 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11427 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11428 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11429 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
2d21ac55
A
11430 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11431 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
91447636
A
11432#else
11433 usbp->st_atime = sbp->st_atime;
11434 usbp->st_atimensec = sbp->st_atimensec;
11435 usbp->st_mtime = sbp->st_mtime;
11436 usbp->st_mtimensec = sbp->st_mtimensec;
11437 usbp->st_ctime = sbp->st_ctime;
11438 usbp->st_ctimensec = sbp->st_ctimensec;
2d21ac55
A
11439 usbp->st_birthtime = sbp->st_birthtime;
11440 usbp->st_birthtimensec = sbp->st_birthtimensec;
91447636
A
11441#endif
11442 usbp->st_size = sbp->st_size;
11443 usbp->st_blocks = sbp->st_blocks;
11444 usbp->st_blksize = sbp->st_blksize;
11445 usbp->st_flags = sbp->st_flags;
11446 usbp->st_gen = sbp->st_gen;
11447 usbp->st_lspare = sbp->st_lspare;
11448 usbp->st_qspare[0] = sbp->st_qspare[0];
11449 usbp->st_qspare[1] = sbp->st_qspare[1];
4a249263 11450}
39236c6e
A
11451
11452/*
11453 * Purge buffer cache for simulating cold starts
11454 */
11455static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
11456{
11457 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
11458
11459 return VNODE_RETURNED;
11460}
11461
11462static int vfs_purge_callback(mount_t mp, __unused void * arg)
11463{
11464 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
11465
11466 return VFS_RETURNED;
11467}
11468
11469int
11470vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
11471{
11472 if (!kauth_cred_issuser(kauth_cred_get()))
11473 return EPERM;
11474
11475 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
11476
11477 return 0;
11478}
11479
39037602
A
11480/*
11481 * gets the vnode associated with the (unnamed) snapshot directory
11482 * for a Filesystem. The snapshot directory vnode is returned with
11483 * an iocount on it.
11484 */
11485int
11486vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx)
11487{
813fb2f6 11488 return (VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx));
39037602
A
11489}
11490
11491/*
11492 * Get the snapshot vnode.
11493 *
11494 * If successful, the call returns with an iocount on *rvpp ,*sdvpp and
11495 * needs nameidone() on ndp.
11496 *
11497 * If the snapshot vnode exists it is returned in ndp->ni_vp.
11498 *
11499 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
11500 * not needed.
11501 */
11502static int
11503vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp,
11504 user_addr_t name, struct nameidata *ndp, int32_t op,
11505#if !CONFIG_TRIGGERS
11506 __unused
11507#endif
11508 enum path_operation pathop,
11509 vfs_context_t ctx)
11510{
11511 int error, i;
11512 caddr_t name_buf;
11513 size_t name_len;
11514 struct vfs_attr vfa;
11515
11516 *sdvpp = NULLVP;
11517 *rvpp = NULLVP;
11518
11519 error = vnode_getfromfd(ctx, dirfd, rvpp);
11520 if (error)
11521 return (error);
11522
11523 if (!vnode_isvroot(*rvpp)) {
11524 error = EINVAL;
11525 goto out;
11526 }
11527
11528 /* Make sure the filesystem supports snapshots */
11529 VFSATTR_INIT(&vfa);
11530 VFSATTR_WANTED(&vfa, f_capabilities);
11531 if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) ||
11532 !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) ||
11533 !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] &
11534 VOL_CAP_INT_SNAPSHOT)) ||
11535 !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &
11536 VOL_CAP_INT_SNAPSHOT))) {
11537 error = ENOTSUP;
11538 goto out;
11539 }
11540
11541 error = vnode_get_snapdir(*rvpp, sdvpp, ctx);
11542 if (error)
11543 goto out;
11544
11545 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11546 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11547 if (error)
11548 goto out1;
11549
11550 /*
11551 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
11552 * (the length returned by copyinstr includes the terminating NUL)
11553 */
11554 if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
11555 (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
11556 error = EINVAL;
11557 goto out1;
11558 }
11559 for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++);
11560 if (i < (int)name_len) {
11561 error = EINVAL;
11562 goto out1;
11563 }
11564
11565#if CONFIG_MACF
11566 if (op == CREATE) {
11567 error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp),
11568 name_buf);
11569 } else if (op == DELETE) {
11570 error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp),
11571 name_buf);
11572 }
11573 if (error)
11574 goto out1;
11575#endif
11576
11577 /* Check if the snapshot already exists ... */
11578 NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1,
11579 UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx);
11580 ndp->ni_dvp = *sdvpp;
11581
11582 error = namei(ndp);
11583out1:
11584 FREE(name_buf, M_TEMP);
11585out:
11586 if (error) {
11587 if (*sdvpp) {
11588 vnode_put(*sdvpp);
11589 *sdvpp = NULLVP;
11590 }
11591 if (*rvpp) {
11592 vnode_put(*rvpp);
11593 *rvpp = NULLVP;
11594 }
11595 }
11596 return (error);
11597}
11598
11599/*
11600 * create a filesystem snapshot (for supporting filesystems)
11601 *
11602 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
11603 * We get to the (unnamed) snapshot directory vnode and create the vnode
11604 * for the snapshot in it.
11605 *
11606 * Restrictions:
11607 *
11608 * a) Passed in name for snapshot cannot have slashes.
11609 * b) name can't be "." or ".."
11610 *
11611 * Since this requires superuser privileges, vnode_authorize calls are not
11612 * made.
11613 */
11614static int
11615snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
11616 vfs_context_t ctx)
11617{
11618 vnode_t rvp, snapdvp;
11619 int error;
11620 struct nameidata namend;
11621
11622 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE,
11623 OP_LINK, ctx);
11624 if (error)
11625 return (error);
11626
11627 if (namend.ni_vp) {
11628 vnode_put(namend.ni_vp);
11629 error = EEXIST;
11630 } else {
11631 struct vnode_attr va;
11632 vnode_t vp = NULLVP;
11633
11634 VATTR_INIT(&va);
11635 VATTR_SET(&va, va_type, VREG);
11636 VATTR_SET(&va, va_mode, 0);
11637
11638 error = vn_create(snapdvp, &vp, &namend, &va,
11639 VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
11640 if (!error && vp)
11641 vnode_put(vp);
39037602
A
11642 }
11643
11644 nameidone(&namend);
11645 vnode_put(snapdvp);
11646 vnode_put(rvp);
11647 return (error);
11648}
11649
11650/*
11651 * Delete a Filesystem snapshot
11652 *
11653 * get the vnode for the unnamed snapshot directory and the snapshot and
11654 * delete the snapshot.
11655 */
11656static int
11657snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
11658 vfs_context_t ctx)
11659{
11660 vnode_t rvp, snapdvp;
11661 int error;
11662 struct nameidata namend;
11663
11664 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE,
11665 OP_UNLINK, ctx);
11666 if (error)
11667 goto out;
11668
11669 error = VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd,
11670 VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
11671
11672 vnode_put(namend.ni_vp);
11673 nameidone(&namend);
11674 vnode_put(snapdvp);
11675 vnode_put(rvp);
11676out:
11677 return (error);
11678}
11679
11680/*
11681 * Revert a filesystem to a snapshot
11682 *
11683 * Marks the filesystem to revert to the given snapshot on next mount.
11684 */
11685static int
11686snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
11687 vfs_context_t ctx)
11688{
11689 int error;
11690 vnode_t rvp;
11691 mount_t mp;
11692 struct fs_snapshot_revert_args revert_data;
11693 struct componentname cnp;
11694 caddr_t name_buf;
11695 size_t name_len;
11696
11697 error = vnode_getfromfd(ctx, dirfd, &rvp);
11698 if (error) {
11699 return (error);
11700 }
11701 mp = vnode_mount(rvp);
11702
813fb2f6
A
11703 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11704 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11705 if (error) {
11706 FREE(name_buf, M_TEMP);
11707 vnode_put(rvp);
11708 return (error);
11709 }
11710
11711#if CONFIG_MACF
11712 error = mac_mount_check_snapshot_revert(ctx, mp, name_buf);
11713 if (error) {
11714 FREE(name_buf, M_TEMP);
11715 vnode_put(rvp);
11716 return (error);
11717 }
11718#endif
11719
39037602
A
11720 /*
11721 * Grab mount_iterref so that we can release the vnode,
11722 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
11723 */
11724 error = mount_iterref (mp, 0);
11725 vnode_put(rvp);
11726 if (error) {
39037602
A
11727 FREE(name_buf, M_TEMP);
11728 return (error);
11729 }
11730
11731 memset(&cnp, 0, sizeof(cnp));
11732 cnp.cn_pnbuf = (char *)name_buf;
11733 cnp.cn_nameiop = LOOKUP;
11734 cnp.cn_flags = ISLASTCN | HASBUF;
11735 cnp.cn_pnlen = MAXPATHLEN;
11736 cnp.cn_nameptr = cnp.cn_pnbuf;
11737 cnp.cn_namelen = (int)name_len;
11738 revert_data.sr_cnp = &cnp;
11739
11740 error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
11741 mount_iterdrop(mp);
11742 FREE(name_buf, M_TEMP);
11743
11744 if (error) {
11745 /* If there was any error, try again using VNOP_IOCTL */
11746
11747 vnode_t snapdvp;
11748 struct nameidata namend;
11749
11750 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP,
11751 OP_LOOKUP, ctx);
11752 if (error) {
11753 return (error);
11754 }
11755
11756
5ba3f43e 11757 error = VNOP_IOCTL(namend.ni_vp, APFSIOC_REVERT_TO_SNAPSHOT, (caddr_t) NULL,
39037602
A
11758 0, ctx);
11759
11760 vnode_put(namend.ni_vp);
11761 nameidone(&namend);
11762 vnode_put(snapdvp);
11763 vnode_put(rvp);
11764 }
11765
11766 return (error);
11767}
11768
11769/*
11770 * rename a Filesystem snapshot
11771 *
11772 * get the vnode for the unnamed snapshot directory and the snapshot and
11773 * rename the snapshot. This is a very specialised (and simple) case of
11774 * rename(2) (which has to deal with a lot more complications). It differs
11775 * slightly from rename(2) in that EEXIST is returned if the new name exists.
11776 */
11777static int
11778snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
11779 __unused uint32_t flags, vfs_context_t ctx)
11780{
11781 vnode_t rvp, snapdvp;
11782 int error, i;
11783 caddr_t newname_buf;
11784 size_t name_len;
11785 vnode_t fvp;
11786 struct nameidata *fromnd, *tond;
11787 /* carving out a chunk for structs that are too big to be on stack. */
11788 struct {
11789 struct nameidata from_node;
11790 struct nameidata to_node;
11791 } * __rename_data;
11792
11793 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
11794 fromnd = &__rename_data->from_node;
11795 tond = &__rename_data->to_node;
11796
11797 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE,
11798 OP_UNLINK, ctx);
11799 if (error)
11800 goto out;
11801 fvp = fromnd->ni_vp;
11802
11803 MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11804 error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
11805 if (error)
11806 goto out1;
11807
11808 /*
11809 * Some sanity checks- new name can't be empty, "." or ".." or have
11810 * slashes.
11811 * (the length returned by copyinstr includes the terminating NUL)
11812 *
11813 * The FS rename VNOP is suppossed to handle this but we'll pick it
11814 * off here itself.
11815 */
11816 if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') ||
11817 (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) {
11818 error = EINVAL;
11819 goto out1;
11820 }
11821 for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++);
11822 if (i < (int)name_len) {
11823 error = EINVAL;
11824 goto out1;
11825 }
11826
11827#if CONFIG_MACF
11828 error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp),
11829 newname_buf);
11830 if (error)
11831 goto out1;
11832#endif
11833
11834 NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
11835 UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx);
11836 tond->ni_dvp = snapdvp;
11837
11838 error = namei(tond);
11839 if (error) {
11840 goto out2;
11841 } else if (tond->ni_vp) {
11842 /*
11843 * snapshot rename behaves differently than rename(2) - if the
11844 * new name exists, EEXIST is returned.
11845 */
11846 vnode_put(tond->ni_vp);
11847 error = EEXIST;
11848 goto out2;
11849 }
11850
11851 error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
11852 &tond->ni_cnd, ctx);
11853
11854out2:
11855 nameidone(tond);
11856out1:
11857 FREE(newname_buf, M_TEMP);
11858 vnode_put(fvp);
11859 vnode_put(snapdvp);
11860 vnode_put(rvp);
11861 nameidone(fromnd);
11862out:
11863 FREE(__rename_data, M_TEMP);
11864 return (error);
11865}
11866
11867/*
11868 * Mount a Filesystem snapshot
11869 *
11870 * get the vnode for the unnamed snapshot directory and the snapshot and
11871 * mount the snapshot.
11872 */
11873static int
11874snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
813fb2f6 11875 __unused user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
39037602
A
11876{
11877 vnode_t rvp, snapdvp, snapvp, vp, pvp;
11878 int error;
11879 struct nameidata *snapndp, *dirndp;
11880 /* carving out a chunk for structs that are too big to be on stack. */
11881 struct {
11882 struct nameidata snapnd;
11883 struct nameidata dirnd;
11884 } * __snapshot_mount_data;
11885
11886 MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data),
11887 M_TEMP, M_WAITOK);
11888 snapndp = &__snapshot_mount_data->snapnd;
11889 dirndp = &__snapshot_mount_data->dirnd;
11890
11891 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP,
11892 OP_LOOKUP, ctx);
11893 if (error)
11894 goto out;
11895
11896 snapvp = snapndp->ni_vp;
11897 if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) {
11898 error = EIO;
11899 goto out1;
11900 }
11901
11902 /* Get the vnode to be covered */
11903 NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
11904 UIO_USERSPACE, directory, ctx);
11905 error = namei(dirndp);
11906 if (error)
11907 goto out1;
11908
11909 vp = dirndp->ni_vp;
11910 pvp = dirndp->ni_dvp;
11911
11912 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
11913 error = EINVAL;
11914 } else {
11915 mount_t mp = vnode_mount(rvp);
11916 struct fs_snapshot_mount_args smnt_data;
11917
11918 smnt_data.sm_mp = mp;
11919 smnt_data.sm_cnp = &snapndp->ni_cnd;
11920 error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
5ba3f43e 11921 &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), flags & MNT_DONTBROWSE,
39037602 11922 KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
39037602
A
11923 }
11924
11925 vnode_put(vp);
11926 vnode_put(pvp);
11927 nameidone(dirndp);
11928out1:
11929 vnode_put(snapvp);
11930 vnode_put(snapdvp);
11931 vnode_put(rvp);
11932 nameidone(snapndp);
11933out:
11934 FREE(__snapshot_mount_data, M_TEMP);
11935 return (error);
11936}
11937
813fb2f6
A
11938/*
11939 * Root from a snapshot of the filesystem
11940 *
11941 * Marks the filesystem to root from the given snapshot on next boot.
11942 */
11943static int
11944snapshot_root(int dirfd, user_addr_t name, __unused uint32_t flags,
11945 vfs_context_t ctx)
11946{
11947 int error;
11948 vnode_t rvp;
11949 mount_t mp;
11950 struct fs_snapshot_root_args root_data;
11951 struct componentname cnp;
11952 caddr_t name_buf;
11953 size_t name_len;
11954
11955 error = vnode_getfromfd(ctx, dirfd, &rvp);
11956 if (error) {
11957 return (error);
11958 }
11959 mp = vnode_mount(rvp);
11960
11961 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11962 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11963 if (error) {
11964 FREE(name_buf, M_TEMP);
11965 vnode_put(rvp);
11966 return (error);
11967 }
11968
11969 // XXX MAC checks ?
11970
11971 /*
11972 * Grab mount_iterref so that we can release the vnode,
11973 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
11974 */
11975 error = mount_iterref (mp, 0);
11976 vnode_put(rvp);
11977 if (error) {
11978 FREE(name_buf, M_TEMP);
11979 return (error);
11980 }
11981
11982 memset(&cnp, 0, sizeof(cnp));
11983 cnp.cn_pnbuf = (char *)name_buf;
11984 cnp.cn_nameiop = LOOKUP;
11985 cnp.cn_flags = ISLASTCN | HASBUF;
11986 cnp.cn_pnlen = MAXPATHLEN;
11987 cnp.cn_nameptr = cnp.cn_pnbuf;
11988 cnp.cn_namelen = (int)name_len;
11989 root_data.sr_cnp = &cnp;
11990
11991 error = VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, (caddr_t)&root_data, 0, ctx);
11992
11993 mount_iterdrop(mp);
11994 FREE(name_buf, M_TEMP);
11995
11996 return (error);
11997}
11998
39037602
A
11999/*
12000 * FS snapshot operations dispatcher
12001 */
12002int
12003fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap,
12004 __unused int32_t *retval)
12005{
12006 int error;
12007 vfs_context_t ctx = vfs_context_current();
12008
813fb2f6
A
12009 AUDIT_ARG(fd, uap->dirfd);
12010 AUDIT_ARG(value32, uap->op);
12011
39037602
A
12012 error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0);
12013 if (error)
12014 return (error);
12015
12016 switch (uap->op) {
12017 case SNAPSHOT_OP_CREATE:
12018 error = snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx);
12019 break;
12020 case SNAPSHOT_OP_DELETE:
12021 error = snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx);
12022 break;
12023 case SNAPSHOT_OP_RENAME:
12024 error = snapshot_rename(uap->dirfd, uap->name1, uap->name2,
12025 uap->flags, ctx);
12026 break;
12027 case SNAPSHOT_OP_MOUNT:
12028 error = snapshot_mount(uap->dirfd, uap->name1, uap->name2,
12029 uap->data, uap->flags, ctx);
12030 break;
12031 case SNAPSHOT_OP_REVERT:
12032 error = snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx);
12033 break;
5c9f4661 12034#if !TARGET_OS_OSX
813fb2f6
A
12035 case SNAPSHOT_OP_ROOT:
12036 error = snapshot_root(uap->dirfd, uap->name1, uap->flags, ctx);
12037 break;
5c9f4661 12038#endif /* !TARGET_OS_OSX */
39037602
A
12039 default:
12040 error = ENOSYS;
12041 }
12042
12043 return (error);
12044}