]> git.saurik.com Git - apple/xnu.git/blame - bsd/vfs/vfs_syscalls.c
xnu-4570.20.62.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
CommitLineData
1c79356b 1/*
5ba3f43e 2 * Copyright (c) 1995-2017 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39037602 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39037602 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39037602 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39037602 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
2d21ac55
A
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
1c79356b
A
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/namei.h>
77#include <sys/filedesc.h>
78#include <sys/kernel.h>
91447636 79#include <sys/file_internal.h>
1c79356b 80#include <sys/stat.h>
91447636
A
81#include <sys/vnode_internal.h>
82#include <sys/mount_internal.h>
83#include <sys/proc_internal.h>
84#include <sys/kauth.h>
85#include <sys/uio_internal.h>
1c79356b 86#include <sys/malloc.h>
91447636 87#include <sys/mman.h>
1c79356b
A
88#include <sys/dirent.h>
89#include <sys/attr.h>
90#include <sys/sysctl.h>
91#include <sys/ubc.h>
9bccf70c 92#include <sys/quota.h>
91447636
A
93#include <sys/kdebug.h>
94#include <sys/fsevents.h>
6d2010ae 95#include <sys/imgsrc.h>
91447636
A
96#include <sys/sysproto.h>
97#include <sys/xattr.h>
b0d623f7
A
98#include <sys/fcntl.h>
99#include <sys/fsctl.h>
91447636 100#include <sys/ubc_internal.h>
593a1d5f 101#include <sys/disk.h>
3e170ce0 102#include <sys/content_protection.h>
39037602
A
103#include <sys/clonefile.h>
104#include <sys/snapshot.h>
490019cf 105#include <sys/priv.h>
91447636
A
106#include <machine/cons.h>
107#include <machine/limits.h>
108#include <miscfs/specfs/specdev.h>
e5568f75 109
5ba3f43e
A
110#include <vfs/vfs_disk_conditioner.h>
111
b0d623f7 112#include <security/audit/audit.h>
e5568f75
A
113#include <bsm/audit_kevents.h>
114
91447636
A
115#include <mach/mach_types.h>
116#include <kern/kern_types.h>
117#include <kern/kalloc.h>
6d2010ae 118#include <kern/task.h>
91447636
A
119
120#include <vm/vm_pageout.h>
39037602 121#include <vm/vm_protos.h>
1c79356b 122
91447636 123#include <libkern/OSAtomic.h>
b0d623f7 124#include <pexpert/pexpert.h>
3e170ce0 125#include <IOKit/IOBSD.h>
55e303ae 126
490019cf
A
127#if ROUTEFS
128#include <miscfs/routefs/routefs.h>
129#endif /* ROUTEFS */
130
2d21ac55
A
131#if CONFIG_MACF
132#include <security/mac.h>
133#include <security/mac_framework.h>
134#endif
1c79356b 135
39037602 136#if CONFIG_FSE
2d21ac55 137#define GET_PATH(x) \
39037602 138 (x) = get_pathbuff();
2d21ac55
A
139#define RELEASE_PATH(x) \
140 release_pathbuff(x);
39037602 141#else
2d21ac55 142#define GET_PATH(x) \
39037602 143 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
2d21ac55
A
144#define RELEASE_PATH(x) \
145 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
146#endif /* CONFIG_FSE */
147
5ba3f43e
A
148extern void disk_conditioner_unmount(mount_t mp);
149
2d21ac55
A
150/* struct for checkdirs iteration */
151struct cdirargs {
152 vnode_t olddp;
153 vnode_t newdp;
154};
155/* callback for checkdirs iteration */
156static int checkdirs_callback(proc_t p, void * arg);
1c79356b 157
91447636 158static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
6601e61a 159static int checkdirs(vnode_t olddp, vfs_context_t ctx);
91447636
A
160void enablequotas(struct mount *mp, vfs_context_t ctx);
161static int getfsstat_callback(mount_t mp, void * arg);
162static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
2d21ac55 163static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
91447636 164static int sync_callback(mount_t, void *);
fe8ab488
A
165static void sync_thread(void *, __unused wait_result_t);
166static int sync_async(int);
39037602
A
167static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
168 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
91447636 169 boolean_t partial_copy);
b0d623f7
A
170static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
171 user_addr_t bufp);
172static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
6d2010ae
A
173static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
174 struct componentname *cnp, user_addr_t fsmountargs,
175 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
176 vfs_context_t ctx);
177void vfs_notify_mount(vnode_t pdvp);
178
179int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
b7266188 180
fe8ab488
A
181struct fd_vn_data * fg_vn_data_alloc(void);
182
c18c124e
A
183/*
184 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
185 * Concurrent lookups (or lookups by ids) on hard links can cause the
186 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
187 * does) to return ENOENT as the path cannot be returned from the name cache
188 * alone. We have no option but to retry and hope to get one namei->reverse path
189 * generation done without an intervening lookup, lookup by id on the hard link
190 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
191 * which currently are the MAC hooks for rename, unlink and rmdir.
192 */
193#define MAX_AUTHORIZE_ENOENT_RETRIES 1024
194
fe8ab488
A
195static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
196
197static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
198
b7266188 199#ifdef CONFIG_IMGSRC_ACCESS
b7266188
A
200static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
201static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
202static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
203static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
204static void mount_end_update(mount_t mp);
6d2010ae 205static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
b7266188
A
206#endif /* CONFIG_IMGSRC_ACCESS */
207
2d21ac55
A
208int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
209
210__private_extern__
211int sync_internal(void);
212
2d21ac55 213__private_extern__
c18c124e 214int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
91447636 215
fe8ab488
A
216extern lck_grp_t *fd_vn_lck_grp;
217extern lck_grp_attr_t *fd_vn_lck_grp_attr;
218extern lck_attr_t *fd_vn_lck_attr;
219
2d21ac55
A
220/*
221 * incremented each time a mount or unmount operation occurs
222 * used to invalidate the cached value of the rootvp in the
223 * mount structure utilized by cache_lookup_path
224 */
b0d623f7 225uint32_t mount_generation = 0;
1c79356b
A
226
227/* counts number of mount and unmount operations */
228unsigned int vfs_nummntops=0;
229
39236c6e
A
230extern const struct fileops vnops;
231#if CONFIG_APPLEDOUBLE
39037602 232extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
39236c6e 233#endif /* CONFIG_APPLEDOUBLE */
91447636 234
1c79356b
A
235/*
236 * Virtual File System System Calls
237 */
238
490019cf 239#if NFSCLIENT || DEVFS || ROUTEFS
6d2010ae
A
240/*
241 * Private in-kernel mounting spi (NFS only, not exported)
242 */
243 __private_extern__
244boolean_t
245vfs_iskernelmount(mount_t mp)
246{
247 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
248}
249
250 __private_extern__
251int
252kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
253 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
254{
255 struct nameidata nd;
256 boolean_t did_namei;
257 int error;
258
39037602 259 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
6d2010ae
A
260 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
261
262 /*
263 * Get the vnode to be covered if it's not supplied
264 */
265 if (vp == NULLVP) {
266 error = namei(&nd);
267 if (error)
268 return (error);
269 vp = nd.ni_vp;
270 pvp = nd.ni_dvp;
271 did_namei = TRUE;
272 } else {
273 char *pnbuf = CAST_DOWN(char *, path);
274
275 nd.ni_cnd.cn_pnbuf = pnbuf;
276 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
277 did_namei = FALSE;
278 }
279
280 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
281 syscall_flags, kern_flags, NULL, TRUE, ctx);
282
283 if (did_namei) {
284 vnode_put(vp);
285 vnode_put(pvp);
286 nameidone(&nd);
287 }
288
289 return (error);
290}
fe8ab488 291#endif /* NFSCLIENT || DEVFS || ROUTEFS */
6d2010ae 292
1c79356b
A
293/*
294 * Mount a file system.
295 */
1c79356b
A
296/* ARGSUSED */
297int
b0d623f7 298mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
2d21ac55
A
299{
300 struct __mac_mount_args muap;
301
302 muap.type = uap->type;
303 muap.path = uap->path;
304 muap.flags = uap->flags;
305 muap.data = uap->data;
306 muap.mac_p = USER_ADDR_NULL;
307 return (__mac_mount(p, &muap, retval));
308}
309
5ba3f43e
A
310int
311fmount(__unused proc_t p, struct fmount_args *uap, __unused int32_t *retval)
312{
313 struct componentname cn;
314 vfs_context_t ctx = vfs_context_current();
315 size_t dummy = 0;
316 int error;
317 int flags = uap->flags;
318 char fstypename[MFSNAMELEN];
319 char *labelstr = NULL; /* regular mount call always sets it to NULL for __mac_mount() */
320 vnode_t pvp;
321 vnode_t vp;
322
323 AUDIT_ARG(fd, uap->fd);
324 AUDIT_ARG(fflags, flags);
325 /* fstypename will get audited by mount_common */
326
327 /* Sanity check the flags */
328 if (flags & (MNT_IMGSRC_BY_INDEX|MNT_ROOTFS)) {
329 return (ENOTSUP);
330 }
331
332 if (flags & MNT_UNION) {
333 return (EPERM);
334 }
335
336 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
337 if (error) {
338 return (error);
339 }
340
341 if ((error = file_vnode(uap->fd, &vp)) != 0) {
342 return (error);
343 }
344
345 if ((error = vnode_getwithref(vp)) != 0) {
346 file_drop(uap->fd);
347 return (error);
348 }
349
350 pvp = vnode_getparent(vp);
351 if (pvp == NULL) {
352 vnode_put(vp);
353 file_drop(uap->fd);
354 return (EINVAL);
355 }
356
357 memset(&cn, 0, sizeof(struct componentname));
358 MALLOC(cn.cn_pnbuf, char *, MAXPATHLEN, M_TEMP, M_WAITOK);
359 cn.cn_pnlen = MAXPATHLEN;
360
361 if((error = vn_getpath(vp, cn.cn_pnbuf, &cn.cn_pnlen)) != 0) {
362 FREE(cn.cn_pnbuf, M_TEMP);
363 vnode_put(pvp);
364 vnode_put(vp);
365 file_drop(uap->fd);
366 return (error);
367 }
368
369 error = mount_common(fstypename, pvp, vp, &cn, uap->data, flags, 0, labelstr, FALSE, ctx);
370
371 FREE(cn.cn_pnbuf, M_TEMP);
372 vnode_put(pvp);
373 vnode_put(vp);
374 file_drop(uap->fd);
375
376 return (error);
377}
378
6d2010ae 379void
39037602 380vfs_notify_mount(vnode_t pdvp)
6d2010ae
A
381{
382 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
383 lock_vnode_and_post(pdvp, NOTE_WRITE);
384}
385
b0d623f7
A
386/*
387 * __mac_mount:
388 * Mount a file system taking into account MAC label behavior.
389 * See mount(2) man page for more information
390 *
391 * Parameters: p Process requesting the mount
392 * uap User argument descriptor (see below)
39037602 393 * retval (ignored)
b0d623f7
A
394 *
395 * Indirect: uap->type Filesystem type
396 * uap->path Path to mount
39037602
A
397 * uap->data Mount arguments
398 * uap->mac_p MAC info
b0d623f7 399 * uap->flags Mount flags
39037602 400 *
b0d623f7
A
401 *
402 * Returns: 0 Success
403 * !0 Not success
404 */
6d2010ae
A
405boolean_t root_fs_upgrade_try = FALSE;
406
2d21ac55 407int
b0d623f7 408__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
1c79356b 409{
39236c6e
A
410 vnode_t pvp = NULL;
411 vnode_t vp = NULL;
412 int need_nameidone = 0;
6d2010ae
A
413 vfs_context_t ctx = vfs_context_current();
414 char fstypename[MFSNAMELEN];
415 struct nameidata nd;
416 size_t dummy=0;
417 char *labelstr = NULL;
418 int flags = uap->flags;
419 int error;
39037602 420#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
6d2010ae 421 boolean_t is_64bit = IS_64BIT_PROCESS(p);
39236c6e
A
422#else
423#pragma unused(p)
424#endif
6d2010ae
A
425 /*
426 * Get the fs type name from user space
427 */
428 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
429 if (error)
430 return (error);
431
432 /*
433 * Get the vnode to be covered
434 */
39037602 435 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
6d2010ae
A
436 UIO_USERSPACE, uap->path, ctx);
437 error = namei(&nd);
39236c6e
A
438 if (error) {
439 goto out;
440 }
441 need_nameidone = 1;
6d2010ae
A
442 vp = nd.ni_vp;
443 pvp = nd.ni_dvp;
39037602 444
6d2010ae
A
445#ifdef CONFIG_IMGSRC_ACCESS
446 /* Mounting image source cannot be batched with other operations */
447 if (flags == MNT_IMGSRC_BY_INDEX) {
448 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
449 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
450 goto out;
451 }
452#endif /* CONFIG_IMGSRC_ACCESS */
453
454#if CONFIG_MACF
455 /*
456 * Get the label string (if any) from user space
457 */
458 if (uap->mac_p != USER_ADDR_NULL) {
459 struct user_mac mac;
460 size_t ulen = 0;
461
462 if (is_64bit) {
463 struct user64_mac mac64;
464 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
465 mac.m_buflen = mac64.m_buflen;
466 mac.m_string = mac64.m_string;
467 } else {
468 struct user32_mac mac32;
469 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
470 mac.m_buflen = mac32.m_buflen;
471 mac.m_string = mac32.m_string;
472 }
473 if (error)
474 goto out;
475 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
476 (mac.m_buflen < 2)) {
477 error = EINVAL;
478 goto out;
479 }
480 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
481 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
482 if (error) {
483 goto out;
484 }
485 AUDIT_ARG(mac_string, labelstr);
486 }
487#endif /* CONFIG_MACF */
488
489 AUDIT_ARG(fflags, flags);
490
4bd07ac2
A
491#if SECURE_KERNEL
492 if (flags & MNT_UNION) {
493 /* No union mounts on release kernels */
494 error = EPERM;
495 goto out;
496 }
497#endif
498
6d2010ae 499 if ((vp->v_flag & VROOT) &&
39236c6e
A
500 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
501 if (!(flags & MNT_UNION)) {
6d2010ae 502 flags |= MNT_UPDATE;
39236c6e
A
503 }
504 else {
39037602 505 /*
39236c6e 506 * For a union mount on '/', treat it as fresh
39037602
A
507 * mount instead of update.
508 * Otherwise, union mouting on '/' used to panic the
509 * system before, since mnt_vnodecovered was found to
510 * be NULL for '/' which is required for unionlookup
39236c6e
A
511 * after it gets ENOENT on union mount.
512 */
513 flags = (flags & ~(MNT_UPDATE));
514 }
515
4bd07ac2 516#if SECURE_KERNEL
39236c6e
A
517 if ((flags & MNT_RDONLY) == 0) {
518 /* Release kernels are not allowed to mount "/" as rw */
519 error = EPERM;
39037602 520 goto out;
39236c6e 521 }
39236c6e
A
522#endif
523 /*
524 * See 7392553 for more details on why this check exists.
525 * Suffice to say: If this check is ON and something tries
526 * to mount the rootFS RW, we'll turn off the codesign
39037602
A
527 * bitmap optimization.
528 */
6d2010ae 529#if CHECK_CS_VALIDATION_BITMAP
39236c6e 530 if ((flags & MNT_RDONLY) == 0 ) {
6d2010ae
A
531 root_fs_upgrade_try = TRUE;
532 }
533#endif
534 }
535
536 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
537 labelstr, FALSE, ctx);
39236c6e 538
6d2010ae 539out:
39236c6e 540
6d2010ae
A
541#if CONFIG_MACF
542 if (labelstr)
543 FREE(labelstr, M_MACTEMP);
544#endif /* CONFIG_MACF */
545
39236c6e
A
546 if (vp) {
547 vnode_put(vp);
548 }
549 if (pvp) {
550 vnode_put(pvp);
551 }
552 if (need_nameidone) {
553 nameidone(&nd);
554 }
6d2010ae
A
555
556 return (error);
557}
558
559/*
560 * common mount implementation (final stage of mounting)
39037602 561
6d2010ae
A
562 * Arguments:
563 * fstypename file system type (i.e. its vfs name)
564 * pvp parent of covered vnode
565 * vp covered vnode
566 * cnp component name (ie path) of covered vnode
567 * flags generic mount flags
568 * fsmountargs file system specific data
569 * labelstr optional MAC label
570 * kernelmount TRUE for mounts initiated from inside the kernel
571 * ctx caller's context
572 */
573static int
574mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
575 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
576 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
577{
39236c6e
A
578#if !CONFIG_MACF
579#pragma unused(labelstr)
580#endif
91447636
A
581 struct vnode *devvp = NULLVP;
582 struct vnode *device_vnode = NULLVP;
2d21ac55
A
583#if CONFIG_MACF
584 struct vnode *rvp;
585#endif
1c79356b 586 struct mount *mp;
6601e61a 587 struct vfstable *vfsp = (struct vfstable *)0;
6d2010ae 588 struct proc *p = vfs_context_proc(ctx);
91447636 589 int error, flag = 0;
91447636 590 user_addr_t devpath = USER_ADDR_NULL;
91447636
A
591 int ronly = 0;
592 int mntalloc = 0;
b0d623f7 593 boolean_t vfsp_ref = FALSE;
743b1565 594 boolean_t is_rwlock_locked = FALSE;
b0d623f7
A
595 boolean_t did_rele = FALSE;
596 boolean_t have_usecount = FALSE;
9bccf70c 597
1c79356b 598 /*
6d2010ae 599 * Process an update for an existing mount
1c79356b 600 */
6d2010ae 601 if (flags & MNT_UPDATE) {
1c79356b 602 if ((vp->v_flag & VROOT) == 0) {
91447636
A
603 error = EINVAL;
604 goto out1;
1c79356b
A
605 }
606 mp = vp->v_mount;
d12e1678 607
91447636 608 /* unmount in progress return error */
b0d623f7 609 mount_lock_spin(mp);
91447636
A
610 if (mp->mnt_lflag & MNT_LUNMOUNT) {
611 mount_unlock(mp);
612 error = EBUSY;
613 goto out1;
d12e1678 614 }
91447636
A
615 mount_unlock(mp);
616 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 617 is_rwlock_locked = TRUE;
1c79356b
A
618 /*
619 * We only allow the filesystem to be reloaded if it
620 * is currently mounted read-only.
621 */
6d2010ae 622 if ((flags & MNT_RELOAD) &&
1c79356b 623 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
91447636
A
624 error = ENOTSUP;
625 goto out1;
1c79356b 626 }
b7266188 627
316670eb
A
628 /*
629 * If content protection is enabled, update mounts are not
630 * allowed to turn it off.
631 */
39037602 632 if ((mp->mnt_flag & MNT_CPROTECT) &&
316670eb
A
633 ((flags & MNT_CPROTECT) == 0)) {
634 error = EINVAL;
635 goto out1;
636 }
637
39037602 638#ifdef CONFIG_IMGSRC_ACCESS
b7266188
A
639 /* Can't downgrade the backer of the root FS */
640 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
6d2010ae 641 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
b7266188
A
642 error = ENOTSUP;
643 goto out1;
644 }
645#endif /* CONFIG_IMGSRC_ACCESS */
646
1c79356b
A
647 /*
648 * Only root, or the user that did the original mount is
649 * permitted to update it.
650 */
2d21ac55
A
651 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
652 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
653 goto out1;
654 }
655#if CONFIG_MACF
656 error = mac_mount_check_remount(ctx, mp);
657 if (error != 0) {
91447636 658 goto out1;
1c79356b 659 }
2d21ac55 660#endif
1c79356b 661 /*
91447636
A
662 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
663 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
1c79356b 664 */
6d2010ae
A
665 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
666 flags |= MNT_NOSUID | MNT_NODEV;
d12e1678 667 if (mp->mnt_flag & MNT_NOEXEC)
6d2010ae 668 flags |= MNT_NOEXEC;
1c79356b 669 }
d12e1678
A
670 flag = mp->mnt_flag;
671
316670eb
A
672
673
6d2010ae 674 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
d12e1678 675
91447636 676 vfsp = mp->mnt_vtable;
1c79356b
A
677 goto update;
678 }
5ba3f43e 679
1c79356b 680 /*
91447636 681 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
1c79356b
A
682 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
683 */
6d2010ae
A
684 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
685 flags |= MNT_NOSUID | MNT_NODEV;
1c79356b 686 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
6d2010ae 687 flags |= MNT_NOEXEC;
1c79356b 688 }
91447636 689
55e303ae
A
690 /* XXXAUDIT: Should we capture the type on the error path as well? */
691 AUDIT_ARG(text, fstypename);
91447636 692 mount_list_lock();
1c79356b 693 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
b0d623f7
A
694 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
695 vfsp->vfc_refcount++;
696 vfsp_ref = TRUE;
1c79356b 697 break;
b0d623f7 698 }
91447636 699 mount_list_unlock();
1c79356b 700 if (vfsp == NULL) {
91447636
A
701 error = ENODEV;
702 goto out1;
1c79356b 703 }
6d2010ae
A
704
705 /*
706 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
707 */
708 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
709 error = EINVAL; /* unsupported request */
2d21ac55 710 goto out1;
6d2010ae
A
711 }
712
713 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
714 if (error != 0) {
91447636 715 goto out1;
1c79356b 716 }
1c79356b
A
717
718 /*
6d2010ae 719 * Allocate and initialize the filesystem (mount_t)
1c79356b 720 */
b0d623f7 721 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
1c79356b 722 M_MOUNT, M_WAITOK);
b0d623f7 723 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
91447636 724 mntalloc = 1;
0b4e3aa0
A
725
726 /* Initialize the default IO constraints */
727 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
728 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
91447636
A
729 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
730 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
731 mp->mnt_devblocksize = DEV_BSIZE;
2d21ac55 732 mp->mnt_alignmentmask = PAGE_MASK;
b0d623f7
A
733 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
734 mp->mnt_ioscale = 1;
2d21ac55
A
735 mp->mnt_ioflags = 0;
736 mp->mnt_realrootvp = NULLVP;
737 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
91447636
A
738
739 TAILQ_INIT(&mp->mnt_vnodelist);
740 TAILQ_INIT(&mp->mnt_workerqueue);
741 TAILQ_INIT(&mp->mnt_newvnodes);
742 mount_lock_init(mp);
743 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 744 is_rwlock_locked = TRUE;
1c79356b 745 mp->mnt_op = vfsp->vfc_vfsops;
91447636 746 mp->mnt_vtable = vfsp;
91447636 747 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
1c79356b 748 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
fe8ab488
A
749 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
750 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1c79356b 751 mp->mnt_vnodecovered = vp;
2d21ac55 752 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
6d2010ae
A
753 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
754 mp->mnt_devbsdunit = 0;
1c79356b 755
91447636
A
756 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
757 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
6d2010ae 758
490019cf 759#if NFSCLIENT || DEVFS || ROUTEFS
6d2010ae
A
760 if (kernelmount)
761 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
762 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
763 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
fe8ab488 764#endif /* NFSCLIENT || DEVFS */
6d2010ae 765
1c79356b 766update:
5ba3f43e 767
1c79356b
A
768 /*
769 * Set the mount level flags.
770 */
6d2010ae 771 if (flags & MNT_RDONLY)
1c79356b 772 mp->mnt_flag |= MNT_RDONLY;
6d2010ae
A
773 else if (mp->mnt_flag & MNT_RDONLY) {
774 // disallow read/write upgrades of file systems that
775 // had the TYPENAME_OVERRIDE feature set.
776 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
777 error = EPERM;
778 goto out1;
779 }
1c79356b 780 mp->mnt_kern_flag |= MNTK_WANTRDWR;
6d2010ae 781 }
0b4e3aa0
A
782 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
783 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
6d2010ae
A
784 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
785 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
786 MNT_QUARANTINE | MNT_CPROTECT);
813fb2f6
A
787
788#if SECURE_KERNEL
789#if !CONFIG_MNT_SUID
790 /*
5ba3f43e 791 * On release builds of iOS based platforms, always enforce NOSUID on
813fb2f6
A
792 * all mounts. We do this here because we can catch update mounts as well as
793 * non-update mounts in this case.
794 */
795 mp->mnt_flag |= (MNT_NOSUID);
796#endif
797#endif
798
6d2010ae
A
799 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
800 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
801 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
802 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
803 MNT_QUARANTINE | MNT_CPROTECT);
2d21ac55
A
804
805#if CONFIG_MACF
6d2010ae 806 if (flags & MNT_MULTILABEL) {
2d21ac55
A
807 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
808 error = EINVAL;
809 goto out1;
810 }
811 mp->mnt_flag |= MNT_MULTILABEL;
812 }
813#endif
6d2010ae
A
814 /*
815 * Process device path for local file systems if requested
816 */
39037602
A
817 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS &&
818 !(internal_flags & KERNEL_MOUNT_SNAPSHOT)) {
6d2010ae 819 if (vfs_context_is64bit(ctx)) {
91447636 820 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
39037602 821 goto out1;
91447636
A
822 fsmountargs += sizeof(devpath);
823 } else {
b0d623f7 824 user32_addr_t tmp;
91447636 825 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
39037602 826 goto out1;
91447636
A
827 /* munge into LP64 addr */
828 devpath = CAST_USER_ADDR_T(tmp);
829 fsmountargs += sizeof(tmp);
830 }
831
6d2010ae 832 /* Lookup device and authorize access to it */
91447636 833 if ((devpath)) {
6d2010ae
A
834 struct nameidata nd;
835
836 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
837 if ( (error = namei(&nd)) )
91447636
A
838 goto out1;
839
3e170ce0 840 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
6d2010ae 841 devvp = nd.ni_vp;
91447636 842
6d2010ae 843 nameidone(&nd);
91447636
A
844
845 if (devvp->v_type != VBLK) {
846 error = ENOTBLK;
847 goto out2;
848 }
849 if (major(devvp->v_rdev) >= nblkdev) {
850 error = ENXIO;
851 goto out2;
852 }
853 /*
854 * If mount by non-root, then verify that user has necessary
855 * permissions on the device.
856 */
2d21ac55 857 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
6d2010ae
A
858 mode_t accessmode = KAUTH_VNODE_READ_DATA;
859
91447636
A
860 if ((mp->mnt_flag & MNT_RDONLY) == 0)
861 accessmode |= KAUTH_VNODE_WRITE_DATA;
2d21ac55 862 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
91447636
A
863 goto out2;
864 }
865 }
6d2010ae
A
866 /* On first mount, preflight and open device */
867 if (devpath && ((flags & MNT_UPDATE) == 0)) {
91447636
A
868 if ( (error = vnode_ref(devvp)) )
869 goto out2;
870 /*
871 * Disallow multiple mounts of the same device.
872 * Disallow mounting of a device that is currently in use
873 * (except for root, which might share swap device for miniroot).
874 * Flush out any old buffers remaining from a previous use.
875 */
876 if ( (error = vfs_mountedon(devvp)) )
877 goto out3;
39037602 878
91447636
A
879 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
880 error = EBUSY;
881 goto out3;
882 }
2d21ac55 883 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
91447636
A
884 error = ENOTBLK;
885 goto out3;
886 }
887 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
888 goto out3;
889
890 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
2d21ac55
A
891#if CONFIG_MACF
892 error = mac_vnode_check_open(ctx,
893 devvp,
894 ronly ? FREAD : FREAD|FWRITE);
895 if (error)
896 goto out3;
897#endif /* MAC */
898 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
91447636
A
899 goto out3;
900
901 mp->mnt_devvp = devvp;
902 device_vnode = devvp;
b0d623f7 903
6d2010ae
A
904 } else if ((mp->mnt_flag & MNT_RDONLY) &&
905 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
906 (device_vnode = mp->mnt_devvp)) {
907 dev_t dev;
908 int maj;
909 /*
910 * If upgrade to read-write by non-root, then verify
911 * that user has necessary permissions on the device.
912 */
913 vnode_getalways(device_vnode);
b0d623f7 914
6d2010ae 915 if (suser(vfs_context_ucred(ctx), NULL) &&
39037602 916 (error = vnode_authorize(device_vnode, NULL,
6d2010ae
A
917 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
918 ctx)) != 0) {
919 vnode_put(device_vnode);
920 goto out2;
921 }
b0d623f7 922
6d2010ae
A
923 /* Tell the device that we're upgrading */
924 dev = (dev_t)device_vnode->v_rdev;
925 maj = major(dev);
b0d623f7 926
6d2010ae
A
927 if ((u_int)maj >= (u_int)nblkdev)
928 panic("Volume mounted on a device with invalid major number.");
b0d623f7 929
6d2010ae
A
930 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
931 vnode_put(device_vnode);
91447636 932 device_vnode = NULLVP;
6d2010ae
A
933 if (error != 0) {
934 goto out2;
935 }
91447636
A
936 }
937 }
2d21ac55 938#if CONFIG_MACF
6d2010ae 939 if ((flags & MNT_UPDATE) == 0) {
2d21ac55
A
940 mac_mount_label_init(mp);
941 mac_mount_label_associate(ctx, mp);
942 }
6d2010ae
A
943 if (labelstr) {
944 if ((flags & MNT_UPDATE) != 0) {
945 error = mac_mount_check_label_update(ctx, mp);
2d21ac55
A
946 if (error != 0)
947 goto out3;
948 }
2d21ac55
A
949 }
950#endif
1c79356b
A
951 /*
952 * Mount the filesystem.
953 */
39037602
A
954 if (internal_flags & KERNEL_MOUNT_SNAPSHOT) {
955 error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT,
956 (caddr_t)fsmountargs, 0, ctx);
957 } else {
958 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
959 }
d12e1678 960
6d2010ae 961 if (flags & MNT_UPDATE) {
1c79356b
A
962 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
963 mp->mnt_flag &= ~MNT_RDONLY;
964 mp->mnt_flag &=~
965 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
966 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
967 if (error)
6d2010ae 968 mp->mnt_flag = flag; /* restore flag value */
91447636
A
969 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
970 lck_rw_done(&mp->mnt_rwlock);
743b1565 971 is_rwlock_locked = FALSE;
9bccf70c 972 if (!error)
2d21ac55 973 enablequotas(mp, ctx);
6d2010ae 974 goto exit;
1c79356b 975 }
6d2010ae 976
1c79356b
A
977 /*
978 * Put the new filesystem on the mount list after root.
979 */
6601e61a 980 if (error == 0) {
2d21ac55
A
981 struct vfs_attr vfsattr;
982#if CONFIG_MACF
983 if (vfs_flags(mp) & MNT_MULTILABEL) {
984 error = VFS_ROOT(mp, &rvp, ctx);
985 if (error) {
986 printf("%s() VFS_ROOT returned %d\n", __func__, error);
987 goto out3;
988 }
2d21ac55 989 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
b0d623f7
A
990 /*
991 * drop reference provided by VFS_ROOT
992 */
993 vnode_put(rvp);
994
2d21ac55
A
995 if (error)
996 goto out3;
997 }
998#endif /* MAC */
999
1000 vnode_lock_spin(vp);
1001 CLR(vp->v_flag, VMOUNT);
91447636
A
1002 vp->v_mountedhere = mp;
1003 vnode_unlock(vp);
1004
2d21ac55
A
1005 /*
1006 * taking the name_cache_lock exclusively will
1007 * insure that everyone is out of the fast path who
1008 * might be trying to use a now stale copy of
1009 * vp->v_mountedhere->mnt_realrootvp
1010 * bumping mount_generation causes the cached values
1011 * to be invalidated
1012 */
1013 name_cache_lock();
1014 mount_generation++;
1015 name_cache_unlock();
1016
b0d623f7
A
1017 error = vnode_ref(vp);
1018 if (error != 0) {
1019 goto out4;
1020 }
1021
1022 have_usecount = TRUE;
91447636 1023
2d21ac55 1024 error = checkdirs(vp, ctx);
6601e61a
A
1025 if (error != 0) {
1026 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1027 goto out4;
1028 }
39037602
A
1029 /*
1030 * there is no cleanup code here so I have made it void
91447636
A
1031 * we need to revisit this
1032 */
2d21ac55 1033 (void)VFS_START(mp, 0, ctx);
1c79356b 1034
6d2010ae
A
1035 if (mount_list_add(mp) != 0) {
1036 /*
1037 * The system is shutting down trying to umount
1038 * everything, so fail with a plausible errno.
1039 */
1040 error = EBUSY;
b0d623f7
A
1041 goto out4;
1042 }
6601e61a
A
1043 lck_rw_done(&mp->mnt_rwlock);
1044 is_rwlock_locked = FALSE;
1045
2d21ac55
A
1046 /* Check if this mounted file system supports EAs or named streams. */
1047 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1048 VFSATTR_INIT(&vfsattr);
1049 VFSATTR_WANTED(&vfsattr, f_capabilities);
1050 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
39037602 1051 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
2d21ac55
A
1052 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
1053 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
1054 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
1055 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1056 }
1057#if NAMEDSTREAMS
1058 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
1059 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
1060 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1061 }
1062#endif
1063 /* Check if this file system supports path from id lookups. */
1064 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
1065 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
1066 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1067 } else if (mp->mnt_flag & MNT_DOVOLFS) {
1068 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1069 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1070 }
39037602
A
1071
1072 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) &&
1073 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) {
1074 mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS;
1075 }
2d21ac55
A
1076 }
1077 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
1078 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1079 }
1080 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
1081 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
1082 }
1c79356b 1083 /* increment the operations count */
b0d623f7 1084 OSAddAtomic(1, &vfs_nummntops);
2d21ac55 1085 enablequotas(mp, ctx);
91447636
A
1086
1087 if (device_vnode) {
1088 device_vnode->v_specflags |= SI_MOUNTEDON;
1089
1090 /*
1091 * cache the IO attributes for the underlying physical media...
1092 * an error return indicates the underlying driver doesn't
1093 * support all the queries necessary... however, reasonable
1094 * defaults will have been set, so no reason to bail or care
1095 */
1096 vfs_init_io_attributes(device_vnode, mp);
39037602 1097 }
6601e61a
A
1098
1099 /* Now that mount is setup, notify the listeners */
6d2010ae 1100 vfs_notify_mount(pvp);
3e170ce0
A
1101 IOBSDMountChange(mp, kIOMountChangeMount);
1102
1c79356b 1103 } else {
6d2010ae
A
1104 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1105 if (mp->mnt_vnodelist.tqh_first != NULL) {
39037602 1106 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
6d2010ae
A
1107 mp->mnt_vtable->vfc_name, error);
1108 }
1109
2d21ac55 1110 vnode_lock_spin(vp);
1c79356b 1111 CLR(vp->v_flag, VMOUNT);
6601e61a 1112 vnode_unlock(vp);
91447636
A
1113 mount_list_lock();
1114 mp->mnt_vtable->vfc_refcount--;
1115 mount_list_unlock();
55e303ae 1116
91447636 1117 if (device_vnode ) {
91447636 1118 vnode_rele(device_vnode);
b0d623f7 1119 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
91447636
A
1120 }
1121 lck_rw_done(&mp->mnt_rwlock);
743b1565 1122 is_rwlock_locked = FALSE;
39037602 1123
6d2010ae
A
1124 /*
1125 * if we get here, we have a mount structure that needs to be freed,
1126 * but since the coveredvp hasn't yet been updated to point at it,
1127 * no need to worry about other threads holding a crossref on this mp
1128 * so it's ok to just free it
1129 */
91447636 1130 mount_lock_destroy(mp);
2d21ac55
A
1131#if CONFIG_MACF
1132 mac_mount_label_destroy(mp);
1133#endif
55e303ae 1134 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1c79356b 1135 }
6d2010ae 1136exit:
91447636 1137 /*
6d2010ae 1138 * drop I/O count on the device vp if there was one
91447636
A
1139 */
1140 if (devpath && devvp)
1141 vnode_put(devvp);
b0d623f7 1142
91447636 1143 return(error);
b0d623f7 1144
6d2010ae 1145/* Error condition exits */
6601e61a 1146out4:
2d21ac55 1147 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
39037602
A
1148
1149 /*
6d2010ae
A
1150 * If the mount has been placed on the covered vp,
1151 * it may have been discovered by now, so we have
1152 * to treat this just like an unmount
1153 */
1154 mount_lock_spin(mp);
1155 mp->mnt_lflag |= MNT_LDEAD;
1156 mount_unlock(mp);
1157
6601e61a 1158 if (device_vnode != NULLVP) {
b0d623f7 1159 vnode_rele(device_vnode);
2d21ac55
A
1160 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1161 ctx);
b0d623f7 1162 did_rele = TRUE;
6601e61a 1163 }
6d2010ae 1164
2d21ac55 1165 vnode_lock_spin(vp);
6d2010ae
A
1166
1167 mp->mnt_crossref++;
6601e61a 1168 vp->v_mountedhere = (mount_t) 0;
6d2010ae 1169
6601e61a 1170 vnode_unlock(vp);
6d2010ae 1171
b0d623f7
A
1172 if (have_usecount) {
1173 vnode_rele(vp);
1174 }
91447636 1175out3:
6d2010ae 1176 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
2d21ac55 1177 vnode_rele(devvp);
91447636
A
1178out2:
1179 if (devpath && devvp)
1180 vnode_put(devvp);
1181out1:
743b1565
A
1182 /* Release mnt_rwlock only when it was taken */
1183 if (is_rwlock_locked == TRUE) {
1184 lck_rw_done(&mp->mnt_rwlock);
1185 }
39037602 1186
6601e61a 1187 if (mntalloc) {
6d2010ae
A
1188 if (mp->mnt_crossref)
1189 mount_dropcrossref(mp, vp, 0);
1190 else {
1191 mount_lock_destroy(mp);
2d21ac55 1192#if CONFIG_MACF
6d2010ae 1193 mac_mount_label_destroy(mp);
2d21ac55 1194#endif
6d2010ae
A
1195 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1196 }
b0d623f7 1197 }
b0d623f7 1198 if (vfsp_ref) {
6601e61a
A
1199 mount_list_lock();
1200 vfsp->vfc_refcount--;
1201 mount_list_unlock();
6601e61a 1202 }
91447636
A
1203
1204 return(error);
1c79356b
A
1205}
1206
39037602 1207/*
b7266188
A
1208 * Flush in-core data, check for competing mount attempts,
1209 * and set VMOUNT
1210 */
6d2010ae
A
1211int
1212prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
b7266188 1213{
39236c6e
A
1214#if !CONFIG_MACF
1215#pragma unused(cnp,fsname)
1216#endif
b7266188
A
1217 struct vnode_attr va;
1218 int error;
1219
6d2010ae
A
1220 if (!skip_auth) {
1221 /*
1222 * If the user is not root, ensure that they own the directory
1223 * onto which we are attempting to mount.
1224 */
1225 VATTR_INIT(&va);
1226 VATTR_WANTED(&va, va_uid);
1227 if ((error = vnode_getattr(vp, &va, ctx)) ||
1228 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
39037602 1229 (!vfs_context_issuser(ctx)))) {
6d2010ae
A
1230 error = EPERM;
1231 goto out;
1232 }
b7266188
A
1233 }
1234
1235 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1236 goto out;
1237
1238 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1239 goto out;
1240
1241 if (vp->v_type != VDIR) {
1242 error = ENOTDIR;
1243 goto out;
1244 }
1245
1246 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1247 error = EBUSY;
1248 goto out;
1249 }
1250
1251#if CONFIG_MACF
1252 error = mac_mount_check_mount(ctx, vp,
1253 cnp, fsname);
1254 if (error != 0)
1255 goto out;
1256#endif
1257
1258 vnode_lock_spin(vp);
1259 SET(vp->v_flag, VMOUNT);
1260 vnode_unlock(vp);
1261
1262out:
1263 return error;
1264}
1265
6d2010ae
A
1266#if CONFIG_IMGSRC_ACCESS
1267
/*
 * IMGSRC_DEBUG: printf-style tracing for the imageboot source relocation
 * code.  Expands to an empty statement unless DEBUG is set, in which case
 * the arguments are forwarded to printf().
 */
#if DEBUG
#define IMGSRC_DEBUG(...) printf(__VA_ARGS__)
#else
#define IMGSRC_DEBUG(...) do { } while(0)
#endif
6d2010ae 1273
b7266188
A
1274static int
1275authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1276{
1277 struct nameidata nd;
6d2010ae 1278 vnode_t vp, realdevvp;
b7266188
A
1279 mode_t accessmode;
1280 int error;
1281
6d2010ae
A
1282 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1283 if ( (error = namei(&nd)) ) {
1284 IMGSRC_DEBUG("namei() failed with %d\n", error);
b7266188 1285 return error;
6d2010ae 1286 }
b7266188 1287
b7266188 1288 vp = nd.ni_vp;
b7266188 1289
6d2010ae
A
1290 if (!vnode_isblk(vp)) {
1291 IMGSRC_DEBUG("Not block device.\n");
b7266188
A
1292 error = ENOTBLK;
1293 goto out;
1294 }
6d2010ae
A
1295
1296 realdevvp = mp->mnt_devvp;
1297 if (realdevvp == NULLVP) {
1298 IMGSRC_DEBUG("No device backs the mount.\n");
b7266188
A
1299 error = ENXIO;
1300 goto out;
1301 }
6d2010ae
A
1302
1303 error = vnode_getwithref(realdevvp);
1304 if (error != 0) {
1305 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1306 goto out;
1307 }
1308
1309 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1310 IMGSRC_DEBUG("Wrong dev_t.\n");
1311 error = ENXIO;
1312 goto out1;
1313 }
1314
1315 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1316
b7266188
A
1317 /*
1318 * If mount by non-root, then verify that user has necessary
1319 * permissions on the device.
1320 */
1321 if (!vfs_context_issuser(ctx)) {
1322 accessmode = KAUTH_VNODE_READ_DATA;
1323 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1324 accessmode |= KAUTH_VNODE_WRITE_DATA;
6d2010ae
A
1325 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1326 IMGSRC_DEBUG("Access denied.\n");
1327 goto out1;
1328 }
b7266188
A
1329 }
1330
1331 *devvpp = vp;
6d2010ae
A
1332
1333out1:
1334 vnode_put(realdevvp);
b7266188 1335out:
6d2010ae 1336 nameidone(&nd);
b7266188
A
1337 if (error) {
1338 vnode_put(vp);
1339 }
1340
1341 return error;
1342}
1343
1344/*
1345 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1346 * and call checkdirs()
1347 */
1348static int
1349place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1350{
1351 int error;
1352
1353 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1354
1355 vnode_lock_spin(vp);
1356 CLR(vp->v_flag, VMOUNT);
1357 vp->v_mountedhere = mp;
1358 vnode_unlock(vp);
1359
1360 /*
1361 * taking the name_cache_lock exclusively will
1362 * insure that everyone is out of the fast path who
1363 * might be trying to use a now stale copy of
1364 * vp->v_mountedhere->mnt_realrootvp
1365 * bumping mount_generation causes the cached values
1366 * to be invalidated
1367 */
1368 name_cache_lock();
1369 mount_generation++;
1370 name_cache_unlock();
1371
1372 error = vnode_ref(vp);
1373 if (error != 0) {
1374 goto out;
1375 }
1376
1377 error = checkdirs(vp, ctx);
1378 if (error != 0) {
1379 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1380 vnode_rele(vp);
1381 goto out;
1382 }
1383
1384out:
1385 if (error != 0) {
1386 mp->mnt_vnodecovered = NULLVP;
1387 }
1388 return error;
1389}
1390
1391static void
1392undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1393{
1394 vnode_rele(vp);
1395 vnode_lock_spin(vp);
1396 vp->v_mountedhere = (mount_t)NULL;
1397 vnode_unlock(vp);
1398
1399 mp->mnt_vnodecovered = NULLVP;
1400}
1401
1402static int
1403mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1404{
1405 int error;
1406
1407 /* unmount in progress return error */
1408 mount_lock_spin(mp);
1409 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1410 mount_unlock(mp);
1411 return EBUSY;
1412 }
1413 mount_unlock(mp);
1414 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1415
1416 /*
1417 * We only allow the filesystem to be reloaded if it
1418 * is currently mounted read-only.
1419 */
1420 if ((flags & MNT_RELOAD) &&
1421 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1422 error = ENOTSUP;
1423 goto out;
1424 }
1425
1426 /*
1427 * Only root, or the user that did the original mount is
1428 * permitted to update it.
1429 */
1430 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
39037602 1431 (!vfs_context_issuser(ctx))) {
b7266188
A
1432 error = EPERM;
1433 goto out;
1434 }
1435#if CONFIG_MACF
1436 error = mac_mount_check_remount(ctx, mp);
1437 if (error != 0) {
1438 goto out;
1439 }
1440#endif
1441
1442out:
1443 if (error) {
1444 lck_rw_done(&mp->mnt_rwlock);
1445 }
1446
1447 return error;
1448}
1449
39037602 1450static void
b7266188
A
1451mount_end_update(mount_t mp)
1452{
1453 lck_rw_done(&mp->mnt_rwlock);
1454}
1455
1456static int
6d2010ae
A
1457get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1458{
1459 vnode_t vp;
1460
1461 if (height >= MAX_IMAGEBOOT_NESTING) {
1462 return EINVAL;
1463 }
1464
1465 vp = imgsrc_rootvnodes[height];
1466 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1467 *rvpp = vp;
1468 return 0;
1469 } else {
1470 return ENOENT;
1471 }
1472}
1473
1474static int
39037602
A
1475relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
1476 const char *fsname, vfs_context_t ctx,
6d2010ae 1477 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
b7266188
A
1478{
1479 int error;
1480 mount_t mp;
1481 boolean_t placed = FALSE;
6d2010ae 1482 vnode_t devvp = NULLVP;
b7266188
A
1483 struct vfstable *vfsp;
1484 user_addr_t devpath;
1485 char *old_mntonname;
6d2010ae
A
1486 vnode_t rvp;
1487 uint32_t height;
1488 uint32_t flags;
b7266188
A
1489
1490 /* If we didn't imageboot, nothing to move */
6d2010ae 1491 if (imgsrc_rootvnodes[0] == NULLVP) {
b7266188
A
1492 return EINVAL;
1493 }
1494
1495 /* Only root can do this */
1496 if (!vfs_context_issuser(ctx)) {
1497 return EPERM;
1498 }
1499
6d2010ae
A
1500 IMGSRC_DEBUG("looking for root vnode.\n");
1501
1502 /*
1503 * Get root vnode of filesystem we're moving.
1504 */
1505 if (by_index) {
1506 if (is64bit) {
1507 struct user64_mnt_imgsrc_args mia64;
1508 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1509 if (error != 0) {
1510 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1511 return error;
1512 }
1513
1514 height = mia64.mi_height;
1515 flags = mia64.mi_flags;
1516 devpath = mia64.mi_devpath;
1517 } else {
1518 struct user32_mnt_imgsrc_args mia32;
1519 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1520 if (error != 0) {
1521 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1522 return error;
1523 }
1524
1525 height = mia32.mi_height;
1526 flags = mia32.mi_flags;
1527 devpath = mia32.mi_devpath;
1528 }
1529 } else {
1530 /*
1531 * For binary compatibility--assumes one level of nesting.
1532 */
1533 if (is64bit) {
1534 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1535 return error;
1536 } else {
1537 user32_addr_t tmp;
1538 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1539 return error;
1540
1541 /* munge into LP64 addr */
1542 devpath = CAST_USER_ADDR_T(tmp);
1543 }
1544
1545 height = 0;
1546 flags = 0;
1547 }
1548
1549 if (flags != 0) {
1550 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1551 return EINVAL;
1552 }
1553
1554 error = get_imgsrc_rootvnode(height, &rvp);
b7266188 1555 if (error != 0) {
6d2010ae 1556 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
b7266188
A
1557 return error;
1558 }
1559
6d2010ae
A
1560 IMGSRC_DEBUG("got root vnode.\n");
1561
b7266188
A
1562 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1563
1564 /* Can only move once */
6d2010ae 1565 mp = vnode_mount(rvp);
b7266188 1566 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1567 IMGSRC_DEBUG("Already moved.\n");
b7266188
A
1568 error = EBUSY;
1569 goto out0;
1570 }
1571
6d2010ae
A
1572 IMGSRC_DEBUG("Starting updated.\n");
1573
b7266188
A
1574 /* Get exclusive rwlock on mount, authorize update on mp */
1575 error = mount_begin_update(mp , ctx, 0);
1576 if (error != 0) {
6d2010ae 1577 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
b7266188
A
1578 goto out0;
1579 }
1580
39037602 1581 /*
b7266188
A
1582 * It can only be moved once. Flag is set under the rwlock,
1583 * so we're now safe to proceed.
1584 */
1585 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1586 IMGSRC_DEBUG("Already moved [2]\n");
b7266188
A
1587 goto out1;
1588 }
39037602
A
1589
1590
6d2010ae 1591 IMGSRC_DEBUG("Preparing coveredvp.\n");
b7266188
A
1592
1593 /* Mark covered vnode as mount in progress, authorize placing mount on top */
6d2010ae 1594 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
b7266188 1595 if (error != 0) {
6d2010ae 1596 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
b7266188
A
1597 goto out1;
1598 }
39037602 1599
6d2010ae
A
1600 IMGSRC_DEBUG("Covered vp OK.\n");
1601
b7266188
A
1602 /* Sanity check the name caller has provided */
1603 vfsp = mp->mnt_vtable;
1604 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
6d2010ae 1605 IMGSRC_DEBUG("Wrong fs name.\n");
b7266188
A
1606 error = EINVAL;
1607 goto out2;
1608 }
1609
1610 /* Check the device vnode and update mount-from name, for local filesystems */
1611 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
6d2010ae 1612 IMGSRC_DEBUG("Local, doing device validation.\n");
b7266188
A
1613
1614 if (devpath != USER_ADDR_NULL) {
1615 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1616 if (error) {
6d2010ae 1617 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
b7266188
A
1618 goto out2;
1619 }
1620
1621 vnode_put(devvp);
1622 }
1623 }
1624
39037602 1625 /*
b7266188 1626 * Place mp on top of vnode, ref the vnode, call checkdirs(),
39037602 1627 * and increment the name cache's mount generation
b7266188 1628 */
6d2010ae
A
1629
1630 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
b7266188
A
1631 error = place_mount_and_checkdirs(mp, vp, ctx);
1632 if (error != 0) {
1633 goto out2;
1634 }
1635
1636 placed = TRUE;
1637
3e170ce0
A
1638 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1639 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
b7266188
A
1640
1641 /* Forbid future moves */
1642 mount_lock(mp);
1643 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1644 mount_unlock(mp);
1645
1646 /* Finally, add to mount list, completely ready to go */
6d2010ae
A
1647 if (mount_list_add(mp) != 0) {
1648 /*
1649 * The system is shutting down trying to umount
1650 * everything, so fail with a plausible errno.
1651 */
1652 error = EBUSY;
b7266188
A
1653 goto out3;
1654 }
1655
1656 mount_end_update(mp);
6d2010ae 1657 vnode_put(rvp);
b7266188
A
1658 FREE(old_mntonname, M_TEMP);
1659
6d2010ae
A
1660 vfs_notify_mount(pvp);
1661
b7266188
A
1662 return 0;
1663out3:
3e170ce0 1664 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
b7266188
A
1665
1666 mount_lock(mp);
1667 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1668 mount_unlock(mp);
1669
1670out2:
39037602 1671 /*
b7266188 1672 * Placing the mp on the vnode clears VMOUNT,
39037602 1673 * so cleanup is different after that point
b7266188
A
1674 */
1675 if (placed) {
1676 /* Rele the vp, clear VMOUNT and v_mountedhere */
1677 undo_place_on_covered_vp(mp, vp);
1678 } else {
1679 vnode_lock_spin(vp);
1680 CLR(vp->v_flag, VMOUNT);
1681 vnode_unlock(vp);
1682 }
1683out1:
1684 mount_end_update(mp);
1685
1686out0:
6d2010ae 1687 vnode_put(rvp);
b7266188
A
1688 FREE(old_mntonname, M_TEMP);
1689 return error;
1690}
1691
1692#endif /* CONFIG_IMGSRC_ACCESS */
1693
91447636 1694void
2d21ac55 1695enablequotas(struct mount *mp, vfs_context_t ctx)
9bccf70c 1696{
9bccf70c
A
1697 struct nameidata qnd;
1698 int type;
1699 char qfpath[MAXPATHLEN];
91447636
A
1700 const char *qfname = QUOTAFILENAME;
1701 const char *qfopsname = QUOTAOPSNAME;
1702 const char *qfextension[] = INITQFNAMES;
9bccf70c 1703
2d21ac55 1704 /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
b0d623f7
A
1705 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1706 return;
1707 }
39037602 1708 /*
9bccf70c
A
1709 * Enable filesystem disk quotas if necessary.
1710 * We ignore errors as this should not interfere with final mount
1711 */
1712 for (type=0; type < MAXQUOTAS; type++) {
2d21ac55 1713 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
6d2010ae
A
1714 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1715 CAST_USER_ADDR_T(qfpath), ctx);
91447636
A
1716 if (namei(&qnd) != 0)
1717 continue; /* option file to trigger quotas is not present */
1718 vnode_put(qnd.ni_vp);
1719 nameidone(&qnd);
2d21ac55 1720 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
91447636 1721
2d21ac55 1722 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
9bccf70c
A
1723 }
1724 return;
1725}
1726
2d21ac55
A
1727
1728static int
39037602 1729checkdirs_callback(proc_t p, void * arg)
2d21ac55
A
1730{
1731 struct cdirargs * cdrp = (struct cdirargs * )arg;
1732 vnode_t olddp = cdrp->olddp;
1733 vnode_t newdp = cdrp->newdp;
1734 struct filedesc *fdp;
1735 vnode_t tvp;
1736 vnode_t fdp_cvp;
1737 vnode_t fdp_rvp;
1738 int cdir_changed = 0;
1739 int rdir_changed = 0;
1740
1741 /*
1742 * XXX Also needs to iterate each thread in the process to see if it
1743 * XXX is using a per-thread current working directory, and, if so,
1744 * XXX update that as well.
1745 */
1746
1747 proc_fdlock(p);
1748 fdp = p->p_fd;
1749 if (fdp == (struct filedesc *)0) {
1750 proc_fdunlock(p);
1751 return(PROC_RETURNED);
1752 }
1753 fdp_cvp = fdp->fd_cdir;
1754 fdp_rvp = fdp->fd_rdir;
1755 proc_fdunlock(p);
1756
1757 if (fdp_cvp == olddp) {
1758 vnode_ref(newdp);
1759 tvp = fdp->fd_cdir;
1760 fdp_cvp = newdp;
1761 cdir_changed = 1;
1762 vnode_rele(tvp);
1763 }
1764 if (fdp_rvp == olddp) {
1765 vnode_ref(newdp);
1766 tvp = fdp->fd_rdir;
1767 fdp_rvp = newdp;
1768 rdir_changed = 1;
1769 vnode_rele(tvp);
1770 }
1771 if (cdir_changed || rdir_changed) {
1772 proc_fdlock(p);
1773 fdp->fd_cdir = fdp_cvp;
1774 fdp->fd_rdir = fdp_rvp;
1775 proc_fdunlock(p);
1776 }
1777 return(PROC_RETURNED);
1778}
1779
1780
1781
1c79356b
A
1782/*
1783 * Scan all active processes to see if any of them have a current
1784 * or root directory onto which the new filesystem has just been
1785 * mounted. If so, replace them with the new mount point.
1786 */
6601e61a 1787static int
2d21ac55 1788checkdirs(vnode_t olddp, vfs_context_t ctx)
1c79356b 1789{
2d21ac55
A
1790 vnode_t newdp;
1791 vnode_t tvp;
6601e61a 1792 int err;
2d21ac55 1793 struct cdirargs cdr;
1c79356b
A
1794
1795 if (olddp->v_usecount == 1)
6601e61a 1796 return(0);
2d21ac55 1797 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
2d21ac55
A
1798
1799 if (err != 0) {
6601e61a 1800#if DIAGNOSTIC
2d21ac55 1801 panic("mount: lost mount: error %d", err);
6601e61a
A
1802#endif
1803 return(err);
1804 }
91447636 1805
2d21ac55
A
1806 cdr.olddp = olddp;
1807 cdr.newdp = newdp;
1808 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1809 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
91447636 1810
1c79356b 1811 if (rootvnode == olddp) {
91447636 1812 vnode_ref(newdp);
fa4905b1 1813 tvp = rootvnode;
1c79356b 1814 rootvnode = newdp;
91447636 1815 vnode_rele(tvp);
1c79356b 1816 }
91447636
A
1817
1818 vnode_put(newdp);
6601e61a 1819 return(0);
1c79356b
A
1820}
1821
1822/*
1823 * Unmount a file system.
1824 *
1825 * Note: unmount takes a path to the vnode mounted on as argument,
1826 * not special file (as before).
1827 */
1c79356b
A
1828/* ARGSUSED */
1829int
b0d623f7 1830unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1c79356b 1831{
2d21ac55 1832 vnode_t vp;
1c79356b
A
1833 struct mount *mp;
1834 int error;
1835 struct nameidata nd;
2d21ac55 1836 vfs_context_t ctx = vfs_context_current();
91447636 1837
39037602 1838 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
2d21ac55 1839 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
1840 error = namei(&nd);
1841 if (error)
1c79356b
A
1842 return (error);
1843 vp = nd.ni_vp;
1844 mp = vp->v_mount;
91447636 1845 nameidone(&nd);
1c79356b 1846
2d21ac55
A
1847#if CONFIG_MACF
1848 error = mac_mount_check_umount(ctx, mp);
1849 if (error != 0) {
1850 vnode_put(vp);
1851 return (error);
1852 }
1853#endif
55e303ae
A
1854 /*
1855 * Must be the root of the filesystem
1856 */
1857 if ((vp->v_flag & VROOT) == 0) {
91447636 1858 vnode_put(vp);
55e303ae
A
1859 return (EINVAL);
1860 }
6601e61a 1861 mount_ref(mp, 0);
91447636 1862 vnode_put(vp);
6601e61a 1863 /* safedounmount consumes the mount ref */
2d21ac55
A
1864 return (safedounmount(mp, uap->flags, ctx));
1865}
1866
1867int
39037602 1868vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx)
2d21ac55
A
1869{
1870 mount_t mp;
1871
1872 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1873 if (mp == (mount_t)0) {
1874 return(ENOENT);
1875 }
1876 mount_ref(mp, 0);
1877 mount_iterdrop(mp);
1878 /* safedounmount consumes the mount ref */
1879 return(safedounmount(mp, flags, ctx));
55e303ae
A
1880}
1881
2d21ac55 1882
55e303ae 1883/*
6601e61a 1884 * The mount struct comes with a mount ref which will be consumed.
55e303ae
A
1885 * Do the actual file system unmount, prevent some common foot shooting.
1886 */
1887int
2d21ac55 1888safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
55e303ae
A
1889{
1890 int error;
2d21ac55 1891 proc_t p = vfs_context_proc(ctx);
55e303ae 1892
316670eb
A
1893 /*
1894 * If the file system is not responding and MNT_NOBLOCK
1895 * is set and not a forced unmount then return EBUSY.
1896 */
1897 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1898 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1899 error = EBUSY;
1900 goto out;
1901 }
1902
1c79356b 1903 /*
39037602 1904 * Skip authorization if the mount is tagged as permissive and
6d2010ae 1905 * this is not a forced-unmount attempt.
1c79356b 1906 */
6d2010ae
A
1907 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1908 /*
1909 * Only root, or the user that did the original mount is
1910 * permitted to unmount this filesystem.
1911 */
1912 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1913 (error = suser(kauth_cred_get(), &p->p_acflag)))
1914 goto out;
1915 }
1c79356b
A
1916 /*
1917 * Don't allow unmounting the root file system.
1918 */
6601e61a 1919 if (mp->mnt_flag & MNT_ROOTFS) {
2d21ac55 1920 error = EBUSY; /* the root is always busy */
6601e61a
A
1921 goto out;
1922 }
1c79356b 1923
b7266188
A
1924#ifdef CONFIG_IMGSRC_ACCESS
1925 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1926 error = EBUSY;
1927 goto out;
1928 }
1929#endif /* CONFIG_IMGSRC_ACCESS */
1930
2d21ac55
A
1931 return (dounmount(mp, flags, 1, ctx));
1932
6601e61a
A
1933out:
1934 mount_drop(mp, 0);
1935 return(error);
1c79356b
A
1936}
1937
1938/*
1939 * Do the actual file system unmount.
1940 */
1941int
2d21ac55 1942dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1c79356b 1943{
2d21ac55 1944 vnode_t coveredvp = (vnode_t)0;
1c79356b 1945 int error;
91447636 1946 int needwakeup = 0;
91447636
A
1947 int forcedunmount = 0;
1948 int lflags = 0;
593a1d5f 1949 struct vnode *devvp = NULLVP;
6d2010ae 1950#if CONFIG_TRIGGERS
39236c6e 1951 proc_t p = vfs_context_proc(ctx);
6d2010ae 1952 int did_vflush = 0;
39236c6e 1953 int pflags_save = 0;
6d2010ae 1954#endif /* CONFIG_TRIGGERS */
91447636 1955
813fb2f6
A
1956#if CONFIG_FSE
1957 if (!(flags & MNT_FORCE)) {
1958 fsevent_unmount(mp, ctx); /* has to come first! */
1959 }
1960#endif
1961
91447636 1962 mount_lock(mp);
fe8ab488
A
1963
1964 /*
1965 * If already an unmount in progress just return EBUSY.
1966 * Even a forced unmount cannot override.
1967 */
91447636 1968 if (mp->mnt_lflag & MNT_LUNMOUNT) {
fe8ab488 1969 if (withref != 0)
6601e61a 1970 mount_drop(mp, 1);
fe8ab488 1971 mount_unlock(mp);
9bccf70c
A
1972 return (EBUSY);
1973 }
39236c6e 1974
fe8ab488
A
1975 if (flags & MNT_FORCE) {
1976 forcedunmount = 1;
1977 mp->mnt_lflag |= MNT_LFORCE;
1978 }
1979
39236c6e
A
1980#if CONFIG_TRIGGERS
1981 if (flags & MNT_NOBLOCK && p != kernproc)
1982 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
1983#endif
1984
1c79356b 1985 mp->mnt_kern_flag |= MNTK_UNMOUNT;
91447636
A
1986 mp->mnt_lflag |= MNT_LUNMOUNT;
1987 mp->mnt_flag &=~ MNT_ASYNC;
2d21ac55
A
1988 /*
1989 * anyone currently in the fast path that
1990 * trips over the cached rootvp will be
1991 * dumped out and forced into the slow path
1992 * to regenerate a new cached value
1993 */
1994 mp->mnt_realrootvp = NULLVP;
91447636 1995 mount_unlock(mp);
39037602 1996
fe8ab488
A
1997 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
1998 /*
1999 * Force unmount any mounts in this filesystem.
2000 * If any unmounts fail - just leave them dangling.
2001 * Avoids recursion.
2002 */
2003 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
2004 }
2005
2d21ac55
A
2006 /*
2007 * taking the name_cache_lock exclusively will
2008 * insure that everyone is out of the fast path who
2009 * might be trying to use a now stale copy of
2010 * vp->v_mountedhere->mnt_realrootvp
2011 * bumping mount_generation causes the cached values
2012 * to be invalidated
2013 */
2014 name_cache_lock();
2015 mount_generation++;
2016 name_cache_unlock();
2017
2018
91447636 2019 lck_rw_lock_exclusive(&mp->mnt_rwlock);
6601e61a
A
2020 if (withref != 0)
2021 mount_drop(mp, 0);
91447636
A
2022 error = 0;
2023 if (forcedunmount == 0) {
2024 ubc_umount(mp); /* release cached vnodes */
2025 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2d21ac55 2026 error = VFS_SYNC(mp, MNT_WAIT, ctx);
91447636
A
2027 if (error) {
2028 mount_lock(mp);
2029 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2030 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2031 mp->mnt_lflag &= ~MNT_LFORCE;
2032 goto out;
2033 }
2034 }
2035 }
6d2010ae 2036
5ba3f43e
A
2037 /* free disk_conditioner_info structure for this mount */
2038 disk_conditioner_unmount(mp);
2039
3e170ce0
A
2040 IOBSDMountChange(mp, kIOMountChangeUnmount);
2041
6d2010ae
A
2042#if CONFIG_TRIGGERS
2043 vfs_nested_trigger_unmounts(mp, flags, ctx);
2044 did_vflush = 1;
39037602 2045#endif
91447636
A
2046 if (forcedunmount)
2047 lflags |= FORCECLOSE;
2048 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
2049 if ((forcedunmount == 0) && error) {
2050 mount_lock(mp);
9bccf70c 2051 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
2052 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2053 mp->mnt_lflag &= ~MNT_LFORCE;
9bccf70c
A
2054 goto out;
2055 }
91447636
A
2056
2057 /* make sure there are no one in the mount iterations or lookup */
2058 mount_iterdrain(mp);
2059
2d21ac55 2060 error = VFS_UNMOUNT(mp, flags, ctx);
1c79356b 2061 if (error) {
91447636
A
2062 mount_iterreset(mp);
2063 mount_lock(mp);
1c79356b 2064 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
2065 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2066 mp->mnt_lflag &= ~MNT_LFORCE;
1c79356b
A
2067 goto out;
2068 }
2069
2070 /* increment the operations count */
2071 if (!error)
b0d623f7 2072 OSAddAtomic(1, &vfs_nummntops);
91447636
A
2073
2074 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
593a1d5f
A
2075 /* hold an io reference and drop the usecount before close */
2076 devvp = mp->mnt_devvp;
593a1d5f
A
2077 vnode_getalways(devvp);
2078 vnode_rele(devvp);
2079 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
2d21ac55 2080 ctx);
b0d623f7 2081 vnode_clearmountedon(devvp);
593a1d5f 2082 vnode_put(devvp);
91447636
A
2083 }
2084 lck_rw_done(&mp->mnt_rwlock);
2085 mount_list_remove(mp);
2086 lck_rw_lock_exclusive(&mp->mnt_rwlock);
6d2010ae 2087
91447636 2088 /* mark the mount point hook in the vp but not drop the ref yet */
1c79356b 2089 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
fe8ab488
A
2090 /*
2091 * The covered vnode needs special handling. Trying to get an
2092 * iocount must not block here as this may lead to deadlocks
2093 * if the Filesystem to which the covered vnode belongs is
2094 * undergoing forced unmounts. Since we hold a usecount, the
2095 * vnode cannot be reused (it can, however, still be terminated)
2096 */
2097 vnode_getalways(coveredvp);
6d2010ae
A
2098 vnode_lock_spin(coveredvp);
2099
2100 mp->mnt_crossref++;
2101 coveredvp->v_mountedhere = (struct mount *)0;
fe8ab488 2102 CLR(coveredvp->v_flag, VMOUNT);
6d2010ae
A
2103
2104 vnode_unlock(coveredvp);
2105 vnode_put(coveredvp);
1c79356b 2106 }
91447636
A
2107
2108 mount_list_lock();
2109 mp->mnt_vtable->vfc_refcount--;
2110 mount_list_unlock();
2111
2112 cache_purgevfs(mp); /* remove cache entries for this file sys */
2113 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
2114 mount_lock(mp);
2115 mp->mnt_lflag |= MNT_LDEAD;
2116
2117 if (mp->mnt_lflag & MNT_LWAIT) {
2118 /*
2119 * do the wakeup here
2120 * in case we block in mount_refdrain
2121 * which will drop the mount lock
2122 * and allow anyone blocked in vfs_busy
2123 * to wakeup and see the LDEAD state
2124 */
2125 mp->mnt_lflag &= ~MNT_LWAIT;
2126 wakeup((caddr_t)mp);
1c79356b 2127 }
91447636 2128 mount_refdrain(mp);
1c79356b 2129out:
91447636
A
2130 if (mp->mnt_lflag & MNT_LWAIT) {
2131 mp->mnt_lflag &= ~MNT_LWAIT;
39037602 2132 needwakeup = 1;
91447636 2133 }
6d2010ae 2134
6d2010ae 2135#if CONFIG_TRIGGERS
39236c6e
A
2136 if (flags & MNT_NOBLOCK && p != kernproc) {
2137 // Restore P_NOREMOTEHANG bit to its previous value
2138 if ((pflags_save & P_NOREMOTEHANG) == 0)
2139 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2140 }
2141
39037602 2142 /*
6d2010ae 2143 * Callback and context are set together under the mount lock, and
39037602 2144 * never cleared, so we're safe to examine them here, drop the lock,
6d2010ae
A
2145 * and call out.
2146 */
2147 if (mp->mnt_triggercallback != NULL) {
2148 mount_unlock(mp);
2149 if (error == 0) {
2150 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2151 } else if (did_vflush) {
2152 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2153 }
2154 } else {
2155 mount_unlock(mp);
2156 }
39037602 2157#else
91447636 2158 mount_unlock(mp);
6d2010ae
A
2159#endif /* CONFIG_TRIGGERS */
2160
91447636
A
2161 lck_rw_done(&mp->mnt_rwlock);
2162
2163 if (needwakeup)
1c79356b 2164 wakeup((caddr_t)mp);
6d2010ae 2165
55e303ae 2166 if (!error) {
91447636 2167 if ((coveredvp != NULLVP)) {
fe8ab488 2168 vnode_t pvp = NULLVP;
b0d623f7 2169
fe8ab488
A
2170 /*
2171 * The covered vnode needs special handling. Trying to
2172 * get an iocount must not block here as this may lead
2173 * to deadlocks if the Filesystem to which the covered
2174 * vnode belongs is undergoing forced unmounts. Since we
2175 * hold a usecount, the vnode cannot be reused
2176 * (it can, however, still be terminated).
2177 */
2178 vnode_getalways(coveredvp);
6d2010ae
A
2179
2180 mount_dropcrossref(mp, coveredvp, 0);
fe8ab488
A
2181 /*
2182 * We'll _try_ to detect if this really needs to be
2183 * done. The coveredvp can only be in termination (or
2184 * terminated) if the coveredvp's mount point is in a
2185 * forced unmount (or has been) since we still hold the
2186 * ref.
2187 */
2188 if (!vnode_isrecycled(coveredvp)) {
2189 pvp = vnode_getparent(coveredvp);
6d2010ae 2190#if CONFIG_TRIGGERS
fe8ab488
A
2191 if (coveredvp->v_resolve) {
2192 vnode_trigger_rearm(coveredvp, ctx);
2193 }
2194#endif
2195 }
2196
2197 vnode_rele(coveredvp);
91447636 2198 vnode_put(coveredvp);
fe8ab488 2199 coveredvp = NULLVP;
b0d623f7
A
2200
2201 if (pvp) {
2202 lock_vnode_and_post(pvp, NOTE_WRITE);
2203 vnode_put(pvp);
2204 }
91447636
A
2205 } else if (mp->mnt_flag & MNT_ROOTFS) {
2206 mount_lock_destroy(mp);
2d21ac55
A
2207#if CONFIG_MACF
2208 mac_mount_label_destroy(mp);
2209#endif
91447636
A
2210 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2211 } else
2212 panic("dounmount: no coveredvp");
55e303ae 2213 }
1c79356b
A
2214 return (error);
2215}
2216
fe8ab488
A
2217/*
2218 * Unmount any mounts in this filesystem.
2219 */
2220void
2221dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2222{
2223 mount_t smp;
2224 fsid_t *fsids, fsid;
2225 int fsids_sz;
2226 int count = 0, i, m = 0;
2227 vnode_t vp;
2228
2229 mount_list_lock();
2230
2231 // Get an array to hold the submounts fsids.
2232 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2233 count++;
2234 fsids_sz = count * sizeof(fsid_t);
2235 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2236 if (fsids == NULL) {
2237 mount_list_unlock();
2238 goto out;
2239 }
2240 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2241
2242 /*
2243 * Fill the array with submount fsids.
2244 * Since mounts are always added to the tail of the mount list, the
39037602 2245 * list is always in mount order.
fe8ab488
A
2246 * For each mount check if the mounted-on vnode belongs to a
2247 * mount that's already added to our array of mounts to be unmounted.
2248 */
2249 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2250 vp = smp->mnt_vnodecovered;
2251 if (vp == NULL)
2252 continue;
2253 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2254 for (i = 0; i <= m; i++) {
2255 if (fsids[i].val[0] == fsid.val[0] &&
2256 fsids[i].val[1] == fsid.val[1]) {
2257 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2258 break;
2259 }
2260 }
2261 }
2262 mount_list_unlock();
2263
2264 // Unmount the submounts in reverse order. Ignore errors.
2265 for (i = m; i > 0; i--) {
2266 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2267 if (smp) {
2268 mount_ref(smp, 0);
2269 mount_iterdrop(smp);
2270 (void) dounmount(smp, flags, 1, ctx);
2271 }
2272 }
2273out:
2274 if (fsids)
2275 FREE(fsids, M_TEMP);
2276}
2277
91447636
A
2278void
2279mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2280{
6d2010ae
A
2281 vnode_lock(dp);
2282 mp->mnt_crossref--;
2283
2284 if (mp->mnt_crossref < 0)
2285 panic("mount cross refs -ve");
2286
2287 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
39037602 2288
91447636 2289 if (need_put)
6d2010ae 2290 vnode_put_locked(dp);
91447636 2291 vnode_unlock(dp);
6d2010ae
A
2292
2293 mount_lock_destroy(mp);
2294#if CONFIG_MACF
2295 mac_mount_label_destroy(mp);
2296#endif
2297 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2298 return;
2299 }
2300 if (need_put)
2301 vnode_put_locked(dp);
2302 vnode_unlock(dp);
91447636
A
2303}
2304
2305
1c79356b
A
/*
 * Sync each mounted filesystem.
 *
 * Tunables consulted by the sync paths below.
 */
#if DIAGNOSTIC
/* Non-zero: the sync paths call vfs_bufstats() after syncing. */
int syncprt = 0;
#endif

/* Non-zero: the sync paths call vm_countdirtypages() after syncing. */
int print_vmpage_stat = 0;

/* Sync time limit (sec); passed to sync_async() by sync_internal(). */
int sync_timeout = 60;
1c79356b 2315
39037602 2316static int
fe8ab488 2317sync_callback(mount_t mp, __unused void *arg)
1c79356b 2318{
91447636 2319 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
fe8ab488
A
2320 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2321
2322 mp->mnt_flag &= ~MNT_ASYNC;
2323 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
2324 if (asyncflag)
2325 mp->mnt_flag |= MNT_ASYNC;
1c79356b 2326 }
1c79356b 2327
fe8ab488
A
2328 return (VFS_RETURNED);
2329}
91447636 2330
91447636
A
2331/* ARGSUSED */
2332int
b0d623f7 2333sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
91447636 2334{
fe8ab488 2335 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
b0d623f7 2336
fe8ab488
A
2337 if (print_vmpage_stat) {
2338 vm_countdirtypages();
2339 }
2340
2341#if DIAGNOSTIC
2342 if (syncprt)
2343 vfs_bufstats();
2344#endif /* DIAGNOSTIC */
2345 return 0;
2346}
2347
2348static void
2349sync_thread(void *arg, __unused wait_result_t wr)
2350{
2351 int *timeout = (int *) arg;
2352
2353 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2354
2355 if (timeout)
2356 wakeup((caddr_t) timeout);
2357 if (print_vmpage_stat) {
1c79356b 2358 vm_countdirtypages();
1c79356b 2359 }
39236c6e 2360
1c79356b
A
2361#if DIAGNOSTIC
2362 if (syncprt)
2363 vfs_bufstats();
2364#endif /* DIAGNOSTIC */
1c79356b
A
2365}
2366
2367/*
fe8ab488 2368 * Sync in a separate thread so we can time out if it blocks.
1c79356b 2369 */
fe8ab488
A
2370static int
2371sync_async(int timeout)
2d21ac55 2372{
fe8ab488 2373 thread_t thd;
2d21ac55 2374 int error;
fe8ab488
A
2375 struct timespec ts = {timeout, 0};
2376
2377 lck_mtx_lock(sync_mtx_lck);
2378 if (kernel_thread_start(sync_thread, &timeout, &thd) != KERN_SUCCESS) {
2379 printf("sync_thread failed\n");
2380 lck_mtx_unlock(sync_mtx_lck);
2381 return (0);
2382 }
2383
2384 error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "sync_thread", &ts);
2385 if (error) {
2386 printf("sync timed out: %d sec\n", timeout);
2387 }
2388 thread_deallocate(thd);
2389
2390 return (0);
2d21ac55
A
2391}
2392
fe8ab488
A
2393/*
2394 * An in-kernel sync for power management to call.
2395 */
2396__private_extern__ int
2397sync_internal(void)
2398{
2399 (void) sync_async(sync_timeout);
2400
2401 return 0;
2402} /* end of sync_internal call */
2403
2404/*
2405 * Change filesystem quotas.
2406 */
2407#if QUOTA
2408int
2409quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
1c79356b 2410{
2d21ac55 2411 struct mount *mp;
91447636
A
2412 int error, quota_cmd, quota_status;
2413 caddr_t datap;
2414 size_t fnamelen;
1c79356b 2415 struct nameidata nd;
2d21ac55 2416 vfs_context_t ctx = vfs_context_current();
91447636
A
2417 struct dqblk my_dqblk;
2418
b0d623f7 2419 AUDIT_ARG(uid, uap->uid);
55e303ae 2420 AUDIT_ARG(cmd, uap->cmd);
6d2010ae
A
2421 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2422 uap->path, ctx);
55e303ae
A
2423 error = namei(&nd);
2424 if (error)
1c79356b
A
2425 return (error);
2426 mp = nd.ni_vp->v_mount;
91447636
A
2427 vnode_put(nd.ni_vp);
2428 nameidone(&nd);
2429
2430 /* copyin any data we will need for downstream code */
2431 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2432
2433 switch (quota_cmd) {
2434 case Q_QUOTAON:
2435 /* uap->arg specifies a file from which to take the quotas */
2436 fnamelen = MAXPATHLEN;
2437 datap = kalloc(MAXPATHLEN);
2438 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2439 break;
2440 case Q_GETQUOTA:
2441 /* uap->arg is a pointer to a dqblk structure. */
2442 datap = (caddr_t) &my_dqblk;
2443 break;
2444 case Q_SETQUOTA:
2445 case Q_SETUSE:
2446 /* uap->arg is a pointer to a dqblk structure. */
2447 datap = (caddr_t) &my_dqblk;
2448 if (proc_is64bit(p)) {
2449 struct user_dqblk my_dqblk64;
2450 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
2451 if (error == 0) {
2452 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2453 }
2454 }
2455 else {
2456 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
2457 }
2458 break;
2459 case Q_QUOTASTAT:
2460 /* uap->arg is a pointer to an integer */
2461 datap = (caddr_t) &quota_status;
2462 break;
2463 default:
2464 datap = NULL;
2465 break;
2466 } /* switch */
2467
2468 if (error == 0) {
2d21ac55 2469 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
91447636
A
2470 }
2471
2472 switch (quota_cmd) {
2473 case Q_QUOTAON:
2474 if (datap != NULL)
2475 kfree(datap, MAXPATHLEN);
2476 break;
2477 case Q_GETQUOTA:
2478 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2479 if (error == 0) {
2480 if (proc_is64bit(p)) {
5ba3f43e
A
2481 struct user_dqblk my_dqblk64;
2482
2483 memset(&my_dqblk64, 0, sizeof(my_dqblk64));
91447636
A
2484 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2485 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
2486 }
2487 else {
2488 error = copyout(datap, uap->arg, sizeof (struct dqblk));
2489 }
2490 }
2491 break;
2492 case Q_QUOTASTAT:
2493 /* uap->arg is a pointer to an integer */
2494 if (error == 0) {
2495 error = copyout(datap, uap->arg, sizeof(quota_status));
2496 }
2497 break;
2498 default:
2499 break;
2500 } /* switch */
2501
2502 return (error);
1c79356b 2503}
2d21ac55
A
2504#else
2505int
b0d623f7 2506quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2d21ac55
A
2507{
2508 return (EOPNOTSUPP);
2509}
2510#endif /* QUOTA */
1c79356b
A
2511
2512/*
2513 * Get filesystem statistics.
2d21ac55
A
2514 *
2515 * Returns: 0 Success
2516 * namei:???
2517 * vfs_update_vfsstat:???
2518 * munge_statfs:EFAULT
1c79356b 2519 */
1c79356b
A
2520/* ARGSUSED */
2521int
b0d623f7 2522statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
1c79356b 2523{
91447636
A
2524 struct mount *mp;
2525 struct vfsstatfs *sp;
1c79356b
A
2526 int error;
2527 struct nameidata nd;
2d21ac55 2528 vfs_context_t ctx = vfs_context_current();
91447636 2529 vnode_t vp;
1c79356b 2530
39037602 2531 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2d21ac55 2532 UIO_USERSPACE, uap->path, ctx);
55e303ae 2533 error = namei(&nd);
39037602 2534 if (error != 0)
1c79356b 2535 return (error);
91447636
A
2536 vp = nd.ni_vp;
2537 mp = vp->v_mount;
2538 sp = &mp->mnt_vfsstat;
2539 nameidone(&nd);
2540
39037602
A
2541#if CONFIG_MACF
2542 error = mac_mount_check_stat(ctx, mp);
2543 if (error != 0)
2544 return (error);
2545#endif
2546
2d21ac55 2547 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
39037602 2548 if (error != 0) {
39236c6e 2549 vnode_put(vp);
1c79356b 2550 return (error);
39236c6e 2551 }
91447636
A
2552
2553 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
39236c6e 2554 vnode_put(vp);
91447636 2555 return (error);
1c79356b
A
2556}
2557
2558/*
2559 * Get filesystem statistics.
2560 */
1c79356b
A
2561/* ARGSUSED */
2562int
b0d623f7 2563fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
1c79356b 2564{
2d21ac55 2565 vnode_t vp;
1c79356b 2566 struct mount *mp;
91447636 2567 struct vfsstatfs *sp;
1c79356b
A
2568 int error;
2569
55e303ae
A
2570 AUDIT_ARG(fd, uap->fd);
2571
91447636 2572 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 2573 return (error);
55e303ae 2574
d1ecb069
A
2575 error = vnode_getwithref(vp);
2576 if (error) {
2577 file_drop(uap->fd);
2578 return (error);
2579 }
2580
91447636 2581 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
55e303ae 2582
91447636
A
2583 mp = vp->v_mount;
2584 if (!mp) {
d1ecb069
A
2585 error = EBADF;
2586 goto out;
91447636 2587 }
39037602
A
2588
2589#if CONFIG_MACF
2590 error = mac_mount_check_stat(vfs_context_current(), mp);
2591 if (error != 0)
2592 goto out;
2593#endif
2594
91447636 2595 sp = &mp->mnt_vfsstat;
39037602 2596 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
d1ecb069 2597 goto out;
91447636 2598 }
91447636
A
2599
2600 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2601
d1ecb069
A
2602out:
2603 file_drop(uap->fd);
2604 vnode_put(vp);
2605
91447636 2606 return (error);
1c79356b
A
2607}
2608
39037602
A
2609/*
2610 * Common routine to handle copying of statfs64 data to user space
2d21ac55 2611 */
39037602 2612static int
2d21ac55
A
2613statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2614{
2615 int error;
2616 struct statfs64 sfs;
39037602 2617
2d21ac55
A
2618 bzero(&sfs, sizeof(sfs));
2619
2620 sfs.f_bsize = sfsp->f_bsize;
2621 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2622 sfs.f_blocks = sfsp->f_blocks;
2623 sfs.f_bfree = sfsp->f_bfree;
2624 sfs.f_bavail = sfsp->f_bavail;
2625 sfs.f_files = sfsp->f_files;
2626 sfs.f_ffree = sfsp->f_ffree;
2627 sfs.f_fsid = sfsp->f_fsid;
2628 sfs.f_owner = sfsp->f_owner;
2629 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2630 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2631 sfs.f_fssubtype = sfsp->f_fssubtype;
6d2010ae
A
2632 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2633 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2634 } else {
2635 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2636 }
2d21ac55
A
2637 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2638 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2639
2640 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2641
2642 return(error);
2643}
2644
39037602
A
2645/*
2646 * Get file system statistics in 64-bit mode
2d21ac55
A
2647 */
2648int
b0d623f7 2649statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
2650{
2651 struct mount *mp;
2652 struct vfsstatfs *sp;
2653 int error;
2654 struct nameidata nd;
2655 vfs_context_t ctxp = vfs_context_current();
2656 vnode_t vp;
2657
39037602 2658 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2d21ac55
A
2659 UIO_USERSPACE, uap->path, ctxp);
2660 error = namei(&nd);
39037602 2661 if (error != 0)
2d21ac55
A
2662 return (error);
2663 vp = nd.ni_vp;
2664 mp = vp->v_mount;
2665 sp = &mp->mnt_vfsstat;
2666 nameidone(&nd);
2667
39037602
A
2668#if CONFIG_MACF
2669 error = mac_mount_check_stat(ctxp, mp);
2670 if (error != 0)
2671 return (error);
2672#endif
2673
2d21ac55 2674 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
39037602 2675 if (error != 0) {
39236c6e 2676 vnode_put(vp);
2d21ac55 2677 return (error);
39236c6e 2678 }
2d21ac55
A
2679
2680 error = statfs64_common(mp, sp, uap->buf);
39236c6e 2681 vnode_put(vp);
2d21ac55
A
2682
2683 return (error);
2684}
2685
39037602
A
2686/*
2687 * Get file system statistics in 64-bit mode
2d21ac55
A
2688 */
2689int
b0d623f7 2690fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
2691{
2692 struct vnode *vp;
2693 struct mount *mp;
2694 struct vfsstatfs *sp;
2695 int error;
2696
2697 AUDIT_ARG(fd, uap->fd);
2698
2699 if ( (error = file_vnode(uap->fd, &vp)) )
2700 return (error);
2701
d1ecb069
A
2702 error = vnode_getwithref(vp);
2703 if (error) {
2704 file_drop(uap->fd);
2705 return (error);
2706 }
2707
2d21ac55
A
2708 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2709
2710 mp = vp->v_mount;
2711 if (!mp) {
316670eb 2712 error = EBADF;
d1ecb069 2713 goto out;
2d21ac55 2714 }
39037602
A
2715
2716#if CONFIG_MACF
2717 error = mac_mount_check_stat(vfs_context_current(), mp);
2718 if (error != 0)
2719 goto out;
2720#endif
2721
2d21ac55
A
2722 sp = &mp->mnt_vfsstat;
2723 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
d1ecb069 2724 goto out;
2d21ac55 2725 }
2d21ac55
A
2726
2727 error = statfs64_common(mp, sp, uap->buf);
2728
d1ecb069
A
2729out:
2730 file_drop(uap->fd);
2731 vnode_put(vp);
2732
2d21ac55
A
2733 return (error);
2734}
91447636
A
2735
2736struct getfsstat_struct {
2737 user_addr_t sfsp;
2d21ac55 2738 user_addr_t *mp;
91447636
A
2739 int count;
2740 int maxcount;
2741 int flags;
2742 int error;
1c79356b 2743};
1c79356b 2744
91447636
A
2745
2746static int
2747getfsstat_callback(mount_t mp, void * arg)
2748{
39037602 2749
91447636
A
2750 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2751 struct vfsstatfs *sp;
91447636 2752 int error, my_size;
2d21ac55 2753 vfs_context_t ctx = vfs_context_current();
91447636
A
2754
2755 if (fstp->sfsp && fstp->count < fstp->maxcount) {
39037602
A
2756#if CONFIG_MACF
2757 error = mac_mount_check_stat(ctx, mp);
2758 if (error != 0) {
2759 fstp->error = error;
2760 return(VFS_RETURNED_DONE);
2761 }
2762#endif
91447636
A
2763 sp = &mp->mnt_vfsstat;
2764 /*
2765 * If MNT_NOWAIT is specified, do not refresh the
b0d623f7 2766 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
91447636 2767 */
b0d623f7 2768 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2d21ac55
A
2769 (error = vfs_update_vfsstat(mp, ctx,
2770 VFS_USER_EVENT))) {
91447636
A
2771 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2772 return(VFS_RETURNED);
1c79356b 2773 }
91447636
A
2774
2775 /*
2776 * Need to handle LP64 version of struct statfs
2777 */
2d21ac55 2778 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
91447636
A
2779 if (error) {
2780 fstp->error = error;
2781 return(VFS_RETURNED_DONE);
1c79356b 2782 }
91447636 2783 fstp->sfsp += my_size;
2d21ac55
A
2784
2785 if (fstp->mp) {
39236c6e 2786#if CONFIG_MACF
2d21ac55
A
2787 error = mac_mount_label_get(mp, *fstp->mp);
2788 if (error) {
2789 fstp->error = error;
2790 return(VFS_RETURNED_DONE);
2791 }
39236c6e 2792#endif
2d21ac55
A
2793 fstp->mp++;
2794 }
2795 }
91447636
A
2796 fstp->count++;
2797 return(VFS_RETURNED);
2798}
2799
2800/*
2801 * Get statistics on all filesystems.
2802 */
2803int
2804getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2d21ac55
A
2805{
2806 struct __mac_getfsstat_args muap;
2807
2808 muap.buf = uap->buf;
2809 muap.bufsize = uap->bufsize;
2810 muap.mac = USER_ADDR_NULL;
2811 muap.macsize = 0;
2812 muap.flags = uap->flags;
2813
2814 return (__mac_getfsstat(p, &muap, retval));
2815}
2816
b0d623f7
A
2817/*
2818 * __mac_getfsstat: Get MAC-related file system statistics
2819 *
2820 * Parameters: p (ignored)
2821 * uap User argument descriptor (see below)
39037602 2822 * retval Count of file system statistics (N stats)
b0d623f7
A
2823 *
2824 * Indirect: uap->bufsize Buffer size
2825 * uap->macsize MAC info size
2826 * uap->buf Buffer where information will be returned
2827 * uap->mac MAC info
2828 * uap->flags File system flags
39037602 2829 *
b0d623f7
A
2830 *
2831 * Returns: 0 Success
2832 * !0 Not success
2833 *
2834 */
2d21ac55
A
2835int
2836__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
91447636
A
2837{
2838 user_addr_t sfsp;
2d21ac55 2839 user_addr_t *mp;
b0d623f7 2840 size_t count, maxcount, bufsize, macsize;
91447636
A
2841 struct getfsstat_struct fst;
2842
b0d623f7
A
2843 bufsize = (size_t) uap->bufsize;
2844 macsize = (size_t) uap->macsize;
2845
91447636 2846 if (IS_64BIT_PROCESS(p)) {
b0d623f7 2847 maxcount = bufsize / sizeof(struct user64_statfs);
91447636
A
2848 }
2849 else {
b0d623f7 2850 maxcount = bufsize / sizeof(struct user32_statfs);
91447636
A
2851 }
2852 sfsp = uap->buf;
2853 count = 0;
2854
2d21ac55
A
2855 mp = NULL;
2856
2857#if CONFIG_MACF
2858 if (uap->mac != USER_ADDR_NULL) {
2859 u_int32_t *mp0;
2860 int error;
b0d623f7 2861 unsigned int i;
2d21ac55 2862
b0d623f7 2863 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2d21ac55
A
2864 if (count != maxcount)
2865 return (EINVAL);
2866
2867 /* Copy in the array */
b0d623f7
A
2868 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2869 if (mp0 == NULL) {
2870 return (ENOMEM);
2871 }
2872
2873 error = copyin(uap->mac, mp0, macsize);
2874 if (error) {
2875 FREE(mp0, M_MACTEMP);
2d21ac55 2876 return (error);
b0d623f7 2877 }
2d21ac55
A
2878
2879 /* Normalize to an array of user_addr_t */
2880 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
b0d623f7
A
2881 if (mp == NULL) {
2882 FREE(mp0, M_MACTEMP);
2883 return (ENOMEM);
2884 }
2885
2d21ac55
A
2886 for (i = 0; i < count; i++) {
2887 if (IS_64BIT_PROCESS(p))
2888 mp[i] = ((user_addr_t *)mp0)[i];
2889 else
2890 mp[i] = (user_addr_t)mp0[i];
2891 }
2892 FREE(mp0, M_MACTEMP);
2893 }
2894#endif
2895
2896
91447636 2897 fst.sfsp = sfsp;
2d21ac55 2898 fst.mp = mp;
91447636
A
2899 fst.flags = uap->flags;
2900 fst.count = 0;
2901 fst.error = 0;
2902 fst.maxcount = maxcount;
2903
39037602 2904
91447636
A
2905 vfs_iterate(0, getfsstat_callback, &fst);
2906
2d21ac55
A
2907 if (mp)
2908 FREE(mp, M_MACTEMP);
2909
91447636
A
2910 if (fst.error ) {
2911 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2912 return(fst.error);
2913 }
2914
2915 if (fst.sfsp && fst.count > fst.maxcount)
2916 *retval = fst.maxcount;
1c79356b 2917 else
91447636 2918 *retval = fst.count;
1c79356b
A
2919 return (0);
2920}
2921
2d21ac55
A
2922static int
2923getfsstat64_callback(mount_t mp, void * arg)
2924{
2925 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2926 struct vfsstatfs *sp;
2927 int error;
2928
2929 if (fstp->sfsp && fstp->count < fstp->maxcount) {
39037602
A
2930#if CONFIG_MACF
2931 error = mac_mount_check_stat(vfs_context_current(), mp);
2932 if (error != 0) {
2933 fstp->error = error;
2934 return(VFS_RETURNED_DONE);
2935 }
2936#endif
2d21ac55
A
2937 sp = &mp->mnt_vfsstat;
2938 /*
b0d623f7
A
2939 * If MNT_NOWAIT is specified, do not refresh the fsstat
2940 * cache. MNT_WAIT overrides MNT_NOWAIT.
2941 *
2942 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2943 * getfsstat, since the constants are out of the same
2944 * namespace.
2d21ac55 2945 */
b0d623f7
A
2946 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2947 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2d21ac55
A
2948 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2949 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2950 return(VFS_RETURNED);
2951 }
2952
2953 error = statfs64_common(mp, sp, fstp->sfsp);
2954 if (error) {
2955 fstp->error = error;
2956 return(VFS_RETURNED_DONE);
2957 }
2958 fstp->sfsp += sizeof(struct statfs64);
2959 }
2960 fstp->count++;
2961 return(VFS_RETURNED);
2962}
2963
2964/*
2965 * Get statistics on all file systems in 64 bit mode.
2966 */
2967int
2968getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2969{
2970 user_addr_t sfsp;
2971 int count, maxcount;
2972 struct getfsstat_struct fst;
2973
2974 maxcount = uap->bufsize / sizeof(struct statfs64);
2975
2976 sfsp = uap->buf;
2977 count = 0;
2978
2979 fst.sfsp = sfsp;
2980 fst.flags = uap->flags;
2981 fst.count = 0;
2982 fst.error = 0;
2983 fst.maxcount = maxcount;
2984
2985 vfs_iterate(0, getfsstat64_callback, &fst);
2986
2987 if (fst.error ) {
2988 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2989 return(fst.error);
2990 }
2991
2992 if (fst.sfsp && fst.count > fst.maxcount)
2993 *retval = fst.maxcount;
2994 else
2995 *retval = fst.count;
2996
2997 return (0);
2998}
2999
fe8ab488
A
3000/*
3001 * gets the associated vnode with the file descriptor passed.
3002 * as input
3003 *
3004 * INPUT
3005 * ctx - vfs context of caller
3006 * fd - file descriptor for which vnode is required.
3007 * vpp - Pointer to pointer to vnode to be returned.
3008 *
3009 * The vnode is returned with an iocount so any vnode obtained
3010 * by this call needs a vnode_put
3011 *
3012 */
39037602 3013int
fe8ab488
A
3014vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
3015{
3016 int error;
3017 vnode_t vp;
3018 struct fileproc *fp;
3019 proc_t p = vfs_context_proc(ctx);
3020
3021 *vpp = NULLVP;
3022
3023 error = fp_getfvp(p, fd, &fp, &vp);
3024 if (error)
3025 return (error);
3026
3027 error = vnode_getwithref(vp);
3028 if (error) {
3029 (void)fp_drop(p, fd, fp, 0);
3030 return (error);
3031 }
3032
3033 (void)fp_drop(p, fd, fp, 0);
3034 *vpp = vp;
3035 return (error);
3036}
3037
3038/*
3039 * Wrapper function around namei to start lookup from a directory
3040 * specified by a file descriptor ni_dirfd.
3041 *
3042 * In addition to all the errors returned by namei, this call can
3043 * return ENOTDIR if the file descriptor does not refer to a directory.
3044 * and EBADF if the file descriptor is not valid.
3045 */
3046int
3047nameiat(struct nameidata *ndp, int dirfd)
3048{
3049 if ((dirfd != AT_FDCWD) &&
3050 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
3051 !(ndp->ni_cnd.cn_flags & USEDVP)) {
3052 int error = 0;
3053 char c;
3054
3055 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3056 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3057 if (error)
3058 return (error);
3059 } else {
3060 c = *((char *)(ndp->ni_dirp));
3061 }
3062
3063 if (c != '/') {
3064 vnode_t dvp_at;
3065
3066 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3067 &dvp_at);
3068 if (error)
3069 return (error);
3070
3071 if (vnode_vtype(dvp_at) != VDIR) {
3072 vnode_put(dvp_at);
3073 return (ENOTDIR);
3074 }
3075
3076 ndp->ni_dvp = dvp_at;
3077 ndp->ni_cnd.cn_flags |= USEDVP;
3078 error = namei(ndp);
3079 ndp->ni_cnd.cn_flags &= ~USEDVP;
3080 vnode_put(dvp_at);
3081 return (error);
3082 }
3083 }
3084
3085 return (namei(ndp));
3086}
3087
1c79356b
A
3088/*
3089 * Change current working directory to a given file descriptor.
3090 */
1c79356b 3091/* ARGSUSED */
2d21ac55
A
3092static int
3093common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
1c79356b 3094{
2d21ac55
A
3095 struct filedesc *fdp = p->p_fd;
3096 vnode_t vp;
3097 vnode_t tdp;
3098 vnode_t tvp;
1c79356b 3099 struct mount *mp;
1c79356b 3100 int error;
2d21ac55 3101 vfs_context_t ctx = vfs_context_current();
1c79356b 3102
b0d623f7 3103 AUDIT_ARG(fd, uap->fd);
2d21ac55
A
3104 if (per_thread && uap->fd == -1) {
3105 /*
3106 * Switching back from per-thread to per process CWD; verify we
3107 * in fact have one before proceeding. The only success case
3108 * for this code path is to return 0 preemptively after zapping
3109 * the thread structure contents.
3110 */
3111 thread_t th = vfs_context_thread(ctx);
3112 if (th) {
3113 uthread_t uth = get_bsdthread_info(th);
3114 tvp = uth->uu_cdir;
3115 uth->uu_cdir = NULLVP;
3116 if (tvp != NULLVP) {
3117 vnode_rele(tvp);
3118 return (0);
3119 }
3120 }
3121 return (EBADF);
3122 }
91447636
A
3123
3124 if ( (error = file_vnode(uap->fd, &vp)) )
3125 return(error);
3126 if ( (error = vnode_getwithref(vp)) ) {
3127 file_drop(uap->fd);
3128 return(error);
3129 }
55e303ae
A
3130
3131 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3132
2d21ac55 3133 if (vp->v_type != VDIR) {
1c79356b 3134 error = ENOTDIR;
2d21ac55
A
3135 goto out;
3136 }
3137
3138#if CONFIG_MACF
3139 error = mac_vnode_check_chdir(ctx, vp);
3140 if (error)
3141 goto out;
3142#endif
3143 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3144 if (error)
3145 goto out;
3146
1c79356b 3147 while (!error && (mp = vp->v_mountedhere) != NULL) {
91447636
A
3148 if (vfs_busy(mp, LK_NOWAIT)) {
3149 error = EACCES;
3150 goto out;
55e303ae 3151 }
2d21ac55 3152 error = VFS_ROOT(mp, &tdp, ctx);
91447636 3153 vfs_unbusy(mp);
1c79356b
A
3154 if (error)
3155 break;
91447636 3156 vnode_put(vp);
1c79356b
A
3157 vp = tdp;
3158 }
91447636
A
3159 if (error)
3160 goto out;
3161 if ( (error = vnode_ref(vp)) )
3162 goto out;
3163 vnode_put(vp);
3164
2d21ac55
A
3165 if (per_thread) {
3166 thread_t th = vfs_context_thread(ctx);
3167 if (th) {
3168 uthread_t uth = get_bsdthread_info(th);
3169 tvp = uth->uu_cdir;
3170 uth->uu_cdir = vp;
b0d623f7 3171 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
3172 } else {
3173 vnode_rele(vp);
3174 return (ENOENT);
3175 }
3176 } else {
3177 proc_fdlock(p);
3178 tvp = fdp->fd_cdir;
3179 fdp->fd_cdir = vp;
3180 proc_fdunlock(p);
3181 }
91447636
A
3182
3183 if (tvp)
3184 vnode_rele(tvp);
3185 file_drop(uap->fd);
3186
1c79356b 3187 return (0);
91447636
A
3188out:
3189 vnode_put(vp);
3190 file_drop(uap->fd);
3191
3192 return(error);
1c79356b
A
3193}
3194
2d21ac55 3195int
b0d623f7 3196fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3197{
3198 return common_fchdir(p, uap, 0);
3199}
3200
3201int
b0d623f7 3202__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3203{
3204 return common_fchdir(p, (void *)uap, 1);
3205}
3206
1c79356b 3207/*
b0d623f7 3208 * Change current working directory (".").
2d21ac55
A
3209 *
3210 * Returns: 0 Success
3211 * change_dir:ENOTDIR
3212 * change_dir:???
3213 * vnode_ref:ENOENT No such file or directory
1c79356b 3214 */
1c79356b 3215/* ARGSUSED */
2d21ac55
A
3216static int
3217common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
1c79356b 3218{
2d21ac55 3219 struct filedesc *fdp = p->p_fd;
1c79356b
A
3220 int error;
3221 struct nameidata nd;
2d21ac55
A
3222 vnode_t tvp;
3223 vfs_context_t ctx = vfs_context_current();
91447636 3224
39037602 3225 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
2d21ac55
A
3226 UIO_USERSPACE, uap->path, ctx);
3227 error = change_dir(&nd, ctx);
55e303ae 3228 if (error)
1c79356b 3229 return (error);
91447636
A
3230 if ( (error = vnode_ref(nd.ni_vp)) ) {
3231 vnode_put(nd.ni_vp);
3232 return (error);
3233 }
3234 /*
3235 * drop the iocount we picked up in change_dir
3236 */
3237 vnode_put(nd.ni_vp);
3238
2d21ac55
A
3239 if (per_thread) {
3240 thread_t th = vfs_context_thread(ctx);
3241 if (th) {
3242 uthread_t uth = get_bsdthread_info(th);
3243 tvp = uth->uu_cdir;
3244 uth->uu_cdir = nd.ni_vp;
b0d623f7 3245 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
3246 } else {
3247 vnode_rele(nd.ni_vp);
3248 return (ENOENT);
3249 }
3250 } else {
3251 proc_fdlock(p);
3252 tvp = fdp->fd_cdir;
3253 fdp->fd_cdir = nd.ni_vp;
3254 proc_fdunlock(p);
3255 }
91447636
A
3256
3257 if (tvp)
3258 vnode_rele(tvp);
3259
1c79356b
A
3260 return (0);
3261}
3262
b0d623f7
A
3263
3264/*
3265 * chdir
3266 *
3267 * Change current working directory (".") for the entire process
3268 *
3269 * Parameters: p Process requesting the call
3270 * uap User argument descriptor (see below)
3271 * retval (ignored)
3272 *
3273 * Indirect parameters: uap->path Directory path
3274 *
3275 * Returns: 0 Success
3276 * common_chdir: ENOTDIR
3277 * common_chdir: ENOENT No such file or directory
3278 * common_chdir: ???
3279 *
3280 */
2d21ac55 3281int
b0d623f7 3282chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3283{
3284 return common_chdir(p, (void *)uap, 0);
3285}
3286
b0d623f7
A
3287/*
3288 * __pthread_chdir
3289 *
3290 * Change current working directory (".") for a single thread
3291 *
3292 * Parameters: p Process requesting the call
3293 * uap User argument descriptor (see below)
3294 * retval (ignored)
3295 *
3296 * Indirect parameters: uap->path Directory path
3297 *
3298 * Returns: 0 Success
3299 * common_chdir: ENOTDIR
3300 * common_chdir: ENOENT No such file or directory
3301 * common_chdir: ???
3302 *
3303 */
2d21ac55 3304int
b0d623f7 3305__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3306{
3307 return common_chdir(p, (void *)uap, 1);
3308}
3309
3310
1c79356b
A
3311/*
3312 * Change notion of root (``/'') directory.
3313 */
1c79356b
A
3314/* ARGSUSED */
3315int
b0d623f7 3316chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
1c79356b 3317{
2d21ac55 3318 struct filedesc *fdp = p->p_fd;
1c79356b
A
3319 int error;
3320 struct nameidata nd;
2d21ac55
A
3321 vnode_t tvp;
3322 vfs_context_t ctx = vfs_context_current();
1c79356b 3323
91447636 3324 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
1c79356b
A
3325 return (error);
3326
39037602 3327 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
2d21ac55
A
3328 UIO_USERSPACE, uap->path, ctx);
3329 error = change_dir(&nd, ctx);
55e303ae 3330 if (error)
1c79356b
A
3331 return (error);
3332
2d21ac55
A
3333#if CONFIG_MACF
3334 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3335 &nd.ni_cnd);
3336 if (error) {
91447636
A
3337 vnode_put(nd.ni_vp);
3338 return (error);
3339 }
2d21ac55
A
3340#endif
3341
91447636
A
3342 if ( (error = vnode_ref(nd.ni_vp)) ) {
3343 vnode_put(nd.ni_vp);
1c79356b
A
3344 return (error);
3345 }
91447636 3346 vnode_put(nd.ni_vp);
1c79356b 3347
91447636 3348 proc_fdlock(p);
fa4905b1 3349 tvp = fdp->fd_rdir;
1c79356b 3350 fdp->fd_rdir = nd.ni_vp;
91447636
A
3351 fdp->fd_flags |= FD_CHROOT;
3352 proc_fdunlock(p);
3353
fa4905b1 3354 if (tvp != NULL)
91447636
A
3355 vnode_rele(tvp);
3356
1c79356b
A
3357 return (0);
3358}
3359
3360/*
3361 * Common routine for chroot and chdir.
2d21ac55
A
3362 *
3363 * Returns: 0 Success
3364 * ENOTDIR Not a directory
3365 * namei:??? [anything namei can return]
3366 * vnode_authorize:??? [anything vnode_authorize can return]
1c79356b
A
3367 */
3368static int
91447636 3369change_dir(struct nameidata *ndp, vfs_context_t ctx)
1c79356b 3370{
2d21ac55 3371 vnode_t vp;
1c79356b
A
3372 int error;
3373
91447636 3374 if ((error = namei(ndp)))
1c79356b 3375 return (error);
91447636 3376 nameidone(ndp);
1c79356b 3377 vp = ndp->ni_vp;
2d21ac55
A
3378
3379 if (vp->v_type != VDIR) {
91447636 3380 vnode_put(vp);
2d21ac55
A
3381 return (ENOTDIR);
3382 }
3383
3384#if CONFIG_MACF
3385 error = mac_vnode_check_chdir(ctx, vp);
3386 if (error) {
3387 vnode_put(vp);
3388 return (error);
3389 }
3390#endif
3391
3392 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3393 if (error) {
3394 vnode_put(vp);
3395 return (error);
3396 }
91447636 3397
1c79356b
A
3398 return (error);
3399}
3400
fe8ab488
A
3401/*
3402 * Free the vnode data (for directories) associated with the file glob.
3403 */
3404struct fd_vn_data *
3405fg_vn_data_alloc(void)
3406{
3407 struct fd_vn_data *fvdata;
3408
3409 /* Allocate per fd vnode data */
3410 MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
3411 M_FD_VN_DATA, M_WAITOK | M_ZERO);
3412 lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
3413 return fvdata;
3414}
3415
3416/*
3417 * Free the vnode data (for directories) associated with the file glob.
3418 */
3419void
3420fg_vn_data_free(void *fgvndata)
3421{
3422 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3423
3424 if (fvdata->fv_buf)
3425 FREE(fvdata->fv_buf, M_FD_DIRBUF);
3426 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3427 FREE(fvdata, M_FD_VN_DATA);
3428}
3429
1c79356b
A
3430/*
 3431 * Check permissions, allocate an open file structure,
 3432 * and call the device open routine if any.
 *
 * Steps, in order: reserve a fileproc/descriptor index with
 * falloc_withalloc(); open the vnode with vn_open_auth(); attach the vnode
 * to the fileglob; optionally take a whole-file advisory lock for
 * O_EXLOCK / O_SHLOCK; optionally truncate for O_TRUNC; allocate the extra
 * per-fd data for directories; possibly make a tty the controlling
 * terminal; apply O_CLOEXEC / O_CLOFORK; release the descriptor slot and
 * hand the index back through *retval.
 *
 * Parameters: ctx Context of the caller
 * ndp Initialised nameidata describing the path
 * uflags open(2)-style flags as supplied by the caller
 * vap Attributes to use if the file is created
 * fp_zalloc Allocator passed through to falloc_withalloc
 * cra Opaque argument passed through to falloc_withalloc
 * retval Receives the new descriptor index on success
2d21ac55
A
 3433 *
 3434 * Returns: 0 Success
 3435 * EINVAL
 3436 * EINTR
 3437 * falloc:ENFILE
 3438 * falloc:EMFILE
 3439 * falloc:ENOMEM
 3440 * vn_open_auth:???
 3441 * dupfdopen:???
 3442 * VNOP_ADVLOCK:???
 3443 * vnode_setsize:???
b0d623f7
A
 3444 *
 3445 * XXX Need to implement uid, gid
1c79356b 3446 */
2d21ac55 3447int
39236c6e
A
3448open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3449 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3450 int32_t *retval)
1c79356b 3451{
2d21ac55
A
3452 proc_t p = vfs_context_proc(ctx);
3453 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2d21ac55
A
3454 struct fileproc *fp;
3455 vnode_t vp;
91447636 3456 int flags, oflags;
1c79356b
A
3457 int type, indx, error;
3458 struct flock lf;
3e170ce0 3459 struct vfs_context context;
ccc36f2f 3460
91447636 3461 oflags = uflags;
ccc36f2f
A
3462
/* All access-mode bits set at once is rejected */
3463 if ((oflags & O_ACCMODE) == O_ACCMODE)
3464 return(EINVAL);
3e170ce0 3465
91447636 3466 flags = FFLAGS(uflags);
3e170ce0
A
/* FENCRYPTED / FUNENCRYPTED are never taken from the caller's flags */
3467 CLR(flags, FENCRYPTED);
3468 CLR(flags, FUNENCRYPTED);
91447636
A
3469
3470 AUDIT_ARG(fflags, oflags);
3471 AUDIT_ARG(mode, vap->va_mode);
3472
39236c6e
A
3473 if ((error = falloc_withalloc(p,
3474 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
1c79356b 3475 return (error);
91447636 3476 }
/*
 * Stash the reserved index as a negative value; the error path below only
 * tries dupfdopen() when uu_dupfd has become >= 0 during vn_open_auth().
 * NOTE(review): presumably the fdesc-style device open sets it - confirm.
 */
2d21ac55 3477 uu->uu_dupfd = -indx - 1;
91447636 3478
2d21ac55
A
3479 if ((error = vn_open_auth(ndp, &flags, vap))) {
3480 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
39236c6e 3481 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
2d21ac55 3482 fp_drop(p, indx, NULL, 0);
91447636
A
3483 *retval = indx;
3484 return (0);
3485 }
1c79356b
A
3486 }
/* ERESTART is reported to the caller as EINTR */
3487 if (error == ERESTART)
91447636
A
3488 error = EINTR;
3489 fp_free(p, indx, fp);
1c79356b
A
3490 return (error);
3491 }
2d21ac55
A
3492 uu->uu_dupfd = 0;
3493 vp = ndp->ni_vp;
55e303ae 3494
/* Attach the opened vnode to the fileglob */
3e170ce0 3495 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
91447636
A
3496 fp->f_fglob->fg_ops = &vnops;
3497 fp->f_fglob->fg_data = (caddr_t)vp;
3498
1c79356b
A
/* O_EXLOCK / O_SHLOCK: take a whole-file flock-style lock (start 0, len 0) */
3499 if (flags & (O_EXLOCK | O_SHLOCK)) {
3500 lf.l_whence = SEEK_SET;
3501 lf.l_start = 0;
3502 lf.l_len = 0;
3503 if (flags & O_EXLOCK)
3504 lf.l_type = F_WRLCK;
3505 else
3506 lf.l_type = F_RDLCK;
3507 type = F_FLOCK;
/* Wait for the lock unless the open is non-blocking */
3508 if ((flags & FNONBLOCK) == 0)
3509 type |= F_WAIT;
2d21ac55
A
3510#if CONFIG_MACF
3511 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3512 F_SETLK, &lf);
3513 if (error)
3514 goto bad;
3515#endif
39236c6e 3516 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
55e303ae 3517 goto bad;
/* Remember the lock so the "bad" path below knows to release it */
91447636 3518 fp->f_fglob->fg_flag |= FHASLOCK;
1c79356b 3519 }
55e303ae 3520
00867663
A
3521#if DEVELOPMENT || DEBUG
3522 /*
 3523 * XXX VSWAP: Check for entitlements or special flag here
 3524 * so we can restrict access appropriately.
 3525 */
3526#else /* DEVELOPMENT || DEBUG */
3527
3528 if (vnode_isswap(vp) && (flags & (FWRITE | O_TRUNC)) && (ctx != vfs_context_kernel())) {
3529 /* block attempt to write/truncate swapfile */
3530 error = EPERM;
3531 goto bad;
3532 }
3533#endif /* DEVELOPMENT || DEBUG */
3534
91447636
A
3535 /* try to truncate by setting the size attribute */
3536 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3537 goto bad;
55e303ae 3538
fe8ab488
A
3539 /*
 3540 * For directories we hold some additional information in the fd.
 3541 */
3542 if (vnode_vtype(vp) == VDIR) {
3543 fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
3544 } else {
3545 fp->f_fglob->fg_vn_data = NULL;
2d21ac55
A
3546 }
3547
/*
 * NOTE(review): vp is still used after this vnode_put() (tty check and the
 * secluded-memory section); presumably the reference held on behalf of the
 * open file keeps it valid - confirm.
 */
91447636 3548 vnode_put(vp);
55e303ae 3549
3e170ce0
A
3550 /*
 3551 * The first terminal open (without a O_NOCTTY) by a session leader
 3552 * results in it being set as the controlling terminal.
 3553 */
3554 if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
3555 !(flags & O_NOCTTY)) {
3556 int tmp = 0;
3557
/* Best effort: the ioctl result is deliberately ignored */
3558 (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
3559 (caddr_t)&tmp, ctx);
3560 }
3561
/* Under the fd lock: apply per-descriptor flags and release the slot for indx */
91447636 3562 proc_fdlock(p);
6d2010ae
A
3563 if (flags & O_CLOEXEC)
3564 *fdflags(p, indx) |= UF_EXCLOSE;
39236c6e
A
3565 if (flags & O_CLOFORK)
3566 *fdflags(p, indx) |= UF_FORKCLOSE;
6601e61a 3567 procfdtbl_releasefd(p, indx, NULL);
39037602
A
3568
3569#if CONFIG_SECLUDED_MEMORY
/*
 * Regular files only: decide whether the file's memory object is eligible
 * for the secluded pool, according to the secluded_for_filecache mode.
 */
3570 if (secluded_for_filecache &&
3571 FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE &&
3572 vnode_vtype(vp) == VREG) {
3573 memory_object_control_t moc;
3574
3575 moc = ubc_getobject(vp, UBC_FLAGS_NONE);
3576
3577 if (moc == MEMORY_OBJECT_CONTROL_NULL) {
3578 /* nothing to do... */
3579 } else if (fp->f_fglob->fg_flag & FWRITE) {
3580 /* writable -> no longer eligible for secluded pages */
3581 memory_object_mark_eligible_for_secluded(moc,
3582 FALSE);
3583 } else if (secluded_for_filecache == 1) {
3584 char pathname[32] = { 0, };
3585 size_t copied;
3586 /* XXX FBDP: better way to detect /Applications/ ? */
/* Only the first bytes of the caller-supplied path are examined; copy errors are ignored */
3587 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3588 copyinstr(ndp->ni_dirp,
3589 pathname,
3590 sizeof (pathname),
3591 &copied);
3592 } else {
3593 copystr(CAST_DOWN(void *, ndp->ni_dirp),
3594 pathname,
3595 sizeof (pathname),
3596 &copied);
3597 }
3598 pathname[sizeof (pathname) - 1] = '\0';
3599 if (strncmp(pathname,
3600 "/Applications/",
3601 strlen("/Applications/")) == 0 &&
3602 strncmp(pathname,
3603 "/Applications/Camera.app/",
3604 strlen("/Applications/Camera.app/")) != 0) {
3605 /*
 3606 * not writable
 3607 * AND from "/Applications/"
 3608 * AND not from "/Applications/Camera.app/"
 3609 * ==> eligible for secluded
 3610 */
3611 memory_object_mark_eligible_for_secluded(moc,
3612 TRUE);
3613 }
3614 } else if (secluded_for_filecache == 2) {
5ba3f43e
A
3615#if __arm64__
3616#define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
3617#elif __arm__
3618#define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
3619#else
39037602 3620/* not implemented... */
5ba3f43e 3621#endif
39037602
A
/*
 * NOTE(review): apart from the shared-cache test, these comparisons are
 * bounded by strlen(vp->v_name), so any v_name that is a prefix of the
 * literal matches, and v_name is dereferenced without a NULL check -
 * confirm that both are intended.
 */
3622 if (!strncmp(vp->v_name,
3623 DYLD_SHARED_CACHE_NAME,
3624 strlen(DYLD_SHARED_CACHE_NAME)) ||
3625 !strncmp(vp->v_name,
3626 "dyld",
3627 strlen(vp->v_name)) ||
3628 !strncmp(vp->v_name,
3629 "launchd",
3630 strlen(vp->v_name)) ||
3631 !strncmp(vp->v_name,
3632 "Camera",
3633 strlen(vp->v_name)) ||
3634 !strncmp(vp->v_name,
3635 "mediaserverd",
3636 strlen(vp->v_name))) {
3637 /*
 3638 * This file matters when launching Camera:
 3639 * do not store its contents in the secluded
 3640 * pool that will be drained on Camera launch.
 3641 */
3642 memory_object_mark_eligible_for_secluded(moc,
3643 FALSE);
3644 }
3645 }
3646 }
3647#endif /* CONFIG_SECLUDED_MEMORY */
3648
91447636
A
3649 fp_drop(p, indx, fp, 1);
3650 proc_fdunlock(p);
3651
1c79356b 3652 *retval = indx;
91447636 3653
1c79356b 3654 return (0);
55e303ae 3655bad:
/*
 * Failure after vn_open_auth() succeeded: undo the advisory lock if one was
 * taken, close and put the vnode, and free the fileproc.  The close is done
 * with a copy of the current context carrying the fileglob's credential.
 */
3e170ce0 3656 context = *vfs_context_current();
2d21ac55 3657 context.vc_ucred = fp->f_fglob->fg_cred;
39037602 3658
fe8ab488
A
3659 if ((fp->f_fglob->fg_flag & FHASLOCK) &&
3660 (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
3661 lf.l_whence = SEEK_SET;
3662 lf.l_start = 0;
3663 lf.l_len = 0;
3664 lf.l_type = F_UNLCK;
39037602 3665
fe8ab488
A
3666 (void)VNOP_ADVLOCK(
3667 vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
3668 }
2d21ac55
A
3669
3670 vn_close(vp, fp->f_fglob->fg_flag, &context);
91447636
A
3671 vnode_put(vp);
3672 fp_free(p, indx, fp);
3673
55e303ae 3674 return (error);
1c79356b
A
3675}
3676
fe8ab488
A
3677/*
3678 * While most of the *at syscall handlers can call nameiat() which
3679 * is a wrapper around namei, the use of namei and initialisation
3680 * of nameidata are far removed and in different functions - namei
3681 * gets called in vn_open_auth for open1. So we'll just do here what
3682 * nameiat() does.
3683 */
3684static int
3685open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3686 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
3687 int dirfd)
3688{
3689 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
3690 int error;
3691 char c;
3692
3693 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3694 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3695 if (error)
3696 return (error);
3697 } else {
3698 c = *((char *)(ndp->ni_dirp));
3699 }
3700
3701 if (c != '/') {
3702 vnode_t dvp_at;
3703
3704 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3705 &dvp_at);
3706 if (error)
3707 return (error);
3708
3709 if (vnode_vtype(dvp_at) != VDIR) {
3710 vnode_put(dvp_at);
3711 return (ENOTDIR);
3712 }
3713
3714 ndp->ni_dvp = dvp_at;
3715 ndp->ni_cnd.cn_flags |= USEDVP;
3716 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
3717 retval);
3718 vnode_put(dvp_at);
3719 return (error);
3720 }
3721 }
3722
3723 return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
3724}
3725
0c530ab8 3726/*
b0d623f7 3727 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
0c530ab8
A
3728 *
3729 * Parameters: p Process requesting the open
3730 * uap User argument descriptor (see below)
3731 * retval Pointer to an area to receive the
3732 * return calue from the system call
3733 *
3734 * Indirect: uap->path Path to open (same as 'open')
3735 * uap->flags Flags to open (same as 'open'
3736 * uap->uid UID to set, if creating
3737 * uap->gid GID to set, if creating
3738 * uap->mode File mode, if creating (same as 'open')
3739 * uap->xsecurity ACL to set, if creating
3740 *
3741 * Returns: 0 Success
3742 * !0 errno value
3743 *
3744 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3745 *
3746 * XXX: We should enummerate the possible errno values here, and where
3747 * in the code they originated.
3748 */
1c79356b 3749int
b0d623f7 3750open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
91447636 3751{
2d21ac55 3752 struct filedesc *fdp = p->p_fd;
91447636
A
3753 int ciferror;
3754 kauth_filesec_t xsecdst;
3755 struct vnode_attr va;
2d21ac55 3756 struct nameidata nd;
91447636
A
3757 int cmode;
3758
b0d623f7
A
3759 AUDIT_ARG(owner, uap->uid, uap->gid);
3760
91447636
A
3761 xsecdst = NULL;
3762 if ((uap->xsecurity != USER_ADDR_NULL) &&
3763 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3764 return ciferror;
3765
91447636
A
3766 VATTR_INIT(&va);
3767 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3768 VATTR_SET(&va, va_mode, cmode);
3769 if (uap->uid != KAUTH_UID_NONE)
3770 VATTR_SET(&va, va_uid, uap->uid);
3771 if (uap->gid != KAUTH_GID_NONE)
3772 VATTR_SET(&va, va_gid, uap->gid);
3773 if (xsecdst != NULL)
3774 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3775
6d2010ae
A
3776 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3777 uap->path, vfs_context_current());
2d21ac55 3778
39236c6e
A
3779 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3780 fileproc_alloc_init, NULL, retval);
91447636
A
3781 if (xsecdst != NULL)
3782 kauth_filesec_free(xsecdst);
3783
3784 return ciferror;
3785}
3786
39037602 3787/*
316670eb 3788 * Go through the data-protected atomically controlled open (2)
39037602 3789 *
316670eb
A
3790 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3791 */
3792int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3793 int flags = uap->flags;
3794 int class = uap->class;
3795 int dpflags = uap->dpflags;
3796
39037602 3797 /*
316670eb
A
3798 * Follow the same path as normal open(2)
3799 * Look up the item if it exists, and acquire the vnode.
3800 */
3801 struct filedesc *fdp = p->p_fd;
3802 struct vnode_attr va;
3803 struct nameidata nd;
3804 int cmode;
3805 int error;
39037602 3806
316670eb
A
3807 VATTR_INIT(&va);
3808 /* Mask off all but regular access permissions */
3809 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3810 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3811
3812 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3813 uap->path, vfs_context_current());
3814
39037602
A
3815 /*
3816 * Initialize the extra fields in vnode_attr to pass down our
316670eb
A
3817 * extra fields.
3818 * 1. target cprotect class.
39037602
A
3819 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3820 */
3821 if (flags & O_CREAT) {
3e170ce0
A
3822 /* lower level kernel code validates that the class is valid before applying it. */
3823 if (class != PROTECTION_CLASS_DEFAULT) {
3824 /*
3825 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3826 * file behave the same as open (2)
3827 */
3828 VATTR_SET(&va, va_dataprotect_class, class);
3829 }
316670eb 3830 }
39037602 3831
3e170ce0 3832 if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
316670eb
A
3833 if ( flags & (O_RDWR | O_WRONLY)) {
3834 /* Not allowed to write raw encrypted bytes */
39037602
A
3835 return EINVAL;
3836 }
3e170ce0
A
3837 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
3838 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3839 }
3840 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
3841 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
3842 }
316670eb
A
3843 }
3844
39236c6e
A
3845 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3846 fileproc_alloc_init, NULL, retval);
316670eb
A
3847
3848 return error;
3849}
3850
fe8ab488
A
3851static int
3852openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
3853 int fd, enum uio_seg segflg, int *retval)
2d21ac55 3854{
fe8ab488 3855 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
91447636 3856 struct vnode_attr va;
2d21ac55 3857 struct nameidata nd;
91447636 3858 int cmode;
1c79356b 3859
91447636
A
3860 VATTR_INIT(&va);
3861 /* Mask off all but regular access permissions */
fe8ab488 3862 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
91447636
A
3863 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3864
fe8ab488
A
3865 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
3866 segflg, path, ctx);
2d21ac55 3867
fe8ab488
A
3868 return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
3869 retval, fd));
1c79356b 3870}
91447636 3871
fe8ab488
A
3872int
3873open(proc_t p, struct open_args *uap, int32_t *retval)
3874{
3875 __pthread_testcancel(1);
3876 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3877}
1c79356b 3878
fe8ab488
A
3879int
3880open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3881 int32_t *retval)
3882{
3883 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3884 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3885}
91447636 3886
1c79356b 3887int
fe8ab488
A
3888openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3889 int32_t *retval)
1c79356b 3890{
fe8ab488
A
3891 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3892 uap->mode, uap->fd, UIO_USERSPACE, retval));
3893}
91447636 3894
fe8ab488
A
3895int
3896openat(proc_t p, struct openat_args *uap, int32_t *retval)
3897{
3898 __pthread_testcancel(1);
3899 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3900}
3901
3902/*
3903 * openbyid_np: open a file given a file system id and a file system object id
3904 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3905 * file systems that don't support object ids it is a node id (uint64_t).
3906 *
3907 * Parameters: p Process requesting the open
3908 * uap User argument descriptor (see below)
3909 * retval Pointer to an area to receive the
3910 * return calue from the system call
3911 *
3912 * Indirect: uap->path Path to open (same as 'open')
3913 *
3914 * uap->fsid id of target file system
3915 * uap->objid id of target file system object
3916 * uap->flags Flags to open (same as 'open')
3917 *
3918 * Returns: 0 Success
3919 * !0 errno value
3920 *
3921 *
3922 * XXX: We should enummerate the possible errno values here, and where
3923 * in the code they originated.
3924 */
3925int
3926openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
3927{
3928 fsid_t fsid;
3929 uint64_t objid;
3930 int error;
3931 char *buf = NULL;
3932 int buflen = MAXPATHLEN;
3933 int pathlen = 0;
3934 vfs_context_t ctx = vfs_context_current();
3935
490019cf
A
3936 if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
3937 return (error);
3938 }
3939
fe8ab488
A
3940 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
3941 return (error);
3942 }
3943
3944 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3945 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
3946 return (error);
3947 }
3948
3949 AUDIT_ARG(value32, fsid.val[0]);
3950 AUDIT_ARG(value64, objid);
3951
3952 /*resolve path from fsis, objid*/
3953 do {
3954 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
3955 if (buf == NULL) {
3956 return (ENOMEM);
3957 }
3958
3959 error = fsgetpath_internal(
3960 ctx, fsid.val[0], objid,
3961 buflen, buf, &pathlen);
3962
3963 if (error) {
3964 FREE(buf, M_TEMP);
3965 buf = NULL;
3966 }
3967 } while (error == ENOSPC && (buflen += MAXPATHLEN));
3968
3969 if (error) {
3970 return error;
3971 }
3972
3973 buf[pathlen] = 0;
3974
3975 error = openat_internal(
3976 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
3977
3978 FREE(buf, M_TEMP);
3979
3980 return error;
3981}
3982
3983
3984/*
3985 * Create a special file.
3986 */
3987static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3988
3989int
3990mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
3991{
3992 struct vnode_attr va;
3993 vfs_context_t ctx = vfs_context_current();
3994 int error;
3995 struct nameidata nd;
3996 vnode_t vp, dvp;
3997
3998 VATTR_INIT(&va);
3999 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4000 VATTR_SET(&va, va_rdev, uap->dev);
91447636
A
4001
4002 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
4003 if ((uap->mode & S_IFMT) == S_IFIFO)
2d21ac55 4004 return(mkfifo1(ctx, uap->path, &va));
1c79356b 4005
55e303ae 4006 AUDIT_ARG(mode, uap->mode);
b0d623f7 4007 AUDIT_ARG(value32, uap->dev);
91447636 4008
2d21ac55 4009 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
1c79356b 4010 return (error);
39037602 4011 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
2d21ac55 4012 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
4013 error = namei(&nd);
4014 if (error)
1c79356b 4015 return (error);
91447636 4016 dvp = nd.ni_dvp;
1c79356b 4017 vp = nd.ni_vp;
91447636
A
4018
4019 if (vp != NULL) {
1c79356b 4020 error = EEXIST;
91447636 4021 goto out;
1c79356b 4022 }
55e303ae 4023
91447636 4024 switch (uap->mode & S_IFMT) {
91447636
A
4025 case S_IFCHR:
4026 VATTR_SET(&va, va_type, VCHR);
4027 break;
4028 case S_IFBLK:
4029 VATTR_SET(&va, va_type, VBLK);
4030 break;
91447636
A
4031 default:
4032 error = EINVAL;
4033 goto out;
4034 }
2d21ac55
A
4035
4036#if CONFIG_MACF
6d2010ae
A
4037 error = mac_vnode_check_create(ctx,
4038 nd.ni_dvp, &nd.ni_cnd, &va);
4039 if (error)
4040 goto out;
2d21ac55
A
4041#endif
4042
4043 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4044 goto out;
4045
6d2010ae 4046 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
91447636
A
4047 goto out;
4048
4049 if (vp) {
4050 int update_flags = 0;
4051
4052 // Make sure the name & parent pointers are hooked up
4053 if (vp->v_name == NULL)
4054 update_flags |= VNODE_UPDATE_NAME;
4055 if (vp->v_parent == NULLVP)
4056 update_flags |= VNODE_UPDATE_PARENT;
4057
4058 if (update_flags)
4059 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
4060
2d21ac55
A
4061#if CONFIG_FSE
4062 add_fsevent(FSE_CREATE_FILE, ctx,
91447636
A
4063 FSE_ARG_VNODE, vp,
4064 FSE_ARG_DONE);
2d21ac55 4065#endif
1c79356b 4066 }
91447636
A
4067
4068out:
4069 /*
4070 * nameidone has to happen before we vnode_put(dvp)
4071 * since it may need to release the fs_nodelock on the dvp
4072 */
4073 nameidone(&nd);
4074
4075 if (vp)
4076 vnode_put(vp);
4077 vnode_put(dvp);
4078
1c79356b
A
4079 return (error);
4080}
4081
4082/*
4083 * Create a named pipe.
2d21ac55
A
4084 *
4085 * Returns: 0 Success
4086 * EEXIST
4087 * namei:???
4088 * vnode_authorize:???
4089 * vn_create:???
1c79356b 4090 */
91447636
A
4091static int
4092mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
1c79356b 4093{
91447636 4094 vnode_t vp, dvp;
1c79356b
A
4095 int error;
4096 struct nameidata nd;
55e303ae 4097
39037602 4098 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
91447636 4099 UIO_USERSPACE, upath, ctx);
55e303ae
A
4100 error = namei(&nd);
4101 if (error)
1c79356b 4102 return (error);
91447636
A
4103 dvp = nd.ni_dvp;
4104 vp = nd.ni_vp;
4105
4106 /* check that this is a new file and authorize addition */
4107 if (vp != NULL) {
4108 error = EEXIST;
4109 goto out;
4110 }
2d21ac55
A
4111 VATTR_SET(vap, va_type, VFIFO);
4112
6d2010ae 4113 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
2d21ac55 4114 goto out;
2d21ac55 4115
6d2010ae 4116 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
91447636
A
4117out:
4118 /*
4119 * nameidone has to happen before we vnode_put(dvp)
4120 * since it may need to release the fs_nodelock on the dvp
4121 */
4122 nameidone(&nd);
4123
4124 if (vp)
4125 vnode_put(vp);
4126 vnode_put(dvp);
4127
55e303ae 4128 return error;
91447636
A
4129}
4130
0c530ab8
A
4131
4132/*
b0d623f7 4133 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
0c530ab8
A
4134 *
4135 * Parameters: p Process requesting the open
4136 * uap User argument descriptor (see below)
4137 * retval (Ignored)
4138 *
4139 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4140 * uap->uid UID to set
4141 * uap->gid GID to set
4142 * uap->mode File mode to set (same as 'mkfifo')
4143 * uap->xsecurity ACL to set, if creating
4144 *
4145 * Returns: 0 Success
4146 * !0 errno value
4147 *
4148 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4149 *
4150 * XXX: We should enummerate the possible errno values here, and where
4151 * in the code they originated.
4152 */
91447636 4153int
b0d623f7 4154mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
91447636
A
4155{
4156 int ciferror;
4157 kauth_filesec_t xsecdst;
91447636
A
4158 struct vnode_attr va;
4159
b0d623f7
A
4160 AUDIT_ARG(owner, uap->uid, uap->gid);
4161
91447636
A
4162 xsecdst = KAUTH_FILESEC_NONE;
4163 if (uap->xsecurity != USER_ADDR_NULL) {
4164 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4165 return ciferror;
4166 }
4167
91447636
A
4168 VATTR_INIT(&va);
4169 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4170 if (uap->uid != KAUTH_UID_NONE)
4171 VATTR_SET(&va, va_uid, uap->uid);
4172 if (uap->gid != KAUTH_GID_NONE)
4173 VATTR_SET(&va, va_gid, uap->gid);
4174 if (xsecdst != KAUTH_FILESEC_NONE)
4175 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4176
2d21ac55 4177 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
91447636
A
4178
4179 if (xsecdst != KAUTH_FILESEC_NONE)
4180 kauth_filesec_free(xsecdst);
4181 return ciferror;
4182}
4183
4184/* ARGSUSED */
4185int
b0d623f7 4186mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
91447636 4187{
91447636
A
4188 struct vnode_attr va;
4189
91447636
A
4190 VATTR_INIT(&va);
4191 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4192
2d21ac55 4193 return(mkfifo1(vfs_context_current(), uap->path, &va));
1c79356b
A
4194}
4195
b0d623f7
A
4196
/*
 * my_strrchr
 *
 * Return a pointer to the last occurrence of ch in the NUL-terminated
 * string p, or NULL if ch does not occur.  The terminating NUL takes part
 * in the search, so looking for '\0' yields a pointer to the terminator.
 */
static char *
my_strrchr(char *p, int ch)
{
	char *last = NULL;

	do {
		if (*p == ch) {
			last = p;
		}
	} while (*p++ != '\0');

	return (last);
}
4210
4211extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4212
4213int
4214safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4215{
4216 int ret, len = _len;
4217
4218 *truncated_path = 0;
4219 ret = vn_getpath(dvp, path, &len);
4220 if (ret == 0 && len < (MAXPATHLEN - 1)) {
4221 if (leafname) {
4222 path[len-1] = '/';
4223 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
4224 if (len > MAXPATHLEN) {
4225 char *ptr;
39037602 4226
b0d623f7
A
4227 // the string got truncated!
4228 *truncated_path = 1;
4229 ptr = my_strrchr(path, '/');
4230 if (ptr) {
4231 *ptr = '\0'; // chop off the string at the last directory component
4232 }
4233 len = strlen(path) + 1;
4234 }
4235 }
4236 } else if (ret == 0) {
4237 *truncated_path = 1;
4238 } else if (ret != 0) {
4239 struct vnode *mydvp=dvp;
4240
4241 if (ret != ENOSPC) {
4242 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4243 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
39037602 4244 }
b0d623f7 4245 *truncated_path = 1;
39037602 4246
b0d623f7
A
4247 do {
4248 if (mydvp->v_parent != NULL) {
4249 mydvp = mydvp->v_parent;
4250 } else if (mydvp->v_mount) {
4251 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4252 break;
4253 } else {
4254 // no parent and no mount point? only thing is to punt and say "/" changed
4255 strlcpy(path, "/", _len);
4256 len = 2;
4257 mydvp = NULL;
4258 }
39037602 4259
b0d623f7
A
4260 if (mydvp == NULL) {
4261 break;
4262 }
4263
4264 len = _len;
4265 ret = vn_getpath(mydvp, path, &len);
4266 } while (ret == ENOSPC);
4267 }
4268
4269 return len;
4270}
4271
4272
1c79356b
A
4273/*
4274 * Make a hard file link.
2d21ac55
A
4275 *
4276 * Returns: 0 Success
4277 * EPERM
4278 * EEXIST
4279 * EXDEV
4280 * namei:???
4281 * vnode_authorize:???
4282 * VNOP_LINK:???
1c79356b 4283 */
1c79356b 4284/* ARGSUSED */
fe8ab488
A
4285static int
4286linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
4287 user_addr_t link, int flag, enum uio_seg segflg)
1c79356b 4288{
91447636 4289 vnode_t vp, dvp, lvp;
1c79356b 4290 struct nameidata nd;
fe8ab488 4291 int follow;
1c79356b 4292 int error;
b0d623f7 4293#if CONFIG_FSE
91447636 4294 fse_info finfo;
b0d623f7 4295#endif
91447636 4296 int need_event, has_listeners;
2d21ac55 4297 char *target_path = NULL;
b0d623f7 4298 int truncated=0;
1c79356b 4299
91447636
A
4300 vp = dvp = lvp = NULLVP;
4301
4302 /* look up the object we are linking to */
fe8ab488
A
4303 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
4304 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
4305 segflg, path, ctx);
4306
4307 error = nameiat(&nd, fd1);
55e303ae 4308 if (error)
1c79356b
A
4309 return (error);
4310 vp = nd.ni_vp;
91447636
A
4311
4312 nameidone(&nd);
4313
2d21ac55
A
4314 /*
4315 * Normally, linking to directories is not supported.
4316 * However, some file systems may have limited support.
4317 */
91447636 4318 if (vp->v_type == VDIR) {
39037602 4319 if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
2d21ac55
A
4320 error = EPERM; /* POSIX */
4321 goto out;
4322 }
39037602 4323
2d21ac55
A
4324 /* Linking to a directory requires ownership. */
4325 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
4326 struct vnode_attr dva;
4327
4328 VATTR_INIT(&dva);
4329 VATTR_WANTED(&dva, va_uid);
4330 if (vnode_getattr(vp, &dva, ctx) != 0 ||
4331 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
4332 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
4333 error = EACCES;
4334 goto out;
4335 }
4336 }
91447636
A
4337 }
4338
91447636 4339 /* lookup the target node */
6d2010ae
A
4340#if CONFIG_TRIGGERS
4341 nd.ni_op = OP_LINK;
4342#endif
91447636 4343 nd.ni_cnd.cn_nameiop = CREATE;
2d21ac55 4344 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
fe8ab488
A
4345 nd.ni_dirp = link;
4346 error = nameiat(&nd, fd2);
91447636
A
4347 if (error != 0)
4348 goto out;
4349 dvp = nd.ni_dvp;
4350 lvp = nd.ni_vp;
2d21ac55
A
4351
4352#if CONFIG_MACF
4353 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
4354 goto out2;
4355#endif
4356
4357 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4358 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
4359 goto out2;
4360
91447636
A
4361 /* target node must not exist */
4362 if (lvp != NULLVP) {
4363 error = EEXIST;
4364 goto out2;
4365 }
4366 /* cannot link across mountpoints */
4367 if (vnode_mount(vp) != vnode_mount(dvp)) {
4368 error = EXDEV;
4369 goto out2;
4370 }
39037602 4371
91447636 4372 /* authorize creation of the target note */
2d21ac55 4373 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
91447636
A
4374 goto out2;
4375
4376 /* and finally make the link */
2d21ac55 4377 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
91447636
A
4378 if (error)
4379 goto out2;
4380
39236c6e
A
4381#if CONFIG_MACF
4382 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
4383#endif
4384
2d21ac55 4385#if CONFIG_FSE
91447636 4386 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
2d21ac55
A
4387#else
4388 need_event = 0;
4389#endif
91447636
A
4390 has_listeners = kauth_authorize_fileop_has_listeners();
4391
4392 if (need_event || has_listeners) {
91447636
A
4393 char *link_to_path = NULL;
4394 int len, link_name_len;
4395
4396 /* build the path to the new link file */
2d21ac55
A
4397 GET_PATH(target_path);
4398 if (target_path == NULL) {
4399 error = ENOMEM;
4400 goto out2;
4401 }
4402
b0d623f7 4403 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
91447636
A
4404
4405 if (has_listeners) {
4406 /* build the path to file we are linking to */
2d21ac55
A
4407 GET_PATH(link_to_path);
4408 if (link_to_path == NULL) {
4409 error = ENOMEM;
4410 goto out2;
4411 }
4412
91447636 4413 link_name_len = MAXPATHLEN;
fe8ab488
A
4414 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
4415 /*
39037602 4416 * Call out to allow 3rd party notification of rename.
fe8ab488
A
4417 * Ignore result of kauth_authorize_fileop call.
4418 */
39037602
A
4419 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
4420 (uintptr_t)link_to_path,
fe8ab488
A
4421 (uintptr_t)target_path);
4422 }
2d21ac55
A
4423 if (link_to_path != NULL) {
4424 RELEASE_PATH(link_to_path);
4425 }
91447636 4426 }
2d21ac55 4427#if CONFIG_FSE
91447636
A
4428 if (need_event) {
4429 /* construct fsevent */
2d21ac55 4430 if (get_fse_info(vp, &finfo, ctx) == 0) {
b0d623f7
A
4431 if (truncated) {
4432 finfo.mode |= FSE_TRUNCATED_PATH;
4433 }
4434
91447636 4435 // build the path to the destination of the link
2d21ac55 4436 add_fsevent(FSE_CREATE_FILE, ctx,
91447636
A
4437 FSE_ARG_STRING, len, target_path,
4438 FSE_ARG_FINFO, &finfo,
4439 FSE_ARG_DONE);
1c79356b 4440 }
b0d623f7
A
4441 if (vp->v_parent) {
4442 add_fsevent(FSE_STAT_CHANGED, ctx,
4443 FSE_ARG_VNODE, vp->v_parent,
4444 FSE_ARG_DONE);
4445 }
1c79356b 4446 }
2d21ac55 4447#endif
1c79356b 4448 }
91447636
A
4449out2:
4450 /*
4451 * nameidone has to happen before we vnode_put(dvp)
4452 * since it may need to release the fs_nodelock on the dvp
4453 */
4454 nameidone(&nd);
2d21ac55
A
4455 if (target_path != NULL) {
4456 RELEASE_PATH(target_path);
4457 }
91447636
A
4458out:
4459 if (lvp)
4460 vnode_put(lvp);
4461 if (dvp)
4462 vnode_put(dvp);
4463 vnode_put(vp);
4464 return (error);
4465}
1c79356b 4466
fe8ab488
A
4467int
4468link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4469{
4470 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4471 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4472}
4473
4474int
4475linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4476{
4477 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4478 return (EINVAL);
4479
4480 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4481 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4482}
4483
1c79356b
A
4484/*
4485 * Make a symbolic link.
91447636
A
4486 *
4487 * We could add support for ACLs here too...
1c79356b 4488 */
1c79356b 4489/* ARGSUSED */
fe8ab488
A
4490static int
4491symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
4492 user_addr_t link, enum uio_seg segflg)
1c79356b 4493{
91447636
A
4494 struct vnode_attr va;
4495 char *path;
1c79356b
A
4496 int error;
4497 struct nameidata nd;
91447636 4498 vnode_t vp, dvp;
1c79356b 4499 size_t dummy=0;
fe8ab488
A
4500 proc_t p;
4501
4502 error = 0;
4503 if (UIO_SEG_IS_USER_SPACE(segflg)) {
4504 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
4505 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
4506 } else {
4507 path = (char *)path_data;
4508 }
91447636 4509 if (error)
1c79356b 4510 goto out;
55e303ae 4511 AUDIT_ARG(text, path); /* This is the link string */
91447636 4512
fe8ab488
A
4513 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
4514 segflg, link, ctx);
4515
4516 error = nameiat(&nd, fd);
55e303ae 4517 if (error)
1c79356b 4518 goto out;
91447636
A
4519 dvp = nd.ni_dvp;
4520 vp = nd.ni_vp;
55e303ae 4521
fe8ab488 4522 p = vfs_context_proc(ctx);
2d21ac55
A
4523 VATTR_INIT(&va);
4524 VATTR_SET(&va, va_type, VLNK);
4525 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
fe8ab488 4526
2d21ac55
A
4527#if CONFIG_MACF
4528 error = mac_vnode_check_create(ctx,
4529 dvp, &nd.ni_cnd, &va);
4530#endif
4531 if (error != 0) {
4532 goto skipit;
4533 }
91447636 4534
2d21ac55
A
4535 if (vp != NULL) {
4536 error = EEXIST;
4537 goto skipit;
4538 }
4539
4540 /* authorize */
4541 if (error == 0)
4542 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
4543 /* get default ownership, etc. */
4544 if (error == 0)
4545 error = vnode_authattr_new(dvp, &va, 0, ctx);
4546 if (error == 0)
4547 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
4548
39236c6e 4549#if CONFIG_MACF
3e170ce0 4550 if (error == 0 && vp)
39236c6e
A
4551 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
4552#endif
4553
2d21ac55 4554 /* do fallback attribute handling */
3e170ce0 4555 if (error == 0 && vp)
2d21ac55 4556 error = vnode_setattr_fallback(vp, &va, ctx);
39236c6e 4557
2d21ac55
A
4558 if (error == 0) {
4559 int update_flags = 0;
55e303ae 4560
3e170ce0 4561 /*check if a new vnode was created, else try to get one*/
2d21ac55
A
4562 if (vp == NULL) {
4563 nd.ni_cnd.cn_nameiop = LOOKUP;
6d2010ae
A
4564#if CONFIG_TRIGGERS
4565 nd.ni_op = OP_LOOKUP;
4566#endif
2d21ac55 4567 nd.ni_cnd.cn_flags = 0;
fe8ab488 4568 error = nameiat(&nd, fd);
2d21ac55 4569 vp = nd.ni_vp;
55e303ae 4570
2d21ac55
A
4571 if (vp == NULL)
4572 goto skipit;
4573 }
fe8ab488 4574
91447636 4575#if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
fe8ab488 4576 /* call out to allow 3rd party notification of rename.
2d21ac55
A
4577 * Ignore result of kauth_authorize_fileop call.
4578 */
4579 if (kauth_authorize_fileop_has_listeners() &&
4580 namei(&nd) == 0) {
4581 char *new_link_path = NULL;
4582 int len;
fe8ab488 4583
2d21ac55
A
4584 /* build the path to the new link file */
4585 new_link_path = get_pathbuff();
4586 len = MAXPATHLEN;
4587 vn_getpath(dvp, new_link_path, &len);
4588 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
91447636 4589 new_link_path[len - 1] = '/';
2d21ac55 4590 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
91447636 4591 }
fe8ab488
A
4592
4593 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
2d21ac55
A
4594 (uintptr_t)path, (uintptr_t)new_link_path);
4595 if (new_link_path != NULL)
4596 release_pathbuff(new_link_path);
4597 }
fe8ab488 4598#endif
2d21ac55
A
4599 // Make sure the name & parent pointers are hooked up
4600 if (vp->v_name == NULL)
4601 update_flags |= VNODE_UPDATE_NAME;
4602 if (vp->v_parent == NULLVP)
4603 update_flags |= VNODE_UPDATE_PARENT;
fe8ab488 4604
2d21ac55
A
4605 if (update_flags)
4606 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
91447636 4607
2d21ac55
A
4608#if CONFIG_FSE
4609 add_fsevent(FSE_CREATE_FILE, ctx,
4610 FSE_ARG_VNODE, vp,
4611 FSE_ARG_DONE);
4612#endif
4613 }
91447636
A
4614
4615skipit:
4616 /*
4617 * nameidone has to happen before we vnode_put(dvp)
4618 * since it may need to release the fs_nodelock on the dvp
4619 */
4620 nameidone(&nd);
4621
4622 if (vp)
4623 vnode_put(vp);
4624 vnode_put(dvp);
1c79356b 4625out:
fe8ab488
A
4626 if (path && (path != (char *)path_data))
4627 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
91447636 4628
1c79356b
A
4629 return (error);
4630}
4631
fe8ab488
A
4632int
4633symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4634{
4635 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4636 uap->link, UIO_USERSPACE));
4637}
4638
4639int
4640symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4641 __unused int32_t *retval)
4642{
4643 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4644 uap->path2, UIO_USERSPACE));
4645}
4646
1c79356b
A
4647/*
4648 * Delete a whiteout from the filesystem.
fe8ab488 4649 * No longer supported.
1c79356b 4650 */
1c79356b 4651int
fe8ab488 4652undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
1c79356b 4653{
fe8ab488 4654 return (ENOTSUP);
1c79356b
A
4655}
4656
4657/*
4658 * Delete a name from the filesystem.
4659 */
1c79356b 4660/* ARGSUSED */
fe8ab488 4661static int
c18c124e
A
4662unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
4663 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
1c79356b 4664{
c18c124e 4665 struct nameidata nd;
91447636 4666 vnode_t vp, dvp;
1c79356b 4667 int error;
91447636 4668 struct componentname *cnp;
2d21ac55 4669 char *path = NULL;
b0d623f7
A
4670 int len=0;
4671#if CONFIG_FSE
2d21ac55 4672 fse_info finfo;
6d2010ae 4673 struct vnode_attr va;
b0d623f7 4674#endif
c18c124e
A
4675 int flags;
4676 int need_event;
4677 int has_listeners;
4678 int truncated_path;
6d2010ae 4679 int batched;
c18c124e
A
4680 struct vnode_attr *vap;
4681 int do_retry;
4682 int retry_count = 0;
4683 int cn_flags;
4684
4685 cn_flags = LOCKPARENT;
4686 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
4687 cn_flags |= AUDITVNPATH1;
4688 /* If a starting dvp is passed, it trumps any fd passed. */
4689 if (start_dvp)
4690 cn_flags |= USEDVP;
6d2010ae 4691
c910b4d9
A
4692#if NAMEDRSRCFORK
4693 /* unlink or delete is allowed on rsrc forks and named streams */
c18c124e 4694 cn_flags |= CN_ALLOWRSRCFORK;
c910b4d9
A
4695#endif
4696
c18c124e
A
4697retry:
4698 do_retry = 0;
4699 flags = 0;
4700 need_event = 0;
4701 has_listeners = 0;
4702 truncated_path = 0;
4703 vap = NULL;
4704
4705 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
4706
4707 nd.ni_dvp = start_dvp;
4708 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
4709 cnp = &nd.ni_cnd;
91447636 4710
813fb2f6 4711continue_lookup:
c18c124e 4712 error = nameiat(&nd, fd);
2d21ac55
A
4713 if (error)
4714 return (error);
b0d623f7 4715
c18c124e
A
4716 dvp = nd.ni_dvp;
4717 vp = nd.ni_vp;
91447636 4718
6d2010ae 4719
91447636 4720 /* With Carbon delete semantics, busy files cannot be deleted */
316670eb 4721 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
91447636 4722 flags |= VNODE_REMOVE_NODELETEBUSY;
2d21ac55 4723 }
39037602 4724
39236c6e 4725 /* Skip any potential upcalls if told to. */
316670eb
A
4726 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4727 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4728 }
4729
6d2010ae
A
4730 if (vp) {
4731 batched = vnode_compound_remove_available(vp);
4732 /*
4733 * The root of a mounted filesystem cannot be deleted.
4734 */
4735 if (vp->v_flag & VROOT) {
4736 error = EBUSY;
4737 }
2d21ac55 4738
00867663
A
4739#if DEVELOPMENT || DEBUG
4740 /*
4741 * XXX VSWAP: Check for entitlements or special flag here
4742 * so we can restrict access appropriately.
4743 */
4744#else /* DEVELOPMENT || DEBUG */
4745
4746 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
4747 error = EPERM;
4748 goto out;
4749 }
4750#endif /* DEVELOPMENT || DEBUG */
4751
6d2010ae
A
4752 if (!batched) {
4753 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4754 if (error) {
3e170ce0
A
4755 if (error == ENOENT) {
4756 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4757 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4758 do_retry = 1;
4759 retry_count++;
4760 }
c18c124e 4761 }
6d2010ae
A
4762 goto out;
4763 }
4764 }
4765 } else {
4766 batched = 1;
2d21ac55 4767
6d2010ae
A
4768 if (!vnode_compound_remove_available(dvp)) {
4769 panic("No vp, but no compound remove?");
4770 }
4771 }
2d21ac55 4772
2d21ac55
A
4773#if CONFIG_FSE
4774 need_event = need_fsevent(FSE_DELETE, dvp);
4775 if (need_event) {
6d2010ae
A
4776 if (!batched) {
4777 if ((vp->v_flag & VISHARDLINK) == 0) {
4778 /* XXX need to get these data in batched VNOP */
4779 get_fse_info(vp, &finfo, ctx);
4780 }
4781 } else {
4782 error = vfs_get_notify_attributes(&va);
4783 if (error) {
4784 goto out;
4785 }
4786
4787 vap = &va;
2d21ac55
A
4788 }
4789 }
4790#endif
4791 has_listeners = kauth_authorize_fileop_has_listeners();
4792 if (need_event || has_listeners) {
2d21ac55 4793 if (path == NULL) {
6d2010ae
A
4794 GET_PATH(path);
4795 if (path == NULL) {
4796 error = ENOMEM;
4797 goto out;
4798 }
2d21ac55 4799 }
c18c124e 4800 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
2d21ac55
A
4801 }
4802
4803#if NAMEDRSRCFORK
c18c124e 4804 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
2d21ac55
A
4805 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4806 else
4807#endif
6d2010ae 4808 {
c18c124e
A
4809 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
4810 vp = nd.ni_vp;
6d2010ae
A
4811 if (error == EKEEPLOOKING) {
4812 if (!batched) {
4813 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4814 }
4815
c18c124e 4816 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
6d2010ae
A
4817 panic("EKEEPLOOKING, but continue flag not set?");
4818 }
4819
4820 if (vnode_isdir(vp)) {
4821 error = EISDIR;
4822 goto out;
4823 }
813fb2f6 4824 goto continue_lookup;
3e170ce0
A
4825 } else if (error == ENOENT && batched) {
4826 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4827 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4828 /*
4829 * For compound VNOPs, the authorization callback may
4830 * return ENOENT in case of racing hardlink lookups
4831 * hitting the name cache, redrive the lookup.
4832 */
4833 do_retry = 1;
4834 retry_count += 1;
4835 goto out;
4836 }
6d2010ae
A
4837 }
4838 }
2d21ac55
A
4839
4840 /*
39037602 4841 * Call out to allow 3rd party notification of delete.
2d21ac55
A
4842 * Ignore result of kauth_authorize_fileop call.
4843 */
1c79356b 4844 if (!error) {
2d21ac55 4845 if (has_listeners) {
39037602
A
4846 kauth_authorize_fileop(vfs_context_ucred(ctx),
4847 KAUTH_FILEOP_DELETE,
2d21ac55
A
4848 (uintptr_t)vp,
4849 (uintptr_t)path);
4850 }
91447636 4851
2d21ac55
A
4852 if (vp->v_flag & VISHARDLINK) {
4853 //
4854 // if a hardlink gets deleted we want to blow away the
4855 // v_parent link because the path that got us to this
4856 // instance of the link is no longer valid. this will
4857 // force the next call to get the path to ask the file
4858 // system instead of just following the v_parent link.
4859 //
4860 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
91447636 4861 }
91447636 4862
2d21ac55
A
4863#if CONFIG_FSE
4864 if (need_event) {
4865 if (vp->v_flag & VISHARDLINK) {
4866 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
4867 } else if (vap) {
4868 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 4869 }
b0d623f7
A
4870 if (truncated_path) {
4871 finfo.mode |= FSE_TRUNCATED_PATH;
4872 }
2d21ac55
A
4873 add_fsevent(FSE_DELETE, ctx,
4874 FSE_ARG_STRING, len, path,
4875 FSE_ARG_FINFO, &finfo,
4876 FSE_ARG_DONE);
4877 }
4878#endif
1c79356b 4879 }
6d2010ae
A
4880
4881out:
2d21ac55
A
4882 if (path != NULL)
4883 RELEASE_PATH(path);
4884
c910b4d9 4885#if NAMEDRSRCFORK
39037602 4886 /* recycle the deleted rsrc fork vnode to force a reclaim, which
b0d623f7
A
4887 * will cause its shadow file to go away if necessary.
4888 */
6d2010ae
A
4889 if (vp && (vnode_isnamedstream(vp)) &&
4890 (vp->v_parent != NULLVP) &&
4891 vnode_isshadow(vp)) {
4892 vnode_recycle(vp);
39037602 4893 }
c910b4d9 4894#endif
6d2010ae
A
4895 /*
4896 * nameidone has to happen before we vnode_put(dvp)
4897 * since it may need to release the fs_nodelock on the dvp
4898 */
c18c124e 4899 nameidone(&nd);
91447636 4900 vnode_put(dvp);
6d2010ae
A
4901 if (vp) {
4902 vnode_put(vp);
4903 }
c18c124e
A
4904
4905 if (do_retry) {
4906 goto retry;
4907 }
4908
1c79356b
A
4909 return (error);
4910}
4911
fe8ab488 4912int
c18c124e
A
4913unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
4914 enum uio_seg segflg, int unlink_flags)
fe8ab488 4915{
c18c124e
A
4916 return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
4917 unlink_flags));
fe8ab488
A
4918}
4919
1c79356b 4920/*
c18c124e 4921 * Delete a name from the filesystem using Carbon semantics.
1c79356b 4922 */
c18c124e
A
4923int
4924delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
fe8ab488 4925{
c18c124e
A
4926 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4927 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
fe8ab488
A
4928}
4929
c18c124e
A
4930/*
4931 * Delete a name from the filesystem using POSIX semantics.
4932 */
1c79356b 4933int
b0d623f7 4934unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
1c79356b 4935{
c18c124e
A
4936 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4937 uap->path, UIO_USERSPACE, 0));
fe8ab488 4938}
2d21ac55 4939
fe8ab488
A
4940int
4941unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
4942{
4943 if (uap->flag & ~AT_REMOVEDIR)
4944 return (EINVAL);
4945
4946 if (uap->flag & AT_REMOVEDIR)
4947 return (rmdirat_internal(vfs_context_current(), uap->fd,
4948 uap->path, UIO_USERSPACE));
4949 else
4950 return (unlinkat_internal(vfs_context_current(), uap->fd,
c18c124e 4951 NULLVP, uap->path, UIO_USERSPACE, 0));
1c79356b
A
4952}
4953
4954/*
4955 * Reposition read/write file offset.
4956 */
1c79356b 4957int
2d21ac55 4958lseek(proc_t p, struct lseek_args *uap, off_t *retval)
1c79356b 4959{
91447636 4960 struct fileproc *fp;
2d21ac55
A
4961 vnode_t vp;
4962 struct vfs_context *ctx;
91447636 4963 off_t offset = uap->offset, file_size;
1c79356b
A
4964 int error;
4965
91447636
A
4966 if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
4967 if (error == ENOTSUP)
4968 return (ESPIPE);
1c79356b 4969 return (error);
55e303ae 4970 }
91447636
A
4971 if (vnode_isfifo(vp)) {
4972 file_drop(uap->fd);
4973 return(ESPIPE);
4974 }
2d21ac55
A
4975
4976
4977 ctx = vfs_context_current();
4978#if CONFIG_MACF
4979 if (uap->whence == L_INCR && uap->offset == 0)
4980 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
4981 fp->f_fglob);
4982 else
4983 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
4984 fp->f_fglob);
4985 if (error) {
4986 file_drop(uap->fd);
4987 return (error);
4988 }
4989#endif
91447636
A
4990 if ( (error = vnode_getwithref(vp)) ) {
4991 file_drop(uap->fd);
4992 return(error);
4993 }
4994
1c79356b
A
4995 switch (uap->whence) {
4996 case L_INCR:
91447636 4997 offset += fp->f_fglob->fg_offset;
1c79356b
A
4998 break;
4999 case L_XTND:
2d21ac55 5000 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
55e303ae 5001 break;
91447636 5002 offset += file_size;
1c79356b
A
5003 break;
5004 case L_SET:
1c79356b 5005 break;
813fb2f6 5006 case SEEK_HOLE:
5ba3f43e 5007 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKHOLE, (caddr_t)&offset, 0, ctx);
813fb2f6
A
5008 break;
5009 case SEEK_DATA:
5ba3f43e 5010 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKDATA, (caddr_t)&offset, 0, ctx);
813fb2f6 5011 break;
1c79356b 5012 default:
55e303ae 5013 error = EINVAL;
1c79356b 5014 }
55e303ae
A
5015 if (error == 0) {
5016 if (uap->offset > 0 && offset < 0) {
5017 /* Incremented/relative move past max size */
5018 error = EOVERFLOW;
5019 } else {
5020 /*
5021 * Allow negative offsets on character devices, per
5022 * POSIX 1003.1-2001. Most likely for writing disk
5023 * labels.
5024 */
5025 if (offset < 0 && vp->v_type != VCHR) {
5026 /* Decremented/relative move before start */
5027 error = EINVAL;
5028 } else {
5029 /* Success */
91447636
A
5030 fp->f_fglob->fg_offset = offset;
5031 *retval = fp->f_fglob->fg_offset;
55e303ae
A
5032 }
5033 }
5034 }
b0d623f7 5035
39037602 5036 /*
b0d623f7
A
5037 * An lseek can affect whether data is "available to read." Use
5038 * hint of NOTE_NONE so no EVFILT_VNODE events fire
5039 */
5040 post_event_if_success(vp, error, NOTE_NONE);
91447636
A
5041 (void)vnode_put(vp);
5042 file_drop(uap->fd);
55e303ae 5043 return (error);
1c79356b
A
5044}
5045
91447636 5046
1c79356b 5047/*
91447636 5048 * Check access permissions.
2d21ac55
A
5049 *
5050 * Returns: 0 Success
5051 * vnode_authorize:???
1c79356b 5052 */
91447636
A
5053static int
5054access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
1c79356b 5055{
91447636 5056 kauth_action_t action;
1c79356b
A
5057 int error;
5058
91447636
A
5059 /*
5060 * If just the regular access bits, convert them to something
5061 * that vnode_authorize will understand.
5062 */
5063 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
5064 action = 0;
5065 if (uflags & R_OK)
5066 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5067 if (uflags & W_OK) {
5068 if (vnode_isdir(vp)) {
5069 action |= KAUTH_VNODE_ADD_FILE |
5070 KAUTH_VNODE_ADD_SUBDIRECTORY;
5071 /* might want delete rights here too */
5072 } else {
5073 action |= KAUTH_VNODE_WRITE_DATA;
5074 }
5075 }
5076 if (uflags & X_OK) {
5077 if (vnode_isdir(vp)) {
5078 action |= KAUTH_VNODE_SEARCH;
5079 } else {
5080 action |= KAUTH_VNODE_EXECUTE;
5081 }
5082 }
5083 } else {
5084 /* take advantage of definition of uflags */
5085 action = uflags >> 8;
5086 }
39037602 5087
2d21ac55
A
5088#if CONFIG_MACF
5089 error = mac_vnode_check_access(ctx, vp, uflags);
5090 if (error)
5091 return (error);
5092#endif /* MAC */
5093
91447636
A
5094 /* action == 0 means only check for existence */
5095 if (action != 0) {
5096 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
5097 } else {
5098 error = 0;
5099 }
5100
5101 return(error);
1c79356b 5102}
1c79356b 5103
91447636
A
5104
5105
2d21ac55 5106/*
b0d623f7 5107 * access_extended: Check access permissions in bulk.
2d21ac55 5108 *
b0d623f7 5109 * Description: uap->entries Pointer to an array of accessx
39037602
A
5110 * descriptor structs, plus one or
5111 * more NULL terminated strings (see
b0d623f7
A
5112 * "Notes" section below).
5113 * uap->size Size of the area pointed to by
5114 * uap->entries.
5115 * uap->results Pointer to the results array.
2d21ac55
A
5116 *
5117 * Returns: 0 Success
5118 * ENOMEM Insufficient memory
5119 * EINVAL Invalid arguments
5120 * namei:EFAULT Bad address
5121 * namei:ENAMETOOLONG Filename too long
5122 * namei:ENOENT No such file or directory
5123 * namei:ELOOP Too many levels of symbolic links
5124 * namei:EBADF Bad file descriptor
5125 * namei:ENOTDIR Not a directory
5126 * namei:???
5127 * access1:
5128 *
5129 * Implicit returns:
5130 * uap->results Array contents modified
5131 *
5132 * Notes: The uap->entries are structured as an arbitrary length array
b0d623f7 5133 * of accessx descriptors, followed by one or more NULL terminated
2d21ac55
A
5134 * strings
5135 *
5136 * struct accessx_descriptor[0]
5137 * ...
5138 * struct accessx_descriptor[n]
5139 * char name_data[0];
5140 *
5141 * We determine the entry count by walking the buffer containing
b0d623f7 5142 * the uap->entries argument descriptor. For each descriptor we
2d21ac55
A
5143 * see, the valid values for the offset ad_name_offset will be
5144 * in the byte range:
5145 *
5146 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5147 * to
5148 * [ uap->entries + uap->size - 2 ]
5149 *
5150 * since we must have at least one string, and the string must
b0d623f7 5151 * be at least one character plus the NULL terminator in length.
39037602 5152 *
2d21ac55
A
5153 * XXX: Need to support the check-as uid argument
5154 */
1c79356b 5155int
b0d623f7 5156access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
1c79356b 5157{
2d21ac55
A
5158 struct accessx_descriptor *input = NULL;
5159 errno_t *result = NULL;
5160 errno_t error = 0;
5161 int wantdelete = 0;
5162 unsigned int desc_max, desc_actual, i, j;
91447636 5163 struct vfs_context context;
1c79356b 5164 struct nameidata nd;
91447636 5165 int niopts;
2d21ac55
A
5166 vnode_t vp = NULL;
5167 vnode_t dvp = NULL;
5168#define ACCESSX_MAX_DESCR_ON_STACK 10
5169 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
91447636 5170
91447636
A
5171 context.vc_ucred = NULL;
5172
2d21ac55
A
5173 /*
5174 * Validate parameters; if valid, copy the descriptor array and string
5175 * arguments into local memory. Before proceeding, the following
5176 * conditions must have been met:
5177 *
5178 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5179 * o There must be sufficient room in the request for at least one
5180 * descriptor and a one byte NUL terminated string.
5181 * o The allocation of local storage must not fail.
5182 */
91447636
A
5183 if (uap->size > ACCESSX_MAX_TABLESIZE)
5184 return(ENOMEM);
2d21ac55 5185 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
91447636 5186 return(EINVAL);
2d21ac55
A
5187 if (uap->size <= sizeof (stack_input)) {
5188 input = stack_input;
5189 } else {
91447636
A
5190 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
5191 if (input == NULL) {
5192 error = ENOMEM;
5193 goto out;
5194 }
2d21ac55 5195 }
91447636 5196 error = copyin(uap->entries, input, uap->size);
55e303ae 5197 if (error)
91447636 5198 goto out;
1c79356b 5199
b0d623f7
A
5200 AUDIT_ARG(opaque, input, uap->size);
5201
91447636 5202 /*
2d21ac55
A
5203 * Force NUL termination of the copyin buffer to avoid namei() running
5204 * off the end. If the caller passes us bogus data, they may get a
5205 * bogus result.
5206 */
5207 ((char *)input)[uap->size - 1] = 0;
5208
5209 /*
5210 * Access is defined as checking against the process' real identity,
5211 * even if operations are checking the effective identity. This
5212 * requires that we use a local vfs context.
91447636
A
5213 */
5214 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
2d21ac55 5215 context.vc_thread = current_thread();
91447636
A
5216
5217 /*
2d21ac55
A
5218 * Find out how many entries we have, so we can allocate the result
5219 * array by walking the list and adjusting the count downward by the
5220 * earliest string offset we see.
91447636 5221 */
2d21ac55
A
5222 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
5223 desc_actual = desc_max;
5224 for (i = 0; i < desc_actual; i++) {
91447636 5225 /*
2d21ac55
A
5226 * Take the offset to the name string for this entry and
5227 * convert to an input array index, which would be one off
5228 * the end of the array if this entry was the lowest-addressed
5229 * name string.
91447636
A
5230 */
5231 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
2d21ac55
A
5232
5233 /*
5234 * An offset greater than the max allowable offset is an error.
5235 * It is also an error for any valid entry to point
5236 * to a location prior to the end of the current entry, if
5237 * it's not a reference to the string of the previous entry.
5238 */
5239 if (j > desc_max || (j != 0 && j <= i)) {
91447636
A
5240 error = EINVAL;
5241 goto out;
5242 }
2d21ac55 5243
39037602
A
5244 /* Also do not let ad_name_offset point to something beyond the size of the input */
5245 if (input[i].ad_name_offset >= uap->size) {
5246 error = EINVAL;
5247 goto out;
5248 }
5249
2d21ac55
A
5250 /*
5251 * An offset of 0 means use the previous descriptor's offset;
5252 * this is used to chain multiple requests for the same file
5253 * to avoid multiple lookups.
5254 */
91447636 5255 if (j == 0) {
2d21ac55 5256 /* This is not valid for the first entry */
91447636
A
5257 if (i == 0) {
5258 error = EINVAL;
5259 goto out;
5260 }
5261 continue;
5262 }
2d21ac55
A
5263
5264 /*
5265 * If the offset of the string for this descriptor is before
5266 * what we believe is the current actual last descriptor,
5267 * then we need to adjust our estimate downward; this permits
5268 * the string table following the last descriptor to be out
5269 * of order relative to the descriptor list.
5270 */
5271 if (j < desc_actual)
5272 desc_actual = j;
91447636 5273 }
2d21ac55
A
5274
5275 /*
5276 * We limit the actual number of descriptors we are willing to process
5277 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5278 * requested does not exceed this limit,
5279 */
5280 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
91447636
A
5281 error = ENOMEM;
5282 goto out;
5283 }
2d21ac55 5284 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
91447636
A
5285 if (result == NULL) {
5286 error = ENOMEM;
5287 goto out;
5288 }
5289
5290 /*
2d21ac55
A
5291 * Do the work by iterating over the descriptor entries we know to
5292 * at least appear to contain valid data.
91447636
A
5293 */
5294 error = 0;
2d21ac55 5295 for (i = 0; i < desc_actual; i++) {
91447636 5296 /*
2d21ac55
A
5297 * If the ad_name_offset is 0, then we use the previous
5298 * results to make the check; otherwise, we are looking up
5299 * a new file name.
91447636
A
5300 */
5301 if (input[i].ad_name_offset != 0) {
5302 /* discard old vnodes */
5303 if (vp) {
5304 vnode_put(vp);
5305 vp = NULL;
5306 }
5307 if (dvp) {
5308 vnode_put(dvp);
5309 dvp = NULL;
5310 }
39037602 5311
2d21ac55
A
5312 /*
5313 * Scan forward in the descriptor list to see if we
5314 * need the parent vnode. We will need it if we are
5315 * deleting, since we must have rights to remove
5316 * entries in the parent directory, as well as the
5317 * rights to delete the object itself.
5318 */
91447636 5319 wantdelete = input[i].ad_flags & _DELETE_OK;
2d21ac55 5320 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
91447636
A
5321 if (input[j].ad_flags & _DELETE_OK)
5322 wantdelete = 1;
39037602 5323
91447636 5324 niopts = FOLLOW | AUDITVNPATH1;
2d21ac55 5325
91447636
A
5326 /* need parent for vnode_authorize for deletion test */
5327 if (wantdelete)
5328 niopts |= WANTPARENT;
5329
5330 /* do the lookup */
6d2010ae
A
5331 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
5332 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
5333 &context);
91447636
A
5334 error = namei(&nd);
5335 if (!error) {
5336 vp = nd.ni_vp;
5337 if (wantdelete)
5338 dvp = nd.ni_dvp;
5339 }
5340 nameidone(&nd);
5341 }
5342
5343 /*
5344 * Handle lookup errors.
5345 */
5346 switch(error) {
5347 case ENOENT:
5348 case EACCES:
5349 case EPERM:
5350 case ENOTDIR:
5351 result[i] = error;
5352 break;
5353 case 0:
5354 /* run this access check */
5355 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
5356 break;
5357 default:
5358 /* fatal lookup error */
5359
5360 goto out;
5361 }
5362 }
5363
b0d623f7
A
5364 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
5365
91447636 5366 /* copy out results */
2d21ac55 5367 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
39037602 5368
91447636 5369out:
2d21ac55 5370 if (input && input != stack_input)
91447636
A
5371 FREE(input, M_TEMP);
5372 if (result)
5373 FREE(result, M_TEMP);
5374 if (vp)
5375 vnode_put(vp);
5376 if (dvp)
5377 vnode_put(dvp);
0c530ab8
A
5378 if (IS_VALID_CRED(context.vc_ucred))
5379 kauth_cred_unref(&context.vc_ucred);
91447636 5380 return(error);
1c79356b
A
5381}
5382
2d21ac55
A
5383
5384/*
5385 * Returns: 0 Success
5386 * namei:EFAULT Bad address
5387 * namei:ENAMETOOLONG Filename too long
5388 * namei:ENOENT No such file or directory
5389 * namei:ELOOP Too many levels of symbolic links
5390 * namei:EBADF Bad file descriptor
5391 * namei:ENOTDIR Not a directory
5392 * namei:???
5393 * access1:
5394 */
fe8ab488
A
5395static int
5396faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5397 int flag, enum uio_seg segflg)
1c79356b 5398{
1c79356b
A
5399 int error;
5400 struct nameidata nd;
91447636
A
5401 int niopts;
5402 struct vfs_context context;
cf7d32b8
A
5403#if NAMEDRSRCFORK
5404 int is_namedstream = 0;
5405#endif
5406
91447636 5407 /*
fe8ab488
A
5408 * Unless the AT_EACCESS option is used, Access is defined as checking
5409 * against the process' real identity, even if operations are checking
5410 * the effective identity. So we need to tweak the credential
5411 * in the context for that case.
91447636 5412 */
fe8ab488
A
5413 if (!(flag & AT_EACCESS))
5414 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5415 else
5416 context.vc_ucred = ctx->vc_ucred;
5417 context.vc_thread = ctx->vc_thread;
5418
91447636
A
5419
5420 niopts = FOLLOW | AUDITVNPATH1;
5421 /* need parent for vnode_authorize for deletion test */
fe8ab488 5422 if (amode & _DELETE_OK)
91447636 5423 niopts |= WANTPARENT;
fe8ab488
A
5424 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5425 path, &context);
2d21ac55
A
5426
5427#if NAMEDRSRCFORK
5428 /* access(F_OK) calls are allowed for resource forks. */
fe8ab488 5429 if (amode == F_OK)
2d21ac55
A
5430 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5431#endif
fe8ab488 5432 error = nameiat(&nd, fd);
91447636
A
5433 if (error)
5434 goto out;
5435
cf7d32b8 5436#if NAMEDRSRCFORK
39037602 5437 /* Grab reference on the shadow stream file vnode to
b0d623f7
A
5438 * force an inactive on release which will mark it
5439 * for recycle.
cf7d32b8
A
5440 */
5441 if (vnode_isnamedstream(nd.ni_vp) &&
b0d623f7
A
5442 (nd.ni_vp->v_parent != NULLVP) &&
5443 vnode_isshadow(nd.ni_vp)) {
cf7d32b8
A
5444 is_namedstream = 1;
5445 vnode_ref(nd.ni_vp);
5446 }
5447#endif
5448
fe8ab488 5449 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
b0d623f7 5450
cf7d32b8
A
5451#if NAMEDRSRCFORK
5452 if (is_namedstream) {
5453 vnode_rele(nd.ni_vp);
5454 }
5455#endif
5456
91447636 5457 vnode_put(nd.ni_vp);
fe8ab488 5458 if (amode & _DELETE_OK)
91447636
A
5459 vnode_put(nd.ni_dvp);
5460 nameidone(&nd);
39037602 5461
91447636 5462out:
fe8ab488
A
5463 if (!(flag & AT_EACCESS))
5464 kauth_cred_unref(&context.vc_ucred);
5465 return (error);
5466}
5467
5468int
5469access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5470{
5471 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5472 uap->path, uap->flags, 0, UIO_USERSPACE));
91447636
A
5473}
5474
fe8ab488
A
5475int
5476faccessat(__unused proc_t p, struct faccessat_args *uap,
5477 __unused int32_t *retval)
5478{
5479 if (uap->flag & ~AT_EACCESS)
5480 return (EINVAL);
5481
5482 return (faccessat_internal(vfs_context_current(), uap->fd,
5483 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5484}
91447636 5485
2d21ac55
A
5486/*
5487 * Returns: 0 Success
5488 * EFAULT
5489 * copyout:EFAULT
5490 * namei:???
5491 * vn_stat:???
5492 */
91447636 5493static int
fe8ab488
A
5494fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5495 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5496 enum uio_seg segflg, int fd, int flag)
91447636 5497{
fe8ab488
A
5498 struct nameidata nd;
5499 int follow;
b0d623f7
A
5500 union {
5501 struct stat sb;
5502 struct stat64 sb64;
5503 } source;
5504 union {
5505 struct user64_stat user64_sb;
5506 struct user32_stat user32_sb;
5507 struct user64_stat64 user64_sb64;
5508 struct user32_stat64 user32_sb64;
5509 } dest;
91447636
A
5510 caddr_t sbp;
5511 int error, my_size;
5512 kauth_filesec_t fsec;
5513 size_t xsecurity_bufsize;
2d21ac55 5514 void * statptr;
1c79356b 5515
fe8ab488
A
5516 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5517 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5518 segflg, path, ctx);
5519
2d21ac55 5520#if NAMEDRSRCFORK
cf7d32b8 5521 int is_namedstream = 0;
2d21ac55 5522 /* stat calls are allowed for resource forks. */
fe8ab488 5523 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
2d21ac55 5524#endif
fe8ab488 5525 error = nameiat(&nd, fd);
91447636 5526 if (error)
1c79356b 5527 return (error);
91447636 5528 fsec = KAUTH_FILESEC_NONE;
b0d623f7
A
5529
5530 statptr = (void *)&source;
cf7d32b8
A
5531
5532#if NAMEDRSRCFORK
39037602
A
5533 /* Grab reference on the shadow stream file vnode to
5534 * force an inactive on release which will mark it
b0d623f7 5535 * for recycle.
cf7d32b8 5536 */
fe8ab488
A
5537 if (vnode_isnamedstream(nd.ni_vp) &&
5538 (nd.ni_vp->v_parent != NULLVP) &&
5539 vnode_isshadow(nd.ni_vp)) {
cf7d32b8 5540 is_namedstream = 1;
fe8ab488 5541 vnode_ref(nd.ni_vp);
cf7d32b8
A
5542 }
5543#endif
5544
fe8ab488 5545 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
2d21ac55
A
5546
5547#if NAMEDRSRCFORK
cf7d32b8 5548 if (is_namedstream) {
fe8ab488 5549 vnode_rele(nd.ni_vp);
2d21ac55
A
5550 }
5551#endif
fe8ab488
A
5552 vnode_put(nd.ni_vp);
5553 nameidone(&nd);
91447636 5554
1c79356b
A
5555 if (error)
5556 return (error);
91447636 5557 /* Zap spare fields */
2d21ac55 5558 if (isstat64 != 0) {
b0d623f7
A
5559 source.sb64.st_lspare = 0;
5560 source.sb64.st_qspare[0] = 0LL;
5561 source.sb64.st_qspare[1] = 0LL;
2d21ac55 5562 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
39037602 5563 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
b0d623f7
A
5564 my_size = sizeof(dest.user64_sb64);
5565 sbp = (caddr_t)&dest.user64_sb64;
2d21ac55 5566 } else {
39037602 5567 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
b0d623f7
A
5568 my_size = sizeof(dest.user32_sb64);
5569 sbp = (caddr_t)&dest.user32_sb64;
2d21ac55
A
5570 }
5571 /*
5572 * Check if we raced (post lookup) against the last unlink of a file.
5573 */
b0d623f7
A
5574 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5575 source.sb64.st_nlink = 1;
2d21ac55
A
5576 }
5577 } else {
b0d623f7
A
5578 source.sb.st_lspare = 0;
5579 source.sb.st_qspare[0] = 0LL;
5580 source.sb.st_qspare[1] = 0LL;
2d21ac55 5581 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
39037602 5582 munge_user64_stat(&source.sb, &dest.user64_sb);
b0d623f7
A
5583 my_size = sizeof(dest.user64_sb);
5584 sbp = (caddr_t)&dest.user64_sb;
2d21ac55 5585 } else {
39037602 5586 munge_user32_stat(&source.sb, &dest.user32_sb);
b0d623f7
A
5587 my_size = sizeof(dest.user32_sb);
5588 sbp = (caddr_t)&dest.user32_sb;
2d21ac55
A
5589 }
5590
5591 /*
5592 * Check if we raced (post lookup) against the last unlink of a file.
5593 */
b0d623f7
A
5594 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5595 source.sb.st_nlink = 1;
2d21ac55 5596 }
91447636
A
5597 }
5598 if ((error = copyout(sbp, ub, my_size)) != 0)
5599 goto out;
5600
5601 /* caller wants extended security information? */
5602 if (xsecurity != USER_ADDR_NULL) {
5603
5604 /* did we get any? */
5605 if (fsec == KAUTH_FILESEC_NONE) {
5606 if (susize(xsecurity_size, 0) != 0) {
5607 error = EFAULT;
5608 goto out;
5609 }
5610 } else {
5611 /* find the user buffer size */
5612 xsecurity_bufsize = fusize(xsecurity_size);
5613
5614 /* copy out the actual data size */
5615 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5616 error = EFAULT;
5617 goto out;
5618 }
5619
5620 /* if the caller supplied enough room, copy out to it */
5621 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5622 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5623 }
5624 }
5625out:
5626 if (fsec != KAUTH_FILESEC_NONE)
5627 kauth_filesec_free(fsec);
1c79356b
A
5628 return (error);
5629}
5630
b0d623f7
A
5631/*
5632 * stat_extended: Get file status; with extended security (ACL).
5633 *
5634 * Parameters: p (ignored)
5635 * uap User argument descriptor (see below)
39037602 5636 * retval (ignored)
b0d623f7
A
5637 *
5638 * Indirect: uap->path Path of file to get status from
5639 * uap->ub User buffer (holds file status info)
5640 * uap->xsecurity ACL to get (extended security)
5641 * uap->xsecurity_size Size of ACL
39037602 5642 *
b0d623f7
A
5643 * Returns: 0 Success
5644 * !0 errno value
5645 *
5646 */
2d21ac55 5647int
fe8ab488
A
5648stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5649 __unused int32_t *retval)
2d21ac55 5650{
fe8ab488
A
5651 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5652 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5653 0));
1c79356b
A
5654}
5655
2d21ac55
A
5656/*
5657 * Returns: 0 Success
fe8ab488 5658 * fstatat_internal:??? [see fstatat_internal() in this file]
2d21ac55 5659 */
91447636 5660int
b0d623f7 5661stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
1c79356b 5662{
fe8ab488
A
5663 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5664 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
91447636 5665}
1c79356b 5666
91447636 5667int
b0d623f7 5668stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
91447636 5669{
fe8ab488
A
5670 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5671 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
1c79356b 5672}
1c79356b 5673
b0d623f7
A
5674/*
5675 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5676 *
5677 * Parameters: p (ignored)
5678 * uap User argument descriptor (see below)
39037602 5679 * retval (ignored)
b0d623f7
A
5680 *
5681 * Indirect: uap->path Path of file to get status from
5682 * uap->ub User buffer (holds file status info)
5683 * uap->xsecurity ACL to get (extended security)
5684 * uap->xsecurity_size Size of ACL
39037602 5685 *
b0d623f7
A
5686 * Returns: 0 Success
5687 * !0 errno value
5688 *
5689 */
2d21ac55 5690int
b0d623f7 5691stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
2d21ac55 5692{
fe8ab488
A
5693 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5694 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5695 0));
2d21ac55 5696}
91447636 5697
b0d623f7
A
5698/*
5699 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5700 *
5701 * Parameters: p (ignored)
5702 * uap User argument descriptor (see below)
39037602 5703 * retval (ignored)
b0d623f7
A
5704 *
5705 * Indirect: uap->path Path of file to get status from
5706 * uap->ub User buffer (holds file status info)
5707 * uap->xsecurity ACL to get (extended security)
5708 * uap->xsecurity_size Size of ACL
39037602 5709 *
b0d623f7
A
5710 * Returns: 0 Success
5711 * !0 errno value
5712 *
5713 */
2d21ac55 5714int
b0d623f7 5715lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
2d21ac55 5716{
fe8ab488
A
5717 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5718 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5719 AT_SYMLINK_NOFOLLOW));
91447636
A
5720}
5721
fe8ab488
A
5722/*
5723 * Get file status; this version does not follow links.
5724 */
91447636 5725int
b0d623f7 5726lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
91447636 5727{
fe8ab488
A
5728 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5729 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
2d21ac55 5730}
b0d623f7 5731
2d21ac55 5732int
b0d623f7 5733lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
2d21ac55 5734{
fe8ab488
A
5735 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5736 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
91447636
A
5737}
5738
b0d623f7
A
5739/*
5740 * lstat64_extended: Get file status; can handle large inode numbers; does not
5741 * follow links; with extended security (ACL).
5742 *
5743 * Parameters: p (ignored)
5744 * uap User argument descriptor (see below)
39037602 5745 * retval (ignored)
b0d623f7
A
5746 *
5747 * Indirect: uap->path Path of file to get status from
5748 * uap->ub User buffer (holds file status info)
5749 * uap->xsecurity ACL to get (extended security)
5750 * uap->xsecurity_size Size of ACL
39037602 5751 *
b0d623f7
A
5752 * Returns: 0 Success
5753 * !0 errno value
5754 *
5755 */
91447636 5756int
b0d623f7 5757lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
91447636 5758{
fe8ab488
A
5759 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5760 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5761 AT_SYMLINK_NOFOLLOW));
5762}
5763
5764int
5765fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5766{
5767 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5768 return (EINVAL);
5769
5770 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5771 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5772}
5773
5774int
5775fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5776 __unused int32_t *retval)
5777{
5778 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5779 return (EINVAL);
5780
5781 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5782 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
91447636
A
5783}
5784
1c79356b 5785/*
91447636 5786 * Get configurable pathname variables.
2d21ac55
A
5787 *
5788 * Returns: 0 Success
5789 * namei:???
5790 * vn_pathconf:???
5791 *
5792 * Notes: Global implementation constants are intended to be
5793 * implemented in this function directly; all other constants
5794 * are per-FS implementation, and therefore must be handled in
5795 * each respective FS, instead.
5796 *
5797 * XXX We implement some things globally right now that should actually be
5798 * XXX per-FS; we will need to deal with this at some point.
1c79356b 5799 */
1c79356b
A
5800/* ARGSUSED */
5801int
b0d623f7 5802pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
1c79356b 5803{
1c79356b
A
5804 int error;
5805 struct nameidata nd;
2d21ac55 5806 vfs_context_t ctx = vfs_context_current();
91447636 5807
39037602 5808 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
2d21ac55 5809 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
5810 error = namei(&nd);
5811 if (error)
1c79356b 5812 return (error);
1c79356b 5813
2d21ac55 5814 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
1c79356b 5815
91447636
A
5816 vnode_put(nd.ni_vp);
5817 nameidone(&nd);
1c79356b
A
5818 return (error);
5819}
5820
5821/*
5822 * Return target name of a symbolic link.
5823 */
1c79356b 5824/* ARGSUSED */
fe8ab488
A
5825static int
5826readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
5827 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
5828 int *retval)
1c79356b 5829{
2d21ac55 5830 vnode_t vp;
91447636 5831 uio_t auio;
1c79356b
A
5832 int error;
5833 struct nameidata nd;
91447636
A
5834 char uio_buf[ UIO_SIZEOF(1) ];
5835
fe8ab488
A
5836 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5837 seg, path, ctx);
5838
5839 error = nameiat(&nd, fd);
55e303ae 5840 if (error)
1c79356b
A
5841 return (error);
5842 vp = nd.ni_vp;
91447636
A
5843
5844 nameidone(&nd);
5845
fe8ab488
A
5846 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
5847 &uio_buf[0], sizeof(uio_buf));
5848 uio_addiov(auio, buf, bufsize);
5849 if (vp->v_type != VLNK) {
1c79356b 5850 error = EINVAL;
fe8ab488 5851 } else {
2d21ac55 5852#if CONFIG_MACF
fe8ab488 5853 error = mac_vnode_check_readlink(ctx, vp);
2d21ac55
A
5854#endif
5855 if (error == 0)
fe8ab488
A
5856 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
5857 ctx);
91447636 5858 if (error == 0)
2d21ac55 5859 error = VNOP_READLINK(vp, auio, ctx);
91447636
A
5860 }
5861 vnode_put(vp);
b0d623f7 5862
fe8ab488 5863 *retval = bufsize - (int)uio_resid(auio);
1c79356b
A
5864 return (error);
5865}
5866
fe8ab488
A
5867int
5868readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5869{
5870 enum uio_seg procseg;
5871
5872 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5873 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5874 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5875 uap->count, procseg, retval));
5876}
5877
5878int
5879readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5880{
5881 enum uio_seg procseg;
5882
5883 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5884 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5885 procseg, uap->buf, uap->bufsize, procseg, retval));
5886}
5887
5888/*
5889 * Change file flags.
813fb2f6
A
5890 *
5891 * NOTE: this will vnode_put() `vp'
91447636
A
5892 */
5893static int
5894chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5895{
5896 struct vnode_attr va;
5897 kauth_action_t action;
5898 int error;
5899
5900 VATTR_INIT(&va);
5901 VATTR_SET(&va, va_flags, flags);
5902
2d21ac55
A
5903#if CONFIG_MACF
5904 error = mac_vnode_check_setflags(ctx, vp, flags);
5905 if (error)
5906 goto out;
5907#endif
5908
91447636
A
5909 /* request authorisation, disregard immutability */
5910 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5911 goto out;
5912 /*
5913 * Request that the auth layer disregard those file flags it's allowed to when
5914 * authorizing this operation; we need to do this in order to be able to
5915 * clear immutable flags.
5916 */
5917 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
5918 goto out;
5919 error = vnode_setattr(vp, &va, ctx);
5920
39037602
A
5921#if CONFIG_MACF
5922 if (error == 0)
5923 mac_vnode_notify_setflags(ctx, vp, flags);
5924#endif
5925
2d21ac55
A
5926 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5927 error = ENOTSUP;
5928 }
91447636
A
5929out:
5930 vnode_put(vp);
5931 return(error);
5932}
5933
1c79356b
A
5934/*
5935 * Change flags of a file given a path name.
5936 */
1c79356b
A
5937/* ARGSUSED */
5938int
b0d623f7 5939chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
1c79356b 5940{
2d21ac55
A
5941 vnode_t vp;
5942 vfs_context_t ctx = vfs_context_current();
1c79356b
A
5943 int error;
5944 struct nameidata nd;
5945
55e303ae 5946 AUDIT_ARG(fflags, uap->flags);
39037602 5947 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
2d21ac55 5948 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
5949 error = namei(&nd);
5950 if (error)
1c79356b
A
5951 return (error);
5952 vp = nd.ni_vp;
91447636
A
5953 nameidone(&nd);
5954
813fb2f6 5955 /* we don't vnode_put() here because chflags1 does internally */
2d21ac55 5956 error = chflags1(vp, uap->flags, ctx);
91447636
A
5957
5958 return(error);
1c79356b
A
5959}
5960
5961/*
5962 * Change flags of a file given a file descriptor.
5963 */
1c79356b
A
5964/* ARGSUSED */
5965int
b0d623f7 5966fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
1c79356b 5967{
2d21ac55 5968 vnode_t vp;
1c79356b
A
5969 int error;
5970
55e303ae
A
5971 AUDIT_ARG(fd, uap->fd);
5972 AUDIT_ARG(fflags, uap->flags);
91447636 5973 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 5974 return (error);
55e303ae 5975
91447636
A
5976 if ((error = vnode_getwithref(vp))) {
5977 file_drop(uap->fd);
5978 return(error);
5979 }
e5568f75
A
5980
5981 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5982
813fb2f6 5983 /* we don't vnode_put() here because chflags1 does internally */
2d21ac55 5984 error = chflags1(vp, uap->flags, vfs_context_current());
91447636
A
5985
5986 file_drop(uap->fd);
5987 return (error);
5988}
5989
5990/*
5991 * Change security information on a filesystem object.
2d21ac55
A
5992 *
5993 * Returns: 0 Success
5994 * EPERM Operation not permitted
5995 * vnode_authattr:??? [anything vnode_authattr can return]
5996 * vnode_authorize:??? [anything vnode_authorize can return]
5997 * vnode_setattr:??? [anything vnode_setattr can return]
5998 *
5999 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
6000 * translated to EPERM before being returned.
91447636
A
6001 */
6002static int
fe8ab488 6003chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
91447636
A
6004{
6005 kauth_action_t action;
6006 int error;
39037602 6007
b0d623f7
A
6008 AUDIT_ARG(mode, vap->va_mode);
6009 /* XXX audit new args */
91447636 6010
2d21ac55
A
6011#if NAMEDSTREAMS
6012 /* chmod calls are not allowed for resource forks. */
6013 if (vp->v_flag & VISNAMEDSTREAM) {
6014 return (EPERM);
6015 }
6016#endif
6017
6018#if CONFIG_MACF
316670eb
A
6019 if (VATTR_IS_ACTIVE(vap, va_mode) &&
6020 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
2d21ac55 6021 return (error);
39037602
A
6022
6023 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
6024 if ((error = mac_vnode_check_setowner(ctx, vp,
6025 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
6026 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1)))
6027 return (error);
6028 }
6029
6030 if (VATTR_IS_ACTIVE(vap, va_acl) &&
6031 (error = mac_vnode_check_setacl(ctx, vp, vap->va_acl)))
6032 return (error);
2d21ac55
A
6033#endif
6034
91447636
A
6035 /* make sure that the caller is allowed to set this security information */
6036 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
6037 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6038 if (error == EACCES)
6039 error = EPERM;
6040 return(error);
6041 }
39037602
A
6042
6043 if ((error = vnode_setattr(vp, vap, ctx)) != 0)
6044 return (error);
6045
6046#if CONFIG_MACF
6047 if (VATTR_IS_ACTIVE(vap, va_mode))
6048 mac_vnode_notify_setmode(ctx, vp, (mode_t)vap->va_mode);
6049
6050 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))
6051 mac_vnode_notify_setowner(ctx, vp,
6052 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
6053 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1);
6054
6055 if (VATTR_IS_ACTIVE(vap, va_acl))
6056 mac_vnode_notify_setacl(ctx, vp, vap->va_acl);
6057#endif
91447636 6058
1c79356b
A
6059 return (error);
6060}
6061
91447636 6062
1c79356b 6063/*
b0d623f7 6064 * Change mode of a file given a path name.
2d21ac55
A
6065 *
6066 * Returns: 0 Success
6067 * namei:??? [anything namei can return]
fe8ab488 6068 * chmod_vnode:??? [anything chmod_vnode can return]
1c79356b 6069 */
91447636 6070static int
fe8ab488
A
6071chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
6072 int fd, int flag, enum uio_seg segflg)
91447636
A
6073{
6074 struct nameidata nd;
fe8ab488 6075 int follow, error;
91447636 6076
fe8ab488
A
6077 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6078 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
6079 segflg, path, ctx);
6080 if ((error = nameiat(&nd, fd)))
91447636 6081 return (error);
fe8ab488 6082 error = chmod_vnode(ctx, nd.ni_vp, vap);
91447636
A
6083 vnode_put(nd.ni_vp);
6084 nameidone(&nd);
6085 return(error);
6086}
6087
0c530ab8 6088/*
39037602 6089 * chmod_extended: Change the mode of a file given a path name; with extended
b0d623f7 6090 * argument list (including extended security (ACL)).
0c530ab8
A
6091 *
6092 * Parameters: p Process requesting the open
6093 * uap User argument descriptor (see below)
6094 * retval (ignored)
6095 *
6096 * Indirect: uap->path Path to object (same as 'chmod')
6097 * uap->uid UID to set
6098 * uap->gid GID to set
6099 * uap->mode File mode to set (same as 'chmod')
6100 * uap->xsecurity ACL to set (or delete)
6101 *
6102 * Returns: 0 Success
6103 * !0 errno value
6104 *
6105 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6106 *
6107 * XXX: We should enummerate the possible errno values here, and where
6108 * in the code they originated.
6109 */
1c79356b 6110int
b0d623f7 6111chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
1c79356b 6112{
1c79356b 6113 int error;
91447636
A
6114 struct vnode_attr va;
6115 kauth_filesec_t xsecdst;
6116
b0d623f7
A
6117 AUDIT_ARG(owner, uap->uid, uap->gid);
6118
91447636
A
6119 VATTR_INIT(&va);
6120 if (uap->mode != -1)
6121 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6122 if (uap->uid != KAUTH_UID_NONE)
6123 VATTR_SET(&va, va_uid, uap->uid);
6124 if (uap->gid != KAUTH_GID_NONE)
6125 VATTR_SET(&va, va_gid, uap->gid);
6126
6127 xsecdst = NULL;
6128 switch(uap->xsecurity) {
6129 /* explicit remove request */
6130 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6131 VATTR_SET(&va, va_acl, NULL);
6132 break;
6133 /* not being set */
6134 case USER_ADDR_NULL:
6135 break;
6136 default:
6137 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6138 return(error);
6139 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6140 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
6141 }
1c79356b 6142
fe8ab488
A
6143 error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
6144 UIO_USERSPACE);
55e303ae 6145
91447636
A
6146 if (xsecdst != NULL)
6147 kauth_filesec_free(xsecdst);
6148 return(error);
6149}
4a249263 6150
2d21ac55
A
6151/*
6152 * Returns: 0 Success
fe8ab488 6153 * chmodat:??? [anything chmodat can return]
2d21ac55 6154 */
fe8ab488
A
6155static int
6156fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
6157 int flag, enum uio_seg segflg)
91447636 6158{
91447636
A
6159 struct vnode_attr va;
6160
6161 VATTR_INIT(&va);
fe8ab488
A
6162 VATTR_SET(&va, va_mode, mode & ALLPERMS);
6163
6164 return (chmodat(ctx, path, &va, fd, flag, segflg));
6165}
6166
6167int
6168chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
6169{
6170 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6171 AT_FDCWD, 0, UIO_USERSPACE));
6172}
91447636 6173
fe8ab488
A
6174int
6175fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
6176{
6177 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6178 return (EINVAL);
6179
6180 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6181 uap->fd, uap->flag, UIO_USERSPACE));
1c79356b
A
6182}
6183
6184/*
6185 * Change mode of a file given a file descriptor.
6186 */
91447636 6187static int
2d21ac55 6188fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
1c79356b 6189{
2d21ac55 6190 vnode_t vp;
1c79356b 6191 int error;
55e303ae 6192
91447636 6193 AUDIT_ARG(fd, fd);
55e303ae 6194
91447636
A
6195 if ((error = file_vnode(fd, &vp)) != 0)
6196 return (error);
6197 if ((error = vnode_getwithref(vp)) != 0) {
6198 file_drop(fd);
6199 return(error);
6200 }
55e303ae
A
6201 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6202
fe8ab488 6203 error = chmod_vnode(vfs_context_current(), vp, vap);
91447636
A
6204 (void)vnode_put(vp);
6205 file_drop(fd);
55e303ae 6206
1c79356b
A
6207 return (error);
6208}
6209
b0d623f7
A
6210/*
6211 * fchmod_extended: Change mode of a file given a file descriptor; with
6212 * extended argument list (including extended security (ACL)).
6213 *
6214 * Parameters: p Process requesting to change file mode
6215 * uap User argument descriptor (see below)
39037602 6216 * retval (ignored)
b0d623f7
A
6217 *
6218 * Indirect: uap->mode File mode to set (same as 'chmod')
6219 * uap->uid UID to set
6220 * uap->gid GID to set
6221 * uap->xsecurity ACL to set (or delete)
6222 * uap->fd File descriptor of file to change mode
39037602 6223 *
b0d623f7
A
6224 * Returns: 0 Success
6225 * !0 errno value
6226 *
6227 */
91447636 6228int
b0d623f7 6229fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
91447636
A
6230{
6231 int error;
6232 struct vnode_attr va;
6233 kauth_filesec_t xsecdst;
6234
b0d623f7
A
6235 AUDIT_ARG(owner, uap->uid, uap->gid);
6236
91447636
A
6237 VATTR_INIT(&va);
6238 if (uap->mode != -1)
6239 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6240 if (uap->uid != KAUTH_UID_NONE)
6241 VATTR_SET(&va, va_uid, uap->uid);
6242 if (uap->gid != KAUTH_GID_NONE)
6243 VATTR_SET(&va, va_gid, uap->gid);
6244
6245 xsecdst = NULL;
6246 switch(uap->xsecurity) {
6247 case USER_ADDR_NULL:
6248 VATTR_SET(&va, va_acl, NULL);
6249 break;
39236c6e
A
6250 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6251 VATTR_SET(&va, va_acl, NULL);
6252 break;
6253 /* not being set */
91447636
A
6254 case CAST_USER_ADDR_T(-1):
6255 break;
6256 default:
6257 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6258 return(error);
6259 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6260 }
6261
6262 error = fchmod1(p, uap->fd, &va);
6263
39037602 6264
91447636
A
6265 switch(uap->xsecurity) {
6266 case USER_ADDR_NULL:
6267 case CAST_USER_ADDR_T(-1):
6268 break;
6269 default:
6270 if (xsecdst != NULL)
6271 kauth_filesec_free(xsecdst);
6272 }
6273 return(error);
6274}
6275
6276int
b0d623f7 6277fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
91447636
A
6278{
6279 struct vnode_attr va;
6280
6281 VATTR_INIT(&va);
6282 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6283
6284 return(fchmod1(p, uap->fd, &va));
6285}
6286
6287
1c79356b
A
6288/*
6289 * Set ownership given a path name.
6290 */
1c79356b 6291/* ARGSUSED */
91447636 6292static int
fe8ab488
A
6293fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
6294 gid_t gid, int flag, enum uio_seg segflg)
1c79356b 6295{
2d21ac55 6296 vnode_t vp;
91447636 6297 struct vnode_attr va;
1c79356b
A
6298 int error;
6299 struct nameidata nd;
fe8ab488 6300 int follow;
91447636 6301 kauth_action_t action;
1c79356b 6302
fe8ab488 6303 AUDIT_ARG(owner, uid, gid);
55e303ae 6304
fe8ab488
A
6305 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6306 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
6307 path, ctx);
6308 error = nameiat(&nd, fd);
55e303ae 6309 if (error)
1c79356b
A
6310 return (error);
6311 vp = nd.ni_vp;
6312
91447636
A
6313 nameidone(&nd);
6314
91447636 6315 VATTR_INIT(&va);
fe8ab488
A
6316 if (uid != (uid_t)VNOVAL)
6317 VATTR_SET(&va, va_uid, uid);
6318 if (gid != (gid_t)VNOVAL)
6319 VATTR_SET(&va, va_gid, gid);
91447636 6320
2d21ac55 6321#if CONFIG_MACF
fe8ab488 6322 error = mac_vnode_check_setowner(ctx, vp, uid, gid);
2d21ac55
A
6323 if (error)
6324 goto out;
6325#endif
6326
91447636
A
6327 /* preflight and authorize attribute changes */
6328 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6329 goto out;
6330 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6331 goto out;
6332 error = vnode_setattr(vp, &va, ctx);
39037602
A
6333
6334#if CONFIG_MACF
6335 if (error == 0)
6336 mac_vnode_notify_setowner(ctx, vp, uid, gid);
6337#endif
6338
91447636
A
6339out:
6340 /*
6341 * EACCES is only allowed from namei(); permissions failure should
6342 * return EPERM, so we need to translate the error code.
6343 */
6344 if (error == EACCES)
6345 error = EPERM;
fe8ab488 6346
91447636 6347 vnode_put(vp);
1c79356b
A
6348 return (error);
6349}
6350
91447636 6351int
fe8ab488 6352chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
91447636 6353{
fe8ab488
A
6354 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6355 uap->uid, uap->gid, 0, UIO_USERSPACE));
91447636
A
6356}
6357
6358int
fe8ab488 6359lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
91447636 6360{
fe8ab488
A
6361 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6362 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6363}
6364
6365int
6366fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6367{
6368 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6369 return (EINVAL);
6370
6371 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6372 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
91447636
A
6373}
6374
1c79356b
A
6375/*
6376 * Set ownership given a file descriptor.
6377 */
1c79356b
A
6378/* ARGSUSED */
6379int
b0d623f7 6380fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
1c79356b 6381{
91447636 6382 struct vnode_attr va;
2d21ac55
A
6383 vfs_context_t ctx = vfs_context_current();
6384 vnode_t vp;
1c79356b 6385 int error;
91447636 6386 kauth_action_t action;
1c79356b 6387
55e303ae
A
6388 AUDIT_ARG(owner, uap->uid, uap->gid);
6389 AUDIT_ARG(fd, uap->fd);
6390
91447636 6391 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 6392 return (error);
55e303ae 6393
91447636
A
6394 if ( (error = vnode_getwithref(vp)) ) {
6395 file_drop(uap->fd);
6396 return(error);
6397 }
55e303ae
A
6398 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6399
91447636
A
6400 VATTR_INIT(&va);
6401 if (uap->uid != VNOVAL)
6402 VATTR_SET(&va, va_uid, uap->uid);
6403 if (uap->gid != VNOVAL)
6404 VATTR_SET(&va, va_gid, uap->gid);
6405
2d21ac55
A
6406#if NAMEDSTREAMS
6407 /* chown calls are not allowed for resource forks. */
6408 if (vp->v_flag & VISNAMEDSTREAM) {
6409 error = EPERM;
6410 goto out;
6411 }
6412#endif
6413
6414#if CONFIG_MACF
6415 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
6416 if (error)
6417 goto out;
6418#endif
91447636
A
6419
6420 /* preflight and authorize attribute changes */
2d21ac55 6421 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
91447636 6422 goto out;
2d21ac55 6423 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
91447636
A
6424 if (error == EACCES)
6425 error = EPERM;
6426 goto out;
6427 }
2d21ac55 6428 error = vnode_setattr(vp, &va, ctx);
4a249263 6429
39037602
A
6430#if CONFIG_MACF
6431 if (error == 0)
6432 mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid);
6433#endif
6434
91447636
A
6435out:
6436 (void)vnode_put(vp);
6437 file_drop(uap->fd);
1c79356b
A
6438 return (error);
6439}
6440
9bccf70c 6441static int
2d21ac55 6442getutimes(user_addr_t usrtvp, struct timespec *tsp)
9bccf70c 6443{
9bccf70c
A
6444 int error;
6445
91447636
A
6446 if (usrtvp == USER_ADDR_NULL) {
6447 struct timeval old_tv;
6448 /* XXX Y2038 bug because of microtime argument */
6449 microtime(&old_tv);
6450 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
9bccf70c
A
6451 tsp[1] = tsp[0];
6452 } else {
91447636 6453 if (IS_64BIT_PROCESS(current_proc())) {
b0d623f7 6454 struct user64_timeval tv[2];
91447636 6455 error = copyin(usrtvp, (void *)tv, sizeof(tv));
b0d623f7
A
6456 if (error)
6457 return (error);
6458 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6459 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 6460 } else {
b0d623f7
A
6461 struct user32_timeval tv[2];
6462 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6463 if (error)
6464 return (error);
6465 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6466 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 6467 }
9bccf70c
A
6468 }
6469 return 0;
6470}
6471
6472static int
2d21ac55 6473setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
91447636 6474 int nullflag)
9bccf70c
A
6475{
6476 int error;
91447636
A
6477 struct vnode_attr va;
6478 kauth_action_t action;
e5568f75
A
6479
6480 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6481
91447636
A
6482 VATTR_INIT(&va);
6483 VATTR_SET(&va, va_access_time, ts[0]);
6484 VATTR_SET(&va, va_modify_time, ts[1]);
9bccf70c 6485 if (nullflag)
91447636
A
6486 va.va_vaflags |= VA_UTIMES_NULL;
6487
2d21ac55
A
6488#if NAMEDSTREAMS
6489 /* utimes calls are not allowed for resource forks. */
6490 if (vp->v_flag & VISNAMEDSTREAM) {
6491 error = EPERM;
6492 goto out;
6493 }
6494#endif
6495
6496#if CONFIG_MACF
6497 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
6498 if (error)
6499 goto out;
6500#endif
6501 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
6502 if (!nullflag && error == EACCES)
6503 error = EPERM;
91447636 6504 goto out;
2d21ac55
A
6505 }
6506
91447636 6507 /* since we may not need to auth anything, check here */
2d21ac55
A
6508 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6509 if (!nullflag && error == EACCES)
6510 error = EPERM;
91447636 6511 goto out;
2d21ac55 6512 }
91447636 6513 error = vnode_setattr(vp, &va, ctx);
4a249263 6514
39037602
A
6515#if CONFIG_MACF
6516 if (error == 0)
6517 mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]);
6518#endif
6519
9bccf70c
A
6520out:
6521 return error;
6522}
6523
1c79356b
A
6524/*
6525 * Set the access and modification times of a file.
6526 */
1c79356b
A
6527/* ARGSUSED */
6528int
b0d623f7 6529utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
1c79356b 6530{
9bccf70c 6531 struct timespec ts[2];
91447636 6532 user_addr_t usrtvp;
1c79356b
A
6533 int error;
6534 struct nameidata nd;
2d21ac55 6535 vfs_context_t ctx = vfs_context_current();
1c79356b 6536
2d21ac55 6537 /*
39037602 6538 * AUDIT: Needed to change the order of operations to do the
55e303ae
A
6539 * name lookup first because auditing wants the path.
6540 */
39037602 6541 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
2d21ac55 6542 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
6543 error = namei(&nd);
6544 if (error)
9bccf70c 6545 return (error);
91447636 6546 nameidone(&nd);
55e303ae 6547
91447636
A
6548 /*
6549 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6550 * the current time instead.
6551 */
55e303ae 6552 usrtvp = uap->tptr;
91447636
A
6553 if ((error = getutimes(usrtvp, ts)) != 0)
6554 goto out;
6555
2d21ac55 6556 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
91447636
A
6557
6558out:
6559 vnode_put(nd.ni_vp);
1c79356b
A
6560 return (error);
6561}
6562
9bccf70c
A
6563/*
6564 * Set the access and modification times of a file.
6565 */
9bccf70c
A
6566/* ARGSUSED */
6567int
b0d623f7 6568futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
9bccf70c
A
6569{
6570 struct timespec ts[2];
2d21ac55 6571 vnode_t vp;
91447636 6572 user_addr_t usrtvp;
9bccf70c
A
6573 int error;
6574
55e303ae 6575 AUDIT_ARG(fd, uap->fd);
9bccf70c
A
6576 usrtvp = uap->tptr;
6577 if ((error = getutimes(usrtvp, ts)) != 0)
6578 return (error);
91447636 6579 if ((error = file_vnode(uap->fd, &vp)) != 0)
9bccf70c 6580 return (error);
91447636
A
6581 if((error = vnode_getwithref(vp))) {
6582 file_drop(uap->fd);
6583 return(error);
6584 }
55e303ae 6585
2d21ac55 6586 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
91447636
A
6587 vnode_put(vp);
6588 file_drop(uap->fd);
6589 return(error);
9bccf70c
A
6590}
6591
1c79356b
A
6592/*
6593 * Truncate a file given its path name.
6594 */
1c79356b
A
6595/* ARGSUSED */
6596int
b0d623f7 6597truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
1c79356b 6598{
2d21ac55 6599 vnode_t vp;
91447636 6600 struct vnode_attr va;
2d21ac55 6601 vfs_context_t ctx = vfs_context_current();
1c79356b
A
6602 int error;
6603 struct nameidata nd;
91447636
A
6604 kauth_action_t action;
6605
0b4e3aa0
A
6606 if (uap->length < 0)
6607 return(EINVAL);
39037602 6608 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
2d21ac55 6609 UIO_USERSPACE, uap->path, ctx);
91447636 6610 if ((error = namei(&nd)))
1c79356b
A
6611 return (error);
6612 vp = nd.ni_vp;
91447636
A
6613
6614 nameidone(&nd);
6615
6616 VATTR_INIT(&va);
6617 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55
A
6618
6619#if CONFIG_MACF
6620 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
6621 if (error)
6622 goto out;
6623#endif
6624
6625 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
91447636 6626 goto out;
2d21ac55 6627 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
91447636 6628 goto out;
2d21ac55 6629 error = vnode_setattr(vp, &va, ctx);
39037602
A
6630
6631#if CONFIG_MACF
6632 if (error == 0)
6633 mac_vnode_notify_truncate(ctx, NOCRED, vp);
6634#endif
6635
91447636
A
6636out:
6637 vnode_put(vp);
1c79356b
A
6638 return (error);
6639}
6640
6641/*
6642 * Truncate a file given a file descriptor.
6643 */
1c79356b
A
6644/* ARGSUSED */
6645int
b0d623f7 6646ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
1c79356b 6647{
2d21ac55 6648 vfs_context_t ctx = vfs_context_current();
91447636 6649 struct vnode_attr va;
2d21ac55 6650 vnode_t vp;
91447636
A
6651 struct fileproc *fp;
6652 int error ;
6653 int fd = uap->fd;
1c79356b 6654
55e303ae 6655 AUDIT_ARG(fd, uap->fd);
0b4e3aa0
A
6656 if (uap->length < 0)
6657 return(EINVAL);
39037602 6658
91447636
A
6659 if ( (error = fp_lookup(p,fd,&fp,0)) ) {
6660 return(error);
6661 }
1c79356b 6662
39236c6e
A
6663 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
6664 case DTYPE_PSXSHM:
91447636
A
6665 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
6666 goto out;
39236c6e
A
6667 case DTYPE_VNODE:
6668 break;
6669 default:
91447636
A
6670 error = EINVAL;
6671 goto out;
1c79356b 6672 }
1c79356b 6673
2d21ac55 6674 vp = (vnode_t)fp->f_fglob->fg_data;
e5568f75 6675
91447636
A
6676 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
6677 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6678 error = EINVAL;
6679 goto out;
1c79356b 6680 }
1c79356b 6681
91447636
A
6682 if ((error = vnode_getwithref(vp)) != 0) {
6683 goto out;
6684 }
1c79356b 6685
91447636 6686 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 6687
2d21ac55
A
6688#if CONFIG_MACF
6689 error = mac_vnode_check_truncate(ctx,
6690 fp->f_fglob->fg_cred, vp);
6691 if (error) {
6692 (void)vnode_put(vp);
6693 goto out;
6694 }
6695#endif
91447636
A
6696 VATTR_INIT(&va);
6697 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55 6698 error = vnode_setattr(vp, &va, ctx);
39037602
A
6699
6700#if CONFIG_MACF
6701 if (error == 0)
6702 mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp);
6703#endif
6704
91447636
A
6705 (void)vnode_put(vp);
6706out:
6707 file_drop(fd);
6708 return (error);
1c79356b 6709}
91447636 6710
1c79356b
A
6711
6712/*
b0d623f7 6713 * Sync an open file with synchronized I/O _file_ integrity completion
1c79356b 6714 */
1c79356b
A
6715/* ARGSUSED */
6716int
b0d623f7 6717fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
1c79356b 6718{
2d21ac55 6719 __pthread_testcancel(1);
b0d623f7
A
6720 return(fsync_common(p, uap, MNT_WAIT));
6721}
6722
6723
6724/*
6725 * Sync an open file with synchronized I/O _file_ integrity completion
6726 *
6727 * Notes: This is a legacy support function that does not test for
6728 * thread cancellation points.
6729 */
6730/* ARGSUSED */
39037602 6731int
b0d623f7
A
6732fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6733{
6734 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
2d21ac55
A
6735}
6736
b0d623f7
A
6737
6738/*
6739 * Sync an open file with synchronized I/O _data_ integrity completion
6740 */
6741/* ARGSUSED */
2d21ac55 6742int
b0d623f7
A
6743fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6744{
6745 __pthread_testcancel(1);
6746 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6747}
6748
6749
6750/*
6751 * fsync_common
6752 *
6753 * Common fsync code to support both synchronized I/O file integrity completion
6754 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6755 *
6756 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6757 * will only guarantee that the file data contents are retrievable. If
6758 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
6759 * includes additional metadata unnecessary for retrieving the file data
6760 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6761 * storage.
6762 *
6763 * Parameters: p The process
6764 * uap->fd The descriptor to synchronize
6765 * flags The data integrity flags
6766 *
6767 * Returns: int Success
6768 * fp_getfvp:EBADF Bad file descriptor
6769 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6770 * VNOP_FSYNC:??? unspecified
6771 *
6772 * Notes: We use struct fsync_args because it is a short name, and all
6773 * caller argument structures are otherwise identical.
6774 */
6775static int
6776fsync_common(proc_t p, struct fsync_args *uap, int flags)
2d21ac55
A
6777{
6778 vnode_t vp;
91447636 6779 struct fileproc *fp;
2d21ac55 6780 vfs_context_t ctx = vfs_context_current();
1c79356b
A
6781 int error;
6782
b0d623f7
A
6783 AUDIT_ARG(fd, uap->fd);
6784
91447636 6785 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
1c79356b 6786 return (error);
91447636
A
6787 if ( (error = vnode_getwithref(vp)) ) {
6788 file_drop(uap->fd);
6789 return(error);
6790 }
91447636 6791
b0d623f7
A
6792 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6793
6794 error = VNOP_FSYNC(vp, flags, ctx);
2d21ac55
A
6795
6796#if NAMEDRSRCFORK
6797 /* Sync resource fork shadow file if necessary. */
6798 if ((error == 0) &&
39037602 6799 (vp->v_flag & VISNAMEDSTREAM) &&
2d21ac55 6800 (vp->v_parent != NULLVP) &&
b0d623f7 6801 vnode_isshadow(vp) &&
2d21ac55
A
6802 (fp->f_flags & FP_WRITTEN)) {
6803 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
6804 }
6805#endif
91447636
A
6806
6807 (void)vnode_put(vp);
6808 file_drop(uap->fd);
1c79356b
A
6809 return (error);
6810}
6811
6812/*
39037602 6813 * Duplicate files. Source must be a file, target must be a file or
1c79356b 6814 * must not exist.
91447636
A
6815 *
6816 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6817 * perform inheritance correctly.
1c79356b 6818 */
1c79356b
A
6819/* ARGSUSED */
6820int
b0d623f7 6821copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
1c79356b 6822{
91447636 6823 vnode_t tvp, fvp, tdvp, sdvp;
1c79356b
A
6824 struct nameidata fromnd, tond;
6825 int error;
2d21ac55 6826 vfs_context_t ctx = vfs_context_current();
39037602
A
6827#if CONFIG_MACF
6828 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
6829 struct vnode_attr va;
6830#endif
55e303ae
A
6831
6832 /* Check that the flags are valid. */
1c79356b
A
6833
6834 if (uap->flags & ~CPF_MASK) {
55e303ae
A
6835 return(EINVAL);
6836 }
1c79356b 6837
4bd07ac2 6838 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
2d21ac55 6839 UIO_USERSPACE, uap->from, ctx);
91447636 6840 if ((error = namei(&fromnd)))
1c79356b
A
6841 return (error);
6842 fvp = fromnd.ni_vp;
6843
6d2010ae
A
6844 NDINIT(&tond, CREATE, OP_LINK,
6845 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6846 UIO_USERSPACE, uap->to, ctx);
91447636 6847 if ((error = namei(&tond))) {
1c79356b
A
6848 goto out1;
6849 }
6850 tdvp = tond.ni_dvp;
6851 tvp = tond.ni_vp;
91447636 6852
1c79356b
A
6853 if (tvp != NULL) {
6854 if (!(uap->flags & CPF_OVERWRITE)) {
6855 error = EEXIST;
6856 goto out;
6857 }
6858 }
39037602 6859
1c79356b
A
6860 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
6861 error = EISDIR;
6862 goto out;
6863 }
6864
39037602
A
6865 /* This calls existing MAC hooks for open */
6866 if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx,
6867 NULL))) {
6868 goto out;
6869 }
6870
6871 if (tvp) {
6872 /*
6873 * See unlinkat_internal for an explanation of the potential
6874 * ENOENT from the MAC hook but the gist is that the MAC hook
6875 * can fail because vn_getpath isn't able to return the full
6876 * path. We choose to ignore this failure.
6877 */
6878 error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL);
6879 if (error && error != ENOENT)
6880 goto out;
6881 error = 0;
6882 }
6883
6884#if CONFIG_MACF
6885 VATTR_INIT(&va);
6886 VATTR_SET(&va, va_type, fvp->v_type);
6887 /* Mask off all but regular access permissions */
6888 VATTR_SET(&va, va_mode,
6889 ((((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
6890 error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
6891 if (error)
6892 goto out;
6893#endif /* CONFIG_MACF */
6894
2d21ac55 6895 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
1c79356b
A
6896 goto out;
6897
6898 if (fvp == tdvp)
6899 error = EINVAL;
6900 /*
6901 * If source is the same as the destination (that is the
6902 * same inode number) then there is nothing to do.
6903 * (fixed to have POSIX semantics - CSM 3/2/98)
6904 */
6905 if (fvp == tvp)
6906 error = -1;
91447636 6907 if (!error)
2d21ac55 6908 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
1c79356b 6909out:
91447636
A
6910 sdvp = tond.ni_startdir;
6911 /*
6912 * nameidone has to happen before we vnode_put(tdvp)
6913 * since it may need to release the fs_nodelock on the tdvp
6914 */
6915 nameidone(&tond);
6916
6917 if (tvp)
6918 vnode_put(tvp);
6919 vnode_put(tdvp);
6920 vnode_put(sdvp);
1c79356b 6921out1:
91447636
A
6922 vnode_put(fvp);
6923
91447636
A
6924 nameidone(&fromnd);
6925
1c79356b
A
6926 if (error == -1)
6927 return (0);
6928 return (error);
6929}
6930
39037602 6931#define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
91447636 6932
1c79356b 6933/*
39037602
A
6934 * Helper function for doing clones. The caller is expected to provide an
6935 * iocounted source vnode and release it.
1c79356b 6936 */
fe8ab488 6937static int
39037602
A
6938clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
6939 user_addr_t dst, uint32_t flags, vfs_context_t ctx)
1c79356b 6940{
91447636 6941 vnode_t tvp, tdvp;
39037602 6942 struct nameidata tond;
1c79356b 6943 int error;
39037602 6944 int follow;
813fb2f6 6945 boolean_t free_src_acl;
39037602
A
6946 boolean_t attr_cleanup;
6947 enum vtype v_type;
6948 kauth_action_t action;
6949 struct componentname *cnp;
6950 uint32_t defaulted;
6951 struct vnode_attr va;
813fb2f6 6952 struct vnode_attr nva;
5ba3f43e 6953 uint32_t vnop_flags;
316670eb 6954
39037602
A
6955 v_type = vnode_vtype(fvp);
6956 switch (v_type) {
6957 case VLNK:
6958 /* FALLTHRU */
6959 case VREG:
6960 action = KAUTH_VNODE_ADD_FILE;
6961 break;
6962 case VDIR:
6963 if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
6964 fvp->v_mountedhere) {
6965 return (EINVAL);
6966 }
6967 action = KAUTH_VNODE_ADD_SUBDIRECTORY;
6968 break;
6969 default:
6970 return (EINVAL);
6971 }
6972
6973 AUDIT_ARG(fd2, dst_dirfd);
6974 AUDIT_ARG(value32, flags);
6975
6976 follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6977 NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
6978 UIO_USERSPACE, dst, ctx);
6979 if ((error = nameiat(&tond, dst_dirfd)))
6980 return (error);
6981 cnp = &tond.ni_cnd;
6982 tdvp = tond.ni_dvp;
6983 tvp = tond.ni_vp;
6984
813fb2f6 6985 free_src_acl = FALSE;
39037602
A
6986 attr_cleanup = FALSE;
6987
6988 if (tvp != NULL) {
6989 error = EEXIST;
6990 goto out;
6991 }
6992
6993 if (vnode_mount(tdvp) != vnode_mount(fvp)) {
6994 error = EXDEV;
6995 goto out;
6996 }
6997
6998#if CONFIG_MACF
6999 if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp)))
7000 goto out;
7001#endif
7002 if ((error = vnode_authorize(tdvp, NULL, action, ctx)))
7003 goto out;
7004
7005 action = KAUTH_VNODE_GENERIC_READ_BITS;
7006 if (data_read_authorised)
7007 action &= ~KAUTH_VNODE_READ_DATA;
7008 if ((error = vnode_authorize(fvp, NULL, action, ctx)))
7009 goto out;
7010
7011 /*
7012 * certain attributes may need to be changed from the source, we ask for
7013 * those here.
7014 */
7015 VATTR_INIT(&va);
813fb2f6
A
7016 VATTR_WANTED(&va, va_uid);
7017 VATTR_WANTED(&va, va_gid);
39037602
A
7018 VATTR_WANTED(&va, va_mode);
7019 VATTR_WANTED(&va, va_flags);
7020 VATTR_WANTED(&va, va_acl);
7021
7022 if ((error = vnode_getattr(fvp, &va, ctx)) != 0)
7023 goto out;
7024
813fb2f6
A
7025 VATTR_INIT(&nva);
7026 VATTR_SET(&nva, va_type, v_type);
7027 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) {
7028 VATTR_SET(&nva, va_acl, va.va_acl);
7029 free_src_acl = TRUE;
39037602
A
7030 }
7031
7032 /* Handle ACL inheritance, initialize vap. */
7033 if (v_type == VLNK) {
813fb2f6 7034 error = vnode_authattr_new(tdvp, &nva, 0, ctx);
39037602 7035 } else {
813fb2f6
A
7036 error = vn_attribute_prepare(tdvp, &nva, &defaulted, ctx);
7037 if (error)
7038 goto out;
39037602
A
7039 attr_cleanup = TRUE;
7040 }
7041
5ba3f43e 7042 vnop_flags = VNODE_CLONEFILE_DEFAULT;
813fb2f6
A
7043 /*
7044 * We've got initial values for all security parameters,
7045 * If we are superuser, then we can change owners to be the
7046 * same as the source. Both superuser and the owner have default
7047 * WRITE_SECURITY privileges so all other fields can be taken
7048 * from source as well.
7049 */
5ba3f43e 7050 if (!(flags & CLONE_NOOWNERCOPY) && vfs_context_issuser(ctx)) {
813fb2f6
A
7051 if (VATTR_IS_SUPPORTED(&va, va_uid))
7052 VATTR_SET(&nva, va_uid, va.va_uid);
7053 if (VATTR_IS_SUPPORTED(&va, va_gid))
7054 VATTR_SET(&nva, va_gid, va.va_gid);
5ba3f43e
A
7055 } else {
7056 vnop_flags |= VNODE_CLONEFILE_NOOWNERCOPY;
813fb2f6 7057 }
5ba3f43e 7058
813fb2f6
A
7059 if (VATTR_IS_SUPPORTED(&va, va_mode))
7060 VATTR_SET(&nva, va_mode, va.va_mode);
7061 if (VATTR_IS_SUPPORTED(&va, va_flags)) {
7062 VATTR_SET(&nva, va_flags,
5ba3f43e
A
7063 ((va.va_flags & ~(UF_DATAVAULT | SF_RESTRICTED)) | /* Turn off from source */
7064 (nva.va_flags & (UF_DATAVAULT | SF_RESTRICTED))));
39037602
A
7065 }
7066
5ba3f43e 7067 error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &nva, vnop_flags, ctx);
39037602
A
7068
7069 if (!error && tvp) {
7070 int update_flags = 0;
7071#if CONFIG_FSE
7072 int fsevent;
7073#endif /* CONFIG_FSE */
7074
7075#if CONFIG_MACF
7076 (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
7077 VNODE_LABEL_CREATE, ctx);
7078#endif
7079 /*
7080 * If some of the requested attributes weren't handled by the
7081 * VNOP, use our fallback code.
7082 */
7083 if (!VATTR_ALL_SUPPORTED(&va))
813fb2f6 7084 (void)vnode_setattr_fallback(tvp, &nva, ctx);
39037602
A
7085
7086 // Make sure the name & parent pointers are hooked up
7087 if (tvp->v_name == NULL)
7088 update_flags |= VNODE_UPDATE_NAME;
7089 if (tvp->v_parent == NULLVP)
7090 update_flags |= VNODE_UPDATE_PARENT;
7091
7092 if (update_flags) {
7093 (void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
7094 cnp->cn_namelen, cnp->cn_hash, update_flags);
7095 }
7096
7097#if CONFIG_FSE
7098 switch (vnode_vtype(tvp)) {
7099 case VLNK:
7100 /* FALLTHRU */
7101 case VREG:
7102 fsevent = FSE_CREATE_FILE;
7103 break;
7104 case VDIR:
7105 fsevent = FSE_CREATE_DIR;
7106 break;
7107 default:
7108 goto out;
7109 }
7110
7111 if (need_fsevent(fsevent, tvp)) {
5ba3f43e
A
7112 /*
7113 * The following is a sequence of three explicit events.
7114 * A pair of FSE_CLONE events representing the source and destination
7115 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
7116 * fseventsd may coalesce the destination clone and create events
7117 * into a single event resulting in the following sequence for a client
7118 * FSE_CLONE (src)
7119 * FSE_CLONE | FSE_CREATE (dst)
7120 */
7121 add_fsevent(FSE_CLONE, ctx, FSE_ARG_VNODE, fvp, FSE_ARG_VNODE, tvp,
7122 FSE_ARG_DONE);
39037602
A
7123 add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
7124 FSE_ARG_DONE);
7125 }
7126#endif /* CONFIG_FSE */
7127 }
39037602
A
7128
7129out:
7130 if (attr_cleanup)
813fb2f6
A
7131 vn_attribute_cleanup(&nva, defaulted);
7132 if (free_src_acl && va.va_acl)
39037602
A
7133 kauth_acl_free(va.va_acl);
7134 nameidone(&tond);
7135 if (tvp)
7136 vnode_put(tvp);
7137 vnode_put(tdvp);
7138 return (error);
7139}
7140
7141/*
7142 * clone files or directories, target must not exist.
7143 */
7144/* ARGSUSED */
7145int
7146clonefileat(__unused proc_t p, struct clonefileat_args *uap,
7147 __unused int32_t *retval)
7148{
7149 vnode_t fvp;
7150 struct nameidata fromnd;
7151 int follow;
7152 int error;
7153 vfs_context_t ctx = vfs_context_current();
7154
7155 /* Check that the flags are valid. */
5ba3f43e 7156 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY))
39037602
A
7157 return (EINVAL);
7158
7159 AUDIT_ARG(fd, uap->src_dirfd);
7160
7161 follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7162 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
7163 UIO_USERSPACE, uap->src, ctx);
7164 if ((error = nameiat(&fromnd, uap->src_dirfd)))
7165 return (error);
7166
7167 fvp = fromnd.ni_vp;
7168 nameidone(&fromnd);
7169
7170 error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
7171 uap->flags, ctx);
7172
7173 vnode_put(fvp);
7174 return (error);
7175}
7176
7177int
7178fclonefileat(__unused proc_t p, struct fclonefileat_args *uap,
7179 __unused int32_t *retval)
7180{
7181 vnode_t fvp;
7182 struct fileproc *fp;
7183 int error;
7184 vfs_context_t ctx = vfs_context_current();
7185
5ba3f43e
A
7186 /* Check that the flags are valid. */
7187 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY))
7188 return (EINVAL);
7189
39037602
A
7190 AUDIT_ARG(fd, uap->src_fd);
7191 error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
7192 if (error)
7193 return (error);
7194
7195 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7196 AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
7197 error = EBADF;
7198 goto out;
7199 }
7200
7201 if ((error = vnode_getwithref(fvp)))
7202 goto out;
7203
7204 AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
7205
7206 error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
7207 uap->flags, ctx);
7208
7209 vnode_put(fvp);
7210out:
7211 file_drop(uap->src_fd);
7212 return (error);
7213}
7214
7215/*
7216 * Rename files. Source and destination must either both be directories,
7217 * or both not be directories. If target is a directory, it must be empty.
7218 */
7219/* ARGSUSED */
7220static int
7221renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
7222 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
7223{
7224 if (flags & ~VFS_RENAME_FLAGS_MASK)
7225 return EINVAL;
7226
7227 if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL))
7228 return EINVAL;
7229
7230 vnode_t tvp, tdvp;
7231 vnode_t fvp, fdvp;
7232 struct nameidata *fromnd, *tond;
7233 int error;
7234 int do_retry;
7235 int retry_count;
7236 int mntrename;
7237 int need_event;
7238 const char *oname = NULL;
7239 char *from_name = NULL, *to_name = NULL;
7240 int from_len=0, to_len=0;
7241 int holding_mntlock;
7242 mount_t locked_mp = NULL;
7243 vnode_t oparent = NULLVP;
7244#if CONFIG_FSE
7245 fse_info from_finfo, to_finfo;
7246#endif
7247 int from_truncated=0, to_truncated;
7248 int batched = 0;
7249 struct vnode_attr *fvap, *tvap;
7250 int continuing = 0;
7251 /* carving out a chunk for structs that are too big to be on stack. */
7252 struct {
7253 struct nameidata from_node, to_node;
7254 struct vnode_attr fv_attr, tv_attr;
7255 } * __rename_data;
7256 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
7257 fromnd = &__rename_data->from_node;
7258 tond = &__rename_data->to_node;
7259
7260 holding_mntlock = 0;
7261 do_retry = 0;
7262 retry_count = 0;
91447636
A
7263retry:
7264 fvp = tvp = NULL;
7265 fdvp = tdvp = NULL;
6d2010ae 7266 fvap = tvap = NULL;
1c79356b
A
7267 mntrename = FALSE;
7268
316670eb 7269 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
fe8ab488 7270 segflg, from, ctx);
316670eb 7271 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
fe8ab488 7272
316670eb 7273 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
fe8ab488 7274 segflg, to, ctx);
316670eb 7275 tond->ni_flag = NAMEI_COMPOUNDRENAME;
fe8ab488 7276
6d2010ae 7277continue_lookup:
316670eb 7278 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
fe8ab488 7279 if ( (error = nameiat(fromnd, fromfd)) )
6d2010ae 7280 goto out1;
316670eb
A
7281 fdvp = fromnd->ni_dvp;
7282 fvp = fromnd->ni_vp;
1c79356b 7283
6d2010ae 7284 if (fvp && fvp->v_type == VDIR)
316670eb 7285 tond->ni_cnd.cn_flags |= WILLBEDIR;
6d2010ae 7286 }
2d21ac55 7287
316670eb 7288 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
fe8ab488 7289 if ( (error = nameiat(tond, tofd)) ) {
6d2010ae
A
7290 /*
7291 * Translate error code for rename("dir1", "dir2/.").
7292 */
fe8ab488 7293 if (error == EISDIR && fvp->v_type == VDIR)
6d2010ae
A
7294 error = EINVAL;
7295 goto out1;
7296 }
316670eb
A
7297 tdvp = tond->ni_dvp;
7298 tvp = tond->ni_vp;
fe8ab488 7299 }
91447636 7300
00867663
A
7301#if DEVELOPMENT || DEBUG
7302 /*
7303 * XXX VSWAP: Check for entitlements or special flag here
7304 * so we can restrict access appropriately.
7305 */
7306#else /* DEVELOPMENT || DEBUG */
7307
7308 if (fromnd->ni_vp && vnode_isswap(fromnd->ni_vp) && (ctx != vfs_context_kernel())) {
7309 error = EPERM;
7310 goto out1;
7311 }
7312
7313 if (tond->ni_vp && vnode_isswap(tond->ni_vp) && (ctx != vfs_context_kernel())) {
7314 error = EPERM;
7315 goto out1;
7316 }
7317#endif /* DEVELOPMENT || DEBUG */
7318
39037602
A
7319 if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
7320 error = ENOENT;
7321 goto out1;
7322 }
7323
7324 if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
7325 error = EEXIST;
7326 goto out1;
7327 }
7328
6d2010ae
A
7329 batched = vnode_compound_rename_available(fdvp);
7330 if (!fvp) {
fe8ab488 7331 /*
6d2010ae
A
7332 * Claim: this check will never reject a valid rename.
7333 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
7334 * Suppose fdvp and tdvp are not on the same mount.
fe8ab488 7335 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6d2010ae
A
7336 * then you can't move it to within another dir on the same mountpoint.
7337 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
7338 *
7339 * If this check passes, then we are safe to pass these vnodes to the same FS.
91447636 7340 */
6d2010ae
A
7341 if (fdvp->v_mount != tdvp->v_mount) {
7342 error = EXDEV;
7343 goto out1;
7344 }
7345 goto skipped_lookup;
1c79356b 7346 }
2d21ac55 7347
6d2010ae 7348 if (!batched) {
39037602 7349 error = vn_authorize_renamex(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, flags, NULL);
6d2010ae 7350 if (error) {
3e170ce0
A
7351 if (error == ENOENT) {
7352 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7353 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7354 /*
7355 * We encountered a race where after doing the namei, tvp stops
7356 * being valid. If so, simply re-drive the rename call from the
7357 * top.
7358 */
7359 do_retry = 1;
7360 retry_count += 1;
7361 }
6d2010ae 7362 }
91447636 7363 goto out1;
1c79356b
A
7364 }
7365 }
6d2010ae 7366
2d21ac55
A
7367 /*
7368 * If the source and destination are the same (i.e. they're
7369 * links to the same vnode) and the target file system is
7370 * case sensitive, then there is nothing to do.
6d2010ae
A
7371 *
7372 * XXX Come back to this.
2d21ac55
A
7373 */
7374 if (fvp == tvp) {
7375 int pathconf_val;
fe8ab488 7376
2d21ac55
A
7377 /*
7378 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
7379 * then assume that this file system is case sensitive.
7380 */
7381 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
7382 pathconf_val != 0) {
7383 goto out1;
fe8ab488 7384 }
2d21ac55 7385 }
91447636 7386
1c79356b
A
7387 /*
7388 * Allow the renaming of mount points.
7389 * - target must not exist
7390 * - target must reside in the same directory as source
7391 * - union mounts cannot be renamed
7392 * - "/" cannot be renamed
6d2010ae
A
7393 *
7394 * XXX Handle this in VFS after a continued lookup (if we missed
7395 * in the cache to start off)
39037602
A
7396 *
7397 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
7398 * we'll skip past here. The file system is responsible for
7399 * checking that @tvp is not a descendent of @fvp and vice versa
7400 * so it should always return EINVAL if either @tvp or @fvp is the
7401 * root of a volume.
1c79356b 7402 */
91447636 7403 if ((fvp->v_flag & VROOT) &&
1c79356b
A
7404 (fvp->v_type == VDIR) &&
7405 (tvp == NULL) &&
7406 (fvp->v_mountedhere == NULL) &&
91447636 7407 (fdvp == tdvp) &&
1c79356b
A
7408 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
7409 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
2d21ac55 7410 vnode_t coveredvp;
fe8ab488 7411
1c79356b 7412 /* switch fvp to the covered vnode */
91447636
A
7413 coveredvp = fvp->v_mount->mnt_vnodecovered;
7414 if ( (vnode_getwithref(coveredvp)) ) {
7415 error = ENOENT;
7416 goto out1;
7417 }
7418 vnode_put(fvp);
7419
7420 fvp = coveredvp;
1c79356b
A
7421 mntrename = TRUE;
7422 }
91447636
A
7423 /*
7424 * Check for cross-device rename.
7425 */
7426 if ((fvp->v_mount != tdvp->v_mount) ||
7427 (tvp && (fvp->v_mount != tvp->v_mount))) {
7428 error = EXDEV;
7429 goto out1;
7430 }
55e303ae 7431
91447636
A
7432 /*
7433 * If source is the same as the destination (that is the
7434 * same inode number) then there is nothing to do...
7435 * EXCEPT if the underlying file system supports case
7436 * insensitivity and is case preserving. In this case
7437 * the file system needs to handle the special case of
7438 * getting the same vnode as target (fvp) and source (tvp).
7439 *
7440 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
7441 * and _PC_CASE_PRESERVING can have this exception, and they need to
7442 * handle the special case of getting the same vnode as target and
7443 * source. NOTE: Then the target is unlocked going into vnop_rename,
7444 * so not to cause locking problems. There is a single reference on tvp.
7445 *
fe8ab488 7446 * NOTE - that fvp == tvp also occurs if they are hard linked and
b0d623f7
A
7447 * that correct behaviour then is just to return success without doing
7448 * anything.
6d2010ae
A
7449 *
7450 * XXX filesystem should take care of this itself, perhaps...
91447636
A
7451 */
7452 if (fvp == tvp && fdvp == tdvp) {
316670eb
A
7453 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
7454 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
7455 fromnd->ni_cnd.cn_namelen)) {
91447636 7456 goto out1;
55e303ae 7457 }
91447636 7458 }
55e303ae 7459
91447636
A
7460 if (holding_mntlock && fvp->v_mount != locked_mp) {
7461 /*
7462 * we're holding a reference and lock
7463 * on locked_mp, but it no longer matches
7464 * what we want to do... so drop our hold
7465 */
7466 mount_unlock_renames(locked_mp);
7467 mount_drop(locked_mp, 0);
7468 holding_mntlock = 0;
7469 }
7470 if (tdvp != fdvp && fvp->v_type == VDIR) {
7471 /*
7472 * serialize renames that re-shape
7473 * the tree... if holding_mntlock is
7474 * set, then we're ready to go...
7475 * otherwise we
7476 * first need to drop the iocounts
7477 * we picked up, second take the
7478 * lock to serialize the access,
7479 * then finally start the lookup
7480 * process over with the lock held
7481 */
7482 if (!holding_mntlock) {
7483 /*
7484 * need to grab a reference on
7485 * the mount point before we
7486 * drop all the iocounts... once
7487 * the iocounts are gone, the mount
7488 * could follow
7489 */
7490 locked_mp = fvp->v_mount;
7491 mount_ref(locked_mp, 0);
55e303ae 7492
91447636
A
7493 /*
7494 * nameidone has to happen before we vnode_put(tvp)
7495 * since it may need to release the fs_nodelock on the tvp
7496 */
316670eb 7497 nameidone(tond);
55e303ae 7498
91447636
A
7499 if (tvp)
7500 vnode_put(tvp);
7501 vnode_put(tdvp);
7502
7503 /*
7504 * nameidone has to happen before we vnode_put(fdvp)
7505 * since it may need to release the fs_nodelock on the fvp
7506 */
316670eb 7507 nameidone(fromnd);
55e303ae 7508
91447636
A
7509 vnode_put(fvp);
7510 vnode_put(fdvp);
7511
7512 mount_lock_renames(locked_mp);
7513 holding_mntlock = 1;
7514
7515 goto retry;
55e303ae 7516 }
91447636
A
7517 } else {
7518 /*
7519 * when we dropped the iocounts to take
fe8ab488 7520 * the lock, we allowed the identity of
91447636
A
7521 * the various vnodes to change... if they did,
7522 * we may no longer be dealing with a rename
7523 * that reshapes the tree... once we're holding
7524 * the iocounts, the vnodes can't change type
7525 * so we're free to drop the lock at this point
7526 * and continue on
1c79356b 7527 */
91447636
A
7528 if (holding_mntlock) {
7529 mount_unlock_renames(locked_mp);
7530 mount_drop(locked_mp, 0);
7531 holding_mntlock = 0;
1c79356b 7532 }
91447636 7533 }
6d2010ae 7534
91447636
A
7535 // save these off so we can later verify that fvp is the same
7536 oname = fvp->v_name;
7537 oparent = fvp->v_parent;
55e303ae 7538
6d2010ae 7539skipped_lookup:
2d21ac55 7540#if CONFIG_FSE
6d2010ae 7541 need_event = need_fsevent(FSE_RENAME, fdvp);
fe8ab488 7542 if (need_event) {
6d2010ae
A
7543 if (fvp) {
7544 get_fse_info(fvp, &from_finfo, ctx);
7545 } else {
316670eb 7546 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
6d2010ae
A
7547 if (error) {
7548 goto out1;
7549 }
7550
316670eb 7551 fvap = &__rename_data->fv_attr;
6d2010ae 7552 }
55e303ae 7553
91447636 7554 if (tvp) {
2d21ac55 7555 get_fse_info(tvp, &to_finfo, ctx);
6d2010ae 7556 } else if (batched) {
316670eb 7557 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
6d2010ae
A
7558 if (error) {
7559 goto out1;
7560 }
7561
316670eb 7562 tvap = &__rename_data->tv_attr;
2d21ac55
A
7563 }
7564 }
7565#else
7566 need_event = 0;
7567#endif /* CONFIG_FSE */
7568
7569 if (need_event || kauth_authorize_fileop_has_listeners()) {
2d21ac55 7570 if (from_name == NULL) {
6d2010ae
A
7571 GET_PATH(from_name);
7572 if (from_name == NULL) {
7573 error = ENOMEM;
7574 goto out1;
7575 }
91447636 7576 }
b0d623f7 7577
316670eb 7578 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
55e303ae 7579
2d21ac55 7580 if (to_name == NULL) {
6d2010ae
A
7581 GET_PATH(to_name);
7582 if (to_name == NULL) {
7583 error = ENOMEM;
7584 goto out1;
7585 }
2d21ac55 7586 }
91447636 7587
316670eb 7588 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
fe8ab488 7589 }
316670eb
A
7590 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
7591 tdvp, &tvp, &tond->ni_cnd, tvap,
39037602 7592 flags, ctx);
55e303ae 7593
91447636
A
7594 if (holding_mntlock) {
7595 /*
7596 * we can drop our serialization
7597 * lock now
7598 */
7599 mount_unlock_renames(locked_mp);
7600 mount_drop(locked_mp, 0);
7601 holding_mntlock = 0;
7602 }
7603 if (error) {
6d2010ae 7604 if (error == EKEEPLOOKING) {
316670eb
A
7605 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
7606 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6d2010ae
A
7607 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
7608 }
7609 }
7610
316670eb
A
7611 fromnd->ni_vp = fvp;
7612 tond->ni_vp = tvp;
fe8ab488 7613
6d2010ae
A
7614 goto continue_lookup;
7615 }
7616
7617 /*
fe8ab488
A
7618 * We may encounter a race in the VNOP where the destination didn't
7619 * exist when we did the namei, but it does by the time we go and
6d2010ae
A
7620 * try to create the entry. In this case, we should re-drive this rename
7621 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
fe8ab488 7622 * but other filesystems susceptible to this race could return it, too.
6d2010ae
A
7623 */
7624 if (error == ERECYCLE) {
7625 do_retry = 1;
7626 }
55e303ae 7627
c18c124e
A
7628 /*
7629 * For compound VNOPs, the authorization callback may return
7630 * ENOENT in case of racing hardlink lookups hitting the name
7631 * cache, redrive the lookup.
7632 */
3e170ce0
A
7633 if (batched && error == ENOENT) {
7634 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7635 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7636 do_retry = 1;
7637 retry_count += 1;
7638 }
c18c124e
A
7639 }
7640
91447636 7641 goto out1;
fe8ab488
A
7642 }
7643
7644 /* call out to allow 3rd party notification of rename.
91447636
A
7645 * Ignore result of kauth_authorize_fileop call.
7646 */
fe8ab488
A
7647 kauth_authorize_fileop(vfs_context_ucred(ctx),
7648 KAUTH_FILEOP_RENAME,
2d21ac55 7649 (uintptr_t)from_name, (uintptr_t)to_name);
39037602
A
7650 if (flags & VFS_RENAME_SWAP) {
7651 kauth_authorize_fileop(vfs_context_ucred(ctx),
7652 KAUTH_FILEOP_RENAME,
7653 (uintptr_t)to_name, (uintptr_t)from_name);
7654 }
91447636 7655
2d21ac55 7656#if CONFIG_FSE
91447636 7657 if (from_name != NULL && to_name != NULL) {
b0d623f7
A
7658 if (from_truncated || to_truncated) {
7659 // set it here since only the from_finfo gets reported up to user space
7660 from_finfo.mode |= FSE_TRUNCATED_PATH;
7661 }
6d2010ae
A
7662
7663 if (tvap && tvp) {
7664 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
7665 }
7666 if (fvap) {
7667 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
7668 }
7669
39037602
A
7670 if (tvp) {
7671 add_fsevent(FSE_RENAME, ctx,
7672 FSE_ARG_STRING, from_len, from_name,
7673 FSE_ARG_FINFO, &from_finfo,
7674 FSE_ARG_STRING, to_len, to_name,
7675 FSE_ARG_FINFO, &to_finfo,
7676 FSE_ARG_DONE);
7677 if (flags & VFS_RENAME_SWAP) {
7678 /*
7679 * Strictly speaking, swap is the equivalent of
7680 * *three* renames. FSEvents clients should only take
7681 * the events as a hint, so we only bother reporting
7682 * two.
7683 */
7684 add_fsevent(FSE_RENAME, ctx,
7685 FSE_ARG_STRING, to_len, to_name,
7686 FSE_ARG_FINFO, &to_finfo,
7687 FSE_ARG_STRING, from_len, from_name,
7688 FSE_ARG_FINFO, &from_finfo,
7689 FSE_ARG_DONE);
7690 }
55e303ae 7691 } else {
2d21ac55 7692 add_fsevent(FSE_RENAME, ctx,
91447636
A
7693 FSE_ARG_STRING, from_len, from_name,
7694 FSE_ARG_FINFO, &from_finfo,
7695 FSE_ARG_STRING, to_len, to_name,
7696 FSE_ARG_DONE);
7697 }
7698 }
2d21ac55 7699#endif /* CONFIG_FSE */
fe8ab488 7700
91447636
A
7701 /*
7702 * update filesystem's mount point data
7703 */
7704 if (mntrename) {
7705 char *cp, *pathend, *mpname;
7706 char * tobuf;
7707 struct mount *mp;
7708 int maxlen;
7709 size_t len = 0;
7710
7711 mp = fvp->v_mountedhere;
7712
7713 if (vfs_busy(mp, LK_NOWAIT)) {
7714 error = EBUSY;
7715 goto out1;
55e303ae 7716 }
91447636 7717 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
55e303ae 7718
fe8ab488
A
7719 if (UIO_SEG_IS_USER_SPACE(segflg))
7720 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
7721 else
7722 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
91447636
A
7723 if (!error) {
7724 /* find current mount point prefix */
7725 pathend = &mp->mnt_vfsstat.f_mntonname[0];
7726 for (cp = pathend; *cp != '\0'; ++cp) {
7727 if (*cp == '/')
7728 pathend = cp + 1;
7729 }
7730 /* find last component of target name */
7731 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
7732 if (*cp == '/')
7733 mpname = cp + 1;
7734 }
7735 /* append name to prefix */
7736 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
7737 bzero(pathend, maxlen);
2d21ac55 7738 strlcpy(pathend, mpname, maxlen);
91447636
A
7739 }
7740 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
7741
7742 vfs_unbusy(mp);
7743 }
7744 /*
fe8ab488 7745 * fix up name & parent pointers. note that we first
91447636
A
7746 * check that fvp has the same name/parent pointers it
7747 * had before the rename call... this is a 'weak' check
7748 * at best...
6d2010ae
A
7749 *
7750 * XXX oparent and oname may not be set in the compound vnop case
91447636 7751 */
6d2010ae 7752 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
91447636
A
7753 int update_flags;
7754
7755 update_flags = VNODE_UPDATE_NAME;
7756
7757 if (fdvp != tdvp)
7758 update_flags |= VNODE_UPDATE_PARENT;
7759
316670eb 7760 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
1c79356b
A
7761 }
7762out1:
593a1d5f
A
7763 if (to_name != NULL) {
7764 RELEASE_PATH(to_name);
7765 to_name = NULL;
7766 }
7767 if (from_name != NULL) {
7768 RELEASE_PATH(from_name);
7769 from_name = NULL;
7770 }
91447636
A
7771 if (holding_mntlock) {
7772 mount_unlock_renames(locked_mp);
7773 mount_drop(locked_mp, 0);
593a1d5f 7774 holding_mntlock = 0;
91447636
A
7775 }
7776 if (tdvp) {
7777 /*
7778 * nameidone has to happen before we vnode_put(tdvp)
7779 * since it may need to release the fs_nodelock on the tdvp
7780 */
316670eb 7781 nameidone(tond);
91447636
A
7782
7783 if (tvp)
7784 vnode_put(tvp);
7785 vnode_put(tdvp);
7786 }
7787 if (fdvp) {
7788 /*
7789 * nameidone has to happen before we vnode_put(fdvp)
7790 * since it may need to release the fs_nodelock on the fdvp
7791 */
316670eb 7792 nameidone(fromnd);
91447636
A
7793
7794 if (fvp)
7795 vnode_put(fvp);
7796 vnode_put(fdvp);
7797 }
fe8ab488 7798
6d2010ae
A
7799 /*
7800 * If things changed after we did the namei, then we will re-drive
7801 * this rename call from the top.
7802 */
316670eb 7803 if (do_retry) {
6d2010ae 7804 do_retry = 0;
593a1d5f
A
7805 goto retry;
7806 }
316670eb
A
7807
7808 FREE(__rename_data, M_TEMP);
1c79356b
A
7809 return (error);
7810}
7811
fe8ab488
A
7812int
7813rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7814{
7815 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7816 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7817}
7818
39037602 7819int renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
fe8ab488
A
7820{
7821 return renameat_internal(
39037602
A
7822 vfs_context_current(),
7823 uap->fromfd, uap->from,
7824 uap->tofd, uap->to,
fe8ab488
A
7825 UIO_USERSPACE, uap->flags);
7826}
39037602 7827
fe8ab488
A
7828int
7829renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7830{
7831 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7832 uap->tofd, uap->to, UIO_USERSPACE, 0));
7833}
7834
1c79356b
A
7835/*
7836 * Make a directory file.
2d21ac55
A
7837 *
7838 * Returns: 0 Success
7839 * EEXIST
7840 * namei:???
7841 * vnode_authorize:???
7842 * vn_create:???
1c79356b 7843 */
1c79356b 7844/* ARGSUSED */
91447636 7845static int
fe8ab488
A
7846mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
7847 enum uio_seg segflg)
1c79356b 7848{
91447636 7849 vnode_t vp, dvp;
1c79356b 7850 int error;
91447636 7851 int update_flags = 0;
6d2010ae 7852 int batched;
1c79356b
A
7853 struct nameidata nd;
7854
91447636 7855 AUDIT_ARG(mode, vap->va_mode);
fe8ab488 7856 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
6d2010ae 7857 path, ctx);
9bccf70c 7858 nd.ni_cnd.cn_flags |= WILLBEDIR;
6d2010ae
A
7859 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
7860
7861continue_lookup:
fe8ab488 7862 error = nameiat(&nd, fd);
55e303ae 7863 if (error)
1c79356b 7864 return (error);
91447636 7865 dvp = nd.ni_dvp;
1c79356b 7866 vp = nd.ni_vp;
55e303ae 7867
fe8ab488
A
7868 if (vp != NULL) {
7869 error = EEXIST;
7870 goto out;
7871 }
7872
6d2010ae 7873 batched = vnode_compound_mkdir_available(dvp);
2d21ac55
A
7874
7875 VATTR_SET(vap, va_type, VDIR);
fe8ab488 7876
6d2010ae
A
7877 /*
7878 * XXX
7879 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7880 * only get EXISTS or EISDIR for existing path components, and not that it could see
7881 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7882 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7883 */
fe8ab488 7884 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
6d2010ae
A
7885 if (error == EACCES || error == EPERM) {
7886 int error2;
7887
7888 nameidone(&nd);
7889 vnode_put(dvp);
7890 dvp = NULLVP;
7891
fe8ab488
A
7892 /*
7893 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
6d2010ae
A
7894 * rather than EACCESS if the target exists.
7895 */
fe8ab488
A
7896 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
7897 path, ctx);
7898 error2 = nameiat(&nd, fd);
6d2010ae
A
7899 if (error2) {
7900 goto out;
7901 } else {
7902 vp = nd.ni_vp;
7903 error = EEXIST;
7904 goto out;
7905 }
7906 }
7907
2d21ac55 7908 goto out;
6d2010ae
A
7909 }
7910
7911 /*
fe8ab488 7912 * make the directory
6d2010ae 7913 */
fe8ab488 7914 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
6d2010ae
A
7915 if (error == EKEEPLOOKING) {
7916 nd.ni_vp = vp;
7917 goto continue_lookup;
7918 }
2d21ac55 7919
fe8ab488 7920 goto out;
6d2010ae 7921 }
fe8ab488 7922
91447636
A
7923 // Make sure the name & parent pointers are hooked up
7924 if (vp->v_name == NULL)
7925 update_flags |= VNODE_UPDATE_NAME;
7926 if (vp->v_parent == NULLVP)
7927 update_flags |= VNODE_UPDATE_PARENT;
7928
7929 if (update_flags)
7930 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
55e303ae 7931
2d21ac55 7932#if CONFIG_FSE
91447636 7933 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
2d21ac55 7934#endif
91447636
A
7935
7936out:
7937 /*
7938 * nameidone has to happen before we vnode_put(dvp)
7939 * since it may need to release the fs_nodelock on the dvp
7940 */
7941 nameidone(&nd);
7942
7943 if (vp)
6d2010ae 7944 vnode_put(vp);
fe8ab488 7945 if (dvp)
6d2010ae 7946 vnode_put(dvp);
55e303ae 7947
1c79356b
A
7948 return (error);
7949}
7950
b0d623f7
A
7951/*
7952 * mkdir_extended: Create a directory; with extended security (ACL).
7953 *
7954 * Parameters: p Process requesting to create the directory
7955 * uap User argument descriptor (see below)
fe8ab488 7956 * retval (ignored)
b0d623f7
A
7957 *
7958 * Indirect: uap->path Path of directory to create
7959 * uap->mode Access permissions to set
7960 * uap->xsecurity ACL to set
fe8ab488 7961 *
b0d623f7
A
7962 * Returns: 0 Success
7963 * !0 Not success
7964 *
7965 */
1c79356b 7966int
b0d623f7 7967mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
1c79356b 7968{
91447636
A
7969 int ciferror;
7970 kauth_filesec_t xsecdst;
7971 struct vnode_attr va;
7972
b0d623f7
A
7973 AUDIT_ARG(owner, uap->uid, uap->gid);
7974
91447636
A
7975 xsecdst = NULL;
7976 if ((uap->xsecurity != USER_ADDR_NULL) &&
7977 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
7978 return ciferror;
7979
91447636 7980 VATTR_INIT(&va);
fe8ab488 7981 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
91447636
A
7982 if (xsecdst != NULL)
7983 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
7984
fe8ab488
A
7985 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7986 UIO_USERSPACE);
91447636
A
7987 if (xsecdst != NULL)
7988 kauth_filesec_free(xsecdst);
7989 return ciferror;
1c79356b
A
7990}
7991
1c79356b 7992int
b0d623f7 7993mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
1c79356b 7994{
91447636 7995 struct vnode_attr va;
1c79356b 7996
91447636 7997 VATTR_INIT(&va);
fe8ab488 7998 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
e5568f75 7999
fe8ab488
A
8000 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
8001 UIO_USERSPACE));
91447636 8002}
1c79356b 8003
91447636 8004int
fe8ab488
A
8005mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
8006{
8007 struct vnode_attr va;
8008
8009 VATTR_INIT(&va);
8010 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8011
8012 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
8013 UIO_USERSPACE));
8014}
8015
8016static int
8017rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
8018 enum uio_seg segflg)
1c79356b 8019{
2d21ac55 8020 vnode_t vp, dvp;
91447636
A
8021 int error;
8022 struct nameidata nd;
6d2010ae
A
8023 char *path = NULL;
8024 int len=0;
8025 int has_listeners = 0;
8026 int need_event = 0;
8027 int truncated = 0;
6d2010ae
A
8028#if CONFIG_FSE
8029 struct vnode_attr va;
8030#endif /* CONFIG_FSE */
8031 struct vnode_attr *vap = NULL;
c18c124e 8032 int restart_count = 0;
6d2010ae 8033 int batched;
91447636 8034
b0d623f7 8035 int restart_flag;
91447636 8036
fe8ab488 8037 /*
2d21ac55
A
8038 * This loop exists to restart rmdir in the unlikely case that two
8039 * processes are simultaneously trying to remove the same directory
8040 * containing orphaned appleDouble files.
8041 */
8042 do {
6d2010ae 8043 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
fe8ab488 8044 segflg, dirpath, ctx);
6d2010ae
A
8045 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
8046continue_lookup:
2d21ac55 8047 restart_flag = 0;
6d2010ae 8048 vap = NULL;
2d21ac55 8049
fe8ab488 8050 error = nameiat(&nd, fd);
2d21ac55
A
8051 if (error)
8052 return (error);
8053
8054 dvp = nd.ni_dvp;
8055 vp = nd.ni_vp;
8056
6d2010ae
A
8057 if (vp) {
8058 batched = vnode_compound_rmdir_available(vp);
2d21ac55 8059
6d2010ae
A
8060 if (vp->v_flag & VROOT) {
8061 /*
8062 * The root of a mounted filesystem cannot be deleted.
8063 */
8064 error = EBUSY;
8065 goto out;
8066 }
1c79356b 8067
00867663
A
8068#if DEVELOPMENT || DEBUG
8069 /*
8070 * XXX VSWAP: Check for entitlements or special flag here
8071 * so we can restrict access appropriately.
8072 */
8073#else /* DEVELOPMENT || DEBUG */
8074
8075 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
8076 error = EPERM;
8077 goto out;
8078 }
8079#endif /* DEVELOPMENT || DEBUG */
8080
2d21ac55 8081 /*
6d2010ae
A
8082 * Removed a check here; we used to abort if vp's vid
8083 * was not the same as what we'd seen the last time around.
8084 * I do not think that check was valid, because if we retry
8085 * and all dirents are gone, the directory could legitimately
8086 * be recycled but still be present in a situation where we would
fe8ab488 8087 * have had permission to delete. Therefore, we won't make
6d2010ae
A
8088 * an effort to preserve that check now that we may not have a
8089 * vp here.
2d21ac55 8090 */
6d2010ae
A
8091
8092 if (!batched) {
8093 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
8094 if (error) {
3e170ce0
A
8095 if (error == ENOENT) {
8096 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
8097 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8098 restart_flag = 1;
8099 restart_count += 1;
8100 }
c18c124e 8101 }
6d2010ae
A
8102 goto out;
8103 }
8104 }
2d21ac55 8105 } else {
6d2010ae
A
8106 batched = 1;
8107
8108 if (!vnode_compound_rmdir_available(dvp)) {
8109 panic("No error, but no compound rmdir?");
8110 }
91447636 8111 }
6d2010ae 8112
2d21ac55 8113#if CONFIG_FSE
6d2010ae 8114 fse_info finfo;
b0d623f7 8115
6d2010ae
A
8116 need_event = need_fsevent(FSE_DELETE, dvp);
8117 if (need_event) {
8118 if (!batched) {
2d21ac55 8119 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
8120 } else {
8121 error = vfs_get_notify_attributes(&va);
8122 if (error) {
8123 goto out;
8124 }
8125
8126 vap = &va;
2d21ac55 8127 }
6d2010ae 8128 }
2d21ac55 8129#endif
6d2010ae
A
8130 has_listeners = kauth_authorize_fileop_has_listeners();
8131 if (need_event || has_listeners) {
8132 if (path == NULL) {
2d21ac55
A
8133 GET_PATH(path);
8134 if (path == NULL) {
8135 error = ENOMEM;
8136 goto out;
8137 }
6d2010ae 8138 }
b0d623f7 8139
6d2010ae 8140 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
b0d623f7 8141#if CONFIG_FSE
6d2010ae
A
8142 if (truncated) {
8143 finfo.mode |= FSE_TRUNCATED_PATH;
2d21ac55 8144 }
6d2010ae
A
8145#endif
8146 }
91447636 8147
6d2010ae
A
8148 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
8149 nd.ni_vp = vp;
8150 if (vp == NULLVP) {
8151 /* Couldn't find a vnode */
8152 goto out;
8153 }
2d21ac55 8154
6d2010ae
A
8155 if (error == EKEEPLOOKING) {
8156 goto continue_lookup;
3e170ce0
A
8157 } else if (batched && error == ENOENT) {
8158 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
8159 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8160 /*
8161 * For compound VNOPs, the authorization callback
8162 * may return ENOENT in case of racing hard link lookups
8163 * redrive the lookup.
8164 */
8165 restart_flag = 1;
8166 restart_count += 1;
8167 goto out;
8168 }
6d2010ae 8169 }
39236c6e 8170#if CONFIG_APPLEDOUBLE
6d2010ae
A
8171 /*
8172 * Special case to remove orphaned AppleDouble
8173 * files. I don't like putting this in the kernel,
8174 * but carbon does not like putting this in carbon either,
8175 * so here we are.
8176 */
8177 if (error == ENOTEMPTY) {
8178 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
8179 if (error == EBUSY) {
8180 goto out;
2d21ac55
A
8181 }
8182
6d2010ae 8183
2d21ac55 8184 /*
fe8ab488 8185 * Assuming everything went well, we will try the RMDIR again
2d21ac55 8186 */
6d2010ae
A
8187 if (!error)
8188 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
8189 }
39236c6e 8190#endif /* CONFIG_APPLEDOUBLE */
6d2010ae 8191 /*
fe8ab488 8192 * Call out to allow 3rd party notification of delete.
6d2010ae
A
8193 * Ignore result of kauth_authorize_fileop call.
8194 */
8195 if (!error) {
8196 if (has_listeners) {
fe8ab488
A
8197 kauth_authorize_fileop(vfs_context_ucred(ctx),
8198 KAUTH_FILEOP_DELETE,
6d2010ae
A
8199 (uintptr_t)vp,
8200 (uintptr_t)path);
8201 }
8202
8203 if (vp->v_flag & VISHARDLINK) {
8204 // see the comment in unlink1() about why we update
8205 // the parent of a hard link when it is removed
8206 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
8207 }
2d21ac55
A
8208
8209#if CONFIG_FSE
6d2010ae
A
8210 if (need_event) {
8211 if (vap) {
8212 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 8213 }
6d2010ae
A
8214 add_fsevent(FSE_DELETE, ctx,
8215 FSE_ARG_STRING, len, path,
8216 FSE_ARG_FINFO, &finfo,
8217 FSE_ARG_DONE);
2d21ac55 8218 }
6d2010ae 8219#endif
2d21ac55
A
8220 }
8221
8222out:
6d2010ae
A
8223 if (path != NULL) {
8224 RELEASE_PATH(path);
8225 path = NULL;
8226 }
2d21ac55
A
8227 /*
8228 * nameidone has to happen before we vnode_put(dvp)
8229 * since it may need to release the fs_nodelock on the dvp
8230 */
8231 nameidone(&nd);
2d21ac55 8232 vnode_put(dvp);
6d2010ae 8233
fe8ab488 8234 if (vp)
6d2010ae 8235 vnode_put(vp);
2d21ac55
A
8236
8237 if (restart_flag == 0) {
8238 wakeup_one((caddr_t)vp);
8239 return (error);
8240 }
8241 tsleep(vp, PVFS, "rm AD", 1);
8242
8243 } while (restart_flag != 0);
91447636 8244
1c79356b 8245 return (error);
2d21ac55 8246
1c79356b 8247}
91447636 8248
fe8ab488
A
8249/*
8250 * Remove a directory file.
8251 */
8252/* ARGSUSED */
8253int
8254rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
8255{
8256 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
8257 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
8258}
8259
2d21ac55
A
/*
 * Get direntry length padded to 8 byte alignment: the size of a
 * struct direntry whose name buffer is trimmed to `namlen` characters
 * plus the terminating NUL.
 */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)

/*
 * Get dirent length padded to 4 byte alignment.  The argument is
 * parenthesized so that an expression argument (e.g. a conditional)
 * is not regrouped by the "+ 1".
 */
#define DIRENT_LEN(namelen) \
	((sizeof(struct dirent) + ((namelen) + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)

/* Get the end of this dirent, i.e. the address of its last byte */
#define DIRENT_END(dep) \
	(((char *)(dep)) + (dep)->d_reclen - 1)
8271
fe8ab488 8272errno_t
2d21ac55
A
8273vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
8274 int *numdirent, vfs_context_t ctxp)
8275{
8276 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
39037602 8277 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
6d2010ae 8278 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
2d21ac55
A
8279 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
8280 } else {
8281 size_t bufsize;
8282 void * bufptr;
8283 uio_t auio;
15129b1c 8284 struct direntry *entry64;
2d21ac55
A
8285 struct dirent *dep;
8286 int bytesread;
8287 int error;
8288
8289 /*
5ba3f43e
A
8290 * We're here because the underlying file system does not
8291 * support direnties or we mounted denying support so we must
8292 * fall back to dirents and convert them to direntries.
8293 *
8294 * Our kernel buffer needs to be smaller since re-packing will
8295 * expand each dirent. The worse case (when the name length
8296 * is 3 or less) corresponds to a struct direntry size of 32
2d21ac55
A
8297 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
8298 * (4-byte aligned). So having a buffer that is 3/8 the size
8299 * will prevent us from reading more than we can pack.
8300 *
8301 * Since this buffer is wired memory, we will limit the
39037602 8302 * buffer size to a maximum of 32K. We would really like to
2d21ac55 8303 * use 32K in the MIN(), but we use magic number 87371 to
39037602 8304 * prevent uio_resid() * 3 / 8 from overflowing.
2d21ac55 8305 */
316670eb 8306 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
2d21ac55 8307 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
b0d623f7
A
8308 if (bufptr == NULL) {
8309 return ENOMEM;
8310 }
2d21ac55 8311
b0d623f7 8312 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
2d21ac55
A
8313 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
8314 auio->uio_offset = uio->uio_offset;
8315
8316 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
8317
8318 dep = (struct dirent *)bufptr;
8319 bytesread = bufsize - uio_resid(auio);
8320
15129b1c
A
8321 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
8322 M_TEMP, M_WAITOK);
2d21ac55
A
8323 /*
8324 * Convert all the entries and copy them out to user's buffer.
8325 */
8326 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
15129b1c
A
8327 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
8328
5ba3f43e
A
8329 if (DIRENT_END(dep) > ((char *)bufptr + bytesread) ||
8330 DIRENT_LEN(dep->d_namlen) > dep->d_reclen) {
8331 printf("%s: %s: Bad dirent recived from directory %s\n", __func__,
8332 vp->v_mount->mnt_vfsstat.f_mntonname,
8333 vp->v_name ? vp->v_name : "<unknown>");
8334 error = EIO;
8335 break;
8336 }
8337
15129b1c 8338 bzero(entry64, enbufsize);
2d21ac55 8339 /* Convert a dirent to a dirent64. */
15129b1c
A
8340 entry64->d_ino = dep->d_ino;
8341 entry64->d_seekoff = 0;
8342 entry64->d_reclen = enbufsize;
8343 entry64->d_namlen = dep->d_namlen;
8344 entry64->d_type = dep->d_type;
8345 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
2d21ac55
A
8346
8347 /* Move to next entry. */
8348 dep = (struct dirent *)((char *)dep + dep->d_reclen);
8349
8350 /* Copy entry64 to user's buffer. */
15129b1c 8351 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
2d21ac55
A
8352 }
8353
8354 /* Update the real offset using the offset we got from VNOP_READDIR. */
8355 if (error == 0) {
8356 uio->uio_offset = auio->uio_offset;
8357 }
8358 uio_free(auio);
8359 FREE(bufptr, M_TEMP);
15129b1c 8360 FREE(entry64, M_TEMP);
2d21ac55
A
8361 return (error);
8362 }
8363}
1c79356b 8364
39236c6e
A
8365#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
8366
1c79356b
A
8367/*
8368 * Read a block of directory entries in a file system independent format.
8369 */
2d21ac55
A
8370static int
8371getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
8372 off_t *offset, int flags)
1c79356b 8373{
2d21ac55
A
8374 vnode_t vp;
8375 struct vfs_context context = *vfs_context_current(); /* local copy */
91447636
A
8376 struct fileproc *fp;
8377 uio_t auio;
2d21ac55
A
8378 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8379 off_t loff;
8380 int error, eofflag, numdirent;
91447636 8381 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 8382
2d21ac55
A
8383 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
8384 if (error) {
1c79356b 8385 return (error);
2d21ac55 8386 }
91447636
A
8387 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8388 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8389 error = EBADF;
8390 goto out;
8391 }
2d21ac55 8392
39236c6e
A
8393 if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
8394 bufsize = GETDIRENTRIES_MAXBUFSIZE;
8395
2d21ac55
A
8396#if CONFIG_MACF
8397 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
8398 if (error)
8399 goto out;
8400#endif
91447636
A
8401 if ( (error = vnode_getwithref(vp)) ) {
8402 goto out;
8403 }
91447636 8404 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
55e303ae 8405
1c79356b 8406unionread:
91447636
A
8407 if (vp->v_type != VDIR) {
8408 (void)vnode_put(vp);
8409 error = EINVAL;
8410 goto out;
8411 }
2d21ac55
A
8412
8413#if CONFIG_MACF
8414 error = mac_vnode_check_readdir(&context, vp);
8415 if (error != 0) {
8416 (void)vnode_put(vp);
8417 goto out;
8418 }
8419#endif /* MAC */
91447636
A
8420
8421 loff = fp->f_fglob->fg_offset;
2d21ac55
A
8422 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8423 uio_addiov(auio, bufp, bufsize);
91447636 8424
2d21ac55
A
8425 if (flags & VNODE_READDIR_EXTENDED) {
8426 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
8427 fp->f_fglob->fg_offset = uio_offset(auio);
8428 } else {
8429 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
8430 fp->f_fglob->fg_offset = uio_offset(auio);
8431 }
91447636
A
8432 if (error) {
8433 (void)vnode_put(vp);
8434 goto out;
8435 }
1c79356b 8436
2d21ac55
A
8437 if ((user_ssize_t)bufsize == uio_resid(auio)){
8438 if (union_dircheckp) {
8439 error = union_dircheckp(&vp, fp, &context);
8440 if (error == -1)
8441 goto unionread;
813fb2f6
A
8442 if (error) {
8443 (void)vnode_put(vp);
2d21ac55 8444 goto out;
813fb2f6 8445 }
1c79356b
A
8446 }
8447
39236c6e 8448 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
2d21ac55 8449 struct vnode *tvp = vp;
39236c6e
A
8450 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
8451 vnode_ref(vp);
8452 fp->f_fglob->fg_data = (caddr_t) vp;
8453 fp->f_fglob->fg_offset = 0;
8454 vnode_rele(tvp);
8455 vnode_put(tvp);
8456 goto unionread;
8457 }
8458 vp = tvp;
1c79356b
A
8459 }
8460 }
2d21ac55 8461
91447636 8462 vnode_put(vp);
2d21ac55
A
8463 if (offset) {
8464 *offset = loff;
8465 }
39037602 8466
2d21ac55 8467 *bytesread = bufsize - uio_resid(auio);
91447636
A
8468out:
8469 file_drop(fd);
1c79356b
A
8470 return (error);
8471}
8472
2d21ac55
A
8473
8474int
b0d623f7 8475getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
2d21ac55
A
8476{
8477 off_t offset;
2d21ac55
A
8478 ssize_t bytesread;
8479 int error;
8480
8481 AUDIT_ARG(fd, uap->fd);
8482 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
8483
8484 if (error == 0) {
b0d623f7
A
8485 if (proc_is64bit(p)) {
8486 user64_long_t base = (user64_long_t)offset;
8487 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
8488 } else {
8489 user32_long_t base = (user32_long_t)offset;
8490 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
8491 }
2d21ac55
A
8492 *retval = bytesread;
8493 }
8494 return (error);
8495}
8496
8497int
8498getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
8499{
8500 off_t offset;
8501 ssize_t bytesread;
8502 int error;
8503
8504 AUDIT_ARG(fd, uap->fd);
8505 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
8506
8507 if (error == 0) {
8508 *retval = bytesread;
8509 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
8510 }
8511 return (error);
8512}
8513
8514
1c79356b
A
8515/*
8516 * Set the mode mask for creation of filesystem nodes.
b0d623f7 8517 * XXX implement xsecurity
1c79356b 8518 */
91447636
A
8519#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
8520static int
b0d623f7 8521umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
1c79356b 8522{
2d21ac55 8523 struct filedesc *fdp;
1c79356b 8524
91447636 8525 AUDIT_ARG(mask, newmask);
2d21ac55 8526 proc_fdlock(p);
1c79356b
A
8527 fdp = p->p_fd;
8528 *retval = fdp->fd_cmask;
91447636 8529 fdp->fd_cmask = newmask & ALLPERMS;
2d21ac55 8530 proc_fdunlock(p);
1c79356b
A
8531 return (0);
8532}
8533
b0d623f7
A
8534/*
8535 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
8536 *
8537 * Parameters: p Process requesting to set the umask
8538 * uap User argument descriptor (see below)
8539 * retval umask of the process (parameter p)
8540 *
8541 * Indirect: uap->newmask umask to set
8542 * uap->xsecurity ACL to set
39037602 8543 *
b0d623f7
A
8544 * Returns: 0 Success
8545 * !0 Not success
8546 *
8547 */
8548int
8549umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
91447636
A
8550{
8551 int ciferror;
8552 kauth_filesec_t xsecdst;
8553
8554 xsecdst = KAUTH_FILESEC_NONE;
8555 if (uap->xsecurity != USER_ADDR_NULL) {
8556 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
8557 return ciferror;
8558 } else {
8559 xsecdst = KAUTH_FILESEC_NONE;
8560 }
8561
8562 ciferror = umask1(p, uap->newmask, xsecdst, retval);
8563
8564 if (xsecdst != KAUTH_FILESEC_NONE)
8565 kauth_filesec_free(xsecdst);
8566 return ciferror;
8567}
8568
8569int
b0d623f7 8570umask(proc_t p, struct umask_args *uap, int32_t *retval)
91447636
A
8571{
8572 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
8573}
8574
1c79356b
A
8575/*
8576 * Void all references to file by ripping underlying filesystem
8577 * away from vnode.
8578 */
1c79356b
A
8579/* ARGSUSED */
8580int
b0d623f7 8581revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
1c79356b 8582{
2d21ac55 8583 vnode_t vp;
91447636 8584 struct vnode_attr va;
2d21ac55 8585 vfs_context_t ctx = vfs_context_current();
1c79356b
A
8586 int error;
8587 struct nameidata nd;
8588
6d2010ae
A
8589 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
8590 uap->path, ctx);
55e303ae
A
8591 error = namei(&nd);
8592 if (error)
1c79356b
A
8593 return (error);
8594 vp = nd.ni_vp;
91447636
A
8595
8596 nameidone(&nd);
8597
b0d623f7
A
8598 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
8599 error = ENOTSUP;
8600 goto out;
8601 }
8602
8603 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
8604 error = EBUSY;
8605 goto out;
8606 }
8607
2d21ac55
A
8608#if CONFIG_MACF
8609 error = mac_vnode_check_revoke(ctx, vp);
8610 if (error)
8611 goto out;
8612#endif
8613
91447636
A
8614 VATTR_INIT(&va);
8615 VATTR_WANTED(&va, va_uid);
2d21ac55 8616 if ((error = vnode_getattr(vp, &va, ctx)))
1c79356b 8617 goto out;
2d21ac55
A
8618 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
8619 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
1c79356b 8620 goto out;
b0d623f7 8621 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
2d21ac55 8622 VNOP_REVOKE(vp, REVOKEALL, ctx);
1c79356b 8623out:
91447636 8624 vnode_put(vp);
1c79356b
A
8625 return (error);
8626}
8627
0b4e3aa0 8628
1c79356b
A
8629/*
8630 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
9bccf70c 8631 * The following system calls are designed to support features
1c79356b
A
8632 * which are specific to the HFS & HFS Plus volume formats
8633 */
8634
9bccf70c 8635
1c79356b 8636/*
39236c6e
A
8637 * Obtain attribute information on objects in a directory while enumerating
8638 * the directory.
8639 */
1c79356b
A
8640/* ARGSUSED */
8641int
b0d623f7 8642getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
1c79356b 8643{
2d21ac55 8644 vnode_t vp;
91447636
A
8645 struct fileproc *fp;
8646 uio_t auio = NULL;
8647 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
39236c6e 8648 uint32_t count, savecount;
2d21ac55 8649 uint32_t newstate;
91447636 8650 int error, eofflag;
2d21ac55 8651 uint32_t loff;
39037602 8652 struct attrlist attributelist;
2d21ac55 8653 vfs_context_t ctx = vfs_context_current();
91447636
A
8654 int fd = uap->fd;
8655 char uio_buf[ UIO_SIZEOF(1) ];
8656 kauth_action_t action;
8657
8658 AUDIT_ARG(fd, fd);
39037602 8659
91447636 8660 /* Get the attributes into kernel space */
2d21ac55 8661 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
91447636 8662 return(error);
2d21ac55
A
8663 }
8664 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
8665 return(error);
8666 }
39236c6e 8667 savecount = count;
2d21ac55 8668 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
91447636 8669 return (error);
2d21ac55 8670 }
91447636
A
8671 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8672 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8673 error = EBADF;
8674 goto out;
8675 }
2d21ac55
A
8676
8677
8678#if CONFIG_MACF
8679 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
8680 fp->f_fglob);
8681 if (error)
8682 goto out;
8683#endif
8684
8685
91447636
A
8686 if ( (error = vnode_getwithref(vp)) )
8687 goto out;
55e303ae 8688
91447636 8689 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 8690
39236c6e 8691unionread:
91447636
A
8692 if (vp->v_type != VDIR) {
8693 (void)vnode_put(vp);
8694 error = EINVAL;
8695 goto out;
8696 }
55e303ae 8697
2d21ac55
A
8698#if CONFIG_MACF
8699 error = mac_vnode_check_readdir(ctx, vp);
8700 if (error != 0) {
8701 (void)vnode_put(vp);
8702 goto out;
8703 }
8704#endif /* MAC */
8705
91447636
A
8706 /* set up the uio structure which will contain the users return buffer */
8707 loff = fp->f_fglob->fg_offset;
39236c6e 8708 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
91447636 8709 uio_addiov(auio, uap->buffer, uap->buffersize);
39037602 8710
91447636
A
8711 /*
8712 * If the only item requested is file names, we can let that past with
8713 * just LIST_DIRECTORY. If they want any other attributes, that means
8714 * they need SEARCH as well.
8715 */
8716 action = KAUTH_VNODE_LIST_DIRECTORY;
8717 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
8718 attributelist.fileattr || attributelist.dirattr)
8719 action |= KAUTH_VNODE_SEARCH;
39037602 8720
2d21ac55 8721 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
2d21ac55 8722
b0d623f7
A
8723 /* Believe it or not, uap->options only has 32-bits of valid
8724 * info, so truncate before extending again */
39236c6e
A
8725
8726 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
8727 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
8728 }
8729
8730 if (error) {
8731 (void) vnode_put(vp);
8732 goto out;
8733 }
8734
8735 /*
8736 * If we've got the last entry of a directory in a union mount
8737 * then reset the eofflag and pretend there's still more to come.
8738 * The next call will again set eofflag and the buffer will be empty,
8739 * so traverse to the underlying directory and do the directory
8740 * read there.
8741 */
8742 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
8743 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
8744 eofflag = 0;
8745 } else { // Empty buffer
8746 struct vnode *tvp = vp;
8747 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
8748 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
8749 fp->f_fglob->fg_data = (caddr_t) vp;
8750 fp->f_fglob->fg_offset = 0; // reset index for new dir
8751 count = savecount;
8752 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
8753 vnode_put(tvp);
8754 goto unionread;
8755 }
8756 vp = tvp;
8757 }
2d21ac55 8758 }
39236c6e 8759
91447636 8760 (void)vnode_put(vp);
1c79356b 8761
39037602 8762 if (error)
91447636
A
8763 goto out;
8764 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
1c79356b 8765
2d21ac55 8766 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
91447636 8767 goto out;
2d21ac55 8768 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
91447636 8769 goto out;
2d21ac55 8770 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
91447636 8771 goto out;
1c79356b
A
8772
8773 *retval = eofflag; /* similar to getdirentries */
91447636 8774 error = 0;
2d21ac55 8775out:
91447636
A
8776 file_drop(fd);
8777 return (error); /* return error earlier, an retval of 0 or 1 now */
1c79356b 8778
39236c6e 8779} /* end of getdirentriesattr system call */
1c79356b
A
8780
8781/*
8782* Exchange data between two files
8783*/
8784
1c79356b
A
8785/* ARGSUSED */
8786int
b0d623f7 8787exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
1c79356b
A
8788{
8789
8790 struct nameidata fnd, snd;
2d21ac55
A
8791 vfs_context_t ctx = vfs_context_current();
8792 vnode_t fvp;
8793 vnode_t svp;
8794 int error;
b0d623f7 8795 u_int32_t nameiflags;
91447636
A
8796 char *fpath = NULL;
8797 char *spath = NULL;
b0d623f7
A
8798 int flen=0, slen=0;
8799 int from_truncated=0, to_truncated=0;
8800#if CONFIG_FSE
91447636 8801 fse_info f_finfo, s_finfo;
b0d623f7 8802#endif
39037602 8803
1c79356b
A
8804 nameiflags = 0;
8805 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8806
6d2010ae
A
8807 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
8808 UIO_USERSPACE, uap->path1, ctx);
1c79356b 8809
6d2010ae
A
8810 error = namei(&fnd);
8811 if (error)
8812 goto out2;
1c79356b 8813
91447636
A
8814 nameidone(&fnd);
8815 fvp = fnd.ni_vp;
1c79356b 8816
39037602 8817 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
6d2010ae 8818 UIO_USERSPACE, uap->path2, ctx);
1c79356b 8819
6d2010ae
A
8820 error = namei(&snd);
8821 if (error) {
91447636 8822 vnode_put(fvp);
55e303ae 8823 goto out2;
6d2010ae 8824 }
91447636 8825 nameidone(&snd);
1c79356b
A
8826 svp = snd.ni_vp;
8827
91447636
A
8828 /*
8829 * if the files are the same, return an inval error
8830 */
1c79356b 8831 if (svp == fvp) {
91447636
A
8832 error = EINVAL;
8833 goto out;
39037602 8834 }
1c79356b 8835
91447636
A
8836 /*
8837 * if the files are on different volumes, return an error
8838 */
8839 if (svp->v_mount != fvp->v_mount) {
8840 error = EXDEV;
8841 goto out;
8842 }
2d21ac55 8843
39236c6e
A
8844 /* If they're not files, return an error */
8845 if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
db609669
A
8846 error = EINVAL;
8847 goto out;
8848 }
8849
2d21ac55
A
8850#if CONFIG_MACF
8851 error = mac_vnode_check_exchangedata(ctx,
8852 fvp, svp);
8853 if (error)
8854 goto out;
8855#endif
8856 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
8857 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
91447636 8858 goto out;
1c79356b 8859
2d21ac55
A
8860 if (
8861#if CONFIG_FSE
39037602 8862 need_fsevent(FSE_EXCHANGE, fvp) ||
2d21ac55
A
8863#endif
8864 kauth_authorize_fileop_has_listeners()) {
8865 GET_PATH(fpath);
8866 GET_PATH(spath);
8867 if (fpath == NULL || spath == NULL) {
8868 error = ENOMEM;
8869 goto out;
8870 }
b0d623f7
A
8871
8872 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
8873 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
39037602 8874
2d21ac55
A
8875#if CONFIG_FSE
8876 get_fse_info(fvp, &f_finfo, ctx);
8877 get_fse_info(svp, &s_finfo, ctx);
b0d623f7
A
8878 if (from_truncated || to_truncated) {
8879 // set it here since only the f_finfo gets reported up to user space
8880 f_finfo.mode |= FSE_TRUNCATED_PATH;
8881 }
2d21ac55 8882#endif
91447636 8883 }
1c79356b 8884 /* Ok, make the call */
2d21ac55 8885 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
55e303ae 8886
91447636 8887 if (error == 0) {
2d21ac55 8888 const char *tmpname;
91447636
A
8889
8890 if (fpath != NULL && spath != NULL) {
39037602 8891 /* call out to allow 3rd party notification of exchangedata.
91447636
A
8892 * Ignore result of kauth_authorize_fileop call.
8893 */
39037602 8894 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
91447636
A
8895 (uintptr_t)fpath, (uintptr_t)spath);
8896 }
8897 name_cache_lock();
8898
8899 tmpname = fvp->v_name;
8900 fvp->v_name = svp->v_name;
8901 svp->v_name = tmpname;
39037602 8902
91447636 8903 if (fvp->v_parent != svp->v_parent) {
2d21ac55 8904 vnode_t tmp;
91447636
A
8905
8906 tmp = fvp->v_parent;
8907 fvp->v_parent = svp->v_parent;
8908 svp->v_parent = tmp;
8909 }
8910 name_cache_unlock();
8911
2d21ac55 8912#if CONFIG_FSE
91447636 8913 if (fpath != NULL && spath != NULL) {
2d21ac55 8914 add_fsevent(FSE_EXCHANGE, ctx,
91447636
A
8915 FSE_ARG_STRING, flen, fpath,
8916 FSE_ARG_FINFO, &f_finfo,
8917 FSE_ARG_STRING, slen, spath,
8918 FSE_ARG_FINFO, &s_finfo,
8919 FSE_ARG_DONE);
8920 }
2d21ac55 8921#endif
55e303ae
A
8922 }
8923
1c79356b 8924out:
2d21ac55
A
8925 if (fpath != NULL)
8926 RELEASE_PATH(fpath);
8927 if (spath != NULL)
8928 RELEASE_PATH(spath);
91447636
A
8929 vnode_put(svp);
8930 vnode_put(fvp);
1c79356b 8931out2:
1c79356b 8932 return (error);
91447636 8933}
1c79356b 8934
39236c6e
A
8935/*
8936 * Return (in MB) the amount of freespace on the given vnode's volume.
8937 */
8938uint32_t freespace_mb(vnode_t vp);
8939
8940uint32_t
8941freespace_mb(vnode_t vp)
8942{
39037602 8943 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
39236c6e
A
8944 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
8945 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
8946}
8947
316670eb 8948#if CONFIG_SEARCHFS
1c79356b 8949
1c79356b
A
8950/* ARGSUSED */
8951
8952int
b0d623f7 8953searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
1c79356b 8954{
39236c6e
A
8955 vnode_t vp, tvp;
8956 int i, error=0;
1c79356b
A
8957 int fserror = 0;
8958 struct nameidata nd;
b0d623f7 8959 struct user64_fssearchblock searchblock;
1c79356b
A
8960 struct searchstate *state;
8961 struct attrlist *returnattrs;
b0d623f7 8962 struct timeval timelimit;
1c79356b 8963 void *searchparams1,*searchparams2;
91447636
A
8964 uio_t auio = NULL;
8965 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
b0d623f7 8966 uint32_t nummatches;
1c79356b 8967 int mallocsize;
b0d623f7 8968 uint32_t nameiflags;
2d21ac55 8969 vfs_context_t ctx = vfs_context_current();
91447636 8970 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 8971
39236c6e 8972 /* Start by copying in fsearchblock parameter list */
91447636 8973 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
8974 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
8975 timelimit.tv_sec = searchblock.timelimit.tv_sec;
8976 timelimit.tv_usec = searchblock.timelimit.tv_usec;
91447636
A
8977 }
8978 else {
b0d623f7
A
8979 struct user32_fssearchblock tmp_searchblock;
8980
91447636
A
8981 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
8982 // munge into 64-bit version
8983 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
8984 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
8985 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
8986 searchblock.maxmatches = tmp_searchblock.maxmatches;
39037602 8987 /*
b0d623f7
A
8988 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8989 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8990 */
8991 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
8992 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
91447636
A
8993 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
8994 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
8995 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
8996 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
8997 searchblock.searchattrs = tmp_searchblock.searchattrs;
8998 }
8999 if (error)
1c79356b
A
9000 return(error);
9001
39037602 9002 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
a3d08fcd 9003 */
39037602 9004 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
a3d08fcd
A
9005 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
9006 return(EINVAL);
39037602 9007
1c79356b
A
9008 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
9009 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
9010 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
9011 /* block. */
fe8ab488
A
9012 /* */
9013 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
9014 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
9015 /* assumes the size is still 556 bytes it will continue to work */
39037602 9016
91447636 9017 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
fe8ab488 9018 sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
1c79356b
A
9019
9020 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
9021
9022 /* Now set up the various pointers to the correct place in our newly allocated memory */
9023
9024 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
9025 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
9026 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
9027
9028 /* Now copy in the stuff given our local variables. */
9029
91447636 9030 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
1c79356b
A
9031 goto freeandexit;
9032
91447636 9033 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
1c79356b
A
9034 goto freeandexit;
9035
91447636 9036 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
1c79356b 9037 goto freeandexit;
39037602 9038
91447636 9039 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
1c79356b 9040 goto freeandexit;
1c79356b 9041
39236c6e
A
9042 /*
9043 * When searching a union mount, need to set the
9044 * start flag at the first call on each layer to
9045 * reset state for the new volume.
9046 */
9047 if (uap->options & SRCHFS_START)
9048 state->ss_union_layer = 0;
39037602 9049 else
39236c6e
A
9050 uap->options |= state->ss_union_flags;
9051 state->ss_union_flags = 0;
b0d623f7
A
9052
9053 /*
9054 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
9055 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
39037602
A
9056 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
9057 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
b0d623f7
A
9058 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
9059 */
9060
9061 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
9062 attrreference_t* string_ref;
9063 u_int32_t* start_length;
39037602 9064 user64_size_t param_length;
b0d623f7
A
9065
9066 /* validate searchparams1 */
39037602 9067 param_length = searchblock.sizeofsearchparams1;
b0d623f7
A
9068 /* skip the word that specifies length of the buffer */
9069 start_length= (u_int32_t*) searchparams1;
9070 start_length= start_length+1;
9071 string_ref= (attrreference_t*) start_length;
9072
9073 /* ensure no negative offsets or too big offsets */
9074 if (string_ref->attr_dataoffset < 0 ) {
9075 error = EINVAL;
39037602 9076 goto freeandexit;
b0d623f7
A
9077 }
9078 if (string_ref->attr_length > MAXPATHLEN) {
9079 error = EINVAL;
9080 goto freeandexit;
9081 }
39037602 9082
b0d623f7
A
9083 /* Check for pointer overflow in the string ref */
9084 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
9085 error = EINVAL;
9086 goto freeandexit;
9087 }
9088
9089 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
9090 error = EINVAL;
9091 goto freeandexit;
9092 }
9093 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
9094 error = EINVAL;
9095 goto freeandexit;
9096 }
9097 }
9098
9099 /* set up the uio structure which will contain the users return buffer */
39236c6e
A
9100 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
9101 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
1c79356b 9102
91447636 9103 nameiflags = 0;
1c79356b 9104 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6d2010ae
A
9105 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
9106 UIO_USERSPACE, uap->path, ctx);
1c79356b 9107
55e303ae
A
9108 error = namei(&nd);
9109 if (error)
1c79356b 9110 goto freeandexit;
39236c6e 9111 vp = nd.ni_vp;
91447636 9112 nameidone(&nd);
39236c6e
A
9113
9114 /*
9115 * Switch to the root vnode for the volume
9116 */
9117 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
fe8ab488 9118 vnode_put(vp);
39236c6e
A
9119 if (error)
9120 goto freeandexit;
39236c6e
A
9121 vp = tvp;
9122
9123 /*
9124 * If it's a union mount, the path lookup takes
9125 * us to the top layer. But we may need to descend
9126 * to a lower layer. For non-union mounts the layer
9127 * is always zero.
9128 */
9129 for (i = 0; i < (int) state->ss_union_layer; i++) {
9130 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
9131 break;
9132 tvp = vp;
9133 vp = vp->v_mount->mnt_vnodecovered;
9134 if (vp == NULL) {
fe8ab488 9135 vnode_put(tvp);
39236c6e
A
9136 error = ENOENT;
9137 goto freeandexit;
9138 }
813fb2f6 9139 error = vnode_getwithref(vp);
39236c6e 9140 vnode_put(tvp);
813fb2f6
A
9141 if (error)
9142 goto freeandexit;
39236c6e 9143 }
1c79356b 9144
6d2010ae
A
9145#if CONFIG_MACF
9146 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
9147 if (error) {
9148 vnode_put(vp);
9149 goto freeandexit;
9150 }
9151#endif
9152
39037602 9153
1c79356b 9154 /*
39037602 9155 * If searchblock.maxmatches == 0, then skip the search. This has happened
39236c6e 9156 * before and sometimes the underlying code doesnt deal with it well.
1c79356b
A
9157 */
9158 if (searchblock.maxmatches == 0) {
9159 nummatches = 0;
9160 goto saveandexit;
9161 }
9162
9163 /*
39236c6e 9164 * Allright, we have everything we need, so lets make that call.
39037602 9165 *
39236c6e
A
9166 * We keep special track of the return value from the file system:
9167 * EAGAIN is an acceptable error condition that shouldn't keep us
9168 * from copying out any results...
1c79356b
A
9169 */
9170
6d2010ae 9171 fserror = VNOP_SEARCHFS(vp,
39236c6e
A
9172 searchparams1,
9173 searchparams2,
9174 &searchblock.searchattrs,
9175 (u_long)searchblock.maxmatches,
9176 &timelimit,
9177 returnattrs,
9178 &nummatches,
9179 (u_long)uap->scriptcode,
9180 (u_long)uap->options,
9181 auio,
9182 (struct searchstate *) &state->ss_fsstate,
9183 ctx);
39037602 9184
39236c6e
A
9185 /*
9186 * If it's a union mount we need to be called again
9187 * to search the mounted-on filesystem.
9188 */
9189 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
9190 state->ss_union_flags = SRCHFS_START;
9191 state->ss_union_layer++; // search next layer down
9192 fserror = EAGAIN;
9193 }
9194
6d2010ae
A
9195saveandexit:
9196
9197 vnode_put(vp);
9198
9199 /* Now copy out the stuff that needs copying out. That means the number of matches, the
9200 search state. Everything was already put into he return buffer by the vop call. */
9201
9202 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
9203 goto freeandexit;
9204
39236c6e 9205 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
6d2010ae 9206 goto freeandexit;
39037602 9207
6d2010ae
A
9208 error = fserror;
9209
9210freeandexit:
9211
9212 FREE(searchparams1,M_TEMP);
9213
9214 return(error);
9215
9216
9217} /* end of searchfs system call */
9218
316670eb
A
9219#else /* CONFIG_SEARCHFS */
9220
9221int
9222searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
9223{
9224 return (ENOTSUP);
9225}
9226
9227#endif /* CONFIG_SEARCHFS */
6d2010ae
A
9228
9229
9230lck_grp_attr_t * nspace_group_attr;
9231lck_attr_t * nspace_lock_attr;
9232lck_grp_t * nspace_mutex_group;
9233
9234lck_mtx_t nspace_handler_lock;
9235lck_mtx_t nspace_handler_exclusion_lock;
9236
9237time_t snapshot_timestamp=0;
9238int nspace_allow_virtual_devs=0;
9239
9240void nspace_handler_init(void);
9241
/*
 * Element type of the nspace_items[] table.
 * NOTE(review): the code that fills in and consumes these fields is outside
 * this part of the file; the per-field note below is an assumption to confirm
 * against it.
 */
typedef struct nspace_item_info {
	struct vnode *vp;
	void *arg;
	uint64_t op;
	uint32_t vid;
	uint32_t flags;		/* presumably a mask of NSPACE_ITEM_* bits -- TODO confirm */
	uint32_t token;
	uint32_t refcount;
} nspace_item_info;
9251
9252#define MAX_NSPACE_ITEMS 128
9253nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
9254uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
9255uint32_t nspace_token_id=0;
9256uint32_t nspace_handler_timeout = 15; // seconds
9257
9258#define NSPACE_ITEM_NEW 0x0001
9259#define NSPACE_ITEM_PROCESSING 0x0002
9260#define NSPACE_ITEM_DEAD 0x0004
9261#define NSPACE_ITEM_CANCELLED 0x0008
9262#define NSPACE_ITEM_DONE 0x0010
9263#define NSPACE_ITEM_RESET_TIMER 0x0020
9264
9265#define NSPACE_ITEM_NSPACE_EVENT 0x0040
9266#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
6d2010ae 9267
fe8ab488 9268#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
6d2010ae
A
9269
9270//#pragma optimization_level 0
9271
/*
 * Kinds of namespace handler.  NSPACE_HANDLER_COUNT is not a kind itself;
 * it sizes nspace_handlers[] and bounds the loop in
 * nspace_is_special_process().
 */
typedef enum {
	NSPACE_HANDLER_NSPACE = 0,
	NSPACE_HANDLER_SNAPSHOT = 1,

	NSPACE_HANDLER_COUNT,
} nspace_type_t;

/*
 * One handler slot.  handler_proc is what nspace_is_special_process()
 * compares a process against.
 */
typedef struct {
	uint64_t handler_tid;
	struct proc *handler_proc;
	int handler_busy;
} nspace_handler_t;

/* NSPACE_HANDLER_COUNT slots; presumably indexed by nspace_type_t -- TODO confirm. */
nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
9286
39236c6e
A
9287/* namespace fsctl functions */
9288static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
9289static int nspace_item_flags_for_type(nspace_type_t nspace_type);
9290static int nspace_open_flags_for_type(nspace_type_t nspace_type);
9291static nspace_type_t nspace_type_for_op(uint64_t op);
9292static int nspace_is_special_process(struct proc *proc);
9293static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
9294static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
9295static int validate_namespace_args (int is64bit, int size);
9296static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
9297
9298
6d2010ae
A
9299static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
9300{
9301 switch(nspace_type) {
9302 case NSPACE_HANDLER_NSPACE:
9303 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
9304 case NSPACE_HANDLER_SNAPSHOT:
9305 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
6d2010ae
A
9306 default:
9307 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
9308 return 0;
9309 }
9310}
9311
9312static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
9313{
9314 switch(nspace_type) {
9315 case NSPACE_HANDLER_NSPACE:
9316 return NSPACE_ITEM_NSPACE_EVENT;
9317 case NSPACE_HANDLER_SNAPSHOT:
9318 return NSPACE_ITEM_SNAPSHOT_EVENT;
6d2010ae
A
9319 default:
9320 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
9321 return 0;
9322 }
9323}
9324
9325static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
9326{
9327 switch(nspace_type) {
9328 case NSPACE_HANDLER_NSPACE:
9329 return FREAD | FWRITE | O_EVTONLY;
9330 case NSPACE_HANDLER_SNAPSHOT:
6d2010ae
A
9331 return FREAD | O_EVTONLY;
9332 default:
9333 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
9334 return 0;
9335 }
9336}
9337
9338static inline nspace_type_t nspace_type_for_op(uint64_t op)
9339{
9340 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
9341 case NAMESPACE_HANDLER_NSPACE_EVENT:
9342 return NSPACE_HANDLER_NSPACE;
9343 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
9344 return NSPACE_HANDLER_SNAPSHOT;
6d2010ae
A
9345 default:
9346 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
9347 return NSPACE_HANDLER_NSPACE;
9348 }
9349}
9350
9351static inline int nspace_is_special_process(struct proc *proc)
9352{
9353 int i;
9354 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9355 if (proc == nspace_handlers[i].handler_proc)
9356 return 1;
9357 }
9358 return 0;
9359}
9360
9361void
9362nspace_handler_init(void)
9363{
9364 nspace_lock_attr = lck_attr_alloc_init();
9365 nspace_group_attr = lck_grp_attr_alloc_init();
9366 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
9367 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
9368 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
9369 memset(&nspace_items[0], 0, sizeof(nspace_items));
9370}
9371
9372void
9373nspace_proc_exit(struct proc *p)
9374{
9375 int i, event_mask = 0;
39037602 9376
6d2010ae
A
9377 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9378 if (p == nspace_handlers[i].handler_proc) {
9379 event_mask |= nspace_item_flags_for_type(i);
9380 nspace_handlers[i].handler_tid = 0;
9381 nspace_handlers[i].handler_proc = NULL;
9382 }
9383 }
9384
9385 if (event_mask == 0) {
9386 return;
9387 }
39037602
A
9388
9389 lck_mtx_lock(&nspace_handler_lock);
6d2010ae
A
9390 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
9391 // if this process was the snapshot handler, zero snapshot_timeout
9392 snapshot_timestamp = 0;
9393 }
39037602 9394
6d2010ae
A
9395 //
9396 // unblock anyone that's waiting for the handler that died
9397 //
6d2010ae
A
9398 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9399 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
9400
9401 if ( nspace_items[i].flags & event_mask ) {
9402
9403 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9404 vnode_lock_spin(nspace_items[i].vp);
9405 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9406 vnode_unlock(nspace_items[i].vp);
9407 }
9408 nspace_items[i].vp = NULL;
9409 nspace_items[i].vid = 0;
9410 nspace_items[i].flags = NSPACE_ITEM_DONE;
9411 nspace_items[i].token = 0;
39037602 9412
6d2010ae
A
9413 wakeup((caddr_t)&(nspace_items[i].vp));
9414 }
9415 }
9416 }
39037602 9417
6d2010ae
A
9418 wakeup((caddr_t)&nspace_item_idx);
9419 lck_mtx_unlock(&nspace_handler_lock);
9420}
9421
9422
/*
 * Convenience wrapper: resolve a namespace item with no extra argument.
 * Equivalent to resolve_nspace_item_ext(vp, op, NULL) and returns whatever
 * that call returns.
 */
39037602 9423int
6d2010ae
A
9424resolve_nspace_item(struct vnode *vp, uint64_t op)
9425{
9426 return resolve_nspace_item_ext(vp, op, NULL);
9427}
9428
/*
 * Queue an event for (vp, op) to the matching namespace handler and block
 * the caller until the handler resolves it.
 *
 * vp:  vnode the event is about.
 * op:  operation word; its event-type bits choose the handler type.
 * arg: stored in the item unless it equals NSPACE_REARM_NO_ARG; for
 *      snapshot events a non-NULL arg also sets VNEEDSSNAPSHOT on vp.
 *
 * Returns:
 *   0          - nothing to do (unsupported vnode type, snapshot event on
 *                a virtual device, no handler registered) or the item was
 *                marked DONE
 *   EDEADLK    - the caller is itself a handler process
 *   ETIMEDOUT  - the wait expired and the handler had not asked for more
 *                time via NSPACE_ITEM_RESET_TIMER
 *   item token - when the item was marked CANCELLED (the token field is
 *                returned as the error value)
 *   otherwise  - whatever msleep() returned, including when the table was
 *                full and the wait for a free slot failed or timed out
 */
39037602 9429int
6d2010ae
A
9430resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
9431{
9432 int i, error, keep_waiting;
9433 struct timespec ts;
9434 nspace_type_t nspace_type = nspace_type_for_op(op);
9435
9436 // only allow namespace events on regular files, directories and symlinks.
9437 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
9438 return 0;
9439 }
9440
9441 //
9442 // if this is a snapshot event and the vnode is on a
9443 // disk image just pretend nothing happened since any
9444 // change to the disk image will cause the disk image
9445 // itself to get backed up and this avoids multi-way
9446 // deadlocks between the snapshot handler and the ever
9447 // popular diskimages-helper process. the variable
9448 // nspace_allow_virtual_devs allows this behavior to
9449 // be overridden (for use by the Mobile TimeMachine
9450 // testing infrastructure which uses disk images)
9451 //
9452 if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
9453 && (vp->v_mount != NULL)
9454 && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
9455 && !nspace_allow_virtual_devs) {
9456
9457 return 0;
9458 }
9459
9460 // if (thread_tid(current_thread()) == namespace_handler_tid) {
// no handler registered for this event type: nothing to wait for.
// NOTE(review): this read happens before nspace_handler_lock is taken.
9461 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9462 return 0;
9463 }
9464
// a handler process must never block on an event only a handler can resolve.
9465 if (nspace_is_special_process(current_proc())) {
9466 return EDEADLK;
9467 }
9468
9469 lck_mtx_lock(&nspace_handler_lock);
9470
9471retry:
// pass 1: look for an existing item for the same vnode and operation.
9472 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9473 if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
9474 break;
9475 }
9476 }
9477
// pass 2: no existing item, so look for a free slot (flags == 0);
// otherwise join the existing item by taking another reference on it.
9478 if (i >= MAX_NSPACE_ITEMS) {
9479 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9480 if (nspace_items[i].flags == 0) {
9481 break;
9482 }
9483 }
9484 } else {
9485 nspace_items[i].refcount++;
9486 }
39037602 9487
6d2010ae
A
// table is full: sleep on &nspace_token_id, which a finishing waiter
// wakes after dropping its reference, then rescan from the top.
9488 if (i >= MAX_NSPACE_ITEMS) {
9489 ts.tv_sec = nspace_handler_timeout;
9490 ts.tv_nsec = 0;
9491
9492 error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
9493 if (error == 0) {
9494 // an entry got free'd up, go see if we can get a slot
9495 goto retry;
9496 } else {
9497 lck_mtx_unlock(&nspace_handler_lock);
9498 return error;
9499 }
9500 }
9501
9502 //
9503 // if it didn't already exist, add it. if it did exist
9504 // we'll get woken up when someone does a wakeup() on
9505 // the slot in the nspace_items table.
9506 //
9507 if (vp != nspace_items[i].vp) {
9508 nspace_items[i].vp = vp;
39236c6e 9509 nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
6d2010ae
A
9510 nspace_items[i].op = op;
9511 nspace_items[i].vid = vnode_vid(vp);
9512 nspace_items[i].flags = NSPACE_ITEM_NEW;
9513 nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
9514 if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
9515 if (arg) {
9516 vnode_lock_spin(vp);
9517 vp->v_flag |= VNEEDSSNAPSHOT;
9518 vnode_unlock(vp);
9519 }
9520 }
9521
9522 nspace_items[i].token = 0;
9523 nspace_items[i].refcount = 1;
39037602 9524
6d2010ae
A
// tell a handler sleeping in wait_for_namespace_event() there is work.
9525 wakeup((caddr_t)&nspace_item_idx);
9526 }
9527
9528 //
9529 // Now go to sleep until the handler does a wakeup on this
9530 // slot in the nspace_items table (or we timeout).
9531 //
9532 keep_waiting = 1;
9533 while(keep_waiting) {
9534 ts.tv_sec = nspace_handler_timeout;
9535 ts.tv_nsec = 0;
9536 error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
9537
// the item's state takes precedence over msleep()'s own return value.
9538 if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
9539 error = 0;
9540 } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
9541 error = nspace_items[i].token;
9542 } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
// the handler asked for more time: consume the request and sleep again.
9543 if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
9544 nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
9545 continue;
9546 } else {
9547 error = ETIMEDOUT;
9548 }
9549 } else if (error == 0) {
9550 // hmmm, why did we get woken up?
9551 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
9552 nspace_items[i].token);
39037602 9553 }
6d2010ae
A
9554
// last waiter out releases the slot; then wake anyone waiting for a free slot.
9555 if (--nspace_items[i].refcount == 0) {
9556 nspace_items[i].vp = NULL; // clear this so that no one will match on it again
9557 nspace_items[i].arg = NULL;
9558 nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
9559 nspace_items[i].flags = 0; // this clears it for re-use
9560 }
9561 wakeup(&nspace_token_id);
9562 keep_waiting = 0;
9563 }
9564
9565 lck_mtx_unlock(&nspace_handler_lock);
9566
9567 return error;
9568}
9569
39037602 9570int nspace_snapshot_event(vnode_t vp, time_t ctime, uint64_t op_type, void *arg)
6d2010ae 9571{
39037602 9572 int snapshot_error = 0;
6d2010ae 9573
39037602
A
9574 if (vp == NULL) {
9575 return 0;
9576 }
9577
9578 /* Swap files are special; skip them */
9579 if (vnode_isswap(vp)) {
9580 return 0;
9581 }
9582
9583 if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
9584 // the change time is within this epoch
9585 int error;
9586
9587 error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
9588 if (error == EDEADLK) {
9589 snapshot_error = 0;
9590 } else if (error) {
9591 if (error == EAGAIN) {
9592 printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
9593 } else if (error == EINTR) {
9594 // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
9595 snapshot_error = EINTR;
9596 }
9597 }
9598 }
9599
9600 return snapshot_error;
9601}
9602
9603int
9604get_nspace_item_status(struct vnode *vp, int32_t *status)
9605{
9606 int i;
9607
9608 lck_mtx_lock(&nspace_handler_lock);
9609 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9610 if (nspace_items[i].vp == vp) {
9611 break;
6d2010ae
A
9612 }
9613 }
9614
9615 if (i >= MAX_NSPACE_ITEMS) {
9616 lck_mtx_unlock(&nspace_handler_lock);
9617 return ENOENT;
9618 }
9619
9620 *status = nspace_items[i].flags;
9621 lck_mtx_unlock(&nspace_handler_lock);
9622 return 0;
9623}
39037602 9624
6d2010ae
A
9625
/*
 * Compiled out (#if 0).  Formats a "/.vol/<fsid>/<fileid>" path for vp
 * into path.
 *
 * On entry *len is the capacity of path; on return it is snprintf()'s
 * result plus one, i.e. the size the full string needs including the
 * terminator, which can exceed the capacity if the output was truncated.
 *
 * Returns 0 on success, or -1 when vnode_getattr() fails, in which case a
 * placeholder path is written instead.
 */
9626#if 0
9627static int
9628build_volfs_path(struct vnode *vp, char *path, int *len)
9629{
9630 struct vnode_attr va;
9631 int ret;
9632
9633 VATTR_INIT(&va);
9634 VATTR_WANTED(&va, va_fsid);
9635 VATTR_WANTED(&va, va_fileid);
9636
9637 if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
9638 *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
9639 ret = -1;
9640 } else {
9641 *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
9642 ret = 0;
9643 }
9644
9645 return ret;
9646}
9647#endif
9648
9649//
9650// Note: this function does NOT check permissions on all of the
9651// parent directories leading to this vnode. It should only be
9652// called on behalf of a root process. Otherwise a process may
9653// get access to a file because the file itself is readable even
9654// though its parent directories would prevent access.
9655//
9656static int
9657vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
9658{
9659 int error, action;
9660
9661 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9662 return error;
9663 }
9664
9665#if CONFIG_MACF
9666 error = mac_vnode_check_open(ctx, vp, fmode);
9667 if (error)
9668 return error;
9669#endif
1c79356b 9670
6d2010ae
A
9671 /* compute action to be authorized */
9672 action = 0;
9673 if (fmode & FREAD) {
9674 action |= KAUTH_VNODE_READ_DATA;
9675 }
9676 if (fmode & (FWRITE | O_TRUNC)) {
9677 /*
9678 * If we are writing, appending, and not truncating,
9679 * indicate that we are appending so that if the
9680 * UF_APPEND or SF_APPEND bits are set, we do not deny
9681 * the open.
9682 */
9683 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
9684 action |= KAUTH_VNODE_APPEND_DATA;
9685 } else {
9686 action |= KAUTH_VNODE_WRITE_DATA;
9687 }
9688 }
1c79356b 9689
6d2010ae
A
9690 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
9691 return error;
39037602 9692
1c79356b 9693
6d2010ae
A
9694 //
9695 // if the vnode is tagged VOPENEVT and the current process
9696 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
9697 // flag to the open mode so that this open won't count against
9698 // the vnode when carbon delete() does a vnode_isinuse() to see
9699 // if a file is currently in use. this allows spotlight
9700 // importers to not interfere with carbon apps that depend on
9701 // the no-delete-if-busy semantics of carbon delete().
9702 //
9703 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
9704 fmode |= O_EVTONLY;
9705 }
1c79356b 9706
6d2010ae
A
9707 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
9708 return error;
9709 }
9710 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
9711 VNOP_CLOSE(vp, fmode, ctx);
9712 return error;
9713 }
1c79356b 9714
39037602 9715 /* Call out to allow 3rd party notification of open.
6d2010ae
A
9716 * Ignore result of kauth_authorize_fileop call.
9717 */
4b17d6b6
A
9718#if CONFIG_MACF
9719 mac_vnode_notify_open(ctx, vp, fmode);
9720#endif
39037602 9721 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
6d2010ae 9722 (uintptr_t)vp, 0);
1c79356b 9723
1c79356b 9724
6d2010ae
A
9725 return 0;
9726}
1c79356b 9727
/*
 * Handler side of the namespace-event rendezvous: block until an item of
 * the given type is queued, open its vnode on the handler's behalf and
 * copy the event description out to the user-space buffers named in nhd.
 *
 * nhd:         user addresses to fill in; token, flags and fdptr are
 *              always written, infoptr and objid only when non-zero.
 * nspace_type: which handler is calling.
 *
 * The calling process is registered as the handler for nspace_type if no
 * handler is registered yet, and its task is marked dependency-capable.
 *
 * Returns 0 after handing exactly one event to user space, EBUSY when
 * another thread is already waiting as this handler type, EINVAL when the
 * snapshot handler is invoked while snapshot_timestamp is 0 or ~0, or the
 * error from msleep() or from any step of preparing the event (in which
 * case the waiting thread is released with the item marked DONE).
 */
6d2010ae 9728static int
39236c6e 9729wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
6d2010ae 9730{
39037602
A
9731 int i;
9732 int error = 0;
9733 int unblock = 0;
6d2010ae 9734 task_t curtask;
39037602 9735
6d2010ae
A
/* Allow only one thread at a time to wait as this handler type. */
9736 lck_mtx_lock(&nspace_handler_exclusion_lock);
9737 if (nspace_handlers[nspace_type].handler_busy) {
9738 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9739 return EBUSY;
9740 }
39037602 9741
6d2010ae
A
9742 nspace_handlers[nspace_type].handler_busy = 1;
9743 lck_mtx_unlock(&nspace_handler_exclusion_lock);
39037602
A
9744
9745 /*
6d2010ae
A
9746 * Any process that gets here will be one of the namespace handlers.
9747 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
9748 * as we can cause deadlocks to occur, because the namespace handler may prevent
39037602 9749 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
6d2010ae
A
9750 * process.
9751 */
9752 curtask = current_task();
39037602
A
9753 bsd_set_dependency_capable (curtask);
9754
6d2010ae
A
/* First caller for this type becomes the registered handler. */
9755 lck_mtx_lock(&nspace_handler_lock);
9756 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9757 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
9758 nspace_handlers[nspace_type].handler_proc = current_proc();
9759 }
39037602
A
9760
/* A snapshot handler has nothing to do until a snapshot time is set. */
9761 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
9762 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9763 error = EINVAL;
9764 }
9765
/*
 * Despite the loop, at most one item is processed per call: the body
 * either sleeps and retries, or handles one item and breaks out.
 */
6d2010ae 9766 while (error == 0) {
39037602
A
9767
9768 /* Try to find matching namespace item */
9769 for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
6d2010ae 9770 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
39037602
A
9771 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9772 break;
6d2010ae 9773 }
6d2010ae
A
9774 }
9775 }
39236c6e 9776
39037602
A
9777 if (i >= MAX_NSPACE_ITEMS) {
9778 /* Nothing is there yet. Wait for wake up and retry */
6d2010ae
A
/* No timeout is passed; a non-zero msleep() result ends the loop via its condition. */
9779 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
9780 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
39037602 9781 /* Prevent infinite loop if snapshot handler exited */
6d2010ae
A
9782 error = EINVAL;
9783 break;
9784 }
39037602 9785 continue;
6d2010ae 9786 }
39037602
A
9787
/* Claim the item and mint the token user space will use to refer to it. */
9788 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
9789 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
9790 nspace_items[i].token = ++nspace_token_id;
9791
9792 assert(nspace_items[i].vp);
9793 struct fileproc *fp;
9794 int32_t indx;
9795 int32_t fmode;
9796 struct proc *p = current_proc();
9797 vfs_context_t ctx = vfs_context_current();
9798 struct vnode_attr va;
9799 bool vn_get_succsessful = false;
9800 bool vn_open_successful = false;
9801 bool fp_alloc_successful = false;
9802
9803 /*
9804 * Use vnode pointer to acquire a file descriptor for
9805 * hand-off to userland
9806 */
9807 fmode = nspace_open_flags_for_type(nspace_type);
/* Fails if the vnode no longer matches the vid recorded when the item was queued. */
9808 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
9809 if (error) goto cleanup;
9810 vn_get_succsessful = true;
9811
9812 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
9813 if (error) goto cleanup;
9814 vn_open_successful = true;
9815
9816 error = falloc(p, &fp, &indx, ctx);
9817 if (error) goto cleanup;
9818 fp_alloc_successful = true;
9819
9820 fp->f_fglob->fg_flag = fmode;
9821 fp->f_fglob->fg_ops = &vnops;
9822 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
9823
9824 proc_fdlock(p);
9825 procfdtbl_releasefd(p, indx, NULL);
9826 fp_drop(p, indx, fp, 1);
9827 proc_fdunlock(p);
/*
 * NOTE(review): the copyout failures below jump to cleanup, which calls
 * fp_free() and vn_close() after the descriptor has been released above
 * -- confirm that teardown is safe at that point.
 */
9828
9829 /*
9830 * All variants of the namespace handler struct support these three fields:
9831 * token, flags, and the FD pointer
9832 */
9833 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
9834 if (error) goto cleanup;
/* The user's "flags" field receives the item's op word. */
9835 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
9836 if (error) goto cleanup;
9837 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
9838 if (error) goto cleanup;
9839
9840 /*
9841 * Handle optional fields:
9842 * extended version support an info ptr (offset, length), and the
9843 *
9844 * namedata version supports a unique per-link object ID
9845 *
9846 */
9847 if (nhd->infoptr) {
9848 uio_t uio = (uio_t)nspace_items[i].arg;
9849 uint64_t u_offset, u_length;
9850
9851 if (uio) {
9852 u_offset = uio_offset(uio);
9853 u_length = uio_resid(uio);
9854 } else {
9855 u_offset = 0;
9856 u_length = 0;
9857 }
/* infoptr is written as two consecutive uint64_t values: offset, then length. */
9858 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
9859 if (error) goto cleanup;
9860 error = copyout(&u_length, nhd->infoptr + sizeof(uint64_t), sizeof(uint64_t));
9861 if (error) goto cleanup;
9862 }
9863
9864 if (nhd->objid) {
9865 VATTR_INIT(&va);
9866 VATTR_WANTED(&va, va_linkid);
9867 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
9868 if (error) goto cleanup;
9869
/* Report 0 when the filesystem does not supply va_linkid. */
9870 uint64_t linkid = 0;
9871 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
9872 linkid = (uint64_t)va.va_linkid;
9873 }
9874 error = copyout(&linkid, nhd->objid, sizeof(uint64_t));
9875 }
9876cleanup:
/* On failure undo what was set up and arrange to release the waiter. */
9877 if (error) {
9878 if (fp_alloc_successful) fp_free(p, indx, fp);
9879 if (vn_open_successful) vn_close(nspace_items[i].vp, fmode, ctx);
9880 unblock = 1;
9881 }
9882
/* The iocount from vnode_getwithvid() is dropped on success and failure alike. */
9883 if (vn_get_succsessful) vnode_put(nspace_items[i].vp);
9884
9885 break;
6d2010ae 9886 }
39037602 9887
6d2010ae
A
/* unblock is only set in the cleanup path above, where i indexes a claimed item. */
9888 if (unblock) {
9889 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9890 vnode_lock_spin(nspace_items[i].vp);
9891 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9892 vnode_unlock(nspace_items[i].vp);
9893 }
9894 nspace_items[i].vp = NULL;
9895 nspace_items[i].vid = 0;
9896 nspace_items[i].flags = NSPACE_ITEM_DONE;
9897 nspace_items[i].token = 0;
39037602 9898
6d2010ae
A
9899 wakeup((caddr_t)&(nspace_items[i].vp));
9900 }
39037602 9901
6d2010ae
A
9902 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
9903 // just go through every snapshot event and unblock it immediately.
9904 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
39037602 9905 for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
6d2010ae
A
9906 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9907 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9908 nspace_items[i].vp = NULL;
9909 nspace_items[i].vid = 0;
9910 nspace_items[i].flags = NSPACE_ITEM_DONE;
9911 nspace_items[i].token = 0;
39037602
A
9912
9913 wakeup((caddr_t)&(nspace_items[i].vp));
6d2010ae
A
9914 }
9915 }
9916 }
9917 }
9918 }
39037602 9919
6d2010ae 9920 lck_mtx_unlock(&nspace_handler_lock);
39037602 9921
6d2010ae
A
/* Let the next thread wait as this handler type. */
9922 lck_mtx_lock(&nspace_handler_exclusion_lock);
9923 nspace_handlers[nspace_type].handler_busy = 0;
9924 lck_mtx_unlock(&nspace_handler_exclusion_lock);
39037602 9925
6d2010ae
A
9926 return error;
9927}
1c79356b 9928
39236c6e
A
9929static inline int validate_namespace_args (int is64bit, int size) {
9930
9931 if (is64bit) {
9932 /* Must be one of these */
9933 if (size == sizeof(user64_namespace_handler_info)) {
9934 goto sizeok;
9935 }
9936 if (size == sizeof(user64_namespace_handler_info_ext)) {
9937 goto sizeok;
9938 }
9939 if (size == sizeof(user64_namespace_handler_data)) {
9940 goto sizeok;
9941 }
9942 return EINVAL;
9943 }
9944 else {
9945 /* 32 bit -- must be one of these */
9946 if (size == sizeof(user32_namespace_handler_info)) {
9947 goto sizeok;
9948 }
9949 if (size == sizeof(user32_namespace_handler_info_ext)) {
9950 goto sizeok;
9951 }
9952 if (size == sizeof(user32_namespace_handler_data)) {
9953 goto sizeok;
9954 }
9955 return EINVAL;
9956 }
9957
9958sizeok:
9959
9960 return 0;
9961
9962}
1c79356b 9963
6d2010ae
A
9964static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
9965{
9966 int error = 0;
39236c6e 9967 namespace_handler_data nhd;
39037602 9968
39236c6e
A
9969 bzero (&nhd, sizeof(namespace_handler_data));
9970
6d2010ae
A
9971 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9972 return error;
9973 }
39037602 9974
39236c6e
A
9975 error = validate_namespace_args (is64bit, size);
9976 if (error) {
9977 return error;
6d2010ae 9978 }
39037602 9979
39236c6e
A
9980 /* Copy in the userland pointers into our kernel-only struct */
9981
6d2010ae 9982 if (is64bit) {
39236c6e
A
9983 /* 64 bit userland structures */
9984 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
9985 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
9986 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
9987
9988 /* If the size is greater than the standard info struct, add in extra fields */
9989 if (size > (sizeof(user64_namespace_handler_info))) {
9990 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
9991 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
9992 }
9993 if (size == (sizeof(user64_namespace_handler_data))) {
9994 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
9995 }
9996 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
6d2010ae 9997 }
39037602 9998 }
39236c6e
A
9999 else {
10000 /* 32 bit userland structures */
10001 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
10002 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
10003 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
39037602 10004
39236c6e
A
10005 if (size > (sizeof(user32_namespace_handler_info))) {
10006 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
10007 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
10008 }
10009 if (size == (sizeof(user32_namespace_handler_data))) {
10010 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
10011 }
10012 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
6d2010ae
A
10013 }
10014 }
39037602 10015
39236c6e 10016 return wait_for_namespace_event(&nhd, nspace_type);
6d2010ae 10017}
1c79356b 10018
5ba3f43e
A
10019static unsigned long
10020fsctl_bogus_command_compat(unsigned long cmd)
10021{
10022
10023 switch (cmd) {
10024 case IOCBASECMD(FSIOC_SYNC_VOLUME):
10025 return (FSIOC_SYNC_VOLUME);
10026 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID):
10027 return (FSIOC_ROUTEFS_SETROUTEID);
10028 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS):
10029 return (FSIOC_SET_PACKAGE_EXTS);
10030 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_GET):
10031 return (FSIOC_NAMESPACE_HANDLER_GET);
10032 case IOCBASECMD(FSIOC_OLD_SNAPSHOT_HANDLER_GET):
10033 return (FSIOC_OLD_SNAPSHOT_HANDLER_GET);
10034 case IOCBASECMD(FSIOC_SNAPSHOT_HANDLER_GET_EXT):
10035 return (FSIOC_SNAPSHOT_HANDLER_GET_EXT);
10036 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UPDATE):
10037 return (FSIOC_NAMESPACE_HANDLER_UPDATE);
10038 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UNBLOCK):
10039 return (FSIOC_NAMESPACE_HANDLER_UNBLOCK);
10040 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_CANCEL):
10041 return (FSIOC_NAMESPACE_HANDLER_CANCEL);
10042 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME):
10043 return (FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME);
10044 case IOCBASECMD(FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS):
10045 return (FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS);
10046 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE):
10047 return (FSIOC_SET_FSTYPENAME_OVERRIDE);
10048 case IOCBASECMD(DISK_CONDITIONER_IOC_GET):
10049 return (DISK_CONDITIONER_IOC_GET);
10050 case IOCBASECMD(DISK_CONDITIONER_IOC_SET):
10051 return (DISK_CONDITIONER_IOC_SET);
10052 case IOCBASECMD(FSIOC_FIOSEEKHOLE):
10053 return (FSIOC_FIOSEEKHOLE);
10054 case IOCBASECMD(FSIOC_FIOSEEKDATA):
10055 return (FSIOC_FIOSEEKDATA);
10056 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME):
10057 return (SPOTLIGHT_IOC_GET_MOUNT_TIME);
10058 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME):
10059 return (SPOTLIGHT_IOC_GET_LAST_MTIME);
10060 }
10061
10062 return (cmd);
10063}
10064
1c79356b
A
10065/*
10066 * Make a filesystem-specific control call:
10067 */
1c79356b 10068/* ARGSUSED */
b0d623f7
A
10069static int
10070fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
1c79356b 10071{
b0d623f7 10072 int error=0;
91447636 10073 boolean_t is64bit;
2d21ac55 10074 u_int size;
1c79356b 10075#define STK_PARAMS 128
39037602 10076 char stkbuf[STK_PARAMS] = {0};
1c79356b 10077 caddr_t data, memp;
b0d623f7 10078 vnode_t vp = *arg_vp;
1c79356b 10079
5ba3f43e
A
10080 cmd = fsctl_bogus_command_compat(cmd);
10081
1c79356b
A
10082 size = IOCPARM_LEN(cmd);
10083 if (size > IOCPARM_MAX) return (EINVAL);
10084
6d2010ae 10085 is64bit = proc_is64bit(p);
91447636 10086
1c79356b 10087 memp = NULL;
04b8595b 10088
1c79356b
A
10089 if (size > sizeof (stkbuf)) {
10090 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
10091 data = memp;
10092 } else {
91447636 10093 data = &stkbuf[0];
1c79356b 10094 };
39037602 10095
1c79356b
A
10096 if (cmd & IOC_IN) {
10097 if (size) {
b0d623f7 10098 error = copyin(udata, data, size);
39037602 10099 if (error) {
fe8ab488 10100 if (memp) {
39037602 10101 kfree (memp, size);
fe8ab488
A
10102 }
10103 return error;
10104 }
1c79356b 10105 } else {
6d2010ae
A
10106 if (is64bit) {
10107 *(user_addr_t *)data = udata;
10108 }
10109 else {
10110 *(uint32_t *)data = (uint32_t)udata;
10111 }
1c79356b
A
10112 };
10113 } else if ((cmd & IOC_OUT) && size) {
10114 /*
10115 * Zero the buffer so the user always
10116 * gets back something deterministic.
10117 */
10118 bzero(data, size);
91447636 10119 } else if (cmd & IOC_VOID) {
b0d623f7 10120 if (is64bit) {
6d2010ae 10121 *(user_addr_t *)data = udata;
b0d623f7
A
10122 }
10123 else {
6d2010ae 10124 *(uint32_t *)data = (uint32_t)udata;
b0d623f7 10125 }
91447636 10126 }
1c79356b 10127
b0d623f7 10128 /* Check to see if it's a generic command */
5ba3f43e 10129 switch (cmd) {
91447636 10130
5ba3f43e 10131 case FSIOC_SYNC_VOLUME: {
fe8ab488
A
10132 mount_t mp = vp->v_mount;
10133 int arg = *(uint32_t*)data;
b0d623f7 10134
fe8ab488
A
10135 /* record vid of vp so we can drop it below. */
10136 uint32_t vvid = vp->v_id;
b0d623f7 10137
fe8ab488
A
10138 /*
10139 * Then grab mount_iterref so that we can release the vnode.
10140 * Without this, a thread may call vnode_iterate_prepare then
10141 * get into a deadlock because we've never released the root vp
10142 */
10143 error = mount_iterref (mp, 0);
10144 if (error) {
10145 break;
10146 }
10147 vnode_put(vp);
10148
10149 /* issue the sync for this volume */
10150 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
10151
39037602 10152 /*
fe8ab488
A
10153 * Then release the mount_iterref once we're done syncing; it's not
10154 * needed for the VNOP_IOCTL below
10155 */
10156 mount_iterdrop(mp);
10157
10158 if (arg & FSCTL_SYNC_FULLSYNC) {
10159 /* re-obtain vnode iocount on the root vp, if possible */
10160 error = vnode_getwithvid (vp, vvid);
10161 if (error == 0) {
10162 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
10163 vnode_put (vp);
10164 }
b0d623f7 10165 }
fe8ab488
A
10166 /* mark the argument VP as having been released */
10167 *arg_vp = NULL;
b0d623f7 10168 }
fe8ab488 10169 break;
b0d623f7 10170
5ba3f43e 10171 case FSIOC_ROUTEFS_SETROUTEID: {
490019cf
A
10172#if ROUTEFS
10173 char routepath[MAXPATHLEN];
10174 size_t len = 0;
39037602 10175
490019cf
A
10176 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10177 break;
10178 }
10179 bzero(routepath, MAXPATHLEN);
10180 error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
10181 if (error) {
10182 break;
10183 }
10184 error = routefs_kernel_mount(routepath);
10185 if (error) {
10186 break;
10187 }
10188#endif
10189 }
10190 break;
10191
5ba3f43e 10192 case FSIOC_SET_PACKAGE_EXTS: {
fe8ab488
A
10193 user_addr_t ext_strings;
10194 uint32_t num_entries;
10195 uint32_t max_width;
b0d623f7 10196
39037602
A
10197 if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0)))
10198 break;
10199
fe8ab488
A
10200 if ( (is64bit && size != sizeof(user64_package_ext_info))
10201 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
10202
10203 // either you're 64-bit and passed a 64-bit struct or
10204 // you're 32-bit and passed a 32-bit struct. otherwise
10205 // it's not ok.
10206 error = EINVAL;
10207 break;
10208 }
10209
10210 if (is64bit) {
10211 ext_strings = ((user64_package_ext_info *)data)->strings;
10212 num_entries = ((user64_package_ext_info *)data)->num_entries;
10213 max_width = ((user64_package_ext_info *)data)->max_width;
10214 } else {
10215 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
10216 num_entries = ((user32_package_ext_info *)data)->num_entries;
10217 max_width = ((user32_package_ext_info *)data)->max_width;
10218 }
10219 error = set_package_extensions_table(ext_strings, num_entries, max_width);
6d2010ae 10220 }
fe8ab488 10221 break;
2d21ac55 10222
39037602 10223 /* namespace handlers */
5ba3f43e 10224 case FSIOC_NAMESPACE_HANDLER_GET: {
fe8ab488 10225 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
6d2010ae 10226 }
fe8ab488 10227 break;
b0d623f7 10228
fe8ab488 10229 /* Snapshot handlers */
5ba3f43e 10230 case FSIOC_OLD_SNAPSHOT_HANDLER_GET: {
fe8ab488 10231 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
39037602 10232 }
fe8ab488 10233 break;
39236c6e 10234
5ba3f43e 10235 case FSIOC_SNAPSHOT_HANDLER_GET_EXT: {
fe8ab488
A
10236 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
10237 }
39037602 10238 break;
39236c6e 10239
5ba3f43e 10240 case FSIOC_NAMESPACE_HANDLER_UPDATE: {
fe8ab488
A
10241 uint32_t token, val;
10242 int i;
39236c6e 10243
fe8ab488
A
10244 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10245 break;
10246 }
39236c6e 10247
fe8ab488
A
10248 if (!nspace_is_special_process(p)) {
10249 error = EINVAL;
10250 break;
10251 }
6d2010ae 10252
fe8ab488
A
10253 token = ((uint32_t *)data)[0];
10254 val = ((uint32_t *)data)[1];
6d2010ae 10255
fe8ab488 10256 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 10257
fe8ab488
A
10258 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10259 if (nspace_items[i].token == token) {
10260 break; /* exit for loop, not case stmt */
10261 }
10262 }
6d2010ae 10263
fe8ab488
A
10264 if (i >= MAX_NSPACE_ITEMS) {
10265 error = ENOENT;
10266 } else {
10267 //
10268 // if this bit is set, when resolve_nspace_item() times out
10269 // it will loop and go back to sleep.
10270 //
10271 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
10272 }
6d2010ae 10273
fe8ab488
A
10274 lck_mtx_unlock(&nspace_handler_lock);
10275
10276 if (error) {
10277 printf("nspace-handler-update: did not find token %u\n", token);
10278 }
39037602 10279 }
fe8ab488 10280 break;
39037602 10281
5ba3f43e 10282 case FSIOC_NAMESPACE_HANDLER_UNBLOCK: {
fe8ab488
A
10283 uint32_t token, val;
10284 int i;
10285
10286 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
6d2010ae
A
10287 break;
10288 }
6d2010ae 10289
fe8ab488
A
10290 if (!nspace_is_special_process(p)) {
10291 error = EINVAL;
10292 break;
10293 }
6d2010ae 10294
fe8ab488
A
10295 token = ((uint32_t *)data)[0];
10296 val = ((uint32_t *)data)[1];
6d2010ae 10297
fe8ab488 10298 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 10299
fe8ab488
A
10300 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10301 if (nspace_items[i].token == token) {
10302 break; /* exit for loop, not case statement */
10303 }
10304 }
6d2010ae 10305
fe8ab488
A
10306 if (i >= MAX_NSPACE_ITEMS) {
10307 printf("nspace-handler-unblock: did not find token %u\n", token);
10308 error = ENOENT;
10309 } else {
10310 if (val == 0 && nspace_items[i].vp) {
10311 vnode_lock_spin(nspace_items[i].vp);
10312 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10313 vnode_unlock(nspace_items[i].vp);
10314 }
6d2010ae 10315
fe8ab488
A
10316 nspace_items[i].vp = NULL;
10317 nspace_items[i].arg = NULL;
10318 nspace_items[i].op = 0;
10319 nspace_items[i].vid = 0;
10320 nspace_items[i].flags = NSPACE_ITEM_DONE;
10321 nspace_items[i].token = 0;
6d2010ae 10322
fe8ab488
A
10323 wakeup((caddr_t)&(nspace_items[i].vp));
10324 }
10325
10326 lck_mtx_unlock(&nspace_handler_lock);
39037602 10327 }
fe8ab488 10328 break;
6d2010ae 10329
5ba3f43e 10330 case FSIOC_NAMESPACE_HANDLER_CANCEL: {
fe8ab488
A
10331 uint32_t token, val;
10332 int i;
6d2010ae 10333
fe8ab488 10334 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
6d2010ae
A
10335 break;
10336 }
6d2010ae 10337
fe8ab488
A
10338 if (!nspace_is_special_process(p)) {
10339 error = EINVAL;
10340 break;
6d2010ae
A
10341 }
10342
fe8ab488
A
10343 token = ((uint32_t *)data)[0];
10344 val = ((uint32_t *)data)[1];
6d2010ae 10345
fe8ab488 10346 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 10347
fe8ab488
A
10348 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10349 if (nspace_items[i].token == token) {
10350 break; /* exit for loop, not case stmt */
10351 }
10352 }
6d2010ae 10353
fe8ab488
A
10354 if (i >= MAX_NSPACE_ITEMS) {
10355 printf("nspace-handler-cancel: did not find token %u\n", token);
10356 error = ENOENT;
10357 } else {
10358 if (nspace_items[i].vp) {
10359 vnode_lock_spin(nspace_items[i].vp);
10360 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10361 vnode_unlock(nspace_items[i].vp);
10362 }
6d2010ae 10363
39037602
A
10364 nspace_items[i].vp = NULL;
10365 nspace_items[i].arg = NULL;
fe8ab488
A
10366 nspace_items[i].vid = 0;
10367 nspace_items[i].token = val;
10368 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
39037602 10369 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
6d2010ae 10370
fe8ab488
A
10371 wakeup((caddr_t)&(nspace_items[i].vp));
10372 }
6d2010ae 10373
fe8ab488 10374 lck_mtx_unlock(&nspace_handler_lock);
39037602 10375 }
fe8ab488 10376 break;
6d2010ae 10377
5ba3f43e 10378 case FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
fe8ab488 10379 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
6d2010ae
A
10380 break;
10381 }
6d2010ae 10382
fe8ab488 10383 // we explicitly do not do the namespace_handler_proc check here
6d2010ae 10384
fe8ab488
A
10385 lck_mtx_lock(&nspace_handler_lock);
10386 snapshot_timestamp = ((uint32_t *)data)[0];
10387 wakeup(&nspace_item_idx);
10388 lck_mtx_unlock(&nspace_handler_lock);
10389 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
6d2010ae 10390
39037602 10391 }
fe8ab488 10392 break;
6d2010ae 10393
5ba3f43e 10394 case FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
fe8ab488
A
10395 {
10396 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10397 break;
10398 }
6d2010ae 10399
fe8ab488
A
10400 lck_mtx_lock(&nspace_handler_lock);
10401 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
10402 lck_mtx_unlock(&nspace_handler_lock);
10403 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
10404 nspace_allow_virtual_devs ? "" : " NOT");
10405 error = 0;
6d2010ae 10406
6d2010ae 10407 }
fe8ab488 10408 break;
6d2010ae 10409
5ba3f43e 10410 case FSIOC_SET_FSTYPENAME_OVERRIDE:
39037602 10411 {
fe8ab488
A
10412 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10413 break;
10414 }
10415 if (vp->v_mount) {
10416 mount_lock(vp->v_mount);
10417 if (data[0] != 0) {
10418 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
10419 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
10420 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10421 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
10422 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
10423 }
10424 } else {
10425 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10426 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
10427 }
10428 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
10429 vp->v_mount->fstypename_override[0] = '\0';
6d2010ae 10430 }
fe8ab488 10431 mount_unlock(vp->v_mount);
6d2010ae 10432 }
6d2010ae 10433 }
fe8ab488 10434 break;
39037602 10435
5ba3f43e
A
10436 case DISK_CONDITIONER_IOC_GET: {
10437 error = disk_conditioner_get_info(vp->v_mount, (disk_conditioner_info *)data);
10438 }
10439 break;
10440
10441 case DISK_CONDITIONER_IOC_SET: {
10442 error = disk_conditioner_set_info(vp->v_mount, (disk_conditioner_info *)data);
10443 }
10444 break;
10445
fe8ab488
A
10446 default: {
10447 /* Invoke the filesystem-specific code */
5ba3f43e 10448 error = VNOP_IOCTL(vp, cmd, data, options, ctx);
fe8ab488
A
10449 }
10450
10451 } /* end switch stmt */
10452
1c79356b 10453 /*
fe8ab488 10454 * if no errors, copy any data to user. Size was
1c79356b
A
10455 * already set and checked above.
10456 */
39037602 10457 if (error == 0 && (cmd & IOC_OUT) && size)
b0d623f7 10458 error = copyout(data, udata, size);
39037602 10459
fe8ab488
A
10460 if (memp) {
10461 kfree(memp, size);
10462 }
39037602 10463
1c79356b
A
10464 return error;
10465}
b0d623f7
A
10466
10467/* ARGSUSED */
10468int
10469fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
10470{
10471 int error;
39037602 10472 struct nameidata nd;
b0d623f7
A
10473 u_long nameiflags;
10474 vnode_t vp = NULL;
10475 vfs_context_t ctx = vfs_context_current();
10476
10477 AUDIT_ARG(cmd, uap->cmd);
10478 AUDIT_ARG(value32, uap->options);
10479 /* Get the vnode for the file we are getting info on: */
10480 nameiflags = 0;
10481 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6d2010ae
A
10482 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
10483 UIO_USERSPACE, uap->path, ctx);
b0d623f7
A
10484 if ((error = namei(&nd))) goto done;
10485 vp = nd.ni_vp;
10486 nameidone(&nd);
10487
10488#if CONFIG_MACF
10489 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
10490 if (error) {
10491 goto done;
10492 }
10493#endif
10494
10495 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10496
10497done:
10498 if (vp)
10499 vnode_put(vp);
10500 return error;
10501}
10502/* ARGSUSED */
10503int
10504ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
10505{
10506 int error;
10507 vnode_t vp = NULL;
10508 vfs_context_t ctx = vfs_context_current();
10509 int fd = -1;
10510
10511 AUDIT_ARG(fd, uap->fd);
10512 AUDIT_ARG(cmd, uap->cmd);
10513 AUDIT_ARG(value32, uap->options);
39037602 10514
b0d623f7
A
10515 /* Get the vnode for the file we are getting info on: */
10516 if ((error = file_vnode(uap->fd, &vp)))
3e170ce0 10517 return error;
b0d623f7
A
10518 fd = uap->fd;
10519 if ((error = vnode_getwithref(vp))) {
3e170ce0
A
10520 file_drop(fd);
10521 return error;
b0d623f7
A
10522 }
10523
10524#if CONFIG_MACF
3e170ce0
A
10525 if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
10526 file_drop(fd);
10527 vnode_put(vp);
10528 return error;
b0d623f7
A
10529 }
10530#endif
10531
10532 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10533
3e170ce0 10534 file_drop(fd);
b0d623f7 10535
3e170ce0
A
10536 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
10537 if (vp) {
b0d623f7 10538 vnode_put(vp);
3e170ce0
A
10539 }
10540
b0d623f7
A
10541 return error;
10542}
1c79356b 10543/* end of fsctl system call */
0b4e3aa0 10544
91447636
A
10545/*
10546 * Retrieve the data of an extended attribute.
10547 */
10548int
2d21ac55 10549getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
91447636 10550{
2d21ac55 10551 vnode_t vp;
91447636
A
10552 struct nameidata nd;
10553 char attrname[XATTR_MAXNAMELEN+1];
2d21ac55 10554 vfs_context_t ctx = vfs_context_current();
91447636
A
10555 uio_t auio = NULL;
10556 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10557 size_t attrsize = 0;
10558 size_t namelen;
b0d623f7 10559 u_int32_t nameiflags;
91447636
A
10560 int error;
10561 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 10562
2d21ac55 10563 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10564 return (EINVAL);
55e303ae 10565
91447636 10566 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10567 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10568 if ((error = namei(&nd))) {
10569 return (error);
10570 }
10571 vp = nd.ni_vp;
10572 nameidone(&nd);
55e303ae 10573
91447636
A
10574 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10575 goto out;
10576 }
10577 if (xattr_protected(attrname)) {
6d2010ae
A
10578 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
10579 error = EPERM;
10580 goto out;
10581 }
91447636 10582 }
b0d623f7
A
10583 /*
10584 * the specific check for 0xffffffff is a hack to preserve
10585 * binaray compatibilty in K64 with applications that discovered
39037602 10586 * that passing in a buf pointer and a size of -1 resulted in
b0d623f7
A
10587 * just the size of the indicated extended attribute being returned.
10588 * this isn't part of the documented behavior, but because of the
10589 * original implemtation's check for "uap->size > 0", this behavior
10590 * was allowed. In K32 that check turned into a signed comparison
10591 * even though uap->size is unsigned... in K64, we blow by that
10592 * check because uap->size is unsigned and doesn't get sign smeared
39037602 10593 * in the munger for a 32 bit user app. we also need to add a
b0d623f7
A
10594 * check to limit the maximum size of the buffer being passed in...
10595 * unfortunately, the underlying fileystems seem to just malloc
10596 * the requested size even if the actual extended attribute is tiny.
10597 * because that malloc is for kernel wired memory, we have to put a
10598 * sane limit on it.
10599 *
10600 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
10601 * U64 running on K64 will yield -1 (64 bits wide)
10602 * U32/U64 running on K32 will yield -1 (32 bits wide)
10603 */
10604 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
10605 goto no_uio;
10606
b0d623f7 10607 if (uap->value) {
6d2010ae
A
10608 if (uap->size > (size_t)XATTR_MAXSIZE)
10609 uap->size = XATTR_MAXSIZE;
39037602 10610
91447636
A
10611 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10612 &uio_buf[0], sizeof(uio_buf));
10613 uio_addiov(auio, uap->value, uap->size);
10614 }
b0d623f7 10615no_uio:
2d21ac55 10616 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
91447636
A
10617out:
10618 vnode_put(vp);
55e303ae 10619
91447636
A
10620 if (auio) {
10621 *retval = uap->size - uio_resid(auio);
10622 } else {
10623 *retval = (user_ssize_t)attrsize;
55e303ae
A
10624 }
10625
91447636
A
10626 return (error);
10627}
55e303ae 10628
91447636
A
10629/*
10630 * Retrieve the data of an extended attribute.
10631 */
10632int
2d21ac55 10633fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
91447636 10634{
2d21ac55 10635 vnode_t vp;
91447636 10636 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
10637 uio_t auio = NULL;
10638 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10639 size_t attrsize = 0;
10640 size_t namelen;
10641 int error;
10642 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 10643
2d21ac55 10644 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10645 return (EINVAL);
55e303ae 10646
91447636
A
10647 if ( (error = file_vnode(uap->fd, &vp)) ) {
10648 return (error);
10649 }
10650 if ( (error = vnode_getwithref(vp)) ) {
10651 file_drop(uap->fd);
10652 return(error);
10653 }
10654 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10655 goto out;
10656 }
10657 if (xattr_protected(attrname)) {
10658 error = EPERM;
10659 goto out;
10660 }
10661 if (uap->value && uap->size > 0) {
10662 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10663 &uio_buf[0], sizeof(uio_buf));
10664 uio_addiov(auio, uap->value, uap->size);
10665 }
55e303ae 10666
2d21ac55 10667 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
91447636
A
10668out:
10669 (void)vnode_put(vp);
10670 file_drop(uap->fd);
55e303ae 10671
91447636
A
10672 if (auio) {
10673 *retval = uap->size - uio_resid(auio);
10674 } else {
10675 *retval = (user_ssize_t)attrsize;
10676 }
10677 return (error);
10678}
55e303ae 10679
91447636
A
10680/*
10681 * Set the data of an extended attribute.
10682 */
55e303ae 10683int
2d21ac55 10684setxattr(proc_t p, struct setxattr_args *uap, int *retval)
55e303ae 10685{
2d21ac55 10686 vnode_t vp;
91447636
A
10687 struct nameidata nd;
10688 char attrname[XATTR_MAXNAMELEN+1];
2d21ac55 10689 vfs_context_t ctx = vfs_context_current();
91447636
A
10690 uio_t auio = NULL;
10691 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10692 size_t namelen;
b0d623f7 10693 u_int32_t nameiflags;
91447636
A
10694 int error;
10695 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 10696
2d21ac55 10697 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10698 return (EINVAL);
55e303ae 10699
91447636 10700 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6d2010ae
A
10701 if (error == EPERM) {
10702 /* if the string won't fit in attrname, copyinstr emits EPERM */
10703 return (ENAMETOOLONG);
10704 }
10705 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10706 return error;
91447636
A
10707 }
10708 if (xattr_protected(attrname))
10709 return(EPERM);
2d21ac55 10710 if (uap->size != 0 && uap->value == 0) {
91447636 10711 return (EINVAL);
55e303ae 10712 }
55e303ae 10713
91447636 10714 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10715 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10716 if ((error = namei(&nd))) {
10717 return (error);
10718 }
10719 vp = nd.ni_vp;
10720 nameidone(&nd);
55e303ae 10721
91447636
A
10722 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10723 &uio_buf[0], sizeof(uio_buf));
10724 uio_addiov(auio, uap->value, uap->size);
55e303ae 10725
2d21ac55
A
10726 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
10727#if CONFIG_FSE
10728 if (error == 0) {
10729 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10730 FSE_ARG_VNODE, vp,
10731 FSE_ARG_DONE);
10732 }
10733#endif
91447636
A
10734 vnode_put(vp);
10735 *retval = 0;
10736 return (error);
10737}
55e303ae 10738
91447636
A
10739/*
10740 * Set the data of an extended attribute.
10741 */
10742int
2d21ac55 10743fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
91447636 10744{
2d21ac55 10745 vnode_t vp;
91447636 10746 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
10747 uio_t auio = NULL;
10748 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10749 size_t namelen;
10750 int error;
10751 char uio_buf[ UIO_SIZEOF(1) ];
6d2010ae 10752#if CONFIG_FSE
2d21ac55 10753 vfs_context_t ctx = vfs_context_current();
6d2010ae 10754#endif
55e303ae 10755
2d21ac55 10756 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10757 return (EINVAL);
55e303ae 10758
91447636 10759 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
3e170ce0
A
10760 if (error == EPERM) {
10761 /* if the string won't fit in attrname, copyinstr emits EPERM */
10762 return (ENAMETOOLONG);
10763 }
10764 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10765 return error;
55e303ae 10766 }
91447636
A
10767 if (xattr_protected(attrname))
10768 return(EPERM);
2d21ac55 10769 if (uap->size != 0 && uap->value == 0) {
91447636 10770 return (EINVAL);
55e303ae 10771 }
91447636
A
10772 if ( (error = file_vnode(uap->fd, &vp)) ) {
10773 return (error);
55e303ae 10774 }
91447636
A
10775 if ( (error = vnode_getwithref(vp)) ) {
10776 file_drop(uap->fd);
10777 return(error);
10778 }
10779 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10780 &uio_buf[0], sizeof(uio_buf));
10781 uio_addiov(auio, uap->value, uap->size);
91447636 10782
2d21ac55
A
10783 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
10784#if CONFIG_FSE
10785 if (error == 0) {
10786 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10787 FSE_ARG_VNODE, vp,
10788 FSE_ARG_DONE);
10789 }
10790#endif
91447636
A
10791 vnode_put(vp);
10792 file_drop(uap->fd);
10793 *retval = 0;
10794 return (error);
10795}
55e303ae 10796
91447636
A
10797/*
10798 * Remove an extended attribute.
b0d623f7 10799 * XXX Code duplication here.
91447636 10800 */
91447636 10801int
2d21ac55 10802removexattr(proc_t p, struct removexattr_args *uap, int *retval)
91447636 10803{
2d21ac55 10804 vnode_t vp;
91447636
A
10805 struct nameidata nd;
10806 char attrname[XATTR_MAXNAMELEN+1];
10807 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
2d21ac55 10808 vfs_context_t ctx = vfs_context_current();
91447636 10809 size_t namelen;
b0d623f7 10810 u_int32_t nameiflags;
91447636 10811 int error;
55e303ae 10812
2d21ac55 10813 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10814 return (EINVAL);
55e303ae 10815
91447636
A
10816 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10817 if (error != 0) {
10818 return (error);
10819 }
10820 if (xattr_protected(attrname))
10821 return(EPERM);
10822 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10823 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10824 if ((error = namei(&nd))) {
10825 return (error);
10826 }
10827 vp = nd.ni_vp;
10828 nameidone(&nd);
55e303ae 10829
2d21ac55
A
10830 error = vn_removexattr(vp, attrname, uap->options, ctx);
10831#if CONFIG_FSE
10832 if (error == 0) {
10833 add_fsevent(FSE_XATTR_REMOVED, ctx,
10834 FSE_ARG_VNODE, vp,
10835 FSE_ARG_DONE);
10836 }
10837#endif
91447636
A
10838 vnode_put(vp);
10839 *retval = 0;
10840 return (error);
55e303ae
A
10841}
10842
91447636
A
10843/*
10844 * Remove an extended attribute.
b0d623f7 10845 * XXX Code duplication here.
91447636 10846 */
91447636 10847int
2d21ac55 10848fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
55e303ae 10849{
2d21ac55 10850 vnode_t vp;
91447636 10851 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
10852 size_t namelen;
10853 int error;
6d2010ae 10854#if CONFIG_FSE
2d21ac55 10855 vfs_context_t ctx = vfs_context_current();
6d2010ae 10856#endif
55e303ae 10857
2d21ac55 10858 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636
A
10859 return (EINVAL);
10860
10861 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10862 if (error != 0) {
10863 return (error);
10864 }
10865 if (xattr_protected(attrname))
10866 return(EPERM);
10867 if ( (error = file_vnode(uap->fd, &vp)) ) {
10868 return (error);
10869 }
10870 if ( (error = vnode_getwithref(vp)) ) {
10871 file_drop(uap->fd);
10872 return(error);
10873 }
4a249263 10874
2d21ac55
A
10875 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
10876#if CONFIG_FSE
10877 if (error == 0) {
10878 add_fsevent(FSE_XATTR_REMOVED, ctx,
10879 FSE_ARG_VNODE, vp,
10880 FSE_ARG_DONE);
10881 }
10882#endif
91447636
A
10883 vnode_put(vp);
10884 file_drop(uap->fd);
10885 *retval = 0;
10886 return (error);
55e303ae
A
10887}
10888
91447636
A
10889/*
10890 * Retrieve the list of extended attribute names.
b0d623f7 10891 * XXX Code duplication here.
91447636 10892 */
91447636 10893int
2d21ac55 10894listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
55e303ae 10895{
2d21ac55 10896 vnode_t vp;
91447636 10897 struct nameidata nd;
2d21ac55 10898 vfs_context_t ctx = vfs_context_current();
91447636
A
10899 uio_t auio = NULL;
10900 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10901 size_t attrsize = 0;
b0d623f7 10902 u_int32_t nameiflags;
91447636
A
10903 int error;
10904 char uio_buf[ UIO_SIZEOF(1) ];
4a249263 10905
2d21ac55 10906 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10907 return (EINVAL);
55e303ae 10908
fe8ab488 10909 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10910 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10911 if ((error = namei(&nd))) {
10912 return (error);
10913 }
10914 vp = nd.ni_vp;
10915 nameidone(&nd);
10916 if (uap->namebuf != 0 && uap->bufsize > 0) {
6d2010ae
A
10917 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
10918 &uio_buf[0], sizeof(uio_buf));
91447636
A
10919 uio_addiov(auio, uap->namebuf, uap->bufsize);
10920 }
55e303ae 10921
2d21ac55 10922 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
55e303ae 10923
91447636
A
10924 vnode_put(vp);
10925 if (auio) {
10926 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10927 } else {
10928 *retval = (user_ssize_t)attrsize;
10929 }
10930 return (error);
55e303ae
A
10931}
10932
91447636
A
10933/*
10934 * Retrieve the list of extended attribute names.
b0d623f7 10935 * XXX Code duplication here.
91447636 10936 */
55e303ae 10937int
2d21ac55 10938flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
55e303ae 10939{
2d21ac55 10940 vnode_t vp;
91447636
A
10941 uio_t auio = NULL;
10942 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10943 size_t attrsize = 0;
10944 int error;
10945 char uio_buf[ UIO_SIZEOF(1) ];
10946
2d21ac55 10947 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636
A
10948 return (EINVAL);
10949
10950 if ( (error = file_vnode(uap->fd, &vp)) ) {
10951 return (error);
10952 }
10953 if ( (error = vnode_getwithref(vp)) ) {
10954 file_drop(uap->fd);
10955 return(error);
10956 }
10957 if (uap->namebuf != 0 && uap->bufsize > 0) {
39037602 10958 auio = uio_createwithbuffer(1, 0, spacetype,
91447636
A
10959 UIO_READ, &uio_buf[0], sizeof(uio_buf));
10960 uio_addiov(auio, uap->namebuf, uap->bufsize);
10961 }
91447636 10962
2d21ac55 10963 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
55e303ae 10964
91447636
A
10965 vnode_put(vp);
10966 file_drop(uap->fd);
10967 if (auio) {
10968 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10969 } else {
10970 *retval = (user_ssize_t)attrsize;
10971 }
10972 return (error);
55e303ae 10973}
4a249263 10974
fe8ab488
A
10975static int fsgetpath_internal(
10976 vfs_context_t ctx, int volfs_id, uint64_t objid,
10977 vm_size_t bufsize, caddr_t buf, int *pathlen)
b0d623f7 10978{
fe8ab488 10979 int error;
b0d623f7 10980 struct mount *mp = NULL;
fe8ab488 10981 vnode_t vp;
b0d623f7 10982 int length;
fe8ab488 10983 int bpflags;
813fb2f6
A
10984 /* maximum number of times to retry build_path */
10985 unsigned int retries = 0x10;
b0d623f7 10986
fe8ab488 10987 if (bufsize > PAGE_SIZE) {
b0d623f7 10988 return (EINVAL);
fe8ab488
A
10989 }
10990
10991 if (buf == NULL) {
b0d623f7
A
10992 return (ENOMEM);
10993 }
fe8ab488 10994
813fb2f6 10995retry:
fe8ab488 10996 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
b0d623f7 10997 error = ENOTSUP; /* unexpected failure */
fe8ab488 10998 return ENOTSUP;
b0d623f7 10999 }
fe8ab488 11000
39236c6e 11001unionget:
fe8ab488 11002 if (objid == 2) {
b0d623f7
A
11003 error = VFS_ROOT(mp, &vp, ctx);
11004 } else {
fe8ab488 11005 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
b0d623f7 11006 }
39236c6e
A
11007
11008 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
11009 /*
11010 * If the fileid isn't found and we're in a union
11011 * mount volume, then see if the fileid is in the
11012 * mounted-on volume.
11013 */
11014 struct mount *tmp = mp;
11015 mp = vnode_mount(tmp->mnt_vnodecovered);
11016 vfs_unbusy(tmp);
11017 if (vfs_busy(mp, LK_NOWAIT) == 0)
11018 goto unionget;
fe8ab488 11019 } else {
39236c6e 11020 vfs_unbusy(mp);
fe8ab488 11021 }
39236c6e 11022
b0d623f7 11023 if (error) {
fe8ab488 11024 return error;
b0d623f7 11025 }
fe8ab488 11026
6d2010ae
A
11027#if CONFIG_MACF
11028 error = mac_vnode_check_fsgetpath(ctx, vp);
11029 if (error) {
11030 vnode_put(vp);
fe8ab488 11031 return error;
6d2010ae
A
11032 }
11033#endif
fe8ab488 11034
b0d623f7
A
11035 /* Obtain the absolute path to this vnode. */
11036 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
316670eb 11037 bpflags |= BUILDPATH_CHECK_MOVED;
fe8ab488 11038 error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
b0d623f7 11039 vnode_put(vp);
fe8ab488 11040
b0d623f7 11041 if (error) {
813fb2f6
A
11042 /* there was a race building the path, try a few more times */
11043 if (error == EAGAIN) {
11044 --retries;
11045 if (retries > 0)
11046 goto retry;
11047
11048 error = ENOENT;
11049 }
b0d623f7
A
11050 goto out;
11051 }
fe8ab488
A
11052
11053 AUDIT_ARG(text, buf);
39236c6e
A
11054
11055 if (kdebug_enable) {
11056 long dbg_parms[NUMPARMS];
11057 int dbg_namelen;
11058
11059 dbg_namelen = (int)sizeof(dbg_parms);
11060
fe8ab488
A
11061 if (length < dbg_namelen) {
11062 memcpy((char *)dbg_parms, buf, length);
39236c6e
A
11063 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
11064
11065 dbg_namelen = length;
fe8ab488
A
11066 } else {
11067 memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
11068 }
39236c6e
A
11069
11070 kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
11071 }
fe8ab488
A
11072
11073 *pathlen = (user_ssize_t)length; /* may be superseded by error */
11074
11075out:
11076 return (error);
11077}
11078
11079/*
11080 * Obtain the full pathname of a file system object by id.
fe8ab488 11081 */
fe8ab488
A
11082int
11083fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
11084{
11085 vfs_context_t ctx = vfs_context_current();
11086 fsid_t fsid;
11087 char *realpath;
11088 int length;
11089 int error;
11090
11091 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
11092 return (error);
11093 }
11094 AUDIT_ARG(value32, fsid.val[0]);
11095 AUDIT_ARG(value64, uap->objid);
11096 /* Restrict output buffer size for now. */
39037602 11097
fe8ab488
A
11098 if (uap->bufsize > PAGE_SIZE) {
11099 return (EINVAL);
39037602 11100 }
fe8ab488
A
11101 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
11102 if (realpath == NULL) {
11103 return (ENOMEM);
11104 }
11105
11106 error = fsgetpath_internal(
39037602 11107 ctx, fsid.val[0], uap->objid,
fe8ab488
A
11108 uap->bufsize, realpath, &length);
11109
11110 if (error) {
11111 goto out;
11112 }
39037602 11113
b0d623f7
A
11114 error = copyout((caddr_t)realpath, uap->buf, length);
11115
11116 *retval = (user_ssize_t)length; /* may be superseded by error */
11117out:
11118 if (realpath) {
11119 FREE(realpath, M_TEMP);
11120 }
11121 return (error);
11122}
11123
91447636
A
11124/*
11125 * Common routine to handle various flavors of statfs data heading out
11126 * to user space.
2d21ac55
A
11127 *
11128 * Returns: 0 Success
11129 * EFAULT
91447636
A
11130 */
11131static int
39037602
A
11132munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
11133 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
91447636 11134 boolean_t partial_copy)
4a249263 11135{
91447636
A
11136 int error;
11137 int my_size, copy_size;
11138
11139 if (is_64_bit) {
b0d623f7 11140 struct user64_statfs sfs;
91447636
A
11141 my_size = copy_size = sizeof(sfs);
11142 bzero(&sfs, my_size);
11143 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
11144 sfs.f_type = mp->mnt_vtable->vfc_typenum;
11145 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
b0d623f7
A
11146 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
11147 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
11148 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
11149 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
11150 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
11151 sfs.f_files = (user64_long_t)sfsp->f_files;
11152 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
91447636
A
11153 sfs.f_fsid = sfsp->f_fsid;
11154 sfs.f_owner = sfsp->f_owner;
6d2010ae 11155 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
fe8ab488 11156 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
6d2010ae
A
11157 } else {
11158 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
11159 }
2d21ac55
A
11160 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
11161 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
11162
11163 if (partial_copy) {
11164 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
11165 }
11166 error = copyout((caddr_t)&sfs, bufp, copy_size);
11167 }
11168 else {
b0d623f7
A
11169 struct user32_statfs sfs;
11170
91447636
A
11171 my_size = copy_size = sizeof(sfs);
11172 bzero(&sfs, my_size);
39037602 11173
91447636
A
11174 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
11175 sfs.f_type = mp->mnt_vtable->vfc_typenum;
11176 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
39037602 11177
91447636
A
11178 /*
11179 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
11180 * have to fudge the numbers here in that case. We inflate the blocksize in order
11181 * to reflect the filesystem size as best we can.
11182 */
39037602
A
11183 if ((sfsp->f_blocks > INT_MAX)
11184 /* Hack for 4061702 . I think the real fix is for Carbon to
91447636 11185 * look for some volume capability and not depend on hidden
39037602 11186 * semantics agreed between a FS and carbon.
91447636
A
11187 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
11188 * for Carbon to set bNoVolumeSizes volume attribute.
39037602 11189 * Without this the webdavfs files cannot be copied onto
91447636
A
11190 * disk as they look huge. This change should not affect
11191 * XSAN as they should not setting these to -1..
11192 */
2d21ac55
A
11193 && (sfsp->f_blocks != 0xffffffffffffffffULL)
11194 && (sfsp->f_bfree != 0xffffffffffffffffULL)
11195 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
91447636
A
11196 int shift;
11197
11198 /*
11199 * Work out how far we have to shift the block count down to make it fit.
11200 * Note that it's possible to have to shift so far that the resulting
11201 * blocksize would be unreportably large. At that point, we will clip
11202 * any values that don't fit.
11203 *
11204 * For safety's sake, we also ensure that f_iosize is never reported as
11205 * being smaller than f_bsize.
11206 */
11207 for (shift = 0; shift < 32; shift++) {
b0d623f7 11208 if ((sfsp->f_blocks >> shift) <= INT_MAX)
91447636 11209 break;
b0d623f7 11210 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
91447636
A
11211 break;
11212 }
b0d623f7
A
11213#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
11214 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
11215 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
11216 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
91447636 11217#undef __SHIFT_OR_CLIP
b0d623f7 11218 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
91447636
A
11219 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
11220 } else {
11221 /* filesystem is small enough to be reported honestly */
b0d623f7
A
11222 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
11223 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
11224 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
11225 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
11226 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
91447636 11227 }
b0d623f7
A
11228 sfs.f_files = (user32_long_t)sfsp->f_files;
11229 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
91447636
A
11230 sfs.f_fsid = sfsp->f_fsid;
11231 sfs.f_owner = sfsp->f_owner;
6d2010ae 11232 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
fe8ab488 11233 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
6d2010ae
A
11234 } else {
11235 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
11236 }
2d21ac55
A
11237 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
11238 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
11239
11240 if (partial_copy) {
11241 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
11242 }
11243 error = copyout((caddr_t)&sfs, bufp, copy_size);
11244 }
39037602 11245
91447636
A
11246 if (sizep != NULL) {
11247 *sizep = my_size;
11248 }
11249 return(error);
11250}
11251
11252/*
11253 * copy stat structure into user_stat structure.
11254 */
b0d623f7 11255void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
91447636 11256{
b0d623f7
A
11257 bzero(usbp, sizeof(*usbp));
11258
11259 usbp->st_dev = sbp->st_dev;
11260 usbp->st_ino = sbp->st_ino;
11261 usbp->st_mode = sbp->st_mode;
11262 usbp->st_nlink = sbp->st_nlink;
11263 usbp->st_uid = sbp->st_uid;
11264 usbp->st_gid = sbp->st_gid;
11265 usbp->st_rdev = sbp->st_rdev;
11266#ifndef _POSIX_C_SOURCE
11267 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11268 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11269 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11270 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11271 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11272 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11273#else
11274 usbp->st_atime = sbp->st_atime;
11275 usbp->st_atimensec = sbp->st_atimensec;
11276 usbp->st_mtime = sbp->st_mtime;
11277 usbp->st_mtimensec = sbp->st_mtimensec;
11278 usbp->st_ctime = sbp->st_ctime;
11279 usbp->st_ctimensec = sbp->st_ctimensec;
11280#endif
11281 usbp->st_size = sbp->st_size;
11282 usbp->st_blocks = sbp->st_blocks;
11283 usbp->st_blksize = sbp->st_blksize;
11284 usbp->st_flags = sbp->st_flags;
11285 usbp->st_gen = sbp->st_gen;
11286 usbp->st_lspare = sbp->st_lspare;
11287 usbp->st_qspare[0] = sbp->st_qspare[0];
11288 usbp->st_qspare[1] = sbp->st_qspare[1];
11289}
11290
11291void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
11292{
11293 bzero(usbp, sizeof(*usbp));
0c530ab8 11294
91447636
A
11295 usbp->st_dev = sbp->st_dev;
11296 usbp->st_ino = sbp->st_ino;
11297 usbp->st_mode = sbp->st_mode;
11298 usbp->st_nlink = sbp->st_nlink;
11299 usbp->st_uid = sbp->st_uid;
11300 usbp->st_gid = sbp->st_gid;
11301 usbp->st_rdev = sbp->st_rdev;
2d21ac55
A
11302#ifndef _POSIX_C_SOURCE
11303 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11304 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11305 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11306 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11307 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11308 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11309#else
11310 usbp->st_atime = sbp->st_atime;
11311 usbp->st_atimensec = sbp->st_atimensec;
11312 usbp->st_mtime = sbp->st_mtime;
11313 usbp->st_mtimensec = sbp->st_mtimensec;
11314 usbp->st_ctime = sbp->st_ctime;
11315 usbp->st_ctimensec = sbp->st_ctimensec;
11316#endif
11317 usbp->st_size = sbp->st_size;
11318 usbp->st_blocks = sbp->st_blocks;
11319 usbp->st_blksize = sbp->st_blksize;
11320 usbp->st_flags = sbp->st_flags;
11321 usbp->st_gen = sbp->st_gen;
11322 usbp->st_lspare = sbp->st_lspare;
11323 usbp->st_qspare[0] = sbp->st_qspare[0];
11324 usbp->st_qspare[1] = sbp->st_qspare[1];
11325}
11326
11327/*
11328 * copy stat64 structure into user_stat64 structure.
11329 */
b0d623f7
A
11330void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
11331{
11332 bzero(usbp, sizeof(*usbp));
11333
11334 usbp->st_dev = sbp->st_dev;
11335 usbp->st_ino = sbp->st_ino;
11336 usbp->st_mode = sbp->st_mode;
11337 usbp->st_nlink = sbp->st_nlink;
11338 usbp->st_uid = sbp->st_uid;
11339 usbp->st_gid = sbp->st_gid;
11340 usbp->st_rdev = sbp->st_rdev;
11341#ifndef _POSIX_C_SOURCE
11342 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11343 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11344 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11345 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11346 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11347 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11348 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11349 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
11350#else
11351 usbp->st_atime = sbp->st_atime;
11352 usbp->st_atimensec = sbp->st_atimensec;
11353 usbp->st_mtime = sbp->st_mtime;
11354 usbp->st_mtimensec = sbp->st_mtimensec;
11355 usbp->st_ctime = sbp->st_ctime;
11356 usbp->st_ctimensec = sbp->st_ctimensec;
11357 usbp->st_birthtime = sbp->st_birthtime;
11358 usbp->st_birthtimensec = sbp->st_birthtimensec;
11359#endif
11360 usbp->st_size = sbp->st_size;
11361 usbp->st_blocks = sbp->st_blocks;
11362 usbp->st_blksize = sbp->st_blksize;
11363 usbp->st_flags = sbp->st_flags;
11364 usbp->st_gen = sbp->st_gen;
11365 usbp->st_lspare = sbp->st_lspare;
11366 usbp->st_qspare[0] = sbp->st_qspare[0];
11367 usbp->st_qspare[1] = sbp->st_qspare[1];
11368}
11369
11370void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
2d21ac55 11371{
b0d623f7 11372 bzero(usbp, sizeof(*usbp));
2d21ac55
A
11373
11374 usbp->st_dev = sbp->st_dev;
11375 usbp->st_ino = sbp->st_ino;
11376 usbp->st_mode = sbp->st_mode;
11377 usbp->st_nlink = sbp->st_nlink;
11378 usbp->st_uid = sbp->st_uid;
11379 usbp->st_gid = sbp->st_gid;
11380 usbp->st_rdev = sbp->st_rdev;
11381#ifndef _POSIX_C_SOURCE
91447636
A
11382 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11383 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11384 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11385 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11386 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11387 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
2d21ac55
A
11388 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11389 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
91447636
A
11390#else
11391 usbp->st_atime = sbp->st_atime;
11392 usbp->st_atimensec = sbp->st_atimensec;
11393 usbp->st_mtime = sbp->st_mtime;
11394 usbp->st_mtimensec = sbp->st_mtimensec;
11395 usbp->st_ctime = sbp->st_ctime;
11396 usbp->st_ctimensec = sbp->st_ctimensec;
2d21ac55
A
11397 usbp->st_birthtime = sbp->st_birthtime;
11398 usbp->st_birthtimensec = sbp->st_birthtimensec;
91447636
A
11399#endif
11400 usbp->st_size = sbp->st_size;
11401 usbp->st_blocks = sbp->st_blocks;
11402 usbp->st_blksize = sbp->st_blksize;
11403 usbp->st_flags = sbp->st_flags;
11404 usbp->st_gen = sbp->st_gen;
11405 usbp->st_lspare = sbp->st_lspare;
11406 usbp->st_qspare[0] = sbp->st_qspare[0];
11407 usbp->st_qspare[1] = sbp->st_qspare[1];
4a249263 11408}
39236c6e
A
11409
11410/*
11411 * Purge buffer cache for simulating cold starts
11412 */
11413static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
11414{
11415 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
11416
11417 return VNODE_RETURNED;
11418}
11419
11420static int vfs_purge_callback(mount_t mp, __unused void * arg)
11421{
11422 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
11423
11424 return VFS_RETURNED;
11425}
11426
11427int
11428vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
11429{
11430 if (!kauth_cred_issuser(kauth_cred_get()))
11431 return EPERM;
11432
11433 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
11434
11435 return 0;
11436}
11437
39037602
A
11438/*
11439 * gets the vnode associated with the (unnamed) snapshot directory
11440 * for a Filesystem. The snapshot directory vnode is returned with
11441 * an iocount on it.
11442 */
11443int
11444vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx)
11445{
813fb2f6 11446 return (VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx));
39037602
A
11447}
11448
11449/*
11450 * Get the snapshot vnode.
11451 *
11452 * If successful, the call returns with an iocount on *rvpp ,*sdvpp and
11453 * needs nameidone() on ndp.
11454 *
11455 * If the snapshot vnode exists it is returned in ndp->ni_vp.
11456 *
11457 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
11458 * not needed.
11459 */
11460static int
11461vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp,
11462 user_addr_t name, struct nameidata *ndp, int32_t op,
11463#if !CONFIG_TRIGGERS
11464 __unused
11465#endif
11466 enum path_operation pathop,
11467 vfs_context_t ctx)
11468{
11469 int error, i;
11470 caddr_t name_buf;
11471 size_t name_len;
11472 struct vfs_attr vfa;
11473
11474 *sdvpp = NULLVP;
11475 *rvpp = NULLVP;
11476
11477 error = vnode_getfromfd(ctx, dirfd, rvpp);
11478 if (error)
11479 return (error);
11480
11481 if (!vnode_isvroot(*rvpp)) {
11482 error = EINVAL;
11483 goto out;
11484 }
11485
11486 /* Make sure the filesystem supports snapshots */
11487 VFSATTR_INIT(&vfa);
11488 VFSATTR_WANTED(&vfa, f_capabilities);
11489 if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) ||
11490 !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) ||
11491 !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] &
11492 VOL_CAP_INT_SNAPSHOT)) ||
11493 !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &
11494 VOL_CAP_INT_SNAPSHOT))) {
11495 error = ENOTSUP;
11496 goto out;
11497 }
11498
11499 error = vnode_get_snapdir(*rvpp, sdvpp, ctx);
11500 if (error)
11501 goto out;
11502
11503 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11504 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11505 if (error)
11506 goto out1;
11507
11508 /*
11509 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
11510 * (the length returned by copyinstr includes the terminating NUL)
11511 */
11512 if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
11513 (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
11514 error = EINVAL;
11515 goto out1;
11516 }
11517 for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++);
11518 if (i < (int)name_len) {
11519 error = EINVAL;
11520 goto out1;
11521 }
11522
11523#if CONFIG_MACF
11524 if (op == CREATE) {
11525 error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp),
11526 name_buf);
11527 } else if (op == DELETE) {
11528 error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp),
11529 name_buf);
11530 }
11531 if (error)
11532 goto out1;
11533#endif
11534
11535 /* Check if the snapshot already exists ... */
11536 NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1,
11537 UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx);
11538 ndp->ni_dvp = *sdvpp;
11539
11540 error = namei(ndp);
11541out1:
11542 FREE(name_buf, M_TEMP);
11543out:
11544 if (error) {
11545 if (*sdvpp) {
11546 vnode_put(*sdvpp);
11547 *sdvpp = NULLVP;
11548 }
11549 if (*rvpp) {
11550 vnode_put(*rvpp);
11551 *rvpp = NULLVP;
11552 }
11553 }
11554 return (error);
11555}
11556
11557/*
11558 * create a filesystem snapshot (for supporting filesystems)
11559 *
11560 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
11561 * We get to the (unnamed) snapshot directory vnode and create the vnode
11562 * for the snapshot in it.
11563 *
11564 * Restrictions:
11565 *
11566 * a) Passed in name for snapshot cannot have slashes.
11567 * b) name can't be "." or ".."
11568 *
11569 * Since this requires superuser privileges, vnode_authorize calls are not
11570 * made.
11571 */
11572static int
11573snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
11574 vfs_context_t ctx)
11575{
11576 vnode_t rvp, snapdvp;
11577 int error;
11578 struct nameidata namend;
11579
11580 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE,
11581 OP_LINK, ctx);
11582 if (error)
11583 return (error);
11584
11585 if (namend.ni_vp) {
11586 vnode_put(namend.ni_vp);
11587 error = EEXIST;
11588 } else {
11589 struct vnode_attr va;
11590 vnode_t vp = NULLVP;
11591
11592 VATTR_INIT(&va);
11593 VATTR_SET(&va, va_type, VREG);
11594 VATTR_SET(&va, va_mode, 0);
11595
11596 error = vn_create(snapdvp, &vp, &namend, &va,
11597 VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
11598 if (!error && vp)
11599 vnode_put(vp);
39037602
A
11600 }
11601
11602 nameidone(&namend);
11603 vnode_put(snapdvp);
11604 vnode_put(rvp);
11605 return (error);
11606}
11607
11608/*
11609 * Delete a Filesystem snapshot
11610 *
11611 * get the vnode for the unnamed snapshot directory and the snapshot and
11612 * delete the snapshot.
11613 */
11614static int
11615snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
11616 vfs_context_t ctx)
11617{
11618 vnode_t rvp, snapdvp;
11619 int error;
11620 struct nameidata namend;
11621
11622 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE,
11623 OP_UNLINK, ctx);
11624 if (error)
11625 goto out;
11626
11627 error = VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd,
11628 VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
11629
11630 vnode_put(namend.ni_vp);
11631 nameidone(&namend);
11632 vnode_put(snapdvp);
11633 vnode_put(rvp);
11634out:
11635 return (error);
11636}
11637
11638/*
11639 * Revert a filesystem to a snapshot
11640 *
11641 * Marks the filesystem to revert to the given snapshot on next mount.
11642 */
11643static int
11644snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
11645 vfs_context_t ctx)
11646{
11647 int error;
11648 vnode_t rvp;
11649 mount_t mp;
11650 struct fs_snapshot_revert_args revert_data;
11651 struct componentname cnp;
11652 caddr_t name_buf;
11653 size_t name_len;
11654
11655 error = vnode_getfromfd(ctx, dirfd, &rvp);
11656 if (error) {
11657 return (error);
11658 }
11659 mp = vnode_mount(rvp);
11660
813fb2f6
A
11661 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11662 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11663 if (error) {
11664 FREE(name_buf, M_TEMP);
11665 vnode_put(rvp);
11666 return (error);
11667 }
11668
11669#if CONFIG_MACF
11670 error = mac_mount_check_snapshot_revert(ctx, mp, name_buf);
11671 if (error) {
11672 FREE(name_buf, M_TEMP);
11673 vnode_put(rvp);
11674 return (error);
11675 }
11676#endif
11677
39037602
A
11678 /*
11679 * Grab mount_iterref so that we can release the vnode,
11680 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
11681 */
11682 error = mount_iterref (mp, 0);
11683 vnode_put(rvp);
11684 if (error) {
39037602
A
11685 FREE(name_buf, M_TEMP);
11686 return (error);
11687 }
11688
11689 memset(&cnp, 0, sizeof(cnp));
11690 cnp.cn_pnbuf = (char *)name_buf;
11691 cnp.cn_nameiop = LOOKUP;
11692 cnp.cn_flags = ISLASTCN | HASBUF;
11693 cnp.cn_pnlen = MAXPATHLEN;
11694 cnp.cn_nameptr = cnp.cn_pnbuf;
11695 cnp.cn_namelen = (int)name_len;
11696 revert_data.sr_cnp = &cnp;
11697
11698 error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
11699 mount_iterdrop(mp);
11700 FREE(name_buf, M_TEMP);
11701
11702 if (error) {
11703 /* If there was any error, try again using VNOP_IOCTL */
11704
11705 vnode_t snapdvp;
11706 struct nameidata namend;
11707
11708 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP,
11709 OP_LOOKUP, ctx);
11710 if (error) {
11711 return (error);
11712 }
11713
11714
11715#ifndef APFSIOC_REVERT_TO_SNAPSHOT
11716#define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
11717#endif
11718
5ba3f43e 11719 error = VNOP_IOCTL(namend.ni_vp, APFSIOC_REVERT_TO_SNAPSHOT, (caddr_t) NULL,
39037602
A
11720 0, ctx);
11721
11722 vnode_put(namend.ni_vp);
11723 nameidone(&namend);
11724 vnode_put(snapdvp);
11725 vnode_put(rvp);
11726 }
11727
11728 return (error);
11729}
11730
11731/*
11732 * rename a Filesystem snapshot
11733 *
11734 * get the vnode for the unnamed snapshot directory and the snapshot and
11735 * rename the snapshot. This is a very specialised (and simple) case of
11736 * rename(2) (which has to deal with a lot more complications). It differs
11737 * slightly from rename(2) in that EEXIST is returned if the new name exists.
11738 */
11739static int
11740snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
11741 __unused uint32_t flags, vfs_context_t ctx)
11742{
11743 vnode_t rvp, snapdvp;
11744 int error, i;
11745 caddr_t newname_buf;
11746 size_t name_len;
11747 vnode_t fvp;
11748 struct nameidata *fromnd, *tond;
11749 /* carving out a chunk for structs that are too big to be on stack. */
11750 struct {
11751 struct nameidata from_node;
11752 struct nameidata to_node;
11753 } * __rename_data;
11754
11755 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
11756 fromnd = &__rename_data->from_node;
11757 tond = &__rename_data->to_node;
11758
11759 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE,
11760 OP_UNLINK, ctx);
11761 if (error)
11762 goto out;
11763 fvp = fromnd->ni_vp;
11764
11765 MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11766 error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
11767 if (error)
11768 goto out1;
11769
11770 /*
11771 * Some sanity checks- new name can't be empty, "." or ".." or have
11772 * slashes.
11773 * (the length returned by copyinstr includes the terminating NUL)
11774 *
11775 * The FS rename VNOP is suppossed to handle this but we'll pick it
11776 * off here itself.
11777 */
11778 if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') ||
11779 (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) {
11780 error = EINVAL;
11781 goto out1;
11782 }
11783 for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++);
11784 if (i < (int)name_len) {
11785 error = EINVAL;
11786 goto out1;
11787 }
11788
11789#if CONFIG_MACF
11790 error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp),
11791 newname_buf);
11792 if (error)
11793 goto out1;
11794#endif
11795
11796 NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
11797 UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx);
11798 tond->ni_dvp = snapdvp;
11799
11800 error = namei(tond);
11801 if (error) {
11802 goto out2;
11803 } else if (tond->ni_vp) {
11804 /*
11805 * snapshot rename behaves differently than rename(2) - if the
11806 * new name exists, EEXIST is returned.
11807 */
11808 vnode_put(tond->ni_vp);
11809 error = EEXIST;
11810 goto out2;
11811 }
11812
11813 error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
11814 &tond->ni_cnd, ctx);
11815
11816out2:
11817 nameidone(tond);
11818out1:
11819 FREE(newname_buf, M_TEMP);
11820 vnode_put(fvp);
11821 vnode_put(snapdvp);
11822 vnode_put(rvp);
11823 nameidone(fromnd);
11824out:
11825 FREE(__rename_data, M_TEMP);
11826 return (error);
11827}
11828
11829/*
11830 * Mount a Filesystem snapshot
11831 *
11832 * get the vnode for the unnamed snapshot directory and the snapshot and
11833 * mount the snapshot.
11834 */
11835static int
11836snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
813fb2f6 11837 __unused user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
39037602
A
11838{
11839 vnode_t rvp, snapdvp, snapvp, vp, pvp;
11840 int error;
11841 struct nameidata *snapndp, *dirndp;
11842 /* carving out a chunk for structs that are too big to be on stack. */
11843 struct {
11844 struct nameidata snapnd;
11845 struct nameidata dirnd;
11846 } * __snapshot_mount_data;
11847
11848 MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data),
11849 M_TEMP, M_WAITOK);
11850 snapndp = &__snapshot_mount_data->snapnd;
11851 dirndp = &__snapshot_mount_data->dirnd;
11852
11853 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP,
11854 OP_LOOKUP, ctx);
11855 if (error)
11856 goto out;
11857
11858 snapvp = snapndp->ni_vp;
11859 if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) {
11860 error = EIO;
11861 goto out1;
11862 }
11863
11864 /* Get the vnode to be covered */
11865 NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
11866 UIO_USERSPACE, directory, ctx);
11867 error = namei(dirndp);
11868 if (error)
11869 goto out1;
11870
11871 vp = dirndp->ni_vp;
11872 pvp = dirndp->ni_dvp;
11873
11874 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
11875 error = EINVAL;
11876 } else {
11877 mount_t mp = vnode_mount(rvp);
11878 struct fs_snapshot_mount_args smnt_data;
11879
11880 smnt_data.sm_mp = mp;
11881 smnt_data.sm_cnp = &snapndp->ni_cnd;
11882 error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
5ba3f43e 11883 &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), flags & MNT_DONTBROWSE,
39037602 11884 KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
39037602
A
11885 }
11886
11887 vnode_put(vp);
11888 vnode_put(pvp);
11889 nameidone(dirndp);
11890out1:
11891 vnode_put(snapvp);
11892 vnode_put(snapdvp);
11893 vnode_put(rvp);
11894 nameidone(snapndp);
11895out:
11896 FREE(__snapshot_mount_data, M_TEMP);
11897 return (error);
11898}
11899
813fb2f6
A
11900/*
11901 * Root from a snapshot of the filesystem
11902 *
11903 * Marks the filesystem to root from the given snapshot on next boot.
11904 */
11905static int
11906snapshot_root(int dirfd, user_addr_t name, __unused uint32_t flags,
11907 vfs_context_t ctx)
11908{
11909 int error;
11910 vnode_t rvp;
11911 mount_t mp;
11912 struct fs_snapshot_root_args root_data;
11913 struct componentname cnp;
11914 caddr_t name_buf;
11915 size_t name_len;
11916
11917 error = vnode_getfromfd(ctx, dirfd, &rvp);
11918 if (error) {
11919 return (error);
11920 }
11921 mp = vnode_mount(rvp);
11922
11923 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11924 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11925 if (error) {
11926 FREE(name_buf, M_TEMP);
11927 vnode_put(rvp);
11928 return (error);
11929 }
11930
11931 // XXX MAC checks ?
11932
11933 /*
11934 * Grab mount_iterref so that we can release the vnode,
11935 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
11936 */
11937 error = mount_iterref (mp, 0);
11938 vnode_put(rvp);
11939 if (error) {
11940 FREE(name_buf, M_TEMP);
11941 return (error);
11942 }
11943
11944 memset(&cnp, 0, sizeof(cnp));
11945 cnp.cn_pnbuf = (char *)name_buf;
11946 cnp.cn_nameiop = LOOKUP;
11947 cnp.cn_flags = ISLASTCN | HASBUF;
11948 cnp.cn_pnlen = MAXPATHLEN;
11949 cnp.cn_nameptr = cnp.cn_pnbuf;
11950 cnp.cn_namelen = (int)name_len;
11951 root_data.sr_cnp = &cnp;
11952
11953 error = VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, (caddr_t)&root_data, 0, ctx);
11954
11955 mount_iterdrop(mp);
11956 FREE(name_buf, M_TEMP);
11957
11958 return (error);
11959}
11960
39037602
A
11961/*
11962 * FS snapshot operations dispatcher
11963 */
11964int
11965fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap,
11966 __unused int32_t *retval)
11967{
11968 int error;
11969 vfs_context_t ctx = vfs_context_current();
11970
813fb2f6
A
11971 AUDIT_ARG(fd, uap->dirfd);
11972 AUDIT_ARG(value32, uap->op);
11973
39037602
A
11974 error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0);
11975 if (error)
11976 return (error);
11977
11978 switch (uap->op) {
11979 case SNAPSHOT_OP_CREATE:
11980 error = snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx);
11981 break;
11982 case SNAPSHOT_OP_DELETE:
11983 error = snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx);
11984 break;
11985 case SNAPSHOT_OP_RENAME:
11986 error = snapshot_rename(uap->dirfd, uap->name1, uap->name2,
11987 uap->flags, ctx);
11988 break;
11989 case SNAPSHOT_OP_MOUNT:
11990 error = snapshot_mount(uap->dirfd, uap->name1, uap->name2,
11991 uap->data, uap->flags, ctx);
11992 break;
11993 case SNAPSHOT_OP_REVERT:
11994 error = snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx);
11995 break;
813fb2f6
A
11996 case SNAPSHOT_OP_ROOT:
11997 error = snapshot_root(uap->dirfd, uap->name1, uap->flags, ctx);
11998 break;
39037602
A
11999 default:
12000 error = ENOSYS;
12001 }
12002
12003 return (error);
12004}