]> git.saurik.com Git - apple/xnu.git/blame - bsd/vfs/vfs_syscalls.c
xnu-3248.20.55.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
CommitLineData
1c79356b 1/*
c18c124e 2 * Copyright (c) 1995-2015 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
2d21ac55
A
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
1c79356b
A
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/namei.h>
77#include <sys/filedesc.h>
78#include <sys/kernel.h>
91447636 79#include <sys/file_internal.h>
1c79356b 80#include <sys/stat.h>
91447636
A
81#include <sys/vnode_internal.h>
82#include <sys/mount_internal.h>
83#include <sys/proc_internal.h>
84#include <sys/kauth.h>
85#include <sys/uio_internal.h>
1c79356b 86#include <sys/malloc.h>
91447636 87#include <sys/mman.h>
1c79356b
A
88#include <sys/dirent.h>
89#include <sys/attr.h>
90#include <sys/sysctl.h>
91#include <sys/ubc.h>
9bccf70c 92#include <sys/quota.h>
91447636
A
93#include <sys/kdebug.h>
94#include <sys/fsevents.h>
6d2010ae 95#include <sys/imgsrc.h>
91447636
A
96#include <sys/sysproto.h>
97#include <sys/xattr.h>
b0d623f7
A
98#include <sys/fcntl.h>
99#include <sys/fsctl.h>
91447636 100#include <sys/ubc_internal.h>
593a1d5f 101#include <sys/disk.h>
3e170ce0 102#include <sys/content_protection.h>
91447636
A
103#include <machine/cons.h>
104#include <machine/limits.h>
105#include <miscfs/specfs/specdev.h>
e5568f75 106
b0d623f7 107#include <security/audit/audit.h>
e5568f75
A
108#include <bsm/audit_kevents.h>
109
91447636
A
110#include <mach/mach_types.h>
111#include <kern/kern_types.h>
112#include <kern/kalloc.h>
6d2010ae 113#include <kern/task.h>
91447636
A
114
115#include <vm/vm_pageout.h>
1c79356b 116
91447636 117#include <libkern/OSAtomic.h>
b0d623f7 118#include <pexpert/pexpert.h>
3e170ce0 119#include <IOKit/IOBSD.h>
55e303ae 120
2d21ac55
A
121#if CONFIG_MACF
122#include <security/mac.h>
123#include <security/mac_framework.h>
124#endif
1c79356b 125
2d21ac55
A
126#if CONFIG_FSE
127#define GET_PATH(x) \
128 (x) = get_pathbuff();
129#define RELEASE_PATH(x) \
130 release_pathbuff(x);
131#else
132#define GET_PATH(x) \
133 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
134#define RELEASE_PATH(x) \
135 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
136#endif /* CONFIG_FSE */
137
138/* struct for checkdirs iteration */
139struct cdirargs {
140 vnode_t olddp;
141 vnode_t newdp;
142};
143/* callback for checkdirs iteration */
144static int checkdirs_callback(proc_t p, void * arg);
1c79356b 145
91447636 146static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
6601e61a 147static int checkdirs(vnode_t olddp, vfs_context_t ctx);
91447636
A
148void enablequotas(struct mount *mp, vfs_context_t ctx);
149static int getfsstat_callback(mount_t mp, void * arg);
150static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
2d21ac55 151static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
91447636 152static int sync_callback(mount_t, void *);
fe8ab488
A
153static void sync_thread(void *, __unused wait_result_t);
154static int sync_async(int);
91447636
A
155static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
156 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
157 boolean_t partial_copy);
b0d623f7
A
158static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
159 user_addr_t bufp);
160static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
6d2010ae
A
161static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
162 struct componentname *cnp, user_addr_t fsmountargs,
163 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
164 vfs_context_t ctx);
165void vfs_notify_mount(vnode_t pdvp);
166
167int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
b7266188 168
fe8ab488
A
169struct fd_vn_data * fg_vn_data_alloc(void);
170
c18c124e
A
171/*
172 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
173 * Concurrent lookups (or lookups by ids) on hard links can cause the
174 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
175 * does) to return ENOENT as the path cannot be returned from the name cache
176 * alone. We have no option but to retry and hope to get one namei->reverse path
177 * generation done without an intervening lookup, lookup by id on the hard link
178 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
179 * which currently are the MAC hooks for rename, unlink and rmdir.
180 */
181#define MAX_AUTHORIZE_ENOENT_RETRIES 1024
182
fe8ab488
A
183static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
184
185static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
186
b7266188 187#ifdef CONFIG_IMGSRC_ACCESS
b7266188
A
188static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
189static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
190static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
191static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
192static void mount_end_update(mount_t mp);
6d2010ae 193static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
b7266188
A
194#endif /* CONFIG_IMGSRC_ACCESS */
195
2d21ac55
A
196int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
197
198__private_extern__
199int sync_internal(void);
200
2d21ac55 201__private_extern__
c18c124e 202int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
91447636 203
fe8ab488
A
204extern lck_grp_t *fd_vn_lck_grp;
205extern lck_grp_attr_t *fd_vn_lck_grp_attr;
206extern lck_attr_t *fd_vn_lck_attr;
207
2d21ac55
A
208/*
209 * incremented each time a mount or unmount operation occurs
210 * used to invalidate the cached value of the rootvp in the
211 * mount structure utilized by cache_lookup_path
212 */
b0d623f7 213uint32_t mount_generation = 0;
1c79356b
A
214
215/* counts number of mount and unmount operations */
216unsigned int vfs_nummntops=0;
217
39236c6e
A
218extern const struct fileops vnops;
219#if CONFIG_APPLEDOUBLE
2d21ac55 220extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
39236c6e 221#endif /* CONFIG_APPLEDOUBLE */
91447636 222
fe8ab488
A
223typedef uint32_t vfs_rename_flags_t;
224#if CONFIG_SECLUDED_RENAME
/*
 * NOTE(review): presumably a bit value carried in vfs_rename_flags_t --
 * confirm against the rename code that consumes it.
 */
enum {
	VFS_SECLUDE_RENAME	= 0x00000001
};
228#endif
229
1c79356b
A
230/*
231 * Virtual File System System Calls
232 */
233
fe8ab488 234#if NFSCLIENT || DEVFS
6d2010ae
A
235/*
236 * Private in-kernel mounting spi (NFS and devfs only, not exported)
237 */
238 __private_extern__
239boolean_t
240vfs_iskernelmount(mount_t mp)
241{
242 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
243}
244
245 __private_extern__
246int
247kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
248 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
249{
250 struct nameidata nd;
251 boolean_t did_namei;
252 int error;
253
254 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
255 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
256
257 /*
258 * Get the vnode to be covered if it's not supplied
259 */
260 if (vp == NULLVP) {
261 error = namei(&nd);
262 if (error)
263 return (error);
264 vp = nd.ni_vp;
265 pvp = nd.ni_dvp;
266 did_namei = TRUE;
267 } else {
268 char *pnbuf = CAST_DOWN(char *, path);
269
270 nd.ni_cnd.cn_pnbuf = pnbuf;
271 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
272 did_namei = FALSE;
273 }
274
275 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
276 syscall_flags, kern_flags, NULL, TRUE, ctx);
277
278 if (did_namei) {
279 vnode_put(vp);
280 vnode_put(pvp);
281 nameidone(&nd);
282 }
283
284 return (error);
285}
fe8ab488 286#endif /* NFSCLIENT || DEVFS */
6d2010ae 287
1c79356b
A
288/*
289 * Mount a file system.
290 */
1c79356b
A
291/* ARGSUSED */
292int
b0d623f7 293mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
2d21ac55
A
294{
295 struct __mac_mount_args muap;
296
297 muap.type = uap->type;
298 muap.path = uap->path;
299 muap.flags = uap->flags;
300 muap.data = uap->data;
301 muap.mac_p = USER_ADDR_NULL;
302 return (__mac_mount(p, &muap, retval));
303}
304
6d2010ae
A
305void
306vfs_notify_mount(vnode_t pdvp)
307{
308 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
309 lock_vnode_and_post(pdvp, NOTE_WRITE);
310}
311
b0d623f7
A
312/*
313 * __mac_mount:
314 * Mount a file system taking into account MAC label behavior.
315 * See mount(2) man page for more information
316 *
317 * Parameters: p Process requesting the mount
318 * uap User argument descriptor (see below)
319 * retval (ignored)
320 *
321 * Indirect: uap->type Filesystem type
322 * uap->path Path to mount
323 * uap->data Mount arguments
324 * uap->mac_p MAC info
325 * uap->flags Mount flags
326 *
327 *
328 * Returns: 0 Success
329 * !0 Not success
330 */
6d2010ae
A
331boolean_t root_fs_upgrade_try = FALSE;
332
2d21ac55 333int
b0d623f7 334__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
1c79356b 335{
39236c6e
A
336 vnode_t pvp = NULL;
337 vnode_t vp = NULL;
338 int need_nameidone = 0;
6d2010ae
A
339 vfs_context_t ctx = vfs_context_current();
340 char fstypename[MFSNAMELEN];
341 struct nameidata nd;
342 size_t dummy=0;
343 char *labelstr = NULL;
344 int flags = uap->flags;
345 int error;
39236c6e 346#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
6d2010ae 347 boolean_t is_64bit = IS_64BIT_PROCESS(p);
39236c6e
A
348#else
349#pragma unused(p)
350#endif
6d2010ae
A
351 /*
352 * Get the fs type name from user space
353 */
354 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
355 if (error)
356 return (error);
357
358 /*
359 * Get the vnode to be covered
360 */
fe8ab488 361 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
6d2010ae
A
362 UIO_USERSPACE, uap->path, ctx);
363 error = namei(&nd);
39236c6e
A
364 if (error) {
365 goto out;
366 }
367 need_nameidone = 1;
6d2010ae
A
368 vp = nd.ni_vp;
369 pvp = nd.ni_dvp;
370
371#ifdef CONFIG_IMGSRC_ACCESS
372 /* Mounting image source cannot be batched with other operations */
373 if (flags == MNT_IMGSRC_BY_INDEX) {
374 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
375 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
376 goto out;
377 }
378#endif /* CONFIG_IMGSRC_ACCESS */
379
380#if CONFIG_MACF
381 /*
382 * Get the label string (if any) from user space
383 */
384 if (uap->mac_p != USER_ADDR_NULL) {
385 struct user_mac mac;
386 size_t ulen = 0;
387
388 if (is_64bit) {
389 struct user64_mac mac64;
390 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
391 mac.m_buflen = mac64.m_buflen;
392 mac.m_string = mac64.m_string;
393 } else {
394 struct user32_mac mac32;
395 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
396 mac.m_buflen = mac32.m_buflen;
397 mac.m_string = mac32.m_string;
398 }
399 if (error)
400 goto out;
401 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
402 (mac.m_buflen < 2)) {
403 error = EINVAL;
404 goto out;
405 }
406 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
407 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
408 if (error) {
409 goto out;
410 }
411 AUDIT_ARG(mac_string, labelstr);
412 }
413#endif /* CONFIG_MACF */
414
415 AUDIT_ARG(fflags, flags);
416
4bd07ac2
A
417#if SECURE_KERNEL
418 if (flags & MNT_UNION) {
419 /* No union mounts on release kernels */
420 error = EPERM;
421 goto out;
422 }
423#endif
424
6d2010ae 425 if ((vp->v_flag & VROOT) &&
39236c6e
A
426 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
427 if (!(flags & MNT_UNION)) {
6d2010ae 428 flags |= MNT_UPDATE;
39236c6e
A
429 }
430 else {
431 /*
432 * For a union mount on '/', treat it as fresh
433 * mount instead of update.
434 * Otherwise, union mouting on '/' used to panic the
435 * system before, since mnt_vnodecovered was found to
436 * be NULL for '/' which is required for unionlookup
437 * after it gets ENOENT on union mount.
438 */
439 flags = (flags & ~(MNT_UPDATE));
440 }
441
4bd07ac2 442#if SECURE_KERNEL
39236c6e
A
443 if ((flags & MNT_RDONLY) == 0) {
444 /* Release kernels are not allowed to mount "/" as rw */
445 error = EPERM;
446 goto out;
447 }
39236c6e
A
448#endif
449 /*
450 * See 7392553 for more details on why this check exists.
451 * Suffice to say: If this check is ON and something tries
452 * to mount the rootFS RW, we'll turn off the codesign
453 * bitmap optimization.
454 */
6d2010ae 455#if CHECK_CS_VALIDATION_BITMAP
39236c6e 456 if ((flags & MNT_RDONLY) == 0 ) {
6d2010ae
A
457 root_fs_upgrade_try = TRUE;
458 }
459#endif
460 }
461
462 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
463 labelstr, FALSE, ctx);
39236c6e 464
6d2010ae 465out:
39236c6e 466
6d2010ae
A
467#if CONFIG_MACF
468 if (labelstr)
469 FREE(labelstr, M_MACTEMP);
470#endif /* CONFIG_MACF */
471
39236c6e
A
472 if (vp) {
473 vnode_put(vp);
474 }
475 if (pvp) {
476 vnode_put(pvp);
477 }
478 if (need_nameidone) {
479 nameidone(&nd);
480 }
6d2010ae
A
481
482 return (error);
483}
484
485/*
486 * common mount implementation (final stage of mounting)
487
488 * Arguments:
489 * fstypename file system type (i.e. its vfs name)
490 * pvp parent of covered vnode
491 * vp covered vnode
492 * cnp component name (ie path) of covered vnode
493 * flags generic mount flags
494 * fsmountargs file system specific data
495 * labelstr optional MAC label
496 * kernelmount TRUE for mounts initiated from inside the kernel
497 * ctx caller's context
498 */
499static int
500mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
501 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
502 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
503{
39236c6e
A
504#if !CONFIG_MACF
505#pragma unused(labelstr)
506#endif
91447636
A
507 struct vnode *devvp = NULLVP;
508 struct vnode *device_vnode = NULLVP;
2d21ac55
A
509#if CONFIG_MACF
510 struct vnode *rvp;
511#endif
1c79356b 512 struct mount *mp;
6601e61a 513 struct vfstable *vfsp = (struct vfstable *)0;
6d2010ae 514 struct proc *p = vfs_context_proc(ctx);
91447636 515 int error, flag = 0;
91447636 516 user_addr_t devpath = USER_ADDR_NULL;
91447636
A
517 int ronly = 0;
518 int mntalloc = 0;
b0d623f7 519 boolean_t vfsp_ref = FALSE;
743b1565 520 boolean_t is_rwlock_locked = FALSE;
b0d623f7
A
521 boolean_t did_rele = FALSE;
522 boolean_t have_usecount = FALSE;
9bccf70c 523
1c79356b 524 /*
6d2010ae 525 * Process an update for an existing mount
1c79356b 526 */
6d2010ae 527 if (flags & MNT_UPDATE) {
1c79356b 528 if ((vp->v_flag & VROOT) == 0) {
91447636
A
529 error = EINVAL;
530 goto out1;
1c79356b
A
531 }
532 mp = vp->v_mount;
d12e1678 533
91447636 534 /* unmount in progress return error */
b0d623f7 535 mount_lock_spin(mp);
91447636
A
536 if (mp->mnt_lflag & MNT_LUNMOUNT) {
537 mount_unlock(mp);
538 error = EBUSY;
539 goto out1;
d12e1678 540 }
91447636
A
541 mount_unlock(mp);
542 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 543 is_rwlock_locked = TRUE;
1c79356b
A
544 /*
545 * We only allow the filesystem to be reloaded if it
546 * is currently mounted read-only.
547 */
6d2010ae 548 if ((flags & MNT_RELOAD) &&
1c79356b 549 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
91447636
A
550 error = ENOTSUP;
551 goto out1;
1c79356b 552 }
b7266188 553
316670eb
A
554 /*
555 * If content protection is enabled, update mounts are not
556 * allowed to turn it off.
557 */
558 if ((mp->mnt_flag & MNT_CPROTECT) &&
559 ((flags & MNT_CPROTECT) == 0)) {
560 error = EINVAL;
561 goto out1;
562 }
563
b7266188
A
564#ifdef CONFIG_IMGSRC_ACCESS
565 /* Can't downgrade the backer of the root FS */
566 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
6d2010ae 567 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
b7266188
A
568 error = ENOTSUP;
569 goto out1;
570 }
571#endif /* CONFIG_IMGSRC_ACCESS */
572
1c79356b
A
573 /*
574 * Only root, or the user that did the original mount is
575 * permitted to update it.
576 */
2d21ac55
A
577 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
578 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
579 goto out1;
580 }
581#if CONFIG_MACF
582 error = mac_mount_check_remount(ctx, mp);
583 if (error != 0) {
91447636 584 goto out1;
1c79356b 585 }
2d21ac55 586#endif
1c79356b 587 /*
91447636
A
588 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
589 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
1c79356b 590 */
6d2010ae
A
591 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
592 flags |= MNT_NOSUID | MNT_NODEV;
d12e1678 593 if (mp->mnt_flag & MNT_NOEXEC)
6d2010ae 594 flags |= MNT_NOEXEC;
1c79356b 595 }
d12e1678
A
596 flag = mp->mnt_flag;
597
316670eb
A
598
599
6d2010ae 600 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
d12e1678 601
91447636 602 vfsp = mp->mnt_vtable;
1c79356b
A
603 goto update;
604 }
1c79356b 605 /*
91447636 606 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
1c79356b
A
607 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
608 */
6d2010ae
A
609 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
610 flags |= MNT_NOSUID | MNT_NODEV;
1c79356b 611 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
6d2010ae 612 flags |= MNT_NOEXEC;
1c79356b 613 }
91447636 614
55e303ae
A
615 /* XXXAUDIT: Should we capture the type on the error path as well? */
616 AUDIT_ARG(text, fstypename);
91447636 617 mount_list_lock();
1c79356b 618 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
b0d623f7
A
619 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
620 vfsp->vfc_refcount++;
621 vfsp_ref = TRUE;
1c79356b 622 break;
b0d623f7 623 }
91447636 624 mount_list_unlock();
1c79356b 625 if (vfsp == NULL) {
91447636
A
626 error = ENODEV;
627 goto out1;
1c79356b 628 }
6d2010ae
A
629
630 /*
631 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
632 */
633 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
634 error = EINVAL; /* unsupported request */
2d21ac55 635 goto out1;
6d2010ae
A
636 }
637
638 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
639 if (error != 0) {
91447636 640 goto out1;
1c79356b 641 }
1c79356b
A
642
643 /*
6d2010ae 644 * Allocate and initialize the filesystem (mount_t)
1c79356b 645 */
b0d623f7 646 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
1c79356b 647 M_MOUNT, M_WAITOK);
b0d623f7 648 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
91447636 649 mntalloc = 1;
0b4e3aa0
A
650
651 /* Initialize the default IO constraints */
652 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
653 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
91447636
A
654 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
655 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
656 mp->mnt_devblocksize = DEV_BSIZE;
2d21ac55 657 mp->mnt_alignmentmask = PAGE_MASK;
b0d623f7
A
658 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
659 mp->mnt_ioscale = 1;
2d21ac55
A
660 mp->mnt_ioflags = 0;
661 mp->mnt_realrootvp = NULLVP;
662 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
91447636
A
663
664 TAILQ_INIT(&mp->mnt_vnodelist);
665 TAILQ_INIT(&mp->mnt_workerqueue);
666 TAILQ_INIT(&mp->mnt_newvnodes);
667 mount_lock_init(mp);
668 lck_rw_lock_exclusive(&mp->mnt_rwlock);
743b1565 669 is_rwlock_locked = TRUE;
1c79356b 670 mp->mnt_op = vfsp->vfc_vfsops;
91447636 671 mp->mnt_vtable = vfsp;
91447636 672 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
1c79356b 673 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
fe8ab488
A
674 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
675 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1c79356b 676 mp->mnt_vnodecovered = vp;
2d21ac55 677 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
6d2010ae
A
678 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
679 mp->mnt_devbsdunit = 0;
1c79356b 680
91447636
A
681 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
682 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
6d2010ae 683
fe8ab488 684#if NFSCLIENT || DEVFS
6d2010ae
A
685 if (kernelmount)
686 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
687 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
688 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
fe8ab488 689#endif /* NFSCLIENT || DEVFS */
6d2010ae 690
1c79356b
A
691update:
692 /*
693 * Set the mount level flags.
694 */
6d2010ae 695 if (flags & MNT_RDONLY)
1c79356b 696 mp->mnt_flag |= MNT_RDONLY;
6d2010ae
A
697 else if (mp->mnt_flag & MNT_RDONLY) {
698 // disallow read/write upgrades of file systems that
699 // had the TYPENAME_OVERRIDE feature set.
700 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
701 error = EPERM;
702 goto out1;
703 }
1c79356b 704 mp->mnt_kern_flag |= MNTK_WANTRDWR;
6d2010ae 705 }
0b4e3aa0
A
706 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
707 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
6d2010ae
A
708 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
709 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
710 MNT_QUARANTINE | MNT_CPROTECT);
711 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
712 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
713 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
714 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
715 MNT_QUARANTINE | MNT_CPROTECT);
2d21ac55
A
716
717#if CONFIG_MACF
6d2010ae 718 if (flags & MNT_MULTILABEL) {
2d21ac55
A
719 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
720 error = EINVAL;
721 goto out1;
722 }
723 mp->mnt_flag |= MNT_MULTILABEL;
724 }
725#endif
6d2010ae
A
726 /*
727 * Process device path for local file systems if requested
728 */
91447636 729 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
6d2010ae 730 if (vfs_context_is64bit(ctx)) {
91447636
A
731 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
732 goto out1;
733 fsmountargs += sizeof(devpath);
734 } else {
b0d623f7 735 user32_addr_t tmp;
91447636
A
736 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
737 goto out1;
738 /* munge into LP64 addr */
739 devpath = CAST_USER_ADDR_T(tmp);
740 fsmountargs += sizeof(tmp);
741 }
742
6d2010ae 743 /* Lookup device and authorize access to it */
91447636 744 if ((devpath)) {
6d2010ae
A
745 struct nameidata nd;
746
747 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
748 if ( (error = namei(&nd)) )
91447636
A
749 goto out1;
750
3e170ce0 751 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
6d2010ae 752 devvp = nd.ni_vp;
91447636 753
6d2010ae 754 nameidone(&nd);
91447636
A
755
756 if (devvp->v_type != VBLK) {
757 error = ENOTBLK;
758 goto out2;
759 }
760 if (major(devvp->v_rdev) >= nblkdev) {
761 error = ENXIO;
762 goto out2;
763 }
764 /*
765 * If mount by non-root, then verify that user has necessary
766 * permissions on the device.
767 */
2d21ac55 768 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
6d2010ae
A
769 mode_t accessmode = KAUTH_VNODE_READ_DATA;
770
91447636
A
771 if ((mp->mnt_flag & MNT_RDONLY) == 0)
772 accessmode |= KAUTH_VNODE_WRITE_DATA;
2d21ac55 773 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
91447636
A
774 goto out2;
775 }
776 }
6d2010ae
A
777 /* On first mount, preflight and open device */
778 if (devpath && ((flags & MNT_UPDATE) == 0)) {
91447636
A
779 if ( (error = vnode_ref(devvp)) )
780 goto out2;
781 /*
782 * Disallow multiple mounts of the same device.
783 * Disallow mounting of a device that is currently in use
784 * (except for root, which might share swap device for miniroot).
785 * Flush out any old buffers remaining from a previous use.
786 */
787 if ( (error = vfs_mountedon(devvp)) )
788 goto out3;
789
790 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
791 error = EBUSY;
792 goto out3;
793 }
2d21ac55 794 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
91447636
A
795 error = ENOTBLK;
796 goto out3;
797 }
798 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
799 goto out3;
800
801 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
2d21ac55
A
802#if CONFIG_MACF
803 error = mac_vnode_check_open(ctx,
804 devvp,
805 ronly ? FREAD : FREAD|FWRITE);
806 if (error)
807 goto out3;
808#endif /* MAC */
809 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
91447636
A
810 goto out3;
811
812 mp->mnt_devvp = devvp;
813 device_vnode = devvp;
b0d623f7 814
6d2010ae
A
815 } else if ((mp->mnt_flag & MNT_RDONLY) &&
816 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
817 (device_vnode = mp->mnt_devvp)) {
818 dev_t dev;
819 int maj;
820 /*
821 * If upgrade to read-write by non-root, then verify
822 * that user has necessary permissions on the device.
823 */
824 vnode_getalways(device_vnode);
b0d623f7 825
6d2010ae
A
826 if (suser(vfs_context_ucred(ctx), NULL) &&
827 (error = vnode_authorize(device_vnode, NULL,
828 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
829 ctx)) != 0) {
830 vnode_put(device_vnode);
831 goto out2;
832 }
b0d623f7 833
6d2010ae
A
834 /* Tell the device that we're upgrading */
835 dev = (dev_t)device_vnode->v_rdev;
836 maj = major(dev);
b0d623f7 837
6d2010ae
A
838 if ((u_int)maj >= (u_int)nblkdev)
839 panic("Volume mounted on a device with invalid major number.");
b0d623f7 840
6d2010ae
A
841 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
842 vnode_put(device_vnode);
91447636 843 device_vnode = NULLVP;
6d2010ae
A
844 if (error != 0) {
845 goto out2;
846 }
91447636
A
847 }
848 }
2d21ac55 849#if CONFIG_MACF
6d2010ae 850 if ((flags & MNT_UPDATE) == 0) {
2d21ac55
A
851 mac_mount_label_init(mp);
852 mac_mount_label_associate(ctx, mp);
853 }
6d2010ae
A
854 if (labelstr) {
855 if ((flags & MNT_UPDATE) != 0) {
856 error = mac_mount_check_label_update(ctx, mp);
2d21ac55
A
857 if (error != 0)
858 goto out3;
859 }
2d21ac55
A
860 }
861#endif
1c79356b
A
862 /*
863 * Mount the filesystem.
864 */
2d21ac55 865 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
d12e1678 866
6d2010ae 867 if (flags & MNT_UPDATE) {
1c79356b
A
868 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
869 mp->mnt_flag &= ~MNT_RDONLY;
870 mp->mnt_flag &=~
871 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
872 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
873 if (error)
6d2010ae 874 mp->mnt_flag = flag; /* restore flag value */
91447636
A
875 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
876 lck_rw_done(&mp->mnt_rwlock);
743b1565 877 is_rwlock_locked = FALSE;
9bccf70c 878 if (!error)
2d21ac55 879 enablequotas(mp, ctx);
6d2010ae 880 goto exit;
1c79356b 881 }
6d2010ae 882
1c79356b
A
883 /*
884 * Put the new filesystem on the mount list after root.
885 */
6601e61a 886 if (error == 0) {
2d21ac55
A
887 struct vfs_attr vfsattr;
888#if CONFIG_MACF
889 if (vfs_flags(mp) & MNT_MULTILABEL) {
890 error = VFS_ROOT(mp, &rvp, ctx);
891 if (error) {
892 printf("%s() VFS_ROOT returned %d\n", __func__, error);
893 goto out3;
894 }
2d21ac55 895 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
b0d623f7
A
896 /*
897 * drop reference provided by VFS_ROOT
898 */
899 vnode_put(rvp);
900
2d21ac55
A
901 if (error)
902 goto out3;
903 }
904#endif /* MAC */
905
906 vnode_lock_spin(vp);
907 CLR(vp->v_flag, VMOUNT);
91447636
A
908 vp->v_mountedhere = mp;
909 vnode_unlock(vp);
910
2d21ac55
A
911 /*
912 * taking the name_cache_lock exclusively will
913 * insure that everyone is out of the fast path who
914 * might be trying to use a now stale copy of
915 * vp->v_mountedhere->mnt_realrootvp
916 * bumping mount_generation causes the cached values
917 * to be invalidated
918 */
919 name_cache_lock();
920 mount_generation++;
921 name_cache_unlock();
922
b0d623f7
A
923 error = vnode_ref(vp);
924 if (error != 0) {
925 goto out4;
926 }
927
928 have_usecount = TRUE;
91447636 929
2d21ac55 930 error = checkdirs(vp, ctx);
6601e61a
A
931 if (error != 0) {
932 /* Unmount the filesystem as cdir/rdirs cannot be updated */
933 goto out4;
934 }
91447636
A
935 /*
936 * there is no cleanup code here so I have made it void
937 * we need to revisit this
938 */
2d21ac55 939 (void)VFS_START(mp, 0, ctx);
1c79356b 940
6d2010ae
A
941 if (mount_list_add(mp) != 0) {
942 /*
943 * The system is shutting down trying to umount
944 * everything, so fail with a plausible errno.
945 */
946 error = EBUSY;
b0d623f7
A
947 goto out4;
948 }
6601e61a
A
949 lck_rw_done(&mp->mnt_rwlock);
950 is_rwlock_locked = FALSE;
951
2d21ac55
A
952 /* Check if this mounted file system supports EAs or named streams. */
953 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
954 VFSATTR_INIT(&vfsattr);
955 VFSATTR_WANTED(&vfsattr, f_capabilities);
956 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
957 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
958 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
959 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
960 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
961 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
962 }
963#if NAMEDSTREAMS
964 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
965 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
966 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
967 }
968#endif
969 /* Check if this file system supports path from id lookups. */
970 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
971 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
972 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
973 } else if (mp->mnt_flag & MNT_DOVOLFS) {
974 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
975 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
976 }
977 }
978 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
979 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
980 }
981 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
982 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
983 }
1c79356b 984 /* increment the operations count */
b0d623f7 985 OSAddAtomic(1, &vfs_nummntops);
2d21ac55 986 enablequotas(mp, ctx);
91447636
A
987
988 if (device_vnode) {
989 device_vnode->v_specflags |= SI_MOUNTEDON;
990
991 /*
992 * cache the IO attributes for the underlying physical media...
993 * an error return indicates the underlying driver doesn't
994 * support all the queries necessary... however, reasonable
995 * defaults will have been set, so no reason to bail or care
996 */
997 vfs_init_io_attributes(device_vnode, mp);
998 }
6601e61a
A
999
1000 /* Now that mount is setup, notify the listeners */
6d2010ae 1001 vfs_notify_mount(pvp);
3e170ce0
A
1002 IOBSDMountChange(mp, kIOMountChangeMount);
1003
1c79356b 1004 } else {
6d2010ae
A
1005 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1006 if (mp->mnt_vnodelist.tqh_first != NULL) {
1007 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1008 mp->mnt_vtable->vfc_name, error);
1009 }
1010
2d21ac55 1011 vnode_lock_spin(vp);
1c79356b 1012 CLR(vp->v_flag, VMOUNT);
6601e61a 1013 vnode_unlock(vp);
91447636
A
1014 mount_list_lock();
1015 mp->mnt_vtable->vfc_refcount--;
1016 mount_list_unlock();
55e303ae 1017
91447636 1018 if (device_vnode ) {
91447636 1019 vnode_rele(device_vnode);
b0d623f7 1020 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
91447636
A
1021 }
1022 lck_rw_done(&mp->mnt_rwlock);
743b1565 1023 is_rwlock_locked = FALSE;
6d2010ae
A
1024
1025 /*
1026 * if we get here, we have a mount structure that needs to be freed,
1027 * but since the coveredvp hasn't yet been updated to point at it,
1028 * no need to worry about other threads holding a crossref on this mp
1029 * so it's ok to just free it
1030 */
91447636 1031 mount_lock_destroy(mp);
2d21ac55
A
1032#if CONFIG_MACF
1033 mac_mount_label_destroy(mp);
1034#endif
55e303ae 1035 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1c79356b 1036 }
6d2010ae 1037exit:
91447636 1038 /*
6d2010ae 1039 * drop I/O count on the device vp if there was one
91447636
A
1040 */
1041 if (devpath && devvp)
1042 vnode_put(devvp);
b0d623f7 1043
91447636 1044 return(error);
b0d623f7 1045
6d2010ae 1046/* Error condition exits */
6601e61a 1047out4:
2d21ac55 1048 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
6d2010ae
A
1049
1050 /*
1051 * If the mount has been placed on the covered vp,
1052 * it may have been discovered by now, so we have
1053 * to treat this just like an unmount
1054 */
1055 mount_lock_spin(mp);
1056 mp->mnt_lflag |= MNT_LDEAD;
1057 mount_unlock(mp);
1058
6601e61a 1059 if (device_vnode != NULLVP) {
b0d623f7 1060 vnode_rele(device_vnode);
2d21ac55
A
1061 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1062 ctx);
b0d623f7 1063 did_rele = TRUE;
6601e61a 1064 }
6d2010ae 1065
2d21ac55 1066 vnode_lock_spin(vp);
6d2010ae
A
1067
1068 mp->mnt_crossref++;
6601e61a 1069 vp->v_mountedhere = (mount_t) 0;
6d2010ae 1070
6601e61a 1071 vnode_unlock(vp);
6d2010ae 1072
b0d623f7
A
1073 if (have_usecount) {
1074 vnode_rele(vp);
1075 }
91447636 1076out3:
6d2010ae 1077 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
2d21ac55 1078 vnode_rele(devvp);
91447636
A
1079out2:
1080 if (devpath && devvp)
1081 vnode_put(devvp);
1082out1:
743b1565
A
1083 /* Release mnt_rwlock only when it was taken */
1084 if (is_rwlock_locked == TRUE) {
1085 lck_rw_done(&mp->mnt_rwlock);
1086 }
6d2010ae 1087
6601e61a 1088 if (mntalloc) {
6d2010ae
A
1089 if (mp->mnt_crossref)
1090 mount_dropcrossref(mp, vp, 0);
1091 else {
1092 mount_lock_destroy(mp);
2d21ac55 1093#if CONFIG_MACF
6d2010ae 1094 mac_mount_label_destroy(mp);
2d21ac55 1095#endif
6d2010ae
A
1096 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1097 }
b0d623f7 1098 }
b0d623f7 1099 if (vfsp_ref) {
6601e61a
A
1100 mount_list_lock();
1101 vfsp->vfc_refcount--;
1102 mount_list_unlock();
6601e61a 1103 }
91447636
A
1104
1105 return(error);
1c79356b
A
1106}
1107
b7266188
A
1108/*
1109 * Flush in-core data, check for competing mount attempts,
1110 * and set VMOUNT
1111 */
6d2010ae
A
1112int
1113prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
b7266188 1114{
39236c6e
A
1115#if !CONFIG_MACF
1116#pragma unused(cnp,fsname)
1117#endif
b7266188
A
1118 struct vnode_attr va;
1119 int error;
1120
6d2010ae
A
1121 if (!skip_auth) {
1122 /*
1123 * If the user is not root, ensure that they own the directory
1124 * onto which we are attempting to mount.
1125 */
1126 VATTR_INIT(&va);
1127 VATTR_WANTED(&va, va_uid);
1128 if ((error = vnode_getattr(vp, &va, ctx)) ||
1129 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1130 (!vfs_context_issuser(ctx)))) {
1131 error = EPERM;
1132 goto out;
1133 }
b7266188
A
1134 }
1135
1136 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1137 goto out;
1138
1139 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1140 goto out;
1141
1142 if (vp->v_type != VDIR) {
1143 error = ENOTDIR;
1144 goto out;
1145 }
1146
1147 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1148 error = EBUSY;
1149 goto out;
1150 }
1151
1152#if CONFIG_MACF
1153 error = mac_mount_check_mount(ctx, vp,
1154 cnp, fsname);
1155 if (error != 0)
1156 goto out;
1157#endif
1158
1159 vnode_lock_spin(vp);
1160 SET(vp->v_flag, VMOUNT);
1161 vnode_unlock(vp);
1162
1163out:
1164 return error;
1165}
1166
6d2010ae
A
1167#if CONFIG_IMGSRC_ACCESS
1168
/*
 * IMGSRC_DEBUG: printf-style tracing for the imageboot-source relocation
 * code.  Expands to a printf() call on DEBUG builds and to an empty
 * statement (arguments are not evaluated) otherwise.
 */
#if DEBUG
#define IMGSRC_DEBUG(...) printf(__VA_ARGS__)
#else
#define IMGSRC_DEBUG(...) do { } while(0)
#endif
1174
b7266188
A
1175static int
1176authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1177{
1178 struct nameidata nd;
6d2010ae 1179 vnode_t vp, realdevvp;
b7266188
A
1180 mode_t accessmode;
1181 int error;
1182
6d2010ae
A
1183 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1184 if ( (error = namei(&nd)) ) {
1185 IMGSRC_DEBUG("namei() failed with %d\n", error);
b7266188 1186 return error;
6d2010ae 1187 }
b7266188 1188
b7266188 1189 vp = nd.ni_vp;
b7266188 1190
6d2010ae
A
1191 if (!vnode_isblk(vp)) {
1192 IMGSRC_DEBUG("Not block device.\n");
b7266188
A
1193 error = ENOTBLK;
1194 goto out;
1195 }
6d2010ae
A
1196
1197 realdevvp = mp->mnt_devvp;
1198 if (realdevvp == NULLVP) {
1199 IMGSRC_DEBUG("No device backs the mount.\n");
b7266188
A
1200 error = ENXIO;
1201 goto out;
1202 }
6d2010ae
A
1203
1204 error = vnode_getwithref(realdevvp);
1205 if (error != 0) {
1206 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1207 goto out;
1208 }
1209
1210 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1211 IMGSRC_DEBUG("Wrong dev_t.\n");
1212 error = ENXIO;
1213 goto out1;
1214 }
1215
1216 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1217
b7266188
A
1218 /*
1219 * If mount by non-root, then verify that user has necessary
1220 * permissions on the device.
1221 */
1222 if (!vfs_context_issuser(ctx)) {
1223 accessmode = KAUTH_VNODE_READ_DATA;
1224 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1225 accessmode |= KAUTH_VNODE_WRITE_DATA;
6d2010ae
A
1226 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1227 IMGSRC_DEBUG("Access denied.\n");
1228 goto out1;
1229 }
b7266188
A
1230 }
1231
1232 *devvpp = vp;
6d2010ae
A
1233
1234out1:
1235 vnode_put(realdevvp);
b7266188 1236out:
6d2010ae 1237 nameidone(&nd);
b7266188
A
1238 if (error) {
1239 vnode_put(vp);
1240 }
1241
1242 return error;
1243}
1244
1245/*
1246 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1247 * and call checkdirs()
1248 */
1249static int
1250place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1251{
1252 int error;
1253
1254 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1255
1256 vnode_lock_spin(vp);
1257 CLR(vp->v_flag, VMOUNT);
1258 vp->v_mountedhere = mp;
1259 vnode_unlock(vp);
1260
1261 /*
1262 * taking the name_cache_lock exclusively will
1263 * insure that everyone is out of the fast path who
1264 * might be trying to use a now stale copy of
1265 * vp->v_mountedhere->mnt_realrootvp
1266 * bumping mount_generation causes the cached values
1267 * to be invalidated
1268 */
1269 name_cache_lock();
1270 mount_generation++;
1271 name_cache_unlock();
1272
1273 error = vnode_ref(vp);
1274 if (error != 0) {
1275 goto out;
1276 }
1277
1278 error = checkdirs(vp, ctx);
1279 if (error != 0) {
1280 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1281 vnode_rele(vp);
1282 goto out;
1283 }
1284
1285out:
1286 if (error != 0) {
1287 mp->mnt_vnodecovered = NULLVP;
1288 }
1289 return error;
1290}
1291
1292static void
1293undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1294{
1295 vnode_rele(vp);
1296 vnode_lock_spin(vp);
1297 vp->v_mountedhere = (mount_t)NULL;
1298 vnode_unlock(vp);
1299
1300 mp->mnt_vnodecovered = NULLVP;
1301}
1302
1303static int
1304mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1305{
1306 int error;
1307
1308 /* unmount in progress return error */
1309 mount_lock_spin(mp);
1310 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1311 mount_unlock(mp);
1312 return EBUSY;
1313 }
1314 mount_unlock(mp);
1315 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1316
1317 /*
1318 * We only allow the filesystem to be reloaded if it
1319 * is currently mounted read-only.
1320 */
1321 if ((flags & MNT_RELOAD) &&
1322 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1323 error = ENOTSUP;
1324 goto out;
1325 }
1326
1327 /*
1328 * Only root, or the user that did the original mount is
1329 * permitted to update it.
1330 */
1331 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1332 (!vfs_context_issuser(ctx))) {
1333 error = EPERM;
1334 goto out;
1335 }
1336#if CONFIG_MACF
1337 error = mac_mount_check_remount(ctx, mp);
1338 if (error != 0) {
1339 goto out;
1340 }
1341#endif
1342
1343out:
1344 if (error) {
1345 lck_rw_done(&mp->mnt_rwlock);
1346 }
1347
1348 return error;
1349}
1350
1351static void
1352mount_end_update(mount_t mp)
1353{
1354 lck_rw_done(&mp->mnt_rwlock);
1355}
1356
1357static int
6d2010ae
A
1358get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1359{
1360 vnode_t vp;
1361
1362 if (height >= MAX_IMAGEBOOT_NESTING) {
1363 return EINVAL;
1364 }
1365
1366 vp = imgsrc_rootvnodes[height];
1367 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1368 *rvpp = vp;
1369 return 0;
1370 } else {
1371 return ENOENT;
1372 }
1373}
1374
1375static int
1376relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
b7266188 1377 const char *fsname, vfs_context_t ctx,
6d2010ae 1378 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
b7266188
A
1379{
1380 int error;
1381 mount_t mp;
1382 boolean_t placed = FALSE;
6d2010ae 1383 vnode_t devvp = NULLVP;
b7266188
A
1384 struct vfstable *vfsp;
1385 user_addr_t devpath;
1386 char *old_mntonname;
6d2010ae
A
1387 vnode_t rvp;
1388 uint32_t height;
1389 uint32_t flags;
b7266188
A
1390
1391 /* If we didn't imageboot, nothing to move */
6d2010ae 1392 if (imgsrc_rootvnodes[0] == NULLVP) {
b7266188
A
1393 return EINVAL;
1394 }
1395
1396 /* Only root can do this */
1397 if (!vfs_context_issuser(ctx)) {
1398 return EPERM;
1399 }
1400
6d2010ae
A
1401 IMGSRC_DEBUG("looking for root vnode.\n");
1402
1403 /*
1404 * Get root vnode of filesystem we're moving.
1405 */
1406 if (by_index) {
1407 if (is64bit) {
1408 struct user64_mnt_imgsrc_args mia64;
1409 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1410 if (error != 0) {
1411 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1412 return error;
1413 }
1414
1415 height = mia64.mi_height;
1416 flags = mia64.mi_flags;
1417 devpath = mia64.mi_devpath;
1418 } else {
1419 struct user32_mnt_imgsrc_args mia32;
1420 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1421 if (error != 0) {
1422 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1423 return error;
1424 }
1425
1426 height = mia32.mi_height;
1427 flags = mia32.mi_flags;
1428 devpath = mia32.mi_devpath;
1429 }
1430 } else {
1431 /*
1432 * For binary compatibility--assumes one level of nesting.
1433 */
1434 if (is64bit) {
1435 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1436 return error;
1437 } else {
1438 user32_addr_t tmp;
1439 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1440 return error;
1441
1442 /* munge into LP64 addr */
1443 devpath = CAST_USER_ADDR_T(tmp);
1444 }
1445
1446 height = 0;
1447 flags = 0;
1448 }
1449
1450 if (flags != 0) {
1451 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1452 return EINVAL;
1453 }
1454
1455 error = get_imgsrc_rootvnode(height, &rvp);
b7266188 1456 if (error != 0) {
6d2010ae 1457 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
b7266188
A
1458 return error;
1459 }
1460
6d2010ae
A
1461 IMGSRC_DEBUG("got root vnode.\n");
1462
b7266188
A
1463 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1464
1465 /* Can only move once */
6d2010ae 1466 mp = vnode_mount(rvp);
b7266188 1467 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1468 IMGSRC_DEBUG("Already moved.\n");
b7266188
A
1469 error = EBUSY;
1470 goto out0;
1471 }
1472
6d2010ae
A
1473 IMGSRC_DEBUG("Starting updated.\n");
1474
b7266188
A
1475 /* Get exclusive rwlock on mount, authorize update on mp */
1476 error = mount_begin_update(mp , ctx, 0);
1477 if (error != 0) {
6d2010ae 1478 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
b7266188
A
1479 goto out0;
1480 }
1481
1482 /*
1483 * It can only be moved once. Flag is set under the rwlock,
1484 * so we're now safe to proceed.
1485 */
1486 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
6d2010ae 1487 IMGSRC_DEBUG("Already moved [2]\n");
b7266188
A
1488 goto out1;
1489 }
6d2010ae
A
1490
1491
1492 IMGSRC_DEBUG("Preparing coveredvp.\n");
b7266188
A
1493
1494 /* Mark covered vnode as mount in progress, authorize placing mount on top */
6d2010ae 1495 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
b7266188 1496 if (error != 0) {
6d2010ae 1497 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
b7266188
A
1498 goto out1;
1499 }
1500
6d2010ae
A
1501 IMGSRC_DEBUG("Covered vp OK.\n");
1502
b7266188
A
1503 /* Sanity check the name caller has provided */
1504 vfsp = mp->mnt_vtable;
1505 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
6d2010ae 1506 IMGSRC_DEBUG("Wrong fs name.\n");
b7266188
A
1507 error = EINVAL;
1508 goto out2;
1509 }
1510
1511 /* Check the device vnode and update mount-from name, for local filesystems */
1512 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
6d2010ae 1513 IMGSRC_DEBUG("Local, doing device validation.\n");
b7266188
A
1514
1515 if (devpath != USER_ADDR_NULL) {
1516 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1517 if (error) {
6d2010ae 1518 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
b7266188
A
1519 goto out2;
1520 }
1521
1522 vnode_put(devvp);
1523 }
1524 }
1525
1526 /*
1527 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1528 * and increment the name cache's mount generation
1529 */
6d2010ae
A
1530
1531 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
b7266188
A
1532 error = place_mount_and_checkdirs(mp, vp, ctx);
1533 if (error != 0) {
1534 goto out2;
1535 }
1536
1537 placed = TRUE;
1538
3e170ce0
A
1539 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1540 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
b7266188
A
1541
1542 /* Forbid future moves */
1543 mount_lock(mp);
1544 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1545 mount_unlock(mp);
1546
1547 /* Finally, add to mount list, completely ready to go */
6d2010ae
A
1548 if (mount_list_add(mp) != 0) {
1549 /*
1550 * The system is shutting down trying to umount
1551 * everything, so fail with a plausible errno.
1552 */
1553 error = EBUSY;
b7266188
A
1554 goto out3;
1555 }
1556
1557 mount_end_update(mp);
6d2010ae 1558 vnode_put(rvp);
b7266188
A
1559 FREE(old_mntonname, M_TEMP);
1560
6d2010ae
A
1561 vfs_notify_mount(pvp);
1562
b7266188
A
1563 return 0;
1564out3:
3e170ce0 1565 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
b7266188
A
1566
1567 mount_lock(mp);
1568 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1569 mount_unlock(mp);
1570
1571out2:
1572 /*
1573 * Placing the mp on the vnode clears VMOUNT,
1574 * so cleanup is different after that point
1575 */
1576 if (placed) {
1577 /* Rele the vp, clear VMOUNT and v_mountedhere */
1578 undo_place_on_covered_vp(mp, vp);
1579 } else {
1580 vnode_lock_spin(vp);
1581 CLR(vp->v_flag, VMOUNT);
1582 vnode_unlock(vp);
1583 }
1584out1:
1585 mount_end_update(mp);
1586
1587out0:
6d2010ae 1588 vnode_put(rvp);
b7266188
A
1589 FREE(old_mntonname, M_TEMP);
1590 return error;
1591}
1592
1593#endif /* CONFIG_IMGSRC_ACCESS */
1594
91447636 1595void
2d21ac55 1596enablequotas(struct mount *mp, vfs_context_t ctx)
9bccf70c 1597{
9bccf70c
A
1598 struct nameidata qnd;
1599 int type;
1600 char qfpath[MAXPATHLEN];
91447636
A
1601 const char *qfname = QUOTAFILENAME;
1602 const char *qfopsname = QUOTAOPSNAME;
1603 const char *qfextension[] = INITQFNAMES;
9bccf70c 1604
2d21ac55 1605 /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
b0d623f7
A
1606 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1607 return;
1608 }
9bccf70c
A
1609 /*
1610 * Enable filesystem disk quotas if necessary.
1611 * We ignore errors as this should not interfere with final mount
1612 */
1613 for (type=0; type < MAXQUOTAS; type++) {
2d21ac55 1614 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
6d2010ae
A
1615 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1616 CAST_USER_ADDR_T(qfpath), ctx);
91447636
A
1617 if (namei(&qnd) != 0)
1618 continue; /* option file to trigger quotas is not present */
1619 vnode_put(qnd.ni_vp);
1620 nameidone(&qnd);
2d21ac55 1621 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
91447636 1622
2d21ac55 1623 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
9bccf70c
A
1624 }
1625 return;
1626}
1627
2d21ac55
A
1628
1629static int
1630checkdirs_callback(proc_t p, void * arg)
1631{
1632 struct cdirargs * cdrp = (struct cdirargs * )arg;
1633 vnode_t olddp = cdrp->olddp;
1634 vnode_t newdp = cdrp->newdp;
1635 struct filedesc *fdp;
1636 vnode_t tvp;
1637 vnode_t fdp_cvp;
1638 vnode_t fdp_rvp;
1639 int cdir_changed = 0;
1640 int rdir_changed = 0;
1641
1642 /*
1643 * XXX Also needs to iterate each thread in the process to see if it
1644 * XXX is using a per-thread current working directory, and, if so,
1645 * XXX update that as well.
1646 */
1647
1648 proc_fdlock(p);
1649 fdp = p->p_fd;
1650 if (fdp == (struct filedesc *)0) {
1651 proc_fdunlock(p);
1652 return(PROC_RETURNED);
1653 }
1654 fdp_cvp = fdp->fd_cdir;
1655 fdp_rvp = fdp->fd_rdir;
1656 proc_fdunlock(p);
1657
1658 if (fdp_cvp == olddp) {
1659 vnode_ref(newdp);
1660 tvp = fdp->fd_cdir;
1661 fdp_cvp = newdp;
1662 cdir_changed = 1;
1663 vnode_rele(tvp);
1664 }
1665 if (fdp_rvp == olddp) {
1666 vnode_ref(newdp);
1667 tvp = fdp->fd_rdir;
1668 fdp_rvp = newdp;
1669 rdir_changed = 1;
1670 vnode_rele(tvp);
1671 }
1672 if (cdir_changed || rdir_changed) {
1673 proc_fdlock(p);
1674 fdp->fd_cdir = fdp_cvp;
1675 fdp->fd_rdir = fdp_rvp;
1676 proc_fdunlock(p);
1677 }
1678 return(PROC_RETURNED);
1679}
1680
1681
1682
1c79356b
A
1683/*
1684 * Scan all active processes to see if any of them have a current
1685 * or root directory onto which the new filesystem has just been
1686 * mounted. If so, replace them with the new mount point.
1687 */
6601e61a 1688static int
2d21ac55 1689checkdirs(vnode_t olddp, vfs_context_t ctx)
1c79356b 1690{
2d21ac55
A
1691 vnode_t newdp;
1692 vnode_t tvp;
6601e61a 1693 int err;
2d21ac55 1694 struct cdirargs cdr;
1c79356b
A
1695
1696 if (olddp->v_usecount == 1)
6601e61a 1697 return(0);
2d21ac55 1698 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
2d21ac55
A
1699
1700 if (err != 0) {
6601e61a 1701#if DIAGNOSTIC
2d21ac55 1702 panic("mount: lost mount: error %d", err);
6601e61a
A
1703#endif
1704 return(err);
1705 }
91447636 1706
2d21ac55
A
1707 cdr.olddp = olddp;
1708 cdr.newdp = newdp;
1709 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1710 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
91447636 1711
1c79356b 1712 if (rootvnode == olddp) {
91447636 1713 vnode_ref(newdp);
fa4905b1 1714 tvp = rootvnode;
1c79356b 1715 rootvnode = newdp;
91447636 1716 vnode_rele(tvp);
1c79356b 1717 }
91447636
A
1718
1719 vnode_put(newdp);
6601e61a 1720 return(0);
1c79356b
A
1721}
1722
1723/*
1724 * Unmount a file system.
1725 *
1726 * Note: unmount takes a path to the vnode mounted on as argument,
1727 * not special file (as before).
1728 */
1c79356b
A
1729/* ARGSUSED */
1730int
b0d623f7 1731unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1c79356b 1732{
2d21ac55 1733 vnode_t vp;
1c79356b
A
1734 struct mount *mp;
1735 int error;
1736 struct nameidata nd;
2d21ac55 1737 vfs_context_t ctx = vfs_context_current();
91447636 1738
fe8ab488 1739 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
2d21ac55 1740 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
1741 error = namei(&nd);
1742 if (error)
1c79356b
A
1743 return (error);
1744 vp = nd.ni_vp;
1745 mp = vp->v_mount;
91447636 1746 nameidone(&nd);
1c79356b 1747
2d21ac55
A
1748#if CONFIG_MACF
1749 error = mac_mount_check_umount(ctx, mp);
1750 if (error != 0) {
1751 vnode_put(vp);
1752 return (error);
1753 }
1754#endif
55e303ae
A
1755 /*
1756 * Must be the root of the filesystem
1757 */
1758 if ((vp->v_flag & VROOT) == 0) {
91447636 1759 vnode_put(vp);
55e303ae
A
1760 return (EINVAL);
1761 }
6601e61a 1762 mount_ref(mp, 0);
91447636 1763 vnode_put(vp);
6601e61a 1764 /* safedounmount consumes the mount ref */
2d21ac55
A
1765 return (safedounmount(mp, uap->flags, ctx));
1766}
1767
1768int
1769vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
1770{
1771 mount_t mp;
1772
1773 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1774 if (mp == (mount_t)0) {
1775 return(ENOENT);
1776 }
1777 mount_ref(mp, 0);
1778 mount_iterdrop(mp);
1779 /* safedounmount consumes the mount ref */
1780 return(safedounmount(mp, flags, ctx));
55e303ae
A
1781}
1782
2d21ac55 1783
55e303ae 1784/*
6601e61a 1785 * The mount struct comes with a mount ref which will be consumed.
55e303ae
A
1786 * Do the actual file system unmount, prevent some common foot shooting.
1787 */
1788int
2d21ac55 1789safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
55e303ae
A
1790{
1791 int error;
2d21ac55 1792 proc_t p = vfs_context_proc(ctx);
55e303ae 1793
316670eb
A
1794 /*
1795 * If the file system is not responding and MNT_NOBLOCK
1796 * is set and not a forced unmount then return EBUSY.
1797 */
1798 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1799 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1800 error = EBUSY;
1801 goto out;
1802 }
1803
1c79356b 1804 /*
6d2010ae
A
1805 * Skip authorization if the mount is tagged as permissive and
1806 * this is not a forced-unmount attempt.
1c79356b 1807 */
6d2010ae
A
1808 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1809 /*
1810 * Only root, or the user that did the original mount is
1811 * permitted to unmount this filesystem.
1812 */
1813 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1814 (error = suser(kauth_cred_get(), &p->p_acflag)))
1815 goto out;
1816 }
1c79356b
A
1817 /*
1818 * Don't allow unmounting the root file system.
1819 */
6601e61a 1820 if (mp->mnt_flag & MNT_ROOTFS) {
2d21ac55 1821 error = EBUSY; /* the root is always busy */
6601e61a
A
1822 goto out;
1823 }
1c79356b 1824
b7266188
A
1825#ifdef CONFIG_IMGSRC_ACCESS
1826 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1827 error = EBUSY;
1828 goto out;
1829 }
1830#endif /* CONFIG_IMGSRC_ACCESS */
1831
2d21ac55
A
1832 return (dounmount(mp, flags, 1, ctx));
1833
6601e61a
A
1834out:
1835 mount_drop(mp, 0);
1836 return(error);
1c79356b
A
1837}
1838
1839/*
1840 * Do the actual file system unmount.
1841 */
1842int
2d21ac55 1843dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1c79356b 1844{
2d21ac55 1845 vnode_t coveredvp = (vnode_t)0;
1c79356b 1846 int error;
91447636 1847 int needwakeup = 0;
91447636
A
1848 int forcedunmount = 0;
1849 int lflags = 0;
593a1d5f 1850 struct vnode *devvp = NULLVP;
6d2010ae 1851#if CONFIG_TRIGGERS
39236c6e 1852 proc_t p = vfs_context_proc(ctx);
6d2010ae 1853 int did_vflush = 0;
39236c6e 1854 int pflags_save = 0;
6d2010ae 1855#endif /* CONFIG_TRIGGERS */
91447636 1856
91447636 1857 mount_lock(mp);
fe8ab488
A
1858
1859 /*
1860 * If already an unmount in progress just return EBUSY.
1861 * Even a forced unmount cannot override.
1862 */
91447636 1863 if (mp->mnt_lflag & MNT_LUNMOUNT) {
fe8ab488 1864 if (withref != 0)
6601e61a 1865 mount_drop(mp, 1);
fe8ab488 1866 mount_unlock(mp);
9bccf70c
A
1867 return (EBUSY);
1868 }
39236c6e 1869
fe8ab488
A
1870 if (flags & MNT_FORCE) {
1871 forcedunmount = 1;
1872 mp->mnt_lflag |= MNT_LFORCE;
1873 }
1874
39236c6e
A
1875#if CONFIG_TRIGGERS
1876 if (flags & MNT_NOBLOCK && p != kernproc)
1877 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
1878#endif
1879
1c79356b 1880 mp->mnt_kern_flag |= MNTK_UNMOUNT;
91447636
A
1881 mp->mnt_lflag |= MNT_LUNMOUNT;
1882 mp->mnt_flag &=~ MNT_ASYNC;
2d21ac55
A
1883 /*
1884 * anyone currently in the fast path that
1885 * trips over the cached rootvp will be
1886 * dumped out and forced into the slow path
1887 * to regenerate a new cached value
1888 */
1889 mp->mnt_realrootvp = NULLVP;
91447636 1890 mount_unlock(mp);
2d21ac55 1891
fe8ab488
A
1892 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
1893 /*
1894 * Force unmount any mounts in this filesystem.
1895 * If any unmounts fail - just leave them dangling.
1896 * Avoids recursion.
1897 */
1898 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
1899 }
1900
2d21ac55
A
1901 /*
1902 * taking the name_cache_lock exclusively will
1903 * insure that everyone is out of the fast path who
1904 * might be trying to use a now stale copy of
1905 * vp->v_mountedhere->mnt_realrootvp
1906 * bumping mount_generation causes the cached values
1907 * to be invalidated
1908 */
1909 name_cache_lock();
1910 mount_generation++;
1911 name_cache_unlock();
1912
1913
91447636 1914 lck_rw_lock_exclusive(&mp->mnt_rwlock);
6601e61a
A
1915 if (withref != 0)
1916 mount_drop(mp, 0);
2d21ac55 1917#if CONFIG_FSE
91447636 1918 fsevent_unmount(mp); /* has to come first! */
2d21ac55 1919#endif
91447636
A
1920 error = 0;
1921 if (forcedunmount == 0) {
1922 ubc_umount(mp); /* release cached vnodes */
1923 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2d21ac55 1924 error = VFS_SYNC(mp, MNT_WAIT, ctx);
91447636
A
1925 if (error) {
1926 mount_lock(mp);
1927 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1928 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1929 mp->mnt_lflag &= ~MNT_LFORCE;
1930 goto out;
1931 }
1932 }
1933 }
6d2010ae 1934
3e170ce0
A
1935 IOBSDMountChange(mp, kIOMountChangeUnmount);
1936
6d2010ae
A
1937#if CONFIG_TRIGGERS
1938 vfs_nested_trigger_unmounts(mp, flags, ctx);
1939 did_vflush = 1;
1940#endif
91447636
A
1941 if (forcedunmount)
1942 lflags |= FORCECLOSE;
1943 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
1944 if ((forcedunmount == 0) && error) {
1945 mount_lock(mp);
9bccf70c 1946 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
1947 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1948 mp->mnt_lflag &= ~MNT_LFORCE;
9bccf70c
A
1949 goto out;
1950 }
91447636
A
1951
1952 /* make sure there are no one in the mount iterations or lookup */
1953 mount_iterdrain(mp);
1954
2d21ac55 1955 error = VFS_UNMOUNT(mp, flags, ctx);
1c79356b 1956 if (error) {
91447636
A
1957 mount_iterreset(mp);
1958 mount_lock(mp);
1c79356b 1959 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
91447636
A
1960 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1961 mp->mnt_lflag &= ~MNT_LFORCE;
1c79356b
A
1962 goto out;
1963 }
1964
1965 /* increment the operations count */
1966 if (!error)
b0d623f7 1967 OSAddAtomic(1, &vfs_nummntops);
91447636
A
1968
1969 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
593a1d5f
A
1970 /* hold an io reference and drop the usecount before close */
1971 devvp = mp->mnt_devvp;
593a1d5f
A
1972 vnode_getalways(devvp);
1973 vnode_rele(devvp);
1974 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
2d21ac55 1975 ctx);
b0d623f7 1976 vnode_clearmountedon(devvp);
593a1d5f 1977 vnode_put(devvp);
91447636
A
1978 }
1979 lck_rw_done(&mp->mnt_rwlock);
1980 mount_list_remove(mp);
1981 lck_rw_lock_exclusive(&mp->mnt_rwlock);
6d2010ae 1982
91447636 1983 /* mark the mount point hook in the vp but not drop the ref yet */
1c79356b 1984 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
fe8ab488
A
1985 /*
1986 * The covered vnode needs special handling. Trying to get an
1987 * iocount must not block here as this may lead to deadlocks
1988 * if the Filesystem to which the covered vnode belongs is
1989 * undergoing forced unmounts. Since we hold a usecount, the
1990 * vnode cannot be reused (it can, however, still be terminated)
1991 */
1992 vnode_getalways(coveredvp);
6d2010ae
A
1993 vnode_lock_spin(coveredvp);
1994
1995 mp->mnt_crossref++;
1996 coveredvp->v_mountedhere = (struct mount *)0;
fe8ab488 1997 CLR(coveredvp->v_flag, VMOUNT);
6d2010ae
A
1998
1999 vnode_unlock(coveredvp);
2000 vnode_put(coveredvp);
1c79356b 2001 }
91447636
A
2002
2003 mount_list_lock();
2004 mp->mnt_vtable->vfc_refcount--;
2005 mount_list_unlock();
2006
2007 cache_purgevfs(mp); /* remove cache entries for this file sys */
2008 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
2009 mount_lock(mp);
2010 mp->mnt_lflag |= MNT_LDEAD;
2011
2012 if (mp->mnt_lflag & MNT_LWAIT) {
2013 /*
2014 * do the wakeup here
2015 * in case we block in mount_refdrain
2016 * which will drop the mount lock
2017 * and allow anyone blocked in vfs_busy
2018 * to wakeup and see the LDEAD state
2019 */
2020 mp->mnt_lflag &= ~MNT_LWAIT;
2021 wakeup((caddr_t)mp);
1c79356b 2022 }
91447636 2023 mount_refdrain(mp);
1c79356b 2024out:
91447636
A
2025 if (mp->mnt_lflag & MNT_LWAIT) {
2026 mp->mnt_lflag &= ~MNT_LWAIT;
2027 needwakeup = 1;
2028 }
6d2010ae 2029
6d2010ae 2030#if CONFIG_TRIGGERS
39236c6e
A
2031 if (flags & MNT_NOBLOCK && p != kernproc) {
2032 // Restore P_NOREMOTEHANG bit to its previous value
2033 if ((pflags_save & P_NOREMOTEHANG) == 0)
2034 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2035 }
2036
6d2010ae
A
2037 /*
2038 * Callback and context are set together under the mount lock, and
2039 * never cleared, so we're safe to examine them here, drop the lock,
2040 * and call out.
2041 */
2042 if (mp->mnt_triggercallback != NULL) {
2043 mount_unlock(mp);
2044 if (error == 0) {
2045 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2046 } else if (did_vflush) {
2047 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2048 }
2049 } else {
2050 mount_unlock(mp);
2051 }
2052#else
91447636 2053 mount_unlock(mp);
6d2010ae
A
2054#endif /* CONFIG_TRIGGERS */
2055
91447636
A
2056 lck_rw_done(&mp->mnt_rwlock);
2057
2058 if (needwakeup)
1c79356b 2059 wakeup((caddr_t)mp);
6d2010ae 2060
55e303ae 2061 if (!error) {
91447636 2062 if ((coveredvp != NULLVP)) {
fe8ab488 2063 vnode_t pvp = NULLVP;
b0d623f7 2064
fe8ab488
A
2065 /*
2066 * The covered vnode needs special handling. Trying to
2067 * get an iocount must not block here as this may lead
2068 * to deadlocks if the Filesystem to which the covered
2069 * vnode belongs is undergoing forced unmounts. Since we
2070 * hold a usecount, the vnode cannot be reused
2071 * (it can, however, still be terminated).
2072 */
2073 vnode_getalways(coveredvp);
6d2010ae
A
2074
2075 mount_dropcrossref(mp, coveredvp, 0);
fe8ab488
A
2076 /*
2077 * We'll _try_ to detect if this really needs to be
2078 * done. The coveredvp can only be in termination (or
2079 * terminated) if the coveredvp's mount point is in a
2080 * forced unmount (or has been) since we still hold the
2081 * ref.
2082 */
2083 if (!vnode_isrecycled(coveredvp)) {
2084 pvp = vnode_getparent(coveredvp);
6d2010ae 2085#if CONFIG_TRIGGERS
fe8ab488
A
2086 if (coveredvp->v_resolve) {
2087 vnode_trigger_rearm(coveredvp, ctx);
2088 }
2089#endif
2090 }
2091
2092 vnode_rele(coveredvp);
91447636 2093 vnode_put(coveredvp);
fe8ab488 2094 coveredvp = NULLVP;
b0d623f7
A
2095
2096 if (pvp) {
2097 lock_vnode_and_post(pvp, NOTE_WRITE);
2098 vnode_put(pvp);
2099 }
91447636
A
2100 } else if (mp->mnt_flag & MNT_ROOTFS) {
2101 mount_lock_destroy(mp);
2d21ac55
A
2102#if CONFIG_MACF
2103 mac_mount_label_destroy(mp);
2104#endif
91447636
A
2105 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2106 } else
2107 panic("dounmount: no coveredvp");
55e303ae 2108 }
1c79356b
A
2109 return (error);
2110}
2111
fe8ab488
A
2112/*
2113 * Unmount any mounts in this filesystem.
2114 */
2115void
2116dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2117{
2118 mount_t smp;
2119 fsid_t *fsids, fsid;
2120 int fsids_sz;
2121 int count = 0, i, m = 0;
2122 vnode_t vp;
2123
2124 mount_list_lock();
2125
2126 // Get an array to hold the submounts fsids.
2127 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2128 count++;
2129 fsids_sz = count * sizeof(fsid_t);
2130 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2131 if (fsids == NULL) {
2132 mount_list_unlock();
2133 goto out;
2134 }
2135 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2136
2137 /*
2138 * Fill the array with submount fsids.
2139 * Since mounts are always added to the tail of the mount list, the
2140 * list is always in mount order.
2141 * For each mount check if the mounted-on vnode belongs to a
2142 * mount that's already added to our array of mounts to be unmounted.
2143 */
2144 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2145 vp = smp->mnt_vnodecovered;
2146 if (vp == NULL)
2147 continue;
2148 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2149 for (i = 0; i <= m; i++) {
2150 if (fsids[i].val[0] == fsid.val[0] &&
2151 fsids[i].val[1] == fsid.val[1]) {
2152 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2153 break;
2154 }
2155 }
2156 }
2157 mount_list_unlock();
2158
2159 // Unmount the submounts in reverse order. Ignore errors.
2160 for (i = m; i > 0; i--) {
2161 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2162 if (smp) {
2163 mount_ref(smp, 0);
2164 mount_iterdrop(smp);
2165 (void) dounmount(smp, flags, 1, ctx);
2166 }
2167 }
2168out:
2169 if (fsids)
2170 FREE(fsids, M_TEMP);
2171}
2172
91447636
A
2173void
2174mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2175{
6d2010ae
A
2176 vnode_lock(dp);
2177 mp->mnt_crossref--;
2178
2179 if (mp->mnt_crossref < 0)
2180 panic("mount cross refs -ve");
2181
2182 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
2183
91447636 2184 if (need_put)
6d2010ae 2185 vnode_put_locked(dp);
91447636 2186 vnode_unlock(dp);
6d2010ae
A
2187
2188 mount_lock_destroy(mp);
2189#if CONFIG_MACF
2190 mac_mount_label_destroy(mp);
2191#endif
2192 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2193 return;
2194 }
2195 if (need_put)
2196 vnode_put_locked(dp);
2197 vnode_unlock(dp);
91447636
A
2198}
2199
2200
1c79356b
A
/*
 * Tunables for the sync paths below.
 */
#if DIAGNOSTIC
/* When non-zero, sync() and sync_thread() call vfs_bufstats(). */
int syncprt = 0;
#endif

/* When non-zero, sync() and sync_thread() call vm_countdirtypages(). */
int print_vmpage_stat = 0;

/* Time limit, in seconds, that sync_internal() hands to sync_async(). */
int sync_timeout = 60;
1c79356b 2210
91447636 2211static int
fe8ab488 2212sync_callback(mount_t mp, __unused void *arg)
1c79356b 2213{
91447636 2214 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
fe8ab488
A
2215 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2216
2217 mp->mnt_flag &= ~MNT_ASYNC;
2218 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
2219 if (asyncflag)
2220 mp->mnt_flag |= MNT_ASYNC;
1c79356b 2221 }
1c79356b 2222
fe8ab488
A
2223 return (VFS_RETURNED);
2224}
91447636 2225
91447636
A
2226/* ARGSUSED */
2227int
b0d623f7 2228sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
91447636 2229{
fe8ab488 2230 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
b0d623f7 2231
fe8ab488
A
2232 if (print_vmpage_stat) {
2233 vm_countdirtypages();
2234 }
2235
2236#if DIAGNOSTIC
2237 if (syncprt)
2238 vfs_bufstats();
2239#endif /* DIAGNOSTIC */
2240 return 0;
2241}
2242
2243static void
2244sync_thread(void *arg, __unused wait_result_t wr)
2245{
2246 int *timeout = (int *) arg;
2247
2248 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2249
2250 if (timeout)
2251 wakeup((caddr_t) timeout);
2252 if (print_vmpage_stat) {
1c79356b 2253 vm_countdirtypages();
1c79356b 2254 }
39236c6e 2255
1c79356b
A
2256#if DIAGNOSTIC
2257 if (syncprt)
2258 vfs_bufstats();
2259#endif /* DIAGNOSTIC */
1c79356b
A
2260}
2261
2262/*
fe8ab488 2263 * Sync in a separate thread so we can time out if it blocks.
1c79356b 2264 */
fe8ab488
A
2265static int
2266sync_async(int timeout)
2d21ac55 2267{
fe8ab488 2268 thread_t thd;
2d21ac55 2269 int error;
fe8ab488
A
2270 struct timespec ts = {timeout, 0};
2271
2272 lck_mtx_lock(sync_mtx_lck);
2273 if (kernel_thread_start(sync_thread, &timeout, &thd) != KERN_SUCCESS) {
2274 printf("sync_thread failed\n");
2275 lck_mtx_unlock(sync_mtx_lck);
2276 return (0);
2277 }
2278
2279 error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "sync_thread", &ts);
2280 if (error) {
2281 printf("sync timed out: %d sec\n", timeout);
2282 }
2283 thread_deallocate(thd);
2284
2285 return (0);
2d21ac55
A
2286}
2287
fe8ab488
A
2288/*
2289 * An in-kernel sync for power management to call.
2290 */
2291__private_extern__ int
2292sync_internal(void)
2293{
2294 (void) sync_async(sync_timeout);
2295
2296 return 0;
2297} /* end of sync_internal call */
2298
2299/*
2300 * Change filesystem quotas.
2301 */
2302#if QUOTA
2303int
2304quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
1c79356b 2305{
2d21ac55 2306 struct mount *mp;
91447636
A
2307 int error, quota_cmd, quota_status;
2308 caddr_t datap;
2309 size_t fnamelen;
1c79356b 2310 struct nameidata nd;
2d21ac55 2311 vfs_context_t ctx = vfs_context_current();
91447636
A
2312 struct dqblk my_dqblk;
2313
b0d623f7 2314 AUDIT_ARG(uid, uap->uid);
55e303ae 2315 AUDIT_ARG(cmd, uap->cmd);
6d2010ae
A
2316 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2317 uap->path, ctx);
55e303ae
A
2318 error = namei(&nd);
2319 if (error)
1c79356b
A
2320 return (error);
2321 mp = nd.ni_vp->v_mount;
91447636
A
2322 vnode_put(nd.ni_vp);
2323 nameidone(&nd);
2324
2325 /* copyin any data we will need for downstream code */
2326 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2327
2328 switch (quota_cmd) {
2329 case Q_QUOTAON:
2330 /* uap->arg specifies a file from which to take the quotas */
2331 fnamelen = MAXPATHLEN;
2332 datap = kalloc(MAXPATHLEN);
2333 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2334 break;
2335 case Q_GETQUOTA:
2336 /* uap->arg is a pointer to a dqblk structure. */
2337 datap = (caddr_t) &my_dqblk;
2338 break;
2339 case Q_SETQUOTA:
2340 case Q_SETUSE:
2341 /* uap->arg is a pointer to a dqblk structure. */
2342 datap = (caddr_t) &my_dqblk;
2343 if (proc_is64bit(p)) {
2344 struct user_dqblk my_dqblk64;
2345 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
2346 if (error == 0) {
2347 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2348 }
2349 }
2350 else {
2351 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
2352 }
2353 break;
2354 case Q_QUOTASTAT:
2355 /* uap->arg is a pointer to an integer */
2356 datap = (caddr_t) &quota_status;
2357 break;
2358 default:
2359 datap = NULL;
2360 break;
2361 } /* switch */
2362
2363 if (error == 0) {
2d21ac55 2364 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
91447636
A
2365 }
2366
2367 switch (quota_cmd) {
2368 case Q_QUOTAON:
2369 if (datap != NULL)
2370 kfree(datap, MAXPATHLEN);
2371 break;
2372 case Q_GETQUOTA:
2373 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2374 if (error == 0) {
2375 if (proc_is64bit(p)) {
fe8ab488 2376 struct user_dqblk my_dqblk64 = {.dqb_bhardlimit = 0};
91447636
A
2377 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2378 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
2379 }
2380 else {
2381 error = copyout(datap, uap->arg, sizeof (struct dqblk));
2382 }
2383 }
2384 break;
2385 case Q_QUOTASTAT:
2386 /* uap->arg is a pointer to an integer */
2387 if (error == 0) {
2388 error = copyout(datap, uap->arg, sizeof(quota_status));
2389 }
2390 break;
2391 default:
2392 break;
2393 } /* switch */
2394
2395 return (error);
1c79356b 2396}
2d21ac55
A
2397#else
2398int
b0d623f7 2399quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2d21ac55
A
2400{
2401 return (EOPNOTSUPP);
2402}
2403#endif /* QUOTA */
1c79356b
A
2404
2405/*
2406 * Get filesystem statistics.
2d21ac55
A
2407 *
2408 * Returns: 0 Success
2409 * namei:???
2410 * vfs_update_vfsstat:???
2411 * munge_statfs:EFAULT
1c79356b 2412 */
1c79356b
A
2413/* ARGSUSED */
2414int
b0d623f7 2415statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
1c79356b 2416{
91447636
A
2417 struct mount *mp;
2418 struct vfsstatfs *sp;
1c79356b
A
2419 int error;
2420 struct nameidata nd;
2d21ac55 2421 vfs_context_t ctx = vfs_context_current();
91447636 2422 vnode_t vp;
1c79356b 2423
fe8ab488 2424 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2d21ac55 2425 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
2426 error = namei(&nd);
2427 if (error)
1c79356b 2428 return (error);
91447636
A
2429 vp = nd.ni_vp;
2430 mp = vp->v_mount;
2431 sp = &mp->mnt_vfsstat;
2432 nameidone(&nd);
2433
2d21ac55 2434 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
39236c6e
A
2435 if (error != 0) {
2436 vnode_put(vp);
1c79356b 2437 return (error);
39236c6e 2438 }
91447636
A
2439
2440 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
39236c6e 2441 vnode_put(vp);
91447636 2442 return (error);
1c79356b
A
2443}
2444
2445/*
2446 * Get filesystem statistics.
2447 */
1c79356b
A
2448/* ARGSUSED */
2449int
b0d623f7 2450fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
1c79356b 2451{
2d21ac55 2452 vnode_t vp;
1c79356b 2453 struct mount *mp;
91447636 2454 struct vfsstatfs *sp;
1c79356b
A
2455 int error;
2456
55e303ae
A
2457 AUDIT_ARG(fd, uap->fd);
2458
91447636 2459 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 2460 return (error);
55e303ae 2461
d1ecb069
A
2462 error = vnode_getwithref(vp);
2463 if (error) {
2464 file_drop(uap->fd);
2465 return (error);
2466 }
2467
91447636 2468 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
55e303ae 2469
91447636
A
2470 mp = vp->v_mount;
2471 if (!mp) {
d1ecb069
A
2472 error = EBADF;
2473 goto out;
91447636
A
2474 }
2475 sp = &mp->mnt_vfsstat;
2d21ac55 2476 if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
d1ecb069 2477 goto out;
91447636 2478 }
91447636
A
2479
2480 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2481
d1ecb069
A
2482out:
2483 file_drop(uap->fd);
2484 vnode_put(vp);
2485
91447636 2486 return (error);
1c79356b
A
2487}
2488
2d21ac55
A
2489/*
2490 * Common routine to handle copying of statfs64 data to user space
2491 */
2492static int
2493statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2494{
2495 int error;
2496 struct statfs64 sfs;
2497
2498 bzero(&sfs, sizeof(sfs));
2499
2500 sfs.f_bsize = sfsp->f_bsize;
2501 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2502 sfs.f_blocks = sfsp->f_blocks;
2503 sfs.f_bfree = sfsp->f_bfree;
2504 sfs.f_bavail = sfsp->f_bavail;
2505 sfs.f_files = sfsp->f_files;
2506 sfs.f_ffree = sfsp->f_ffree;
2507 sfs.f_fsid = sfsp->f_fsid;
2508 sfs.f_owner = sfsp->f_owner;
2509 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2510 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2511 sfs.f_fssubtype = sfsp->f_fssubtype;
6d2010ae
A
2512 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2513 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2514 } else {
2515 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2516 }
2d21ac55
A
2517 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2518 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2519
2520 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2521
2522 return(error);
2523}
2524
2525/*
2526 * Get file system statistics in 64-bit mode
2527 */
2528int
b0d623f7 2529statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
2530{
2531 struct mount *mp;
2532 struct vfsstatfs *sp;
2533 int error;
2534 struct nameidata nd;
2535 vfs_context_t ctxp = vfs_context_current();
2536 vnode_t vp;
2537
fe8ab488 2538 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2d21ac55
A
2539 UIO_USERSPACE, uap->path, ctxp);
2540 error = namei(&nd);
2541 if (error)
2542 return (error);
2543 vp = nd.ni_vp;
2544 mp = vp->v_mount;
2545 sp = &mp->mnt_vfsstat;
2546 nameidone(&nd);
2547
2548 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
39236c6e
A
2549 if (error != 0) {
2550 vnode_put(vp);
2d21ac55 2551 return (error);
39236c6e 2552 }
2d21ac55
A
2553
2554 error = statfs64_common(mp, sp, uap->buf);
39236c6e 2555 vnode_put(vp);
2d21ac55
A
2556
2557 return (error);
2558}
2559
2560/*
2561 * Get file system statistics in 64-bit mode
2562 */
2563int
b0d623f7 2564fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2d21ac55
A
2565{
2566 struct vnode *vp;
2567 struct mount *mp;
2568 struct vfsstatfs *sp;
2569 int error;
2570
2571 AUDIT_ARG(fd, uap->fd);
2572
2573 if ( (error = file_vnode(uap->fd, &vp)) )
2574 return (error);
2575
d1ecb069
A
2576 error = vnode_getwithref(vp);
2577 if (error) {
2578 file_drop(uap->fd);
2579 return (error);
2580 }
2581
2d21ac55
A
2582 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2583
2584 mp = vp->v_mount;
2585 if (!mp) {
316670eb 2586 error = EBADF;
d1ecb069 2587 goto out;
2d21ac55
A
2588 }
2589 sp = &mp->mnt_vfsstat;
2590 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
d1ecb069 2591 goto out;
2d21ac55 2592 }
2d21ac55
A
2593
2594 error = statfs64_common(mp, sp, uap->buf);
2595
d1ecb069
A
2596out:
2597 file_drop(uap->fd);
2598 vnode_put(vp);
2599
2d21ac55
A
2600 return (error);
2601}
91447636
A
2602
2603struct getfsstat_struct {
2604 user_addr_t sfsp;
2d21ac55 2605 user_addr_t *mp;
91447636
A
2606 int count;
2607 int maxcount;
2608 int flags;
2609 int error;
1c79356b 2610};
1c79356b 2611
91447636
A
2612
2613static int
2614getfsstat_callback(mount_t mp, void * arg)
2615{
2616
2617 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2618 struct vfsstatfs *sp;
91447636 2619 int error, my_size;
2d21ac55 2620 vfs_context_t ctx = vfs_context_current();
91447636
A
2621
2622 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2623 sp = &mp->mnt_vfsstat;
2624 /*
2625 * If MNT_NOWAIT is specified, do not refresh the
b0d623f7 2626 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
91447636 2627 */
b0d623f7 2628 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2d21ac55
A
2629 (error = vfs_update_vfsstat(mp, ctx,
2630 VFS_USER_EVENT))) {
91447636
A
2631 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2632 return(VFS_RETURNED);
1c79356b 2633 }
91447636
A
2634
2635 /*
2636 * Need to handle LP64 version of struct statfs
2637 */
2d21ac55 2638 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
91447636
A
2639 if (error) {
2640 fstp->error = error;
2641 return(VFS_RETURNED_DONE);
1c79356b 2642 }
91447636 2643 fstp->sfsp += my_size;
2d21ac55
A
2644
2645 if (fstp->mp) {
39236c6e 2646#if CONFIG_MACF
2d21ac55
A
2647 error = mac_mount_label_get(mp, *fstp->mp);
2648 if (error) {
2649 fstp->error = error;
2650 return(VFS_RETURNED_DONE);
2651 }
39236c6e 2652#endif
2d21ac55
A
2653 fstp->mp++;
2654 }
2655 }
91447636
A
2656 fstp->count++;
2657 return(VFS_RETURNED);
2658}
2659
2660/*
2661 * Get statistics on all filesystems.
2662 */
2663int
2664getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2d21ac55
A
2665{
2666 struct __mac_getfsstat_args muap;
2667
2668 muap.buf = uap->buf;
2669 muap.bufsize = uap->bufsize;
2670 muap.mac = USER_ADDR_NULL;
2671 muap.macsize = 0;
2672 muap.flags = uap->flags;
2673
2674 return (__mac_getfsstat(p, &muap, retval));
2675}
2676
b0d623f7
A
2677/*
2678 * __mac_getfsstat: Get MAC-related file system statistics
2679 *
2680 * Parameters: p (ignored)
2681 * uap User argument descriptor (see below)
2682 * retval Count of file system statistics (N stats)
2683 *
2684 * Indirect: uap->bufsize Buffer size
2685 * uap->macsize MAC info size
2686 * uap->buf Buffer where information will be returned
2687 * uap->mac MAC info
2688 * uap->flags File system flags
2689 *
2690 *
2691 * Returns: 0 Success
2692 * !0 Not success
2693 *
2694 */
2d21ac55
A
2695int
2696__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
91447636
A
2697{
2698 user_addr_t sfsp;
2d21ac55 2699 user_addr_t *mp;
b0d623f7 2700 size_t count, maxcount, bufsize, macsize;
91447636
A
2701 struct getfsstat_struct fst;
2702
b0d623f7
A
2703 bufsize = (size_t) uap->bufsize;
2704 macsize = (size_t) uap->macsize;
2705
91447636 2706 if (IS_64BIT_PROCESS(p)) {
b0d623f7 2707 maxcount = bufsize / sizeof(struct user64_statfs);
91447636
A
2708 }
2709 else {
b0d623f7 2710 maxcount = bufsize / sizeof(struct user32_statfs);
91447636
A
2711 }
2712 sfsp = uap->buf;
2713 count = 0;
2714
2d21ac55
A
2715 mp = NULL;
2716
2717#if CONFIG_MACF
2718 if (uap->mac != USER_ADDR_NULL) {
2719 u_int32_t *mp0;
2720 int error;
b0d623f7 2721 unsigned int i;
2d21ac55 2722
b0d623f7 2723 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2d21ac55
A
2724 if (count != maxcount)
2725 return (EINVAL);
2726
2727 /* Copy in the array */
b0d623f7
A
2728 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2729 if (mp0 == NULL) {
2730 return (ENOMEM);
2731 }
2732
2733 error = copyin(uap->mac, mp0, macsize);
2734 if (error) {
2735 FREE(mp0, M_MACTEMP);
2d21ac55 2736 return (error);
b0d623f7 2737 }
2d21ac55
A
2738
2739 /* Normalize to an array of user_addr_t */
2740 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
b0d623f7
A
2741 if (mp == NULL) {
2742 FREE(mp0, M_MACTEMP);
2743 return (ENOMEM);
2744 }
2745
2d21ac55
A
2746 for (i = 0; i < count; i++) {
2747 if (IS_64BIT_PROCESS(p))
2748 mp[i] = ((user_addr_t *)mp0)[i];
2749 else
2750 mp[i] = (user_addr_t)mp0[i];
2751 }
2752 FREE(mp0, M_MACTEMP);
2753 }
2754#endif
2755
2756
91447636 2757 fst.sfsp = sfsp;
2d21ac55 2758 fst.mp = mp;
91447636
A
2759 fst.flags = uap->flags;
2760 fst.count = 0;
2761 fst.error = 0;
2762 fst.maxcount = maxcount;
2763
2764
2765 vfs_iterate(0, getfsstat_callback, &fst);
2766
2d21ac55
A
2767 if (mp)
2768 FREE(mp, M_MACTEMP);
2769
91447636
A
2770 if (fst.error ) {
2771 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2772 return(fst.error);
2773 }
2774
2775 if (fst.sfsp && fst.count > fst.maxcount)
2776 *retval = fst.maxcount;
1c79356b 2777 else
91447636 2778 *retval = fst.count;
1c79356b
A
2779 return (0);
2780}
2781
2d21ac55
A
2782static int
2783getfsstat64_callback(mount_t mp, void * arg)
2784{
2785 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2786 struct vfsstatfs *sp;
2787 int error;
2788
2789 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2790 sp = &mp->mnt_vfsstat;
2791 /*
b0d623f7
A
2792 * If MNT_NOWAIT is specified, do not refresh the fsstat
2793 * cache. MNT_WAIT overrides MNT_NOWAIT.
2794 *
2795 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2796 * getfsstat, since the constants are out of the same
2797 * namespace.
2d21ac55 2798 */
b0d623f7
A
2799 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2800 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2d21ac55
A
2801 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2802 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2803 return(VFS_RETURNED);
2804 }
2805
2806 error = statfs64_common(mp, sp, fstp->sfsp);
2807 if (error) {
2808 fstp->error = error;
2809 return(VFS_RETURNED_DONE);
2810 }
2811 fstp->sfsp += sizeof(struct statfs64);
2812 }
2813 fstp->count++;
2814 return(VFS_RETURNED);
2815}
2816
2817/*
2818 * Get statistics on all file systems in 64 bit mode.
2819 */
2820int
2821getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2822{
2823 user_addr_t sfsp;
2824 int count, maxcount;
2825 struct getfsstat_struct fst;
2826
2827 maxcount = uap->bufsize / sizeof(struct statfs64);
2828
2829 sfsp = uap->buf;
2830 count = 0;
2831
2832 fst.sfsp = sfsp;
2833 fst.flags = uap->flags;
2834 fst.count = 0;
2835 fst.error = 0;
2836 fst.maxcount = maxcount;
2837
2838 vfs_iterate(0, getfsstat64_callback, &fst);
2839
2840 if (fst.error ) {
2841 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2842 return(fst.error);
2843 }
2844
2845 if (fst.sfsp && fst.count > fst.maxcount)
2846 *retval = fst.maxcount;
2847 else
2848 *retval = fst.count;
2849
2850 return (0);
2851}
2852
fe8ab488
A
2853/*
2854 * gets the associated vnode with the file descriptor passed.
2855 * as input
2856 *
2857 * INPUT
2858 * ctx - vfs context of caller
2859 * fd - file descriptor for which vnode is required.
2860 * vpp - Pointer to pointer to vnode to be returned.
2861 *
2862 * The vnode is returned with an iocount so any vnode obtained
2863 * by this call needs a vnode_put
2864 *
2865 */
2866static int
2867vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
2868{
2869 int error;
2870 vnode_t vp;
2871 struct fileproc *fp;
2872 proc_t p = vfs_context_proc(ctx);
2873
2874 *vpp = NULLVP;
2875
2876 error = fp_getfvp(p, fd, &fp, &vp);
2877 if (error)
2878 return (error);
2879
2880 error = vnode_getwithref(vp);
2881 if (error) {
2882 (void)fp_drop(p, fd, fp, 0);
2883 return (error);
2884 }
2885
2886 (void)fp_drop(p, fd, fp, 0);
2887 *vpp = vp;
2888 return (error);
2889}
2890
2891/*
2892 * Wrapper function around namei to start lookup from a directory
2893 * specified by a file descriptor ni_dirfd.
2894 *
2895 * In addition to all the errors returned by namei, this call can
2896 * return ENOTDIR if the file descriptor does not refer to a directory.
2897 * and EBADF if the file descriptor is not valid.
2898 */
2899int
2900nameiat(struct nameidata *ndp, int dirfd)
2901{
2902 if ((dirfd != AT_FDCWD) &&
2903 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
2904 !(ndp->ni_cnd.cn_flags & USEDVP)) {
2905 int error = 0;
2906 char c;
2907
2908 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
2909 error = copyin(ndp->ni_dirp, &c, sizeof(char));
2910 if (error)
2911 return (error);
2912 } else {
2913 c = *((char *)(ndp->ni_dirp));
2914 }
2915
2916 if (c != '/') {
2917 vnode_t dvp_at;
2918
2919 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
2920 &dvp_at);
2921 if (error)
2922 return (error);
2923
2924 if (vnode_vtype(dvp_at) != VDIR) {
2925 vnode_put(dvp_at);
2926 return (ENOTDIR);
2927 }
2928
2929 ndp->ni_dvp = dvp_at;
2930 ndp->ni_cnd.cn_flags |= USEDVP;
2931 error = namei(ndp);
2932 ndp->ni_cnd.cn_flags &= ~USEDVP;
2933 vnode_put(dvp_at);
2934 return (error);
2935 }
2936 }
2937
2938 return (namei(ndp));
2939}
2940
1c79356b
A
2941/*
2942 * Change current working directory to a given file descriptor.
2943 */
1c79356b 2944/* ARGSUSED */
2d21ac55
A
2945static int
2946common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
1c79356b 2947{
2d21ac55
A
2948 struct filedesc *fdp = p->p_fd;
2949 vnode_t vp;
2950 vnode_t tdp;
2951 vnode_t tvp;
1c79356b 2952 struct mount *mp;
1c79356b 2953 int error;
2d21ac55 2954 vfs_context_t ctx = vfs_context_current();
1c79356b 2955
b0d623f7 2956 AUDIT_ARG(fd, uap->fd);
2d21ac55
A
2957 if (per_thread && uap->fd == -1) {
2958 /*
2959 * Switching back from per-thread to per process CWD; verify we
2960 * in fact have one before proceeding. The only success case
2961 * for this code path is to return 0 preemptively after zapping
2962 * the thread structure contents.
2963 */
2964 thread_t th = vfs_context_thread(ctx);
2965 if (th) {
2966 uthread_t uth = get_bsdthread_info(th);
2967 tvp = uth->uu_cdir;
2968 uth->uu_cdir = NULLVP;
2969 if (tvp != NULLVP) {
2970 vnode_rele(tvp);
2971 return (0);
2972 }
2973 }
2974 return (EBADF);
2975 }
91447636
A
2976
2977 if ( (error = file_vnode(uap->fd, &vp)) )
2978 return(error);
2979 if ( (error = vnode_getwithref(vp)) ) {
2980 file_drop(uap->fd);
2981 return(error);
2982 }
55e303ae
A
2983
2984 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
2985
2d21ac55 2986 if (vp->v_type != VDIR) {
1c79356b 2987 error = ENOTDIR;
2d21ac55
A
2988 goto out;
2989 }
2990
2991#if CONFIG_MACF
2992 error = mac_vnode_check_chdir(ctx, vp);
2993 if (error)
2994 goto out;
2995#endif
2996 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2997 if (error)
2998 goto out;
2999
1c79356b 3000 while (!error && (mp = vp->v_mountedhere) != NULL) {
91447636
A
3001 if (vfs_busy(mp, LK_NOWAIT)) {
3002 error = EACCES;
3003 goto out;
55e303ae 3004 }
2d21ac55 3005 error = VFS_ROOT(mp, &tdp, ctx);
91447636 3006 vfs_unbusy(mp);
1c79356b
A
3007 if (error)
3008 break;
91447636 3009 vnode_put(vp);
1c79356b
A
3010 vp = tdp;
3011 }
91447636
A
3012 if (error)
3013 goto out;
3014 if ( (error = vnode_ref(vp)) )
3015 goto out;
3016 vnode_put(vp);
3017
2d21ac55
A
3018 if (per_thread) {
3019 thread_t th = vfs_context_thread(ctx);
3020 if (th) {
3021 uthread_t uth = get_bsdthread_info(th);
3022 tvp = uth->uu_cdir;
3023 uth->uu_cdir = vp;
b0d623f7 3024 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
3025 } else {
3026 vnode_rele(vp);
3027 return (ENOENT);
3028 }
3029 } else {
3030 proc_fdlock(p);
3031 tvp = fdp->fd_cdir;
3032 fdp->fd_cdir = vp;
3033 proc_fdunlock(p);
3034 }
91447636
A
3035
3036 if (tvp)
3037 vnode_rele(tvp);
3038 file_drop(uap->fd);
3039
1c79356b 3040 return (0);
91447636
A
3041out:
3042 vnode_put(vp);
3043 file_drop(uap->fd);
3044
3045 return(error);
1c79356b
A
3046}
3047
2d21ac55 3048int
b0d623f7 3049fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3050{
3051 return common_fchdir(p, uap, 0);
3052}
3053
3054int
b0d623f7 3055__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3056{
3057 return common_fchdir(p, (void *)uap, 1);
3058}
3059
1c79356b 3060/*
b0d623f7 3061 * Change current working directory (".").
2d21ac55
A
3062 *
3063 * Returns: 0 Success
3064 * change_dir:ENOTDIR
3065 * change_dir:???
3066 * vnode_ref:ENOENT No such file or directory
1c79356b 3067 */
1c79356b 3068/* ARGSUSED */
2d21ac55
A
3069static int
3070common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
1c79356b 3071{
2d21ac55 3072 struct filedesc *fdp = p->p_fd;
1c79356b
A
3073 int error;
3074 struct nameidata nd;
2d21ac55
A
3075 vnode_t tvp;
3076 vfs_context_t ctx = vfs_context_current();
91447636 3077
6d2010ae 3078 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
2d21ac55
A
3079 UIO_USERSPACE, uap->path, ctx);
3080 error = change_dir(&nd, ctx);
55e303ae 3081 if (error)
1c79356b 3082 return (error);
91447636
A
3083 if ( (error = vnode_ref(nd.ni_vp)) ) {
3084 vnode_put(nd.ni_vp);
3085 return (error);
3086 }
3087 /*
3088 * drop the iocount we picked up in change_dir
3089 */
3090 vnode_put(nd.ni_vp);
3091
2d21ac55
A
3092 if (per_thread) {
3093 thread_t th = vfs_context_thread(ctx);
3094 if (th) {
3095 uthread_t uth = get_bsdthread_info(th);
3096 tvp = uth->uu_cdir;
3097 uth->uu_cdir = nd.ni_vp;
b0d623f7 3098 OSBitOrAtomic(P_THCWD, &p->p_flag);
2d21ac55
A
3099 } else {
3100 vnode_rele(nd.ni_vp);
3101 return (ENOENT);
3102 }
3103 } else {
3104 proc_fdlock(p);
3105 tvp = fdp->fd_cdir;
3106 fdp->fd_cdir = nd.ni_vp;
3107 proc_fdunlock(p);
3108 }
91447636
A
3109
3110 if (tvp)
3111 vnode_rele(tvp);
3112
1c79356b
A
3113 return (0);
3114}
3115
b0d623f7
A
3116
3117/*
3118 * chdir
3119 *
3120 * Change current working directory (".") for the entire process
3121 *
3122 * Parameters: p Process requesting the call
3123 * uap User argument descriptor (see below)
3124 * retval (ignored)
3125 *
3126 * Indirect parameters: uap->path Directory path
3127 *
3128 * Returns: 0 Success
3129 * common_chdir: ENOTDIR
3130 * common_chdir: ENOENT No such file or directory
3131 * common_chdir: ???
3132 *
3133 */
2d21ac55 3134int
b0d623f7 3135chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3136{
3137 return common_chdir(p, (void *)uap, 0);
3138}
3139
b0d623f7
A
3140/*
3141 * __pthread_chdir
3142 *
3143 * Change current working directory (".") for a single thread
3144 *
3145 * Parameters: p Process requesting the call
3146 * uap User argument descriptor (see below)
3147 * retval (ignored)
3148 *
3149 * Indirect parameters: uap->path Directory path
3150 *
3151 * Returns: 0 Success
3152 * common_chdir: ENOTDIR
3153 * common_chdir: ENOENT No such file or directory
3154 * common_chdir: ???
3155 *
3156 */
2d21ac55 3157int
b0d623f7 3158__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
2d21ac55
A
3159{
3160 return common_chdir(p, (void *)uap, 1);
3161}
3162
3163
1c79356b
A
3164/*
3165 * Change notion of root (``/'') directory.
3166 */
1c79356b
A
3167/* ARGSUSED */
3168int
b0d623f7 3169chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
1c79356b 3170{
2d21ac55 3171 struct filedesc *fdp = p->p_fd;
1c79356b
A
3172 int error;
3173 struct nameidata nd;
2d21ac55
A
3174 vnode_t tvp;
3175 vfs_context_t ctx = vfs_context_current();
1c79356b 3176
91447636 3177 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
1c79356b
A
3178 return (error);
3179
6d2010ae 3180 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
2d21ac55
A
3181 UIO_USERSPACE, uap->path, ctx);
3182 error = change_dir(&nd, ctx);
55e303ae 3183 if (error)
1c79356b
A
3184 return (error);
3185
2d21ac55
A
3186#if CONFIG_MACF
3187 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3188 &nd.ni_cnd);
3189 if (error) {
91447636
A
3190 vnode_put(nd.ni_vp);
3191 return (error);
3192 }
2d21ac55
A
3193#endif
3194
91447636
A
3195 if ( (error = vnode_ref(nd.ni_vp)) ) {
3196 vnode_put(nd.ni_vp);
1c79356b
A
3197 return (error);
3198 }
91447636 3199 vnode_put(nd.ni_vp);
1c79356b 3200
91447636 3201 proc_fdlock(p);
fa4905b1 3202 tvp = fdp->fd_rdir;
1c79356b 3203 fdp->fd_rdir = nd.ni_vp;
91447636
A
3204 fdp->fd_flags |= FD_CHROOT;
3205 proc_fdunlock(p);
3206
fa4905b1 3207 if (tvp != NULL)
91447636
A
3208 vnode_rele(tvp);
3209
1c79356b
A
3210 return (0);
3211}
3212
3213/*
3214 * Common routine for chroot and chdir.
2d21ac55
A
3215 *
3216 * Returns: 0 Success
3217 * ENOTDIR Not a directory
3218 * namei:??? [anything namei can return]
3219 * vnode_authorize:??? [anything vnode_authorize can return]
1c79356b
A
3220 */
3221static int
91447636 3222change_dir(struct nameidata *ndp, vfs_context_t ctx)
1c79356b 3223{
2d21ac55 3224 vnode_t vp;
1c79356b
A
3225 int error;
3226
91447636 3227 if ((error = namei(ndp)))
1c79356b 3228 return (error);
91447636 3229 nameidone(ndp);
1c79356b 3230 vp = ndp->ni_vp;
2d21ac55
A
3231
3232 if (vp->v_type != VDIR) {
91447636 3233 vnode_put(vp);
2d21ac55
A
3234 return (ENOTDIR);
3235 }
3236
3237#if CONFIG_MACF
3238 error = mac_vnode_check_chdir(ctx, vp);
3239 if (error) {
3240 vnode_put(vp);
3241 return (error);
3242 }
3243#endif
3244
3245 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3246 if (error) {
3247 vnode_put(vp);
3248 return (error);
3249 }
91447636 3250
1c79356b
A
3251 return (error);
3252}
3253
fe8ab488
A
3254/*
3255 * Free the vnode data (for directories) associated with the file glob.
3256 */
3257struct fd_vn_data *
3258fg_vn_data_alloc(void)
3259{
3260 struct fd_vn_data *fvdata;
3261
3262 /* Allocate per fd vnode data */
3263 MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
3264 M_FD_VN_DATA, M_WAITOK | M_ZERO);
3265 lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
3266 return fvdata;
3267}
3268
3269/*
3270 * Free the vnode data (for directories) associated with the file glob.
3271 */
3272void
3273fg_vn_data_free(void *fgvndata)
3274{
3275 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3276
3277 if (fvdata->fv_buf)
3278 FREE(fvdata->fv_buf, M_FD_DIRBUF);
3279 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3280 FREE(fvdata, M_FD_VN_DATA);
3281}
3282
1c79356b
A
3283/*
3284 * Check permissions, allocate an open file structure,
3285 * and call the device open routine if any.
2d21ac55
A
3286 *
3287 * Returns: 0 Success
3288 * EINVAL
3289 * EINTR
3290 * falloc:ENFILE
3291 * falloc:EMFILE
3292 * falloc:ENOMEM
3293 * vn_open_auth:???
3294 * dupfdopen:???
3295 * VNOP_ADVLOCK:???
3296 * vnode_setsize:???
b0d623f7
A
3297 *
3298 * XXX Need to implement uid, gid
1c79356b 3299 */
2d21ac55 3300int
39236c6e
A
3301open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3302 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3303 int32_t *retval)
1c79356b 3304{
2d21ac55
A
3305 proc_t p = vfs_context_proc(ctx);
3306 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2d21ac55
A
3307 struct fileproc *fp;
3308 vnode_t vp;
91447636 3309 int flags, oflags;
1c79356b
A
3310 int type, indx, error;
3311 struct flock lf;
3e170ce0 3312 struct vfs_context context;
ccc36f2f 3313
91447636 3314 oflags = uflags;
ccc36f2f
A
3315
3316 if ((oflags & O_ACCMODE) == O_ACCMODE)
3317 return(EINVAL);
3e170ce0 3318
91447636 3319 flags = FFLAGS(uflags);
3e170ce0
A
3320 CLR(flags, FENCRYPTED);
3321 CLR(flags, FUNENCRYPTED);
91447636
A
3322
3323 AUDIT_ARG(fflags, oflags);
3324 AUDIT_ARG(mode, vap->va_mode);
3325
39236c6e
A
3326 if ((error = falloc_withalloc(p,
3327 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
1c79356b 3328 return (error);
91447636 3329 }
2d21ac55 3330 uu->uu_dupfd = -indx - 1;
91447636 3331
2d21ac55
A
3332 if ((error = vn_open_auth(ndp, &flags, vap))) {
3333 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
39236c6e 3334 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
2d21ac55 3335 fp_drop(p, indx, NULL, 0);
91447636
A
3336 *retval = indx;
3337 return (0);
3338 }
1c79356b
A
3339 }
3340 if (error == ERESTART)
91447636
A
3341 error = EINTR;
3342 fp_free(p, indx, fp);
1c79356b
A
3343 return (error);
3344 }
2d21ac55
A
3345 uu->uu_dupfd = 0;
3346 vp = ndp->ni_vp;
55e303ae 3347
3e170ce0 3348 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
91447636
A
3349 fp->f_fglob->fg_ops = &vnops;
3350 fp->f_fglob->fg_data = (caddr_t)vp;
3351
1c79356b
A
3352 if (flags & (O_EXLOCK | O_SHLOCK)) {
3353 lf.l_whence = SEEK_SET;
3354 lf.l_start = 0;
3355 lf.l_len = 0;
3356 if (flags & O_EXLOCK)
3357 lf.l_type = F_WRLCK;
3358 else
3359 lf.l_type = F_RDLCK;
3360 type = F_FLOCK;
3361 if ((flags & FNONBLOCK) == 0)
3362 type |= F_WAIT;
2d21ac55
A
3363#if CONFIG_MACF
3364 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3365 F_SETLK, &lf);
3366 if (error)
3367 goto bad;
3368#endif
39236c6e 3369 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
55e303ae 3370 goto bad;
91447636 3371 fp->f_fglob->fg_flag |= FHASLOCK;
1c79356b 3372 }
55e303ae 3373
91447636
A
3374 /* try to truncate by setting the size attribute */
3375 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3376 goto bad;
55e303ae 3377
fe8ab488
A
3378 /*
3379 * For directories we hold some additional information in the fd.
3380 */
3381 if (vnode_vtype(vp) == VDIR) {
3382 fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
3383 } else {
3384 fp->f_fglob->fg_vn_data = NULL;
2d21ac55
A
3385 }
3386
91447636 3387 vnode_put(vp);
55e303ae 3388
3e170ce0
A
3389 /*
3390 * The first terminal open (without a O_NOCTTY) by a session leader
3391 * results in it being set as the controlling terminal.
3392 */
3393 if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
3394 !(flags & O_NOCTTY)) {
3395 int tmp = 0;
3396
3397 (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
3398 (caddr_t)&tmp, ctx);
3399 }
3400
91447636 3401 proc_fdlock(p);
6d2010ae
A
3402 if (flags & O_CLOEXEC)
3403 *fdflags(p, indx) |= UF_EXCLOSE;
39236c6e
A
3404 if (flags & O_CLOFORK)
3405 *fdflags(p, indx) |= UF_FORKCLOSE;
6601e61a 3406 procfdtbl_releasefd(p, indx, NULL);
91447636
A
3407 fp_drop(p, indx, fp, 1);
3408 proc_fdunlock(p);
3409
1c79356b 3410 *retval = indx;
91447636 3411
1c79356b 3412 return (0);
55e303ae 3413bad:
3e170ce0 3414 context = *vfs_context_current();
2d21ac55 3415 context.vc_ucred = fp->f_fglob->fg_cred;
fe8ab488
A
3416
3417 if ((fp->f_fglob->fg_flag & FHASLOCK) &&
3418 (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
3419 lf.l_whence = SEEK_SET;
3420 lf.l_start = 0;
3421 lf.l_len = 0;
3422 lf.l_type = F_UNLCK;
3423
3424 (void)VNOP_ADVLOCK(
3425 vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
3426 }
2d21ac55
A
3427
3428 vn_close(vp, fp->f_fglob->fg_flag, &context);
91447636
A
3429 vnode_put(vp);
3430 fp_free(p, indx, fp);
3431
55e303ae 3432 return (error);
1c79356b
A
3433}
3434
fe8ab488
A
3435/*
3436 * While most of the *at syscall handlers can call nameiat() which
3437 * is a wrapper around namei, the use of namei and initialisation
3438 * of nameidata are far removed and in different functions - namei
3439 * gets called in vn_open_auth for open1. So we'll just do here what
3440 * nameiat() does.
3441 */
3442static int
3443open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3444 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
3445 int dirfd)
3446{
3447 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
3448 int error;
3449 char c;
3450
3451 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3452 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3453 if (error)
3454 return (error);
3455 } else {
3456 c = *((char *)(ndp->ni_dirp));
3457 }
3458
3459 if (c != '/') {
3460 vnode_t dvp_at;
3461
3462 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3463 &dvp_at);
3464 if (error)
3465 return (error);
3466
3467 if (vnode_vtype(dvp_at) != VDIR) {
3468 vnode_put(dvp_at);
3469 return (ENOTDIR);
3470 }
3471
3472 ndp->ni_dvp = dvp_at;
3473 ndp->ni_cnd.cn_flags |= USEDVP;
3474 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
3475 retval);
3476 vnode_put(dvp_at);
3477 return (error);
3478 }
3479 }
3480
3481 return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
3482}
3483
0c530ab8 3484/*
b0d623f7 3485 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
0c530ab8
A
3486 *
3487 * Parameters: p Process requesting the open
3488 * uap User argument descriptor (see below)
3489 * retval Pointer to an area to receive the
3490 * return calue from the system call
3491 *
3492 * Indirect: uap->path Path to open (same as 'open')
3493 * uap->flags Flags to open (same as 'open'
3494 * uap->uid UID to set, if creating
3495 * uap->gid GID to set, if creating
3496 * uap->mode File mode, if creating (same as 'open')
3497 * uap->xsecurity ACL to set, if creating
3498 *
3499 * Returns: 0 Success
3500 * !0 errno value
3501 *
3502 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3503 *
3504 * XXX: We should enummerate the possible errno values here, and where
3505 * in the code they originated.
3506 */
1c79356b 3507int
b0d623f7 3508open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
91447636 3509{
2d21ac55 3510 struct filedesc *fdp = p->p_fd;
91447636
A
3511 int ciferror;
3512 kauth_filesec_t xsecdst;
3513 struct vnode_attr va;
2d21ac55 3514 struct nameidata nd;
91447636
A
3515 int cmode;
3516
b0d623f7
A
3517 AUDIT_ARG(owner, uap->uid, uap->gid);
3518
91447636
A
3519 xsecdst = NULL;
3520 if ((uap->xsecurity != USER_ADDR_NULL) &&
3521 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3522 return ciferror;
3523
91447636
A
3524 VATTR_INIT(&va);
3525 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3526 VATTR_SET(&va, va_mode, cmode);
3527 if (uap->uid != KAUTH_UID_NONE)
3528 VATTR_SET(&va, va_uid, uap->uid);
3529 if (uap->gid != KAUTH_GID_NONE)
3530 VATTR_SET(&va, va_gid, uap->gid);
3531 if (xsecdst != NULL)
3532 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3533
6d2010ae
A
3534 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3535 uap->path, vfs_context_current());
2d21ac55 3536
39236c6e
A
3537 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3538 fileproc_alloc_init, NULL, retval);
91447636
A
3539 if (xsecdst != NULL)
3540 kauth_filesec_free(xsecdst);
3541
3542 return ciferror;
3543}
3544
316670eb
A
3545/*
3546 * Go through the data-protected atomically controlled open (2)
3547 *
3548 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3549 */
3550int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3551 int flags = uap->flags;
3552 int class = uap->class;
3553 int dpflags = uap->dpflags;
3554
3555 /*
3556 * Follow the same path as normal open(2)
3557 * Look up the item if it exists, and acquire the vnode.
3558 */
3559 struct filedesc *fdp = p->p_fd;
3560 struct vnode_attr va;
3561 struct nameidata nd;
3562 int cmode;
3563 int error;
3564
3565 VATTR_INIT(&va);
3566 /* Mask off all but regular access permissions */
3567 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3568 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3569
3570 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3571 uap->path, vfs_context_current());
3572
3573 /*
3574 * Initialize the extra fields in vnode_attr to pass down our
3575 * extra fields.
3576 * 1. target cprotect class.
3577 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3578 */
3579 if (flags & O_CREAT) {
3e170ce0
A
3580 /* lower level kernel code validates that the class is valid before applying it. */
3581 if (class != PROTECTION_CLASS_DEFAULT) {
3582 /*
3583 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3584 * file behave the same as open (2)
3585 */
3586 VATTR_SET(&va, va_dataprotect_class, class);
3587 }
316670eb
A
3588 }
3589
3e170ce0 3590 if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
316670eb
A
3591 if ( flags & (O_RDWR | O_WRONLY)) {
3592 /* Not allowed to write raw encrypted bytes */
3593 return EINVAL;
3594 }
3e170ce0
A
3595 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
3596 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3597 }
3598 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
3599 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
3600 }
316670eb
A
3601 }
3602
39236c6e
A
3603 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3604 fileproc_alloc_init, NULL, retval);
316670eb
A
3605
3606 return error;
3607}
3608
fe8ab488
A
3609static int
3610openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
3611 int fd, enum uio_seg segflg, int *retval)
2d21ac55 3612{
fe8ab488 3613 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
91447636 3614 struct vnode_attr va;
2d21ac55 3615 struct nameidata nd;
91447636 3616 int cmode;
1c79356b 3617
91447636
A
3618 VATTR_INIT(&va);
3619 /* Mask off all but regular access permissions */
fe8ab488 3620 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
91447636
A
3621 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3622
fe8ab488
A
3623 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
3624 segflg, path, ctx);
2d21ac55 3625
fe8ab488
A
3626 return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
3627 retval, fd));
1c79356b 3628}
91447636 3629
fe8ab488
A
3630int
3631open(proc_t p, struct open_args *uap, int32_t *retval)
3632{
3633 __pthread_testcancel(1);
3634 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3635}
1c79356b 3636
fe8ab488
A
3637int
3638open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3639 int32_t *retval)
3640{
3641 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3642 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3643}
91447636 3644
1c79356b 3645int
fe8ab488
A
3646openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3647 int32_t *retval)
1c79356b 3648{
fe8ab488
A
3649 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3650 uap->mode, uap->fd, UIO_USERSPACE, retval));
3651}
91447636 3652
fe8ab488
A
3653int
3654openat(proc_t p, struct openat_args *uap, int32_t *retval)
3655{
3656 __pthread_testcancel(1);
3657 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3658}
3659
3660/*
3661 * openbyid_np: open a file given a file system id and a file system object id
3662 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3663 * file systems that don't support object ids it is a node id (uint64_t).
3664 *
3665 * Parameters: p Process requesting the open
3666 * uap User argument descriptor (see below)
3667 * retval Pointer to an area to receive the
3668 * return calue from the system call
3669 *
3670 * Indirect: uap->path Path to open (same as 'open')
3671 *
3672 * uap->fsid id of target file system
3673 * uap->objid id of target file system object
3674 * uap->flags Flags to open (same as 'open')
3675 *
3676 * Returns: 0 Success
3677 * !0 errno value
3678 *
3679 *
3680 * XXX: We should enummerate the possible errno values here, and where
3681 * in the code they originated.
3682 */
3683int
3684openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
3685{
3686 fsid_t fsid;
3687 uint64_t objid;
3688 int error;
3689 char *buf = NULL;
3690 int buflen = MAXPATHLEN;
3691 int pathlen = 0;
3692 vfs_context_t ctx = vfs_context_current();
3693
3694 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
3695 return (error);
3696 }
3697
3698 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3699 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
3700 return (error);
3701 }
3702
3703 AUDIT_ARG(value32, fsid.val[0]);
3704 AUDIT_ARG(value64, objid);
3705
3706 /*resolve path from fsis, objid*/
3707 do {
3708 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
3709 if (buf == NULL) {
3710 return (ENOMEM);
3711 }
3712
3713 error = fsgetpath_internal(
3714 ctx, fsid.val[0], objid,
3715 buflen, buf, &pathlen);
3716
3717 if (error) {
3718 FREE(buf, M_TEMP);
3719 buf = NULL;
3720 }
3721 } while (error == ENOSPC && (buflen += MAXPATHLEN));
3722
3723 if (error) {
3724 return error;
3725 }
3726
3727 buf[pathlen] = 0;
3728
3729 error = openat_internal(
3730 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
3731
3732 FREE(buf, M_TEMP);
3733
3734 return error;
3735}
3736
3737
3738/*
3739 * Create a special file.
3740 */
3741static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3742
3743int
3744mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
3745{
3746 struct vnode_attr va;
3747 vfs_context_t ctx = vfs_context_current();
3748 int error;
3749 struct nameidata nd;
3750 vnode_t vp, dvp;
3751
3752 VATTR_INIT(&va);
3753 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3754 VATTR_SET(&va, va_rdev, uap->dev);
91447636
A
3755
3756 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3757 if ((uap->mode & S_IFMT) == S_IFIFO)
2d21ac55 3758 return(mkfifo1(ctx, uap->path, &va));
1c79356b 3759
55e303ae 3760 AUDIT_ARG(mode, uap->mode);
b0d623f7 3761 AUDIT_ARG(value32, uap->dev);
91447636 3762
2d21ac55 3763 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
1c79356b 3764 return (error);
6d2010ae 3765 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
2d21ac55 3766 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
3767 error = namei(&nd);
3768 if (error)
1c79356b 3769 return (error);
91447636 3770 dvp = nd.ni_dvp;
1c79356b 3771 vp = nd.ni_vp;
91447636
A
3772
3773 if (vp != NULL) {
1c79356b 3774 error = EEXIST;
91447636 3775 goto out;
1c79356b 3776 }
55e303ae 3777
91447636 3778 switch (uap->mode & S_IFMT) {
91447636
A
3779 case S_IFCHR:
3780 VATTR_SET(&va, va_type, VCHR);
3781 break;
3782 case S_IFBLK:
3783 VATTR_SET(&va, va_type, VBLK);
3784 break;
91447636
A
3785 default:
3786 error = EINVAL;
3787 goto out;
3788 }
2d21ac55
A
3789
3790#if CONFIG_MACF
6d2010ae
A
3791 error = mac_vnode_check_create(ctx,
3792 nd.ni_dvp, &nd.ni_cnd, &va);
3793 if (error)
3794 goto out;
2d21ac55
A
3795#endif
3796
3797 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3798 goto out;
3799
6d2010ae 3800 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
91447636
A
3801 goto out;
3802
3803 if (vp) {
3804 int update_flags = 0;
3805
3806 // Make sure the name & parent pointers are hooked up
3807 if (vp->v_name == NULL)
3808 update_flags |= VNODE_UPDATE_NAME;
3809 if (vp->v_parent == NULLVP)
3810 update_flags |= VNODE_UPDATE_PARENT;
3811
3812 if (update_flags)
3813 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3814
2d21ac55
A
3815#if CONFIG_FSE
3816 add_fsevent(FSE_CREATE_FILE, ctx,
91447636
A
3817 FSE_ARG_VNODE, vp,
3818 FSE_ARG_DONE);
2d21ac55 3819#endif
1c79356b 3820 }
91447636
A
3821
3822out:
3823 /*
3824 * nameidone has to happen before we vnode_put(dvp)
3825 * since it may need to release the fs_nodelock on the dvp
3826 */
3827 nameidone(&nd);
3828
3829 if (vp)
3830 vnode_put(vp);
3831 vnode_put(dvp);
3832
1c79356b
A
3833 return (error);
3834}
3835
3836/*
3837 * Create a named pipe.
2d21ac55
A
3838 *
3839 * Returns: 0 Success
3840 * EEXIST
3841 * namei:???
3842 * vnode_authorize:???
3843 * vn_create:???
1c79356b 3844 */
91447636
A
3845static int
3846mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
1c79356b 3847{
91447636 3848 vnode_t vp, dvp;
1c79356b
A
3849 int error;
3850 struct nameidata nd;
55e303ae 3851
6d2010ae 3852 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
91447636 3853 UIO_USERSPACE, upath, ctx);
55e303ae
A
3854 error = namei(&nd);
3855 if (error)
1c79356b 3856 return (error);
91447636
A
3857 dvp = nd.ni_dvp;
3858 vp = nd.ni_vp;
3859
3860 /* check that this is a new file and authorize addition */
3861 if (vp != NULL) {
3862 error = EEXIST;
3863 goto out;
3864 }
2d21ac55
A
3865 VATTR_SET(vap, va_type, VFIFO);
3866
6d2010ae 3867 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
2d21ac55 3868 goto out;
2d21ac55 3869
6d2010ae 3870 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
91447636
A
3871out:
3872 /*
3873 * nameidone has to happen before we vnode_put(dvp)
3874 * since it may need to release the fs_nodelock on the dvp
3875 */
3876 nameidone(&nd);
3877
3878 if (vp)
3879 vnode_put(vp);
3880 vnode_put(dvp);
3881
55e303ae 3882 return error;
91447636
A
3883}
3884
0c530ab8
A
3885
3886/*
b0d623f7 3887 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
0c530ab8
A
3888 *
3889 * Parameters: p Process requesting the open
3890 * uap User argument descriptor (see below)
3891 * retval (Ignored)
3892 *
3893 * Indirect: uap->path Path to fifo (same as 'mkfifo')
3894 * uap->uid UID to set
3895 * uap->gid GID to set
3896 * uap->mode File mode to set (same as 'mkfifo')
3897 * uap->xsecurity ACL to set, if creating
3898 *
3899 * Returns: 0 Success
3900 * !0 errno value
3901 *
3902 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3903 *
3904 * XXX: We should enummerate the possible errno values here, and where
3905 * in the code they originated.
3906 */
91447636 3907int
b0d623f7 3908mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
91447636
A
3909{
3910 int ciferror;
3911 kauth_filesec_t xsecdst;
91447636
A
3912 struct vnode_attr va;
3913
b0d623f7
A
3914 AUDIT_ARG(owner, uap->uid, uap->gid);
3915
91447636
A
3916 xsecdst = KAUTH_FILESEC_NONE;
3917 if (uap->xsecurity != USER_ADDR_NULL) {
3918 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
3919 return ciferror;
3920 }
3921
91447636
A
3922 VATTR_INIT(&va);
3923 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3924 if (uap->uid != KAUTH_UID_NONE)
3925 VATTR_SET(&va, va_uid, uap->uid);
3926 if (uap->gid != KAUTH_GID_NONE)
3927 VATTR_SET(&va, va_gid, uap->gid);
3928 if (xsecdst != KAUTH_FILESEC_NONE)
3929 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3930
2d21ac55 3931 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
91447636
A
3932
3933 if (xsecdst != KAUTH_FILESEC_NONE)
3934 kauth_filesec_free(xsecdst);
3935 return ciferror;
3936}
3937
3938/* ARGSUSED */
3939int
b0d623f7 3940mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
91447636 3941{
91447636
A
3942 struct vnode_attr va;
3943
91447636
A
3944 VATTR_INIT(&va);
3945 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3946
2d21ac55 3947 return(mkfifo1(vfs_context_current(), uap->path, &va));
1c79356b
A
3948}
3949
b0d623f7
A
3950
/*
 * Return a pointer to the last occurrence of 'ch' in the NUL-terminated
 * string 'p', or NULL if it does not occur.
 *
 * As with strrchr(), 'ch' is converted to char before comparing, so a
 * byte value above CHAR_MAX still matches when char is signed, and the
 * terminating NUL counts as part of the string (searching for '\0'
 * returns a pointer to the terminator).
 */
static char *
my_strrchr(char *p, int ch)
{
	const char target = (char)ch;
	char *save;

	for (save = NULL;; ++p) {
		if (*p == target)
			save = p;
		if (!*p)
			return(save);
	}
	/* NOTREACHED */
}
3964
3965extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
3966
3967int
3968safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
3969{
3970 int ret, len = _len;
3971
3972 *truncated_path = 0;
3973 ret = vn_getpath(dvp, path, &len);
3974 if (ret == 0 && len < (MAXPATHLEN - 1)) {
3975 if (leafname) {
3976 path[len-1] = '/';
3977 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
3978 if (len > MAXPATHLEN) {
3979 char *ptr;
3980
3981 // the string got truncated!
3982 *truncated_path = 1;
3983 ptr = my_strrchr(path, '/');
3984 if (ptr) {
3985 *ptr = '\0'; // chop off the string at the last directory component
3986 }
3987 len = strlen(path) + 1;
3988 }
3989 }
3990 } else if (ret == 0) {
3991 *truncated_path = 1;
3992 } else if (ret != 0) {
3993 struct vnode *mydvp=dvp;
3994
3995 if (ret != ENOSPC) {
3996 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
3997 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
3998 }
3999 *truncated_path = 1;
4000
4001 do {
4002 if (mydvp->v_parent != NULL) {
4003 mydvp = mydvp->v_parent;
4004 } else if (mydvp->v_mount) {
4005 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4006 break;
4007 } else {
4008 // no parent and no mount point? only thing is to punt and say "/" changed
4009 strlcpy(path, "/", _len);
4010 len = 2;
4011 mydvp = NULL;
4012 }
4013
4014 if (mydvp == NULL) {
4015 break;
4016 }
4017
4018 len = _len;
4019 ret = vn_getpath(mydvp, path, &len);
4020 } while (ret == ENOSPC);
4021 }
4022
4023 return len;
4024}
4025
4026
1c79356b
A
4027/*
4028 * Make a hard file link.
2d21ac55
A
4029 *
4030 * Returns: 0 Success
4031 * EPERM
4032 * EEXIST
4033 * EXDEV
4034 * namei:???
4035 * vnode_authorize:???
4036 * VNOP_LINK:???
1c79356b 4037 */
1c79356b 4038/* ARGSUSED */
fe8ab488
A
4039static int
4040linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
4041 user_addr_t link, int flag, enum uio_seg segflg)
1c79356b 4042{
91447636 4043 vnode_t vp, dvp, lvp;
1c79356b 4044 struct nameidata nd;
fe8ab488 4045 int follow;
1c79356b 4046 int error;
b0d623f7 4047#if CONFIG_FSE
91447636 4048 fse_info finfo;
b0d623f7 4049#endif
91447636 4050 int need_event, has_listeners;
2d21ac55 4051 char *target_path = NULL;
b0d623f7 4052 int truncated=0;
1c79356b 4053
91447636
A
4054 vp = dvp = lvp = NULLVP;
4055
4056 /* look up the object we are linking to */
fe8ab488
A
4057 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
4058 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
4059 segflg, path, ctx);
4060
4061 error = nameiat(&nd, fd1);
55e303ae 4062 if (error)
1c79356b
A
4063 return (error);
4064 vp = nd.ni_vp;
91447636
A
4065
4066 nameidone(&nd);
4067
2d21ac55
A
4068 /*
4069 * Normally, linking to directories is not supported.
4070 * However, some file systems may have limited support.
4071 */
91447636 4072 if (vp->v_type == VDIR) {
2d21ac55
A
4073 if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
4074 error = EPERM; /* POSIX */
4075 goto out;
4076 }
4077 /* Linking to a directory requires ownership. */
4078 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
4079 struct vnode_attr dva;
4080
4081 VATTR_INIT(&dva);
4082 VATTR_WANTED(&dva, va_uid);
4083 if (vnode_getattr(vp, &dva, ctx) != 0 ||
4084 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
4085 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
4086 error = EACCES;
4087 goto out;
4088 }
4089 }
91447636
A
4090 }
4091
91447636 4092 /* lookup the target node */
6d2010ae
A
4093#if CONFIG_TRIGGERS
4094 nd.ni_op = OP_LINK;
4095#endif
91447636 4096 nd.ni_cnd.cn_nameiop = CREATE;
2d21ac55 4097 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
fe8ab488
A
4098 nd.ni_dirp = link;
4099 error = nameiat(&nd, fd2);
91447636
A
4100 if (error != 0)
4101 goto out;
4102 dvp = nd.ni_dvp;
4103 lvp = nd.ni_vp;
2d21ac55
A
4104
4105#if CONFIG_MACF
4106 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
4107 goto out2;
4108#endif
4109
4110 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4111 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
4112 goto out2;
4113
91447636
A
4114 /* target node must not exist */
4115 if (lvp != NULLVP) {
4116 error = EEXIST;
4117 goto out2;
4118 }
4119 /* cannot link across mountpoints */
4120 if (vnode_mount(vp) != vnode_mount(dvp)) {
4121 error = EXDEV;
4122 goto out2;
4123 }
4124
4125 /* authorize creation of the target note */
2d21ac55 4126 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
91447636
A
4127 goto out2;
4128
4129 /* and finally make the link */
2d21ac55 4130 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
91447636
A
4131 if (error)
4132 goto out2;
4133
39236c6e
A
4134#if CONFIG_MACF
4135 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
4136#endif
4137
2d21ac55 4138#if CONFIG_FSE
91447636 4139 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
2d21ac55
A
4140#else
4141 need_event = 0;
4142#endif
91447636
A
4143 has_listeners = kauth_authorize_fileop_has_listeners();
4144
4145 if (need_event || has_listeners) {
91447636
A
4146 char *link_to_path = NULL;
4147 int len, link_name_len;
4148
4149 /* build the path to the new link file */
2d21ac55
A
4150 GET_PATH(target_path);
4151 if (target_path == NULL) {
4152 error = ENOMEM;
4153 goto out2;
4154 }
4155
b0d623f7 4156 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
91447636
A
4157
4158 if (has_listeners) {
4159 /* build the path to file we are linking to */
2d21ac55
A
4160 GET_PATH(link_to_path);
4161 if (link_to_path == NULL) {
4162 error = ENOMEM;
4163 goto out2;
4164 }
4165
91447636 4166 link_name_len = MAXPATHLEN;
fe8ab488
A
4167 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
4168 /*
4169 * Call out to allow 3rd party notification of rename.
4170 * Ignore result of kauth_authorize_fileop call.
4171 */
4172 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
4173 (uintptr_t)link_to_path,
4174 (uintptr_t)target_path);
4175 }
2d21ac55
A
4176 if (link_to_path != NULL) {
4177 RELEASE_PATH(link_to_path);
4178 }
91447636 4179 }
2d21ac55 4180#if CONFIG_FSE
91447636
A
4181 if (need_event) {
4182 /* construct fsevent */
2d21ac55 4183 if (get_fse_info(vp, &finfo, ctx) == 0) {
b0d623f7
A
4184 if (truncated) {
4185 finfo.mode |= FSE_TRUNCATED_PATH;
4186 }
4187
91447636 4188 // build the path to the destination of the link
2d21ac55 4189 add_fsevent(FSE_CREATE_FILE, ctx,
91447636
A
4190 FSE_ARG_STRING, len, target_path,
4191 FSE_ARG_FINFO, &finfo,
4192 FSE_ARG_DONE);
1c79356b 4193 }
b0d623f7
A
4194 if (vp->v_parent) {
4195 add_fsevent(FSE_STAT_CHANGED, ctx,
4196 FSE_ARG_VNODE, vp->v_parent,
4197 FSE_ARG_DONE);
4198 }
1c79356b 4199 }
2d21ac55 4200#endif
1c79356b 4201 }
91447636
A
4202out2:
4203 /*
4204 * nameidone has to happen before we vnode_put(dvp)
4205 * since it may need to release the fs_nodelock on the dvp
4206 */
4207 nameidone(&nd);
2d21ac55
A
4208 if (target_path != NULL) {
4209 RELEASE_PATH(target_path);
4210 }
91447636
A
4211out:
4212 if (lvp)
4213 vnode_put(lvp);
4214 if (dvp)
4215 vnode_put(dvp);
4216 vnode_put(vp);
4217 return (error);
4218}
1c79356b 4219
fe8ab488
A
4220int
4221link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4222{
4223 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4224 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4225}
4226
4227int
4228linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4229{
4230 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4231 return (EINVAL);
4232
4233 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4234 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4235}
4236
1c79356b
A
4237/*
4238 * Make a symbolic link.
91447636
A
4239 *
4240 * We could add support for ACLs here too...
1c79356b 4241 */
1c79356b 4242/* ARGSUSED */
fe8ab488
A
4243static int
4244symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
4245 user_addr_t link, enum uio_seg segflg)
1c79356b 4246{
91447636
A
4247 struct vnode_attr va;
4248 char *path;
1c79356b
A
4249 int error;
4250 struct nameidata nd;
91447636 4251 vnode_t vp, dvp;
fe8ab488 4252 uint32_t dfflags; // Directory file flags
1c79356b 4253 size_t dummy=0;
fe8ab488
A
4254 proc_t p;
4255
4256 error = 0;
4257 if (UIO_SEG_IS_USER_SPACE(segflg)) {
4258 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
4259 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
4260 } else {
4261 path = (char *)path_data;
4262 }
91447636 4263 if (error)
1c79356b 4264 goto out;
55e303ae 4265 AUDIT_ARG(text, path); /* This is the link string */
91447636 4266
fe8ab488
A
4267 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
4268 segflg, link, ctx);
4269
4270 error = nameiat(&nd, fd);
55e303ae 4271 if (error)
1c79356b 4272 goto out;
91447636
A
4273 dvp = nd.ni_dvp;
4274 vp = nd.ni_vp;
55e303ae 4275
fe8ab488 4276 p = vfs_context_proc(ctx);
2d21ac55
A
4277 VATTR_INIT(&va);
4278 VATTR_SET(&va, va_type, VLNK);
4279 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
fe8ab488
A
4280
4281 /*
4282 * Handle inheritance of restricted flag
4283 */
4284 error = vnode_flags(dvp, &dfflags, ctx);
4285 if (error)
4286 goto skipit;
4287 if (dfflags & SF_RESTRICTED)
4288 VATTR_SET(&va, va_flags, SF_RESTRICTED);
4289
2d21ac55
A
4290#if CONFIG_MACF
4291 error = mac_vnode_check_create(ctx,
4292 dvp, &nd.ni_cnd, &va);
4293#endif
4294 if (error != 0) {
4295 goto skipit;
4296 }
91447636 4297
2d21ac55
A
4298 if (vp != NULL) {
4299 error = EEXIST;
4300 goto skipit;
4301 }
4302
4303 /* authorize */
4304 if (error == 0)
4305 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
4306 /* get default ownership, etc. */
4307 if (error == 0)
4308 error = vnode_authattr_new(dvp, &va, 0, ctx);
4309 if (error == 0)
4310 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
4311
39236c6e 4312#if CONFIG_MACF
3e170ce0 4313 if (error == 0 && vp)
39236c6e
A
4314 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
4315#endif
4316
2d21ac55 4317 /* do fallback attribute handling */
3e170ce0 4318 if (error == 0 && vp)
2d21ac55 4319 error = vnode_setattr_fallback(vp, &va, ctx);
39236c6e 4320
2d21ac55
A
4321 if (error == 0) {
4322 int update_flags = 0;
55e303ae 4323
3e170ce0 4324 /*check if a new vnode was created, else try to get one*/
2d21ac55
A
4325 if (vp == NULL) {
4326 nd.ni_cnd.cn_nameiop = LOOKUP;
6d2010ae
A
4327#if CONFIG_TRIGGERS
4328 nd.ni_op = OP_LOOKUP;
4329#endif
2d21ac55 4330 nd.ni_cnd.cn_flags = 0;
fe8ab488 4331 error = nameiat(&nd, fd);
2d21ac55 4332 vp = nd.ni_vp;
55e303ae 4333
2d21ac55
A
4334 if (vp == NULL)
4335 goto skipit;
4336 }
fe8ab488 4337
91447636 4338#if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
fe8ab488 4339 /* call out to allow 3rd party notification of rename.
2d21ac55
A
4340 * Ignore result of kauth_authorize_fileop call.
4341 */
4342 if (kauth_authorize_fileop_has_listeners() &&
4343 namei(&nd) == 0) {
4344 char *new_link_path = NULL;
4345 int len;
fe8ab488 4346
2d21ac55
A
4347 /* build the path to the new link file */
4348 new_link_path = get_pathbuff();
4349 len = MAXPATHLEN;
4350 vn_getpath(dvp, new_link_path, &len);
4351 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
91447636 4352 new_link_path[len - 1] = '/';
2d21ac55 4353 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
91447636 4354 }
fe8ab488
A
4355
4356 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
2d21ac55
A
4357 (uintptr_t)path, (uintptr_t)new_link_path);
4358 if (new_link_path != NULL)
4359 release_pathbuff(new_link_path);
4360 }
fe8ab488 4361#endif
2d21ac55
A
4362 // Make sure the name & parent pointers are hooked up
4363 if (vp->v_name == NULL)
4364 update_flags |= VNODE_UPDATE_NAME;
4365 if (vp->v_parent == NULLVP)
4366 update_flags |= VNODE_UPDATE_PARENT;
fe8ab488 4367
2d21ac55
A
4368 if (update_flags)
4369 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
91447636 4370
2d21ac55
A
4371#if CONFIG_FSE
4372 add_fsevent(FSE_CREATE_FILE, ctx,
4373 FSE_ARG_VNODE, vp,
4374 FSE_ARG_DONE);
4375#endif
4376 }
91447636
A
4377
4378skipit:
4379 /*
4380 * nameidone has to happen before we vnode_put(dvp)
4381 * since it may need to release the fs_nodelock on the dvp
4382 */
4383 nameidone(&nd);
4384
4385 if (vp)
4386 vnode_put(vp);
4387 vnode_put(dvp);
1c79356b 4388out:
fe8ab488
A
4389 if (path && (path != (char *)path_data))
4390 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
91447636 4391
1c79356b
A
4392 return (error);
4393}
4394
fe8ab488
A
4395int
4396symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4397{
4398 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4399 uap->link, UIO_USERSPACE));
4400}
4401
4402int
4403symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4404 __unused int32_t *retval)
4405{
4406 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4407 uap->path2, UIO_USERSPACE));
4408}
4409
1c79356b
A
4410/*
4411 * Delete a whiteout from the filesystem.
fe8ab488 4412 * No longer supported.
1c79356b 4413 */
1c79356b 4414int
fe8ab488 4415undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
1c79356b 4416{
fe8ab488 4417 return (ENOTSUP);
1c79356b
A
4418}
4419
4420/*
4421 * Delete a name from the filesystem.
4422 */
1c79356b 4423/* ARGSUSED */
fe8ab488 4424static int
c18c124e
A
4425unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
4426 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
1c79356b 4427{
c18c124e 4428 struct nameidata nd;
91447636 4429 vnode_t vp, dvp;
1c79356b 4430 int error;
91447636 4431 struct componentname *cnp;
2d21ac55 4432 char *path = NULL;
b0d623f7
A
4433 int len=0;
4434#if CONFIG_FSE
2d21ac55 4435 fse_info finfo;
6d2010ae 4436 struct vnode_attr va;
b0d623f7 4437#endif
c18c124e
A
4438 int flags;
4439 int need_event;
4440 int has_listeners;
4441 int truncated_path;
6d2010ae 4442 int batched;
c18c124e
A
4443 struct vnode_attr *vap;
4444 int do_retry;
4445 int retry_count = 0;
4446 int cn_flags;
4447
4448 cn_flags = LOCKPARENT;
4449 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
4450 cn_flags |= AUDITVNPATH1;
4451 /* If a starting dvp is passed, it trumps any fd passed. */
4452 if (start_dvp)
4453 cn_flags |= USEDVP;
6d2010ae 4454
c910b4d9
A
4455#if NAMEDRSRCFORK
4456 /* unlink or delete is allowed on rsrc forks and named streams */
c18c124e 4457 cn_flags |= CN_ALLOWRSRCFORK;
c910b4d9
A
4458#endif
4459
c18c124e
A
4460retry:
4461 do_retry = 0;
4462 flags = 0;
4463 need_event = 0;
4464 has_listeners = 0;
4465 truncated_path = 0;
4466 vap = NULL;
4467
4468 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
4469
4470 nd.ni_dvp = start_dvp;
4471 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
4472 cnp = &nd.ni_cnd;
91447636 4473
6d2010ae 4474lookup_continue:
c18c124e 4475 error = nameiat(&nd, fd);
2d21ac55
A
4476 if (error)
4477 return (error);
b0d623f7 4478
c18c124e
A
4479 dvp = nd.ni_dvp;
4480 vp = nd.ni_vp;
91447636 4481
6d2010ae 4482
91447636 4483 /* With Carbon delete semantics, busy files cannot be deleted */
316670eb 4484 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
91447636 4485 flags |= VNODE_REMOVE_NODELETEBUSY;
2d21ac55 4486 }
316670eb 4487
39236c6e 4488 /* Skip any potential upcalls if told to. */
316670eb
A
4489 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4490 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4491 }
4492
6d2010ae
A
4493 if (vp) {
4494 batched = vnode_compound_remove_available(vp);
4495 /*
4496 * The root of a mounted filesystem cannot be deleted.
4497 */
4498 if (vp->v_flag & VROOT) {
4499 error = EBUSY;
4500 }
2d21ac55 4501
6d2010ae
A
4502 if (!batched) {
4503 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4504 if (error) {
3e170ce0
A
4505 if (error == ENOENT) {
4506 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4507 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4508 do_retry = 1;
4509 retry_count++;
4510 }
c18c124e 4511 }
6d2010ae
A
4512 goto out;
4513 }
4514 }
4515 } else {
4516 batched = 1;
2d21ac55 4517
6d2010ae
A
4518 if (!vnode_compound_remove_available(dvp)) {
4519 panic("No vp, but no compound remove?");
4520 }
4521 }
2d21ac55 4522
2d21ac55
A
4523#if CONFIG_FSE
4524 need_event = need_fsevent(FSE_DELETE, dvp);
4525 if (need_event) {
6d2010ae
A
4526 if (!batched) {
4527 if ((vp->v_flag & VISHARDLINK) == 0) {
4528 /* XXX need to get these data in batched VNOP */
4529 get_fse_info(vp, &finfo, ctx);
4530 }
4531 } else {
4532 error = vfs_get_notify_attributes(&va);
4533 if (error) {
4534 goto out;
4535 }
4536
4537 vap = &va;
2d21ac55
A
4538 }
4539 }
4540#endif
4541 has_listeners = kauth_authorize_fileop_has_listeners();
4542 if (need_event || has_listeners) {
2d21ac55 4543 if (path == NULL) {
6d2010ae
A
4544 GET_PATH(path);
4545 if (path == NULL) {
4546 error = ENOMEM;
4547 goto out;
4548 }
2d21ac55 4549 }
c18c124e 4550 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
2d21ac55
A
4551 }
4552
4553#if NAMEDRSRCFORK
c18c124e 4554 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
2d21ac55
A
4555 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4556 else
4557#endif
6d2010ae 4558 {
c18c124e
A
4559 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
4560 vp = nd.ni_vp;
6d2010ae
A
4561 if (error == EKEEPLOOKING) {
4562 if (!batched) {
4563 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4564 }
4565
c18c124e 4566 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
6d2010ae
A
4567 panic("EKEEPLOOKING, but continue flag not set?");
4568 }
4569
4570 if (vnode_isdir(vp)) {
4571 error = EISDIR;
4572 goto out;
4573 }
4574 goto lookup_continue;
3e170ce0
A
4575 } else if (error == ENOENT && batched) {
4576 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4577 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4578 /*
4579 * For compound VNOPs, the authorization callback may
4580 * return ENOENT in case of racing hardlink lookups
4581 * hitting the name cache, redrive the lookup.
4582 */
4583 do_retry = 1;
4584 retry_count += 1;
4585 goto out;
4586 }
6d2010ae
A
4587 }
4588 }
2d21ac55
A
4589
4590 /*
4591 * Call out to allow 3rd party notification of delete.
4592 * Ignore result of kauth_authorize_fileop call.
4593 */
1c79356b 4594 if (!error) {
2d21ac55
A
4595 if (has_listeners) {
4596 kauth_authorize_fileop(vfs_context_ucred(ctx),
4597 KAUTH_FILEOP_DELETE,
4598 (uintptr_t)vp,
4599 (uintptr_t)path);
4600 }
91447636 4601
2d21ac55
A
4602 if (vp->v_flag & VISHARDLINK) {
4603 //
4604 // if a hardlink gets deleted we want to blow away the
4605 // v_parent link because the path that got us to this
4606 // instance of the link is no longer valid. this will
4607 // force the next call to get the path to ask the file
4608 // system instead of just following the v_parent link.
4609 //
4610 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
91447636 4611 }
91447636 4612
2d21ac55
A
4613#if CONFIG_FSE
4614 if (need_event) {
4615 if (vp->v_flag & VISHARDLINK) {
4616 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
4617 } else if (vap) {
4618 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 4619 }
b0d623f7
A
4620 if (truncated_path) {
4621 finfo.mode |= FSE_TRUNCATED_PATH;
4622 }
2d21ac55
A
4623 add_fsevent(FSE_DELETE, ctx,
4624 FSE_ARG_STRING, len, path,
4625 FSE_ARG_FINFO, &finfo,
4626 FSE_ARG_DONE);
4627 }
4628#endif
1c79356b 4629 }
6d2010ae
A
4630
4631out:
2d21ac55
A
4632 if (path != NULL)
4633 RELEASE_PATH(path);
4634
c910b4d9 4635#if NAMEDRSRCFORK
b0d623f7
A
4636 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4637 * will cause its shadow file to go away if necessary.
4638 */
6d2010ae
A
4639 if (vp && (vnode_isnamedstream(vp)) &&
4640 (vp->v_parent != NULLVP) &&
4641 vnode_isshadow(vp)) {
4642 vnode_recycle(vp);
b0d623f7 4643 }
c910b4d9 4644#endif
6d2010ae
A
4645 /*
4646 * nameidone has to happen before we vnode_put(dvp)
4647 * since it may need to release the fs_nodelock on the dvp
4648 */
c18c124e 4649 nameidone(&nd);
91447636 4650 vnode_put(dvp);
6d2010ae
A
4651 if (vp) {
4652 vnode_put(vp);
4653 }
c18c124e
A
4654
4655 if (do_retry) {
4656 goto retry;
4657 }
4658
1c79356b
A
4659 return (error);
4660}
4661
fe8ab488 4662int
c18c124e
A
4663unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
4664 enum uio_seg segflg, int unlink_flags)
fe8ab488 4665{
c18c124e
A
4666 return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
4667 unlink_flags));
fe8ab488
A
4668}
4669
1c79356b 4670/*
c18c124e 4671 * Delete a name from the filesystem using Carbon semantics.
1c79356b 4672 */
c18c124e
A
4673int
4674delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
fe8ab488 4675{
c18c124e
A
4676 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4677 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
fe8ab488
A
4678}
4679
c18c124e
A
4680/*
4681 * Delete a name from the filesystem using POSIX semantics.
4682 */
1c79356b 4683int
b0d623f7 4684unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
1c79356b 4685{
c18c124e
A
4686 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4687 uap->path, UIO_USERSPACE, 0));
fe8ab488 4688}
2d21ac55 4689
fe8ab488
A
4690int
4691unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
4692{
4693 if (uap->flag & ~AT_REMOVEDIR)
4694 return (EINVAL);
4695
4696 if (uap->flag & AT_REMOVEDIR)
4697 return (rmdirat_internal(vfs_context_current(), uap->fd,
4698 uap->path, UIO_USERSPACE));
4699 else
4700 return (unlinkat_internal(vfs_context_current(), uap->fd,
c18c124e 4701 NULLVP, uap->path, UIO_USERSPACE, 0));
1c79356b
A
4702}
4703
4704/*
4705 * Reposition read/write file offset.
4706 */
1c79356b 4707int
2d21ac55 4708lseek(proc_t p, struct lseek_args *uap, off_t *retval)
1c79356b 4709{
91447636 4710 struct fileproc *fp;
2d21ac55
A
4711 vnode_t vp;
4712 struct vfs_context *ctx;
91447636 4713 off_t offset = uap->offset, file_size;
1c79356b
A
4714 int error;
4715
91447636
A
4716 if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
4717 if (error == ENOTSUP)
4718 return (ESPIPE);
1c79356b 4719 return (error);
55e303ae 4720 }
91447636
A
4721 if (vnode_isfifo(vp)) {
4722 file_drop(uap->fd);
4723 return(ESPIPE);
4724 }
2d21ac55
A
4725
4726
4727 ctx = vfs_context_current();
4728#if CONFIG_MACF
4729 if (uap->whence == L_INCR && uap->offset == 0)
4730 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
4731 fp->f_fglob);
4732 else
4733 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
4734 fp->f_fglob);
4735 if (error) {
4736 file_drop(uap->fd);
4737 return (error);
4738 }
4739#endif
91447636
A
4740 if ( (error = vnode_getwithref(vp)) ) {
4741 file_drop(uap->fd);
4742 return(error);
4743 }
4744
1c79356b
A
4745 switch (uap->whence) {
4746 case L_INCR:
91447636 4747 offset += fp->f_fglob->fg_offset;
1c79356b
A
4748 break;
4749 case L_XTND:
2d21ac55 4750 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
55e303ae 4751 break;
91447636 4752 offset += file_size;
1c79356b
A
4753 break;
4754 case L_SET:
1c79356b
A
4755 break;
4756 default:
55e303ae 4757 error = EINVAL;
1c79356b 4758 }
55e303ae
A
4759 if (error == 0) {
4760 if (uap->offset > 0 && offset < 0) {
4761 /* Incremented/relative move past max size */
4762 error = EOVERFLOW;
4763 } else {
4764 /*
4765 * Allow negative offsets on character devices, per
4766 * POSIX 1003.1-2001. Most likely for writing disk
4767 * labels.
4768 */
4769 if (offset < 0 && vp->v_type != VCHR) {
4770 /* Decremented/relative move before start */
4771 error = EINVAL;
4772 } else {
4773 /* Success */
91447636
A
4774 fp->f_fglob->fg_offset = offset;
4775 *retval = fp->f_fglob->fg_offset;
55e303ae
A
4776 }
4777 }
4778 }
b0d623f7
A
4779
4780 /*
4781 * An lseek can affect whether data is "available to read." Use
4782 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4783 */
4784 post_event_if_success(vp, error, NOTE_NONE);
91447636
A
4785 (void)vnode_put(vp);
4786 file_drop(uap->fd);
55e303ae 4787 return (error);
1c79356b
A
4788}
4789
91447636 4790
1c79356b 4791/*
91447636 4792 * Check access permissions.
2d21ac55
A
4793 *
4794 * Returns: 0 Success
4795 * vnode_authorize:???
1c79356b 4796 */
91447636
A
4797static int
4798access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
1c79356b 4799{
91447636 4800 kauth_action_t action;
1c79356b
A
4801 int error;
4802
91447636
A
4803 /*
4804 * If just the regular access bits, convert them to something
4805 * that vnode_authorize will understand.
4806 */
4807 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4808 action = 0;
4809 if (uflags & R_OK)
4810 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4811 if (uflags & W_OK) {
4812 if (vnode_isdir(vp)) {
4813 action |= KAUTH_VNODE_ADD_FILE |
4814 KAUTH_VNODE_ADD_SUBDIRECTORY;
4815 /* might want delete rights here too */
4816 } else {
4817 action |= KAUTH_VNODE_WRITE_DATA;
4818 }
4819 }
4820 if (uflags & X_OK) {
4821 if (vnode_isdir(vp)) {
4822 action |= KAUTH_VNODE_SEARCH;
4823 } else {
4824 action |= KAUTH_VNODE_EXECUTE;
4825 }
4826 }
4827 } else {
4828 /* take advantage of definition of uflags */
4829 action = uflags >> 8;
4830 }
4831
2d21ac55
A
4832#if CONFIG_MACF
4833 error = mac_vnode_check_access(ctx, vp, uflags);
4834 if (error)
4835 return (error);
4836#endif /* MAC */
4837
91447636
A
4838 /* action == 0 means only check for existence */
4839 if (action != 0) {
4840 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
4841 } else {
4842 error = 0;
4843 }
4844
4845 return(error);
1c79356b 4846}
1c79356b 4847
91447636
A
4848
4849
2d21ac55 4850/*
b0d623f7 4851 * access_extended: Check access permissions in bulk.
2d21ac55 4852 *
b0d623f7
A
4853 * Description: uap->entries Pointer to an array of accessx
4854 * descriptor structs, plus one or
4855 * more NULL terminated strings (see
4856 * "Notes" section below).
4857 * uap->size Size of the area pointed to by
4858 * uap->entries.
4859 * uap->results Pointer to the results array.
2d21ac55
A
4860 *
4861 * Returns: 0 Success
4862 * ENOMEM Insufficient memory
4863 * EINVAL Invalid arguments
4864 * namei:EFAULT Bad address
4865 * namei:ENAMETOOLONG Filename too long
4866 * namei:ENOENT No such file or directory
4867 * namei:ELOOP Too many levels of symbolic links
4868 * namei:EBADF Bad file descriptor
4869 * namei:ENOTDIR Not a directory
4870 * namei:???
4871 * access1:
4872 *
4873 * Implicit returns:
4874 * uap->results Array contents modified
4875 *
4876 * Notes: The uap->entries are structured as an arbitrary length array
b0d623f7 4877 * of accessx descriptors, followed by one or more NULL terminated
2d21ac55
A
4878 * strings
4879 *
4880 * struct accessx_descriptor[0]
4881 * ...
4882 * struct accessx_descriptor[n]
4883 * char name_data[0];
4884 *
4885 * We determine the entry count by walking the buffer containing
b0d623f7 4886 * the uap->entries argument descriptor. For each descriptor we
2d21ac55
A
4887 * see, the valid values for the offset ad_name_offset will be
4888 * in the byte range:
4889 *
4890 * [ uap->entries + sizeof(struct accessx_descriptor) ]
4891 * to
4892 * [ uap->entries + uap->size - 2 ]
4893 *
4894 * since we must have at least one string, and the string must
b0d623f7 4895 * be at least one character plus the NULL terminator in length.
2d21ac55
A
4896 *
4897 * XXX: Need to support the check-as uid argument
4898 */
1c79356b 4899int
b0d623f7 4900access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
1c79356b 4901{
2d21ac55
A
4902 struct accessx_descriptor *input = NULL;
4903 errno_t *result = NULL;
4904 errno_t error = 0;
4905 int wantdelete = 0;
4906 unsigned int desc_max, desc_actual, i, j;
91447636 4907 struct vfs_context context;
1c79356b 4908 struct nameidata nd;
91447636 4909 int niopts;
2d21ac55
A
4910 vnode_t vp = NULL;
4911 vnode_t dvp = NULL;
4912#define ACCESSX_MAX_DESCR_ON_STACK 10
4913 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
91447636 4914
91447636
A
4915 context.vc_ucred = NULL;
4916
2d21ac55
A
4917 /*
4918 * Validate parameters; if valid, copy the descriptor array and string
4919 * arguments into local memory. Before proceeding, the following
4920 * conditions must have been met:
4921 *
4922 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
4923 * o There must be sufficient room in the request for at least one
4924 * descriptor and a one yte NUL terminated string.
4925 * o The allocation of local storage must not fail.
4926 */
91447636
A
4927 if (uap->size > ACCESSX_MAX_TABLESIZE)
4928 return(ENOMEM);
2d21ac55 4929 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
91447636 4930 return(EINVAL);
2d21ac55
A
4931 if (uap->size <= sizeof (stack_input)) {
4932 input = stack_input;
4933 } else {
91447636
A
4934 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
4935 if (input == NULL) {
4936 error = ENOMEM;
4937 goto out;
4938 }
2d21ac55 4939 }
91447636 4940 error = copyin(uap->entries, input, uap->size);
55e303ae 4941 if (error)
91447636 4942 goto out;
1c79356b 4943
b0d623f7
A
4944 AUDIT_ARG(opaque, input, uap->size);
4945
91447636 4946 /*
2d21ac55
A
4947 * Force NUL termination of the copyin buffer to avoid nami() running
4948 * off the end. If the caller passes us bogus data, they may get a
4949 * bogus result.
4950 */
4951 ((char *)input)[uap->size - 1] = 0;
4952
4953 /*
4954 * Access is defined as checking against the process' real identity,
4955 * even if operations are checking the effective identity. This
4956 * requires that we use a local vfs context.
91447636
A
4957 */
4958 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
2d21ac55 4959 context.vc_thread = current_thread();
91447636
A
4960
4961 /*
2d21ac55
A
4962 * Find out how many entries we have, so we can allocate the result
4963 * array by walking the list and adjusting the count downward by the
4964 * earliest string offset we see.
91447636 4965 */
2d21ac55
A
4966 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
4967 desc_actual = desc_max;
4968 for (i = 0; i < desc_actual; i++) {
91447636 4969 /*
2d21ac55
A
4970 * Take the offset to the name string for this entry and
4971 * convert to an input array index, which would be one off
4972 * the end of the array if this entry was the lowest-addressed
4973 * name string.
91447636
A
4974 */
4975 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
2d21ac55
A
4976
4977 /*
4978 * An offset greater than the max allowable offset is an error.
4979 * It is also an error for any valid entry to point
4980 * to a location prior to the end of the current entry, if
4981 * it's not a reference to the string of the previous entry.
4982 */
4983 if (j > desc_max || (j != 0 && j <= i)) {
91447636
A
4984 error = EINVAL;
4985 goto out;
4986 }
2d21ac55
A
4987
4988 /*
4989 * An offset of 0 means use the previous descriptor's offset;
4990 * this is used to chain multiple requests for the same file
4991 * to avoid multiple lookups.
4992 */
91447636 4993 if (j == 0) {
2d21ac55 4994 /* This is not valid for the first entry */
91447636
A
4995 if (i == 0) {
4996 error = EINVAL;
4997 goto out;
4998 }
4999 continue;
5000 }
2d21ac55
A
5001
5002 /*
5003 * If the offset of the string for this descriptor is before
5004 * what we believe is the current actual last descriptor,
5005 * then we need to adjust our estimate downward; this permits
5006 * the string table following the last descriptor to be out
5007 * of order relative to the descriptor list.
5008 */
5009 if (j < desc_actual)
5010 desc_actual = j;
91447636 5011 }
2d21ac55
A
5012
5013 /*
5014 * We limit the actual number of descriptors we are willing to process
5015 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5016 * requested does not exceed this limit,
5017 */
5018 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
91447636
A
5019 error = ENOMEM;
5020 goto out;
5021 }
2d21ac55 5022 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
91447636
A
5023 if (result == NULL) {
5024 error = ENOMEM;
5025 goto out;
5026 }
5027
5028 /*
2d21ac55
A
5029 * Do the work by iterating over the descriptor entries we know to
5030 * at least appear to contain valid data.
91447636
A
5031 */
5032 error = 0;
2d21ac55 5033 for (i = 0; i < desc_actual; i++) {
91447636 5034 /*
2d21ac55
A
5035 * If the ad_name_offset is 0, then we use the previous
5036 * results to make the check; otherwise, we are looking up
5037 * a new file name.
91447636
A
5038 */
5039 if (input[i].ad_name_offset != 0) {
5040 /* discard old vnodes */
5041 if (vp) {
5042 vnode_put(vp);
5043 vp = NULL;
5044 }
5045 if (dvp) {
5046 vnode_put(dvp);
5047 dvp = NULL;
5048 }
5049
2d21ac55
A
5050 /*
5051 * Scan forward in the descriptor list to see if we
5052 * need the parent vnode. We will need it if we are
5053 * deleting, since we must have rights to remove
5054 * entries in the parent directory, as well as the
5055 * rights to delete the object itself.
5056 */
91447636 5057 wantdelete = input[i].ad_flags & _DELETE_OK;
2d21ac55 5058 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
91447636
A
5059 if (input[j].ad_flags & _DELETE_OK)
5060 wantdelete = 1;
5061
5062 niopts = FOLLOW | AUDITVNPATH1;
2d21ac55 5063
91447636
A
5064 /* need parent for vnode_authorize for deletion test */
5065 if (wantdelete)
5066 niopts |= WANTPARENT;
5067
5068 /* do the lookup */
6d2010ae
A
5069 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
5070 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
5071 &context);
91447636
A
5072 error = namei(&nd);
5073 if (!error) {
5074 vp = nd.ni_vp;
5075 if (wantdelete)
5076 dvp = nd.ni_dvp;
5077 }
5078 nameidone(&nd);
5079 }
5080
5081 /*
5082 * Handle lookup errors.
5083 */
5084 switch(error) {
5085 case ENOENT:
5086 case EACCES:
5087 case EPERM:
5088 case ENOTDIR:
5089 result[i] = error;
5090 break;
5091 case 0:
5092 /* run this access check */
5093 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
5094 break;
5095 default:
5096 /* fatal lookup error */
5097
5098 goto out;
5099 }
5100 }
5101
b0d623f7
A
5102 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
5103
91447636 5104 /* copy out results */
2d21ac55 5105 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
91447636
A
5106
5107out:
2d21ac55 5108 if (input && input != stack_input)
91447636
A
5109 FREE(input, M_TEMP);
5110 if (result)
5111 FREE(result, M_TEMP);
5112 if (vp)
5113 vnode_put(vp);
5114 if (dvp)
5115 vnode_put(dvp);
0c530ab8
A
5116 if (IS_VALID_CRED(context.vc_ucred))
5117 kauth_cred_unref(&context.vc_ucred);
91447636 5118 return(error);
1c79356b
A
5119}
5120
2d21ac55
A
5121
5122/*
5123 * Returns: 0 Success
5124 * namei:EFAULT Bad address
5125 * namei:ENAMETOOLONG Filename too long
5126 * namei:ENOENT No such file or directory
5127 * namei:ELOOP Too many levels of symbolic links
5128 * namei:EBADF Bad file descriptor
5129 * namei:ENOTDIR Not a directory
5130 * namei:???
5131 * access1:
5132 */
fe8ab488
A
5133static int
5134faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5135 int flag, enum uio_seg segflg)
1c79356b 5136{
1c79356b
A
5137 int error;
5138 struct nameidata nd;
91447636
A
5139 int niopts;
5140 struct vfs_context context;
cf7d32b8
A
5141#if NAMEDRSRCFORK
5142 int is_namedstream = 0;
5143#endif
5144
91447636 5145 /*
fe8ab488
A
5146 * Unless the AT_EACCESS option is used, Access is defined as checking
5147 * against the process' real identity, even if operations are checking
5148 * the effective identity. So we need to tweak the credential
5149 * in the context for that case.
91447636 5150 */
fe8ab488
A
5151 if (!(flag & AT_EACCESS))
5152 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5153 else
5154 context.vc_ucred = ctx->vc_ucred;
5155 context.vc_thread = ctx->vc_thread;
5156
91447636
A
5157
5158 niopts = FOLLOW | AUDITVNPATH1;
5159 /* need parent for vnode_authorize for deletion test */
fe8ab488 5160 if (amode & _DELETE_OK)
91447636 5161 niopts |= WANTPARENT;
fe8ab488
A
5162 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5163 path, &context);
2d21ac55
A
5164
5165#if NAMEDRSRCFORK
5166 /* access(F_OK) calls are allowed for resource forks. */
fe8ab488 5167 if (amode == F_OK)
2d21ac55
A
5168 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5169#endif
fe8ab488 5170 error = nameiat(&nd, fd);
91447636
A
5171 if (error)
5172 goto out;
5173
cf7d32b8 5174#if NAMEDRSRCFORK
b0d623f7
A
5175 /* Grab reference on the shadow stream file vnode to
5176 * force an inactive on release which will mark it
5177 * for recycle.
cf7d32b8
A
5178 */
5179 if (vnode_isnamedstream(nd.ni_vp) &&
b0d623f7
A
5180 (nd.ni_vp->v_parent != NULLVP) &&
5181 vnode_isshadow(nd.ni_vp)) {
cf7d32b8
A
5182 is_namedstream = 1;
5183 vnode_ref(nd.ni_vp);
5184 }
5185#endif
5186
fe8ab488 5187 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
b0d623f7 5188
cf7d32b8
A
5189#if NAMEDRSRCFORK
5190 if (is_namedstream) {
5191 vnode_rele(nd.ni_vp);
5192 }
5193#endif
5194
91447636 5195 vnode_put(nd.ni_vp);
fe8ab488 5196 if (amode & _DELETE_OK)
91447636
A
5197 vnode_put(nd.ni_dvp);
5198 nameidone(&nd);
5199
5200out:
fe8ab488
A
5201 if (!(flag & AT_EACCESS))
5202 kauth_cred_unref(&context.vc_ucred);
5203 return (error);
5204}
5205
5206int
5207access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5208{
5209 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5210 uap->path, uap->flags, 0, UIO_USERSPACE));
91447636
A
5211}
5212
fe8ab488
A
5213int
5214faccessat(__unused proc_t p, struct faccessat_args *uap,
5215 __unused int32_t *retval)
5216{
5217 if (uap->flag & ~AT_EACCESS)
5218 return (EINVAL);
5219
5220 return (faccessat_internal(vfs_context_current(), uap->fd,
5221 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5222}
91447636 5223
2d21ac55
A
5224/*
5225 * Returns: 0 Success
5226 * EFAULT
5227 * copyout:EFAULT
5228 * namei:???
5229 * vn_stat:???
5230 */
91447636 5231static int
fe8ab488
A
5232fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5233 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5234 enum uio_seg segflg, int fd, int flag)
91447636 5235{
fe8ab488
A
5236 struct nameidata nd;
5237 int follow;
b0d623f7
A
5238 union {
5239 struct stat sb;
5240 struct stat64 sb64;
5241 } source;
5242 union {
5243 struct user64_stat user64_sb;
5244 struct user32_stat user32_sb;
5245 struct user64_stat64 user64_sb64;
5246 struct user32_stat64 user32_sb64;
5247 } dest;
91447636
A
5248 caddr_t sbp;
5249 int error, my_size;
5250 kauth_filesec_t fsec;
5251 size_t xsecurity_bufsize;
2d21ac55 5252 void * statptr;
1c79356b 5253
fe8ab488
A
5254 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5255 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5256 segflg, path, ctx);
5257
2d21ac55 5258#if NAMEDRSRCFORK
cf7d32b8 5259 int is_namedstream = 0;
2d21ac55 5260 /* stat calls are allowed for resource forks. */
fe8ab488 5261 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
2d21ac55 5262#endif
fe8ab488 5263 error = nameiat(&nd, fd);
91447636 5264 if (error)
1c79356b 5265 return (error);
91447636 5266 fsec = KAUTH_FILESEC_NONE;
b0d623f7
A
5267
5268 statptr = (void *)&source;
cf7d32b8
A
5269
5270#if NAMEDRSRCFORK
b0d623f7
A
5271 /* Grab reference on the shadow stream file vnode to
5272 * force an inactive on release which will mark it
5273 * for recycle.
cf7d32b8 5274 */
fe8ab488
A
5275 if (vnode_isnamedstream(nd.ni_vp) &&
5276 (nd.ni_vp->v_parent != NULLVP) &&
5277 vnode_isshadow(nd.ni_vp)) {
cf7d32b8 5278 is_namedstream = 1;
fe8ab488 5279 vnode_ref(nd.ni_vp);
cf7d32b8
A
5280 }
5281#endif
5282
fe8ab488 5283 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
2d21ac55
A
5284
5285#if NAMEDRSRCFORK
cf7d32b8 5286 if (is_namedstream) {
fe8ab488 5287 vnode_rele(nd.ni_vp);
2d21ac55
A
5288 }
5289#endif
fe8ab488
A
5290 vnode_put(nd.ni_vp);
5291 nameidone(&nd);
91447636 5292
1c79356b
A
5293 if (error)
5294 return (error);
91447636 5295 /* Zap spare fields */
2d21ac55 5296 if (isstat64 != 0) {
b0d623f7
A
5297 source.sb64.st_lspare = 0;
5298 source.sb64.st_qspare[0] = 0LL;
5299 source.sb64.st_qspare[1] = 0LL;
2d21ac55 5300 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
b0d623f7
A
5301 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
5302 my_size = sizeof(dest.user64_sb64);
5303 sbp = (caddr_t)&dest.user64_sb64;
2d21ac55 5304 } else {
b0d623f7
A
5305 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
5306 my_size = sizeof(dest.user32_sb64);
5307 sbp = (caddr_t)&dest.user32_sb64;
2d21ac55
A
5308 }
5309 /*
5310 * Check if we raced (post lookup) against the last unlink of a file.
5311 */
b0d623f7
A
5312 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5313 source.sb64.st_nlink = 1;
2d21ac55
A
5314 }
5315 } else {
b0d623f7
A
5316 source.sb.st_lspare = 0;
5317 source.sb.st_qspare[0] = 0LL;
5318 source.sb.st_qspare[1] = 0LL;
2d21ac55 5319 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
b0d623f7
A
5320 munge_user64_stat(&source.sb, &dest.user64_sb);
5321 my_size = sizeof(dest.user64_sb);
5322 sbp = (caddr_t)&dest.user64_sb;
2d21ac55 5323 } else {
b0d623f7
A
5324 munge_user32_stat(&source.sb, &dest.user32_sb);
5325 my_size = sizeof(dest.user32_sb);
5326 sbp = (caddr_t)&dest.user32_sb;
2d21ac55
A
5327 }
5328
5329 /*
5330 * Check if we raced (post lookup) against the last unlink of a file.
5331 */
b0d623f7
A
5332 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5333 source.sb.st_nlink = 1;
2d21ac55 5334 }
91447636
A
5335 }
5336 if ((error = copyout(sbp, ub, my_size)) != 0)
5337 goto out;
5338
5339 /* caller wants extended security information? */
5340 if (xsecurity != USER_ADDR_NULL) {
5341
5342 /* did we get any? */
5343 if (fsec == KAUTH_FILESEC_NONE) {
5344 if (susize(xsecurity_size, 0) != 0) {
5345 error = EFAULT;
5346 goto out;
5347 }
5348 } else {
5349 /* find the user buffer size */
5350 xsecurity_bufsize = fusize(xsecurity_size);
5351
5352 /* copy out the actual data size */
5353 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5354 error = EFAULT;
5355 goto out;
5356 }
5357
5358 /* if the caller supplied enough room, copy out to it */
5359 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5360 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5361 }
5362 }
5363out:
5364 if (fsec != KAUTH_FILESEC_NONE)
5365 kauth_filesec_free(fsec);
1c79356b
A
5366 return (error);
5367}
5368
b0d623f7
A
5369/*
5370 * stat_extended: Get file status; with extended security (ACL).
5371 *
5372 * Parameters: p (ignored)
5373 * uap User argument descriptor (see below)
5374 * retval (ignored)
5375 *
5376 * Indirect: uap->path Path of file to get status from
5377 * uap->ub User buffer (holds file status info)
5378 * uap->xsecurity ACL to get (extended security)
5379 * uap->xsecurity_size Size of ACL
5380 *
5381 * Returns: 0 Success
5382 * !0 errno value
5383 *
5384 */
2d21ac55 5385int
fe8ab488
A
5386stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5387 __unused int32_t *retval)
2d21ac55 5388{
fe8ab488
A
5389 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5390 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5391 0));
1c79356b
A
5392}
5393
2d21ac55
A
5394/*
5395 * Returns: 0 Success
fe8ab488 5396 * fstatat_internal:??? [see fstatat_internal() in this file]
2d21ac55 5397 */
91447636 5398int
b0d623f7 5399stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
1c79356b 5400{
fe8ab488
A
5401 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5402 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
91447636 5403}
1c79356b 5404
91447636 5405int
b0d623f7 5406stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
91447636 5407{
fe8ab488
A
5408 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5409 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
1c79356b 5410}
1c79356b 5411
b0d623f7
A
5412/*
5413 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5414 *
5415 * Parameters: p (ignored)
5416 * uap User argument descriptor (see below)
5417 * retval (ignored)
5418 *
5419 * Indirect: uap->path Path of file to get status from
5420 * uap->ub User buffer (holds file status info)
5421 * uap->xsecurity ACL to get (extended security)
5422 * uap->xsecurity_size Size of ACL
5423 *
5424 * Returns: 0 Success
5425 * !0 errno value
5426 *
5427 */
2d21ac55 5428int
b0d623f7 5429stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
2d21ac55 5430{
fe8ab488
A
5431 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5432 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5433 0));
2d21ac55 5434}
91447636 5435
b0d623f7
A
5436/*
5437 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5438 *
5439 * Parameters: p (ignored)
5440 * uap User argument descriptor (see below)
5441 * retval (ignored)
5442 *
5443 * Indirect: uap->path Path of file to get status from
5444 * uap->ub User buffer (holds file status info)
5445 * uap->xsecurity ACL to get (extended security)
5446 * uap->xsecurity_size Size of ACL
5447 *
5448 * Returns: 0 Success
5449 * !0 errno value
5450 *
5451 */
2d21ac55 5452int
b0d623f7 5453lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
2d21ac55 5454{
fe8ab488
A
5455 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5456 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5457 AT_SYMLINK_NOFOLLOW));
91447636
A
5458}
5459
fe8ab488
A
5460/*
5461 * Get file status; this version does not follow links.
5462 */
91447636 5463int
b0d623f7 5464lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
91447636 5465{
fe8ab488
A
5466 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5467 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
2d21ac55 5468}
b0d623f7 5469
2d21ac55 5470int
b0d623f7 5471lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
2d21ac55 5472{
fe8ab488
A
5473 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5474 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
91447636
A
5475}
5476
b0d623f7
A
5477/*
5478 * lstat64_extended: Get file status; can handle large inode numbers; does not
5479 * follow links; with extended security (ACL).
5480 *
5481 * Parameters: p (ignored)
5482 * uap User argument descriptor (see below)
5483 * retval (ignored)
5484 *
5485 * Indirect: uap->path Path of file to get status from
5486 * uap->ub User buffer (holds file status info)
5487 * uap->xsecurity ACL to get (extended security)
5488 * uap->xsecurity_size Size of ACL
5489 *
5490 * Returns: 0 Success
5491 * !0 errno value
5492 *
5493 */
91447636 5494int
b0d623f7 5495lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
91447636 5496{
fe8ab488
A
5497 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5498 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5499 AT_SYMLINK_NOFOLLOW));
5500}
5501
5502int
5503fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5504{
5505 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5506 return (EINVAL);
5507
5508 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5509 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5510}
5511
5512int
5513fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5514 __unused int32_t *retval)
5515{
5516 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5517 return (EINVAL);
5518
5519 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5520 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
91447636
A
5521}
5522
1c79356b 5523/*
91447636 5524 * Get configurable pathname variables.
2d21ac55
A
5525 *
5526 * Returns: 0 Success
5527 * namei:???
5528 * vn_pathconf:???
5529 *
5530 * Notes: Global implementation constants are intended to be
5531 * implemented in this function directly; all other constants
5532 * are per-FS implementation, and therefore must be handled in
5533 * each respective FS, instead.
5534 *
5535 * XXX We implement some things globally right now that should actually be
5536 * XXX per-FS; we will need to deal with this at some point.
1c79356b 5537 */
1c79356b
A
5538/* ARGSUSED */
5539int
b0d623f7 5540pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
1c79356b 5541{
1c79356b
A
5542 int error;
5543 struct nameidata nd;
2d21ac55 5544 vfs_context_t ctx = vfs_context_current();
91447636 5545
6d2010ae 5546 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
2d21ac55 5547 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
5548 error = namei(&nd);
5549 if (error)
1c79356b 5550 return (error);
1c79356b 5551
2d21ac55 5552 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
1c79356b 5553
91447636
A
5554 vnode_put(nd.ni_vp);
5555 nameidone(&nd);
1c79356b
A
5556 return (error);
5557}
5558
5559/*
5560 * Return target name of a symbolic link.
5561 */
1c79356b 5562/* ARGSUSED */
fe8ab488
A
5563static int
5564readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
5565 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
5566 int *retval)
1c79356b 5567{
2d21ac55 5568 vnode_t vp;
91447636 5569 uio_t auio;
1c79356b
A
5570 int error;
5571 struct nameidata nd;
91447636
A
5572 char uio_buf[ UIO_SIZEOF(1) ];
5573
fe8ab488
A
5574 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5575 seg, path, ctx);
5576
5577 error = nameiat(&nd, fd);
55e303ae 5578 if (error)
1c79356b
A
5579 return (error);
5580 vp = nd.ni_vp;
91447636
A
5581
5582 nameidone(&nd);
5583
fe8ab488
A
5584 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
5585 &uio_buf[0], sizeof(uio_buf));
5586 uio_addiov(auio, buf, bufsize);
5587 if (vp->v_type != VLNK) {
1c79356b 5588 error = EINVAL;
fe8ab488 5589 } else {
2d21ac55 5590#if CONFIG_MACF
fe8ab488 5591 error = mac_vnode_check_readlink(ctx, vp);
2d21ac55
A
5592#endif
5593 if (error == 0)
fe8ab488
A
5594 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
5595 ctx);
91447636 5596 if (error == 0)
2d21ac55 5597 error = VNOP_READLINK(vp, auio, ctx);
91447636
A
5598 }
5599 vnode_put(vp);
b0d623f7 5600
fe8ab488 5601 *retval = bufsize - (int)uio_resid(auio);
1c79356b
A
5602 return (error);
5603}
5604
fe8ab488
A
5605int
5606readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5607{
5608 enum uio_seg procseg;
5609
5610 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5611 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5612 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5613 uap->count, procseg, retval));
5614}
5615
5616int
5617readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5618{
5619 enum uio_seg procseg;
5620
5621 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5622 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5623 procseg, uap->buf, uap->bufsize, procseg, retval));
5624}
5625
5626/*
5627 * Change file flags.
91447636
A
5628 */
5629static int
5630chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5631{
5632 struct vnode_attr va;
5633 kauth_action_t action;
5634 int error;
5635
5636 VATTR_INIT(&va);
5637 VATTR_SET(&va, va_flags, flags);
5638
2d21ac55
A
5639#if CONFIG_MACF
5640 error = mac_vnode_check_setflags(ctx, vp, flags);
5641 if (error)
5642 goto out;
5643#endif
5644
91447636
A
5645 /* request authorisation, disregard immutability */
5646 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5647 goto out;
5648 /*
5649 * Request that the auth layer disregard those file flags it's allowed to when
5650 * authorizing this operation; we need to do this in order to be able to
5651 * clear immutable flags.
5652 */
5653 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
5654 goto out;
5655 error = vnode_setattr(vp, &va, ctx);
5656
2d21ac55
A
5657 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5658 error = ENOTSUP;
5659 }
91447636
A
5660out:
5661 vnode_put(vp);
5662 return(error);
5663}
5664
1c79356b
A
5665/*
5666 * Change flags of a file given a path name.
5667 */
1c79356b
A
5668/* ARGSUSED */
5669int
b0d623f7 5670chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
1c79356b 5671{
2d21ac55
A
5672 vnode_t vp;
5673 vfs_context_t ctx = vfs_context_current();
1c79356b
A
5674 int error;
5675 struct nameidata nd;
5676
55e303ae 5677 AUDIT_ARG(fflags, uap->flags);
6d2010ae 5678 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
2d21ac55 5679 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
5680 error = namei(&nd);
5681 if (error)
1c79356b
A
5682 return (error);
5683 vp = nd.ni_vp;
91447636
A
5684 nameidone(&nd);
5685
2d21ac55 5686 error = chflags1(vp, uap->flags, ctx);
91447636
A
5687
5688 return(error);
1c79356b
A
5689}
5690
5691/*
5692 * Change flags of a file given a file descriptor.
5693 */
1c79356b
A
5694/* ARGSUSED */
5695int
b0d623f7 5696fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
1c79356b 5697{
2d21ac55 5698 vnode_t vp;
1c79356b
A
5699 int error;
5700
55e303ae
A
5701 AUDIT_ARG(fd, uap->fd);
5702 AUDIT_ARG(fflags, uap->flags);
91447636 5703 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 5704 return (error);
55e303ae 5705
91447636
A
5706 if ((error = vnode_getwithref(vp))) {
5707 file_drop(uap->fd);
5708 return(error);
5709 }
e5568f75
A
5710
5711 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5712
2d21ac55 5713 error = chflags1(vp, uap->flags, vfs_context_current());
91447636
A
5714
5715 file_drop(uap->fd);
5716 return (error);
5717}
5718
5719/*
5720 * Change security information on a filesystem object.
2d21ac55
A
5721 *
5722 * Returns: 0 Success
5723 * EPERM Operation not permitted
5724 * vnode_authattr:??? [anything vnode_authattr can return]
5725 * vnode_authorize:??? [anything vnode_authorize can return]
5726 * vnode_setattr:??? [anything vnode_setattr can return]
5727 *
5728 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5729 * translated to EPERM before being returned.
91447636
A
5730 */
5731static int
fe8ab488 5732chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
91447636
A
5733{
5734 kauth_action_t action;
5735 int error;
5736
b0d623f7
A
5737 AUDIT_ARG(mode, vap->va_mode);
5738 /* XXX audit new args */
91447636 5739
2d21ac55
A
5740#if NAMEDSTREAMS
5741 /* chmod calls are not allowed for resource forks. */
5742 if (vp->v_flag & VISNAMEDSTREAM) {
5743 return (EPERM);
5744 }
5745#endif
5746
5747#if CONFIG_MACF
316670eb
A
5748 if (VATTR_IS_ACTIVE(vap, va_mode) &&
5749 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
2d21ac55
A
5750 return (error);
5751#endif
5752
91447636
A
5753 /* make sure that the caller is allowed to set this security information */
5754 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
5755 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5756 if (error == EACCES)
5757 error = EPERM;
5758 return(error);
5759 }
5760
5761 error = vnode_setattr(vp, vap, ctx);
5762
1c79356b
A
5763 return (error);
5764}
5765
91447636 5766
1c79356b 5767/*
b0d623f7 5768 * Change mode of a file given a path name.
2d21ac55
A
5769 *
5770 * Returns: 0 Success
5771 * namei:??? [anything namei can return]
fe8ab488 5772 * chmod_vnode:??? [anything chmod_vnode can return]
1c79356b 5773 */
91447636 5774static int
fe8ab488
A
5775chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
5776 int fd, int flag, enum uio_seg segflg)
91447636
A
5777{
5778 struct nameidata nd;
fe8ab488 5779 int follow, error;
91447636 5780
fe8ab488
A
5781 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5782 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
5783 segflg, path, ctx);
5784 if ((error = nameiat(&nd, fd)))
91447636 5785 return (error);
fe8ab488 5786 error = chmod_vnode(ctx, nd.ni_vp, vap);
91447636
A
5787 vnode_put(nd.ni_vp);
5788 nameidone(&nd);
5789 return(error);
5790}
5791
0c530ab8 5792/*
b0d623f7
A
5793 * chmod_extended: Change the mode of a file given a path name; with extended
5794 * argument list (including extended security (ACL)).
0c530ab8
A
5795 *
5796 * Parameters: p Process requesting the open
5797 * uap User argument descriptor (see below)
5798 * retval (ignored)
5799 *
5800 * Indirect: uap->path Path to object (same as 'chmod')
5801 * uap->uid UID to set
5802 * uap->gid GID to set
5803 * uap->mode File mode to set (same as 'chmod')
5804 * uap->xsecurity ACL to set (or delete)
5805 *
5806 * Returns: 0 Success
5807 * !0 errno value
5808 *
5809 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5810 *
5811 * XXX: We should enummerate the possible errno values here, and where
5812 * in the code they originated.
5813 */
1c79356b 5814int
b0d623f7 5815chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
1c79356b 5816{
1c79356b 5817 int error;
91447636
A
5818 struct vnode_attr va;
5819 kauth_filesec_t xsecdst;
5820
b0d623f7
A
5821 AUDIT_ARG(owner, uap->uid, uap->gid);
5822
91447636
A
5823 VATTR_INIT(&va);
5824 if (uap->mode != -1)
5825 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5826 if (uap->uid != KAUTH_UID_NONE)
5827 VATTR_SET(&va, va_uid, uap->uid);
5828 if (uap->gid != KAUTH_GID_NONE)
5829 VATTR_SET(&va, va_gid, uap->gid);
5830
5831 xsecdst = NULL;
5832 switch(uap->xsecurity) {
5833 /* explicit remove request */
5834 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5835 VATTR_SET(&va, va_acl, NULL);
5836 break;
5837 /* not being set */
5838 case USER_ADDR_NULL:
5839 break;
5840 default:
5841 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5842 return(error);
5843 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5844 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
5845 }
1c79356b 5846
fe8ab488
A
5847 error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
5848 UIO_USERSPACE);
55e303ae 5849
91447636
A
5850 if (xsecdst != NULL)
5851 kauth_filesec_free(xsecdst);
5852 return(error);
5853}
4a249263 5854
2d21ac55
A
5855/*
5856 * Returns: 0 Success
fe8ab488 5857 * chmodat:??? [anything chmodat can return]
2d21ac55 5858 */
fe8ab488
A
5859static int
5860fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
5861 int flag, enum uio_seg segflg)
91447636 5862{
91447636
A
5863 struct vnode_attr va;
5864
5865 VATTR_INIT(&va);
fe8ab488
A
5866 VATTR_SET(&va, va_mode, mode & ALLPERMS);
5867
5868 return (chmodat(ctx, path, &va, fd, flag, segflg));
5869}
5870
5871int
5872chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
5873{
5874 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
5875 AT_FDCWD, 0, UIO_USERSPACE));
5876}
91447636 5877
fe8ab488
A
5878int
5879fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
5880{
5881 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5882 return (EINVAL);
5883
5884 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
5885 uap->fd, uap->flag, UIO_USERSPACE));
1c79356b
A
5886}
5887
5888/*
5889 * Change mode of a file given a file descriptor.
5890 */
91447636 5891static int
2d21ac55 5892fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
1c79356b 5893{
2d21ac55 5894 vnode_t vp;
1c79356b 5895 int error;
55e303ae 5896
91447636 5897 AUDIT_ARG(fd, fd);
55e303ae 5898
91447636
A
5899 if ((error = file_vnode(fd, &vp)) != 0)
5900 return (error);
5901 if ((error = vnode_getwithref(vp)) != 0) {
5902 file_drop(fd);
5903 return(error);
5904 }
55e303ae
A
5905 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5906
fe8ab488 5907 error = chmod_vnode(vfs_context_current(), vp, vap);
91447636
A
5908 (void)vnode_put(vp);
5909 file_drop(fd);
55e303ae 5910
1c79356b
A
5911 return (error);
5912}
5913
b0d623f7
A
5914/*
5915 * fchmod_extended: Change mode of a file given a file descriptor; with
5916 * extended argument list (including extended security (ACL)).
5917 *
5918 * Parameters: p Process requesting to change file mode
5919 * uap User argument descriptor (see below)
5920 * retval (ignored)
5921 *
5922 * Indirect: uap->mode File mode to set (same as 'chmod')
5923 * uap->uid UID to set
5924 * uap->gid GID to set
5925 * uap->xsecurity ACL to set (or delete)
5926 * uap->fd File descriptor of file to change mode
5927 *
5928 * Returns: 0 Success
5929 * !0 errno value
5930 *
5931 */
91447636 5932int
b0d623f7 5933fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
91447636
A
5934{
5935 int error;
5936 struct vnode_attr va;
5937 kauth_filesec_t xsecdst;
5938
b0d623f7
A
5939 AUDIT_ARG(owner, uap->uid, uap->gid);
5940
91447636
A
5941 VATTR_INIT(&va);
5942 if (uap->mode != -1)
5943 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5944 if (uap->uid != KAUTH_UID_NONE)
5945 VATTR_SET(&va, va_uid, uap->uid);
5946 if (uap->gid != KAUTH_GID_NONE)
5947 VATTR_SET(&va, va_gid, uap->gid);
5948
5949 xsecdst = NULL;
5950 switch(uap->xsecurity) {
5951 case USER_ADDR_NULL:
5952 VATTR_SET(&va, va_acl, NULL);
5953 break;
39236c6e
A
5954 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5955 VATTR_SET(&va, va_acl, NULL);
5956 break;
5957 /* not being set */
91447636
A
5958 case CAST_USER_ADDR_T(-1):
5959 break;
5960 default:
5961 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5962 return(error);
5963 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5964 }
5965
5966 error = fchmod1(p, uap->fd, &va);
5967
5968
5969 switch(uap->xsecurity) {
5970 case USER_ADDR_NULL:
5971 case CAST_USER_ADDR_T(-1):
5972 break;
5973 default:
5974 if (xsecdst != NULL)
5975 kauth_filesec_free(xsecdst);
5976 }
5977 return(error);
5978}
5979
5980int
b0d623f7 5981fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
91447636
A
5982{
5983 struct vnode_attr va;
5984
5985 VATTR_INIT(&va);
5986 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5987
5988 return(fchmod1(p, uap->fd, &va));
5989}
5990
5991
1c79356b
A
5992/*
5993 * Set ownership given a path name.
5994 */
1c79356b 5995/* ARGSUSED */
91447636 5996static int
fe8ab488
A
5997fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
5998 gid_t gid, int flag, enum uio_seg segflg)
1c79356b 5999{
2d21ac55 6000 vnode_t vp;
91447636 6001 struct vnode_attr va;
1c79356b
A
6002 int error;
6003 struct nameidata nd;
fe8ab488 6004 int follow;
91447636 6005 kauth_action_t action;
1c79356b 6006
fe8ab488 6007 AUDIT_ARG(owner, uid, gid);
55e303ae 6008
fe8ab488
A
6009 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6010 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
6011 path, ctx);
6012 error = nameiat(&nd, fd);
55e303ae 6013 if (error)
1c79356b
A
6014 return (error);
6015 vp = nd.ni_vp;
6016
91447636
A
6017 nameidone(&nd);
6018
91447636 6019 VATTR_INIT(&va);
fe8ab488
A
6020 if (uid != (uid_t)VNOVAL)
6021 VATTR_SET(&va, va_uid, uid);
6022 if (gid != (gid_t)VNOVAL)
6023 VATTR_SET(&va, va_gid, gid);
91447636 6024
2d21ac55 6025#if CONFIG_MACF
fe8ab488 6026 error = mac_vnode_check_setowner(ctx, vp, uid, gid);
2d21ac55
A
6027 if (error)
6028 goto out;
6029#endif
6030
91447636
A
6031 /* preflight and authorize attribute changes */
6032 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6033 goto out;
6034 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6035 goto out;
6036 error = vnode_setattr(vp, &va, ctx);
6037
6038out:
6039 /*
6040 * EACCES is only allowed from namei(); permissions failure should
6041 * return EPERM, so we need to translate the error code.
6042 */
6043 if (error == EACCES)
6044 error = EPERM;
fe8ab488 6045
91447636 6046 vnode_put(vp);
1c79356b
A
6047 return (error);
6048}
6049
91447636 6050int
fe8ab488 6051chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
91447636 6052{
fe8ab488
A
6053 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6054 uap->uid, uap->gid, 0, UIO_USERSPACE));
91447636
A
6055}
6056
6057int
fe8ab488 6058lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
91447636 6059{
fe8ab488
A
6060 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6061 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6062}
6063
6064int
6065fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6066{
6067 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6068 return (EINVAL);
6069
6070 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6071 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
91447636
A
6072}
6073
1c79356b
A
6074/*
6075 * Set ownership given a file descriptor.
6076 */
1c79356b
A
6077/* ARGSUSED */
6078int
b0d623f7 6079fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
1c79356b 6080{
91447636 6081 struct vnode_attr va;
2d21ac55
A
6082 vfs_context_t ctx = vfs_context_current();
6083 vnode_t vp;
1c79356b 6084 int error;
91447636 6085 kauth_action_t action;
1c79356b 6086
55e303ae
A
6087 AUDIT_ARG(owner, uap->uid, uap->gid);
6088 AUDIT_ARG(fd, uap->fd);
6089
91447636 6090 if ( (error = file_vnode(uap->fd, &vp)) )
1c79356b 6091 return (error);
55e303ae 6092
91447636
A
6093 if ( (error = vnode_getwithref(vp)) ) {
6094 file_drop(uap->fd);
6095 return(error);
6096 }
55e303ae
A
6097 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6098
91447636
A
6099 VATTR_INIT(&va);
6100 if (uap->uid != VNOVAL)
6101 VATTR_SET(&va, va_uid, uap->uid);
6102 if (uap->gid != VNOVAL)
6103 VATTR_SET(&va, va_gid, uap->gid);
6104
2d21ac55
A
6105#if NAMEDSTREAMS
6106 /* chown calls are not allowed for resource forks. */
6107 if (vp->v_flag & VISNAMEDSTREAM) {
6108 error = EPERM;
6109 goto out;
6110 }
6111#endif
6112
6113#if CONFIG_MACF
6114 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
6115 if (error)
6116 goto out;
6117#endif
91447636
A
6118
6119 /* preflight and authorize attribute changes */
2d21ac55 6120 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
91447636 6121 goto out;
2d21ac55 6122 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
91447636
A
6123 if (error == EACCES)
6124 error = EPERM;
6125 goto out;
6126 }
2d21ac55 6127 error = vnode_setattr(vp, &va, ctx);
4a249263 6128
91447636
A
6129out:
6130 (void)vnode_put(vp);
6131 file_drop(uap->fd);
1c79356b
A
6132 return (error);
6133}
6134
9bccf70c 6135static int
2d21ac55 6136getutimes(user_addr_t usrtvp, struct timespec *tsp)
9bccf70c 6137{
9bccf70c
A
6138 int error;
6139
91447636
A
6140 if (usrtvp == USER_ADDR_NULL) {
6141 struct timeval old_tv;
6142 /* XXX Y2038 bug because of microtime argument */
6143 microtime(&old_tv);
6144 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
9bccf70c
A
6145 tsp[1] = tsp[0];
6146 } else {
91447636 6147 if (IS_64BIT_PROCESS(current_proc())) {
b0d623f7 6148 struct user64_timeval tv[2];
91447636 6149 error = copyin(usrtvp, (void *)tv, sizeof(tv));
b0d623f7
A
6150 if (error)
6151 return (error);
6152 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6153 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 6154 } else {
b0d623f7
A
6155 struct user32_timeval tv[2];
6156 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6157 if (error)
6158 return (error);
6159 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6160 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
91447636 6161 }
9bccf70c
A
6162 }
6163 return 0;
6164}
6165
6166static int
2d21ac55 6167setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
91447636 6168 int nullflag)
9bccf70c
A
6169{
6170 int error;
91447636
A
6171 struct vnode_attr va;
6172 kauth_action_t action;
e5568f75
A
6173
6174 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6175
91447636
A
6176 VATTR_INIT(&va);
6177 VATTR_SET(&va, va_access_time, ts[0]);
6178 VATTR_SET(&va, va_modify_time, ts[1]);
9bccf70c 6179 if (nullflag)
91447636
A
6180 va.va_vaflags |= VA_UTIMES_NULL;
6181
2d21ac55
A
6182#if NAMEDSTREAMS
6183 /* utimes calls are not allowed for resource forks. */
6184 if (vp->v_flag & VISNAMEDSTREAM) {
6185 error = EPERM;
6186 goto out;
6187 }
6188#endif
6189
6190#if CONFIG_MACF
6191 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
6192 if (error)
6193 goto out;
6194#endif
6195 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
6196 if (!nullflag && error == EACCES)
6197 error = EPERM;
91447636 6198 goto out;
2d21ac55
A
6199 }
6200
91447636 6201 /* since we may not need to auth anything, check here */
2d21ac55
A
6202 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6203 if (!nullflag && error == EACCES)
6204 error = EPERM;
91447636 6205 goto out;
2d21ac55 6206 }
91447636 6207 error = vnode_setattr(vp, &va, ctx);
4a249263 6208
9bccf70c
A
6209out:
6210 return error;
6211}
6212
1c79356b
A
6213/*
6214 * Set the access and modification times of a file.
6215 */
1c79356b
A
6216/* ARGSUSED */
6217int
b0d623f7 6218utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
1c79356b 6219{
9bccf70c 6220 struct timespec ts[2];
91447636 6221 user_addr_t usrtvp;
1c79356b
A
6222 int error;
6223 struct nameidata nd;
2d21ac55 6224 vfs_context_t ctx = vfs_context_current();
1c79356b 6225
2d21ac55
A
6226 /*
6227 * AUDIT: Needed to change the order of operations to do the
55e303ae
A
6228 * name lookup first because auditing wants the path.
6229 */
6d2010ae 6230 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
2d21ac55 6231 UIO_USERSPACE, uap->path, ctx);
55e303ae
A
6232 error = namei(&nd);
6233 if (error)
9bccf70c 6234 return (error);
91447636 6235 nameidone(&nd);
55e303ae 6236
91447636
A
6237 /*
6238 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6239 * the current time instead.
6240 */
55e303ae 6241 usrtvp = uap->tptr;
91447636
A
6242 if ((error = getutimes(usrtvp, ts)) != 0)
6243 goto out;
6244
2d21ac55 6245 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
91447636
A
6246
6247out:
6248 vnode_put(nd.ni_vp);
1c79356b
A
6249 return (error);
6250}
6251
9bccf70c
A
6252/*
6253 * Set the access and modification times of a file.
6254 */
9bccf70c
A
6255/* ARGSUSED */
6256int
b0d623f7 6257futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
9bccf70c
A
6258{
6259 struct timespec ts[2];
2d21ac55 6260 vnode_t vp;
91447636 6261 user_addr_t usrtvp;
9bccf70c
A
6262 int error;
6263
55e303ae 6264 AUDIT_ARG(fd, uap->fd);
9bccf70c
A
6265 usrtvp = uap->tptr;
6266 if ((error = getutimes(usrtvp, ts)) != 0)
6267 return (error);
91447636 6268 if ((error = file_vnode(uap->fd, &vp)) != 0)
9bccf70c 6269 return (error);
91447636
A
6270 if((error = vnode_getwithref(vp))) {
6271 file_drop(uap->fd);
6272 return(error);
6273 }
55e303ae 6274
2d21ac55 6275 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
91447636
A
6276 vnode_put(vp);
6277 file_drop(uap->fd);
6278 return(error);
9bccf70c
A
6279}
6280
1c79356b
A
6281/*
6282 * Truncate a file given its path name.
6283 */
1c79356b
A
6284/* ARGSUSED */
6285int
b0d623f7 6286truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
1c79356b 6287{
2d21ac55 6288 vnode_t vp;
91447636 6289 struct vnode_attr va;
2d21ac55 6290 vfs_context_t ctx = vfs_context_current();
1c79356b
A
6291 int error;
6292 struct nameidata nd;
91447636
A
6293 kauth_action_t action;
6294
0b4e3aa0
A
6295 if (uap->length < 0)
6296 return(EINVAL);
6d2010ae 6297 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
2d21ac55 6298 UIO_USERSPACE, uap->path, ctx);
91447636 6299 if ((error = namei(&nd)))
1c79356b
A
6300 return (error);
6301 vp = nd.ni_vp;
91447636
A
6302
6303 nameidone(&nd);
6304
6305 VATTR_INIT(&va);
6306 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55
A
6307
6308#if CONFIG_MACF
6309 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
6310 if (error)
6311 goto out;
6312#endif
6313
6314 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
91447636 6315 goto out;
2d21ac55 6316 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
91447636 6317 goto out;
2d21ac55 6318 error = vnode_setattr(vp, &va, ctx);
91447636
A
6319out:
6320 vnode_put(vp);
1c79356b
A
6321 return (error);
6322}
6323
6324/*
6325 * Truncate a file given a file descriptor.
6326 */
1c79356b
A
6327/* ARGSUSED */
6328int
b0d623f7 6329ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
1c79356b 6330{
2d21ac55 6331 vfs_context_t ctx = vfs_context_current();
91447636 6332 struct vnode_attr va;
2d21ac55 6333 vnode_t vp;
91447636
A
6334 struct fileproc *fp;
6335 int error ;
6336 int fd = uap->fd;
1c79356b 6337
55e303ae 6338 AUDIT_ARG(fd, uap->fd);
0b4e3aa0
A
6339 if (uap->length < 0)
6340 return(EINVAL);
1c79356b 6341
91447636
A
6342 if ( (error = fp_lookup(p,fd,&fp,0)) ) {
6343 return(error);
6344 }
1c79356b 6345
39236c6e
A
6346 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
6347 case DTYPE_PSXSHM:
91447636
A
6348 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
6349 goto out;
39236c6e
A
6350 case DTYPE_VNODE:
6351 break;
6352 default:
91447636
A
6353 error = EINVAL;
6354 goto out;
1c79356b 6355 }
1c79356b 6356
2d21ac55 6357 vp = (vnode_t)fp->f_fglob->fg_data;
e5568f75 6358
91447636
A
6359 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
6360 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6361 error = EINVAL;
6362 goto out;
1c79356b 6363 }
1c79356b 6364
91447636
A
6365 if ((error = vnode_getwithref(vp)) != 0) {
6366 goto out;
6367 }
1c79356b 6368
91447636 6369 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 6370
2d21ac55
A
6371#if CONFIG_MACF
6372 error = mac_vnode_check_truncate(ctx,
6373 fp->f_fglob->fg_cred, vp);
6374 if (error) {
6375 (void)vnode_put(vp);
6376 goto out;
6377 }
6378#endif
91447636
A
6379 VATTR_INIT(&va);
6380 VATTR_SET(&va, va_data_size, uap->length);
2d21ac55 6381 error = vnode_setattr(vp, &va, ctx);
91447636
A
6382 (void)vnode_put(vp);
6383out:
6384 file_drop(fd);
6385 return (error);
1c79356b 6386}
91447636 6387
1c79356b
A
6388
6389/*
b0d623f7 6390 * Sync an open file with synchronized I/O _file_ integrity completion
1c79356b 6391 */
1c79356b
A
6392/* ARGSUSED */
6393int
b0d623f7 6394fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
1c79356b 6395{
2d21ac55 6396 __pthread_testcancel(1);
b0d623f7
A
6397 return(fsync_common(p, uap, MNT_WAIT));
6398}
6399
6400
6401/*
6402 * Sync an open file with synchronized I/O _file_ integrity completion
6403 *
6404 * Notes: This is a legacy support function that does not test for
6405 * thread cancellation points.
6406 */
6407/* ARGSUSED */
6408int
6409fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6410{
6411 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
2d21ac55
A
6412}
6413
b0d623f7
A
6414
6415/*
6416 * Sync an open file with synchronized I/O _data_ integrity completion
6417 */
6418/* ARGSUSED */
2d21ac55 6419int
b0d623f7
A
6420fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6421{
6422 __pthread_testcancel(1);
6423 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6424}
6425
6426
6427/*
6428 * fsync_common
6429 *
6430 * Common fsync code to support both synchronized I/O file integrity completion
6431 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6432 *
6433 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6434 * will only guarantee that the file data contents are retrievable. If
6435 * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
6436 * includes additional metadata unnecessary for retrieving the file data
6437 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6438 * storage.
6439 *
6440 * Parameters: p The process
6441 * uap->fd The descriptor to synchronize
6442 * flags The data integrity flags
6443 *
6444 * Returns: int Success
6445 * fp_getfvp:EBADF Bad file descriptor
6446 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6447 * VNOP_FSYNC:??? unspecified
6448 *
6449 * Notes: We use struct fsync_args because it is a short name, and all
6450 * caller argument structures are otherwise identical.
6451 */
6452static int
6453fsync_common(proc_t p, struct fsync_args *uap, int flags)
2d21ac55
A
6454{
6455 vnode_t vp;
91447636 6456 struct fileproc *fp;
2d21ac55 6457 vfs_context_t ctx = vfs_context_current();
1c79356b
A
6458 int error;
6459
b0d623f7
A
6460 AUDIT_ARG(fd, uap->fd);
6461
91447636 6462 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
1c79356b 6463 return (error);
91447636
A
6464 if ( (error = vnode_getwithref(vp)) ) {
6465 file_drop(uap->fd);
6466 return(error);
6467 }
91447636 6468
b0d623f7
A
6469 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6470
6471 error = VNOP_FSYNC(vp, flags, ctx);
2d21ac55
A
6472
6473#if NAMEDRSRCFORK
6474 /* Sync resource fork shadow file if necessary. */
6475 if ((error == 0) &&
6476 (vp->v_flag & VISNAMEDSTREAM) &&
6477 (vp->v_parent != NULLVP) &&
b0d623f7 6478 vnode_isshadow(vp) &&
2d21ac55
A
6479 (fp->f_flags & FP_WRITTEN)) {
6480 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
6481 }
6482#endif
91447636
A
6483
6484 (void)vnode_put(vp);
6485 file_drop(uap->fd);
1c79356b
A
6486 return (error);
6487}
6488
6489/*
6490 * Duplicate files. Source must be a file, target must be a file or
6491 * must not exist.
91447636
A
6492 *
6493 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6494 * perform inheritance correctly.
1c79356b 6495 */
1c79356b
A
6496/* ARGSUSED */
6497int
b0d623f7 6498copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
1c79356b 6499{
91447636 6500 vnode_t tvp, fvp, tdvp, sdvp;
1c79356b
A
6501 struct nameidata fromnd, tond;
6502 int error;
2d21ac55 6503 vfs_context_t ctx = vfs_context_current();
55e303ae
A
6504
6505 /* Check that the flags are valid. */
1c79356b
A
6506
6507 if (uap->flags & ~CPF_MASK) {
55e303ae
A
6508 return(EINVAL);
6509 }
1c79356b 6510
4bd07ac2 6511 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
2d21ac55 6512 UIO_USERSPACE, uap->from, ctx);
91447636 6513 if ((error = namei(&fromnd)))
1c79356b
A
6514 return (error);
6515 fvp = fromnd.ni_vp;
6516
6d2010ae
A
6517 NDINIT(&tond, CREATE, OP_LINK,
6518 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6519 UIO_USERSPACE, uap->to, ctx);
91447636 6520 if ((error = namei(&tond))) {
1c79356b
A
6521 goto out1;
6522 }
6523 tdvp = tond.ni_dvp;
6524 tvp = tond.ni_vp;
91447636 6525
1c79356b
A
6526 if (tvp != NULL) {
6527 if (!(uap->flags & CPF_OVERWRITE)) {
6528 error = EEXIST;
6529 goto out;
6530 }
6531 }
1c79356b
A
6532 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
6533 error = EISDIR;
6534 goto out;
6535 }
6536
2d21ac55 6537 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
1c79356b
A
6538 goto out;
6539
6540 if (fvp == tdvp)
6541 error = EINVAL;
6542 /*
6543 * If source is the same as the destination (that is the
6544 * same inode number) then there is nothing to do.
6545 * (fixed to have POSIX semantics - CSM 3/2/98)
6546 */
6547 if (fvp == tvp)
6548 error = -1;
91447636 6549 if (!error)
2d21ac55 6550 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
1c79356b 6551out:
91447636
A
6552 sdvp = tond.ni_startdir;
6553 /*
6554 * nameidone has to happen before we vnode_put(tdvp)
6555 * since it may need to release the fs_nodelock on the tdvp
6556 */
6557 nameidone(&tond);
6558
6559 if (tvp)
6560 vnode_put(tvp);
6561 vnode_put(tdvp);
6562 vnode_put(sdvp);
1c79356b 6563out1:
91447636
A
6564 vnode_put(fvp);
6565
91447636
A
6566 nameidone(&fromnd);
6567
1c79356b
A
6568 if (error == -1)
6569 return (0);
6570 return (error);
6571}
6572
91447636 6573
1c79356b
A
6574/*
6575 * Rename files. Source and destination must either both be directories,
6576 * or both not be directories. If target is a directory, it must be empty.
6577 */
1c79356b 6578/* ARGSUSED */
fe8ab488
A
6579static int
6580renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
6581 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
1c79356b 6582{
91447636
A
6583 vnode_t tvp, tdvp;
6584 vnode_t fvp, fdvp;
316670eb 6585 struct nameidata *fromnd, *tond;
1c79356b 6586 int error;
593a1d5f 6587 int do_retry;
c18c124e 6588 int retry_count;
1c79356b 6589 int mntrename;
2d21ac55 6590 int need_event;
6d2010ae 6591 const char *oname = NULL;
2d21ac55 6592 char *from_name = NULL, *to_name = NULL;
b0d623f7 6593 int from_len=0, to_len=0;
91447636
A
6594 int holding_mntlock;
6595 mount_t locked_mp = NULL;
6d2010ae 6596 vnode_t oparent = NULLVP;
b0d623f7 6597#if CONFIG_FSE
91447636 6598 fse_info from_finfo, to_finfo;
b0d623f7
A
6599#endif
6600 int from_truncated=0, to_truncated;
6d2010ae
A
6601 int batched = 0;
6602 struct vnode_attr *fvap, *tvap;
6603 int continuing = 0;
316670eb
A
6604 /* carving out a chunk for structs that are too big to be on stack. */
6605 struct {
6606 struct nameidata from_node, to_node;
6607 struct vnode_attr fv_attr, tv_attr;
6608 } * __rename_data;
6609 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
6610 fromnd = &__rename_data->from_node;
6611 tond = &__rename_data->to_node;
6612
91447636 6613 holding_mntlock = 0;
316670eb 6614 do_retry = 0;
c18c124e 6615 retry_count = 0;
91447636
A
6616retry:
6617 fvp = tvp = NULL;
6618 fdvp = tdvp = NULL;
6d2010ae 6619 fvap = tvap = NULL;
1c79356b
A
6620 mntrename = FALSE;
6621
316670eb 6622 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
fe8ab488 6623 segflg, from, ctx);
316670eb 6624 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
fe8ab488 6625
316670eb 6626 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
fe8ab488 6627 segflg, to, ctx);
316670eb 6628 tond->ni_flag = NAMEI_COMPOUNDRENAME;
fe8ab488 6629
6d2010ae 6630continue_lookup:
316670eb 6631 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
fe8ab488 6632 if ( (error = nameiat(fromnd, fromfd)) )
6d2010ae 6633 goto out1;
316670eb
A
6634 fdvp = fromnd->ni_dvp;
6635 fvp = fromnd->ni_vp;
1c79356b 6636
6d2010ae 6637 if (fvp && fvp->v_type == VDIR)
316670eb 6638 tond->ni_cnd.cn_flags |= WILLBEDIR;
6d2010ae 6639 }
2d21ac55 6640
316670eb 6641 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
fe8ab488 6642 if ( (error = nameiat(tond, tofd)) ) {
6d2010ae
A
6643 /*
6644 * Translate error code for rename("dir1", "dir2/.").
6645 */
fe8ab488 6646 if (error == EISDIR && fvp->v_type == VDIR)
6d2010ae
A
6647 error = EINVAL;
6648 goto out1;
6649 }
316670eb
A
6650 tdvp = tond->ni_dvp;
6651 tvp = tond->ni_vp;
fe8ab488 6652 }
91447636 6653
6d2010ae
A
6654 batched = vnode_compound_rename_available(fdvp);
6655 if (!fvp) {
fe8ab488 6656 /*
6d2010ae
A
6657 * Claim: this check will never reject a valid rename.
6658 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
6659 * Suppose fdvp and tdvp are not on the same mount.
fe8ab488 6660 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6d2010ae
A
6661 * then you can't move it to within another dir on the same mountpoint.
6662 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
6663 *
6664 * If this check passes, then we are safe to pass these vnodes to the same FS.
91447636 6665 */
6d2010ae
A
6666 if (fdvp->v_mount != tdvp->v_mount) {
6667 error = EXDEV;
6668 goto out1;
6669 }
6670 goto skipped_lookup;
1c79356b 6671 }
2d21ac55 6672
6d2010ae 6673 if (!batched) {
316670eb 6674 error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
6d2010ae 6675 if (error) {
3e170ce0
A
6676 if (error == ENOENT) {
6677 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
6678 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
6679 /*
6680 * We encountered a race where after doing the namei, tvp stops
6681 * being valid. If so, simply re-drive the rename call from the
6682 * top.
6683 */
6684 do_retry = 1;
6685 retry_count += 1;
6686 }
6d2010ae 6687 }
91447636 6688 goto out1;
1c79356b
A
6689 }
6690 }
6d2010ae 6691
2d21ac55
A
6692 /*
6693 * If the source and destination are the same (i.e. they're
6694 * links to the same vnode) and the target file system is
6695 * case sensitive, then there is nothing to do.
6d2010ae
A
6696 *
6697 * XXX Come back to this.
2d21ac55
A
6698 */
6699 if (fvp == tvp) {
6700 int pathconf_val;
fe8ab488 6701
2d21ac55
A
6702 /*
6703 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
6704 * then assume that this file system is case sensitive.
6705 */
6706 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
6707 pathconf_val != 0) {
6708 goto out1;
fe8ab488 6709 }
2d21ac55 6710 }
91447636 6711
1c79356b
A
6712 /*
6713 * Allow the renaming of mount points.
6714 * - target must not exist
6715 * - target must reside in the same directory as source
6716 * - union mounts cannot be renamed
6717 * - "/" cannot be renamed
6d2010ae
A
6718 *
6719 * XXX Handle this in VFS after a continued lookup (if we missed
6720 * in the cache to start off)
1c79356b 6721 */
91447636 6722 if ((fvp->v_flag & VROOT) &&
1c79356b
A
6723 (fvp->v_type == VDIR) &&
6724 (tvp == NULL) &&
6725 (fvp->v_mountedhere == NULL) &&
91447636 6726 (fdvp == tdvp) &&
1c79356b
A
6727 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
6728 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
2d21ac55 6729 vnode_t coveredvp;
fe8ab488 6730
1c79356b 6731 /* switch fvp to the covered vnode */
91447636
A
6732 coveredvp = fvp->v_mount->mnt_vnodecovered;
6733 if ( (vnode_getwithref(coveredvp)) ) {
6734 error = ENOENT;
6735 goto out1;
6736 }
6737 vnode_put(fvp);
6738
6739 fvp = coveredvp;
1c79356b
A
6740 mntrename = TRUE;
6741 }
91447636
A
6742 /*
6743 * Check for cross-device rename.
6744 */
6745 if ((fvp->v_mount != tdvp->v_mount) ||
6746 (tvp && (fvp->v_mount != tvp->v_mount))) {
6747 error = EXDEV;
6748 goto out1;
6749 }
55e303ae 6750
91447636
A
6751 /*
6752 * If source is the same as the destination (that is the
6753 * same inode number) then there is nothing to do...
6754 * EXCEPT if the underlying file system supports case
6755 * insensitivity and is case preserving. In this case
6756 * the file system needs to handle the special case of
6757 * getting the same vnode as target (fvp) and source (tvp).
6758 *
6759 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
6760 * and _PC_CASE_PRESERVING can have this exception, and they need to
6761 * handle the special case of getting the same vnode as target and
6762 * source. NOTE: Then the target is unlocked going into vnop_rename,
6763 * so not to cause locking problems. There is a single reference on tvp.
6764 *
fe8ab488 6765 * NOTE - that fvp == tvp also occurs if they are hard linked and
b0d623f7
A
6766 * that correct behaviour then is just to return success without doing
6767 * anything.
6d2010ae
A
6768 *
6769 * XXX filesystem should take care of this itself, perhaps...
91447636
A
6770 */
6771 if (fvp == tvp && fdvp == tdvp) {
316670eb
A
6772 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
6773 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
6774 fromnd->ni_cnd.cn_namelen)) {
91447636 6775 goto out1;
55e303ae 6776 }
91447636 6777 }
55e303ae 6778
91447636
A
6779 if (holding_mntlock && fvp->v_mount != locked_mp) {
6780 /*
6781 * we're holding a reference and lock
6782 * on locked_mp, but it no longer matches
6783 * what we want to do... so drop our hold
6784 */
6785 mount_unlock_renames(locked_mp);
6786 mount_drop(locked_mp, 0);
6787 holding_mntlock = 0;
6788 }
6789 if (tdvp != fdvp && fvp->v_type == VDIR) {
6790 /*
6791 * serialize renames that re-shape
6792 * the tree... if holding_mntlock is
6793 * set, then we're ready to go...
6794 * otherwise we
6795 * first need to drop the iocounts
6796 * we picked up, second take the
6797 * lock to serialize the access,
6798 * then finally start the lookup
6799 * process over with the lock held
6800 */
6801 if (!holding_mntlock) {
6802 /*
6803 * need to grab a reference on
6804 * the mount point before we
6805 * drop all the iocounts... once
6806 * the iocounts are gone, the mount
6807 * could follow
6808 */
6809 locked_mp = fvp->v_mount;
6810 mount_ref(locked_mp, 0);
55e303ae 6811
91447636
A
6812 /*
6813 * nameidone has to happen before we vnode_put(tvp)
6814 * since it may need to release the fs_nodelock on the tvp
6815 */
316670eb 6816 nameidone(tond);
55e303ae 6817
91447636
A
6818 if (tvp)
6819 vnode_put(tvp);
6820 vnode_put(tdvp);
6821
6822 /*
6823 * nameidone has to happen before we vnode_put(fdvp)
6824 * since it may need to release the fs_nodelock on the fvp
6825 */
316670eb 6826 nameidone(fromnd);
55e303ae 6827
91447636
A
6828 vnode_put(fvp);
6829 vnode_put(fdvp);
6830
6831 mount_lock_renames(locked_mp);
6832 holding_mntlock = 1;
6833
6834 goto retry;
55e303ae 6835 }
91447636
A
6836 } else {
6837 /*
6838 * when we dropped the iocounts to take
fe8ab488 6839 * the lock, we allowed the identity of
91447636
A
6840 * the various vnodes to change... if they did,
6841 * we may no longer be dealing with a rename
6842 * that reshapes the tree... once we're holding
6843 * the iocounts, the vnodes can't change type
6844 * so we're free to drop the lock at this point
6845 * and continue on
1c79356b 6846 */
91447636
A
6847 if (holding_mntlock) {
6848 mount_unlock_renames(locked_mp);
6849 mount_drop(locked_mp, 0);
6850 holding_mntlock = 0;
1c79356b 6851 }
91447636 6852 }
6d2010ae 6853
91447636
A
6854 // save these off so we can later verify that fvp is the same
6855 oname = fvp->v_name;
6856 oparent = fvp->v_parent;
55e303ae 6857
6d2010ae 6858skipped_lookup:
2d21ac55 6859#if CONFIG_FSE
6d2010ae 6860 need_event = need_fsevent(FSE_RENAME, fdvp);
fe8ab488 6861 if (need_event) {
6d2010ae
A
6862 if (fvp) {
6863 get_fse_info(fvp, &from_finfo, ctx);
6864 } else {
316670eb 6865 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
6d2010ae
A
6866 if (error) {
6867 goto out1;
6868 }
6869
316670eb 6870 fvap = &__rename_data->fv_attr;
6d2010ae 6871 }
55e303ae 6872
91447636 6873 if (tvp) {
2d21ac55 6874 get_fse_info(tvp, &to_finfo, ctx);
6d2010ae 6875 } else if (batched) {
316670eb 6876 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
6d2010ae
A
6877 if (error) {
6878 goto out1;
6879 }
6880
316670eb 6881 tvap = &__rename_data->tv_attr;
2d21ac55
A
6882 }
6883 }
6884#else
6885 need_event = 0;
6886#endif /* CONFIG_FSE */
6887
6888 if (need_event || kauth_authorize_fileop_has_listeners()) {
2d21ac55 6889 if (from_name == NULL) {
6d2010ae
A
6890 GET_PATH(from_name);
6891 if (from_name == NULL) {
6892 error = ENOMEM;
6893 goto out1;
6894 }
91447636 6895 }
b0d623f7 6896
316670eb 6897 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
55e303ae 6898
2d21ac55 6899 if (to_name == NULL) {
6d2010ae
A
6900 GET_PATH(to_name);
6901 if (to_name == NULL) {
6902 error = ENOMEM;
6903 goto out1;
6904 }
2d21ac55 6905 }
91447636 6906
316670eb 6907 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
fe8ab488
A
6908 }
6909#if CONFIG_SECLUDED_RENAME
6910 if (flags & VFS_SECLUDE_RENAME) {
6911 fromnd->ni_cnd.cn_flags |= CN_SECLUDE_RENAME;
6912 }
6913#else
6914 #pragma unused(flags)
6915#endif
316670eb
A
6916 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
6917 tdvp, &tvp, &tond->ni_cnd, tvap,
6d2010ae 6918 0, ctx);
55e303ae 6919
91447636
A
6920 if (holding_mntlock) {
6921 /*
6922 * we can drop our serialization
6923 * lock now
6924 */
6925 mount_unlock_renames(locked_mp);
6926 mount_drop(locked_mp, 0);
6927 holding_mntlock = 0;
6928 }
6929 if (error) {
6d2010ae 6930 if (error == EKEEPLOOKING) {
316670eb
A
6931 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6932 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6d2010ae
A
6933 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
6934 }
6935 }
6936
316670eb
A
6937 fromnd->ni_vp = fvp;
6938 tond->ni_vp = tvp;
fe8ab488 6939
6d2010ae
A
6940 goto continue_lookup;
6941 }
6942
6943 /*
fe8ab488
A
6944 * We may encounter a race in the VNOP where the destination didn't
6945 * exist when we did the namei, but it does by the time we go and
6d2010ae
A
6946 * try to create the entry. In this case, we should re-drive this rename
6947 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
fe8ab488 6948 * but other filesystems susceptible to this race could return it, too.
6d2010ae
A
6949 */
6950 if (error == ERECYCLE) {
6951 do_retry = 1;
6952 }
55e303ae 6953
c18c124e
A
6954 /*
6955 * For compound VNOPs, the authorization callback may return
6956 * ENOENT in case of racing hardlink lookups hitting the name
6957 * cache, redrive the lookup.
6958 */
3e170ce0
A
6959 if (batched && error == ENOENT) {
6960 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
6961 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
6962 do_retry = 1;
6963 retry_count += 1;
6964 }
c18c124e
A
6965 }
6966
91447636 6967 goto out1;
fe8ab488
A
6968 }
6969
6970 /* call out to allow 3rd party notification of rename.
91447636
A
6971 * Ignore result of kauth_authorize_fileop call.
6972 */
fe8ab488
A
6973 kauth_authorize_fileop(vfs_context_ucred(ctx),
6974 KAUTH_FILEOP_RENAME,
2d21ac55 6975 (uintptr_t)from_name, (uintptr_t)to_name);
91447636 6976
2d21ac55 6977#if CONFIG_FSE
91447636 6978 if (from_name != NULL && to_name != NULL) {
b0d623f7
A
6979 if (from_truncated || to_truncated) {
6980 // set it here since only the from_finfo gets reported up to user space
6981 from_finfo.mode |= FSE_TRUNCATED_PATH;
6982 }
6d2010ae
A
6983
6984 if (tvap && tvp) {
6985 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
6986 }
6987 if (fvap) {
6988 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
6989 }
6990
91447636 6991 if (tvp) {
2d21ac55 6992 add_fsevent(FSE_RENAME, ctx,
91447636
A
6993 FSE_ARG_STRING, from_len, from_name,
6994 FSE_ARG_FINFO, &from_finfo,
6995 FSE_ARG_STRING, to_len, to_name,
6996 FSE_ARG_FINFO, &to_finfo,
6997 FSE_ARG_DONE);
55e303ae 6998 } else {
2d21ac55 6999 add_fsevent(FSE_RENAME, ctx,
91447636
A
7000 FSE_ARG_STRING, from_len, from_name,
7001 FSE_ARG_FINFO, &from_finfo,
7002 FSE_ARG_STRING, to_len, to_name,
7003 FSE_ARG_DONE);
7004 }
7005 }
2d21ac55 7006#endif /* CONFIG_FSE */
fe8ab488 7007
91447636
A
7008 /*
7009 * update filesystem's mount point data
7010 */
7011 if (mntrename) {
7012 char *cp, *pathend, *mpname;
7013 char * tobuf;
7014 struct mount *mp;
7015 int maxlen;
7016 size_t len = 0;
7017
7018 mp = fvp->v_mountedhere;
7019
7020 if (vfs_busy(mp, LK_NOWAIT)) {
7021 error = EBUSY;
7022 goto out1;
55e303ae 7023 }
91447636 7024 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
55e303ae 7025
fe8ab488
A
7026 if (UIO_SEG_IS_USER_SPACE(segflg))
7027 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
7028 else
7029 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
91447636
A
7030 if (!error) {
7031 /* find current mount point prefix */
7032 pathend = &mp->mnt_vfsstat.f_mntonname[0];
7033 for (cp = pathend; *cp != '\0'; ++cp) {
7034 if (*cp == '/')
7035 pathend = cp + 1;
7036 }
7037 /* find last component of target name */
7038 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
7039 if (*cp == '/')
7040 mpname = cp + 1;
7041 }
7042 /* append name to prefix */
7043 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
7044 bzero(pathend, maxlen);
2d21ac55 7045 strlcpy(pathend, mpname, maxlen);
91447636
A
7046 }
7047 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
7048
7049 vfs_unbusy(mp);
7050 }
7051 /*
fe8ab488 7052 * fix up name & parent pointers. note that we first
91447636
A
7053 * check that fvp has the same name/parent pointers it
7054 * had before the rename call... this is a 'weak' check
7055 * at best...
6d2010ae
A
7056 *
7057 * XXX oparent and oname may not be set in the compound vnop case
91447636 7058 */
6d2010ae 7059 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
91447636
A
7060 int update_flags;
7061
7062 update_flags = VNODE_UPDATE_NAME;
7063
7064 if (fdvp != tdvp)
7065 update_flags |= VNODE_UPDATE_PARENT;
7066
316670eb 7067 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
1c79356b
A
7068 }
7069out1:
593a1d5f
A
7070 if (to_name != NULL) {
7071 RELEASE_PATH(to_name);
7072 to_name = NULL;
7073 }
7074 if (from_name != NULL) {
7075 RELEASE_PATH(from_name);
7076 from_name = NULL;
7077 }
91447636
A
7078 if (holding_mntlock) {
7079 mount_unlock_renames(locked_mp);
7080 mount_drop(locked_mp, 0);
593a1d5f 7081 holding_mntlock = 0;
91447636
A
7082 }
7083 if (tdvp) {
7084 /*
7085 * nameidone has to happen before we vnode_put(tdvp)
7086 * since it may need to release the fs_nodelock on the tdvp
7087 */
316670eb 7088 nameidone(tond);
91447636
A
7089
7090 if (tvp)
7091 vnode_put(tvp);
7092 vnode_put(tdvp);
7093 }
7094 if (fdvp) {
7095 /*
7096 * nameidone has to happen before we vnode_put(fdvp)
7097 * since it may need to release the fs_nodelock on the fdvp
7098 */
316670eb 7099 nameidone(fromnd);
91447636
A
7100
7101 if (fvp)
7102 vnode_put(fvp);
7103 vnode_put(fdvp);
7104 }
fe8ab488 7105
6d2010ae
A
7106 /*
7107 * If things changed after we did the namei, then we will re-drive
7108 * this rename call from the top.
7109 */
316670eb 7110 if (do_retry) {
6d2010ae 7111 do_retry = 0;
593a1d5f
A
7112 goto retry;
7113 }
316670eb
A
7114
7115 FREE(__rename_data, M_TEMP);
1c79356b
A
7116 return (error);
7117}
7118
fe8ab488
A
7119int
7120rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7121{
7122 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7123 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7124}
7125
#if CONFIG_SECLUDED_RENAME
/*
 * rename_ext
 *
 * Same as rename(), except that the caller-supplied uap->flags are
 * forwarded to renameat_internal().
 */
int
rename_ext(__unused proc_t p, struct rename_ext_args *uap, __unused int32_t *retval)
{
	vfs_context_t ctx = vfs_context_current();

	return renameat_internal(ctx,
	    AT_FDCWD, uap->from,
	    AT_FDCWD, uap->to,
	    UIO_USERSPACE, uap->flags);
}
#endif
7136
7137int
7138renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7139{
7140 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7141 uap->tofd, uap->to, UIO_USERSPACE, 0));
7142}
7143
1c79356b
A
7144/*
7145 * Make a directory file.
2d21ac55
A
7146 *
7147 * Returns: 0 Success
7148 * EEXIST
7149 * namei:???
7150 * vnode_authorize:???
7151 * vn_create:???
1c79356b 7152 */
1c79356b 7153/* ARGSUSED */
91447636 7154static int
fe8ab488
A
7155mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
7156 enum uio_seg segflg)
1c79356b 7157{
91447636 7158 vnode_t vp, dvp;
1c79356b 7159 int error;
91447636 7160 int update_flags = 0;
6d2010ae 7161 int batched;
1c79356b
A
7162 struct nameidata nd;
7163
91447636 7164 AUDIT_ARG(mode, vap->va_mode);
fe8ab488 7165 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
6d2010ae 7166 path, ctx);
9bccf70c 7167 nd.ni_cnd.cn_flags |= WILLBEDIR;
6d2010ae
A
7168 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
7169
7170continue_lookup:
fe8ab488 7171 error = nameiat(&nd, fd);
55e303ae 7172 if (error)
1c79356b 7173 return (error);
91447636 7174 dvp = nd.ni_dvp;
1c79356b 7175 vp = nd.ni_vp;
55e303ae 7176
fe8ab488
A
7177 if (vp != NULL) {
7178 error = EEXIST;
7179 goto out;
7180 }
7181
6d2010ae 7182 batched = vnode_compound_mkdir_available(dvp);
2d21ac55
A
7183
7184 VATTR_SET(vap, va_type, VDIR);
fe8ab488 7185
6d2010ae
A
7186 /*
7187 * XXX
7188 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7189 * only get EXISTS or EISDIR for existing path components, and not that it could see
7190 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7191 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7192 */
fe8ab488 7193 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
6d2010ae
A
7194 if (error == EACCES || error == EPERM) {
7195 int error2;
7196
7197 nameidone(&nd);
7198 vnode_put(dvp);
7199 dvp = NULLVP;
7200
fe8ab488
A
7201 /*
7202 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
6d2010ae
A
7203 * rather than EACCESS if the target exists.
7204 */
fe8ab488
A
7205 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
7206 path, ctx);
7207 error2 = nameiat(&nd, fd);
6d2010ae
A
7208 if (error2) {
7209 goto out;
7210 } else {
7211 vp = nd.ni_vp;
7212 error = EEXIST;
7213 goto out;
7214 }
7215 }
7216
2d21ac55 7217 goto out;
6d2010ae
A
7218 }
7219
7220 /*
fe8ab488 7221 * make the directory
6d2010ae 7222 */
fe8ab488 7223 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
6d2010ae
A
7224 if (error == EKEEPLOOKING) {
7225 nd.ni_vp = vp;
7226 goto continue_lookup;
7227 }
2d21ac55 7228
fe8ab488 7229 goto out;
6d2010ae 7230 }
fe8ab488 7231
91447636
A
7232 // Make sure the name & parent pointers are hooked up
7233 if (vp->v_name == NULL)
7234 update_flags |= VNODE_UPDATE_NAME;
7235 if (vp->v_parent == NULLVP)
7236 update_flags |= VNODE_UPDATE_PARENT;
7237
7238 if (update_flags)
7239 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
55e303ae 7240
2d21ac55 7241#if CONFIG_FSE
91447636 7242 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
2d21ac55 7243#endif
91447636
A
7244
7245out:
7246 /*
7247 * nameidone has to happen before we vnode_put(dvp)
7248 * since it may need to release the fs_nodelock on the dvp
7249 */
7250 nameidone(&nd);
7251
7252 if (vp)
6d2010ae 7253 vnode_put(vp);
fe8ab488 7254 if (dvp)
6d2010ae 7255 vnode_put(dvp);
55e303ae 7256
1c79356b
A
7257 return (error);
7258}
7259
b0d623f7
A
7260/*
7261 * mkdir_extended: Create a directory; with extended security (ACL).
7262 *
7263 * Parameters: p Process requesting to create the directory
7264 * uap User argument descriptor (see below)
fe8ab488 7265 * retval (ignored)
b0d623f7
A
7266 *
7267 * Indirect: uap->path Path of directory to create
7268 * uap->mode Access permissions to set
7269 * uap->xsecurity ACL to set
fe8ab488 7270 *
b0d623f7
A
7271 * Returns: 0 Success
7272 * !0 Not success
7273 *
7274 */
1c79356b 7275int
b0d623f7 7276mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
1c79356b 7277{
91447636
A
7278 int ciferror;
7279 kauth_filesec_t xsecdst;
7280 struct vnode_attr va;
7281
b0d623f7
A
7282 AUDIT_ARG(owner, uap->uid, uap->gid);
7283
91447636
A
7284 xsecdst = NULL;
7285 if ((uap->xsecurity != USER_ADDR_NULL) &&
7286 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
7287 return ciferror;
7288
91447636 7289 VATTR_INIT(&va);
fe8ab488 7290 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
91447636
A
7291 if (xsecdst != NULL)
7292 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
7293
fe8ab488
A
7294 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7295 UIO_USERSPACE);
91447636
A
7296 if (xsecdst != NULL)
7297 kauth_filesec_free(xsecdst);
7298 return ciferror;
1c79356b
A
7299}
7300
1c79356b 7301int
b0d623f7 7302mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
1c79356b 7303{
91447636 7304 struct vnode_attr va;
1c79356b 7305
91447636 7306 VATTR_INIT(&va);
fe8ab488 7307 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
e5568f75 7308
fe8ab488
A
7309 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7310 UIO_USERSPACE));
91447636 7311}
1c79356b 7312
91447636 7313int
fe8ab488
A
7314mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
7315{
7316 struct vnode_attr va;
7317
7318 VATTR_INIT(&va);
7319 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7320
7321 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
7322 UIO_USERSPACE));
7323}
7324
/*
 * rmdirat_internal: remove the directory named by dirpath.
 *
 * Parameters:	ctx	VFS context used for the lookup, authorization,
 *			notification and the removal itself
 *		fd	descriptor handed to nameiat() for the lookup
 *		dirpath	path of the directory to remove
 *		segflg	address space of dirpath (handed to NDINIT)
 *
 * Returns:	0	Success
 *		EBUSY	The target vnode has VROOT set
 *		ENOMEM	No path buffer could be obtained for notification
 *		!0	Error from nameiat(), vn_authorize_rmdir(),
 *			vfs_get_notify_attributes(), vn_rmdir() or
 *			rmdir_remove_orphaned_appleDouble()
 *
 * Notes:	The whole lookup/remove sequence is repeated (after a short
 *		tsleep) for as long as restart_flag is set.  Restarts caused
 *		by ENOENT are bounded by MAX_AUTHORIZE_ENOENT_RETRIES.
 *		On every pass through "out" the path buffer, the nameidata
 *		state and the iocounts on dvp (and vp, if any) are released.
 */
7325static int
7326rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
7327 enum uio_seg segflg)
1c79356b 7328{
2d21ac55 7329 vnode_t vp, dvp;
91447636
A
7330 int error;
7331 struct nameidata nd;
6d2010ae
A
7332 char *path = NULL;
7333 int len=0;
7334 int has_listeners = 0;
7335 int need_event = 0;
7336 int truncated = 0;
6d2010ae
A
7337#if CONFIG_FSE
7338 struct vnode_attr va;
7339#endif /* CONFIG_FSE */
7340 struct vnode_attr *vap = NULL;
c18c124e 7341 int restart_count = 0;
6d2010ae 7342 int batched;
91447636 7343
b0d623f7 7344 int restart_flag;
91447636 7345
fe8ab488 7346 /*
2d21ac55
A
7347 * This loop exists to restart rmdir in the unlikely case that two
7348 * processes are simultaneously trying to remove the same directory
7349 * containing orphaned appleDouble files.
7350 */
7351 do {
6d2010ae 7352 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
fe8ab488 7353 segflg, dirpath, ctx);
6d2010ae
A
7354 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
7355continue_lookup:
2d21ac55 7356 restart_flag = 0;
6d2010ae 7357 vap = NULL;
2d21ac55 7358
fe8ab488 7359 error = nameiat(&nd, fd);
2d21ac55
A
7360 if (error)
7361 return (error);
7362
7363 dvp = nd.ni_dvp;
7364 vp = nd.ni_vp;
7365
6d2010ae
A
/*
 * vp may be NULL after a successful lookup; in that case (else branch
 * below) dvp must support compound rmdir and the operation is batched.
 */
7366 if (vp) {
7367 batched = vnode_compound_rmdir_available(vp);
2d21ac55 7368
6d2010ae
A
7369 if (vp->v_flag & VROOT) {
7370 /*
7371 * The root of a mounted filesystem cannot be deleted.
7372 */
7373 error = EBUSY;
7374 goto out;
7375 }
1c79356b 7376
2d21ac55 7377 /*
6d2010ae
A
7378 * Removed a check here; we used to abort if vp's vid
7379 * was not the same as what we'd seen the last time around.
7380 * I do not think that check was valid, because if we retry
7381 * and all dirents are gone, the directory could legitimately
7382 * be recycled but still be present in a situation where we would
fe8ab488 7383 * have had permission to delete. Therefore, we won't make
6d2010ae
A
7384 * an effort to preserve that check now that we may not have a
7385 * vp here.
2d21ac55 7386 */
6d2010ae
A
7387
7388 if (!batched) {
7389 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
7390 if (error) {
3e170ce0
A
7391 if (error == ENOENT) {
7392 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7393 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7394 restart_flag = 1;
7395 restart_count += 1;
7396 }
c18c124e 7397 }
6d2010ae
A
7398 goto out;
7399 }
7400 }
2d21ac55 7401 } else {
6d2010ae
A
7402 batched = 1;
7403
7404 if (!vnode_compound_rmdir_available(dvp)) {
7405 panic("No error, but no compound rmdir?");
7406 }
91447636 7407 }
6d2010ae 7408
2d21ac55 7409#if CONFIG_FSE
6d2010ae 7410 fse_info finfo;
b0d623f7 7411
6d2010ae
A
7412 need_event = need_fsevent(FSE_DELETE, dvp);
7413 if (need_event) {
7414 if (!batched) {
2d21ac55 7415 get_fse_info(vp, &finfo, ctx);
6d2010ae
A
7416 } else {
7417 error = vfs_get_notify_attributes(&va);
7418 if (error) {
7419 goto out;
7420 }
7421
7422 vap = &va;
2d21ac55 7423 }
6d2010ae 7424 }
2d21ac55 7425#endif
6d2010ae
A
7426 has_listeners = kauth_authorize_fileop_has_listeners();
7427 if (need_event || has_listeners) {
7428 if (path == NULL) {
2d21ac55
A
7429 GET_PATH(path);
7430 if (path == NULL) {
7431 error = ENOMEM;
7432 goto out;
7433 }
6d2010ae 7434 }
b0d623f7 7435
6d2010ae 7436 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
b0d623f7 7437#if CONFIG_FSE
6d2010ae
A
/*
 * NOTE(review): finfo has only been filled in by get_fse_info() when
 * need_event && !batched.  In the batched case it is filled in later by
 * vnode_get_fse_info_from_vap(); confirm the bit set here survives that
 * call, and that OR-ing into finfo is intended when need_event is 0.
 */
7438 if (truncated) {
7439 finfo.mode |= FSE_TRUNCATED_PATH;
2d21ac55 7440 }
6d2010ae
A
7441#endif
7442 }
91447636 7443
6d2010ae
A
7444 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
7445 nd.ni_vp = vp;
7446 if (vp == NULLVP) {
7447 /* Couldn't find a vnode */
7448 goto out;
7449 }
2d21ac55 7450
6d2010ae
A
7451 if (error == EKEEPLOOKING) {
7452 goto continue_lookup;
3e170ce0
A
7453 } else if (batched && error == ENOENT) {
7454 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7455 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7456 /*
7457 * For compound VNOPs, the authorization callback
7458 * may return ENOENT in case of racing hard link lookups
7459 * redrive the lookup.
7460 */
7461 restart_flag = 1;
7462 restart_count += 1;
7463 goto out;
7464 }
6d2010ae 7465 }
39236c6e 7466#if CONFIG_APPLEDOUBLE
6d2010ae
A
7467 /*
7468 * Special case to remove orphaned AppleDouble
7469 * files. I don't like putting this in the kernel,
7470 * but carbon does not like putting this in carbon either,
7471 * so here we are.
7472 */
7473 if (error == ENOTEMPTY) {
7474 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
7475 if (error == EBUSY) {
7476 goto out;
2d21ac55
A
7477 }
7478
6d2010ae 7479
2d21ac55 7480 /*
fe8ab488 7481 * Assuming everything went well, we will try the RMDIR again
2d21ac55 7482 */
6d2010ae
A
7483 if (!error)
7484 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
7485 }
39236c6e 7486#endif /* CONFIG_APPLEDOUBLE */
6d2010ae 7487 /*
fe8ab488 7488 * Call out to allow 3rd party notification of delete.
6d2010ae
A
7489 * Ignore result of kauth_authorize_fileop call.
7490 */
7491 if (!error) {
7492 if (has_listeners) {
fe8ab488
A
7493 kauth_authorize_fileop(vfs_context_ucred(ctx),
7494 KAUTH_FILEOP_DELETE,
6d2010ae
A
7495 (uintptr_t)vp,
7496 (uintptr_t)path);
7497 }
7498
7499 if (vp->v_flag & VISHARDLINK) {
7500 // see the comment in unlink1() about why we update
7501 // the parent of a hard link when it is removed
7502 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
7503 }
2d21ac55
A
7504
7505#if CONFIG_FSE
6d2010ae
A
7506 if (need_event) {
7507 if (vap) {
7508 vnode_get_fse_info_from_vap(vp, &finfo, vap);
2d21ac55 7509 }
6d2010ae
A
7510 add_fsevent(FSE_DELETE, ctx,
7511 FSE_ARG_STRING, len, path,
7512 FSE_ARG_FINFO, &finfo,
7513 FSE_ARG_DONE);
2d21ac55 7514 }
6d2010ae 7515#endif
2d21ac55
A
7516 }
7517
7518out:
6d2010ae
A
7519 if (path != NULL) {
7520 RELEASE_PATH(path);
7521 path = NULL;
7522 }
2d21ac55
A
7523 /*
7524 * nameidone has to happen before we vnode_put(dvp)
7525 * since it may need to release the fs_nodelock on the dvp
7526 */
7527 nameidone(&nd);
2d21ac55 7528 vnode_put(dvp);
6d2010ae 7529
fe8ab488 7530 if (vp)
6d2010ae 7531 vnode_put(vp);
2d21ac55
A
7532
/*
 * From here on vp is only used as the channel value passed to
 * wakeup_one()/tsleep(); the vnode_put() above has already run.
 */
7533 if (restart_flag == 0) {
7534 wakeup_one((caddr_t)vp);
7535 return (error);
7536 }
7537 tsleep(vp, PVFS, "rm AD", 1);
7538
7539 } while (restart_flag != 0);
91447636 7540
1c79356b 7541 return (error);
2d21ac55 7542
1c79356b 7543}
91447636 7544
fe8ab488
A
7545/*
7546 * Remove a directory file.
7547 */
7548/* ARGSUSED */
7549int
7550rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
7551{
7552 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
7553 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
7554}
7555
2d21ac55
A
7556/* Get direntry length padded to 8 byte alignment */
7557#define DIRENT64_LEN(namlen) \
7558 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
7559
fe8ab488 7560errno_t
2d21ac55
A
7561vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
7562 int *numdirent, vfs_context_t ctxp)
7563{
7564 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
6d2010ae
A
7565 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
7566 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
2d21ac55
A
7567 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
7568 } else {
7569 size_t bufsize;
7570 void * bufptr;
7571 uio_t auio;
15129b1c 7572 struct direntry *entry64;
2d21ac55
A
7573 struct dirent *dep;
7574 int bytesread;
7575 int error;
7576
7577 /*
7578 * Our kernel buffer needs to be smaller since re-packing
7579 * will expand each dirent. The worse case (when the name
7580 * length is 3) corresponds to a struct direntry size of 32
7581 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
7582 * (4-byte aligned). So having a buffer that is 3/8 the size
7583 * will prevent us from reading more than we can pack.
7584 *
7585 * Since this buffer is wired memory, we will limit the
7586 * buffer size to a maximum of 32K. We would really like to
7587 * use 32K in the MIN(), but we use magic number 87371 to
7588 * prevent uio_resid() * 3 / 8 from overflowing.
7589 */
316670eb 7590 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
2d21ac55 7591 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
b0d623f7
A
7592 if (bufptr == NULL) {
7593 return ENOMEM;
7594 }
2d21ac55 7595
b0d623f7 7596 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
2d21ac55
A
7597 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
7598 auio->uio_offset = uio->uio_offset;
7599
7600 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
7601
7602 dep = (struct dirent *)bufptr;
7603 bytesread = bufsize - uio_resid(auio);
7604
15129b1c
A
7605 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
7606 M_TEMP, M_WAITOK);
2d21ac55
A
7607 /*
7608 * Convert all the entries and copy them out to user's buffer.
7609 */
7610 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
15129b1c
A
7611 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
7612
7613 bzero(entry64, enbufsize);
2d21ac55 7614 /* Convert a dirent to a dirent64. */
15129b1c
A
7615 entry64->d_ino = dep->d_ino;
7616 entry64->d_seekoff = 0;
7617 entry64->d_reclen = enbufsize;
7618 entry64->d_namlen = dep->d_namlen;
7619 entry64->d_type = dep->d_type;
7620 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
2d21ac55
A
7621
7622 /* Move to next entry. */
7623 dep = (struct dirent *)((char *)dep + dep->d_reclen);
7624
7625 /* Copy entry64 to user's buffer. */
15129b1c 7626 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
2d21ac55
A
7627 }
7628
7629 /* Update the real offset using the offset we got from VNOP_READDIR. */
7630 if (error == 0) {
7631 uio->uio_offset = auio->uio_offset;
7632 }
7633 uio_free(auio);
7634 FREE(bufptr, M_TEMP);
15129b1c 7635 FREE(entry64, M_TEMP);
2d21ac55
A
7636 return (error);
7637 }
7638}
1c79356b 7639
39236c6e
A
7640#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
7641
1c79356b
A
7642/*
7643 * Read a block of directory entries in a file system independent format.
7644 */
2d21ac55
A
7645static int
7646getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
7647 off_t *offset, int flags)
1c79356b 7648{
2d21ac55
A
7649 vnode_t vp;
7650 struct vfs_context context = *vfs_context_current(); /* local copy */
91447636
A
7651 struct fileproc *fp;
7652 uio_t auio;
2d21ac55
A
7653 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7654 off_t loff;
7655 int error, eofflag, numdirent;
91447636 7656 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 7657
2d21ac55
A
7658 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
7659 if (error) {
1c79356b 7660 return (error);
2d21ac55 7661 }
91447636
A
7662 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7663 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7664 error = EBADF;
7665 goto out;
7666 }
2d21ac55 7667
39236c6e
A
7668 if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
7669 bufsize = GETDIRENTRIES_MAXBUFSIZE;
7670
2d21ac55
A
7671#if CONFIG_MACF
7672 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
7673 if (error)
7674 goto out;
7675#endif
91447636
A
7676 if ( (error = vnode_getwithref(vp)) ) {
7677 goto out;
7678 }
91447636 7679 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
55e303ae 7680
1c79356b 7681unionread:
91447636
A
7682 if (vp->v_type != VDIR) {
7683 (void)vnode_put(vp);
7684 error = EINVAL;
7685 goto out;
7686 }
2d21ac55
A
7687
7688#if CONFIG_MACF
7689 error = mac_vnode_check_readdir(&context, vp);
7690 if (error != 0) {
7691 (void)vnode_put(vp);
7692 goto out;
7693 }
7694#endif /* MAC */
91447636
A
7695
7696 loff = fp->f_fglob->fg_offset;
2d21ac55
A
7697 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
7698 uio_addiov(auio, bufp, bufsize);
91447636 7699
2d21ac55
A
7700 if (flags & VNODE_READDIR_EXTENDED) {
7701 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
7702 fp->f_fglob->fg_offset = uio_offset(auio);
7703 } else {
7704 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
7705 fp->f_fglob->fg_offset = uio_offset(auio);
7706 }
91447636
A
7707 if (error) {
7708 (void)vnode_put(vp);
7709 goto out;
7710 }
1c79356b 7711
2d21ac55
A
7712 if ((user_ssize_t)bufsize == uio_resid(auio)){
7713 if (union_dircheckp) {
7714 error = union_dircheckp(&vp, fp, &context);
7715 if (error == -1)
7716 goto unionread;
7717 if (error)
7718 goto out;
1c79356b
A
7719 }
7720
39236c6e 7721 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
2d21ac55 7722 struct vnode *tvp = vp;
39236c6e
A
7723 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
7724 vnode_ref(vp);
7725 fp->f_fglob->fg_data = (caddr_t) vp;
7726 fp->f_fglob->fg_offset = 0;
7727 vnode_rele(tvp);
7728 vnode_put(tvp);
7729 goto unionread;
7730 }
7731 vp = tvp;
1c79356b
A
7732 }
7733 }
2d21ac55 7734
91447636 7735 vnode_put(vp);
2d21ac55
A
7736 if (offset) {
7737 *offset = loff;
7738 }
b0d623f7 7739
2d21ac55 7740 *bytesread = bufsize - uio_resid(auio);
91447636
A
7741out:
7742 file_drop(fd);
1c79356b
A
7743 return (error);
7744}
7745
2d21ac55
A
7746
7747int
b0d623f7 7748getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
2d21ac55
A
7749{
7750 off_t offset;
2d21ac55
A
7751 ssize_t bytesread;
7752 int error;
7753
7754 AUDIT_ARG(fd, uap->fd);
7755 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
7756
7757 if (error == 0) {
b0d623f7
A
7758 if (proc_is64bit(p)) {
7759 user64_long_t base = (user64_long_t)offset;
7760 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
7761 } else {
7762 user32_long_t base = (user32_long_t)offset;
7763 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
7764 }
2d21ac55
A
7765 *retval = bytesread;
7766 }
7767 return (error);
7768}
7769
7770int
7771getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
7772{
7773 off_t offset;
7774 ssize_t bytesread;
7775 int error;
7776
7777 AUDIT_ARG(fd, uap->fd);
7778 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
7779
7780 if (error == 0) {
7781 *retval = bytesread;
7782 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
7783 }
7784 return (error);
7785}
7786
7787
1c79356b
A
7788/*
7789 * Set the mode mask for creation of filesystem nodes.
b0d623f7 7790 * XXX implement xsecurity
1c79356b 7791 */
91447636
A
7792#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
7793static int
b0d623f7 7794umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
1c79356b 7795{
2d21ac55 7796 struct filedesc *fdp;
1c79356b 7797
91447636 7798 AUDIT_ARG(mask, newmask);
2d21ac55 7799 proc_fdlock(p);
1c79356b
A
7800 fdp = p->p_fd;
7801 *retval = fdp->fd_cmask;
91447636 7802 fdp->fd_cmask = newmask & ALLPERMS;
2d21ac55 7803 proc_fdunlock(p);
1c79356b
A
7804 return (0);
7805}
7806
b0d623f7
A
7807/*
7808 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
7809 *
7810 * Parameters: p Process requesting to set the umask
7811 * uap User argument descriptor (see below)
7812 * retval umask of the process (parameter p)
7813 *
7814 * Indirect: uap->newmask umask to set
7815 * uap->xsecurity ACL to set
7816 *
7817 * Returns: 0 Success
7818 * !0 Not success
7819 *
7820 */
7821int
7822umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
91447636
A
7823{
7824 int ciferror;
7825 kauth_filesec_t xsecdst;
7826
7827 xsecdst = KAUTH_FILESEC_NONE;
7828 if (uap->xsecurity != USER_ADDR_NULL) {
7829 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
7830 return ciferror;
7831 } else {
7832 xsecdst = KAUTH_FILESEC_NONE;
7833 }
7834
7835 ciferror = umask1(p, uap->newmask, xsecdst, retval);
7836
7837 if (xsecdst != KAUTH_FILESEC_NONE)
7838 kauth_filesec_free(xsecdst);
7839 return ciferror;
7840}
7841
7842int
b0d623f7 7843umask(proc_t p, struct umask_args *uap, int32_t *retval)
91447636
A
7844{
7845 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
7846}
7847
1c79356b
A
7848/*
7849 * Void all references to file by ripping underlying filesystem
7850 * away from vnode.
7851 */
1c79356b
A
7852/* ARGSUSED */
7853int
b0d623f7 7854revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
1c79356b 7855{
2d21ac55 7856 vnode_t vp;
91447636 7857 struct vnode_attr va;
2d21ac55 7858 vfs_context_t ctx = vfs_context_current();
1c79356b
A
7859 int error;
7860 struct nameidata nd;
7861
6d2010ae
A
7862 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
7863 uap->path, ctx);
55e303ae
A
7864 error = namei(&nd);
7865 if (error)
1c79356b
A
7866 return (error);
7867 vp = nd.ni_vp;
91447636
A
7868
7869 nameidone(&nd);
7870
b0d623f7
A
7871 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
7872 error = ENOTSUP;
7873 goto out;
7874 }
7875
7876 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
7877 error = EBUSY;
7878 goto out;
7879 }
7880
2d21ac55
A
7881#if CONFIG_MACF
7882 error = mac_vnode_check_revoke(ctx, vp);
7883 if (error)
7884 goto out;
7885#endif
7886
91447636
A
7887 VATTR_INIT(&va);
7888 VATTR_WANTED(&va, va_uid);
2d21ac55 7889 if ((error = vnode_getattr(vp, &va, ctx)))
1c79356b 7890 goto out;
2d21ac55
A
7891 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
7892 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
1c79356b 7893 goto out;
b0d623f7 7894 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
2d21ac55 7895 VNOP_REVOKE(vp, REVOKEALL, ctx);
1c79356b 7896out:
91447636 7897 vnode_put(vp);
1c79356b
A
7898 return (error);
7899}
7900
0b4e3aa0 7901
1c79356b
A
7902/*
7903 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
9bccf70c 7904 * The following system calls are designed to support features
1c79356b
A
7905 * which are specific to the HFS & HFS Plus volume formats
7906 */
7907
9bccf70c 7908
1c79356b 7909/*
39236c6e
A
7910 * Obtain attribute information on objects in a directory while enumerating
7911 * the directory.
7912 */
1c79356b
A
7913/* ARGSUSED */
7914int
b0d623f7 7915getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
1c79356b 7916{
2d21ac55 7917 vnode_t vp;
91447636
A
7918 struct fileproc *fp;
7919 uio_t auio = NULL;
7920 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
39236c6e 7921 uint32_t count, savecount;
2d21ac55 7922 uint32_t newstate;
91447636 7923 int error, eofflag;
2d21ac55 7924 uint32_t loff;
91447636 7925 struct attrlist attributelist;
2d21ac55 7926 vfs_context_t ctx = vfs_context_current();
91447636
A
7927 int fd = uap->fd;
7928 char uio_buf[ UIO_SIZEOF(1) ];
7929 kauth_action_t action;
7930
7931 AUDIT_ARG(fd, fd);
7932
7933 /* Get the attributes into kernel space */
2d21ac55 7934 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
91447636 7935 return(error);
2d21ac55
A
7936 }
7937 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
7938 return(error);
7939 }
39236c6e 7940 savecount = count;
2d21ac55 7941 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
91447636 7942 return (error);
2d21ac55 7943 }
91447636
A
7944 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7945 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7946 error = EBADF;
7947 goto out;
7948 }
2d21ac55
A
7949
7950
7951#if CONFIG_MACF
7952 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
7953 fp->f_fglob);
7954 if (error)
7955 goto out;
7956#endif
7957
7958
91447636
A
7959 if ( (error = vnode_getwithref(vp)) )
7960 goto out;
55e303ae 7961
91447636 7962 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1c79356b 7963
39236c6e 7964unionread:
91447636
A
7965 if (vp->v_type != VDIR) {
7966 (void)vnode_put(vp);
7967 error = EINVAL;
7968 goto out;
7969 }
55e303ae 7970
2d21ac55
A
7971#if CONFIG_MACF
7972 error = mac_vnode_check_readdir(ctx, vp);
7973 if (error != 0) {
7974 (void)vnode_put(vp);
7975 goto out;
7976 }
7977#endif /* MAC */
7978
91447636
A
7979 /* set up the uio structure which will contain the users return buffer */
7980 loff = fp->f_fglob->fg_offset;
39236c6e 7981 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
91447636
A
7982 uio_addiov(auio, uap->buffer, uap->buffersize);
7983
91447636
A
7984 /*
7985 * If the only item requested is file names, we can let that past with
7986 * just LIST_DIRECTORY. If they want any other attributes, that means
7987 * they need SEARCH as well.
7988 */
7989 action = KAUTH_VNODE_LIST_DIRECTORY;
7990 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
7991 attributelist.fileattr || attributelist.dirattr)
7992 action |= KAUTH_VNODE_SEARCH;
7993
2d21ac55 7994 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
2d21ac55 7995
b0d623f7
A
7996 /* Believe it or not, uap->options only has 32-bits of valid
7997 * info, so truncate before extending again */
39236c6e
A
7998
7999 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
8000 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
8001 }
8002
8003 if (error) {
8004 (void) vnode_put(vp);
8005 goto out;
8006 }
8007
8008 /*
8009 * If we've got the last entry of a directory in a union mount
8010 * then reset the eofflag and pretend there's still more to come.
8011 * The next call will again set eofflag and the buffer will be empty,
8012 * so traverse to the underlying directory and do the directory
8013 * read there.
8014 */
8015 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
8016 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
8017 eofflag = 0;
8018 } else { // Empty buffer
8019 struct vnode *tvp = vp;
8020 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
8021 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
8022 fp->f_fglob->fg_data = (caddr_t) vp;
8023 fp->f_fglob->fg_offset = 0; // reset index for new dir
8024 count = savecount;
8025 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
8026 vnode_put(tvp);
8027 goto unionread;
8028 }
8029 vp = tvp;
8030 }
2d21ac55 8031 }
39236c6e 8032
91447636 8033 (void)vnode_put(vp);
1c79356b 8034
91447636
A
8035 if (error)
8036 goto out;
8037 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
1c79356b 8038
2d21ac55 8039 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
91447636 8040 goto out;
2d21ac55 8041 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
91447636 8042 goto out;
2d21ac55 8043 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
91447636 8044 goto out;
1c79356b
A
8045
8046 *retval = eofflag; /* similar to getdirentries */
91447636 8047 error = 0;
2d21ac55 8048out:
91447636
A
8049 file_drop(fd);
8050 return (error); /* return error earlier, an retval of 0 or 1 now */
1c79356b 8051
39236c6e 8052} /* end of getdirentriesattr system call */
1c79356b
A
8053
/*
 * exchangedata: exchange data between the two files named by uap->path1
 * and uap->path2.
 *
 * Parameters:	p		unused
 *		uap		user arguments: path1, path2, options
 *				(FSOPT_NOFOLLOW suppresses FOLLOW on both lookups)
 *		retval		unused
 *
 * Returns:	0		Success
 *		EINVAL		Both paths resolve to the same vnode, or either
 *				vnode is not a regular file
 *		EXDEV		The two vnodes are on different mounts
 *		ENOMEM		A path buffer for notification could not be obtained
 *		!0		Error from namei(), the MAC check,
 *				vnode_authorize() or VNOP_EXCHANGE()
 *
 * Notes:	Both vnodes must be authorized for READ_DATA and WRITE_DATA.
 *		After a successful VNOP_EXCHANGE() the cached v_name (and
 *		v_parent, when different) of the two vnodes are swapped, and
 *		fileop listeners / fsevents are notified if paths were gathered.
 */
8054/*
8055* Exchange data between two files
8056*/
8057
1c79356b
A
8058/* ARGSUSED */
8059int
b0d623f7 8060exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
1c79356b
A
8061{
8062
8063 struct nameidata fnd, snd;
2d21ac55
A
8064 vfs_context_t ctx = vfs_context_current();
8065 vnode_t fvp;
8066 vnode_t svp;
8067 int error;
b0d623f7 8068 u_int32_t nameiflags;
91447636
A
8069 char *fpath = NULL;
8070 char *spath = NULL;
b0d623f7
A
8071 int flen=0, slen=0;
8072 int from_truncated=0, to_truncated=0;
8073#if CONFIG_FSE
91447636 8074 fse_info f_finfo, s_finfo;
b0d623f7
A
8075#endif
8076
1c79356b
A
8077 nameiflags = 0;
8078 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8079
6d2010ae
A
8080 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
8081 UIO_USERSPACE, uap->path1, ctx);
1c79356b 8082
6d2010ae
A
8083 error = namei(&fnd);
8084 if (error)
8085 goto out2;
1c79356b 8086
91447636
A
8087 nameidone(&fnd);
8088 fvp = fnd.ni_vp;
1c79356b 8089
6d2010ae
A
8090 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
8091 UIO_USERSPACE, uap->path2, ctx);
1c79356b 8092
6d2010ae
A
8093 error = namei(&snd);
8094 if (error) {
91447636 8095 vnode_put(fvp);
55e303ae 8096 goto out2;
6d2010ae 8097 }
91447636 8098 nameidone(&snd);
1c79356b
A
8099 svp = snd.ni_vp;
8100
91447636
A
8101 /*
8102 * if the files are the same, return an inval error
8103 */
1c79356b 8104 if (svp == fvp) {
91447636
A
8105 error = EINVAL;
8106 goto out;
8107 }
1c79356b 8108
91447636
A
8109 /*
8110 * if the files are on different volumes, return an error
8111 */
8112 if (svp->v_mount != fvp->v_mount) {
8113 error = EXDEV;
8114 goto out;
8115 }
2d21ac55 8116
39236c6e
A
8117 /* If they're not files, return an error */
8118 if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
db609669
A
8119 error = EINVAL;
8120 goto out;
8121 }
8122
2d21ac55
A
8123#if CONFIG_MACF
8124 error = mac_vnode_check_exchangedata(ctx,
8125 fvp, svp);
8126 if (error)
8127 goto out;
8128#endif
8129 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
8130 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
91447636 8131 goto out;
1c79356b 8132
2d21ac55
A
8133 if (
8134#if CONFIG_FSE
8135 need_fsevent(FSE_EXCHANGE, fvp) ||
8136#endif
8137 kauth_authorize_fileop_has_listeners()) {
8138 GET_PATH(fpath);
8139 GET_PATH(spath);
8140 if (fpath == NULL || spath == NULL) {
8141 error = ENOMEM;
8142 goto out;
8143 }
b0d623f7
A
8144
8145 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
8146 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
8147
2d21ac55
A
8148#if CONFIG_FSE
8149 get_fse_info(fvp, &f_finfo, ctx);
8150 get_fse_info(svp, &s_finfo, ctx);
b0d623f7
A
8151 if (from_truncated || to_truncated) {
8152 // set it here since only the f_finfo gets reported up to user space
8153 f_finfo.mode |= FSE_TRUNCATED_PATH;
8154 }
2d21ac55 8155#endif
91447636 8156 }
1c79356b 8157 /* Ok, make the call */
2d21ac55 8158 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
55e303ae 8159
91447636 8160 if (error == 0) {
2d21ac55 8161 const char *tmpname;
91447636
A
8162
8163 if (fpath != NULL && spath != NULL) {
8164 /* call out to allow 3rd party notification of exchangedata.
8165 * Ignore result of kauth_authorize_fileop call.
8166 */
2d21ac55 8167 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
91447636
A
8168 (uintptr_t)fpath, (uintptr_t)spath);
8169 }
/*
 * Swap the cached names of the two vnodes, and their parents when those
 * differ, while holding the name cache lock.
 */
8170 name_cache_lock();
8171
8172 tmpname = fvp->v_name;
8173 fvp->v_name = svp->v_name;
8174 svp->v_name = tmpname;
8175
8176 if (fvp->v_parent != svp->v_parent) {
2d21ac55 8177 vnode_t tmp;
91447636
A
8178
8179 tmp = fvp->v_parent;
8180 fvp->v_parent = svp->v_parent;
8181 svp->v_parent = tmp;
8182 }
8183 name_cache_unlock();
8184
2d21ac55 8185#if CONFIG_FSE
91447636 8186 if (fpath != NULL && spath != NULL) {
2d21ac55 8187 add_fsevent(FSE_EXCHANGE, ctx,
91447636
A
8188 FSE_ARG_STRING, flen, fpath,
8189 FSE_ARG_FINFO, &f_finfo,
8190 FSE_ARG_STRING, slen, spath,
8191 FSE_ARG_FINFO, &s_finfo,
8192 FSE_ARG_DONE);
8193 }
2d21ac55 8194#endif
55e303ae
A
8195 }
8196
1c79356b 8197out:
2d21ac55
A
8198 if (fpath != NULL)
8199 RELEASE_PATH(fpath);
8200 if (spath != NULL)
8201 RELEASE_PATH(spath);
91447636
A
8202 vnode_put(svp);
8203 vnode_put(fvp);
1c79356b 8204out2:
1c79356b 8205 return (error);
91447636 8206}
1c79356b 8207
39236c6e
A
8208/*
8209 * Return (in MB) the amount of freespace on the given vnode's volume.
8210 */
8211uint32_t freespace_mb(vnode_t vp);
8212
8213uint32_t
8214freespace_mb(vnode_t vp)
8215{
8216 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
8217 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
8218 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
8219}
8220
316670eb 8221#if CONFIG_SEARCHFS
1c79356b 8222
1c79356b
A
8223/* ARGSUSED */
8224
8225int
b0d623f7 8226searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
1c79356b 8227{
39236c6e
A
8228 vnode_t vp, tvp;
8229 int i, error=0;
1c79356b
A
8230 int fserror = 0;
8231 struct nameidata nd;
b0d623f7 8232 struct user64_fssearchblock searchblock;
1c79356b
A
8233 struct searchstate *state;
8234 struct attrlist *returnattrs;
b0d623f7 8235 struct timeval timelimit;
1c79356b 8236 void *searchparams1,*searchparams2;
91447636
A
8237 uio_t auio = NULL;
8238 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
b0d623f7 8239 uint32_t nummatches;
1c79356b 8240 int mallocsize;
b0d623f7 8241 uint32_t nameiflags;
2d21ac55 8242 vfs_context_t ctx = vfs_context_current();
91447636 8243 char uio_buf[ UIO_SIZEOF(1) ];
1c79356b 8244
39236c6e 8245 /* Start by copying in fsearchblock parameter list */
91447636 8246 if (IS_64BIT_PROCESS(p)) {
b0d623f7
A
8247 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
8248 timelimit.tv_sec = searchblock.timelimit.tv_sec;
8249 timelimit.tv_usec = searchblock.timelimit.tv_usec;
91447636
A
8250 }
8251 else {
b0d623f7
A
8252 struct user32_fssearchblock tmp_searchblock;
8253
91447636
A
8254 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
8255 // munge into 64-bit version
8256 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
8257 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
8258 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
8259 searchblock.maxmatches = tmp_searchblock.maxmatches;
b0d623f7
A
8260 /*
8261 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8262 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8263 */
8264 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
8265 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
91447636
A
8266 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
8267 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
8268 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
8269 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
8270 searchblock.searchattrs = tmp_searchblock.searchattrs;
8271 }
8272 if (error)
1c79356b
A
8273 return(error);
8274
a3d08fcd
A
8275 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
8276 */
8277 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
8278 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
8279 return(EINVAL);
91447636 8280
1c79356b
A
8281 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8282 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
8283 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8284 /* block. */
fe8ab488
A
8285 /* */
8286 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8287 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8288 /* assumes the size is still 556 bytes it will continue to work */
8289
91447636 8290 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
fe8ab488 8291 sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
1c79356b
A
8292
8293 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
8294
8295 /* Now set up the various pointers to the correct place in our newly allocated memory */
8296
8297 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
8298 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
8299 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
8300
8301 /* Now copy in the stuff given our local variables. */
8302
91447636 8303 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
1c79356b
A
8304 goto freeandexit;
8305
91447636 8306 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
1c79356b
A
8307 goto freeandexit;
8308
91447636 8309 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
1c79356b
A
8310 goto freeandexit;
8311
91447636 8312 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
1c79356b 8313 goto freeandexit;
1c79356b 8314
39236c6e
A
8315 /*
8316 * When searching a union mount, need to set the
8317 * start flag at the first call on each layer to
8318 * reset state for the new volume.
8319 */
8320 if (uap->options & SRCHFS_START)
8321 state->ss_union_layer = 0;
8322 else
8323 uap->options |= state->ss_union_flags;
8324 state->ss_union_flags = 0;
b0d623f7
A
8325
8326 /*
8327 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8328 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
8329 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8330 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
8331 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8332 */
8333
8334 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
8335 attrreference_t* string_ref;
8336 u_int32_t* start_length;
8337 user64_size_t param_length;
8338
8339 /* validate searchparams1 */
8340 param_length = searchblock.sizeofsearchparams1;
8341 /* skip the word that specifies length of the buffer */
8342 start_length= (u_int32_t*) searchparams1;
8343 start_length= start_length+1;
8344 string_ref= (attrreference_t*) start_length;
8345
8346 /* ensure no negative offsets or too big offsets */
8347 if (string_ref->attr_dataoffset < 0 ) {
8348 error = EINVAL;
8349 goto freeandexit;
8350 }
8351 if (string_ref->attr_length > MAXPATHLEN) {
8352 error = EINVAL;
8353 goto freeandexit;
8354 }
8355
8356 /* Check for pointer overflow in the string ref */
8357 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
8358 error = EINVAL;
8359 goto freeandexit;
8360 }
8361
8362 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
8363 error = EINVAL;
8364 goto freeandexit;
8365 }
8366 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
8367 error = EINVAL;
8368 goto freeandexit;
8369 }
8370 }
8371
8372 /* set up the uio structure which will contain the users return buffer */
39236c6e
A
8373 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8374 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
1c79356b 8375
91447636 8376 nameiflags = 0;
1c79356b 8377 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6d2010ae
A
8378 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
8379 UIO_USERSPACE, uap->path, ctx);
1c79356b 8380
55e303ae
A
8381 error = namei(&nd);
8382 if (error)
1c79356b 8383 goto freeandexit;
39236c6e 8384 vp = nd.ni_vp;
91447636 8385 nameidone(&nd);
39236c6e
A
8386
8387 /*
8388 * Switch to the root vnode for the volume
8389 */
8390 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
fe8ab488 8391 vnode_put(vp);
39236c6e
A
8392 if (error)
8393 goto freeandexit;
39236c6e
A
8394 vp = tvp;
8395
8396 /*
8397 * If it's a union mount, the path lookup takes
8398 * us to the top layer. But we may need to descend
8399 * to a lower layer. For non-union mounts the layer
8400 * is always zero.
8401 */
8402 for (i = 0; i < (int) state->ss_union_layer; i++) {
8403 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
8404 break;
8405 tvp = vp;
8406 vp = vp->v_mount->mnt_vnodecovered;
8407 if (vp == NULL) {
fe8ab488 8408 vnode_put(tvp);
39236c6e
A
8409 error = ENOENT;
8410 goto freeandexit;
8411 }
8412 vnode_getwithref(vp);
8413 vnode_put(tvp);
8414 }
1c79356b 8415
6d2010ae
A
8416#if CONFIG_MACF
8417 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
8418 if (error) {
8419 vnode_put(vp);
8420 goto freeandexit;
8421 }
8422#endif
8423
1c79356b
A
8424
8425 /*
8426 * If searchblock.maxmatches == 0, then skip the search. This has happened
39236c6e 8427 * before and sometimes the underlying code doesn't deal with it well.
1c79356b
A
8428 */
8429 if (searchblock.maxmatches == 0) {
8430 nummatches = 0;
8431 goto saveandexit;
8432 }
8433
8434 /*
39236c6e
A
8435 * All right, we have everything we need, so let's make that call.
8436 *
8437 * We keep special track of the return value from the file system:
8438 * EAGAIN is an acceptable error condition that shouldn't keep us
8439 * from copying out any results...
1c79356b
A
8440 */
8441
6d2010ae 8442 fserror = VNOP_SEARCHFS(vp,
39236c6e
A
8443 searchparams1,
8444 searchparams2,
8445 &searchblock.searchattrs,
8446 (u_long)searchblock.maxmatches,
8447 &timelimit,
8448 returnattrs,
8449 &nummatches,
8450 (u_long)uap->scriptcode,
8451 (u_long)uap->options,
8452 auio,
8453 (struct searchstate *) &state->ss_fsstate,
8454 ctx);
6d2010ae 8455
39236c6e
A
8456 /*
8457 * If it's a union mount we need to be called again
8458 * to search the mounted-on filesystem.
8459 */
8460 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
8461 state->ss_union_flags = SRCHFS_START;
8462 state->ss_union_layer++; // search next layer down
8463 fserror = EAGAIN;
8464 }
8465
6d2010ae
A
8466saveandexit:
8467
8468 vnode_put(vp);
8469
8470 /* Now copy out the stuff that needs copying out. That means the number of matches, the
8471 search state. Everything was already put into the return buffer by the vop call. */
8472
8473 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
8474 goto freeandexit;
8475
39236c6e 8476 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
6d2010ae
A
8477 goto freeandexit;
8478
8479 error = fserror;
8480
8481freeandexit:
8482
8483 FREE(searchparams1,M_TEMP);
8484
8485 return(error);
8486
8487
8488} /* end of searchfs system call */
8489
316670eb
A
8490#else /* CONFIG_SEARCHFS */
8491
8492int
8493searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
8494{
8495 return (ENOTSUP);
8496}
8497
8498#endif /* CONFIG_SEARCHFS */
6d2010ae
A
8499
8500
8501lck_grp_attr_t * nspace_group_attr;
8502lck_attr_t * nspace_lock_attr;
8503lck_grp_t * nspace_mutex_group;
8504
8505lck_mtx_t nspace_handler_lock;
8506lck_mtx_t nspace_handler_exclusion_lock;
8507
8508time_t snapshot_timestamp=0;
8509int nspace_allow_virtual_devs=0;
8510
8511void nspace_handler_init(void);
8512
/*
 * One slot of the nspace_items[] table: a pending namespace/snapshot event
 * that one or more threads are waiting on in resolve_nspace_item_ext().
 * A slot whose flags are 0 is free for re-use.
 */
8513typedef struct nspace_item_info {
8514 struct vnode *vp; /* vnode the event is about; its address is also the sleep/wakeup channel for waiters */
8515 void *arg; /* optional argument from the requester (a uio whose offset/resid are reported to the handler) */
8516 uint64_t op; /* requested operation; copied out to the handler as its "flags" value */
8517 uint32_t vid; /* vnode_vid(vp) at enqueue time, used with vnode_getwithvid() by the handler */
8518 uint32_t flags; /* NSPACE_ITEM_* state and event-type bits */
8519 uint32_t token; /* id assigned when a handler claims the item; read back as the error code when CANCELLED */
8520 uint32_t refcount; /* number of threads currently waiting on this slot */
8521} nspace_item_info;
8522
8523#define MAX_NSPACE_ITEMS 128
8524nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
8525uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
8526uint32_t nspace_token_id=0;
8527uint32_t nspace_handler_timeout = 15; // seconds
8528
/* State bits kept in nspace_item_info.flags */
8529#define NSPACE_ITEM_NEW 0x0001 /* queued by a waiter, not yet claimed by a handler */
8530#define NSPACE_ITEM_PROCESSING 0x0002 /* claimed by a handler in wait_for_namespace_event() */
8531#define NSPACE_ITEM_DEAD 0x0004 /* NOTE(review): not referenced in this part of the file */
8532#define NSPACE_ITEM_CANCELLED 0x0008 /* waiter returns the item's token field as its error */
8533#define NSPACE_ITEM_DONE 0x0010 /* waiter returns 0 */
8534#define NSPACE_ITEM_RESET_TIMER 0x0020 /* on timeout the waiter clears this bit and sleeps again */
8535
/* Event-type bits kept in nspace_item_info.flags (see nspace_item_flags_for_type) */
8536#define NSPACE_ITEM_NSPACE_EVENT 0x0040
8537#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
6d2010ae 8538
fe8ab488 8539#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
6d2010ae
A
8540
8541//#pragma optimization_level 0
8542
/*
 * Kinds of namespace handler. The values are used directly as indices
 * into nspace_handlers[], which is sized by NSPACE_HANDLER_COUNT.
 */
8543typedef enum {
8544 NSPACE_HANDLER_NSPACE = 0,
8545 NSPACE_HANDLER_SNAPSHOT = 1,
6d2010ae
A
8546
8547 NSPACE_HANDLER_COUNT,
8548} nspace_type_t;
8549
/* Registration record for one handler type (one entry of nspace_handlers[]). */
8550typedef struct {
8551 uint64_t handler_tid; /* thread_tid() of the thread that registered as handler */
8552 struct proc *handler_proc; /* registered handler process; NULL when no handler is registered */
8553 int handler_busy; /* non-zero while a thread is inside wait_for_namespace_event() for this type */
8554} nspace_handler_t;
8555
8556nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
8557
39236c6e
A
8558/* namespace fsctl functions */
8559static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
8560static int nspace_item_flags_for_type(nspace_type_t nspace_type);
8561static int nspace_open_flags_for_type(nspace_type_t nspace_type);
8562static nspace_type_t nspace_type_for_op(uint64_t op);
8563static int nspace_is_special_process(struct proc *proc);
8564static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
8565static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
8566static int validate_namespace_args (int is64bit, int size);
8567static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
8568
8569
6d2010ae
A
8570static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
8571{
8572 switch(nspace_type) {
8573 case NSPACE_HANDLER_NSPACE:
8574 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
8575 case NSPACE_HANDLER_SNAPSHOT:
8576 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
6d2010ae
A
8577 default:
8578 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
8579 return 0;
8580 }
8581}
8582
8583static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
8584{
8585 switch(nspace_type) {
8586 case NSPACE_HANDLER_NSPACE:
8587 return NSPACE_ITEM_NSPACE_EVENT;
8588 case NSPACE_HANDLER_SNAPSHOT:
8589 return NSPACE_ITEM_SNAPSHOT_EVENT;
6d2010ae
A
8590 default:
8591 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
8592 return 0;
8593 }
8594}
8595
8596static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
8597{
8598 switch(nspace_type) {
8599 case NSPACE_HANDLER_NSPACE:
8600 return FREAD | FWRITE | O_EVTONLY;
8601 case NSPACE_HANDLER_SNAPSHOT:
6d2010ae
A
8602 return FREAD | O_EVTONLY;
8603 default:
8604 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
8605 return 0;
8606 }
8607}
8608
8609static inline nspace_type_t nspace_type_for_op(uint64_t op)
8610{
8611 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
8612 case NAMESPACE_HANDLER_NSPACE_EVENT:
8613 return NSPACE_HANDLER_NSPACE;
8614 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
8615 return NSPACE_HANDLER_SNAPSHOT;
6d2010ae
A
8616 default:
8617 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
8618 return NSPACE_HANDLER_NSPACE;
8619 }
8620}
8621
8622static inline int nspace_is_special_process(struct proc *proc)
8623{
8624 int i;
8625 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
8626 if (proc == nspace_handlers[i].handler_proc)
8627 return 1;
8628 }
8629 return 0;
8630}
8631
8632void
8633nspace_handler_init(void)
8634{
8635 nspace_lock_attr = lck_attr_alloc_init();
8636 nspace_group_attr = lck_grp_attr_alloc_init();
8637 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
8638 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
8639 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
8640 memset(&nspace_items[0], 0, sizeof(nspace_items));
8641}
8642
8643void
8644nspace_proc_exit(struct proc *p)
8645{
8646 int i, event_mask = 0;
8647
8648 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
8649 if (p == nspace_handlers[i].handler_proc) {
8650 event_mask |= nspace_item_flags_for_type(i);
8651 nspace_handlers[i].handler_tid = 0;
8652 nspace_handlers[i].handler_proc = NULL;
8653 }
8654 }
8655
8656 if (event_mask == 0) {
8657 return;
8658 }
8659
8660 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
8661 // if this process was the snapshot handler, zero snapshot_timeout
8662 snapshot_timestamp = 0;
8663 }
8664
8665 //
8666 // unblock anyone that's waiting for the handler that died
8667 //
8668 lck_mtx_lock(&nspace_handler_lock);
8669 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8670 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
8671
8672 if ( nspace_items[i].flags & event_mask ) {
8673
8674 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
8675 vnode_lock_spin(nspace_items[i].vp);
8676 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8677 vnode_unlock(nspace_items[i].vp);
8678 }
8679 nspace_items[i].vp = NULL;
8680 nspace_items[i].vid = 0;
8681 nspace_items[i].flags = NSPACE_ITEM_DONE;
8682 nspace_items[i].token = 0;
8683
8684 wakeup((caddr_t)&(nspace_items[i].vp));
8685 }
8686 }
8687 }
8688
8689 wakeup((caddr_t)&nspace_item_idx);
8690 lck_mtx_unlock(&nspace_handler_lock);
8691}
8692
8693
8694int
8695resolve_nspace_item(struct vnode *vp, uint64_t op)
8696{
8697 return resolve_nspace_item_ext(vp, op, NULL);
8698}
8699
8700int
8701resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
8702{
8703 int i, error, keep_waiting;
8704 struct timespec ts;
8705 nspace_type_t nspace_type = nspace_type_for_op(op);
8706
8707 // only allow namespace events on regular files, directories and symlinks.
8708 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
8709 return 0;
8710 }
8711
8712 //
8713 // if this is a snapshot event and the vnode is on a
8714 // disk image just pretend nothing happened since any
8715 // change to the disk image will cause the disk image
8716 // itself to get backed up and this avoids multi-way
8717 // deadlocks between the snapshot handler and the ever
8718 // popular diskimages-helper process. the variable
8719 // nspace_allow_virtual_devs allows this behavior to
8720 // be overridden (for use by the Mobile TimeMachine
8721 // testing infrastructure which uses disk images)
8722 //
8723 if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
8724 && (vp->v_mount != NULL)
8725 && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
8726 && !nspace_allow_virtual_devs) {
8727
8728 return 0;
8729 }
8730
8731 // if (thread_tid(current_thread()) == namespace_handler_tid) {
8732 if (nspace_handlers[nspace_type].handler_proc == NULL) {
8733 return 0;
8734 }
8735
8736 if (nspace_is_special_process(current_proc())) {
8737 return EDEADLK;
8738 }
8739
8740 lck_mtx_lock(&nspace_handler_lock);
8741
8742retry:
8743 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8744 if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
8745 break;
8746 }
8747 }
8748
8749 if (i >= MAX_NSPACE_ITEMS) {
8750 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8751 if (nspace_items[i].flags == 0) {
8752 break;
8753 }
8754 }
8755 } else {
8756 nspace_items[i].refcount++;
8757 }
8758
8759 if (i >= MAX_NSPACE_ITEMS) {
8760 ts.tv_sec = nspace_handler_timeout;
8761 ts.tv_nsec = 0;
8762
8763 error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
8764 if (error == 0) {
8765 // an entry got free'd up, go see if we can get a slot
8766 goto retry;
8767 } else {
8768 lck_mtx_unlock(&nspace_handler_lock);
8769 return error;
8770 }
8771 }
8772
8773 //
8774 // if it didn't already exist, add it. if it did exist
8775 // we'll get woken up when someone does a wakeup() on
8776 // the slot in the nspace_items table.
8777 //
8778 if (vp != nspace_items[i].vp) {
8779 nspace_items[i].vp = vp;
39236c6e 8780 nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
6d2010ae
A
8781 nspace_items[i].op = op;
8782 nspace_items[i].vid = vnode_vid(vp);
8783 nspace_items[i].flags = NSPACE_ITEM_NEW;
8784 nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
8785 if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
8786 if (arg) {
8787 vnode_lock_spin(vp);
8788 vp->v_flag |= VNEEDSSNAPSHOT;
8789 vnode_unlock(vp);
8790 }
8791 }
8792
8793 nspace_items[i].token = 0;
8794 nspace_items[i].refcount = 1;
8795
8796 wakeup((caddr_t)&nspace_item_idx);
8797 }
8798
8799 //
8800 // Now go to sleep until the handler does a wakeup on this
8801 // slot in the nspace_items table (or we timeout).
8802 //
8803 keep_waiting = 1;
8804 while(keep_waiting) {
8805 ts.tv_sec = nspace_handler_timeout;
8806 ts.tv_nsec = 0;
8807 error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
8808
8809 if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
8810 error = 0;
8811 } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
8812 error = nspace_items[i].token;
8813 } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
8814 if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
8815 nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
8816 continue;
8817 } else {
8818 error = ETIMEDOUT;
8819 }
8820 } else if (error == 0) {
8821 // hmmm, why did we get woken up?
8822 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
8823 nspace_items[i].token);
8824 }
8825
8826 if (--nspace_items[i].refcount == 0) {
8827 nspace_items[i].vp = NULL; // clear this so that no one will match on it again
8828 nspace_items[i].arg = NULL;
8829 nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
8830 nspace_items[i].flags = 0; // this clears it for re-use
8831 }
8832 wakeup(&nspace_token_id);
8833 keep_waiting = 0;
8834 }
8835
8836 lck_mtx_unlock(&nspace_handler_lock);
8837
8838 return error;
8839}
8840
8841
8842int
8843get_nspace_item_status(struct vnode *vp, int32_t *status)
8844{
8845 int i;
8846
8847 lck_mtx_lock(&nspace_handler_lock);
8848 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8849 if (nspace_items[i].vp == vp) {
8850 break;
8851 }
8852 }
8853
8854 if (i >= MAX_NSPACE_ITEMS) {
8855 lck_mtx_unlock(&nspace_handler_lock);
8856 return ENOENT;
8857 }
8858
8859 *status = nspace_items[i].flags;
8860 lck_mtx_unlock(&nspace_handler_lock);
8861 return 0;
8862}
8863
8864
#if 0
/*
 * (Compiled out.) Format a "/.vol/<fsid>/<fileid>" path for vp into path.
 * On entry *len is the buffer size; on return it is snprintf()'s result
 * plus one. Returns 0 on success, or -1 (with a placeholder path) when the
 * vnode attributes cannot be fetched.
 */
static int
build_volfs_path(struct vnode *vp, char *path, int *len)
{
	struct vnode_attr va;

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_fileid);

	if (vnode_getattr(vp, &va, vfs_context_kernel()) == 0) {
		*len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
		return 0;
	}

	*len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
	return -1;
}
#endif
8887
8888//
8889// Note: this function does NOT check permissions on all of the
8890// parent directories leading to this vnode. It should only be
8891// called on behalf of a root process. Otherwise a process may
8892// get access to a file because the file itself is readable even
8893// though its parent directories would prevent access.
8894//
8895static int
8896vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
8897{
8898 int error, action;
8899
8900 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8901 return error;
8902 }
8903
8904#if CONFIG_MACF
8905 error = mac_vnode_check_open(ctx, vp, fmode);
8906 if (error)
8907 return error;
8908#endif
1c79356b 8909
6d2010ae
A
8910 /* compute action to be authorized */
8911 action = 0;
8912 if (fmode & FREAD) {
8913 action |= KAUTH_VNODE_READ_DATA;
8914 }
8915 if (fmode & (FWRITE | O_TRUNC)) {
8916 /*
8917 * If we are writing, appending, and not truncating,
8918 * indicate that we are appending so that if the
8919 * UF_APPEND or SF_APPEND bits are set, we do not deny
8920 * the open.
8921 */
8922 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
8923 action |= KAUTH_VNODE_APPEND_DATA;
8924 } else {
8925 action |= KAUTH_VNODE_WRITE_DATA;
8926 }
8927 }
1c79356b 8928
6d2010ae
A
8929 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
8930 return error;
8931
1c79356b 8932
6d2010ae
A
8933 //
8934 // if the vnode is tagged VOPENEVT and the current process
8935 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
8936 // flag to the open mode so that this open won't count against
8937 // the vnode when carbon delete() does a vnode_isinuse() to see
8938 // if a file is currently in use. this allows spotlight
8939 // importers to not interfere with carbon apps that depend on
8940 // the no-delete-if-busy semantics of carbon delete().
8941 //
8942 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
8943 fmode |= O_EVTONLY;
8944 }
1c79356b 8945
6d2010ae
A
8946 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
8947 return error;
8948 }
8949 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
8950 VNOP_CLOSE(vp, fmode, ctx);
8951 return error;
8952 }
1c79356b 8953
4b17d6b6 8954 /* Call out to allow 3rd party notification of open.
6d2010ae
A
8955 * Ignore result of kauth_authorize_fileop call.
8956 */
4b17d6b6
A
8957#if CONFIG_MACF
8958 mac_vnode_notify_open(ctx, vp, fmode);
8959#endif
6d2010ae
A
8960 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
8961 (uintptr_t)vp, 0);
1c79356b 8962
1c79356b 8963
6d2010ae
A
8964 return 0;
8965}
1c79356b 8966
6d2010ae 8967static int
39236c6e 8968wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
6d2010ae
A
8969{
8970 int i, error=0, unblock=0;
8971 task_t curtask;
8972
8973 lck_mtx_lock(&nspace_handler_exclusion_lock);
8974 if (nspace_handlers[nspace_type].handler_busy) {
8975 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8976 return EBUSY;
8977 }
8978 nspace_handlers[nspace_type].handler_busy = 1;
8979 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8980
8981 /*
8982 * Any process that gets here will be one of the namespace handlers.
8983 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
8984 * as we can cause deadlocks to occur, because the namespace handler may prevent
8985 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
8986 * process.
8987 */
8988 curtask = current_task();
8989 bsd_set_dependency_capable (curtask);
8990
8991 lck_mtx_lock(&nspace_handler_lock);
8992 if (nspace_handlers[nspace_type].handler_proc == NULL) {
8993 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
8994 nspace_handlers[nspace_type].handler_proc = current_proc();
8995 }
8996
8997 while (error == 0) {
8998
8999 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9000 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9001 if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9002 continue;
9003 }
9004 break;
9005 }
9006 }
9007
9008 if (i < MAX_NSPACE_ITEMS) {
9009 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
9010 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
9011 nspace_items[i].token = ++nspace_token_id;
9012
9013 if (nspace_items[i].vp) {
9014 struct fileproc *fp;
9015 int32_t indx, fmode;
9016 struct proc *p = current_proc();
9017 vfs_context_t ctx = vfs_context_current();
39236c6e
A
9018 struct vnode_attr va;
9019
9020
9021 /*
9022 * Use vnode pointer to acquire a file descriptor for
9023 * hand-off to userland
9024 */
6d2010ae 9025 fmode = nspace_open_flags_for_type(nspace_type);
6d2010ae
A
9026 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
9027 if (error) {
9028 unblock = 1;
9029 break;
9030 }
9031 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
9032 if (error) {
9033 unblock = 1;
9034 vnode_put(nspace_items[i].vp);
9035 break;
9036 }
9037
9038 if ((error = falloc(p, &fp, &indx, ctx))) {
9039 vn_close(nspace_items[i].vp, fmode, ctx);
9040 vnode_put(nspace_items[i].vp);
9041 unblock = 1;
9042 break;
9043 }
9044
9045 fp->f_fglob->fg_flag = fmode;
6d2010ae
A
9046 fp->f_fglob->fg_ops = &vnops;
9047 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
9048
9049 proc_fdlock(p);
9050 procfdtbl_releasefd(p, indx, NULL);
9051 fp_drop(p, indx, fp, 1);
39236c6e
A
9052 proc_fdunlock(p);
9053
9054 /*
9055 * All variants of the namespace handler struct support these three fields:
9056 * token, flags, and the FD pointer
9057 */
9058 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
9059 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
9060 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
9061
9062 /*
9063 * Handle optional fields:
9064 * extended version support an info ptr (offset, length), and the
9065 *
9066 * namedata version supports a unique per-link object ID
9067 *
9068 */
9069 if (nhd->infoptr) {
6d2010ae
A
9070 uio_t uio = (uio_t)nspace_items[i].arg;
9071 uint64_t u_offset, u_length;
9072
9073 if (uio) {
9074 u_offset = uio_offset(uio);
9075 u_length = uio_resid(uio);
9076 } else {
9077 u_offset = 0;
9078 u_length = 0;
9079 }
39236c6e
A
9080 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
9081 error = copyout(&u_length, nhd->infoptr+sizeof(uint64_t), sizeof(uint64_t));
6d2010ae 9082 }
39236c6e
A
9083
9084 if (nhd->objid) {
9085 VATTR_INIT(&va);
9086 VATTR_WANTED(&va, va_linkid);
9087 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
9088 if (error == 0 ) {
9089 uint64_t linkid = 0;
9090 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
9091 linkid = (uint64_t)va.va_linkid;
9092 }
9093 error = copyout (&linkid, nhd->objid, sizeof(uint64_t));
9094 }
9095 }
9096
6d2010ae
A
9097 if (error) {
9098 vn_close(nspace_items[i].vp, fmode, ctx);
9099 fp_free(p, indx, fp);
9100 unblock = 1;
9101 }
9102
9103 vnode_put(nspace_items[i].vp);
9104
9105 break;
9106 } else {
9107 printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
9108 i, nspace_items[i].vp, error, nspace_items[i].vp->v_name);
9109 }
9110
9111 } else {
9112 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
9113 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9114 error = EINVAL;
9115 break;
9116 }
9117
9118 }
9119 }
9120
9121 if (unblock) {
9122 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9123 vnode_lock_spin(nspace_items[i].vp);
9124 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9125 vnode_unlock(nspace_items[i].vp);
9126 }
9127 nspace_items[i].vp = NULL;
9128 nspace_items[i].vid = 0;
9129 nspace_items[i].flags = NSPACE_ITEM_DONE;
9130 nspace_items[i].token = 0;
9131
9132 wakeup((caddr_t)&(nspace_items[i].vp));
9133 }
9134
9135 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
9136 // just go through every snapshot event and unblock it immediately.
9137 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9138 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9139 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9140 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9141 nspace_items[i].vp = NULL;
9142 nspace_items[i].vid = 0;
9143 nspace_items[i].flags = NSPACE_ITEM_DONE;
9144 nspace_items[i].token = 0;
9145
9146 wakeup((caddr_t)&(nspace_items[i].vp));
9147 }
9148 }
9149 }
9150 }
9151 }
9152
9153 lck_mtx_unlock(&nspace_handler_lock);
9154
9155 lck_mtx_lock(&nspace_handler_exclusion_lock);
9156 nspace_handlers[nspace_type].handler_busy = 0;
9157 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9158
9159 return error;
9160}
1c79356b 9161
39236c6e
A
9162static inline int validate_namespace_args (int is64bit, int size) {
9163
9164 if (is64bit) {
9165 /* Must be one of these */
9166 if (size == sizeof(user64_namespace_handler_info)) {
9167 goto sizeok;
9168 }
9169 if (size == sizeof(user64_namespace_handler_info_ext)) {
9170 goto sizeok;
9171 }
9172 if (size == sizeof(user64_namespace_handler_data)) {
9173 goto sizeok;
9174 }
9175 return EINVAL;
9176 }
9177 else {
9178 /* 32 bit -- must be one of these */
9179 if (size == sizeof(user32_namespace_handler_info)) {
9180 goto sizeok;
9181 }
9182 if (size == sizeof(user32_namespace_handler_info_ext)) {
9183 goto sizeok;
9184 }
9185 if (size == sizeof(user32_namespace_handler_data)) {
9186 goto sizeok;
9187 }
9188 return EINVAL;
9189 }
9190
9191sizeok:
9192
9193 return 0;
9194
9195}
1c79356b 9196
6d2010ae
A
9197static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
9198{
9199 int error = 0;
39236c6e 9200 namespace_handler_data nhd;
6d2010ae 9201
39236c6e
A
9202 bzero (&nhd, sizeof(namespace_handler_data));
9203
9204 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
9205 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
6d2010ae
A
9206 return EINVAL;
9207 }
9208
9209 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9210 return error;
9211 }
9212
39236c6e
A
9213 error = validate_namespace_args (is64bit, size);
9214 if (error) {
9215 return error;
6d2010ae
A
9216 }
9217
39236c6e
A
9218 /* Copy in the userland pointers into our kernel-only struct */
9219
6d2010ae 9220 if (is64bit) {
39236c6e
A
9221 /* 64 bit userland structures */
9222 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
9223 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
9224 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
9225
9226 /* If the size is greater than the standard info struct, add in extra fields */
9227 if (size > (sizeof(user64_namespace_handler_info))) {
9228 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
9229 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
9230 }
9231 if (size == (sizeof(user64_namespace_handler_data))) {
9232 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
9233 }
9234 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
6d2010ae 9235 }
39236c6e
A
9236 }
9237 else {
9238 /* 32 bit userland structures */
9239 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
9240 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
9241 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
9242
9243 if (size > (sizeof(user32_namespace_handler_info))) {
9244 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
9245 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
9246 }
9247 if (size == (sizeof(user32_namespace_handler_data))) {
9248 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
9249 }
9250 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
6d2010ae
A
9251 }
9252 }
9253
39236c6e 9254 return wait_for_namespace_event(&nhd, nspace_type);
6d2010ae 9255}
1c79356b
A
9256
9257/*
9258 * Make a filesystem-specific control call:
9259 */
1c79356b 9260/* ARGSUSED */
b0d623f7
A
9261static int
9262fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
1c79356b 9263{
b0d623f7 9264 int error=0;
91447636 9265 boolean_t is64bit;
2d21ac55 9266 u_int size;
1c79356b
A
9267#define STK_PARAMS 128
9268 char stkbuf[STK_PARAMS];
9269 caddr_t data, memp;
b0d623f7 9270 vnode_t vp = *arg_vp;
1c79356b
A
9271
9272 size = IOCPARM_LEN(cmd);
9273 if (size > IOCPARM_MAX) return (EINVAL);
9274
6d2010ae 9275 is64bit = proc_is64bit(p);
91447636 9276
1c79356b 9277 memp = NULL;
04b8595b 9278
3e170ce0 9279
04b8595b
A
9280 /*
9281 * ensure the buffer is large enough for underlying calls
9282 */
9283#ifndef HFSIOC_GETPATH
3e170ce0 9284 typedef char pn_t[MAXPATHLEN];
04b8595b
A
9285#define HFSIOC_GETPATH _IOWR('h', 13, pn_t)
9286#endif
9287
9288#ifndef HFS_GETPATH
9289#define HFS_GETPATH IOCBASECMD(HFSIOC_GETPATH)
9290#endif
9291 if (IOCBASECMD(cmd) == HFS_GETPATH) {
9292 /* Round up to MAXPATHLEN regardless of user input */
9293 size = MAXPATHLEN;
9294 }
9295
1c79356b
A
9296 if (size > sizeof (stkbuf)) {
9297 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
9298 data = memp;
9299 } else {
91447636 9300 data = &stkbuf[0];
1c79356b
A
9301 };
9302
9303 if (cmd & IOC_IN) {
9304 if (size) {
b0d623f7 9305 error = copyin(udata, data, size);
fe8ab488
A
9306 if (error) {
9307 if (memp) {
9308 kfree (memp, size);
9309 }
9310 return error;
9311 }
1c79356b 9312 } else {
6d2010ae
A
9313 if (is64bit) {
9314 *(user_addr_t *)data = udata;
9315 }
9316 else {
9317 *(uint32_t *)data = (uint32_t)udata;
9318 }
1c79356b
A
9319 };
9320 } else if ((cmd & IOC_OUT) && size) {
9321 /*
9322 * Zero the buffer so the user always
9323 * gets back something deterministic.
9324 */
9325 bzero(data, size);
91447636 9326 } else if (cmd & IOC_VOID) {
b0d623f7 9327 if (is64bit) {
6d2010ae 9328 *(user_addr_t *)data = udata;
b0d623f7
A
9329 }
9330 else {
6d2010ae 9331 *(uint32_t *)data = (uint32_t)udata;
b0d623f7 9332 }
91447636 9333 }
1c79356b 9334
b0d623f7 9335 /* Check to see if it's a generic command */
fe8ab488 9336 switch (IOCBASECMD(cmd)) {
91447636 9337
fe8ab488
A
9338 case FSCTL_SYNC_VOLUME: {
9339 mount_t mp = vp->v_mount;
9340 int arg = *(uint32_t*)data;
b0d623f7 9341
fe8ab488
A
9342 /* record vid of vp so we can drop it below. */
9343 uint32_t vvid = vp->v_id;
b0d623f7 9344
fe8ab488
A
9345 /*
9346 * Then grab mount_iterref so that we can release the vnode.
9347 * Without this, a thread may call vnode_iterate_prepare then
9348 * get into a deadlock because we've never released the root vp
9349 */
9350 error = mount_iterref (mp, 0);
9351 if (error) {
9352 break;
9353 }
9354 vnode_put(vp);
9355
9356 /* issue the sync for this volume */
9357 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
9358
9359 /*
9360 * Then release the mount_iterref once we're done syncing; it's not
9361 * needed for the VNOP_IOCTL below
9362 */
9363 mount_iterdrop(mp);
9364
9365 if (arg & FSCTL_SYNC_FULLSYNC) {
9366 /* re-obtain vnode iocount on the root vp, if possible */
9367 error = vnode_getwithvid (vp, vvid);
9368 if (error == 0) {
9369 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
9370 vnode_put (vp);
9371 }
b0d623f7 9372 }
fe8ab488
A
9373 /* mark the argument VP as having been released */
9374 *arg_vp = NULL;
b0d623f7 9375 }
fe8ab488 9376 break;
b0d623f7 9377
fe8ab488
A
9378 case FSCTL_SET_PACKAGE_EXTS: {
9379 user_addr_t ext_strings;
9380 uint32_t num_entries;
9381 uint32_t max_width;
b0d623f7 9382
fe8ab488
A
9383 if ( (is64bit && size != sizeof(user64_package_ext_info))
9384 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
9385
9386 // either you're 64-bit and passed a 64-bit struct or
9387 // you're 32-bit and passed a 32-bit struct. otherwise
9388 // it's not ok.
9389 error = EINVAL;
9390 break;
9391 }
9392
9393 if (is64bit) {
9394 ext_strings = ((user64_package_ext_info *)data)->strings;
9395 num_entries = ((user64_package_ext_info *)data)->num_entries;
9396 max_width = ((user64_package_ext_info *)data)->max_width;
9397 } else {
9398 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
9399 num_entries = ((user32_package_ext_info *)data)->num_entries;
9400 max_width = ((user32_package_ext_info *)data)->max_width;
9401 }
9402 error = set_package_extensions_table(ext_strings, num_entries, max_width);
6d2010ae 9403 }
fe8ab488 9404 break;
2d21ac55 9405
fe8ab488
A
9406 /* namespace handlers */
9407 case FSCTL_NAMESPACE_HANDLER_GET: {
9408 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
6d2010ae 9409 }
fe8ab488 9410 break;
b0d623f7 9411
fe8ab488
A
9412 /* Snapshot handlers */
9413 case FSCTL_OLD_SNAPSHOT_HANDLER_GET: {
9414 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
9415 }
9416 break;
39236c6e 9417
fe8ab488
A
9418 case FSCTL_SNAPSHOT_HANDLER_GET_EXT: {
9419 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
9420 }
9421 break;
39236c6e 9422
fe8ab488
A
9423 case FSCTL_NAMESPACE_HANDLER_UPDATE: {
9424 uint32_t token, val;
9425 int i;
39236c6e 9426
fe8ab488
A
9427 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
9428 break;
9429 }
39236c6e 9430
fe8ab488
A
9431 if (!nspace_is_special_process(p)) {
9432 error = EINVAL;
9433 break;
9434 }
6d2010ae 9435
fe8ab488
A
9436 token = ((uint32_t *)data)[0];
9437 val = ((uint32_t *)data)[1];
6d2010ae 9438
fe8ab488 9439 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 9440
fe8ab488
A
9441 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9442 if (nspace_items[i].token == token) {
9443 break; /* exit for loop, not case stmt */
9444 }
9445 }
6d2010ae 9446
fe8ab488
A
9447 if (i >= MAX_NSPACE_ITEMS) {
9448 error = ENOENT;
9449 } else {
9450 //
9451 // if this bit is set, when resolve_nspace_item() times out
9452 // it will loop and go back to sleep.
9453 //
9454 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
9455 }
6d2010ae 9456
fe8ab488
A
9457 lck_mtx_unlock(&nspace_handler_lock);
9458
9459 if (error) {
9460 printf("nspace-handler-update: did not find token %u\n", token);
9461 }
9462 }
9463 break;
9464
9465 case FSCTL_NAMESPACE_HANDLER_UNBLOCK: {
9466 uint32_t token, val;
9467 int i;
9468
9469 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
6d2010ae
A
9470 break;
9471 }
6d2010ae 9472
fe8ab488
A
9473 if (!nspace_is_special_process(p)) {
9474 error = EINVAL;
9475 break;
9476 }
6d2010ae 9477
fe8ab488
A
9478 token = ((uint32_t *)data)[0];
9479 val = ((uint32_t *)data)[1];
6d2010ae 9480
fe8ab488 9481 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 9482
fe8ab488
A
9483 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9484 if (nspace_items[i].token == token) {
9485 break; /* exit for loop, not case statement */
9486 }
9487 }
6d2010ae 9488
fe8ab488
A
9489 if (i >= MAX_NSPACE_ITEMS) {
9490 printf("nspace-handler-unblock: did not find token %u\n", token);
9491 error = ENOENT;
9492 } else {
9493 if (val == 0 && nspace_items[i].vp) {
9494 vnode_lock_spin(nspace_items[i].vp);
9495 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9496 vnode_unlock(nspace_items[i].vp);
9497 }
6d2010ae 9498
fe8ab488
A
9499 nspace_items[i].vp = NULL;
9500 nspace_items[i].arg = NULL;
9501 nspace_items[i].op = 0;
9502 nspace_items[i].vid = 0;
9503 nspace_items[i].flags = NSPACE_ITEM_DONE;
9504 nspace_items[i].token = 0;
6d2010ae 9505
fe8ab488
A
9506 wakeup((caddr_t)&(nspace_items[i].vp));
9507 }
9508
9509 lck_mtx_unlock(&nspace_handler_lock);
9510 }
9511 break;
6d2010ae 9512
fe8ab488
A
9513 case FSCTL_NAMESPACE_HANDLER_CANCEL: {
9514 uint32_t token, val;
9515 int i;
6d2010ae 9516
fe8ab488 9517 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
6d2010ae
A
9518 break;
9519 }
6d2010ae 9520
fe8ab488
A
9521 if (!nspace_is_special_process(p)) {
9522 error = EINVAL;
9523 break;
6d2010ae
A
9524 }
9525
fe8ab488
A
9526 token = ((uint32_t *)data)[0];
9527 val = ((uint32_t *)data)[1];
6d2010ae 9528
fe8ab488 9529 lck_mtx_lock(&nspace_handler_lock);
6d2010ae 9530
fe8ab488
A
9531 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9532 if (nspace_items[i].token == token) {
9533 break; /* exit for loop, not case stmt */
9534 }
9535 }
6d2010ae 9536
fe8ab488
A
9537 if (i >= MAX_NSPACE_ITEMS) {
9538 printf("nspace-handler-cancel: did not find token %u\n", token);
9539 error = ENOENT;
9540 } else {
9541 if (nspace_items[i].vp) {
9542 vnode_lock_spin(nspace_items[i].vp);
9543 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9544 vnode_unlock(nspace_items[i].vp);
9545 }
6d2010ae 9546
fe8ab488
A
9547 nspace_items[i].vp = NULL;
9548 nspace_items[i].arg = NULL;
9549 nspace_items[i].vid = 0;
9550 nspace_items[i].token = val;
9551 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
9552 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
6d2010ae 9553
fe8ab488
A
9554 wakeup((caddr_t)&(nspace_items[i].vp));
9555 }
6d2010ae 9556
fe8ab488
A
9557 lck_mtx_unlock(&nspace_handler_lock);
9558 }
9559 break;
6d2010ae 9560
fe8ab488
A
9561 case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
9562 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
6d2010ae
A
9563 break;
9564 }
6d2010ae 9565
fe8ab488 9566 // we explicitly do not do the namespace_handler_proc check here
6d2010ae 9567
fe8ab488
A
9568 lck_mtx_lock(&nspace_handler_lock);
9569 snapshot_timestamp = ((uint32_t *)data)[0];
9570 wakeup(&nspace_item_idx);
9571 lck_mtx_unlock(&nspace_handler_lock);
9572 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
6d2010ae 9573
fe8ab488
A
9574 }
9575 break;
6d2010ae 9576
fe8ab488
A
9577 case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
9578 {
9579 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9580 break;
9581 }
6d2010ae 9582
fe8ab488
A
9583 lck_mtx_lock(&nspace_handler_lock);
9584 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
9585 lck_mtx_unlock(&nspace_handler_lock);
9586 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
9587 nspace_allow_virtual_devs ? "" : " NOT");
9588 error = 0;
6d2010ae 9589
6d2010ae 9590 }
fe8ab488 9591 break;
6d2010ae 9592
fe8ab488
A
9593 case FSCTL_SET_FSTYPENAME_OVERRIDE:
9594 {
9595 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9596 break;
9597 }
9598 if (vp->v_mount) {
9599 mount_lock(vp->v_mount);
9600 if (data[0] != 0) {
9601 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
9602 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
9603 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
9604 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
9605 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
9606 }
9607 } else {
9608 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
9609 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
9610 }
9611 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
9612 vp->v_mount->fstypename_override[0] = '\0';
6d2010ae 9613 }
fe8ab488 9614 mount_unlock(vp->v_mount);
6d2010ae 9615 }
6d2010ae 9616 }
fe8ab488
A
9617 break;
9618
9619 default: {
9620 /* Invoke the filesystem-specific code */
9621 error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
9622 }
9623
9624 } /* end switch stmt */
9625
1c79356b 9626 /*
fe8ab488 9627 * if no errors, copy any data to user. Size was
1c79356b
A
9628 * already set and checked above.
9629 */
91447636 9630 if (error == 0 && (cmd & IOC_OUT) && size)
b0d623f7 9631 error = copyout(data, udata, size);
1c79356b 9632
fe8ab488
A
9633 if (memp) {
9634 kfree(memp, size);
9635 }
1c79356b
A
9636
9637 return error;
9638}
b0d623f7
A
9639
9640/* ARGSUSED */
9641int
9642fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
9643{
9644 int error;
9645 struct nameidata nd;
9646 u_long nameiflags;
9647 vnode_t vp = NULL;
9648 vfs_context_t ctx = vfs_context_current();
9649
9650 AUDIT_ARG(cmd, uap->cmd);
9651 AUDIT_ARG(value32, uap->options);
9652 /* Get the vnode for the file we are getting info on: */
9653 nameiflags = 0;
9654 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6d2010ae
A
9655 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
9656 UIO_USERSPACE, uap->path, ctx);
b0d623f7
A
9657 if ((error = namei(&nd))) goto done;
9658 vp = nd.ni_vp;
9659 nameidone(&nd);
9660
9661#if CONFIG_MACF
9662 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
9663 if (error) {
9664 goto done;
9665 }
9666#endif
9667
9668 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
9669
9670done:
9671 if (vp)
9672 vnode_put(vp);
9673 return error;
9674}
9675/* ARGSUSED */
9676int
9677ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
9678{
9679 int error;
9680 vnode_t vp = NULL;
9681 vfs_context_t ctx = vfs_context_current();
9682 int fd = -1;
9683
9684 AUDIT_ARG(fd, uap->fd);
9685 AUDIT_ARG(cmd, uap->cmd);
9686 AUDIT_ARG(value32, uap->options);
9687
9688 /* Get the vnode for the file we are getting info on: */
9689 if ((error = file_vnode(uap->fd, &vp)))
3e170ce0 9690 return error;
b0d623f7
A
9691 fd = uap->fd;
9692 if ((error = vnode_getwithref(vp))) {
3e170ce0
A
9693 file_drop(fd);
9694 return error;
b0d623f7
A
9695 }
9696
9697#if CONFIG_MACF
3e170ce0
A
9698 if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
9699 file_drop(fd);
9700 vnode_put(vp);
9701 return error;
b0d623f7
A
9702 }
9703#endif
9704
9705 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
9706
3e170ce0 9707 file_drop(fd);
b0d623f7 9708
3e170ce0
A
9709 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
9710 if (vp) {
b0d623f7 9711 vnode_put(vp);
3e170ce0
A
9712 }
9713
b0d623f7
A
9714 return error;
9715}
1c79356b 9716/* end of fsctl system call */
0b4e3aa0 9717
91447636
A
9718/*
9719 * Retrieve the data of an extended attribute.
9720 */
9721int
2d21ac55 9722getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
91447636 9723{
2d21ac55 9724 vnode_t vp;
91447636
A
9725 struct nameidata nd;
9726 char attrname[XATTR_MAXNAMELEN+1];
2d21ac55 9727 vfs_context_t ctx = vfs_context_current();
91447636
A
9728 uio_t auio = NULL;
9729 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9730 size_t attrsize = 0;
9731 size_t namelen;
b0d623f7 9732 u_int32_t nameiflags;
91447636
A
9733 int error;
9734 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 9735
2d21ac55 9736 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 9737 return (EINVAL);
55e303ae 9738
91447636 9739 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 9740 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
9741 if ((error = namei(&nd))) {
9742 return (error);
9743 }
9744 vp = nd.ni_vp;
9745 nameidone(&nd);
55e303ae 9746
91447636
A
9747 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9748 goto out;
9749 }
9750 if (xattr_protected(attrname)) {
6d2010ae
A
9751 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
9752 error = EPERM;
9753 goto out;
9754 }
91447636 9755 }
b0d623f7
A
9756 /*
9757 * the specific check for 0xffffffff is a hack to preserve
9758 * binaray compatibilty in K64 with applications that discovered
9759 * that passing in a buf pointer and a size of -1 resulted in
9760 * just the size of the indicated extended attribute being returned.
9761 * this isn't part of the documented behavior, but because of the
9762 * original implemtation's check for "uap->size > 0", this behavior
9763 * was allowed. In K32 that check turned into a signed comparison
9764 * even though uap->size is unsigned... in K64, we blow by that
9765 * check because uap->size is unsigned and doesn't get sign smeared
9766 * in the munger for a 32 bit user app. we also need to add a
9767 * check to limit the maximum size of the buffer being passed in...
9768 * unfortunately, the underlying fileystems seem to just malloc
9769 * the requested size even if the actual extended attribute is tiny.
9770 * because that malloc is for kernel wired memory, we have to put a
9771 * sane limit on it.
9772 *
9773 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
9774 * U64 running on K64 will yield -1 (64 bits wide)
9775 * U32/U64 running on K32 will yield -1 (32 bits wide)
9776 */
9777 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
9778 goto no_uio;
9779
b0d623f7 9780 if (uap->value) {
6d2010ae
A
9781 if (uap->size > (size_t)XATTR_MAXSIZE)
9782 uap->size = XATTR_MAXSIZE;
9783
91447636
A
9784 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9785 &uio_buf[0], sizeof(uio_buf));
9786 uio_addiov(auio, uap->value, uap->size);
9787 }
b0d623f7 9788no_uio:
2d21ac55 9789 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
91447636
A
9790out:
9791 vnode_put(vp);
55e303ae 9792
91447636
A
9793 if (auio) {
9794 *retval = uap->size - uio_resid(auio);
9795 } else {
9796 *retval = (user_ssize_t)attrsize;
55e303ae
A
9797 }
9798
91447636
A
9799 return (error);
9800}
55e303ae 9801
91447636
A
9802/*
9803 * Retrieve the data of an extended attribute.
9804 */
9805int
2d21ac55 9806fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
91447636 9807{
2d21ac55 9808 vnode_t vp;
91447636 9809 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
9810 uio_t auio = NULL;
9811 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9812 size_t attrsize = 0;
9813 size_t namelen;
9814 int error;
9815 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 9816
2d21ac55 9817 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 9818 return (EINVAL);
55e303ae 9819
91447636
A
9820 if ( (error = file_vnode(uap->fd, &vp)) ) {
9821 return (error);
9822 }
9823 if ( (error = vnode_getwithref(vp)) ) {
9824 file_drop(uap->fd);
9825 return(error);
9826 }
9827 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9828 goto out;
9829 }
9830 if (xattr_protected(attrname)) {
9831 error = EPERM;
9832 goto out;
9833 }
9834 if (uap->value && uap->size > 0) {
9835 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9836 &uio_buf[0], sizeof(uio_buf));
9837 uio_addiov(auio, uap->value, uap->size);
9838 }
55e303ae 9839
2d21ac55 9840 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
91447636
A
9841out:
9842 (void)vnode_put(vp);
9843 file_drop(uap->fd);
55e303ae 9844
91447636
A
9845 if (auio) {
9846 *retval = uap->size - uio_resid(auio);
9847 } else {
9848 *retval = (user_ssize_t)attrsize;
9849 }
9850 return (error);
9851}
55e303ae 9852
91447636
A
9853/*
9854 * Set the data of an extended attribute.
9855 */
55e303ae 9856int
2d21ac55 9857setxattr(proc_t p, struct setxattr_args *uap, int *retval)
55e303ae 9858{
2d21ac55 9859 vnode_t vp;
91447636
A
9860 struct nameidata nd;
9861 char attrname[XATTR_MAXNAMELEN+1];
2d21ac55 9862 vfs_context_t ctx = vfs_context_current();
91447636
A
9863 uio_t auio = NULL;
9864 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9865 size_t namelen;
b0d623f7 9866 u_int32_t nameiflags;
91447636
A
9867 int error;
9868 char uio_buf[ UIO_SIZEOF(1) ];
55e303ae 9869
2d21ac55 9870 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 9871 return (EINVAL);
55e303ae 9872
91447636 9873 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
6d2010ae
A
9874 if (error == EPERM) {
9875 /* if the string won't fit in attrname, copyinstr emits EPERM */
9876 return (ENAMETOOLONG);
9877 }
9878 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9879 return error;
91447636
A
9880 }
9881 if (xattr_protected(attrname))
9882 return(EPERM);
2d21ac55 9883 if (uap->size != 0 && uap->value == 0) {
91447636 9884 return (EINVAL);
55e303ae 9885 }
55e303ae 9886
91447636 9887 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 9888 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
9889 if ((error = namei(&nd))) {
9890 return (error);
9891 }
9892 vp = nd.ni_vp;
9893 nameidone(&nd);
55e303ae 9894
91447636
A
9895 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9896 &uio_buf[0], sizeof(uio_buf));
9897 uio_addiov(auio, uap->value, uap->size);
55e303ae 9898
2d21ac55
A
9899 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
9900#if CONFIG_FSE
9901 if (error == 0) {
9902 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9903 FSE_ARG_VNODE, vp,
9904 FSE_ARG_DONE);
9905 }
9906#endif
91447636
A
9907 vnode_put(vp);
9908 *retval = 0;
9909 return (error);
9910}
55e303ae 9911
91447636
A
9912/*
9913 * Set the data of an extended attribute.
9914 */
9915int
2d21ac55 9916fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
91447636 9917{
2d21ac55 9918 vnode_t vp;
91447636 9919 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
9920 uio_t auio = NULL;
9921 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9922 size_t namelen;
9923 int error;
9924 char uio_buf[ UIO_SIZEOF(1) ];
6d2010ae 9925#if CONFIG_FSE
2d21ac55 9926 vfs_context_t ctx = vfs_context_current();
6d2010ae 9927#endif
55e303ae 9928
2d21ac55 9929 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 9930 return (EINVAL);
55e303ae 9931
91447636 9932 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
3e170ce0
A
9933 if (error == EPERM) {
9934 /* if the string won't fit in attrname, copyinstr emits EPERM */
9935 return (ENAMETOOLONG);
9936 }
9937 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9938 return error;
55e303ae 9939 }
91447636
A
9940 if (xattr_protected(attrname))
9941 return(EPERM);
2d21ac55 9942 if (uap->size != 0 && uap->value == 0) {
91447636 9943 return (EINVAL);
55e303ae 9944 }
91447636
A
9945 if ( (error = file_vnode(uap->fd, &vp)) ) {
9946 return (error);
55e303ae 9947 }
91447636
A
9948 if ( (error = vnode_getwithref(vp)) ) {
9949 file_drop(uap->fd);
9950 return(error);
9951 }
9952 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9953 &uio_buf[0], sizeof(uio_buf));
9954 uio_addiov(auio, uap->value, uap->size);
91447636 9955
2d21ac55
A
9956 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
9957#if CONFIG_FSE
9958 if (error == 0) {
9959 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9960 FSE_ARG_VNODE, vp,
9961 FSE_ARG_DONE);
9962 }
9963#endif
91447636
A
9964 vnode_put(vp);
9965 file_drop(uap->fd);
9966 *retval = 0;
9967 return (error);
9968}
55e303ae 9969
91447636
A
9970/*
9971 * Remove an extended attribute.
b0d623f7 9972 * XXX Code duplication here.
91447636 9973 */
91447636 9974int
2d21ac55 9975removexattr(proc_t p, struct removexattr_args *uap, int *retval)
91447636 9976{
2d21ac55 9977 vnode_t vp;
91447636
A
9978 struct nameidata nd;
9979 char attrname[XATTR_MAXNAMELEN+1];
9980 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
2d21ac55 9981 vfs_context_t ctx = vfs_context_current();
91447636 9982 size_t namelen;
b0d623f7 9983 u_int32_t nameiflags;
91447636 9984 int error;
55e303ae 9985
2d21ac55 9986 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 9987 return (EINVAL);
55e303ae 9988
91447636
A
9989 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
9990 if (error != 0) {
9991 return (error);
9992 }
9993 if (xattr_protected(attrname))
9994 return(EPERM);
9995 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 9996 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
9997 if ((error = namei(&nd))) {
9998 return (error);
9999 }
10000 vp = nd.ni_vp;
10001 nameidone(&nd);
55e303ae 10002
2d21ac55
A
10003 error = vn_removexattr(vp, attrname, uap->options, ctx);
10004#if CONFIG_FSE
10005 if (error == 0) {
10006 add_fsevent(FSE_XATTR_REMOVED, ctx,
10007 FSE_ARG_VNODE, vp,
10008 FSE_ARG_DONE);
10009 }
10010#endif
91447636
A
10011 vnode_put(vp);
10012 *retval = 0;
10013 return (error);
55e303ae
A
10014}
10015
91447636
A
10016/*
10017 * Remove an extended attribute.
b0d623f7 10018 * XXX Code duplication here.
91447636 10019 */
91447636 10020int
2d21ac55 10021fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
55e303ae 10022{
2d21ac55 10023 vnode_t vp;
91447636 10024 char attrname[XATTR_MAXNAMELEN+1];
91447636
A
10025 size_t namelen;
10026 int error;
6d2010ae 10027#if CONFIG_FSE
2d21ac55 10028 vfs_context_t ctx = vfs_context_current();
6d2010ae 10029#endif
55e303ae 10030
2d21ac55 10031 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636
A
10032 return (EINVAL);
10033
10034 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10035 if (error != 0) {
10036 return (error);
10037 }
10038 if (xattr_protected(attrname))
10039 return(EPERM);
10040 if ( (error = file_vnode(uap->fd, &vp)) ) {
10041 return (error);
10042 }
10043 if ( (error = vnode_getwithref(vp)) ) {
10044 file_drop(uap->fd);
10045 return(error);
10046 }
4a249263 10047
2d21ac55
A
10048 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
10049#if CONFIG_FSE
10050 if (error == 0) {
10051 add_fsevent(FSE_XATTR_REMOVED, ctx,
10052 FSE_ARG_VNODE, vp,
10053 FSE_ARG_DONE);
10054 }
10055#endif
91447636
A
10056 vnode_put(vp);
10057 file_drop(uap->fd);
10058 *retval = 0;
10059 return (error);
55e303ae
A
10060}
10061
91447636
A
10062/*
10063 * Retrieve the list of extended attribute names.
b0d623f7 10064 * XXX Code duplication here.
91447636 10065 */
91447636 10066int
2d21ac55 10067listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
55e303ae 10068{
2d21ac55 10069 vnode_t vp;
91447636 10070 struct nameidata nd;
2d21ac55 10071 vfs_context_t ctx = vfs_context_current();
91447636
A
10072 uio_t auio = NULL;
10073 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10074 size_t attrsize = 0;
b0d623f7 10075 u_int32_t nameiflags;
91447636
A
10076 int error;
10077 char uio_buf[ UIO_SIZEOF(1) ];
4a249263 10078
2d21ac55 10079 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636 10080 return (EINVAL);
55e303ae 10081
fe8ab488 10082 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
6d2010ae 10083 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
91447636
A
10084 if ((error = namei(&nd))) {
10085 return (error);
10086 }
10087 vp = nd.ni_vp;
10088 nameidone(&nd);
10089 if (uap->namebuf != 0 && uap->bufsize > 0) {
6d2010ae
A
10090 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
10091 &uio_buf[0], sizeof(uio_buf));
91447636
A
10092 uio_addiov(auio, uap->namebuf, uap->bufsize);
10093 }
55e303ae 10094
2d21ac55 10095 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
55e303ae 10096
91447636
A
10097 vnode_put(vp);
10098 if (auio) {
10099 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10100 } else {
10101 *retval = (user_ssize_t)attrsize;
10102 }
10103 return (error);
55e303ae
A
10104}
10105
91447636
A
10106/*
10107 * Retrieve the list of extended attribute names.
b0d623f7 10108 * XXX Code duplication here.
91447636 10109 */
55e303ae 10110int
2d21ac55 10111flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
55e303ae 10112{
2d21ac55 10113 vnode_t vp;
91447636
A
10114 uio_t auio = NULL;
10115 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10116 size_t attrsize = 0;
10117 int error;
10118 char uio_buf[ UIO_SIZEOF(1) ];
10119
2d21ac55 10120 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
91447636
A
10121 return (EINVAL);
10122
10123 if ( (error = file_vnode(uap->fd, &vp)) ) {
10124 return (error);
10125 }
10126 if ( (error = vnode_getwithref(vp)) ) {
10127 file_drop(uap->fd);
10128 return(error);
10129 }
10130 if (uap->namebuf != 0 && uap->bufsize > 0) {
91447636
A
10131 auio = uio_createwithbuffer(1, 0, spacetype,
10132 UIO_READ, &uio_buf[0], sizeof(uio_buf));
10133 uio_addiov(auio, uap->namebuf, uap->bufsize);
10134 }
91447636 10135
2d21ac55 10136 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
55e303ae 10137
91447636
A
10138 vnode_put(vp);
10139 file_drop(uap->fd);
10140 if (auio) {
10141 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10142 } else {
10143 *retval = (user_ssize_t)attrsize;
10144 }
10145 return (error);
55e303ae 10146}
4a249263 10147
fe8ab488
A
10148static int fsgetpath_internal(
10149 vfs_context_t ctx, int volfs_id, uint64_t objid,
10150 vm_size_t bufsize, caddr_t buf, int *pathlen)
b0d623f7 10151{
fe8ab488 10152 int error;
b0d623f7 10153 struct mount *mp = NULL;
fe8ab488 10154 vnode_t vp;
b0d623f7 10155 int length;
fe8ab488 10156 int bpflags;
b0d623f7 10157
fe8ab488 10158 if (bufsize > PAGE_SIZE) {
b0d623f7 10159 return (EINVAL);
fe8ab488
A
10160 }
10161
10162 if (buf == NULL) {
b0d623f7
A
10163 return (ENOMEM);
10164 }
fe8ab488
A
10165
10166 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
b0d623f7 10167 error = ENOTSUP; /* unexpected failure */
fe8ab488 10168 return ENOTSUP;
b0d623f7 10169 }
fe8ab488 10170
39236c6e 10171unionget:
fe8ab488 10172 if (objid == 2) {
b0d623f7
A
10173 error = VFS_ROOT(mp, &vp, ctx);
10174 } else {
fe8ab488 10175 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
b0d623f7 10176 }
39236c6e
A
10177
10178 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
10179 /*
10180 * If the fileid isn't found and we're in a union
10181 * mount volume, then see if the fileid is in the
10182 * mounted-on volume.
10183 */
10184 struct mount *tmp = mp;
10185 mp = vnode_mount(tmp->mnt_vnodecovered);
10186 vfs_unbusy(tmp);
10187 if (vfs_busy(mp, LK_NOWAIT) == 0)
10188 goto unionget;
fe8ab488 10189 } else {
39236c6e 10190 vfs_unbusy(mp);
fe8ab488 10191 }
39236c6e 10192
b0d623f7 10193 if (error) {
fe8ab488 10194 return error;
b0d623f7 10195 }
fe8ab488 10196
6d2010ae
A
10197#if CONFIG_MACF
10198 error = mac_vnode_check_fsgetpath(ctx, vp);
10199 if (error) {
10200 vnode_put(vp);
fe8ab488 10201 return error;
6d2010ae
A
10202 }
10203#endif
fe8ab488 10204
b0d623f7
A
10205 /* Obtain the absolute path to this vnode. */
10206 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
316670eb 10207 bpflags |= BUILDPATH_CHECK_MOVED;
fe8ab488 10208 error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
b0d623f7 10209 vnode_put(vp);
fe8ab488 10210
b0d623f7
A
10211 if (error) {
10212 goto out;
10213 }
fe8ab488
A
10214
10215 AUDIT_ARG(text, buf);
39236c6e
A
10216
10217 if (kdebug_enable) {
10218 long dbg_parms[NUMPARMS];
10219 int dbg_namelen;
10220
10221 dbg_namelen = (int)sizeof(dbg_parms);
10222
fe8ab488
A
10223 if (length < dbg_namelen) {
10224 memcpy((char *)dbg_parms, buf, length);
39236c6e
A
10225 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
10226
10227 dbg_namelen = length;
fe8ab488
A
10228 } else {
10229 memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
10230 }
39236c6e
A
10231
10232 kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
10233 }
fe8ab488
A
10234
10235 *pathlen = (user_ssize_t)length; /* may be superseded by error */
10236
10237out:
10238 return (error);
10239}
10240
10241/*
10242 * Obtain the full pathname of a file system object by id.
10243 *
10244 * This is a private SPI used by the File Manager.
10245 */
10246__private_extern__
10247int
10248fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
10249{
10250 vfs_context_t ctx = vfs_context_current();
10251 fsid_t fsid;
10252 char *realpath;
10253 int length;
10254 int error;
10255
10256 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
10257 return (error);
10258 }
10259 AUDIT_ARG(value32, fsid.val[0]);
10260 AUDIT_ARG(value64, uap->objid);
10261 /* Restrict output buffer size for now. */
10262
10263 if (uap->bufsize > PAGE_SIZE) {
10264 return (EINVAL);
10265 }
10266 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
10267 if (realpath == NULL) {
10268 return (ENOMEM);
10269 }
10270
10271 error = fsgetpath_internal(
10272 ctx, fsid.val[0], uap->objid,
10273 uap->bufsize, realpath, &length);
10274
10275 if (error) {
10276 goto out;
10277 }
10278
b0d623f7
A
10279 error = copyout((caddr_t)realpath, uap->buf, length);
10280
10281 *retval = (user_ssize_t)length; /* may be superseded by error */
10282out:
10283 if (realpath) {
10284 FREE(realpath, M_TEMP);
10285 }
10286 return (error);
10287}
10288
91447636
A
10289/*
10290 * Common routine to handle various flavors of statfs data heading out
10291 * to user space.
2d21ac55
A
10292 *
10293 * Returns: 0 Success
10294 * EFAULT
91447636
A
10295 */
10296static int
10297munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
10298 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
10299 boolean_t partial_copy)
4a249263 10300{
91447636
A
10301 int error;
10302 int my_size, copy_size;
10303
10304 if (is_64_bit) {
b0d623f7 10305 struct user64_statfs sfs;
91447636
A
10306 my_size = copy_size = sizeof(sfs);
10307 bzero(&sfs, my_size);
10308 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
10309 sfs.f_type = mp->mnt_vtable->vfc_typenum;
10310 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
b0d623f7
A
10311 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
10312 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
10313 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
10314 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
10315 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
10316 sfs.f_files = (user64_long_t)sfsp->f_files;
10317 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
91447636
A
10318 sfs.f_fsid = sfsp->f_fsid;
10319 sfs.f_owner = sfsp->f_owner;
6d2010ae 10320 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
fe8ab488 10321 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
6d2010ae
A
10322 } else {
10323 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
10324 }
2d21ac55
A
10325 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
10326 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
10327
10328 if (partial_copy) {
10329 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
10330 }
10331 error = copyout((caddr_t)&sfs, bufp, copy_size);
10332 }
10333 else {
b0d623f7
A
10334 struct user32_statfs sfs;
10335
91447636
A
10336 my_size = copy_size = sizeof(sfs);
10337 bzero(&sfs, my_size);
10338
10339 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
10340 sfs.f_type = mp->mnt_vtable->vfc_typenum;
10341 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
10342
10343 /*
10344 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
10345 * have to fudge the numbers here in that case. We inflate the blocksize in order
10346 * to reflect the filesystem size as best we can.
10347 */
b0d623f7 10348 if ((sfsp->f_blocks > INT_MAX)
91447636
A
10349 /* Hack for 4061702 . I think the real fix is for Carbon to
10350 * look for some volume capability and not depend on hidden
10351 * semantics agreed between a FS and carbon.
10352 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
10353 * for Carbon to set bNoVolumeSizes volume attribute.
10354 * Without this the webdavfs files cannot be copied onto
10355 * disk as they look huge. This change should not affect
10356 * XSAN as they should not setting these to -1..
10357 */
2d21ac55
A
10358 && (sfsp->f_blocks != 0xffffffffffffffffULL)
10359 && (sfsp->f_bfree != 0xffffffffffffffffULL)
10360 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
91447636
A
10361 int shift;
10362
10363 /*
10364 * Work out how far we have to shift the block count down to make it fit.
10365 * Note that it's possible to have to shift so far that the resulting
10366 * blocksize would be unreportably large. At that point, we will clip
10367 * any values that don't fit.
10368 *
10369 * For safety's sake, we also ensure that f_iosize is never reported as
10370 * being smaller than f_bsize.
10371 */
10372 for (shift = 0; shift < 32; shift++) {
b0d623f7 10373 if ((sfsp->f_blocks >> shift) <= INT_MAX)
91447636 10374 break;
b0d623f7 10375 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
91447636
A
10376 break;
10377 }
b0d623f7
A
10378#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
10379 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
10380 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
10381 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
91447636 10382#undef __SHIFT_OR_CLIP
b0d623f7 10383 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
91447636
A
10384 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
10385 } else {
10386 /* filesystem is small enough to be reported honestly */
b0d623f7
A
10387 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
10388 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
10389 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
10390 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
10391 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
91447636 10392 }
b0d623f7
A
10393 sfs.f_files = (user32_long_t)sfsp->f_files;
10394 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
91447636
A
10395 sfs.f_fsid = sfsp->f_fsid;
10396 sfs.f_owner = sfsp->f_owner;
6d2010ae 10397 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
fe8ab488 10398 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
6d2010ae
A
10399 } else {
10400 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
10401 }
2d21ac55
A
10402 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
10403 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
91447636
A
10404
10405 if (partial_copy) {
10406 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
10407 }
10408 error = copyout((caddr_t)&sfs, bufp, copy_size);
10409 }
4a249263 10410
91447636
A
10411 if (sizep != NULL) {
10412 *sizep = my_size;
10413 }
10414 return(error);
10415}
10416
10417/*
10418 * copy stat structure into user_stat structure.
10419 */
b0d623f7 10420void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
91447636 10421{
b0d623f7
A
10422 bzero(usbp, sizeof(*usbp));
10423
10424 usbp->st_dev = sbp->st_dev;
10425 usbp->st_ino = sbp->st_ino;
10426 usbp->st_mode = sbp->st_mode;
10427 usbp->st_nlink = sbp->st_nlink;
10428 usbp->st_uid = sbp->st_uid;
10429 usbp->st_gid = sbp->st_gid;
10430 usbp->st_rdev = sbp->st_rdev;
10431#ifndef _POSIX_C_SOURCE
10432 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10433 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10434 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10435 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10436 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10437 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10438#else
10439 usbp->st_atime = sbp->st_atime;
10440 usbp->st_atimensec = sbp->st_atimensec;
10441 usbp->st_mtime = sbp->st_mtime;
10442 usbp->st_mtimensec = sbp->st_mtimensec;
10443 usbp->st_ctime = sbp->st_ctime;
10444 usbp->st_ctimensec = sbp->st_ctimensec;
10445#endif
10446 usbp->st_size = sbp->st_size;
10447 usbp->st_blocks = sbp->st_blocks;
10448 usbp->st_blksize = sbp->st_blksize;
10449 usbp->st_flags = sbp->st_flags;
10450 usbp->st_gen = sbp->st_gen;
10451 usbp->st_lspare = sbp->st_lspare;
10452 usbp->st_qspare[0] = sbp->st_qspare[0];
10453 usbp->st_qspare[1] = sbp->st_qspare[1];
10454}
10455
10456void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
10457{
10458 bzero(usbp, sizeof(*usbp));
0c530ab8 10459
91447636
A
10460 usbp->st_dev = sbp->st_dev;
10461 usbp->st_ino = sbp->st_ino;
10462 usbp->st_mode = sbp->st_mode;
10463 usbp->st_nlink = sbp->st_nlink;
10464 usbp->st_uid = sbp->st_uid;
10465 usbp->st_gid = sbp->st_gid;
10466 usbp->st_rdev = sbp->st_rdev;
2d21ac55
A
10467#ifndef _POSIX_C_SOURCE
10468 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10469 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10470 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10471 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10472 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10473 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10474#else
10475 usbp->st_atime = sbp->st_atime;
10476 usbp->st_atimensec = sbp->st_atimensec;
10477 usbp->st_mtime = sbp->st_mtime;
10478 usbp->st_mtimensec = sbp->st_mtimensec;
10479 usbp->st_ctime = sbp->st_ctime;
10480 usbp->st_ctimensec = sbp->st_ctimensec;
10481#endif
10482 usbp->st_size = sbp->st_size;
10483 usbp->st_blocks = sbp->st_blocks;
10484 usbp->st_blksize = sbp->st_blksize;
10485 usbp->st_flags = sbp->st_flags;
10486 usbp->st_gen = sbp->st_gen;
10487 usbp->st_lspare = sbp->st_lspare;
10488 usbp->st_qspare[0] = sbp->st_qspare[0];
10489 usbp->st_qspare[1] = sbp->st_qspare[1];
10490}
10491
10492/*
10493 * copy stat64 structure into user_stat64 structure.
10494 */
b0d623f7
A
10495void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
10496{
10497 bzero(usbp, sizeof(*usbp));
10498
10499 usbp->st_dev = sbp->st_dev;
10500 usbp->st_ino = sbp->st_ino;
10501 usbp->st_mode = sbp->st_mode;
10502 usbp->st_nlink = sbp->st_nlink;
10503 usbp->st_uid = sbp->st_uid;
10504 usbp->st_gid = sbp->st_gid;
10505 usbp->st_rdev = sbp->st_rdev;
10506#ifndef _POSIX_C_SOURCE
10507 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10508 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10509 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10510 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10511 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10512 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10513 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
10514 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
10515#else
10516 usbp->st_atime = sbp->st_atime;
10517 usbp->st_atimensec = sbp->st_atimensec;
10518 usbp->st_mtime = sbp->st_mtime;
10519 usbp->st_mtimensec = sbp->st_mtimensec;
10520 usbp->st_ctime = sbp->st_ctime;
10521 usbp->st_ctimensec = sbp->st_ctimensec;
10522 usbp->st_birthtime = sbp->st_birthtime;
10523 usbp->st_birthtimensec = sbp->st_birthtimensec;
10524#endif
10525 usbp->st_size = sbp->st_size;
10526 usbp->st_blocks = sbp->st_blocks;
10527 usbp->st_blksize = sbp->st_blksize;
10528 usbp->st_flags = sbp->st_flags;
10529 usbp->st_gen = sbp->st_gen;
10530 usbp->st_lspare = sbp->st_lspare;
10531 usbp->st_qspare[0] = sbp->st_qspare[0];
10532 usbp->st_qspare[1] = sbp->st_qspare[1];
10533}
10534
10535void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
2d21ac55 10536{
b0d623f7 10537 bzero(usbp, sizeof(*usbp));
2d21ac55
A
10538
10539 usbp->st_dev = sbp->st_dev;
10540 usbp->st_ino = sbp->st_ino;
10541 usbp->st_mode = sbp->st_mode;
10542 usbp->st_nlink = sbp->st_nlink;
10543 usbp->st_uid = sbp->st_uid;
10544 usbp->st_gid = sbp->st_gid;
10545 usbp->st_rdev = sbp->st_rdev;
10546#ifndef _POSIX_C_SOURCE
91447636
A
10547 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10548 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10549 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10550 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10551 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10552 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
2d21ac55
A
10553 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
10554 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
91447636
A
10555#else
10556 usbp->st_atime = sbp->st_atime;
10557 usbp->st_atimensec = sbp->st_atimensec;
10558 usbp->st_mtime = sbp->st_mtime;
10559 usbp->st_mtimensec = sbp->st_mtimensec;
10560 usbp->st_ctime = sbp->st_ctime;
10561 usbp->st_ctimensec = sbp->st_ctimensec;
2d21ac55
A
10562 usbp->st_birthtime = sbp->st_birthtime;
10563 usbp->st_birthtimensec = sbp->st_birthtimensec;
91447636
A
10564#endif
10565 usbp->st_size = sbp->st_size;
10566 usbp->st_blocks = sbp->st_blocks;
10567 usbp->st_blksize = sbp->st_blksize;
10568 usbp->st_flags = sbp->st_flags;
10569 usbp->st_gen = sbp->st_gen;
10570 usbp->st_lspare = sbp->st_lspare;
10571 usbp->st_qspare[0] = sbp->st_qspare[0];
10572 usbp->st_qspare[1] = sbp->st_qspare[1];
4a249263 10573}
39236c6e
A
10574
10575/*
10576 * Purge buffer cache for simulating cold starts
10577 */
10578static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
10579{
10580 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
10581
10582 return VNODE_RETURNED;
10583}
10584
10585static int vfs_purge_callback(mount_t mp, __unused void * arg)
10586{
10587 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
10588
10589 return VFS_RETURNED;
10590}
10591
10592int
10593vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
10594{
10595 if (!kauth_cred_issuser(kauth_cred_get()))
10596 return EPERM;
10597
10598 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
10599
10600 return 0;
10601}
10602