]> git.saurik.com Git - apple/xnu.git/blob - bsd/vfs/vfs_syscalls.c
xnu-3248.60.10.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
1 /*
2 * Copyright (c) 1995-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
80 #include <sys/stat.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
87 #include <sys/mman.h>
88 #include <sys/dirent.h>
89 #include <sys/attr.h>
90 #include <sys/sysctl.h>
91 #include <sys/ubc.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <sys/content_protection.h>
103 #include <sys/priv.h>
104 #include <machine/cons.h>
105 #include <machine/limits.h>
106 #include <miscfs/specfs/specdev.h>
107
108 #include <security/audit/audit.h>
109 #include <bsm/audit_kevents.h>
110
111 #include <mach/mach_types.h>
112 #include <kern/kern_types.h>
113 #include <kern/kalloc.h>
114 #include <kern/task.h>
115
116 #include <vm/vm_pageout.h>
117
118 #include <libkern/OSAtomic.h>
119 #include <pexpert/pexpert.h>
120 #include <IOKit/IOBSD.h>
121
122 #if ROUTEFS
123 #include <miscfs/routefs/routefs.h>
124 #endif /* ROUTEFS */
125
126 #if CONFIG_MACF
127 #include <security/mac.h>
128 #include <security/mac_framework.h>
129 #endif
130
/*
 * Scratch path-buffer helpers.
 *
 * GET_PATH(x) stores a path buffer in (x) and RELEASE_PATH(x) gives it
 * back.  With CONFIG_FSE the buffer comes from get_pathbuff() /
 * release_pathbuff(); otherwise a MAXPATHLEN-byte buffer is taken from
 * (and returned to) the M_NAMEI zone.
 *
 * NOTE: both macros expand to a complete statement, trailing semicolon
 * included.
 */
#if CONFIG_FSE
#define GET_PATH(x)	(x) = get_pathbuff();
#define RELEASE_PATH(x)	release_pathbuff(x);
#else /* !CONFIG_FSE */
#define GET_PATH(x)	MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
#define RELEASE_PATH(x)	FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
#endif /* CONFIG_FSE */
142
143 /* struct for checkdirs iteration */
144 struct cdirargs {
145 vnode_t olddp;
146 vnode_t newdp;
147 };
148 /* callback for checkdirs iteration */
149 static int checkdirs_callback(proc_t p, void * arg);
150
151 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
152 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
153 void enablequotas(struct mount *mp, vfs_context_t ctx);
154 static int getfsstat_callback(mount_t mp, void * arg);
155 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
156 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
157 static int sync_callback(mount_t, void *);
158 static void sync_thread(void *, __unused wait_result_t);
159 static int sync_async(int);
160 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
161 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
162 boolean_t partial_copy);
163 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
164 user_addr_t bufp);
165 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
166 static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
167 struct componentname *cnp, user_addr_t fsmountargs,
168 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
169 vfs_context_t ctx);
170 void vfs_notify_mount(vnode_t pdvp);
171
172 int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
173
174 struct fd_vn_data * fg_vn_data_alloc(void);
175
176 /*
177 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
178 * Concurrent lookups (or lookups by ids) on hard links can cause the
179 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
180 * does) to return ENOENT as the path cannot be returned from the name cache
181 * alone. We have no option but to retry and hope to get one namei->reverse path
182 * generation done without an intervening lookup, lookup by id on the hard link
183 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
184 * which currently are the MAC hooks for rename, unlink and rmdir.
185 */
186 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
187
188 static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
189
190 static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
191
192 #ifdef CONFIG_IMGSRC_ACCESS
193 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
194 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
195 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
196 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
197 static void mount_end_update(mount_t mp);
198 static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
199 #endif /* CONFIG_IMGSRC_ACCESS */
200
201 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
202
203 __private_extern__
204 int sync_internal(void);
205
206 __private_extern__
207 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
208
209 extern lck_grp_t *fd_vn_lck_grp;
210 extern lck_grp_attr_t *fd_vn_lck_grp_attr;
211 extern lck_attr_t *fd_vn_lck_attr;
212
213 /*
214 * incremented each time a mount or unmount operation occurs
215 * used to invalidate the cached value of the rootvp in the
216 * mount structure utilized by cache_lookup_path
217 */
218 uint32_t mount_generation = 0;
219
220 /* counts number of mount and unmount operations */
221 unsigned int vfs_nummntops=0;
222
223 extern const struct fileops vnops;
224 #if CONFIG_APPLEDOUBLE
225 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
226 #endif /* CONFIG_APPLEDOUBLE */
227
228 typedef uint32_t vfs_rename_flags_t;
229 #if CONFIG_SECLUDED_RENAME
230 enum {
231 VFS_SECLUDE_RENAME = 0x00000001
232 };
233 #endif
234
235 /*
236 * Virtual File System System Calls
237 */
238
239 #if NFSCLIENT || DEVFS || ROUTEFS
240 /*
241 * Private in-kernel mounting spi (NFS only, not exported)
242 */
243 __private_extern__
244 boolean_t
245 vfs_iskernelmount(mount_t mp)
246 {
247 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
248 }
249
250 __private_extern__
251 int
252 kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
253 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
254 {
255 struct nameidata nd;
256 boolean_t did_namei;
257 int error;
258
259 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
260 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
261
262 /*
263 * Get the vnode to be covered if it's not supplied
264 */
265 if (vp == NULLVP) {
266 error = namei(&nd);
267 if (error)
268 return (error);
269 vp = nd.ni_vp;
270 pvp = nd.ni_dvp;
271 did_namei = TRUE;
272 } else {
273 char *pnbuf = CAST_DOWN(char *, path);
274
275 nd.ni_cnd.cn_pnbuf = pnbuf;
276 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
277 did_namei = FALSE;
278 }
279
280 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
281 syscall_flags, kern_flags, NULL, TRUE, ctx);
282
283 if (did_namei) {
284 vnode_put(vp);
285 vnode_put(pvp);
286 nameidone(&nd);
287 }
288
289 return (error);
290 }
291 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
292
293 /*
294 * Mount a file system.
295 */
296 /* ARGSUSED */
297 int
298 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
299 {
300 struct __mac_mount_args muap;
301
302 muap.type = uap->type;
303 muap.path = uap->path;
304 muap.flags = uap->flags;
305 muap.data = uap->data;
306 muap.mac_p = USER_ADDR_NULL;
307 return (__mac_mount(p, &muap, retval));
308 }
309
310 void
311 vfs_notify_mount(vnode_t pdvp)
312 {
313 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
314 lock_vnode_and_post(pdvp, NOTE_WRITE);
315 }
316
317 /*
318 * __mac_mount:
319 * Mount a file system taking into account MAC label behavior.
320 * See mount(2) man page for more information
321 *
322 * Parameters: p Process requesting the mount
323 * uap User argument descriptor (see below)
324 * retval (ignored)
325 *
326 * Indirect: uap->type Filesystem type
327 * uap->path Path to mount
328 * uap->data Mount arguments
329 * uap->mac_p MAC info
330 * uap->flags Mount flags
331 *
332 *
333 * Returns: 0 Success
334 * !0 Not success
335 */
336 boolean_t root_fs_upgrade_try = FALSE;
337
338 int
339 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
340 {
341 vnode_t pvp = NULL;
342 vnode_t vp = NULL;
343 int need_nameidone = 0;
344 vfs_context_t ctx = vfs_context_current();
345 char fstypename[MFSNAMELEN];
346 struct nameidata nd;
347 size_t dummy=0;
348 char *labelstr = NULL;
349 int flags = uap->flags;
350 int error;
351 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
352 boolean_t is_64bit = IS_64BIT_PROCESS(p);
353 #else
354 #pragma unused(p)
355 #endif
356 /*
357 * Get the fs type name from user space
358 */
359 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
360 if (error)
361 return (error);
362
363 /*
364 * Get the vnode to be covered
365 */
366 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
367 UIO_USERSPACE, uap->path, ctx);
368 error = namei(&nd);
369 if (error) {
370 goto out;
371 }
372 need_nameidone = 1;
373 vp = nd.ni_vp;
374 pvp = nd.ni_dvp;
375
376 #ifdef CONFIG_IMGSRC_ACCESS
377 /* Mounting image source cannot be batched with other operations */
378 if (flags == MNT_IMGSRC_BY_INDEX) {
379 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
380 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
381 goto out;
382 }
383 #endif /* CONFIG_IMGSRC_ACCESS */
384
385 #if CONFIG_MACF
386 /*
387 * Get the label string (if any) from user space
388 */
389 if (uap->mac_p != USER_ADDR_NULL) {
390 struct user_mac mac;
391 size_t ulen = 0;
392
393 if (is_64bit) {
394 struct user64_mac mac64;
395 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
396 mac.m_buflen = mac64.m_buflen;
397 mac.m_string = mac64.m_string;
398 } else {
399 struct user32_mac mac32;
400 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
401 mac.m_buflen = mac32.m_buflen;
402 mac.m_string = mac32.m_string;
403 }
404 if (error)
405 goto out;
406 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
407 (mac.m_buflen < 2)) {
408 error = EINVAL;
409 goto out;
410 }
411 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
412 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
413 if (error) {
414 goto out;
415 }
416 AUDIT_ARG(mac_string, labelstr);
417 }
418 #endif /* CONFIG_MACF */
419
420 AUDIT_ARG(fflags, flags);
421
422 #if SECURE_KERNEL
423 if (flags & MNT_UNION) {
424 /* No union mounts on release kernels */
425 error = EPERM;
426 goto out;
427 }
428 #endif
429
430 if ((vp->v_flag & VROOT) &&
431 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
432 if (!(flags & MNT_UNION)) {
433 flags |= MNT_UPDATE;
434 }
435 else {
436 /*
437 * For a union mount on '/', treat it as fresh
438 * mount instead of update.
439 * Otherwise, union mouting on '/' used to panic the
440 * system before, since mnt_vnodecovered was found to
441 * be NULL for '/' which is required for unionlookup
442 * after it gets ENOENT on union mount.
443 */
444 flags = (flags & ~(MNT_UPDATE));
445 }
446
447 #if SECURE_KERNEL
448 if ((flags & MNT_RDONLY) == 0) {
449 /* Release kernels are not allowed to mount "/" as rw */
450 error = EPERM;
451 goto out;
452 }
453 #endif
454 /*
455 * See 7392553 for more details on why this check exists.
456 * Suffice to say: If this check is ON and something tries
457 * to mount the rootFS RW, we'll turn off the codesign
458 * bitmap optimization.
459 */
460 #if CHECK_CS_VALIDATION_BITMAP
461 if ((flags & MNT_RDONLY) == 0 ) {
462 root_fs_upgrade_try = TRUE;
463 }
464 #endif
465 }
466
467 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
468 labelstr, FALSE, ctx);
469
470 out:
471
472 #if CONFIG_MACF
473 if (labelstr)
474 FREE(labelstr, M_MACTEMP);
475 #endif /* CONFIG_MACF */
476
477 if (vp) {
478 vnode_put(vp);
479 }
480 if (pvp) {
481 vnode_put(pvp);
482 }
483 if (need_nameidone) {
484 nameidone(&nd);
485 }
486
487 return (error);
488 }
489
490 /*
491 * common mount implementation (final stage of mounting)
492
493 * Arguments:
494 * fstypename file system type (i.e. its vfs name)
495 * pvp parent of covered vnode
496 * vp covered vnode
497 * cnp component name (ie path) of covered vnode
498 * flags generic mount flags
499 * fsmountargs file system specific data
500 * labelstr optional MAC label
501 * kernelmount TRUE for mounts initiated from inside the kernel
502 * ctx caller's context
503 */
504 static int
505 mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
506 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
507 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
508 {
509 #if !CONFIG_MACF
510 #pragma unused(labelstr)
511 #endif
512 struct vnode *devvp = NULLVP;
513 struct vnode *device_vnode = NULLVP;
514 #if CONFIG_MACF
515 struct vnode *rvp;
516 #endif
517 struct mount *mp;
518 struct vfstable *vfsp = (struct vfstable *)0;
519 struct proc *p = vfs_context_proc(ctx);
520 int error, flag = 0;
521 user_addr_t devpath = USER_ADDR_NULL;
522 int ronly = 0;
523 int mntalloc = 0;
524 boolean_t vfsp_ref = FALSE;
525 boolean_t is_rwlock_locked = FALSE;
526 boolean_t did_rele = FALSE;
527 boolean_t have_usecount = FALSE;
528
529 /*
530 * Process an update for an existing mount
531 */
532 if (flags & MNT_UPDATE) {
533 if ((vp->v_flag & VROOT) == 0) {
534 error = EINVAL;
535 goto out1;
536 }
537 mp = vp->v_mount;
538
539 /* unmount in progress return error */
540 mount_lock_spin(mp);
541 if (mp->mnt_lflag & MNT_LUNMOUNT) {
542 mount_unlock(mp);
543 error = EBUSY;
544 goto out1;
545 }
546 mount_unlock(mp);
547 lck_rw_lock_exclusive(&mp->mnt_rwlock);
548 is_rwlock_locked = TRUE;
549 /*
550 * We only allow the filesystem to be reloaded if it
551 * is currently mounted read-only.
552 */
553 if ((flags & MNT_RELOAD) &&
554 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
555 error = ENOTSUP;
556 goto out1;
557 }
558
559 /*
560 * If content protection is enabled, update mounts are not
561 * allowed to turn it off.
562 */
563 if ((mp->mnt_flag & MNT_CPROTECT) &&
564 ((flags & MNT_CPROTECT) == 0)) {
565 error = EINVAL;
566 goto out1;
567 }
568
569 #ifdef CONFIG_IMGSRC_ACCESS
570 /* Can't downgrade the backer of the root FS */
571 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
572 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
573 error = ENOTSUP;
574 goto out1;
575 }
576 #endif /* CONFIG_IMGSRC_ACCESS */
577
578 /*
579 * Only root, or the user that did the original mount is
580 * permitted to update it.
581 */
582 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
583 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
584 goto out1;
585 }
586 #if CONFIG_MACF
587 error = mac_mount_check_remount(ctx, mp);
588 if (error != 0) {
589 goto out1;
590 }
591 #endif
592 /*
593 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
594 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
595 */
596 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
597 flags |= MNT_NOSUID | MNT_NODEV;
598 if (mp->mnt_flag & MNT_NOEXEC)
599 flags |= MNT_NOEXEC;
600 }
601 flag = mp->mnt_flag;
602
603
604
605 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
606
607 vfsp = mp->mnt_vtable;
608 goto update;
609 }
610 /*
611 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
612 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
613 */
614 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
615 flags |= MNT_NOSUID | MNT_NODEV;
616 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
617 flags |= MNT_NOEXEC;
618 }
619
620 /* XXXAUDIT: Should we capture the type on the error path as well? */
621 AUDIT_ARG(text, fstypename);
622 mount_list_lock();
623 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
624 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
625 vfsp->vfc_refcount++;
626 vfsp_ref = TRUE;
627 break;
628 }
629 mount_list_unlock();
630 if (vfsp == NULL) {
631 error = ENODEV;
632 goto out1;
633 }
634
635 /*
636 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
637 */
638 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
639 error = EINVAL; /* unsupported request */
640 goto out1;
641 }
642
643 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
644 if (error != 0) {
645 goto out1;
646 }
647
648 /*
649 * Allocate and initialize the filesystem (mount_t)
650 */
651 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
652 M_MOUNT, M_WAITOK);
653 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
654 mntalloc = 1;
655
656 /* Initialize the default IO constraints */
657 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
658 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
659 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
660 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
661 mp->mnt_devblocksize = DEV_BSIZE;
662 mp->mnt_alignmentmask = PAGE_MASK;
663 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
664 mp->mnt_ioscale = 1;
665 mp->mnt_ioflags = 0;
666 mp->mnt_realrootvp = NULLVP;
667 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
668
669 TAILQ_INIT(&mp->mnt_vnodelist);
670 TAILQ_INIT(&mp->mnt_workerqueue);
671 TAILQ_INIT(&mp->mnt_newvnodes);
672 mount_lock_init(mp);
673 lck_rw_lock_exclusive(&mp->mnt_rwlock);
674 is_rwlock_locked = TRUE;
675 mp->mnt_op = vfsp->vfc_vfsops;
676 mp->mnt_vtable = vfsp;
677 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
678 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
679 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
680 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
681 mp->mnt_vnodecovered = vp;
682 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
683 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
684 mp->mnt_devbsdunit = 0;
685
686 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
687 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
688
689 #if NFSCLIENT || DEVFS || ROUTEFS
690 if (kernelmount)
691 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
692 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
693 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
694 #endif /* NFSCLIENT || DEVFS */
695
696 update:
697 /*
698 * Set the mount level flags.
699 */
700 if (flags & MNT_RDONLY)
701 mp->mnt_flag |= MNT_RDONLY;
702 else if (mp->mnt_flag & MNT_RDONLY) {
703 // disallow read/write upgrades of file systems that
704 // had the TYPENAME_OVERRIDE feature set.
705 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
706 error = EPERM;
707 goto out1;
708 }
709 mp->mnt_kern_flag |= MNTK_WANTRDWR;
710 }
711 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
712 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
713 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
714 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
715 MNT_QUARANTINE | MNT_CPROTECT);
716 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
717 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
718 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
719 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
720 MNT_QUARANTINE | MNT_CPROTECT);
721
722 #if CONFIG_MACF
723 if (flags & MNT_MULTILABEL) {
724 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
725 error = EINVAL;
726 goto out1;
727 }
728 mp->mnt_flag |= MNT_MULTILABEL;
729 }
730 #endif
731 /*
732 * Process device path for local file systems if requested
733 */
734 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
735 if (vfs_context_is64bit(ctx)) {
736 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
737 goto out1;
738 fsmountargs += sizeof(devpath);
739 } else {
740 user32_addr_t tmp;
741 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
742 goto out1;
743 /* munge into LP64 addr */
744 devpath = CAST_USER_ADDR_T(tmp);
745 fsmountargs += sizeof(tmp);
746 }
747
748 /* Lookup device and authorize access to it */
749 if ((devpath)) {
750 struct nameidata nd;
751
752 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
753 if ( (error = namei(&nd)) )
754 goto out1;
755
756 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
757 devvp = nd.ni_vp;
758
759 nameidone(&nd);
760
761 if (devvp->v_type != VBLK) {
762 error = ENOTBLK;
763 goto out2;
764 }
765 if (major(devvp->v_rdev) >= nblkdev) {
766 error = ENXIO;
767 goto out2;
768 }
769 /*
770 * If mount by non-root, then verify that user has necessary
771 * permissions on the device.
772 */
773 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
774 mode_t accessmode = KAUTH_VNODE_READ_DATA;
775
776 if ((mp->mnt_flag & MNT_RDONLY) == 0)
777 accessmode |= KAUTH_VNODE_WRITE_DATA;
778 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
779 goto out2;
780 }
781 }
782 /* On first mount, preflight and open device */
783 if (devpath && ((flags & MNT_UPDATE) == 0)) {
784 if ( (error = vnode_ref(devvp)) )
785 goto out2;
786 /*
787 * Disallow multiple mounts of the same device.
788 * Disallow mounting of a device that is currently in use
789 * (except for root, which might share swap device for miniroot).
790 * Flush out any old buffers remaining from a previous use.
791 */
792 if ( (error = vfs_mountedon(devvp)) )
793 goto out3;
794
795 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
796 error = EBUSY;
797 goto out3;
798 }
799 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
800 error = ENOTBLK;
801 goto out3;
802 }
803 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
804 goto out3;
805
806 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
807 #if CONFIG_MACF
808 error = mac_vnode_check_open(ctx,
809 devvp,
810 ronly ? FREAD : FREAD|FWRITE);
811 if (error)
812 goto out3;
813 #endif /* MAC */
814 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
815 goto out3;
816
817 mp->mnt_devvp = devvp;
818 device_vnode = devvp;
819
820 } else if ((mp->mnt_flag & MNT_RDONLY) &&
821 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
822 (device_vnode = mp->mnt_devvp)) {
823 dev_t dev;
824 int maj;
825 /*
826 * If upgrade to read-write by non-root, then verify
827 * that user has necessary permissions on the device.
828 */
829 vnode_getalways(device_vnode);
830
831 if (suser(vfs_context_ucred(ctx), NULL) &&
832 (error = vnode_authorize(device_vnode, NULL,
833 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
834 ctx)) != 0) {
835 vnode_put(device_vnode);
836 goto out2;
837 }
838
839 /* Tell the device that we're upgrading */
840 dev = (dev_t)device_vnode->v_rdev;
841 maj = major(dev);
842
843 if ((u_int)maj >= (u_int)nblkdev)
844 panic("Volume mounted on a device with invalid major number.");
845
846 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
847 vnode_put(device_vnode);
848 device_vnode = NULLVP;
849 if (error != 0) {
850 goto out2;
851 }
852 }
853 }
854 #if CONFIG_MACF
855 if ((flags & MNT_UPDATE) == 0) {
856 mac_mount_label_init(mp);
857 mac_mount_label_associate(ctx, mp);
858 }
859 if (labelstr) {
860 if ((flags & MNT_UPDATE) != 0) {
861 error = mac_mount_check_label_update(ctx, mp);
862 if (error != 0)
863 goto out3;
864 }
865 }
866 #endif
867 /*
868 * Mount the filesystem.
869 */
870 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
871
872 if (flags & MNT_UPDATE) {
873 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
874 mp->mnt_flag &= ~MNT_RDONLY;
875 mp->mnt_flag &=~
876 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
877 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
878 if (error)
879 mp->mnt_flag = flag; /* restore flag value */
880 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
881 lck_rw_done(&mp->mnt_rwlock);
882 is_rwlock_locked = FALSE;
883 if (!error)
884 enablequotas(mp, ctx);
885 goto exit;
886 }
887
888 /*
889 * Put the new filesystem on the mount list after root.
890 */
891 if (error == 0) {
892 struct vfs_attr vfsattr;
893 #if CONFIG_MACF
894 if (vfs_flags(mp) & MNT_MULTILABEL) {
895 error = VFS_ROOT(mp, &rvp, ctx);
896 if (error) {
897 printf("%s() VFS_ROOT returned %d\n", __func__, error);
898 goto out3;
899 }
900 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
901 /*
902 * drop reference provided by VFS_ROOT
903 */
904 vnode_put(rvp);
905
906 if (error)
907 goto out3;
908 }
909 #endif /* MAC */
910
911 vnode_lock_spin(vp);
912 CLR(vp->v_flag, VMOUNT);
913 vp->v_mountedhere = mp;
914 vnode_unlock(vp);
915
916 /*
917 * taking the name_cache_lock exclusively will
918 * insure that everyone is out of the fast path who
919 * might be trying to use a now stale copy of
920 * vp->v_mountedhere->mnt_realrootvp
921 * bumping mount_generation causes the cached values
922 * to be invalidated
923 */
924 name_cache_lock();
925 mount_generation++;
926 name_cache_unlock();
927
928 error = vnode_ref(vp);
929 if (error != 0) {
930 goto out4;
931 }
932
933 have_usecount = TRUE;
934
935 error = checkdirs(vp, ctx);
936 if (error != 0) {
937 /* Unmount the filesystem as cdir/rdirs cannot be updated */
938 goto out4;
939 }
940 /*
941 * there is no cleanup code here so I have made it void
942 * we need to revisit this
943 */
944 (void)VFS_START(mp, 0, ctx);
945
946 if (mount_list_add(mp) != 0) {
947 /*
948 * The system is shutting down trying to umount
949 * everything, so fail with a plausible errno.
950 */
951 error = EBUSY;
952 goto out4;
953 }
954 lck_rw_done(&mp->mnt_rwlock);
955 is_rwlock_locked = FALSE;
956
957 /* Check if this mounted file system supports EAs or named streams. */
958 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
959 VFSATTR_INIT(&vfsattr);
960 VFSATTR_WANTED(&vfsattr, f_capabilities);
961 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
962 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
963 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
964 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
965 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
966 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
967 }
968 #if NAMEDSTREAMS
969 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
970 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
971 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
972 }
973 #endif
974 /* Check if this file system supports path from id lookups. */
975 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
976 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
977 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
978 } else if (mp->mnt_flag & MNT_DOVOLFS) {
979 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
980 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
981 }
982 }
983 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
984 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
985 }
986 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
987 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
988 }
989 /* increment the operations count */
990 OSAddAtomic(1, &vfs_nummntops);
991 enablequotas(mp, ctx);
992
993 if (device_vnode) {
994 device_vnode->v_specflags |= SI_MOUNTEDON;
995
996 /*
997 * cache the IO attributes for the underlying physical media...
998 * an error return indicates the underlying driver doesn't
999 * support all the queries necessary... however, reasonable
1000 * defaults will have been set, so no reason to bail or care
1001 */
1002 vfs_init_io_attributes(device_vnode, mp);
1003 }
1004
1005 /* Now that mount is setup, notify the listeners */
1006 vfs_notify_mount(pvp);
1007 IOBSDMountChange(mp, kIOMountChangeMount);
1008
1009 } else {
1010 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1011 if (mp->mnt_vnodelist.tqh_first != NULL) {
1012 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1013 mp->mnt_vtable->vfc_name, error);
1014 }
1015
1016 vnode_lock_spin(vp);
1017 CLR(vp->v_flag, VMOUNT);
1018 vnode_unlock(vp);
1019 mount_list_lock();
1020 mp->mnt_vtable->vfc_refcount--;
1021 mount_list_unlock();
1022
1023 if (device_vnode ) {
1024 vnode_rele(device_vnode);
1025 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
1026 }
1027 lck_rw_done(&mp->mnt_rwlock);
1028 is_rwlock_locked = FALSE;
1029
1030 /*
1031 * if we get here, we have a mount structure that needs to be freed,
1032 * but since the coveredvp hasn't yet been updated to point at it,
1033 * no need to worry about other threads holding a crossref on this mp
1034 * so it's ok to just free it
1035 */
1036 mount_lock_destroy(mp);
1037 #if CONFIG_MACF
1038 mac_mount_label_destroy(mp);
1039 #endif
1040 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1041 }
1042 exit:
1043 /*
1044 * drop I/O count on the device vp if there was one
1045 */
1046 if (devpath && devvp)
1047 vnode_put(devvp);
1048
1049 return(error);
1050
1051 /* Error condition exits */
1052 out4:
1053 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
1054
1055 /*
1056 * If the mount has been placed on the covered vp,
1057 * it may have been discovered by now, so we have
1058 * to treat this just like an unmount
1059 */
1060 mount_lock_spin(mp);
1061 mp->mnt_lflag |= MNT_LDEAD;
1062 mount_unlock(mp);
1063
1064 if (device_vnode != NULLVP) {
1065 vnode_rele(device_vnode);
1066 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1067 ctx);
1068 did_rele = TRUE;
1069 }
1070
1071 vnode_lock_spin(vp);
1072
1073 mp->mnt_crossref++;
1074 vp->v_mountedhere = (mount_t) 0;
1075
1076 vnode_unlock(vp);
1077
1078 if (have_usecount) {
1079 vnode_rele(vp);
1080 }
1081 out3:
1082 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
1083 vnode_rele(devvp);
1084 out2:
1085 if (devpath && devvp)
1086 vnode_put(devvp);
1087 out1:
1088 /* Release mnt_rwlock only when it was taken */
1089 if (is_rwlock_locked == TRUE) {
1090 lck_rw_done(&mp->mnt_rwlock);
1091 }
1092
1093 if (mntalloc) {
1094 if (mp->mnt_crossref)
1095 mount_dropcrossref(mp, vp, 0);
1096 else {
1097 mount_lock_destroy(mp);
1098 #if CONFIG_MACF
1099 mac_mount_label_destroy(mp);
1100 #endif
1101 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1102 }
1103 }
1104 if (vfsp_ref) {
1105 mount_list_lock();
1106 vfsp->vfc_refcount--;
1107 mount_list_unlock();
1108 }
1109
1110 return(error);
1111 }
1112
1113 /*
1114 * Flush in-core data, check for competing mount attempts,
1115 * and set VMOUNT
1116 */
1117 int
1118 prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
1119 {
1120 #if !CONFIG_MACF
1121 #pragma unused(cnp,fsname)
1122 #endif
1123 struct vnode_attr va;
1124 int error;
1125
1126 if (!skip_auth) {
1127 /*
1128 * If the user is not root, ensure that they own the directory
1129 * onto which we are attempting to mount.
1130 */
1131 VATTR_INIT(&va);
1132 VATTR_WANTED(&va, va_uid);
1133 if ((error = vnode_getattr(vp, &va, ctx)) ||
1134 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1135 (!vfs_context_issuser(ctx)))) {
1136 error = EPERM;
1137 goto out;
1138 }
1139 }
1140
1141 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1142 goto out;
1143
1144 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1145 goto out;
1146
1147 if (vp->v_type != VDIR) {
1148 error = ENOTDIR;
1149 goto out;
1150 }
1151
1152 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1153 error = EBUSY;
1154 goto out;
1155 }
1156
1157 #if CONFIG_MACF
1158 error = mac_mount_check_mount(ctx, vp,
1159 cnp, fsname);
1160 if (error != 0)
1161 goto out;
1162 #endif
1163
1164 vnode_lock_spin(vp);
1165 SET(vp->v_flag, VMOUNT);
1166 vnode_unlock(vp);
1167
1168 out:
1169 return error;
1170 }
1171
1172 #if CONFIG_IMGSRC_ACCESS
1173
/*
 * IMGSRC_DEBUG(fmt, ...): tracing for the imageboot-source relocation code.
 * Forwards its arguments to printf() when DEBUG is set; otherwise it
 * expands to an empty statement.
 */
1174 #if DEBUG
1175 #define IMGSRC_DEBUG(args...) printf(args)
1176 #else
1177 #define IMGSRC_DEBUG(args...) do { } while(0)
1178 #endif
1179
1180 static int
1181 authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1182 {
1183 struct nameidata nd;
1184 vnode_t vp, realdevvp;
1185 mode_t accessmode;
1186 int error;
1187
1188 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1189 if ( (error = namei(&nd)) ) {
1190 IMGSRC_DEBUG("namei() failed with %d\n", error);
1191 return error;
1192 }
1193
1194 vp = nd.ni_vp;
1195
1196 if (!vnode_isblk(vp)) {
1197 IMGSRC_DEBUG("Not block device.\n");
1198 error = ENOTBLK;
1199 goto out;
1200 }
1201
1202 realdevvp = mp->mnt_devvp;
1203 if (realdevvp == NULLVP) {
1204 IMGSRC_DEBUG("No device backs the mount.\n");
1205 error = ENXIO;
1206 goto out;
1207 }
1208
1209 error = vnode_getwithref(realdevvp);
1210 if (error != 0) {
1211 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1212 goto out;
1213 }
1214
1215 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1216 IMGSRC_DEBUG("Wrong dev_t.\n");
1217 error = ENXIO;
1218 goto out1;
1219 }
1220
1221 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1222
1223 /*
1224 * If mount by non-root, then verify that user has necessary
1225 * permissions on the device.
1226 */
1227 if (!vfs_context_issuser(ctx)) {
1228 accessmode = KAUTH_VNODE_READ_DATA;
1229 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1230 accessmode |= KAUTH_VNODE_WRITE_DATA;
1231 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1232 IMGSRC_DEBUG("Access denied.\n");
1233 goto out1;
1234 }
1235 }
1236
1237 *devvpp = vp;
1238
1239 out1:
1240 vnode_put(realdevvp);
1241 out:
1242 nameidone(&nd);
1243 if (error) {
1244 vnode_put(vp);
1245 }
1246
1247 return error;
1248 }
1249
1250 /*
1251 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1252 * and call checkdirs()
1253 */
1254 static int
1255 place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1256 {
1257 int error;
1258
1259 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1260
1261 vnode_lock_spin(vp);
1262 CLR(vp->v_flag, VMOUNT);
1263 vp->v_mountedhere = mp;
1264 vnode_unlock(vp);
1265
1266 /*
1267 * taking the name_cache_lock exclusively will
1268 * insure that everyone is out of the fast path who
1269 * might be trying to use a now stale copy of
1270 * vp->v_mountedhere->mnt_realrootvp
1271 * bumping mount_generation causes the cached values
1272 * to be invalidated
1273 */
1274 name_cache_lock();
1275 mount_generation++;
1276 name_cache_unlock();
1277
1278 error = vnode_ref(vp);
1279 if (error != 0) {
1280 goto out;
1281 }
1282
1283 error = checkdirs(vp, ctx);
1284 if (error != 0) {
1285 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1286 vnode_rele(vp);
1287 goto out;
1288 }
1289
1290 out:
1291 if (error != 0) {
1292 mp->mnt_vnodecovered = NULLVP;
1293 }
1294 return error;
1295 }
1296
1297 static void
1298 undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1299 {
1300 vnode_rele(vp);
1301 vnode_lock_spin(vp);
1302 vp->v_mountedhere = (mount_t)NULL;
1303 vnode_unlock(vp);
1304
1305 mp->mnt_vnodecovered = NULLVP;
1306 }
1307
1308 static int
1309 mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1310 {
1311 int error;
1312
1313 /* unmount in progress return error */
1314 mount_lock_spin(mp);
1315 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1316 mount_unlock(mp);
1317 return EBUSY;
1318 }
1319 mount_unlock(mp);
1320 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1321
1322 /*
1323 * We only allow the filesystem to be reloaded if it
1324 * is currently mounted read-only.
1325 */
1326 if ((flags & MNT_RELOAD) &&
1327 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1328 error = ENOTSUP;
1329 goto out;
1330 }
1331
1332 /*
1333 * Only root, or the user that did the original mount is
1334 * permitted to update it.
1335 */
1336 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1337 (!vfs_context_issuser(ctx))) {
1338 error = EPERM;
1339 goto out;
1340 }
1341 #if CONFIG_MACF
1342 error = mac_mount_check_remount(ctx, mp);
1343 if (error != 0) {
1344 goto out;
1345 }
1346 #endif
1347
1348 out:
1349 if (error) {
1350 lck_rw_done(&mp->mnt_rwlock);
1351 }
1352
1353 return error;
1354 }
1355
1356 static void
1357 mount_end_update(mount_t mp)
1358 {
1359 lck_rw_done(&mp->mnt_rwlock);
1360 }
1361
1362 static int
1363 get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1364 {
1365 vnode_t vp;
1366
1367 if (height >= MAX_IMAGEBOOT_NESTING) {
1368 return EINVAL;
1369 }
1370
1371 vp = imgsrc_rootvnodes[height];
1372 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1373 *rvpp = vp;
1374 return 0;
1375 } else {
1376 return ENOENT;
1377 }
1378 }
1379
1380 static int
1381 relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
1382 const char *fsname, vfs_context_t ctx,
1383 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
1384 {
1385 int error;
1386 mount_t mp;
1387 boolean_t placed = FALSE;
1388 vnode_t devvp = NULLVP;
1389 struct vfstable *vfsp;
1390 user_addr_t devpath;
1391 char *old_mntonname;
1392 vnode_t rvp;
1393 uint32_t height;
1394 uint32_t flags;
1395
1396 /* If we didn't imageboot, nothing to move */
1397 if (imgsrc_rootvnodes[0] == NULLVP) {
1398 return EINVAL;
1399 }
1400
1401 /* Only root can do this */
1402 if (!vfs_context_issuser(ctx)) {
1403 return EPERM;
1404 }
1405
1406 IMGSRC_DEBUG("looking for root vnode.\n");
1407
1408 /*
1409 * Get root vnode of filesystem we're moving.
1410 */
1411 if (by_index) {
1412 if (is64bit) {
1413 struct user64_mnt_imgsrc_args mia64;
1414 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1415 if (error != 0) {
1416 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1417 return error;
1418 }
1419
1420 height = mia64.mi_height;
1421 flags = mia64.mi_flags;
1422 devpath = mia64.mi_devpath;
1423 } else {
1424 struct user32_mnt_imgsrc_args mia32;
1425 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1426 if (error != 0) {
1427 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1428 return error;
1429 }
1430
1431 height = mia32.mi_height;
1432 flags = mia32.mi_flags;
1433 devpath = mia32.mi_devpath;
1434 }
1435 } else {
1436 /*
1437 * For binary compatibility--assumes one level of nesting.
1438 */
1439 if (is64bit) {
1440 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1441 return error;
1442 } else {
1443 user32_addr_t tmp;
1444 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1445 return error;
1446
1447 /* munge into LP64 addr */
1448 devpath = CAST_USER_ADDR_T(tmp);
1449 }
1450
1451 height = 0;
1452 flags = 0;
1453 }
1454
1455 if (flags != 0) {
1456 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1457 return EINVAL;
1458 }
1459
1460 error = get_imgsrc_rootvnode(height, &rvp);
1461 if (error != 0) {
1462 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
1463 return error;
1464 }
1465
1466 IMGSRC_DEBUG("got root vnode.\n");
1467
1468 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1469
1470 /* Can only move once */
1471 mp = vnode_mount(rvp);
1472 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1473 IMGSRC_DEBUG("Already moved.\n");
1474 error = EBUSY;
1475 goto out0;
1476 }
1477
1478 IMGSRC_DEBUG("Starting updated.\n");
1479
1480 /* Get exclusive rwlock on mount, authorize update on mp */
1481 error = mount_begin_update(mp , ctx, 0);
1482 if (error != 0) {
1483 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
1484 goto out0;
1485 }
1486
1487 /*
1488 * It can only be moved once. Flag is set under the rwlock,
1489 * so we're now safe to proceed.
1490 */
1491 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1492 IMGSRC_DEBUG("Already moved [2]\n");
1493 goto out1;
1494 }
1495
1496
1497 IMGSRC_DEBUG("Preparing coveredvp.\n");
1498
1499 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1500 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
1501 if (error != 0) {
1502 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
1503 goto out1;
1504 }
1505
1506 IMGSRC_DEBUG("Covered vp OK.\n");
1507
1508 /* Sanity check the name caller has provided */
1509 vfsp = mp->mnt_vtable;
1510 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
1511 IMGSRC_DEBUG("Wrong fs name.\n");
1512 error = EINVAL;
1513 goto out2;
1514 }
1515
1516 /* Check the device vnode and update mount-from name, for local filesystems */
1517 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
1518 IMGSRC_DEBUG("Local, doing device validation.\n");
1519
1520 if (devpath != USER_ADDR_NULL) {
1521 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1522 if (error) {
1523 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1524 goto out2;
1525 }
1526
1527 vnode_put(devvp);
1528 }
1529 }
1530
1531 /*
1532 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1533 * and increment the name cache's mount generation
1534 */
1535
1536 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1537 error = place_mount_and_checkdirs(mp, vp, ctx);
1538 if (error != 0) {
1539 goto out2;
1540 }
1541
1542 placed = TRUE;
1543
1544 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1545 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1546
1547 /* Forbid future moves */
1548 mount_lock(mp);
1549 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1550 mount_unlock(mp);
1551
1552 /* Finally, add to mount list, completely ready to go */
1553 if (mount_list_add(mp) != 0) {
1554 /*
1555 * The system is shutting down trying to umount
1556 * everything, so fail with a plausible errno.
1557 */
1558 error = EBUSY;
1559 goto out3;
1560 }
1561
1562 mount_end_update(mp);
1563 vnode_put(rvp);
1564 FREE(old_mntonname, M_TEMP);
1565
1566 vfs_notify_mount(pvp);
1567
1568 return 0;
1569 out3:
1570 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1571
1572 mount_lock(mp);
1573 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1574 mount_unlock(mp);
1575
1576 out2:
1577 /*
1578 * Placing the mp on the vnode clears VMOUNT,
1579 * so cleanup is different after that point
1580 */
1581 if (placed) {
1582 /* Rele the vp, clear VMOUNT and v_mountedhere */
1583 undo_place_on_covered_vp(mp, vp);
1584 } else {
1585 vnode_lock_spin(vp);
1586 CLR(vp->v_flag, VMOUNT);
1587 vnode_unlock(vp);
1588 }
1589 out1:
1590 mount_end_update(mp);
1591
1592 out0:
1593 vnode_put(rvp);
1594 FREE(old_mntonname, M_TEMP);
1595 return error;
1596 }
1597
1598 #endif /* CONFIG_IMGSRC_ACCESS */
1599
1600 void
1601 enablequotas(struct mount *mp, vfs_context_t ctx)
1602 {
1603 struct nameidata qnd;
1604 int type;
1605 char qfpath[MAXPATHLEN];
1606 const char *qfname = QUOTAFILENAME;
1607 const char *qfopsname = QUOTAOPSNAME;
1608 const char *qfextension[] = INITQFNAMES;
1609
1610 /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
1611 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1612 return;
1613 }
1614 /*
1615 * Enable filesystem disk quotas if necessary.
1616 * We ignore errors as this should not interfere with final mount
1617 */
1618 for (type=0; type < MAXQUOTAS; type++) {
1619 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
1620 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1621 CAST_USER_ADDR_T(qfpath), ctx);
1622 if (namei(&qnd) != 0)
1623 continue; /* option file to trigger quotas is not present */
1624 vnode_put(qnd.ni_vp);
1625 nameidone(&qnd);
1626 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
1627
1628 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
1629 }
1630 return;
1631 }
1632
1633
1634 static int
1635 checkdirs_callback(proc_t p, void * arg)
1636 {
1637 struct cdirargs * cdrp = (struct cdirargs * )arg;
1638 vnode_t olddp = cdrp->olddp;
1639 vnode_t newdp = cdrp->newdp;
1640 struct filedesc *fdp;
1641 vnode_t tvp;
1642 vnode_t fdp_cvp;
1643 vnode_t fdp_rvp;
1644 int cdir_changed = 0;
1645 int rdir_changed = 0;
1646
1647 /*
1648 * XXX Also needs to iterate each thread in the process to see if it
1649 * XXX is using a per-thread current working directory, and, if so,
1650 * XXX update that as well.
1651 */
1652
1653 proc_fdlock(p);
1654 fdp = p->p_fd;
1655 if (fdp == (struct filedesc *)0) {
1656 proc_fdunlock(p);
1657 return(PROC_RETURNED);
1658 }
1659 fdp_cvp = fdp->fd_cdir;
1660 fdp_rvp = fdp->fd_rdir;
1661 proc_fdunlock(p);
1662
1663 if (fdp_cvp == olddp) {
1664 vnode_ref(newdp);
1665 tvp = fdp->fd_cdir;
1666 fdp_cvp = newdp;
1667 cdir_changed = 1;
1668 vnode_rele(tvp);
1669 }
1670 if (fdp_rvp == olddp) {
1671 vnode_ref(newdp);
1672 tvp = fdp->fd_rdir;
1673 fdp_rvp = newdp;
1674 rdir_changed = 1;
1675 vnode_rele(tvp);
1676 }
1677 if (cdir_changed || rdir_changed) {
1678 proc_fdlock(p);
1679 fdp->fd_cdir = fdp_cvp;
1680 fdp->fd_rdir = fdp_rvp;
1681 proc_fdunlock(p);
1682 }
1683 return(PROC_RETURNED);
1684 }
1685
1686
1687
1688 /*
1689 * Scan all active processes to see if any of them have a current
1690 * or root directory onto which the new filesystem has just been
1691 * mounted. If so, replace them with the new mount point.
1692 */
1693 static int
1694 checkdirs(vnode_t olddp, vfs_context_t ctx)
1695 {
1696 vnode_t newdp;
1697 vnode_t tvp;
1698 int err;
1699 struct cdirargs cdr;
1700
1701 if (olddp->v_usecount == 1)
1702 return(0);
1703 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
1704
1705 if (err != 0) {
1706 #if DIAGNOSTIC
1707 panic("mount: lost mount: error %d", err);
1708 #endif
1709 return(err);
1710 }
1711
1712 cdr.olddp = olddp;
1713 cdr.newdp = newdp;
1714 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1715 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
1716
1717 if (rootvnode == olddp) {
1718 vnode_ref(newdp);
1719 tvp = rootvnode;
1720 rootvnode = newdp;
1721 vnode_rele(tvp);
1722 }
1723
1724 vnode_put(newdp);
1725 return(0);
1726 }
1727
1728 /*
1729 * Unmount a file system.
1730 *
1731 * Note: unmount takes a path to the vnode mounted on as argument,
1732 * not special file (as before).
1733 */
1734 /* ARGSUSED */
1735 int
1736 unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1737 {
1738 vnode_t vp;
1739 struct mount *mp;
1740 int error;
1741 struct nameidata nd;
1742 vfs_context_t ctx = vfs_context_current();
1743
1744 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
1745 UIO_USERSPACE, uap->path, ctx);
1746 error = namei(&nd);
1747 if (error)
1748 return (error);
1749 vp = nd.ni_vp;
1750 mp = vp->v_mount;
1751 nameidone(&nd);
1752
1753 #if CONFIG_MACF
1754 error = mac_mount_check_umount(ctx, mp);
1755 if (error != 0) {
1756 vnode_put(vp);
1757 return (error);
1758 }
1759 #endif
1760 /*
1761 * Must be the root of the filesystem
1762 */
1763 if ((vp->v_flag & VROOT) == 0) {
1764 vnode_put(vp);
1765 return (EINVAL);
1766 }
1767 mount_ref(mp, 0);
1768 vnode_put(vp);
1769 /* safedounmount consumes the mount ref */
1770 return (safedounmount(mp, uap->flags, ctx));
1771 }
1772
1773 int
1774 vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
1775 {
1776 mount_t mp;
1777
1778 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1779 if (mp == (mount_t)0) {
1780 return(ENOENT);
1781 }
1782 mount_ref(mp, 0);
1783 mount_iterdrop(mp);
1784 /* safedounmount consumes the mount ref */
1785 return(safedounmount(mp, flags, ctx));
1786 }
1787
1788
1789 /*
1790 * The mount struct comes with a mount ref which will be consumed.
1791 * Do the actual file system unmount, prevent some common foot shooting.
1792 */
1793 int
1794 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1795 {
1796 int error;
1797 proc_t p = vfs_context_proc(ctx);
1798
1799 /*
1800 * If the file system is not responding and MNT_NOBLOCK
1801 * is set and not a forced unmount then return EBUSY.
1802 */
1803 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1804 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1805 error = EBUSY;
1806 goto out;
1807 }
1808
1809 /*
1810 * Skip authorization if the mount is tagged as permissive and
1811 * this is not a forced-unmount attempt.
1812 */
1813 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1814 /*
1815 * Only root, or the user that did the original mount is
1816 * permitted to unmount this filesystem.
1817 */
1818 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1819 (error = suser(kauth_cred_get(), &p->p_acflag)))
1820 goto out;
1821 }
1822 /*
1823 * Don't allow unmounting the root file system.
1824 */
1825 if (mp->mnt_flag & MNT_ROOTFS) {
1826 error = EBUSY; /* the root is always busy */
1827 goto out;
1828 }
1829
1830 #ifdef CONFIG_IMGSRC_ACCESS
1831 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1832 error = EBUSY;
1833 goto out;
1834 }
1835 #endif /* CONFIG_IMGSRC_ACCESS */
1836
1837 return (dounmount(mp, flags, 1, ctx));
1838
1839 out:
1840 mount_drop(mp, 0);
1841 return(error);
1842 }
1843
1844 /*
1845 * Do the actual file system unmount.
 *
 * mp      - mount to take down
 * flags   - unmount flags; MNT_FORCE, MNT_NOBLOCK and MNT_LNOSUB are
 *           examined here and the full set is passed to VFS_UNMOUNT
 * withref - non-zero when the caller holds a mount ref on mp; that ref
 *           is dropped by this routine on every path
 * ctx     - context handed to the filesystem and vnode operations
 *
 * Returns 0 on success.  Returns EBUSY when another unmount of mp is
 * already in progress.  Otherwise returns the error from VFS_SYNC or
 * vflush (both only fatal for a non-forced unmount) or from VFS_UNMOUNT;
 * in those cases the unmount-in-progress flags are cleared again and the
 * mount stays in place.
1846 */
1847 int
1848 dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1849 {
1850 vnode_t coveredvp = (vnode_t)0;
1851 int error;
1852 int needwakeup = 0;
1853 int forcedunmount = 0;
1854 int lflags = 0;
1855 struct vnode *devvp = NULLVP;
1856 #if CONFIG_TRIGGERS
1857 proc_t p = vfs_context_proc(ctx);
1858 int did_vflush = 0;
1859 int pflags_save = 0;
1860 #endif /* CONFIG_TRIGGERS */
1861 
1862 mount_lock(mp);
1863 
1864 /*
1865 * If already an unmount in progress just return EBUSY.
1866 * Even a forced unmount cannot override.
1867 */
1868 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1869 if (withref != 0)
1870 mount_drop(mp, 1);
1871 mount_unlock(mp);
1872 return (EBUSY);
1873 }
1874 
1875 if (flags & MNT_FORCE) {
1876 forcedunmount = 1;
1877 mp->mnt_lflag |= MNT_LFORCE;
1878 }
1879 
1880 #if CONFIG_TRIGGERS
/* Remember the previous p_flag value so P_NOREMOTEHANG can be restored at out: */
1881 if (flags & MNT_NOBLOCK && p != kernproc)
1882 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
1883 #endif
1884 
/* Mark the unmount as in progress; the error paths below clear these flags again. */
1885 mp->mnt_kern_flag |= MNTK_UNMOUNT;
1886 mp->mnt_lflag |= MNT_LUNMOUNT;
1887 mp->mnt_flag &=~ MNT_ASYNC;
1888 /*
1889 * anyone currently in the fast path that
1890 * trips over the cached rootvp will be
1891 * dumped out and forced into the slow path
1892 * to regenerate a new cached value
1893 */
1894 mp->mnt_realrootvp = NULLVP;
1895 mount_unlock(mp);
1896 
1897 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
1898 /*
1899 * Force unmount any mounts in this filesystem.
1900 * If any unmounts fail - just leave them dangling.
1901 * Avoids recursion.
1902 */
1903 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
1904 }
1905 
1906 /*
1907 * taking the name_cache_lock exclusively will
1908 * insure that everyone is out of the fast path who
1909 * might be trying to use a now stale copy of
1910 * vp->v_mountedhere->mnt_realrootvp
1911 * bumping mount_generation causes the cached values
1912 * to be invalidated
1913 */
1914 name_cache_lock();
1915 mount_generation++;
1916 name_cache_unlock();
1917 
1918 
/* Held exclusively from here until the lck_rw_done() after the out: label. */
1919 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1920 if (withref != 0)
1921 mount_drop(mp, 0);
1922 #if CONFIG_FSE
1923 fsevent_unmount(mp); /* has to come first! */
1924 #endif
1925 error = 0;
1926 if (forcedunmount == 0) {
1927 ubc_umount(mp); /* release cached vnodes */
1928 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1929 error = VFS_SYNC(mp, MNT_WAIT, ctx);
1930 if (error) {
/* Back out; the mount lock taken here is released in the out: path. */
1931 mount_lock(mp);
1932 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1933 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1934 mp->mnt_lflag &= ~MNT_LFORCE;
1935 goto out;
1936 }
1937 }
1938 }
1939 
1940 IOBSDMountChange(mp, kIOMountChangeUnmount);
1941 
1942 #if CONFIG_TRIGGERS
1943 vfs_nested_trigger_unmounts(mp, flags, ctx);
/* From this point a failed unmount reports VTC_REPLACE to the trigger callback (see out:). */
1944 did_vflush = 1;
1945 #endif
1946 if (forcedunmount)
1947 lflags |= FORCECLOSE;
1948 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
/* A vflush error is only fatal for a non-forced unmount. */
1949 if ((forcedunmount == 0) && error) {
1950 mount_lock(mp);
1951 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1952 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1953 mp->mnt_lflag &= ~MNT_LFORCE;
1954 goto out;
1955 }
1956 
1957 /* make sure there are no one in the mount iterations or lookup */
1958 mount_iterdrain(mp);
1959 
1960 error = VFS_UNMOUNT(mp, flags, ctx);
1961 if (error) {
1962 mount_iterreset(mp);
1963 mount_lock(mp);
1964 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1965 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1966 mp->mnt_lflag &= ~MNT_LFORCE;
1967 goto out;
1968 }
1969 
1970 /* increment the operations count */
1971 if (!error)
1972 OSAddAtomic(1, &vfs_nummntops);
1973 
1974 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
1975 /* hold an io reference and drop the usecount before close */
1976 devvp = mp->mnt_devvp;
1977 vnode_getalways(devvp);
1978 vnode_rele(devvp);
1979 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1980 ctx);
1981 vnode_clearmountedon(devvp);
1982 vnode_put(devvp);
1983 }
/* The rwlock is released around mount_list_remove() and then re-taken. */
1984 lck_rw_done(&mp->mnt_rwlock);
1985 mount_list_remove(mp);
1986 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1987 
1988 /* mark the mount point hook in the vp but not drop the ref yet */
1989 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
1990 /*
1991 * The covered vnode needs special handling. Trying to get an
1992 * iocount must not block here as this may lead to deadlocks
1993 * if the Filesystem to which the covered vnode belongs is
1994 * undergoing forced unmounts. Since we hold a usecount, the
1995 * vnode cannot be reused (it can, however, still be terminated)
1996 */
1997 vnode_getalways(coveredvp);
1998 vnode_lock_spin(coveredvp);
1999 
/* This crossref is released by the mount_dropcrossref() call near the end of the function. */
2000 mp->mnt_crossref++;
2001 coveredvp->v_mountedhere = (struct mount *)0;
2002 CLR(coveredvp->v_flag, VMOUNT);
2003 
2004 vnode_unlock(coveredvp);
2005 vnode_put(coveredvp);
2006 }
2007 
2008 mount_list_lock();
2009 mp->mnt_vtable->vfc_refcount--;
2010 mount_list_unlock();
2011 
2012 cache_purgevfs(mp); /* remove cache entries for this file sys */
2013 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
2014 mount_lock(mp);
2015 mp->mnt_lflag |= MNT_LDEAD;
2016 
2017 if (mp->mnt_lflag & MNT_LWAIT) {
2018 /*
2019 * do the wakeup here
2020 * in case we block in mount_refdrain
2021 * which will drop the mount lock
2022 * and allow anyone blocked in vfs_busy
2023 * to wakeup and see the LDEAD state
2024 */
2025 mp->mnt_lflag &= ~MNT_LWAIT;
2026 wakeup((caddr_t)mp);
2027 }
2028 mount_refdrain(mp);
/* Common exit: the mount lock and mnt_rwlock are both held on arrival from every path. */
2029 out:
2030 if (mp->mnt_lflag & MNT_LWAIT) {
2031 mp->mnt_lflag &= ~MNT_LWAIT;
2032 needwakeup = 1;
2033 }
2034 
2035 #if CONFIG_TRIGGERS
2036 if (flags & MNT_NOBLOCK && p != kernproc) {
2037 // Restore P_NOREMOTEHANG bit to its previous value
2038 if ((pflags_save & P_NOREMOTEHANG) == 0)
2039 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2040 }
2041 
2042 /*
2043 * Callback and context are set together under the mount lock, and
2044 * never cleared, so we're safe to examine them here, drop the lock,
2045 * and call out.
2046 */
2047 if (mp->mnt_triggercallback != NULL) {
2048 mount_unlock(mp);
2049 if (error == 0) {
2050 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2051 } else if (did_vflush) {
2052 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2053 }
2054 } else {
2055 mount_unlock(mp);
2056 }
2057 #else
2058 mount_unlock(mp);
2059 #endif /* CONFIG_TRIGGERS */
2060 
2061 lck_rw_done(&mp->mnt_rwlock);
2062 
2063 if (needwakeup)
2064 wakeup((caddr_t)mp);
2065 
2066 if (!error) {
2067 if ((coveredvp != NULLVP)) {
2068 vnode_t pvp = NULLVP;
2069 
2070 /*
2071 * The covered vnode needs special handling. Trying to
2072 * get an iocount must not block here as this may lead
2073 * to deadlocks if the Filesystem to which the covered
2074 * vnode belongs is undergoing forced unmounts. Since we
2075 * hold a usecount, the vnode cannot be reused
2076 * (it can, however, still be terminated).
2077 */
2078 vnode_getalways(coveredvp);
2079 
/* mount_dropcrossref() frees mp when this was the last crossref, so mp is not used after it. */
2080 mount_dropcrossref(mp, coveredvp, 0);
2081 /*
2082 * We'll _try_ to detect if this really needs to be
2083 * done. The coveredvp can only be in termination (or
2084 * terminated) if the coveredvp's mount point is in a
2085 * forced unmount (or has been) since we still hold the
2086 * ref.
2087 */
2088 if (!vnode_isrecycled(coveredvp)) {
2089 pvp = vnode_getparent(coveredvp);
2090 #if CONFIG_TRIGGERS
2091 if (coveredvp->v_resolve) {
2092 vnode_trigger_rearm(coveredvp, ctx);
2093 }
2094 #endif
2095 }
2096 
2097 vnode_rele(coveredvp);
2098 vnode_put(coveredvp);
2099 coveredvp = NULLVP;
2100 
2101 if (pvp) {
2102 lock_vnode_and_post(pvp, NOTE_WRITE);
2103 vnode_put(pvp);
2104 }
2105 } else if (mp->mnt_flag & MNT_ROOTFS) {
/* No covered vnode: a root filesystem mount is freed directly here. */
2106 mount_lock_destroy(mp);
2107 #if CONFIG_MACF
2108 mac_mount_label_destroy(mp);
2109 #endif
2110 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2111 } else
2112 panic("dounmount: no coveredvp");
2113 }
2114 return (error);
2115 }
2116
2117 /*
2118 * Unmount any mounts in this filesystem.
2119 */
2120 void
2121 dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2122 {
2123 mount_t smp;
2124 fsid_t *fsids, fsid;
2125 int fsids_sz;
2126 int count = 0, i, m = 0;
2127 vnode_t vp;
2128
2129 mount_list_lock();
2130
2131 // Get an array to hold the submounts fsids.
2132 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2133 count++;
2134 fsids_sz = count * sizeof(fsid_t);
2135 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2136 if (fsids == NULL) {
2137 mount_list_unlock();
2138 goto out;
2139 }
2140 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2141
2142 /*
2143 * Fill the array with submount fsids.
2144 * Since mounts are always added to the tail of the mount list, the
2145 * list is always in mount order.
2146 * For each mount check if the mounted-on vnode belongs to a
2147 * mount that's already added to our array of mounts to be unmounted.
2148 */
2149 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2150 vp = smp->mnt_vnodecovered;
2151 if (vp == NULL)
2152 continue;
2153 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2154 for (i = 0; i <= m; i++) {
2155 if (fsids[i].val[0] == fsid.val[0] &&
2156 fsids[i].val[1] == fsid.val[1]) {
2157 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2158 break;
2159 }
2160 }
2161 }
2162 mount_list_unlock();
2163
2164 // Unmount the submounts in reverse order. Ignore errors.
2165 for (i = m; i > 0; i--) {
2166 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2167 if (smp) {
2168 mount_ref(smp, 0);
2169 mount_iterdrop(smp);
2170 (void) dounmount(smp, flags, 1, ctx);
2171 }
2172 }
2173 out:
2174 if (fsids)
2175 FREE(fsids, M_TEMP);
2176 }
2177
2178 void
2179 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2180 {
2181 vnode_lock(dp);
2182 mp->mnt_crossref--;
2183
2184 if (mp->mnt_crossref < 0)
2185 panic("mount cross refs -ve");
2186
2187 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
2188
2189 if (need_put)
2190 vnode_put_locked(dp);
2191 vnode_unlock(dp);
2192
2193 mount_lock_destroy(mp);
2194 #if CONFIG_MACF
2195 mac_mount_label_destroy(mp);
2196 #endif
2197 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2198 return;
2199 }
2200 if (need_put)
2201 vnode_put_locked(dp);
2202 vnode_unlock(dp);
2203 }
2204
2205
2206 /*
2207 * Sync each mounted filesystem.
2208 */
/* When non-zero (DIAGNOSTIC kernels only), sync() and sync_thread() call vfs_bufstats(). */
2209 #if DIAGNOSTIC
2210 int syncprt = 0;
2211 #endif
2212 
/* When non-zero, sync() and sync_thread() call vm_countdirtypages() after syncing. */
2213 int print_vmpage_stat=0;
/* Timeout that sync_internal() hands to sync_async(). */
2214 int sync_timeout = 60; // Sync time limit (sec)
2215
2216 static int
2217 sync_callback(mount_t mp, __unused void *arg)
2218 {
2219 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2220 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2221
2222 mp->mnt_flag &= ~MNT_ASYNC;
2223 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
2224 if (asyncflag)
2225 mp->mnt_flag |= MNT_ASYNC;
2226 }
2227
2228 return (VFS_RETURNED);
2229 }
2230
2231 /* ARGSUSED */
2232 int
2233 sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
2234 {
2235 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2236
2237 if (print_vmpage_stat) {
2238 vm_countdirtypages();
2239 }
2240
2241 #if DIAGNOSTIC
2242 if (syncprt)
2243 vfs_bufstats();
2244 #endif /* DIAGNOSTIC */
2245 return 0;
2246 }
2247
2248 static void
2249 sync_thread(void *arg, __unused wait_result_t wr)
2250 {
2251 int *timeout = (int *) arg;
2252
2253 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2254
2255 if (timeout)
2256 wakeup((caddr_t) timeout);
2257 if (print_vmpage_stat) {
2258 vm_countdirtypages();
2259 }
2260
2261 #if DIAGNOSTIC
2262 if (syncprt)
2263 vfs_bufstats();
2264 #endif /* DIAGNOSTIC */
2265 }
2266
2267 /*
2268 * Sync in a separate thread so we can time out if it blocks.
2269 */
2270 static int
2271 sync_async(int timeout)
2272 {
2273 thread_t thd;
2274 int error;
2275 struct timespec ts = {timeout, 0};
2276
2277 lck_mtx_lock(sync_mtx_lck);
2278 if (kernel_thread_start(sync_thread, &timeout, &thd) != KERN_SUCCESS) {
2279 printf("sync_thread failed\n");
2280 lck_mtx_unlock(sync_mtx_lck);
2281 return (0);
2282 }
2283
2284 error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "sync_thread", &ts);
2285 if (error) {
2286 printf("sync timed out: %d sec\n", timeout);
2287 }
2288 thread_deallocate(thd);
2289
2290 return (0);
2291 }
2292
2293 /*
2294 * An in-kernel sync for power management to call.
2295 */
2296 __private_extern__ int
2297 sync_internal(void)
2298 {
2299 (void) sync_async(sync_timeout);
2300
2301 return 0;
2302 } /* end of sync_internal call */
2303
2304 /*
2305 * Change filesystem quotas.
2306 */
2307 #if QUOTA
2308 int
2309 quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
2310 {
2311 struct mount *mp;
2312 int error, quota_cmd, quota_status;
2313 caddr_t datap;
2314 size_t fnamelen;
2315 struct nameidata nd;
2316 vfs_context_t ctx = vfs_context_current();
2317 struct dqblk my_dqblk;
2318
2319 AUDIT_ARG(uid, uap->uid);
2320 AUDIT_ARG(cmd, uap->cmd);
2321 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2322 uap->path, ctx);
2323 error = namei(&nd);
2324 if (error)
2325 return (error);
2326 mp = nd.ni_vp->v_mount;
2327 vnode_put(nd.ni_vp);
2328 nameidone(&nd);
2329
2330 /* copyin any data we will need for downstream code */
2331 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2332
2333 switch (quota_cmd) {
2334 case Q_QUOTAON:
2335 /* uap->arg specifies a file from which to take the quotas */
2336 fnamelen = MAXPATHLEN;
2337 datap = kalloc(MAXPATHLEN);
2338 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2339 break;
2340 case Q_GETQUOTA:
2341 /* uap->arg is a pointer to a dqblk structure. */
2342 datap = (caddr_t) &my_dqblk;
2343 break;
2344 case Q_SETQUOTA:
2345 case Q_SETUSE:
2346 /* uap->arg is a pointer to a dqblk structure. */
2347 datap = (caddr_t) &my_dqblk;
2348 if (proc_is64bit(p)) {
2349 struct user_dqblk my_dqblk64;
2350 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
2351 if (error == 0) {
2352 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2353 }
2354 }
2355 else {
2356 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
2357 }
2358 break;
2359 case Q_QUOTASTAT:
2360 /* uap->arg is a pointer to an integer */
2361 datap = (caddr_t) &quota_status;
2362 break;
2363 default:
2364 datap = NULL;
2365 break;
2366 } /* switch */
2367
2368 if (error == 0) {
2369 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
2370 }
2371
2372 switch (quota_cmd) {
2373 case Q_QUOTAON:
2374 if (datap != NULL)
2375 kfree(datap, MAXPATHLEN);
2376 break;
2377 case Q_GETQUOTA:
2378 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2379 if (error == 0) {
2380 if (proc_is64bit(p)) {
2381 struct user_dqblk my_dqblk64 = {.dqb_bhardlimit = 0};
2382 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2383 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
2384 }
2385 else {
2386 error = copyout(datap, uap->arg, sizeof (struct dqblk));
2387 }
2388 }
2389 break;
2390 case Q_QUOTASTAT:
2391 /* uap->arg is a pointer to an integer */
2392 if (error == 0) {
2393 error = copyout(datap, uap->arg, sizeof(quota_status));
2394 }
2395 break;
2396 default:
2397 break;
2398 } /* switch */
2399
2400 return (error);
2401 }
2402 #else
2403 int
2404 quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2405 {
2406 return (EOPNOTSUPP);
2407 }
2408 #endif /* QUOTA */
2409
2410 /*
2411 * Get filesystem statistics.
2412 *
2413 * Returns: 0 Success
2414 * namei:???
2415 * vfs_update_vfsstat:???
2416 * munge_statfs:EFAULT
2417 */
2418 /* ARGSUSED */
2419 int
2420 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
2421 {
2422 struct mount *mp;
2423 struct vfsstatfs *sp;
2424 int error;
2425 struct nameidata nd;
2426 vfs_context_t ctx = vfs_context_current();
2427 vnode_t vp;
2428
2429 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2430 UIO_USERSPACE, uap->path, ctx);
2431 error = namei(&nd);
2432 if (error)
2433 return (error);
2434 vp = nd.ni_vp;
2435 mp = vp->v_mount;
2436 sp = &mp->mnt_vfsstat;
2437 nameidone(&nd);
2438
2439 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
2440 if (error != 0) {
2441 vnode_put(vp);
2442 return (error);
2443 }
2444
2445 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2446 vnode_put(vp);
2447 return (error);
2448 }
2449
2450 /*
2451 * Get filesystem statistics.
2452 */
2453 /* ARGSUSED */
2454 int
2455 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
2456 {
2457 vnode_t vp;
2458 struct mount *mp;
2459 struct vfsstatfs *sp;
2460 int error;
2461
2462 AUDIT_ARG(fd, uap->fd);
2463
2464 if ( (error = file_vnode(uap->fd, &vp)) )
2465 return (error);
2466
2467 error = vnode_getwithref(vp);
2468 if (error) {
2469 file_drop(uap->fd);
2470 return (error);
2471 }
2472
2473 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2474
2475 mp = vp->v_mount;
2476 if (!mp) {
2477 error = EBADF;
2478 goto out;
2479 }
2480 sp = &mp->mnt_vfsstat;
2481 if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
2482 goto out;
2483 }
2484
2485 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2486
2487 out:
2488 file_drop(uap->fd);
2489 vnode_put(vp);
2490
2491 return (error);
2492 }
2493
2494 /*
2495 * Common routine to handle copying of statfs64 data to user space
2496 */
2497 static int
2498 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2499 {
2500 int error;
2501 struct statfs64 sfs;
2502
2503 bzero(&sfs, sizeof(sfs));
2504
2505 sfs.f_bsize = sfsp->f_bsize;
2506 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2507 sfs.f_blocks = sfsp->f_blocks;
2508 sfs.f_bfree = sfsp->f_bfree;
2509 sfs.f_bavail = sfsp->f_bavail;
2510 sfs.f_files = sfsp->f_files;
2511 sfs.f_ffree = sfsp->f_ffree;
2512 sfs.f_fsid = sfsp->f_fsid;
2513 sfs.f_owner = sfsp->f_owner;
2514 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2515 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2516 sfs.f_fssubtype = sfsp->f_fssubtype;
2517 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2518 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2519 } else {
2520 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2521 }
2522 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2523 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2524
2525 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2526
2527 return(error);
2528 }
2529
2530 /*
2531 * Get file system statistics in 64-bit mode
2532 */
2533 int
2534 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2535 {
2536 struct mount *mp;
2537 struct vfsstatfs *sp;
2538 int error;
2539 struct nameidata nd;
2540 vfs_context_t ctxp = vfs_context_current();
2541 vnode_t vp;
2542
2543 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2544 UIO_USERSPACE, uap->path, ctxp);
2545 error = namei(&nd);
2546 if (error)
2547 return (error);
2548 vp = nd.ni_vp;
2549 mp = vp->v_mount;
2550 sp = &mp->mnt_vfsstat;
2551 nameidone(&nd);
2552
2553 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
2554 if (error != 0) {
2555 vnode_put(vp);
2556 return (error);
2557 }
2558
2559 error = statfs64_common(mp, sp, uap->buf);
2560 vnode_put(vp);
2561
2562 return (error);
2563 }
2564
2565 /*
2566 * Get file system statistics in 64-bit mode
2567 */
2568 int
2569 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2570 {
2571 struct vnode *vp;
2572 struct mount *mp;
2573 struct vfsstatfs *sp;
2574 int error;
2575
2576 AUDIT_ARG(fd, uap->fd);
2577
2578 if ( (error = file_vnode(uap->fd, &vp)) )
2579 return (error);
2580
2581 error = vnode_getwithref(vp);
2582 if (error) {
2583 file_drop(uap->fd);
2584 return (error);
2585 }
2586
2587 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2588
2589 mp = vp->v_mount;
2590 if (!mp) {
2591 error = EBADF;
2592 goto out;
2593 }
2594 sp = &mp->mnt_vfsstat;
2595 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2596 goto out;
2597 }
2598
2599 error = statfs64_common(mp, sp, uap->buf);
2600
2601 out:
2602 file_drop(uap->fd);
2603 vnode_put(vp);
2604
2605 return (error);
2606 }
2607
2608 struct getfsstat_struct {
2609 user_addr_t sfsp;
2610 user_addr_t *mp;
2611 int count;
2612 int maxcount;
2613 int flags;
2614 int error;
2615 };
2616
2617
2618 static int
2619 getfsstat_callback(mount_t mp, void * arg)
2620 {
2621
2622 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2623 struct vfsstatfs *sp;
2624 int error, my_size;
2625 vfs_context_t ctx = vfs_context_current();
2626
2627 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2628 sp = &mp->mnt_vfsstat;
2629 /*
2630 * If MNT_NOWAIT is specified, do not refresh the
2631 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2632 */
2633 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2634 (error = vfs_update_vfsstat(mp, ctx,
2635 VFS_USER_EVENT))) {
2636 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2637 return(VFS_RETURNED);
2638 }
2639
2640 /*
2641 * Need to handle LP64 version of struct statfs
2642 */
2643 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
2644 if (error) {
2645 fstp->error = error;
2646 return(VFS_RETURNED_DONE);
2647 }
2648 fstp->sfsp += my_size;
2649
2650 if (fstp->mp) {
2651 #if CONFIG_MACF
2652 error = mac_mount_label_get(mp, *fstp->mp);
2653 if (error) {
2654 fstp->error = error;
2655 return(VFS_RETURNED_DONE);
2656 }
2657 #endif
2658 fstp->mp++;
2659 }
2660 }
2661 fstp->count++;
2662 return(VFS_RETURNED);
2663 }
2664
2665 /*
2666 * Get statistics on all filesystems.
2667 */
2668 int
2669 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2670 {
2671 struct __mac_getfsstat_args muap;
2672
2673 muap.buf = uap->buf;
2674 muap.bufsize = uap->bufsize;
2675 muap.mac = USER_ADDR_NULL;
2676 muap.macsize = 0;
2677 muap.flags = uap->flags;
2678
2679 return (__mac_getfsstat(p, &muap, retval));
2680 }
2681
2682 /*
2683 * __mac_getfsstat: Get MAC-related file system statistics
2684 *
2685 * Parameters: p (ignored)
2686 * uap User argument descriptor (see below)
2687 * retval Count of file system statistics (N stats)
2688 *
2689 * Indirect: uap->bufsize Buffer size
2690 * uap->macsize MAC info size
2691 * uap->buf Buffer where information will be returned
2692 * uap->mac MAC info
2693 * uap->flags File system flags
2694 *
2695 *
2696 * Returns: 0 Success
2697 * !0 Not success
2698 *
2699 */
2700 int
2701 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
2702 {
2703 user_addr_t sfsp;
2704 user_addr_t *mp;
2705 size_t count, maxcount, bufsize, macsize;
2706 struct getfsstat_struct fst;
2707
2708 bufsize = (size_t) uap->bufsize;
2709 macsize = (size_t) uap->macsize;
2710
2711 if (IS_64BIT_PROCESS(p)) {
2712 maxcount = bufsize / sizeof(struct user64_statfs);
2713 }
2714 else {
2715 maxcount = bufsize / sizeof(struct user32_statfs);
2716 }
2717 sfsp = uap->buf;
2718 count = 0;
2719
2720 mp = NULL;
2721
2722 #if CONFIG_MACF
2723 if (uap->mac != USER_ADDR_NULL) {
2724 u_int32_t *mp0;
2725 int error;
2726 unsigned int i;
2727
2728 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2729 if (count != maxcount)
2730 return (EINVAL);
2731
2732 /* Copy in the array */
2733 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2734 if (mp0 == NULL) {
2735 return (ENOMEM);
2736 }
2737
2738 error = copyin(uap->mac, mp0, macsize);
2739 if (error) {
2740 FREE(mp0, M_MACTEMP);
2741 return (error);
2742 }
2743
2744 /* Normalize to an array of user_addr_t */
2745 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
2746 if (mp == NULL) {
2747 FREE(mp0, M_MACTEMP);
2748 return (ENOMEM);
2749 }
2750
2751 for (i = 0; i < count; i++) {
2752 if (IS_64BIT_PROCESS(p))
2753 mp[i] = ((user_addr_t *)mp0)[i];
2754 else
2755 mp[i] = (user_addr_t)mp0[i];
2756 }
2757 FREE(mp0, M_MACTEMP);
2758 }
2759 #endif
2760
2761
2762 fst.sfsp = sfsp;
2763 fst.mp = mp;
2764 fst.flags = uap->flags;
2765 fst.count = 0;
2766 fst.error = 0;
2767 fst.maxcount = maxcount;
2768
2769
2770 vfs_iterate(0, getfsstat_callback, &fst);
2771
2772 if (mp)
2773 FREE(mp, M_MACTEMP);
2774
2775 if (fst.error ) {
2776 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2777 return(fst.error);
2778 }
2779
2780 if (fst.sfsp && fst.count > fst.maxcount)
2781 *retval = fst.maxcount;
2782 else
2783 *retval = fst.count;
2784 return (0);
2785 }
2786
2787 static int
2788 getfsstat64_callback(mount_t mp, void * arg)
2789 {
2790 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2791 struct vfsstatfs *sp;
2792 int error;
2793
2794 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2795 sp = &mp->mnt_vfsstat;
2796 /*
2797 * If MNT_NOWAIT is specified, do not refresh the fsstat
2798 * cache. MNT_WAIT overrides MNT_NOWAIT.
2799 *
2800 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2801 * getfsstat, since the constants are out of the same
2802 * namespace.
2803 */
2804 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2805 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2806 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2807 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2808 return(VFS_RETURNED);
2809 }
2810
2811 error = statfs64_common(mp, sp, fstp->sfsp);
2812 if (error) {
2813 fstp->error = error;
2814 return(VFS_RETURNED_DONE);
2815 }
2816 fstp->sfsp += sizeof(struct statfs64);
2817 }
2818 fstp->count++;
2819 return(VFS_RETURNED);
2820 }
2821
2822 /*
2823 * Get statistics on all file systems in 64 bit mode.
2824 */
2825 int
2826 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2827 {
2828 user_addr_t sfsp;
2829 int count, maxcount;
2830 struct getfsstat_struct fst;
2831
2832 maxcount = uap->bufsize / sizeof(struct statfs64);
2833
2834 sfsp = uap->buf;
2835 count = 0;
2836
2837 fst.sfsp = sfsp;
2838 fst.flags = uap->flags;
2839 fst.count = 0;
2840 fst.error = 0;
2841 fst.maxcount = maxcount;
2842
2843 vfs_iterate(0, getfsstat64_callback, &fst);
2844
2845 if (fst.error ) {
2846 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2847 return(fst.error);
2848 }
2849
2850 if (fst.sfsp && fst.count > fst.maxcount)
2851 *retval = fst.maxcount;
2852 else
2853 *retval = fst.count;
2854
2855 return (0);
2856 }
2857
2858 /*
2859 * Gets the vnode associated with the file descriptor passed
2860 * as input.
2861 *
2862 * INPUT
2863 * ctx - vfs context of caller
2864 * fd - file descriptor for which vnode is required.
2865 * vpp - Pointer to pointer to vnode to be returned.
2866 *
2867 * The vnode is returned with an iocount so any vnode obtained
2868 * by this call needs a vnode_put
2869 *
2870 */
2871 static int
2872 vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
2873 {
2874 int error;
2875 vnode_t vp;
2876 struct fileproc *fp;
2877 proc_t p = vfs_context_proc(ctx);
2878
2879 *vpp = NULLVP;
2880
2881 error = fp_getfvp(p, fd, &fp, &vp);
2882 if (error)
2883 return (error);
2884
2885 error = vnode_getwithref(vp);
2886 if (error) {
2887 (void)fp_drop(p, fd, fp, 0);
2888 return (error);
2889 }
2890
2891 (void)fp_drop(p, fd, fp, 0);
2892 *vpp = vp;
2893 return (error);
2894 }
2895
2896 /*
2897 * Wrapper function around namei to start lookup from a directory
2898 * specified by a file descriptor ni_dirfd.
2899 *
2900 * In addition to all the errors returned by namei, this call can
2901 * return ENOTDIR if the file descriptor does not refer to a directory.
2902 * and EBADF if the file descriptor is not valid.
2903 */
2904 int
2905 nameiat(struct nameidata *ndp, int dirfd)
2906 {
2907 if ((dirfd != AT_FDCWD) &&
2908 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
2909 !(ndp->ni_cnd.cn_flags & USEDVP)) {
2910 int error = 0;
2911 char c;
2912
2913 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
2914 error = copyin(ndp->ni_dirp, &c, sizeof(char));
2915 if (error)
2916 return (error);
2917 } else {
2918 c = *((char *)(ndp->ni_dirp));
2919 }
2920
2921 if (c != '/') {
2922 vnode_t dvp_at;
2923
2924 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
2925 &dvp_at);
2926 if (error)
2927 return (error);
2928
2929 if (vnode_vtype(dvp_at) != VDIR) {
2930 vnode_put(dvp_at);
2931 return (ENOTDIR);
2932 }
2933
2934 ndp->ni_dvp = dvp_at;
2935 ndp->ni_cnd.cn_flags |= USEDVP;
2936 error = namei(ndp);
2937 ndp->ni_cnd.cn_flags &= ~USEDVP;
2938 vnode_put(dvp_at);
2939 return (error);
2940 }
2941 }
2942
2943 return (namei(ndp));
2944 }
2945
2946 /*
2947 * Change current working directory to a given file descriptor.
2948 */
2949 /* ARGSUSED */
2950 static int
2951 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
2952 {
2953 struct filedesc *fdp = p->p_fd;
2954 vnode_t vp;
2955 vnode_t tdp;
2956 vnode_t tvp;
2957 struct mount *mp;
2958 int error;
2959 vfs_context_t ctx = vfs_context_current();
2960
2961 AUDIT_ARG(fd, uap->fd);
2962 if (per_thread && uap->fd == -1) {
2963 /*
2964 * Switching back from per-thread to per process CWD; verify we
2965 * in fact have one before proceeding. The only success case
2966 * for this code path is to return 0 preemptively after zapping
2967 * the thread structure contents.
2968 */
2969 thread_t th = vfs_context_thread(ctx);
2970 if (th) {
2971 uthread_t uth = get_bsdthread_info(th);
2972 tvp = uth->uu_cdir;
2973 uth->uu_cdir = NULLVP;
2974 if (tvp != NULLVP) {
2975 vnode_rele(tvp);
2976 return (0);
2977 }
2978 }
2979 return (EBADF);
2980 }
2981
2982 if ( (error = file_vnode(uap->fd, &vp)) )
2983 return(error);
2984 if ( (error = vnode_getwithref(vp)) ) {
2985 file_drop(uap->fd);
2986 return(error);
2987 }
2988
2989 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
2990
2991 if (vp->v_type != VDIR) {
2992 error = ENOTDIR;
2993 goto out;
2994 }
2995
2996 #if CONFIG_MACF
2997 error = mac_vnode_check_chdir(ctx, vp);
2998 if (error)
2999 goto out;
3000 #endif
3001 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3002 if (error)
3003 goto out;
3004
3005 while (!error && (mp = vp->v_mountedhere) != NULL) {
3006 if (vfs_busy(mp, LK_NOWAIT)) {
3007 error = EACCES;
3008 goto out;
3009 }
3010 error = VFS_ROOT(mp, &tdp, ctx);
3011 vfs_unbusy(mp);
3012 if (error)
3013 break;
3014 vnode_put(vp);
3015 vp = tdp;
3016 }
3017 if (error)
3018 goto out;
3019 if ( (error = vnode_ref(vp)) )
3020 goto out;
3021 vnode_put(vp);
3022
3023 if (per_thread) {
3024 thread_t th = vfs_context_thread(ctx);
3025 if (th) {
3026 uthread_t uth = get_bsdthread_info(th);
3027 tvp = uth->uu_cdir;
3028 uth->uu_cdir = vp;
3029 OSBitOrAtomic(P_THCWD, &p->p_flag);
3030 } else {
3031 vnode_rele(vp);
3032 return (ENOENT);
3033 }
3034 } else {
3035 proc_fdlock(p);
3036 tvp = fdp->fd_cdir;
3037 fdp->fd_cdir = vp;
3038 proc_fdunlock(p);
3039 }
3040
3041 if (tvp)
3042 vnode_rele(tvp);
3043 file_drop(uap->fd);
3044
3045 return (0);
3046 out:
3047 vnode_put(vp);
3048 file_drop(uap->fd);
3049
3050 return(error);
3051 }
3052
3053 int
3054 fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
3055 {
3056 return common_fchdir(p, uap, 0);
3057 }
3058
3059 int
3060 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
3061 {
3062 return common_fchdir(p, (void *)uap, 1);
3063 }
3064
3065 /*
3066 * Change current working directory (".").
3067 *
3068 * Returns: 0 Success
3069 * change_dir:ENOTDIR
3070 * change_dir:???
3071 * vnode_ref:ENOENT No such file or directory
3072 */
3073 /* ARGSUSED */
3074 static int
3075 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
3076 {
3077 struct filedesc *fdp = p->p_fd;
3078 int error;
3079 struct nameidata nd;
3080 vnode_t tvp;
3081 vfs_context_t ctx = vfs_context_current();
3082
3083 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
3084 UIO_USERSPACE, uap->path, ctx);
3085 error = change_dir(&nd, ctx);
3086 if (error)
3087 return (error);
3088 if ( (error = vnode_ref(nd.ni_vp)) ) {
3089 vnode_put(nd.ni_vp);
3090 return (error);
3091 }
3092 /*
3093 * drop the iocount we picked up in change_dir
3094 */
3095 vnode_put(nd.ni_vp);
3096
3097 if (per_thread) {
3098 thread_t th = vfs_context_thread(ctx);
3099 if (th) {
3100 uthread_t uth = get_bsdthread_info(th);
3101 tvp = uth->uu_cdir;
3102 uth->uu_cdir = nd.ni_vp;
3103 OSBitOrAtomic(P_THCWD, &p->p_flag);
3104 } else {
3105 vnode_rele(nd.ni_vp);
3106 return (ENOENT);
3107 }
3108 } else {
3109 proc_fdlock(p);
3110 tvp = fdp->fd_cdir;
3111 fdp->fd_cdir = nd.ni_vp;
3112 proc_fdunlock(p);
3113 }
3114
3115 if (tvp)
3116 vnode_rele(tvp);
3117
3118 return (0);
3119 }
3120
3121
3122 /*
3123 * chdir
3124 *
3125 * Change current working directory (".") for the entire process
3126 *
3127 * Parameters: p Process requesting the call
3128 * uap User argument descriptor (see below)
3129 * retval (ignored)
3130 *
3131 * Indirect parameters: uap->path Directory path
3132 *
3133 * Returns: 0 Success
3134 * common_chdir: ENOTDIR
3135 * common_chdir: ENOENT No such file or directory
3136 * common_chdir: ???
3137 *
3138 */
3139 int
3140 chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
3141 {
3142 return common_chdir(p, (void *)uap, 0);
3143 }
3144
3145 /*
3146 * __pthread_chdir
3147 *
3148 * Change current working directory (".") for a single thread
3149 *
3150 * Parameters: p Process requesting the call
3151 * uap User argument descriptor (see below)
3152 * retval (ignored)
3153 *
3154 * Indirect parameters: uap->path Directory path
3155 *
3156 * Returns: 0 Success
3157 * common_chdir: ENOTDIR
3158 * common_chdir: ENOENT No such file or directory
3159 * common_chdir: ???
3160 *
3161 */
3162 int
3163 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
3164 {
3165 return common_chdir(p, (void *)uap, 1);
3166 }
3167
3168
3169 /*
3170 * Change notion of root (``/'') directory.
3171 */
3172 /* ARGSUSED */
3173 int
3174 chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
3175 {
3176 struct filedesc *fdp = p->p_fd;
3177 int error;
3178 struct nameidata nd;
3179 vnode_t tvp;
3180 vfs_context_t ctx = vfs_context_current();
3181
3182 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
3183 return (error);
3184
3185 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
3186 UIO_USERSPACE, uap->path, ctx);
3187 error = change_dir(&nd, ctx);
3188 if (error)
3189 return (error);
3190
3191 #if CONFIG_MACF
3192 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3193 &nd.ni_cnd);
3194 if (error) {
3195 vnode_put(nd.ni_vp);
3196 return (error);
3197 }
3198 #endif
3199
3200 if ( (error = vnode_ref(nd.ni_vp)) ) {
3201 vnode_put(nd.ni_vp);
3202 return (error);
3203 }
3204 vnode_put(nd.ni_vp);
3205
3206 proc_fdlock(p);
3207 tvp = fdp->fd_rdir;
3208 fdp->fd_rdir = nd.ni_vp;
3209 fdp->fd_flags |= FD_CHROOT;
3210 proc_fdunlock(p);
3211
3212 if (tvp != NULL)
3213 vnode_rele(tvp);
3214
3215 return (0);
3216 }
3217
3218 /*
3219 * Common routine for chroot and chdir.
3220 *
3221 * Returns: 0 Success
3222 * ENOTDIR Not a directory
3223 * namei:??? [anything namei can return]
3224 * vnode_authorize:??? [anything vnode_authorize can return]
3225 */
3226 static int
3227 change_dir(struct nameidata *ndp, vfs_context_t ctx)
3228 {
3229 vnode_t vp;
3230 int error;
3231
3232 if ((error = namei(ndp)))
3233 return (error);
3234 nameidone(ndp);
3235 vp = ndp->ni_vp;
3236
3237 if (vp->v_type != VDIR) {
3238 vnode_put(vp);
3239 return (ENOTDIR);
3240 }
3241
3242 #if CONFIG_MACF
3243 error = mac_vnode_check_chdir(ctx, vp);
3244 if (error) {
3245 vnode_put(vp);
3246 return (error);
3247 }
3248 #endif
3249
3250 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3251 if (error) {
3252 vnode_put(vp);
3253 return (error);
3254 }
3255
3256 return (error);
3257 }
3258
3259 /*
3260 * Allocate the vnode data (for directories) associated with the file glob.
3261 */
3262 struct fd_vn_data *
3263 fg_vn_data_alloc(void)
3264 {
3265 struct fd_vn_data *fvdata;
3266
3267 /* Allocate per fd vnode data */
3268 MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
3269 M_FD_VN_DATA, M_WAITOK | M_ZERO);
3270 lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
3271 return fvdata;
3272 }
3273
3274 /*
3275 * Free the vnode data (for directories) associated with the file glob.
3276 */
3277 void
3278 fg_vn_data_free(void *fgvndata)
3279 {
3280 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3281
3282 if (fvdata->fv_buf)
3283 FREE(fvdata->fv_buf, M_FD_DIRBUF);
3284 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3285 FREE(fvdata, M_FD_VN_DATA);
3286 }
3287
3288 /*
3289 * Check permissions, allocate an open file structure,
3290 * and call the device open routine if any.
3291 *
3292 * Returns: 0 Success
3293 * EINVAL
3294 * EINTR
3295 * falloc:ENFILE
3296 * falloc:EMFILE
3297 * falloc:ENOMEM
3298 * vn_open_auth:???
3299 * dupfdopen:???
3300 * VNOP_ADVLOCK:???
3301 * vnode_setsize:???
3302 *
3303 * XXX Need to implement uid, gid
3304 */
3305 int
3306 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3307 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3308 int32_t *retval)
3309 {
3310 proc_t p = vfs_context_proc(ctx);
3311 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
3312 struct fileproc *fp;
3313 vnode_t vp;
3314 int flags, oflags;
3315 int type, indx, error;
3316 struct flock lf;
3317 struct vfs_context context;
3318
3319 oflags = uflags;
3320
3321 if ((oflags & O_ACCMODE) == O_ACCMODE)
3322 return(EINVAL);
3323
3324 flags = FFLAGS(uflags);
3325 CLR(flags, FENCRYPTED);
3326 CLR(flags, FUNENCRYPTED);
3327
3328 AUDIT_ARG(fflags, oflags);
3329 AUDIT_ARG(mode, vap->va_mode);
3330
3331 if ((error = falloc_withalloc(p,
3332 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
3333 return (error);
3334 }
3335 uu->uu_dupfd = -indx - 1;
3336
3337 if ((error = vn_open_auth(ndp, &flags, vap))) {
3338 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
3339 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
3340 fp_drop(p, indx, NULL, 0);
3341 *retval = indx;
3342 return (0);
3343 }
3344 }
3345 if (error == ERESTART)
3346 error = EINTR;
3347 fp_free(p, indx, fp);
3348 return (error);
3349 }
3350 uu->uu_dupfd = 0;
3351 vp = ndp->ni_vp;
3352
3353 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
3354 fp->f_fglob->fg_ops = &vnops;
3355 fp->f_fglob->fg_data = (caddr_t)vp;
3356
3357 if (flags & (O_EXLOCK | O_SHLOCK)) {
3358 lf.l_whence = SEEK_SET;
3359 lf.l_start = 0;
3360 lf.l_len = 0;
3361 if (flags & O_EXLOCK)
3362 lf.l_type = F_WRLCK;
3363 else
3364 lf.l_type = F_RDLCK;
3365 type = F_FLOCK;
3366 if ((flags & FNONBLOCK) == 0)
3367 type |= F_WAIT;
3368 #if CONFIG_MACF
3369 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3370 F_SETLK, &lf);
3371 if (error)
3372 goto bad;
3373 #endif
3374 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
3375 goto bad;
3376 fp->f_fglob->fg_flag |= FHASLOCK;
3377 }
3378
3379 /* try to truncate by setting the size attribute */
3380 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3381 goto bad;
3382
3383 /*
3384 * For directories we hold some additional information in the fd.
3385 */
3386 if (vnode_vtype(vp) == VDIR) {
3387 fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
3388 } else {
3389 fp->f_fglob->fg_vn_data = NULL;
3390 }
3391
3392 vnode_put(vp);
3393
3394 /*
3395 * The first terminal open (without a O_NOCTTY) by a session leader
3396 * results in it being set as the controlling terminal.
3397 */
3398 if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
3399 !(flags & O_NOCTTY)) {
3400 int tmp = 0;
3401
3402 (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
3403 (caddr_t)&tmp, ctx);
3404 }
3405
3406 proc_fdlock(p);
3407 if (flags & O_CLOEXEC)
3408 *fdflags(p, indx) |= UF_EXCLOSE;
3409 if (flags & O_CLOFORK)
3410 *fdflags(p, indx) |= UF_FORKCLOSE;
3411 procfdtbl_releasefd(p, indx, NULL);
3412 fp_drop(p, indx, fp, 1);
3413 proc_fdunlock(p);
3414
3415 *retval = indx;
3416
3417 return (0);
3418 bad:
3419 context = *vfs_context_current();
3420 context.vc_ucred = fp->f_fglob->fg_cred;
3421
3422 if ((fp->f_fglob->fg_flag & FHASLOCK) &&
3423 (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
3424 lf.l_whence = SEEK_SET;
3425 lf.l_start = 0;
3426 lf.l_len = 0;
3427 lf.l_type = F_UNLCK;
3428
3429 (void)VNOP_ADVLOCK(
3430 vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
3431 }
3432
3433 vn_close(vp, fp->f_fglob->fg_flag, &context);
3434 vnode_put(vp);
3435 fp_free(p, indx, fp);
3436
3437 return (error);
3438 }
3439
3440 /*
3441 * While most of the *at syscall handlers can call nameiat() which
3442 * is a wrapper around namei, the use of namei and initialisation
3443 * of nameidata are far removed and in different functions - namei
3444 * gets called in vn_open_auth for open1. So we'll just do here what
3445 * nameiat() does.
3446 */
3447 static int
3448 open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3449 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
3450 int dirfd)
3451 {
3452 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
3453 int error;
3454 char c;
3455
3456 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3457 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3458 if (error)
3459 return (error);
3460 } else {
3461 c = *((char *)(ndp->ni_dirp));
3462 }
3463
3464 if (c != '/') {
3465 vnode_t dvp_at;
3466
3467 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3468 &dvp_at);
3469 if (error)
3470 return (error);
3471
3472 if (vnode_vtype(dvp_at) != VDIR) {
3473 vnode_put(dvp_at);
3474 return (ENOTDIR);
3475 }
3476
3477 ndp->ni_dvp = dvp_at;
3478 ndp->ni_cnd.cn_flags |= USEDVP;
3479 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
3480 retval);
3481 vnode_put(dvp_at);
3482 return (error);
3483 }
3484 }
3485
3486 return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
3487 }
3488
3489 /*
3490 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3491 *
3492 * Parameters: p Process requesting the open
3493 * uap User argument descriptor (see below)
3494 * retval Pointer to an area to receive the
3495 * return value from the system call
3496 *
3497 * Indirect: uap->path Path to open (same as 'open')
3498 * uap->flags Flags to open (same as 'open')
3499 * uap->uid UID to set, if creating
3500 * uap->gid GID to set, if creating
3501 * uap->mode File mode, if creating (same as 'open')
3502 * uap->xsecurity ACL to set, if creating
3503 *
3504 * Returns: 0 Success
3505 * !0 errno value
3506 *
3507 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3508 *
3509 * XXX: We should enumerate the possible errno values here, and where
3510 * in the code they originated.
3511 */
3512 int
3513 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
3514 {
3515 struct filedesc *fdp = p->p_fd;
3516 int ciferror;
3517 kauth_filesec_t xsecdst;
3518 struct vnode_attr va;
3519 struct nameidata nd;
3520 int cmode;
3521
3522 AUDIT_ARG(owner, uap->uid, uap->gid);
3523
3524 xsecdst = NULL;
3525 if ((uap->xsecurity != USER_ADDR_NULL) &&
3526 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3527 return ciferror;
3528
3529 VATTR_INIT(&va);
3530 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3531 VATTR_SET(&va, va_mode, cmode);
3532 if (uap->uid != KAUTH_UID_NONE)
3533 VATTR_SET(&va, va_uid, uap->uid);
3534 if (uap->gid != KAUTH_GID_NONE)
3535 VATTR_SET(&va, va_gid, uap->gid);
3536 if (xsecdst != NULL)
3537 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3538
3539 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3540 uap->path, vfs_context_current());
3541
3542 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3543 fileproc_alloc_init, NULL, retval);
3544 if (xsecdst != NULL)
3545 kauth_filesec_free(xsecdst);
3546
3547 return ciferror;
3548 }
3549
3550 /*
3551 * Go through the data-protected atomically controlled open (2)
3552 *
3553 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3554 */
3555 int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3556 int flags = uap->flags;
3557 int class = uap->class;
3558 int dpflags = uap->dpflags;
3559
3560 /*
3561 * Follow the same path as normal open(2)
3562 * Look up the item if it exists, and acquire the vnode.
3563 */
3564 struct filedesc *fdp = p->p_fd;
3565 struct vnode_attr va;
3566 struct nameidata nd;
3567 int cmode;
3568 int error;
3569
3570 VATTR_INIT(&va);
3571 /* Mask off all but regular access permissions */
3572 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3573 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3574
3575 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3576 uap->path, vfs_context_current());
3577
3578 /*
3579 * Initialize the extra fields in vnode_attr to pass down our
3580 * extra fields.
3581 * 1. target cprotect class.
3582 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3583 */
3584 if (flags & O_CREAT) {
3585 /* lower level kernel code validates that the class is valid before applying it. */
3586 if (class != PROTECTION_CLASS_DEFAULT) {
3587 /*
3588 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3589 * file behave the same as open (2)
3590 */
3591 VATTR_SET(&va, va_dataprotect_class, class);
3592 }
3593 }
3594
3595 if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
3596 if ( flags & (O_RDWR | O_WRONLY)) {
3597 /* Not allowed to write raw encrypted bytes */
3598 return EINVAL;
3599 }
3600 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
3601 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3602 }
3603 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
3604 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
3605 }
3606 }
3607
3608 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3609 fileproc_alloc_init, NULL, retval);
3610
3611 return error;
3612 }
3613
3614 static int
3615 openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
3616 int fd, enum uio_seg segflg, int *retval)
3617 {
3618 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
3619 struct vnode_attr va;
3620 struct nameidata nd;
3621 int cmode;
3622
3623 VATTR_INIT(&va);
3624 /* Mask off all but regular access permissions */
3625 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3626 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3627
3628 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
3629 segflg, path, ctx);
3630
3631 return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
3632 retval, fd));
3633 }
3634
3635 int
3636 open(proc_t p, struct open_args *uap, int32_t *retval)
3637 {
3638 __pthread_testcancel(1);
3639 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3640 }
3641
3642 int
3643 open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3644 int32_t *retval)
3645 {
3646 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3647 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3648 }
3649
3650 int
3651 openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3652 int32_t *retval)
3653 {
3654 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3655 uap->mode, uap->fd, UIO_USERSPACE, retval));
3656 }
3657
3658 int
3659 openat(proc_t p, struct openat_args *uap, int32_t *retval)
3660 {
3661 __pthread_testcancel(1);
3662 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3663 }
3664
3665 /*
3666 * openbyid_np: open a file given a file system id and a file system object id
3667 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3668 * file systems that don't support object ids it is a node id (uint64_t).
3669 *
3670 * Parameters: p Process requesting the open
3671 * uap User argument descriptor (see below)
3672 * retval Pointer to an area to receive the
3673 * return calue from the system call
3674 *
3675 * Indirect: uap->path Path to open (same as 'open')
3676 *
3677 * uap->fsid id of target file system
3678 * uap->objid id of target file system object
3679 * uap->flags Flags to open (same as 'open')
3680 *
3681 * Returns: 0 Success
3682 * !0 errno value
3683 *
3684 *
3685 * XXX: We should enummerate the possible errno values here, and where
3686 * in the code they originated.
3687 */
3688 int
3689 openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
3690 {
3691 fsid_t fsid;
3692 uint64_t objid;
3693 int error;
3694 char *buf = NULL;
3695 int buflen = MAXPATHLEN;
3696 int pathlen = 0;
3697 vfs_context_t ctx = vfs_context_current();
3698
3699 if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
3700 return (error);
3701 }
3702
3703 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
3704 return (error);
3705 }
3706
3707 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3708 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
3709 return (error);
3710 }
3711
3712 AUDIT_ARG(value32, fsid.val[0]);
3713 AUDIT_ARG(value64, objid);
3714
3715 /*resolve path from fsis, objid*/
3716 do {
3717 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
3718 if (buf == NULL) {
3719 return (ENOMEM);
3720 }
3721
3722 error = fsgetpath_internal(
3723 ctx, fsid.val[0], objid,
3724 buflen, buf, &pathlen);
3725
3726 if (error) {
3727 FREE(buf, M_TEMP);
3728 buf = NULL;
3729 }
3730 } while (error == ENOSPC && (buflen += MAXPATHLEN));
3731
3732 if (error) {
3733 return error;
3734 }
3735
3736 buf[pathlen] = 0;
3737
3738 error = openat_internal(
3739 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
3740
3741 FREE(buf, M_TEMP);
3742
3743 return error;
3744 }
3745
3746
3747 /*
3748 * Create a special file.
3749 */
3750 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3751
3752 int
3753 mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
3754 {
3755 struct vnode_attr va;
3756 vfs_context_t ctx = vfs_context_current();
3757 int error;
3758 struct nameidata nd;
3759 vnode_t vp, dvp;
3760
3761 VATTR_INIT(&va);
3762 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3763 VATTR_SET(&va, va_rdev, uap->dev);
3764
3765 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3766 if ((uap->mode & S_IFMT) == S_IFIFO)
3767 return(mkfifo1(ctx, uap->path, &va));
3768
3769 AUDIT_ARG(mode, uap->mode);
3770 AUDIT_ARG(value32, uap->dev);
3771
3772 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
3773 return (error);
3774 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
3775 UIO_USERSPACE, uap->path, ctx);
3776 error = namei(&nd);
3777 if (error)
3778 return (error);
3779 dvp = nd.ni_dvp;
3780 vp = nd.ni_vp;
3781
3782 if (vp != NULL) {
3783 error = EEXIST;
3784 goto out;
3785 }
3786
3787 switch (uap->mode & S_IFMT) {
3788 case S_IFCHR:
3789 VATTR_SET(&va, va_type, VCHR);
3790 break;
3791 case S_IFBLK:
3792 VATTR_SET(&va, va_type, VBLK);
3793 break;
3794 default:
3795 error = EINVAL;
3796 goto out;
3797 }
3798
3799 #if CONFIG_MACF
3800 error = mac_vnode_check_create(ctx,
3801 nd.ni_dvp, &nd.ni_cnd, &va);
3802 if (error)
3803 goto out;
3804 #endif
3805
3806 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3807 goto out;
3808
3809 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
3810 goto out;
3811
3812 if (vp) {
3813 int update_flags = 0;
3814
3815 // Make sure the name & parent pointers are hooked up
3816 if (vp->v_name == NULL)
3817 update_flags |= VNODE_UPDATE_NAME;
3818 if (vp->v_parent == NULLVP)
3819 update_flags |= VNODE_UPDATE_PARENT;
3820
3821 if (update_flags)
3822 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3823
3824 #if CONFIG_FSE
3825 add_fsevent(FSE_CREATE_FILE, ctx,
3826 FSE_ARG_VNODE, vp,
3827 FSE_ARG_DONE);
3828 #endif
3829 }
3830
3831 out:
3832 /*
3833 * nameidone has to happen before we vnode_put(dvp)
3834 * since it may need to release the fs_nodelock on the dvp
3835 */
3836 nameidone(&nd);
3837
3838 if (vp)
3839 vnode_put(vp);
3840 vnode_put(dvp);
3841
3842 return (error);
3843 }
3844
3845 /*
3846 * Create a named pipe.
3847 *
3848 * Returns: 0 Success
3849 * EEXIST
3850 * namei:???
3851 * vnode_authorize:???
3852 * vn_create:???
3853 */
3854 static int
3855 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
3856 {
3857 vnode_t vp, dvp;
3858 int error;
3859 struct nameidata nd;
3860
3861 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
3862 UIO_USERSPACE, upath, ctx);
3863 error = namei(&nd);
3864 if (error)
3865 return (error);
3866 dvp = nd.ni_dvp;
3867 vp = nd.ni_vp;
3868
3869 /* check that this is a new file and authorize addition */
3870 if (vp != NULL) {
3871 error = EEXIST;
3872 goto out;
3873 }
3874 VATTR_SET(vap, va_type, VFIFO);
3875
3876 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
3877 goto out;
3878
3879 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
3880 out:
3881 /*
3882 * nameidone has to happen before we vnode_put(dvp)
3883 * since it may need to release the fs_nodelock on the dvp
3884 */
3885 nameidone(&nd);
3886
3887 if (vp)
3888 vnode_put(vp);
3889 vnode_put(dvp);
3890
3891 return error;
3892 }
3893
3894
3895 /*
3896 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
3897 *
3898 * Parameters: p Process requesting the open
3899 * uap User argument descriptor (see below)
3900 * retval (Ignored)
3901 *
3902 * Indirect: uap->path Path to fifo (same as 'mkfifo')
3903 * uap->uid UID to set
3904 * uap->gid GID to set
3905 * uap->mode File mode to set (same as 'mkfifo')
3906 * uap->xsecurity ACL to set, if creating
3907 *
3908 * Returns: 0 Success
3909 * !0 errno value
3910 *
3911 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3912 *
3913 * XXX: We should enummerate the possible errno values here, and where
3914 * in the code they originated.
3915 */
3916 int
3917 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
3918 {
3919 int ciferror;
3920 kauth_filesec_t xsecdst;
3921 struct vnode_attr va;
3922
3923 AUDIT_ARG(owner, uap->uid, uap->gid);
3924
3925 xsecdst = KAUTH_FILESEC_NONE;
3926 if (uap->xsecurity != USER_ADDR_NULL) {
3927 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
3928 return ciferror;
3929 }
3930
3931 VATTR_INIT(&va);
3932 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3933 if (uap->uid != KAUTH_UID_NONE)
3934 VATTR_SET(&va, va_uid, uap->uid);
3935 if (uap->gid != KAUTH_GID_NONE)
3936 VATTR_SET(&va, va_gid, uap->gid);
3937 if (xsecdst != KAUTH_FILESEC_NONE)
3938 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3939
3940 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
3941
3942 if (xsecdst != KAUTH_FILESEC_NONE)
3943 kauth_filesec_free(xsecdst);
3944 return ciferror;
3945 }
3946
3947 /* ARGSUSED */
3948 int
3949 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
3950 {
3951 struct vnode_attr va;
3952
3953 VATTR_INIT(&va);
3954 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3955
3956 return(mkfifo1(vfs_context_current(), uap->path, &va));
3957 }
3958
3959
/*
 * my_strrchr: return a pointer to the last occurrence of 'ch' in the
 * NUL-terminated string 'p', or NULL if it does not occur. The terminator
 * itself takes part in the comparison, so searching for 0 yields a pointer
 * to the end of the string.
 */
static char *
my_strrchr(char *p, int ch)
{
	char *last = NULL;

	do {
		if (*p == ch) {
			last = p;
		}
	} while (*p++ != '\0');

	return last;
}
3973
3974 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
3975
3976 int
3977 safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
3978 {
3979 int ret, len = _len;
3980
3981 *truncated_path = 0;
3982 ret = vn_getpath(dvp, path, &len);
3983 if (ret == 0 && len < (MAXPATHLEN - 1)) {
3984 if (leafname) {
3985 path[len-1] = '/';
3986 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
3987 if (len > MAXPATHLEN) {
3988 char *ptr;
3989
3990 // the string got truncated!
3991 *truncated_path = 1;
3992 ptr = my_strrchr(path, '/');
3993 if (ptr) {
3994 *ptr = '\0'; // chop off the string at the last directory component
3995 }
3996 len = strlen(path) + 1;
3997 }
3998 }
3999 } else if (ret == 0) {
4000 *truncated_path = 1;
4001 } else if (ret != 0) {
4002 struct vnode *mydvp=dvp;
4003
4004 if (ret != ENOSPC) {
4005 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4006 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
4007 }
4008 *truncated_path = 1;
4009
4010 do {
4011 if (mydvp->v_parent != NULL) {
4012 mydvp = mydvp->v_parent;
4013 } else if (mydvp->v_mount) {
4014 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4015 break;
4016 } else {
4017 // no parent and no mount point? only thing is to punt and say "/" changed
4018 strlcpy(path, "/", _len);
4019 len = 2;
4020 mydvp = NULL;
4021 }
4022
4023 if (mydvp == NULL) {
4024 break;
4025 }
4026
4027 len = _len;
4028 ret = vn_getpath(mydvp, path, &len);
4029 } while (ret == ENOSPC);
4030 }
4031
4032 return len;
4033 }
4034
4035
4036 /*
4037 * Make a hard file link.
4038 *
4039 * Returns: 0 Success
4040 * EPERM
4041 * EEXIST
4042 * EXDEV
4043 * namei:???
4044 * vnode_authorize:???
4045 * VNOP_LINK:???
4046 */
4047 /* ARGSUSED */
4048 static int
4049 linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
4050 user_addr_t link, int flag, enum uio_seg segflg)
4051 {
4052 vnode_t vp, dvp, lvp;
4053 struct nameidata nd;
4054 int follow;
4055 int error;
4056 #if CONFIG_FSE
4057 fse_info finfo;
4058 #endif
4059 int need_event, has_listeners;
4060 char *target_path = NULL;
4061 int truncated=0;
4062
4063 vp = dvp = lvp = NULLVP;
4064
4065 /* look up the object we are linking to */
4066 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
4067 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
4068 segflg, path, ctx);
4069
4070 error = nameiat(&nd, fd1);
4071 if (error)
4072 return (error);
4073 vp = nd.ni_vp;
4074
4075 nameidone(&nd);
4076
4077 /*
4078 * Normally, linking to directories is not supported.
4079 * However, some file systems may have limited support.
4080 */
4081 if (vp->v_type == VDIR) {
4082 if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
4083 error = EPERM; /* POSIX */
4084 goto out;
4085 }
4086 /* Linking to a directory requires ownership. */
4087 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
4088 struct vnode_attr dva;
4089
4090 VATTR_INIT(&dva);
4091 VATTR_WANTED(&dva, va_uid);
4092 if (vnode_getattr(vp, &dva, ctx) != 0 ||
4093 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
4094 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
4095 error = EACCES;
4096 goto out;
4097 }
4098 }
4099 }
4100
4101 /* lookup the target node */
4102 #if CONFIG_TRIGGERS
4103 nd.ni_op = OP_LINK;
4104 #endif
4105 nd.ni_cnd.cn_nameiop = CREATE;
4106 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
4107 nd.ni_dirp = link;
4108 error = nameiat(&nd, fd2);
4109 if (error != 0)
4110 goto out;
4111 dvp = nd.ni_dvp;
4112 lvp = nd.ni_vp;
4113
4114 #if CONFIG_MACF
4115 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
4116 goto out2;
4117 #endif
4118
4119 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4120 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
4121 goto out2;
4122
4123 /* target node must not exist */
4124 if (lvp != NULLVP) {
4125 error = EEXIST;
4126 goto out2;
4127 }
4128 /* cannot link across mountpoints */
4129 if (vnode_mount(vp) != vnode_mount(dvp)) {
4130 error = EXDEV;
4131 goto out2;
4132 }
4133
4134 /* authorize creation of the target note */
4135 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4136 goto out2;
4137
4138 /* and finally make the link */
4139 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
4140 if (error)
4141 goto out2;
4142
4143 #if CONFIG_MACF
4144 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
4145 #endif
4146
4147 #if CONFIG_FSE
4148 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
4149 #else
4150 need_event = 0;
4151 #endif
4152 has_listeners = kauth_authorize_fileop_has_listeners();
4153
4154 if (need_event || has_listeners) {
4155 char *link_to_path = NULL;
4156 int len, link_name_len;
4157
4158 /* build the path to the new link file */
4159 GET_PATH(target_path);
4160 if (target_path == NULL) {
4161 error = ENOMEM;
4162 goto out2;
4163 }
4164
4165 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
4166
4167 if (has_listeners) {
4168 /* build the path to file we are linking to */
4169 GET_PATH(link_to_path);
4170 if (link_to_path == NULL) {
4171 error = ENOMEM;
4172 goto out2;
4173 }
4174
4175 link_name_len = MAXPATHLEN;
4176 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
4177 /*
4178 * Call out to allow 3rd party notification of rename.
4179 * Ignore result of kauth_authorize_fileop call.
4180 */
4181 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
4182 (uintptr_t)link_to_path,
4183 (uintptr_t)target_path);
4184 }
4185 if (link_to_path != NULL) {
4186 RELEASE_PATH(link_to_path);
4187 }
4188 }
4189 #if CONFIG_FSE
4190 if (need_event) {
4191 /* construct fsevent */
4192 if (get_fse_info(vp, &finfo, ctx) == 0) {
4193 if (truncated) {
4194 finfo.mode |= FSE_TRUNCATED_PATH;
4195 }
4196
4197 // build the path to the destination of the link
4198 add_fsevent(FSE_CREATE_FILE, ctx,
4199 FSE_ARG_STRING, len, target_path,
4200 FSE_ARG_FINFO, &finfo,
4201 FSE_ARG_DONE);
4202 }
4203 if (vp->v_parent) {
4204 add_fsevent(FSE_STAT_CHANGED, ctx,
4205 FSE_ARG_VNODE, vp->v_parent,
4206 FSE_ARG_DONE);
4207 }
4208 }
4209 #endif
4210 }
4211 out2:
4212 /*
4213 * nameidone has to happen before we vnode_put(dvp)
4214 * since it may need to release the fs_nodelock on the dvp
4215 */
4216 nameidone(&nd);
4217 if (target_path != NULL) {
4218 RELEASE_PATH(target_path);
4219 }
4220 out:
4221 if (lvp)
4222 vnode_put(lvp);
4223 if (dvp)
4224 vnode_put(dvp);
4225 vnode_put(vp);
4226 return (error);
4227 }
4228
4229 int
4230 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4231 {
4232 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4233 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4234 }
4235
4236 int
4237 linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4238 {
4239 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4240 return (EINVAL);
4241
4242 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4243 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4244 }
4245
4246 /*
4247 * Make a symbolic link.
4248 *
4249 * We could add support for ACLs here too...
4250 */
4251 /* ARGSUSED */
4252 static int
4253 symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
4254 user_addr_t link, enum uio_seg segflg)
4255 {
4256 struct vnode_attr va;
4257 char *path;
4258 int error;
4259 struct nameidata nd;
4260 vnode_t vp, dvp;
4261 uint32_t dfflags; // Directory file flags
4262 size_t dummy=0;
4263 proc_t p;
4264
4265 error = 0;
4266 if (UIO_SEG_IS_USER_SPACE(segflg)) {
4267 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
4268 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
4269 } else {
4270 path = (char *)path_data;
4271 }
4272 if (error)
4273 goto out;
4274 AUDIT_ARG(text, path); /* This is the link string */
4275
4276 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
4277 segflg, link, ctx);
4278
4279 error = nameiat(&nd, fd);
4280 if (error)
4281 goto out;
4282 dvp = nd.ni_dvp;
4283 vp = nd.ni_vp;
4284
4285 p = vfs_context_proc(ctx);
4286 VATTR_INIT(&va);
4287 VATTR_SET(&va, va_type, VLNK);
4288 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
4289
4290 /*
4291 * Handle inheritance of restricted flag
4292 */
4293 error = vnode_flags(dvp, &dfflags, ctx);
4294 if (error)
4295 goto skipit;
4296 if (dfflags & SF_RESTRICTED)
4297 VATTR_SET(&va, va_flags, SF_RESTRICTED);
4298
4299 #if CONFIG_MACF
4300 error = mac_vnode_check_create(ctx,
4301 dvp, &nd.ni_cnd, &va);
4302 #endif
4303 if (error != 0) {
4304 goto skipit;
4305 }
4306
4307 if (vp != NULL) {
4308 error = EEXIST;
4309 goto skipit;
4310 }
4311
4312 /* authorize */
4313 if (error == 0)
4314 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
4315 /* get default ownership, etc. */
4316 if (error == 0)
4317 error = vnode_authattr_new(dvp, &va, 0, ctx);
4318 if (error == 0)
4319 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
4320
4321 #if CONFIG_MACF
4322 if (error == 0 && vp)
4323 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
4324 #endif
4325
4326 /* do fallback attribute handling */
4327 if (error == 0 && vp)
4328 error = vnode_setattr_fallback(vp, &va, ctx);
4329
4330 if (error == 0) {
4331 int update_flags = 0;
4332
4333 /*check if a new vnode was created, else try to get one*/
4334 if (vp == NULL) {
4335 nd.ni_cnd.cn_nameiop = LOOKUP;
4336 #if CONFIG_TRIGGERS
4337 nd.ni_op = OP_LOOKUP;
4338 #endif
4339 nd.ni_cnd.cn_flags = 0;
4340 error = nameiat(&nd, fd);
4341 vp = nd.ni_vp;
4342
4343 if (vp == NULL)
4344 goto skipit;
4345 }
4346
4347 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
4348 /* call out to allow 3rd party notification of rename.
4349 * Ignore result of kauth_authorize_fileop call.
4350 */
4351 if (kauth_authorize_fileop_has_listeners() &&
4352 namei(&nd) == 0) {
4353 char *new_link_path = NULL;
4354 int len;
4355
4356 /* build the path to the new link file */
4357 new_link_path = get_pathbuff();
4358 len = MAXPATHLEN;
4359 vn_getpath(dvp, new_link_path, &len);
4360 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
4361 new_link_path[len - 1] = '/';
4362 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
4363 }
4364
4365 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
4366 (uintptr_t)path, (uintptr_t)new_link_path);
4367 if (new_link_path != NULL)
4368 release_pathbuff(new_link_path);
4369 }
4370 #endif
4371 // Make sure the name & parent pointers are hooked up
4372 if (vp->v_name == NULL)
4373 update_flags |= VNODE_UPDATE_NAME;
4374 if (vp->v_parent == NULLVP)
4375 update_flags |= VNODE_UPDATE_PARENT;
4376
4377 if (update_flags)
4378 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
4379
4380 #if CONFIG_FSE
4381 add_fsevent(FSE_CREATE_FILE, ctx,
4382 FSE_ARG_VNODE, vp,
4383 FSE_ARG_DONE);
4384 #endif
4385 }
4386
4387 skipit:
4388 /*
4389 * nameidone has to happen before we vnode_put(dvp)
4390 * since it may need to release the fs_nodelock on the dvp
4391 */
4392 nameidone(&nd);
4393
4394 if (vp)
4395 vnode_put(vp);
4396 vnode_put(dvp);
4397 out:
4398 if (path && (path != (char *)path_data))
4399 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
4400
4401 return (error);
4402 }
4403
4404 int
4405 symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4406 {
4407 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4408 uap->link, UIO_USERSPACE));
4409 }
4410
4411 int
4412 symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4413 __unused int32_t *retval)
4414 {
4415 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4416 uap->path2, UIO_USERSPACE));
4417 }
4418
4419 /*
4420 * Delete a whiteout from the filesystem.
4421 * No longer supported.
4422 */
4423 int
4424 undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
4425 {
4426 return (ENOTSUP);
4427 }
4428
4429 /*
4430 * Delete a name from the filesystem.
4431 */
4432 /* ARGSUSED */
4433 static int
4434 unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
4435 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
4436 {
4437 struct nameidata nd;
4438 vnode_t vp, dvp;
4439 int error;
4440 struct componentname *cnp;
4441 char *path = NULL;
4442 int len=0;
4443 #if CONFIG_FSE
4444 fse_info finfo;
4445 struct vnode_attr va;
4446 #endif
4447 int flags;
4448 int need_event;
4449 int has_listeners;
4450 int truncated_path;
4451 int batched;
4452 struct vnode_attr *vap;
4453 int do_retry;
4454 int retry_count = 0;
4455 int cn_flags;
4456
4457 cn_flags = LOCKPARENT;
4458 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
4459 cn_flags |= AUDITVNPATH1;
4460 /* If a starting dvp is passed, it trumps any fd passed. */
4461 if (start_dvp)
4462 cn_flags |= USEDVP;
4463
4464 #if NAMEDRSRCFORK
4465 /* unlink or delete is allowed on rsrc forks and named streams */
4466 cn_flags |= CN_ALLOWRSRCFORK;
4467 #endif
4468
4469 retry:
4470 do_retry = 0;
4471 flags = 0;
4472 need_event = 0;
4473 has_listeners = 0;
4474 truncated_path = 0;
4475 vap = NULL;
4476
4477 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
4478
4479 nd.ni_dvp = start_dvp;
4480 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
4481 cnp = &nd.ni_cnd;
4482
4483 lookup_continue:
4484 error = nameiat(&nd, fd);
4485 if (error)
4486 return (error);
4487
4488 dvp = nd.ni_dvp;
4489 vp = nd.ni_vp;
4490
4491
4492 /* With Carbon delete semantics, busy files cannot be deleted */
4493 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
4494 flags |= VNODE_REMOVE_NODELETEBUSY;
4495 }
4496
4497 /* Skip any potential upcalls if told to. */
4498 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4499 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4500 }
4501
4502 if (vp) {
4503 batched = vnode_compound_remove_available(vp);
4504 /*
4505 * The root of a mounted filesystem cannot be deleted.
4506 */
4507 if (vp->v_flag & VROOT) {
4508 error = EBUSY;
4509 }
4510
4511 if (!batched) {
4512 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4513 if (error) {
4514 if (error == ENOENT) {
4515 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4516 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4517 do_retry = 1;
4518 retry_count++;
4519 }
4520 }
4521 goto out;
4522 }
4523 }
4524 } else {
4525 batched = 1;
4526
4527 if (!vnode_compound_remove_available(dvp)) {
4528 panic("No vp, but no compound remove?");
4529 }
4530 }
4531
4532 #if CONFIG_FSE
4533 need_event = need_fsevent(FSE_DELETE, dvp);
4534 if (need_event) {
4535 if (!batched) {
4536 if ((vp->v_flag & VISHARDLINK) == 0) {
4537 /* XXX need to get these data in batched VNOP */
4538 get_fse_info(vp, &finfo, ctx);
4539 }
4540 } else {
4541 error = vfs_get_notify_attributes(&va);
4542 if (error) {
4543 goto out;
4544 }
4545
4546 vap = &va;
4547 }
4548 }
4549 #endif
4550 has_listeners = kauth_authorize_fileop_has_listeners();
4551 if (need_event || has_listeners) {
4552 if (path == NULL) {
4553 GET_PATH(path);
4554 if (path == NULL) {
4555 error = ENOMEM;
4556 goto out;
4557 }
4558 }
4559 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
4560 }
4561
4562 #if NAMEDRSRCFORK
4563 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
4564 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4565 else
4566 #endif
4567 {
4568 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
4569 vp = nd.ni_vp;
4570 if (error == EKEEPLOOKING) {
4571 if (!batched) {
4572 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4573 }
4574
4575 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
4576 panic("EKEEPLOOKING, but continue flag not set?");
4577 }
4578
4579 if (vnode_isdir(vp)) {
4580 error = EISDIR;
4581 goto out;
4582 }
4583 goto lookup_continue;
4584 } else if (error == ENOENT && batched) {
4585 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4586 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4587 /*
4588 * For compound VNOPs, the authorization callback may
4589 * return ENOENT in case of racing hardlink lookups
4590 * hitting the name cache, redrive the lookup.
4591 */
4592 do_retry = 1;
4593 retry_count += 1;
4594 goto out;
4595 }
4596 }
4597 }
4598
4599 /*
4600 * Call out to allow 3rd party notification of delete.
4601 * Ignore result of kauth_authorize_fileop call.
4602 */
4603 if (!error) {
4604 if (has_listeners) {
4605 kauth_authorize_fileop(vfs_context_ucred(ctx),
4606 KAUTH_FILEOP_DELETE,
4607 (uintptr_t)vp,
4608 (uintptr_t)path);
4609 }
4610
4611 if (vp->v_flag & VISHARDLINK) {
4612 //
4613 // if a hardlink gets deleted we want to blow away the
4614 // v_parent link because the path that got us to this
4615 // instance of the link is no longer valid. this will
4616 // force the next call to get the path to ask the file
4617 // system instead of just following the v_parent link.
4618 //
4619 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
4620 }
4621
4622 #if CONFIG_FSE
4623 if (need_event) {
4624 if (vp->v_flag & VISHARDLINK) {
4625 get_fse_info(vp, &finfo, ctx);
4626 } else if (vap) {
4627 vnode_get_fse_info_from_vap(vp, &finfo, vap);
4628 }
4629 if (truncated_path) {
4630 finfo.mode |= FSE_TRUNCATED_PATH;
4631 }
4632 add_fsevent(FSE_DELETE, ctx,
4633 FSE_ARG_STRING, len, path,
4634 FSE_ARG_FINFO, &finfo,
4635 FSE_ARG_DONE);
4636 }
4637 #endif
4638 }
4639
4640 out:
4641 if (path != NULL)
4642 RELEASE_PATH(path);
4643
4644 #if NAMEDRSRCFORK
4645 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4646 * will cause its shadow file to go away if necessary.
4647 */
4648 if (vp && (vnode_isnamedstream(vp)) &&
4649 (vp->v_parent != NULLVP) &&
4650 vnode_isshadow(vp)) {
4651 vnode_recycle(vp);
4652 }
4653 #endif
4654 /*
4655 * nameidone has to happen before we vnode_put(dvp)
4656 * since it may need to release the fs_nodelock on the dvp
4657 */
4658 nameidone(&nd);
4659 vnode_put(dvp);
4660 if (vp) {
4661 vnode_put(vp);
4662 }
4663
4664 if (do_retry) {
4665 goto retry;
4666 }
4667
4668 return (error);
4669 }
4670
4671 int
4672 unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
4673 enum uio_seg segflg, int unlink_flags)
4674 {
4675 return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
4676 unlink_flags));
4677 }
4678
4679 /*
4680 * Delete a name from the filesystem using Carbon semantics.
4681 */
4682 int
4683 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
4684 {
4685 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4686 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
4687 }
4688
4689 /*
4690 * Delete a name from the filesystem using POSIX semantics.
4691 */
4692 int
4693 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
4694 {
4695 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4696 uap->path, UIO_USERSPACE, 0));
4697 }
4698
4699 int
4700 unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
4701 {
4702 if (uap->flag & ~AT_REMOVEDIR)
4703 return (EINVAL);
4704
4705 if (uap->flag & AT_REMOVEDIR)
4706 return (rmdirat_internal(vfs_context_current(), uap->fd,
4707 uap->path, UIO_USERSPACE));
4708 else
4709 return (unlinkat_internal(vfs_context_current(), uap->fd,
4710 NULLVP, uap->path, UIO_USERSPACE, 0));
4711 }
4712
4713 /*
4714 * Reposition read/write file offset.
4715 */
4716 int
4717 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
4718 {
4719 struct fileproc *fp;
4720 vnode_t vp;
4721 struct vfs_context *ctx;
4722 off_t offset = uap->offset, file_size;
4723 int error;
4724
4725 if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
4726 if (error == ENOTSUP)
4727 return (ESPIPE);
4728 return (error);
4729 }
4730 if (vnode_isfifo(vp)) {
4731 file_drop(uap->fd);
4732 return(ESPIPE);
4733 }
4734
4735
4736 ctx = vfs_context_current();
4737 #if CONFIG_MACF
4738 if (uap->whence == L_INCR && uap->offset == 0)
4739 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
4740 fp->f_fglob);
4741 else
4742 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
4743 fp->f_fglob);
4744 if (error) {
4745 file_drop(uap->fd);
4746 return (error);
4747 }
4748 #endif
4749 if ( (error = vnode_getwithref(vp)) ) {
4750 file_drop(uap->fd);
4751 return(error);
4752 }
4753
4754 switch (uap->whence) {
4755 case L_INCR:
4756 offset += fp->f_fglob->fg_offset;
4757 break;
4758 case L_XTND:
4759 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
4760 break;
4761 offset += file_size;
4762 break;
4763 case L_SET:
4764 break;
4765 default:
4766 error = EINVAL;
4767 }
4768 if (error == 0) {
4769 if (uap->offset > 0 && offset < 0) {
4770 /* Incremented/relative move past max size */
4771 error = EOVERFLOW;
4772 } else {
4773 /*
4774 * Allow negative offsets on character devices, per
4775 * POSIX 1003.1-2001. Most likely for writing disk
4776 * labels.
4777 */
4778 if (offset < 0 && vp->v_type != VCHR) {
4779 /* Decremented/relative move before start */
4780 error = EINVAL;
4781 } else {
4782 /* Success */
4783 fp->f_fglob->fg_offset = offset;
4784 *retval = fp->f_fglob->fg_offset;
4785 }
4786 }
4787 }
4788
4789 /*
4790 * An lseek can affect whether data is "available to read." Use
4791 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4792 */
4793 post_event_if_success(vp, error, NOTE_NONE);
4794 (void)vnode_put(vp);
4795 file_drop(uap->fd);
4796 return (error);
4797 }
4798
4799
4800 /*
4801 * Check access permissions.
4802 *
4803 * Returns: 0 Success
4804 * vnode_authorize:???
4805 */
4806 static int
4807 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
4808 {
4809 kauth_action_t action;
4810 int error;
4811
4812 /*
4813 * If just the regular access bits, convert them to something
4814 * that vnode_authorize will understand.
4815 */
4816 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4817 action = 0;
4818 if (uflags & R_OK)
4819 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4820 if (uflags & W_OK) {
4821 if (vnode_isdir(vp)) {
4822 action |= KAUTH_VNODE_ADD_FILE |
4823 KAUTH_VNODE_ADD_SUBDIRECTORY;
4824 /* might want delete rights here too */
4825 } else {
4826 action |= KAUTH_VNODE_WRITE_DATA;
4827 }
4828 }
4829 if (uflags & X_OK) {
4830 if (vnode_isdir(vp)) {
4831 action |= KAUTH_VNODE_SEARCH;
4832 } else {
4833 action |= KAUTH_VNODE_EXECUTE;
4834 }
4835 }
4836 } else {
4837 /* take advantage of definition of uflags */
4838 action = uflags >> 8;
4839 }
4840
4841 #if CONFIG_MACF
4842 error = mac_vnode_check_access(ctx, vp, uflags);
4843 if (error)
4844 return (error);
4845 #endif /* MAC */
4846
4847 /* action == 0 means only check for existence */
4848 if (action != 0) {
4849 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
4850 } else {
4851 error = 0;
4852 }
4853
4854 return(error);
4855 }
4856
4857
4858
4859 /*
4860 * access_extended: Check access permissions in bulk.
4861 *
4862 * Description: uap->entries Pointer to an array of accessx
4863 * descriptor structs, plus one or
4864 * more NULL terminated strings (see
4865 * "Notes" section below).
4866 * uap->size Size of the area pointed to by
4867 * uap->entries.
4868 * uap->results Pointer to the results array.
4869 *
4870 * Returns: 0 Success
4871 * ENOMEM Insufficient memory
4872 * EINVAL Invalid arguments
4873 * namei:EFAULT Bad address
4874 * namei:ENAMETOOLONG Filename too long
4875 * namei:ENOENT No such file or directory
4876 * namei:ELOOP Too many levels of symbolic links
4877 * namei:EBADF Bad file descriptor
4878 * namei:ENOTDIR Not a directory
4879 * namei:???
4880 * access1:
4881 *
4882 * Implicit returns:
4883 * uap->results Array contents modified
4884 *
4885 * Notes: The uap->entries are structured as an arbitrary length array
4886 * of accessx descriptors, followed by one or more NULL terminated
4887 * strings
4888 *
4889 * struct accessx_descriptor[0]
4890 * ...
4891 * struct accessx_descriptor[n]
4892 * char name_data[0];
4893 *
4894 * We determine the entry count by walking the buffer containing
4895 * the uap->entries argument descriptor. For each descriptor we
4896 * see, the valid values for the offset ad_name_offset will be
4897 * in the byte range:
4898 *
4899 * [ uap->entries + sizeof(struct accessx_descriptor) ]
4900 * to
4901 * [ uap->entries + uap->size - 2 ]
4902 *
4903 * since we must have at least one string, and the string must
4904 * be at least one character plus the NULL terminator in length.
4905 *
4906 * XXX: Need to support the check-as uid argument
4907 */
4908 int
4909 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
4910 {
4911 struct accessx_descriptor *input = NULL;
4912 errno_t *result = NULL;
4913 errno_t error = 0;
4914 int wantdelete = 0;
4915 unsigned int desc_max, desc_actual, i, j;
4916 struct vfs_context context;
4917 struct nameidata nd;
4918 int niopts;
4919 vnode_t vp = NULL;
4920 vnode_t dvp = NULL;
4921 #define ACCESSX_MAX_DESCR_ON_STACK 10
4922 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
4923
4924 context.vc_ucred = NULL;
4925
4926 /*
4927 * Validate parameters; if valid, copy the descriptor array and string
4928 * arguments into local memory. Before proceeding, the following
4929 * conditions must have been met:
4930 *
4931 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
4932 * o There must be sufficient room in the request for at least one
4933 * descriptor and a one yte NUL terminated string.
4934 * o The allocation of local storage must not fail.
4935 */
4936 if (uap->size > ACCESSX_MAX_TABLESIZE)
4937 return(ENOMEM);
4938 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
4939 return(EINVAL);
4940 if (uap->size <= sizeof (stack_input)) {
4941 input = stack_input;
4942 } else {
4943 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
4944 if (input == NULL) {
4945 error = ENOMEM;
4946 goto out;
4947 }
4948 }
4949 error = copyin(uap->entries, input, uap->size);
4950 if (error)
4951 goto out;
4952
4953 AUDIT_ARG(opaque, input, uap->size);
4954
4955 /*
4956 * Force NUL termination of the copyin buffer to avoid nami() running
4957 * off the end. If the caller passes us bogus data, they may get a
4958 * bogus result.
4959 */
4960 ((char *)input)[uap->size - 1] = 0;
4961
4962 /*
4963 * Access is defined as checking against the process' real identity,
4964 * even if operations are checking the effective identity. This
4965 * requires that we use a local vfs context.
4966 */
4967 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
4968 context.vc_thread = current_thread();
4969
4970 /*
4971 * Find out how many entries we have, so we can allocate the result
4972 * array by walking the list and adjusting the count downward by the
4973 * earliest string offset we see.
4974 */
4975 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
4976 desc_actual = desc_max;
4977 for (i = 0; i < desc_actual; i++) {
4978 /*
4979 * Take the offset to the name string for this entry and
4980 * convert to an input array index, which would be one off
4981 * the end of the array if this entry was the lowest-addressed
4982 * name string.
4983 */
4984 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
4985
4986 /*
4987 * An offset greater than the max allowable offset is an error.
4988 * It is also an error for any valid entry to point
4989 * to a location prior to the end of the current entry, if
4990 * it's not a reference to the string of the previous entry.
4991 */
4992 if (j > desc_max || (j != 0 && j <= i)) {
4993 error = EINVAL;
4994 goto out;
4995 }
4996
4997 /*
4998 * An offset of 0 means use the previous descriptor's offset;
4999 * this is used to chain multiple requests for the same file
5000 * to avoid multiple lookups.
5001 */
5002 if (j == 0) {
5003 /* This is not valid for the first entry */
5004 if (i == 0) {
5005 error = EINVAL;
5006 goto out;
5007 }
5008 continue;
5009 }
5010
5011 /*
5012 * If the offset of the string for this descriptor is before
5013 * what we believe is the current actual last descriptor,
5014 * then we need to adjust our estimate downward; this permits
5015 * the string table following the last descriptor to be out
5016 * of order relative to the descriptor list.
5017 */
5018 if (j < desc_actual)
5019 desc_actual = j;
5020 }
5021
5022 /*
5023 * We limit the actual number of descriptors we are willing to process
5024 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5025 * requested does not exceed this limit,
5026 */
5027 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
5028 error = ENOMEM;
5029 goto out;
5030 }
5031 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
5032 if (result == NULL) {
5033 error = ENOMEM;
5034 goto out;
5035 }
5036
5037 /*
5038 * Do the work by iterating over the descriptor entries we know to
5039 * at least appear to contain valid data.
5040 */
5041 error = 0;
5042 for (i = 0; i < desc_actual; i++) {
5043 /*
5044 * If the ad_name_offset is 0, then we use the previous
5045 * results to make the check; otherwise, we are looking up
5046 * a new file name.
5047 */
5048 if (input[i].ad_name_offset != 0) {
5049 /* discard old vnodes */
5050 if (vp) {
5051 vnode_put(vp);
5052 vp = NULL;
5053 }
5054 if (dvp) {
5055 vnode_put(dvp);
5056 dvp = NULL;
5057 }
5058
5059 /*
5060 * Scan forward in the descriptor list to see if we
5061 * need the parent vnode. We will need it if we are
5062 * deleting, since we must have rights to remove
5063 * entries in the parent directory, as well as the
5064 * rights to delete the object itself.
5065 */
5066 wantdelete = input[i].ad_flags & _DELETE_OK;
5067 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
5068 if (input[j].ad_flags & _DELETE_OK)
5069 wantdelete = 1;
5070
5071 niopts = FOLLOW | AUDITVNPATH1;
5072
5073 /* need parent for vnode_authorize for deletion test */
5074 if (wantdelete)
5075 niopts |= WANTPARENT;
5076
5077 /* do the lookup */
5078 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
5079 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
5080 &context);
5081 error = namei(&nd);
5082 if (!error) {
5083 vp = nd.ni_vp;
5084 if (wantdelete)
5085 dvp = nd.ni_dvp;
5086 }
5087 nameidone(&nd);
5088 }
5089
5090 /*
5091 * Handle lookup errors.
5092 */
5093 switch(error) {
5094 case ENOENT:
5095 case EACCES:
5096 case EPERM:
5097 case ENOTDIR:
5098 result[i] = error;
5099 break;
5100 case 0:
5101 /* run this access check */
5102 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
5103 break;
5104 default:
5105 /* fatal lookup error */
5106
5107 goto out;
5108 }
5109 }
5110
5111 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
5112
5113 /* copy out results */
5114 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
5115
5116 out:
5117 if (input && input != stack_input)
5118 FREE(input, M_TEMP);
5119 if (result)
5120 FREE(result, M_TEMP);
5121 if (vp)
5122 vnode_put(vp);
5123 if (dvp)
5124 vnode_put(dvp);
5125 if (IS_VALID_CRED(context.vc_ucred))
5126 kauth_cred_unref(&context.vc_ucred);
5127 return(error);
5128 }
5129
5130
5131 /*
5132 * Returns: 0 Success
5133 * namei:EFAULT Bad address
5134 * namei:ENAMETOOLONG Filename too long
5135 * namei:ENOENT No such file or directory
5136 * namei:ELOOP Too many levels of symbolic links
5137 * namei:EBADF Bad file descriptor
5138 * namei:ENOTDIR Not a directory
5139 * namei:???
5140 * access1:
5141 */
5142 static int
5143 faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5144 int flag, enum uio_seg segflg)
5145 {
5146 int error;
5147 struct nameidata nd;
5148 int niopts;
5149 struct vfs_context context;
5150 #if NAMEDRSRCFORK
5151 int is_namedstream = 0;
5152 #endif
5153
5154 /*
5155 * Unless the AT_EACCESS option is used, Access is defined as checking
5156 * against the process' real identity, even if operations are checking
5157 * the effective identity. So we need to tweak the credential
5158 * in the context for that case.
5159 */
5160 if (!(flag & AT_EACCESS))
5161 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5162 else
5163 context.vc_ucred = ctx->vc_ucred;
5164 context.vc_thread = ctx->vc_thread;
5165
5166
5167 niopts = FOLLOW | AUDITVNPATH1;
5168 /* need parent for vnode_authorize for deletion test */
5169 if (amode & _DELETE_OK)
5170 niopts |= WANTPARENT;
5171 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5172 path, &context);
5173
5174 #if NAMEDRSRCFORK
5175 /* access(F_OK) calls are allowed for resource forks. */
5176 if (amode == F_OK)
5177 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5178 #endif
5179 error = nameiat(&nd, fd);
5180 if (error)
5181 goto out;
5182
5183 #if NAMEDRSRCFORK
5184 /* Grab reference on the shadow stream file vnode to
5185 * force an inactive on release which will mark it
5186 * for recycle.
5187 */
5188 if (vnode_isnamedstream(nd.ni_vp) &&
5189 (nd.ni_vp->v_parent != NULLVP) &&
5190 vnode_isshadow(nd.ni_vp)) {
5191 is_namedstream = 1;
5192 vnode_ref(nd.ni_vp);
5193 }
5194 #endif
5195
5196 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
5197
5198 #if NAMEDRSRCFORK
5199 if (is_namedstream) {
5200 vnode_rele(nd.ni_vp);
5201 }
5202 #endif
5203
5204 vnode_put(nd.ni_vp);
5205 if (amode & _DELETE_OK)
5206 vnode_put(nd.ni_dvp);
5207 nameidone(&nd);
5208
5209 out:
5210 if (!(flag & AT_EACCESS))
5211 kauth_cred_unref(&context.vc_ucred);
5212 return (error);
5213 }
5214
5215 int
5216 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5217 {
5218 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5219 uap->path, uap->flags, 0, UIO_USERSPACE));
5220 }
5221
5222 int
5223 faccessat(__unused proc_t p, struct faccessat_args *uap,
5224 __unused int32_t *retval)
5225 {
5226 if (uap->flag & ~AT_EACCESS)
5227 return (EINVAL);
5228
5229 return (faccessat_internal(vfs_context_current(), uap->fd,
5230 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5231 }
5232
5233 /*
5234 * Returns: 0 Success
5235 * EFAULT
5236 * copyout:EFAULT
5237 * namei:???
5238 * vn_stat:???
5239 */
5240 static int
5241 fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5242 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5243 enum uio_seg segflg, int fd, int flag)
5244 {
5245 struct nameidata nd;
5246 int follow;
5247 union {
5248 struct stat sb;
5249 struct stat64 sb64;
5250 } source;
5251 union {
5252 struct user64_stat user64_sb;
5253 struct user32_stat user32_sb;
5254 struct user64_stat64 user64_sb64;
5255 struct user32_stat64 user32_sb64;
5256 } dest;
5257 caddr_t sbp;
5258 int error, my_size;
5259 kauth_filesec_t fsec;
5260 size_t xsecurity_bufsize;
5261 void * statptr;
5262
5263 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5264 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5265 segflg, path, ctx);
5266
5267 #if NAMEDRSRCFORK
5268 int is_namedstream = 0;
5269 /* stat calls are allowed for resource forks. */
5270 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5271 #endif
5272 error = nameiat(&nd, fd);
5273 if (error)
5274 return (error);
5275 fsec = KAUTH_FILESEC_NONE;
5276
5277 statptr = (void *)&source;
5278
5279 #if NAMEDRSRCFORK
5280 /* Grab reference on the shadow stream file vnode to
5281 * force an inactive on release which will mark it
5282 * for recycle.
5283 */
5284 if (vnode_isnamedstream(nd.ni_vp) &&
5285 (nd.ni_vp->v_parent != NULLVP) &&
5286 vnode_isshadow(nd.ni_vp)) {
5287 is_namedstream = 1;
5288 vnode_ref(nd.ni_vp);
5289 }
5290 #endif
5291
5292 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
5293
5294 #if NAMEDRSRCFORK
5295 if (is_namedstream) {
5296 vnode_rele(nd.ni_vp);
5297 }
5298 #endif
5299 vnode_put(nd.ni_vp);
5300 nameidone(&nd);
5301
5302 if (error)
5303 return (error);
5304 /* Zap spare fields */
5305 if (isstat64 != 0) {
5306 source.sb64.st_lspare = 0;
5307 source.sb64.st_qspare[0] = 0LL;
5308 source.sb64.st_qspare[1] = 0LL;
5309 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5310 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
5311 my_size = sizeof(dest.user64_sb64);
5312 sbp = (caddr_t)&dest.user64_sb64;
5313 } else {
5314 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
5315 my_size = sizeof(dest.user32_sb64);
5316 sbp = (caddr_t)&dest.user32_sb64;
5317 }
5318 /*
5319 * Check if we raced (post lookup) against the last unlink of a file.
5320 */
5321 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5322 source.sb64.st_nlink = 1;
5323 }
5324 } else {
5325 source.sb.st_lspare = 0;
5326 source.sb.st_qspare[0] = 0LL;
5327 source.sb.st_qspare[1] = 0LL;
5328 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5329 munge_user64_stat(&source.sb, &dest.user64_sb);
5330 my_size = sizeof(dest.user64_sb);
5331 sbp = (caddr_t)&dest.user64_sb;
5332 } else {
5333 munge_user32_stat(&source.sb, &dest.user32_sb);
5334 my_size = sizeof(dest.user32_sb);
5335 sbp = (caddr_t)&dest.user32_sb;
5336 }
5337
5338 /*
5339 * Check if we raced (post lookup) against the last unlink of a file.
5340 */
5341 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5342 source.sb.st_nlink = 1;
5343 }
5344 }
5345 if ((error = copyout(sbp, ub, my_size)) != 0)
5346 goto out;
5347
5348 /* caller wants extended security information? */
5349 if (xsecurity != USER_ADDR_NULL) {
5350
5351 /* did we get any? */
5352 if (fsec == KAUTH_FILESEC_NONE) {
5353 if (susize(xsecurity_size, 0) != 0) {
5354 error = EFAULT;
5355 goto out;
5356 }
5357 } else {
5358 /* find the user buffer size */
5359 xsecurity_bufsize = fusize(xsecurity_size);
5360
5361 /* copy out the actual data size */
5362 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5363 error = EFAULT;
5364 goto out;
5365 }
5366
5367 /* if the caller supplied enough room, copy out to it */
5368 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5369 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5370 }
5371 }
5372 out:
5373 if (fsec != KAUTH_FILESEC_NONE)
5374 kauth_filesec_free(fsec);
5375 return (error);
5376 }
5377
5378 /*
5379 * stat_extended: Get file status; with extended security (ACL).
5380 *
5381 * Parameters: p (ignored)
5382 * uap User argument descriptor (see below)
5383 * retval (ignored)
5384 *
5385 * Indirect: uap->path Path of file to get status from
5386 * uap->ub User buffer (holds file status info)
5387 * uap->xsecurity ACL to get (extended security)
5388 * uap->xsecurity_size Size of ACL
5389 *
5390 * Returns: 0 Success
5391 * !0 errno value
5392 *
5393 */
5394 int
5395 stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5396 __unused int32_t *retval)
5397 {
5398 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5399 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5400 0));
5401 }
5402
5403 /*
5404 * Returns: 0 Success
5405 * fstatat_internal:??? [see fstatat_internal() in this file]
5406 */
5407 int
5408 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
5409 {
5410 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5411 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
5412 }
5413
5414 int
5415 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
5416 {
5417 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5418 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
5419 }
5420
5421 /*
5422 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5423 *
5424 * Parameters: p (ignored)
5425 * uap User argument descriptor (see below)
5426 * retval (ignored)
5427 *
5428 * Indirect: uap->path Path of file to get status from
5429 * uap->ub User buffer (holds file status info)
5430 * uap->xsecurity ACL to get (extended security)
5431 * uap->xsecurity_size Size of ACL
5432 *
5433 * Returns: 0 Success
5434 * !0 errno value
5435 *
5436 */
5437 int
5438 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
5439 {
5440 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5441 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5442 0));
5443 }
5444
5445 /*
5446 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5447 *
5448 * Parameters: p (ignored)
5449 * uap User argument descriptor (see below)
5450 * retval (ignored)
5451 *
5452 * Indirect: uap->path Path of file to get status from
5453 * uap->ub User buffer (holds file status info)
5454 * uap->xsecurity ACL to get (extended security)
5455 * uap->xsecurity_size Size of ACL
5456 *
5457 * Returns: 0 Success
5458 * !0 errno value
5459 *
5460 */
5461 int
5462 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
5463 {
5464 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5465 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5466 AT_SYMLINK_NOFOLLOW));
5467 }
5468
5469 /*
5470 * Get file status; this version does not follow links.
5471 */
5472 int
5473 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
5474 {
5475 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5476 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5477 }
5478
5479 int
5480 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
5481 {
5482 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5483 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5484 }
5485
5486 /*
5487 * lstat64_extended: Get file status; can handle large inode numbers; does not
5488 * follow links; with extended security (ACL).
5489 *
5490 * Parameters: p (ignored)
5491 * uap User argument descriptor (see below)
5492 * retval (ignored)
5493 *
5494 * Indirect: uap->path Path of file to get status from
5495 * uap->ub User buffer (holds file status info)
5496 * uap->xsecurity ACL to get (extended security)
5497 * uap->xsecurity_size Size of ACL
5498 *
5499 * Returns: 0 Success
5500 * !0 errno value
5501 *
5502 */
5503 int
5504 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
5505 {
5506 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5507 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5508 AT_SYMLINK_NOFOLLOW));
5509 }
5510
5511 int
5512 fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5513 {
5514 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5515 return (EINVAL);
5516
5517 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5518 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5519 }
5520
5521 int
5522 fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5523 __unused int32_t *retval)
5524 {
5525 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5526 return (EINVAL);
5527
5528 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5529 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
5530 }
5531
5532 /*
5533 * Get configurable pathname variables.
5534 *
5535 * Returns: 0 Success
5536 * namei:???
5537 * vn_pathconf:???
5538 *
5539 * Notes: Global implementation constants are intended to be
5540 * implemented in this function directly; all other constants
5541 * are per-FS implementation, and therefore must be handled in
5542 * each respective FS, instead.
5543 *
5544 * XXX We implement some things globally right now that should actually be
5545 * XXX per-FS; we will need to deal with this at some point.
5546 */
5547 /* ARGSUSED */
5548 int
5549 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
5550 {
5551 int error;
5552 struct nameidata nd;
5553 vfs_context_t ctx = vfs_context_current();
5554
5555 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
5556 UIO_USERSPACE, uap->path, ctx);
5557 error = namei(&nd);
5558 if (error)
5559 return (error);
5560
5561 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
5562
5563 vnode_put(nd.ni_vp);
5564 nameidone(&nd);
5565 return (error);
5566 }
5567
5568 /*
5569 * Return target name of a symbolic link.
5570 */
5571 /* ARGSUSED */
5572 static int
5573 readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
5574 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
5575 int *retval)
5576 {
5577 vnode_t vp;
5578 uio_t auio;
5579 int error;
5580 struct nameidata nd;
5581 char uio_buf[ UIO_SIZEOF(1) ];
5582
5583 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5584 seg, path, ctx);
5585
5586 error = nameiat(&nd, fd);
5587 if (error)
5588 return (error);
5589 vp = nd.ni_vp;
5590
5591 nameidone(&nd);
5592
5593 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
5594 &uio_buf[0], sizeof(uio_buf));
5595 uio_addiov(auio, buf, bufsize);
5596 if (vp->v_type != VLNK) {
5597 error = EINVAL;
5598 } else {
5599 #if CONFIG_MACF
5600 error = mac_vnode_check_readlink(ctx, vp);
5601 #endif
5602 if (error == 0)
5603 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
5604 ctx);
5605 if (error == 0)
5606 error = VNOP_READLINK(vp, auio, ctx);
5607 }
5608 vnode_put(vp);
5609
5610 *retval = bufsize - (int)uio_resid(auio);
5611 return (error);
5612 }
5613
5614 int
5615 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5616 {
5617 enum uio_seg procseg;
5618
5619 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5620 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5621 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5622 uap->count, procseg, retval));
5623 }
5624
5625 int
5626 readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5627 {
5628 enum uio_seg procseg;
5629
5630 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5631 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5632 procseg, uap->buf, uap->bufsize, procseg, retval));
5633 }
5634
5635 /*
5636 * Change file flags.
5637 */
5638 static int
5639 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5640 {
5641 struct vnode_attr va;
5642 kauth_action_t action;
5643 int error;
5644
5645 VATTR_INIT(&va);
5646 VATTR_SET(&va, va_flags, flags);
5647
5648 #if CONFIG_MACF
5649 error = mac_vnode_check_setflags(ctx, vp, flags);
5650 if (error)
5651 goto out;
5652 #endif
5653
5654 /* request authorisation, disregard immutability */
5655 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5656 goto out;
5657 /*
5658 * Request that the auth layer disregard those file flags it's allowed to when
5659 * authorizing this operation; we need to do this in order to be able to
5660 * clear immutable flags.
5661 */
5662 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
5663 goto out;
5664 error = vnode_setattr(vp, &va, ctx);
5665
5666 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5667 error = ENOTSUP;
5668 }
5669 out:
5670 vnode_put(vp);
5671 return(error);
5672 }
5673
5674 /*
5675 * Change flags of a file given a path name.
5676 */
5677 /* ARGSUSED */
5678 int
5679 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
5680 {
5681 vnode_t vp;
5682 vfs_context_t ctx = vfs_context_current();
5683 int error;
5684 struct nameidata nd;
5685
5686 AUDIT_ARG(fflags, uap->flags);
5687 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5688 UIO_USERSPACE, uap->path, ctx);
5689 error = namei(&nd);
5690 if (error)
5691 return (error);
5692 vp = nd.ni_vp;
5693 nameidone(&nd);
5694
5695 error = chflags1(vp, uap->flags, ctx);
5696
5697 return(error);
5698 }
5699
5700 /*
5701 * Change flags of a file given a file descriptor.
5702 */
5703 /* ARGSUSED */
5704 int
5705 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
5706 {
5707 vnode_t vp;
5708 int error;
5709
5710 AUDIT_ARG(fd, uap->fd);
5711 AUDIT_ARG(fflags, uap->flags);
5712 if ( (error = file_vnode(uap->fd, &vp)) )
5713 return (error);
5714
5715 if ((error = vnode_getwithref(vp))) {
5716 file_drop(uap->fd);
5717 return(error);
5718 }
5719
5720 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5721
5722 error = chflags1(vp, uap->flags, vfs_context_current());
5723
5724 file_drop(uap->fd);
5725 return (error);
5726 }
5727
5728 /*
5729 * Change security information on a filesystem object.
5730 *
5731 * Returns: 0 Success
5732 * EPERM Operation not permitted
5733 * vnode_authattr:??? [anything vnode_authattr can return]
5734 * vnode_authorize:??? [anything vnode_authorize can return]
5735 * vnode_setattr:??? [anything vnode_setattr can return]
5736 *
5737 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5738 * translated to EPERM before being returned.
5739 */
5740 static int
5741 chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
5742 {
5743 kauth_action_t action;
5744 int error;
5745
5746 AUDIT_ARG(mode, vap->va_mode);
5747 /* XXX audit new args */
5748
5749 #if NAMEDSTREAMS
5750 /* chmod calls are not allowed for resource forks. */
5751 if (vp->v_flag & VISNAMEDSTREAM) {
5752 return (EPERM);
5753 }
5754 #endif
5755
5756 #if CONFIG_MACF
5757 if (VATTR_IS_ACTIVE(vap, va_mode) &&
5758 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
5759 return (error);
5760 #endif
5761
5762 /* make sure that the caller is allowed to set this security information */
5763 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
5764 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5765 if (error == EACCES)
5766 error = EPERM;
5767 return(error);
5768 }
5769
5770 error = vnode_setattr(vp, vap, ctx);
5771
5772 return (error);
5773 }
5774
5775
5776 /*
5777 * Change mode of a file given a path name.
5778 *
5779 * Returns: 0 Success
5780 * namei:??? [anything namei can return]
5781 * chmod_vnode:??? [anything chmod_vnode can return]
5782 */
5783 static int
5784 chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
5785 int fd, int flag, enum uio_seg segflg)
5786 {
5787 struct nameidata nd;
5788 int follow, error;
5789
5790 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5791 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
5792 segflg, path, ctx);
5793 if ((error = nameiat(&nd, fd)))
5794 return (error);
5795 error = chmod_vnode(ctx, nd.ni_vp, vap);
5796 vnode_put(nd.ni_vp);
5797 nameidone(&nd);
5798 return(error);
5799 }
5800
5801 /*
5802 * chmod_extended: Change the mode of a file given a path name; with extended
5803 * argument list (including extended security (ACL)).
5804 *
5805 * Parameters: p Process requesting the open
5806 * uap User argument descriptor (see below)
5807 * retval (ignored)
5808 *
5809 * Indirect: uap->path Path to object (same as 'chmod')
5810 * uap->uid UID to set
5811 * uap->gid GID to set
5812 * uap->mode File mode to set (same as 'chmod')
5813 * uap->xsecurity ACL to set (or delete)
5814 *
5815 * Returns: 0 Success
5816 * !0 errno value
5817 *
5818 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5819 *
5820 * XXX: We should enummerate the possible errno values here, and where
5821 * in the code they originated.
5822 */
5823 int
5824 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
5825 {
5826 int error;
5827 struct vnode_attr va;
5828 kauth_filesec_t xsecdst;
5829
5830 AUDIT_ARG(owner, uap->uid, uap->gid);
5831
5832 VATTR_INIT(&va);
5833 if (uap->mode != -1)
5834 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5835 if (uap->uid != KAUTH_UID_NONE)
5836 VATTR_SET(&va, va_uid, uap->uid);
5837 if (uap->gid != KAUTH_GID_NONE)
5838 VATTR_SET(&va, va_gid, uap->gid);
5839
5840 xsecdst = NULL;
5841 switch(uap->xsecurity) {
5842 /* explicit remove request */
5843 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5844 VATTR_SET(&va, va_acl, NULL);
5845 break;
5846 /* not being set */
5847 case USER_ADDR_NULL:
5848 break;
5849 default:
5850 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5851 return(error);
5852 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5853 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
5854 }
5855
5856 error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
5857 UIO_USERSPACE);
5858
5859 if (xsecdst != NULL)
5860 kauth_filesec_free(xsecdst);
5861 return(error);
5862 }
5863
5864 /*
5865 * Returns: 0 Success
5866 * chmodat:??? [anything chmodat can return]
5867 */
5868 static int
5869 fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
5870 int flag, enum uio_seg segflg)
5871 {
5872 struct vnode_attr va;
5873
5874 VATTR_INIT(&va);
5875 VATTR_SET(&va, va_mode, mode & ALLPERMS);
5876
5877 return (chmodat(ctx, path, &va, fd, flag, segflg));
5878 }
5879
5880 int
5881 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
5882 {
5883 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
5884 AT_FDCWD, 0, UIO_USERSPACE));
5885 }
5886
5887 int
5888 fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
5889 {
5890 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5891 return (EINVAL);
5892
5893 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
5894 uap->fd, uap->flag, UIO_USERSPACE));
5895 }
5896
5897 /*
5898 * Change mode of a file given a file descriptor.
5899 */
5900 static int
5901 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
5902 {
5903 vnode_t vp;
5904 int error;
5905
5906 AUDIT_ARG(fd, fd);
5907
5908 if ((error = file_vnode(fd, &vp)) != 0)
5909 return (error);
5910 if ((error = vnode_getwithref(vp)) != 0) {
5911 file_drop(fd);
5912 return(error);
5913 }
5914 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5915
5916 error = chmod_vnode(vfs_context_current(), vp, vap);
5917 (void)vnode_put(vp);
5918 file_drop(fd);
5919
5920 return (error);
5921 }
5922
5923 /*
5924 * fchmod_extended: Change mode of a file given a file descriptor; with
5925 * extended argument list (including extended security (ACL)).
5926 *
5927 * Parameters: p Process requesting to change file mode
5928 * uap User argument descriptor (see below)
5929 * retval (ignored)
5930 *
5931 * Indirect: uap->mode File mode to set (same as 'chmod')
5932 * uap->uid UID to set
5933 * uap->gid GID to set
5934 * uap->xsecurity ACL to set (or delete)
5935 * uap->fd File descriptor of file to change mode
5936 *
5937 * Returns: 0 Success
5938 * !0 errno value
5939 *
5940 */
5941 int
5942 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
5943 {
5944 int error;
5945 struct vnode_attr va;
5946 kauth_filesec_t xsecdst;
5947
5948 AUDIT_ARG(owner, uap->uid, uap->gid);
5949
5950 VATTR_INIT(&va);
5951 if (uap->mode != -1)
5952 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5953 if (uap->uid != KAUTH_UID_NONE)
5954 VATTR_SET(&va, va_uid, uap->uid);
5955 if (uap->gid != KAUTH_GID_NONE)
5956 VATTR_SET(&va, va_gid, uap->gid);
5957
5958 xsecdst = NULL;
5959 switch(uap->xsecurity) {
5960 case USER_ADDR_NULL:
5961 VATTR_SET(&va, va_acl, NULL);
5962 break;
5963 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5964 VATTR_SET(&va, va_acl, NULL);
5965 break;
5966 /* not being set */
5967 case CAST_USER_ADDR_T(-1):
5968 break;
5969 default:
5970 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5971 return(error);
5972 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5973 }
5974
5975 error = fchmod1(p, uap->fd, &va);
5976
5977
5978 switch(uap->xsecurity) {
5979 case USER_ADDR_NULL:
5980 case CAST_USER_ADDR_T(-1):
5981 break;
5982 default:
5983 if (xsecdst != NULL)
5984 kauth_filesec_free(xsecdst);
5985 }
5986 return(error);
5987 }
5988
5989 int
5990 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
5991 {
5992 struct vnode_attr va;
5993
5994 VATTR_INIT(&va);
5995 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5996
5997 return(fchmod1(p, uap->fd, &va));
5998 }
5999
6000
6001 /*
6002 * Set ownership given a path name.
6003 */
6004 /* ARGSUSED */
6005 static int
6006 fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
6007 gid_t gid, int flag, enum uio_seg segflg)
6008 {
6009 vnode_t vp;
6010 struct vnode_attr va;
6011 int error;
6012 struct nameidata nd;
6013 int follow;
6014 kauth_action_t action;
6015
6016 AUDIT_ARG(owner, uid, gid);
6017
6018 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6019 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
6020 path, ctx);
6021 error = nameiat(&nd, fd);
6022 if (error)
6023 return (error);
6024 vp = nd.ni_vp;
6025
6026 nameidone(&nd);
6027
6028 VATTR_INIT(&va);
6029 if (uid != (uid_t)VNOVAL)
6030 VATTR_SET(&va, va_uid, uid);
6031 if (gid != (gid_t)VNOVAL)
6032 VATTR_SET(&va, va_gid, gid);
6033
6034 #if CONFIG_MACF
6035 error = mac_vnode_check_setowner(ctx, vp, uid, gid);
6036 if (error)
6037 goto out;
6038 #endif
6039
6040 /* preflight and authorize attribute changes */
6041 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6042 goto out;
6043 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6044 goto out;
6045 error = vnode_setattr(vp, &va, ctx);
6046
6047 out:
6048 /*
6049 * EACCES is only allowed from namei(); permissions failure should
6050 * return EPERM, so we need to translate the error code.
6051 */
6052 if (error == EACCES)
6053 error = EPERM;
6054
6055 vnode_put(vp);
6056 return (error);
6057 }
6058
6059 int
6060 chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
6061 {
6062 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6063 uap->uid, uap->gid, 0, UIO_USERSPACE));
6064 }
6065
6066 int
6067 lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
6068 {
6069 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6070 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6071 }
6072
6073 int
6074 fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6075 {
6076 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6077 return (EINVAL);
6078
6079 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6080 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
6081 }
6082
6083 /*
6084 * Set ownership given a file descriptor.
6085 */
6086 /* ARGSUSED */
6087 int
6088 fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
6089 {
6090 struct vnode_attr va;
6091 vfs_context_t ctx = vfs_context_current();
6092 vnode_t vp;
6093 int error;
6094 kauth_action_t action;
6095
6096 AUDIT_ARG(owner, uap->uid, uap->gid);
6097 AUDIT_ARG(fd, uap->fd);
6098
6099 if ( (error = file_vnode(uap->fd, &vp)) )
6100 return (error);
6101
6102 if ( (error = vnode_getwithref(vp)) ) {
6103 file_drop(uap->fd);
6104 return(error);
6105 }
6106 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6107
6108 VATTR_INIT(&va);
6109 if (uap->uid != VNOVAL)
6110 VATTR_SET(&va, va_uid, uap->uid);
6111 if (uap->gid != VNOVAL)
6112 VATTR_SET(&va, va_gid, uap->gid);
6113
6114 #if NAMEDSTREAMS
6115 /* chown calls are not allowed for resource forks. */
6116 if (vp->v_flag & VISNAMEDSTREAM) {
6117 error = EPERM;
6118 goto out;
6119 }
6120 #endif
6121
6122 #if CONFIG_MACF
6123 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
6124 if (error)
6125 goto out;
6126 #endif
6127
6128 /* preflight and authorize attribute changes */
6129 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6130 goto out;
6131 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6132 if (error == EACCES)
6133 error = EPERM;
6134 goto out;
6135 }
6136 error = vnode_setattr(vp, &va, ctx);
6137
6138 out:
6139 (void)vnode_put(vp);
6140 file_drop(uap->fd);
6141 return (error);
6142 }
6143
6144 static int
6145 getutimes(user_addr_t usrtvp, struct timespec *tsp)
6146 {
6147 int error;
6148
6149 if (usrtvp == USER_ADDR_NULL) {
6150 struct timeval old_tv;
6151 /* XXX Y2038 bug because of microtime argument */
6152 microtime(&old_tv);
6153 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
6154 tsp[1] = tsp[0];
6155 } else {
6156 if (IS_64BIT_PROCESS(current_proc())) {
6157 struct user64_timeval tv[2];
6158 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6159 if (error)
6160 return (error);
6161 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6162 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6163 } else {
6164 struct user32_timeval tv[2];
6165 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6166 if (error)
6167 return (error);
6168 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6169 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6170 }
6171 }
6172 return 0;
6173 }
6174
6175 static int
6176 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
6177 int nullflag)
6178 {
6179 int error;
6180 struct vnode_attr va;
6181 kauth_action_t action;
6182
6183 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6184
6185 VATTR_INIT(&va);
6186 VATTR_SET(&va, va_access_time, ts[0]);
6187 VATTR_SET(&va, va_modify_time, ts[1]);
6188 if (nullflag)
6189 va.va_vaflags |= VA_UTIMES_NULL;
6190
6191 #if NAMEDSTREAMS
6192 /* utimes calls are not allowed for resource forks. */
6193 if (vp->v_flag & VISNAMEDSTREAM) {
6194 error = EPERM;
6195 goto out;
6196 }
6197 #endif
6198
6199 #if CONFIG_MACF
6200 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
6201 if (error)
6202 goto out;
6203 #endif
6204 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
6205 if (!nullflag && error == EACCES)
6206 error = EPERM;
6207 goto out;
6208 }
6209
6210 /* since we may not need to auth anything, check here */
6211 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6212 if (!nullflag && error == EACCES)
6213 error = EPERM;
6214 goto out;
6215 }
6216 error = vnode_setattr(vp, &va, ctx);
6217
6218 out:
6219 return error;
6220 }
6221
6222 /*
6223 * Set the access and modification times of a file.
6224 */
6225 /* ARGSUSED */
6226 int
6227 utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
6228 {
6229 struct timespec ts[2];
6230 user_addr_t usrtvp;
6231 int error;
6232 struct nameidata nd;
6233 vfs_context_t ctx = vfs_context_current();
6234
6235 /*
6236 * AUDIT: Needed to change the order of operations to do the
6237 * name lookup first because auditing wants the path.
6238 */
6239 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
6240 UIO_USERSPACE, uap->path, ctx);
6241 error = namei(&nd);
6242 if (error)
6243 return (error);
6244 nameidone(&nd);
6245
6246 /*
6247 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6248 * the current time instead.
6249 */
6250 usrtvp = uap->tptr;
6251 if ((error = getutimes(usrtvp, ts)) != 0)
6252 goto out;
6253
6254 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
6255
6256 out:
6257 vnode_put(nd.ni_vp);
6258 return (error);
6259 }
6260
6261 /*
6262 * Set the access and modification times of a file.
6263 */
6264 /* ARGSUSED */
6265 int
6266 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
6267 {
6268 struct timespec ts[2];
6269 vnode_t vp;
6270 user_addr_t usrtvp;
6271 int error;
6272
6273 AUDIT_ARG(fd, uap->fd);
6274 usrtvp = uap->tptr;
6275 if ((error = getutimes(usrtvp, ts)) != 0)
6276 return (error);
6277 if ((error = file_vnode(uap->fd, &vp)) != 0)
6278 return (error);
6279 if((error = vnode_getwithref(vp))) {
6280 file_drop(uap->fd);
6281 return(error);
6282 }
6283
6284 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
6285 vnode_put(vp);
6286 file_drop(uap->fd);
6287 return(error);
6288 }
6289
6290 /*
6291 * Truncate a file given its path name.
6292 */
6293 /* ARGSUSED */
6294 int
6295 truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
6296 {
6297 vnode_t vp;
6298 struct vnode_attr va;
6299 vfs_context_t ctx = vfs_context_current();
6300 int error;
6301 struct nameidata nd;
6302 kauth_action_t action;
6303
6304 if (uap->length < 0)
6305 return(EINVAL);
6306 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
6307 UIO_USERSPACE, uap->path, ctx);
6308 if ((error = namei(&nd)))
6309 return (error);
6310 vp = nd.ni_vp;
6311
6312 nameidone(&nd);
6313
6314 VATTR_INIT(&va);
6315 VATTR_SET(&va, va_data_size, uap->length);
6316
6317 #if CONFIG_MACF
6318 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
6319 if (error)
6320 goto out;
6321 #endif
6322
6323 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6324 goto out;
6325 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6326 goto out;
6327 error = vnode_setattr(vp, &va, ctx);
6328 out:
6329 vnode_put(vp);
6330 return (error);
6331 }
6332
6333 /*
6334 * Truncate a file given a file descriptor.
6335 */
6336 /* ARGSUSED */
6337 int
6338 ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
6339 {
6340 vfs_context_t ctx = vfs_context_current();
6341 struct vnode_attr va;
6342 vnode_t vp;
6343 struct fileproc *fp;
6344 int error ;
6345 int fd = uap->fd;
6346
6347 AUDIT_ARG(fd, uap->fd);
6348 if (uap->length < 0)
6349 return(EINVAL);
6350
6351 if ( (error = fp_lookup(p,fd,&fp,0)) ) {
6352 return(error);
6353 }
6354
6355 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
6356 case DTYPE_PSXSHM:
6357 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
6358 goto out;
6359 case DTYPE_VNODE:
6360 break;
6361 default:
6362 error = EINVAL;
6363 goto out;
6364 }
6365
6366 vp = (vnode_t)fp->f_fglob->fg_data;
6367
6368 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
6369 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6370 error = EINVAL;
6371 goto out;
6372 }
6373
6374 if ((error = vnode_getwithref(vp)) != 0) {
6375 goto out;
6376 }
6377
6378 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6379
6380 #if CONFIG_MACF
6381 error = mac_vnode_check_truncate(ctx,
6382 fp->f_fglob->fg_cred, vp);
6383 if (error) {
6384 (void)vnode_put(vp);
6385 goto out;
6386 }
6387 #endif
6388 VATTR_INIT(&va);
6389 VATTR_SET(&va, va_data_size, uap->length);
6390 error = vnode_setattr(vp, &va, ctx);
6391 (void)vnode_put(vp);
6392 out:
6393 file_drop(fd);
6394 return (error);
6395 }
6396
6397
6398 /*
6399 * Sync an open file with synchronized I/O _file_ integrity completion
6400 */
6401 /* ARGSUSED */
6402 int
6403 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
6404 {
6405 __pthread_testcancel(1);
6406 return(fsync_common(p, uap, MNT_WAIT));
6407 }
6408
6409
6410 /*
6411 * Sync an open file with synchronized I/O _file_ integrity completion
6412 *
6413 * Notes: This is a legacy support function that does not test for
6414 * thread cancellation points.
6415 */
6416 /* ARGSUSED */
6417 int
6418 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6419 {
6420 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
6421 }
6422
6423
6424 /*
6425 * Sync an open file with synchronized I/O _data_ integrity completion
6426 */
6427 /* ARGSUSED */
6428 int
6429 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6430 {
6431 __pthread_testcancel(1);
6432 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6433 }
6434
6435
6436 /*
6437 * fsync_common
6438 *
6439 * Common fsync code to support both synchronized I/O file integrity completion
6440 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6441 *
6442 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6443 * will only guarantee that the file data contents are retrievable. If
6444 * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
6445 * includes additional metadata unnecessary for retrieving the file data
6446 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6447 * storage.
6448 *
6449 * Parameters: p The process
6450 * uap->fd The descriptor to synchronize
6451 * flags The data integrity flags
6452 *
6453 * Returns: int Success
6454 * fp_getfvp:EBADF Bad file descriptor
6455 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6456 * VNOP_FSYNC:??? unspecified
6457 *
6458 * Notes: We use struct fsync_args because it is a short name, and all
6459 * caller argument structures are otherwise identical.
6460 */
6461 static int
6462 fsync_common(proc_t p, struct fsync_args *uap, int flags)
6463 {
6464 vnode_t vp;
6465 struct fileproc *fp;
6466 vfs_context_t ctx = vfs_context_current();
6467 int error;
6468
6469 AUDIT_ARG(fd, uap->fd);
6470
6471 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
6472 return (error);
6473 if ( (error = vnode_getwithref(vp)) ) {
6474 file_drop(uap->fd);
6475 return(error);
6476 }
6477
6478 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6479
6480 error = VNOP_FSYNC(vp, flags, ctx);
6481
6482 #if NAMEDRSRCFORK
6483 /* Sync resource fork shadow file if necessary. */
6484 if ((error == 0) &&
6485 (vp->v_flag & VISNAMEDSTREAM) &&
6486 (vp->v_parent != NULLVP) &&
6487 vnode_isshadow(vp) &&
6488 (fp->f_flags & FP_WRITTEN)) {
6489 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
6490 }
6491 #endif
6492
6493 (void)vnode_put(vp);
6494 file_drop(uap->fd);
6495 return (error);
6496 }
6497
6498 /*
6499 * Duplicate files. Source must be a file, target must be a file or
6500 * must not exist.
6501 *
6502 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6503 * perform inheritance correctly.
6504 */
6505 /* ARGSUSED */
6506 int
6507 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
6508 {
6509 vnode_t tvp, fvp, tdvp, sdvp;
6510 struct nameidata fromnd, tond;
6511 int error;
6512 vfs_context_t ctx = vfs_context_current();
6513
6514 /* Check that the flags are valid. */
6515
6516 if (uap->flags & ~CPF_MASK) {
6517 return(EINVAL);
6518 }
6519
6520 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
6521 UIO_USERSPACE, uap->from, ctx);
6522 if ((error = namei(&fromnd)))
6523 return (error);
6524 fvp = fromnd.ni_vp;
6525
6526 NDINIT(&tond, CREATE, OP_LINK,
6527 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6528 UIO_USERSPACE, uap->to, ctx);
6529 if ((error = namei(&tond))) {
6530 goto out1;
6531 }
6532 tdvp = tond.ni_dvp;
6533 tvp = tond.ni_vp;
6534
6535 if (tvp != NULL) {
6536 if (!(uap->flags & CPF_OVERWRITE)) {
6537 error = EEXIST;
6538 goto out;
6539 }
6540 }
6541 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
6542 error = EISDIR;
6543 goto out;
6544 }
6545
6546 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
6547 goto out;
6548
6549 if (fvp == tdvp)
6550 error = EINVAL;
6551 /*
6552 * If source is the same as the destination (that is the
6553 * same inode number) then there is nothing to do.
6554 * (fixed to have POSIX semantics - CSM 3/2/98)
6555 */
6556 if (fvp == tvp)
6557 error = -1;
6558 if (!error)
6559 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
6560 out:
6561 sdvp = tond.ni_startdir;
6562 /*
6563 * nameidone has to happen before we vnode_put(tdvp)
6564 * since it may need to release the fs_nodelock on the tdvp
6565 */
6566 nameidone(&tond);
6567
6568 if (tvp)
6569 vnode_put(tvp);
6570 vnode_put(tdvp);
6571 vnode_put(sdvp);
6572 out1:
6573 vnode_put(fvp);
6574
6575 nameidone(&fromnd);
6576
6577 if (error == -1)
6578 return (0);
6579 return (error);
6580 }
6581
6582
6583 /*
6584 * Rename files. Source and destination must either both be directories,
6585 * or both not be directories. If target is a directory, it must be empty.
6586 */
6587 /* ARGSUSED */
6588 static int
6589 renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
6590 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
6591 {
6592 vnode_t tvp, tdvp;
6593 vnode_t fvp, fdvp;
6594 struct nameidata *fromnd, *tond;
6595 int error;
6596 int do_retry;
6597 int retry_count;
6598 int mntrename;
6599 int need_event;
6600 const char *oname = NULL;
6601 char *from_name = NULL, *to_name = NULL;
6602 int from_len=0, to_len=0;
6603 int holding_mntlock;
6604 mount_t locked_mp = NULL;
6605 vnode_t oparent = NULLVP;
6606 #if CONFIG_FSE
6607 fse_info from_finfo, to_finfo;
6608 #endif
6609 int from_truncated=0, to_truncated;
6610 int batched = 0;
6611 struct vnode_attr *fvap, *tvap;
6612 int continuing = 0;
6613 /* carving out a chunk for structs that are too big to be on stack. */
6614 struct {
6615 struct nameidata from_node, to_node;
6616 struct vnode_attr fv_attr, tv_attr;
6617 } * __rename_data;
6618 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
6619 fromnd = &__rename_data->from_node;
6620 tond = &__rename_data->to_node;
6621
6622 holding_mntlock = 0;
6623 do_retry = 0;
6624 retry_count = 0;
6625 retry:
6626 fvp = tvp = NULL;
6627 fdvp = tdvp = NULL;
6628 fvap = tvap = NULL;
6629 mntrename = FALSE;
6630
6631 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
6632 segflg, from, ctx);
6633 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
6634
6635 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6636 segflg, to, ctx);
6637 tond->ni_flag = NAMEI_COMPOUNDRENAME;
6638
6639 continue_lookup:
6640 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6641 if ( (error = nameiat(fromnd, fromfd)) )
6642 goto out1;
6643 fdvp = fromnd->ni_dvp;
6644 fvp = fromnd->ni_vp;
6645
6646 if (fvp && fvp->v_type == VDIR)
6647 tond->ni_cnd.cn_flags |= WILLBEDIR;
6648 }
6649
6650 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6651 if ( (error = nameiat(tond, tofd)) ) {
6652 /*
6653 * Translate error code for rename("dir1", "dir2/.").
6654 */
6655 if (error == EISDIR && fvp->v_type == VDIR)
6656 error = EINVAL;
6657 goto out1;
6658 }
6659 tdvp = tond->ni_dvp;
6660 tvp = tond->ni_vp;
6661 }
6662
6663 batched = vnode_compound_rename_available(fdvp);
6664 if (!fvp) {
6665 /*
6666 * Claim: this check will never reject a valid rename.
6667 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
6668 * Suppose fdvp and tdvp are not on the same mount.
6669 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6670 * then you can't move it to within another dir on the same mountpoint.
6671 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
6672 *
6673 * If this check passes, then we are safe to pass these vnodes to the same FS.
6674 */
6675 if (fdvp->v_mount != tdvp->v_mount) {
6676 error = EXDEV;
6677 goto out1;
6678 }
6679 goto skipped_lookup;
6680 }
6681
6682 if (!batched) {
6683 error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
6684 if (error) {
6685 if (error == ENOENT) {
6686 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
6687 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
6688 /*
6689 * We encountered a race where after doing the namei, tvp stops
6690 * being valid. If so, simply re-drive the rename call from the
6691 * top.
6692 */
6693 do_retry = 1;
6694 retry_count += 1;
6695 }
6696 }
6697 goto out1;
6698 }
6699 }
6700
6701 /*
6702 * If the source and destination are the same (i.e. they're
6703 * links to the same vnode) and the target file system is
6704 * case sensitive, then there is nothing to do.
6705 *
6706 * XXX Come back to this.
6707 */
6708 if (fvp == tvp) {
6709 int pathconf_val;
6710
6711 /*
6712 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
6713 * then assume that this file system is case sensitive.
6714 */
6715 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
6716 pathconf_val != 0) {
6717 goto out1;
6718 }
6719 }
6720
6721 /*
6722 * Allow the renaming of mount points.
6723 * - target must not exist
6724 * - target must reside in the same directory as source
6725 * - union mounts cannot be renamed
6726 * - "/" cannot be renamed
6727 *
6728 * XXX Handle this in VFS after a continued lookup (if we missed
6729 * in the cache to start off)
6730 */
6731 if ((fvp->v_flag & VROOT) &&
6732 (fvp->v_type == VDIR) &&
6733 (tvp == NULL) &&
6734 (fvp->v_mountedhere == NULL) &&
6735 (fdvp == tdvp) &&
6736 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
6737 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
6738 vnode_t coveredvp;
6739
6740 /* switch fvp to the covered vnode */
6741 coveredvp = fvp->v_mount->mnt_vnodecovered;
6742 if ( (vnode_getwithref(coveredvp)) ) {
6743 error = ENOENT;
6744 goto out1;
6745 }
6746 vnode_put(fvp);
6747
6748 fvp = coveredvp;
6749 mntrename = TRUE;
6750 }
6751 /*
6752 * Check for cross-device rename.
6753 */
6754 if ((fvp->v_mount != tdvp->v_mount) ||
6755 (tvp && (fvp->v_mount != tvp->v_mount))) {
6756 error = EXDEV;
6757 goto out1;
6758 }
6759
6760 /*
6761 * If source is the same as the destination (that is the
6762 * same inode number) then there is nothing to do...
6763 * EXCEPT if the underlying file system supports case
6764 * insensitivity and is case preserving. In this case
6765 * the file system needs to handle the special case of
6766 * getting the same vnode as target (fvp) and source (tvp).
6767 *
6768 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
6769 * and _PC_CASE_PRESERVING can have this exception, and they need to
6770 * handle the special case of getting the same vnode as target and
6771 * source. NOTE: Then the target is unlocked going into vnop_rename,
6772 * so not to cause locking problems. There is a single reference on tvp.
6773 *
6774 * NOTE - that fvp == tvp also occurs if they are hard linked and
6775 * that correct behaviour then is just to return success without doing
6776 * anything.
6777 *
6778 * XXX filesystem should take care of this itself, perhaps...
6779 */
6780 if (fvp == tvp && fdvp == tdvp) {
6781 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
6782 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
6783 fromnd->ni_cnd.cn_namelen)) {
6784 goto out1;
6785 }
6786 }
6787
6788 if (holding_mntlock && fvp->v_mount != locked_mp) {
6789 /*
6790 * we're holding a reference and lock
6791 * on locked_mp, but it no longer matches
6792 * what we want to do... so drop our hold
6793 */
6794 mount_unlock_renames(locked_mp);
6795 mount_drop(locked_mp, 0);
6796 holding_mntlock = 0;
6797 }
6798 if (tdvp != fdvp && fvp->v_type == VDIR) {
6799 /*
6800 * serialize renames that re-shape
6801 * the tree... if holding_mntlock is
6802 * set, then we're ready to go...
6803 * otherwise we
6804 * first need to drop the iocounts
6805 * we picked up, second take the
6806 * lock to serialize the access,
6807 * then finally start the lookup
6808 * process over with the lock held
6809 */
6810 if (!holding_mntlock) {
6811 /*
6812 * need to grab a reference on
6813 * the mount point before we
6814 * drop all the iocounts... once
6815 * the iocounts are gone, the mount
6816 * could follow
6817 */
6818 locked_mp = fvp->v_mount;
6819 mount_ref(locked_mp, 0);
6820
6821 /*
6822 * nameidone has to happen before we vnode_put(tvp)
6823 * since it may need to release the fs_nodelock on the tvp
6824 */
6825 nameidone(tond);
6826
6827 if (tvp)
6828 vnode_put(tvp);
6829 vnode_put(tdvp);
6830
6831 /*
6832 * nameidone has to happen before we vnode_put(fdvp)
6833 * since it may need to release the fs_nodelock on the fvp
6834 */
6835 nameidone(fromnd);
6836
6837 vnode_put(fvp);
6838 vnode_put(fdvp);
6839
6840 mount_lock_renames(locked_mp);
6841 holding_mntlock = 1;
6842
6843 goto retry;
6844 }
6845 } else {
6846 /*
6847 * when we dropped the iocounts to take
6848 * the lock, we allowed the identity of
6849 * the various vnodes to change... if they did,
6850 * we may no longer be dealing with a rename
6851 * that reshapes the tree... once we're holding
6852 * the iocounts, the vnodes can't change type
6853 * so we're free to drop the lock at this point
6854 * and continue on
6855 */
6856 if (holding_mntlock) {
6857 mount_unlock_renames(locked_mp);
6858 mount_drop(locked_mp, 0);
6859 holding_mntlock = 0;
6860 }
6861 }
6862
6863 // save these off so we can later verify that fvp is the same
6864 oname = fvp->v_name;
6865 oparent = fvp->v_parent;
6866
6867 skipped_lookup:
6868 #if CONFIG_FSE
6869 need_event = need_fsevent(FSE_RENAME, fdvp);
6870 if (need_event) {
6871 if (fvp) {
6872 get_fse_info(fvp, &from_finfo, ctx);
6873 } else {
6874 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
6875 if (error) {
6876 goto out1;
6877 }
6878
6879 fvap = &__rename_data->fv_attr;
6880 }
6881
6882 if (tvp) {
6883 get_fse_info(tvp, &to_finfo, ctx);
6884 } else if (batched) {
6885 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
6886 if (error) {
6887 goto out1;
6888 }
6889
6890 tvap = &__rename_data->tv_attr;
6891 }
6892 }
6893 #else
6894 need_event = 0;
6895 #endif /* CONFIG_FSE */
6896
6897 if (need_event || kauth_authorize_fileop_has_listeners()) {
6898 if (from_name == NULL) {
6899 GET_PATH(from_name);
6900 if (from_name == NULL) {
6901 error = ENOMEM;
6902 goto out1;
6903 }
6904 }
6905
6906 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
6907
6908 if (to_name == NULL) {
6909 GET_PATH(to_name);
6910 if (to_name == NULL) {
6911 error = ENOMEM;
6912 goto out1;
6913 }
6914 }
6915
6916 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
6917 }
6918 #if CONFIG_SECLUDED_RENAME
6919 if (flags & VFS_SECLUDE_RENAME) {
6920 fromnd->ni_cnd.cn_flags |= CN_SECLUDE_RENAME;
6921 }
6922 #else
6923 #pragma unused(flags)
6924 #endif
6925 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
6926 tdvp, &tvp, &tond->ni_cnd, tvap,
6927 0, ctx);
6928
6929 if (holding_mntlock) {
6930 /*
6931 * we can drop our serialization
6932 * lock now
6933 */
6934 mount_unlock_renames(locked_mp);
6935 mount_drop(locked_mp, 0);
6936 holding_mntlock = 0;
6937 }
6938 if (error) {
6939 if (error == EKEEPLOOKING) {
6940 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6941 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6942 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
6943 }
6944 }
6945
6946 fromnd->ni_vp = fvp;
6947 tond->ni_vp = tvp;
6948
6949 goto continue_lookup;
6950 }
6951
6952 /*
6953 * We may encounter a race in the VNOP where the destination didn't
6954 * exist when we did the namei, but it does by the time we go and
6955 * try to create the entry. In this case, we should re-drive this rename
6956 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
6957 * but other filesystems susceptible to this race could return it, too.
6958 */
6959 if (error == ERECYCLE) {
6960 do_retry = 1;
6961 }
6962
6963 /*
6964 * For compound VNOPs, the authorization callback may return
6965 * ENOENT in case of racing hardlink lookups hitting the name
6966 * cache, redrive the lookup.
6967 */
6968 if (batched && error == ENOENT) {
6969 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
6970 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
6971 do_retry = 1;
6972 retry_count += 1;
6973 }
6974 }
6975
6976 goto out1;
6977 }
6978
6979 /* call out to allow 3rd party notification of rename.
6980 * Ignore result of kauth_authorize_fileop call.
6981 */
6982 kauth_authorize_fileop(vfs_context_ucred(ctx),
6983 KAUTH_FILEOP_RENAME,
6984 (uintptr_t)from_name, (uintptr_t)to_name);
6985
6986 #if CONFIG_FSE
6987 if (from_name != NULL && to_name != NULL) {
6988 if (from_truncated || to_truncated) {
6989 // set it here since only the from_finfo gets reported up to user space
6990 from_finfo.mode |= FSE_TRUNCATED_PATH;
6991 }
6992
6993 if (tvap && tvp) {
6994 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
6995 }
6996 if (fvap) {
6997 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
6998 }
6999
7000 if (tvp) {
7001 add_fsevent(FSE_RENAME, ctx,
7002 FSE_ARG_STRING, from_len, from_name,
7003 FSE_ARG_FINFO, &from_finfo,
7004 FSE_ARG_STRING, to_len, to_name,
7005 FSE_ARG_FINFO, &to_finfo,
7006 FSE_ARG_DONE);
7007 } else {
7008 add_fsevent(FSE_RENAME, ctx,
7009 FSE_ARG_STRING, from_len, from_name,
7010 FSE_ARG_FINFO, &from_finfo,
7011 FSE_ARG_STRING, to_len, to_name,
7012 FSE_ARG_DONE);
7013 }
7014 }
7015 #endif /* CONFIG_FSE */
7016
7017 /*
7018 * update filesystem's mount point data
7019 */
7020 if (mntrename) {
7021 char *cp, *pathend, *mpname;
7022 char * tobuf;
7023 struct mount *mp;
7024 int maxlen;
7025 size_t len = 0;
7026
7027 mp = fvp->v_mountedhere;
7028
7029 if (vfs_busy(mp, LK_NOWAIT)) {
7030 error = EBUSY;
7031 goto out1;
7032 }
7033 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
7034
7035 if (UIO_SEG_IS_USER_SPACE(segflg))
7036 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
7037 else
7038 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
7039 if (!error) {
7040 /* find current mount point prefix */
7041 pathend = &mp->mnt_vfsstat.f_mntonname[0];
7042 for (cp = pathend; *cp != '\0'; ++cp) {
7043 if (*cp == '/')
7044 pathend = cp + 1;
7045 }
7046 /* find last component of target name */
7047 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
7048 if (*cp == '/')
7049 mpname = cp + 1;
7050 }
7051 /* append name to prefix */
7052 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
7053 bzero(pathend, maxlen);
7054 strlcpy(pathend, mpname, maxlen);
7055 }
7056 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
7057
7058 vfs_unbusy(mp);
7059 }
7060 /*
7061 * fix up name & parent pointers. note that we first
7062 * check that fvp has the same name/parent pointers it
7063 * had before the rename call... this is a 'weak' check
7064 * at best...
7065 *
7066 * XXX oparent and oname may not be set in the compound vnop case
7067 */
7068 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
7069 int update_flags;
7070
7071 update_flags = VNODE_UPDATE_NAME;
7072
7073 if (fdvp != tdvp)
7074 update_flags |= VNODE_UPDATE_PARENT;
7075
7076 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
7077 }
7078 out1:
7079 if (to_name != NULL) {
7080 RELEASE_PATH(to_name);
7081 to_name = NULL;
7082 }
7083 if (from_name != NULL) {
7084 RELEASE_PATH(from_name);
7085 from_name = NULL;
7086 }
7087 if (holding_mntlock) {
7088 mount_unlock_renames(locked_mp);
7089 mount_drop(locked_mp, 0);
7090 holding_mntlock = 0;
7091 }
7092 if (tdvp) {
7093 /*
7094 * nameidone has to happen before we vnode_put(tdvp)
7095 * since it may need to release the fs_nodelock on the tdvp
7096 */
7097 nameidone(tond);
7098
7099 if (tvp)
7100 vnode_put(tvp);
7101 vnode_put(tdvp);
7102 }
7103 if (fdvp) {
7104 /*
7105 * nameidone has to happen before we vnode_put(fdvp)
7106 * since it may need to release the fs_nodelock on the fdvp
7107 */
7108 nameidone(fromnd);
7109
7110 if (fvp)
7111 vnode_put(fvp);
7112 vnode_put(fdvp);
7113 }
7114
7115 /*
7116 * If things changed after we did the namei, then we will re-drive
7117 * this rename call from the top.
7118 */
7119 if (do_retry) {
7120 do_retry = 0;
7121 goto retry;
7122 }
7123
7124 FREE(__rename_data, M_TEMP);
7125 return (error);
7126 }
7127
7128 int
7129 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7130 {
7131 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7132 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7133 }
7134
#if CONFIG_SECLUDED_RENAME
/*
 * rename_ext: same as rename(), except that uap->flags is forwarded to
 * renameat_internal() instead of 0.
 */
int
rename_ext(__unused proc_t p, struct rename_ext_args *uap, __unused int32_t *retval)
{
	vfs_context_t ctx = vfs_context_current();

	return (renameat_internal(ctx, AT_FDCWD, uap->from, AT_FDCWD, uap->to,
	    UIO_USERSPACE, uap->flags));
}
#endif
7145
7146 int
7147 renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7148 {
7149 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7150 uap->tofd, uap->to, UIO_USERSPACE, 0));
7151 }
7152
7153 /*
7154 * Make a directory file.
7155 *
7156 * Returns: 0 Success
7157 * EEXIST
7158 * namei:???
7159 * vnode_authorize:???
7160 * vn_create:???
7161 */
7162 /* ARGSUSED */
7163 static int
7164 mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
7165 enum uio_seg segflg)
7166 {
7167 vnode_t vp, dvp;
7168 int error;
7169 int update_flags = 0;
7170 int batched;
7171 struct nameidata nd;
7172
7173 AUDIT_ARG(mode, vap->va_mode);
7174 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
7175 path, ctx);
7176 nd.ni_cnd.cn_flags |= WILLBEDIR;
7177 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
7178
7179 continue_lookup:
7180 error = nameiat(&nd, fd);
7181 if (error)
7182 return (error);
7183 dvp = nd.ni_dvp;
7184 vp = nd.ni_vp;
7185
7186 if (vp != NULL) {
7187 error = EEXIST;
7188 goto out;
7189 }
7190
7191 batched = vnode_compound_mkdir_available(dvp);
7192
7193 VATTR_SET(vap, va_type, VDIR);
7194
7195 /*
7196 * XXX
7197 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7198 * only get EXISTS or EISDIR for existing path components, and not that it could see
7199 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7200 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7201 */
7202 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
7203 if (error == EACCES || error == EPERM) {
7204 int error2;
7205
7206 nameidone(&nd);
7207 vnode_put(dvp);
7208 dvp = NULLVP;
7209
7210 /*
7211 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
7212 * rather than EACCESS if the target exists.
7213 */
7214 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
7215 path, ctx);
7216 error2 = nameiat(&nd, fd);
7217 if (error2) {
7218 goto out;
7219 } else {
7220 vp = nd.ni_vp;
7221 error = EEXIST;
7222 goto out;
7223 }
7224 }
7225
7226 goto out;
7227 }
7228
7229 /*
7230 * make the directory
7231 */
7232 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
7233 if (error == EKEEPLOOKING) {
7234 nd.ni_vp = vp;
7235 goto continue_lookup;
7236 }
7237
7238 goto out;
7239 }
7240
7241 // Make sure the name & parent pointers are hooked up
7242 if (vp->v_name == NULL)
7243 update_flags |= VNODE_UPDATE_NAME;
7244 if (vp->v_parent == NULLVP)
7245 update_flags |= VNODE_UPDATE_PARENT;
7246
7247 if (update_flags)
7248 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
7249
7250 #if CONFIG_FSE
7251 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
7252 #endif
7253
7254 out:
7255 /*
7256 * nameidone has to happen before we vnode_put(dvp)
7257 * since it may need to release the fs_nodelock on the dvp
7258 */
7259 nameidone(&nd);
7260
7261 if (vp)
7262 vnode_put(vp);
7263 if (dvp)
7264 vnode_put(dvp);
7265
7266 return (error);
7267 }
7268
7269 /*
7270 * mkdir_extended: Create a directory; with extended security (ACL).
7271 *
7272 * Parameters: p Process requesting to create the directory
7273 * uap User argument descriptor (see below)
7274 * retval (ignored)
7275 *
7276 * Indirect: uap->path Path of directory to create
7277 * uap->mode Access permissions to set
7278 * uap->xsecurity ACL to set
7279 *
7280 * Returns: 0 Success
7281 * !0 Not success
7282 *
7283 */
7284 int
7285 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
7286 {
7287 int ciferror;
7288 kauth_filesec_t xsecdst;
7289 struct vnode_attr va;
7290
7291 AUDIT_ARG(owner, uap->uid, uap->gid);
7292
7293 xsecdst = NULL;
7294 if ((uap->xsecurity != USER_ADDR_NULL) &&
7295 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
7296 return ciferror;
7297
7298 VATTR_INIT(&va);
7299 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7300 if (xsecdst != NULL)
7301 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
7302
7303 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7304 UIO_USERSPACE);
7305 if (xsecdst != NULL)
7306 kauth_filesec_free(xsecdst);
7307 return ciferror;
7308 }
7309
7310 int
7311 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
7312 {
7313 struct vnode_attr va;
7314
7315 VATTR_INIT(&va);
7316 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7317
7318 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7319 UIO_USERSPACE));
7320 }
7321
7322 int
7323 mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
7324 {
7325 struct vnode_attr va;
7326
7327 VATTR_INIT(&va);
7328 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7329
7330 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
7331 UIO_USERSPACE));
7332 }
7333
/*
 * rmdirat_internal: remove the directory named by `dirpath', looked up
 * relative to `fd' with nameiat().
 *
 * The lookup is issued with NAMEI_COMPOUNDRMDIR, so nd.ni_vp may come back
 * NULL; in that case the operation is treated as batched and the function
 * panics if vnode_compound_rmdir_available(dvp) is false.
 *
 * The whole operation is re-driven from a fresh NDINIT when restart_flag is
 * set: on ENOENT from vn_authorize_rmdir() (non-batched) or from vn_rmdir()
 * (batched), at most MAX_AUTHORIZE_ENOENT_RETRIES times in total, or when
 * rmdir_remove_orphaned_appleDouble() sets it through the pointer it is
 * handed.  EKEEPLOOKING from vn_rmdir() instead resumes the same lookup.
 *
 * Returns 0 on success, otherwise an errno value (EBUSY for a VROOT vnode,
 * ENOMEM if no path buffer could be obtained, or whatever the lookup,
 * authorization or vn_rmdir() reported).
 */
7334 static int
7335 rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
7336 enum uio_seg segflg)
7337 {
7338 vnode_t vp, dvp;
7339 int error;
7340 struct nameidata nd;
7341 char *path = NULL;
7342 int len=0;
7343 int has_listeners = 0;
7344 int need_event = 0;
7345 int truncated = 0;
7346 #if CONFIG_FSE
7347 struct vnode_attr va;
7348 #endif /* CONFIG_FSE */
7349 struct vnode_attr *vap = NULL;
7350 int restart_count = 0;
7351 int batched;
7352
7353 int restart_flag;
7354
7355 /*
7356 * This loop exists to restart rmdir in the unlikely case that two
7357 * processes are simultaneously trying to remove the same directory
7358 * containing orphaned appleDouble files.
7359 */
7360 do {
7361 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
7362 segflg, dirpath, ctx);
7363 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
/* Re-entered (without a new NDINIT) when vn_rmdir() returns EKEEPLOOKING. */
7364 continue_lookup:
7365 restart_flag = 0;
7366 vap = NULL;
7367
7368 error = nameiat(&nd, fd);
7369 if (error)
7370 return (error);
7371
7372 dvp = nd.ni_dvp;
7373 vp = nd.ni_vp;
7374
7375 if (vp) {
7376 batched = vnode_compound_rmdir_available(vp);
7377
7378 if (vp->v_flag & VROOT) {
7379 /*
7380 * The root of a mounted filesystem cannot be deleted.
7381 */
7382 error = EBUSY;
7383 goto out;
7384 }
7385
7386 /*
7387 * Removed a check here; we used to abort if vp's vid
7388 * was not the same as what we'd seen the last time around.
7389 * I do not think that check was valid, because if we retry
7390 * and all dirents are gone, the directory could legitimately
7391 * be recycled but still be present in a situation where we would
7392 * have had permission to delete. Therefore, we won't make
7393 * an effort to preserve that check now that we may not have a
7394 * vp here.
7395 */
7396
/* Only authorize here when the filesystem will not do it in a compound VNOP. */
7397 if (!batched) {
7398 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
7399 if (error) {
7400 if (error == ENOENT) {
7401 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7402 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7403 restart_flag = 1;
7404 restart_count += 1;
7405 }
7406 }
7407 goto out;
7408 }
7409 }
7410 } else {
/* The lookup returned no vnode: this must be a compound (batched) rmdir. */
7411 batched = 1;
7412
7413 if (!vnode_compound_rmdir_available(dvp)) {
7414 panic("No error, but no compound rmdir?");
7415 }
7416 }
7417
7418 #if CONFIG_FSE
7419 fse_info finfo;
7420
7421 need_event = need_fsevent(FSE_DELETE, dvp);
7422 if (need_event) {
7423 if (!batched) {
7424 get_fse_info(vp, &finfo, ctx);
7425 } else {
/* Batched: have vn_rmdir() fill `va' so finfo can be derived from it afterwards. */
7426 error = vfs_get_notify_attributes(&va);
7427 if (error) {
7428 goto out;
7429 }
7430
7431 vap = &va;
7432 }
7433 }
7434 #endif
7435 has_listeners = kauth_authorize_fileop_has_listeners();
7436 if (need_event || has_listeners) {
/* The path buffer is kept across a continue_lookup pass and reused. */
7437 if (path == NULL) {
7438 GET_PATH(path);
7439 if (path == NULL) {
7440 error = ENOMEM;
7441 goto out;
7442 }
7443 }
7444
7445 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
7446 #if CONFIG_FSE
/*
 * NOTE(review): finfo is only filled by get_fse_info() above in the
 * non-batched need_event case; on the other paths this ORs into an
 * unset struct -- confirm that is harmless.
 */
7447 if (truncated) {
7448 finfo.mode |= FSE_TRUNCATED_PATH;
7449 }
7450 #endif
7451 }
7452
/* vn_rmdir() is handed &vp and may change it; mirror the result into nd. */
7453 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
7454 nd.ni_vp = vp;
7455 if (vp == NULLVP) {
7456 /* Couldn't find a vnode */
7457 goto out;
7458 }
7459
7460 if (error == EKEEPLOOKING) {
7461 goto continue_lookup;
7462 } else if (batched && error == ENOENT) {
7463 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7464 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7465 /*
7466 * For compound VNOPs, the authorization callback
7467 * may return ENOENT in case of racing hard link lookups
7468 * redrive the lookup.
7469 */
7470 restart_flag = 1;
7471 restart_count += 1;
7472 goto out;
7473 }
7474 }
7475 #if CONFIG_APPLEDOUBLE
7476 /*
7477 * Special case to remove orphaned AppleDouble
7478 * files. I don't like putting this in the kernel,
7479 * but carbon does not like putting this in carbon either,
7480 * so here we are.
7481 */
7482 if (error == ENOTEMPTY) {
7483 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
7484 if (error == EBUSY) {
7485 goto out;
7486 }
7487
7488
7489 /*
7490 * Assuming everything went well, we will try the RMDIR again
7491 */
7492 if (!error)
7493 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
7494 }
7495 #endif /* CONFIG_APPLEDOUBLE */
7496 /*
7497 * Call out to allow 3rd party notification of delete.
7498 * Ignore result of kauth_authorize_fileop call.
7499 */
7500 if (!error) {
7501 if (has_listeners) {
7502 kauth_authorize_fileop(vfs_context_ucred(ctx),
7503 KAUTH_FILEOP_DELETE,
7504 (uintptr_t)vp,
7505 (uintptr_t)path);
7506 }
7507
7508 if (vp->v_flag & VISHARDLINK) {
7509 // see the comment in unlink1() about why we update
7510 // the parent of a hard link when it is removed
7511 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
7512 }
7513
7514 #if CONFIG_FSE
7515 if (need_event) {
7516 if (vap) {
7517 vnode_get_fse_info_from_vap(vp, &finfo, vap);
7518 }
7519 add_fsevent(FSE_DELETE, ctx,
7520 FSE_ARG_STRING, len, path,
7521 FSE_ARG_FINFO, &finfo,
7522 FSE_ARG_DONE);
7523 }
7524 #endif
7525 }
7526
7527 out:
7528 if (path != NULL) {
7529 RELEASE_PATH(path);
7530 path = NULL;
7531 }
7532 /*
7533 * nameidone has to happen before we vnode_put(dvp)
7534 * since it may need to release the fs_nodelock on the dvp
7535 */
7536 nameidone(&nd);
7537 vnode_put(dvp);
7538
7539 if (vp)
7540 vnode_put(vp);
7541
/*
 * NOTE(review): vp has already been released by vnode_put() above; from
 * here on its value appears to be used only as the sleep/wakeup channel
 * address for wakeup_one()/tsleep() -- confirm.
 */
7542 if (restart_flag == 0) {
7543 wakeup_one((caddr_t)vp);
7544 return (error);
7545 }
7546 tsleep(vp, PVFS, "rm AD", 1);
7547
7548 } while (restart_flag != 0);
7549
7550 return (error);
7551
7552 }
7553
7554 /*
7555 * Remove a directory file.
7556 */
7557 /* ARGSUSED */
7558 int
7559 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
7560 {
7561 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
7562 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
7563 }
7564
7565 /* Get direntry length padded to 8 byte alignment */
7566 #define DIRENT64_LEN(namlen) \
7567 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
7568
7569 errno_t
7570 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
7571 int *numdirent, vfs_context_t ctxp)
7572 {
7573 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
7574 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
7575 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
7576 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
7577 } else {
7578 size_t bufsize;
7579 void * bufptr;
7580 uio_t auio;
7581 struct direntry *entry64;
7582 struct dirent *dep;
7583 int bytesread;
7584 int error;
7585
7586 /*
7587 * Our kernel buffer needs to be smaller since re-packing
7588 * will expand each dirent. The worse case (when the name
7589 * length is 3) corresponds to a struct direntry size of 32
7590 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
7591 * (4-byte aligned). So having a buffer that is 3/8 the size
7592 * will prevent us from reading more than we can pack.
7593 *
7594 * Since this buffer is wired memory, we will limit the
7595 * buffer size to a maximum of 32K. We would really like to
7596 * use 32K in the MIN(), but we use magic number 87371 to
7597 * prevent uio_resid() * 3 / 8 from overflowing.
7598 */
7599 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
7600 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
7601 if (bufptr == NULL) {
7602 return ENOMEM;
7603 }
7604
7605 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
7606 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
7607 auio->uio_offset = uio->uio_offset;
7608
7609 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
7610
7611 dep = (struct dirent *)bufptr;
7612 bytesread = bufsize - uio_resid(auio);
7613
7614 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
7615 M_TEMP, M_WAITOK);
7616 /*
7617 * Convert all the entries and copy them out to user's buffer.
7618 */
7619 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
7620 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
7621
7622 bzero(entry64, enbufsize);
7623 /* Convert a dirent to a dirent64. */
7624 entry64->d_ino = dep->d_ino;
7625 entry64->d_seekoff = 0;
7626 entry64->d_reclen = enbufsize;
7627 entry64->d_namlen = dep->d_namlen;
7628 entry64->d_type = dep->d_type;
7629 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
7630
7631 /* Move to next entry. */
7632 dep = (struct dirent *)((char *)dep + dep->d_reclen);
7633
7634 /* Copy entry64 to user's buffer. */
7635 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
7636 }
7637
7638 /* Update the real offset using the offset we got from VNOP_READDIR. */
7639 if (error == 0) {
7640 uio->uio_offset = auio->uio_offset;
7641 }
7642 uio_free(auio);
7643 FREE(bufptr, M_TEMP);
7644 FREE(entry64, M_TEMP);
7645 return (error);
7646 }
7647 }
7648
/* Upper bound applied to the caller-supplied buffer size in getdirentries_common(). */
7649 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
7650
7651 /*
7652 * Read a block of directory entries in a file system independent format.
 *
 * Parameters: fd descriptor to read from; it must have been opened
 * with FREAD (EBADF otherwise) and refer to a
 * directory vnode (EINVAL otherwise)
 * bufp user buffer receiving the entries
 * bufsize size of bufp, clamped to GETDIRENTRIES_MAXBUFSIZE
 * bytesread out: bufsize minus what is left in the uio; only
 * written on success
 * offset optional out: file offset sampled before the read
 * flags VNODE_READDIR_EXTENDED selects vnode_readdir64(),
 * otherwise VNOP_READDIR() is called directly
 *
 * Returns 0 on success or an errno value. The file offset in the fileglob
 * is updated from the uio after the read, whether or not the read failed.
7653 */
7654 static int
7655 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
7656 off_t *offset, int flags)
7657 {
7658 vnode_t vp;
7659 struct vfs_context context = *vfs_context_current(); /* local copy */
7660 struct fileproc *fp;
7661 uio_t auio;
7662 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7663 off_t loff;
7664 int error, eofflag, numdirent;
7665 char uio_buf[ UIO_SIZEOF(1) ];
7666
7667 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
7668 if (error) {
7669 return (error);
7670 }
/* From here on every exit goes through `out' so that file_drop(fd) runs. */
7671 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7672 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7673 error = EBADF;
7674 goto out;
7675 }
7676
7677 if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
7678 bufsize = GETDIRENTRIES_MAXBUFSIZE;
7679
7680 #if CONFIG_MACF
7681 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
7682 if (error)
7683 goto out;
7684 #endif
7685 if ( (error = vnode_getwithref(vp)) ) {
7686 goto out;
7687 }
7688 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7689
/* Re-entered with a new vp when the read has to continue in another union layer. */
7690 unionread:
7691 if (vp->v_type != VDIR) {
7692 (void)vnode_put(vp);
7693 error = EINVAL;
7694 goto out;
7695 }
7696
7697 #if CONFIG_MACF
7698 error = mac_vnode_check_readdir(&context, vp);
7699 if (error != 0) {
7700 (void)vnode_put(vp);
7701 goto out;
7702 }
7703 #endif /* MAC */
7704
7705 loff = fp->f_fglob->fg_offset;
7706 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
7707 uio_addiov(auio, bufp, bufsize);
7708
7709 if (flags & VNODE_READDIR_EXTENDED) {
7710 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
7711 fp->f_fglob->fg_offset = uio_offset(auio);
7712 } else {
7713 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
7714 fp->f_fglob->fg_offset = uio_offset(auio);
7715 }
7716 if (error) {
7717 (void)vnode_put(vp);
7718 goto out;
7719 }
7720
/* Nothing was transferred: see whether another union layer should be read instead. */
7721 if ((user_ssize_t)bufsize == uio_resid(auio)){
7722 if (union_dircheckp) {
7723 error = union_dircheckp(&vp, fp, &context);
7724 if (error == -1)
7725 goto unionread;
/*
 * NOTE(review): unlike the other error exits, this one does not
 * vnode_put(vp) -- presumably union_dircheckp() has dealt with the
 * reference on failure; verify against its implementation.
 */
7726 if (error)
7727 goto out;
7728 }
7729
7730 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
7731 struct vnode *tvp = vp;
7732 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
/* Switch the open file over to the new vnode and restart at offset 0. */
7733 vnode_ref(vp);
7734 fp->f_fglob->fg_data = (caddr_t) vp;
7735 fp->f_fglob->fg_offset = 0;
7736 vnode_rele(tvp);
7737 vnode_put(tvp);
7738 goto unionread;
7739 }
7740 vp = tvp;
7741 }
7742 }
7743
7744 vnode_put(vp);
7745 if (offset) {
7746 *offset = loff;
7747 }
7748
7749 *bytesread = bufsize - uio_resid(auio);
7750 out:
7751 file_drop(fd);
7752 return (error);
7753 }
7754
7755
7756 int
7757 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
7758 {
7759 off_t offset;
7760 ssize_t bytesread;
7761 int error;
7762
7763 AUDIT_ARG(fd, uap->fd);
7764 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
7765
7766 if (error == 0) {
7767 if (proc_is64bit(p)) {
7768 user64_long_t base = (user64_long_t)offset;
7769 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
7770 } else {
7771 user32_long_t base = (user32_long_t)offset;
7772 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
7773 }
7774 *retval = bytesread;
7775 }
7776 return (error);
7777 }
7778
7779 int
7780 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
7781 {
7782 off_t offset;
7783 ssize_t bytesread;
7784 int error;
7785
7786 AUDIT_ARG(fd, uap->fd);
7787 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
7788
7789 if (error == 0) {
7790 *retval = bytesread;
7791 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
7792 }
7793 return (error);
7794 }
7795
7796
7797 /*
7798 * Set the mode mask for creation of filesystem nodes.
7799 * XXX implement xsecurity
7800 */
7801 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
7802 static int
7803 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
7804 {
7805 struct filedesc *fdp;
7806
7807 AUDIT_ARG(mask, newmask);
7808 proc_fdlock(p);
7809 fdp = p->p_fd;
7810 *retval = fdp->fd_cmask;
7811 fdp->fd_cmask = newmask & ALLPERMS;
7812 proc_fdunlock(p);
7813 return (0);
7814 }
7815
7816 /*
7817 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
7818 *
7819 * Parameters: p Process requesting to set the umask
7820 * uap User argument descriptor (see below)
7821 * retval umask of the process (parameter p)
7822 *
7823 * Indirect: uap->newmask umask to set
7824 * uap->xsecurity ACL to set
7825 *
7826 * Returns: 0 Success
7827 * !0 Not success
7828 *
7829 */
7830 int
7831 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
7832 {
7833 int ciferror;
7834 kauth_filesec_t xsecdst;
7835
7836 xsecdst = KAUTH_FILESEC_NONE;
7837 if (uap->xsecurity != USER_ADDR_NULL) {
7838 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
7839 return ciferror;
7840 } else {
7841 xsecdst = KAUTH_FILESEC_NONE;
7842 }
7843
7844 ciferror = umask1(p, uap->newmask, xsecdst, retval);
7845
7846 if (xsecdst != KAUTH_FILESEC_NONE)
7847 kauth_filesec_free(xsecdst);
7848 return ciferror;
7849 }
7850
7851 int
7852 umask(proc_t p, struct umask_args *uap, int32_t *retval)
7853 {
7854 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
7855 }
7856
7857 /*
7858 * Void all references to file by ripping underlying filesystem
7859 * away from vnode.
7860 */
7861 /* ARGSUSED */
7862 int
7863 revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
7864 {
7865 vnode_t vp;
7866 struct vnode_attr va;
7867 vfs_context_t ctx = vfs_context_current();
7868 int error;
7869 struct nameidata nd;
7870
7871 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
7872 uap->path, ctx);
7873 error = namei(&nd);
7874 if (error)
7875 return (error);
7876 vp = nd.ni_vp;
7877
7878 nameidone(&nd);
7879
7880 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
7881 error = ENOTSUP;
7882 goto out;
7883 }
7884
7885 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
7886 error = EBUSY;
7887 goto out;
7888 }
7889
7890 #if CONFIG_MACF
7891 error = mac_vnode_check_revoke(ctx, vp);
7892 if (error)
7893 goto out;
7894 #endif
7895
7896 VATTR_INIT(&va);
7897 VATTR_WANTED(&va, va_uid);
7898 if ((error = vnode_getattr(vp, &va, ctx)))
7899 goto out;
7900 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
7901 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
7902 goto out;
7903 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
7904 VNOP_REVOKE(vp, REVOKEALL, ctx);
7905 out:
7906 vnode_put(vp);
7907 return (error);
7908 }
7909
7910
7911 /*
7912 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
7913 * The following system calls are designed to support features
7914 * which are specific to the HFS & HFS Plus volume formats
7915 */
7916
7917
7918 /*
7919 * Obtain attribute information on objects in a directory while enumerating
7920 * the directory.
 *
 * Indirect: uap->fd directory descriptor; must be open with FREAD
 * uap->alist attrlist describing the requested attributes
 * uap->buffer user buffer of uap->buffersize bytes for results
 * uap->count in: copied in and passed to VNOP_READDIRATTR()
 * out: count produced by VNOP_READDIRATTR()
 * uap->basep out: file offset sampled before the read
 * uap->newstate out: state value produced by VNOP_READDIRATTR()
 * uap->options passed to VNOP_READDIRATTR() (low 32 bits only)
 *
 * Returns 0 on success with *retval set to the eof flag, otherwise an errno
 * value (EBADF if the descriptor lacks FREAD, EINVAL if it is not a
 * directory, or whatever copyin/copyout, authorization or the VNOP
 * reported).
7921 */
7922 /* ARGSUSED */
7923 int
7924 getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
7925 {
7926 vnode_t vp;
7927 struct fileproc *fp;
7928 uio_t auio = NULL;
7929 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7930 uint32_t count, savecount;
7931 uint32_t newstate;
7932 int error, eofflag;
7933 uint32_t loff;
7934 struct attrlist attributelist;
7935 vfs_context_t ctx = vfs_context_current();
7936 int fd = uap->fd;
7937 char uio_buf[ UIO_SIZEOF(1) ];
7938 kauth_action_t action;
7939
7940 AUDIT_ARG(fd, fd);
7941
7942 /* Get the attributes into kernel space */
7943 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
7944 return(error);
7945 }
7946 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
7947 return(error);
7948 }
/* Remember the requested count so it can be restored on a union re-drive. */
7949 savecount = count;
7950 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
7951 return (error);
7952 }
/* From here on every exit goes through `out' so that file_drop(fd) runs. */
7953 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7954 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7955 error = EBADF;
7956 goto out;
7957 }
7958
7959
7960 #if CONFIG_MACF
7961 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
7962 fp->f_fglob);
7963 if (error)
7964 goto out;
7965 #endif
7966
7967
7968 if ( (error = vnode_getwithref(vp)) )
7969 goto out;
7970
7971 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7972
/* Re-entered with a new vp when the read continues in the underlying union layer. */
7973 unionread:
7974 if (vp->v_type != VDIR) {
7975 (void)vnode_put(vp);
7976 error = EINVAL;
7977 goto out;
7978 }
7979
7980 #if CONFIG_MACF
7981 error = mac_vnode_check_readdir(ctx, vp);
7982 if (error != 0) {
7983 (void)vnode_put(vp);
7984 goto out;
7985 }
7986 #endif /* MAC */
7987
7988 /* set up the uio structure which will contain the users return buffer */
/* NOTE(review): loff is uint32_t; if fg_offset is wider this truncates it -- confirm. */
7989 loff = fp->f_fglob->fg_offset;
7990 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
7991 uio_addiov(auio, uap->buffer, uap->buffersize);
7992
7993 /*
7994 * If the only item requested is file names, we can let that past with
7995 * just LIST_DIRECTORY. If they want any other attributes, that means
7996 * they need SEARCH as well.
7997 */
7998 action = KAUTH_VNODE_LIST_DIRECTORY;
7999 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
8000 attributelist.fileattr || attributelist.dirattr)
8001 action |= KAUTH_VNODE_SEARCH;
8002
8003 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
8004
8005 /* Believe it or not, uap->options only has 32-bits of valid
8006 * info, so truncate before extending again */
8007
8008 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
8009 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
8010 }
8011
/* Covers both an authorization failure and a VNOP_READDIRATTR() failure. */
8012 if (error) {
8013 (void) vnode_put(vp);
8014 goto out;
8015 }
8016
8017 /*
8018 * If we've got the last entry of a directory in a union mount
8019 * then reset the eofflag and pretend there's still more to come.
8020 * The next call will again set eofflag and the buffer will be empty,
8021 * so traverse to the underlying directory and do the directory
8022 * read there.
8023 */
8024 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
8025 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
8026 eofflag = 0;
8027 } else { // Empty buffer
8028 struct vnode *tvp = vp;
8029 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
/* Switch the open file over to the new vnode and start again from offset 0. */
8030 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
8031 fp->f_fglob->fg_data = (caddr_t) vp;
8032 fp->f_fglob->fg_offset = 0; // reset index for new dir
8033 count = savecount;
8034 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
8035 vnode_put(tvp);
8036 goto unionread;
8037 }
8038 vp = tvp;
8039 }
8040 }
8041
8042 (void)vnode_put(vp);
8043
/* error is always 0 here: every non-zero value already jumped to `out' above. */
8044 if (error)
8045 goto out;
8046 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
8047
8048 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
8049 goto out;
8050 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
8051 goto out;
8052 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
8053 goto out;
8054
8055 *retval = eofflag; /* similar to getdirentries */
8056 error = 0;
8057 out:
8058 file_drop(fd);
8059 return (error); /* return error earlier, an retval of 0 or 1 now */
8060
8061 } /* end of getdirentriesattr system call */
8062
8063 /*
8064 * Exchange data between two files
8065 */
8066
8067 /* ARGSUSED */
8068 int
8069 exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
8070 {
8071
8072 struct nameidata fnd, snd;
8073 vfs_context_t ctx = vfs_context_current();
8074 vnode_t fvp;
8075 vnode_t svp;
8076 int error;
8077 u_int32_t nameiflags;
8078 char *fpath = NULL;
8079 char *spath = NULL;
8080 int flen=0, slen=0;
8081 int from_truncated=0, to_truncated=0;
8082 #if CONFIG_FSE
8083 fse_info f_finfo, s_finfo;
8084 #endif
8085
8086 nameiflags = 0;
8087 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8088
8089 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
8090 UIO_USERSPACE, uap->path1, ctx);
8091
8092 error = namei(&fnd);
8093 if (error)
8094 goto out2;
8095
8096 nameidone(&fnd);
8097 fvp = fnd.ni_vp;
8098
8099 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
8100 UIO_USERSPACE, uap->path2, ctx);
8101
8102 error = namei(&snd);
8103 if (error) {
8104 vnode_put(fvp);
8105 goto out2;
8106 }
8107 nameidone(&snd);
8108 svp = snd.ni_vp;
8109
8110 /*
8111 * if the files are the same, return an inval error
8112 */
8113 if (svp == fvp) {
8114 error = EINVAL;
8115 goto out;
8116 }
8117
8118 /*
8119 * if the files are on different volumes, return an error
8120 */
8121 if (svp->v_mount != fvp->v_mount) {
8122 error = EXDEV;
8123 goto out;
8124 }
8125
8126 /* If they're not files, return an error */
8127 if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
8128 error = EINVAL;
8129 goto out;
8130 }
8131
8132 #if CONFIG_MACF
8133 error = mac_vnode_check_exchangedata(ctx,
8134 fvp, svp);
8135 if (error)
8136 goto out;
8137 #endif
8138 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
8139 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
8140 goto out;
8141
8142 if (
8143 #if CONFIG_FSE
8144 need_fsevent(FSE_EXCHANGE, fvp) ||
8145 #endif
8146 kauth_authorize_fileop_has_listeners()) {
8147 GET_PATH(fpath);
8148 GET_PATH(spath);
8149 if (fpath == NULL || spath == NULL) {
8150 error = ENOMEM;
8151 goto out;
8152 }
8153
8154 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
8155 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
8156
8157 #if CONFIG_FSE
8158 get_fse_info(fvp, &f_finfo, ctx);
8159 get_fse_info(svp, &s_finfo, ctx);
8160 if (from_truncated || to_truncated) {
8161 // set it here since only the f_finfo gets reported up to user space
8162 f_finfo.mode |= FSE_TRUNCATED_PATH;
8163 }
8164 #endif
8165 }
8166 /* Ok, make the call */
8167 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
8168
8169 if (error == 0) {
8170 const char *tmpname;
8171
8172 if (fpath != NULL && spath != NULL) {
8173 /* call out to allow 3rd party notification of exchangedata.
8174 * Ignore result of kauth_authorize_fileop call.
8175 */
8176 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
8177 (uintptr_t)fpath, (uintptr_t)spath);
8178 }
8179 name_cache_lock();
8180
8181 tmpname = fvp->v_name;
8182 fvp->v_name = svp->v_name;
8183 svp->v_name = tmpname;
8184
8185 if (fvp->v_parent != svp->v_parent) {
8186 vnode_t tmp;
8187
8188 tmp = fvp->v_parent;
8189 fvp->v_parent = svp->v_parent;
8190 svp->v_parent = tmp;
8191 }
8192 name_cache_unlock();
8193
8194 #if CONFIG_FSE
8195 if (fpath != NULL && spath != NULL) {
8196 add_fsevent(FSE_EXCHANGE, ctx,
8197 FSE_ARG_STRING, flen, fpath,
8198 FSE_ARG_FINFO, &f_finfo,
8199 FSE_ARG_STRING, slen, spath,
8200 FSE_ARG_FINFO, &s_finfo,
8201 FSE_ARG_DONE);
8202 }
8203 #endif
8204 }
8205
8206 out:
8207 if (fpath != NULL)
8208 RELEASE_PATH(fpath);
8209 if (spath != NULL)
8210 RELEASE_PATH(spath);
8211 vnode_put(svp);
8212 vnode_put(fvp);
8213 out2:
8214 return (error);
8215 }
8216
8217 /*
8218 * Return (in MB) the amount of freespace on the given vnode's volume.
8219 */
8220 uint32_t freespace_mb(vnode_t vp);
8221
8222 uint32_t
8223 freespace_mb(vnode_t vp)
8224 {
8225 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
8226 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
8227 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
8228 }
8229
8230 #if CONFIG_SEARCHFS
8231
8232 /* ARGSUSED */
8233
8234 int
8235 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
8236 {
8237 vnode_t vp, tvp;
8238 int i, error=0;
8239 int fserror = 0;
8240 struct nameidata nd;
8241 struct user64_fssearchblock searchblock;
8242 struct searchstate *state;
8243 struct attrlist *returnattrs;
8244 struct timeval timelimit;
8245 void *searchparams1,*searchparams2;
8246 uio_t auio = NULL;
8247 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8248 uint32_t nummatches;
8249 int mallocsize;
8250 uint32_t nameiflags;
8251 vfs_context_t ctx = vfs_context_current();
8252 char uio_buf[ UIO_SIZEOF(1) ];
8253
8254 /* Start by copying in fsearchblock parameter list */
8255 if (IS_64BIT_PROCESS(p)) {
8256 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
8257 timelimit.tv_sec = searchblock.timelimit.tv_sec;
8258 timelimit.tv_usec = searchblock.timelimit.tv_usec;
8259 }
8260 else {
8261 struct user32_fssearchblock tmp_searchblock;
8262
8263 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
8264 // munge into 64-bit version
8265 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
8266 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
8267 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
8268 searchblock.maxmatches = tmp_searchblock.maxmatches;
8269 /*
8270 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8271 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8272 */
8273 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
8274 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
8275 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
8276 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
8277 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
8278 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
8279 searchblock.searchattrs = tmp_searchblock.searchattrs;
8280 }
8281 if (error)
8282 return(error);
8283
8284 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
8285 */
8286 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
8287 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
8288 return(EINVAL);
8289
8290 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8291 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
8292 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8293 /* block. */
8294 /* */
8295 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8296 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8297 /* assumes the size is still 556 bytes it will continue to work */
8298
8299 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
8300 sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
8301
8302 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
8303
8304 /* Now set up the various pointers to the correct place in our newly allocated memory */
8305
8306 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
8307 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
8308 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
8309
8310 /* Now copy in the stuff given our local variables. */
8311
8312 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
8313 goto freeandexit;
8314
8315 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
8316 goto freeandexit;
8317
8318 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
8319 goto freeandexit;
8320
8321 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
8322 goto freeandexit;
8323
8324 /*
8325 * When searching a union mount, need to set the
8326 * start flag at the first call on each layer to
8327 * reset state for the new volume.
8328 */
8329 if (uap->options & SRCHFS_START)
8330 state->ss_union_layer = 0;
8331 else
8332 uap->options |= state->ss_union_flags;
8333 state->ss_union_flags = 0;
8334
8335 /*
8336 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8337 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
8338 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8339 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
8340 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8341 */
8342
8343 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
8344 attrreference_t* string_ref;
8345 u_int32_t* start_length;
8346 user64_size_t param_length;
8347
8348 /* validate searchparams1 */
8349 param_length = searchblock.sizeofsearchparams1;
8350 /* skip the word that specifies length of the buffer */
8351 start_length= (u_int32_t*) searchparams1;
8352 start_length= start_length+1;
8353 string_ref= (attrreference_t*) start_length;
8354
8355 /* ensure no negative offsets or too big offsets */
8356 if (string_ref->attr_dataoffset < 0 ) {
8357 error = EINVAL;
8358 goto freeandexit;
8359 }
8360 if (string_ref->attr_length > MAXPATHLEN) {
8361 error = EINVAL;
8362 goto freeandexit;
8363 }
8364
8365 /* Check for pointer overflow in the string ref */
8366 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
8367 error = EINVAL;
8368 goto freeandexit;
8369 }
8370
8371 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
8372 error = EINVAL;
8373 goto freeandexit;
8374 }
8375 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
8376 error = EINVAL;
8377 goto freeandexit;
8378 }
8379 }
8380
8381 /* set up the uio structure which will contain the users return buffer */
8382 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8383 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
8384
8385 nameiflags = 0;
8386 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8387 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
8388 UIO_USERSPACE, uap->path, ctx);
8389
8390 error = namei(&nd);
8391 if (error)
8392 goto freeandexit;
8393 vp = nd.ni_vp;
8394 nameidone(&nd);
8395
8396 /*
8397 * Switch to the root vnode for the volume
8398 */
8399 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
8400 vnode_put(vp);
8401 if (error)
8402 goto freeandexit;
8403 vp = tvp;
8404
8405 /*
8406 * If it's a union mount, the path lookup takes
8407 * us to the top layer. But we may need to descend
8408 * to a lower layer. For non-union mounts the layer
8409 * is always zero.
8410 */
8411 for (i = 0; i < (int) state->ss_union_layer; i++) {
8412 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
8413 break;
8414 tvp = vp;
8415 vp = vp->v_mount->mnt_vnodecovered;
8416 if (vp == NULL) {
8417 vnode_put(tvp);
8418 error = ENOENT;
8419 goto freeandexit;
8420 }
8421 vnode_getwithref(vp);
8422 vnode_put(tvp);
8423 }
8424
8425 #if CONFIG_MACF
8426 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
8427 if (error) {
8428 vnode_put(vp);
8429 goto freeandexit;
8430 }
8431 #endif
8432
8433
8434 /*
8435 * If searchblock.maxmatches == 0, then skip the search. This has happened
8436 * before and sometimes the underlying code doesnt deal with it well.
8437 */
8438 if (searchblock.maxmatches == 0) {
8439 nummatches = 0;
8440 goto saveandexit;
8441 }
8442
8443 /*
8444 * Allright, we have everything we need, so lets make that call.
8445 *
8446 * We keep special track of the return value from the file system:
8447 * EAGAIN is an acceptable error condition that shouldn't keep us
8448 * from copying out any results...
8449 */
8450
8451 fserror = VNOP_SEARCHFS(vp,
8452 searchparams1,
8453 searchparams2,
8454 &searchblock.searchattrs,
8455 (u_long)searchblock.maxmatches,
8456 &timelimit,
8457 returnattrs,
8458 &nummatches,
8459 (u_long)uap->scriptcode,
8460 (u_long)uap->options,
8461 auio,
8462 (struct searchstate *) &state->ss_fsstate,
8463 ctx);
8464
8465 /*
8466 * If it's a union mount we need to be called again
8467 * to search the mounted-on filesystem.
8468 */
8469 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
8470 state->ss_union_flags = SRCHFS_START;
8471 state->ss_union_layer++; // search next layer down
8472 fserror = EAGAIN;
8473 }
8474
8475 saveandexit:
8476
8477 vnode_put(vp);
8478
8479 /* Now copy out the stuff that needs copying out. That means the number of matches, the
8480 search state. Everything was already put into he return buffer by the vop call. */
8481
8482 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
8483 goto freeandexit;
8484
8485 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
8486 goto freeandexit;
8487
8488 error = fserror;
8489
8490 freeandexit:
8491
8492 FREE(searchparams1,M_TEMP);
8493
8494 return(error);
8495
8496
8497 } /* end of searchfs system call */
8498
8499 #else /* CONFIG_SEARCHFS */
8500
8501 int
8502 searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
8503 {
8504 return (ENOTSUP);
8505 }
8506
8507 #endif /* CONFIG_SEARCHFS */
8508
8509
8510 lck_grp_attr_t * nspace_group_attr;
8511 lck_attr_t * nspace_lock_attr;
8512 lck_grp_t * nspace_mutex_group;
8513
8514 lck_mtx_t nspace_handler_lock;
8515 lck_mtx_t nspace_handler_exclusion_lock;
8516
8517 time_t snapshot_timestamp=0;
8518 int nspace_allow_virtual_devs=0;
8519
8520 void nspace_handler_init(void);
8521
/*
 * One pending namespace/snapshot event.  A slot is free when flags == 0.
 */
typedef struct nspace_item_info {
	struct vnode	*vp;		// vnode the event is about; &vp is also the sleep channel for its waiters
	void		*arg;		// optional uio handed through to the handler (may be NULL)
	uint64_t	op;		// operation that was requested for vp
	uint32_t	vid;		// vnode_vid(vp) captured when the item was queued
	uint32_t	flags;		// NSPACE_ITEM_* state and event-type bits
	uint32_t	token;		// id assigned when a handler picks the item up
	uint32_t	refcount;	// number of threads waiting on this item
} nspace_item_info;

#define MAX_NSPACE_ITEMS	128
nspace_item_info	nspace_items[MAX_NSPACE_ITEMS];
uint32_t		nspace_item_idx = 0;		// also used as the sleep/wakeup rendezvous address
uint32_t		nspace_token_id = 0;		// last token handed out; &nspace_token_id is slept on while waiting for a free slot
uint32_t		nspace_handler_timeout = 15;	// seconds
8537
/* Life-cycle bits kept in nspace_item_info.flags */
#define NSPACE_ITEM_NEW			0x0001	/* queued, not yet picked up by a handler */
#define NSPACE_ITEM_PROCESSING		0x0002	/* handed to a handler, awaiting completion */
#define NSPACE_ITEM_DEAD		0x0004
#define NSPACE_ITEM_CANCELLED		0x0008	/* waiter returns item->token as its error */
#define NSPACE_ITEM_DONE		0x0010	/* waiter returns success */
#define NSPACE_ITEM_RESET_TIMER		0x0020	/* waiter restarts its timeout instead of failing */

/* Event-type bits: which kind of handler the item is destined for */
#define NSPACE_ITEM_NSPACE_EVENT	0x0040
#define NSPACE_ITEM_SNAPSHOT_EVENT	0x0080

#define NSPACE_ITEM_ALL_EVENT_TYPES	(NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
8549
8550 //#pragma optimization_level 0
8551
/* Kinds of namespace handler; the values index nspace_handlers[]. */
typedef enum {
	NSPACE_HANDLER_NSPACE	= 0,
	NSPACE_HANDLER_SNAPSHOT	= 1,

	NSPACE_HANDLER_COUNT,
} nspace_type_t;

/* Registration record for the process currently servicing one handler type. */
typedef struct {
	uint64_t	handler_tid;	// thread id recorded when the handler registered
	struct proc	*handler_proc;	// NULL when no handler is registered
	int		handler_busy;	// non-zero while a thread is inside wait_for_namespace_event()
} nspace_handler_t;

nspace_handler_t	nspace_handlers[NSPACE_HANDLER_COUNT];
8566
8567 /* namespace fsctl functions */
8568 static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
8569 static int nspace_item_flags_for_type(nspace_type_t nspace_type);
8570 static int nspace_open_flags_for_type(nspace_type_t nspace_type);
8571 static nspace_type_t nspace_type_for_op(uint64_t op);
8572 static int nspace_is_special_process(struct proc *proc);
8573 static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
8574 static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
8575 static int validate_namespace_args (int is64bit, int size);
8576 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
8577
8578
8579 static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
8580 {
8581 switch(nspace_type) {
8582 case NSPACE_HANDLER_NSPACE:
8583 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
8584 case NSPACE_HANDLER_SNAPSHOT:
8585 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
8586 default:
8587 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
8588 return 0;
8589 }
8590 }
8591
8592 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
8593 {
8594 switch(nspace_type) {
8595 case NSPACE_HANDLER_NSPACE:
8596 return NSPACE_ITEM_NSPACE_EVENT;
8597 case NSPACE_HANDLER_SNAPSHOT:
8598 return NSPACE_ITEM_SNAPSHOT_EVENT;
8599 default:
8600 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
8601 return 0;
8602 }
8603 }
8604
8605 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
8606 {
8607 switch(nspace_type) {
8608 case NSPACE_HANDLER_NSPACE:
8609 return FREAD | FWRITE | O_EVTONLY;
8610 case NSPACE_HANDLER_SNAPSHOT:
8611 return FREAD | O_EVTONLY;
8612 default:
8613 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
8614 return 0;
8615 }
8616 }
8617
8618 static inline nspace_type_t nspace_type_for_op(uint64_t op)
8619 {
8620 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
8621 case NAMESPACE_HANDLER_NSPACE_EVENT:
8622 return NSPACE_HANDLER_NSPACE;
8623 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
8624 return NSPACE_HANDLER_SNAPSHOT;
8625 default:
8626 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
8627 return NSPACE_HANDLER_NSPACE;
8628 }
8629 }
8630
8631 static inline int nspace_is_special_process(struct proc *proc)
8632 {
8633 int i;
8634 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
8635 if (proc == nspace_handlers[i].handler_proc)
8636 return 1;
8637 }
8638 return 0;
8639 }
8640
8641 void
8642 nspace_handler_init(void)
8643 {
8644 nspace_lock_attr = lck_attr_alloc_init();
8645 nspace_group_attr = lck_grp_attr_alloc_init();
8646 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
8647 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
8648 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
8649 memset(&nspace_items[0], 0, sizeof(nspace_items));
8650 }
8651
8652 void
8653 nspace_proc_exit(struct proc *p)
8654 {
8655 int i, event_mask = 0;
8656
8657 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
8658 if (p == nspace_handlers[i].handler_proc) {
8659 event_mask |= nspace_item_flags_for_type(i);
8660 nspace_handlers[i].handler_tid = 0;
8661 nspace_handlers[i].handler_proc = NULL;
8662 }
8663 }
8664
8665 if (event_mask == 0) {
8666 return;
8667 }
8668
8669 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
8670 // if this process was the snapshot handler, zero snapshot_timeout
8671 snapshot_timestamp = 0;
8672 }
8673
8674 //
8675 // unblock anyone that's waiting for the handler that died
8676 //
8677 lck_mtx_lock(&nspace_handler_lock);
8678 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8679 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
8680
8681 if ( nspace_items[i].flags & event_mask ) {
8682
8683 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
8684 vnode_lock_spin(nspace_items[i].vp);
8685 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8686 vnode_unlock(nspace_items[i].vp);
8687 }
8688 nspace_items[i].vp = NULL;
8689 nspace_items[i].vid = 0;
8690 nspace_items[i].flags = NSPACE_ITEM_DONE;
8691 nspace_items[i].token = 0;
8692
8693 wakeup((caddr_t)&(nspace_items[i].vp));
8694 }
8695 }
8696 }
8697
8698 wakeup((caddr_t)&nspace_item_idx);
8699 lck_mtx_unlock(&nspace_handler_lock);
8700 }
8701
8702
/*
 * resolve_nspace_item
 *
 * Convenience form of resolve_nspace_item_ext() for callers that have no
 * extra argument to hand to the handler; the result is passed straight
 * back to the caller.
 */
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	int result;

	result = resolve_nspace_item_ext(vp, op, NULL);
	return result;
}
8708
8709 int
8710 resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
8711 {
8712 int i, error, keep_waiting;
8713 struct timespec ts;
8714 nspace_type_t nspace_type = nspace_type_for_op(op);
8715
8716 // only allow namespace events on regular files, directories and symlinks.
8717 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
8718 return 0;
8719 }
8720
8721 //
8722 // if this is a snapshot event and the vnode is on a
8723 // disk image just pretend nothing happened since any
8724 // change to the disk image will cause the disk image
8725 // itself to get backed up and this avoids multi-way
8726 // deadlocks between the snapshot handler and the ever
8727 // popular diskimages-helper process. the variable
8728 // nspace_allow_virtual_devs allows this behavior to
8729 // be overridden (for use by the Mobile TimeMachine
8730 // testing infrastructure which uses disk images)
8731 //
8732 if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
8733 && (vp->v_mount != NULL)
8734 && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
8735 && !nspace_allow_virtual_devs) {
8736
8737 return 0;
8738 }
8739
8740 // if (thread_tid(current_thread()) == namespace_handler_tid) {
8741 if (nspace_handlers[nspace_type].handler_proc == NULL) {
8742 return 0;
8743 }
8744
8745 if (nspace_is_special_process(current_proc())) {
8746 return EDEADLK;
8747 }
8748
8749 lck_mtx_lock(&nspace_handler_lock);
8750
8751 retry:
8752 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8753 if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
8754 break;
8755 }
8756 }
8757
8758 if (i >= MAX_NSPACE_ITEMS) {
8759 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8760 if (nspace_items[i].flags == 0) {
8761 break;
8762 }
8763 }
8764 } else {
8765 nspace_items[i].refcount++;
8766 }
8767
8768 if (i >= MAX_NSPACE_ITEMS) {
8769 ts.tv_sec = nspace_handler_timeout;
8770 ts.tv_nsec = 0;
8771
8772 error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
8773 if (error == 0) {
8774 // an entry got free'd up, go see if we can get a slot
8775 goto retry;
8776 } else {
8777 lck_mtx_unlock(&nspace_handler_lock);
8778 return error;
8779 }
8780 }
8781
8782 //
8783 // if it didn't already exist, add it. if it did exist
8784 // we'll get woken up when someone does a wakeup() on
8785 // the slot in the nspace_items table.
8786 //
8787 if (vp != nspace_items[i].vp) {
8788 nspace_items[i].vp = vp;
8789 nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
8790 nspace_items[i].op = op;
8791 nspace_items[i].vid = vnode_vid(vp);
8792 nspace_items[i].flags = NSPACE_ITEM_NEW;
8793 nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
8794 if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
8795 if (arg) {
8796 vnode_lock_spin(vp);
8797 vp->v_flag |= VNEEDSSNAPSHOT;
8798 vnode_unlock(vp);
8799 }
8800 }
8801
8802 nspace_items[i].token = 0;
8803 nspace_items[i].refcount = 1;
8804
8805 wakeup((caddr_t)&nspace_item_idx);
8806 }
8807
8808 //
8809 // Now go to sleep until the handler does a wakeup on this
8810 // slot in the nspace_items table (or we timeout).
8811 //
8812 keep_waiting = 1;
8813 while(keep_waiting) {
8814 ts.tv_sec = nspace_handler_timeout;
8815 ts.tv_nsec = 0;
8816 error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
8817
8818 if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
8819 error = 0;
8820 } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
8821 error = nspace_items[i].token;
8822 } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
8823 if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
8824 nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
8825 continue;
8826 } else {
8827 error = ETIMEDOUT;
8828 }
8829 } else if (error == 0) {
8830 // hmmm, why did we get woken up?
8831 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
8832 nspace_items[i].token);
8833 }
8834
8835 if (--nspace_items[i].refcount == 0) {
8836 nspace_items[i].vp = NULL; // clear this so that no one will match on it again
8837 nspace_items[i].arg = NULL;
8838 nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
8839 nspace_items[i].flags = 0; // this clears it for re-use
8840 }
8841 wakeup(&nspace_token_id);
8842 keep_waiting = 0;
8843 }
8844
8845 lck_mtx_unlock(&nspace_handler_lock);
8846
8847 return error;
8848 }
8849
8850
8851 int
8852 get_nspace_item_status(struct vnode *vp, int32_t *status)
8853 {
8854 int i;
8855
8856 lck_mtx_lock(&nspace_handler_lock);
8857 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8858 if (nspace_items[i].vp == vp) {
8859 break;
8860 }
8861 }
8862
8863 if (i >= MAX_NSPACE_ITEMS) {
8864 lck_mtx_unlock(&nspace_handler_lock);
8865 return ENOENT;
8866 }
8867
8868 *status = nspace_items[i].flags;
8869 lck_mtx_unlock(&nspace_handler_lock);
8870 return 0;
8871 }
8872
8873
8874 #if 0
8875 static int
8876 build_volfs_path(struct vnode *vp, char *path, int *len)
8877 {
8878 struct vnode_attr va;
8879 int ret;
8880
8881 VATTR_INIT(&va);
8882 VATTR_WANTED(&va, va_fsid);
8883 VATTR_WANTED(&va, va_fileid);
8884
8885 if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
8886 *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
8887 ret = -1;
8888 } else {
8889 *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
8890 ret = 0;
8891 }
8892
8893 return ret;
8894 }
8895 #endif
8896
8897 //
8898 // Note: this function does NOT check permissions on all of the
8899 // parent directories leading to this vnode. It should only be
8900 // called on behalf of a root process. Otherwise a process may
8901 // get access to a file because the file itself is readable even
8902 // though its parent directories would prevent access.
8903 //
8904 static int
8905 vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
8906 {
8907 int error, action;
8908
8909 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8910 return error;
8911 }
8912
8913 #if CONFIG_MACF
8914 error = mac_vnode_check_open(ctx, vp, fmode);
8915 if (error)
8916 return error;
8917 #endif
8918
8919 /* compute action to be authorized */
8920 action = 0;
8921 if (fmode & FREAD) {
8922 action |= KAUTH_VNODE_READ_DATA;
8923 }
8924 if (fmode & (FWRITE | O_TRUNC)) {
8925 /*
8926 * If we are writing, appending, and not truncating,
8927 * indicate that we are appending so that if the
8928 * UF_APPEND or SF_APPEND bits are set, we do not deny
8929 * the open.
8930 */
8931 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
8932 action |= KAUTH_VNODE_APPEND_DATA;
8933 } else {
8934 action |= KAUTH_VNODE_WRITE_DATA;
8935 }
8936 }
8937
8938 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
8939 return error;
8940
8941
8942 //
8943 // if the vnode is tagged VOPENEVT and the current process
8944 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
8945 // flag to the open mode so that this open won't count against
8946 // the vnode when carbon delete() does a vnode_isinuse() to see
8947 // if a file is currently in use. this allows spotlight
8948 // importers to not interfere with carbon apps that depend on
8949 // the no-delete-if-busy semantics of carbon delete().
8950 //
8951 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
8952 fmode |= O_EVTONLY;
8953 }
8954
8955 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
8956 return error;
8957 }
8958 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
8959 VNOP_CLOSE(vp, fmode, ctx);
8960 return error;
8961 }
8962
8963 /* Call out to allow 3rd party notification of open.
8964 * Ignore result of kauth_authorize_fileop call.
8965 */
8966 #if CONFIG_MACF
8967 mac_vnode_notify_open(ctx, vp, fmode);
8968 #endif
8969 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
8970 (uintptr_t)vp, 0);
8971
8972
8973 return 0;
8974 }
8975
8976 static int
8977 wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
8978 {
8979 int i, error=0, unblock=0;
8980 task_t curtask;
8981
8982 lck_mtx_lock(&nspace_handler_exclusion_lock);
8983 if (nspace_handlers[nspace_type].handler_busy) {
8984 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8985 return EBUSY;
8986 }
8987 nspace_handlers[nspace_type].handler_busy = 1;
8988 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8989
8990 /*
8991 * Any process that gets here will be one of the namespace handlers.
8992 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
8993 * as we can cause deadlocks to occur, because the namespace handler may prevent
8994 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
8995 * process.
8996 */
8997 curtask = current_task();
8998 bsd_set_dependency_capable (curtask);
8999
9000 lck_mtx_lock(&nspace_handler_lock);
9001 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9002 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
9003 nspace_handlers[nspace_type].handler_proc = current_proc();
9004 }
9005
9006 while (error == 0) {
9007
9008 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9009 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9010 if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9011 continue;
9012 }
9013 break;
9014 }
9015 }
9016
9017 if (i < MAX_NSPACE_ITEMS) {
9018 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
9019 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
9020 nspace_items[i].token = ++nspace_token_id;
9021
9022 if (nspace_items[i].vp) {
9023 struct fileproc *fp;
9024 int32_t indx, fmode;
9025 struct proc *p = current_proc();
9026 vfs_context_t ctx = vfs_context_current();
9027 struct vnode_attr va;
9028
9029
9030 /*
9031 * Use vnode pointer to acquire a file descriptor for
9032 * hand-off to userland
9033 */
9034 fmode = nspace_open_flags_for_type(nspace_type);
9035 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
9036 if (error) {
9037 unblock = 1;
9038 break;
9039 }
9040 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
9041 if (error) {
9042 unblock = 1;
9043 vnode_put(nspace_items[i].vp);
9044 break;
9045 }
9046
9047 if ((error = falloc(p, &fp, &indx, ctx))) {
9048 vn_close(nspace_items[i].vp, fmode, ctx);
9049 vnode_put(nspace_items[i].vp);
9050 unblock = 1;
9051 break;
9052 }
9053
9054 fp->f_fglob->fg_flag = fmode;
9055 fp->f_fglob->fg_ops = &vnops;
9056 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
9057
9058 proc_fdlock(p);
9059 procfdtbl_releasefd(p, indx, NULL);
9060 fp_drop(p, indx, fp, 1);
9061 proc_fdunlock(p);
9062
9063 /*
9064 * All variants of the namespace handler struct support these three fields:
9065 * token, flags, and the FD pointer
9066 */
9067 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
9068 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
9069 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
9070
9071 /*
9072 * Handle optional fields:
9073 * extended version support an info ptr (offset, length), and the
9074 *
9075 * namedata version supports a unique per-link object ID
9076 *
9077 */
9078 if (nhd->infoptr) {
9079 uio_t uio = (uio_t)nspace_items[i].arg;
9080 uint64_t u_offset, u_length;
9081
9082 if (uio) {
9083 u_offset = uio_offset(uio);
9084 u_length = uio_resid(uio);
9085 } else {
9086 u_offset = 0;
9087 u_length = 0;
9088 }
9089 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
9090 error = copyout(&u_length, nhd->infoptr+sizeof(uint64_t), sizeof(uint64_t));
9091 }
9092
9093 if (nhd->objid) {
9094 VATTR_INIT(&va);
9095 VATTR_WANTED(&va, va_linkid);
9096 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
9097 if (error == 0 ) {
9098 uint64_t linkid = 0;
9099 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
9100 linkid = (uint64_t)va.va_linkid;
9101 }
9102 error = copyout (&linkid, nhd->objid, sizeof(uint64_t));
9103 }
9104 }
9105
9106 if (error) {
9107 vn_close(nspace_items[i].vp, fmode, ctx);
9108 fp_free(p, indx, fp);
9109 unblock = 1;
9110 }
9111
9112 vnode_put(nspace_items[i].vp);
9113
9114 break;
9115 } else {
9116 printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
9117 i, nspace_items[i].vp, error, nspace_items[i].vp->v_name);
9118 }
9119
9120 } else {
9121 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
9122 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9123 error = EINVAL;
9124 break;
9125 }
9126
9127 }
9128 }
9129
9130 if (unblock) {
9131 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9132 vnode_lock_spin(nspace_items[i].vp);
9133 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9134 vnode_unlock(nspace_items[i].vp);
9135 }
9136 nspace_items[i].vp = NULL;
9137 nspace_items[i].vid = 0;
9138 nspace_items[i].flags = NSPACE_ITEM_DONE;
9139 nspace_items[i].token = 0;
9140
9141 wakeup((caddr_t)&(nspace_items[i].vp));
9142 }
9143
9144 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
9145 // just go through every snapshot event and unblock it immediately.
9146 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9147 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9148 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9149 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9150 nspace_items[i].vp = NULL;
9151 nspace_items[i].vid = 0;
9152 nspace_items[i].flags = NSPACE_ITEM_DONE;
9153 nspace_items[i].token = 0;
9154
9155 wakeup((caddr_t)&(nspace_items[i].vp));
9156 }
9157 }
9158 }
9159 }
9160 }
9161
9162 lck_mtx_unlock(&nspace_handler_lock);
9163
9164 lck_mtx_lock(&nspace_handler_exclusion_lock);
9165 nspace_handlers[nspace_type].handler_busy = 0;
9166 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9167
9168 return error;
9169 }
9170
9171 static inline int validate_namespace_args (int is64bit, int size) {
9172
9173 if (is64bit) {
9174 /* Must be one of these */
9175 if (size == sizeof(user64_namespace_handler_info)) {
9176 goto sizeok;
9177 }
9178 if (size == sizeof(user64_namespace_handler_info_ext)) {
9179 goto sizeok;
9180 }
9181 if (size == sizeof(user64_namespace_handler_data)) {
9182 goto sizeok;
9183 }
9184 return EINVAL;
9185 }
9186 else {
9187 /* 32 bit -- must be one of these */
9188 if (size == sizeof(user32_namespace_handler_info)) {
9189 goto sizeok;
9190 }
9191 if (size == sizeof(user32_namespace_handler_info_ext)) {
9192 goto sizeok;
9193 }
9194 if (size == sizeof(user32_namespace_handler_data)) {
9195 goto sizeok;
9196 }
9197 return EINVAL;
9198 }
9199
9200 sizeok:
9201
9202 return 0;
9203
9204 }
9205
9206 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
9207 {
9208 int error = 0;
9209 namespace_handler_data nhd;
9210
9211 bzero (&nhd, sizeof(namespace_handler_data));
9212
9213 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
9214 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9215 return EINVAL;
9216 }
9217
9218 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9219 return error;
9220 }
9221
9222 error = validate_namespace_args (is64bit, size);
9223 if (error) {
9224 return error;
9225 }
9226
9227 /* Copy in the userland pointers into our kernel-only struct */
9228
9229 if (is64bit) {
9230 /* 64 bit userland structures */
9231 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
9232 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
9233 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
9234
9235 /* If the size is greater than the standard info struct, add in extra fields */
9236 if (size > (sizeof(user64_namespace_handler_info))) {
9237 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
9238 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
9239 }
9240 if (size == (sizeof(user64_namespace_handler_data))) {
9241 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
9242 }
9243 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9244 }
9245 }
9246 else {
9247 /* 32 bit userland structures */
9248 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
9249 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
9250 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
9251
9252 if (size > (sizeof(user32_namespace_handler_info))) {
9253 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
9254 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
9255 }
9256 if (size == (sizeof(user32_namespace_handler_data))) {
9257 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
9258 }
9259 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9260 }
9261 }
9262
9263 return wait_for_namespace_event(&nhd, nspace_type);
9264 }
9265
9266 /*
9267 * Make a filesystem-specific control call:
9268 */
9269 /* ARGSUSED */
9270 static int
9271 fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
9272 {
9273 int error=0;
9274 boolean_t is64bit;
9275 u_int size;
9276 #define STK_PARAMS 128
9277 char stkbuf[STK_PARAMS];
9278 caddr_t data, memp;
9279 vnode_t vp = *arg_vp;
9280
9281 size = IOCPARM_LEN(cmd);
9282 if (size > IOCPARM_MAX) return (EINVAL);
9283
9284 is64bit = proc_is64bit(p);
9285
9286 memp = NULL;
9287
9288
9289 /*
9290 * ensure the buffer is large enough for underlying calls
9291 */
9292 #ifndef HFSIOC_GETPATH
9293 typedef char pn_t[MAXPATHLEN];
9294 #define HFSIOC_GETPATH _IOWR('h', 13, pn_t)
9295 #endif
9296
9297 #ifndef HFS_GETPATH
9298 #define HFS_GETPATH IOCBASECMD(HFSIOC_GETPATH)
9299 #endif
9300 if (IOCBASECMD(cmd) == HFS_GETPATH) {
9301 /* Round up to MAXPATHLEN regardless of user input */
9302 size = MAXPATHLEN;
9303 }
9304
9305 if (size > sizeof (stkbuf)) {
9306 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
9307 data = memp;
9308 } else {
9309 data = &stkbuf[0];
9310 };
9311
9312 if (cmd & IOC_IN) {
9313 if (size) {
9314 error = copyin(udata, data, size);
9315 if (error) {
9316 if (memp) {
9317 kfree (memp, size);
9318 }
9319 return error;
9320 }
9321 } else {
9322 if (is64bit) {
9323 *(user_addr_t *)data = udata;
9324 }
9325 else {
9326 *(uint32_t *)data = (uint32_t)udata;
9327 }
9328 };
9329 } else if ((cmd & IOC_OUT) && size) {
9330 /*
9331 * Zero the buffer so the user always
9332 * gets back something deterministic.
9333 */
9334 bzero(data, size);
9335 } else if (cmd & IOC_VOID) {
9336 if (is64bit) {
9337 *(user_addr_t *)data = udata;
9338 }
9339 else {
9340 *(uint32_t *)data = (uint32_t)udata;
9341 }
9342 }
9343
9344 /* Check to see if it's a generic command */
9345 switch (IOCBASECMD(cmd)) {
9346
9347 case FSCTL_SYNC_VOLUME: {
9348 mount_t mp = vp->v_mount;
9349 int arg = *(uint32_t*)data;
9350
9351 /* record vid of vp so we can drop it below. */
9352 uint32_t vvid = vp->v_id;
9353
9354 /*
9355 * Then grab mount_iterref so that we can release the vnode.
9356 * Without this, a thread may call vnode_iterate_prepare then
9357 * get into a deadlock because we've never released the root vp
9358 */
9359 error = mount_iterref (mp, 0);
9360 if (error) {
9361 break;
9362 }
9363 vnode_put(vp);
9364
9365 /* issue the sync for this volume */
9366 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
9367
9368 /*
9369 * Then release the mount_iterref once we're done syncing; it's not
9370 * needed for the VNOP_IOCTL below
9371 */
9372 mount_iterdrop(mp);
9373
9374 if (arg & FSCTL_SYNC_FULLSYNC) {
9375 /* re-obtain vnode iocount on the root vp, if possible */
9376 error = vnode_getwithvid (vp, vvid);
9377 if (error == 0) {
9378 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
9379 vnode_put (vp);
9380 }
9381 }
9382 /* mark the argument VP as having been released */
9383 *arg_vp = NULL;
9384 }
9385 break;
9386
9387 case FSCTL_ROUTEFS_SETROUTEID: {
9388 #if ROUTEFS
9389 char routepath[MAXPATHLEN];
9390 size_t len = 0;
9391
9392 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9393 break;
9394 }
9395 bzero(routepath, MAXPATHLEN);
9396 error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
9397 if (error) {
9398 break;
9399 }
9400 error = routefs_kernel_mount(routepath);
9401 if (error) {
9402 break;
9403 }
9404 #endif
9405 }
9406 break;
9407
9408 case FSCTL_SET_PACKAGE_EXTS: {
9409 user_addr_t ext_strings;
9410 uint32_t num_entries;
9411 uint32_t max_width;
9412
9413 if ( (is64bit && size != sizeof(user64_package_ext_info))
9414 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
9415
9416 // either you're 64-bit and passed a 64-bit struct or
9417 // you're 32-bit and passed a 32-bit struct. otherwise
9418 // it's not ok.
9419 error = EINVAL;
9420 break;
9421 }
9422
9423 if (is64bit) {
9424 ext_strings = ((user64_package_ext_info *)data)->strings;
9425 num_entries = ((user64_package_ext_info *)data)->num_entries;
9426 max_width = ((user64_package_ext_info *)data)->max_width;
9427 } else {
9428 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
9429 num_entries = ((user32_package_ext_info *)data)->num_entries;
9430 max_width = ((user32_package_ext_info *)data)->max_width;
9431 }
9432 error = set_package_extensions_table(ext_strings, num_entries, max_width);
9433 }
9434 break;
9435
9436 /* namespace handlers */
9437 case FSCTL_NAMESPACE_HANDLER_GET: {
9438 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
9439 }
9440 break;
9441
9442 /* Snapshot handlers */
9443 case FSCTL_OLD_SNAPSHOT_HANDLER_GET: {
9444 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
9445 }
9446 break;
9447
9448 case FSCTL_SNAPSHOT_HANDLER_GET_EXT: {
9449 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
9450 }
9451 break;
9452
9453 case FSCTL_NAMESPACE_HANDLER_UPDATE: {
9454 uint32_t token, val;
9455 int i;
9456
9457 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
9458 break;
9459 }
9460
9461 if (!nspace_is_special_process(p)) {
9462 error = EINVAL;
9463 break;
9464 }
9465
9466 token = ((uint32_t *)data)[0];
9467 val = ((uint32_t *)data)[1];
9468
9469 lck_mtx_lock(&nspace_handler_lock);
9470
9471 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9472 if (nspace_items[i].token == token) {
9473 break; /* exit for loop, not case stmt */
9474 }
9475 }
9476
9477 if (i >= MAX_NSPACE_ITEMS) {
9478 error = ENOENT;
9479 } else {
9480 //
9481 // if this bit is set, when resolve_nspace_item() times out
9482 // it will loop and go back to sleep.
9483 //
9484 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
9485 }
9486
9487 lck_mtx_unlock(&nspace_handler_lock);
9488
9489 if (error) {
9490 printf("nspace-handler-update: did not find token %u\n", token);
9491 }
9492 }
9493 break;
9494
9495 case FSCTL_NAMESPACE_HANDLER_UNBLOCK: {
9496 uint32_t token, val;
9497 int i;
9498
9499 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
9500 break;
9501 }
9502
9503 if (!nspace_is_special_process(p)) {
9504 error = EINVAL;
9505 break;
9506 }
9507
9508 token = ((uint32_t *)data)[0];
9509 val = ((uint32_t *)data)[1];
9510
9511 lck_mtx_lock(&nspace_handler_lock);
9512
9513 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9514 if (nspace_items[i].token == token) {
9515 break; /* exit for loop, not case statement */
9516 }
9517 }
9518
9519 if (i >= MAX_NSPACE_ITEMS) {
9520 printf("nspace-handler-unblock: did not find token %u\n", token);
9521 error = ENOENT;
9522 } else {
9523 if (val == 0 && nspace_items[i].vp) {
9524 vnode_lock_spin(nspace_items[i].vp);
9525 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9526 vnode_unlock(nspace_items[i].vp);
9527 }
9528
9529 nspace_items[i].vp = NULL;
9530 nspace_items[i].arg = NULL;
9531 nspace_items[i].op = 0;
9532 nspace_items[i].vid = 0;
9533 nspace_items[i].flags = NSPACE_ITEM_DONE;
9534 nspace_items[i].token = 0;
9535
9536 wakeup((caddr_t)&(nspace_items[i].vp));
9537 }
9538
9539 lck_mtx_unlock(&nspace_handler_lock);
9540 }
9541 break;
9542
9543 case FSCTL_NAMESPACE_HANDLER_CANCEL: {
9544 uint32_t token, val;
9545 int i;
9546
9547 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
9548 break;
9549 }
9550
9551 if (!nspace_is_special_process(p)) {
9552 error = EINVAL;
9553 break;
9554 }
9555
9556 token = ((uint32_t *)data)[0];
9557 val = ((uint32_t *)data)[1];
9558
9559 lck_mtx_lock(&nspace_handler_lock);
9560
9561 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9562 if (nspace_items[i].token == token) {
9563 break; /* exit for loop, not case stmt */
9564 }
9565 }
9566
9567 if (i >= MAX_NSPACE_ITEMS) {
9568 printf("nspace-handler-cancel: did not find token %u\n", token);
9569 error = ENOENT;
9570 } else {
9571 if (nspace_items[i].vp) {
9572 vnode_lock_spin(nspace_items[i].vp);
9573 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9574 vnode_unlock(nspace_items[i].vp);
9575 }
9576
9577 nspace_items[i].vp = NULL;
9578 nspace_items[i].arg = NULL;
9579 nspace_items[i].vid = 0;
9580 nspace_items[i].token = val;
9581 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
9582 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
9583
9584 wakeup((caddr_t)&(nspace_items[i].vp));
9585 }
9586
9587 lck_mtx_unlock(&nspace_handler_lock);
9588 }
9589 break;
9590
9591 case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
9592 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9593 break;
9594 }
9595
9596 // we explicitly do not do the namespace_handler_proc check here
9597
9598 lck_mtx_lock(&nspace_handler_lock);
9599 snapshot_timestamp = ((uint32_t *)data)[0];
9600 wakeup(&nspace_item_idx);
9601 lck_mtx_unlock(&nspace_handler_lock);
9602 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
9603
9604 }
9605 break;
9606
9607 case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
9608 {
9609 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9610 break;
9611 }
9612
9613 lck_mtx_lock(&nspace_handler_lock);
9614 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
9615 lck_mtx_unlock(&nspace_handler_lock);
9616 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
9617 nspace_allow_virtual_devs ? "" : " NOT");
9618 error = 0;
9619
9620 }
9621 break;
9622
9623 case FSCTL_SET_FSTYPENAME_OVERRIDE:
9624 {
9625 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9626 break;
9627 }
9628 if (vp->v_mount) {
9629 mount_lock(vp->v_mount);
9630 if (data[0] != 0) {
9631 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
9632 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
9633 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
9634 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
9635 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
9636 }
9637 } else {
9638 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
9639 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
9640 }
9641 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
9642 vp->v_mount->fstypename_override[0] = '\0';
9643 }
9644 mount_unlock(vp->v_mount);
9645 }
9646 }
9647 break;
9648
9649 default: {
9650 /* Invoke the filesystem-specific code */
9651 error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
9652 }
9653
9654 } /* end switch stmt */
9655
9656 /*
9657 * if no errors, copy any data to user. Size was
9658 * already set and checked above.
9659 */
9660 if (error == 0 && (cmd & IOC_OUT) && size)
9661 error = copyout(data, udata, size);
9662
9663 if (memp) {
9664 kfree(memp, size);
9665 }
9666
9667 return error;
9668 }
9669
9670 /* ARGSUSED */
9671 int
9672 fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
9673 {
9674 int error;
9675 struct nameidata nd;
9676 u_long nameiflags;
9677 vnode_t vp = NULL;
9678 vfs_context_t ctx = vfs_context_current();
9679
9680 AUDIT_ARG(cmd, uap->cmd);
9681 AUDIT_ARG(value32, uap->options);
9682 /* Get the vnode for the file we are getting info on: */
9683 nameiflags = 0;
9684 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
9685 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
9686 UIO_USERSPACE, uap->path, ctx);
9687 if ((error = namei(&nd))) goto done;
9688 vp = nd.ni_vp;
9689 nameidone(&nd);
9690
9691 #if CONFIG_MACF
9692 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
9693 if (error) {
9694 goto done;
9695 }
9696 #endif
9697
9698 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
9699
9700 done:
9701 if (vp)
9702 vnode_put(vp);
9703 return error;
9704 }
9705 /* ARGSUSED */
9706 int
9707 ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
9708 {
9709 int error;
9710 vnode_t vp = NULL;
9711 vfs_context_t ctx = vfs_context_current();
9712 int fd = -1;
9713
9714 AUDIT_ARG(fd, uap->fd);
9715 AUDIT_ARG(cmd, uap->cmd);
9716 AUDIT_ARG(value32, uap->options);
9717
9718 /* Get the vnode for the file we are getting info on: */
9719 if ((error = file_vnode(uap->fd, &vp)))
9720 return error;
9721 fd = uap->fd;
9722 if ((error = vnode_getwithref(vp))) {
9723 file_drop(fd);
9724 return error;
9725 }
9726
9727 #if CONFIG_MACF
9728 if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
9729 file_drop(fd);
9730 vnode_put(vp);
9731 return error;
9732 }
9733 #endif
9734
9735 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
9736
9737 file_drop(fd);
9738
9739 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
9740 if (vp) {
9741 vnode_put(vp);
9742 }
9743
9744 return error;
9745 }
9746 /* end of fsctl system call */
9747
9748 /*
9749 * Retrieve the data of an extended attribute.
9750 */
9751 int
9752 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
9753 {
9754 vnode_t vp;
9755 struct nameidata nd;
9756 char attrname[XATTR_MAXNAMELEN+1];
9757 vfs_context_t ctx = vfs_context_current();
9758 uio_t auio = NULL;
9759 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9760 size_t attrsize = 0;
9761 size_t namelen;
9762 u_int32_t nameiflags;
9763 int error;
9764 char uio_buf[ UIO_SIZEOF(1) ];
9765
9766 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9767 return (EINVAL);
9768
9769 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9770 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
9771 if ((error = namei(&nd))) {
9772 return (error);
9773 }
9774 vp = nd.ni_vp;
9775 nameidone(&nd);
9776
9777 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9778 goto out;
9779 }
9780 if (xattr_protected(attrname)) {
9781 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
9782 error = EPERM;
9783 goto out;
9784 }
9785 }
9786 /*
9787 * the specific check for 0xffffffff is a hack to preserve
9788 * binaray compatibilty in K64 with applications that discovered
9789 * that passing in a buf pointer and a size of -1 resulted in
9790 * just the size of the indicated extended attribute being returned.
9791 * this isn't part of the documented behavior, but because of the
9792 * original implemtation's check for "uap->size > 0", this behavior
9793 * was allowed. In K32 that check turned into a signed comparison
9794 * even though uap->size is unsigned... in K64, we blow by that
9795 * check because uap->size is unsigned and doesn't get sign smeared
9796 * in the munger for a 32 bit user app. we also need to add a
9797 * check to limit the maximum size of the buffer being passed in...
9798 * unfortunately, the underlying fileystems seem to just malloc
9799 * the requested size even if the actual extended attribute is tiny.
9800 * because that malloc is for kernel wired memory, we have to put a
9801 * sane limit on it.
9802 *
9803 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
9804 * U64 running on K64 will yield -1 (64 bits wide)
9805 * U32/U64 running on K32 will yield -1 (32 bits wide)
9806 */
9807 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
9808 goto no_uio;
9809
9810 if (uap->value) {
9811 if (uap->size > (size_t)XATTR_MAXSIZE)
9812 uap->size = XATTR_MAXSIZE;
9813
9814 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9815 &uio_buf[0], sizeof(uio_buf));
9816 uio_addiov(auio, uap->value, uap->size);
9817 }
9818 no_uio:
9819 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
9820 out:
9821 vnode_put(vp);
9822
9823 if (auio) {
9824 *retval = uap->size - uio_resid(auio);
9825 } else {
9826 *retval = (user_ssize_t)attrsize;
9827 }
9828
9829 return (error);
9830 }
9831
9832 /*
9833 * Retrieve the data of an extended attribute.
9834 */
9835 int
9836 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
9837 {
9838 vnode_t vp;
9839 char attrname[XATTR_MAXNAMELEN+1];
9840 uio_t auio = NULL;
9841 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9842 size_t attrsize = 0;
9843 size_t namelen;
9844 int error;
9845 char uio_buf[ UIO_SIZEOF(1) ];
9846
9847 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9848 return (EINVAL);
9849
9850 if ( (error = file_vnode(uap->fd, &vp)) ) {
9851 return (error);
9852 }
9853 if ( (error = vnode_getwithref(vp)) ) {
9854 file_drop(uap->fd);
9855 return(error);
9856 }
9857 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9858 goto out;
9859 }
9860 if (xattr_protected(attrname)) {
9861 error = EPERM;
9862 goto out;
9863 }
9864 if (uap->value && uap->size > 0) {
9865 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9866 &uio_buf[0], sizeof(uio_buf));
9867 uio_addiov(auio, uap->value, uap->size);
9868 }
9869
9870 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
9871 out:
9872 (void)vnode_put(vp);
9873 file_drop(uap->fd);
9874
9875 if (auio) {
9876 *retval = uap->size - uio_resid(auio);
9877 } else {
9878 *retval = (user_ssize_t)attrsize;
9879 }
9880 return (error);
9881 }
9882
9883 /*
9884 * Set the data of an extended attribute.
9885 */
9886 int
9887 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
9888 {
9889 vnode_t vp;
9890 struct nameidata nd;
9891 char attrname[XATTR_MAXNAMELEN+1];
9892 vfs_context_t ctx = vfs_context_current();
9893 uio_t auio = NULL;
9894 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9895 size_t namelen;
9896 u_int32_t nameiflags;
9897 int error;
9898 char uio_buf[ UIO_SIZEOF(1) ];
9899
9900 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9901 return (EINVAL);
9902
9903 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9904 if (error == EPERM) {
9905 /* if the string won't fit in attrname, copyinstr emits EPERM */
9906 return (ENAMETOOLONG);
9907 }
9908 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9909 return error;
9910 }
9911 if (xattr_protected(attrname))
9912 return(EPERM);
9913 if (uap->size != 0 && uap->value == 0) {
9914 return (EINVAL);
9915 }
9916
9917 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9918 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
9919 if ((error = namei(&nd))) {
9920 return (error);
9921 }
9922 vp = nd.ni_vp;
9923 nameidone(&nd);
9924
9925 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9926 &uio_buf[0], sizeof(uio_buf));
9927 uio_addiov(auio, uap->value, uap->size);
9928
9929 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
9930 #if CONFIG_FSE
9931 if (error == 0) {
9932 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9933 FSE_ARG_VNODE, vp,
9934 FSE_ARG_DONE);
9935 }
9936 #endif
9937 vnode_put(vp);
9938 *retval = 0;
9939 return (error);
9940 }
9941
9942 /*
9943 * Set the data of an extended attribute.
9944 */
9945 int
9946 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
9947 {
9948 vnode_t vp;
9949 char attrname[XATTR_MAXNAMELEN+1];
9950 uio_t auio = NULL;
9951 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9952 size_t namelen;
9953 int error;
9954 char uio_buf[ UIO_SIZEOF(1) ];
9955 #if CONFIG_FSE
9956 vfs_context_t ctx = vfs_context_current();
9957 #endif
9958
9959 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9960 return (EINVAL);
9961
9962 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9963 if (error == EPERM) {
9964 /* if the string won't fit in attrname, copyinstr emits EPERM */
9965 return (ENAMETOOLONG);
9966 }
9967 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9968 return error;
9969 }
9970 if (xattr_protected(attrname))
9971 return(EPERM);
9972 if (uap->size != 0 && uap->value == 0) {
9973 return (EINVAL);
9974 }
9975 if ( (error = file_vnode(uap->fd, &vp)) ) {
9976 return (error);
9977 }
9978 if ( (error = vnode_getwithref(vp)) ) {
9979 file_drop(uap->fd);
9980 return(error);
9981 }
9982 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9983 &uio_buf[0], sizeof(uio_buf));
9984 uio_addiov(auio, uap->value, uap->size);
9985
9986 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
9987 #if CONFIG_FSE
9988 if (error == 0) {
9989 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9990 FSE_ARG_VNODE, vp,
9991 FSE_ARG_DONE);
9992 }
9993 #endif
9994 vnode_put(vp);
9995 file_drop(uap->fd);
9996 *retval = 0;
9997 return (error);
9998 }
9999
10000 /*
10001 * Remove an extended attribute.
10002 * XXX Code duplication here.
10003 */
10004 int
10005 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
10006 {
10007 vnode_t vp;
10008 struct nameidata nd;
10009 char attrname[XATTR_MAXNAMELEN+1];
10010 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10011 vfs_context_t ctx = vfs_context_current();
10012 size_t namelen;
10013 u_int32_t nameiflags;
10014 int error;
10015
10016 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10017 return (EINVAL);
10018
10019 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10020 if (error != 0) {
10021 return (error);
10022 }
10023 if (xattr_protected(attrname))
10024 return(EPERM);
10025 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10026 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
10027 if ((error = namei(&nd))) {
10028 return (error);
10029 }
10030 vp = nd.ni_vp;
10031 nameidone(&nd);
10032
10033 error = vn_removexattr(vp, attrname, uap->options, ctx);
10034 #if CONFIG_FSE
10035 if (error == 0) {
10036 add_fsevent(FSE_XATTR_REMOVED, ctx,
10037 FSE_ARG_VNODE, vp,
10038 FSE_ARG_DONE);
10039 }
10040 #endif
10041 vnode_put(vp);
10042 *retval = 0;
10043 return (error);
10044 }
10045
10046 /*
10047 * Remove an extended attribute.
10048 * XXX Code duplication here.
10049 */
10050 int
10051 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
10052 {
10053 vnode_t vp;
10054 char attrname[XATTR_MAXNAMELEN+1];
10055 size_t namelen;
10056 int error;
10057 #if CONFIG_FSE
10058 vfs_context_t ctx = vfs_context_current();
10059 #endif
10060
10061 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10062 return (EINVAL);
10063
10064 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10065 if (error != 0) {
10066 return (error);
10067 }
10068 if (xattr_protected(attrname))
10069 return(EPERM);
10070 if ( (error = file_vnode(uap->fd, &vp)) ) {
10071 return (error);
10072 }
10073 if ( (error = vnode_getwithref(vp)) ) {
10074 file_drop(uap->fd);
10075 return(error);
10076 }
10077
10078 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
10079 #if CONFIG_FSE
10080 if (error == 0) {
10081 add_fsevent(FSE_XATTR_REMOVED, ctx,
10082 FSE_ARG_VNODE, vp,
10083 FSE_ARG_DONE);
10084 }
10085 #endif
10086 vnode_put(vp);
10087 file_drop(uap->fd);
10088 *retval = 0;
10089 return (error);
10090 }
10091
10092 /*
10093 * Retrieve the list of extended attribute names.
10094 * XXX Code duplication here.
10095 */
10096 int
10097 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
10098 {
10099 vnode_t vp;
10100 struct nameidata nd;
10101 vfs_context_t ctx = vfs_context_current();
10102 uio_t auio = NULL;
10103 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10104 size_t attrsize = 0;
10105 u_int32_t nameiflags;
10106 int error;
10107 char uio_buf[ UIO_SIZEOF(1) ];
10108
10109 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10110 return (EINVAL);
10111
10112 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10113 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
10114 if ((error = namei(&nd))) {
10115 return (error);
10116 }
10117 vp = nd.ni_vp;
10118 nameidone(&nd);
10119 if (uap->namebuf != 0 && uap->bufsize > 0) {
10120 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
10121 &uio_buf[0], sizeof(uio_buf));
10122 uio_addiov(auio, uap->namebuf, uap->bufsize);
10123 }
10124
10125 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
10126
10127 vnode_put(vp);
10128 if (auio) {
10129 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10130 } else {
10131 *retval = (user_ssize_t)attrsize;
10132 }
10133 return (error);
10134 }
10135
10136 /*
10137 * Retrieve the list of extended attribute names.
10138 * XXX Code duplication here.
10139 */
10140 int
10141 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
10142 {
10143 vnode_t vp;
10144 uio_t auio = NULL;
10145 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10146 size_t attrsize = 0;
10147 int error;
10148 char uio_buf[ UIO_SIZEOF(1) ];
10149
10150 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10151 return (EINVAL);
10152
10153 if ( (error = file_vnode(uap->fd, &vp)) ) {
10154 return (error);
10155 }
10156 if ( (error = vnode_getwithref(vp)) ) {
10157 file_drop(uap->fd);
10158 return(error);
10159 }
10160 if (uap->namebuf != 0 && uap->bufsize > 0) {
10161 auio = uio_createwithbuffer(1, 0, spacetype,
10162 UIO_READ, &uio_buf[0], sizeof(uio_buf));
10163 uio_addiov(auio, uap->namebuf, uap->bufsize);
10164 }
10165
10166 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
10167
10168 vnode_put(vp);
10169 file_drop(uap->fd);
10170 if (auio) {
10171 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10172 } else {
10173 *retval = (user_ssize_t)attrsize;
10174 }
10175 return (error);
10176 }
10177
10178 static int fsgetpath_internal(
10179 vfs_context_t ctx, int volfs_id, uint64_t objid,
10180 vm_size_t bufsize, caddr_t buf, int *pathlen)
10181 {
10182 int error;
10183 struct mount *mp = NULL;
10184 vnode_t vp;
10185 int length;
10186 int bpflags;
10187
10188 if (bufsize > PAGE_SIZE) {
10189 return (EINVAL);
10190 }
10191
10192 if (buf == NULL) {
10193 return (ENOMEM);
10194 }
10195
10196 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
10197 error = ENOTSUP; /* unexpected failure */
10198 return ENOTSUP;
10199 }
10200
10201 unionget:
10202 if (objid == 2) {
10203 error = VFS_ROOT(mp, &vp, ctx);
10204 } else {
10205 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
10206 }
10207
10208 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
10209 /*
10210 * If the fileid isn't found and we're in a union
10211 * mount volume, then see if the fileid is in the
10212 * mounted-on volume.
10213 */
10214 struct mount *tmp = mp;
10215 mp = vnode_mount(tmp->mnt_vnodecovered);
10216 vfs_unbusy(tmp);
10217 if (vfs_busy(mp, LK_NOWAIT) == 0)
10218 goto unionget;
10219 } else {
10220 vfs_unbusy(mp);
10221 }
10222
10223 if (error) {
10224 return error;
10225 }
10226
10227 #if CONFIG_MACF
10228 error = mac_vnode_check_fsgetpath(ctx, vp);
10229 if (error) {
10230 vnode_put(vp);
10231 return error;
10232 }
10233 #endif
10234
10235 /* Obtain the absolute path to this vnode. */
10236 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
10237 bpflags |= BUILDPATH_CHECK_MOVED;
10238 error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
10239 vnode_put(vp);
10240
10241 if (error) {
10242 goto out;
10243 }
10244
10245 AUDIT_ARG(text, buf);
10246
10247 if (kdebug_enable) {
10248 long dbg_parms[NUMPARMS];
10249 int dbg_namelen;
10250
10251 dbg_namelen = (int)sizeof(dbg_parms);
10252
10253 if (length < dbg_namelen) {
10254 memcpy((char *)dbg_parms, buf, length);
10255 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
10256
10257 dbg_namelen = length;
10258 } else {
10259 memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
10260 }
10261
10262 kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
10263 }
10264
10265 *pathlen = (user_ssize_t)length; /* may be superseded by error */
10266
10267 out:
10268 return (error);
10269 }
10270
10271 /*
10272 * Obtain the full pathname of a file system object by id.
10273 *
10274 * This is a private SPI used by the File Manager.
10275 */
10276 __private_extern__
10277 int
10278 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
10279 {
10280 vfs_context_t ctx = vfs_context_current();
10281 fsid_t fsid;
10282 char *realpath;
10283 int length;
10284 int error;
10285
10286 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
10287 return (error);
10288 }
10289 AUDIT_ARG(value32, fsid.val[0]);
10290 AUDIT_ARG(value64, uap->objid);
10291 /* Restrict output buffer size for now. */
10292
10293 if (uap->bufsize > PAGE_SIZE) {
10294 return (EINVAL);
10295 }
10296 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
10297 if (realpath == NULL) {
10298 return (ENOMEM);
10299 }
10300
10301 error = fsgetpath_internal(
10302 ctx, fsid.val[0], uap->objid,
10303 uap->bufsize, realpath, &length);
10304
10305 if (error) {
10306 goto out;
10307 }
10308
10309 error = copyout((caddr_t)realpath, uap->buf, length);
10310
10311 *retval = (user_ssize_t)length; /* may be superseded by error */
10312 out:
10313 if (realpath) {
10314 FREE(realpath, M_TEMP);
10315 }
10316 return (error);
10317 }
10318
10319 /*
10320 * Common routine to handle various flavors of statfs data heading out
10321 * to user space.
10322 *
10323 * Returns: 0 Success
10324 * EFAULT
10325 */
10326 static int
10327 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
10328 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
10329 boolean_t partial_copy)
10330 {
10331 int error;
10332 int my_size, copy_size;
10333
10334 if (is_64_bit) {
10335 struct user64_statfs sfs;
10336 my_size = copy_size = sizeof(sfs);
10337 bzero(&sfs, my_size);
10338 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
10339 sfs.f_type = mp->mnt_vtable->vfc_typenum;
10340 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
10341 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
10342 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
10343 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
10344 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
10345 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
10346 sfs.f_files = (user64_long_t)sfsp->f_files;
10347 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
10348 sfs.f_fsid = sfsp->f_fsid;
10349 sfs.f_owner = sfsp->f_owner;
10350 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
10351 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
10352 } else {
10353 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
10354 }
10355 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
10356 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
10357
10358 if (partial_copy) {
10359 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
10360 }
10361 error = copyout((caddr_t)&sfs, bufp, copy_size);
10362 }
10363 else {
10364 struct user32_statfs sfs;
10365
10366 my_size = copy_size = sizeof(sfs);
10367 bzero(&sfs, my_size);
10368
10369 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
10370 sfs.f_type = mp->mnt_vtable->vfc_typenum;
10371 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
10372
10373 /*
10374 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
10375 * have to fudge the numbers here in that case. We inflate the blocksize in order
10376 * to reflect the filesystem size as best we can.
10377 */
10378 if ((sfsp->f_blocks > INT_MAX)
10379 /* Hack for 4061702 . I think the real fix is for Carbon to
10380 * look for some volume capability and not depend on hidden
10381 * semantics agreed between a FS and carbon.
10382 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
10383 * for Carbon to set bNoVolumeSizes volume attribute.
10384 * Without this the webdavfs files cannot be copied onto
10385 * disk as they look huge. This change should not affect
10386 * XSAN as they should not setting these to -1..
10387 */
10388 && (sfsp->f_blocks != 0xffffffffffffffffULL)
10389 && (sfsp->f_bfree != 0xffffffffffffffffULL)
10390 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
10391 int shift;
10392
10393 /*
10394 * Work out how far we have to shift the block count down to make it fit.
10395 * Note that it's possible to have to shift so far that the resulting
10396 * blocksize would be unreportably large. At that point, we will clip
10397 * any values that don't fit.
10398 *
10399 * For safety's sake, we also ensure that f_iosize is never reported as
10400 * being smaller than f_bsize.
10401 */
10402 for (shift = 0; shift < 32; shift++) {
10403 if ((sfsp->f_blocks >> shift) <= INT_MAX)
10404 break;
10405 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
10406 break;
10407 }
10408 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
10409 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
10410 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
10411 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
10412 #undef __SHIFT_OR_CLIP
10413 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
10414 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
10415 } else {
10416 /* filesystem is small enough to be reported honestly */
10417 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
10418 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
10419 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
10420 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
10421 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
10422 }
10423 sfs.f_files = (user32_long_t)sfsp->f_files;
10424 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
10425 sfs.f_fsid = sfsp->f_fsid;
10426 sfs.f_owner = sfsp->f_owner;
10427 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
10428 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
10429 } else {
10430 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
10431 }
10432 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
10433 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
10434
10435 if (partial_copy) {
10436 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
10437 }
10438 error = copyout((caddr_t)&sfs, bufp, copy_size);
10439 }
10440
10441 if (sizep != NULL) {
10442 *sizep = my_size;
10443 }
10444 return(error);
10445 }
10446
10447 /*
10448 * copy stat structure into user_stat structure.
10449 */
10450 void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
10451 {
10452 bzero(usbp, sizeof(*usbp));
10453
10454 usbp->st_dev = sbp->st_dev;
10455 usbp->st_ino = sbp->st_ino;
10456 usbp->st_mode = sbp->st_mode;
10457 usbp->st_nlink = sbp->st_nlink;
10458 usbp->st_uid = sbp->st_uid;
10459 usbp->st_gid = sbp->st_gid;
10460 usbp->st_rdev = sbp->st_rdev;
10461 #ifndef _POSIX_C_SOURCE
10462 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10463 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10464 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10465 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10466 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10467 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10468 #else
10469 usbp->st_atime = sbp->st_atime;
10470 usbp->st_atimensec = sbp->st_atimensec;
10471 usbp->st_mtime = sbp->st_mtime;
10472 usbp->st_mtimensec = sbp->st_mtimensec;
10473 usbp->st_ctime = sbp->st_ctime;
10474 usbp->st_ctimensec = sbp->st_ctimensec;
10475 #endif
10476 usbp->st_size = sbp->st_size;
10477 usbp->st_blocks = sbp->st_blocks;
10478 usbp->st_blksize = sbp->st_blksize;
10479 usbp->st_flags = sbp->st_flags;
10480 usbp->st_gen = sbp->st_gen;
10481 usbp->st_lspare = sbp->st_lspare;
10482 usbp->st_qspare[0] = sbp->st_qspare[0];
10483 usbp->st_qspare[1] = sbp->st_qspare[1];
10484 }
10485
10486 void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
10487 {
10488 bzero(usbp, sizeof(*usbp));
10489
10490 usbp->st_dev = sbp->st_dev;
10491 usbp->st_ino = sbp->st_ino;
10492 usbp->st_mode = sbp->st_mode;
10493 usbp->st_nlink = sbp->st_nlink;
10494 usbp->st_uid = sbp->st_uid;
10495 usbp->st_gid = sbp->st_gid;
10496 usbp->st_rdev = sbp->st_rdev;
10497 #ifndef _POSIX_C_SOURCE
10498 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10499 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10500 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10501 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10502 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10503 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10504 #else
10505 usbp->st_atime = sbp->st_atime;
10506 usbp->st_atimensec = sbp->st_atimensec;
10507 usbp->st_mtime = sbp->st_mtime;
10508 usbp->st_mtimensec = sbp->st_mtimensec;
10509 usbp->st_ctime = sbp->st_ctime;
10510 usbp->st_ctimensec = sbp->st_ctimensec;
10511 #endif
10512 usbp->st_size = sbp->st_size;
10513 usbp->st_blocks = sbp->st_blocks;
10514 usbp->st_blksize = sbp->st_blksize;
10515 usbp->st_flags = sbp->st_flags;
10516 usbp->st_gen = sbp->st_gen;
10517 usbp->st_lspare = sbp->st_lspare;
10518 usbp->st_qspare[0] = sbp->st_qspare[0];
10519 usbp->st_qspare[1] = sbp->st_qspare[1];
10520 }
10521
10522 /*
10523 * copy stat64 structure into user_stat64 structure.
10524 */
10525 void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
10526 {
10527 bzero(usbp, sizeof(*usbp));
10528
10529 usbp->st_dev = sbp->st_dev;
10530 usbp->st_ino = sbp->st_ino;
10531 usbp->st_mode = sbp->st_mode;
10532 usbp->st_nlink = sbp->st_nlink;
10533 usbp->st_uid = sbp->st_uid;
10534 usbp->st_gid = sbp->st_gid;
10535 usbp->st_rdev = sbp->st_rdev;
10536 #ifndef _POSIX_C_SOURCE
10537 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10538 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10539 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10540 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10541 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10542 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10543 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
10544 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
10545 #else
10546 usbp->st_atime = sbp->st_atime;
10547 usbp->st_atimensec = sbp->st_atimensec;
10548 usbp->st_mtime = sbp->st_mtime;
10549 usbp->st_mtimensec = sbp->st_mtimensec;
10550 usbp->st_ctime = sbp->st_ctime;
10551 usbp->st_ctimensec = sbp->st_ctimensec;
10552 usbp->st_birthtime = sbp->st_birthtime;
10553 usbp->st_birthtimensec = sbp->st_birthtimensec;
10554 #endif
10555 usbp->st_size = sbp->st_size;
10556 usbp->st_blocks = sbp->st_blocks;
10557 usbp->st_blksize = sbp->st_blksize;
10558 usbp->st_flags = sbp->st_flags;
10559 usbp->st_gen = sbp->st_gen;
10560 usbp->st_lspare = sbp->st_lspare;
10561 usbp->st_qspare[0] = sbp->st_qspare[0];
10562 usbp->st_qspare[1] = sbp->st_qspare[1];
10563 }
10564
10565 void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
10566 {
10567 bzero(usbp, sizeof(*usbp));
10568
10569 usbp->st_dev = sbp->st_dev;
10570 usbp->st_ino = sbp->st_ino;
10571 usbp->st_mode = sbp->st_mode;
10572 usbp->st_nlink = sbp->st_nlink;
10573 usbp->st_uid = sbp->st_uid;
10574 usbp->st_gid = sbp->st_gid;
10575 usbp->st_rdev = sbp->st_rdev;
10576 #ifndef _POSIX_C_SOURCE
10577 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10578 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10579 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10580 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10581 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10582 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10583 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
10584 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
10585 #else
10586 usbp->st_atime = sbp->st_atime;
10587 usbp->st_atimensec = sbp->st_atimensec;
10588 usbp->st_mtime = sbp->st_mtime;
10589 usbp->st_mtimensec = sbp->st_mtimensec;
10590 usbp->st_ctime = sbp->st_ctime;
10591 usbp->st_ctimensec = sbp->st_ctimensec;
10592 usbp->st_birthtime = sbp->st_birthtime;
10593 usbp->st_birthtimensec = sbp->st_birthtimensec;
10594 #endif
10595 usbp->st_size = sbp->st_size;
10596 usbp->st_blocks = sbp->st_blocks;
10597 usbp->st_blksize = sbp->st_blksize;
10598 usbp->st_flags = sbp->st_flags;
10599 usbp->st_gen = sbp->st_gen;
10600 usbp->st_lspare = sbp->st_lspare;
10601 usbp->st_qspare[0] = sbp->st_qspare[0];
10602 usbp->st_qspare[1] = sbp->st_qspare[1];
10603 }
10604
10605 /*
10606 * Purge buffer cache for simulating cold starts
10607 */
10608 static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
10609 {
10610 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
10611
10612 return VNODE_RETURNED;
10613 }
10614
10615 static int vfs_purge_callback(mount_t mp, __unused void * arg)
10616 {
10617 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
10618
10619 return VFS_RETURNED;
10620 }
10621
10622 int
10623 vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
10624 {
10625 if (!kauth_cred_issuser(kauth_cred_get()))
10626 return EPERM;
10627
10628 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
10629
10630 return 0;
10631 }
10632