]> git.saurik.com Git - apple/xnu.git/blob - bsd/vfs/vfs_syscalls.c
e2b135a7bfc2f9d567bbae06f35ee652c6ceff46
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
1 /*
2 * Copyright (c) 1995-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
80 #include <sys/stat.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
87 #include <sys/mman.h>
88 #include <sys/dirent.h>
89 #include <sys/attr.h>
90 #include <sys/sysctl.h>
91 #include <sys/ubc.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <machine/cons.h>
103 #include <machine/limits.h>
104 #include <miscfs/specfs/specdev.h>
105
106 #include <security/audit/audit.h>
107 #include <bsm/audit_kevents.h>
108
109 #include <mach/mach_types.h>
110 #include <kern/kern_types.h>
111 #include <kern/kalloc.h>
112 #include <kern/task.h>
113
114 #include <vm/vm_pageout.h>
115
116 #include <libkern/OSAtomic.h>
117 #include <pexpert/pexpert.h>
118
119 #if CONFIG_MACF
120 #include <security/mac.h>
121 #include <security/mac_framework.h>
122 #endif
123
124 #if CONFIG_FSE
125 #define GET_PATH(x) \
126 (x) = get_pathbuff();
127 #define RELEASE_PATH(x) \
128 release_pathbuff(x);
129 #else
130 #define GET_PATH(x) \
131 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
132 #define RELEASE_PATH(x) \
133 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
134 #endif /* CONFIG_FSE */
135
136 /* struct for checkdirs iteration */
/* struct for checkdirs iteration */
struct cdirargs {
	vnode_t olddp;	/* old current/root directory vnode being replaced */
	vnode_t newdp;	/* new vnode to point cdir/rdir at instead */
};
141 /* callback for checkdirs iteration */
142 static int checkdirs_callback(proc_t p, void * arg);
143
144 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
145 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
146 void enablequotas(struct mount *mp, vfs_context_t ctx);
147 static int getfsstat_callback(mount_t mp, void * arg);
148 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
149 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
150 static int sync_callback(mount_t, void *);
151 static void sync_thread(void *, __unused wait_result_t);
152 static int sync_async(int);
153 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
154 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
155 boolean_t partial_copy);
156 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
157 user_addr_t bufp);
158 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
159 static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
160 struct componentname *cnp, user_addr_t fsmountargs,
161 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
162 vfs_context_t ctx);
163 void vfs_notify_mount(vnode_t pdvp);
164
165 int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
166
167 struct fd_vn_data * fg_vn_data_alloc(void);
168
169 /*
170 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
171 * Concurrent lookups (or lookups by ids) on hard links can cause the
172 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
173 * does) to return ENOENT as the path cannot be returned from the name cache
174 * alone. We have no option but to retry and hope to get one namei->reverse path
175 * generation done without an intervening lookup, lookup by id on the hard link
176 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
177 * which currently are the MAC hooks for rename, unlink and rmdir.
178 */
179 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
180
181 static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
182
183 static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
184
185 #ifdef CONFIG_IMGSRC_ACCESS
186 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
187 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
188 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
189 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
190 static void mount_end_update(mount_t mp);
191 static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
192 #endif /* CONFIG_IMGSRC_ACCESS */
193
194 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
195
196 __private_extern__
197 int sync_internal(void);
198
199 __private_extern__
200 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
201
202 extern lck_grp_t *fd_vn_lck_grp;
203 extern lck_grp_attr_t *fd_vn_lck_grp_attr;
204 extern lck_attr_t *fd_vn_lck_attr;
205
206 /*
207 * incremented each time a mount or unmount operation occurs
208 * used to invalidate the cached value of the rootvp in the
209 * mount structure utilized by cache_lookup_path
210 */
211 uint32_t mount_generation = 0;
212
213 /* counts number of mount and unmount operations */
214 unsigned int vfs_nummntops=0;
215
216 extern const struct fileops vnops;
217 #if CONFIG_APPLEDOUBLE
218 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
219 #endif /* CONFIG_APPLEDOUBLE */
220
221 typedef uint32_t vfs_rename_flags_t;
222 #if CONFIG_SECLUDED_RENAME
223 enum {
224 VFS_SECLUDE_RENAME = 0x00000001
225 };
226 #endif
227
228 /*
229 * Virtual File System System Calls
230 */
231
232 #if NFSCLIENT || DEVFS
233 /*
234 * Private in-kernel mounting spi (NFS only, not exported)
235 */
236 __private_extern__
237 boolean_t
238 vfs_iskernelmount(mount_t mp)
239 {
240 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
241 }
242
243 __private_extern__
244 int
245 kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
246 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
247 {
248 struct nameidata nd;
249 boolean_t did_namei;
250 int error;
251
252 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
253 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
254
255 /*
256 * Get the vnode to be covered if it's not supplied
257 */
258 if (vp == NULLVP) {
259 error = namei(&nd);
260 if (error)
261 return (error);
262 vp = nd.ni_vp;
263 pvp = nd.ni_dvp;
264 did_namei = TRUE;
265 } else {
266 char *pnbuf = CAST_DOWN(char *, path);
267
268 nd.ni_cnd.cn_pnbuf = pnbuf;
269 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
270 did_namei = FALSE;
271 }
272
273 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
274 syscall_flags, kern_flags, NULL, TRUE, ctx);
275
276 if (did_namei) {
277 vnode_put(vp);
278 vnode_put(pvp);
279 nameidone(&nd);
280 }
281
282 return (error);
283 }
284 #endif /* NFSCLIENT || DEVFS */
285
286 /*
287 * Mount a file system.
288 */
289 /* ARGSUSED */
290 int
291 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
292 {
293 struct __mac_mount_args muap;
294
295 muap.type = uap->type;
296 muap.path = uap->path;
297 muap.flags = uap->flags;
298 muap.data = uap->data;
299 muap.mac_p = USER_ADDR_NULL;
300 return (__mac_mount(p, &muap, retval));
301 }
302
/*
 * vfs_notify_mount:
 *   Broadcast that a mount has occurred.  Signals VQ_MOUNT to vfs event
 *   listeners and posts a NOTE_WRITE knote on the parent of the covered
 *   vnode so kqueue watchers of that directory see the change.
 */
void
vfs_notify_mount(vnode_t pdvp)
{
	/* system-wide mount event (no specific mount point attached) */
	vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
	/* wake watchers of the directory the new mount appeared under */
	lock_vnode_and_post(pdvp, NOTE_WRITE);
}
309
310 /*
311 * __mac_mount:
312 * Mount a file system taking into account MAC label behavior.
313 * See mount(2) man page for more information
314 *
315 * Parameters: p Process requesting the mount
316 * uap User argument descriptor (see below)
317 * retval (ignored)
318 *
319 * Indirect: uap->type Filesystem type
320 * uap->path Path to mount
321 * uap->data Mount arguments
322 * uap->mac_p MAC info
323 * uap->flags Mount flags
324 *
325 *
326 * Returns: 0 Success
327 * !0 Not success
328 */
329 boolean_t root_fs_upgrade_try = FALSE;
330
331 int
332 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
333 {
334 vnode_t pvp = NULL;
335 vnode_t vp = NULL;
336 int need_nameidone = 0;
337 vfs_context_t ctx = vfs_context_current();
338 char fstypename[MFSNAMELEN];
339 struct nameidata nd;
340 size_t dummy=0;
341 char *labelstr = NULL;
342 int flags = uap->flags;
343 int error;
344 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
345 boolean_t is_64bit = IS_64BIT_PROCESS(p);
346 #else
347 #pragma unused(p)
348 #endif
349 /*
350 * Get the fs type name from user space
351 */
352 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
353 if (error)
354 return (error);
355
356 /*
357 * Get the vnode to be covered
358 */
359 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
360 UIO_USERSPACE, uap->path, ctx);
361 error = namei(&nd);
362 if (error) {
363 goto out;
364 }
365 need_nameidone = 1;
366 vp = nd.ni_vp;
367 pvp = nd.ni_dvp;
368
369 #ifdef CONFIG_IMGSRC_ACCESS
370 /* Mounting image source cannot be batched with other operations */
371 if (flags == MNT_IMGSRC_BY_INDEX) {
372 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
373 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
374 goto out;
375 }
376 #endif /* CONFIG_IMGSRC_ACCESS */
377
378 #if CONFIG_MACF
379 /*
380 * Get the label string (if any) from user space
381 */
382 if (uap->mac_p != USER_ADDR_NULL) {
383 struct user_mac mac;
384 size_t ulen = 0;
385
386 if (is_64bit) {
387 struct user64_mac mac64;
388 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
389 mac.m_buflen = mac64.m_buflen;
390 mac.m_string = mac64.m_string;
391 } else {
392 struct user32_mac mac32;
393 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
394 mac.m_buflen = mac32.m_buflen;
395 mac.m_string = mac32.m_string;
396 }
397 if (error)
398 goto out;
399 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
400 (mac.m_buflen < 2)) {
401 error = EINVAL;
402 goto out;
403 }
404 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
405 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
406 if (error) {
407 goto out;
408 }
409 AUDIT_ARG(mac_string, labelstr);
410 }
411 #endif /* CONFIG_MACF */
412
413 AUDIT_ARG(fflags, flags);
414
415 if ((vp->v_flag & VROOT) &&
416 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
417 if (!(flags & MNT_UNION)) {
418 flags |= MNT_UPDATE;
419 }
420 else {
421 /*
422 * For a union mount on '/', treat it as fresh
423 * mount instead of update.
424 * Otherwise, union mouting on '/' used to panic the
425 * system before, since mnt_vnodecovered was found to
426 * be NULL for '/' which is required for unionlookup
427 * after it gets ENOENT on union mount.
428 */
429 flags = (flags & ~(MNT_UPDATE));
430 }
431
432 #ifdef SECURE_KERNEL
433 if ((flags & MNT_RDONLY) == 0) {
434 /* Release kernels are not allowed to mount "/" as rw */
435 error = EPERM;
436 goto out;
437 }
438 #endif
439 /*
440 * See 7392553 for more details on why this check exists.
441 * Suffice to say: If this check is ON and something tries
442 * to mount the rootFS RW, we'll turn off the codesign
443 * bitmap optimization.
444 */
445 #if CHECK_CS_VALIDATION_BITMAP
446 if ((flags & MNT_RDONLY) == 0 ) {
447 root_fs_upgrade_try = TRUE;
448 }
449 #endif
450 }
451
452 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
453 labelstr, FALSE, ctx);
454
455 out:
456
457 #if CONFIG_MACF
458 if (labelstr)
459 FREE(labelstr, M_MACTEMP);
460 #endif /* CONFIG_MACF */
461
462 if (vp) {
463 vnode_put(vp);
464 }
465 if (pvp) {
466 vnode_put(pvp);
467 }
468 if (need_nameidone) {
469 nameidone(&nd);
470 }
471
472 return (error);
473 }
474
475 /*
476 * common mount implementation (final stage of mounting)
477
478 * Arguments:
479 * fstypename file system type (ie it's vfs name)
480 * pvp parent of covered vnode
481 * vp covered vnode
482 * cnp component name (ie path) of covered vnode
483 * flags generic mount flags
484 * fsmountargs file system specific data
485 * labelstr optional MAC label
486 * kernelmount TRUE for mounts initiated from inside the kernel
487 * ctx caller's context
488 */
489 static int
490 mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
491 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
492 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
493 {
494 #if !CONFIG_MACF
495 #pragma unused(labelstr)
496 #endif
497 struct vnode *devvp = NULLVP;
498 struct vnode *device_vnode = NULLVP;
499 #if CONFIG_MACF
500 struct vnode *rvp;
501 #endif
502 struct mount *mp;
503 struct vfstable *vfsp = (struct vfstable *)0;
504 struct proc *p = vfs_context_proc(ctx);
505 int error, flag = 0;
506 user_addr_t devpath = USER_ADDR_NULL;
507 int ronly = 0;
508 int mntalloc = 0;
509 boolean_t vfsp_ref = FALSE;
510 boolean_t is_rwlock_locked = FALSE;
511 boolean_t did_rele = FALSE;
512 boolean_t have_usecount = FALSE;
513
514 /*
515 * Process an update for an existing mount
516 */
517 if (flags & MNT_UPDATE) {
518 if ((vp->v_flag & VROOT) == 0) {
519 error = EINVAL;
520 goto out1;
521 }
522 mp = vp->v_mount;
523
524 /* unmount in progress return error */
525 mount_lock_spin(mp);
526 if (mp->mnt_lflag & MNT_LUNMOUNT) {
527 mount_unlock(mp);
528 error = EBUSY;
529 goto out1;
530 }
531 mount_unlock(mp);
532 lck_rw_lock_exclusive(&mp->mnt_rwlock);
533 is_rwlock_locked = TRUE;
534 /*
535 * We only allow the filesystem to be reloaded if it
536 * is currently mounted read-only.
537 */
538 if ((flags & MNT_RELOAD) &&
539 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
540 error = ENOTSUP;
541 goto out1;
542 }
543
544 /*
545 * If content protection is enabled, update mounts are not
546 * allowed to turn it off.
547 */
548 if ((mp->mnt_flag & MNT_CPROTECT) &&
549 ((flags & MNT_CPROTECT) == 0)) {
550 error = EINVAL;
551 goto out1;
552 }
553
554 #ifdef CONFIG_IMGSRC_ACCESS
555 /* Can't downgrade the backer of the root FS */
556 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
557 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
558 error = ENOTSUP;
559 goto out1;
560 }
561 #endif /* CONFIG_IMGSRC_ACCESS */
562
563 /*
564 * Only root, or the user that did the original mount is
565 * permitted to update it.
566 */
567 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
568 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
569 goto out1;
570 }
571 #if CONFIG_MACF
572 error = mac_mount_check_remount(ctx, mp);
573 if (error != 0) {
574 goto out1;
575 }
576 #endif
577 /*
578 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
579 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
580 */
581 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
582 flags |= MNT_NOSUID | MNT_NODEV;
583 if (mp->mnt_flag & MNT_NOEXEC)
584 flags |= MNT_NOEXEC;
585 }
586 flag = mp->mnt_flag;
587
588
589
590 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
591
592 vfsp = mp->mnt_vtable;
593 goto update;
594 }
595 /*
596 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
597 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
598 */
599 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
600 flags |= MNT_NOSUID | MNT_NODEV;
601 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
602 flags |= MNT_NOEXEC;
603 }
604
605 /* XXXAUDIT: Should we capture the type on the error path as well? */
606 AUDIT_ARG(text, fstypename);
607 mount_list_lock();
608 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
609 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
610 vfsp->vfc_refcount++;
611 vfsp_ref = TRUE;
612 break;
613 }
614 mount_list_unlock();
615 if (vfsp == NULL) {
616 error = ENODEV;
617 goto out1;
618 }
619
620 /*
621 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
622 */
623 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
624 error = EINVAL; /* unsupported request */
625 goto out1;
626 }
627
628 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
629 if (error != 0) {
630 goto out1;
631 }
632
633 /*
634 * Allocate and initialize the filesystem (mount_t)
635 */
636 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
637 M_MOUNT, M_WAITOK);
638 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
639 mntalloc = 1;
640
641 /* Initialize the default IO constraints */
642 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
643 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
644 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
645 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
646 mp->mnt_devblocksize = DEV_BSIZE;
647 mp->mnt_alignmentmask = PAGE_MASK;
648 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
649 mp->mnt_ioscale = 1;
650 mp->mnt_ioflags = 0;
651 mp->mnt_realrootvp = NULLVP;
652 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
653
654 TAILQ_INIT(&mp->mnt_vnodelist);
655 TAILQ_INIT(&mp->mnt_workerqueue);
656 TAILQ_INIT(&mp->mnt_newvnodes);
657 mount_lock_init(mp);
658 lck_rw_lock_exclusive(&mp->mnt_rwlock);
659 is_rwlock_locked = TRUE;
660 mp->mnt_op = vfsp->vfc_vfsops;
661 mp->mnt_vtable = vfsp;
662 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
663 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
664 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
665 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
666 mp->mnt_vnodecovered = vp;
667 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
668 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
669 mp->mnt_devbsdunit = 0;
670
671 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
672 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
673
674 #if NFSCLIENT || DEVFS
675 if (kernelmount)
676 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
677 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
678 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
679 #endif /* NFSCLIENT || DEVFS */
680
681 update:
682 /*
683 * Set the mount level flags.
684 */
685 if (flags & MNT_RDONLY)
686 mp->mnt_flag |= MNT_RDONLY;
687 else if (mp->mnt_flag & MNT_RDONLY) {
688 // disallow read/write upgrades of file systems that
689 // had the TYPENAME_OVERRIDE feature set.
690 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
691 error = EPERM;
692 goto out1;
693 }
694 mp->mnt_kern_flag |= MNTK_WANTRDWR;
695 }
696 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
697 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
698 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
699 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
700 MNT_QUARANTINE | MNT_CPROTECT);
701 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
702 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
703 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
704 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
705 MNT_QUARANTINE | MNT_CPROTECT);
706
707 #if CONFIG_MACF
708 if (flags & MNT_MULTILABEL) {
709 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
710 error = EINVAL;
711 goto out1;
712 }
713 mp->mnt_flag |= MNT_MULTILABEL;
714 }
715 #endif
716 /*
717 * Process device path for local file systems if requested
718 */
719 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
720 if (vfs_context_is64bit(ctx)) {
721 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
722 goto out1;
723 fsmountargs += sizeof(devpath);
724 } else {
725 user32_addr_t tmp;
726 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
727 goto out1;
728 /* munge into LP64 addr */
729 devpath = CAST_USER_ADDR_T(tmp);
730 fsmountargs += sizeof(tmp);
731 }
732
733 /* Lookup device and authorize access to it */
734 if ((devpath)) {
735 struct nameidata nd;
736
737 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
738 if ( (error = namei(&nd)) )
739 goto out1;
740
741 strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
742 devvp = nd.ni_vp;
743
744 nameidone(&nd);
745
746 if (devvp->v_type != VBLK) {
747 error = ENOTBLK;
748 goto out2;
749 }
750 if (major(devvp->v_rdev) >= nblkdev) {
751 error = ENXIO;
752 goto out2;
753 }
754 /*
755 * If mount by non-root, then verify that user has necessary
756 * permissions on the device.
757 */
758 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
759 mode_t accessmode = KAUTH_VNODE_READ_DATA;
760
761 if ((mp->mnt_flag & MNT_RDONLY) == 0)
762 accessmode |= KAUTH_VNODE_WRITE_DATA;
763 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
764 goto out2;
765 }
766 }
767 /* On first mount, preflight and open device */
768 if (devpath && ((flags & MNT_UPDATE) == 0)) {
769 if ( (error = vnode_ref(devvp)) )
770 goto out2;
771 /*
772 * Disallow multiple mounts of the same device.
773 * Disallow mounting of a device that is currently in use
774 * (except for root, which might share swap device for miniroot).
775 * Flush out any old buffers remaining from a previous use.
776 */
777 if ( (error = vfs_mountedon(devvp)) )
778 goto out3;
779
780 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
781 error = EBUSY;
782 goto out3;
783 }
784 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
785 error = ENOTBLK;
786 goto out3;
787 }
788 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
789 goto out3;
790
791 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
792 #if CONFIG_MACF
793 error = mac_vnode_check_open(ctx,
794 devvp,
795 ronly ? FREAD : FREAD|FWRITE);
796 if (error)
797 goto out3;
798 #endif /* MAC */
799 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
800 goto out3;
801
802 mp->mnt_devvp = devvp;
803 device_vnode = devvp;
804
805 } else if ((mp->mnt_flag & MNT_RDONLY) &&
806 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
807 (device_vnode = mp->mnt_devvp)) {
808 dev_t dev;
809 int maj;
810 /*
811 * If upgrade to read-write by non-root, then verify
812 * that user has necessary permissions on the device.
813 */
814 vnode_getalways(device_vnode);
815
816 if (suser(vfs_context_ucred(ctx), NULL) &&
817 (error = vnode_authorize(device_vnode, NULL,
818 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
819 ctx)) != 0) {
820 vnode_put(device_vnode);
821 goto out2;
822 }
823
824 /* Tell the device that we're upgrading */
825 dev = (dev_t)device_vnode->v_rdev;
826 maj = major(dev);
827
828 if ((u_int)maj >= (u_int)nblkdev)
829 panic("Volume mounted on a device with invalid major number.");
830
831 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
832 vnode_put(device_vnode);
833 device_vnode = NULLVP;
834 if (error != 0) {
835 goto out2;
836 }
837 }
838 }
839 #if CONFIG_MACF
840 if ((flags & MNT_UPDATE) == 0) {
841 mac_mount_label_init(mp);
842 mac_mount_label_associate(ctx, mp);
843 }
844 if (labelstr) {
845 if ((flags & MNT_UPDATE) != 0) {
846 error = mac_mount_check_label_update(ctx, mp);
847 if (error != 0)
848 goto out3;
849 }
850 }
851 #endif
852 /*
853 * Mount the filesystem.
854 */
855 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
856
857 if (flags & MNT_UPDATE) {
858 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
859 mp->mnt_flag &= ~MNT_RDONLY;
860 mp->mnt_flag &=~
861 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
862 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
863 if (error)
864 mp->mnt_flag = flag; /* restore flag value */
865 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
866 lck_rw_done(&mp->mnt_rwlock);
867 is_rwlock_locked = FALSE;
868 if (!error)
869 enablequotas(mp, ctx);
870 goto exit;
871 }
872
873 /*
874 * Put the new filesystem on the mount list after root.
875 */
876 if (error == 0) {
877 struct vfs_attr vfsattr;
878 #if CONFIG_MACF
879 if (vfs_flags(mp) & MNT_MULTILABEL) {
880 error = VFS_ROOT(mp, &rvp, ctx);
881 if (error) {
882 printf("%s() VFS_ROOT returned %d\n", __func__, error);
883 goto out3;
884 }
885 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
886 /*
887 * drop reference provided by VFS_ROOT
888 */
889 vnode_put(rvp);
890
891 if (error)
892 goto out3;
893 }
894 #endif /* MAC */
895
896 vnode_lock_spin(vp);
897 CLR(vp->v_flag, VMOUNT);
898 vp->v_mountedhere = mp;
899 vnode_unlock(vp);
900
901 /*
902 * taking the name_cache_lock exclusively will
903 * insure that everyone is out of the fast path who
904 * might be trying to use a now stale copy of
905 * vp->v_mountedhere->mnt_realrootvp
906 * bumping mount_generation causes the cached values
907 * to be invalidated
908 */
909 name_cache_lock();
910 mount_generation++;
911 name_cache_unlock();
912
913 error = vnode_ref(vp);
914 if (error != 0) {
915 goto out4;
916 }
917
918 have_usecount = TRUE;
919
920 error = checkdirs(vp, ctx);
921 if (error != 0) {
922 /* Unmount the filesystem as cdir/rdirs cannot be updated */
923 goto out4;
924 }
925 /*
926 * there is no cleanup code here so I have made it void
927 * we need to revisit this
928 */
929 (void)VFS_START(mp, 0, ctx);
930
931 if (mount_list_add(mp) != 0) {
932 /*
933 * The system is shutting down trying to umount
934 * everything, so fail with a plausible errno.
935 */
936 error = EBUSY;
937 goto out4;
938 }
939 lck_rw_done(&mp->mnt_rwlock);
940 is_rwlock_locked = FALSE;
941
942 /* Check if this mounted file system supports EAs or named streams. */
943 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
944 VFSATTR_INIT(&vfsattr);
945 VFSATTR_WANTED(&vfsattr, f_capabilities);
946 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
947 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
948 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
949 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
950 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
951 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
952 }
953 #if NAMEDSTREAMS
954 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
955 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
956 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
957 }
958 #endif
959 /* Check if this file system supports path from id lookups. */
960 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
961 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
962 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
963 } else if (mp->mnt_flag & MNT_DOVOLFS) {
964 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
965 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
966 }
967 }
968 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
969 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
970 }
971 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
972 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
973 }
974 /* increment the operations count */
975 OSAddAtomic(1, &vfs_nummntops);
976 enablequotas(mp, ctx);
977
978 if (device_vnode) {
979 device_vnode->v_specflags |= SI_MOUNTEDON;
980
981 /*
982 * cache the IO attributes for the underlying physical media...
983 * an error return indicates the underlying driver doesn't
984 * support all the queries necessary... however, reasonable
985 * defaults will have been set, so no reason to bail or care
986 */
987 vfs_init_io_attributes(device_vnode, mp);
988 }
989
990 /* Now that mount is setup, notify the listeners */
991 vfs_notify_mount(pvp);
992 } else {
993 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
994 if (mp->mnt_vnodelist.tqh_first != NULL) {
995 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
996 mp->mnt_vtable->vfc_name, error);
997 }
998
999 vnode_lock_spin(vp);
1000 CLR(vp->v_flag, VMOUNT);
1001 vnode_unlock(vp);
1002 mount_list_lock();
1003 mp->mnt_vtable->vfc_refcount--;
1004 mount_list_unlock();
1005
1006 if (device_vnode ) {
1007 vnode_rele(device_vnode);
1008 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
1009 }
1010 lck_rw_done(&mp->mnt_rwlock);
1011 is_rwlock_locked = FALSE;
1012
1013 /*
1014 * if we get here, we have a mount structure that needs to be freed,
1015 * but since the coveredvp hasn't yet been updated to point at it,
1016 * no need to worry about other threads holding a crossref on this mp
1017 * so it's ok to just free it
1018 */
1019 mount_lock_destroy(mp);
1020 #if CONFIG_MACF
1021 mac_mount_label_destroy(mp);
1022 #endif
1023 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1024 }
1025 exit:
1026 /*
1027 * drop I/O count on the device vp if there was one
1028 */
1029 if (devpath && devvp)
1030 vnode_put(devvp);
1031
1032 return(error);
1033
1034 /* Error condition exits */
1035 out4:
1036 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
1037
1038 /*
1039 * If the mount has been placed on the covered vp,
1040 * it may have been discovered by now, so we have
1041 * to treat this just like an unmount
1042 */
1043 mount_lock_spin(mp);
1044 mp->mnt_lflag |= MNT_LDEAD;
1045 mount_unlock(mp);
1046
1047 if (device_vnode != NULLVP) {
1048 vnode_rele(device_vnode);
1049 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1050 ctx);
1051 did_rele = TRUE;
1052 }
1053
1054 vnode_lock_spin(vp);
1055
1056 mp->mnt_crossref++;
1057 vp->v_mountedhere = (mount_t) 0;
1058
1059 vnode_unlock(vp);
1060
1061 if (have_usecount) {
1062 vnode_rele(vp);
1063 }
1064 out3:
1065 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
1066 vnode_rele(devvp);
1067 out2:
1068 if (devpath && devvp)
1069 vnode_put(devvp);
1070 out1:
1071 /* Release mnt_rwlock only when it was taken */
1072 if (is_rwlock_locked == TRUE) {
1073 lck_rw_done(&mp->mnt_rwlock);
1074 }
1075
1076 if (mntalloc) {
1077 if (mp->mnt_crossref)
1078 mount_dropcrossref(mp, vp, 0);
1079 else {
1080 mount_lock_destroy(mp);
1081 #if CONFIG_MACF
1082 mac_mount_label_destroy(mp);
1083 #endif
1084 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1085 }
1086 }
1087 if (vfsp_ref) {
1088 mount_list_lock();
1089 vfsp->vfc_refcount--;
1090 mount_list_unlock();
1091 }
1092
1093 return(error);
1094 }
1095
1096 /*
1097 * Flush in-core data, check for competing mount attempts,
1098 * and set VMOUNT
1099 */
1100 int
1101 prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
1102 {
1103 #if !CONFIG_MACF
1104 #pragma unused(cnp,fsname)
1105 #endif
1106 struct vnode_attr va;
1107 int error;
1108
1109 if (!skip_auth) {
1110 /*
1111 * If the user is not root, ensure that they own the directory
1112 * onto which we are attempting to mount.
1113 */
1114 VATTR_INIT(&va);
1115 VATTR_WANTED(&va, va_uid);
1116 if ((error = vnode_getattr(vp, &va, ctx)) ||
1117 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1118 (!vfs_context_issuser(ctx)))) {
1119 error = EPERM;
1120 goto out;
1121 }
1122 }
1123
1124 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1125 goto out;
1126
1127 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1128 goto out;
1129
1130 if (vp->v_type != VDIR) {
1131 error = ENOTDIR;
1132 goto out;
1133 }
1134
1135 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1136 error = EBUSY;
1137 goto out;
1138 }
1139
1140 #if CONFIG_MACF
1141 error = mac_mount_check_mount(ctx, vp,
1142 cnp, fsname);
1143 if (error != 0)
1144 goto out;
1145 #endif
1146
1147 vnode_lock_spin(vp);
1148 SET(vp->v_flag, VMOUNT);
1149 vnode_unlock(vp);
1150
1151 out:
1152 return error;
1153 }
1154
1155 #if CONFIG_IMGSRC_ACCESS
1156
1157 #if DEBUG
1158 #define IMGSRC_DEBUG(args...) printf(args)
1159 #else
1160 #define IMGSRC_DEBUG(args...) do { } while(0)
1161 #endif
1162
1163 static int
1164 authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1165 {
1166 struct nameidata nd;
1167 vnode_t vp, realdevvp;
1168 mode_t accessmode;
1169 int error;
1170
1171 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1172 if ( (error = namei(&nd)) ) {
1173 IMGSRC_DEBUG("namei() failed with %d\n", error);
1174 return error;
1175 }
1176
1177 vp = nd.ni_vp;
1178
1179 if (!vnode_isblk(vp)) {
1180 IMGSRC_DEBUG("Not block device.\n");
1181 error = ENOTBLK;
1182 goto out;
1183 }
1184
1185 realdevvp = mp->mnt_devvp;
1186 if (realdevvp == NULLVP) {
1187 IMGSRC_DEBUG("No device backs the mount.\n");
1188 error = ENXIO;
1189 goto out;
1190 }
1191
1192 error = vnode_getwithref(realdevvp);
1193 if (error != 0) {
1194 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1195 goto out;
1196 }
1197
1198 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1199 IMGSRC_DEBUG("Wrong dev_t.\n");
1200 error = ENXIO;
1201 goto out1;
1202 }
1203
1204 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1205
1206 /*
1207 * If mount by non-root, then verify that user has necessary
1208 * permissions on the device.
1209 */
1210 if (!vfs_context_issuser(ctx)) {
1211 accessmode = KAUTH_VNODE_READ_DATA;
1212 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1213 accessmode |= KAUTH_VNODE_WRITE_DATA;
1214 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1215 IMGSRC_DEBUG("Access denied.\n");
1216 goto out1;
1217 }
1218 }
1219
1220 *devvpp = vp;
1221
1222 out1:
1223 vnode_put(realdevvp);
1224 out:
1225 nameidone(&nd);
1226 if (error) {
1227 vnode_put(vp);
1228 }
1229
1230 return error;
1231 }
1232
1233 /*
1234 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1235 * and call checkdirs()
1236 */
1237 static int
1238 place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1239 {
1240 int error;
1241
1242 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1243
1244 vnode_lock_spin(vp);
1245 CLR(vp->v_flag, VMOUNT);
1246 vp->v_mountedhere = mp;
1247 vnode_unlock(vp);
1248
1249 /*
1250 * taking the name_cache_lock exclusively will
1251 * insure that everyone is out of the fast path who
1252 * might be trying to use a now stale copy of
1253 * vp->v_mountedhere->mnt_realrootvp
1254 * bumping mount_generation causes the cached values
1255 * to be invalidated
1256 */
1257 name_cache_lock();
1258 mount_generation++;
1259 name_cache_unlock();
1260
1261 error = vnode_ref(vp);
1262 if (error != 0) {
1263 goto out;
1264 }
1265
1266 error = checkdirs(vp, ctx);
1267 if (error != 0) {
1268 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1269 vnode_rele(vp);
1270 goto out;
1271 }
1272
1273 out:
1274 if (error != 0) {
1275 mp->mnt_vnodecovered = NULLVP;
1276 }
1277 return error;
1278 }
1279
/*
 * Reverse place_mount_and_checkdirs(): drop the usecount taken on the
 * covered vnode, detach the mount from it, and clear mnt_vnodecovered.
 * Does not restore VMOUNT (the mount attempt is being abandoned).
 */
static void
undo_place_on_covered_vp(mount_t mp, vnode_t vp)
{
	vnode_rele(vp);
	vnode_lock_spin(vp);
	vp->v_mountedhere = (mount_t)NULL;
	vnode_unlock(vp);

	mp->mnt_vnodecovered = NULLVP;
}
1290
/*
 * Begin an update (remount) of 'mp': reject if an unmount is in
 * progress, take mnt_rwlock exclusively, and authorize the update.
 * On success the rwlock is held and must be released with
 * mount_end_update(); on failure the rwlock has been dropped.
 */
static int
mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
{
	int error;

	/* unmount in progress return error */
	mount_lock_spin(mp);
	if (mp->mnt_lflag & MNT_LUNMOUNT) {
		mount_unlock(mp);
		return EBUSY;
	}
	mount_unlock(mp);
	lck_rw_lock_exclusive(&mp->mnt_rwlock);

	/*
	 * We only allow the filesystem to be reloaded if it
	 * is currently mounted read-only.
	 */
	if ((flags & MNT_RELOAD) &&
	    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
		error = ENOTSUP;
		goto out;
	}

	/*
	 * Only root, or the user that did the original mount is
	 * permitted to update it.
	 */
	if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
	    (!vfs_context_issuser(ctx))) {
		error = EPERM;
		goto out;
	}
#if CONFIG_MACF
	error = mac_mount_check_remount(ctx, mp);
	if (error != 0) {
		goto out;
	}
#endif

out:
	if (error) {
		lck_rw_done(&mp->mnt_rwlock);
	}

	return error;
}
1338
/* Release the exclusive mnt_rwlock taken by mount_begin_update(). */
static void
mount_end_update(mount_t mp)
{
	lck_rw_done(&mp->mnt_rwlock);
}
1344
1345 static int
1346 get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1347 {
1348 vnode_t vp;
1349
1350 if (height >= MAX_IMAGEBOOT_NESTING) {
1351 return EINVAL;
1352 }
1353
1354 vp = imgsrc_rootvnodes[height];
1355 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1356 *rvpp = vp;
1357 return 0;
1358 } else {
1359 return ENOENT;
1360 }
1361 }
1362
/*
 * Move the imageboot source filesystem (at nesting level 'height', or
 * level 0 for the legacy calling convention) so that it covers 'vp'.
 * Invoked from the mount path when MNT_IMGSRC_BY_INDEX-style arguments
 * are supplied.  Root only; a filesystem may be moved at most once
 * (MNTK_HAS_MOVED).  Returns 0 or an errno; all references and the
 * mount rwlock are released on every path.
 */
static int
relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
		const char *fsname, vfs_context_t ctx,
		boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
{
	int error;
	mount_t mp;
	boolean_t placed = FALSE;
	vnode_t devvp = NULLVP;
	struct vfstable *vfsp;
	user_addr_t devpath;
	char *old_mntonname;
	vnode_t rvp;
	uint32_t height;
	uint32_t flags;

	/* If we didn't imageboot, nothing to move */
	if (imgsrc_rootvnodes[0] == NULLVP) {
		return EINVAL;
	}

	/* Only root can do this */
	if (!vfs_context_issuser(ctx)) {
		return EPERM;
	}

	IMGSRC_DEBUG("looking for root vnode.\n");

	/*
	 * Get root vnode of filesystem we're moving.
	 */
	if (by_index) {
		/* Caller supplied a mnt_imgsrc_args structure (height + flags + devpath). */
		if (is64bit) {
			struct user64_mnt_imgsrc_args mia64;
			error = copyin(fsmountargs, &mia64, sizeof(mia64));
			if (error != 0) {
				IMGSRC_DEBUG("Failed to copy in arguments.\n");
				return error;
			}

			height = mia64.mi_height;
			flags = mia64.mi_flags;
			devpath = mia64.mi_devpath;
		} else {
			struct user32_mnt_imgsrc_args mia32;
			error = copyin(fsmountargs, &mia32, sizeof(mia32));
			if (error != 0) {
				IMGSRC_DEBUG("Failed to copy in arguments.\n");
				return error;
			}

			height = mia32.mi_height;
			flags = mia32.mi_flags;
			devpath = mia32.mi_devpath;
		}
	} else {
		/*
		 * For binary compatibility--assumes one level of nesting.
		 */
		if (is64bit) {
			if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
				return error;
		} else {
			user32_addr_t tmp;
			if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
				return error;

			/* munge into LP64 addr */
			devpath = CAST_USER_ADDR_T(tmp);
		}

		height = 0;
		flags = 0;
	}

	/* No flag bits are currently defined; reject anything nonzero. */
	if (flags != 0) {
		IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
		return EINVAL;
	}

	/* Takes an iocount on rvp on success; dropped at out0/success. */
	error = get_imgsrc_rootvnode(height, &rvp);
	if (error != 0) {
		IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
		return error;
	}

	IMGSRC_DEBUG("got root vnode.\n");

	MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);

	/* Can only move once */
	mp = vnode_mount(rvp);
	if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
		IMGSRC_DEBUG("Already moved.\n");
		error = EBUSY;
		goto out0;
	}

	IMGSRC_DEBUG("Starting updated.\n");

	/* Get exclusive rwlock on mount, authorize update on mp */
	error = mount_begin_update(mp , ctx, 0);
	if (error != 0) {
		IMGSRC_DEBUG("Starting updated failed with %d\n", error);
		goto out0;
	}

	/*
	 * It can only be moved once. Flag is set under the rwlock,
	 * so we're now safe to proceed.
	 */
	if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
		IMGSRC_DEBUG("Already moved [2]\n");
		goto out1;
	}


	IMGSRC_DEBUG("Preparing coveredvp.\n");

	/* Mark covered vnode as mount in progress, authorize placing mount on top */
	error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
	if (error != 0) {
		IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
		goto out1;
	}

	IMGSRC_DEBUG("Covered vp OK.\n");

	/* Sanity check the name caller has provided */
	vfsp = mp->mnt_vtable;
	if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
		IMGSRC_DEBUG("Wrong fs name.\n");
		error = EINVAL;
		goto out2;
	}

	/* Check the device vnode and update mount-from name, for local filesystems */
	if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
		IMGSRC_DEBUG("Local, doing device validation.\n");

		if (devpath != USER_ADDR_NULL) {
			error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
			if (error) {
				IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
				goto out2;
			}

			/* Only the name update was needed; drop the device iocount now. */
			vnode_put(devvp);
		}
	}

	/*
	 * Place mp on top of vnode, ref the vnode, call checkdirs(),
	 * and increment the name cache's mount generation
	 */

	IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
	error = place_mount_and_checkdirs(mp, vp, ctx);
	if (error != 0) {
		goto out2;
	}

	placed = TRUE;

	/* Save old mount-on name so it can be restored if mount_list_add fails. */
	strncpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
	strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);

	/* Forbid future moves */
	mount_lock(mp);
	mp->mnt_kern_flag |= MNTK_HAS_MOVED;
	mount_unlock(mp);

	/* Finally, add to mount list, completely ready to go */
	if (mount_list_add(mp) != 0) {
		/*
		 * The system is shutting down trying to umount
		 * everything, so fail with a plausible errno.
		 */
		error = EBUSY;
		goto out3;
	}

	mount_end_update(mp);
	vnode_put(rvp);
	FREE(old_mntonname, M_TEMP);

	vfs_notify_mount(pvp);

	return 0;
out3:
	strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);

	mount_lock(mp);
	mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
	mount_unlock(mp);

out2:
	/*
	 * Placing the mp on the vnode clears VMOUNT,
	 * so cleanup is different after that point
	 */
	if (placed) {
		/* Rele the vp, clear VMOUNT and v_mountedhere */
		undo_place_on_covered_vp(mp, vp);
	} else {
		vnode_lock_spin(vp);
		CLR(vp->v_flag, VMOUNT);
		vnode_unlock(vp);
	}
out1:
	mount_end_update(mp);

out0:
	vnode_put(rvp);
	FREE(old_mntonname, M_TEMP);
	return error;
}
1580
1581 #endif /* CONFIG_IMGSRC_ACCESS */
1582
/*
 * Turn on disk quotas for a freshly mounted filesystem if the
 * per-type quota trigger files exist.  Errors are deliberately
 * ignored so quota setup can never fail a mount.  Currently HFS-only.
 */
void
enablequotas(struct mount *mp, vfs_context_t ctx)
{
	struct nameidata qnd;
	int type;
	char qfpath[MAXPATHLEN];
	const char *qfname = QUOTAFILENAME;
	const char *qfopsname = QUOTAOPSNAME;
	const char *qfextension[] = INITQFNAMES;

	/* XXX Should be an MNTK_ flag, instead of strncmp()'s */
	if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
		return;
	}
	/*
	 * Enable filesystem disk quotas if necessary.
	 * We ignore errors as this should not interfere with final mount
	 */
	for (type=0; type < MAXQUOTAS; type++) {
		/* Look for the "ops" trigger file; its absence means quotas stay off. */
		snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
		NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
		       CAST_USER_ADDR_T(qfpath), ctx);
		if (namei(&qnd) != 0)
			continue; 	    /* option file to trigger quotas is not present */
		vnode_put(qnd.ni_vp);
		nameidone(&qnd);
		/* The actual quota data file path is passed to the filesystem. */
		snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);

		(void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
	}
	return;
}
1615
1616
/*
 * Per-process callback for checkdirs(): if the process's current or
 * root directory is the old covered vnode, repoint it at the new
 * filesystem root.  'arg' is a struct cdirargs carrying olddp/newdp.
 * Always returns PROC_RETURNED so the proc iteration continues.
 */
static int
checkdirs_callback(proc_t p, void * arg)
{
	struct cdirargs * cdrp = (struct cdirargs * )arg;
	vnode_t olddp = cdrp->olddp;
	vnode_t newdp = cdrp->newdp;
	struct filedesc *fdp;
	vnode_t tvp;
	vnode_t fdp_cvp;
	vnode_t fdp_rvp;
	int cdir_changed = 0;
	int rdir_changed = 0;

	/*
	 * XXX Also needs to iterate each thread in the process to see if it
	 * XXX is using a per-thread current working directory, and, if so,
	 * XXX update that as well.
	 */

	proc_fdlock(p);
	fdp = p->p_fd;
	if (fdp == (struct filedesc *)0) {
		proc_fdunlock(p);
		return(PROC_RETURNED);
	}
	/* Snapshot cdir/rdir under the fd lock. */
	fdp_cvp = fdp->fd_cdir;
	fdp_rvp = fdp->fd_rdir;
	proc_fdunlock(p);

	/*
	 * NOTE(review): the fdp->fd_cdir / fd_rdir re-reads below happen
	 * after proc_fdunlock(); they rely on these fields not changing
	 * between the snapshot and the swap — verify this assumption.
	 */
	if (fdp_cvp == olddp) {
		vnode_ref(newdp);
		tvp = fdp->fd_cdir;
		fdp_cvp = newdp;
		cdir_changed = 1;
		vnode_rele(tvp);
	}
	if (fdp_rvp == olddp) {
		vnode_ref(newdp);
		tvp = fdp->fd_rdir;
		fdp_rvp = newdp;
		rdir_changed = 1;
		vnode_rele(tvp);
	}
	/* Publish the new directories back under the fd lock. */
	if (cdir_changed || rdir_changed) {
		proc_fdlock(p);
		fdp->fd_cdir = fdp_cvp;
		fdp->fd_rdir = fdp_rvp;
		proc_fdunlock(p);
	}
	return(PROC_RETURNED);
}
1668
1669
1670
1671 /*
1672 * Scan all active processes to see if any of them have a current
1673 * or root directory onto which the new filesystem has just been
1674 * mounted. If so, replace them with the new mount point.
1675 */
1676 static int
1677 checkdirs(vnode_t olddp, vfs_context_t ctx)
1678 {
1679 vnode_t newdp;
1680 vnode_t tvp;
1681 int err;
1682 struct cdirargs cdr;
1683
1684 if (olddp->v_usecount == 1)
1685 return(0);
1686 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
1687
1688 if (err != 0) {
1689 #if DIAGNOSTIC
1690 panic("mount: lost mount: error %d", err);
1691 #endif
1692 return(err);
1693 }
1694
1695 cdr.olddp = olddp;
1696 cdr.newdp = newdp;
1697 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1698 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
1699
1700 if (rootvnode == olddp) {
1701 vnode_ref(newdp);
1702 tvp = rootvnode;
1703 rootvnode = newdp;
1704 vnode_rele(tvp);
1705 }
1706
1707 vnode_put(newdp);
1708 return(0);
1709 }
1710
1711 /*
1712 * Unmount a file system.
1713 *
1714 * Note: unmount takes a path to the vnode mounted on as argument,
1715 * not special file (as before).
1716 */
1717 /* ARGSUSED */
1718 int
1719 unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1720 {
1721 vnode_t vp;
1722 struct mount *mp;
1723 int error;
1724 struct nameidata nd;
1725 vfs_context_t ctx = vfs_context_current();
1726
1727 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
1728 UIO_USERSPACE, uap->path, ctx);
1729 error = namei(&nd);
1730 if (error)
1731 return (error);
1732 vp = nd.ni_vp;
1733 mp = vp->v_mount;
1734 nameidone(&nd);
1735
1736 #if CONFIG_MACF
1737 error = mac_mount_check_umount(ctx, mp);
1738 if (error != 0) {
1739 vnode_put(vp);
1740 return (error);
1741 }
1742 #endif
1743 /*
1744 * Must be the root of the filesystem
1745 */
1746 if ((vp->v_flag & VROOT) == 0) {
1747 vnode_put(vp);
1748 return (EINVAL);
1749 }
1750 mount_ref(mp, 0);
1751 vnode_put(vp);
1752 /* safedounmount consumes the mount ref */
1753 return (safedounmount(mp, uap->flags, ctx));
1754 }
1755
1756 int
1757 vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
1758 {
1759 mount_t mp;
1760
1761 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1762 if (mp == (mount_t)0) {
1763 return(ENOENT);
1764 }
1765 mount_ref(mp, 0);
1766 mount_iterdrop(mp);
1767 /* safedounmount consumes the mount ref */
1768 return(safedounmount(mp, flags, ctx));
1769 }
1770
1771
1772 /*
1773 * The mount struct comes with a mount ref which will be consumed.
1774 * Do the actual file system unmount, prevent some common foot shooting.
1775 */
1776 int
1777 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1778 {
1779 int error;
1780 proc_t p = vfs_context_proc(ctx);
1781
1782 /*
1783 * If the file system is not responding and MNT_NOBLOCK
1784 * is set and not a forced unmount then return EBUSY.
1785 */
1786 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1787 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1788 error = EBUSY;
1789 goto out;
1790 }
1791
1792 /*
1793 * Skip authorization if the mount is tagged as permissive and
1794 * this is not a forced-unmount attempt.
1795 */
1796 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1797 /*
1798 * Only root, or the user that did the original mount is
1799 * permitted to unmount this filesystem.
1800 */
1801 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1802 (error = suser(kauth_cred_get(), &p->p_acflag)))
1803 goto out;
1804 }
1805 /*
1806 * Don't allow unmounting the root file system.
1807 */
1808 if (mp->mnt_flag & MNT_ROOTFS) {
1809 error = EBUSY; /* the root is always busy */
1810 goto out;
1811 }
1812
1813 #ifdef CONFIG_IMGSRC_ACCESS
1814 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1815 error = EBUSY;
1816 goto out;
1817 }
1818 #endif /* CONFIG_IMGSRC_ACCESS */
1819
1820 return (dounmount(mp, flags, 1, ctx));
1821
1822 out:
1823 mount_drop(mp, 0);
1824 return(error);
1825 }
1826
1827 /*
1828 * Do the actual file system unmount.
1829 */
1830 int
1831 dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1832 {
1833 vnode_t coveredvp = (vnode_t)0;
1834 int error;
1835 int needwakeup = 0;
1836 int forcedunmount = 0;
1837 int lflags = 0;
1838 struct vnode *devvp = NULLVP;
1839 #if CONFIG_TRIGGERS
1840 proc_t p = vfs_context_proc(ctx);
1841 int did_vflush = 0;
1842 int pflags_save = 0;
1843 #endif /* CONFIG_TRIGGERS */
1844
1845 mount_lock(mp);
1846
1847 /*
1848 * If already an unmount in progress just return EBUSY.
1849 * Even a forced unmount cannot override.
1850 */
1851 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1852 if (withref != 0)
1853 mount_drop(mp, 1);
1854 mount_unlock(mp);
1855 return (EBUSY);
1856 }
1857
1858 if (flags & MNT_FORCE) {
1859 forcedunmount = 1;
1860 mp->mnt_lflag |= MNT_LFORCE;
1861 }
1862
1863 #if CONFIG_TRIGGERS
1864 if (flags & MNT_NOBLOCK && p != kernproc)
1865 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
1866 #endif
1867
1868 mp->mnt_kern_flag |= MNTK_UNMOUNT;
1869 mp->mnt_lflag |= MNT_LUNMOUNT;
1870 mp->mnt_flag &=~ MNT_ASYNC;
1871 /*
1872 * anyone currently in the fast path that
1873 * trips over the cached rootvp will be
1874 * dumped out and forced into the slow path
1875 * to regenerate a new cached value
1876 */
1877 mp->mnt_realrootvp = NULLVP;
1878 mount_unlock(mp);
1879
1880 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
1881 /*
1882 * Force unmount any mounts in this filesystem.
1883 * If any unmounts fail - just leave them dangling.
1884 * Avoids recursion.
1885 */
1886 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
1887 }
1888
1889 /*
1890 * taking the name_cache_lock exclusively will
1891 * insure that everyone is out of the fast path who
1892 * might be trying to use a now stale copy of
1893 * vp->v_mountedhere->mnt_realrootvp
1894 * bumping mount_generation causes the cached values
1895 * to be invalidated
1896 */
1897 name_cache_lock();
1898 mount_generation++;
1899 name_cache_unlock();
1900
1901
1902 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1903 if (withref != 0)
1904 mount_drop(mp, 0);
1905 #if CONFIG_FSE
1906 fsevent_unmount(mp); /* has to come first! */
1907 #endif
1908 error = 0;
1909 if (forcedunmount == 0) {
1910 ubc_umount(mp); /* release cached vnodes */
1911 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1912 error = VFS_SYNC(mp, MNT_WAIT, ctx);
1913 if (error) {
1914 mount_lock(mp);
1915 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1916 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1917 mp->mnt_lflag &= ~MNT_LFORCE;
1918 goto out;
1919 }
1920 }
1921 }
1922
1923 #if CONFIG_TRIGGERS
1924 vfs_nested_trigger_unmounts(mp, flags, ctx);
1925 did_vflush = 1;
1926 #endif
1927 if (forcedunmount)
1928 lflags |= FORCECLOSE;
1929 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
1930 if ((forcedunmount == 0) && error) {
1931 mount_lock(mp);
1932 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1933 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1934 mp->mnt_lflag &= ~MNT_LFORCE;
1935 goto out;
1936 }
1937
1938 /* make sure there are no one in the mount iterations or lookup */
1939 mount_iterdrain(mp);
1940
1941 error = VFS_UNMOUNT(mp, flags, ctx);
1942 if (error) {
1943 mount_iterreset(mp);
1944 mount_lock(mp);
1945 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1946 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1947 mp->mnt_lflag &= ~MNT_LFORCE;
1948 goto out;
1949 }
1950
1951 /* increment the operations count */
1952 if (!error)
1953 OSAddAtomic(1, &vfs_nummntops);
1954
1955 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
1956 /* hold an io reference and drop the usecount before close */
1957 devvp = mp->mnt_devvp;
1958 vnode_getalways(devvp);
1959 vnode_rele(devvp);
1960 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1961 ctx);
1962 vnode_clearmountedon(devvp);
1963 vnode_put(devvp);
1964 }
1965 lck_rw_done(&mp->mnt_rwlock);
1966 mount_list_remove(mp);
1967 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1968
1969 /* mark the mount point hook in the vp but not drop the ref yet */
1970 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
1971 /*
1972 * The covered vnode needs special handling. Trying to get an
1973 * iocount must not block here as this may lead to deadlocks
1974 * if the Filesystem to which the covered vnode belongs is
1975 * undergoing forced unmounts. Since we hold a usecount, the
1976 * vnode cannot be reused (it can, however, still be terminated)
1977 */
1978 vnode_getalways(coveredvp);
1979 vnode_lock_spin(coveredvp);
1980
1981 mp->mnt_crossref++;
1982 coveredvp->v_mountedhere = (struct mount *)0;
1983 CLR(coveredvp->v_flag, VMOUNT);
1984
1985 vnode_unlock(coveredvp);
1986 vnode_put(coveredvp);
1987 }
1988
1989 mount_list_lock();
1990 mp->mnt_vtable->vfc_refcount--;
1991 mount_list_unlock();
1992
1993 cache_purgevfs(mp); /* remove cache entries for this file sys */
1994 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
1995 mount_lock(mp);
1996 mp->mnt_lflag |= MNT_LDEAD;
1997
1998 if (mp->mnt_lflag & MNT_LWAIT) {
1999 /*
2000 * do the wakeup here
2001 * in case we block in mount_refdrain
2002 * which will drop the mount lock
2003 * and allow anyone blocked in vfs_busy
2004 * to wakeup and see the LDEAD state
2005 */
2006 mp->mnt_lflag &= ~MNT_LWAIT;
2007 wakeup((caddr_t)mp);
2008 }
2009 mount_refdrain(mp);
2010 out:
2011 if (mp->mnt_lflag & MNT_LWAIT) {
2012 mp->mnt_lflag &= ~MNT_LWAIT;
2013 needwakeup = 1;
2014 }
2015
2016 #if CONFIG_TRIGGERS
2017 if (flags & MNT_NOBLOCK && p != kernproc) {
2018 // Restore P_NOREMOTEHANG bit to its previous value
2019 if ((pflags_save & P_NOREMOTEHANG) == 0)
2020 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2021 }
2022
2023 /*
2024 * Callback and context are set together under the mount lock, and
2025 * never cleared, so we're safe to examine them here, drop the lock,
2026 * and call out.
2027 */
2028 if (mp->mnt_triggercallback != NULL) {
2029 mount_unlock(mp);
2030 if (error == 0) {
2031 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2032 } else if (did_vflush) {
2033 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2034 }
2035 } else {
2036 mount_unlock(mp);
2037 }
2038 #else
2039 mount_unlock(mp);
2040 #endif /* CONFIG_TRIGGERS */
2041
2042 lck_rw_done(&mp->mnt_rwlock);
2043
2044 if (needwakeup)
2045 wakeup((caddr_t)mp);
2046
2047 if (!error) {
2048 if ((coveredvp != NULLVP)) {
2049 vnode_t pvp = NULLVP;
2050
2051 /*
2052 * The covered vnode needs special handling. Trying to
2053 * get an iocount must not block here as this may lead
2054 * to deadlocks if the Filesystem to which the covered
2055 * vnode belongs is undergoing forced unmounts. Since we
2056 * hold a usecount, the vnode cannot be reused
2057 * (it can, however, still be terminated).
2058 */
2059 vnode_getalways(coveredvp);
2060
2061 mount_dropcrossref(mp, coveredvp, 0);
2062 /*
2063 * We'll _try_ to detect if this really needs to be
2064 * done. The coveredvp can only be in termination (or
2065 * terminated) if the coveredvp's mount point is in a
2066 * forced unmount (or has been) since we still hold the
2067 * ref.
2068 */
2069 if (!vnode_isrecycled(coveredvp)) {
2070 pvp = vnode_getparent(coveredvp);
2071 #if CONFIG_TRIGGERS
2072 if (coveredvp->v_resolve) {
2073 vnode_trigger_rearm(coveredvp, ctx);
2074 }
2075 #endif
2076 }
2077
2078 vnode_rele(coveredvp);
2079 vnode_put(coveredvp);
2080 coveredvp = NULLVP;
2081
2082 if (pvp) {
2083 lock_vnode_and_post(pvp, NOTE_WRITE);
2084 vnode_put(pvp);
2085 }
2086 } else if (mp->mnt_flag & MNT_ROOTFS) {
2087 mount_lock_destroy(mp);
2088 #if CONFIG_MACF
2089 mac_mount_label_destroy(mp);
2090 #endif
2091 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2092 } else
2093 panic("dounmount: no coveredvp");
2094 }
2095 return (error);
2096 }
2097
2098 /*
2099 * Unmount any mounts in this filesystem.
2100 */
2101 void
2102 dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2103 {
2104 mount_t smp;
2105 fsid_t *fsids, fsid;
2106 int fsids_sz;
2107 int count = 0, i, m = 0;
2108 vnode_t vp;
2109
2110 mount_list_lock();
2111
2112 // Get an array to hold the submounts fsids.
2113 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2114 count++;
2115 fsids_sz = count * sizeof(fsid_t);
2116 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2117 if (fsids == NULL) {
2118 mount_list_unlock();
2119 goto out;
2120 }
2121 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2122
2123 /*
2124 * Fill the array with submount fsids.
2125 * Since mounts are always added to the tail of the mount list, the
2126 * list is always in mount order.
2127 * For each mount check if the mounted-on vnode belongs to a
2128 * mount that's already added to our array of mounts to be unmounted.
2129 */
2130 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2131 vp = smp->mnt_vnodecovered;
2132 if (vp == NULL)
2133 continue;
2134 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2135 for (i = 0; i <= m; i++) {
2136 if (fsids[i].val[0] == fsid.val[0] &&
2137 fsids[i].val[1] == fsid.val[1]) {
2138 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2139 break;
2140 }
2141 }
2142 }
2143 mount_list_unlock();
2144
2145 // Unmount the submounts in reverse order. Ignore errors.
2146 for (i = m; i > 0; i--) {
2147 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2148 if (smp) {
2149 mount_ref(smp, 0);
2150 mount_iterdrop(smp);
2151 (void) dounmount(smp, flags, 1, ctx);
2152 }
2153 }
2154 out:
2155 if (fsids)
2156 FREE(fsids, M_TEMP);
2157 }
2158
/*
 * Drop one crossref on 'mp' taken via covered vnode 'dp'.  When the
 * last crossref goes away and the mount is no longer attached to the
 * vnode, the mount structure itself is destroyed and freed.  If
 * 'need_put' is set, an iocount on 'dp' is also released.
 */
void
mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
{
	vnode_lock(dp);
	mp->mnt_crossref--;

	if (mp->mnt_crossref < 0)
		panic("mount cross refs -ve");

	if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {

		/* Last reference and detached: tear the mount down. */
		if (need_put)
			vnode_put_locked(dp);
		vnode_unlock(dp);

		mount_lock_destroy(mp);
#if CONFIG_MACF
		mac_mount_label_destroy(mp);
#endif
		FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
		return;
	}
	if (need_put)
		vnode_put_locked(dp);
	vnode_unlock(dp);
}
2185
2186
2187 /*
2188 * Sync each mounted filesystem.
2189 */
2190 #if DIAGNOSTIC
2191 int syncprt = 0;
2192 #endif
2193
2194 int print_vmpage_stat=0;
2195 int sync_timeout = 60; // Sync time limit (sec)
2196
/*
 * vfs_iterate() callback: sync one writable mount.  A non-NULL 'arg'
 * requests a waiting (MNT_WAIT) sync.  MNT_ASYNC is cleared for the
 * duration of the sync and restored afterwards.
 */
static int
sync_callback(mount_t mp, __unused void *arg)
{
	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
		int asyncflag = mp->mnt_flag & MNT_ASYNC;

		mp->mnt_flag &= ~MNT_ASYNC;
		VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
		if (asyncflag)
			mp->mnt_flag |= MNT_ASYNC;
	}

	return (VFS_RETURNED);
}
2211
/*
 * sync(2) system call: schedule a non-waiting sync of every mounted
 * filesystem.  Always returns 0.
 */
/* ARGSUSED */
int
sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
{
	vfs_iterate(LK_NOWAIT, sync_callback, NULL);

	if (print_vmpage_stat) {
		vm_countdirtypages();
	}

#if DIAGNOSTIC
	if (syncprt)
		vfs_bufstats();
#endif /* DIAGNOSTIC */
	return 0;
}
2228
/*
 * Kernel-thread body spawned by sync_async(): sync all filesystems,
 * then wake the waiter sleeping on the 'timeout' address (if any).
 */
static void
sync_thread(void *arg, __unused wait_result_t wr)
{
	int *timeout = (int *) arg;

	vfs_iterate(LK_NOWAIT, sync_callback, NULL);

	/* Wake sync_async(), which sleeps on this address. */
	if (timeout)
		wakeup((caddr_t) timeout);
	if (print_vmpage_stat) {
		vm_countdirtypages();
	}

#if DIAGNOSTIC
	if (syncprt)
		vfs_bufstats();
#endif /* DIAGNOSTIC */
}
2247
/*
 * Sync in a separate thread so we can time out if it blocks.
 *
 * Starts sync_thread() and sleeps on &timeout for at most 'timeout'
 * seconds; sync_thread() wakes the same channel when it finishes.
 * Always returns 0, whether or not the sync completed in time.
 */
static int
sync_async(int timeout)
{
	thread_t thd;
	int error;
	struct timespec ts = {timeout, 0};

	lck_mtx_lock(sync_mtx_lck);
	if (kernel_thread_start(sync_thread, &timeout, &thd) != KERN_SUCCESS) {
		printf("sync_thread failed\n");
		lck_mtx_unlock(sync_mtx_lck);
		return (0);
	}

	/*
	 * NOTE(review): &timeout is a stack local handed to the helper
	 * thread.  If msleep() times out and this frame returns before
	 * sync_thread() runs its wakeup(), the thread uses a stale
	 * stack address as the wake channel; wakeup() only treats it as
	 * an opaque token, but this lifetime deserves confirmation.
	 */
	error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "sync_thread", &ts);
	if (error) {
		printf("sync timed out: %d sec\n", timeout);
	}
	thread_deallocate(thd);

	return (0);
}
2273
2274 /*
2275 * An in-kernel sync for power management to call.
2276 */
2277 __private_extern__ int
2278 sync_internal(void)
2279 {
2280 (void) sync_async(sync_timeout);
2281
2282 return 0;
2283 } /* end of sync_internal call */
2284
2285 /*
2286 * Change filesystem quotas.
2287 */
2288 #if QUOTA
int
quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
{
	struct mount *mp;
	int error, quota_cmd, quota_status;
	caddr_t datap;
	size_t fnamelen;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();
	struct dqblk my_dqblk;

	AUDIT_ARG(uid, uap->uid);
	AUDIT_ARG(cmd, uap->cmd);
	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	    uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	/* Only the mount is needed; drop the vnode right away. */
	mp = nd.ni_vp->v_mount;
	vnode_put(nd.ni_vp);
	nameidone(&nd);

	/* copyin any data we will need for downstream code */
	quota_cmd = uap->cmd >> SUBCMDSHIFT;

	/* 'error' is 0 here (namei succeeded); cases may overwrite it. */
	switch (quota_cmd) {
	case Q_QUOTAON:
		/* uap->arg specifies a file from which to take the quotas */
		fnamelen = MAXPATHLEN;
		datap = kalloc(MAXPATHLEN);
		error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
		break;
	case Q_GETQUOTA:
		/* uap->arg is a pointer to a dqblk structure. */
		datap = (caddr_t) &my_dqblk;
		break;
	case Q_SETQUOTA:
	case Q_SETUSE:
		/* uap->arg is a pointer to a dqblk structure. */
		datap = (caddr_t) &my_dqblk;
		if (proc_is64bit(p)) {
			/* 64-bit callers pass the wider layout; munge it down. */
			struct user_dqblk my_dqblk64;
			error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
			if (error == 0) {
				munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
			}
		}
		else {
			error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
		}
		break;
	case Q_QUOTASTAT:
		/* uap->arg is a pointer to an integer */
		datap = (caddr_t) &quota_status;
		break;
	default:
		/* Unknown subcommand: hand the filesystem a NULL buffer. */
		datap = NULL;
		break;
	} /* switch */

	if (error == 0) {
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
	}

	/* Per-command cleanup and copyout of results. */
	switch (quota_cmd) {
	case Q_QUOTAON:
		if (datap != NULL)
			kfree(datap, MAXPATHLEN);
		break;
	case Q_GETQUOTA:
		/* uap->arg is a pointer to a dqblk structure we need to copy out to */
		if (error == 0) {
			if (proc_is64bit(p)) {
				struct user_dqblk my_dqblk64 = {.dqb_bhardlimit = 0};
				munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
				error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
			}
			else {
				error = copyout(datap, uap->arg, sizeof (struct dqblk));
			}
		}
		break;
	case Q_QUOTASTAT:
		/* uap->arg is a pointer to an integer */
		if (error == 0) {
			error = copyout(datap, uap->arg, sizeof(quota_status));
		}
		break;
	default:
		break;
	} /* switch */

	return (error);
}
2383 #else
int
quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
{
	/* Quota support compiled out: always unsupported. */
	return (EOPNOTSUPP);
}
2389 #endif /* QUOTA */
2390
2391 /*
2392 * Get filesystem statistics.
2393 *
2394 * Returns: 0 Success
2395 * namei:???
2396 * vfs_update_vfsstat:???
2397 * munge_statfs:EFAULT
2398 */
2399 /* ARGSUSED */
2400 int
2401 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
2402 {
2403 struct mount *mp;
2404 struct vfsstatfs *sp;
2405 int error;
2406 struct nameidata nd;
2407 vfs_context_t ctx = vfs_context_current();
2408 vnode_t vp;
2409
2410 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2411 UIO_USERSPACE, uap->path, ctx);
2412 error = namei(&nd);
2413 if (error)
2414 return (error);
2415 vp = nd.ni_vp;
2416 mp = vp->v_mount;
2417 sp = &mp->mnt_vfsstat;
2418 nameidone(&nd);
2419
2420 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
2421 if (error != 0) {
2422 vnode_put(vp);
2423 return (error);
2424 }
2425
2426 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2427 vnode_put(vp);
2428 return (error);
2429 }
2430
/*
 * Get filesystem statistics for the filesystem containing the open
 * file uap->fd; copy the (possibly refreshed) vfsstat out to uap->buf
 * in the caller's 32/64-bit format.
 */
/* ARGSUSED */
int
fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;

	AUDIT_ARG(fd, uap->fd);

	/* file_vnode() takes an fd reference; dropped below/at 'out'. */
	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	error = vnode_getwithref(vp);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);

	mp = vp->v_mount;
	if (!mp) {
		/* Vnode with no mount (e.g. dead): treat as a bad fd. */
		error = EBADF;
		goto out;
	}
	sp = &mp->mnt_vfsstat;
	if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
		goto out;
	}

	error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);

out:
	file_drop(uap->fd);
	vnode_put(vp);

	return (error);
}
2474
2475 /*
2476 * Common routine to handle copying of statfs64 data to user space
2477 */
2478 static int
2479 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2480 {
2481 int error;
2482 struct statfs64 sfs;
2483
2484 bzero(&sfs, sizeof(sfs));
2485
2486 sfs.f_bsize = sfsp->f_bsize;
2487 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2488 sfs.f_blocks = sfsp->f_blocks;
2489 sfs.f_bfree = sfsp->f_bfree;
2490 sfs.f_bavail = sfsp->f_bavail;
2491 sfs.f_files = sfsp->f_files;
2492 sfs.f_ffree = sfsp->f_ffree;
2493 sfs.f_fsid = sfsp->f_fsid;
2494 sfs.f_owner = sfsp->f_owner;
2495 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2496 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2497 sfs.f_fssubtype = sfsp->f_fssubtype;
2498 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2499 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2500 } else {
2501 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2502 }
2503 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2504 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2505
2506 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2507
2508 return(error);
2509 }
2510
2511 /*
2512 * Get file system statistics in 64-bit mode
2513 */
2514 int
2515 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2516 {
2517 struct mount *mp;
2518 struct vfsstatfs *sp;
2519 int error;
2520 struct nameidata nd;
2521 vfs_context_t ctxp = vfs_context_current();
2522 vnode_t vp;
2523
2524 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2525 UIO_USERSPACE, uap->path, ctxp);
2526 error = namei(&nd);
2527 if (error)
2528 return (error);
2529 vp = nd.ni_vp;
2530 mp = vp->v_mount;
2531 sp = &mp->mnt_vfsstat;
2532 nameidone(&nd);
2533
2534 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
2535 if (error != 0) {
2536 vnode_put(vp);
2537 return (error);
2538 }
2539
2540 error = statfs64_common(mp, sp, uap->buf);
2541 vnode_put(vp);
2542
2543 return (error);
2544 }
2545
/*
 * Get file system statistics in 64-bit mode: like fstatfs(), but the
 * result is copied out as a struct statfs64.
 */
int
fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
{
	struct vnode *vp;
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;

	AUDIT_ARG(fd, uap->fd);

	/* file_vnode() takes an fd reference; dropped below/at 'out'. */
	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	error = vnode_getwithref(vp);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);

	mp = vp->v_mount;
	if (!mp) {
		/* Vnode with no mount (e.g. dead): treat as a bad fd. */
		error = EBADF;
		goto out;
	}
	sp = &mp->mnt_vfsstat;
	if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
		goto out;
	}

	error = statfs64_common(mp, sp, uap->buf);

out:
	file_drop(uap->fd);
	vnode_put(vp);

	return (error);
}
2588
/* Iteration state shared with the getfsstat / __mac_getfsstat callbacks. */
struct getfsstat_struct {
	user_addr_t	sfsp;		/* next user buffer slot to fill */
	user_addr_t	*mp;		/* next MAC label pointer, or NULL */
	int		count;		/* mounts visited so far */
	int		maxcount;	/* user buffer capacity, in entries */
	int		flags;		/* MNT_NOWAIT / MNT_WAIT / MNT_DWAIT */
	int		error;		/* first error hit by a callback */
};
2597
2598
/*
 * vfs_iterate() callback for getfsstat()/__mac_getfsstat(): emit one
 * statfs record (and, optionally, one MAC label) per mount into the
 * user buffers described by 'arg'.  Mounts beyond the buffer capacity
 * are only counted, not copied.
 */
static int
getfsstat_callback(mount_t mp, void * arg)
{

	struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
	struct vfsstatfs *sp;
	int error, my_size;
	vfs_context_t ctx = vfs_context_current();

	if (fstp->sfsp && fstp->count < fstp->maxcount) {
		sp = &mp->mnt_vfsstat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh the
		 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
		 */
		if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
			(error = vfs_update_vfsstat(mp, ctx,
			    VFS_USER_EVENT))) {
			KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
			return(VFS_RETURNED);
		}

		/*
		 * Need to handle LP64 version of struct statfs
		 */
		error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
		if (error) {
			fstp->error = error;
			return(VFS_RETURNED_DONE);
		}
		/* munge_statfs() reported how many bytes it wrote out. */
		fstp->sfsp += my_size;

		if (fstp->mp) {
#if CONFIG_MACF
			error = mac_mount_label_get(mp, *fstp->mp);
			if (error) {
				fstp->error = error;
				return(VFS_RETURNED_DONE);
			}
#endif
			fstp->mp++;
		}
	}
	/* Count every mount, even ones that did not fit in the buffer. */
	fstp->count++;
	return(VFS_RETURNED);
}
2645
2646 /*
2647 * Get statistics on all filesystems.
2648 */
2649 int
2650 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2651 {
2652 struct __mac_getfsstat_args muap;
2653
2654 muap.buf = uap->buf;
2655 muap.bufsize = uap->bufsize;
2656 muap.mac = USER_ADDR_NULL;
2657 muap.macsize = 0;
2658 muap.flags = uap->flags;
2659
2660 return (__mac_getfsstat(p, &muap, retval));
2661 }
2662
2663 /*
2664 * __mac_getfsstat: Get MAC-related file system statistics
2665 *
2666 * Parameters: p (ignored)
2667 * uap User argument descriptor (see below)
2668 * retval Count of file system statistics (N stats)
2669 *
2670 * Indirect: uap->bufsize Buffer size
2671 * uap->macsize MAC info size
2672 * uap->buf Buffer where information will be returned
2673 * uap->mac MAC info
2674 * uap->flags File system flags
2675 *
2676 *
2677 * Returns: 0 Success
2678 * !0 Not success
2679 *
2680 */
2681 int
2682 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
2683 {
2684 user_addr_t sfsp;
2685 user_addr_t *mp;
2686 size_t count, maxcount, bufsize, macsize;
2687 struct getfsstat_struct fst;
2688
2689 bufsize = (size_t) uap->bufsize;
2690 macsize = (size_t) uap->macsize;
2691
2692 if (IS_64BIT_PROCESS(p)) {
2693 maxcount = bufsize / sizeof(struct user64_statfs);
2694 }
2695 else {
2696 maxcount = bufsize / sizeof(struct user32_statfs);
2697 }
2698 sfsp = uap->buf;
2699 count = 0;
2700
2701 mp = NULL;
2702
2703 #if CONFIG_MACF
2704 if (uap->mac != USER_ADDR_NULL) {
2705 u_int32_t *mp0;
2706 int error;
2707 unsigned int i;
2708
2709 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2710 if (count != maxcount)
2711 return (EINVAL);
2712
2713 /* Copy in the array */
2714 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2715 if (mp0 == NULL) {
2716 return (ENOMEM);
2717 }
2718
2719 error = copyin(uap->mac, mp0, macsize);
2720 if (error) {
2721 FREE(mp0, M_MACTEMP);
2722 return (error);
2723 }
2724
2725 /* Normalize to an array of user_addr_t */
2726 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
2727 if (mp == NULL) {
2728 FREE(mp0, M_MACTEMP);
2729 return (ENOMEM);
2730 }
2731
2732 for (i = 0; i < count; i++) {
2733 if (IS_64BIT_PROCESS(p))
2734 mp[i] = ((user_addr_t *)mp0)[i];
2735 else
2736 mp[i] = (user_addr_t)mp0[i];
2737 }
2738 FREE(mp0, M_MACTEMP);
2739 }
2740 #endif
2741
2742
2743 fst.sfsp = sfsp;
2744 fst.mp = mp;
2745 fst.flags = uap->flags;
2746 fst.count = 0;
2747 fst.error = 0;
2748 fst.maxcount = maxcount;
2749
2750
2751 vfs_iterate(0, getfsstat_callback, &fst);
2752
2753 if (mp)
2754 FREE(mp, M_MACTEMP);
2755
2756 if (fst.error ) {
2757 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2758 return(fst.error);
2759 }
2760
2761 if (fst.sfsp && fst.count > fst.maxcount)
2762 *retval = fst.maxcount;
2763 else
2764 *retval = fst.count;
2765 return (0);
2766 }
2767
/*
 * vfs_iterate() callback for getfsstat64(): emit one struct statfs64
 * per mount into the user buffer described by 'arg'; mounts beyond
 * the buffer capacity are only counted.
 */
static int
getfsstat64_callback(mount_t mp, void * arg)
{
	struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
	struct vfsstatfs *sp;
	int error;

	if (fstp->sfsp && fstp->count < fstp->maxcount) {
		sp = &mp->mnt_vfsstat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh the fsstat
		 * cache. MNT_WAIT overrides MNT_NOWAIT.
		 *
		 * We treat MNT_DWAIT as MNT_WAIT for all instances of
		 * getfsstat, since the constants are out of the same
		 * namespace.
		 */
		if (((fstp->flags & MNT_NOWAIT) == 0 ||
		    (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
		    (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
			KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
			return(VFS_RETURNED);
		}

		error = statfs64_common(mp, sp, fstp->sfsp);
		if (error) {
			fstp->error = error;
			return(VFS_RETURNED_DONE);
		}
		fstp->sfsp += sizeof(struct statfs64);
	}
	/* Count every mount, even ones that did not fit in the buffer. */
	fstp->count++;
	return(VFS_RETURNED);
}
2802
/*
 * Get statistics on all file systems in 64 bit mode.
 */
int
getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
{
	user_addr_t sfsp;
	int count, maxcount;
	struct getfsstat_struct fst;

	/*
	 * NOTE(review): maxcount is an int; a bufsize larger than
	 * INT_MAX entries would truncate here.  Presumably harmless
	 * (the callback simply stops copying), but worth confirming.
	 */
	maxcount = uap->bufsize / sizeof(struct statfs64);

	sfsp = uap->buf;
	count = 0;

	fst.sfsp = sfsp;
	fst.flags = uap->flags;
	fst.count = 0;
	fst.error = 0;
	fst.maxcount = maxcount;

	vfs_iterate(0, getfsstat64_callback, &fst);

	if (fst.error ) {
		KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
		return(fst.error);
	}

	/* More mounts than buffer slots: report the capacity, not the total. */
	if (fst.sfsp && fst.count > fst.maxcount)
		*retval = fst.maxcount;
	else
		*retval = fst.count;

	return (0);
}
2838
2839 /*
 * Gets the vnode associated with the file descriptor passed
 * in as input.
2842 *
2843 * INPUT
2844 * ctx - vfs context of caller
2845 * fd - file descriptor for which vnode is required.
2846 * vpp - Pointer to pointer to vnode to be returned.
2847 *
2848 * The vnode is returned with an iocount so any vnode obtained
2849 * by this call needs a vnode_put
2850 *
2851 */
2852 static int
2853 vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
2854 {
2855 int error;
2856 vnode_t vp;
2857 struct fileproc *fp;
2858 proc_t p = vfs_context_proc(ctx);
2859
2860 *vpp = NULLVP;
2861
2862 error = fp_getfvp(p, fd, &fp, &vp);
2863 if (error)
2864 return (error);
2865
2866 error = vnode_getwithref(vp);
2867 if (error) {
2868 (void)fp_drop(p, fd, fp, 0);
2869 return (error);
2870 }
2871
2872 (void)fp_drop(p, fd, fp, 0);
2873 *vpp = vp;
2874 return (error);
2875 }
2876
/*
 * Wrapper function around namei to start lookup from a directory
 * specified by a file descriptor ni_dirfd.
 *
 * In addition to all the errors returned by namei, this call can
 * return ENOTDIR if the file descriptor does not refer to a directory,
 * and EBADF if the file descriptor is not valid.
 */
int
nameiat(struct nameidata *ndp, int dirfd)
{
	/*
	 * Only consult dirfd for a fresh, relative lookup that has not
	 * already been given a starting directory (USEDVP) and is not
	 * the continuation of an earlier lookup (NAMEI_CONTLOOKUP).
	 */
	if ((dirfd != AT_FDCWD) &&
	    !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
	    !(ndp->ni_cnd.cn_flags & USEDVP)) {
		int error = 0;
		char c;

		/* Peek at the first byte of the path to detect absolute paths. */
		if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
			error = copyin(ndp->ni_dirp, &c, sizeof(char));
			if (error)
				return (error);
		} else {
			c = *((char *)(ndp->ni_dirp));
		}

		if (c != '/') {
			vnode_t dvp_at;

			error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
			    &dvp_at);
			if (error)
				return (error);

			if (vnode_vtype(dvp_at) != VDIR) {
				vnode_put(dvp_at);
				return (ENOTDIR);
			}

			/* Start the lookup at dvp_at instead of the CWD. */
			ndp->ni_dvp = dvp_at;
			ndp->ni_cnd.cn_flags |= USEDVP;
			error = namei(ndp);
			ndp->ni_cnd.cn_flags &= ~USEDVP;
			vnode_put(dvp_at);
			return (error);
		}
	}

	return (namei(ndp));
}
2926
/*
 * Change current working directory to a given file descriptor.
 *
 * Shared implementation for fchdir() (per_thread == 0) and
 * __pthread_fchdir() (per_thread == 1).  With per_thread set, an fd
 * of -1 clears any thread-local working directory instead.
 */
/* ARGSUSED */
static int
common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
{
	struct filedesc *fdp = p->p_fd;
	vnode_t vp;
	vnode_t tdp;
	vnode_t tvp;
	struct mount *mp;
	int error;
	vfs_context_t ctx = vfs_context_current();

	AUDIT_ARG(fd, uap->fd);
	if (per_thread && uap->fd == -1) {
		/*
		 * Switching back from per-thread to per process CWD; verify we
		 * in fact have one before proceeding.  The only success case
		 * for this code path is to return 0 preemptively after zapping
		 * the thread structure contents.
		 */
		thread_t th = vfs_context_thread(ctx);
		if (th) {
			uthread_t uth = get_bsdthread_info(th);
			tvp = uth->uu_cdir;
			uth->uu_cdir = NULLVP;
			if (tvp != NULLVP) {
				vnode_rele(tvp);
				return (0);
			}
		}
		return (EBADF);
	}

	if ( (error = file_vnode(uap->fd, &vp)) )
		return(error);
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_chdir(ctx, vp);
	if (error)
		goto out;
#endif
	error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
	if (error)
		goto out;

	/*
	 * If a filesystem is mounted on this directory, descend to the
	 * root of the topmost mounted filesystem instead.
	 */
	while (!error && (mp = vp->v_mountedhere) != NULL) {
		if (vfs_busy(mp, LK_NOWAIT)) {
			error = EACCES;
			goto out;
		}
		error = VFS_ROOT(mp, &tdp, ctx);
		vfs_unbusy(mp);
		if (error)
			break;
		vnode_put(vp);
		vp = tdp;
	}
	if (error)
		goto out;
	/* Hold the new CWD with a usecount before dropping the iocount. */
	if ( (error = vnode_ref(vp)) )
		goto out;
	vnode_put(vp);

	if (per_thread) {
		/* Install as this thread's private CWD. */
		thread_t th = vfs_context_thread(ctx);
		if (th) {
			uthread_t uth = get_bsdthread_info(th);
			tvp = uth->uu_cdir;
			uth->uu_cdir = vp;
			OSBitOrAtomic(P_THCWD, &p->p_flag);
		} else {
			vnode_rele(vp);
			return (ENOENT);
		}
	} else {
		/* Install as the process-wide CWD under the fd lock. */
		proc_fdlock(p);
		tvp = fdp->fd_cdir;
		fdp->fd_cdir = vp;
		proc_fdunlock(p);
	}

	/* Release the previous working directory, if there was one. */
	if (tvp)
		vnode_rele(tvp);
	file_drop(uap->fd);

	return (0);
out:
	vnode_put(vp);
	file_drop(uap->fd);

	return(error);
}
3033
int
fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
{
	/* Process-wide chdir to the directory open at uap->fd. */
	return common_fchdir(p, uap, 0);
}
3039
int
__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
{
	/* Per-thread chdir to the directory open at uap->fd. */
	return common_fchdir(p, (void *)uap, 1);
}
3045
/*
 * Change current working directory (".").
 *
 * Shared implementation for chdir() (per_thread == 0) and
 * __pthread_chdir() (per_thread == 1).
 *
 * Returns:	0			Success
 *	change_dir:ENOTDIR
 *	change_dir:???
 *	vnode_ref:ENOENT		No such file or directory
 */
/* ARGSUSED */
static int
common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
{
	struct filedesc *fdp = p->p_fd;
	int error;
	struct nameidata nd;
	vnode_t tvp;
	vfs_context_t ctx = vfs_context_current();

	NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = change_dir(&nd, ctx);
	if (error)
		return (error);
	/* Hold the new CWD with a usecount before dropping the iocount. */
	if ( (error = vnode_ref(nd.ni_vp)) ) {
		vnode_put(nd.ni_vp);
		return (error);
	}
	/*
	 * drop the iocount we picked up in change_dir
	 */
	vnode_put(nd.ni_vp);

	if (per_thread) {
		/* Install as this thread's private CWD. */
		thread_t th = vfs_context_thread(ctx);
		if (th) {
			uthread_t uth = get_bsdthread_info(th);
			tvp = uth->uu_cdir;
			uth->uu_cdir = nd.ni_vp;
			OSBitOrAtomic(P_THCWD, &p->p_flag);
		} else {
			vnode_rele(nd.ni_vp);
			return (ENOENT);
		}
	} else {
		/* Install as the process-wide CWD under the fd lock. */
		proc_fdlock(p);
		tvp = fdp->fd_cdir;
		fdp->fd_cdir = nd.ni_vp;
		proc_fdunlock(p);
	}

	/* Release the previous working directory, if there was one. */
	if (tvp)
		vnode_rele(tvp);

	return (0);
}
3101
3102
3103 /*
3104 * chdir
3105 *
3106 * Change current working directory (".") for the entire process
3107 *
3108 * Parameters: p Process requesting the call
3109 * uap User argument descriptor (see below)
3110 * retval (ignored)
3111 *
3112 * Indirect parameters: uap->path Directory path
3113 *
3114 * Returns: 0 Success
3115 * common_chdir: ENOTDIR
3116 * common_chdir: ENOENT No such file or directory
3117 * common_chdir: ???
3118 *
3119 */
int
chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
{
	/* Process-wide chdir; see common_chdir(). */
	return common_chdir(p, (void *)uap, 0);
}
3125
3126 /*
3127 * __pthread_chdir
3128 *
3129 * Change current working directory (".") for a single thread
3130 *
3131 * Parameters: p Process requesting the call
3132 * uap User argument descriptor (see below)
3133 * retval (ignored)
3134 *
3135 * Indirect parameters: uap->path Directory path
3136 *
3137 * Returns: 0 Success
3138 * common_chdir: ENOTDIR
3139 * common_chdir: ENOENT No such file or directory
3140 * common_chdir: ???
3141 *
3142 */
int
__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
{
	/* Per-thread chdir; see common_chdir(). */
	return common_chdir(p, (void *)uap, 1);
}
3148
3149
/*
 * Change notion of root (``/'') directory.
 */
/* ARGSUSED */
int
chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	int error;
	struct nameidata nd;
	vnode_t tvp;
	vfs_context_t ctx = vfs_context_current();

	/* Must be superuser to change the root directory. */
	if ((error = suser(kauth_cred_get(), &p->p_acflag)))
		return (error);

	NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = change_dir(&nd, ctx);
	if (error)
		return (error);

#if CONFIG_MACF
	error = mac_vnode_check_chroot(ctx, nd.ni_vp,
	    &nd.ni_cnd);
	if (error) {
		vnode_put(nd.ni_vp);
		return (error);
	}
#endif

	/* Hold the new root with a usecount before dropping the iocount. */
	if ( (error = vnode_ref(nd.ni_vp)) ) {
		vnode_put(nd.ni_vp);
		return (error);
	}
	vnode_put(nd.ni_vp);

	/* Swap in the new root under the fd lock; release the old one after. */
	proc_fdlock(p);
	tvp = fdp->fd_rdir;
	fdp->fd_rdir = nd.ni_vp;
	fdp->fd_flags |= FD_CHROOT;
	proc_fdunlock(p);

	if (tvp != NULL)
		vnode_rele(tvp);

	return (0);
}
3198
3199 /*
3200 * Common routine for chroot and chdir.
3201 *
3202 * Returns: 0 Success
3203 * ENOTDIR Not a directory
3204 * namei:??? [anything namei can return]
3205 * vnode_authorize:??? [anything vnode_authorize can return]
3206 */
3207 static int
3208 change_dir(struct nameidata *ndp, vfs_context_t ctx)
3209 {
3210 vnode_t vp;
3211 int error;
3212
3213 if ((error = namei(ndp)))
3214 return (error);
3215 nameidone(ndp);
3216 vp = ndp->ni_vp;
3217
3218 if (vp->v_type != VDIR) {
3219 vnode_put(vp);
3220 return (ENOTDIR);
3221 }
3222
3223 #if CONFIG_MACF
3224 error = mac_vnode_check_chdir(ctx, vp);
3225 if (error) {
3226 vnode_put(vp);
3227 return (error);
3228 }
3229 #endif
3230
3231 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3232 if (error) {
3233 vnode_put(vp);
3234 return (error);
3235 }
3236
3237 return (error);
3238 }
3239
/*
 * Allocate the per-fd vnode data (for directories) associated with
 * the file glob.
 */
struct fd_vn_data *
fg_vn_data_alloc(void)
{
	struct fd_vn_data *fvdata;

	/* Allocate per fd vnode data */
	MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
	       M_FD_VN_DATA, M_WAITOK | M_ZERO);
	/* M_WAITOK allocation: fvdata is relied upon to be non-NULL here. */
	lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
	return fvdata;
}
3254
/*
 * Free the vnode data (for directories) associated with the file glob.
 */
void
fg_vn_data_free(void *fgvndata)
{
	struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;

	/* Release the per-fd buffer, if one was attached. */
	if (fvdata->fv_buf)
		FREE(fvdata->fv_buf, M_FD_DIRBUF);
	lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
	FREE(fvdata, M_FD_VN_DATA);
}
3268
3269 /*
3270 * Check permissions, allocate an open file structure,
3271 * and call the device open routine if any.
3272 *
3273 * Returns: 0 Success
3274 * EINVAL
3275 * EINTR
3276 * falloc:ENFILE
3277 * falloc:EMFILE
3278 * falloc:ENOMEM
3279 * vn_open_auth:???
3280 * dupfdopen:???
3281 * VNOP_ADVLOCK:???
3282 * vnode_setsize:???
3283 *
3284 * XXX Need to implement uid, gid
3285 */
int
open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
    struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
    int32_t *retval)
{
	proc_t p = vfs_context_proc(ctx);
	uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
	struct fileproc *fp;
	vnode_t vp;
	int flags, oflags;
	int type, indx, error;
	struct flock lf;
	int no_controlling_tty = 0;
	int deny_controlling_tty = 0;
	struct session *sessp = SESSION_NULL;

	oflags = uflags;

	/* O_RDONLY|O_WRONLY|O_RDWR all at once is invalid. */
	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return(EINVAL);
	flags = FFLAGS(uflags);

	AUDIT_ARG(fflags, oflags);
	AUDIT_ARG(mode, vap->va_mode);

	/* Reserve an fd slot and fileproc before doing the lookup. */
	if ((error = falloc_withalloc(p,
	    &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
		return (error);
	}
	/* Encode the reserved fd for the fdopen() hack detected below. */
	uu->uu_dupfd = -indx - 1;

	if (!(p->p_flag & P_CONTROLT)) {
		sessp = proc_session(p);
		no_controlling_tty = 1;
		/*
		 * If conditions would warrant getting a controlling tty if
		 * the device being opened is a tty (see ttyopen in tty.c),
		 * but the open flags deny it, set a flag in the session to
		 * prevent it.
		 */
		if (SESS_LEADER(p, sessp) &&
		    sessp->s_ttyvp == NULL &&
		    (flags & O_NOCTTY)) {
			session_lock(sessp);
			sessp->s_flags |= S_NOCTTY;
			session_unlock(sessp);
			deny_controlling_tty = 1;
		}
	}

	if ((error = vn_open_auth(ndp, &flags, vap))) {
		if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){	/* XXX from fdopen */
			/* /dev/fd-style open: duplicate an existing descriptor. */
			if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
				fp_drop(p, indx, NULL, 0);
				*retval = indx;
				if (deny_controlling_tty) {
					session_lock(sessp);
					sessp->s_flags &= ~S_NOCTTY;
					session_unlock(sessp);
				}
				if (sessp != SESSION_NULL)
					session_rele(sessp);
				return (0);
			}
		}
		if (error == ERESTART)
			error = EINTR;
		fp_free(p, indx, fp);

		if (deny_controlling_tty) {
			session_lock(sessp);
			sessp->s_flags &= ~S_NOCTTY;
			session_unlock(sessp);
		}
		if (sessp != SESSION_NULL)
			session_rele(sessp);
		return (error);
	}
	uu->uu_dupfd = 0;
	vp = ndp->ni_vp;

	/* Wire the opened vnode into the new fileglob. */
	fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
	fp->f_fglob->fg_ops = &vnops;
	fp->f_fglob->fg_data = (caddr_t)vp;

#if CONFIG_PROTECT
	if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) {
		if (vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) {
			/* Raw encrypted mode requested by the caller. */
			fp->f_fglob->fg_flag |= FENCRYPTED;
		}
	}
#endif

	/* O_EXLOCK/O_SHLOCK: take a whole-file flock-style advisory lock. */
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
#if CONFIG_MACF
		error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
		    F_SETLK, &lf);
		if (error)
			goto bad;
#endif
		if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
			goto bad;
		fp->f_fglob->fg_flag |= FHASLOCK;
	}

	/* try to truncate by setting the size attribute */
	if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
		goto bad;

	/*
	 * If the open flags denied the acquisition of a controlling tty,
	 * clear the flag in the session structure that prevented the lower
	 * level code from assigning one.
	 */
	if (deny_controlling_tty) {
		session_lock(sessp);
		sessp->s_flags &= ~S_NOCTTY;
		session_unlock(sessp);
	}

	/*
	 * If a controlling tty was set by the tty line discipline, then we
	 * want to set the vp of the tty into the session structure. We have
	 * a race here because we can't get to the vp for the tp in ttyopen,
	 * because it's not passed as a parameter in the open path.
	 */
	if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
		vnode_t ttyvp;

		/*
		 * NOTE(review): ttyvp receives the previous s_ttyvp but is
		 * never used or released afterwards — confirm whether the
		 * old value requires a reference drop.
		 */
		session_lock(sessp);
		ttyvp = sessp->s_ttyvp;
		sessp->s_ttyvp = vp;
		sessp->s_ttyvid = vnode_vid(vp);
		session_unlock(sessp);
	}

	/*
	 * For directories we hold some additional information in the fd.
	 */
	if (vnode_vtype(vp) == VDIR) {
		fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
	} else {
		fp->f_fglob->fg_vn_data = NULL;
	}

	vnode_put(vp);

	/* Publish the fd: apply close-on-exec/fork flags and release the slot. */
	proc_fdlock(p);
	if (flags & O_CLOEXEC)
		*fdflags(p, indx) |= UF_EXCLOSE;
	if (flags & O_CLOFORK)
		*fdflags(p, indx) |= UF_FORKCLOSE;
	procfdtbl_releasefd(p, indx, NULL);
	fp_drop(p, indx, fp, 1);
	proc_fdunlock(p);

	*retval = indx;

	if (sessp != SESSION_NULL)
		session_rele(sessp);
	return (0);
bad:
	if (deny_controlling_tty) {
		session_lock(sessp);
		sessp->s_flags &= ~S_NOCTTY;
		session_unlock(sessp);
	}
	if (sessp != SESSION_NULL)
		session_rele(sessp);

	/* Close with the credentials the file was opened with. */
	struct vfs_context context = *vfs_context_current();
	context.vc_ucred = fp->f_fglob->fg_cred;

	/* Undo any advisory lock taken above before closing. */
	if ((fp->f_fglob->fg_flag & FHASLOCK) &&
	    (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;

		(void)VNOP_ADVLOCK(
		    vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
	}

	vn_close(vp, fp->f_fglob->fg_flag, &context);
	vnode_put(vp);
	fp_free(p, indx, fp);

	return (error);
}
3486
/*
 * While most of the *at syscall handlers can call nameiat() which
 * is a wrapper around namei, the use of namei and initialisation
 * of nameidata are far removed and in different functions - namei
 * gets called in vn_open_auth for open1. So we'll just do here what
 * nameiat() does.
 */
static int
open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
    struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
    int dirfd)
{
	/* Only consult dirfd for a relative path with no preset start dir. */
	if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
		int error;
		char c;

		/* Peek at the first byte of the path to detect absolute paths. */
		if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
			error = copyin(ndp->ni_dirp, &c, sizeof(char));
			if (error)
				return (error);
		} else {
			c = *((char *)(ndp->ni_dirp));
		}

		if (c != '/') {
			vnode_t dvp_at;

			error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
			    &dvp_at);
			if (error)
				return (error);

			if (vnode_vtype(dvp_at) != VDIR) {
				vnode_put(dvp_at);
				return (ENOTDIR);
			}

			/*
			 * Start the lookup at dvp_at instead of the CWD.
			 * NOTE(review): unlike nameiat(), USEDVP is not
			 * cleared after open1() returns — confirm callers
			 * reinitialize ndp before reuse.
			 */
			ndp->ni_dvp = dvp_at;
			ndp->ni_cnd.cn_flags |= USEDVP;
			error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
			    retval);
			vnode_put(dvp_at);
			return (error);
		}
	}

	return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
}
3535
3536 /*
3537 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3538 *
3539 * Parameters: p Process requesting the open
3540 * uap User argument descriptor (see below)
3541 * retval Pointer to an area to receive the
 * return value from the system call
3543 *
3544 * Indirect: uap->path Path to open (same as 'open')
3545 * uap->flags Flags to open (same as 'open'
3546 * uap->uid UID to set, if creating
3547 * uap->gid GID to set, if creating
3548 * uap->mode File mode, if creating (same as 'open')
3549 * uap->xsecurity ACL to set, if creating
3550 *
3551 * Returns: 0 Success
3552 * !0 errno value
3553 *
3554 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3555 *
 * XXX: We should enumerate the possible errno values here, and where
3557 * in the code they originated.
3558 */
3559 int
3560 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
3561 {
3562 struct filedesc *fdp = p->p_fd;
3563 int ciferror;
3564 kauth_filesec_t xsecdst;
3565 struct vnode_attr va;
3566 struct nameidata nd;
3567 int cmode;
3568
3569 AUDIT_ARG(owner, uap->uid, uap->gid);
3570
3571 xsecdst = NULL;
3572 if ((uap->xsecurity != USER_ADDR_NULL) &&
3573 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3574 return ciferror;
3575
3576 VATTR_INIT(&va);
3577 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3578 VATTR_SET(&va, va_mode, cmode);
3579 if (uap->uid != KAUTH_UID_NONE)
3580 VATTR_SET(&va, va_uid, uap->uid);
3581 if (uap->gid != KAUTH_GID_NONE)
3582 VATTR_SET(&va, va_gid, uap->gid);
3583 if (xsecdst != NULL)
3584 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3585
3586 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3587 uap->path, vfs_context_current());
3588
3589 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3590 fileproc_alloc_init, NULL, retval);
3591 if (xsecdst != NULL)
3592 kauth_filesec_free(xsecdst);
3593
3594 return ciferror;
3595 }
3596
3597 /*
3598 * Go through the data-protected atomically controlled open (2)
3599 *
3600 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3601 */
3602 int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3603 int flags = uap->flags;
3604 int class = uap->class;
3605 int dpflags = uap->dpflags;
3606
3607 /*
3608 * Follow the same path as normal open(2)
3609 * Look up the item if it exists, and acquire the vnode.
3610 */
3611 struct filedesc *fdp = p->p_fd;
3612 struct vnode_attr va;
3613 struct nameidata nd;
3614 int cmode;
3615 int error;
3616
3617 VATTR_INIT(&va);
3618 /* Mask off all but regular access permissions */
3619 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3620 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3621
3622 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3623 uap->path, vfs_context_current());
3624
3625 /*
3626 * Initialize the extra fields in vnode_attr to pass down our
3627 * extra fields.
3628 * 1. target cprotect class.
3629 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3630 */
3631 if (flags & O_CREAT) {
3632 VATTR_SET(&va, va_dataprotect_class, class);
3633 }
3634
3635 if (dpflags & O_DP_GETRAWENCRYPTED) {
3636 if ( flags & (O_RDWR | O_WRONLY)) {
3637 /* Not allowed to write raw encrypted bytes */
3638 return EINVAL;
3639 }
3640 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3641 }
3642
3643 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3644 fileproc_alloc_init, NULL, retval);
3645
3646 return error;
3647 }
3648
3649 static int
3650 openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
3651 int fd, enum uio_seg segflg, int *retval)
3652 {
3653 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
3654 struct vnode_attr va;
3655 struct nameidata nd;
3656 int cmode;
3657
3658 VATTR_INIT(&va);
3659 /* Mask off all but regular access permissions */
3660 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3661 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3662
3663 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
3664 segflg, path, ctx);
3665
3666 return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
3667 retval, fd));
3668 }
3669
3670 int
3671 open(proc_t p, struct open_args *uap, int32_t *retval)
3672 {
3673 __pthread_testcancel(1);
3674 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3675 }
3676
3677 int
3678 open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3679 int32_t *retval)
3680 {
3681 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3682 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3683 }
3684
3685 int
3686 openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3687 int32_t *retval)
3688 {
3689 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3690 uap->mode, uap->fd, UIO_USERSPACE, retval));
3691 }
3692
3693 int
3694 openat(proc_t p, struct openat_args *uap, int32_t *retval)
3695 {
3696 __pthread_testcancel(1);
3697 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3698 }
3699
3700 /*
3701 * openbyid_np: open a file given a file system id and a file system object id
3702 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
 * for file systems that don't support object ids, it is a node id (uint64_t).
3704 *
3705 * Parameters: p Process requesting the open
3706 * uap User argument descriptor (see below)
3707 * retval Pointer to an area to receive the
 * return value from the system call
3709 *
3710 * Indirect: uap->path Path to open (same as 'open')
3711 *
3712 * uap->fsid id of target file system
3713 * uap->objid id of target file system object
3714 * uap->flags Flags to open (same as 'open')
3715 *
3716 * Returns: 0 Success
3717 * !0 errno value
3718 *
3719 *
 * XXX: We should enumerate the possible errno values here, and where
3721 * in the code they originated.
3722 */
3723 int
3724 openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
3725 {
3726 fsid_t fsid;
3727 uint64_t objid;
3728 int error;
3729 char *buf = NULL;
3730 int buflen = MAXPATHLEN;
3731 int pathlen = 0;
3732 vfs_context_t ctx = vfs_context_current();
3733
3734 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
3735 return (error);
3736 }
3737
3738 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3739 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
3740 return (error);
3741 }
3742
3743 AUDIT_ARG(value32, fsid.val[0]);
3744 AUDIT_ARG(value64, objid);
3745
3746 /*resolve path from fsis, objid*/
3747 do {
3748 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
3749 if (buf == NULL) {
3750 return (ENOMEM);
3751 }
3752
3753 error = fsgetpath_internal(
3754 ctx, fsid.val[0], objid,
3755 buflen, buf, &pathlen);
3756
3757 if (error) {
3758 FREE(buf, M_TEMP);
3759 buf = NULL;
3760 }
3761 } while (error == ENOSPC && (buflen += MAXPATHLEN));
3762
3763 if (error) {
3764 return error;
3765 }
3766
3767 buf[pathlen] = 0;
3768
3769 error = openat_internal(
3770 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
3771
3772 FREE(buf, M_TEMP);
3773
3774 return error;
3775 }
3776
3777
3778 /*
3779 * Create a special file.
3780 */
3781 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3782
/*
 * mknod: create a device special file (block, character, or "bad" node),
 * or a FIFO via mkfifo1().
 *
 * Parameters:	p	Process requesting the node creation
 *		uap	User argument descriptor: path, mode (S_IFMT type
 *			bits + permissions), and dev (device number)
 *		retval	(Ignored)
 *
 * Returns:	0	Success
 *		!0	errno value
 *
 * Notes:	Requires superuser privilege except for the FIFO case,
 *		which is handed off to mkfifo1() before the suser() check.
 */
int
mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
{
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;
	vnode_t	vp, dvp;

	VATTR_INIT(&va);
	/* Requested permissions, filtered through the process umask. */
	VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
	VATTR_SET(&va, va_rdev, uap->dev);

	/* If it's a mknod() of a FIFO, call mkfifo1() instead */
	if ((uap->mode & S_IFMT) == S_IFIFO)
		return(mkfifo1(ctx, uap->path, &va));

	AUDIT_ARG(mode, uap->mode);
	AUDIT_ARG(value32, uap->dev);

	/* Only the superuser may create device nodes. */
	if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
		return (error);
	NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
		UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* The target must not already exist. */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}

	/* Map the S_IFMT type bits onto a vnode type. */
	switch (uap->mode & S_IFMT) {
	case S_IFMT:	/* used by badsect to flag bad sectors */
		VATTR_SET(&va, va_type, VBAD);
		break;
	case S_IFCHR:
		VATTR_SET(&va, va_type, VCHR);
		break;
	case S_IFBLK:
		VATTR_SET(&va, va_type, VBLK);
		break;
	default:
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);
	if (error)
		goto out;
#endif

	/* The caller must be allowed to add entries to the parent dir. */
	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out;

	if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
		goto out;

	if (vp) {
		int	update_flags = 0;

		// Make sure the name & parent pointers are hooked up
		if (vp->v_name == NULL)
			update_flags |= VNODE_UPDATE_NAME;
		if (vp->v_parent == NULLVP)
			update_flags |= VNODE_UPDATE_PARENT;

		if (update_flags)
			vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
		add_fsevent(FSE_CREATE_FILE, ctx,
		    FSE_ARG_VNODE, vp,
		    FSE_ARG_DONE);
#endif
	}

out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	vnode_put(dvp);

	return (error);
}
3878
3879 /*
3880 * Create a named pipe.
3881 *
3882 * Returns: 0 Success
3883 * EEXIST
3884 * namei:???
3885 * vnode_authorize:???
3886 * vn_create:???
3887 */
/*
 * FIFO-creation core shared by mkfifo(2), mkfifo_extended(2) and the
 * S_IFIFO case of mknod(2).  'vap' carries the caller-prepared creation
 * attributes; va_type is forced to VFIFO here.  'upath' is always a
 * user-space address (lookup uses UIO_USERSPACE).
 */
static int
mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
{
	vnode_t	vp, dvp;
	int error;
	struct nameidata nd;

	NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
		UIO_USERSPACE, upath, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* check that this is a new file and authorize addition */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}
	VATTR_SET(vap, va_type, VFIFO);

	if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
		goto out;

	error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	/* vn_create may have returned a vnode even on error; drop both refs. */
	if (vp)
		vnode_put(vp);
	vnode_put(dvp);

	return error;
}
3927
3928
3929 /*
3930 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
3931 *
3932 * Parameters: p Process requesting the open
3933 * uap User argument descriptor (see below)
3934 * retval (Ignored)
3935 *
3936 * Indirect: uap->path Path to fifo (same as 'mkfifo')
3937 * uap->uid UID to set
3938 * uap->gid GID to set
3939 * uap->mode File mode to set (same as 'mkfifo')
3940 * uap->xsecurity ACL to set, if creating
3941 *
3942 * Returns: 0 Success
3943 * !0 errno value
3944 *
3945 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3946 *
 * XXX: We should enumerate the possible errno values here, and where
3948 * in the code they originated.
3949 */
3950 int
3951 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
3952 {
3953 int ciferror;
3954 kauth_filesec_t xsecdst;
3955 struct vnode_attr va;
3956
3957 AUDIT_ARG(owner, uap->uid, uap->gid);
3958
3959 xsecdst = KAUTH_FILESEC_NONE;
3960 if (uap->xsecurity != USER_ADDR_NULL) {
3961 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
3962 return ciferror;
3963 }
3964
3965 VATTR_INIT(&va);
3966 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3967 if (uap->uid != KAUTH_UID_NONE)
3968 VATTR_SET(&va, va_uid, uap->uid);
3969 if (uap->gid != KAUTH_GID_NONE)
3970 VATTR_SET(&va, va_gid, uap->gid);
3971 if (xsecdst != KAUTH_FILESEC_NONE)
3972 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3973
3974 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
3975
3976 if (xsecdst != KAUTH_FILESEC_NONE)
3977 kauth_filesec_free(xsecdst);
3978 return ciferror;
3979 }
3980
3981 /* ARGSUSED */
3982 int
3983 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
3984 {
3985 struct vnode_attr va;
3986
3987 VATTR_INIT(&va);
3988 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3989
3990 return(mkfifo1(vfs_context_current(), uap->path, &va));
3991 }
3992
3993
3994 static char *
3995 my_strrchr(char *p, int ch)
3996 {
3997 char *save;
3998
3999 for (save = NULL;; ++p) {
4000 if (*p == ch)
4001 save = p;
4002 if (!*p)
4003 return(save);
4004 }
4005 /* NOTREACHED */
4006 }
4007
4008 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4009
/*
 * Best-effort path reconstruction for fsevents / kauth notification.
 *
 * Builds "<path-of-dvp>/<leafname>" into 'path' (a buffer of '_len'
 * bytes; assumed to be at least MAXPATHLEN -- TODO confirm all callers).
 * Never fails: if the full path cannot be obtained, *truncated_path is
 * set to 1 and the closest obtainable ancestor path (possibly just the
 * mount point or "/") is written instead.
 *
 * Returns the length of the string written, including the NUL.
 */
int
safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
{
	int ret, len = _len;

	*truncated_path = 0;
	ret = vn_getpath(dvp, path, &len);
	if (ret == 0 && len < (MAXPATHLEN - 1)) {
		if (leafname) {
			/* Overwrite the NUL left by vn_getpath with the
			 * separator, then append the leaf component. */
			path[len-1] = '/';
			len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
			if (len > MAXPATHLEN) {
				char *ptr;

				// the string got truncated!
				*truncated_path = 1;
				ptr = my_strrchr(path, '/');
				if (ptr) {
					*ptr = '\0';   // chop off the string at the last directory component
				}
				len = strlen(path) + 1;
			}
		}
	} else if (ret == 0) {
		/* Got a path but no room to append the leaf: report truncation. */
		*truncated_path = 1;
	} else if (ret != 0) {
		struct vnode *mydvp=dvp;

		if (ret != ENOSPC) {
			printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
			       dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
		}
		*truncated_path = 1;

		/* Walk up the parent chain until some ancestor's path fits. */
		do {
			if (mydvp->v_parent != NULL) {
				mydvp = mydvp->v_parent;
			} else if (mydvp->v_mount) {
				strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
				break;
			} else {
				// no parent and no mount point?  only thing is to punt and say "/" changed
				strlcpy(path, "/", _len);
				len = 2;
				mydvp = NULL;
			}

			if (mydvp == NULL) {
				break;
			}

			len = _len;
			ret = vn_getpath(mydvp, path, &len);
		} while (ret == ENOSPC);
	}

	return len;
}
4068
4069
4070 /*
4071 * Make a hard file link.
4072 *
4073 * Returns: 0 Success
4074 * EPERM
4075 * EEXIST
4076 * EXDEV
4077 * namei:???
4078 * vnode_authorize:???
4079 * VNOP_LINK:???
4080 */
4081 /* ARGSUSED */
/*
 * Shared core of link(2)/linkat(2): create a hard link at (fd2, 'link')
 * pointing at the object named by (fd1, 'path').  'flag' honors
 * AT_SYMLINK_FOLLOW for the source lookup.  Returns 0 or an errno.
 */
static int
linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
    user_addr_t link, int flag, enum uio_seg segflg)
{
	vnode_t	vp, dvp, lvp;
	struct nameidata nd;
	int follow;
	int error;
#if CONFIG_FSE
	fse_info finfo;
#endif
	int need_event, has_listeners;
	char *target_path = NULL;
	int truncated=0;

	vp = dvp = lvp = NULLVP;

	/* look up the object we are linking to */
	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
	NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
	    segflg, path, ctx);

	error = nameiat(&nd, fd1);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/*
	 * Normally, linking to directories is not supported.
	 * However, some file systems may have limited support.
	 */
	if (vp->v_type == VDIR) {
		if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
			error = EPERM;   /* POSIX */
			goto out;
		}
		/* Linking to a directory requires ownership. */
		if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
			struct vnode_attr dva;

			VATTR_INIT(&dva);
			VATTR_WANTED(&dva, va_uid);
			if (vnode_getattr(vp, &dva, ctx) != 0 ||
			    !VATTR_IS_SUPPORTED(&dva, va_uid) ||
			    (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
				error = EACCES;
				goto out;
			}
		}
	}

	/*
	 * lookup the target node: the nameidata is reused for a second
	 * lookup, re-armed as a CREATE operation on the link path.
	 */
#if CONFIG_TRIGGERS
	nd.ni_op = OP_LINK;
#endif
	nd.ni_cnd.cn_nameiop = CREATE;
	nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
	nd.ni_dirp = link;
	error = nameiat(&nd, fd2);
	if (error != 0)
		goto out;
	dvp = nd.ni_dvp;
	lvp = nd.ni_vp;

#if CONFIG_MACF
	if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
		goto out2;
#endif

	/* or to anything that kauth doesn't want us to (eg. immutable items) */
	if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
		goto out2;

	/* target node must not exist */
	if (lvp != NULLVP) {
		error = EEXIST;
		goto out2;
	}
	/* cannot link across mountpoints */
	if (vnode_mount(vp) != vnode_mount(dvp)) {
		error = EXDEV;
		goto out2;
	}

	/* authorize creation of the target node */
	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out2;

	/* and finally make the link */
	error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
	if (error)
		goto out2;

#if CONFIG_MACF
	(void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
#endif

#if CONFIG_FSE
	need_event = need_fsevent(FSE_CREATE_FILE, dvp);
#else
	need_event = 0;
#endif
	has_listeners = kauth_authorize_fileop_has_listeners();

	/* Post-hoc notification: fsevents and/or kauth fileop listeners. */
	if (need_event || has_listeners) {
		char *link_to_path = NULL;
		int len, link_name_len;

		/* build the path to the new link file */
		GET_PATH(target_path);
		if (target_path == NULL) {
			error = ENOMEM;
			goto out2;
		}

		len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);

		if (has_listeners) {
			/* build the path to file we are linking to */
			GET_PATH(link_to_path);
			if (link_to_path == NULL) {
				error = ENOMEM;
				goto out2;
			}

			link_name_len = MAXPATHLEN;
			if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
				/*
				 * Call out to allow 3rd party notification of rename.
				 * Ignore result of kauth_authorize_fileop call.
				 */
				kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
						       (uintptr_t)link_to_path,
						       (uintptr_t)target_path);
			}
			if (link_to_path != NULL) {
				RELEASE_PATH(link_to_path);
			}
		}
#if CONFIG_FSE
		if (need_event) {
			/* construct fsevent */
			if (get_fse_info(vp, &finfo, ctx) == 0) {
				if (truncated) {
					finfo.mode |= FSE_TRUNCATED_PATH;
				}

				// build the path to the destination of the link
				add_fsevent(FSE_CREATE_FILE, ctx,
					    FSE_ARG_STRING, len, target_path,
					    FSE_ARG_FINFO, &finfo,
					    FSE_ARG_DONE);
			}
			if (vp->v_parent) {
				add_fsevent(FSE_STAT_CHANGED, ctx,
					    FSE_ARG_VNODE, vp->v_parent,
					    FSE_ARG_DONE);
			}
		}
#endif
	}
out2:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);
	if (target_path != NULL) {
		RELEASE_PATH(target_path);
	}
out:
	if (lvp)
		vnode_put(lvp);
	if (dvp)
		vnode_put(dvp);
	vnode_put(vp);
	return (error);
}
4262
4263 int
4264 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4265 {
4266 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4267 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4268 }
4269
4270 int
4271 linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4272 {
4273 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4274 return (EINVAL);
4275
4276 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4277 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4278 }
4279
4280 /*
4281 * Make a symbolic link.
4282 *
4283 * We could add support for ACLs here too...
4284 */
4285 /* ARGSUSED */
/*
 * Shared core of symlink(2)/symlinkat(2): create a symlink at (fd, 'link')
 * whose contents are the string at 'path_data'.  For user-space callers
 * the link string is copied into a MAXPATHLEN zone buffer (freed at
 * 'out'); kernel-space callers pass the string directly.
 */
static int
symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
    user_addr_t link, enum uio_seg segflg)
{
	struct vnode_attr va;
	char *path;
	int error;
	struct nameidata nd;
	vnode_t	vp, dvp;
	uint32_t dfflags;	// Directory file flags
	size_t dummy=0;
	proc_t p;

	error = 0;
	if (UIO_SEG_IS_USER_SPACE(segflg)) {
		MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
		error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
	} else {
		path = (char *)path_data;
	}
	if (error)
		goto out;
	AUDIT_ARG(text, path);	/* This is the link string */

	NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
	    segflg, link, ctx);

	error = nameiat(&nd, fd);
	if (error)
		goto out;
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* Symlinks get ACCESSPERMS filtered through the umask. */
	p = vfs_context_proc(ctx);
	VATTR_INIT(&va);
	VATTR_SET(&va, va_type, VLNK);
	VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);

	/*
	 * Handle inheritance of restricted flag
	 */
	error = vnode_flags(dvp, &dfflags, ctx);
	if (error)
		goto skipit;
	if (dfflags & SF_RESTRICTED)
		VATTR_SET(&va, va_flags, SF_RESTRICTED);

#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    dvp, &nd.ni_cnd, &va);
#endif
	if (error != 0) {
		goto skipit;
	}

	if (vp != NULL) {
		error = EEXIST;
		goto skipit;
	}

	/*
	 * The following steps funnel through the same 'error' variable:
	 * each runs only if everything before it succeeded.
	 */
	/* authorize */
	if (error == 0)
		error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
	/* get default ownership, etc. */
	if (error == 0)
		error = vnode_authattr_new(dvp, &va, 0, ctx);
	if (error == 0)
		error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);

#if CONFIG_MACF
	if (error == 0)
		error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
#endif

	/* do fallback attribute handling */
	if (error == 0)
		error = vnode_setattr_fallback(vp, &va, ctx);

	if (error == 0) {
		int update_flags = 0;

		/* VNOP_SYMLINK is not required to return the new vnode;
		 * if it didn't, look it up so identity/fsevents work. */
		if (vp == NULL) {
			nd.ni_cnd.cn_nameiop = LOOKUP;
#if CONFIG_TRIGGERS
			nd.ni_op = OP_LOOKUP;
#endif
			nd.ni_cnd.cn_flags = 0;
			error = nameiat(&nd, fd);
			vp = nd.ni_vp;

			if (vp == NULL)
				goto skipit;
		}

#if 0  /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
		/* call out to allow 3rd party notification of rename.
		 * Ignore result of kauth_authorize_fileop call.
		 */
		if (kauth_authorize_fileop_has_listeners() &&
		    namei(&nd) == 0) {
			char *new_link_path = NULL;
			int len;

			/* build the path to the new link file */
			new_link_path = get_pathbuff();
			len = MAXPATHLEN;
			vn_getpath(dvp, new_link_path, &len);
			if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
				new_link_path[len - 1] = '/';
				strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
			}

			kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
					   (uintptr_t)path, (uintptr_t)new_link_path);
			if (new_link_path != NULL)
				release_pathbuff(new_link_path);
		}
#endif
		// Make sure the name & parent pointers are hooked up
		if (vp->v_name == NULL)
			update_flags |= VNODE_UPDATE_NAME;
		if (vp->v_parent == NULLVP)
			update_flags |= VNODE_UPDATE_PARENT;

		if (update_flags)
			vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
		add_fsevent(FSE_CREATE_FILE, ctx,
		    FSE_ARG_VNODE, vp,
		    FSE_ARG_DONE);
#endif
	}

skipit:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	vnode_put(dvp);
out:
	/* Free the copied-in link string (user-space callers only). */
	if (path && (path != (char *)path_data))
		FREE_ZONE(path, MAXPATHLEN, M_NAMEI);

	return (error);
}
4436
4437 int
4438 symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4439 {
4440 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4441 uap->link, UIO_USERSPACE));
4442 }
4443
4444 int
4445 symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4446 __unused int32_t *retval)
4447 {
4448 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4449 uap->path2, UIO_USERSPACE));
4450 }
4451
4452 /*
4453 * Delete a whiteout from the filesystem.
4454 * No longer supported.
4455 */
4456 int
4457 undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
4458 {
4459 return (ENOTSUP);
4460 }
4461
4462 /*
4463 * Delete a name from the filesystem.
4464 */
4465 /* ARGSUSED */
/*
 * Shared core of unlink(2)/unlinkat(2)/delete(2) and the kernel-internal
 * unlink1().  Removes the object named by (fd, 'path_arg'), optionally
 * anchored at 'start_dvp' (which overrides 'fd').  'unlink_flags' carries
 * VNODE_REMOVE_* modifiers.  Supports compound-remove filesystems and
 * redrives the lookup on ENOENT races (bounded by
 * MAX_AUTHORIZE_ENOENT_RETRIES).
 */
static int
unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
    user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
{
	struct nameidata nd;
	vnode_t	vp, dvp;
	int error;
	struct componentname *cnp;
	char  *path = NULL;
	int  len=0;
#if CONFIG_FSE
	fse_info  finfo;
	struct vnode_attr va;
#endif
	int flags;
	int need_event;
	int has_listeners;
	int truncated_path;
	int batched;
	struct vnode_attr *vap;
	int do_retry;
	int retry_count = 0;
	int cn_flags;

	cn_flags = LOCKPARENT;
	if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
		cn_flags |= AUDITVNPATH1;
	/* If a starting dvp is passed, it trumps any fd passed. */
	if (start_dvp)
		cn_flags |= USEDVP;

#if NAMEDRSRCFORK
	/* unlink or delete is allowed on rsrc forks and named streams */
	cn_flags |= CN_ALLOWRSRCFORK;
#endif

retry:
	/* Per-attempt state is reset here; 'path' survives across retries. */
	do_retry = 0;
	flags = 0;
	need_event = 0;
	has_listeners = 0;
	truncated_path = 0;
	vap = NULL;

	NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);

	nd.ni_dvp = start_dvp;
	nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
	cnp = &nd.ni_cnd;

lookup_continue:
	error = nameiat(&nd, fd);
	if (error)
		return (error);

	dvp = nd.ni_dvp;
	vp = nd.ni_vp;


	/* With Carbon delete semantics, busy files cannot be deleted */
	if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
		flags |= VNODE_REMOVE_NODELETEBUSY;
	}

	/* Skip any potential upcalls if told to. */
	if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
		flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
	}

	if (vp) {
		batched = vnode_compound_remove_available(vp);
		/*
		 * The root of a mounted filesystem cannot be deleted.
		 */
		if (vp->v_flag & VROOT) {
			error = EBUSY;
		}

		if (!batched) {
			error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
			if (error) {
				/* Racing hardlink lookup may yield a stale
				 * ENOENT; redrive a bounded number of times. */
				if (error == ENOENT &&
				    retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
					do_retry = 1;
					retry_count++;
				}
				goto out;
			}
		}
	} else {
		/* No vp: filesystem must support compound remove (lookup
		 * and removal happen in one VNOP). */
		batched = 1;

		if (!vnode_compound_remove_available(dvp)) {
			panic("No vp, but no compound remove?");
		}
	}

#if CONFIG_FSE
	need_event = need_fsevent(FSE_DELETE, dvp);
	if (need_event) {
		if (!batched) {
			if ((vp->v_flag & VISHARDLINK) == 0) {
				/* XXX need to get these data in batched VNOP */
				get_fse_info(vp, &finfo, ctx);
			}
		} else {
			error = vfs_get_notify_attributes(&va);
			if (error) {
				goto out;
			}

			vap = &va;
		}
	}
#endif
	has_listeners = kauth_authorize_fileop_has_listeners();
	if (need_event || has_listeners) {
		if (path == NULL) {
			GET_PATH(path);
			if (path == NULL) {
				error = ENOMEM;
				goto out;
			}
		}
		len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
	}

#if NAMEDRSRCFORK
	if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
		error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
	else
#endif
	{
		error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
		vp = nd.ni_vp;
		if (error == EKEEPLOOKING) {
			/* Compound remove needs another lookup pass. */
			if (!batched) {
				panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
			}

			if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
				panic("EKEEPLOOKING, but continue flag not set?");
			}

			if (vnode_isdir(vp)) {
				error = EISDIR;
				goto out;
			}
			goto lookup_continue;
		} else if (error == ENOENT && batched &&
		    retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
			/*
			 * For compound VNOPs, the authorization callback may
			 * return ENOENT in case of racing hardlink lookups
			 * hitting the name cache, redrive the lookup.
			 */
			do_retry = 1;
			retry_count += 1;
			goto out;
		}
	}

	/*
	 * Call out to allow 3rd party notification of delete.
	 * Ignore result of kauth_authorize_fileop call.
	 */
	if (!error) {
		if (has_listeners) {
			kauth_authorize_fileop(vfs_context_ucred(ctx),
				KAUTH_FILEOP_DELETE,
				(uintptr_t)vp,
				(uintptr_t)path);
		}

		if (vp->v_flag & VISHARDLINK) {
			//
			// if a hardlink gets deleted we want to blow away the
			// v_parent link because the path that got us to this
			// instance of the link is no longer valid.  this will
			// force the next call to get the path to ask the file
			// system instead of just following the v_parent link.
			//
			vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
		}

#if CONFIG_FSE
		if (need_event) {
			if (vp->v_flag & VISHARDLINK) {
				get_fse_info(vp, &finfo, ctx);
			} else if (vap) {
				vnode_get_fse_info_from_vap(vp, &finfo, vap);
			}
			if (truncated_path) {
				finfo.mode |= FSE_TRUNCATED_PATH;
			}
			add_fsevent(FSE_DELETE, ctx,
				FSE_ARG_STRING, len, path,
				FSE_ARG_FINFO, &finfo,
				FSE_ARG_DONE);
		}
#endif
	}

out:
	if (path != NULL)
		RELEASE_PATH(path);

#if NAMEDRSRCFORK
	/* recycle the deleted rsrc fork vnode to force a reclaim, which
	 * will cause its shadow file to go away if necessary.
	 */
	if (vp && (vnode_isnamedstream(vp)) &&
	    (vp->v_parent != NULLVP) &&
	    vnode_isshadow(vp)) {
		vnode_recycle(vp);
	}
#endif
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);
	vnode_put(dvp);
	if (vp) {
		vnode_put(vp);
	}

	if (do_retry) {
		goto retry;
	}

	return (error);
}
4699
4700 int
4701 unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
4702 enum uio_seg segflg, int unlink_flags)
4703 {
4704 return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
4705 unlink_flags));
4706 }
4707
4708 /*
4709 * Delete a name from the filesystem using Carbon semantics.
4710 */
4711 int
4712 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
4713 {
4714 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4715 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
4716 }
4717
4718 /*
4719 * Delete a name from the filesystem using POSIX semantics.
4720 */
4721 int
4722 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
4723 {
4724 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4725 uap->path, UIO_USERSPACE, 0));
4726 }
4727
4728 int
4729 unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
4730 {
4731 if (uap->flag & ~AT_REMOVEDIR)
4732 return (EINVAL);
4733
4734 if (uap->flag & AT_REMOVEDIR)
4735 return (rmdirat_internal(vfs_context_current(), uap->fd,
4736 uap->path, UIO_USERSPACE));
4737 else
4738 return (unlinkat_internal(vfs_context_current(), uap->fd,
4739 NULLVP, uap->path, UIO_USERSPACE, 0));
4740 }
4741
/*
 * Reposition read/write file offset.
 *
 * Takes an iocount on the vnode for the duration of the operation and
 * updates fp->f_fglob->fg_offset on success; the new offset is returned
 * to the caller via *retval.
 */
int
lseek(proc_t p, struct lseek_args *uap, off_t *retval)
{
	struct fileproc *fp;
	vnode_t vp;
	struct vfs_context *ctx;
	off_t offset = uap->offset, file_size;
	int error;

	/*
	 * fp_getfvp() fails with ENOTSUP when the fd does not refer to a
	 * vnode (e.g. a socket); POSIX wants ESPIPE for non-seekable
	 * descriptors, so translate.
	 */
	if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
		if (error == ENOTSUP)
			return (ESPIPE);
		return (error);
	}
	/* FIFOs are never seekable. */
	if (vnode_isfifo(vp)) {
		file_drop(uap->fd);
		return(ESPIPE);
	}


	ctx = vfs_context_current();
#if CONFIG_MACF
	/*
	 * lseek(fd, 0, SEEK_CUR) only queries the current offset without
	 * changing it, so it gets the weaker "get offset" MAC check;
	 * anything else is a "change offset" operation.
	 */
	if (uap->whence == L_INCR && uap->offset == 0)
		error = mac_file_check_get_offset(vfs_context_ucred(ctx),
		    fp->f_fglob);
	else
		error = mac_file_check_change_offset(vfs_context_ucred(ctx),
		    fp->f_fglob);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}
#endif
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}

	/* Compute the candidate absolute offset according to 'whence'. */
	switch (uap->whence) {
	case L_INCR:
		offset += fp->f_fglob->fg_offset;
		break;
	case L_XTND:
		if ((error = vnode_size(vp, &file_size, ctx)) != 0)
			break;
		offset += file_size;
		break;
	case L_SET:
		break;
	default:
		error = EINVAL;
	}
	if (error == 0) {
		/*
		 * NOTE(review): this overflow test relies on the signed
		 * off_t addition above wrapping to a negative value —
		 * technically undefined behavior in strict C; assumes the
		 * kernel is built with wrapping signed arithmetic.
		 */
		if (uap->offset > 0 && offset < 0) {
			/* Incremented/relative move past max size */
			error = EOVERFLOW;
		} else {
			/*
			 * Allow negative offsets on character devices, per
			 * POSIX 1003.1-2001. Most likely for writing disk
			 * labels.
			 */
			if (offset < 0 && vp->v_type != VCHR) {
				/* Decremented/relative move before start */
				error = EINVAL;
			} else {
				/* Success */
				fp->f_fglob->fg_offset = offset;
				*retval = fp->f_fglob->fg_offset;
			}
		}
	}

	/*
	 * An lseek can affect whether data is "available to read." Use
	 * hint of NOTE_NONE so no EVFILT_VNODE events fire
	 */
	post_event_if_success(vp, error, NOTE_NONE);
	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}
4827
4828
4829 /*
4830 * Check access permissions.
4831 *
4832 * Returns: 0 Success
4833 * vnode_authorize:???
4834 */
4835 static int
4836 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
4837 {
4838 kauth_action_t action;
4839 int error;
4840
4841 /*
4842 * If just the regular access bits, convert them to something
4843 * that vnode_authorize will understand.
4844 */
4845 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4846 action = 0;
4847 if (uflags & R_OK)
4848 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4849 if (uflags & W_OK) {
4850 if (vnode_isdir(vp)) {
4851 action |= KAUTH_VNODE_ADD_FILE |
4852 KAUTH_VNODE_ADD_SUBDIRECTORY;
4853 /* might want delete rights here too */
4854 } else {
4855 action |= KAUTH_VNODE_WRITE_DATA;
4856 }
4857 }
4858 if (uflags & X_OK) {
4859 if (vnode_isdir(vp)) {
4860 action |= KAUTH_VNODE_SEARCH;
4861 } else {
4862 action |= KAUTH_VNODE_EXECUTE;
4863 }
4864 }
4865 } else {
4866 /* take advantage of definition of uflags */
4867 action = uflags >> 8;
4868 }
4869
4870 #if CONFIG_MACF
4871 error = mac_vnode_check_access(ctx, vp, uflags);
4872 if (error)
4873 return (error);
4874 #endif /* MAC */
4875
4876 /* action == 0 means only check for existence */
4877 if (action != 0) {
4878 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
4879 } else {
4880 error = 0;
4881 }
4882
4883 return(error);
4884 }
4885
4886
4887
/*
 * access_extended: Check access permissions in bulk.
 *
 * Description:	uap->entries		Pointer to an array of accessx
 *					descriptor structs, plus one or
 *					more NULL terminated strings (see
 *					"Notes" section below).
 *		uap->size		Size of the area pointed to by
 *					uap->entries.
 *		uap->results		Pointer to the results array.
 *
 * Returns:	0			Success
 *		ENOMEM			Insufficient memory
 *		EINVAL			Invalid arguments
 *		namei:EFAULT		Bad address
 *		namei:ENAMETOOLONG	Filename too long
 *		namei:ENOENT		No such file or directory
 *		namei:ELOOP		Too many levels of symbolic links
 *		namei:EBADF		Bad file descriptor
 *		namei:ENOTDIR		Not a directory
 *		namei:???
 *		access1:
 *
 * Implicit returns:
 *		uap->results		Array contents modified
 *
 * Notes:	The uap->entries are structured as an arbitrary length array
 *		of accessx descriptors, followed by one or more NULL terminated
 *		strings
 *
 *			struct accessx_descriptor[0]
 *			...
 *			struct accessx_descriptor[n]
 *			char name_data[0];
 *
 *		We determine the entry count by walking the buffer containing
 *		the uap->entries argument descriptor.  For each descriptor we
 *		see, the valid values for the offset ad_name_offset will be
 *		in the byte range:
 *
 *			[ uap->entries + sizeof(struct accessx_descriptor) ]
 *						to
 *				[ uap->entries + uap->size - 2 ]
 *
 *		since we must have at least one string, and the string must
 *		be at least one character plus the NULL terminator in length.
 *
 * XXX:		Need to support the check-as uid argument
 */
int
access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
{
	struct accessx_descriptor *input = NULL;
	errno_t *result = NULL;
	errno_t error = 0;
	int wantdelete = 0;
	unsigned int desc_max, desc_actual, i, j;
	struct vfs_context context;
	struct nameidata nd;
	int niopts;
	vnode_t vp = NULL;
	vnode_t dvp = NULL;
#define ACCESSX_MAX_DESCR_ON_STACK 10
	struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];

	/* NULL cred marker so the 'out' path knows whether to unref. */
	context.vc_ucred = NULL;

	/*
	 * Validate parameters; if valid, copy the descriptor array and string
	 * arguments into local memory.  Before proceeding, the following
	 * conditions must have been met:
	 *
	 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
	 * o There must be sufficient room in the request for at least one
	 *   descriptor and a one byte NUL terminated string.
	 * o The allocation of local storage must not fail.
	 */
	if (uap->size > ACCESSX_MAX_TABLESIZE)
		return(ENOMEM);
	if (uap->size < (sizeof(struct accessx_descriptor) + 2))
		return(EINVAL);
	/* Small requests are served from the stack; larger ones allocate. */
	if (uap->size <= sizeof (stack_input)) {
		input = stack_input;
	} else {
		MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
		if (input == NULL) {
			error = ENOMEM;
			goto out;
		}
	}
	error = copyin(uap->entries, input, uap->size);
	if (error)
		goto out;

	AUDIT_ARG(opaque, input, uap->size);

	/*
	 * Force NUL termination of the copyin buffer to avoid namei() running
	 * off the end.  If the caller passes us bogus data, they may get a
	 * bogus result.
	 */
	((char *)input)[uap->size - 1] = 0;

	/*
	 * Access is defined as checking against the process' real identity,
	 * even if operations are checking the effective identity.  This
	 * requires that we use a local vfs context.
	 */
	context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
	context.vc_thread = current_thread();

	/*
	 * Find out how many entries we have, so we can allocate the result
	 * array by walking the list and adjusting the count downward by the
	 * earliest string offset we see.
	 */
	desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
	desc_actual = desc_max;
	for (i = 0; i < desc_actual; i++) {
		/*
		 * Take the offset to the name string for this entry and
		 * convert to an input array index, which would be one off
		 * the end of the array if this entry was the lowest-addressed
		 * name string.
		 */
		j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);

		/*
		 * An offset greater than the max allowable offset is an error.
		 * It is also an error for any valid entry to point
		 * to a location prior to the end of the current entry, if
		 * it's not a reference to the string of the previous entry.
		 */
		if (j > desc_max || (j != 0 && j <= i)) {
			error = EINVAL;
			goto out;
		}

		/*
		 * An offset of 0 means use the previous descriptor's offset;
		 * this is used to chain multiple requests for the same file
		 * to avoid multiple lookups.
		 */
		if (j == 0) {
			/* This is not valid for the first entry */
			if (i == 0) {
				error = EINVAL;
				goto out;
			}
			continue;
		}

		/*
		 * If the offset of the string for this descriptor is before
		 * what we believe is the current actual last descriptor,
		 * then we need to adjust our estimate downward; this permits
		 * the string table following the last descriptor to be out
		 * of order relative to the descriptor list.
		 */
		if (j < desc_actual)
			desc_actual = j;
	}

	/*
	 * We limit the actual number of descriptors we are willing to process
	 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS.  If the number being
	 * requested does not exceed this limit,
	 */
	if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
		error = ENOMEM;
		goto out;
	}
	MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
	if (result == NULL) {
		error = ENOMEM;
		goto out;
	}

	/*
	 * Do the work by iterating over the descriptor entries we know to
	 * at least appear to contain valid data.
	 */
	error = 0;
	for (i = 0; i < desc_actual; i++) {
		/*
		 * If the ad_name_offset is 0, then we use the previous
		 * results to make the check; otherwise, we are looking up
		 * a new file name.
		 */
		if (input[i].ad_name_offset != 0) {
			/* discard old vnodes */
			if (vp) {
				vnode_put(vp);
				vp = NULL;
			}
			if (dvp) {
				vnode_put(dvp);
				dvp = NULL;
			}

			/*
			 * Scan forward in the descriptor list to see if we
			 * need the parent vnode.  We will need it if we are
			 * deleting, since we must have rights to remove
			 * entries in the parent directory, as well as the
			 * rights to delete the object itself.
			 */
			wantdelete = input[i].ad_flags & _DELETE_OK;
			for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
				if (input[j].ad_flags & _DELETE_OK)
					wantdelete = 1;

			niopts = FOLLOW | AUDITVNPATH1;

			/* need parent for vnode_authorize for deletion test */
			if (wantdelete)
				niopts |= WANTPARENT;

			/* do the lookup */
			NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
			    CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
			    &context);
			error = namei(&nd);
			if (!error) {
				vp = nd.ni_vp;
				if (wantdelete)
					dvp = nd.ni_dvp;
			}
			nameidone(&nd);
		}

		/*
		 * Handle lookup errors.  "Expected" errors are recorded
		 * per-entry; anything else aborts the whole call.
		 */
		switch(error) {
		case ENOENT:
		case EACCES:
		case EPERM:
		case ENOTDIR:
			result[i] = error;
			break;
		case 0:
			/* run this access check */
			result[i] = access1(vp, dvp, input[i].ad_flags, &context);
			break;
		default:
			/* fatal lookup error */

			goto out;
		}
	}

	AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);

	/* copy out results */
	error = copyout(result, uap->results, desc_actual * sizeof(errno_t));

out:
	if (input && input != stack_input)
		FREE(input, M_TEMP);
	if (result)
		FREE(result, M_TEMP);
	if (vp)
		vnode_put(vp);
	if (dvp)
		vnode_put(dvp);
	if (IS_VALID_CRED(context.vc_ucred))
		kauth_cred_unref(&context.vc_ucred);
	return(error);
}
5158
5159
/*
 * Common implementation of access(2)/faccessat(2).
 *
 * Returns:	0			Success
 *		namei:EFAULT		Bad address
 *		namei:ENAMETOOLONG	Filename too long
 *		namei:ENOENT		No such file or directory
 *		namei:ELOOP		Too many levels of symbolic links
 *		namei:EBADF		Bad file descriptor
 *		namei:ENOTDIR		Not a directory
 *		namei:???
 *		access1:
 */
static int
faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
    int flag, enum uio_seg segflg)
{
	int error;
	struct nameidata nd;
	int niopts;
	struct vfs_context context;
#if NAMEDRSRCFORK
	int is_namedstream = 0;
#endif

	/*
	 * Unless the AT_EACCESS option is used, Access is defined as checking
	 * against the process' real identity, even if operations are checking
	 * the effective identity.  So we need to tweak the credential
	 * in the context for that case.
	 *
	 * Note: only the real-identity branch takes a cred reference; the
	 * 'out' path must mirror this when deciding to unref.
	 */
	if (!(flag & AT_EACCESS))
		context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
	else
		context.vc_ucred = ctx->vc_ucred;
	context.vc_thread = ctx->vc_thread;


	niopts = FOLLOW | AUDITVNPATH1;
	/* need parent for vnode_authorize for deletion test */
	if (amode & _DELETE_OK)
		niopts |= WANTPARENT;
	NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
	    path, &context);

#if NAMEDRSRCFORK
	/* access(F_OK) calls are allowed for resource forks. */
	if (amode == F_OK)
		nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
#endif
	error = nameiat(&nd, fd);
	if (error)
		goto out;

#if NAMEDRSRCFORK
	/* Grab reference on the shadow stream file vnode to
	 * force an inactive on release which will mark it
	 * for recycle.
	 */
	if (vnode_isnamedstream(nd.ni_vp) &&
	    (nd.ni_vp->v_parent != NULLVP) &&
	    vnode_isshadow(nd.ni_vp)) {
		is_namedstream = 1;
		vnode_ref(nd.ni_vp);
	}
#endif

	error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);

#if NAMEDRSRCFORK
	if (is_namedstream) {
		vnode_rele(nd.ni_vp);
	}
#endif

	vnode_put(nd.ni_vp);
	/* nd.ni_dvp holds an iocount only when WANTPARENT was requested */
	if (amode & _DELETE_OK)
		vnode_put(nd.ni_dvp);
	nameidone(&nd);

out:
	if (!(flag & AT_EACCESS))
		kauth_cred_unref(&context.vc_ucred);
	return (error);
}
5243
5244 int
5245 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5246 {
5247 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5248 uap->path, uap->flags, 0, UIO_USERSPACE));
5249 }
5250
5251 int
5252 faccessat(__unused proc_t p, struct faccessat_args *uap,
5253 __unused int32_t *retval)
5254 {
5255 if (uap->flag & ~AT_EACCESS)
5256 return (EINVAL);
5257
5258 return (faccessat_internal(vfs_context_current(), uap->fd,
5259 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5260 }
5261
5262 /*
5263 * Returns: 0 Success
5264 * EFAULT
5265 * copyout:EFAULT
5266 * namei:???
5267 * vn_stat:???
5268 */
5269 static int
5270 fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5271 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5272 enum uio_seg segflg, int fd, int flag)
5273 {
5274 struct nameidata nd;
5275 int follow;
5276 union {
5277 struct stat sb;
5278 struct stat64 sb64;
5279 } source;
5280 union {
5281 struct user64_stat user64_sb;
5282 struct user32_stat user32_sb;
5283 struct user64_stat64 user64_sb64;
5284 struct user32_stat64 user32_sb64;
5285 } dest;
5286 caddr_t sbp;
5287 int error, my_size;
5288 kauth_filesec_t fsec;
5289 size_t xsecurity_bufsize;
5290 void * statptr;
5291
5292 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5293 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5294 segflg, path, ctx);
5295
5296 #if NAMEDRSRCFORK
5297 int is_namedstream = 0;
5298 /* stat calls are allowed for resource forks. */
5299 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5300 #endif
5301 error = nameiat(&nd, fd);
5302 if (error)
5303 return (error);
5304 fsec = KAUTH_FILESEC_NONE;
5305
5306 statptr = (void *)&source;
5307
5308 #if NAMEDRSRCFORK
5309 /* Grab reference on the shadow stream file vnode to
5310 * force an inactive on release which will mark it
5311 * for recycle.
5312 */
5313 if (vnode_isnamedstream(nd.ni_vp) &&
5314 (nd.ni_vp->v_parent != NULLVP) &&
5315 vnode_isshadow(nd.ni_vp)) {
5316 is_namedstream = 1;
5317 vnode_ref(nd.ni_vp);
5318 }
5319 #endif
5320
5321 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
5322
5323 #if NAMEDRSRCFORK
5324 if (is_namedstream) {
5325 vnode_rele(nd.ni_vp);
5326 }
5327 #endif
5328 vnode_put(nd.ni_vp);
5329 nameidone(&nd);
5330
5331 if (error)
5332 return (error);
5333 /* Zap spare fields */
5334 if (isstat64 != 0) {
5335 source.sb64.st_lspare = 0;
5336 source.sb64.st_qspare[0] = 0LL;
5337 source.sb64.st_qspare[1] = 0LL;
5338 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5339 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
5340 my_size = sizeof(dest.user64_sb64);
5341 sbp = (caddr_t)&dest.user64_sb64;
5342 } else {
5343 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
5344 my_size = sizeof(dest.user32_sb64);
5345 sbp = (caddr_t)&dest.user32_sb64;
5346 }
5347 /*
5348 * Check if we raced (post lookup) against the last unlink of a file.
5349 */
5350 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5351 source.sb64.st_nlink = 1;
5352 }
5353 } else {
5354 source.sb.st_lspare = 0;
5355 source.sb.st_qspare[0] = 0LL;
5356 source.sb.st_qspare[1] = 0LL;
5357 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5358 munge_user64_stat(&source.sb, &dest.user64_sb);
5359 my_size = sizeof(dest.user64_sb);
5360 sbp = (caddr_t)&dest.user64_sb;
5361 } else {
5362 munge_user32_stat(&source.sb, &dest.user32_sb);
5363 my_size = sizeof(dest.user32_sb);
5364 sbp = (caddr_t)&dest.user32_sb;
5365 }
5366
5367 /*
5368 * Check if we raced (post lookup) against the last unlink of a file.
5369 */
5370 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5371 source.sb.st_nlink = 1;
5372 }
5373 }
5374 if ((error = copyout(sbp, ub, my_size)) != 0)
5375 goto out;
5376
5377 /* caller wants extended security information? */
5378 if (xsecurity != USER_ADDR_NULL) {
5379
5380 /* did we get any? */
5381 if (fsec == KAUTH_FILESEC_NONE) {
5382 if (susize(xsecurity_size, 0) != 0) {
5383 error = EFAULT;
5384 goto out;
5385 }
5386 } else {
5387 /* find the user buffer size */
5388 xsecurity_bufsize = fusize(xsecurity_size);
5389
5390 /* copy out the actual data size */
5391 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5392 error = EFAULT;
5393 goto out;
5394 }
5395
5396 /* if the caller supplied enough room, copy out to it */
5397 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5398 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5399 }
5400 }
5401 out:
5402 if (fsec != KAUTH_FILESEC_NONE)
5403 kauth_filesec_free(fsec);
5404 return (error);
5405 }
5406
5407 /*
5408 * stat_extended: Get file status; with extended security (ACL).
5409 *
5410 * Parameters: p (ignored)
5411 * uap User argument descriptor (see below)
5412 * retval (ignored)
5413 *
5414 * Indirect: uap->path Path of file to get status from
5415 * uap->ub User buffer (holds file status info)
5416 * uap->xsecurity ACL to get (extended security)
5417 * uap->xsecurity_size Size of ACL
5418 *
5419 * Returns: 0 Success
5420 * !0 errno value
5421 *
5422 */
5423 int
5424 stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5425 __unused int32_t *retval)
5426 {
5427 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5428 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5429 0));
5430 }
5431
5432 /*
5433 * Returns: 0 Success
5434 * fstatat_internal:??? [see fstatat_internal() in this file]
5435 */
5436 int
5437 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
5438 {
5439 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5440 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
5441 }
5442
5443 int
5444 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
5445 {
5446 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5447 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
5448 }
5449
5450 /*
5451 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5452 *
5453 * Parameters: p (ignored)
5454 * uap User argument descriptor (see below)
5455 * retval (ignored)
5456 *
5457 * Indirect: uap->path Path of file to get status from
5458 * uap->ub User buffer (holds file status info)
5459 * uap->xsecurity ACL to get (extended security)
5460 * uap->xsecurity_size Size of ACL
5461 *
5462 * Returns: 0 Success
5463 * !0 errno value
5464 *
5465 */
5466 int
5467 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
5468 {
5469 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5470 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5471 0));
5472 }
5473
5474 /*
5475 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5476 *
5477 * Parameters: p (ignored)
5478 * uap User argument descriptor (see below)
5479 * retval (ignored)
5480 *
5481 * Indirect: uap->path Path of file to get status from
5482 * uap->ub User buffer (holds file status info)
5483 * uap->xsecurity ACL to get (extended security)
5484 * uap->xsecurity_size Size of ACL
5485 *
5486 * Returns: 0 Success
5487 * !0 errno value
5488 *
5489 */
5490 int
5491 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
5492 {
5493 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5494 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5495 AT_SYMLINK_NOFOLLOW));
5496 }
5497
5498 /*
5499 * Get file status; this version does not follow links.
5500 */
5501 int
5502 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
5503 {
5504 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5505 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5506 }
5507
5508 int
5509 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
5510 {
5511 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5512 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5513 }
5514
5515 /*
5516 * lstat64_extended: Get file status; can handle large inode numbers; does not
5517 * follow links; with extended security (ACL).
5518 *
5519 * Parameters: p (ignored)
5520 * uap User argument descriptor (see below)
5521 * retval (ignored)
5522 *
5523 * Indirect: uap->path Path of file to get status from
5524 * uap->ub User buffer (holds file status info)
5525 * uap->xsecurity ACL to get (extended security)
5526 * uap->xsecurity_size Size of ACL
5527 *
5528 * Returns: 0 Success
5529 * !0 errno value
5530 *
5531 */
5532 int
5533 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
5534 {
5535 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5536 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5537 AT_SYMLINK_NOFOLLOW));
5538 }
5539
5540 int
5541 fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5542 {
5543 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5544 return (EINVAL);
5545
5546 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5547 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5548 }
5549
5550 int
5551 fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5552 __unused int32_t *retval)
5553 {
5554 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5555 return (EINVAL);
5556
5557 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5558 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
5559 }
5560
5561 /*
5562 * Get configurable pathname variables.
5563 *
5564 * Returns: 0 Success
5565 * namei:???
5566 * vn_pathconf:???
5567 *
5568 * Notes: Global implementation constants are intended to be
5569 * implemented in this function directly; all other constants
5570 * are per-FS implementation, and therefore must be handled in
5571 * each respective FS, instead.
5572 *
5573 * XXX We implement some things globally right now that should actually be
5574 * XXX per-FS; we will need to deal with this at some point.
5575 */
5576 /* ARGSUSED */
5577 int
5578 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
5579 {
5580 int error;
5581 struct nameidata nd;
5582 vfs_context_t ctx = vfs_context_current();
5583
5584 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
5585 UIO_USERSPACE, uap->path, ctx);
5586 error = namei(&nd);
5587 if (error)
5588 return (error);
5589
5590 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
5591
5592 vnode_put(nd.ni_vp);
5593 nameidone(&nd);
5594 return (error);
5595 }
5596
5597 /*
5598 * Return target name of a symbolic link.
5599 */
5600 /* ARGSUSED */
5601 static int
5602 readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
5603 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
5604 int *retval)
5605 {
5606 vnode_t vp;
5607 uio_t auio;
5608 int error;
5609 struct nameidata nd;
5610 char uio_buf[ UIO_SIZEOF(1) ];
5611
5612 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5613 seg, path, ctx);
5614
5615 error = nameiat(&nd, fd);
5616 if (error)
5617 return (error);
5618 vp = nd.ni_vp;
5619
5620 nameidone(&nd);
5621
5622 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
5623 &uio_buf[0], sizeof(uio_buf));
5624 uio_addiov(auio, buf, bufsize);
5625 if (vp->v_type != VLNK) {
5626 error = EINVAL;
5627 } else {
5628 #if CONFIG_MACF
5629 error = mac_vnode_check_readlink(ctx, vp);
5630 #endif
5631 if (error == 0)
5632 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
5633 ctx);
5634 if (error == 0)
5635 error = VNOP_READLINK(vp, auio, ctx);
5636 }
5637 vnode_put(vp);
5638
5639 *retval = bufsize - (int)uio_resid(auio);
5640 return (error);
5641 }
5642
5643 int
5644 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5645 {
5646 enum uio_seg procseg;
5647
5648 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5649 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5650 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5651 uap->count, procseg, retval));
5652 }
5653
5654 int
5655 readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5656 {
5657 enum uio_seg procseg;
5658
5659 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5660 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5661 procseg, uap->buf, uap->bufsize, procseg, retval));
5662 }
5663
5664 /*
5665 * Change file flags.
5666 */
5667 static int
5668 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5669 {
5670 struct vnode_attr va;
5671 kauth_action_t action;
5672 int error;
5673
5674 VATTR_INIT(&va);
5675 VATTR_SET(&va, va_flags, flags);
5676
5677 #if CONFIG_MACF
5678 error = mac_vnode_check_setflags(ctx, vp, flags);
5679 if (error)
5680 goto out;
5681 #endif
5682
5683 /* request authorisation, disregard immutability */
5684 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5685 goto out;
5686 /*
5687 * Request that the auth layer disregard those file flags it's allowed to when
5688 * authorizing this operation; we need to do this in order to be able to
5689 * clear immutable flags.
5690 */
5691 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
5692 goto out;
5693 error = vnode_setattr(vp, &va, ctx);
5694
5695 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5696 error = ENOTSUP;
5697 }
5698 out:
5699 vnode_put(vp);
5700 return(error);
5701 }
5702
5703 /*
5704 * Change flags of a file given a path name.
5705 */
5706 /* ARGSUSED */
5707 int
5708 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
5709 {
5710 vnode_t vp;
5711 vfs_context_t ctx = vfs_context_current();
5712 int error;
5713 struct nameidata nd;
5714
5715 AUDIT_ARG(fflags, uap->flags);
5716 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5717 UIO_USERSPACE, uap->path, ctx);
5718 error = namei(&nd);
5719 if (error)
5720 return (error);
5721 vp = nd.ni_vp;
5722 nameidone(&nd);
5723
5724 error = chflags1(vp, uap->flags, ctx);
5725
5726 return(error);
5727 }
5728
5729 /*
5730 * Change flags of a file given a file descriptor.
5731 */
5732 /* ARGSUSED */
5733 int
5734 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
5735 {
5736 vnode_t vp;
5737 int error;
5738
5739 AUDIT_ARG(fd, uap->fd);
5740 AUDIT_ARG(fflags, uap->flags);
5741 if ( (error = file_vnode(uap->fd, &vp)) )
5742 return (error);
5743
5744 if ((error = vnode_getwithref(vp))) {
5745 file_drop(uap->fd);
5746 return(error);
5747 }
5748
5749 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5750
5751 error = chflags1(vp, uap->flags, vfs_context_current());
5752
5753 file_drop(uap->fd);
5754 return (error);
5755 }
5756
5757 /*
5758 * Change security information on a filesystem object.
5759 *
5760 * Returns: 0 Success
5761 * EPERM Operation not permitted
5762 * vnode_authattr:??? [anything vnode_authattr can return]
5763 * vnode_authorize:??? [anything vnode_authorize can return]
5764 * vnode_setattr:??? [anything vnode_setattr can return]
5765 *
5766 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5767 * translated to EPERM before being returned.
5768 */
5769 static int
5770 chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
5771 {
5772 kauth_action_t action;
5773 int error;
5774
5775 AUDIT_ARG(mode, vap->va_mode);
5776 /* XXX audit new args */
5777
5778 #if NAMEDSTREAMS
5779 /* chmod calls are not allowed for resource forks. */
5780 if (vp->v_flag & VISNAMEDSTREAM) {
5781 return (EPERM);
5782 }
5783 #endif
5784
5785 #if CONFIG_MACF
5786 if (VATTR_IS_ACTIVE(vap, va_mode) &&
5787 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
5788 return (error);
5789 #endif
5790
5791 /* make sure that the caller is allowed to set this security information */
5792 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
5793 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5794 if (error == EACCES)
5795 error = EPERM;
5796 return(error);
5797 }
5798
5799 error = vnode_setattr(vp, vap, ctx);
5800
5801 return (error);
5802 }
5803
5804
5805 /*
5806 * Change mode of a file given a path name.
5807 *
5808 * Returns: 0 Success
5809 * namei:??? [anything namei can return]
5810 * chmod_vnode:??? [anything chmod_vnode can return]
5811 */
5812 static int
5813 chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
5814 int fd, int flag, enum uio_seg segflg)
5815 {
5816 struct nameidata nd;
5817 int follow, error;
5818
5819 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5820 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
5821 segflg, path, ctx);
5822 if ((error = nameiat(&nd, fd)))
5823 return (error);
5824 error = chmod_vnode(ctx, nd.ni_vp, vap);
5825 vnode_put(nd.ni_vp);
5826 nameidone(&nd);
5827 return(error);
5828 }
5829
5830 /*
5831 * chmod_extended: Change the mode of a file given a path name; with extended
5832 * argument list (including extended security (ACL)).
5833 *
5834 * Parameters: p Process requesting the open
5835 * uap User argument descriptor (see below)
5836 * retval (ignored)
5837 *
5838 * Indirect: uap->path Path to object (same as 'chmod')
5839 * uap->uid UID to set
5840 * uap->gid GID to set
5841 * uap->mode File mode to set (same as 'chmod')
5842 * uap->xsecurity ACL to set (or delete)
5843 *
5844 * Returns: 0 Success
5845 * !0 errno value
5846 *
5847 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5848 *
* XXX: We should enumerate the possible errno values here, and where
5850 * in the code they originated.
5851 */
int
chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
{
	int error;
	struct vnode_attr va;
	kauth_filesec_t xsecdst;	/* ACL copied in from user space, if any */

	AUDIT_ARG(owner, uap->uid, uap->gid);

	/*
	 * Only mark the attributes the caller actually asked to change:
	 * -1 / KAUTH_UID_NONE / KAUTH_GID_NONE mean "leave unchanged".
	 */
	VATTR_INIT(&va);
	if (uap->mode != -1)
		VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);

	xsecdst = NULL;
	switch(uap->xsecurity) {
	/* explicit remove request */
	case CAST_USER_ADDR_T((void *)1):	/* _FILESEC_REMOVE_ACL */
		VATTR_SET(&va, va_acl, NULL);
		break;
	/* not being set */
	case USER_ADDR_NULL:
		break;
	default:
		/* copy the caller-supplied filesec in; freed after the call */
		if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
			return(error);
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
		KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
	}

	error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
	    UIO_USERSPACE);

	if (xsecdst != NULL)
		kauth_filesec_free(xsecdst);
	return(error);
}
5892
5893 /*
5894 * Returns: 0 Success
5895 * chmodat:??? [anything chmodat can return]
5896 */
5897 static int
5898 fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
5899 int flag, enum uio_seg segflg)
5900 {
5901 struct vnode_attr va;
5902
5903 VATTR_INIT(&va);
5904 VATTR_SET(&va, va_mode, mode & ALLPERMS);
5905
5906 return (chmodat(ctx, path, &va, fd, flag, segflg));
5907 }
5908
5909 int
5910 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
5911 {
5912 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
5913 AT_FDCWD, 0, UIO_USERSPACE));
5914 }
5915
5916 int
5917 fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
5918 {
5919 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5920 return (EINVAL);
5921
5922 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
5923 uap->fd, uap->flag, UIO_USERSPACE));
5924 }
5925
5926 /*
5927 * Change mode of a file given a file descriptor.
5928 */
5929 static int
5930 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
5931 {
5932 vnode_t vp;
5933 int error;
5934
5935 AUDIT_ARG(fd, fd);
5936
5937 if ((error = file_vnode(fd, &vp)) != 0)
5938 return (error);
5939 if ((error = vnode_getwithref(vp)) != 0) {
5940 file_drop(fd);
5941 return(error);
5942 }
5943 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5944
5945 error = chmod_vnode(vfs_context_current(), vp, vap);
5946 (void)vnode_put(vp);
5947 file_drop(fd);
5948
5949 return (error);
5950 }
5951
5952 /*
5953 * fchmod_extended: Change mode of a file given a file descriptor; with
5954 * extended argument list (including extended security (ACL)).
5955 *
5956 * Parameters: p Process requesting to change file mode
5957 * uap User argument descriptor (see below)
5958 * retval (ignored)
5959 *
5960 * Indirect: uap->mode File mode to set (same as 'chmod')
5961 * uap->uid UID to set
5962 * uap->gid GID to set
5963 * uap->xsecurity ACL to set (or delete)
5964 * uap->fd File descriptor of file to change mode
5965 *
5966 * Returns: 0 Success
5967 * !0 errno value
5968 *
5969 */
5970 int
5971 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
5972 {
5973 int error;
5974 struct vnode_attr va;
5975 kauth_filesec_t xsecdst;
5976
5977 AUDIT_ARG(owner, uap->uid, uap->gid);
5978
5979 VATTR_INIT(&va);
5980 if (uap->mode != -1)
5981 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5982 if (uap->uid != KAUTH_UID_NONE)
5983 VATTR_SET(&va, va_uid, uap->uid);
5984 if (uap->gid != KAUTH_GID_NONE)
5985 VATTR_SET(&va, va_gid, uap->gid);
5986
5987 xsecdst = NULL;
5988 switch(uap->xsecurity) {
5989 case USER_ADDR_NULL:
5990 VATTR_SET(&va, va_acl, NULL);
5991 break;
5992 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5993 VATTR_SET(&va, va_acl, NULL);
5994 break;
5995 /* not being set */
5996 case CAST_USER_ADDR_T(-1):
5997 break;
5998 default:
5999 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6000 return(error);
6001 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6002 }
6003
6004 error = fchmod1(p, uap->fd, &va);
6005
6006
6007 switch(uap->xsecurity) {
6008 case USER_ADDR_NULL:
6009 case CAST_USER_ADDR_T(-1):
6010 break;
6011 default:
6012 if (xsecdst != NULL)
6013 kauth_filesec_free(xsecdst);
6014 }
6015 return(error);
6016 }
6017
6018 int
6019 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
6020 {
6021 struct vnode_attr va;
6022
6023 VATTR_INIT(&va);
6024 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6025
6026 return(fchmod1(p, uap->fd, &va));
6027 }
6028
6029
6030 /*
6031 * Set ownership given a path name.
6032 */
6033 /* ARGSUSED */
/*
 * fchownat_internal: common path-based ownership change used by
 * chown(2), lchown(2) and fchownat(2).
 *
 * Parameters:	ctx	caller's VFS context
 *		fd	directory fd for relative lookups (AT_FDCWD for cwd)
 *		path	pathname (address space given by segflg)
 *		uid	new owner, or (uid_t)VNOVAL to leave unchanged
 *		gid	new group, or (gid_t)VNOVAL to leave unchanged
 *		flag	AT_SYMLINK_NOFOLLOW honored; callers validate the rest
 *		segflg	segment (user/kernel) containing 'path'
 *
 * Returns:	0 on success, otherwise an errno; EACCES arising after the
 *		lookup is rewritten to EPERM (see comment at 'out:').
 */
static int
fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
    gid_t gid, int flag, enum uio_seg segflg)
{
	vnode_t vp;
	struct vnode_attr va;
	int error;
	struct nameidata nd;
	int follow;
	kauth_action_t action;

	AUDIT_ARG(owner, uid, gid);

	/* AT_SYMLINK_NOFOLLOW means operate on the symlink itself */
	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
	NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
	    path, ctx);
	error = nameiat(&nd, fd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* only request the changes the caller actually asked for */
	VATTR_INIT(&va);
	if (uid != (uid_t)VNOVAL)
		VATTR_SET(&va, va_uid, uid);
	if (gid != (gid_t)VNOVAL)
		VATTR_SET(&va, va_gid, gid);

#if CONFIG_MACF
	error = mac_vnode_check_setowner(ctx, vp, uid, gid);
	if (error)
		goto out;
#endif

	/* preflight and authorize attribute changes */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
		goto out;
	error = vnode_setattr(vp, &va, ctx);

out:
	/*
	 * EACCES is only allowed from namei(); permissions failure should
	 * return EPERM, so we need to translate the error code.
	 */
	if (error == EACCES)
		error = EPERM;

	vnode_put(vp);
	return (error);
}
6087
6088 int
6089 chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
6090 {
6091 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6092 uap->uid, uap->gid, 0, UIO_USERSPACE));
6093 }
6094
6095 int
6096 lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
6097 {
6098 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6099 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6100 }
6101
6102 int
6103 fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6104 {
6105 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6106 return (EINVAL);
6107
6108 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6109 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
6110 }
6111
6112 /*
6113 * Set ownership given a file descriptor.
6114 */
6115 /* ARGSUSED */
/*
 * fchown: change owner and/or group of the file behind an open fd.
 * uid/gid values of VNOVAL mean "leave that field unchanged".
 */
int
fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
{
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	vnode_t vp;
	int error;
	kauth_action_t action;

	AUDIT_ARG(owner, uap->uid, uap->gid);
	AUDIT_ARG(fd, uap->fd);

	/* resolve the fd to a vnode and take an iocount on it */
	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* only request the changes the caller actually asked for */
	VATTR_INIT(&va);
	if (uap->uid != VNOVAL)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != VNOVAL)
		VATTR_SET(&va, va_gid, uap->gid);

#if NAMEDSTREAMS
	/* chown calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
		error = EPERM;
		goto out;
	}
#endif

#if CONFIG_MACF
	error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
	if (error)
		goto out;
#endif

	/* preflight and authorize attribute changes */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		/* permission failures are reported as EPERM, not EACCES */
		if (error == EACCES)
			error = EPERM;
		goto out;
	}
	error = vnode_setattr(vp, &va, ctx);

out:
	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}
6172
6173 static int
6174 getutimes(user_addr_t usrtvp, struct timespec *tsp)
6175 {
6176 int error;
6177
6178 if (usrtvp == USER_ADDR_NULL) {
6179 struct timeval old_tv;
6180 /* XXX Y2038 bug because of microtime argument */
6181 microtime(&old_tv);
6182 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
6183 tsp[1] = tsp[0];
6184 } else {
6185 if (IS_64BIT_PROCESS(current_proc())) {
6186 struct user64_timeval tv[2];
6187 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6188 if (error)
6189 return (error);
6190 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6191 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6192 } else {
6193 struct user32_timeval tv[2];
6194 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6195 if (error)
6196 return (error);
6197 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6198 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6199 }
6200 }
6201 return 0;
6202 }
6203
/*
 * setutimes: apply access time ts[0] and modification time ts[1] to 'vp'.
 *
 * 'nullflag' is non-zero when the caller supplied no explicit times
 * ("set to now").  It is recorded in va_vaflags as VA_UTIMES_NULL —
 * presumably so lower layers can apply the relaxed utimes(NULL)
 * permission semantics; confirm against vnode_authattr — and it also
 * suppresses the EACCES->EPERM rewrite on authorization failure.
 */
static int
setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
    int nullflag)
{
	int error;
	struct vnode_attr va;
	kauth_action_t action;

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	VATTR_INIT(&va);
	VATTR_SET(&va, va_access_time, ts[0]);
	VATTR_SET(&va, va_modify_time, ts[1]);
	if (nullflag)
		va.va_vaflags |= VA_UTIMES_NULL;

#if NAMEDSTREAMS
	/* utimes calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
		error = EPERM;
		goto out;
	}
#endif

#if CONFIG_MACF
	error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
	if (error)
		goto out;
#endif
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
		/* explicit-times permission failures report EPERM */
		if (!nullflag && error == EACCES)
			error = EPERM;
		goto out;
	}

	/* since we may not need to auth anything, check here */
	if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		if (!nullflag && error == EACCES)
			error = EPERM;
		goto out;
	}
	error = vnode_setattr(vp, &va, ctx);

out:
	return error;
}
6250
6251 /*
6252 * Set the access and modification times of a file.
6253 */
6254 /* ARGSUSED */
int
utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
{
	struct timespec ts[2];
	user_addr_t usrtvp;
	int error;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();

	/*
	 * AUDIT: Needed to change the order of operations to do the
	 * name lookup first because auditing wants the path.
	 */
	NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	nameidone(&nd);

	/*
	 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
	 * the current time instead.
	 */
	usrtvp = uap->tptr;
	if ((error = getutimes(usrtvp, ts)) != 0)
		goto out;

	/* final argument flags the utimes(path, NULL) case for setutimes() */
	error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);

out:
	/* namei() succeeded, so we always hold an iocount to release */
	vnode_put(nd.ni_vp);
	return (error);
}
6289
6290 /*
6291 * Set the access and modification times of a file.
6292 */
6293 /* ARGSUSED */
6294 int
6295 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
6296 {
6297 struct timespec ts[2];
6298 vnode_t vp;
6299 user_addr_t usrtvp;
6300 int error;
6301
6302 AUDIT_ARG(fd, uap->fd);
6303 usrtvp = uap->tptr;
6304 if ((error = getutimes(usrtvp, ts)) != 0)
6305 return (error);
6306 if ((error = file_vnode(uap->fd, &vp)) != 0)
6307 return (error);
6308 if((error = vnode_getwithref(vp))) {
6309 file_drop(uap->fd);
6310 return(error);
6311 }
6312
6313 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
6314 vnode_put(vp);
6315 file_drop(uap->fd);
6316 return(error);
6317 }
6318
6319 /*
6320 * Truncate a file given its path name.
6321 */
6322 /* ARGSUSED */
int
truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;
	kauth_action_t action;

	/* a negative length is never valid */
	if (uap->length < 0)
		return(EINVAL);
	NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	if ((error = namei(&nd)))
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* truncation is expressed as a data-size attribute change */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_data_size, uap->length);

#if CONFIG_MACF
	/* path-based call: no file credential available, hence NOCRED */
	error = mac_vnode_check_truncate(ctx, NOCRED, vp);
	if (error)
		goto out;
#endif

	/* preflight and authorize the size change */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
		goto out;
	error = vnode_setattr(vp, &va, ctx);
out:
	vnode_put(vp);
	return (error);
}
6361
6362 /*
6363 * Truncate a file given a file descriptor.
6364 */
6365 /* ARGSUSED */
int
ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
{
	vfs_context_t ctx = vfs_context_current();
	struct vnode_attr va;
	vnode_t vp;
	struct fileproc *fp;
	int error ;
	int fd = uap->fd;

	AUDIT_ARG(fd, uap->fd);
	/* a negative length is never valid */
	if (uap->length < 0)
		return(EINVAL);

	if ( (error = fp_lookup(p,fd,&fp,0)) ) {
		return(error);
	}

	/* POSIX shared memory objects are handled by the pshm layer */
	switch (FILEGLOB_DTYPE(fp->f_fglob)) {
	case DTYPE_PSXSHM:
		error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
		goto out;
	case DTYPE_VNODE:
		break;
	default:
		error = EINVAL;
		goto out;
	}

	vp = (vnode_t)fp->f_fglob->fg_data;

	/* the descriptor must have been opened for writing */
	if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
		error = EINVAL;
		goto out;
	}

	if ((error = vnode_getwithref(vp)) != 0) {
		goto out;
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

#if CONFIG_MACF
	/* fd-based call: check against the credential the file was opened with */
	error = mac_vnode_check_truncate(ctx,
	    fp->f_fglob->fg_cred, vp);
	if (error) {
		(void)vnode_put(vp);
		goto out;
	}
#endif
	/* truncation is expressed as a data-size attribute change */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_data_size, uap->length);
	error = vnode_setattr(vp, &va, ctx);
	(void)vnode_put(vp);
out:
	file_drop(fd);
	return (error);
}
6425
6426
6427 /*
6428 * Sync an open file with synchronized I/O _file_ integrity completion
6429 */
6430 /* ARGSUSED */
6431 int
6432 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
6433 {
6434 __pthread_testcancel(1);
6435 return(fsync_common(p, uap, MNT_WAIT));
6436 }
6437
6438
6439 /*
6440 * Sync an open file with synchronized I/O _file_ integrity completion
6441 *
6442 * Notes: This is a legacy support function that does not test for
6443 * thread cancellation points.
6444 */
6445 /* ARGSUSED */
6446 int
6447 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6448 {
6449 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
6450 }
6451
6452
6453 /*
6454 * Sync an open file with synchronized I/O _data_ integrity completion
6455 */
6456 /* ARGSUSED */
6457 int
6458 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6459 {
6460 __pthread_testcancel(1);
6461 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6462 }
6463
6464
6465 /*
6466 * fsync_common
6467 *
6468 * Common fsync code to support both synchronized I/O file integrity completion
6469 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6470 *
6471 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6472 * will only guarantee that the file data contents are retrievable. If
* 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
6474 * includes additional metadata unnecessary for retrieving the file data
6475 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6476 * storage.
6477 *
6478 * Parameters: p The process
6479 * uap->fd The descriptor to synchronize
6480 * flags The data integrity flags
6481 *
6482 * Returns: int Success
6483 * fp_getfvp:EBADF Bad file descriptor
6484 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6485 * VNOP_FSYNC:??? unspecified
6486 *
6487 * Notes: We use struct fsync_args because it is a short name, and all
6488 * caller argument structures are otherwise identical.
6489 */
static int
fsync_common(proc_t p, struct fsync_args *uap, int flags)
{
	vnode_t vp;
	struct fileproc *fp;
	vfs_context_t ctx = vfs_context_current();
	int error;

	AUDIT_ARG(fd, uap->fd);

	/* resolve the fd to its fileproc and vnode, taking references */
	if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
		return (error);
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* flags is MNT_WAIT (file integrity) or MNT_DWAIT (data integrity) */
	error = VNOP_FSYNC(vp, flags, ctx);

#if NAMEDRSRCFORK
	/* Sync resource fork shadow file if necessary. */
	if ((error == 0) &&
	    (vp->v_flag & VISNAMEDSTREAM) &&
	    (vp->v_parent != NULLVP) &&
	    vnode_isshadow(vp) &&
	    (fp->f_flags & FP_WRITTEN)) {
		(void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
	}
#endif

	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}
6526
6527 /*
6528 * Duplicate files. Source must be a file, target must be a file or
6529 * must not exist.
6530 *
6531 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6532 * perform inheritance correctly.
6533 */
6534 /* ARGSUSED */
int
copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
{
	vnode_t tvp, fvp, tdvp, sdvp;
	struct nameidata fromnd, tond;
	int error;
	vfs_context_t ctx = vfs_context_current();

	/* Check that the flags are valid. */

	if (uap->flags & ~CPF_MASK) {
		return(EINVAL);
	}

	/* look up the source; SAVESTART keeps ni_startdir for cleanup */
	NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1,
	    UIO_USERSPACE, uap->from, ctx);
	if ((error = namei(&fromnd)))
		return (error);
	fvp = fromnd.ni_vp;

	/* look up (or prepare to create) the destination */
	NDINIT(&tond, CREATE, OP_LINK,
	    LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
	    UIO_USERSPACE, uap->to, ctx);
	if ((error = namei(&tond))) {
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;

	/* an existing target is only acceptable with CPF_OVERWRITE */
	if (tvp != NULL) {
		if (!(uap->flags & CPF_OVERWRITE)) {
			error = EEXIST;
			goto out;
		}
	}
	/* directories cannot be copied this way */
	if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
		error = EISDIR;
		goto out;
	}

	/* caller must be allowed to add an entry to the target directory */
	if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out;

	/* copying a file onto its own parent directory makes no sense */
	if (fvp == tdvp)
		error = EINVAL;
	/*
	 * If source is the same as the destination (that is the
	 * same inode number) then there is nothing to do.
	 * (fixed to have POSIX semantics - CSM 3/2/98)
	 */
	if (fvp == tvp)
		error = -1;	/* internal sentinel; mapped to success below */
	if (!error)
		error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
out:
	sdvp = tond.ni_startdir;
	/*
	 * nameidone has to happen before we vnode_put(tdvp)
	 * since it may need to release the fs_nodelock on the tdvp
	 */
	nameidone(&tond);

	if (tvp)
		vnode_put(tvp);
	vnode_put(tdvp);
	vnode_put(sdvp);
out1:
	vnode_put(fvp);

	if (fromnd.ni_startdir)
		vnode_put(fromnd.ni_startdir);
	nameidone(&fromnd);

	/* the fvp == tvp "nothing to do" case reports success */
	if (error == -1)
		return (0);
	return (error);
}
6612
6613
6614 /*
6615 * Rename files. Source and destination must either both be directories,
6616 * or both not be directories. If target is a directory, it must be empty.
6617 */
6618 /* ARGSUSED */
6619 static int
6620 renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
6621 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
6622 {
6623 vnode_t tvp, tdvp;
6624 vnode_t fvp, fdvp;
6625 struct nameidata *fromnd, *tond;
6626 int error;
6627 int do_retry;
6628 int retry_count;
6629 int mntrename;
6630 int need_event;
6631 const char *oname = NULL;
6632 char *from_name = NULL, *to_name = NULL;
6633 int from_len=0, to_len=0;
6634 int holding_mntlock;
6635 mount_t locked_mp = NULL;
6636 vnode_t oparent = NULLVP;
6637 #if CONFIG_FSE
6638 fse_info from_finfo, to_finfo;
6639 #endif
6640 int from_truncated=0, to_truncated;
6641 int batched = 0;
6642 struct vnode_attr *fvap, *tvap;
6643 int continuing = 0;
6644 /* carving out a chunk for structs that are too big to be on stack. */
6645 struct {
6646 struct nameidata from_node, to_node;
6647 struct vnode_attr fv_attr, tv_attr;
6648 } * __rename_data;
6649 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
6650 fromnd = &__rename_data->from_node;
6651 tond = &__rename_data->to_node;
6652
6653 holding_mntlock = 0;
6654 do_retry = 0;
6655 retry_count = 0;
6656 retry:
6657 fvp = tvp = NULL;
6658 fdvp = tdvp = NULL;
6659 fvap = tvap = NULL;
6660 mntrename = FALSE;
6661
6662 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
6663 segflg, from, ctx);
6664 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
6665
6666 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6667 segflg, to, ctx);
6668 tond->ni_flag = NAMEI_COMPOUNDRENAME;
6669
6670 continue_lookup:
6671 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6672 if ( (error = nameiat(fromnd, fromfd)) )
6673 goto out1;
6674 fdvp = fromnd->ni_dvp;
6675 fvp = fromnd->ni_vp;
6676
6677 if (fvp && fvp->v_type == VDIR)
6678 tond->ni_cnd.cn_flags |= WILLBEDIR;
6679 }
6680
6681 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6682 if ( (error = nameiat(tond, tofd)) ) {
6683 /*
6684 * Translate error code for rename("dir1", "dir2/.").
6685 */
6686 if (error == EISDIR && fvp->v_type == VDIR)
6687 error = EINVAL;
6688 goto out1;
6689 }
6690 tdvp = tond->ni_dvp;
6691 tvp = tond->ni_vp;
6692 }
6693
6694 batched = vnode_compound_rename_available(fdvp);
6695 if (!fvp) {
6696 /*
6697 * Claim: this check will never reject a valid rename.
6698 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
6699 * Suppose fdvp and tdvp are not on the same mount.
6700 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6701 * then you can't move it to within another dir on the same mountpoint.
6702 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
6703 *
6704 * If this check passes, then we are safe to pass these vnodes to the same FS.
6705 */
6706 if (fdvp->v_mount != tdvp->v_mount) {
6707 error = EXDEV;
6708 goto out1;
6709 }
6710 goto skipped_lookup;
6711 }
6712
6713 if (!batched) {
6714 error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
6715 if (error) {
6716 if (error == ENOENT &&
6717 retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
6718 /*
6719 * We encountered a race where after doing the namei, tvp stops
6720 * being valid. If so, simply re-drive the rename call from the
6721 * top.
6722 */
6723 do_retry = 1;
6724 retry_count += 1;
6725 }
6726 goto out1;
6727 }
6728 }
6729
6730 /*
6731 * If the source and destination are the same (i.e. they're
6732 * links to the same vnode) and the target file system is
6733 * case sensitive, then there is nothing to do.
6734 *
6735 * XXX Come back to this.
6736 */
6737 if (fvp == tvp) {
6738 int pathconf_val;
6739
6740 /*
6741 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
6742 * then assume that this file system is case sensitive.
6743 */
6744 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
6745 pathconf_val != 0) {
6746 goto out1;
6747 }
6748 }
6749
6750 /*
6751 * Allow the renaming of mount points.
6752 * - target must not exist
6753 * - target must reside in the same directory as source
6754 * - union mounts cannot be renamed
6755 * - "/" cannot be renamed
6756 *
6757 * XXX Handle this in VFS after a continued lookup (if we missed
6758 * in the cache to start off)
6759 */
6760 if ((fvp->v_flag & VROOT) &&
6761 (fvp->v_type == VDIR) &&
6762 (tvp == NULL) &&
6763 (fvp->v_mountedhere == NULL) &&
6764 (fdvp == tdvp) &&
6765 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
6766 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
6767 vnode_t coveredvp;
6768
6769 /* switch fvp to the covered vnode */
6770 coveredvp = fvp->v_mount->mnt_vnodecovered;
6771 if ( (vnode_getwithref(coveredvp)) ) {
6772 error = ENOENT;
6773 goto out1;
6774 }
6775 vnode_put(fvp);
6776
6777 fvp = coveredvp;
6778 mntrename = TRUE;
6779 }
6780 /*
6781 * Check for cross-device rename.
6782 */
6783 if ((fvp->v_mount != tdvp->v_mount) ||
6784 (tvp && (fvp->v_mount != tvp->v_mount))) {
6785 error = EXDEV;
6786 goto out1;
6787 }
6788
6789 /*
6790 * If source is the same as the destination (that is the
6791 * same inode number) then there is nothing to do...
6792 * EXCEPT if the underlying file system supports case
6793 * insensitivity and is case preserving. In this case
6794 * the file system needs to handle the special case of
6795 * getting the same vnode as target (fvp) and source (tvp).
6796 *
6797 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
6798 * and _PC_CASE_PRESERVING can have this exception, and they need to
6799 * handle the special case of getting the same vnode as target and
6800 * source. NOTE: Then the target is unlocked going into vnop_rename,
6801 * so not to cause locking problems. There is a single reference on tvp.
6802 *
6803 * NOTE - that fvp == tvp also occurs if they are hard linked and
6804 * that correct behaviour then is just to return success without doing
6805 * anything.
6806 *
6807 * XXX filesystem should take care of this itself, perhaps...
6808 */
6809 if (fvp == tvp && fdvp == tdvp) {
6810 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
6811 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
6812 fromnd->ni_cnd.cn_namelen)) {
6813 goto out1;
6814 }
6815 }
6816
6817 if (holding_mntlock && fvp->v_mount != locked_mp) {
6818 /*
6819 * we're holding a reference and lock
6820 * on locked_mp, but it no longer matches
6821 * what we want to do... so drop our hold
6822 */
6823 mount_unlock_renames(locked_mp);
6824 mount_drop(locked_mp, 0);
6825 holding_mntlock = 0;
6826 }
6827 if (tdvp != fdvp && fvp->v_type == VDIR) {
6828 /*
6829 * serialize renames that re-shape
6830 * the tree... if holding_mntlock is
6831 * set, then we're ready to go...
6832 * otherwise we
6833 * first need to drop the iocounts
6834 * we picked up, second take the
6835 * lock to serialize the access,
6836 * then finally start the lookup
6837 * process over with the lock held
6838 */
6839 if (!holding_mntlock) {
6840 /*
6841 * need to grab a reference on
6842 * the mount point before we
6843 * drop all the iocounts... once
6844 * the iocounts are gone, the mount
6845 * could follow
6846 */
6847 locked_mp = fvp->v_mount;
6848 mount_ref(locked_mp, 0);
6849
6850 /*
6851 * nameidone has to happen before we vnode_put(tvp)
6852 * since it may need to release the fs_nodelock on the tvp
6853 */
6854 nameidone(tond);
6855
6856 if (tvp)
6857 vnode_put(tvp);
6858 vnode_put(tdvp);
6859
6860 /*
6861 * nameidone has to happen before we vnode_put(fdvp)
6862 * since it may need to release the fs_nodelock on the fvp
6863 */
6864 nameidone(fromnd);
6865
6866 vnode_put(fvp);
6867 vnode_put(fdvp);
6868
6869 mount_lock_renames(locked_mp);
6870 holding_mntlock = 1;
6871
6872 goto retry;
6873 }
6874 } else {
6875 /*
6876 * when we dropped the iocounts to take
6877 * the lock, we allowed the identity of
6878 * the various vnodes to change... if they did,
6879 * we may no longer be dealing with a rename
6880 * that reshapes the tree... once we're holding
6881 * the iocounts, the vnodes can't change type
6882 * so we're free to drop the lock at this point
6883 * and continue on
6884 */
6885 if (holding_mntlock) {
6886 mount_unlock_renames(locked_mp);
6887 mount_drop(locked_mp, 0);
6888 holding_mntlock = 0;
6889 }
6890 }
6891
6892 // save these off so we can later verify that fvp is the same
6893 oname = fvp->v_name;
6894 oparent = fvp->v_parent;
6895
6896 skipped_lookup:
6897 #if CONFIG_FSE
6898 need_event = need_fsevent(FSE_RENAME, fdvp);
6899 if (need_event) {
6900 if (fvp) {
6901 get_fse_info(fvp, &from_finfo, ctx);
6902 } else {
6903 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
6904 if (error) {
6905 goto out1;
6906 }
6907
6908 fvap = &__rename_data->fv_attr;
6909 }
6910
6911 if (tvp) {
6912 get_fse_info(tvp, &to_finfo, ctx);
6913 } else if (batched) {
6914 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
6915 if (error) {
6916 goto out1;
6917 }
6918
6919 tvap = &__rename_data->tv_attr;
6920 }
6921 }
6922 #else
6923 need_event = 0;
6924 #endif /* CONFIG_FSE */
6925
6926 if (need_event || kauth_authorize_fileop_has_listeners()) {
6927 if (from_name == NULL) {
6928 GET_PATH(from_name);
6929 if (from_name == NULL) {
6930 error = ENOMEM;
6931 goto out1;
6932 }
6933 }
6934
6935 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
6936
6937 if (to_name == NULL) {
6938 GET_PATH(to_name);
6939 if (to_name == NULL) {
6940 error = ENOMEM;
6941 goto out1;
6942 }
6943 }
6944
6945 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
6946 }
6947 #if CONFIG_SECLUDED_RENAME
6948 if (flags & VFS_SECLUDE_RENAME) {
6949 fromnd->ni_cnd.cn_flags |= CN_SECLUDE_RENAME;
6950 }
6951 #else
6952 #pragma unused(flags)
6953 #endif
6954 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
6955 tdvp, &tvp, &tond->ni_cnd, tvap,
6956 0, ctx);
6957
6958 if (holding_mntlock) {
6959 /*
6960 * we can drop our serialization
6961 * lock now
6962 */
6963 mount_unlock_renames(locked_mp);
6964 mount_drop(locked_mp, 0);
6965 holding_mntlock = 0;
6966 }
6967 if (error) {
6968 if (error == EKEEPLOOKING) {
6969 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6970 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6971 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
6972 }
6973 }
6974
6975 fromnd->ni_vp = fvp;
6976 tond->ni_vp = tvp;
6977
6978 goto continue_lookup;
6979 }
6980
6981 /*
6982 * We may encounter a race in the VNOP where the destination didn't
6983 * exist when we did the namei, but it does by the time we go and
6984 * try to create the entry. In this case, we should re-drive this rename
6985 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
6986 * but other filesystems susceptible to this race could return it, too.
6987 */
6988 if (error == ERECYCLE) {
6989 do_retry = 1;
6990 }
6991
6992 /*
6993 * For compound VNOPs, the authorization callback may return
6994 * ENOENT in case of racing hardlink lookups hitting the name
6995 * cache, redrive the lookup.
6996 */
6997 if (batched && error == ENOENT &&
6998 retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
6999 do_retry = 1;
7000 retry_count += 1;
7001 }
7002
7003 goto out1;
7004 }
7005
7006 /* call out to allow 3rd party notification of rename.
7007 * Ignore result of kauth_authorize_fileop call.
7008 */
7009 kauth_authorize_fileop(vfs_context_ucred(ctx),
7010 KAUTH_FILEOP_RENAME,
7011 (uintptr_t)from_name, (uintptr_t)to_name);
7012
7013 #if CONFIG_FSE
7014 if (from_name != NULL && to_name != NULL) {
7015 if (from_truncated || to_truncated) {
7016 // set it here since only the from_finfo gets reported up to user space
7017 from_finfo.mode |= FSE_TRUNCATED_PATH;
7018 }
7019
7020 if (tvap && tvp) {
7021 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
7022 }
7023 if (fvap) {
7024 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
7025 }
7026
7027 if (tvp) {
7028 add_fsevent(FSE_RENAME, ctx,
7029 FSE_ARG_STRING, from_len, from_name,
7030 FSE_ARG_FINFO, &from_finfo,
7031 FSE_ARG_STRING, to_len, to_name,
7032 FSE_ARG_FINFO, &to_finfo,
7033 FSE_ARG_DONE);
7034 } else {
7035 add_fsevent(FSE_RENAME, ctx,
7036 FSE_ARG_STRING, from_len, from_name,
7037 FSE_ARG_FINFO, &from_finfo,
7038 FSE_ARG_STRING, to_len, to_name,
7039 FSE_ARG_DONE);
7040 }
7041 }
7042 #endif /* CONFIG_FSE */
7043
7044 /*
7045 * update filesystem's mount point data
7046 */
7047 if (mntrename) {
7048 char *cp, *pathend, *mpname;
7049 char * tobuf;
7050 struct mount *mp;
7051 int maxlen;
7052 size_t len = 0;
7053
7054 mp = fvp->v_mountedhere;
7055
7056 if (vfs_busy(mp, LK_NOWAIT)) {
7057 error = EBUSY;
7058 goto out1;
7059 }
7060 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
7061
7062 if (UIO_SEG_IS_USER_SPACE(segflg))
7063 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
7064 else
7065 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
7066 if (!error) {
7067 /* find current mount point prefix */
7068 pathend = &mp->mnt_vfsstat.f_mntonname[0];
7069 for (cp = pathend; *cp != '\0'; ++cp) {
7070 if (*cp == '/')
7071 pathend = cp + 1;
7072 }
7073 /* find last component of target name */
7074 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
7075 if (*cp == '/')
7076 mpname = cp + 1;
7077 }
7078 /* append name to prefix */
7079 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
7080 bzero(pathend, maxlen);
7081 strlcpy(pathend, mpname, maxlen);
7082 }
7083 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
7084
7085 vfs_unbusy(mp);
7086 }
7087 /*
7088 * fix up name & parent pointers. note that we first
7089 * check that fvp has the same name/parent pointers it
7090 * had before the rename call... this is a 'weak' check
7091 * at best...
7092 *
7093 * XXX oparent and oname may not be set in the compound vnop case
7094 */
7095 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
7096 int update_flags;
7097
7098 update_flags = VNODE_UPDATE_NAME;
7099
7100 if (fdvp != tdvp)
7101 update_flags |= VNODE_UPDATE_PARENT;
7102
7103 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
7104 }
7105 out1:
7106 if (to_name != NULL) {
7107 RELEASE_PATH(to_name);
7108 to_name = NULL;
7109 }
7110 if (from_name != NULL) {
7111 RELEASE_PATH(from_name);
7112 from_name = NULL;
7113 }
7114 if (holding_mntlock) {
7115 mount_unlock_renames(locked_mp);
7116 mount_drop(locked_mp, 0);
7117 holding_mntlock = 0;
7118 }
7119 if (tdvp) {
7120 /*
7121 * nameidone has to happen before we vnode_put(tdvp)
7122 * since it may need to release the fs_nodelock on the tdvp
7123 */
7124 nameidone(tond);
7125
7126 if (tvp)
7127 vnode_put(tvp);
7128 vnode_put(tdvp);
7129 }
7130 if (fdvp) {
7131 /*
7132 * nameidone has to happen before we vnode_put(fdvp)
7133 * since it may need to release the fs_nodelock on the fdvp
7134 */
7135 nameidone(fromnd);
7136
7137 if (fvp)
7138 vnode_put(fvp);
7139 vnode_put(fdvp);
7140 }
7141
7142 /*
7143 * If things changed after we did the namei, then we will re-drive
7144 * this rename call from the top.
7145 */
7146 if (do_retry) {
7147 do_retry = 0;
7148 goto retry;
7149 }
7150
7151 FREE(__rename_data, M_TEMP);
7152 return (error);
7153 }
7154
7155 int
7156 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7157 {
7158 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7159 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7160 }
7161
#if CONFIG_SECLUDED_RENAME
/*
 * rename_ext(2): rename with caller-supplied flags (e.g. VFS_SECLUDE_RENAME);
 * both paths resolved relative to the CWD.
 */
int rename_ext(__unused proc_t p, struct rename_ext_args *uap, __unused int32_t *retval)
{
	vfs_context_t ctx = vfs_context_current();

	return renameat_internal(ctx, AT_FDCWD, uap->from, AT_FDCWD, uap->to,
	    UIO_USERSPACE, uap->flags);
}
#endif
7172
7173 int
7174 renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7175 {
7176 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7177 uap->tofd, uap->to, UIO_USERSPACE, 0));
7178 }
7179
7180 /*
7181 * Make a directory file.
7182 *
7183 * Returns: 0 Success
7184 * EEXIST
7185 * namei:???
7186 * vnode_authorize:???
7187 * vn_create:???
7188 */
7189 /* ARGSUSED */
7190 static int
7191 mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
7192 enum uio_seg segflg)
7193 {
7194 vnode_t vp, dvp;
7195 int error;
7196 int update_flags = 0;
7197 int batched;
7198 struct nameidata nd;
7199
7200 AUDIT_ARG(mode, vap->va_mode);
7201 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
7202 path, ctx);
7203 nd.ni_cnd.cn_flags |= WILLBEDIR;
7204 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
7205
7206 continue_lookup:
7207 error = nameiat(&nd, fd);
7208 if (error)
7209 return (error);
7210 dvp = nd.ni_dvp;
7211 vp = nd.ni_vp;
7212
7213 if (vp != NULL) {
7214 error = EEXIST;
7215 goto out;
7216 }
7217
7218 batched = vnode_compound_mkdir_available(dvp);
7219
7220 VATTR_SET(vap, va_type, VDIR);
7221
7222 /*
7223 * XXX
7224 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7225 * only get EXISTS or EISDIR for existing path components, and not that it could see
7226 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7227 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7228 */
7229 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
7230 if (error == EACCES || error == EPERM) {
7231 int error2;
7232
7233 nameidone(&nd);
7234 vnode_put(dvp);
7235 dvp = NULLVP;
7236
7237 /*
7238 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
7239 * rather than EACCESS if the target exists.
7240 */
7241 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
7242 path, ctx);
7243 error2 = nameiat(&nd, fd);
7244 if (error2) {
7245 goto out;
7246 } else {
7247 vp = nd.ni_vp;
7248 error = EEXIST;
7249 goto out;
7250 }
7251 }
7252
7253 goto out;
7254 }
7255
7256 /*
7257 * make the directory
7258 */
7259 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
7260 if (error == EKEEPLOOKING) {
7261 nd.ni_vp = vp;
7262 goto continue_lookup;
7263 }
7264
7265 goto out;
7266 }
7267
7268 // Make sure the name & parent pointers are hooked up
7269 if (vp->v_name == NULL)
7270 update_flags |= VNODE_UPDATE_NAME;
7271 if (vp->v_parent == NULLVP)
7272 update_flags |= VNODE_UPDATE_PARENT;
7273
7274 if (update_flags)
7275 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
7276
7277 #if CONFIG_FSE
7278 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
7279 #endif
7280
7281 out:
7282 /*
7283 * nameidone has to happen before we vnode_put(dvp)
7284 * since it may need to release the fs_nodelock on the dvp
7285 */
7286 nameidone(&nd);
7287
7288 if (vp)
7289 vnode_put(vp);
7290 if (dvp)
7291 vnode_put(dvp);
7292
7293 return (error);
7294 }
7295
7296 /*
7297 * mkdir_extended: Create a directory; with extended security (ACL).
7298 *
7299 * Parameters: p Process requesting to create the directory
7300 * uap User argument descriptor (see below)
7301 * retval (ignored)
7302 *
7303 * Indirect: uap->path Path of directory to create
7304 * uap->mode Access permissions to set
7305 * uap->xsecurity ACL to set
7306 *
7307 * Returns: 0 Success
7308 * !0 Not success
7309 *
7310 */
7311 int
7312 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
7313 {
7314 int ciferror;
7315 kauth_filesec_t xsecdst;
7316 struct vnode_attr va;
7317
7318 AUDIT_ARG(owner, uap->uid, uap->gid);
7319
7320 xsecdst = NULL;
7321 if ((uap->xsecurity != USER_ADDR_NULL) &&
7322 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
7323 return ciferror;
7324
7325 VATTR_INIT(&va);
7326 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7327 if (xsecdst != NULL)
7328 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
7329
7330 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7331 UIO_USERSPACE);
7332 if (xsecdst != NULL)
7333 kauth_filesec_free(xsecdst);
7334 return ciferror;
7335 }
7336
7337 int
7338 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
7339 {
7340 struct vnode_attr va;
7341
7342 VATTR_INIT(&va);
7343 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7344
7345 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7346 UIO_USERSPACE));
7347 }
7348
7349 int
7350 mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
7351 {
7352 struct vnode_attr va;
7353
7354 VATTR_INIT(&va);
7355 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7356
7357 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
7358 UIO_USERSPACE));
7359 }
7360
/*
 * Remove a directory: common implementation behind rmdir(2) and
 * unlinkat(2) with AT_REMOVEDIR.  Resolves 'dirpath' relative to
 * directory fd 'fd' and removes the directory, retrying when the
 * only obstruction is orphaned AppleDouble files.
 */
static int
rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
    enum uio_seg segflg)
{
	vnode_t vp, dvp;
	int error;
	struct nameidata nd;
	char *path = NULL;
	int len=0;
	int has_listeners = 0;
	int need_event = 0;
	int truncated = 0;
#if CONFIG_FSE
	struct vnode_attr va;
#endif /* CONFIG_FSE */
	struct vnode_attr *vap = NULL;
	int restart_count = 0;
	int batched;

	int restart_flag;

	/*
	 * This loop exists to restart rmdir in the unlikely case that two
	 * processes are simultaneously trying to remove the same directory
	 * containing orphaned appleDouble files.
	 */
	do {
		NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
		       segflg, dirpath, ctx);
		nd.ni_flag = NAMEI_COMPOUNDRMDIR;
continue_lookup:
		restart_flag = 0;
		vap = NULL;

		error = nameiat(&nd, fd);
		if (error)
			return (error);

		dvp = nd.ni_dvp;
		vp = nd.ni_vp;

		if (vp) {
			batched = vnode_compound_rmdir_available(vp);

			if (vp->v_flag & VROOT) {
				/*
				 * The root of a mounted filesystem cannot be deleted.
				 */
				error = EBUSY;
				goto out;
			}

			/*
			 * Removed a check here; we used to abort if vp's vid
			 * was not the same as what we'd seen the last time around.
			 * I do not think that check was valid, because if we retry
			 * and all dirents are gone, the directory could legitimately
			 * be recycled but still be present in a situation where we would
			 * have had permission to delete.  Therefore, we won't make
			 * an effort to preserve that check now that we may not have a
			 * vp here.
			 */

			if (!batched) {
				error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
				if (error) {
					/* Racing lookup may yield stale ENOENT; redrive a few times. */
					if (error == ENOENT &&
					    restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
						restart_flag = 1;
						restart_count += 1;
					}
					goto out;
				}
			}
		} else {
			/* No leaf vnode: only valid when the FS does compound rmdir. */
			batched = 1;

			if (!vnode_compound_rmdir_available(dvp)) {
				panic("No error, but no compound rmdir?");
			}
		}

#if CONFIG_FSE
		fse_info finfo;

		need_event = need_fsevent(FSE_DELETE, dvp);
		if (need_event) {
			if (!batched) {
				get_fse_info(vp, &finfo, ctx);
			} else {
				/* Compound case: ask the FS to fill notify attrs via vap. */
				error = vfs_get_notify_attributes(&va);
				if (error) {
					goto out;
				}

				vap = &va;
			}
		}
#endif
		has_listeners = kauth_authorize_fileop_has_listeners();
		if (need_event || has_listeners) {
			if (path == NULL) {
				GET_PATH(path);
				if (path == NULL) {
					error = ENOMEM;
					goto out;
				}
			}

			len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
#if CONFIG_FSE
			if (truncated) {
				finfo.mode |= FSE_TRUNCATED_PATH;
			}
#endif
		}

		error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
		nd.ni_vp = vp;
		if (vp == NULLVP) {
			/* Couldn't find a vnode */
			goto out;
		}

		if (error == EKEEPLOOKING) {
			/* Compound VNOP wants us to continue the interrupted lookup. */
			goto continue_lookup;
		} else if (batched && error == ENOENT &&
		    restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
			/*
			 * For compound VNOPs, the authorization callback
			 * may return ENOENT in case of racing hard link lookups
			 * redrive the lookup.
			 */
			restart_flag = 1;
			restart_count += 1;
			goto out;
		}
#if CONFIG_APPLEDOUBLE
		/*
		 * Special case to remove orphaned AppleDouble
		 * files.  I don't like putting this in the kernel,
		 * but carbon does not like putting this in carbon either,
		 * so here we are.
		 */
		if (error == ENOTEMPTY) {
			error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
			if (error == EBUSY) {
				goto out;
			}


			/*
			 * Assuming everything went well, we will try the RMDIR again
			 */
			if (!error)
				error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
		}
#endif /* CONFIG_APPLEDOUBLE */
		/*
		 * Call out to allow 3rd party notification of delete.
		 * Ignore result of kauth_authorize_fileop call.
		 */
		if (!error) {
			if (has_listeners) {
				kauth_authorize_fileop(vfs_context_ucred(ctx),
				    KAUTH_FILEOP_DELETE,
				    (uintptr_t)vp,
				    (uintptr_t)path);
			}

			if (vp->v_flag & VISHARDLINK) {
				// see the comment in unlink1() about why we update
				// the parent of a hard link when it is removed
				vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
			}

#if CONFIG_FSE
			if (need_event) {
				if (vap) {
					vnode_get_fse_info_from_vap(vp, &finfo, vap);
				}
				add_fsevent(FSE_DELETE, ctx,
				    FSE_ARG_STRING, len, path,
				    FSE_ARG_FINFO, &finfo,
				    FSE_ARG_DONE);
			}
#endif
		}

out:
		if (path != NULL) {
			RELEASE_PATH(path);
			path = NULL;
		}
		/*
		 * nameidone has to happen before we vnode_put(dvp)
		 * since it may need to release the fs_nodelock on the dvp
		 */
		nameidone(&nd);
		vnode_put(dvp);

		if (vp)
			vnode_put(vp);

		if (restart_flag == 0) {
			/* Wake any peer waiting on this vp for the AppleDouble retry. */
			wakeup_one((caddr_t)vp);
			return (error);
		}
		/* Briefly sleep before retrying the whole removal. */
		tsleep(vp, PVFS, "rm AD", 1);

	} while (restart_flag != 0);

	return (error);

}
7576
7577 /*
7578 * Remove a directory file.
7579 */
7580 /* ARGSUSED */
7581 int
7582 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
7583 {
7584 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
7585 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
7586 }
7587
/* Get direntry length padded to 8 byte alignment */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)

/*
 * Read directory entries from 'vp' into 'uio' in the extended (64-bit,
 * struct direntry) format.  If the filesystem natively supports
 * VNODE_READDIR_EXTENDED the request is passed straight through;
 * otherwise legacy struct dirent records are read into a kernel buffer
 * and repacked as struct direntry before being copied out.
 */
errno_t
vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
    int *numdirent, vfs_context_t ctxp)
{
	/* Check if fs natively supports VNODE_READDIR_EXTENDED */
	if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
	    ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
		return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
	} else {
		size_t bufsize;
		void * bufptr;
		uio_t auio;
		struct direntry *entry64;
		struct dirent *dep;
		int bytesread;
		int error;

		/*
		 * Our kernel buffer needs to be smaller since re-packing
		 * will expand each dirent.  The worse case (when the name
		 * length is 3) corresponds to a struct direntry size of 32
		 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
		 * (4-byte aligned).  So having a buffer that is 3/8 the size
		 * will prevent us from reading more than we can pack.
		 *
		 * Since this buffer is wired memory, we will limit the
		 * buffer size to a maximum of 32K. We would really like to
		 * use 32K in the MIN(), but we use magic number 87371 to
		 * prevent uio_resid() * 3 / 8 from overflowing.
		 */
		bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
		MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
		if (bufptr == NULL) {
			return ENOMEM;
		}

		/* Read the legacy dirents into the kernel buffer at uio's offset. */
		auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufptr, bufsize);
		auio->uio_offset = uio->uio_offset;

		error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);

		dep = (struct dirent *)bufptr;
		bytesread = bufsize - uio_resid(auio);

		/* Scratch record reused for each converted entry. */
		MALLOC(entry64, struct direntry *, sizeof(struct direntry),
		    M_TEMP, M_WAITOK);
		/*
		 * Convert all the entries and copy them out to user's buffer.
		 */
		while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
			size_t enbufsize = DIRENT64_LEN(dep->d_namlen);

			bzero(entry64, enbufsize);
			/* Convert a dirent to a dirent64. */
			entry64->d_ino = dep->d_ino;
			entry64->d_seekoff = 0;
			entry64->d_reclen = enbufsize;
			entry64->d_namlen = dep->d_namlen;
			entry64->d_type = dep->d_type;
			bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);

			/* Move to next entry. */
			dep = (struct dirent *)((char *)dep + dep->d_reclen);

			/* Copy entry64 to user's buffer. */
			error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
		}

		/* Update the real offset using the offset we got from VNOP_READDIR. */
		if (error == 0) {
			uio->uio_offset = auio->uio_offset;
		}
		uio_free(auio);
		FREE(bufptr, M_TEMP);
		FREE(entry64, M_TEMP);
		return (error);
	}
}
7671
/* Silently clamp over-large user buffers to this size (128 MB). */
#define GETDIRENTRIES_MAXBUFSIZE	(128 * 1024 * 1024U)

/*
 * Read a block of directory entries in a file system independent format.
 *
 * Shared by getdirentries(2) and getdirentries64(2).  Reads from the fd's
 * current offset into user buffer 'bufp' (at most 'bufsize' bytes), advances
 * the fd offset, and returns the starting offset in '*offset' and the byte
 * count in '*bytesread'.  When 'flags' has VNODE_READDIR_EXTENDED the
 * extended (struct direntry) format is produced via vnode_readdir64().
 */
static int
getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
    off_t *offset, int flags)
{
	vnode_t vp;
	struct vfs_context context = *vfs_context_current();	/* local copy */
	struct fileproc *fp;
	uio_t auio;
	int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	off_t loff;
	int error, eofflag, numdirent;
	char uio_buf[ UIO_SIZEOF(1) ];

	error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
	if (error) {
		return (error);
	}
	/* The descriptor must have been opened for reading. */
	if ((fp->f_fglob->fg_flag & FREAD) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
		error = EBADF;
		goto out;
	}

	if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
		bufsize = GETDIRENTRIES_MAXBUFSIZE;

#if CONFIG_MACF
	error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
	if (error)
		goto out;
#endif
	if ( (error = vnode_getwithref(vp)) ) {
		goto out;
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

unionread:
	if (vp->v_type != VDIR) {
		(void)vnode_put(vp);
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_readdir(&context, vp);
	if (error != 0) {
		(void)vnode_put(vp);
		goto out;
	}
#endif /* MAC */

	/* Read from the fd's current offset; keep fg_offset in sync afterwards. */
	loff = fp->f_fglob->fg_offset;
	auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, bufp, bufsize);

	if (flags & VNODE_READDIR_EXTENDED) {
		error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
		fp->f_fglob->fg_offset = uio_offset(auio);
	} else {
		error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
		fp->f_fglob->fg_offset = uio_offset(auio);
	}
	if (error) {
		(void)vnode_put(vp);
		goto out;
	}

	/*
	 * Nothing was read: for union mounts, descend to the covered
	 * directory and retry the read there.
	 */
	if ((user_ssize_t)bufsize == uio_resid(auio)){
		if (union_dircheckp) {
			error = union_dircheckp(&vp, fp, &context);
			if (error == -1)
				goto unionread;
			if (error)
				goto out;
		}

		if ((vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *tvp = vp;
			if (lookup_traverse_union(tvp, &vp, &context) == 0) {
				/* Repoint the open file at the lower directory. */
				vnode_ref(vp);
				fp->f_fglob->fg_data = (caddr_t) vp;
				fp->f_fglob->fg_offset = 0;
				vnode_rele(tvp);
				vnode_put(tvp);
				goto unionread;
			}
			vp = tvp;
		}
	}

	vnode_put(vp);
	if (offset) {
		*offset = loff;
	}

	*bytesread = bufsize - uio_resid(auio);
out:
	file_drop(fd);
	return (error);
}
7777
7778
7779 int
7780 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
7781 {
7782 off_t offset;
7783 ssize_t bytesread;
7784 int error;
7785
7786 AUDIT_ARG(fd, uap->fd);
7787 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
7788
7789 if (error == 0) {
7790 if (proc_is64bit(p)) {
7791 user64_long_t base = (user64_long_t)offset;
7792 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
7793 } else {
7794 user32_long_t base = (user32_long_t)offset;
7795 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
7796 }
7797 *retval = bytesread;
7798 }
7799 return (error);
7800 }
7801
7802 int
7803 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
7804 {
7805 off_t offset;
7806 ssize_t bytesread;
7807 int error;
7808
7809 AUDIT_ARG(fd, uap->fd);
7810 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
7811
7812 if (error == 0) {
7813 *retval = bytesread;
7814 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
7815 }
7816 return (error);
7817 }
7818
7819
7820 /*
7821 * Set the mode mask for creation of filesystem nodes.
7822 * XXX implement xsecurity
7823 */
7824 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
7825 static int
7826 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
7827 {
7828 struct filedesc *fdp;
7829
7830 AUDIT_ARG(mask, newmask);
7831 proc_fdlock(p);
7832 fdp = p->p_fd;
7833 *retval = fdp->fd_cmask;
7834 fdp->fd_cmask = newmask & ALLPERMS;
7835 proc_fdunlock(p);
7836 return (0);
7837 }
7838
7839 /*
7840 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
7841 *
7842 * Parameters: p Process requesting to set the umask
7843 * uap User argument descriptor (see below)
7844 * retval umask of the process (parameter p)
7845 *
7846 * Indirect: uap->newmask umask to set
7847 * uap->xsecurity ACL to set
7848 *
7849 * Returns: 0 Success
7850 * !0 Not success
7851 *
7852 */
7853 int
7854 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
7855 {
7856 int ciferror;
7857 kauth_filesec_t xsecdst;
7858
7859 xsecdst = KAUTH_FILESEC_NONE;
7860 if (uap->xsecurity != USER_ADDR_NULL) {
7861 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
7862 return ciferror;
7863 } else {
7864 xsecdst = KAUTH_FILESEC_NONE;
7865 }
7866
7867 ciferror = umask1(p, uap->newmask, xsecdst, retval);
7868
7869 if (xsecdst != KAUTH_FILESEC_NONE)
7870 kauth_filesec_free(xsecdst);
7871 return ciferror;
7872 }
7873
7874 int
7875 umask(proc_t p, struct umask_args *uap, int32_t *retval)
7876 {
7877 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
7878 }
7879
7880 /*
7881 * Void all references to file by ripping underlying filesystem
7882 * away from vnode.
7883 */
7884 /* ARGSUSED */
7885 int
7886 revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
7887 {
7888 vnode_t vp;
7889 struct vnode_attr va;
7890 vfs_context_t ctx = vfs_context_current();
7891 int error;
7892 struct nameidata nd;
7893
7894 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
7895 uap->path, ctx);
7896 error = namei(&nd);
7897 if (error)
7898 return (error);
7899 vp = nd.ni_vp;
7900
7901 nameidone(&nd);
7902
7903 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
7904 error = ENOTSUP;
7905 goto out;
7906 }
7907
7908 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
7909 error = EBUSY;
7910 goto out;
7911 }
7912
7913 #if CONFIG_MACF
7914 error = mac_vnode_check_revoke(ctx, vp);
7915 if (error)
7916 goto out;
7917 #endif
7918
7919 VATTR_INIT(&va);
7920 VATTR_WANTED(&va, va_uid);
7921 if ((error = vnode_getattr(vp, &va, ctx)))
7922 goto out;
7923 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
7924 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
7925 goto out;
7926 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
7927 VNOP_REVOKE(vp, REVOKEALL, ctx);
7928 out:
7929 vnode_put(vp);
7930 return (error);
7931 }
7932
7933
7934 /*
 * HFS/HFS Plus SPECIFIC SYSTEM CALLS
7936 * The following system calls are designed to support features
7937 * which are specific to the HFS & HFS Plus volume formats
7938 */
7939
7940
7941 /*
7942 * Obtain attribute information on objects in a directory while enumerating
7943 * the directory.
7944 */
7945 /* ARGSUSED */
7946 int
7947 getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
7948 {
7949 vnode_t vp;
7950 struct fileproc *fp;
7951 uio_t auio = NULL;
7952 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7953 uint32_t count, savecount;
7954 uint32_t newstate;
7955 int error, eofflag;
7956 uint32_t loff;
7957 struct attrlist attributelist;
7958 vfs_context_t ctx = vfs_context_current();
7959 int fd = uap->fd;
7960 char uio_buf[ UIO_SIZEOF(1) ];
7961 kauth_action_t action;
7962
7963 AUDIT_ARG(fd, fd);
7964
7965 /* Get the attributes into kernel space */
7966 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
7967 return(error);
7968 }
7969 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
7970 return(error);
7971 }
7972 savecount = count;
7973 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
7974 return (error);
7975 }
7976 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7977 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7978 error = EBADF;
7979 goto out;
7980 }
7981
7982
7983 #if CONFIG_MACF
7984 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
7985 fp->f_fglob);
7986 if (error)
7987 goto out;
7988 #endif
7989
7990
7991 if ( (error = vnode_getwithref(vp)) )
7992 goto out;
7993
7994 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7995
7996 unionread:
7997 if (vp->v_type != VDIR) {
7998 (void)vnode_put(vp);
7999 error = EINVAL;
8000 goto out;
8001 }
8002
8003 #if CONFIG_MACF
8004 error = mac_vnode_check_readdir(ctx, vp);
8005 if (error != 0) {
8006 (void)vnode_put(vp);
8007 goto out;
8008 }
8009 #endif /* MAC */
8010
8011 /* set up the uio structure which will contain the users return buffer */
8012 loff = fp->f_fglob->fg_offset;
8013 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8014 uio_addiov(auio, uap->buffer, uap->buffersize);
8015
8016 /*
8017 * If the only item requested is file names, we can let that past with
8018 * just LIST_DIRECTORY. If they want any other attributes, that means
8019 * they need SEARCH as well.
8020 */
8021 action = KAUTH_VNODE_LIST_DIRECTORY;
8022 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
8023 attributelist.fileattr || attributelist.dirattr)
8024 action |= KAUTH_VNODE_SEARCH;
8025
8026 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
8027
8028 /* Believe it or not, uap->options only has 32-bits of valid
8029 * info, so truncate before extending again */
8030
8031 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
8032 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
8033 }
8034
8035 if (error) {
8036 (void) vnode_put(vp);
8037 goto out;
8038 }
8039
8040 /*
8041 * If we've got the last entry of a directory in a union mount
8042 * then reset the eofflag and pretend there's still more to come.
8043 * The next call will again set eofflag and the buffer will be empty,
8044 * so traverse to the underlying directory and do the directory
8045 * read there.
8046 */
8047 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
8048 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
8049 eofflag = 0;
8050 } else { // Empty buffer
8051 struct vnode *tvp = vp;
8052 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
8053 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
8054 fp->f_fglob->fg_data = (caddr_t) vp;
8055 fp->f_fglob->fg_offset = 0; // reset index for new dir
8056 count = savecount;
8057 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
8058 vnode_put(tvp);
8059 goto unionread;
8060 }
8061 vp = tvp;
8062 }
8063 }
8064
8065 (void)vnode_put(vp);
8066
8067 if (error)
8068 goto out;
8069 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
8070
8071 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
8072 goto out;
8073 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
8074 goto out;
8075 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
8076 goto out;
8077
8078 *retval = eofflag; /* similar to getdirentries */
8079 error = 0;
8080 out:
8081 file_drop(fd);
8082 return (error); /* return error earlier, an retval of 0 or 1 now */
8083
8084 } /* end of getdirentriesattr system call */
8085
8086 /*
8087 * Exchange data between two files
8088 */
8089
8090 /* ARGSUSED */
8091 int
8092 exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
8093 {
8094
8095 struct nameidata fnd, snd;
8096 vfs_context_t ctx = vfs_context_current();
8097 vnode_t fvp;
8098 vnode_t svp;
8099 int error;
8100 u_int32_t nameiflags;
8101 char *fpath = NULL;
8102 char *spath = NULL;
8103 int flen=0, slen=0;
8104 int from_truncated=0, to_truncated=0;
8105 #if CONFIG_FSE
8106 fse_info f_finfo, s_finfo;
8107 #endif
8108
8109 nameiflags = 0;
8110 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8111
8112 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
8113 UIO_USERSPACE, uap->path1, ctx);
8114
8115 error = namei(&fnd);
8116 if (error)
8117 goto out2;
8118
8119 nameidone(&fnd);
8120 fvp = fnd.ni_vp;
8121
8122 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
8123 UIO_USERSPACE, uap->path2, ctx);
8124
8125 error = namei(&snd);
8126 if (error) {
8127 vnode_put(fvp);
8128 goto out2;
8129 }
8130 nameidone(&snd);
8131 svp = snd.ni_vp;
8132
8133 /*
8134 * if the files are the same, return an inval error
8135 */
8136 if (svp == fvp) {
8137 error = EINVAL;
8138 goto out;
8139 }
8140
8141 /*
8142 * if the files are on different volumes, return an error
8143 */
8144 if (svp->v_mount != fvp->v_mount) {
8145 error = EXDEV;
8146 goto out;
8147 }
8148
8149 /* If they're not files, return an error */
8150 if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
8151 error = EINVAL;
8152 goto out;
8153 }
8154
8155 #if CONFIG_MACF
8156 error = mac_vnode_check_exchangedata(ctx,
8157 fvp, svp);
8158 if (error)
8159 goto out;
8160 #endif
8161 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
8162 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
8163 goto out;
8164
8165 if (
8166 #if CONFIG_FSE
8167 need_fsevent(FSE_EXCHANGE, fvp) ||
8168 #endif
8169 kauth_authorize_fileop_has_listeners()) {
8170 GET_PATH(fpath);
8171 GET_PATH(spath);
8172 if (fpath == NULL || spath == NULL) {
8173 error = ENOMEM;
8174 goto out;
8175 }
8176
8177 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
8178 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
8179
8180 #if CONFIG_FSE
8181 get_fse_info(fvp, &f_finfo, ctx);
8182 get_fse_info(svp, &s_finfo, ctx);
8183 if (from_truncated || to_truncated) {
8184 // set it here since only the f_finfo gets reported up to user space
8185 f_finfo.mode |= FSE_TRUNCATED_PATH;
8186 }
8187 #endif
8188 }
8189 /* Ok, make the call */
8190 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
8191
8192 if (error == 0) {
8193 const char *tmpname;
8194
8195 if (fpath != NULL && spath != NULL) {
8196 /* call out to allow 3rd party notification of exchangedata.
8197 * Ignore result of kauth_authorize_fileop call.
8198 */
8199 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
8200 (uintptr_t)fpath, (uintptr_t)spath);
8201 }
8202 name_cache_lock();
8203
8204 tmpname = fvp->v_name;
8205 fvp->v_name = svp->v_name;
8206 svp->v_name = tmpname;
8207
8208 if (fvp->v_parent != svp->v_parent) {
8209 vnode_t tmp;
8210
8211 tmp = fvp->v_parent;
8212 fvp->v_parent = svp->v_parent;
8213 svp->v_parent = tmp;
8214 }
8215 name_cache_unlock();
8216
8217 #if CONFIG_FSE
8218 if (fpath != NULL && spath != NULL) {
8219 add_fsevent(FSE_EXCHANGE, ctx,
8220 FSE_ARG_STRING, flen, fpath,
8221 FSE_ARG_FINFO, &f_finfo,
8222 FSE_ARG_STRING, slen, spath,
8223 FSE_ARG_FINFO, &s_finfo,
8224 FSE_ARG_DONE);
8225 }
8226 #endif
8227 }
8228
8229 out:
8230 if (fpath != NULL)
8231 RELEASE_PATH(fpath);
8232 if (spath != NULL)
8233 RELEASE_PATH(spath);
8234 vnode_put(svp);
8235 vnode_put(fvp);
8236 out2:
8237 return (error);
8238 }
8239
/*
 * Return (in MB) the amount of freespace on the given vnode's volume.
 */
uint32_t freespace_mb(vnode_t vp);

uint32_t
freespace_mb(vnode_t vp)
{
	/* Refresh the cached vfsstat so f_bavail/f_bsize are current;
	 * any error from the refresh is ignored (stale stats are used). */
	vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
	/* available-bytes >> 20 == megabytes; computed in 64 bits to avoid
	 * overflow, then truncated to the uint32_t return type. */
	return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
	        vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
}
8252
8253 #if CONFIG_SEARCHFS
8254
8255 /* ARGSUSED */
8256
8257 int
8258 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
8259 {
8260 vnode_t vp, tvp;
8261 int i, error=0;
8262 int fserror = 0;
8263 struct nameidata nd;
8264 struct user64_fssearchblock searchblock;
8265 struct searchstate *state;
8266 struct attrlist *returnattrs;
8267 struct timeval timelimit;
8268 void *searchparams1,*searchparams2;
8269 uio_t auio = NULL;
8270 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8271 uint32_t nummatches;
8272 int mallocsize;
8273 uint32_t nameiflags;
8274 vfs_context_t ctx = vfs_context_current();
8275 char uio_buf[ UIO_SIZEOF(1) ];
8276
8277 /* Start by copying in fsearchblock parameter list */
8278 if (IS_64BIT_PROCESS(p)) {
8279 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
8280 timelimit.tv_sec = searchblock.timelimit.tv_sec;
8281 timelimit.tv_usec = searchblock.timelimit.tv_usec;
8282 }
8283 else {
8284 struct user32_fssearchblock tmp_searchblock;
8285
8286 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
8287 // munge into 64-bit version
8288 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
8289 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
8290 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
8291 searchblock.maxmatches = tmp_searchblock.maxmatches;
8292 /*
8293 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8294 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8295 */
8296 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
8297 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
8298 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
8299 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
8300 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
8301 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
8302 searchblock.searchattrs = tmp_searchblock.searchattrs;
8303 }
8304 if (error)
8305 return(error);
8306
8307 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
8308 */
8309 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
8310 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
8311 return(EINVAL);
8312
8313 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8314 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
8315 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8316 /* block. */
8317 /* */
8318 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8319 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8320 /* assumes the size is still 556 bytes it will continue to work */
8321
8322 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
8323 sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
8324
8325 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
8326
8327 /* Now set up the various pointers to the correct place in our newly allocated memory */
8328
8329 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
8330 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
8331 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
8332
8333 /* Now copy in the stuff given our local variables. */
8334
8335 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
8336 goto freeandexit;
8337
8338 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
8339 goto freeandexit;
8340
8341 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
8342 goto freeandexit;
8343
8344 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
8345 goto freeandexit;
8346
8347 /*
8348 * When searching a union mount, need to set the
8349 * start flag at the first call on each layer to
8350 * reset state for the new volume.
8351 */
8352 if (uap->options & SRCHFS_START)
8353 state->ss_union_layer = 0;
8354 else
8355 uap->options |= state->ss_union_flags;
8356 state->ss_union_flags = 0;
8357
8358 /*
8359 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8360 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
8361 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8362 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
8363 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8364 */
8365
8366 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
8367 attrreference_t* string_ref;
8368 u_int32_t* start_length;
8369 user64_size_t param_length;
8370
8371 /* validate searchparams1 */
8372 param_length = searchblock.sizeofsearchparams1;
8373 /* skip the word that specifies length of the buffer */
8374 start_length= (u_int32_t*) searchparams1;
8375 start_length= start_length+1;
8376 string_ref= (attrreference_t*) start_length;
8377
8378 /* ensure no negative offsets or too big offsets */
8379 if (string_ref->attr_dataoffset < 0 ) {
8380 error = EINVAL;
8381 goto freeandexit;
8382 }
8383 if (string_ref->attr_length > MAXPATHLEN) {
8384 error = EINVAL;
8385 goto freeandexit;
8386 }
8387
8388 /* Check for pointer overflow in the string ref */
8389 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
8390 error = EINVAL;
8391 goto freeandexit;
8392 }
8393
8394 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
8395 error = EINVAL;
8396 goto freeandexit;
8397 }
8398 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
8399 error = EINVAL;
8400 goto freeandexit;
8401 }
8402 }
8403
8404 /* set up the uio structure which will contain the users return buffer */
8405 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8406 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
8407
8408 nameiflags = 0;
8409 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8410 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
8411 UIO_USERSPACE, uap->path, ctx);
8412
8413 error = namei(&nd);
8414 if (error)
8415 goto freeandexit;
8416 vp = nd.ni_vp;
8417 nameidone(&nd);
8418
8419 /*
8420 * Switch to the root vnode for the volume
8421 */
8422 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
8423 vnode_put(vp);
8424 if (error)
8425 goto freeandexit;
8426 vp = tvp;
8427
8428 /*
8429 * If it's a union mount, the path lookup takes
8430 * us to the top layer. But we may need to descend
8431 * to a lower layer. For non-union mounts the layer
8432 * is always zero.
8433 */
8434 for (i = 0; i < (int) state->ss_union_layer; i++) {
8435 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
8436 break;
8437 tvp = vp;
8438 vp = vp->v_mount->mnt_vnodecovered;
8439 if (vp == NULL) {
8440 vnode_put(tvp);
8441 error = ENOENT;
8442 goto freeandexit;
8443 }
8444 vnode_getwithref(vp);
8445 vnode_put(tvp);
8446 }
8447
8448 #if CONFIG_MACF
8449 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
8450 if (error) {
8451 vnode_put(vp);
8452 goto freeandexit;
8453 }
8454 #endif
8455
8456
8457 /*
8458 * If searchblock.maxmatches == 0, then skip the search. This has happened
8459 * before and sometimes the underlying code doesnt deal with it well.
8460 */
8461 if (searchblock.maxmatches == 0) {
8462 nummatches = 0;
8463 goto saveandexit;
8464 }
8465
8466 /*
8467 * Allright, we have everything we need, so lets make that call.
8468 *
8469 * We keep special track of the return value from the file system:
8470 * EAGAIN is an acceptable error condition that shouldn't keep us
8471 * from copying out any results...
8472 */
8473
8474 fserror = VNOP_SEARCHFS(vp,
8475 searchparams1,
8476 searchparams2,
8477 &searchblock.searchattrs,
8478 (u_long)searchblock.maxmatches,
8479 &timelimit,
8480 returnattrs,
8481 &nummatches,
8482 (u_long)uap->scriptcode,
8483 (u_long)uap->options,
8484 auio,
8485 (struct searchstate *) &state->ss_fsstate,
8486 ctx);
8487
8488 /*
8489 * If it's a union mount we need to be called again
8490 * to search the mounted-on filesystem.
8491 */
8492 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
8493 state->ss_union_flags = SRCHFS_START;
8494 state->ss_union_layer++; // search next layer down
8495 fserror = EAGAIN;
8496 }
8497
8498 saveandexit:
8499
8500 vnode_put(vp);
8501
8502 /* Now copy out the stuff that needs copying out. That means the number of matches, the
8503 search state. Everything was already put into he return buffer by the vop call. */
8504
8505 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
8506 goto freeandexit;
8507
8508 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
8509 goto freeandexit;
8510
8511 error = fserror;
8512
8513 freeandexit:
8514
8515 FREE(searchparams1,M_TEMP);
8516
8517 return(error);
8518
8519
8520 } /* end of searchfs system call */
8521
8522 #else /* CONFIG_SEARCHFS */
8523
int
searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
{
	/* searchfs support was not compiled in (CONFIG_SEARCHFS is off). */
	return (ENOTSUP);
}
8529
8530 #endif /* CONFIG_SEARCHFS */
8531
8532
/*
 * Namespace-handler ("nspace") state: lock bootstrap objects, the table of
 * pending events, and per-handler bookkeeping shared by the
 * resolve_nspace_item*() and wait_for_namespace_event() machinery below.
 */
lck_grp_attr_t * nspace_group_attr;
lck_attr_t * nspace_lock_attr;
lck_grp_t * nspace_mutex_group;

/* nspace_handler_lock guards nspace_items[] and the token/idx rendezvous;
 * nspace_handler_exclusion_lock serializes the handler_busy flag. */
lck_mtx_t nspace_handler_lock;
lck_mtx_t nspace_handler_exclusion_lock;

time_t snapshot_timestamp=0;
int nspace_allow_virtual_devs=0;	/* allow snapshot events on virtual (disk-image) devices */

void nspace_handler_init(void);

/* One pending namespace event, keyed by (vp, op). */
typedef struct nspace_item_info {
	struct vnode *vp;	/* vnode the event concerns; NULL once recycled */
	void *arg;		/* optional payload (a uio for extended events) */
	uint64_t op;		/* NAMESPACE_HANDLER_* operation bits */
	uint32_t vid;		/* vnode vid captured when the event was queued */
	uint32_t flags;		/* NSPACE_ITEM_* state bits; 0 == slot free */
	uint32_t token;		/* id handed to the user-space handler */
	uint32_t refcount;	/* number of threads waiting on this item */
} nspace_item_info;

#define MAX_NSPACE_ITEMS 128
nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
uint32_t nspace_token_id=0;
uint32_t nspace_handler_timeout = 15; // seconds

/* nspace_item_info.flags state bits */
#define NSPACE_ITEM_NEW 0x0001
#define NSPACE_ITEM_PROCESSING 0x0002
#define NSPACE_ITEM_DEAD 0x0004
#define NSPACE_ITEM_CANCELLED 0x0008
#define NSPACE_ITEM_DONE 0x0010
#define NSPACE_ITEM_RESET_TIMER 0x0020

/* nspace_item_info.flags event-type bits */
#define NSPACE_ITEM_NSPACE_EVENT 0x0040
#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080

#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)

//#pragma optimization_level 0

/* Which user-space handler services an event. */
typedef enum {
	NSPACE_HANDLER_NSPACE = 0,
	NSPACE_HANDLER_SNAPSHOT = 1,

	NSPACE_HANDLER_COUNT,
} nspace_type_t;

/* Identity of the registered user-space handler process. */
typedef struct {
	uint64_t handler_tid;
	struct proc *handler_proc;
	int handler_busy;
} nspace_handler_t;

nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];

/* namespace fsctl functions */
static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
static int nspace_item_flags_for_type(nspace_type_t nspace_type);
static int nspace_open_flags_for_type(nspace_type_t nspace_type);
static nspace_type_t nspace_type_for_op(uint64_t op);
static int nspace_is_special_process(struct proc *proc);
static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
static int validate_namespace_args (int is64bit, int size);
static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
8600
8601
8602 static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
8603 {
8604 switch(nspace_type) {
8605 case NSPACE_HANDLER_NSPACE:
8606 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
8607 case NSPACE_HANDLER_SNAPSHOT:
8608 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
8609 default:
8610 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
8611 return 0;
8612 }
8613 }
8614
8615 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
8616 {
8617 switch(nspace_type) {
8618 case NSPACE_HANDLER_NSPACE:
8619 return NSPACE_ITEM_NSPACE_EVENT;
8620 case NSPACE_HANDLER_SNAPSHOT:
8621 return NSPACE_ITEM_SNAPSHOT_EVENT;
8622 default:
8623 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
8624 return 0;
8625 }
8626 }
8627
8628 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
8629 {
8630 switch(nspace_type) {
8631 case NSPACE_HANDLER_NSPACE:
8632 return FREAD | FWRITE | O_EVTONLY;
8633 case NSPACE_HANDLER_SNAPSHOT:
8634 return FREAD | O_EVTONLY;
8635 default:
8636 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
8637 return 0;
8638 }
8639 }
8640
8641 static inline nspace_type_t nspace_type_for_op(uint64_t op)
8642 {
8643 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
8644 case NAMESPACE_HANDLER_NSPACE_EVENT:
8645 return NSPACE_HANDLER_NSPACE;
8646 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
8647 return NSPACE_HANDLER_SNAPSHOT;
8648 default:
8649 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
8650 return NSPACE_HANDLER_NSPACE;
8651 }
8652 }
8653
8654 static inline int nspace_is_special_process(struct proc *proc)
8655 {
8656 int i;
8657 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
8658 if (proc == nspace_handlers[i].handler_proc)
8659 return 1;
8660 }
8661 return 0;
8662 }
8663
8664 void
8665 nspace_handler_init(void)
8666 {
8667 nspace_lock_attr = lck_attr_alloc_init();
8668 nspace_group_attr = lck_grp_attr_alloc_init();
8669 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
8670 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
8671 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
8672 memset(&nspace_items[0], 0, sizeof(nspace_items));
8673 }
8674
8675 void
8676 nspace_proc_exit(struct proc *p)
8677 {
8678 int i, event_mask = 0;
8679
8680 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
8681 if (p == nspace_handlers[i].handler_proc) {
8682 event_mask |= nspace_item_flags_for_type(i);
8683 nspace_handlers[i].handler_tid = 0;
8684 nspace_handlers[i].handler_proc = NULL;
8685 }
8686 }
8687
8688 if (event_mask == 0) {
8689 return;
8690 }
8691
8692 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
8693 // if this process was the snapshot handler, zero snapshot_timeout
8694 snapshot_timestamp = 0;
8695 }
8696
8697 //
8698 // unblock anyone that's waiting for the handler that died
8699 //
8700 lck_mtx_lock(&nspace_handler_lock);
8701 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8702 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
8703
8704 if ( nspace_items[i].flags & event_mask ) {
8705
8706 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
8707 vnode_lock_spin(nspace_items[i].vp);
8708 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8709 vnode_unlock(nspace_items[i].vp);
8710 }
8711 nspace_items[i].vp = NULL;
8712 nspace_items[i].vid = 0;
8713 nspace_items[i].flags = NSPACE_ITEM_DONE;
8714 nspace_items[i].token = 0;
8715
8716 wakeup((caddr_t)&(nspace_items[i].vp));
8717 }
8718 }
8719 }
8720
8721 wakeup((caddr_t)&nspace_item_idx);
8722 lck_mtx_unlock(&nspace_handler_lock);
8723 }
8724
8725
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	/* Convenience wrapper: same as the extended variant with no payload. */
	return resolve_nspace_item_ext(vp, op, NULL);
}
8731
int
resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
{
	/*
	 * Queue a namespace event for vp in nspace_items[] and block the
	 * calling thread until the user-space handler marks the item done,
	 * cancels it, or nspace_handler_timeout seconds elapse.
	 * Returns 0 on success (or when the event is suppressed) or errno.
	 */
	int i, error, keep_waiting;
	struct timespec ts;
	nspace_type_t nspace_type = nspace_type_for_op(op);

	// only allow namespace events on regular files, directories and symlinks.
	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
		return 0;
	}

	//
	// if this is a snapshot event and the vnode is on a
	// disk image just pretend nothing happened since any
	// change to the disk image will cause the disk image
	// itself to get backed up and this avoids multi-way
	// deadlocks between the snapshot handler and the ever
	// popular diskimages-helper process.  the variable
	// nspace_allow_virtual_devs allows this behavior to
	// be overridden (for use by the Mobile TimeMachine
	// testing infrastructure which uses disk images)
	//
	if (   (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
	    && (vp->v_mount != NULL)
	    && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
	    && !nspace_allow_virtual_devs) {

		return 0;
	}

	// if (thread_tid(current_thread()) == namespace_handler_tid) {
	/* No handler registered for this event type: nothing to wait for. */
	if (nspace_handlers[nspace_type].handler_proc == NULL) {
		return 0;
	}

	/* A handler process must never block on its own event queue. */
	if (nspace_is_special_process(current_proc())) {
		return EDEADLK;
	}

	lck_mtx_lock(&nspace_handler_lock);

retry:
	/* First check whether this (vp, op) pair is already queued. */
	for(i=0; i < MAX_NSPACE_ITEMS; i++) {
		if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
			break;
		}
	}

	if (i >= MAX_NSPACE_ITEMS) {
		/* Not queued yet: find a free slot (flags == 0 means unused). */
		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].flags == 0) {
				break;
			}
		}
	} else {
		/* Already queued: piggy-back on the existing item. */
		nspace_items[i].refcount++;
	}

	if (i >= MAX_NSPACE_ITEMS) {
		/* Table full: sleep (interruptibly, bounded) for a free slot. */
		ts.tv_sec = nspace_handler_timeout;
		ts.tv_nsec = 0;

		error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
		if (error == 0) {
			// an entry got free'd up, go see if we can get a slot
			goto retry;
		} else {
			lck_mtx_unlock(&nspace_handler_lock);
			return error;
		}
	}

	//
	// if it didn't already exist, add it.  if it did exist
	// we'll get woken up when someone does a wakeup() on
	// the slot in the nspace_items table.
	//
	if (vp != nspace_items[i].vp) {
		nspace_items[i].vp = vp;
		nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg;  // arg is {NULL, true, uio *} - only pass uio thru to the user
		nspace_items[i].op = op;
		nspace_items[i].vid = vnode_vid(vp);
		nspace_items[i].flags = NSPACE_ITEM_NEW;
		nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
		if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
			if (arg) {
				vnode_lock_spin(vp);
				vp->v_flag |= VNEEDSSNAPSHOT;
				vnode_unlock(vp);
			}
		}

		nspace_items[i].token = 0;
		nspace_items[i].refcount = 1;

		/* Wake a handler parked in wait_for_namespace_event(). */
		wakeup((caddr_t)&nspace_item_idx);
	}

	//
	// Now go to sleep until the handler does a wakeup on this
	// slot in the nspace_items table (or we timeout).
	//
	keep_waiting = 1;
	while(keep_waiting) {
		ts.tv_sec = nspace_handler_timeout;
		ts.tv_nsec = 0;
		error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);

		if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
			error = 0;
		} else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
			/* On cancellation the token field carries the result code. */
			error = nspace_items[i].token;
		} else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
			if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
				/* Handler asked for more time: re-arm and wait again. */
				nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
				continue;
			} else {
				error = ETIMEDOUT;
			}
		} else if (error == 0) {
			// hmmm, why did we get woken up?
			printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
			       nspace_items[i].token);
		}

		/* Last waiter out recycles the slot for re-use. */
		if (--nspace_items[i].refcount == 0) {
			nspace_items[i].vp = NULL;     // clear this so that no one will match on it again
			nspace_items[i].arg = NULL;
			nspace_items[i].token = 0;     // clear this so that the handler will not find it anymore
			nspace_items[i].flags = 0;     // this clears it for re-use
		}
		wakeup(&nspace_token_id);
		keep_waiting = 0;
	}

	lck_mtx_unlock(&nspace_handler_lock);

	return error;
}
8872
8873
8874 int
8875 get_nspace_item_status(struct vnode *vp, int32_t *status)
8876 {
8877 int i;
8878
8879 lck_mtx_lock(&nspace_handler_lock);
8880 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8881 if (nspace_items[i].vp == vp) {
8882 break;
8883 }
8884 }
8885
8886 if (i >= MAX_NSPACE_ITEMS) {
8887 lck_mtx_unlock(&nspace_handler_lock);
8888 return ENOENT;
8889 }
8890
8891 *status = nspace_items[i].flags;
8892 lck_mtx_unlock(&nspace_handler_lock);
8893 return 0;
8894 }
8895
8896
#if 0
/* NOTE(review): dead code -- compiled out via "#if 0"; kept for reference. */
static int
build_volfs_path(struct vnode *vp, char *path, int *len)
{
	struct vnode_attr va;
	int ret;

	/* Fetch the volume id and file id needed to form a /.vol path. */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_fileid);

	if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
		/* Fall back to a recognizable bogus path; *len is set to the
		 * formatted length plus the NUL. */
		*len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
		ret = -1;
	} else {
		*len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
		ret = 0;
	}

	return ret;
}
#endif
8919
//
// Note: this function does NOT check permissions on all of the
// parent directories leading to this vnode.  It should only be
// called on behalf of a root process.  Otherwise a process may
// get access to a file because the file itself is readable even
// though its parent directories would prevent access.
//
static int
vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
{
	/*
	 * Open an already-looked-up vnode with mode `fmode`: superuser
	 * check, MAC check, kauth authorization, VNOP_OPEN, then a
	 * usecount reference via vnode_ref_ext.  Returns 0 or errno.
	 */
	int error, action;

	/* Root only -- see the parent-directory caveat above. */
	if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
		return error;
	}

#if CONFIG_MACF
	error = mac_vnode_check_open(ctx, vp, fmode);
	if (error)
		return error;
#endif

	/* compute action to be authorized */
	action = 0;
	if (fmode & FREAD) {
		action |= KAUTH_VNODE_READ_DATA;
	}
	if (fmode & (FWRITE | O_TRUNC)) {
		/*
		 * If we are writing, appending, and not truncating,
		 * indicate that we are appending so that if the
		 * UF_APPEND or SF_APPEND bits are set, we do not deny
		 * the open.
		 */
		if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
			action |= KAUTH_VNODE_APPEND_DATA;
		} else {
			action |= KAUTH_VNODE_WRITE_DATA;
		}
	}

	if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
		return error;


	//
	// if the vnode is tagged VOPENEVT and the current process
	// has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
	// flag to the open mode so that this open won't count against
	// the vnode when carbon delete() does a vnode_isinuse() to see
	// if a file is currently in use.  this allows spotlight
	// importers to not interfere with carbon apps that depend on
	// the no-delete-if-busy semantics of carbon delete().
	//
	if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
		fmode |= O_EVTONLY;
	}

	if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
		return error;
	}
	if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
		/* Could not take the reference: undo the open. */
		VNOP_CLOSE(vp, fmode, ctx);
		return error;
	}

	/* Call out to allow 3rd party notification of open.
	 * Ignore result of kauth_authorize_fileop call.
	 */
#if CONFIG_MACF
	mac_vnode_notify_open(ctx, vp, fmode);
#endif
	kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
	    (uintptr_t)vp, 0);


	return 0;
}
8998
8999 static int
9000 wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
9001 {
9002 int i, error=0, unblock=0;
9003 task_t curtask;
9004
9005 lck_mtx_lock(&nspace_handler_exclusion_lock);
9006 if (nspace_handlers[nspace_type].handler_busy) {
9007 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9008 return EBUSY;
9009 }
9010 nspace_handlers[nspace_type].handler_busy = 1;
9011 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9012
9013 /*
9014 * Any process that gets here will be one of the namespace handlers.
9015 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
9016 * as we can cause deadlocks to occur, because the namespace handler may prevent
9017 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
9018 * process.
9019 */
9020 curtask = current_task();
9021 bsd_set_dependency_capable (curtask);
9022
9023 lck_mtx_lock(&nspace_handler_lock);
9024 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9025 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
9026 nspace_handlers[nspace_type].handler_proc = current_proc();
9027 }
9028
9029 while (error == 0) {
9030
9031 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9032 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9033 if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9034 continue;
9035 }
9036 break;
9037 }
9038 }
9039
9040 if (i < MAX_NSPACE_ITEMS) {
9041 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
9042 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
9043 nspace_items[i].token = ++nspace_token_id;
9044
9045 if (nspace_items[i].vp) {
9046 struct fileproc *fp;
9047 int32_t indx, fmode;
9048 struct proc *p = current_proc();
9049 vfs_context_t ctx = vfs_context_current();
9050 struct vnode_attr va;
9051
9052
9053 /*
9054 * Use vnode pointer to acquire a file descriptor for
9055 * hand-off to userland
9056 */
9057 fmode = nspace_open_flags_for_type(nspace_type);
9058 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
9059 if (error) {
9060 unblock = 1;
9061 break;
9062 }
9063 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
9064 if (error) {
9065 unblock = 1;
9066 vnode_put(nspace_items[i].vp);
9067 break;
9068 }
9069
9070 if ((error = falloc(p, &fp, &indx, ctx))) {
9071 vn_close(nspace_items[i].vp, fmode, ctx);
9072 vnode_put(nspace_items[i].vp);
9073 unblock = 1;
9074 break;
9075 }
9076
9077 fp->f_fglob->fg_flag = fmode;
9078 fp->f_fglob->fg_ops = &vnops;
9079 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
9080
9081 proc_fdlock(p);
9082 procfdtbl_releasefd(p, indx, NULL);
9083 fp_drop(p, indx, fp, 1);
9084 proc_fdunlock(p);
9085
9086 /*
9087 * All variants of the namespace handler struct support these three fields:
9088 * token, flags, and the FD pointer
9089 */
9090 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
9091 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
9092 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
9093
9094 /*
9095 * Handle optional fields:
9096 * extended version support an info ptr (offset, length), and the
9097 *
9098 * namedata version supports a unique per-link object ID
9099 *
9100 */
9101 if (nhd->infoptr) {
9102 uio_t uio = (uio_t)nspace_items[i].arg;
9103 uint64_t u_offset, u_length;
9104
9105 if (uio) {
9106 u_offset = uio_offset(uio);
9107 u_length = uio_resid(uio);
9108 } else {
9109 u_offset = 0;
9110 u_length = 0;
9111 }
9112 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
9113 error = copyout(&u_length, nhd->infoptr+sizeof(uint64_t), sizeof(uint64_t));
9114 }
9115
9116 if (nhd->objid) {
9117 VATTR_INIT(&va);
9118 VATTR_WANTED(&va, va_linkid);
9119 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
9120 if (error == 0 ) {
9121 uint64_t linkid = 0;
9122 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
9123 linkid = (uint64_t)va.va_linkid;
9124 }
9125 error = copyout (&linkid, nhd->objid, sizeof(uint64_t));
9126 }
9127 }
9128
9129 if (error) {
9130 vn_close(nspace_items[i].vp, fmode, ctx);
9131 fp_free(p, indx, fp);
9132 unblock = 1;
9133 }
9134
9135 vnode_put(nspace_items[i].vp);
9136
9137 break;
9138 } else {
9139 printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
9140 i, nspace_items[i].vp, error, nspace_items[i].vp->v_name);
9141 }
9142
9143 } else {
9144 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
9145 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9146 error = EINVAL;
9147 break;
9148 }
9149
9150 }
9151 }
9152
9153 if (unblock) {
9154 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9155 vnode_lock_spin(nspace_items[i].vp);
9156 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9157 vnode_unlock(nspace_items[i].vp);
9158 }
9159 nspace_items[i].vp = NULL;
9160 nspace_items[i].vid = 0;
9161 nspace_items[i].flags = NSPACE_ITEM_DONE;
9162 nspace_items[i].token = 0;
9163
9164 wakeup((caddr_t)&(nspace_items[i].vp));
9165 }
9166
9167 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
9168 // just go through every snapshot event and unblock it immediately.
9169 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9170 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9171 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9172 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9173 nspace_items[i].vp = NULL;
9174 nspace_items[i].vid = 0;
9175 nspace_items[i].flags = NSPACE_ITEM_DONE;
9176 nspace_items[i].token = 0;
9177
9178 wakeup((caddr_t)&(nspace_items[i].vp));
9179 }
9180 }
9181 }
9182 }
9183 }
9184
9185 lck_mtx_unlock(&nspace_handler_lock);
9186
9187 lck_mtx_lock(&nspace_handler_exclusion_lock);
9188 nspace_handlers[nspace_type].handler_busy = 0;
9189 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9190
9191 return error;
9192 }
9193
9194 static inline int validate_namespace_args (int is64bit, int size) {
9195
9196 if (is64bit) {
9197 /* Must be one of these */
9198 if (size == sizeof(user64_namespace_handler_info)) {
9199 goto sizeok;
9200 }
9201 if (size == sizeof(user64_namespace_handler_info_ext)) {
9202 goto sizeok;
9203 }
9204 if (size == sizeof(user64_namespace_handler_data)) {
9205 goto sizeok;
9206 }
9207 return EINVAL;
9208 }
9209 else {
9210 /* 32 bit -- must be one of these */
9211 if (size == sizeof(user32_namespace_handler_info)) {
9212 goto sizeok;
9213 }
9214 if (size == sizeof(user32_namespace_handler_info_ext)) {
9215 goto sizeok;
9216 }
9217 if (size == sizeof(user32_namespace_handler_data)) {
9218 goto sizeok;
9219 }
9220 return EINVAL;
9221 }
9222
9223 sizeok:
9224
9225 return 0;
9226
9227 }
9228
9229 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
9230 {
9231 int error = 0;
9232 namespace_handler_data nhd;
9233
9234 bzero (&nhd, sizeof(namespace_handler_data));
9235
9236 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
9237 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9238 return EINVAL;
9239 }
9240
9241 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9242 return error;
9243 }
9244
9245 error = validate_namespace_args (is64bit, size);
9246 if (error) {
9247 return error;
9248 }
9249
9250 /* Copy in the userland pointers into our kernel-only struct */
9251
9252 if (is64bit) {
9253 /* 64 bit userland structures */
9254 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
9255 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
9256 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
9257
9258 /* If the size is greater than the standard info struct, add in extra fields */
9259 if (size > (sizeof(user64_namespace_handler_info))) {
9260 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
9261 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
9262 }
9263 if (size == (sizeof(user64_namespace_handler_data))) {
9264 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
9265 }
9266 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9267 }
9268 }
9269 else {
9270 /* 32 bit userland structures */
9271 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
9272 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
9273 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
9274
9275 if (size > (sizeof(user32_namespace_handler_info))) {
9276 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
9277 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
9278 }
9279 if (size == (sizeof(user32_namespace_handler_data))) {
9280 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
9281 }
9282 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9283 }
9284 }
9285
9286 return wait_for_namespace_event(&nhd, nspace_type);
9287 }
9288
9289 /*
9290 * Make a filesystem-specific control call:
9291 */
9292 /* ARGSUSED */
/*
 * fsctl_internal: shared backend for the fsctl() and ffsctl() system
 * calls.  Marshals the ioctl-style argument between user and kernel
 * space according to the IOC_* direction bits encoded in 'cmd',
 * services the generic FSCTL_* commands inline, and forwards anything
 * else to the filesystem via VNOP_IOCTL().
 *
 * On return, *arg_vp may have been set to NULL if this routine
 * consumed the vnode's iocount itself (FSCTL_SYNC_VOLUME path);
 * callers must check for NULL before calling vnode_put().
 */
static int
fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
{
	int error=0;
	boolean_t is64bit;
	u_int size;
#define STK_PARAMS 128
	char stkbuf[STK_PARAMS];
	caddr_t data, memp;
	vnode_t vp = *arg_vp;

	/* Argument size is encoded in the ioctl command word. */
	size = IOCPARM_LEN(cmd);
	if (size > IOCPARM_MAX) return (EINVAL);

	is64bit = proc_is64bit(p);

	memp = NULL;

	/*
	 * ensure the buffer is large enough for underlying calls
	 */
#ifndef HFSIOC_GETPATH
	typedef char pn_t[MAXPATHLEN];
#define HFSIOC_GETPATH  _IOWR('h', 13, pn_t)
#endif

#ifndef HFS_GETPATH
#define HFS_GETPATH  IOCBASECMD(HFSIOC_GETPATH)
#endif
	if (IOCBASECMD(cmd) == HFS_GETPATH) {
		/* Round up to MAXPATHLEN regardless of user input */
		size = MAXPATHLEN;
	}


	/* Large arguments go to the heap; small ones use the stack buffer. */
	if (size > sizeof (stkbuf)) {
		if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
		data = memp;
	} else {
		data = &stkbuf[0];
	};

	if (cmd & IOC_IN) {
		if (size) {
			error = copyin(udata, data, size);
			if (error) {
				if (memp) {
					kfree (memp, size);
				}
				return error;
			}
		} else {
			/* Zero-size IOC_IN: the "argument" is the user pointer itself. */
			if (is64bit) {
				*(user_addr_t *)data = udata;
			}
			else {
				*(uint32_t *)data = (uint32_t)udata;
			}
		};
	} else if ((cmd & IOC_OUT) && size) {
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	} else if (cmd & IOC_VOID) {
		if (is64bit) {
			*(user_addr_t *)data = udata;
		}
		else {
			*(uint32_t *)data = (uint32_t)udata;
		}
	}

	/* Check to see if it's a generic command */
	switch (IOCBASECMD(cmd)) {

	case FSCTL_SYNC_VOLUME: {
		mount_t mp = vp->v_mount;
		int arg = *(uint32_t*)data;

		/* record vid of vp so we can drop it below. */
		uint32_t vvid = vp->v_id;

		/*
		 * Then grab mount_iterref so that we can release the vnode.
		 * Without this, a thread may call vnode_iterate_prepare then
		 * get into a deadlock because we've never released the root vp
		 */
		error = mount_iterref (mp, 0);
		if (error)  {
			break;
		}
		vnode_put(vp);

		/* issue the sync for this volume */
		(void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);

		/*
		 * Then release the mount_iterref once we're done syncing; it's not
		 * needed for the VNOP_IOCTL below
		 */
		mount_iterdrop(mp);

		if (arg & FSCTL_SYNC_FULLSYNC) {
			/* re-obtain vnode iocount on the root vp, if possible */
			error = vnode_getwithvid (vp, vvid);
			if (error == 0) {
				error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
				vnode_put (vp);
			}
		}
		/* mark the argument VP as having been released */
		*arg_vp = NULL;
	}
	break;

	case FSCTL_SET_PACKAGE_EXTS: {
		user_addr_t ext_strings;
		uint32_t    num_entries;
		uint32_t    max_width;

		if (   (is64bit && size != sizeof(user64_package_ext_info))
		    || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {

			// either you're 64-bit and passed a 64-bit struct or
			// you're 32-bit and passed a 32-bit struct.  otherwise
			// it's not ok.
			error = EINVAL;
			break;
		}

		if (is64bit) {
			ext_strings = ((user64_package_ext_info *)data)->strings;
			num_entries = ((user64_package_ext_info *)data)->num_entries;
			max_width   = ((user64_package_ext_info *)data)->max_width;
		} else {
			ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
			num_entries = ((user32_package_ext_info *)data)->num_entries;
			max_width   = ((user32_package_ext_info *)data)->max_width;
		}
		error = set_package_extensions_table(ext_strings, num_entries, max_width);
	}
	break;

	/* namespace handlers */
	case FSCTL_NAMESPACE_HANDLER_GET: {
		error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
	}
	break;

	/* Snapshot handlers */
	case FSCTL_OLD_SNAPSHOT_HANDLER_GET: {
		error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
	}
	break;

	case FSCTL_SNAPSHOT_HANDLER_GET_EXT: {
		error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
	}
	break;

	case FSCTL_NAMESPACE_HANDLER_UPDATE: {
		uint32_t token, val;
		int i;

		/* Restricted to the superuser acting as the handler process. */
		if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
			break;
		}

		if (!nspace_is_special_process(p)) {
			error = EINVAL;
			break;
		}

		/* Argument block is two words: item token, then a value. */
		token = ((uint32_t *)data)[0];
		val   = ((uint32_t *)data)[1];

		lck_mtx_lock(&nspace_handler_lock);

		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].token == token) {
				break;  /* exit for loop, not case stmt */
			}
		}

		if (i >= MAX_NSPACE_ITEMS) {
			error = ENOENT;
		} else {
			//
			// if this bit is set, when resolve_nspace_item() times out
			// it will loop and go back to sleep.
			//
			nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
		}

		lck_mtx_unlock(&nspace_handler_lock);

		if (error) {
			printf("nspace-handler-update: did not find token %u\n", token);
		}
	}
	break;

	case FSCTL_NAMESPACE_HANDLER_UNBLOCK: {
		uint32_t token, val;
		int i;

		if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
			break;
		}

		if (!nspace_is_special_process(p)) {
			error = EINVAL;
			break;
		}

		token = ((uint32_t *)data)[0];
		val   = ((uint32_t *)data)[1];

		lck_mtx_lock(&nspace_handler_lock);

		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].token == token) {
				break;  /* exit for loop, not case statement */
			}
		}

		if (i >= MAX_NSPACE_ITEMS) {
			printf("nspace-handler-unblock: did not find token %u\n", token);
			error = ENOENT;
		} else {
			/* val == 0: also clear the pending-snapshot flag on the vnode. */
			if (val == 0 && nspace_items[i].vp) {
				vnode_lock_spin(nspace_items[i].vp);
				nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
				vnode_unlock(nspace_items[i].vp);
			}

			/* Mark the item done and wake the thread sleeping on it. */
			nspace_items[i].vp = NULL;
			nspace_items[i].arg = NULL;
			nspace_items[i].op = 0;
			nspace_items[i].vid = 0;
			nspace_items[i].flags = NSPACE_ITEM_DONE;
			nspace_items[i].token = 0;

			wakeup((caddr_t)&(nspace_items[i].vp));
		}

		lck_mtx_unlock(&nspace_handler_lock);
	}
	break;

	case FSCTL_NAMESPACE_HANDLER_CANCEL: {
		uint32_t token, val;
		int i;

		if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
			break;
		}

		if (!nspace_is_special_process(p)) {
			error = EINVAL;
			break;
		}

		token = ((uint32_t *)data)[0];
		val   = ((uint32_t *)data)[1];

		lck_mtx_lock(&nspace_handler_lock);

		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].token == token) {
				break;  /* exit for loop, not case stmt */
			}
		}

		if (i >= MAX_NSPACE_ITEMS) {
			printf("nspace-handler-cancel: did not find token %u\n", token);
			error = ENOENT;
		} else {
			if (nspace_items[i].vp) {
				vnode_lock_spin(nspace_items[i].vp);
				nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
				vnode_unlock(nspace_items[i].vp);
			}

			/* Cancelled items keep 'val' as their token and get flagged. */
			nspace_items[i].vp = NULL;
			nspace_items[i].arg = NULL;
			nspace_items[i].vid = 0;
			nspace_items[i].token = val;
			nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
			nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;

			wakeup((caddr_t)&(nspace_items[i].vp));
		}

		lck_mtx_unlock(&nspace_handler_lock);
	}
	break;

	case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
		if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
			break;
		}

		// we explicitly do not do the namespace_handler_proc check here

		lck_mtx_lock(&nspace_handler_lock);
		snapshot_timestamp = ((uint32_t *)data)[0];
		/* Wake any handler blocked waiting for a valid timestamp. */
		wakeup(&nspace_item_idx);
		lck_mtx_unlock(&nspace_handler_lock);
		printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);

	}
	break;

	case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
	{
		if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
			break;
		}

		lck_mtx_lock(&nspace_handler_lock);
		nspace_allow_virtual_devs = ((uint32_t *)data)[0];
		lck_mtx_unlock(&nspace_handler_lock);
		printf("nspace-snapshot-handler will%s allow events on disk-images\n",
		       nspace_allow_virtual_devs ? "" : " NOT");
		error = 0;

	}
	break;

	case FSCTL_SET_FSTYPENAME_OVERRIDE:
	{
		if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
			break;
		}
		if (vp->v_mount) {
			mount_lock(vp->v_mount);
			if (data[0] != 0) {
				/* Non-empty string installs the override name. */
				strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
				vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
				/* Read-only "mtmfs" mounts get extended-security flags. */
				if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
					vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
					vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
				}
			} else {
				/* Empty string clears any existing override. */
				if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
					vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
				}
				vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
				vp->v_mount->fstypename_override[0] = '\0';
			}
			mount_unlock(vp->v_mount);
		}
	}
	break;

	default: {
		/* Invoke the filesystem-specific code */
		error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
	}

	} /* end switch stmt */

	/*
	 * if no errors, copy any data to user. Size was
	 * already set and checked above.
	 */
	if (error == 0 && (cmd & IOC_OUT) && size)
		error = copyout(data, udata, size);

	if (memp) {
		kfree(memp, size);
	}

	return error;
}
9671
9672 /* ARGSUSED */
9673 int
9674 fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
9675 {
9676 int error;
9677 struct nameidata nd;
9678 u_long nameiflags;
9679 vnode_t vp = NULL;
9680 vfs_context_t ctx = vfs_context_current();
9681
9682 AUDIT_ARG(cmd, uap->cmd);
9683 AUDIT_ARG(value32, uap->options);
9684 /* Get the vnode for the file we are getting info on: */
9685 nameiflags = 0;
9686 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
9687 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
9688 UIO_USERSPACE, uap->path, ctx);
9689 if ((error = namei(&nd))) goto done;
9690 vp = nd.ni_vp;
9691 nameidone(&nd);
9692
9693 #if CONFIG_MACF
9694 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
9695 if (error) {
9696 goto done;
9697 }
9698 #endif
9699
9700 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
9701
9702 done:
9703 if (vp)
9704 vnode_put(vp);
9705 return error;
9706 }
9707 /* ARGSUSED */
9708 int
9709 ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
9710 {
9711 int error;
9712 vnode_t vp = NULL;
9713 vfs_context_t ctx = vfs_context_current();
9714 int fd = -1;
9715
9716 AUDIT_ARG(fd, uap->fd);
9717 AUDIT_ARG(cmd, uap->cmd);
9718 AUDIT_ARG(value32, uap->options);
9719
9720 /* Get the vnode for the file we are getting info on: */
9721 if ((error = file_vnode(uap->fd, &vp)))
9722 goto done;
9723 fd = uap->fd;
9724 if ((error = vnode_getwithref(vp))) {
9725 goto done;
9726 }
9727
9728 #if CONFIG_MACF
9729 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
9730 if (error) {
9731 goto done;
9732 }
9733 #endif
9734
9735 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
9736
9737 done:
9738 if (fd != -1)
9739 file_drop(fd);
9740
9741 if (vp)
9742 vnode_put(vp);
9743 return error;
9744 }
9745 /* end of fsctl system call */
9746
9747 /*
9748 * Retrieve the data of an extended attribute.
9749 */
9750 int
9751 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
9752 {
9753 vnode_t vp;
9754 struct nameidata nd;
9755 char attrname[XATTR_MAXNAMELEN+1];
9756 vfs_context_t ctx = vfs_context_current();
9757 uio_t auio = NULL;
9758 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9759 size_t attrsize = 0;
9760 size_t namelen;
9761 u_int32_t nameiflags;
9762 int error;
9763 char uio_buf[ UIO_SIZEOF(1) ];
9764
9765 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9766 return (EINVAL);
9767
9768 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9769 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
9770 if ((error = namei(&nd))) {
9771 return (error);
9772 }
9773 vp = nd.ni_vp;
9774 nameidone(&nd);
9775
9776 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9777 goto out;
9778 }
9779 if (xattr_protected(attrname)) {
9780 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
9781 error = EPERM;
9782 goto out;
9783 }
9784 }
9785 /*
9786 * the specific check for 0xffffffff is a hack to preserve
9787 * binaray compatibilty in K64 with applications that discovered
9788 * that passing in a buf pointer and a size of -1 resulted in
9789 * just the size of the indicated extended attribute being returned.
9790 * this isn't part of the documented behavior, but because of the
9791 * original implemtation's check for "uap->size > 0", this behavior
9792 * was allowed. In K32 that check turned into a signed comparison
9793 * even though uap->size is unsigned... in K64, we blow by that
9794 * check because uap->size is unsigned and doesn't get sign smeared
9795 * in the munger for a 32 bit user app. we also need to add a
9796 * check to limit the maximum size of the buffer being passed in...
9797 * unfortunately, the underlying fileystems seem to just malloc
9798 * the requested size even if the actual extended attribute is tiny.
9799 * because that malloc is for kernel wired memory, we have to put a
9800 * sane limit on it.
9801 *
9802 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
9803 * U64 running on K64 will yield -1 (64 bits wide)
9804 * U32/U64 running on K32 will yield -1 (32 bits wide)
9805 */
9806 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
9807 goto no_uio;
9808
9809 if (uap->value) {
9810 if (uap->size > (size_t)XATTR_MAXSIZE)
9811 uap->size = XATTR_MAXSIZE;
9812
9813 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9814 &uio_buf[0], sizeof(uio_buf));
9815 uio_addiov(auio, uap->value, uap->size);
9816 }
9817 no_uio:
9818 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
9819 out:
9820 vnode_put(vp);
9821
9822 if (auio) {
9823 *retval = uap->size - uio_resid(auio);
9824 } else {
9825 *retval = (user_ssize_t)attrsize;
9826 }
9827
9828 return (error);
9829 }
9830
9831 /*
9832 * Retrieve the data of an extended attribute.
9833 */
9834 int
9835 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
9836 {
9837 vnode_t vp;
9838 char attrname[XATTR_MAXNAMELEN+1];
9839 uio_t auio = NULL;
9840 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9841 size_t attrsize = 0;
9842 size_t namelen;
9843 int error;
9844 char uio_buf[ UIO_SIZEOF(1) ];
9845
9846 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9847 return (EINVAL);
9848
9849 if ( (error = file_vnode(uap->fd, &vp)) ) {
9850 return (error);
9851 }
9852 if ( (error = vnode_getwithref(vp)) ) {
9853 file_drop(uap->fd);
9854 return(error);
9855 }
9856 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9857 goto out;
9858 }
9859 if (xattr_protected(attrname)) {
9860 error = EPERM;
9861 goto out;
9862 }
9863 if (uap->value && uap->size > 0) {
9864 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9865 &uio_buf[0], sizeof(uio_buf));
9866 uio_addiov(auio, uap->value, uap->size);
9867 }
9868
9869 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
9870 out:
9871 (void)vnode_put(vp);
9872 file_drop(uap->fd);
9873
9874 if (auio) {
9875 *retval = uap->size - uio_resid(auio);
9876 } else {
9877 *retval = (user_ssize_t)attrsize;
9878 }
9879 return (error);
9880 }
9881
9882 /*
9883 * Set the data of an extended attribute.
9884 */
9885 int
9886 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
9887 {
9888 vnode_t vp;
9889 struct nameidata nd;
9890 char attrname[XATTR_MAXNAMELEN+1];
9891 vfs_context_t ctx = vfs_context_current();
9892 uio_t auio = NULL;
9893 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9894 size_t namelen;
9895 u_int32_t nameiflags;
9896 int error;
9897 char uio_buf[ UIO_SIZEOF(1) ];
9898
9899 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9900 return (EINVAL);
9901
9902 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9903 if (error == EPERM) {
9904 /* if the string won't fit in attrname, copyinstr emits EPERM */
9905 return (ENAMETOOLONG);
9906 }
9907 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9908 return error;
9909 }
9910 if (xattr_protected(attrname))
9911 return(EPERM);
9912 if (uap->size != 0 && uap->value == 0) {
9913 return (EINVAL);
9914 }
9915
9916 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9917 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
9918 if ((error = namei(&nd))) {
9919 return (error);
9920 }
9921 vp = nd.ni_vp;
9922 nameidone(&nd);
9923
9924 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9925 &uio_buf[0], sizeof(uio_buf));
9926 uio_addiov(auio, uap->value, uap->size);
9927
9928 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
9929 #if CONFIG_FSE
9930 if (error == 0) {
9931 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9932 FSE_ARG_VNODE, vp,
9933 FSE_ARG_DONE);
9934 }
9935 #endif
9936 vnode_put(vp);
9937 *retval = 0;
9938 return (error);
9939 }
9940
9941 /*
9942 * Set the data of an extended attribute.
9943 */
9944 int
9945 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
9946 {
9947 vnode_t vp;
9948 char attrname[XATTR_MAXNAMELEN+1];
9949 uio_t auio = NULL;
9950 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9951 size_t namelen;
9952 int error;
9953 char uio_buf[ UIO_SIZEOF(1) ];
9954 #if CONFIG_FSE
9955 vfs_context_t ctx = vfs_context_current();
9956 #endif
9957
9958 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9959 return (EINVAL);
9960
9961 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9962 return (error);
9963 }
9964 if (xattr_protected(attrname))
9965 return(EPERM);
9966 if (uap->size != 0 && uap->value == 0) {
9967 return (EINVAL);
9968 }
9969 if ( (error = file_vnode(uap->fd, &vp)) ) {
9970 return (error);
9971 }
9972 if ( (error = vnode_getwithref(vp)) ) {
9973 file_drop(uap->fd);
9974 return(error);
9975 }
9976 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9977 &uio_buf[0], sizeof(uio_buf));
9978 uio_addiov(auio, uap->value, uap->size);
9979
9980 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
9981 #if CONFIG_FSE
9982 if (error == 0) {
9983 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9984 FSE_ARG_VNODE, vp,
9985 FSE_ARG_DONE);
9986 }
9987 #endif
9988 vnode_put(vp);
9989 file_drop(uap->fd);
9990 *retval = 0;
9991 return (error);
9992 }
9993
9994 /*
9995 * Remove an extended attribute.
9996 * XXX Code duplication here.
9997 */
9998 int
9999 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
10000 {
10001 vnode_t vp;
10002 struct nameidata nd;
10003 char attrname[XATTR_MAXNAMELEN+1];
10004 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10005 vfs_context_t ctx = vfs_context_current();
10006 size_t namelen;
10007 u_int32_t nameiflags;
10008 int error;
10009
10010 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10011 return (EINVAL);
10012
10013 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10014 if (error != 0) {
10015 return (error);
10016 }
10017 if (xattr_protected(attrname))
10018 return(EPERM);
10019 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10020 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
10021 if ((error = namei(&nd))) {
10022 return (error);
10023 }
10024 vp = nd.ni_vp;
10025 nameidone(&nd);
10026
10027 error = vn_removexattr(vp, attrname, uap->options, ctx);
10028 #if CONFIG_FSE
10029 if (error == 0) {
10030 add_fsevent(FSE_XATTR_REMOVED, ctx,
10031 FSE_ARG_VNODE, vp,
10032 FSE_ARG_DONE);
10033 }
10034 #endif
10035 vnode_put(vp);
10036 *retval = 0;
10037 return (error);
10038 }
10039
10040 /*
10041 * Remove an extended attribute.
10042 * XXX Code duplication here.
10043 */
10044 int
10045 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
10046 {
10047 vnode_t vp;
10048 char attrname[XATTR_MAXNAMELEN+1];
10049 size_t namelen;
10050 int error;
10051 #if CONFIG_FSE
10052 vfs_context_t ctx = vfs_context_current();
10053 #endif
10054
10055 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10056 return (EINVAL);
10057
10058 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10059 if (error != 0) {
10060 return (error);
10061 }
10062 if (xattr_protected(attrname))
10063 return(EPERM);
10064 if ( (error = file_vnode(uap->fd, &vp)) ) {
10065 return (error);
10066 }
10067 if ( (error = vnode_getwithref(vp)) ) {
10068 file_drop(uap->fd);
10069 return(error);
10070 }
10071
10072 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
10073 #if CONFIG_FSE
10074 if (error == 0) {
10075 add_fsevent(FSE_XATTR_REMOVED, ctx,
10076 FSE_ARG_VNODE, vp,
10077 FSE_ARG_DONE);
10078 }
10079 #endif
10080 vnode_put(vp);
10081 file_drop(uap->fd);
10082 *retval = 0;
10083 return (error);
10084 }
10085
10086 /*
10087 * Retrieve the list of extended attribute names.
10088 * XXX Code duplication here.
10089 */
10090 int
10091 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
10092 {
10093 vnode_t vp;
10094 struct nameidata nd;
10095 vfs_context_t ctx = vfs_context_current();
10096 uio_t auio = NULL;
10097 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10098 size_t attrsize = 0;
10099 u_int32_t nameiflags;
10100 int error;
10101 char uio_buf[ UIO_SIZEOF(1) ];
10102
10103 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10104 return (EINVAL);
10105
10106 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10107 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
10108 if ((error = namei(&nd))) {
10109 return (error);
10110 }
10111 vp = nd.ni_vp;
10112 nameidone(&nd);
10113 if (uap->namebuf != 0 && uap->bufsize > 0) {
10114 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
10115 &uio_buf[0], sizeof(uio_buf));
10116 uio_addiov(auio, uap->namebuf, uap->bufsize);
10117 }
10118
10119 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
10120
10121 vnode_put(vp);
10122 if (auio) {
10123 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10124 } else {
10125 *retval = (user_ssize_t)attrsize;
10126 }
10127 return (error);
10128 }
10129
10130 /*
10131 * Retrieve the list of extended attribute names.
10132 * XXX Code duplication here.
10133 */
10134 int
10135 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
10136 {
10137 vnode_t vp;
10138 uio_t auio = NULL;
10139 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10140 size_t attrsize = 0;
10141 int error;
10142 char uio_buf[ UIO_SIZEOF(1) ];
10143
10144 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10145 return (EINVAL);
10146
10147 if ( (error = file_vnode(uap->fd, &vp)) ) {
10148 return (error);
10149 }
10150 if ( (error = vnode_getwithref(vp)) ) {
10151 file_drop(uap->fd);
10152 return(error);
10153 }
10154 if (uap->namebuf != 0 && uap->bufsize > 0) {
10155 auio = uio_createwithbuffer(1, 0, spacetype,
10156 UIO_READ, &uio_buf[0], sizeof(uio_buf));
10157 uio_addiov(auio, uap->namebuf, uap->bufsize);
10158 }
10159
10160 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
10161
10162 vnode_put(vp);
10163 file_drop(uap->fd);
10164 if (auio) {
10165 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10166 } else {
10167 *retval = (user_ssize_t)attrsize;
10168 }
10169 return (error);
10170 }
10171
/*
 * fsgetpath_internal: resolve a (volfs id, object id) pair to an
 * absolute path.
 *
 * ctx      caller's VFS context (used for MAC and access checks)
 * volfs_id volfs/fsid value identifying the mounted filesystem
 * objid    file id within that filesystem (2 selects the volume root)
 * bufsize  size of 'buf'; must not exceed PAGE_SIZE
 * buf      kernel buffer that receives the NUL-terminated path
 * pathlen  out: length of the path placed in 'buf'
 *
 * On a union mount, an id not found in the upper filesystem is
 * retried in the covered (mounted-on) filesystem.
 */
static int fsgetpath_internal(
	vfs_context_t ctx, int volfs_id, uint64_t objid,
	vm_size_t bufsize, caddr_t buf, int *pathlen)
{
	int error;
	struct mount *mp = NULL;
	vnode_t vp;
	int length;
	int bpflags;

	if (bufsize > PAGE_SIZE) {
		return (EINVAL);
	}

	if (buf == NULL) {
		return (ENOMEM);
	}

	/* Look up the mount by volfs id; on success it is returned busied. */
	if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
		error = ENOTSUP;  /* unexpected failure */
		return ENOTSUP;
	}

unionget:
	/* objid 2 is the conventional file id of the volume root. */
	if (objid == 2) {
		error = VFS_ROOT(mp, &vp, ctx);
	} else {
		error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
	}

	if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
		/*
		 * If the fileid isn't found and we're in a union
		 * mount volume, then see if the fileid is in the
		 * mounted-on volume.
		 */
		struct mount *tmp = mp;
		mp = vnode_mount(tmp->mnt_vnodecovered);
		vfs_unbusy(tmp);
		/* Non-blocking busy; on contention we give up with ENOENT. */
		if (vfs_busy(mp, LK_NOWAIT) == 0)
			goto unionget;
	} else {
		vfs_unbusy(mp);
	}

	if (error) {
		return error;
	}

#if CONFIG_MACF
	error = mac_vnode_check_fsgetpath(ctx, vp);
	if (error) {
		vnode_put(vp);
		return error;
	}
#endif

	/* Obtain the absolute path to this vnode. */
	bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
	bpflags |= BUILDPATH_CHECK_MOVED;
	error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
	vnode_put(vp);

	if (error) {
		goto out;
	}

	AUDIT_ARG(text, buf);

	if (kdebug_enable) {
		/* Emit a VFS-lookup tracepoint carrying (a suffix of) the path. */
		long dbg_parms[NUMPARMS];
		int  dbg_namelen;

		dbg_namelen = (int)sizeof(dbg_parms);

		if (length < dbg_namelen) {
			memcpy((char *)dbg_parms, buf, length);
			memset((char *)dbg_parms + length, 0, dbg_namelen - length);

			dbg_namelen = length;
		} else {
			/* Path longer than the event payload: keep the tail end. */
			memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
		}

		/*
		 * NOTE(review): vp's iocount was dropped above; the pointer
		 * appears to be passed here only as an identifying token for
		 * the trace record — confirm kdebug does not dereference it.
		 */
		kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
	}

	*pathlen = (user_ssize_t)length; /* may be superseded by error */

out:
	return (error);
}
10264
10265 /*
10266 * Obtain the full pathname of a file system object by id.
10267 *
10268 * This is a private SPI used by the File Manager.
10269 */
10270 __private_extern__
10271 int
10272 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
10273 {
10274 vfs_context_t ctx = vfs_context_current();
10275 fsid_t fsid;
10276 char *realpath;
10277 int length;
10278 int error;
10279
10280 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
10281 return (error);
10282 }
10283 AUDIT_ARG(value32, fsid.val[0]);
10284 AUDIT_ARG(value64, uap->objid);
10285 /* Restrict output buffer size for now. */
10286
10287 if (uap->bufsize > PAGE_SIZE) {
10288 return (EINVAL);
10289 }
10290 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
10291 if (realpath == NULL) {
10292 return (ENOMEM);
10293 }
10294
10295 error = fsgetpath_internal(
10296 ctx, fsid.val[0], uap->objid,
10297 uap->bufsize, realpath, &length);
10298
10299 if (error) {
10300 goto out;
10301 }
10302
10303 error = copyout((caddr_t)realpath, uap->buf, length);
10304
10305 *retval = (user_ssize_t)length; /* may be superseded by error */
10306 out:
10307 if (realpath) {
10308 FREE(realpath, M_TEMP);
10309 }
10310 return (error);
10311 }
10312
10313 /*
10314 * Common routine to handle various flavors of statfs data heading out
10315 * to user space.
10316 *
10317 * Returns: 0 Success
10318 * EFAULT
10319 */
static int
munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
    user_addr_t bufp, int *sizep, boolean_t is_64_bit,
    boolean_t partial_copy)
{
	int error;
	int my_size, copy_size;

	if (is_64_bit) {
		struct user64_statfs sfs;
		my_size = copy_size = sizeof(sfs);
		/* Zero first so unassigned/reserved fields copy out as zero. */
		bzero(&sfs, my_size);
		/* Only the externally visible mount flags are reported. */
		sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
		sfs.f_type = mp->mnt_vtable->vfc_typenum;
		sfs.f_reserved1 = (short)sfsp->f_fssubtype;
		sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
		sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
		sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
		sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
		sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
		sfs.f_files = (user64_long_t)sfsp->f_files;
		sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
		sfs.f_fsid = sfsp->f_fsid;
		sfs.f_owner = sfsp->f_owner;
		/* A mount may override the fs type name reported to user space. */
		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
			strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
		} else {
			strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
		}
		strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
		strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);

		if (partial_copy) {
			/* Caller wants the copyout without the trailing reserved fields. */
			copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
		}
		error = copyout((caddr_t)&sfs, bufp, copy_size);
	}
	else {
		struct user32_statfs sfs;

		my_size = copy_size = sizeof(sfs);
		bzero(&sfs, my_size);

		sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
		sfs.f_type = mp->mnt_vtable->vfc_typenum;
		sfs.f_reserved1 = (short)sfsp->f_fssubtype;

		/*
		 * It's possible for there to be more than 2^31 blocks in the filesystem, so we
		 * have to fudge the numbers here in that case.  We inflate the blocksize in order
		 * to reflect the filesystem size as best we can.
		 */
		if ((sfsp->f_blocks > INT_MAX)
			/* Hack for 4061702.  I think the real fix is for Carbon to
			 * look for some volume capability and not depend on hidden
			 * semantics agreed between a FS and Carbon.
			 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
			 * for Carbon to set the bNoVolumeSizes volume attribute.
			 * Without this the webdavfs files cannot be copied onto
			 * disk as they look huge.  This change should not affect
			 * XSAN as they should not be setting these to -1.
			 */
			&& (sfsp->f_blocks != 0xffffffffffffffffULL)
			&& (sfsp->f_bfree != 0xffffffffffffffffULL)
			&& (sfsp->f_bavail != 0xffffffffffffffffULL)) {
			int shift;

			/*
			 * Work out how far we have to shift the block count down to make it fit.
			 * Note that it's possible to have to shift so far that the resulting
			 * blocksize would be unreportably large.  At that point, we will clip
			 * any values that don't fit.
			 *
			 * For safety's sake, we also ensure that f_iosize is never reported as
			 * being smaller than f_bsize.
			 */
			for (shift = 0; shift < 32; shift++) {
				if ((sfsp->f_blocks >> shift) <= INT_MAX)
					break;
				/* Stop once inflating the blocksize further would exceed INT_MAX. */
				if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
					break;
			}
#define __SHIFT_OR_CLIP(x, s)	((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
			sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
			sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
			sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
#undef __SHIFT_OR_CLIP
			sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
			sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
		} else {
			/* filesystem is small enough to be reported honestly */
			sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
			sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
			sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
			sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
			sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
		}
		sfs.f_files = (user32_long_t)sfsp->f_files;
		sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
		sfs.f_fsid = sfsp->f_fsid;
		sfs.f_owner = sfsp->f_owner;
		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
			strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
		} else {
			strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
		}
		strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
		strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);

		if (partial_copy) {
			copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
		}
		error = copyout((caddr_t)&sfs, bufp, copy_size);
	}

	/* Report the full (untrimmed) structure size even for a partial copy. */
	if (sizep != NULL) {
		*sizep = my_size;
	}
	return(error);
}
10440
10441 /*
10442 * copy stat structure into user_stat structure.
10443 */
/*
 * Field-by-field copy of a kernel 'struct stat' into the user64 layout.
 * The destination is zeroed first so fields not explicitly assigned below
 * (and any padding) are copied out as zero rather than stale kernel data.
 */
void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
{
	bzero(usbp, sizeof(*usbp));

	usbp->st_dev = sbp->st_dev;
	usbp->st_ino = sbp->st_ino;
	usbp->st_mode = sbp->st_mode;
	usbp->st_nlink = sbp->st_nlink;
	usbp->st_uid = sbp->st_uid;
	usbp->st_gid = sbp->st_gid;
	usbp->st_rdev = sbp->st_rdev;
	/*
	 * The two time representations differ only in naming: timespec
	 * structs when _POSIX_C_SOURCE is not defined, split sec/nsec
	 * scalar fields otherwise.
	 */
#ifndef _POSIX_C_SOURCE
	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
#else
	usbp->st_atime = sbp->st_atime;
	usbp->st_atimensec = sbp->st_atimensec;
	usbp->st_mtime = sbp->st_mtime;
	usbp->st_mtimensec = sbp->st_mtimensec;
	usbp->st_ctime = sbp->st_ctime;
	usbp->st_ctimensec = sbp->st_ctimensec;
#endif
	usbp->st_size = sbp->st_size;
	usbp->st_blocks = sbp->st_blocks;
	usbp->st_blksize = sbp->st_blksize;
	usbp->st_flags = sbp->st_flags;
	usbp->st_gen = sbp->st_gen;
	usbp->st_lspare = sbp->st_lspare;
	usbp->st_qspare[0] = sbp->st_qspare[0];
	usbp->st_qspare[1] = sbp->st_qspare[1];
}
10479
/*
 * Field-by-field copy of a kernel 'struct stat' into the user32 layout.
 * Destination is zeroed first so unassigned fields copy out as zero.
 *
 * NOTE(review): fields that are narrower in the user32 structure are
 * implicitly truncated by these assignments — presumably acceptable for
 * legacy 32-bit callers; verify against the user32_stat definition.
 */
void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
{
	bzero(usbp, sizeof(*usbp));

	usbp->st_dev = sbp->st_dev;
	usbp->st_ino = sbp->st_ino;
	usbp->st_mode = sbp->st_mode;
	usbp->st_nlink = sbp->st_nlink;
	usbp->st_uid = sbp->st_uid;
	usbp->st_gid = sbp->st_gid;
	usbp->st_rdev = sbp->st_rdev;
	/* Timestamp layout mirrors munge_user64_stat: timespec vs. split fields. */
#ifndef _POSIX_C_SOURCE
	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
#else
	usbp->st_atime = sbp->st_atime;
	usbp->st_atimensec = sbp->st_atimensec;
	usbp->st_mtime = sbp->st_mtime;
	usbp->st_mtimensec = sbp->st_mtimensec;
	usbp->st_ctime = sbp->st_ctime;
	usbp->st_ctimensec = sbp->st_ctimensec;
#endif
	usbp->st_size = sbp->st_size;
	usbp->st_blocks = sbp->st_blocks;
	usbp->st_blksize = sbp->st_blksize;
	usbp->st_flags = sbp->st_flags;
	usbp->st_gen = sbp->st_gen;
	usbp->st_lspare = sbp->st_lspare;
	usbp->st_qspare[0] = sbp->st_qspare[0];
	usbp->st_qspare[1] = sbp->st_qspare[1];
}
10515
10516 /*
10517 * copy stat64 structure into user_stat64 structure.
10518 */
/*
 * Field-by-field copy of a kernel 'struct stat64' into the user64 layout.
 * Like munge_user64_stat, but the stat64 variants also carry the file
 * birth (creation) time.  Destination is zeroed first so unassigned
 * fields copy out as zero.
 */
void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
{
	bzero(usbp, sizeof(*usbp));

	usbp->st_dev = sbp->st_dev;
	usbp->st_ino = sbp->st_ino;
	usbp->st_mode = sbp->st_mode;
	usbp->st_nlink = sbp->st_nlink;
	usbp->st_uid = sbp->st_uid;
	usbp->st_gid = sbp->st_gid;
	usbp->st_rdev = sbp->st_rdev;
	/* Timestamps: timespec structs vs. split sec/nsec scalar fields. */
#ifndef _POSIX_C_SOURCE
	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
	usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
	usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
#else
	usbp->st_atime = sbp->st_atime;
	usbp->st_atimensec = sbp->st_atimensec;
	usbp->st_mtime = sbp->st_mtime;
	usbp->st_mtimensec = sbp->st_mtimensec;
	usbp->st_ctime = sbp->st_ctime;
	usbp->st_ctimensec = sbp->st_ctimensec;
	usbp->st_birthtime = sbp->st_birthtime;
	usbp->st_birthtimensec = sbp->st_birthtimensec;
#endif
	usbp->st_size = sbp->st_size;
	usbp->st_blocks = sbp->st_blocks;
	usbp->st_blksize = sbp->st_blksize;
	usbp->st_flags = sbp->st_flags;
	usbp->st_gen = sbp->st_gen;
	usbp->st_lspare = sbp->st_lspare;
	usbp->st_qspare[0] = sbp->st_qspare[0];
	usbp->st_qspare[1] = sbp->st_qspare[1];
}
10558
/*
 * Field-by-field copy of a kernel 'struct stat64' into the user32 layout
 * (including the birth-time fields).  Destination is zeroed first so
 * unassigned fields copy out as zero.
 *
 * NOTE(review): fields that are narrower in the user32 structure are
 * implicitly truncated by these assignments — verify against the
 * user32_stat64 definition.
 */
void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
{
	bzero(usbp, sizeof(*usbp));

	usbp->st_dev = sbp->st_dev;
	usbp->st_ino = sbp->st_ino;
	usbp->st_mode = sbp->st_mode;
	usbp->st_nlink = sbp->st_nlink;
	usbp->st_uid = sbp->st_uid;
	usbp->st_gid = sbp->st_gid;
	usbp->st_rdev = sbp->st_rdev;
	/* Timestamps: timespec structs vs. split sec/nsec scalar fields. */
#ifndef _POSIX_C_SOURCE
	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
	usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
	usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
#else
	usbp->st_atime = sbp->st_atime;
	usbp->st_atimensec = sbp->st_atimensec;
	usbp->st_mtime = sbp->st_mtime;
	usbp->st_mtimensec = sbp->st_mtimensec;
	usbp->st_ctime = sbp->st_ctime;
	usbp->st_ctimensec = sbp->st_ctimensec;
	usbp->st_birthtime = sbp->st_birthtime;
	usbp->st_birthtimensec = sbp->st_birthtimensec;
#endif
	usbp->st_size = sbp->st_size;
	usbp->st_blocks = sbp->st_blocks;
	usbp->st_blksize = sbp->st_blksize;
	usbp->st_flags = sbp->st_flags;
	usbp->st_gen = sbp->st_gen;
	usbp->st_lspare = sbp->st_lspare;
	usbp->st_qspare[0] = sbp->st_qspare[0];
	usbp->st_qspare[1] = sbp->st_qspare[1];
}
10598
10599 /*
10600 * Purge buffer cache for simulating cold starts
10601 */
10602 static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
10603 {
10604 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
10605
10606 return VNODE_RETURNED;
10607 }
10608
10609 static int vfs_purge_callback(mount_t mp, __unused void * arg)
10610 {
10611 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
10612
10613 return VFS_RETURNED;
10614 }
10615
10616 int
10617 vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
10618 {
10619 if (!kauth_cred_issuser(kauth_cred_get()))
10620 return EPERM;
10621
10622 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
10623
10624 return 0;
10625 }
10626