]> git.saurik.com Git - apple/xnu.git/blob - bsd/vfs/vfs_syscalls.c
xnu-3789.70.16.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
1 /*
2 * Copyright (c) 1995-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
80 #include <sys/stat.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
87 #include <sys/mman.h>
88 #include <sys/dirent.h>
89 #include <sys/attr.h>
90 #include <sys/sysctl.h>
91 #include <sys/ubc.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <sys/content_protection.h>
103 #include <sys/clonefile.h>
104 #include <sys/snapshot.h>
105 #include <sys/priv.h>
106 #include <machine/cons.h>
107 #include <machine/limits.h>
108 #include <miscfs/specfs/specdev.h>
109
110 #include <security/audit/audit.h>
111 #include <bsm/audit_kevents.h>
112
113 #include <mach/mach_types.h>
114 #include <kern/kern_types.h>
115 #include <kern/kalloc.h>
116 #include <kern/task.h>
117
118 #include <vm/vm_pageout.h>
119 #include <vm/vm_protos.h>
120
121 #include <libkern/OSAtomic.h>
122 #include <pexpert/pexpert.h>
123 #include <IOKit/IOBSD.h>
124
125 #if ROUTEFS
126 #include <miscfs/routefs/routefs.h>
127 #endif /* ROUTEFS */
128
129 #if CONFIG_MACF
130 #include <security/mac.h>
131 #include <security/mac_framework.h>
132 #endif
133
/*
 * Scratch path buffer helpers.
 *
 * GET_PATH(x) stores a path buffer in x and RELEASE_PATH(x) hands it back.
 * With CONFIG_FSE the buffer comes from get_pathbuff()/release_pathbuff();
 * otherwise MAXPATHLEN bytes are taken from (and returned to) the M_NAMEI
 * zone.
 *
 * NOTE: every expansion already ends in ';', so "GET_PATH(p);" expands to a
 * statement followed by an empty statement.  Do not use these macros as the
 * unbraced body of an if/else.
 */
#if CONFIG_FSE
#define GET_PATH(x)	(x) = get_pathbuff();
#define RELEASE_PATH(x)	release_pathbuff(x);
#else /* !CONFIG_FSE */
#define GET_PATH(x)	MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
#define RELEASE_PATH(x)	FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
#endif /* CONFIG_FSE */
145
146 /* struct for checkdirs iteration */
147 struct cdirargs {
148 vnode_t olddp;
149 vnode_t newdp;
150 };
151 /* callback for checkdirs iteration */
152 static int checkdirs_callback(proc_t p, void * arg);
153
154 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
155 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
156 void enablequotas(struct mount *mp, vfs_context_t ctx);
157 static int getfsstat_callback(mount_t mp, void * arg);
158 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
159 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
160 static int sync_callback(mount_t, void *);
161 static void sync_thread(void *, __unused wait_result_t);
162 static int sync_async(int);
163 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
164 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
165 boolean_t partial_copy);
166 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
167 user_addr_t bufp);
168 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
169 static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
170 struct componentname *cnp, user_addr_t fsmountargs,
171 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
172 vfs_context_t ctx);
173 void vfs_notify_mount(vnode_t pdvp);
174
175 int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
176
177 struct fd_vn_data * fg_vn_data_alloc(void);
178
179 /*
180 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
181 * Concurrent lookups (or lookups by ids) on hard links can cause the
182 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
183 * does) to return ENOENT as the path cannot be returned from the name cache
184 * alone. We have no option but to retry and hope to get one namei->reverse path
185 * generation done without an intervening lookup, lookup by id on the hard link
186 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
187 * which currently are the MAC hooks for rename, unlink and rmdir.
188 */
189 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
190
191 static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
192
193 static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
194
195 #ifdef CONFIG_IMGSRC_ACCESS
196 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
197 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
198 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
199 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
200 static void mount_end_update(mount_t mp);
201 static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
202 #endif /* CONFIG_IMGSRC_ACCESS */
203
204 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
205
206 __private_extern__
207 int sync_internal(void);
208
209 __private_extern__
210 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
211
212 extern lck_grp_t *fd_vn_lck_grp;
213 extern lck_grp_attr_t *fd_vn_lck_grp_attr;
214 extern lck_attr_t *fd_vn_lck_attr;
215
216 /*
217 * incremented each time a mount or unmount operation occurs
218 * used to invalidate the cached value of the rootvp in the
219 * mount structure utilized by cache_lookup_path
220 */
221 uint32_t mount_generation = 0;
222
223 /* counts number of mount and unmount operations */
224 unsigned int vfs_nummntops=0;
225
226 extern const struct fileops vnops;
227 #if CONFIG_APPLEDOUBLE
228 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
229 #endif /* CONFIG_APPLEDOUBLE */
230
231 /*
232 * Virtual File System System Calls
233 */
234
235 #if NFSCLIENT || DEVFS || ROUTEFS
236 /*
237 * Private in-kernel mounting spi (NFS only, not exported)
238 */
239 __private_extern__
240 boolean_t
241 vfs_iskernelmount(mount_t mp)
242 {
243 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
244 }
245
246 __private_extern__
247 int
248 kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
249 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
250 {
251 struct nameidata nd;
252 boolean_t did_namei;
253 int error;
254
255 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
256 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
257
258 /*
259 * Get the vnode to be covered if it's not supplied
260 */
261 if (vp == NULLVP) {
262 error = namei(&nd);
263 if (error)
264 return (error);
265 vp = nd.ni_vp;
266 pvp = nd.ni_dvp;
267 did_namei = TRUE;
268 } else {
269 char *pnbuf = CAST_DOWN(char *, path);
270
271 nd.ni_cnd.cn_pnbuf = pnbuf;
272 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
273 did_namei = FALSE;
274 }
275
276 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
277 syscall_flags, kern_flags, NULL, TRUE, ctx);
278
279 if (did_namei) {
280 vnode_put(vp);
281 vnode_put(pvp);
282 nameidone(&nd);
283 }
284
285 return (error);
286 }
287 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
288
289 /*
290 * Mount a file system.
291 */
292 /* ARGSUSED */
293 int
294 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
295 {
296 struct __mac_mount_args muap;
297
298 muap.type = uap->type;
299 muap.path = uap->path;
300 muap.flags = uap->flags;
301 muap.data = uap->data;
302 muap.mac_p = USER_ADDR_NULL;
303 return (__mac_mount(p, &muap, retval));
304 }
305
306 void
307 vfs_notify_mount(vnode_t pdvp)
308 {
309 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
310 lock_vnode_and_post(pdvp, NOTE_WRITE);
311 }
312
313 /*
314 * __mac_mount:
315 * Mount a file system taking into account MAC label behavior.
316 * See mount(2) man page for more information
317 *
318 * Parameters: p Process requesting the mount
319 * uap User argument descriptor (see below)
320 * retval (ignored)
321 *
322 * Indirect: uap->type Filesystem type
323 * uap->path Path to mount
324 * uap->data Mount arguments
325 * uap->mac_p MAC info
326 * uap->flags Mount flags
327 *
328 *
329 * Returns: 0 Success
330 * !0 Not success
331 */
332 boolean_t root_fs_upgrade_try = FALSE;
333
334 int
335 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
336 {
337 vnode_t pvp = NULL;
338 vnode_t vp = NULL;
339 int need_nameidone = 0;
340 vfs_context_t ctx = vfs_context_current();
341 char fstypename[MFSNAMELEN];
342 struct nameidata nd;
343 size_t dummy=0;
344 char *labelstr = NULL;
345 int flags = uap->flags;
346 int error;
347 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
348 boolean_t is_64bit = IS_64BIT_PROCESS(p);
349 #else
350 #pragma unused(p)
351 #endif
352 /*
353 * Get the fs type name from user space
354 */
355 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
356 if (error)
357 return (error);
358
359 /*
360 * Get the vnode to be covered
361 */
362 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
363 UIO_USERSPACE, uap->path, ctx);
364 error = namei(&nd);
365 if (error) {
366 goto out;
367 }
368 need_nameidone = 1;
369 vp = nd.ni_vp;
370 pvp = nd.ni_dvp;
371
372 #ifdef CONFIG_IMGSRC_ACCESS
373 /* Mounting image source cannot be batched with other operations */
374 if (flags == MNT_IMGSRC_BY_INDEX) {
375 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
376 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
377 goto out;
378 }
379 #endif /* CONFIG_IMGSRC_ACCESS */
380
381 #if CONFIG_MACF
382 /*
383 * Get the label string (if any) from user space
384 */
385 if (uap->mac_p != USER_ADDR_NULL) {
386 struct user_mac mac;
387 size_t ulen = 0;
388
389 if (is_64bit) {
390 struct user64_mac mac64;
391 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
392 mac.m_buflen = mac64.m_buflen;
393 mac.m_string = mac64.m_string;
394 } else {
395 struct user32_mac mac32;
396 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
397 mac.m_buflen = mac32.m_buflen;
398 mac.m_string = mac32.m_string;
399 }
400 if (error)
401 goto out;
402 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
403 (mac.m_buflen < 2)) {
404 error = EINVAL;
405 goto out;
406 }
407 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
408 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
409 if (error) {
410 goto out;
411 }
412 AUDIT_ARG(mac_string, labelstr);
413 }
414 #endif /* CONFIG_MACF */
415
416 AUDIT_ARG(fflags, flags);
417
418 #if SECURE_KERNEL
419 if (flags & MNT_UNION) {
420 /* No union mounts on release kernels */
421 error = EPERM;
422 goto out;
423 }
424 #endif
425
426 if ((vp->v_flag & VROOT) &&
427 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
428 if (!(flags & MNT_UNION)) {
429 flags |= MNT_UPDATE;
430 }
431 else {
432 /*
433 * For a union mount on '/', treat it as fresh
434 * mount instead of update.
435 * Otherwise, union mouting on '/' used to panic the
436 * system before, since mnt_vnodecovered was found to
437 * be NULL for '/' which is required for unionlookup
438 * after it gets ENOENT on union mount.
439 */
440 flags = (flags & ~(MNT_UPDATE));
441 }
442
443 #if SECURE_KERNEL
444 if ((flags & MNT_RDONLY) == 0) {
445 /* Release kernels are not allowed to mount "/" as rw */
446 error = EPERM;
447 goto out;
448 }
449 #endif
450 /*
451 * See 7392553 for more details on why this check exists.
452 * Suffice to say: If this check is ON and something tries
453 * to mount the rootFS RW, we'll turn off the codesign
454 * bitmap optimization.
455 */
456 #if CHECK_CS_VALIDATION_BITMAP
457 if ((flags & MNT_RDONLY) == 0 ) {
458 root_fs_upgrade_try = TRUE;
459 }
460 #endif
461 }
462
463 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
464 labelstr, FALSE, ctx);
465
466 out:
467
468 #if CONFIG_MACF
469 if (labelstr)
470 FREE(labelstr, M_MACTEMP);
471 #endif /* CONFIG_MACF */
472
473 if (vp) {
474 vnode_put(vp);
475 }
476 if (pvp) {
477 vnode_put(pvp);
478 }
479 if (need_nameidone) {
480 nameidone(&nd);
481 }
482
483 return (error);
484 }
485
486 /*
487 * common mount implementation (final stage of mounting)
488
489 * Arguments:
490 * fstypename file system type (i.e. its vfs name)
491 * pvp parent of covered vnode
492 * vp covered vnode
493 * cnp component name (ie path) of covered vnode
494 * flags generic mount flags
495 * fsmountargs file system specific data
496 * labelstr optional MAC label
497 * kernelmount TRUE for mounts initiated from inside the kernel
498 * ctx caller's context
499 */
500 static int
501 mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
502 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
503 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
504 {
505 #if !CONFIG_MACF
506 #pragma unused(labelstr)
507 #endif
508 struct vnode *devvp = NULLVP;
509 struct vnode *device_vnode = NULLVP;
510 #if CONFIG_MACF
511 struct vnode *rvp;
512 #endif
513 struct mount *mp;
514 struct vfstable *vfsp = (struct vfstable *)0;
515 struct proc *p = vfs_context_proc(ctx);
516 int error, flag = 0;
517 user_addr_t devpath = USER_ADDR_NULL;
518 int ronly = 0;
519 int mntalloc = 0;
520 boolean_t vfsp_ref = FALSE;
521 boolean_t is_rwlock_locked = FALSE;
522 boolean_t did_rele = FALSE;
523 boolean_t have_usecount = FALSE;
524
525 /*
526 * Process an update for an existing mount
527 */
528 if (flags & MNT_UPDATE) {
529 if ((vp->v_flag & VROOT) == 0) {
530 error = EINVAL;
531 goto out1;
532 }
533 mp = vp->v_mount;
534
535 /* unmount in progress return error */
536 mount_lock_spin(mp);
537 if (mp->mnt_lflag & MNT_LUNMOUNT) {
538 mount_unlock(mp);
539 error = EBUSY;
540 goto out1;
541 }
542 mount_unlock(mp);
543 lck_rw_lock_exclusive(&mp->mnt_rwlock);
544 is_rwlock_locked = TRUE;
545 /*
546 * We only allow the filesystem to be reloaded if it
547 * is currently mounted read-only.
548 */
549 if ((flags & MNT_RELOAD) &&
550 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
551 error = ENOTSUP;
552 goto out1;
553 }
554
555 /*
556 * If content protection is enabled, update mounts are not
557 * allowed to turn it off.
558 */
559 if ((mp->mnt_flag & MNT_CPROTECT) &&
560 ((flags & MNT_CPROTECT) == 0)) {
561 error = EINVAL;
562 goto out1;
563 }
564
565 #ifdef CONFIG_IMGSRC_ACCESS
566 /* Can't downgrade the backer of the root FS */
567 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
568 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
569 error = ENOTSUP;
570 goto out1;
571 }
572 #endif /* CONFIG_IMGSRC_ACCESS */
573
574 /*
575 * Only root, or the user that did the original mount is
576 * permitted to update it.
577 */
578 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
579 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
580 goto out1;
581 }
582 #if CONFIG_MACF
583 error = mac_mount_check_remount(ctx, mp);
584 if (error != 0) {
585 goto out1;
586 }
587 #endif
588 /*
589 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
590 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
591 */
592 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
593 flags |= MNT_NOSUID | MNT_NODEV;
594 if (mp->mnt_flag & MNT_NOEXEC)
595 flags |= MNT_NOEXEC;
596 }
597 flag = mp->mnt_flag;
598
599
600
601 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
602
603 vfsp = mp->mnt_vtable;
604 goto update;
605 }
606 /*
607 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
608 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
609 */
610 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
611 flags |= MNT_NOSUID | MNT_NODEV;
612 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
613 flags |= MNT_NOEXEC;
614 }
615
616 /* XXXAUDIT: Should we capture the type on the error path as well? */
617 AUDIT_ARG(text, fstypename);
618 mount_list_lock();
619 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
620 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
621 vfsp->vfc_refcount++;
622 vfsp_ref = TRUE;
623 break;
624 }
625 mount_list_unlock();
626 if (vfsp == NULL) {
627 error = ENODEV;
628 goto out1;
629 }
630
631 /*
632 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
633 */
634 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
635 error = EINVAL; /* unsupported request */
636 goto out1;
637 }
638
639 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
640 if (error != 0) {
641 goto out1;
642 }
643
644 /*
645 * Allocate and initialize the filesystem (mount_t)
646 */
647 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
648 M_MOUNT, M_WAITOK);
649 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
650 mntalloc = 1;
651
652 /* Initialize the default IO constraints */
653 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
654 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
655 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
656 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
657 mp->mnt_devblocksize = DEV_BSIZE;
658 mp->mnt_alignmentmask = PAGE_MASK;
659 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
660 mp->mnt_ioscale = 1;
661 mp->mnt_ioflags = 0;
662 mp->mnt_realrootvp = NULLVP;
663 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
664
665 TAILQ_INIT(&mp->mnt_vnodelist);
666 TAILQ_INIT(&mp->mnt_workerqueue);
667 TAILQ_INIT(&mp->mnt_newvnodes);
668 mount_lock_init(mp);
669 lck_rw_lock_exclusive(&mp->mnt_rwlock);
670 is_rwlock_locked = TRUE;
671 mp->mnt_op = vfsp->vfc_vfsops;
672 mp->mnt_vtable = vfsp;
673 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
674 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
675 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
676 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
677 mp->mnt_vnodecovered = vp;
678 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
679 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
680 mp->mnt_devbsdunit = 0;
681
682 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
683 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
684
685 #if NFSCLIENT || DEVFS || ROUTEFS
686 if (kernelmount)
687 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
688 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
689 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
690 #endif /* NFSCLIENT || DEVFS */
691
692 update:
693 /*
694 * Set the mount level flags.
695 */
696 if (flags & MNT_RDONLY)
697 mp->mnt_flag |= MNT_RDONLY;
698 else if (mp->mnt_flag & MNT_RDONLY) {
699 // disallow read/write upgrades of file systems that
700 // had the TYPENAME_OVERRIDE feature set.
701 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
702 error = EPERM;
703 goto out1;
704 }
705 mp->mnt_kern_flag |= MNTK_WANTRDWR;
706 }
707 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
708 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
709 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
710 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
711 MNT_QUARANTINE | MNT_CPROTECT);
712
713 #if SECURE_KERNEL
714 #if !CONFIG_MNT_SUID
715 /*
716 * On release builds of iOS based platforms, always enforce NOSUID and NODEV on
717 * all mounts. We do this here because we can catch update mounts as well as
718 * non-update mounts in this case.
719 */
720 mp->mnt_flag |= (MNT_NOSUID);
721 #endif
722 #endif
723
724 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
725 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
726 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
727 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
728 MNT_QUARANTINE | MNT_CPROTECT);
729
730 #if CONFIG_MACF
731 if (flags & MNT_MULTILABEL) {
732 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
733 error = EINVAL;
734 goto out1;
735 }
736 mp->mnt_flag |= MNT_MULTILABEL;
737 }
738 #endif
739 /*
740 * Process device path for local file systems if requested
741 */
742 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS &&
743 !(internal_flags & KERNEL_MOUNT_SNAPSHOT)) {
744 if (vfs_context_is64bit(ctx)) {
745 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
746 goto out1;
747 fsmountargs += sizeof(devpath);
748 } else {
749 user32_addr_t tmp;
750 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
751 goto out1;
752 /* munge into LP64 addr */
753 devpath = CAST_USER_ADDR_T(tmp);
754 fsmountargs += sizeof(tmp);
755 }
756
757 /* Lookup device and authorize access to it */
758 if ((devpath)) {
759 struct nameidata nd;
760
761 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
762 if ( (error = namei(&nd)) )
763 goto out1;
764
765 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
766 devvp = nd.ni_vp;
767
768 nameidone(&nd);
769
770 if (devvp->v_type != VBLK) {
771 error = ENOTBLK;
772 goto out2;
773 }
774 if (major(devvp->v_rdev) >= nblkdev) {
775 error = ENXIO;
776 goto out2;
777 }
778 /*
779 * If mount by non-root, then verify that user has necessary
780 * permissions on the device.
781 */
782 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
783 mode_t accessmode = KAUTH_VNODE_READ_DATA;
784
785 if ((mp->mnt_flag & MNT_RDONLY) == 0)
786 accessmode |= KAUTH_VNODE_WRITE_DATA;
787 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
788 goto out2;
789 }
790 }
791 /* On first mount, preflight and open device */
792 if (devpath && ((flags & MNT_UPDATE) == 0)) {
793 if ( (error = vnode_ref(devvp)) )
794 goto out2;
795 /*
796 * Disallow multiple mounts of the same device.
797 * Disallow mounting of a device that is currently in use
798 * (except for root, which might share swap device for miniroot).
799 * Flush out any old buffers remaining from a previous use.
800 */
801 if ( (error = vfs_mountedon(devvp)) )
802 goto out3;
803
804 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
805 error = EBUSY;
806 goto out3;
807 }
808 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
809 error = ENOTBLK;
810 goto out3;
811 }
812 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
813 goto out3;
814
815 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
816 #if CONFIG_MACF
817 error = mac_vnode_check_open(ctx,
818 devvp,
819 ronly ? FREAD : FREAD|FWRITE);
820 if (error)
821 goto out3;
822 #endif /* MAC */
823 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
824 goto out3;
825
826 mp->mnt_devvp = devvp;
827 device_vnode = devvp;
828
829 } else if ((mp->mnt_flag & MNT_RDONLY) &&
830 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
831 (device_vnode = mp->mnt_devvp)) {
832 dev_t dev;
833 int maj;
834 /*
835 * If upgrade to read-write by non-root, then verify
836 * that user has necessary permissions on the device.
837 */
838 vnode_getalways(device_vnode);
839
840 if (suser(vfs_context_ucred(ctx), NULL) &&
841 (error = vnode_authorize(device_vnode, NULL,
842 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
843 ctx)) != 0) {
844 vnode_put(device_vnode);
845 goto out2;
846 }
847
848 /* Tell the device that we're upgrading */
849 dev = (dev_t)device_vnode->v_rdev;
850 maj = major(dev);
851
852 if ((u_int)maj >= (u_int)nblkdev)
853 panic("Volume mounted on a device with invalid major number.");
854
855 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
856 vnode_put(device_vnode);
857 device_vnode = NULLVP;
858 if (error != 0) {
859 goto out2;
860 }
861 }
862 }
863 #if CONFIG_MACF
864 if ((flags & MNT_UPDATE) == 0) {
865 mac_mount_label_init(mp);
866 mac_mount_label_associate(ctx, mp);
867 }
868 if (labelstr) {
869 if ((flags & MNT_UPDATE) != 0) {
870 error = mac_mount_check_label_update(ctx, mp);
871 if (error != 0)
872 goto out3;
873 }
874 }
875 #endif
876 /*
877 * Mount the filesystem.
878 */
879 if (internal_flags & KERNEL_MOUNT_SNAPSHOT) {
880 error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT,
881 (caddr_t)fsmountargs, 0, ctx);
882 } else {
883 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
884 }
885
886 if (flags & MNT_UPDATE) {
887 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
888 mp->mnt_flag &= ~MNT_RDONLY;
889 mp->mnt_flag &=~
890 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
891 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
892 if (error)
893 mp->mnt_flag = flag; /* restore flag value */
894 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
895 lck_rw_done(&mp->mnt_rwlock);
896 is_rwlock_locked = FALSE;
897 if (!error)
898 enablequotas(mp, ctx);
899 goto exit;
900 }
901
902 /*
903 * Put the new filesystem on the mount list after root.
904 */
905 if (error == 0) {
906 struct vfs_attr vfsattr;
907 #if CONFIG_MACF
908 if (vfs_flags(mp) & MNT_MULTILABEL) {
909 error = VFS_ROOT(mp, &rvp, ctx);
910 if (error) {
911 printf("%s() VFS_ROOT returned %d\n", __func__, error);
912 goto out3;
913 }
914 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
915 /*
916 * drop reference provided by VFS_ROOT
917 */
918 vnode_put(rvp);
919
920 if (error)
921 goto out3;
922 }
923 #endif /* MAC */
924
925 vnode_lock_spin(vp);
926 CLR(vp->v_flag, VMOUNT);
927 vp->v_mountedhere = mp;
928 vnode_unlock(vp);
929
930 /*
931 * taking the name_cache_lock exclusively will
932 * insure that everyone is out of the fast path who
933 * might be trying to use a now stale copy of
934 * vp->v_mountedhere->mnt_realrootvp
935 * bumping mount_generation causes the cached values
936 * to be invalidated
937 */
938 name_cache_lock();
939 mount_generation++;
940 name_cache_unlock();
941
942 error = vnode_ref(vp);
943 if (error != 0) {
944 goto out4;
945 }
946
947 have_usecount = TRUE;
948
949 error = checkdirs(vp, ctx);
950 if (error != 0) {
951 /* Unmount the filesystem as cdir/rdirs cannot be updated */
952 goto out4;
953 }
954 /*
955 * there is no cleanup code here so I have made it void
956 * we need to revisit this
957 */
958 (void)VFS_START(mp, 0, ctx);
959
960 if (mount_list_add(mp) != 0) {
961 /*
962 * The system is shutting down trying to umount
963 * everything, so fail with a plausible errno.
964 */
965 error = EBUSY;
966 goto out4;
967 }
968 lck_rw_done(&mp->mnt_rwlock);
969 is_rwlock_locked = FALSE;
970
971 /* Check if this mounted file system supports EAs or named streams. */
972 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
973 VFSATTR_INIT(&vfsattr);
974 VFSATTR_WANTED(&vfsattr, f_capabilities);
975 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
976 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
977 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
978 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
979 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
980 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
981 }
982 #if NAMEDSTREAMS
983 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
984 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
985 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
986 }
987 #endif
988 /* Check if this file system supports path from id lookups. */
989 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
990 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
991 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
992 } else if (mp->mnt_flag & MNT_DOVOLFS) {
993 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
994 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
995 }
996
997 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) &&
998 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) {
999 mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS;
1000 }
1001 }
1002 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
1003 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1004 }
1005 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
1006 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
1007 }
1008 /* increment the operations count */
1009 OSAddAtomic(1, &vfs_nummntops);
1010 enablequotas(mp, ctx);
1011
1012 if (device_vnode) {
1013 device_vnode->v_specflags |= SI_MOUNTEDON;
1014
1015 /*
1016 * cache the IO attributes for the underlying physical media...
1017 * an error return indicates the underlying driver doesn't
1018 * support all the queries necessary... however, reasonable
1019 * defaults will have been set, so no reason to bail or care
1020 */
1021 vfs_init_io_attributes(device_vnode, mp);
1022 }
1023
1024 /* Now that mount is setup, notify the listeners */
1025 vfs_notify_mount(pvp);
1026 IOBSDMountChange(mp, kIOMountChangeMount);
1027
1028 } else {
1029 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1030 if (mp->mnt_vnodelist.tqh_first != NULL) {
1031 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1032 mp->mnt_vtable->vfc_name, error);
1033 }
1034
1035 vnode_lock_spin(vp);
1036 CLR(vp->v_flag, VMOUNT);
1037 vnode_unlock(vp);
1038 mount_list_lock();
1039 mp->mnt_vtable->vfc_refcount--;
1040 mount_list_unlock();
1041
1042 if (device_vnode ) {
1043 vnode_rele(device_vnode);
1044 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
1045 }
1046 lck_rw_done(&mp->mnt_rwlock);
1047 is_rwlock_locked = FALSE;
1048
1049 /*
1050 * if we get here, we have a mount structure that needs to be freed,
1051 * but since the coveredvp hasn't yet been updated to point at it,
1052 * no need to worry about other threads holding a crossref on this mp
1053 * so it's ok to just free it
1054 */
1055 mount_lock_destroy(mp);
1056 #if CONFIG_MACF
1057 mac_mount_label_destroy(mp);
1058 #endif
1059 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1060 }
1061 exit:
1062 /*
1063 * drop I/O count on the device vp if there was one
1064 */
1065 if (devpath && devvp)
1066 vnode_put(devvp);
1067
1068 return(error);
1069
1070 /* Error condition exits */
1071 out4:
1072 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
1073
1074 /*
1075 * If the mount has been placed on the covered vp,
1076 * it may have been discovered by now, so we have
1077 * to treat this just like an unmount
1078 */
1079 mount_lock_spin(mp);
1080 mp->mnt_lflag |= MNT_LDEAD;
1081 mount_unlock(mp);
1082
1083 if (device_vnode != NULLVP) {
1084 vnode_rele(device_vnode);
1085 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1086 ctx);
1087 did_rele = TRUE;
1088 }
1089
1090 vnode_lock_spin(vp);
1091
1092 mp->mnt_crossref++;
1093 vp->v_mountedhere = (mount_t) 0;
1094
1095 vnode_unlock(vp);
1096
1097 if (have_usecount) {
1098 vnode_rele(vp);
1099 }
1100 out3:
1101 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
1102 vnode_rele(devvp);
1103 out2:
1104 if (devpath && devvp)
1105 vnode_put(devvp);
1106 out1:
1107 /* Release mnt_rwlock only when it was taken */
1108 if (is_rwlock_locked == TRUE) {
1109 lck_rw_done(&mp->mnt_rwlock);
1110 }
1111
1112 if (mntalloc) {
1113 if (mp->mnt_crossref)
1114 mount_dropcrossref(mp, vp, 0);
1115 else {
1116 mount_lock_destroy(mp);
1117 #if CONFIG_MACF
1118 mac_mount_label_destroy(mp);
1119 #endif
1120 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1121 }
1122 }
1123 if (vfsp_ref) {
1124 mount_list_lock();
1125 vfsp->vfc_refcount--;
1126 mount_list_unlock();
1127 }
1128
1129 return(error);
1130 }
1131
1132 /*
1133 * Flush in-core data, check for competing mount attempts,
1134 * and set VMOUNT
1135 */
1136 int
1137 prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
1138 {
1139 #if !CONFIG_MACF
1140 #pragma unused(cnp,fsname)
1141 #endif
1142 struct vnode_attr va;
1143 int error;
1144
1145 if (!skip_auth) {
1146 /*
1147 * If the user is not root, ensure that they own the directory
1148 * onto which we are attempting to mount.
1149 */
1150 VATTR_INIT(&va);
1151 VATTR_WANTED(&va, va_uid);
1152 if ((error = vnode_getattr(vp, &va, ctx)) ||
1153 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1154 (!vfs_context_issuser(ctx)))) {
1155 error = EPERM;
1156 goto out;
1157 }
1158 }
1159
1160 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1161 goto out;
1162
1163 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1164 goto out;
1165
1166 if (vp->v_type != VDIR) {
1167 error = ENOTDIR;
1168 goto out;
1169 }
1170
1171 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1172 error = EBUSY;
1173 goto out;
1174 }
1175
1176 #if CONFIG_MACF
1177 error = mac_mount_check_mount(ctx, vp,
1178 cnp, fsname);
1179 if (error != 0)
1180 goto out;
1181 #endif
1182
1183 vnode_lock_spin(vp);
1184 SET(vp->v_flag, VMOUNT);
1185 vnode_unlock(vp);
1186
1187 out:
1188 return error;
1189 }
1190
1191 #if CONFIG_IMGSRC_ACCESS
1192
/*
 * Trace output for the imageboot-source relocation code.  Expands to a
 * printf() call on DEBUG builds and to an empty statement otherwise.
 */
#if DEBUG
#define IMGSRC_DEBUG(...) printf(__VA_ARGS__)
#else
#define IMGSRC_DEBUG(...) do { } while(0)
#endif
1198
1199 static int
1200 authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1201 {
1202 struct nameidata nd;
1203 vnode_t vp, realdevvp;
1204 mode_t accessmode;
1205 int error;
1206
1207 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1208 if ( (error = namei(&nd)) ) {
1209 IMGSRC_DEBUG("namei() failed with %d\n", error);
1210 return error;
1211 }
1212
1213 vp = nd.ni_vp;
1214
1215 if (!vnode_isblk(vp)) {
1216 IMGSRC_DEBUG("Not block device.\n");
1217 error = ENOTBLK;
1218 goto out;
1219 }
1220
1221 realdevvp = mp->mnt_devvp;
1222 if (realdevvp == NULLVP) {
1223 IMGSRC_DEBUG("No device backs the mount.\n");
1224 error = ENXIO;
1225 goto out;
1226 }
1227
1228 error = vnode_getwithref(realdevvp);
1229 if (error != 0) {
1230 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1231 goto out;
1232 }
1233
1234 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1235 IMGSRC_DEBUG("Wrong dev_t.\n");
1236 error = ENXIO;
1237 goto out1;
1238 }
1239
1240 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1241
1242 /*
1243 * If mount by non-root, then verify that user has necessary
1244 * permissions on the device.
1245 */
1246 if (!vfs_context_issuser(ctx)) {
1247 accessmode = KAUTH_VNODE_READ_DATA;
1248 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1249 accessmode |= KAUTH_VNODE_WRITE_DATA;
1250 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1251 IMGSRC_DEBUG("Access denied.\n");
1252 goto out1;
1253 }
1254 }
1255
1256 *devvpp = vp;
1257
1258 out1:
1259 vnode_put(realdevvp);
1260 out:
1261 nameidone(&nd);
1262 if (error) {
1263 vnode_put(vp);
1264 }
1265
1266 return error;
1267 }
1268
1269 /*
1270 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1271 * and call checkdirs()
1272 */
1273 static int
1274 place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1275 {
1276 int error;
1277
1278 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1279
1280 vnode_lock_spin(vp);
1281 CLR(vp->v_flag, VMOUNT);
1282 vp->v_mountedhere = mp;
1283 vnode_unlock(vp);
1284
1285 /*
1286 * taking the name_cache_lock exclusively will
1287 * insure that everyone is out of the fast path who
1288 * might be trying to use a now stale copy of
1289 * vp->v_mountedhere->mnt_realrootvp
1290 * bumping mount_generation causes the cached values
1291 * to be invalidated
1292 */
1293 name_cache_lock();
1294 mount_generation++;
1295 name_cache_unlock();
1296
1297 error = vnode_ref(vp);
1298 if (error != 0) {
1299 goto out;
1300 }
1301
1302 error = checkdirs(vp, ctx);
1303 if (error != 0) {
1304 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1305 vnode_rele(vp);
1306 goto out;
1307 }
1308
1309 out:
1310 if (error != 0) {
1311 mp->mnt_vnodecovered = NULLVP;
1312 }
1313 return error;
1314 }
1315
1316 static void
1317 undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1318 {
1319 vnode_rele(vp);
1320 vnode_lock_spin(vp);
1321 vp->v_mountedhere = (mount_t)NULL;
1322 vnode_unlock(vp);
1323
1324 mp->mnt_vnodecovered = NULLVP;
1325 }
1326
1327 static int
1328 mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1329 {
1330 int error;
1331
1332 /* unmount in progress return error */
1333 mount_lock_spin(mp);
1334 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1335 mount_unlock(mp);
1336 return EBUSY;
1337 }
1338 mount_unlock(mp);
1339 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1340
1341 /*
1342 * We only allow the filesystem to be reloaded if it
1343 * is currently mounted read-only.
1344 */
1345 if ((flags & MNT_RELOAD) &&
1346 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1347 error = ENOTSUP;
1348 goto out;
1349 }
1350
1351 /*
1352 * Only root, or the user that did the original mount is
1353 * permitted to update it.
1354 */
1355 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1356 (!vfs_context_issuser(ctx))) {
1357 error = EPERM;
1358 goto out;
1359 }
1360 #if CONFIG_MACF
1361 error = mac_mount_check_remount(ctx, mp);
1362 if (error != 0) {
1363 goto out;
1364 }
1365 #endif
1366
1367 out:
1368 if (error) {
1369 lck_rw_done(&mp->mnt_rwlock);
1370 }
1371
1372 return error;
1373 }
1374
1375 static void
1376 mount_end_update(mount_t mp)
1377 {
1378 lck_rw_done(&mp->mnt_rwlock);
1379 }
1380
1381 static int
1382 get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1383 {
1384 vnode_t vp;
1385
1386 if (height >= MAX_IMAGEBOOT_NESTING) {
1387 return EINVAL;
1388 }
1389
1390 vp = imgsrc_rootvnodes[height];
1391 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1392 *rvpp = vp;
1393 return 0;
1394 } else {
1395 return ENOENT;
1396 }
1397 }
1398
1399 static int
1400 relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
1401 const char *fsname, vfs_context_t ctx,
1402 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
1403 {
1404 int error;
1405 mount_t mp;
1406 boolean_t placed = FALSE;
1407 vnode_t devvp = NULLVP;
1408 struct vfstable *vfsp;
1409 user_addr_t devpath;
1410 char *old_mntonname;
1411 vnode_t rvp;
1412 uint32_t height;
1413 uint32_t flags;
1414
1415 /* If we didn't imageboot, nothing to move */
1416 if (imgsrc_rootvnodes[0] == NULLVP) {
1417 return EINVAL;
1418 }
1419
1420 /* Only root can do this */
1421 if (!vfs_context_issuser(ctx)) {
1422 return EPERM;
1423 }
1424
1425 IMGSRC_DEBUG("looking for root vnode.\n");
1426
1427 /*
1428 * Get root vnode of filesystem we're moving.
1429 */
1430 if (by_index) {
1431 if (is64bit) {
1432 struct user64_mnt_imgsrc_args mia64;
1433 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1434 if (error != 0) {
1435 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1436 return error;
1437 }
1438
1439 height = mia64.mi_height;
1440 flags = mia64.mi_flags;
1441 devpath = mia64.mi_devpath;
1442 } else {
1443 struct user32_mnt_imgsrc_args mia32;
1444 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1445 if (error != 0) {
1446 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1447 return error;
1448 }
1449
1450 height = mia32.mi_height;
1451 flags = mia32.mi_flags;
1452 devpath = mia32.mi_devpath;
1453 }
1454 } else {
1455 /*
1456 * For binary compatibility--assumes one level of nesting.
1457 */
1458 if (is64bit) {
1459 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1460 return error;
1461 } else {
1462 user32_addr_t tmp;
1463 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1464 return error;
1465
1466 /* munge into LP64 addr */
1467 devpath = CAST_USER_ADDR_T(tmp);
1468 }
1469
1470 height = 0;
1471 flags = 0;
1472 }
1473
1474 if (flags != 0) {
1475 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1476 return EINVAL;
1477 }
1478
1479 error = get_imgsrc_rootvnode(height, &rvp);
1480 if (error != 0) {
1481 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
1482 return error;
1483 }
1484
1485 IMGSRC_DEBUG("got root vnode.\n");
1486
1487 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1488
1489 /* Can only move once */
1490 mp = vnode_mount(rvp);
1491 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1492 IMGSRC_DEBUG("Already moved.\n");
1493 error = EBUSY;
1494 goto out0;
1495 }
1496
1497 IMGSRC_DEBUG("Starting updated.\n");
1498
1499 /* Get exclusive rwlock on mount, authorize update on mp */
1500 error = mount_begin_update(mp , ctx, 0);
1501 if (error != 0) {
1502 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
1503 goto out0;
1504 }
1505
1506 /*
1507 * It can only be moved once. Flag is set under the rwlock,
1508 * so we're now safe to proceed.
1509 */
1510 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1511 IMGSRC_DEBUG("Already moved [2]\n");
1512 goto out1;
1513 }
1514
1515
1516 IMGSRC_DEBUG("Preparing coveredvp.\n");
1517
1518 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1519 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
1520 if (error != 0) {
1521 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
1522 goto out1;
1523 }
1524
1525 IMGSRC_DEBUG("Covered vp OK.\n");
1526
1527 /* Sanity check the name caller has provided */
1528 vfsp = mp->mnt_vtable;
1529 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
1530 IMGSRC_DEBUG("Wrong fs name.\n");
1531 error = EINVAL;
1532 goto out2;
1533 }
1534
1535 /* Check the device vnode and update mount-from name, for local filesystems */
1536 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
1537 IMGSRC_DEBUG("Local, doing device validation.\n");
1538
1539 if (devpath != USER_ADDR_NULL) {
1540 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1541 if (error) {
1542 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1543 goto out2;
1544 }
1545
1546 vnode_put(devvp);
1547 }
1548 }
1549
1550 /*
1551 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1552 * and increment the name cache's mount generation
1553 */
1554
1555 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1556 error = place_mount_and_checkdirs(mp, vp, ctx);
1557 if (error != 0) {
1558 goto out2;
1559 }
1560
1561 placed = TRUE;
1562
1563 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1564 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1565
1566 /* Forbid future moves */
1567 mount_lock(mp);
1568 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1569 mount_unlock(mp);
1570
1571 /* Finally, add to mount list, completely ready to go */
1572 if (mount_list_add(mp) != 0) {
1573 /*
1574 * The system is shutting down trying to umount
1575 * everything, so fail with a plausible errno.
1576 */
1577 error = EBUSY;
1578 goto out3;
1579 }
1580
1581 mount_end_update(mp);
1582 vnode_put(rvp);
1583 FREE(old_mntonname, M_TEMP);
1584
1585 vfs_notify_mount(pvp);
1586
1587 return 0;
1588 out3:
1589 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1590
1591 mount_lock(mp);
1592 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1593 mount_unlock(mp);
1594
1595 out2:
1596 /*
1597 * Placing the mp on the vnode clears VMOUNT,
1598 * so cleanup is different after that point
1599 */
1600 if (placed) {
1601 /* Rele the vp, clear VMOUNT and v_mountedhere */
1602 undo_place_on_covered_vp(mp, vp);
1603 } else {
1604 vnode_lock_spin(vp);
1605 CLR(vp->v_flag, VMOUNT);
1606 vnode_unlock(vp);
1607 }
1608 out1:
1609 mount_end_update(mp);
1610
1611 out0:
1612 vnode_put(rvp);
1613 FREE(old_mntonname, M_TEMP);
1614 return error;
1615 }
1616
1617 #endif /* CONFIG_IMGSRC_ACCESS */
1618
1619 void
1620 enablequotas(struct mount *mp, vfs_context_t ctx)
1621 {
1622 struct nameidata qnd;
1623 int type;
1624 char qfpath[MAXPATHLEN];
1625 const char *qfname = QUOTAFILENAME;
1626 const char *qfopsname = QUOTAOPSNAME;
1627 const char *qfextension[] = INITQFNAMES;
1628
1629 /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
1630 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1631 return;
1632 }
1633 /*
1634 * Enable filesystem disk quotas if necessary.
1635 * We ignore errors as this should not interfere with final mount
1636 */
1637 for (type=0; type < MAXQUOTAS; type++) {
1638 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
1639 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1640 CAST_USER_ADDR_T(qfpath), ctx);
1641 if (namei(&qnd) != 0)
1642 continue; /* option file to trigger quotas is not present */
1643 vnode_put(qnd.ni_vp);
1644 nameidone(&qnd);
1645 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
1646
1647 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
1648 }
1649 return;
1650 }
1651
1652
1653 static int
1654 checkdirs_callback(proc_t p, void * arg)
1655 {
1656 struct cdirargs * cdrp = (struct cdirargs * )arg;
1657 vnode_t olddp = cdrp->olddp;
1658 vnode_t newdp = cdrp->newdp;
1659 struct filedesc *fdp;
1660 vnode_t tvp;
1661 vnode_t fdp_cvp;
1662 vnode_t fdp_rvp;
1663 int cdir_changed = 0;
1664 int rdir_changed = 0;
1665
1666 /*
1667 * XXX Also needs to iterate each thread in the process to see if it
1668 * XXX is using a per-thread current working directory, and, if so,
1669 * XXX update that as well.
1670 */
1671
1672 proc_fdlock(p);
1673 fdp = p->p_fd;
1674 if (fdp == (struct filedesc *)0) {
1675 proc_fdunlock(p);
1676 return(PROC_RETURNED);
1677 }
1678 fdp_cvp = fdp->fd_cdir;
1679 fdp_rvp = fdp->fd_rdir;
1680 proc_fdunlock(p);
1681
1682 if (fdp_cvp == olddp) {
1683 vnode_ref(newdp);
1684 tvp = fdp->fd_cdir;
1685 fdp_cvp = newdp;
1686 cdir_changed = 1;
1687 vnode_rele(tvp);
1688 }
1689 if (fdp_rvp == olddp) {
1690 vnode_ref(newdp);
1691 tvp = fdp->fd_rdir;
1692 fdp_rvp = newdp;
1693 rdir_changed = 1;
1694 vnode_rele(tvp);
1695 }
1696 if (cdir_changed || rdir_changed) {
1697 proc_fdlock(p);
1698 fdp->fd_cdir = fdp_cvp;
1699 fdp->fd_rdir = fdp_rvp;
1700 proc_fdunlock(p);
1701 }
1702 return(PROC_RETURNED);
1703 }
1704
1705
1706
1707 /*
1708 * Scan all active processes to see if any of them have a current
1709 * or root directory onto which the new filesystem has just been
1710 * mounted. If so, replace them with the new mount point.
1711 */
1712 static int
1713 checkdirs(vnode_t olddp, vfs_context_t ctx)
1714 {
1715 vnode_t newdp;
1716 vnode_t tvp;
1717 int err;
1718 struct cdirargs cdr;
1719
1720 if (olddp->v_usecount == 1)
1721 return(0);
1722 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
1723
1724 if (err != 0) {
1725 #if DIAGNOSTIC
1726 panic("mount: lost mount: error %d", err);
1727 #endif
1728 return(err);
1729 }
1730
1731 cdr.olddp = olddp;
1732 cdr.newdp = newdp;
1733 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1734 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
1735
1736 if (rootvnode == olddp) {
1737 vnode_ref(newdp);
1738 tvp = rootvnode;
1739 rootvnode = newdp;
1740 vnode_rele(tvp);
1741 }
1742
1743 vnode_put(newdp);
1744 return(0);
1745 }
1746
1747 /*
1748 * Unmount a file system.
1749 *
1750 * Note: unmount takes a path to the vnode mounted on as argument,
1751 * not special file (as before).
1752 */
1753 /* ARGSUSED */
1754 int
1755 unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1756 {
1757 vnode_t vp;
1758 struct mount *mp;
1759 int error;
1760 struct nameidata nd;
1761 vfs_context_t ctx = vfs_context_current();
1762
1763 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
1764 UIO_USERSPACE, uap->path, ctx);
1765 error = namei(&nd);
1766 if (error)
1767 return (error);
1768 vp = nd.ni_vp;
1769 mp = vp->v_mount;
1770 nameidone(&nd);
1771
1772 #if CONFIG_MACF
1773 error = mac_mount_check_umount(ctx, mp);
1774 if (error != 0) {
1775 vnode_put(vp);
1776 return (error);
1777 }
1778 #endif
1779 /*
1780 * Must be the root of the filesystem
1781 */
1782 if ((vp->v_flag & VROOT) == 0) {
1783 vnode_put(vp);
1784 return (EINVAL);
1785 }
1786 mount_ref(mp, 0);
1787 vnode_put(vp);
1788 /* safedounmount consumes the mount ref */
1789 return (safedounmount(mp, uap->flags, ctx));
1790 }
1791
1792 int
1793 vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx)
1794 {
1795 mount_t mp;
1796
1797 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1798 if (mp == (mount_t)0) {
1799 return(ENOENT);
1800 }
1801 mount_ref(mp, 0);
1802 mount_iterdrop(mp);
1803 /* safedounmount consumes the mount ref */
1804 return(safedounmount(mp, flags, ctx));
1805 }
1806
1807
1808 /*
1809 * The mount struct comes with a mount ref which will be consumed.
1810 * Do the actual file system unmount, prevent some common foot shooting.
1811 */
1812 int
1813 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1814 {
1815 int error;
1816 proc_t p = vfs_context_proc(ctx);
1817
1818 /*
1819 * If the file system is not responding and MNT_NOBLOCK
1820 * is set and not a forced unmount then return EBUSY.
1821 */
1822 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1823 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1824 error = EBUSY;
1825 goto out;
1826 }
1827
1828 /*
1829 * Skip authorization if the mount is tagged as permissive and
1830 * this is not a forced-unmount attempt.
1831 */
1832 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1833 /*
1834 * Only root, or the user that did the original mount is
1835 * permitted to unmount this filesystem.
1836 */
1837 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1838 (error = suser(kauth_cred_get(), &p->p_acflag)))
1839 goto out;
1840 }
1841 /*
1842 * Don't allow unmounting the root file system.
1843 */
1844 if (mp->mnt_flag & MNT_ROOTFS) {
1845 error = EBUSY; /* the root is always busy */
1846 goto out;
1847 }
1848
1849 #ifdef CONFIG_IMGSRC_ACCESS
1850 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1851 error = EBUSY;
1852 goto out;
1853 }
1854 #endif /* CONFIG_IMGSRC_ACCESS */
1855
1856 return (dounmount(mp, flags, 1, ctx));
1857
1858 out:
1859 mount_drop(mp, 0);
1860 return(error);
1861 }
1862
1863 /*
1864 * Do the actual file system unmount.
1865 */
1866 int
1867 dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1868 {
1869 vnode_t coveredvp = (vnode_t)0;
1870 int error;
1871 int needwakeup = 0;
1872 int forcedunmount = 0;
1873 int lflags = 0;
1874 struct vnode *devvp = NULLVP;
1875 #if CONFIG_TRIGGERS
1876 proc_t p = vfs_context_proc(ctx);
1877 int did_vflush = 0;
1878 int pflags_save = 0;
1879 #endif /* CONFIG_TRIGGERS */
1880
1881 #if CONFIG_FSE
1882 if (!(flags & MNT_FORCE)) {
1883 fsevent_unmount(mp, ctx); /* has to come first! */
1884 }
1885 #endif
1886
1887 mount_lock(mp);
1888
1889 /*
1890 * If already an unmount in progress just return EBUSY.
1891 * Even a forced unmount cannot override.
1892 */
1893 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1894 if (withref != 0)
1895 mount_drop(mp, 1);
1896 mount_unlock(mp);
1897 return (EBUSY);
1898 }
1899
1900 if (flags & MNT_FORCE) {
1901 forcedunmount = 1;
1902 mp->mnt_lflag |= MNT_LFORCE;
1903 }
1904
1905 #if CONFIG_TRIGGERS
1906 if (flags & MNT_NOBLOCK && p != kernproc)
1907 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
1908 #endif
1909
1910 mp->mnt_kern_flag |= MNTK_UNMOUNT;
1911 mp->mnt_lflag |= MNT_LUNMOUNT;
1912 mp->mnt_flag &=~ MNT_ASYNC;
1913 /*
1914 * anyone currently in the fast path that
1915 * trips over the cached rootvp will be
1916 * dumped out and forced into the slow path
1917 * to regenerate a new cached value
1918 */
1919 mp->mnt_realrootvp = NULLVP;
1920 mount_unlock(mp);
1921
1922 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
1923 /*
1924 * Force unmount any mounts in this filesystem.
1925 * If any unmounts fail - just leave them dangling.
1926 * Avoids recursion.
1927 */
1928 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
1929 }
1930
1931 /*
1932 * taking the name_cache_lock exclusively will
1933 * insure that everyone is out of the fast path who
1934 * might be trying to use a now stale copy of
1935 * vp->v_mountedhere->mnt_realrootvp
1936 * bumping mount_generation causes the cached values
1937 * to be invalidated
1938 */
1939 name_cache_lock();
1940 mount_generation++;
1941 name_cache_unlock();
1942
1943
1944 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1945 if (withref != 0)
1946 mount_drop(mp, 0);
1947 error = 0;
1948 if (forcedunmount == 0) {
1949 ubc_umount(mp); /* release cached vnodes */
1950 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1951 error = VFS_SYNC(mp, MNT_WAIT, ctx);
1952 if (error) {
1953 mount_lock(mp);
1954 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1955 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1956 mp->mnt_lflag &= ~MNT_LFORCE;
1957 goto out;
1958 }
1959 }
1960 }
1961
1962 IOBSDMountChange(mp, kIOMountChangeUnmount);
1963
1964 #if CONFIG_TRIGGERS
1965 vfs_nested_trigger_unmounts(mp, flags, ctx);
1966 did_vflush = 1;
1967 #endif
1968 if (forcedunmount)
1969 lflags |= FORCECLOSE;
1970 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
1971 if ((forcedunmount == 0) && error) {
1972 mount_lock(mp);
1973 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1974 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1975 mp->mnt_lflag &= ~MNT_LFORCE;
1976 goto out;
1977 }
1978
1979 /* make sure there are no one in the mount iterations or lookup */
1980 mount_iterdrain(mp);
1981
1982 error = VFS_UNMOUNT(mp, flags, ctx);
1983 if (error) {
1984 mount_iterreset(mp);
1985 mount_lock(mp);
1986 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1987 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1988 mp->mnt_lflag &= ~MNT_LFORCE;
1989 goto out;
1990 }
1991
1992 /* increment the operations count */
1993 if (!error)
1994 OSAddAtomic(1, &vfs_nummntops);
1995
1996 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
1997 /* hold an io reference and drop the usecount before close */
1998 devvp = mp->mnt_devvp;
1999 vnode_getalways(devvp);
2000 vnode_rele(devvp);
2001 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
2002 ctx);
2003 vnode_clearmountedon(devvp);
2004 vnode_put(devvp);
2005 }
2006 lck_rw_done(&mp->mnt_rwlock);
2007 mount_list_remove(mp);
2008 lck_rw_lock_exclusive(&mp->mnt_rwlock);
2009
2010 /* mark the mount point hook in the vp but not drop the ref yet */
2011 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
2012 /*
2013 * The covered vnode needs special handling. Trying to get an
2014 * iocount must not block here as this may lead to deadlocks
2015 * if the Filesystem to which the covered vnode belongs is
2016 * undergoing forced unmounts. Since we hold a usecount, the
2017 * vnode cannot be reused (it can, however, still be terminated)
2018 */
2019 vnode_getalways(coveredvp);
2020 vnode_lock_spin(coveredvp);
2021
2022 mp->mnt_crossref++;
2023 coveredvp->v_mountedhere = (struct mount *)0;
2024 CLR(coveredvp->v_flag, VMOUNT);
2025
2026 vnode_unlock(coveredvp);
2027 vnode_put(coveredvp);
2028 }
2029
2030 mount_list_lock();
2031 mp->mnt_vtable->vfc_refcount--;
2032 mount_list_unlock();
2033
2034 cache_purgevfs(mp); /* remove cache entries for this file sys */
2035 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
2036 mount_lock(mp);
2037 mp->mnt_lflag |= MNT_LDEAD;
2038
2039 if (mp->mnt_lflag & MNT_LWAIT) {
2040 /*
2041 * do the wakeup here
2042 * in case we block in mount_refdrain
2043 * which will drop the mount lock
2044 * and allow anyone blocked in vfs_busy
2045 * to wakeup and see the LDEAD state
2046 */
2047 mp->mnt_lflag &= ~MNT_LWAIT;
2048 wakeup((caddr_t)mp);
2049 }
2050 mount_refdrain(mp);
2051 out:
2052 if (mp->mnt_lflag & MNT_LWAIT) {
2053 mp->mnt_lflag &= ~MNT_LWAIT;
2054 needwakeup = 1;
2055 }
2056
2057 #if CONFIG_TRIGGERS
2058 if (flags & MNT_NOBLOCK && p != kernproc) {
2059 // Restore P_NOREMOTEHANG bit to its previous value
2060 if ((pflags_save & P_NOREMOTEHANG) == 0)
2061 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2062 }
2063
2064 /*
2065 * Callback and context are set together under the mount lock, and
2066 * never cleared, so we're safe to examine them here, drop the lock,
2067 * and call out.
2068 */
2069 if (mp->mnt_triggercallback != NULL) {
2070 mount_unlock(mp);
2071 if (error == 0) {
2072 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2073 } else if (did_vflush) {
2074 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2075 }
2076 } else {
2077 mount_unlock(mp);
2078 }
2079 #else
2080 mount_unlock(mp);
2081 #endif /* CONFIG_TRIGGERS */
2082
2083 lck_rw_done(&mp->mnt_rwlock);
2084
2085 if (needwakeup)
2086 wakeup((caddr_t)mp);
2087
2088 if (!error) {
2089 if ((coveredvp != NULLVP)) {
2090 vnode_t pvp = NULLVP;
2091
2092 /*
2093 * The covered vnode needs special handling. Trying to
2094 * get an iocount must not block here as this may lead
2095 * to deadlocks if the Filesystem to which the covered
2096 * vnode belongs is undergoing forced unmounts. Since we
2097 * hold a usecount, the vnode cannot be reused
2098 * (it can, however, still be terminated).
2099 */
2100 vnode_getalways(coveredvp);
2101
2102 mount_dropcrossref(mp, coveredvp, 0);
2103 /*
2104 * We'll _try_ to detect if this really needs to be
2105 * done. The coveredvp can only be in termination (or
2106 * terminated) if the coveredvp's mount point is in a
2107 * forced unmount (or has been) since we still hold the
2108 * ref.
2109 */
2110 if (!vnode_isrecycled(coveredvp)) {
2111 pvp = vnode_getparent(coveredvp);
2112 #if CONFIG_TRIGGERS
2113 if (coveredvp->v_resolve) {
2114 vnode_trigger_rearm(coveredvp, ctx);
2115 }
2116 #endif
2117 }
2118
2119 vnode_rele(coveredvp);
2120 vnode_put(coveredvp);
2121 coveredvp = NULLVP;
2122
2123 if (pvp) {
2124 lock_vnode_and_post(pvp, NOTE_WRITE);
2125 vnode_put(pvp);
2126 }
2127 } else if (mp->mnt_flag & MNT_ROOTFS) {
2128 mount_lock_destroy(mp);
2129 #if CONFIG_MACF
2130 mac_mount_label_destroy(mp);
2131 #endif
2132 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2133 } else
2134 panic("dounmount: no coveredvp");
2135 }
2136 return (error);
2137 }
2138
2139 /*
2140 * Unmount any mounts in this filesystem.
2141 */
2142 void
2143 dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2144 {
2145 mount_t smp;
2146 fsid_t *fsids, fsid;
2147 int fsids_sz;
2148 int count = 0, i, m = 0;
2149 vnode_t vp;
2150
2151 mount_list_lock();
2152
2153 // Get an array to hold the submounts fsids.
2154 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2155 count++;
2156 fsids_sz = count * sizeof(fsid_t);
2157 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2158 if (fsids == NULL) {
2159 mount_list_unlock();
2160 goto out;
2161 }
2162 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2163
2164 /*
2165 * Fill the array with submount fsids.
2166 * Since mounts are always added to the tail of the mount list, the
2167 * list is always in mount order.
2168 * For each mount check if the mounted-on vnode belongs to a
2169 * mount that's already added to our array of mounts to be unmounted.
2170 */
2171 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2172 vp = smp->mnt_vnodecovered;
2173 if (vp == NULL)
2174 continue;
2175 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2176 for (i = 0; i <= m; i++) {
2177 if (fsids[i].val[0] == fsid.val[0] &&
2178 fsids[i].val[1] == fsid.val[1]) {
2179 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2180 break;
2181 }
2182 }
2183 }
2184 mount_list_unlock();
2185
2186 // Unmount the submounts in reverse order. Ignore errors.
2187 for (i = m; i > 0; i--) {
2188 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2189 if (smp) {
2190 mount_ref(smp, 0);
2191 mount_iterdrop(smp);
2192 (void) dounmount(smp, flags, 1, ctx);
2193 }
2194 }
2195 out:
2196 if (fsids)
2197 FREE(fsids, M_TEMP);
2198 }
2199
2200 void
2201 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2202 {
2203 vnode_lock(dp);
2204 mp->mnt_crossref--;
2205
2206 if (mp->mnt_crossref < 0)
2207 panic("mount cross refs -ve");
2208
2209 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
2210
2211 if (need_put)
2212 vnode_put_locked(dp);
2213 vnode_unlock(dp);
2214
2215 mount_lock_destroy(mp);
2216 #if CONFIG_MACF
2217 mac_mount_label_destroy(mp);
2218 #endif
2219 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2220 return;
2221 }
2222 if (need_put)
2223 vnode_put_locked(dp);
2224 vnode_unlock(dp);
2225 }
2226
2227
2228 /*
2229 * Sync each mounted filesystem.
2230 */
#if DIAGNOSTIC
/* When non-zero, sync() and sync_thread() also call vfs_bufstats(). */
int syncprt = 0;
#endif

/* When non-zero, sync() and sync_thread() also call vm_countdirtypages(). */
int print_vmpage_stat = 0;

/* Sync time limit (sec); handed to sync_async() by sync_internal(). */
int sync_timeout = 60;
2237
2238 static int
2239 sync_callback(mount_t mp, __unused void *arg)
2240 {
2241 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2242 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2243
2244 mp->mnt_flag &= ~MNT_ASYNC;
2245 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
2246 if (asyncflag)
2247 mp->mnt_flag |= MNT_ASYNC;
2248 }
2249
2250 return (VFS_RETURNED);
2251 }
2252
2253 /* ARGSUSED */
2254 int
2255 sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
2256 {
2257 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2258
2259 if (print_vmpage_stat) {
2260 vm_countdirtypages();
2261 }
2262
2263 #if DIAGNOSTIC
2264 if (syncprt)
2265 vfs_bufstats();
2266 #endif /* DIAGNOSTIC */
2267 return 0;
2268 }
2269
2270 static void
2271 sync_thread(void *arg, __unused wait_result_t wr)
2272 {
2273 int *timeout = (int *) arg;
2274
2275 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2276
2277 if (timeout)
2278 wakeup((caddr_t) timeout);
2279 if (print_vmpage_stat) {
2280 vm_countdirtypages();
2281 }
2282
2283 #if DIAGNOSTIC
2284 if (syncprt)
2285 vfs_bufstats();
2286 #endif /* DIAGNOSTIC */
2287 }
2288
2289 /*
2290 * Sync in a separate thread so we can time out if it blocks.
2291 */
2292 static int
2293 sync_async(int timeout)
2294 {
2295 thread_t thd;
2296 int error;
2297 struct timespec ts = {timeout, 0};
2298
2299 lck_mtx_lock(sync_mtx_lck);
2300 if (kernel_thread_start(sync_thread, &timeout, &thd) != KERN_SUCCESS) {
2301 printf("sync_thread failed\n");
2302 lck_mtx_unlock(sync_mtx_lck);
2303 return (0);
2304 }
2305
2306 error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "sync_thread", &ts);
2307 if (error) {
2308 printf("sync timed out: %d sec\n", timeout);
2309 }
2310 thread_deallocate(thd);
2311
2312 return (0);
2313 }
2314
2315 /*
2316 * An in-kernel sync for power management to call.
2317 */
2318 __private_extern__ int
2319 sync_internal(void)
2320 {
2321 (void) sync_async(sync_timeout);
2322
2323 return 0;
2324 } /* end of sync_internal call */
2325
2326 /*
2327 * Change filesystem quotas.
2328 */
2329 #if QUOTA
2330 int
2331 quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
2332 {
2333 struct mount *mp;
2334 int error, quota_cmd, quota_status;
2335 caddr_t datap;
2336 size_t fnamelen;
2337 struct nameidata nd;
2338 vfs_context_t ctx = vfs_context_current();
2339 struct dqblk my_dqblk;
2340
2341 AUDIT_ARG(uid, uap->uid);
2342 AUDIT_ARG(cmd, uap->cmd);
2343 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2344 uap->path, ctx);
2345 error = namei(&nd);
2346 if (error)
2347 return (error);
2348 mp = nd.ni_vp->v_mount;
2349 vnode_put(nd.ni_vp);
2350 nameidone(&nd);
2351
2352 /* copyin any data we will need for downstream code */
2353 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2354
2355 switch (quota_cmd) {
2356 case Q_QUOTAON:
2357 /* uap->arg specifies a file from which to take the quotas */
2358 fnamelen = MAXPATHLEN;
2359 datap = kalloc(MAXPATHLEN);
2360 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2361 break;
2362 case Q_GETQUOTA:
2363 /* uap->arg is a pointer to a dqblk structure. */
2364 datap = (caddr_t) &my_dqblk;
2365 break;
2366 case Q_SETQUOTA:
2367 case Q_SETUSE:
2368 /* uap->arg is a pointer to a dqblk structure. */
2369 datap = (caddr_t) &my_dqblk;
2370 if (proc_is64bit(p)) {
2371 struct user_dqblk my_dqblk64;
2372 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
2373 if (error == 0) {
2374 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2375 }
2376 }
2377 else {
2378 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
2379 }
2380 break;
2381 case Q_QUOTASTAT:
2382 /* uap->arg is a pointer to an integer */
2383 datap = (caddr_t) &quota_status;
2384 break;
2385 default:
2386 datap = NULL;
2387 break;
2388 } /* switch */
2389
2390 if (error == 0) {
2391 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
2392 }
2393
2394 switch (quota_cmd) {
2395 case Q_QUOTAON:
2396 if (datap != NULL)
2397 kfree(datap, MAXPATHLEN);
2398 break;
2399 case Q_GETQUOTA:
2400 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2401 if (error == 0) {
2402 if (proc_is64bit(p)) {
2403 struct user_dqblk my_dqblk64 = {.dqb_bhardlimit = 0};
2404 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2405 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
2406 }
2407 else {
2408 error = copyout(datap, uap->arg, sizeof (struct dqblk));
2409 }
2410 }
2411 break;
2412 case Q_QUOTASTAT:
2413 /* uap->arg is a pointer to an integer */
2414 if (error == 0) {
2415 error = copyout(datap, uap->arg, sizeof(quota_status));
2416 }
2417 break;
2418 default:
2419 break;
2420 } /* switch */
2421
2422 return (error);
2423 }
2424 #else
2425 int
2426 quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2427 {
2428 return (EOPNOTSUPP);
2429 }
2430 #endif /* QUOTA */
2431
2432 /*
2433 * Get filesystem statistics.
2434 *
2435 * Returns: 0 Success
2436 * namei:???
2437 * vfs_update_vfsstat:???
2438 * munge_statfs:EFAULT
2439 */
2440 /* ARGSUSED */
2441 int
2442 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
2443 {
2444 struct mount *mp;
2445 struct vfsstatfs *sp;
2446 int error;
2447 struct nameidata nd;
2448 vfs_context_t ctx = vfs_context_current();
2449 vnode_t vp;
2450
2451 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2452 UIO_USERSPACE, uap->path, ctx);
2453 error = namei(&nd);
2454 if (error != 0)
2455 return (error);
2456 vp = nd.ni_vp;
2457 mp = vp->v_mount;
2458 sp = &mp->mnt_vfsstat;
2459 nameidone(&nd);
2460
2461 #if CONFIG_MACF
2462 error = mac_mount_check_stat(ctx, mp);
2463 if (error != 0)
2464 return (error);
2465 #endif
2466
2467 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
2468 if (error != 0) {
2469 vnode_put(vp);
2470 return (error);
2471 }
2472
2473 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2474 vnode_put(vp);
2475 return (error);
2476 }
2477
2478 /*
2479 * Get filesystem statistics.
2480 */
2481 /* ARGSUSED */
2482 int
2483 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
2484 {
2485 vnode_t vp;
2486 struct mount *mp;
2487 struct vfsstatfs *sp;
2488 int error;
2489
2490 AUDIT_ARG(fd, uap->fd);
2491
2492 if ( (error = file_vnode(uap->fd, &vp)) )
2493 return (error);
2494
2495 error = vnode_getwithref(vp);
2496 if (error) {
2497 file_drop(uap->fd);
2498 return (error);
2499 }
2500
2501 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2502
2503 mp = vp->v_mount;
2504 if (!mp) {
2505 error = EBADF;
2506 goto out;
2507 }
2508
2509 #if CONFIG_MACF
2510 error = mac_mount_check_stat(vfs_context_current(), mp);
2511 if (error != 0)
2512 goto out;
2513 #endif
2514
2515 sp = &mp->mnt_vfsstat;
2516 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2517 goto out;
2518 }
2519
2520 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2521
2522 out:
2523 file_drop(uap->fd);
2524 vnode_put(vp);
2525
2526 return (error);
2527 }
2528
2529 /*
2530 * Common routine to handle copying of statfs64 data to user space
2531 */
2532 static int
2533 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2534 {
2535 int error;
2536 struct statfs64 sfs;
2537
2538 bzero(&sfs, sizeof(sfs));
2539
2540 sfs.f_bsize = sfsp->f_bsize;
2541 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2542 sfs.f_blocks = sfsp->f_blocks;
2543 sfs.f_bfree = sfsp->f_bfree;
2544 sfs.f_bavail = sfsp->f_bavail;
2545 sfs.f_files = sfsp->f_files;
2546 sfs.f_ffree = sfsp->f_ffree;
2547 sfs.f_fsid = sfsp->f_fsid;
2548 sfs.f_owner = sfsp->f_owner;
2549 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2550 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2551 sfs.f_fssubtype = sfsp->f_fssubtype;
2552 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2553 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2554 } else {
2555 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2556 }
2557 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2558 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2559
2560 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2561
2562 return(error);
2563 }
2564
2565 /*
2566 * Get file system statistics in 64-bit mode
2567 */
2568 int
2569 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2570 {
2571 struct mount *mp;
2572 struct vfsstatfs *sp;
2573 int error;
2574 struct nameidata nd;
2575 vfs_context_t ctxp = vfs_context_current();
2576 vnode_t vp;
2577
2578 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2579 UIO_USERSPACE, uap->path, ctxp);
2580 error = namei(&nd);
2581 if (error != 0)
2582 return (error);
2583 vp = nd.ni_vp;
2584 mp = vp->v_mount;
2585 sp = &mp->mnt_vfsstat;
2586 nameidone(&nd);
2587
2588 #if CONFIG_MACF
2589 error = mac_mount_check_stat(ctxp, mp);
2590 if (error != 0)
2591 return (error);
2592 #endif
2593
2594 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
2595 if (error != 0) {
2596 vnode_put(vp);
2597 return (error);
2598 }
2599
2600 error = statfs64_common(mp, sp, uap->buf);
2601 vnode_put(vp);
2602
2603 return (error);
2604 }
2605
2606 /*
2607 * Get file system statistics in 64-bit mode
2608 */
2609 int
2610 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2611 {
2612 struct vnode *vp;
2613 struct mount *mp;
2614 struct vfsstatfs *sp;
2615 int error;
2616
2617 AUDIT_ARG(fd, uap->fd);
2618
2619 if ( (error = file_vnode(uap->fd, &vp)) )
2620 return (error);
2621
2622 error = vnode_getwithref(vp);
2623 if (error) {
2624 file_drop(uap->fd);
2625 return (error);
2626 }
2627
2628 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2629
2630 mp = vp->v_mount;
2631 if (!mp) {
2632 error = EBADF;
2633 goto out;
2634 }
2635
2636 #if CONFIG_MACF
2637 error = mac_mount_check_stat(vfs_context_current(), mp);
2638 if (error != 0)
2639 goto out;
2640 #endif
2641
2642 sp = &mp->mnt_vfsstat;
2643 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2644 goto out;
2645 }
2646
2647 error = statfs64_common(mp, sp, uap->buf);
2648
2649 out:
2650 file_drop(uap->fd);
2651 vnode_put(vp);
2652
2653 return (error);
2654 }
2655
2656 struct getfsstat_struct {
2657 user_addr_t sfsp;
2658 user_addr_t *mp;
2659 int count;
2660 int maxcount;
2661 int flags;
2662 int error;
2663 };
2664
2665
2666 static int
2667 getfsstat_callback(mount_t mp, void * arg)
2668 {
2669
2670 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2671 struct vfsstatfs *sp;
2672 int error, my_size;
2673 vfs_context_t ctx = vfs_context_current();
2674
2675 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2676 #if CONFIG_MACF
2677 error = mac_mount_check_stat(ctx, mp);
2678 if (error != 0) {
2679 fstp->error = error;
2680 return(VFS_RETURNED_DONE);
2681 }
2682 #endif
2683 sp = &mp->mnt_vfsstat;
2684 /*
2685 * If MNT_NOWAIT is specified, do not refresh the
2686 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2687 */
2688 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2689 (error = vfs_update_vfsstat(mp, ctx,
2690 VFS_USER_EVENT))) {
2691 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2692 return(VFS_RETURNED);
2693 }
2694
2695 /*
2696 * Need to handle LP64 version of struct statfs
2697 */
2698 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
2699 if (error) {
2700 fstp->error = error;
2701 return(VFS_RETURNED_DONE);
2702 }
2703 fstp->sfsp += my_size;
2704
2705 if (fstp->mp) {
2706 #if CONFIG_MACF
2707 error = mac_mount_label_get(mp, *fstp->mp);
2708 if (error) {
2709 fstp->error = error;
2710 return(VFS_RETURNED_DONE);
2711 }
2712 #endif
2713 fstp->mp++;
2714 }
2715 }
2716 fstp->count++;
2717 return(VFS_RETURNED);
2718 }
2719
2720 /*
2721 * Get statistics on all filesystems.
2722 */
2723 int
2724 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2725 {
2726 struct __mac_getfsstat_args muap;
2727
2728 muap.buf = uap->buf;
2729 muap.bufsize = uap->bufsize;
2730 muap.mac = USER_ADDR_NULL;
2731 muap.macsize = 0;
2732 muap.flags = uap->flags;
2733
2734 return (__mac_getfsstat(p, &muap, retval));
2735 }
2736
2737 /*
2738 * __mac_getfsstat: Get MAC-related file system statistics
2739 *
2740 * Parameters: p (ignored)
2741 * uap User argument descriptor (see below)
2742 * retval Count of file system statistics (N stats)
2743 *
2744 * Indirect: uap->bufsize Buffer size
2745 * uap->macsize MAC info size
2746 * uap->buf Buffer where information will be returned
2747 * uap->mac MAC info
2748 * uap->flags File system flags
2749 *
2750 *
2751 * Returns: 0 Success
2752 * !0 Not success
2753 *
2754 */
2755 int
2756 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
2757 {
2758 user_addr_t sfsp;
2759 user_addr_t *mp;
2760 size_t count, maxcount, bufsize, macsize;
2761 struct getfsstat_struct fst;
2762
2763 bufsize = (size_t) uap->bufsize;
2764 macsize = (size_t) uap->macsize;
2765
2766 if (IS_64BIT_PROCESS(p)) {
2767 maxcount = bufsize / sizeof(struct user64_statfs);
2768 }
2769 else {
2770 maxcount = bufsize / sizeof(struct user32_statfs);
2771 }
2772 sfsp = uap->buf;
2773 count = 0;
2774
2775 mp = NULL;
2776
2777 #if CONFIG_MACF
2778 if (uap->mac != USER_ADDR_NULL) {
2779 u_int32_t *mp0;
2780 int error;
2781 unsigned int i;
2782
2783 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2784 if (count != maxcount)
2785 return (EINVAL);
2786
2787 /* Copy in the array */
2788 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2789 if (mp0 == NULL) {
2790 return (ENOMEM);
2791 }
2792
2793 error = copyin(uap->mac, mp0, macsize);
2794 if (error) {
2795 FREE(mp0, M_MACTEMP);
2796 return (error);
2797 }
2798
2799 /* Normalize to an array of user_addr_t */
2800 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
2801 if (mp == NULL) {
2802 FREE(mp0, M_MACTEMP);
2803 return (ENOMEM);
2804 }
2805
2806 for (i = 0; i < count; i++) {
2807 if (IS_64BIT_PROCESS(p))
2808 mp[i] = ((user_addr_t *)mp0)[i];
2809 else
2810 mp[i] = (user_addr_t)mp0[i];
2811 }
2812 FREE(mp0, M_MACTEMP);
2813 }
2814 #endif
2815
2816
2817 fst.sfsp = sfsp;
2818 fst.mp = mp;
2819 fst.flags = uap->flags;
2820 fst.count = 0;
2821 fst.error = 0;
2822 fst.maxcount = maxcount;
2823
2824
2825 vfs_iterate(0, getfsstat_callback, &fst);
2826
2827 if (mp)
2828 FREE(mp, M_MACTEMP);
2829
2830 if (fst.error ) {
2831 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2832 return(fst.error);
2833 }
2834
2835 if (fst.sfsp && fst.count > fst.maxcount)
2836 *retval = fst.maxcount;
2837 else
2838 *retval = fst.count;
2839 return (0);
2840 }
2841
2842 static int
2843 getfsstat64_callback(mount_t mp, void * arg)
2844 {
2845 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2846 struct vfsstatfs *sp;
2847 int error;
2848
2849 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2850 #if CONFIG_MACF
2851 error = mac_mount_check_stat(vfs_context_current(), mp);
2852 if (error != 0) {
2853 fstp->error = error;
2854 return(VFS_RETURNED_DONE);
2855 }
2856 #endif
2857 sp = &mp->mnt_vfsstat;
2858 /*
2859 * If MNT_NOWAIT is specified, do not refresh the fsstat
2860 * cache. MNT_WAIT overrides MNT_NOWAIT.
2861 *
2862 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2863 * getfsstat, since the constants are out of the same
2864 * namespace.
2865 */
2866 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2867 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2868 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2869 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2870 return(VFS_RETURNED);
2871 }
2872
2873 error = statfs64_common(mp, sp, fstp->sfsp);
2874 if (error) {
2875 fstp->error = error;
2876 return(VFS_RETURNED_DONE);
2877 }
2878 fstp->sfsp += sizeof(struct statfs64);
2879 }
2880 fstp->count++;
2881 return(VFS_RETURNED);
2882 }
2883
2884 /*
2885 * Get statistics on all file systems in 64 bit mode.
2886 */
2887 int
2888 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2889 {
2890 user_addr_t sfsp;
2891 int count, maxcount;
2892 struct getfsstat_struct fst;
2893
2894 maxcount = uap->bufsize / sizeof(struct statfs64);
2895
2896 sfsp = uap->buf;
2897 count = 0;
2898
2899 fst.sfsp = sfsp;
2900 fst.flags = uap->flags;
2901 fst.count = 0;
2902 fst.error = 0;
2903 fst.maxcount = maxcount;
2904
2905 vfs_iterate(0, getfsstat64_callback, &fst);
2906
2907 if (fst.error ) {
2908 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2909 return(fst.error);
2910 }
2911
2912 if (fst.sfsp && fst.count > fst.maxcount)
2913 *retval = fst.maxcount;
2914 else
2915 *retval = fst.count;
2916
2917 return (0);
2918 }
2919
2920 /*
2921 * gets the associated vnode with the file descriptor passed.
2922 * as input
2923 *
2924 * INPUT
2925 * ctx - vfs context of caller
2926 * fd - file descriptor for which vnode is required.
2927 * vpp - Pointer to pointer to vnode to be returned.
2928 *
2929 * The vnode is returned with an iocount so any vnode obtained
2930 * by this call needs a vnode_put
2931 *
2932 */
2933 int
2934 vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
2935 {
2936 int error;
2937 vnode_t vp;
2938 struct fileproc *fp;
2939 proc_t p = vfs_context_proc(ctx);
2940
2941 *vpp = NULLVP;
2942
2943 error = fp_getfvp(p, fd, &fp, &vp);
2944 if (error)
2945 return (error);
2946
2947 error = vnode_getwithref(vp);
2948 if (error) {
2949 (void)fp_drop(p, fd, fp, 0);
2950 return (error);
2951 }
2952
2953 (void)fp_drop(p, fd, fp, 0);
2954 *vpp = vp;
2955 return (error);
2956 }
2957
2958 /*
2959 * Wrapper function around namei to start lookup from a directory
2960 * specified by a file descriptor ni_dirfd.
2961 *
2962 * In addition to all the errors returned by namei, this call can
2963 * return ENOTDIR if the file descriptor does not refer to a directory.
2964 * and EBADF if the file descriptor is not valid.
2965 */
2966 int
2967 nameiat(struct nameidata *ndp, int dirfd)
2968 {
2969 if ((dirfd != AT_FDCWD) &&
2970 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
2971 !(ndp->ni_cnd.cn_flags & USEDVP)) {
2972 int error = 0;
2973 char c;
2974
2975 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
2976 error = copyin(ndp->ni_dirp, &c, sizeof(char));
2977 if (error)
2978 return (error);
2979 } else {
2980 c = *((char *)(ndp->ni_dirp));
2981 }
2982
2983 if (c != '/') {
2984 vnode_t dvp_at;
2985
2986 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
2987 &dvp_at);
2988 if (error)
2989 return (error);
2990
2991 if (vnode_vtype(dvp_at) != VDIR) {
2992 vnode_put(dvp_at);
2993 return (ENOTDIR);
2994 }
2995
2996 ndp->ni_dvp = dvp_at;
2997 ndp->ni_cnd.cn_flags |= USEDVP;
2998 error = namei(ndp);
2999 ndp->ni_cnd.cn_flags &= ~USEDVP;
3000 vnode_put(dvp_at);
3001 return (error);
3002 }
3003 }
3004
3005 return (namei(ndp));
3006 }
3007
3008 /*
3009 * Change current working directory to a given file descriptor.
3010 */
3011 /* ARGSUSED */
3012 static int
3013 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
3014 {
3015 struct filedesc *fdp = p->p_fd;
3016 vnode_t vp;
3017 vnode_t tdp;
3018 vnode_t tvp;
3019 struct mount *mp;
3020 int error;
3021 vfs_context_t ctx = vfs_context_current();
3022
3023 AUDIT_ARG(fd, uap->fd);
3024 if (per_thread && uap->fd == -1) {
3025 /*
3026 * Switching back from per-thread to per process CWD; verify we
3027 * in fact have one before proceeding. The only success case
3028 * for this code path is to return 0 preemptively after zapping
3029 * the thread structure contents.
3030 */
3031 thread_t th = vfs_context_thread(ctx);
3032 if (th) {
3033 uthread_t uth = get_bsdthread_info(th);
3034 tvp = uth->uu_cdir;
3035 uth->uu_cdir = NULLVP;
3036 if (tvp != NULLVP) {
3037 vnode_rele(tvp);
3038 return (0);
3039 }
3040 }
3041 return (EBADF);
3042 }
3043
3044 if ( (error = file_vnode(uap->fd, &vp)) )
3045 return(error);
3046 if ( (error = vnode_getwithref(vp)) ) {
3047 file_drop(uap->fd);
3048 return(error);
3049 }
3050
3051 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3052
3053 if (vp->v_type != VDIR) {
3054 error = ENOTDIR;
3055 goto out;
3056 }
3057
3058 #if CONFIG_MACF
3059 error = mac_vnode_check_chdir(ctx, vp);
3060 if (error)
3061 goto out;
3062 #endif
3063 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3064 if (error)
3065 goto out;
3066
3067 while (!error && (mp = vp->v_mountedhere) != NULL) {
3068 if (vfs_busy(mp, LK_NOWAIT)) {
3069 error = EACCES;
3070 goto out;
3071 }
3072 error = VFS_ROOT(mp, &tdp, ctx);
3073 vfs_unbusy(mp);
3074 if (error)
3075 break;
3076 vnode_put(vp);
3077 vp = tdp;
3078 }
3079 if (error)
3080 goto out;
3081 if ( (error = vnode_ref(vp)) )
3082 goto out;
3083 vnode_put(vp);
3084
3085 if (per_thread) {
3086 thread_t th = vfs_context_thread(ctx);
3087 if (th) {
3088 uthread_t uth = get_bsdthread_info(th);
3089 tvp = uth->uu_cdir;
3090 uth->uu_cdir = vp;
3091 OSBitOrAtomic(P_THCWD, &p->p_flag);
3092 } else {
3093 vnode_rele(vp);
3094 return (ENOENT);
3095 }
3096 } else {
3097 proc_fdlock(p);
3098 tvp = fdp->fd_cdir;
3099 fdp->fd_cdir = vp;
3100 proc_fdunlock(p);
3101 }
3102
3103 if (tvp)
3104 vnode_rele(tvp);
3105 file_drop(uap->fd);
3106
3107 return (0);
3108 out:
3109 vnode_put(vp);
3110 file_drop(uap->fd);
3111
3112 return(error);
3113 }
3114
3115 int
3116 fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
3117 {
3118 return common_fchdir(p, uap, 0);
3119 }
3120
3121 int
3122 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
3123 {
3124 return common_fchdir(p, (void *)uap, 1);
3125 }
3126
3127 /*
3128 * Change current working directory (".").
3129 *
3130 * Returns: 0 Success
3131 * change_dir:ENOTDIR
3132 * change_dir:???
3133 * vnode_ref:ENOENT No such file or directory
3134 */
3135 /* ARGSUSED */
3136 static int
3137 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
3138 {
3139 struct filedesc *fdp = p->p_fd;
3140 int error;
3141 struct nameidata nd;
3142 vnode_t tvp;
3143 vfs_context_t ctx = vfs_context_current();
3144
3145 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
3146 UIO_USERSPACE, uap->path, ctx);
3147 error = change_dir(&nd, ctx);
3148 if (error)
3149 return (error);
3150 if ( (error = vnode_ref(nd.ni_vp)) ) {
3151 vnode_put(nd.ni_vp);
3152 return (error);
3153 }
3154 /*
3155 * drop the iocount we picked up in change_dir
3156 */
3157 vnode_put(nd.ni_vp);
3158
3159 if (per_thread) {
3160 thread_t th = vfs_context_thread(ctx);
3161 if (th) {
3162 uthread_t uth = get_bsdthread_info(th);
3163 tvp = uth->uu_cdir;
3164 uth->uu_cdir = nd.ni_vp;
3165 OSBitOrAtomic(P_THCWD, &p->p_flag);
3166 } else {
3167 vnode_rele(nd.ni_vp);
3168 return (ENOENT);
3169 }
3170 } else {
3171 proc_fdlock(p);
3172 tvp = fdp->fd_cdir;
3173 fdp->fd_cdir = nd.ni_vp;
3174 proc_fdunlock(p);
3175 }
3176
3177 if (tvp)
3178 vnode_rele(tvp);
3179
3180 return (0);
3181 }
3182
3183
3184 /*
3185 * chdir
3186 *
3187 * Change current working directory (".") for the entire process
3188 *
3189 * Parameters: p Process requesting the call
3190 * uap User argument descriptor (see below)
3191 * retval (ignored)
3192 *
3193 * Indirect parameters: uap->path Directory path
3194 *
3195 * Returns: 0 Success
3196 * common_chdir: ENOTDIR
3197 * common_chdir: ENOENT No such file or directory
3198 * common_chdir: ???
3199 *
3200 */
3201 int
3202 chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
3203 {
3204 return common_chdir(p, (void *)uap, 0);
3205 }
3206
3207 /*
3208 * __pthread_chdir
3209 *
3210 * Change current working directory (".") for a single thread
3211 *
3212 * Parameters: p Process requesting the call
3213 * uap User argument descriptor (see below)
3214 * retval (ignored)
3215 *
3216 * Indirect parameters: uap->path Directory path
3217 *
3218 * Returns: 0 Success
3219 * common_chdir: ENOTDIR
3220 * common_chdir: ENOENT No such file or directory
3221 * common_chdir: ???
3222 *
3223 */
3224 int
3225 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
3226 {
3227 return common_chdir(p, (void *)uap, 1);
3228 }
3229
3230
3231 /*
3232 * Change notion of root (``/'') directory.
3233 */
3234 /* ARGSUSED */
3235 int
3236 chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
3237 {
3238 struct filedesc *fdp = p->p_fd;
3239 int error;
3240 struct nameidata nd;
3241 vnode_t tvp;
3242 vfs_context_t ctx = vfs_context_current();
3243
3244 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
3245 return (error);
3246
3247 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
3248 UIO_USERSPACE, uap->path, ctx);
3249 error = change_dir(&nd, ctx);
3250 if (error)
3251 return (error);
3252
3253 #if CONFIG_MACF
3254 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3255 &nd.ni_cnd);
3256 if (error) {
3257 vnode_put(nd.ni_vp);
3258 return (error);
3259 }
3260 #endif
3261
3262 if ( (error = vnode_ref(nd.ni_vp)) ) {
3263 vnode_put(nd.ni_vp);
3264 return (error);
3265 }
3266 vnode_put(nd.ni_vp);
3267
3268 proc_fdlock(p);
3269 tvp = fdp->fd_rdir;
3270 fdp->fd_rdir = nd.ni_vp;
3271 fdp->fd_flags |= FD_CHROOT;
3272 proc_fdunlock(p);
3273
3274 if (tvp != NULL)
3275 vnode_rele(tvp);
3276
3277 return (0);
3278 }
3279
3280 /*
3281 * Common routine for chroot and chdir.
3282 *
3283 * Returns: 0 Success
3284 * ENOTDIR Not a directory
3285 * namei:??? [anything namei can return]
3286 * vnode_authorize:??? [anything vnode_authorize can return]
3287 */
3288 static int
3289 change_dir(struct nameidata *ndp, vfs_context_t ctx)
3290 {
3291 vnode_t vp;
3292 int error;
3293
3294 if ((error = namei(ndp)))
3295 return (error);
3296 nameidone(ndp);
3297 vp = ndp->ni_vp;
3298
3299 if (vp->v_type != VDIR) {
3300 vnode_put(vp);
3301 return (ENOTDIR);
3302 }
3303
3304 #if CONFIG_MACF
3305 error = mac_vnode_check_chdir(ctx, vp);
3306 if (error) {
3307 vnode_put(vp);
3308 return (error);
3309 }
3310 #endif
3311
3312 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3313 if (error) {
3314 vnode_put(vp);
3315 return (error);
3316 }
3317
3318 return (error);
3319 }
3320
3321 /*
3322 * Free the vnode data (for directories) associated with the file glob.
3323 */
3324 struct fd_vn_data *
3325 fg_vn_data_alloc(void)
3326 {
3327 struct fd_vn_data *fvdata;
3328
3329 /* Allocate per fd vnode data */
3330 MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
3331 M_FD_VN_DATA, M_WAITOK | M_ZERO);
3332 lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
3333 return fvdata;
3334 }
3335
3336 /*
3337 * Free the vnode data (for directories) associated with the file glob.
3338 */
3339 void
3340 fg_vn_data_free(void *fgvndata)
3341 {
3342 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3343
3344 if (fvdata->fv_buf)
3345 FREE(fvdata->fv_buf, M_FD_DIRBUF);
3346 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3347 FREE(fvdata, M_FD_VN_DATA);
3348 }
3349
3350 /*
3351 * Check permissions, allocate an open file structure,
3352 * and call the device open routine if any.
3353 *
3354 * Returns: 0 Success
3355 * EINVAL
3356 * EINTR
3357 * falloc:ENFILE
3358 * falloc:EMFILE
3359 * falloc:ENOMEM
3360 * vn_open_auth:???
3361 * dupfdopen:???
3362 * VNOP_ADVLOCK:???
3363 * vnode_setsize:???
3364 *
3365 * XXX Need to implement uid, gid
3366 */
3367 int
3368 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3369 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3370 int32_t *retval)
3371 {
3372 proc_t p = vfs_context_proc(ctx);
3373 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
3374 struct fileproc *fp;
3375 vnode_t vp;
3376 int flags, oflags;
3377 int type, indx, error;
3378 struct flock lf;
3379 struct vfs_context context;
3380
3381 oflags = uflags;
3382
3383 if ((oflags & O_ACCMODE) == O_ACCMODE)
3384 return(EINVAL);
3385
3386 flags = FFLAGS(uflags);
3387 CLR(flags, FENCRYPTED);
3388 CLR(flags, FUNENCRYPTED);
3389
3390 AUDIT_ARG(fflags, oflags);
3391 AUDIT_ARG(mode, vap->va_mode);
3392
3393 if ((error = falloc_withalloc(p,
3394 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
3395 return (error);
3396 }
3397 uu->uu_dupfd = -indx - 1;
3398
3399 if ((error = vn_open_auth(ndp, &flags, vap))) {
3400 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
3401 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
3402 fp_drop(p, indx, NULL, 0);
3403 *retval = indx;
3404 return (0);
3405 }
3406 }
3407 if (error == ERESTART)
3408 error = EINTR;
3409 fp_free(p, indx, fp);
3410 return (error);
3411 }
3412 uu->uu_dupfd = 0;
3413 vp = ndp->ni_vp;
3414
3415 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
3416 fp->f_fglob->fg_ops = &vnops;
3417 fp->f_fglob->fg_data = (caddr_t)vp;
3418
3419 if (flags & (O_EXLOCK | O_SHLOCK)) {
3420 lf.l_whence = SEEK_SET;
3421 lf.l_start = 0;
3422 lf.l_len = 0;
3423 if (flags & O_EXLOCK)
3424 lf.l_type = F_WRLCK;
3425 else
3426 lf.l_type = F_RDLCK;
3427 type = F_FLOCK;
3428 if ((flags & FNONBLOCK) == 0)
3429 type |= F_WAIT;
3430 #if CONFIG_MACF
3431 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3432 F_SETLK, &lf);
3433 if (error)
3434 goto bad;
3435 #endif
3436 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
3437 goto bad;
3438 fp->f_fglob->fg_flag |= FHASLOCK;
3439 }
3440
3441 #if DEVELOPMENT || DEBUG
3442 /*
3443 * XXX VSWAP: Check for entitlements or special flag here
3444 * so we can restrict access appropriately.
3445 */
3446 #else /* DEVELOPMENT || DEBUG */
3447
3448 if (vnode_isswap(vp) && (flags & (FWRITE | O_TRUNC)) && (ctx != vfs_context_kernel())) {
3449 /* block attempt to write/truncate swapfile */
3450 error = EPERM;
3451 goto bad;
3452 }
3453 #endif /* DEVELOPMENT || DEBUG */
3454
3455 /* try to truncate by setting the size attribute */
3456 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3457 goto bad;
3458
3459 /*
3460 * For directories we hold some additional information in the fd.
3461 */
3462 if (vnode_vtype(vp) == VDIR) {
3463 fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
3464 } else {
3465 fp->f_fglob->fg_vn_data = NULL;
3466 }
3467
3468 vnode_put(vp);
3469
3470 /*
3471 * The first terminal open (without a O_NOCTTY) by a session leader
3472 * results in it being set as the controlling terminal.
3473 */
3474 if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
3475 !(flags & O_NOCTTY)) {
3476 int tmp = 0;
3477
3478 (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
3479 (caddr_t)&tmp, ctx);
3480 }
3481
3482 proc_fdlock(p);
3483 if (flags & O_CLOEXEC)
3484 *fdflags(p, indx) |= UF_EXCLOSE;
3485 if (flags & O_CLOFORK)
3486 *fdflags(p, indx) |= UF_FORKCLOSE;
3487 procfdtbl_releasefd(p, indx, NULL);
3488
3489 #if CONFIG_SECLUDED_MEMORY
3490 if (secluded_for_filecache &&
3491 FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE &&
3492 vnode_vtype(vp) == VREG) {
3493 memory_object_control_t moc;
3494
3495 moc = ubc_getobject(vp, UBC_FLAGS_NONE);
3496
3497 if (moc == MEMORY_OBJECT_CONTROL_NULL) {
3498 /* nothing to do... */
3499 } else if (fp->f_fglob->fg_flag & FWRITE) {
3500 /* writable -> no longer eligible for secluded pages */
3501 memory_object_mark_eligible_for_secluded(moc,
3502 FALSE);
3503 } else if (secluded_for_filecache == 1) {
3504 char pathname[32] = { 0, };
3505 size_t copied;
3506 /* XXX FBDP: better way to detect /Applications/ ? */
3507 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3508 copyinstr(ndp->ni_dirp,
3509 pathname,
3510 sizeof (pathname),
3511 &copied);
3512 } else {
3513 copystr(CAST_DOWN(void *, ndp->ni_dirp),
3514 pathname,
3515 sizeof (pathname),
3516 &copied);
3517 }
3518 pathname[sizeof (pathname) - 1] = '\0';
3519 if (strncmp(pathname,
3520 "/Applications/",
3521 strlen("/Applications/")) == 0 &&
3522 strncmp(pathname,
3523 "/Applications/Camera.app/",
3524 strlen("/Applications/Camera.app/")) != 0) {
3525 /*
3526 * not writable
3527 * AND from "/Applications/"
3528 * AND not from "/Applications/Camera.app/"
3529 * ==> eligible for secluded
3530 */
3531 memory_object_mark_eligible_for_secluded(moc,
3532 TRUE);
3533 }
3534 } else if (secluded_for_filecache == 2) {
3535 /* not implemented... */
3536 if (!strncmp(vp->v_name,
3537 DYLD_SHARED_CACHE_NAME,
3538 strlen(DYLD_SHARED_CACHE_NAME)) ||
3539 !strncmp(vp->v_name,
3540 "dyld",
3541 strlen(vp->v_name)) ||
3542 !strncmp(vp->v_name,
3543 "launchd",
3544 strlen(vp->v_name)) ||
3545 !strncmp(vp->v_name,
3546 "Camera",
3547 strlen(vp->v_name)) ||
3548 !strncmp(vp->v_name,
3549 "mediaserverd",
3550 strlen(vp->v_name))) {
3551 /*
3552 * This file matters when launching Camera:
3553 * do not store its contents in the secluded
3554 * pool that will be drained on Camera launch.
3555 */
3556 memory_object_mark_eligible_for_secluded(moc,
3557 FALSE);
3558 }
3559 }
3560 }
3561 #endif /* CONFIG_SECLUDED_MEMORY */
3562
3563 fp_drop(p, indx, fp, 1);
3564 proc_fdunlock(p);
3565
3566 *retval = indx;
3567
3568 return (0);
3569 bad:
3570 context = *vfs_context_current();
3571 context.vc_ucred = fp->f_fglob->fg_cred;
3572
3573 if ((fp->f_fglob->fg_flag & FHASLOCK) &&
3574 (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
3575 lf.l_whence = SEEK_SET;
3576 lf.l_start = 0;
3577 lf.l_len = 0;
3578 lf.l_type = F_UNLCK;
3579
3580 (void)VNOP_ADVLOCK(
3581 vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
3582 }
3583
3584 vn_close(vp, fp->f_fglob->fg_flag, &context);
3585 vnode_put(vp);
3586 fp_free(p, indx, fp);
3587
3588 return (error);
3589 }
3590
3591 /*
3592 * While most of the *at syscall handlers can call nameiat() which
3593 * is a wrapper around namei, the use of namei and initialisation
3594 * of nameidata are far removed and in different functions - namei
3595 * gets called in vn_open_auth for open1. So we'll just do here what
3596 * nameiat() does.
3597 */
3598 static int
3599 open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3600 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
3601 int dirfd)
3602 {
3603 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
3604 int error;
3605 char c;
3606
3607 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3608 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3609 if (error)
3610 return (error);
3611 } else {
3612 c = *((char *)(ndp->ni_dirp));
3613 }
3614
3615 if (c != '/') {
3616 vnode_t dvp_at;
3617
3618 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3619 &dvp_at);
3620 if (error)
3621 return (error);
3622
3623 if (vnode_vtype(dvp_at) != VDIR) {
3624 vnode_put(dvp_at);
3625 return (ENOTDIR);
3626 }
3627
3628 ndp->ni_dvp = dvp_at;
3629 ndp->ni_cnd.cn_flags |= USEDVP;
3630 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
3631 retval);
3632 vnode_put(dvp_at);
3633 return (error);
3634 }
3635 }
3636
3637 return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
3638 }
3639
3640 /*
3641 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3642 *
3643 * Parameters: p Process requesting the open
3644 * uap User argument descriptor (see below)
3645 * retval Pointer to an area to receive the
3646 * return calue from the system call
3647 *
3648 * Indirect: uap->path Path to open (same as 'open')
3649 * uap->flags Flags to open (same as 'open'
3650 * uap->uid UID to set, if creating
3651 * uap->gid GID to set, if creating
3652 * uap->mode File mode, if creating (same as 'open')
3653 * uap->xsecurity ACL to set, if creating
3654 *
3655 * Returns: 0 Success
3656 * !0 errno value
3657 *
3658 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3659 *
3660 * XXX: We should enummerate the possible errno values here, and where
3661 * in the code they originated.
3662 */
3663 int
3664 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
3665 {
3666 struct filedesc *fdp = p->p_fd;
3667 int ciferror;
3668 kauth_filesec_t xsecdst;
3669 struct vnode_attr va;
3670 struct nameidata nd;
3671 int cmode;
3672
3673 AUDIT_ARG(owner, uap->uid, uap->gid);
3674
3675 xsecdst = NULL;
3676 if ((uap->xsecurity != USER_ADDR_NULL) &&
3677 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3678 return ciferror;
3679
3680 VATTR_INIT(&va);
3681 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3682 VATTR_SET(&va, va_mode, cmode);
3683 if (uap->uid != KAUTH_UID_NONE)
3684 VATTR_SET(&va, va_uid, uap->uid);
3685 if (uap->gid != KAUTH_GID_NONE)
3686 VATTR_SET(&va, va_gid, uap->gid);
3687 if (xsecdst != NULL)
3688 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3689
3690 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3691 uap->path, vfs_context_current());
3692
3693 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3694 fileproc_alloc_init, NULL, retval);
3695 if (xsecdst != NULL)
3696 kauth_filesec_free(xsecdst);
3697
3698 return ciferror;
3699 }
3700
3701 /*
3702 * Go through the data-protected atomically controlled open (2)
3703 *
3704 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3705 */
3706 int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3707 int flags = uap->flags;
3708 int class = uap->class;
3709 int dpflags = uap->dpflags;
3710
3711 /*
3712 * Follow the same path as normal open(2)
3713 * Look up the item if it exists, and acquire the vnode.
3714 */
3715 struct filedesc *fdp = p->p_fd;
3716 struct vnode_attr va;
3717 struct nameidata nd;
3718 int cmode;
3719 int error;
3720
3721 VATTR_INIT(&va);
3722 /* Mask off all but regular access permissions */
3723 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3724 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3725
3726 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3727 uap->path, vfs_context_current());
3728
3729 /*
3730 * Initialize the extra fields in vnode_attr to pass down our
3731 * extra fields.
3732 * 1. target cprotect class.
3733 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3734 */
3735 if (flags & O_CREAT) {
3736 /* lower level kernel code validates that the class is valid before applying it. */
3737 if (class != PROTECTION_CLASS_DEFAULT) {
3738 /*
3739 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3740 * file behave the same as open (2)
3741 */
3742 VATTR_SET(&va, va_dataprotect_class, class);
3743 }
3744 }
3745
3746 if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
3747 if ( flags & (O_RDWR | O_WRONLY)) {
3748 /* Not allowed to write raw encrypted bytes */
3749 return EINVAL;
3750 }
3751 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
3752 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3753 }
3754 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
3755 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
3756 }
3757 }
3758
3759 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3760 fileproc_alloc_init, NULL, retval);
3761
3762 return error;
3763 }
3764
3765 static int
3766 openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
3767 int fd, enum uio_seg segflg, int *retval)
3768 {
3769 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
3770 struct vnode_attr va;
3771 struct nameidata nd;
3772 int cmode;
3773
3774 VATTR_INIT(&va);
3775 /* Mask off all but regular access permissions */
3776 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3777 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3778
3779 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
3780 segflg, path, ctx);
3781
3782 return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
3783 retval, fd));
3784 }
3785
3786 int
3787 open(proc_t p, struct open_args *uap, int32_t *retval)
3788 {
3789 __pthread_testcancel(1);
3790 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3791 }
3792
3793 int
3794 open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3795 int32_t *retval)
3796 {
3797 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3798 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3799 }
3800
3801 int
3802 openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3803 int32_t *retval)
3804 {
3805 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3806 uap->mode, uap->fd, UIO_USERSPACE, retval));
3807 }
3808
3809 int
3810 openat(proc_t p, struct openat_args *uap, int32_t *retval)
3811 {
3812 __pthread_testcancel(1);
3813 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3814 }
3815
3816 /*
3817 * openbyid_np: open a file given a file system id and a file system object id
3818 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3819 * file systems that don't support object ids it is a node id (uint64_t).
3820 *
3821 * Parameters: p Process requesting the open
3822 * uap User argument descriptor (see below)
3823 * retval Pointer to an area to receive the
3824 * return calue from the system call
3825 *
3826 * Indirect: uap->path Path to open (same as 'open')
3827 *
3828 * uap->fsid id of target file system
3829 * uap->objid id of target file system object
3830 * uap->flags Flags to open (same as 'open')
3831 *
3832 * Returns: 0 Success
3833 * !0 errno value
3834 *
3835 *
3836 * XXX: We should enummerate the possible errno values here, and where
3837 * in the code they originated.
3838 */
3839 int
3840 openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
3841 {
3842 fsid_t fsid;
3843 uint64_t objid;
3844 int error;
3845 char *buf = NULL;
3846 int buflen = MAXPATHLEN;
3847 int pathlen = 0;
3848 vfs_context_t ctx = vfs_context_current();
3849
3850 if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
3851 return (error);
3852 }
3853
3854 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
3855 return (error);
3856 }
3857
3858 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3859 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
3860 return (error);
3861 }
3862
3863 AUDIT_ARG(value32, fsid.val[0]);
3864 AUDIT_ARG(value64, objid);
3865
3866 /*resolve path from fsis, objid*/
3867 do {
3868 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
3869 if (buf == NULL) {
3870 return (ENOMEM);
3871 }
3872
3873 error = fsgetpath_internal(
3874 ctx, fsid.val[0], objid,
3875 buflen, buf, &pathlen);
3876
3877 if (error) {
3878 FREE(buf, M_TEMP);
3879 buf = NULL;
3880 }
3881 } while (error == ENOSPC && (buflen += MAXPATHLEN));
3882
3883 if (error) {
3884 return error;
3885 }
3886
3887 buf[pathlen] = 0;
3888
3889 error = openat_internal(
3890 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
3891
3892 FREE(buf, M_TEMP);
3893
3894 return error;
3895 }
3896
3897
3898 /*
3899 * Create a special file.
3900 */
3901 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3902
3903 int
3904 mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
3905 {
3906 struct vnode_attr va;
3907 vfs_context_t ctx = vfs_context_current();
3908 int error;
3909 struct nameidata nd;
3910 vnode_t vp, dvp;
3911
3912 VATTR_INIT(&va);
3913 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3914 VATTR_SET(&va, va_rdev, uap->dev);
3915
3916 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3917 if ((uap->mode & S_IFMT) == S_IFIFO)
3918 return(mkfifo1(ctx, uap->path, &va));
3919
3920 AUDIT_ARG(mode, uap->mode);
3921 AUDIT_ARG(value32, uap->dev);
3922
3923 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
3924 return (error);
3925 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
3926 UIO_USERSPACE, uap->path, ctx);
3927 error = namei(&nd);
3928 if (error)
3929 return (error);
3930 dvp = nd.ni_dvp;
3931 vp = nd.ni_vp;
3932
3933 if (vp != NULL) {
3934 error = EEXIST;
3935 goto out;
3936 }
3937
3938 switch (uap->mode & S_IFMT) {
3939 case S_IFCHR:
3940 VATTR_SET(&va, va_type, VCHR);
3941 break;
3942 case S_IFBLK:
3943 VATTR_SET(&va, va_type, VBLK);
3944 break;
3945 default:
3946 error = EINVAL;
3947 goto out;
3948 }
3949
3950 #if CONFIG_MACF
3951 error = mac_vnode_check_create(ctx,
3952 nd.ni_dvp, &nd.ni_cnd, &va);
3953 if (error)
3954 goto out;
3955 #endif
3956
3957 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3958 goto out;
3959
3960 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
3961 goto out;
3962
3963 if (vp) {
3964 int update_flags = 0;
3965
3966 // Make sure the name & parent pointers are hooked up
3967 if (vp->v_name == NULL)
3968 update_flags |= VNODE_UPDATE_NAME;
3969 if (vp->v_parent == NULLVP)
3970 update_flags |= VNODE_UPDATE_PARENT;
3971
3972 if (update_flags)
3973 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3974
3975 #if CONFIG_FSE
3976 add_fsevent(FSE_CREATE_FILE, ctx,
3977 FSE_ARG_VNODE, vp,
3978 FSE_ARG_DONE);
3979 #endif
3980 }
3981
3982 out:
3983 /*
3984 * nameidone has to happen before we vnode_put(dvp)
3985 * since it may need to release the fs_nodelock on the dvp
3986 */
3987 nameidone(&nd);
3988
3989 if (vp)
3990 vnode_put(vp);
3991 vnode_put(dvp);
3992
3993 return (error);
3994 }
3995
3996 /*
3997 * Create a named pipe.
3998 *
3999 * Returns: 0 Success
4000 * EEXIST
4001 * namei:???
4002 * vnode_authorize:???
4003 * vn_create:???
4004 */
4005 static int
4006 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
4007 {
4008 vnode_t vp, dvp;
4009 int error;
4010 struct nameidata nd;
4011
4012 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
4013 UIO_USERSPACE, upath, ctx);
4014 error = namei(&nd);
4015 if (error)
4016 return (error);
4017 dvp = nd.ni_dvp;
4018 vp = nd.ni_vp;
4019
4020 /* check that this is a new file and authorize addition */
4021 if (vp != NULL) {
4022 error = EEXIST;
4023 goto out;
4024 }
4025 VATTR_SET(vap, va_type, VFIFO);
4026
4027 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
4028 goto out;
4029
4030 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
4031 out:
4032 /*
4033 * nameidone has to happen before we vnode_put(dvp)
4034 * since it may need to release the fs_nodelock on the dvp
4035 */
4036 nameidone(&nd);
4037
4038 if (vp)
4039 vnode_put(vp);
4040 vnode_put(dvp);
4041
4042 return error;
4043 }
4044
4045
4046 /*
4047 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
4048 *
4049 * Parameters: p Process requesting the open
4050 * uap User argument descriptor (see below)
4051 * retval (Ignored)
4052 *
4053 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4054 * uap->uid UID to set
4055 * uap->gid GID to set
4056 * uap->mode File mode to set (same as 'mkfifo')
4057 * uap->xsecurity ACL to set, if creating
4058 *
4059 * Returns: 0 Success
4060 * !0 errno value
4061 *
4062 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4063 *
4064 * XXX: We should enummerate the possible errno values here, and where
4065 * in the code they originated.
4066 */
4067 int
4068 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
4069 {
4070 int ciferror;
4071 kauth_filesec_t xsecdst;
4072 struct vnode_attr va;
4073
4074 AUDIT_ARG(owner, uap->uid, uap->gid);
4075
4076 xsecdst = KAUTH_FILESEC_NONE;
4077 if (uap->xsecurity != USER_ADDR_NULL) {
4078 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4079 return ciferror;
4080 }
4081
4082 VATTR_INIT(&va);
4083 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4084 if (uap->uid != KAUTH_UID_NONE)
4085 VATTR_SET(&va, va_uid, uap->uid);
4086 if (uap->gid != KAUTH_GID_NONE)
4087 VATTR_SET(&va, va_gid, uap->gid);
4088 if (xsecdst != KAUTH_FILESEC_NONE)
4089 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4090
4091 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
4092
4093 if (xsecdst != KAUTH_FILESEC_NONE)
4094 kauth_filesec_free(xsecdst);
4095 return ciferror;
4096 }
4097
4098 /* ARGSUSED */
4099 int
4100 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
4101 {
4102 struct vnode_attr va;
4103
4104 VATTR_INIT(&va);
4105 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4106
4107 return(mkfifo1(vfs_context_current(), uap->path, &va));
4108 }
4109
4110
/*
 * my_strrchr: return a pointer to the last occurrence of 'ch' in the
 * NUL-terminated string 'p', or NULL if it does not occur.  The
 * terminator itself is examined, so searching for 0 yields a pointer
 * to the end of the string.
 */
static char *
my_strrchr(char *p, int ch)
{
	char *last = NULL;

	do {
		if (*p == ch)
			last = p;
	} while (*p++ != '\0');

	return(last);
}
4124
4125 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4126
4127 int
4128 safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4129 {
4130 int ret, len = _len;
4131
4132 *truncated_path = 0;
4133 ret = vn_getpath(dvp, path, &len);
4134 if (ret == 0 && len < (MAXPATHLEN - 1)) {
4135 if (leafname) {
4136 path[len-1] = '/';
4137 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
4138 if (len > MAXPATHLEN) {
4139 char *ptr;
4140
4141 // the string got truncated!
4142 *truncated_path = 1;
4143 ptr = my_strrchr(path, '/');
4144 if (ptr) {
4145 *ptr = '\0'; // chop off the string at the last directory component
4146 }
4147 len = strlen(path) + 1;
4148 }
4149 }
4150 } else if (ret == 0) {
4151 *truncated_path = 1;
4152 } else if (ret != 0) {
4153 struct vnode *mydvp=dvp;
4154
4155 if (ret != ENOSPC) {
4156 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4157 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
4158 }
4159 *truncated_path = 1;
4160
4161 do {
4162 if (mydvp->v_parent != NULL) {
4163 mydvp = mydvp->v_parent;
4164 } else if (mydvp->v_mount) {
4165 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4166 break;
4167 } else {
4168 // no parent and no mount point? only thing is to punt and say "/" changed
4169 strlcpy(path, "/", _len);
4170 len = 2;
4171 mydvp = NULL;
4172 }
4173
4174 if (mydvp == NULL) {
4175 break;
4176 }
4177
4178 len = _len;
4179 ret = vn_getpath(mydvp, path, &len);
4180 } while (ret == ENOSPC);
4181 }
4182
4183 return len;
4184 }
4185
4186
4187 /*
4188 * Make a hard file link.
4189 *
4190 * Returns: 0 Success
4191 * EPERM
4192 * EEXIST
4193 * EXDEV
4194 * namei:???
4195 * vnode_authorize:???
4196 * VNOP_LINK:???
4197 */
4198 /* ARGSUSED */
4199 static int
4200 linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
4201 user_addr_t link, int flag, enum uio_seg segflg)
4202 {
4203 vnode_t vp, dvp, lvp;
4204 struct nameidata nd;
4205 int follow;
4206 int error;
4207 #if CONFIG_FSE
4208 fse_info finfo;
4209 #endif
4210 int need_event, has_listeners;
4211 char *target_path = NULL;
4212 int truncated=0;
4213
4214 vp = dvp = lvp = NULLVP;
4215
4216 /* look up the object we are linking to */
4217 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
4218 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
4219 segflg, path, ctx);
4220
4221 error = nameiat(&nd, fd1);
4222 if (error)
4223 return (error);
4224 vp = nd.ni_vp;
4225
4226 nameidone(&nd);
4227
4228 /*
4229 * Normally, linking to directories is not supported.
4230 * However, some file systems may have limited support.
4231 */
4232 if (vp->v_type == VDIR) {
4233 if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
4234 error = EPERM; /* POSIX */
4235 goto out;
4236 }
4237
4238 /* Linking to a directory requires ownership. */
4239 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
4240 struct vnode_attr dva;
4241
4242 VATTR_INIT(&dva);
4243 VATTR_WANTED(&dva, va_uid);
4244 if (vnode_getattr(vp, &dva, ctx) != 0 ||
4245 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
4246 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
4247 error = EACCES;
4248 goto out;
4249 }
4250 }
4251 }
4252
4253 /* lookup the target node */
4254 #if CONFIG_TRIGGERS
4255 nd.ni_op = OP_LINK;
4256 #endif
4257 nd.ni_cnd.cn_nameiop = CREATE;
4258 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
4259 nd.ni_dirp = link;
4260 error = nameiat(&nd, fd2);
4261 if (error != 0)
4262 goto out;
4263 dvp = nd.ni_dvp;
4264 lvp = nd.ni_vp;
4265
4266 #if CONFIG_MACF
4267 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
4268 goto out2;
4269 #endif
4270
4271 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4272 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
4273 goto out2;
4274
4275 /* target node must not exist */
4276 if (lvp != NULLVP) {
4277 error = EEXIST;
4278 goto out2;
4279 }
4280 /* cannot link across mountpoints */
4281 if (vnode_mount(vp) != vnode_mount(dvp)) {
4282 error = EXDEV;
4283 goto out2;
4284 }
4285
4286 /* authorize creation of the target note */
4287 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4288 goto out2;
4289
4290 /* and finally make the link */
4291 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
4292 if (error)
4293 goto out2;
4294
4295 #if CONFIG_MACF
4296 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
4297 #endif
4298
4299 #if CONFIG_FSE
4300 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
4301 #else
4302 need_event = 0;
4303 #endif
4304 has_listeners = kauth_authorize_fileop_has_listeners();
4305
4306 if (need_event || has_listeners) {
4307 char *link_to_path = NULL;
4308 int len, link_name_len;
4309
4310 /* build the path to the new link file */
4311 GET_PATH(target_path);
4312 if (target_path == NULL) {
4313 error = ENOMEM;
4314 goto out2;
4315 }
4316
4317 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
4318
4319 if (has_listeners) {
4320 /* build the path to file we are linking to */
4321 GET_PATH(link_to_path);
4322 if (link_to_path == NULL) {
4323 error = ENOMEM;
4324 goto out2;
4325 }
4326
4327 link_name_len = MAXPATHLEN;
4328 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
4329 /*
4330 * Call out to allow 3rd party notification of rename.
4331 * Ignore result of kauth_authorize_fileop call.
4332 */
4333 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
4334 (uintptr_t)link_to_path,
4335 (uintptr_t)target_path);
4336 }
4337 if (link_to_path != NULL) {
4338 RELEASE_PATH(link_to_path);
4339 }
4340 }
4341 #if CONFIG_FSE
4342 if (need_event) {
4343 /* construct fsevent */
4344 if (get_fse_info(vp, &finfo, ctx) == 0) {
4345 if (truncated) {
4346 finfo.mode |= FSE_TRUNCATED_PATH;
4347 }
4348
4349 // build the path to the destination of the link
4350 add_fsevent(FSE_CREATE_FILE, ctx,
4351 FSE_ARG_STRING, len, target_path,
4352 FSE_ARG_FINFO, &finfo,
4353 FSE_ARG_DONE);
4354 }
4355 if (vp->v_parent) {
4356 add_fsevent(FSE_STAT_CHANGED, ctx,
4357 FSE_ARG_VNODE, vp->v_parent,
4358 FSE_ARG_DONE);
4359 }
4360 }
4361 #endif
4362 }
4363 out2:
4364 /*
4365 * nameidone has to happen before we vnode_put(dvp)
4366 * since it may need to release the fs_nodelock on the dvp
4367 */
4368 nameidone(&nd);
4369 if (target_path != NULL) {
4370 RELEASE_PATH(target_path);
4371 }
4372 out:
4373 if (lvp)
4374 vnode_put(lvp);
4375 if (dvp)
4376 vnode_put(dvp);
4377 vnode_put(vp);
4378 return (error);
4379 }
4380
4381 int
4382 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4383 {
4384 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4385 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4386 }
4387
4388 int
4389 linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4390 {
4391 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4392 return (EINVAL);
4393
4394 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4395 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4396 }
4397
4398 /*
4399 * Make a symbolic link.
4400 *
4401 * We could add support for ACLs here too...
4402 */
4403 /* ARGSUSED */
4404 static int
4405 symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
4406 user_addr_t link, enum uio_seg segflg)
4407 {
4408 struct vnode_attr va;
4409 char *path;
4410 int error;
4411 struct nameidata nd;
4412 vnode_t vp, dvp;
4413 size_t dummy=0;
4414 proc_t p;
4415
4416 error = 0;
4417 if (UIO_SEG_IS_USER_SPACE(segflg)) {
4418 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
4419 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
4420 } else {
4421 path = (char *)path_data;
4422 }
4423 if (error)
4424 goto out;
4425 AUDIT_ARG(text, path); /* This is the link string */
4426
4427 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
4428 segflg, link, ctx);
4429
4430 error = nameiat(&nd, fd);
4431 if (error)
4432 goto out;
4433 dvp = nd.ni_dvp;
4434 vp = nd.ni_vp;
4435
4436 p = vfs_context_proc(ctx);
4437 VATTR_INIT(&va);
4438 VATTR_SET(&va, va_type, VLNK);
4439 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
4440
4441 #if CONFIG_MACF
4442 error = mac_vnode_check_create(ctx,
4443 dvp, &nd.ni_cnd, &va);
4444 #endif
4445 if (error != 0) {
4446 goto skipit;
4447 }
4448
4449 if (vp != NULL) {
4450 error = EEXIST;
4451 goto skipit;
4452 }
4453
4454 /* authorize */
4455 if (error == 0)
4456 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
4457 /* get default ownership, etc. */
4458 if (error == 0)
4459 error = vnode_authattr_new(dvp, &va, 0, ctx);
4460 if (error == 0)
4461 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
4462
4463 #if CONFIG_MACF
4464 if (error == 0 && vp)
4465 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
4466 #endif
4467
4468 /* do fallback attribute handling */
4469 if (error == 0 && vp)
4470 error = vnode_setattr_fallback(vp, &va, ctx);
4471
4472 if (error == 0) {
4473 int update_flags = 0;
4474
4475 /*check if a new vnode was created, else try to get one*/
4476 if (vp == NULL) {
4477 nd.ni_cnd.cn_nameiop = LOOKUP;
4478 #if CONFIG_TRIGGERS
4479 nd.ni_op = OP_LOOKUP;
4480 #endif
4481 nd.ni_cnd.cn_flags = 0;
4482 error = nameiat(&nd, fd);
4483 vp = nd.ni_vp;
4484
4485 if (vp == NULL)
4486 goto skipit;
4487 }
4488
4489 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
4490 /* call out to allow 3rd party notification of rename.
4491 * Ignore result of kauth_authorize_fileop call.
4492 */
4493 if (kauth_authorize_fileop_has_listeners() &&
4494 namei(&nd) == 0) {
4495 char *new_link_path = NULL;
4496 int len;
4497
4498 /* build the path to the new link file */
4499 new_link_path = get_pathbuff();
4500 len = MAXPATHLEN;
4501 vn_getpath(dvp, new_link_path, &len);
4502 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
4503 new_link_path[len - 1] = '/';
4504 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
4505 }
4506
4507 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
4508 (uintptr_t)path, (uintptr_t)new_link_path);
4509 if (new_link_path != NULL)
4510 release_pathbuff(new_link_path);
4511 }
4512 #endif
4513 // Make sure the name & parent pointers are hooked up
4514 if (vp->v_name == NULL)
4515 update_flags |= VNODE_UPDATE_NAME;
4516 if (vp->v_parent == NULLVP)
4517 update_flags |= VNODE_UPDATE_PARENT;
4518
4519 if (update_flags)
4520 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
4521
4522 #if CONFIG_FSE
4523 add_fsevent(FSE_CREATE_FILE, ctx,
4524 FSE_ARG_VNODE, vp,
4525 FSE_ARG_DONE);
4526 #endif
4527 }
4528
4529 skipit:
4530 /*
4531 * nameidone has to happen before we vnode_put(dvp)
4532 * since it may need to release the fs_nodelock on the dvp
4533 */
4534 nameidone(&nd);
4535
4536 if (vp)
4537 vnode_put(vp);
4538 vnode_put(dvp);
4539 out:
4540 if (path && (path != (char *)path_data))
4541 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
4542
4543 return (error);
4544 }
4545
4546 int
4547 symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4548 {
4549 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4550 uap->link, UIO_USERSPACE));
4551 }
4552
4553 int
4554 symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4555 __unused int32_t *retval)
4556 {
4557 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4558 uap->path2, UIO_USERSPACE));
4559 }
4560
4561 /*
4562 * Delete a whiteout from the filesystem.
4563 * No longer supported.
4564 */
4565 int
4566 undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
4567 {
4568 return (ENOTSUP);
4569 }
4570
4571 /*
4572 * Delete a name from the filesystem.
4573 */
4574 /* ARGSUSED */
4575 static int
4576 unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
4577 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
4578 {
4579 struct nameidata nd;
4580 vnode_t vp, dvp;
4581 int error;
4582 struct componentname *cnp;
4583 char *path = NULL;
4584 int len=0;
4585 #if CONFIG_FSE
4586 fse_info finfo;
4587 struct vnode_attr va;
4588 #endif
4589 int flags;
4590 int need_event;
4591 int has_listeners;
4592 int truncated_path;
4593 int batched;
4594 struct vnode_attr *vap;
4595 int do_retry;
4596 int retry_count = 0;
4597 int cn_flags;
4598
4599 cn_flags = LOCKPARENT;
4600 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
4601 cn_flags |= AUDITVNPATH1;
4602 /* If a starting dvp is passed, it trumps any fd passed. */
4603 if (start_dvp)
4604 cn_flags |= USEDVP;
4605
4606 #if NAMEDRSRCFORK
4607 /* unlink or delete is allowed on rsrc forks and named streams */
4608 cn_flags |= CN_ALLOWRSRCFORK;
4609 #endif
4610
4611 retry:
4612 do_retry = 0;
4613 flags = 0;
4614 need_event = 0;
4615 has_listeners = 0;
4616 truncated_path = 0;
4617 vap = NULL;
4618
4619 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
4620
4621 nd.ni_dvp = start_dvp;
4622 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
4623 cnp = &nd.ni_cnd;
4624
4625 continue_lookup:
4626 error = nameiat(&nd, fd);
4627 if (error)
4628 return (error);
4629
4630 dvp = nd.ni_dvp;
4631 vp = nd.ni_vp;
4632
4633
4634 /* With Carbon delete semantics, busy files cannot be deleted */
4635 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
4636 flags |= VNODE_REMOVE_NODELETEBUSY;
4637 }
4638
4639 /* Skip any potential upcalls if told to. */
4640 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4641 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4642 }
4643
4644 if (vp) {
4645 batched = vnode_compound_remove_available(vp);
4646 /*
4647 * The root of a mounted filesystem cannot be deleted.
4648 */
4649 if (vp->v_flag & VROOT) {
4650 error = EBUSY;
4651 }
4652
4653 #if DEVELOPMENT || DEBUG
4654 /*
4655 * XXX VSWAP: Check for entitlements or special flag here
4656 * so we can restrict access appropriately.
4657 */
4658 #else /* DEVELOPMENT || DEBUG */
4659
4660 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
4661 error = EPERM;
4662 goto out;
4663 }
4664 #endif /* DEVELOPMENT || DEBUG */
4665
4666 if (!batched) {
4667 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4668 if (error) {
4669 if (error == ENOENT) {
4670 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4671 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4672 do_retry = 1;
4673 retry_count++;
4674 }
4675 }
4676 goto out;
4677 }
4678 }
4679 } else {
4680 batched = 1;
4681
4682 if (!vnode_compound_remove_available(dvp)) {
4683 panic("No vp, but no compound remove?");
4684 }
4685 }
4686
4687 #if CONFIG_FSE
4688 need_event = need_fsevent(FSE_DELETE, dvp);
4689 if (need_event) {
4690 if (!batched) {
4691 if ((vp->v_flag & VISHARDLINK) == 0) {
4692 /* XXX need to get these data in batched VNOP */
4693 get_fse_info(vp, &finfo, ctx);
4694 }
4695 } else {
4696 error = vfs_get_notify_attributes(&va);
4697 if (error) {
4698 goto out;
4699 }
4700
4701 vap = &va;
4702 }
4703 }
4704 #endif
4705 has_listeners = kauth_authorize_fileop_has_listeners();
4706 if (need_event || has_listeners) {
4707 if (path == NULL) {
4708 GET_PATH(path);
4709 if (path == NULL) {
4710 error = ENOMEM;
4711 goto out;
4712 }
4713 }
4714 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
4715 }
4716
4717 #if NAMEDRSRCFORK
4718 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
4719 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4720 else
4721 #endif
4722 {
4723 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
4724 vp = nd.ni_vp;
4725 if (error == EKEEPLOOKING) {
4726 if (!batched) {
4727 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4728 }
4729
4730 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
4731 panic("EKEEPLOOKING, but continue flag not set?");
4732 }
4733
4734 if (vnode_isdir(vp)) {
4735 error = EISDIR;
4736 goto out;
4737 }
4738 goto continue_lookup;
4739 } else if (error == ENOENT && batched) {
4740 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4741 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4742 /*
4743 * For compound VNOPs, the authorization callback may
4744 * return ENOENT in case of racing hardlink lookups
4745 * hitting the name cache, redrive the lookup.
4746 */
4747 do_retry = 1;
4748 retry_count += 1;
4749 goto out;
4750 }
4751 }
4752 }
4753
4754 /*
4755 * Call out to allow 3rd party notification of delete.
4756 * Ignore result of kauth_authorize_fileop call.
4757 */
4758 if (!error) {
4759 if (has_listeners) {
4760 kauth_authorize_fileop(vfs_context_ucred(ctx),
4761 KAUTH_FILEOP_DELETE,
4762 (uintptr_t)vp,
4763 (uintptr_t)path);
4764 }
4765
4766 if (vp->v_flag & VISHARDLINK) {
4767 //
4768 // if a hardlink gets deleted we want to blow away the
4769 // v_parent link because the path that got us to this
4770 // instance of the link is no longer valid. this will
4771 // force the next call to get the path to ask the file
4772 // system instead of just following the v_parent link.
4773 //
4774 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
4775 }
4776
4777 #if CONFIG_FSE
4778 if (need_event) {
4779 if (vp->v_flag & VISHARDLINK) {
4780 get_fse_info(vp, &finfo, ctx);
4781 } else if (vap) {
4782 vnode_get_fse_info_from_vap(vp, &finfo, vap);
4783 }
4784 if (truncated_path) {
4785 finfo.mode |= FSE_TRUNCATED_PATH;
4786 }
4787 add_fsevent(FSE_DELETE, ctx,
4788 FSE_ARG_STRING, len, path,
4789 FSE_ARG_FINFO, &finfo,
4790 FSE_ARG_DONE);
4791 }
4792 #endif
4793 }
4794
4795 out:
4796 if (path != NULL)
4797 RELEASE_PATH(path);
4798
4799 #if NAMEDRSRCFORK
4800 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4801 * will cause its shadow file to go away if necessary.
4802 */
4803 if (vp && (vnode_isnamedstream(vp)) &&
4804 (vp->v_parent != NULLVP) &&
4805 vnode_isshadow(vp)) {
4806 vnode_recycle(vp);
4807 }
4808 #endif
4809 /*
4810 * nameidone has to happen before we vnode_put(dvp)
4811 * since it may need to release the fs_nodelock on the dvp
4812 */
4813 nameidone(&nd);
4814 vnode_put(dvp);
4815 if (vp) {
4816 vnode_put(vp);
4817 }
4818
4819 if (do_retry) {
4820 goto retry;
4821 }
4822
4823 return (error);
4824 }
4825
4826 int
4827 unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
4828 enum uio_seg segflg, int unlink_flags)
4829 {
4830 return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
4831 unlink_flags));
4832 }
4833
4834 /*
4835 * Delete a name from the filesystem using Carbon semantics.
4836 */
4837 int
4838 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
4839 {
4840 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4841 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
4842 }
4843
4844 /*
4845 * Delete a name from the filesystem using POSIX semantics.
4846 */
4847 int
4848 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
4849 {
4850 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4851 uap->path, UIO_USERSPACE, 0));
4852 }
4853
4854 int
4855 unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
4856 {
4857 if (uap->flag & ~AT_REMOVEDIR)
4858 return (EINVAL);
4859
4860 if (uap->flag & AT_REMOVEDIR)
4861 return (rmdirat_internal(vfs_context_current(), uap->fd,
4862 uap->path, UIO_USERSPACE));
4863 else
4864 return (unlinkat_internal(vfs_context_current(), uap->fd,
4865 NULLVP, uap->path, UIO_USERSPACE, 0));
4866 }
4867
4868 /*
4869 * Reposition read/write file offset.
4870 */
4871 int
4872 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
4873 {
4874 struct fileproc *fp;
4875 vnode_t vp;
4876 struct vfs_context *ctx;
4877 off_t offset = uap->offset, file_size;
4878 int error;
4879
4880 if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
4881 if (error == ENOTSUP)
4882 return (ESPIPE);
4883 return (error);
4884 }
4885 if (vnode_isfifo(vp)) {
4886 file_drop(uap->fd);
4887 return(ESPIPE);
4888 }
4889
4890
4891 ctx = vfs_context_current();
4892 #if CONFIG_MACF
4893 if (uap->whence == L_INCR && uap->offset == 0)
4894 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
4895 fp->f_fglob);
4896 else
4897 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
4898 fp->f_fglob);
4899 if (error) {
4900 file_drop(uap->fd);
4901 return (error);
4902 }
4903 #endif
4904 if ( (error = vnode_getwithref(vp)) ) {
4905 file_drop(uap->fd);
4906 return(error);
4907 }
4908
4909 switch (uap->whence) {
4910 case L_INCR:
4911 offset += fp->f_fglob->fg_offset;
4912 break;
4913 case L_XTND:
4914 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
4915 break;
4916 offset += file_size;
4917 break;
4918 case L_SET:
4919 break;
4920 case SEEK_HOLE:
4921 error = VNOP_IOCTL(vp, FSCTL_FIOSEEKHOLE, (caddr_t)&offset, 0, ctx);
4922 break;
4923 case SEEK_DATA:
4924 error = VNOP_IOCTL(vp, FSCTL_FIOSEEKDATA, (caddr_t)&offset, 0, ctx);
4925 break;
4926 default:
4927 error = EINVAL;
4928 }
4929 if (error == 0) {
4930 if (uap->offset > 0 && offset < 0) {
4931 /* Incremented/relative move past max size */
4932 error = EOVERFLOW;
4933 } else {
4934 /*
4935 * Allow negative offsets on character devices, per
4936 * POSIX 1003.1-2001. Most likely for writing disk
4937 * labels.
4938 */
4939 if (offset < 0 && vp->v_type != VCHR) {
4940 /* Decremented/relative move before start */
4941 error = EINVAL;
4942 } else {
4943 /* Success */
4944 fp->f_fglob->fg_offset = offset;
4945 *retval = fp->f_fglob->fg_offset;
4946 }
4947 }
4948 }
4949
4950 /*
4951 * An lseek can affect whether data is "available to read." Use
4952 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4953 */
4954 post_event_if_success(vp, error, NOTE_NONE);
4955 (void)vnode_put(vp);
4956 file_drop(uap->fd);
4957 return (error);
4958 }
4959
4960
4961 /*
4962 * Check access permissions.
4963 *
4964 * Returns: 0 Success
4965 * vnode_authorize:???
4966 */
4967 static int
4968 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
4969 {
4970 kauth_action_t action;
4971 int error;
4972
4973 /*
4974 * If just the regular access bits, convert them to something
4975 * that vnode_authorize will understand.
4976 */
4977 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4978 action = 0;
4979 if (uflags & R_OK)
4980 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4981 if (uflags & W_OK) {
4982 if (vnode_isdir(vp)) {
4983 action |= KAUTH_VNODE_ADD_FILE |
4984 KAUTH_VNODE_ADD_SUBDIRECTORY;
4985 /* might want delete rights here too */
4986 } else {
4987 action |= KAUTH_VNODE_WRITE_DATA;
4988 }
4989 }
4990 if (uflags & X_OK) {
4991 if (vnode_isdir(vp)) {
4992 action |= KAUTH_VNODE_SEARCH;
4993 } else {
4994 action |= KAUTH_VNODE_EXECUTE;
4995 }
4996 }
4997 } else {
4998 /* take advantage of definition of uflags */
4999 action = uflags >> 8;
5000 }
5001
5002 #if CONFIG_MACF
5003 error = mac_vnode_check_access(ctx, vp, uflags);
5004 if (error)
5005 return (error);
5006 #endif /* MAC */
5007
5008 /* action == 0 means only check for existence */
5009 if (action != 0) {
5010 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
5011 } else {
5012 error = 0;
5013 }
5014
5015 return(error);
5016 }
5017
5018
5019
5020 /*
5021 * access_extended: Check access permissions in bulk.
5022 *
5023 * Description: uap->entries Pointer to an array of accessx
5024 * descriptor structs, plus one or
5025 * more NULL terminated strings (see
5026 * "Notes" section below).
5027 * uap->size Size of the area pointed to by
5028 * uap->entries.
5029 * uap->results Pointer to the results array.
5030 *
5031 * Returns: 0 Success
5032 * ENOMEM Insufficient memory
5033 * EINVAL Invalid arguments
5034 * namei:EFAULT Bad address
5035 * namei:ENAMETOOLONG Filename too long
5036 * namei:ENOENT No such file or directory
5037 * namei:ELOOP Too many levels of symbolic links
5038 * namei:EBADF Bad file descriptor
5039 * namei:ENOTDIR Not a directory
5040 * namei:???
5041 * access1:
5042 *
5043 * Implicit returns:
5044 * uap->results Array contents modified
5045 *
5046 * Notes: The uap->entries are structured as an arbitrary length array
5047 * of accessx descriptors, followed by one or more NULL terminated
5048 * strings
5049 *
5050 * struct accessx_descriptor[0]
5051 * ...
5052 * struct accessx_descriptor[n]
5053 * char name_data[0];
5054 *
5055 * We determine the entry count by walking the buffer containing
5056 * the uap->entries argument descriptor. For each descriptor we
5057 * see, the valid values for the offset ad_name_offset will be
5058 * in the byte range:
5059 *
5060 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5061 * to
5062 * [ uap->entries + uap->size - 2 ]
5063 *
5064 * since we must have at least one string, and the string must
5065 * be at least one character plus the NULL terminator in length.
5066 *
5067 * XXX: Need to support the check-as uid argument
5068 */
5069 int
5070 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
5071 {
5072 struct accessx_descriptor *input = NULL;
5073 errno_t *result = NULL;
5074 errno_t error = 0;
5075 int wantdelete = 0;
5076 unsigned int desc_max, desc_actual, i, j;
5077 struct vfs_context context;
5078 struct nameidata nd;
5079 int niopts;
5080 vnode_t vp = NULL;
5081 vnode_t dvp = NULL;
5082 #define ACCESSX_MAX_DESCR_ON_STACK 10
5083 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
5084
5085 context.vc_ucred = NULL;
5086
5087 /*
5088 * Validate parameters; if valid, copy the descriptor array and string
5089 * arguments into local memory. Before proceeding, the following
5090 * conditions must have been met:
5091 *
5092 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5093 * o There must be sufficient room in the request for at least one
5094 * descriptor and a one yte NUL terminated string.
5095 * o The allocation of local storage must not fail.
5096 */
5097 if (uap->size > ACCESSX_MAX_TABLESIZE)
5098 return(ENOMEM);
5099 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
5100 return(EINVAL);
5101 if (uap->size <= sizeof (stack_input)) {
5102 input = stack_input;
5103 } else {
5104 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
5105 if (input == NULL) {
5106 error = ENOMEM;
5107 goto out;
5108 }
5109 }
5110 error = copyin(uap->entries, input, uap->size);
5111 if (error)
5112 goto out;
5113
5114 AUDIT_ARG(opaque, input, uap->size);
5115
5116 /*
5117 * Force NUL termination of the copyin buffer to avoid nami() running
5118 * off the end. If the caller passes us bogus data, they may get a
5119 * bogus result.
5120 */
5121 ((char *)input)[uap->size - 1] = 0;
5122
5123 /*
5124 * Access is defined as checking against the process' real identity,
5125 * even if operations are checking the effective identity. This
5126 * requires that we use a local vfs context.
5127 */
5128 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5129 context.vc_thread = current_thread();
5130
5131 /*
5132 * Find out how many entries we have, so we can allocate the result
5133 * array by walking the list and adjusting the count downward by the
5134 * earliest string offset we see.
5135 */
5136 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
5137 desc_actual = desc_max;
5138 for (i = 0; i < desc_actual; i++) {
5139 /*
5140 * Take the offset to the name string for this entry and
5141 * convert to an input array index, which would be one off
5142 * the end of the array if this entry was the lowest-addressed
5143 * name string.
5144 */
5145 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
5146
5147 /*
5148 * An offset greater than the max allowable offset is an error.
5149 * It is also an error for any valid entry to point
5150 * to a location prior to the end of the current entry, if
5151 * it's not a reference to the string of the previous entry.
5152 */
5153 if (j > desc_max || (j != 0 && j <= i)) {
5154 error = EINVAL;
5155 goto out;
5156 }
5157
5158 /* Also do not let ad_name_offset point to something beyond the size of the input */
5159 if (input[i].ad_name_offset >= uap->size) {
5160 error = EINVAL;
5161 goto out;
5162 }
5163
5164 /*
5165 * An offset of 0 means use the previous descriptor's offset;
5166 * this is used to chain multiple requests for the same file
5167 * to avoid multiple lookups.
5168 */
5169 if (j == 0) {
5170 /* This is not valid for the first entry */
5171 if (i == 0) {
5172 error = EINVAL;
5173 goto out;
5174 }
5175 continue;
5176 }
5177
5178 /*
5179 * If the offset of the string for this descriptor is before
5180 * what we believe is the current actual last descriptor,
5181 * then we need to adjust our estimate downward; this permits
5182 * the string table following the last descriptor to be out
5183 * of order relative to the descriptor list.
5184 */
5185 if (j < desc_actual)
5186 desc_actual = j;
5187 }
5188
5189 /*
5190 * We limit the actual number of descriptors we are willing to process
5191 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5192 * requested does not exceed this limit,
5193 */
5194 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
5195 error = ENOMEM;
5196 goto out;
5197 }
5198 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
5199 if (result == NULL) {
5200 error = ENOMEM;
5201 goto out;
5202 }
5203
5204 /*
5205 * Do the work by iterating over the descriptor entries we know to
5206 * at least appear to contain valid data.
5207 */
5208 error = 0;
5209 for (i = 0; i < desc_actual; i++) {
5210 /*
5211 * If the ad_name_offset is 0, then we use the previous
5212 * results to make the check; otherwise, we are looking up
5213 * a new file name.
5214 */
5215 if (input[i].ad_name_offset != 0) {
5216 /* discard old vnodes */
5217 if (vp) {
5218 vnode_put(vp);
5219 vp = NULL;
5220 }
5221 if (dvp) {
5222 vnode_put(dvp);
5223 dvp = NULL;
5224 }
5225
5226 /*
5227 * Scan forward in the descriptor list to see if we
5228 * need the parent vnode. We will need it if we are
5229 * deleting, since we must have rights to remove
5230 * entries in the parent directory, as well as the
5231 * rights to delete the object itself.
5232 */
5233 wantdelete = input[i].ad_flags & _DELETE_OK;
5234 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
5235 if (input[j].ad_flags & _DELETE_OK)
5236 wantdelete = 1;
5237
5238 niopts = FOLLOW | AUDITVNPATH1;
5239
5240 /* need parent for vnode_authorize for deletion test */
5241 if (wantdelete)
5242 niopts |= WANTPARENT;
5243
5244 /* do the lookup */
5245 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
5246 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
5247 &context);
5248 error = namei(&nd);
5249 if (!error) {
5250 vp = nd.ni_vp;
5251 if (wantdelete)
5252 dvp = nd.ni_dvp;
5253 }
5254 nameidone(&nd);
5255 }
5256
5257 /*
5258 * Handle lookup errors.
5259 */
5260 switch(error) {
5261 case ENOENT:
5262 case EACCES:
5263 case EPERM:
5264 case ENOTDIR:
5265 result[i] = error;
5266 break;
5267 case 0:
5268 /* run this access check */
5269 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
5270 break;
5271 default:
5272 /* fatal lookup error */
5273
5274 goto out;
5275 }
5276 }
5277
5278 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
5279
5280 /* copy out results */
5281 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
5282
5283 out:
5284 if (input && input != stack_input)
5285 FREE(input, M_TEMP);
5286 if (result)
5287 FREE(result, M_TEMP);
5288 if (vp)
5289 vnode_put(vp);
5290 if (dvp)
5291 vnode_put(dvp);
5292 if (IS_VALID_CRED(context.vc_ucred))
5293 kauth_cred_unref(&context.vc_ucred);
5294 return(error);
5295 }
5296
5297
5298 /*
5299 * Returns: 0 Success
5300 * namei:EFAULT Bad address
5301 * namei:ENAMETOOLONG Filename too long
5302 * namei:ENOENT No such file or directory
5303 * namei:ELOOP Too many levels of symbolic links
5304 * namei:EBADF Bad file descriptor
5305 * namei:ENOTDIR Not a directory
5306 * namei:???
5307 * access1:
5308 */
5309 static int
5310 faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5311 int flag, enum uio_seg segflg)
5312 {
5313 int error;
5314 struct nameidata nd;
5315 int niopts;
5316 struct vfs_context context;
5317 #if NAMEDRSRCFORK
5318 int is_namedstream = 0;
5319 #endif
5320
5321 /*
5322 * Unless the AT_EACCESS option is used, Access is defined as checking
5323 * against the process' real identity, even if operations are checking
5324 * the effective identity. So we need to tweak the credential
5325 * in the context for that case.
5326 */
5327 if (!(flag & AT_EACCESS))
5328 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5329 else
5330 context.vc_ucred = ctx->vc_ucred;
5331 context.vc_thread = ctx->vc_thread;
5332
5333
5334 niopts = FOLLOW | AUDITVNPATH1;
5335 /* need parent for vnode_authorize for deletion test */
5336 if (amode & _DELETE_OK)
5337 niopts |= WANTPARENT;
5338 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5339 path, &context);
5340
5341 #if NAMEDRSRCFORK
5342 /* access(F_OK) calls are allowed for resource forks. */
5343 if (amode == F_OK)
5344 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5345 #endif
5346 error = nameiat(&nd, fd);
5347 if (error)
5348 goto out;
5349
5350 #if NAMEDRSRCFORK
5351 /* Grab reference on the shadow stream file vnode to
5352 * force an inactive on release which will mark it
5353 * for recycle.
5354 */
5355 if (vnode_isnamedstream(nd.ni_vp) &&
5356 (nd.ni_vp->v_parent != NULLVP) &&
5357 vnode_isshadow(nd.ni_vp)) {
5358 is_namedstream = 1;
5359 vnode_ref(nd.ni_vp);
5360 }
5361 #endif
5362
5363 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
5364
5365 #if NAMEDRSRCFORK
5366 if (is_namedstream) {
5367 vnode_rele(nd.ni_vp);
5368 }
5369 #endif
5370
5371 vnode_put(nd.ni_vp);
5372 if (amode & _DELETE_OK)
5373 vnode_put(nd.ni_dvp);
5374 nameidone(&nd);
5375
5376 out:
5377 if (!(flag & AT_EACCESS))
5378 kauth_cred_unref(&context.vc_ucred);
5379 return (error);
5380 }
5381
5382 int
5383 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5384 {
5385 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5386 uap->path, uap->flags, 0, UIO_USERSPACE));
5387 }
5388
5389 int
5390 faccessat(__unused proc_t p, struct faccessat_args *uap,
5391 __unused int32_t *retval)
5392 {
5393 if (uap->flag & ~AT_EACCESS)
5394 return (EINVAL);
5395
5396 return (faccessat_internal(vfs_context_current(), uap->fd,
5397 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5398 }
5399
5400 /*
5401 * Returns: 0 Success
5402 * EFAULT
5403 * copyout:EFAULT
5404 * namei:???
5405 * vn_stat:???
5406 */
5407 static int
5408 fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5409 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5410 enum uio_seg segflg, int fd, int flag)
5411 {
5412 struct nameidata nd;
5413 int follow;
5414 union {
5415 struct stat sb;
5416 struct stat64 sb64;
5417 } source;
5418 union {
5419 struct user64_stat user64_sb;
5420 struct user32_stat user32_sb;
5421 struct user64_stat64 user64_sb64;
5422 struct user32_stat64 user32_sb64;
5423 } dest;
5424 caddr_t sbp;
5425 int error, my_size;
5426 kauth_filesec_t fsec;
5427 size_t xsecurity_bufsize;
5428 void * statptr;
5429
5430 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5431 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5432 segflg, path, ctx);
5433
5434 #if NAMEDRSRCFORK
5435 int is_namedstream = 0;
5436 /* stat calls are allowed for resource forks. */
5437 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5438 #endif
5439 error = nameiat(&nd, fd);
5440 if (error)
5441 return (error);
5442 fsec = KAUTH_FILESEC_NONE;
5443
5444 statptr = (void *)&source;
5445
5446 #if NAMEDRSRCFORK
5447 /* Grab reference on the shadow stream file vnode to
5448 * force an inactive on release which will mark it
5449 * for recycle.
5450 */
5451 if (vnode_isnamedstream(nd.ni_vp) &&
5452 (nd.ni_vp->v_parent != NULLVP) &&
5453 vnode_isshadow(nd.ni_vp)) {
5454 is_namedstream = 1;
5455 vnode_ref(nd.ni_vp);
5456 }
5457 #endif
5458
5459 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
5460
5461 #if NAMEDRSRCFORK
5462 if (is_namedstream) {
5463 vnode_rele(nd.ni_vp);
5464 }
5465 #endif
5466 vnode_put(nd.ni_vp);
5467 nameidone(&nd);
5468
5469 if (error)
5470 return (error);
5471 /* Zap spare fields */
5472 if (isstat64 != 0) {
5473 source.sb64.st_lspare = 0;
5474 source.sb64.st_qspare[0] = 0LL;
5475 source.sb64.st_qspare[1] = 0LL;
5476 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5477 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
5478 my_size = sizeof(dest.user64_sb64);
5479 sbp = (caddr_t)&dest.user64_sb64;
5480 } else {
5481 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
5482 my_size = sizeof(dest.user32_sb64);
5483 sbp = (caddr_t)&dest.user32_sb64;
5484 }
5485 /*
5486 * Check if we raced (post lookup) against the last unlink of a file.
5487 */
5488 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5489 source.sb64.st_nlink = 1;
5490 }
5491 } else {
5492 source.sb.st_lspare = 0;
5493 source.sb.st_qspare[0] = 0LL;
5494 source.sb.st_qspare[1] = 0LL;
5495 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5496 munge_user64_stat(&source.sb, &dest.user64_sb);
5497 my_size = sizeof(dest.user64_sb);
5498 sbp = (caddr_t)&dest.user64_sb;
5499 } else {
5500 munge_user32_stat(&source.sb, &dest.user32_sb);
5501 my_size = sizeof(dest.user32_sb);
5502 sbp = (caddr_t)&dest.user32_sb;
5503 }
5504
5505 /*
5506 * Check if we raced (post lookup) against the last unlink of a file.
5507 */
5508 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5509 source.sb.st_nlink = 1;
5510 }
5511 }
5512 if ((error = copyout(sbp, ub, my_size)) != 0)
5513 goto out;
5514
5515 /* caller wants extended security information? */
5516 if (xsecurity != USER_ADDR_NULL) {
5517
5518 /* did we get any? */
5519 if (fsec == KAUTH_FILESEC_NONE) {
5520 if (susize(xsecurity_size, 0) != 0) {
5521 error = EFAULT;
5522 goto out;
5523 }
5524 } else {
5525 /* find the user buffer size */
5526 xsecurity_bufsize = fusize(xsecurity_size);
5527
5528 /* copy out the actual data size */
5529 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5530 error = EFAULT;
5531 goto out;
5532 }
5533
5534 /* if the caller supplied enough room, copy out to it */
5535 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5536 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5537 }
5538 }
5539 out:
5540 if (fsec != KAUTH_FILESEC_NONE)
5541 kauth_filesec_free(fsec);
5542 return (error);
5543 }
5544
5545 /*
5546 * stat_extended: Get file status; with extended security (ACL).
5547 *
5548 * Parameters: p (ignored)
5549 * uap User argument descriptor (see below)
5550 * retval (ignored)
5551 *
5552 * Indirect: uap->path Path of file to get status from
5553 * uap->ub User buffer (holds file status info)
5554 * uap->xsecurity ACL to get (extended security)
5555 * uap->xsecurity_size Size of ACL
5556 *
5557 * Returns: 0 Success
5558 * !0 errno value
5559 *
5560 */
5561 int
5562 stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5563 __unused int32_t *retval)
5564 {
5565 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5566 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5567 0));
5568 }
5569
5570 /*
5571 * Returns: 0 Success
5572 * fstatat_internal:??? [see fstatat_internal() in this file]
5573 */
5574 int
5575 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
5576 {
5577 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5578 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
5579 }
5580
5581 int
5582 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
5583 {
5584 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5585 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
5586 }
5587
5588 /*
5589 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5590 *
5591 * Parameters: p (ignored)
5592 * uap User argument descriptor (see below)
5593 * retval (ignored)
5594 *
5595 * Indirect: uap->path Path of file to get status from
5596 * uap->ub User buffer (holds file status info)
5597 * uap->xsecurity ACL to get (extended security)
5598 * uap->xsecurity_size Size of ACL
5599 *
5600 * Returns: 0 Success
5601 * !0 errno value
5602 *
5603 */
5604 int
5605 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
5606 {
5607 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5608 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5609 0));
5610 }
5611
5612 /*
5613 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5614 *
5615 * Parameters: p (ignored)
5616 * uap User argument descriptor (see below)
5617 * retval (ignored)
5618 *
5619 * Indirect: uap->path Path of file to get status from
5620 * uap->ub User buffer (holds file status info)
5621 * uap->xsecurity ACL to get (extended security)
5622 * uap->xsecurity_size Size of ACL
5623 *
5624 * Returns: 0 Success
5625 * !0 errno value
5626 *
5627 */
5628 int
5629 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
5630 {
5631 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5632 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5633 AT_SYMLINK_NOFOLLOW));
5634 }
5635
5636 /*
5637 * Get file status; this version does not follow links.
5638 */
5639 int
5640 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
5641 {
5642 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5643 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5644 }
5645
5646 int
5647 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
5648 {
5649 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5650 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5651 }
5652
5653 /*
5654 * lstat64_extended: Get file status; can handle large inode numbers; does not
5655 * follow links; with extended security (ACL).
5656 *
5657 * Parameters: p (ignored)
5658 * uap User argument descriptor (see below)
5659 * retval (ignored)
5660 *
5661 * Indirect: uap->path Path of file to get status from
5662 * uap->ub User buffer (holds file status info)
5663 * uap->xsecurity ACL to get (extended security)
5664 * uap->xsecurity_size Size of ACL
5665 *
5666 * Returns: 0 Success
5667 * !0 errno value
5668 *
5669 */
5670 int
5671 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
5672 {
5673 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5674 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5675 AT_SYMLINK_NOFOLLOW));
5676 }
5677
5678 int
5679 fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5680 {
5681 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5682 return (EINVAL);
5683
5684 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5685 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5686 }
5687
5688 int
5689 fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5690 __unused int32_t *retval)
5691 {
5692 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5693 return (EINVAL);
5694
5695 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5696 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
5697 }
5698
5699 /*
5700 * Get configurable pathname variables.
5701 *
5702 * Returns: 0 Success
5703 * namei:???
5704 * vn_pathconf:???
5705 *
5706 * Notes: Global implementation constants are intended to be
5707 * implemented in this function directly; all other constants
5708 * are per-FS implementation, and therefore must be handled in
5709 * each respective FS, instead.
5710 *
5711 * XXX We implement some things globally right now that should actually be
5712 * XXX per-FS; we will need to deal with this at some point.
5713 */
5714 /* ARGSUSED */
5715 int
5716 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
5717 {
5718 int error;
5719 struct nameidata nd;
5720 vfs_context_t ctx = vfs_context_current();
5721
5722 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
5723 UIO_USERSPACE, uap->path, ctx);
5724 error = namei(&nd);
5725 if (error)
5726 return (error);
5727
5728 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
5729
5730 vnode_put(nd.ni_vp);
5731 nameidone(&nd);
5732 return (error);
5733 }
5734
5735 /*
5736 * Return target name of a symbolic link.
5737 */
5738 /* ARGSUSED */
5739 static int
5740 readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
5741 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
5742 int *retval)
5743 {
5744 vnode_t vp;
5745 uio_t auio;
5746 int error;
5747 struct nameidata nd;
5748 char uio_buf[ UIO_SIZEOF(1) ];
5749
5750 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5751 seg, path, ctx);
5752
5753 error = nameiat(&nd, fd);
5754 if (error)
5755 return (error);
5756 vp = nd.ni_vp;
5757
5758 nameidone(&nd);
5759
5760 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
5761 &uio_buf[0], sizeof(uio_buf));
5762 uio_addiov(auio, buf, bufsize);
5763 if (vp->v_type != VLNK) {
5764 error = EINVAL;
5765 } else {
5766 #if CONFIG_MACF
5767 error = mac_vnode_check_readlink(ctx, vp);
5768 #endif
5769 if (error == 0)
5770 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
5771 ctx);
5772 if (error == 0)
5773 error = VNOP_READLINK(vp, auio, ctx);
5774 }
5775 vnode_put(vp);
5776
5777 *retval = bufsize - (int)uio_resid(auio);
5778 return (error);
5779 }
5780
5781 int
5782 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5783 {
5784 enum uio_seg procseg;
5785
5786 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5787 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5788 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5789 uap->count, procseg, retval));
5790 }
5791
5792 int
5793 readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5794 {
5795 enum uio_seg procseg;
5796
5797 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5798 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5799 procseg, uap->buf, uap->bufsize, procseg, retval));
5800 }
5801
5802 /*
5803 * Change file flags.
5804 *
5805 * NOTE: this will vnode_put() `vp'
5806 */
5807 static int
5808 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5809 {
5810 struct vnode_attr va;
5811 kauth_action_t action;
5812 int error;
5813
5814 VATTR_INIT(&va);
5815 VATTR_SET(&va, va_flags, flags);
5816
5817 #if CONFIG_MACF
5818 error = mac_vnode_check_setflags(ctx, vp, flags);
5819 if (error)
5820 goto out;
5821 #endif
5822
5823 /* request authorisation, disregard immutability */
5824 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5825 goto out;
5826 /*
5827 * Request that the auth layer disregard those file flags it's allowed to when
5828 * authorizing this operation; we need to do this in order to be able to
5829 * clear immutable flags.
5830 */
5831 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
5832 goto out;
5833 error = vnode_setattr(vp, &va, ctx);
5834
5835 #if CONFIG_MACF
5836 if (error == 0)
5837 mac_vnode_notify_setflags(ctx, vp, flags);
5838 #endif
5839
5840 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5841 error = ENOTSUP;
5842 }
5843 out:
5844 vnode_put(vp);
5845 return(error);
5846 }
5847
5848 /*
5849 * Change flags of a file given a path name.
5850 */
5851 /* ARGSUSED */
5852 int
5853 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
5854 {
5855 vnode_t vp;
5856 vfs_context_t ctx = vfs_context_current();
5857 int error;
5858 struct nameidata nd;
5859
5860 AUDIT_ARG(fflags, uap->flags);
5861 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5862 UIO_USERSPACE, uap->path, ctx);
5863 error = namei(&nd);
5864 if (error)
5865 return (error);
5866 vp = nd.ni_vp;
5867 nameidone(&nd);
5868
5869 /* we don't vnode_put() here because chflags1 does internally */
5870 error = chflags1(vp, uap->flags, ctx);
5871
5872 return(error);
5873 }
5874
5875 /*
5876 * Change flags of a file given a file descriptor.
5877 */
5878 /* ARGSUSED */
5879 int
5880 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
5881 {
5882 vnode_t vp;
5883 int error;
5884
5885 AUDIT_ARG(fd, uap->fd);
5886 AUDIT_ARG(fflags, uap->flags);
5887 if ( (error = file_vnode(uap->fd, &vp)) )
5888 return (error);
5889
5890 if ((error = vnode_getwithref(vp))) {
5891 file_drop(uap->fd);
5892 return(error);
5893 }
5894
5895 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5896
5897 /* we don't vnode_put() here because chflags1 does internally */
5898 error = chflags1(vp, uap->flags, vfs_context_current());
5899
5900 file_drop(uap->fd);
5901 return (error);
5902 }
5903
5904 /*
5905 * Change security information on a filesystem object.
5906 *
5907 * Returns: 0 Success
5908 * EPERM Operation not permitted
5909 * vnode_authattr:??? [anything vnode_authattr can return]
5910 * vnode_authorize:??? [anything vnode_authorize can return]
5911 * vnode_setattr:??? [anything vnode_setattr can return]
5912 *
5913 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5914 * translated to EPERM before being returned.
5915 */
5916 static int
5917 chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
5918 {
5919 kauth_action_t action;
5920 int error;
5921
5922 AUDIT_ARG(mode, vap->va_mode);
5923 /* XXX audit new args */
5924
5925 #if NAMEDSTREAMS
5926 /* chmod calls are not allowed for resource forks. */
5927 if (vp->v_flag & VISNAMEDSTREAM) {
5928 return (EPERM);
5929 }
5930 #endif
5931
5932 #if CONFIG_MACF
5933 if (VATTR_IS_ACTIVE(vap, va_mode) &&
5934 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
5935 return (error);
5936
5937 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
5938 if ((error = mac_vnode_check_setowner(ctx, vp,
5939 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
5940 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1)))
5941 return (error);
5942 }
5943
5944 if (VATTR_IS_ACTIVE(vap, va_acl) &&
5945 (error = mac_vnode_check_setacl(ctx, vp, vap->va_acl)))
5946 return (error);
5947 #endif
5948
5949 /* make sure that the caller is allowed to set this security information */
5950 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
5951 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5952 if (error == EACCES)
5953 error = EPERM;
5954 return(error);
5955 }
5956
5957 if ((error = vnode_setattr(vp, vap, ctx)) != 0)
5958 return (error);
5959
5960 #if CONFIG_MACF
5961 if (VATTR_IS_ACTIVE(vap, va_mode))
5962 mac_vnode_notify_setmode(ctx, vp, (mode_t)vap->va_mode);
5963
5964 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))
5965 mac_vnode_notify_setowner(ctx, vp,
5966 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
5967 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1);
5968
5969 if (VATTR_IS_ACTIVE(vap, va_acl))
5970 mac_vnode_notify_setacl(ctx, vp, vap->va_acl);
5971 #endif
5972
5973 return (error);
5974 }
5975
5976
5977 /*
5978 * Change mode of a file given a path name.
5979 *
5980 * Returns: 0 Success
5981 * namei:??? [anything namei can return]
5982 * chmod_vnode:??? [anything chmod_vnode can return]
5983 */
5984 static int
5985 chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
5986 int fd, int flag, enum uio_seg segflg)
5987 {
5988 struct nameidata nd;
5989 int follow, error;
5990
5991 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5992 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
5993 segflg, path, ctx);
5994 if ((error = nameiat(&nd, fd)))
5995 return (error);
5996 error = chmod_vnode(ctx, nd.ni_vp, vap);
5997 vnode_put(nd.ni_vp);
5998 nameidone(&nd);
5999 return(error);
6000 }
6001
6002 /*
6003 * chmod_extended: Change the mode of a file given a path name; with extended
6004 * argument list (including extended security (ACL)).
6005 *
6006 * Parameters: p Process requesting the open
6007 * uap User argument descriptor (see below)
6008 * retval (ignored)
6009 *
6010 * Indirect: uap->path Path to object (same as 'chmod')
6011 * uap->uid UID to set
6012 * uap->gid GID to set
6013 * uap->mode File mode to set (same as 'chmod')
6014 * uap->xsecurity ACL to set (or delete)
6015 *
6016 * Returns: 0 Success
6017 * !0 errno value
6018 *
6019 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6020 *
6021 * XXX: We should enummerate the possible errno values here, and where
6022 * in the code they originated.
6023 */
6024 int
6025 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
6026 {
6027 int error;
6028 struct vnode_attr va;
6029 kauth_filesec_t xsecdst;
6030
6031 AUDIT_ARG(owner, uap->uid, uap->gid);
6032
6033 VATTR_INIT(&va);
6034 if (uap->mode != -1)
6035 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6036 if (uap->uid != KAUTH_UID_NONE)
6037 VATTR_SET(&va, va_uid, uap->uid);
6038 if (uap->gid != KAUTH_GID_NONE)
6039 VATTR_SET(&va, va_gid, uap->gid);
6040
6041 xsecdst = NULL;
6042 switch(uap->xsecurity) {
6043 /* explicit remove request */
6044 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6045 VATTR_SET(&va, va_acl, NULL);
6046 break;
6047 /* not being set */
6048 case USER_ADDR_NULL:
6049 break;
6050 default:
6051 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6052 return(error);
6053 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6054 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
6055 }
6056
6057 error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
6058 UIO_USERSPACE);
6059
6060 if (xsecdst != NULL)
6061 kauth_filesec_free(xsecdst);
6062 return(error);
6063 }
6064
6065 /*
6066 * Returns: 0 Success
6067 * chmodat:??? [anything chmodat can return]
6068 */
6069 static int
6070 fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
6071 int flag, enum uio_seg segflg)
6072 {
6073 struct vnode_attr va;
6074
6075 VATTR_INIT(&va);
6076 VATTR_SET(&va, va_mode, mode & ALLPERMS);
6077
6078 return (chmodat(ctx, path, &va, fd, flag, segflg));
6079 }
6080
6081 int
6082 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
6083 {
6084 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6085 AT_FDCWD, 0, UIO_USERSPACE));
6086 }
6087
6088 int
6089 fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
6090 {
6091 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6092 return (EINVAL);
6093
6094 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6095 uap->fd, uap->flag, UIO_USERSPACE));
6096 }
6097
6098 /*
6099 * Change mode of a file given a file descriptor.
6100 */
6101 static int
6102 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
6103 {
6104 vnode_t vp;
6105 int error;
6106
6107 AUDIT_ARG(fd, fd);
6108
6109 if ((error = file_vnode(fd, &vp)) != 0)
6110 return (error);
6111 if ((error = vnode_getwithref(vp)) != 0) {
6112 file_drop(fd);
6113 return(error);
6114 }
6115 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6116
6117 error = chmod_vnode(vfs_context_current(), vp, vap);
6118 (void)vnode_put(vp);
6119 file_drop(fd);
6120
6121 return (error);
6122 }
6123
6124 /*
6125 * fchmod_extended: Change mode of a file given a file descriptor; with
6126 * extended argument list (including extended security (ACL)).
6127 *
6128 * Parameters: p Process requesting to change file mode
6129 * uap User argument descriptor (see below)
6130 * retval (ignored)
6131 *
6132 * Indirect: uap->mode File mode to set (same as 'chmod')
6133 * uap->uid UID to set
6134 * uap->gid GID to set
6135 * uap->xsecurity ACL to set (or delete)
6136 * uap->fd File descriptor of file to change mode
6137 *
6138 * Returns: 0 Success
6139 * !0 errno value
6140 *
6141 */
6142 int
6143 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
6144 {
6145 int error;
6146 struct vnode_attr va;
6147 kauth_filesec_t xsecdst;
6148
6149 AUDIT_ARG(owner, uap->uid, uap->gid);
6150
6151 VATTR_INIT(&va);
6152 if (uap->mode != -1)
6153 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6154 if (uap->uid != KAUTH_UID_NONE)
6155 VATTR_SET(&va, va_uid, uap->uid);
6156 if (uap->gid != KAUTH_GID_NONE)
6157 VATTR_SET(&va, va_gid, uap->gid);
6158
6159 xsecdst = NULL;
6160 switch(uap->xsecurity) {
6161 case USER_ADDR_NULL:
6162 VATTR_SET(&va, va_acl, NULL);
6163 break;
6164 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6165 VATTR_SET(&va, va_acl, NULL);
6166 break;
6167 /* not being set */
6168 case CAST_USER_ADDR_T(-1):
6169 break;
6170 default:
6171 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6172 return(error);
6173 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6174 }
6175
6176 error = fchmod1(p, uap->fd, &va);
6177
6178
6179 switch(uap->xsecurity) {
6180 case USER_ADDR_NULL:
6181 case CAST_USER_ADDR_T(-1):
6182 break;
6183 default:
6184 if (xsecdst != NULL)
6185 kauth_filesec_free(xsecdst);
6186 }
6187 return(error);
6188 }
6189
6190 int
6191 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
6192 {
6193 struct vnode_attr va;
6194
6195 VATTR_INIT(&va);
6196 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6197
6198 return(fchmod1(p, uap->fd, &va));
6199 }
6200
6201
6202 /*
6203 * Set ownership given a path name.
6204 */
6205 /* ARGSUSED */
6206 static int
6207 fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
6208 gid_t gid, int flag, enum uio_seg segflg)
6209 {
6210 vnode_t vp;
6211 struct vnode_attr va;
6212 int error;
6213 struct nameidata nd;
6214 int follow;
6215 kauth_action_t action;
6216
6217 AUDIT_ARG(owner, uid, gid);
6218
6219 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6220 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
6221 path, ctx);
6222 error = nameiat(&nd, fd);
6223 if (error)
6224 return (error);
6225 vp = nd.ni_vp;
6226
6227 nameidone(&nd);
6228
6229 VATTR_INIT(&va);
6230 if (uid != (uid_t)VNOVAL)
6231 VATTR_SET(&va, va_uid, uid);
6232 if (gid != (gid_t)VNOVAL)
6233 VATTR_SET(&va, va_gid, gid);
6234
6235 #if CONFIG_MACF
6236 error = mac_vnode_check_setowner(ctx, vp, uid, gid);
6237 if (error)
6238 goto out;
6239 #endif
6240
6241 /* preflight and authorize attribute changes */
6242 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6243 goto out;
6244 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6245 goto out;
6246 error = vnode_setattr(vp, &va, ctx);
6247
6248 #if CONFIG_MACF
6249 if (error == 0)
6250 mac_vnode_notify_setowner(ctx, vp, uid, gid);
6251 #endif
6252
6253 out:
6254 /*
6255 * EACCES is only allowed from namei(); permissions failure should
6256 * return EPERM, so we need to translate the error code.
6257 */
6258 if (error == EACCES)
6259 error = EPERM;
6260
6261 vnode_put(vp);
6262 return (error);
6263 }
6264
6265 int
6266 chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
6267 {
6268 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6269 uap->uid, uap->gid, 0, UIO_USERSPACE));
6270 }
6271
6272 int
6273 lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
6274 {
6275 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6276 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6277 }
6278
6279 int
6280 fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6281 {
6282 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6283 return (EINVAL);
6284
6285 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6286 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
6287 }
6288
6289 /*
6290 * Set ownership given a file descriptor.
6291 */
6292 /* ARGSUSED */
6293 int
6294 fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
6295 {
6296 struct vnode_attr va;
6297 vfs_context_t ctx = vfs_context_current();
6298 vnode_t vp;
6299 int error;
6300 kauth_action_t action;
6301
6302 AUDIT_ARG(owner, uap->uid, uap->gid);
6303 AUDIT_ARG(fd, uap->fd);
6304
6305 if ( (error = file_vnode(uap->fd, &vp)) )
6306 return (error);
6307
6308 if ( (error = vnode_getwithref(vp)) ) {
6309 file_drop(uap->fd);
6310 return(error);
6311 }
6312 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6313
6314 VATTR_INIT(&va);
6315 if (uap->uid != VNOVAL)
6316 VATTR_SET(&va, va_uid, uap->uid);
6317 if (uap->gid != VNOVAL)
6318 VATTR_SET(&va, va_gid, uap->gid);
6319
6320 #if NAMEDSTREAMS
6321 /* chown calls are not allowed for resource forks. */
6322 if (vp->v_flag & VISNAMEDSTREAM) {
6323 error = EPERM;
6324 goto out;
6325 }
6326 #endif
6327
6328 #if CONFIG_MACF
6329 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
6330 if (error)
6331 goto out;
6332 #endif
6333
6334 /* preflight and authorize attribute changes */
6335 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6336 goto out;
6337 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6338 if (error == EACCES)
6339 error = EPERM;
6340 goto out;
6341 }
6342 error = vnode_setattr(vp, &va, ctx);
6343
6344 #if CONFIG_MACF
6345 if (error == 0)
6346 mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid);
6347 #endif
6348
6349 out:
6350 (void)vnode_put(vp);
6351 file_drop(uap->fd);
6352 return (error);
6353 }
6354
6355 static int
6356 getutimes(user_addr_t usrtvp, struct timespec *tsp)
6357 {
6358 int error;
6359
6360 if (usrtvp == USER_ADDR_NULL) {
6361 struct timeval old_tv;
6362 /* XXX Y2038 bug because of microtime argument */
6363 microtime(&old_tv);
6364 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
6365 tsp[1] = tsp[0];
6366 } else {
6367 if (IS_64BIT_PROCESS(current_proc())) {
6368 struct user64_timeval tv[2];
6369 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6370 if (error)
6371 return (error);
6372 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6373 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6374 } else {
6375 struct user32_timeval tv[2];
6376 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6377 if (error)
6378 return (error);
6379 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6380 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6381 }
6382 }
6383 return 0;
6384 }
6385
6386 static int
6387 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
6388 int nullflag)
6389 {
6390 int error;
6391 struct vnode_attr va;
6392 kauth_action_t action;
6393
6394 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6395
6396 VATTR_INIT(&va);
6397 VATTR_SET(&va, va_access_time, ts[0]);
6398 VATTR_SET(&va, va_modify_time, ts[1]);
6399 if (nullflag)
6400 va.va_vaflags |= VA_UTIMES_NULL;
6401
6402 #if NAMEDSTREAMS
6403 /* utimes calls are not allowed for resource forks. */
6404 if (vp->v_flag & VISNAMEDSTREAM) {
6405 error = EPERM;
6406 goto out;
6407 }
6408 #endif
6409
6410 #if CONFIG_MACF
6411 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
6412 if (error)
6413 goto out;
6414 #endif
6415 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
6416 if (!nullflag && error == EACCES)
6417 error = EPERM;
6418 goto out;
6419 }
6420
6421 /* since we may not need to auth anything, check here */
6422 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6423 if (!nullflag && error == EACCES)
6424 error = EPERM;
6425 goto out;
6426 }
6427 error = vnode_setattr(vp, &va, ctx);
6428
6429 #if CONFIG_MACF
6430 if (error == 0)
6431 mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]);
6432 #endif
6433
6434 out:
6435 return error;
6436 }
6437
6438 /*
6439 * Set the access and modification times of a file.
6440 */
6441 /* ARGSUSED */
6442 int
6443 utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
6444 {
6445 struct timespec ts[2];
6446 user_addr_t usrtvp;
6447 int error;
6448 struct nameidata nd;
6449 vfs_context_t ctx = vfs_context_current();
6450
6451 /*
6452 * AUDIT: Needed to change the order of operations to do the
6453 * name lookup first because auditing wants the path.
6454 */
6455 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
6456 UIO_USERSPACE, uap->path, ctx);
6457 error = namei(&nd);
6458 if (error)
6459 return (error);
6460 nameidone(&nd);
6461
6462 /*
6463 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6464 * the current time instead.
6465 */
6466 usrtvp = uap->tptr;
6467 if ((error = getutimes(usrtvp, ts)) != 0)
6468 goto out;
6469
6470 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
6471
6472 out:
6473 vnode_put(nd.ni_vp);
6474 return (error);
6475 }
6476
6477 /*
6478 * Set the access and modification times of a file.
6479 */
6480 /* ARGSUSED */
6481 int
6482 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
6483 {
6484 struct timespec ts[2];
6485 vnode_t vp;
6486 user_addr_t usrtvp;
6487 int error;
6488
6489 AUDIT_ARG(fd, uap->fd);
6490 usrtvp = uap->tptr;
6491 if ((error = getutimes(usrtvp, ts)) != 0)
6492 return (error);
6493 if ((error = file_vnode(uap->fd, &vp)) != 0)
6494 return (error);
6495 if((error = vnode_getwithref(vp))) {
6496 file_drop(uap->fd);
6497 return(error);
6498 }
6499
6500 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
6501 vnode_put(vp);
6502 file_drop(uap->fd);
6503 return(error);
6504 }
6505
6506 /*
6507 * Truncate a file given its path name.
6508 */
6509 /* ARGSUSED */
6510 int
6511 truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
6512 {
6513 vnode_t vp;
6514 struct vnode_attr va;
6515 vfs_context_t ctx = vfs_context_current();
6516 int error;
6517 struct nameidata nd;
6518 kauth_action_t action;
6519
6520 if (uap->length < 0)
6521 return(EINVAL);
6522 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
6523 UIO_USERSPACE, uap->path, ctx);
6524 if ((error = namei(&nd)))
6525 return (error);
6526 vp = nd.ni_vp;
6527
6528 nameidone(&nd);
6529
6530 VATTR_INIT(&va);
6531 VATTR_SET(&va, va_data_size, uap->length);
6532
6533 #if CONFIG_MACF
6534 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
6535 if (error)
6536 goto out;
6537 #endif
6538
6539 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6540 goto out;
6541 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6542 goto out;
6543 error = vnode_setattr(vp, &va, ctx);
6544
6545 #if CONFIG_MACF
6546 if (error == 0)
6547 mac_vnode_notify_truncate(ctx, NOCRED, vp);
6548 #endif
6549
6550 out:
6551 vnode_put(vp);
6552 return (error);
6553 }
6554
6555 /*
6556 * Truncate a file given a file descriptor.
6557 */
6558 /* ARGSUSED */
6559 int
6560 ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
6561 {
6562 vfs_context_t ctx = vfs_context_current();
6563 struct vnode_attr va;
6564 vnode_t vp;
6565 struct fileproc *fp;
6566 int error ;
6567 int fd = uap->fd;
6568
6569 AUDIT_ARG(fd, uap->fd);
6570 if (uap->length < 0)
6571 return(EINVAL);
6572
6573 if ( (error = fp_lookup(p,fd,&fp,0)) ) {
6574 return(error);
6575 }
6576
6577 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
6578 case DTYPE_PSXSHM:
6579 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
6580 goto out;
6581 case DTYPE_VNODE:
6582 break;
6583 default:
6584 error = EINVAL;
6585 goto out;
6586 }
6587
6588 vp = (vnode_t)fp->f_fglob->fg_data;
6589
6590 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
6591 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6592 error = EINVAL;
6593 goto out;
6594 }
6595
6596 if ((error = vnode_getwithref(vp)) != 0) {
6597 goto out;
6598 }
6599
6600 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6601
6602 #if CONFIG_MACF
6603 error = mac_vnode_check_truncate(ctx,
6604 fp->f_fglob->fg_cred, vp);
6605 if (error) {
6606 (void)vnode_put(vp);
6607 goto out;
6608 }
6609 #endif
6610 VATTR_INIT(&va);
6611 VATTR_SET(&va, va_data_size, uap->length);
6612 error = vnode_setattr(vp, &va, ctx);
6613
6614 #if CONFIG_MACF
6615 if (error == 0)
6616 mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp);
6617 #endif
6618
6619 (void)vnode_put(vp);
6620 out:
6621 file_drop(fd);
6622 return (error);
6623 }
6624
6625
6626 /*
6627 * Sync an open file with synchronized I/O _file_ integrity completion
6628 */
6629 /* ARGSUSED */
6630 int
6631 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
6632 {
6633 __pthread_testcancel(1);
6634 return(fsync_common(p, uap, MNT_WAIT));
6635 }
6636
6637
6638 /*
6639 * Sync an open file with synchronized I/O _file_ integrity completion
6640 *
6641 * Notes: This is a legacy support function that does not test for
6642 * thread cancellation points.
6643 */
6644 /* ARGSUSED */
6645 int
6646 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6647 {
6648 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
6649 }
6650
6651
6652 /*
6653 * Sync an open file with synchronized I/O _data_ integrity completion
6654 */
6655 /* ARGSUSED */
6656 int
6657 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6658 {
6659 __pthread_testcancel(1);
6660 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6661 }
6662
6663
6664 /*
6665 * fsync_common
6666 *
6667 * Common fsync code to support both synchronized I/O file integrity completion
6668 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6669 *
6670 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6671 * will only guarantee that the file data contents are retrievable. If
6672 * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
6673 * includes additional metadata unnecessary for retrieving the file data
6674 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6675 * storage.
6676 *
6677 * Parameters: p The process
6678 * uap->fd The descriptor to synchronize
6679 * flags The data integrity flags
6680 *
6681 * Returns: int Success
6682 * fp_getfvp:EBADF Bad file descriptor
6683 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6684 * VNOP_FSYNC:??? unspecified
6685 *
6686 * Notes: We use struct fsync_args because it is a short name, and all
6687 * caller argument structures are otherwise identical.
6688 */
6689 static int
6690 fsync_common(proc_t p, struct fsync_args *uap, int flags)
6691 {
6692 vnode_t vp;
6693 struct fileproc *fp;
6694 vfs_context_t ctx = vfs_context_current();
6695 int error;
6696
6697 AUDIT_ARG(fd, uap->fd);
6698
6699 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
6700 return (error);
6701 if ( (error = vnode_getwithref(vp)) ) {
6702 file_drop(uap->fd);
6703 return(error);
6704 }
6705
6706 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6707
6708 error = VNOP_FSYNC(vp, flags, ctx);
6709
6710 #if NAMEDRSRCFORK
6711 /* Sync resource fork shadow file if necessary. */
6712 if ((error == 0) &&
6713 (vp->v_flag & VISNAMEDSTREAM) &&
6714 (vp->v_parent != NULLVP) &&
6715 vnode_isshadow(vp) &&
6716 (fp->f_flags & FP_WRITTEN)) {
6717 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
6718 }
6719 #endif
6720
6721 (void)vnode_put(vp);
6722 file_drop(uap->fd);
6723 return (error);
6724 }
6725
6726 /*
6727 * Duplicate files. Source must be a file, target must be a file or
6728 * must not exist.
6729 *
6730 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6731 * perform inheritance correctly.
6732 */
6733 /* ARGSUSED */
6734 int
6735 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
6736 {
6737 vnode_t tvp, fvp, tdvp, sdvp;
6738 struct nameidata fromnd, tond;
6739 int error;
6740 vfs_context_t ctx = vfs_context_current();
6741 #if CONFIG_MACF
6742 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
6743 struct vnode_attr va;
6744 #endif
6745
6746 /* Check that the flags are valid. */
6747
6748 if (uap->flags & ~CPF_MASK) {
6749 return(EINVAL);
6750 }
6751
6752 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
6753 UIO_USERSPACE, uap->from, ctx);
6754 if ((error = namei(&fromnd)))
6755 return (error);
6756 fvp = fromnd.ni_vp;
6757
6758 NDINIT(&tond, CREATE, OP_LINK,
6759 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6760 UIO_USERSPACE, uap->to, ctx);
6761 if ((error = namei(&tond))) {
6762 goto out1;
6763 }
6764 tdvp = tond.ni_dvp;
6765 tvp = tond.ni_vp;
6766
6767 if (tvp != NULL) {
6768 if (!(uap->flags & CPF_OVERWRITE)) {
6769 error = EEXIST;
6770 goto out;
6771 }
6772 }
6773
6774 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
6775 error = EISDIR;
6776 goto out;
6777 }
6778
6779 /* This calls existing MAC hooks for open */
6780 if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx,
6781 NULL))) {
6782 goto out;
6783 }
6784
6785 if (tvp) {
6786 /*
6787 * See unlinkat_internal for an explanation of the potential
6788 * ENOENT from the MAC hook but the gist is that the MAC hook
6789 * can fail because vn_getpath isn't able to return the full
6790 * path. We choose to ignore this failure.
6791 */
6792 error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL);
6793 if (error && error != ENOENT)
6794 goto out;
6795 error = 0;
6796 }
6797
6798 #if CONFIG_MACF
6799 VATTR_INIT(&va);
6800 VATTR_SET(&va, va_type, fvp->v_type);
6801 /* Mask off all but regular access permissions */
6802 VATTR_SET(&va, va_mode,
6803 ((((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
6804 error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
6805 if (error)
6806 goto out;
6807 #endif /* CONFIG_MACF */
6808
6809 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
6810 goto out;
6811
6812 if (fvp == tdvp)
6813 error = EINVAL;
6814 /*
6815 * If source is the same as the destination (that is the
6816 * same inode number) then there is nothing to do.
6817 * (fixed to have POSIX semantics - CSM 3/2/98)
6818 */
6819 if (fvp == tvp)
6820 error = -1;
6821 if (!error)
6822 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
6823 out:
6824 sdvp = tond.ni_startdir;
6825 /*
6826 * nameidone has to happen before we vnode_put(tdvp)
6827 * since it may need to release the fs_nodelock on the tdvp
6828 */
6829 nameidone(&tond);
6830
6831 if (tvp)
6832 vnode_put(tvp);
6833 vnode_put(tdvp);
6834 vnode_put(sdvp);
6835 out1:
6836 vnode_put(fvp);
6837
6838 nameidone(&fromnd);
6839
6840 if (error == -1)
6841 return (0);
6842 return (error);
6843 }
6844
6845 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
6846
6847 /*
6848 * Helper function for doing clones. The caller is expected to provide an
6849 * iocounted source vnode and release it.
6850 */
6851 static int
6852 clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
6853 user_addr_t dst, uint32_t flags, vfs_context_t ctx)
6854 {
6855 vnode_t tvp, tdvp;
6856 struct nameidata tond;
6857 int error;
6858 int follow;
6859 boolean_t free_src_acl;
6860 boolean_t attr_cleanup;
6861 enum vtype v_type;
6862 kauth_action_t action;
6863 struct componentname *cnp;
6864 uint32_t defaulted;
6865 struct vnode_attr va;
6866 struct vnode_attr nva;
6867
6868 v_type = vnode_vtype(fvp);
6869 switch (v_type) {
6870 case VLNK:
6871 /* FALLTHRU */
6872 case VREG:
6873 action = KAUTH_VNODE_ADD_FILE;
6874 break;
6875 case VDIR:
6876 if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
6877 fvp->v_mountedhere) {
6878 return (EINVAL);
6879 }
6880 action = KAUTH_VNODE_ADD_SUBDIRECTORY;
6881 break;
6882 default:
6883 return (EINVAL);
6884 }
6885
6886 AUDIT_ARG(fd2, dst_dirfd);
6887 AUDIT_ARG(value32, flags);
6888
6889 follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6890 NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
6891 UIO_USERSPACE, dst, ctx);
6892 if ((error = nameiat(&tond, dst_dirfd)))
6893 return (error);
6894 cnp = &tond.ni_cnd;
6895 tdvp = tond.ni_dvp;
6896 tvp = tond.ni_vp;
6897
6898 free_src_acl = FALSE;
6899 attr_cleanup = FALSE;
6900
6901 if (tvp != NULL) {
6902 error = EEXIST;
6903 goto out;
6904 }
6905
6906 if (vnode_mount(tdvp) != vnode_mount(fvp)) {
6907 error = EXDEV;
6908 goto out;
6909 }
6910
6911 #if CONFIG_MACF
6912 if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp)))
6913 goto out;
6914 #endif
6915 if ((error = vnode_authorize(tdvp, NULL, action, ctx)))
6916 goto out;
6917
6918 action = KAUTH_VNODE_GENERIC_READ_BITS;
6919 if (data_read_authorised)
6920 action &= ~KAUTH_VNODE_READ_DATA;
6921 if ((error = vnode_authorize(fvp, NULL, action, ctx)))
6922 goto out;
6923
6924 /*
6925 * certain attributes may need to be changed from the source, we ask for
6926 * those here.
6927 */
6928 VATTR_INIT(&va);
6929 VATTR_WANTED(&va, va_uid);
6930 VATTR_WANTED(&va, va_gid);
6931 VATTR_WANTED(&va, va_mode);
6932 VATTR_WANTED(&va, va_flags);
6933 VATTR_WANTED(&va, va_acl);
6934
6935 if ((error = vnode_getattr(fvp, &va, ctx)) != 0)
6936 goto out;
6937
6938 VATTR_INIT(&nva);
6939 VATTR_SET(&nva, va_type, v_type);
6940 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) {
6941 VATTR_SET(&nva, va_acl, va.va_acl);
6942 free_src_acl = TRUE;
6943 }
6944
6945 /* Handle ACL inheritance, initialize vap. */
6946 if (v_type == VLNK) {
6947 error = vnode_authattr_new(tdvp, &nva, 0, ctx);
6948 } else {
6949 error = vn_attribute_prepare(tdvp, &nva, &defaulted, ctx);
6950 if (error)
6951 goto out;
6952 attr_cleanup = TRUE;
6953 }
6954
6955 /*
6956 * We've got initial values for all security parameters,
6957 * If we are superuser, then we can change owners to be the
6958 * same as the source. Both superuser and the owner have default
6959 * WRITE_SECURITY privileges so all other fields can be taken
6960 * from source as well.
6961 */
6962 if (vfs_context_issuser(ctx)) {
6963 if (VATTR_IS_SUPPORTED(&va, va_uid))
6964 VATTR_SET(&nva, va_uid, va.va_uid);
6965 if (VATTR_IS_SUPPORTED(&va, va_gid))
6966 VATTR_SET(&nva, va_gid, va.va_gid);
6967 }
6968 if (VATTR_IS_SUPPORTED(&va, va_mode))
6969 VATTR_SET(&nva, va_mode, va.va_mode);
6970 if (VATTR_IS_SUPPORTED(&va, va_flags)) {
6971 VATTR_SET(&nva, va_flags,
6972 ((va.va_flags & ~SF_RESTRICTED) | /* Turn off from source */
6973 (nva.va_flags & SF_RESTRICTED)));
6974 }
6975
6976 error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &nva,
6977 VNODE_CLONEFILE_DEFAULT, ctx);
6978
6979 if (!error && tvp) {
6980 int update_flags = 0;
6981 #if CONFIG_FSE
6982 int fsevent;
6983 #endif /* CONFIG_FSE */
6984
6985 #if CONFIG_MACF
6986 (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
6987 VNODE_LABEL_CREATE, ctx);
6988 #endif
6989 /*
6990 * If some of the requested attributes weren't handled by the
6991 * VNOP, use our fallback code.
6992 */
6993 if (!VATTR_ALL_SUPPORTED(&va))
6994 (void)vnode_setattr_fallback(tvp, &nva, ctx);
6995
6996 // Make sure the name & parent pointers are hooked up
6997 if (tvp->v_name == NULL)
6998 update_flags |= VNODE_UPDATE_NAME;
6999 if (tvp->v_parent == NULLVP)
7000 update_flags |= VNODE_UPDATE_PARENT;
7001
7002 if (update_flags) {
7003 (void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
7004 cnp->cn_namelen, cnp->cn_hash, update_flags);
7005 }
7006
7007 #if CONFIG_FSE
7008 switch (vnode_vtype(tvp)) {
7009 case VLNK:
7010 /* FALLTHRU */
7011 case VREG:
7012 fsevent = FSE_CREATE_FILE;
7013 break;
7014 case VDIR:
7015 fsevent = FSE_CREATE_DIR;
7016 break;
7017 default:
7018 goto out;
7019 }
7020
7021 if (need_fsevent(fsevent, tvp)) {
7022 add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
7023 FSE_ARG_DONE);
7024 }
7025 #endif /* CONFIG_FSE */
7026 }
7027
7028 out:
7029 if (attr_cleanup)
7030 vn_attribute_cleanup(&nva, defaulted);
7031 if (free_src_acl && va.va_acl)
7032 kauth_acl_free(va.va_acl);
7033 nameidone(&tond);
7034 if (tvp)
7035 vnode_put(tvp);
7036 vnode_put(tdvp);
7037 return (error);
7038 }
7039
7040 /*
7041 * clone files or directories, target must not exist.
7042 */
7043 /* ARGSUSED */
7044 int
7045 clonefileat(__unused proc_t p, struct clonefileat_args *uap,
7046 __unused int32_t *retval)
7047 {
7048 vnode_t fvp;
7049 struct nameidata fromnd;
7050 int follow;
7051 int error;
7052 vfs_context_t ctx = vfs_context_current();
7053
7054 /* Check that the flags are valid. */
7055 if (uap->flags & ~CLONE_NOFOLLOW)
7056 return (EINVAL);
7057
7058 AUDIT_ARG(fd, uap->src_dirfd);
7059
7060 follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7061 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
7062 UIO_USERSPACE, uap->src, ctx);
7063 if ((error = nameiat(&fromnd, uap->src_dirfd)))
7064 return (error);
7065
7066 fvp = fromnd.ni_vp;
7067 nameidone(&fromnd);
7068
7069 error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
7070 uap->flags, ctx);
7071
7072 vnode_put(fvp);
7073 return (error);
7074 }
7075
7076 int
7077 fclonefileat(__unused proc_t p, struct fclonefileat_args *uap,
7078 __unused int32_t *retval)
7079 {
7080 vnode_t fvp;
7081 struct fileproc *fp;
7082 int error;
7083 vfs_context_t ctx = vfs_context_current();
7084
7085 AUDIT_ARG(fd, uap->src_fd);
7086 error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
7087 if (error)
7088 return (error);
7089
7090 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7091 AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
7092 error = EBADF;
7093 goto out;
7094 }
7095
7096 if ((error = vnode_getwithref(fvp)))
7097 goto out;
7098
7099 AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
7100
7101 error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
7102 uap->flags, ctx);
7103
7104 vnode_put(fvp);
7105 out:
7106 file_drop(uap->src_fd);
7107 return (error);
7108 }
7109
7110 /*
7111 * Rename files. Source and destination must either both be directories,
7112 * or both not be directories. If target is a directory, it must be empty.
7113 */
7114 /* ARGSUSED */
7115 static int
7116 renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
7117 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
7118 {
7119 if (flags & ~VFS_RENAME_FLAGS_MASK)
7120 return EINVAL;
7121
7122 if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL))
7123 return EINVAL;
7124
7125 vnode_t tvp, tdvp;
7126 vnode_t fvp, fdvp;
7127 struct nameidata *fromnd, *tond;
7128 int error;
7129 int do_retry;
7130 int retry_count;
7131 int mntrename;
7132 int need_event;
7133 const char *oname = NULL;
7134 char *from_name = NULL, *to_name = NULL;
7135 int from_len=0, to_len=0;
7136 int holding_mntlock;
7137 mount_t locked_mp = NULL;
7138 vnode_t oparent = NULLVP;
7139 #if CONFIG_FSE
7140 fse_info from_finfo, to_finfo;
7141 #endif
7142 int from_truncated=0, to_truncated;
7143 int batched = 0;
7144 struct vnode_attr *fvap, *tvap;
7145 int continuing = 0;
7146 /* carving out a chunk for structs that are too big to be on stack. */
7147 struct {
7148 struct nameidata from_node, to_node;
7149 struct vnode_attr fv_attr, tv_attr;
7150 } * __rename_data;
7151 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
7152 fromnd = &__rename_data->from_node;
7153 tond = &__rename_data->to_node;
7154
7155 holding_mntlock = 0;
7156 do_retry = 0;
7157 retry_count = 0;
7158 retry:
7159 fvp = tvp = NULL;
7160 fdvp = tdvp = NULL;
7161 fvap = tvap = NULL;
7162 mntrename = FALSE;
7163
7164 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
7165 segflg, from, ctx);
7166 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
7167
7168 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
7169 segflg, to, ctx);
7170 tond->ni_flag = NAMEI_COMPOUNDRENAME;
7171
7172 continue_lookup:
7173 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
7174 if ( (error = nameiat(fromnd, fromfd)) )
7175 goto out1;
7176 fdvp = fromnd->ni_dvp;
7177 fvp = fromnd->ni_vp;
7178
7179 if (fvp && fvp->v_type == VDIR)
7180 tond->ni_cnd.cn_flags |= WILLBEDIR;
7181 }
7182
7183 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
7184 if ( (error = nameiat(tond, tofd)) ) {
7185 /*
7186 * Translate error code for rename("dir1", "dir2/.").
7187 */
7188 if (error == EISDIR && fvp->v_type == VDIR)
7189 error = EINVAL;
7190 goto out1;
7191 }
7192 tdvp = tond->ni_dvp;
7193 tvp = tond->ni_vp;
7194 }
7195
7196 #if DEVELOPMENT || DEBUG
7197 /*
7198 * XXX VSWAP: Check for entitlements or special flag here
7199 * so we can restrict access appropriately.
7200 */
7201 #else /* DEVELOPMENT || DEBUG */
7202
7203 if (fromnd->ni_vp && vnode_isswap(fromnd->ni_vp) && (ctx != vfs_context_kernel())) {
7204 error = EPERM;
7205 goto out1;
7206 }
7207
7208 if (tond->ni_vp && vnode_isswap(tond->ni_vp) && (ctx != vfs_context_kernel())) {
7209 error = EPERM;
7210 goto out1;
7211 }
7212 #endif /* DEVELOPMENT || DEBUG */
7213
7214 if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
7215 error = ENOENT;
7216 goto out1;
7217 }
7218
7219 if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
7220 error = EEXIST;
7221 goto out1;
7222 }
7223
7224 batched = vnode_compound_rename_available(fdvp);
7225 if (!fvp) {
7226 /*
7227 * Claim: this check will never reject a valid rename.
7228 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
7229 * Suppose fdvp and tdvp are not on the same mount.
7230 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
7231 * then you can't move it to within another dir on the same mountpoint.
7232 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
7233 *
7234 * If this check passes, then we are safe to pass these vnodes to the same FS.
7235 */
7236 if (fdvp->v_mount != tdvp->v_mount) {
7237 error = EXDEV;
7238 goto out1;
7239 }
7240 goto skipped_lookup;
7241 }
7242
7243 if (!batched) {
7244 error = vn_authorize_renamex(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, flags, NULL);
7245 if (error) {
7246 if (error == ENOENT) {
7247 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7248 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7249 /*
7250 * We encountered a race where after doing the namei, tvp stops
7251 * being valid. If so, simply re-drive the rename call from the
7252 * top.
7253 */
7254 do_retry = 1;
7255 retry_count += 1;
7256 }
7257 }
7258 goto out1;
7259 }
7260 }
7261
7262 /*
7263 * If the source and destination are the same (i.e. they're
7264 * links to the same vnode) and the target file system is
7265 * case sensitive, then there is nothing to do.
7266 *
7267 * XXX Come back to this.
7268 */
7269 if (fvp == tvp) {
7270 int pathconf_val;
7271
7272 /*
7273 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
7274 * then assume that this file system is case sensitive.
7275 */
7276 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
7277 pathconf_val != 0) {
7278 goto out1;
7279 }
7280 }
7281
7282 /*
7283 * Allow the renaming of mount points.
7284 * - target must not exist
7285 * - target must reside in the same directory as source
7286 * - union mounts cannot be renamed
7287 * - "/" cannot be renamed
7288 *
7289 * XXX Handle this in VFS after a continued lookup (if we missed
7290 * in the cache to start off)
7291 *
7292 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
7293 * we'll skip past here. The file system is responsible for
7294 * checking that @tvp is not a descendent of @fvp and vice versa
7295 * so it should always return EINVAL if either @tvp or @fvp is the
7296 * root of a volume.
7297 */
7298 if ((fvp->v_flag & VROOT) &&
7299 (fvp->v_type == VDIR) &&
7300 (tvp == NULL) &&
7301 (fvp->v_mountedhere == NULL) &&
7302 (fdvp == tdvp) &&
7303 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
7304 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
7305 vnode_t coveredvp;
7306
7307 /* switch fvp to the covered vnode */
7308 coveredvp = fvp->v_mount->mnt_vnodecovered;
7309 if ( (vnode_getwithref(coveredvp)) ) {
7310 error = ENOENT;
7311 goto out1;
7312 }
7313 vnode_put(fvp);
7314
7315 fvp = coveredvp;
7316 mntrename = TRUE;
7317 }
7318 /*
7319 * Check for cross-device rename.
7320 */
7321 if ((fvp->v_mount != tdvp->v_mount) ||
7322 (tvp && (fvp->v_mount != tvp->v_mount))) {
7323 error = EXDEV;
7324 goto out1;
7325 }
7326
7327 /*
7328 * If source is the same as the destination (that is the
7329 * same inode number) then there is nothing to do...
7330 * EXCEPT if the underlying file system supports case
7331 * insensitivity and is case preserving. In this case
7332 * the file system needs to handle the special case of
7333 * getting the same vnode as target (fvp) and source (tvp).
7334 *
7335 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
7336 * and _PC_CASE_PRESERVING can have this exception, and they need to
7337 * handle the special case of getting the same vnode as target and
7338 * source. NOTE: Then the target is unlocked going into vnop_rename,
7339 * so not to cause locking problems. There is a single reference on tvp.
7340 *
7341 * NOTE - that fvp == tvp also occurs if they are hard linked and
7342 * that correct behaviour then is just to return success without doing
7343 * anything.
7344 *
7345 * XXX filesystem should take care of this itself, perhaps...
7346 */
7347 if (fvp == tvp && fdvp == tdvp) {
7348 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
7349 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
7350 fromnd->ni_cnd.cn_namelen)) {
7351 goto out1;
7352 }
7353 }
7354
7355 if (holding_mntlock && fvp->v_mount != locked_mp) {
7356 /*
7357 * we're holding a reference and lock
7358 * on locked_mp, but it no longer matches
7359 * what we want to do... so drop our hold
7360 */
7361 mount_unlock_renames(locked_mp);
7362 mount_drop(locked_mp, 0);
7363 holding_mntlock = 0;
7364 }
7365 if (tdvp != fdvp && fvp->v_type == VDIR) {
7366 /*
7367 * serialize renames that re-shape
7368 * the tree... if holding_mntlock is
7369 * set, then we're ready to go...
7370 * otherwise we
7371 * first need to drop the iocounts
7372 * we picked up, second take the
7373 * lock to serialize the access,
7374 * then finally start the lookup
7375 * process over with the lock held
7376 */
7377 if (!holding_mntlock) {
7378 /*
7379 * need to grab a reference on
7380 * the mount point before we
7381 * drop all the iocounts... once
7382 * the iocounts are gone, the mount
7383 * could follow
7384 */
7385 locked_mp = fvp->v_mount;
7386 mount_ref(locked_mp, 0);
7387
7388 /*
7389 * nameidone has to happen before we vnode_put(tvp)
7390 * since it may need to release the fs_nodelock on the tvp
7391 */
7392 nameidone(tond);
7393
7394 if (tvp)
7395 vnode_put(tvp);
7396 vnode_put(tdvp);
7397
7398 /*
7399 * nameidone has to happen before we vnode_put(fdvp)
7400 * since it may need to release the fs_nodelock on the fvp
7401 */
7402 nameidone(fromnd);
7403
7404 vnode_put(fvp);
7405 vnode_put(fdvp);
7406
7407 mount_lock_renames(locked_mp);
7408 holding_mntlock = 1;
7409
7410 goto retry;
7411 }
7412 } else {
7413 /*
7414 * when we dropped the iocounts to take
7415 * the lock, we allowed the identity of
7416 * the various vnodes to change... if they did,
7417 * we may no longer be dealing with a rename
7418 * that reshapes the tree... once we're holding
7419 * the iocounts, the vnodes can't change type
7420 * so we're free to drop the lock at this point
7421 * and continue on
7422 */
7423 if (holding_mntlock) {
7424 mount_unlock_renames(locked_mp);
7425 mount_drop(locked_mp, 0);
7426 holding_mntlock = 0;
7427 }
7428 }
7429
7430 // save these off so we can later verify that fvp is the same
7431 oname = fvp->v_name;
7432 oparent = fvp->v_parent;
7433
7434 skipped_lookup:
7435 #if CONFIG_FSE
7436 need_event = need_fsevent(FSE_RENAME, fdvp);
7437 if (need_event) {
7438 if (fvp) {
7439 get_fse_info(fvp, &from_finfo, ctx);
7440 } else {
7441 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
7442 if (error) {
7443 goto out1;
7444 }
7445
7446 fvap = &__rename_data->fv_attr;
7447 }
7448
7449 if (tvp) {
7450 get_fse_info(tvp, &to_finfo, ctx);
7451 } else if (batched) {
7452 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
7453 if (error) {
7454 goto out1;
7455 }
7456
7457 tvap = &__rename_data->tv_attr;
7458 }
7459 }
7460 #else
7461 need_event = 0;
7462 #endif /* CONFIG_FSE */
7463
7464 if (need_event || kauth_authorize_fileop_has_listeners()) {
7465 if (from_name == NULL) {
7466 GET_PATH(from_name);
7467 if (from_name == NULL) {
7468 error = ENOMEM;
7469 goto out1;
7470 }
7471 }
7472
7473 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
7474
7475 if (to_name == NULL) {
7476 GET_PATH(to_name);
7477 if (to_name == NULL) {
7478 error = ENOMEM;
7479 goto out1;
7480 }
7481 }
7482
7483 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
7484 }
7485 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
7486 tdvp, &tvp, &tond->ni_cnd, tvap,
7487 flags, ctx);
7488
7489 if (holding_mntlock) {
7490 /*
7491 * we can drop our serialization
7492 * lock now
7493 */
7494 mount_unlock_renames(locked_mp);
7495 mount_drop(locked_mp, 0);
7496 holding_mntlock = 0;
7497 }
7498 if (error) {
7499 if (error == EKEEPLOOKING) {
7500 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
7501 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
7502 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
7503 }
7504 }
7505
7506 fromnd->ni_vp = fvp;
7507 tond->ni_vp = tvp;
7508
7509 goto continue_lookup;
7510 }
7511
7512 /*
7513 * We may encounter a race in the VNOP where the destination didn't
7514 * exist when we did the namei, but it does by the time we go and
7515 * try to create the entry. In this case, we should re-drive this rename
7516 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
7517 * but other filesystems susceptible to this race could return it, too.
7518 */
7519 if (error == ERECYCLE) {
7520 do_retry = 1;
7521 }
7522
7523 /*
7524 * For compound VNOPs, the authorization callback may return
7525 * ENOENT in case of racing hardlink lookups hitting the name
7526 * cache, redrive the lookup.
7527 */
7528 if (batched && error == ENOENT) {
7529 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7530 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7531 do_retry = 1;
7532 retry_count += 1;
7533 }
7534 }
7535
7536 goto out1;
7537 }
7538
7539 /* call out to allow 3rd party notification of rename.
7540 * Ignore result of kauth_authorize_fileop call.
7541 */
7542 kauth_authorize_fileop(vfs_context_ucred(ctx),
7543 KAUTH_FILEOP_RENAME,
7544 (uintptr_t)from_name, (uintptr_t)to_name);
7545 if (flags & VFS_RENAME_SWAP) {
7546 kauth_authorize_fileop(vfs_context_ucred(ctx),
7547 KAUTH_FILEOP_RENAME,
7548 (uintptr_t)to_name, (uintptr_t)from_name);
7549 }
7550
7551 #if CONFIG_FSE
7552 if (from_name != NULL && to_name != NULL) {
7553 if (from_truncated || to_truncated) {
7554 // set it here since only the from_finfo gets reported up to user space
7555 from_finfo.mode |= FSE_TRUNCATED_PATH;
7556 }
7557
7558 if (tvap && tvp) {
7559 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
7560 }
7561 if (fvap) {
7562 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
7563 }
7564
7565 if (tvp) {
7566 add_fsevent(FSE_RENAME, ctx,
7567 FSE_ARG_STRING, from_len, from_name,
7568 FSE_ARG_FINFO, &from_finfo,
7569 FSE_ARG_STRING, to_len, to_name,
7570 FSE_ARG_FINFO, &to_finfo,
7571 FSE_ARG_DONE);
7572 if (flags & VFS_RENAME_SWAP) {
7573 /*
7574 * Strictly speaking, swap is the equivalent of
7575 * *three* renames. FSEvents clients should only take
7576 * the events as a hint, so we only bother reporting
7577 * two.
7578 */
7579 add_fsevent(FSE_RENAME, ctx,
7580 FSE_ARG_STRING, to_len, to_name,
7581 FSE_ARG_FINFO, &to_finfo,
7582 FSE_ARG_STRING, from_len, from_name,
7583 FSE_ARG_FINFO, &from_finfo,
7584 FSE_ARG_DONE);
7585 }
7586 } else {
7587 add_fsevent(FSE_RENAME, ctx,
7588 FSE_ARG_STRING, from_len, from_name,
7589 FSE_ARG_FINFO, &from_finfo,
7590 FSE_ARG_STRING, to_len, to_name,
7591 FSE_ARG_DONE);
7592 }
7593 }
7594 #endif /* CONFIG_FSE */
7595
7596 /*
7597 * update filesystem's mount point data
7598 */
7599 if (mntrename) {
7600 char *cp, *pathend, *mpname;
7601 char * tobuf;
7602 struct mount *mp;
7603 int maxlen;
7604 size_t len = 0;
7605
7606 mp = fvp->v_mountedhere;
7607
7608 if (vfs_busy(mp, LK_NOWAIT)) {
7609 error = EBUSY;
7610 goto out1;
7611 }
7612 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
7613
7614 if (UIO_SEG_IS_USER_SPACE(segflg))
7615 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
7616 else
7617 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
7618 if (!error) {
7619 /* find current mount point prefix */
7620 pathend = &mp->mnt_vfsstat.f_mntonname[0];
7621 for (cp = pathend; *cp != '\0'; ++cp) {
7622 if (*cp == '/')
7623 pathend = cp + 1;
7624 }
7625 /* find last component of target name */
7626 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
7627 if (*cp == '/')
7628 mpname = cp + 1;
7629 }
7630 /* append name to prefix */
7631 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
7632 bzero(pathend, maxlen);
7633 strlcpy(pathend, mpname, maxlen);
7634 }
7635 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
7636
7637 vfs_unbusy(mp);
7638 }
7639 /*
7640 * fix up name & parent pointers. note that we first
7641 * check that fvp has the same name/parent pointers it
7642 * had before the rename call... this is a 'weak' check
7643 * at best...
7644 *
7645 * XXX oparent and oname may not be set in the compound vnop case
7646 */
7647 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
7648 int update_flags;
7649
7650 update_flags = VNODE_UPDATE_NAME;
7651
7652 if (fdvp != tdvp)
7653 update_flags |= VNODE_UPDATE_PARENT;
7654
7655 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
7656 }
7657 out1:
7658 if (to_name != NULL) {
7659 RELEASE_PATH(to_name);
7660 to_name = NULL;
7661 }
7662 if (from_name != NULL) {
7663 RELEASE_PATH(from_name);
7664 from_name = NULL;
7665 }
7666 if (holding_mntlock) {
7667 mount_unlock_renames(locked_mp);
7668 mount_drop(locked_mp, 0);
7669 holding_mntlock = 0;
7670 }
7671 if (tdvp) {
7672 /*
7673 * nameidone has to happen before we vnode_put(tdvp)
7674 * since it may need to release the fs_nodelock on the tdvp
7675 */
7676 nameidone(tond);
7677
7678 if (tvp)
7679 vnode_put(tvp);
7680 vnode_put(tdvp);
7681 }
7682 if (fdvp) {
7683 /*
7684 * nameidone has to happen before we vnode_put(fdvp)
7685 * since it may need to release the fs_nodelock on the fdvp
7686 */
7687 nameidone(fromnd);
7688
7689 if (fvp)
7690 vnode_put(fvp);
7691 vnode_put(fdvp);
7692 }
7693
7694 /*
7695 * If things changed after we did the namei, then we will re-drive
7696 * this rename call from the top.
7697 */
7698 if (do_retry) {
7699 do_retry = 0;
7700 goto retry;
7701 }
7702
7703 FREE(__rename_data, M_TEMP);
7704 return (error);
7705 }
7706
7707 int
7708 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7709 {
7710 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7711 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7712 }
7713
7714 int renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
7715 {
7716 return renameat_internal(
7717 vfs_context_current(),
7718 uap->fromfd, uap->from,
7719 uap->tofd, uap->to,
7720 UIO_USERSPACE, uap->flags);
7721 }
7722
7723 int
7724 renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7725 {
7726 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7727 uap->tofd, uap->to, UIO_USERSPACE, 0));
7728 }
7729
7730 /*
7731 * Make a directory file.
7732 *
7733 * Returns: 0 Success
7734 * EEXIST
7735 * namei:???
7736 * vnode_authorize:???
7737 * vn_create:???
7738 */
7739 /* ARGSUSED */
7740 static int
7741 mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
7742 enum uio_seg segflg)
7743 {
7744 vnode_t vp, dvp;
7745 int error;
7746 int update_flags = 0;
7747 int batched;
7748 struct nameidata nd;
7749
7750 AUDIT_ARG(mode, vap->va_mode);
7751 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
7752 path, ctx);
7753 nd.ni_cnd.cn_flags |= WILLBEDIR;
7754 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
7755
7756 continue_lookup:
7757 error = nameiat(&nd, fd);
7758 if (error)
7759 return (error);
7760 dvp = nd.ni_dvp;
7761 vp = nd.ni_vp;
7762
7763 if (vp != NULL) {
7764 error = EEXIST;
7765 goto out;
7766 }
7767
7768 batched = vnode_compound_mkdir_available(dvp);
7769
7770 VATTR_SET(vap, va_type, VDIR);
7771
7772 /*
7773 * XXX
7774 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7775 * only get EXISTS or EISDIR for existing path components, and not that it could see
7776 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7777 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7778 */
7779 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
7780 if (error == EACCES || error == EPERM) {
7781 int error2;
7782
7783 nameidone(&nd);
7784 vnode_put(dvp);
7785 dvp = NULLVP;
7786
7787 /*
7788 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
7789 * rather than EACCESS if the target exists.
7790 */
7791 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
7792 path, ctx);
7793 error2 = nameiat(&nd, fd);
7794 if (error2) {
7795 goto out;
7796 } else {
7797 vp = nd.ni_vp;
7798 error = EEXIST;
7799 goto out;
7800 }
7801 }
7802
7803 goto out;
7804 }
7805
7806 /*
7807 * make the directory
7808 */
7809 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
7810 if (error == EKEEPLOOKING) {
7811 nd.ni_vp = vp;
7812 goto continue_lookup;
7813 }
7814
7815 goto out;
7816 }
7817
7818 // Make sure the name & parent pointers are hooked up
7819 if (vp->v_name == NULL)
7820 update_flags |= VNODE_UPDATE_NAME;
7821 if (vp->v_parent == NULLVP)
7822 update_flags |= VNODE_UPDATE_PARENT;
7823
7824 if (update_flags)
7825 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
7826
7827 #if CONFIG_FSE
7828 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
7829 #endif
7830
7831 out:
7832 /*
7833 * nameidone has to happen before we vnode_put(dvp)
7834 * since it may need to release the fs_nodelock on the dvp
7835 */
7836 nameidone(&nd);
7837
7838 if (vp)
7839 vnode_put(vp);
7840 if (dvp)
7841 vnode_put(dvp);
7842
7843 return (error);
7844 }
7845
7846 /*
7847 * mkdir_extended: Create a directory; with extended security (ACL).
7848 *
7849 * Parameters: p Process requesting to create the directory
7850 * uap User argument descriptor (see below)
7851 * retval (ignored)
7852 *
7853 * Indirect: uap->path Path of directory to create
7854 * uap->mode Access permissions to set
7855 * uap->xsecurity ACL to set
7856 *
7857 * Returns: 0 Success
7858 * !0 Not success
7859 *
7860 */
7861 int
7862 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
7863 {
7864 int ciferror;
7865 kauth_filesec_t xsecdst;
7866 struct vnode_attr va;
7867
7868 AUDIT_ARG(owner, uap->uid, uap->gid);
7869
7870 xsecdst = NULL;
7871 if ((uap->xsecurity != USER_ADDR_NULL) &&
7872 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
7873 return ciferror;
7874
7875 VATTR_INIT(&va);
7876 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7877 if (xsecdst != NULL)
7878 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
7879
7880 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7881 UIO_USERSPACE);
7882 if (xsecdst != NULL)
7883 kauth_filesec_free(xsecdst);
7884 return ciferror;
7885 }
7886
7887 int
7888 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
7889 {
7890 struct vnode_attr va;
7891
7892 VATTR_INIT(&va);
7893 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7894
7895 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7896 UIO_USERSPACE));
7897 }
7898
7899 int
7900 mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
7901 {
7902 struct vnode_attr va;
7903
7904 VATTR_INIT(&va);
7905 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7906
7907 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
7908 UIO_USERSPACE));
7909 }
7910
7911 static int
7912 rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
7913 enum uio_seg segflg)
7914 {
7915 vnode_t vp, dvp;
7916 int error;
7917 struct nameidata nd;
7918 char *path = NULL;
7919 int len=0;
7920 int has_listeners = 0;
7921 int need_event = 0;
7922 int truncated = 0;
7923 #if CONFIG_FSE
7924 struct vnode_attr va;
7925 #endif /* CONFIG_FSE */
7926 struct vnode_attr *vap = NULL;
7927 int restart_count = 0;
7928 int batched;
7929
7930 int restart_flag;
7931
7932 /*
7933 * This loop exists to restart rmdir in the unlikely case that two
7934 * processes are simultaneously trying to remove the same directory
7935 * containing orphaned appleDouble files.
7936 */
7937 do {
7938 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
7939 segflg, dirpath, ctx);
7940 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
7941 continue_lookup:
7942 restart_flag = 0;
7943 vap = NULL;
7944
7945 error = nameiat(&nd, fd);
7946 if (error)
7947 return (error);
7948
7949 dvp = nd.ni_dvp;
7950 vp = nd.ni_vp;
7951
7952 if (vp) {
7953 batched = vnode_compound_rmdir_available(vp);
7954
7955 if (vp->v_flag & VROOT) {
7956 /*
7957 * The root of a mounted filesystem cannot be deleted.
7958 */
7959 error = EBUSY;
7960 goto out;
7961 }
7962
7963 #if DEVELOPMENT || DEBUG
7964 /*
7965 * XXX VSWAP: Check for entitlements or special flag here
7966 * so we can restrict access appropriately.
7967 */
7968 #else /* DEVELOPMENT || DEBUG */
7969
7970 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
7971 error = EPERM;
7972 goto out;
7973 }
7974 #endif /* DEVELOPMENT || DEBUG */
7975
7976 /*
7977 * Removed a check here; we used to abort if vp's vid
7978 * was not the same as what we'd seen the last time around.
7979 * I do not think that check was valid, because if we retry
7980 * and all dirents are gone, the directory could legitimately
7981 * be recycled but still be present in a situation where we would
7982 * have had permission to delete. Therefore, we won't make
7983 * an effort to preserve that check now that we may not have a
7984 * vp here.
7985 */
7986
7987 if (!batched) {
7988 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
7989 if (error) {
7990 if (error == ENOENT) {
7991 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7992 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7993 restart_flag = 1;
7994 restart_count += 1;
7995 }
7996 }
7997 goto out;
7998 }
7999 }
8000 } else {
8001 batched = 1;
8002
8003 if (!vnode_compound_rmdir_available(dvp)) {
8004 panic("No error, but no compound rmdir?");
8005 }
8006 }
8007
8008 #if CONFIG_FSE
8009 fse_info finfo;
8010
8011 need_event = need_fsevent(FSE_DELETE, dvp);
8012 if (need_event) {
8013 if (!batched) {
8014 get_fse_info(vp, &finfo, ctx);
8015 } else {
8016 error = vfs_get_notify_attributes(&va);
8017 if (error) {
8018 goto out;
8019 }
8020
8021 vap = &va;
8022 }
8023 }
8024 #endif
8025 has_listeners = kauth_authorize_fileop_has_listeners();
8026 if (need_event || has_listeners) {
8027 if (path == NULL) {
8028 GET_PATH(path);
8029 if (path == NULL) {
8030 error = ENOMEM;
8031 goto out;
8032 }
8033 }
8034
8035 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
8036 #if CONFIG_FSE
8037 if (truncated) {
8038 finfo.mode |= FSE_TRUNCATED_PATH;
8039 }
8040 #endif
8041 }
8042
8043 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
8044 nd.ni_vp = vp;
8045 if (vp == NULLVP) {
8046 /* Couldn't find a vnode */
8047 goto out;
8048 }
8049
8050 if (error == EKEEPLOOKING) {
8051 goto continue_lookup;
8052 } else if (batched && error == ENOENT) {
8053 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
8054 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8055 /*
8056 * For compound VNOPs, the authorization callback
8057 * may return ENOENT in case of racing hard link lookups
8058 * redrive the lookup.
8059 */
8060 restart_flag = 1;
8061 restart_count += 1;
8062 goto out;
8063 }
8064 }
8065 #if CONFIG_APPLEDOUBLE
8066 /*
8067 * Special case to remove orphaned AppleDouble
8068 * files. I don't like putting this in the kernel,
8069 * but carbon does not like putting this in carbon either,
8070 * so here we are.
8071 */
8072 if (error == ENOTEMPTY) {
8073 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
8074 if (error == EBUSY) {
8075 goto out;
8076 }
8077
8078
8079 /*
8080 * Assuming everything went well, we will try the RMDIR again
8081 */
8082 if (!error)
8083 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
8084 }
8085 #endif /* CONFIG_APPLEDOUBLE */
8086 /*
8087 * Call out to allow 3rd party notification of delete.
8088 * Ignore result of kauth_authorize_fileop call.
8089 */
8090 if (!error) {
8091 if (has_listeners) {
8092 kauth_authorize_fileop(vfs_context_ucred(ctx),
8093 KAUTH_FILEOP_DELETE,
8094 (uintptr_t)vp,
8095 (uintptr_t)path);
8096 }
8097
8098 if (vp->v_flag & VISHARDLINK) {
8099 // see the comment in unlink1() about why we update
8100 // the parent of a hard link when it is removed
8101 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
8102 }
8103
8104 #if CONFIG_FSE
8105 if (need_event) {
8106 if (vap) {
8107 vnode_get_fse_info_from_vap(vp, &finfo, vap);
8108 }
8109 add_fsevent(FSE_DELETE, ctx,
8110 FSE_ARG_STRING, len, path,
8111 FSE_ARG_FINFO, &finfo,
8112 FSE_ARG_DONE);
8113 }
8114 #endif
8115 }
8116
8117 out:
8118 if (path != NULL) {
8119 RELEASE_PATH(path);
8120 path = NULL;
8121 }
8122 /*
8123 * nameidone has to happen before we vnode_put(dvp)
8124 * since it may need to release the fs_nodelock on the dvp
8125 */
8126 nameidone(&nd);
8127 vnode_put(dvp);
8128
8129 if (vp)
8130 vnode_put(vp);
8131
8132 if (restart_flag == 0) {
8133 wakeup_one((caddr_t)vp);
8134 return (error);
8135 }
8136 tsleep(vp, PVFS, "rm AD", 1);
8137
8138 } while (restart_flag != 0);
8139
8140 return (error);
8141
8142 }
8143
8144 /*
8145 * Remove a directory file.
8146 */
8147 /* ARGSUSED */
8148 int
8149 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
8150 {
8151 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
8152 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
8153 }
8154
/*
 * Length of a struct direntry record holding a name of `namlen' bytes:
 * sizeof(struct direntry) less (MAXPATHLEN - 1), plus namlen, rounded up
 * to a multiple of 8.
 */
#define DIRENT64_LEN(namlen)						\
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN - 1) + 7) & ~7)
8158
8159 errno_t
8160 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
8161 int *numdirent, vfs_context_t ctxp)
8162 {
8163 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
8164 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
8165 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
8166 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
8167 } else {
8168 size_t bufsize;
8169 void * bufptr;
8170 uio_t auio;
8171 struct direntry *entry64;
8172 struct dirent *dep;
8173 int bytesread;
8174 int error;
8175
8176 /*
8177 * Our kernel buffer needs to be smaller since re-packing
8178 * will expand each dirent. The worse case (when the name
8179 * length is 3) corresponds to a struct direntry size of 32
8180 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
8181 * (4-byte aligned). So having a buffer that is 3/8 the size
8182 * will prevent us from reading more than we can pack.
8183 *
8184 * Since this buffer is wired memory, we will limit the
8185 * buffer size to a maximum of 32K. We would really like to
8186 * use 32K in the MIN(), but we use magic number 87371 to
8187 * prevent uio_resid() * 3 / 8 from overflowing.
8188 */
8189 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
8190 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
8191 if (bufptr == NULL) {
8192 return ENOMEM;
8193 }
8194
8195 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
8196 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
8197 auio->uio_offset = uio->uio_offset;
8198
8199 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
8200
8201 dep = (struct dirent *)bufptr;
8202 bytesread = bufsize - uio_resid(auio);
8203
8204 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
8205 M_TEMP, M_WAITOK);
8206 /*
8207 * Convert all the entries and copy them out to user's buffer.
8208 */
8209 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
8210 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
8211
8212 bzero(entry64, enbufsize);
8213 /* Convert a dirent to a dirent64. */
8214 entry64->d_ino = dep->d_ino;
8215 entry64->d_seekoff = 0;
8216 entry64->d_reclen = enbufsize;
8217 entry64->d_namlen = dep->d_namlen;
8218 entry64->d_type = dep->d_type;
8219 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
8220
8221 /* Move to next entry. */
8222 dep = (struct dirent *)((char *)dep + dep->d_reclen);
8223
8224 /* Copy entry64 to user's buffer. */
8225 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
8226 }
8227
8228 /* Update the real offset using the offset we got from VNOP_READDIR. */
8229 if (error == 0) {
8230 uio->uio_offset = auio->uio_offset;
8231 }
8232 uio_free(auio);
8233 FREE(bufptr, M_TEMP);
8234 FREE(entry64, M_TEMP);
8235 return (error);
8236 }
8237 }
8238
8239 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
8240
8241 /*
8242 * Read a block of directory entries in a file system independent format.
8243 */
8244 static int
8245 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
8246 off_t *offset, int flags)
8247 {
8248 vnode_t vp;
8249 struct vfs_context context = *vfs_context_current(); /* local copy */
8250 struct fileproc *fp;
8251 uio_t auio;
8252 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8253 off_t loff;
8254 int error, eofflag, numdirent;
8255 char uio_buf[ UIO_SIZEOF(1) ];
8256
8257 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
8258 if (error) {
8259 return (error);
8260 }
8261 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8262 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8263 error = EBADF;
8264 goto out;
8265 }
8266
8267 if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
8268 bufsize = GETDIRENTRIES_MAXBUFSIZE;
8269
8270 #if CONFIG_MACF
8271 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
8272 if (error)
8273 goto out;
8274 #endif
8275 if ( (error = vnode_getwithref(vp)) ) {
8276 goto out;
8277 }
8278 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
8279
8280 unionread:
8281 if (vp->v_type != VDIR) {
8282 (void)vnode_put(vp);
8283 error = EINVAL;
8284 goto out;
8285 }
8286
8287 #if CONFIG_MACF
8288 error = mac_vnode_check_readdir(&context, vp);
8289 if (error != 0) {
8290 (void)vnode_put(vp);
8291 goto out;
8292 }
8293 #endif /* MAC */
8294
8295 loff = fp->f_fglob->fg_offset;
8296 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8297 uio_addiov(auio, bufp, bufsize);
8298
8299 if (flags & VNODE_READDIR_EXTENDED) {
8300 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
8301 fp->f_fglob->fg_offset = uio_offset(auio);
8302 } else {
8303 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
8304 fp->f_fglob->fg_offset = uio_offset(auio);
8305 }
8306 if (error) {
8307 (void)vnode_put(vp);
8308 goto out;
8309 }
8310
8311 if ((user_ssize_t)bufsize == uio_resid(auio)){
8312 if (union_dircheckp) {
8313 error = union_dircheckp(&vp, fp, &context);
8314 if (error == -1)
8315 goto unionread;
8316 if (error) {
8317 (void)vnode_put(vp);
8318 goto out;
8319 }
8320 }
8321
8322 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
8323 struct vnode *tvp = vp;
8324 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
8325 vnode_ref(vp);
8326 fp->f_fglob->fg_data = (caddr_t) vp;
8327 fp->f_fglob->fg_offset = 0;
8328 vnode_rele(tvp);
8329 vnode_put(tvp);
8330 goto unionread;
8331 }
8332 vp = tvp;
8333 }
8334 }
8335
8336 vnode_put(vp);
8337 if (offset) {
8338 *offset = loff;
8339 }
8340
8341 *bytesread = bufsize - uio_resid(auio);
8342 out:
8343 file_drop(fd);
8344 return (error);
8345 }
8346
8347
8348 int
8349 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
8350 {
8351 off_t offset;
8352 ssize_t bytesread;
8353 int error;
8354
8355 AUDIT_ARG(fd, uap->fd);
8356 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
8357
8358 if (error == 0) {
8359 if (proc_is64bit(p)) {
8360 user64_long_t base = (user64_long_t)offset;
8361 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
8362 } else {
8363 user32_long_t base = (user32_long_t)offset;
8364 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
8365 }
8366 *retval = bytesread;
8367 }
8368 return (error);
8369 }
8370
8371 int
8372 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
8373 {
8374 off_t offset;
8375 ssize_t bytesread;
8376 int error;
8377
8378 AUDIT_ARG(fd, uap->fd);
8379 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
8380
8381 if (error == 0) {
8382 *retval = bytesread;
8383 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
8384 }
8385 return (error);
8386 }
8387
8388
8389 /*
8390 * Set the mode mask for creation of filesystem nodes.
8391 * XXX implement xsecurity
8392 */
8393 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
8394 static int
8395 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
8396 {
8397 struct filedesc *fdp;
8398
8399 AUDIT_ARG(mask, newmask);
8400 proc_fdlock(p);
8401 fdp = p->p_fd;
8402 *retval = fdp->fd_cmask;
8403 fdp->fd_cmask = newmask & ALLPERMS;
8404 proc_fdunlock(p);
8405 return (0);
8406 }
8407
8408 /*
8409 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
8410 *
8411 * Parameters: p Process requesting to set the umask
8412 * uap User argument descriptor (see below)
8413 * retval umask of the process (parameter p)
8414 *
8415 * Indirect: uap->newmask umask to set
8416 * uap->xsecurity ACL to set
8417 *
8418 * Returns: 0 Success
8419 * !0 Not success
8420 *
8421 */
8422 int
8423 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
8424 {
8425 int ciferror;
8426 kauth_filesec_t xsecdst;
8427
8428 xsecdst = KAUTH_FILESEC_NONE;
8429 if (uap->xsecurity != USER_ADDR_NULL) {
8430 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
8431 return ciferror;
8432 } else {
8433 xsecdst = KAUTH_FILESEC_NONE;
8434 }
8435
8436 ciferror = umask1(p, uap->newmask, xsecdst, retval);
8437
8438 if (xsecdst != KAUTH_FILESEC_NONE)
8439 kauth_filesec_free(xsecdst);
8440 return ciferror;
8441 }
8442
8443 int
8444 umask(proc_t p, struct umask_args *uap, int32_t *retval)
8445 {
8446 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
8447 }
8448
8449 /*
8450 * Void all references to file by ripping underlying filesystem
8451 * away from vnode.
8452 */
8453 /* ARGSUSED */
8454 int
8455 revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
8456 {
8457 vnode_t vp;
8458 struct vnode_attr va;
8459 vfs_context_t ctx = vfs_context_current();
8460 int error;
8461 struct nameidata nd;
8462
8463 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
8464 uap->path, ctx);
8465 error = namei(&nd);
8466 if (error)
8467 return (error);
8468 vp = nd.ni_vp;
8469
8470 nameidone(&nd);
8471
8472 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
8473 error = ENOTSUP;
8474 goto out;
8475 }
8476
8477 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
8478 error = EBUSY;
8479 goto out;
8480 }
8481
8482 #if CONFIG_MACF
8483 error = mac_vnode_check_revoke(ctx, vp);
8484 if (error)
8485 goto out;
8486 #endif
8487
8488 VATTR_INIT(&va);
8489 VATTR_WANTED(&va, va_uid);
8490 if ((error = vnode_getattr(vp, &va, ctx)))
8491 goto out;
8492 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
8493 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
8494 goto out;
8495 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
8496 VNOP_REVOKE(vp, REVOKEALL, ctx);
8497 out:
8498 vnode_put(vp);
8499 return (error);
8500 }
8501
8502
8503 /*
8504 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
8505 * The following system calls are designed to support features
8506 * which are specific to the HFS & HFS Plus volume formats
8507 */
8508
8509
8510 /*
8511 * Obtain attribute information on objects in a directory while enumerating
8512 * the directory.
8513 */
8514 /* ARGSUSED */
8515 int
8516 getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
8517 {
8518 vnode_t vp;
8519 struct fileproc *fp;
8520 uio_t auio = NULL;
8521 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8522 uint32_t count, savecount;
8523 uint32_t newstate;
8524 int error, eofflag;
8525 uint32_t loff;
8526 struct attrlist attributelist;
8527 vfs_context_t ctx = vfs_context_current();
8528 int fd = uap->fd;
8529 char uio_buf[ UIO_SIZEOF(1) ];
8530 kauth_action_t action;
8531
8532 AUDIT_ARG(fd, fd);
8533
8534 /* Get the attributes into kernel space */
8535 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
8536 return(error);
8537 }
8538 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
8539 return(error);
8540 }
8541 savecount = count;
8542 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
8543 return (error);
8544 }
8545 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8546 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8547 error = EBADF;
8548 goto out;
8549 }
8550
8551
8552 #if CONFIG_MACF
8553 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
8554 fp->f_fglob);
8555 if (error)
8556 goto out;
8557 #endif
8558
8559
8560 if ( (error = vnode_getwithref(vp)) )
8561 goto out;
8562
8563 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
8564
8565 unionread:
8566 if (vp->v_type != VDIR) {
8567 (void)vnode_put(vp);
8568 error = EINVAL;
8569 goto out;
8570 }
8571
8572 #if CONFIG_MACF
8573 error = mac_vnode_check_readdir(ctx, vp);
8574 if (error != 0) {
8575 (void)vnode_put(vp);
8576 goto out;
8577 }
8578 #endif /* MAC */
8579
8580 /* set up the uio structure which will contain the users return buffer */
8581 loff = fp->f_fglob->fg_offset;
8582 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8583 uio_addiov(auio, uap->buffer, uap->buffersize);
8584
8585 /*
8586 * If the only item requested is file names, we can let that past with
8587 * just LIST_DIRECTORY. If they want any other attributes, that means
8588 * they need SEARCH as well.
8589 */
8590 action = KAUTH_VNODE_LIST_DIRECTORY;
8591 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
8592 attributelist.fileattr || attributelist.dirattr)
8593 action |= KAUTH_VNODE_SEARCH;
8594
8595 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
8596
8597 /* Believe it or not, uap->options only has 32-bits of valid
8598 * info, so truncate before extending again */
8599
8600 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
8601 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
8602 }
8603
8604 if (error) {
8605 (void) vnode_put(vp);
8606 goto out;
8607 }
8608
8609 /*
8610 * If we've got the last entry of a directory in a union mount
8611 * then reset the eofflag and pretend there's still more to come.
8612 * The next call will again set eofflag and the buffer will be empty,
8613 * so traverse to the underlying directory and do the directory
8614 * read there.
8615 */
8616 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
8617 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
8618 eofflag = 0;
8619 } else { // Empty buffer
8620 struct vnode *tvp = vp;
8621 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
8622 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
8623 fp->f_fglob->fg_data = (caddr_t) vp;
8624 fp->f_fglob->fg_offset = 0; // reset index for new dir
8625 count = savecount;
8626 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
8627 vnode_put(tvp);
8628 goto unionread;
8629 }
8630 vp = tvp;
8631 }
8632 }
8633
8634 (void)vnode_put(vp);
8635
8636 if (error)
8637 goto out;
8638 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
8639
8640 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
8641 goto out;
8642 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
8643 goto out;
8644 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
8645 goto out;
8646
8647 *retval = eofflag; /* similar to getdirentries */
8648 error = 0;
8649 out:
8650 file_drop(fd);
8651 return (error); /* return error earlier, an retval of 0 or 1 now */
8652
8653 } /* end of getdirentriesattr system call */
8654
8655 /*
8656 * Exchange data between two files
8657 */
8658
8659 /* ARGSUSED */
8660 int
8661 exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
8662 {
8663
8664 struct nameidata fnd, snd;
8665 vfs_context_t ctx = vfs_context_current();
8666 vnode_t fvp;
8667 vnode_t svp;
8668 int error;
8669 u_int32_t nameiflags;
8670 char *fpath = NULL;
8671 char *spath = NULL;
8672 int flen=0, slen=0;
8673 int from_truncated=0, to_truncated=0;
8674 #if CONFIG_FSE
8675 fse_info f_finfo, s_finfo;
8676 #endif
8677
8678 nameiflags = 0;
8679 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8680
8681 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
8682 UIO_USERSPACE, uap->path1, ctx);
8683
8684 error = namei(&fnd);
8685 if (error)
8686 goto out2;
8687
8688 nameidone(&fnd);
8689 fvp = fnd.ni_vp;
8690
8691 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
8692 UIO_USERSPACE, uap->path2, ctx);
8693
8694 error = namei(&snd);
8695 if (error) {
8696 vnode_put(fvp);
8697 goto out2;
8698 }
8699 nameidone(&snd);
8700 svp = snd.ni_vp;
8701
8702 /*
8703 * if the files are the same, return an inval error
8704 */
8705 if (svp == fvp) {
8706 error = EINVAL;
8707 goto out;
8708 }
8709
8710 /*
8711 * if the files are on different volumes, return an error
8712 */
8713 if (svp->v_mount != fvp->v_mount) {
8714 error = EXDEV;
8715 goto out;
8716 }
8717
8718 /* If they're not files, return an error */
8719 if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
8720 error = EINVAL;
8721 goto out;
8722 }
8723
8724 #if CONFIG_MACF
8725 error = mac_vnode_check_exchangedata(ctx,
8726 fvp, svp);
8727 if (error)
8728 goto out;
8729 #endif
8730 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
8731 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
8732 goto out;
8733
8734 if (
8735 #if CONFIG_FSE
8736 need_fsevent(FSE_EXCHANGE, fvp) ||
8737 #endif
8738 kauth_authorize_fileop_has_listeners()) {
8739 GET_PATH(fpath);
8740 GET_PATH(spath);
8741 if (fpath == NULL || spath == NULL) {
8742 error = ENOMEM;
8743 goto out;
8744 }
8745
8746 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
8747 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
8748
8749 #if CONFIG_FSE
8750 get_fse_info(fvp, &f_finfo, ctx);
8751 get_fse_info(svp, &s_finfo, ctx);
8752 if (from_truncated || to_truncated) {
8753 // set it here since only the f_finfo gets reported up to user space
8754 f_finfo.mode |= FSE_TRUNCATED_PATH;
8755 }
8756 #endif
8757 }
8758 /* Ok, make the call */
8759 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
8760
8761 if (error == 0) {
8762 const char *tmpname;
8763
8764 if (fpath != NULL && spath != NULL) {
8765 /* call out to allow 3rd party notification of exchangedata.
8766 * Ignore result of kauth_authorize_fileop call.
8767 */
8768 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
8769 (uintptr_t)fpath, (uintptr_t)spath);
8770 }
8771 name_cache_lock();
8772
8773 tmpname = fvp->v_name;
8774 fvp->v_name = svp->v_name;
8775 svp->v_name = tmpname;
8776
8777 if (fvp->v_parent != svp->v_parent) {
8778 vnode_t tmp;
8779
8780 tmp = fvp->v_parent;
8781 fvp->v_parent = svp->v_parent;
8782 svp->v_parent = tmp;
8783 }
8784 name_cache_unlock();
8785
8786 #if CONFIG_FSE
8787 if (fpath != NULL && spath != NULL) {
8788 add_fsevent(FSE_EXCHANGE, ctx,
8789 FSE_ARG_STRING, flen, fpath,
8790 FSE_ARG_FINFO, &f_finfo,
8791 FSE_ARG_STRING, slen, spath,
8792 FSE_ARG_FINFO, &s_finfo,
8793 FSE_ARG_DONE);
8794 }
8795 #endif
8796 }
8797
8798 out:
8799 if (fpath != NULL)
8800 RELEASE_PATH(fpath);
8801 if (spath != NULL)
8802 RELEASE_PATH(spath);
8803 vnode_put(svp);
8804 vnode_put(fvp);
8805 out2:
8806 return (error);
8807 }
8808
8809 /*
8810 * Return (in MB) the amount of freespace on the given vnode's volume.
8811 */
8812 uint32_t freespace_mb(vnode_t vp);
8813
8814 uint32_t
8815 freespace_mb(vnode_t vp)
8816 {
8817 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
8818 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
8819 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
8820 }
8821
8822 #if CONFIG_SEARCHFS
8823
8824 /* ARGSUSED */
8825
8826 int
8827 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
8828 {
8829 vnode_t vp, tvp;
8830 int i, error=0;
8831 int fserror = 0;
8832 struct nameidata nd;
8833 struct user64_fssearchblock searchblock;
8834 struct searchstate *state;
8835 struct attrlist *returnattrs;
8836 struct timeval timelimit;
8837 void *searchparams1,*searchparams2;
8838 uio_t auio = NULL;
8839 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8840 uint32_t nummatches;
8841 int mallocsize;
8842 uint32_t nameiflags;
8843 vfs_context_t ctx = vfs_context_current();
8844 char uio_buf[ UIO_SIZEOF(1) ];
8845
8846 /* Start by copying in fsearchblock parameter list */
8847 if (IS_64BIT_PROCESS(p)) {
8848 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
8849 timelimit.tv_sec = searchblock.timelimit.tv_sec;
8850 timelimit.tv_usec = searchblock.timelimit.tv_usec;
8851 }
8852 else {
8853 struct user32_fssearchblock tmp_searchblock;
8854
8855 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
8856 // munge into 64-bit version
8857 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
8858 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
8859 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
8860 searchblock.maxmatches = tmp_searchblock.maxmatches;
8861 /*
8862 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8863 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8864 */
8865 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
8866 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
8867 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
8868 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
8869 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
8870 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
8871 searchblock.searchattrs = tmp_searchblock.searchattrs;
8872 }
8873 if (error)
8874 return(error);
8875
8876 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
8877 */
8878 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
8879 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
8880 return(EINVAL);
8881
8882 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8883 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
8884 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8885 /* block. */
8886 /* */
8887 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8888 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8889 /* assumes the size is still 556 bytes it will continue to work */
8890
8891 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
8892 sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
8893
8894 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
8895
8896 /* Now set up the various pointers to the correct place in our newly allocated memory */
8897
8898 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
8899 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
8900 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
8901
8902 /* Now copy in the stuff given our local variables. */
8903
8904 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
8905 goto freeandexit;
8906
8907 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
8908 goto freeandexit;
8909
8910 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
8911 goto freeandexit;
8912
8913 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
8914 goto freeandexit;
8915
8916 /*
8917 * When searching a union mount, need to set the
8918 * start flag at the first call on each layer to
8919 * reset state for the new volume.
8920 */
8921 if (uap->options & SRCHFS_START)
8922 state->ss_union_layer = 0;
8923 else
8924 uap->options |= state->ss_union_flags;
8925 state->ss_union_flags = 0;
8926
8927 /*
8928 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8929 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
8930 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8931 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
8932 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8933 */
8934
8935 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
8936 attrreference_t* string_ref;
8937 u_int32_t* start_length;
8938 user64_size_t param_length;
8939
8940 /* validate searchparams1 */
8941 param_length = searchblock.sizeofsearchparams1;
8942 /* skip the word that specifies length of the buffer */
8943 start_length= (u_int32_t*) searchparams1;
8944 start_length= start_length+1;
8945 string_ref= (attrreference_t*) start_length;
8946
8947 /* ensure no negative offsets or too big offsets */
8948 if (string_ref->attr_dataoffset < 0 ) {
8949 error = EINVAL;
8950 goto freeandexit;
8951 }
8952 if (string_ref->attr_length > MAXPATHLEN) {
8953 error = EINVAL;
8954 goto freeandexit;
8955 }
8956
8957 /* Check for pointer overflow in the string ref */
8958 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
8959 error = EINVAL;
8960 goto freeandexit;
8961 }
8962
8963 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
8964 error = EINVAL;
8965 goto freeandexit;
8966 }
8967 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
8968 error = EINVAL;
8969 goto freeandexit;
8970 }
8971 }
8972
8973 /* set up the uio structure which will contain the users return buffer */
8974 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8975 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
8976
8977 nameiflags = 0;
8978 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8979 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
8980 UIO_USERSPACE, uap->path, ctx);
8981
8982 error = namei(&nd);
8983 if (error)
8984 goto freeandexit;
8985 vp = nd.ni_vp;
8986 nameidone(&nd);
8987
8988 /*
8989 * Switch to the root vnode for the volume
8990 */
8991 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
8992 vnode_put(vp);
8993 if (error)
8994 goto freeandexit;
8995 vp = tvp;
8996
8997 /*
8998 * If it's a union mount, the path lookup takes
8999 * us to the top layer. But we may need to descend
9000 * to a lower layer. For non-union mounts the layer
9001 * is always zero.
9002 */
9003 for (i = 0; i < (int) state->ss_union_layer; i++) {
9004 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
9005 break;
9006 tvp = vp;
9007 vp = vp->v_mount->mnt_vnodecovered;
9008 if (vp == NULL) {
9009 vnode_put(tvp);
9010 error = ENOENT;
9011 goto freeandexit;
9012 }
9013 error = vnode_getwithref(vp);
9014 vnode_put(tvp);
9015 if (error)
9016 goto freeandexit;
9017 }
9018
9019 #if CONFIG_MACF
9020 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
9021 if (error) {
9022 vnode_put(vp);
9023 goto freeandexit;
9024 }
9025 #endif
9026
9027
9028 /*
9029 * If searchblock.maxmatches == 0, then skip the search. This has happened
9030 * before and sometimes the underlying code doesnt deal with it well.
9031 */
9032 if (searchblock.maxmatches == 0) {
9033 nummatches = 0;
9034 goto saveandexit;
9035 }
9036
9037 /*
9038 * Allright, we have everything we need, so lets make that call.
9039 *
9040 * We keep special track of the return value from the file system:
9041 * EAGAIN is an acceptable error condition that shouldn't keep us
9042 * from copying out any results...
9043 */
9044
9045 fserror = VNOP_SEARCHFS(vp,
9046 searchparams1,
9047 searchparams2,
9048 &searchblock.searchattrs,
9049 (u_long)searchblock.maxmatches,
9050 &timelimit,
9051 returnattrs,
9052 &nummatches,
9053 (u_long)uap->scriptcode,
9054 (u_long)uap->options,
9055 auio,
9056 (struct searchstate *) &state->ss_fsstate,
9057 ctx);
9058
9059 /*
9060 * If it's a union mount we need to be called again
9061 * to search the mounted-on filesystem.
9062 */
9063 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
9064 state->ss_union_flags = SRCHFS_START;
9065 state->ss_union_layer++; // search next layer down
9066 fserror = EAGAIN;
9067 }
9068
9069 saveandexit:
9070
9071 vnode_put(vp);
9072
9073 /* Now copy out the stuff that needs copying out. That means the number of matches, the
9074 search state. Everything was already put into he return buffer by the vop call. */
9075
9076 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
9077 goto freeandexit;
9078
9079 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
9080 goto freeandexit;
9081
9082 error = fserror;
9083
9084 freeandexit:
9085
9086 FREE(searchparams1,M_TEMP);
9087
9088 return(error);
9089
9090
9091 } /* end of searchfs system call */
9092
9093 #else /* CONFIG_SEARCHFS */
9094
9095 int
9096 searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
9097 {
9098 return (ENOTSUP);
9099 }
9100
9101 #endif /* CONFIG_SEARCHFS */
9102
9103
/*
 * Lock group/attribute objects for the namespace handler mutexes.
 * All three are allocated in nspace_handler_init().
 */
9104 lck_grp_attr_t * nspace_group_attr;
9105 lck_attr_t * nspace_lock_attr;
9106 lck_grp_t * nspace_mutex_group;
9107
/*
 * Both mutexes are initialized in nspace_handler_init().
 * nspace_handler_lock is held by nspace_proc_exit() while it walks
 * nspace_items[].
 */
9108 lck_mtx_t nspace_handler_lock;
9109 lck_mtx_t nspace_handler_exclusion_lock;
9110
/* Reset to 0 by nspace_proc_exit() when the exiting process was the snapshot handler. */
9111 time_t snapshot_timestamp=0;
/* NOTE(review): not referenced in this part of the file; purpose to be confirmed. */
9112 int nspace_allow_virtual_devs=0;
9113
9114 void nspace_handler_init(void);
9115
/*
 * One slot of the nspace_items[] table.
 *
 * flags holds NSPACE_ITEM_* bits and vp is the vnode the item refers to
 * (nspace_proc_exit() tests both).  NOTE(review): the remaining fields are
 * not exercised in this part of the file; their exact meaning should be
 * confirmed against the code that fills the table.
 */
9116 typedef struct nspace_item_info {
9117 struct vnode *vp;
9118 void *arg;
9119 uint64_t op;
9120 uint32_t vid;
9121 uint32_t flags;
9122 uint32_t token;
9123 uint32_t refcount;
9124 } nspace_item_info;
9125
/* Fixed-size item table; zeroed in nspace_handler_init(). */
9126 #define MAX_NSPACE_ITEMS 128
9127 nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
9128 uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
9129 uint32_t nspace_token_id=0;
9130 uint32_t nspace_handler_timeout = 15; // seconds
9131
/*
 * Bits kept in nspace_item_info.flags.  The first group is per-item state
 * (nspace_proc_exit() looks for NEW and PROCESSING items).
 */
9132 #define NSPACE_ITEM_NEW 0x0001
9133 #define NSPACE_ITEM_PROCESSING 0x0002
9134 #define NSPACE_ITEM_DEAD 0x0004
9135 #define NSPACE_ITEM_CANCELLED 0x0008
9136 #define NSPACE_ITEM_DONE 0x0010
9137 #define NSPACE_ITEM_RESET_TIMER 0x0020
9138
/* Event type of the item; nspace_item_flags_for_type() maps a handler type to one of these. */
9139 #define NSPACE_ITEM_NSPACE_EVENT 0x0040
9140 #define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
9141
/* Mask that selects the event-type bits out of a flags word. */
9142 #define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
9143
9144 //#pragma optimization_level 0
9145
/*
 * Kinds of namespace handler.  The values double as indices into
 * nspace_handlers[]; NSPACE_HANDLER_COUNT is the number of kinds.
 */
9146 typedef enum {
9147 NSPACE_HANDLER_NSPACE = 0,
9148 NSPACE_HANDLER_SNAPSHOT = 1,
9149
9150 NSPACE_HANDLER_COUNT,
9151 } nspace_type_t;
9152
/*
 * Registration record for one handler kind.  handler_tid and handler_proc
 * are cleared by nspace_proc_exit() when the registered process exits.
 * NOTE(review): handler_busy is not used in this part of the file.
 */
9153 typedef struct {
9154 uint64_t handler_tid;
9155 struct proc *handler_proc;
9156 int handler_busy;
9157 } nspace_handler_t;
9158
/* One record per nspace_type_t value. */
9159 nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
9160
9161 /* namespace fsctl functions */
/*
 * Forward declarations.  The first five are defined as static inline
 * helpers directly below; the others are not defined in this part of the
 * file.
 */
9162 static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
9163 static int nspace_item_flags_for_type(nspace_type_t nspace_type);
9164 static int nspace_open_flags_for_type(nspace_type_t nspace_type);
9165 static nspace_type_t nspace_type_for_op(uint64_t op);
9166 static int nspace_is_special_process(struct proc *proc);
9167 static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
9168 static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
9169 static int validate_namespace_args (int is64bit, int size);
9170 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
9171
9172
9173 static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
9174 {
9175 switch(nspace_type) {
9176 case NSPACE_HANDLER_NSPACE:
9177 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
9178 case NSPACE_HANDLER_SNAPSHOT:
9179 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
9180 default:
9181 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
9182 return 0;
9183 }
9184 }
9185
9186 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
9187 {
9188 switch(nspace_type) {
9189 case NSPACE_HANDLER_NSPACE:
9190 return NSPACE_ITEM_NSPACE_EVENT;
9191 case NSPACE_HANDLER_SNAPSHOT:
9192 return NSPACE_ITEM_SNAPSHOT_EVENT;
9193 default:
9194 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
9195 return 0;
9196 }
9197 }
9198
9199 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
9200 {
9201 switch(nspace_type) {
9202 case NSPACE_HANDLER_NSPACE:
9203 return FREAD | FWRITE | O_EVTONLY;
9204 case NSPACE_HANDLER_SNAPSHOT:
9205 return FREAD | O_EVTONLY;
9206 default:
9207 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
9208 return 0;
9209 }
9210 }
9211
9212 static inline nspace_type_t nspace_type_for_op(uint64_t op)
9213 {
9214 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
9215 case NAMESPACE_HANDLER_NSPACE_EVENT:
9216 return NSPACE_HANDLER_NSPACE;
9217 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
9218 return NSPACE_HANDLER_SNAPSHOT;
9219 default:
9220 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
9221 return NSPACE_HANDLER_NSPACE;
9222 }
9223 }
9224
9225 static inline int nspace_is_special_process(struct proc *proc)
9226 {
9227 int i;
9228 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9229 if (proc == nspace_handlers[i].handler_proc)
9230 return 1;
9231 }
9232 return 0;
9233 }
9234
9235 void
9236 nspace_handler_init(void)
9237 {
9238 nspace_lock_attr = lck_attr_alloc_init();
9239 nspace_group_attr = lck_grp_attr_alloc_init();
9240 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
9241 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
9242 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
9243 memset(&nspace_items[0], 0, sizeof(nspace_items));
9244 }
9245
9246 void
9247 nspace_proc_exit(struct proc *p)
9248 {
9249 int i, event_mask = 0;
9250
9251 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9252 if (p == nspace_handlers[i].handler_proc) {
9253 event_mask |= nspace_item_flags_for_type(i);
9254 nspace_handlers[i].handler_tid = 0;
9255 nspace_handlers[i].handler_proc = NULL;
9256 }
9257 }
9258
9259 if (event_mask == 0) {
9260 return;
9261 }
9262
9263 lck_mtx_lock(&nspace_handler_lock);
9264 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
9265 // if this process was the snapshot handler, zero snapshot_timeout
9266 snapshot_timestamp = 0;
9267 }
9268
9269 //
9270 // unblock anyone that's waiting for the handler that died
9271 //
9272 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9273 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
9274
9275 if ( nspace_items[i].flags & event_mask ) {
9276
9277 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9278 vnode_lock_spin(nspace_items[i].vp);
9279 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9280 vnode_unlock(nspace_items[i].vp);
9281 }
9282 nspace_items[i].vp = NULL;
9283 nspace_items[i].vid = 0;
9284 nspace_items[i].flags = NSPACE_ITEM_DONE;
9285 nspace_items[i].token = 0;
9286
9287 wakeup((caddr_t)&(nspace_items[i].vp));
9288 }
9289 }
9290 }
9291
9292 wakeup((caddr_t)&nspace_item_idx);
9293 lck_mtx_unlock(&nspace_handler_lock);
9294 }
9295
9296
9297 int
9298 resolve_nspace_item(struct vnode *vp, uint64_t op)
9299 {
9300 return resolve_nspace_item_ext(vp, op, NULL);
9301 }
9302
9303 int
9304 resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
9305 {
9306 int i, error, keep_waiting;
9307 struct timespec ts;
9308 nspace_type_t nspace_type = nspace_type_for_op(op);
9309
9310 // only allow namespace events on regular files, directories and symlinks.
9311 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
9312 return 0;
9313 }
9314
9315 //
9316 // if this is a snapshot event and the vnode is on a
9317 // disk image just pretend nothing happened since any
9318 // change to the disk image will cause the disk image
9319 // itself to get backed up and this avoids multi-way
9320 // deadlocks between the snapshot handler and the ever
9321 // popular diskimages-helper process. the variable
9322 // nspace_allow_virtual_devs allows this behavior to
9323 // be overridden (for use by the Mobile TimeMachine
9324 // testing infrastructure which uses disk images)
9325 //
9326 if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
9327 && (vp->v_mount != NULL)
9328 && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
9329 && !nspace_allow_virtual_devs) {
9330
9331 return 0;
9332 }
9333
9334 // if (thread_tid(current_thread()) == namespace_handler_tid) {
9335 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9336 return 0;
9337 }
9338
9339 if (nspace_is_special_process(current_proc())) {
9340 return EDEADLK;
9341 }
9342
9343 lck_mtx_lock(&nspace_handler_lock);
9344
9345 retry:
9346 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9347 if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
9348 break;
9349 }
9350 }
9351
9352 if (i >= MAX_NSPACE_ITEMS) {
9353 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9354 if (nspace_items[i].flags == 0) {
9355 break;
9356 }
9357 }
9358 } else {
9359 nspace_items[i].refcount++;
9360 }
9361
9362 if (i >= MAX_NSPACE_ITEMS) {
9363 ts.tv_sec = nspace_handler_timeout;
9364 ts.tv_nsec = 0;
9365
9366 error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
9367 if (error == 0) {
9368 // an entry got free'd up, go see if we can get a slot
9369 goto retry;
9370 } else {
9371 lck_mtx_unlock(&nspace_handler_lock);
9372 return error;
9373 }
9374 }
9375
9376 //
9377 // if it didn't already exist, add it. if it did exist
9378 // we'll get woken up when someone does a wakeup() on
9379 // the slot in the nspace_items table.
9380 //
9381 if (vp != nspace_items[i].vp) {
9382 nspace_items[i].vp = vp;
9383 nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
9384 nspace_items[i].op = op;
9385 nspace_items[i].vid = vnode_vid(vp);
9386 nspace_items[i].flags = NSPACE_ITEM_NEW;
9387 nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
9388 if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
9389 if (arg) {
9390 vnode_lock_spin(vp);
9391 vp->v_flag |= VNEEDSSNAPSHOT;
9392 vnode_unlock(vp);
9393 }
9394 }
9395
9396 nspace_items[i].token = 0;
9397 nspace_items[i].refcount = 1;
9398
9399 wakeup((caddr_t)&nspace_item_idx);
9400 }
9401
9402 //
9403 // Now go to sleep until the handler does a wakeup on this
9404 // slot in the nspace_items table (or we timeout).
9405 //
9406 keep_waiting = 1;
9407 while(keep_waiting) {
9408 ts.tv_sec = nspace_handler_timeout;
9409 ts.tv_nsec = 0;
9410 error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
9411
9412 if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
9413 error = 0;
9414 } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
9415 error = nspace_items[i].token;
9416 } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
9417 if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
9418 nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
9419 continue;
9420 } else {
9421 error = ETIMEDOUT;
9422 }
9423 } else if (error == 0) {
9424 // hmmm, why did we get woken up?
9425 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
9426 nspace_items[i].token);
9427 }
9428
9429 if (--nspace_items[i].refcount == 0) {
9430 nspace_items[i].vp = NULL; // clear this so that no one will match on it again
9431 nspace_items[i].arg = NULL;
9432 nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
9433 nspace_items[i].flags = 0; // this clears it for re-use
9434 }
9435 wakeup(&nspace_token_id);
9436 keep_waiting = 0;
9437 }
9438
9439 lck_mtx_unlock(&nspace_handler_lock);
9440
9441 return error;
9442 }
9443
9444 int nspace_snapshot_event(vnode_t vp, time_t ctime, uint64_t op_type, void *arg)
9445 {
9446 int snapshot_error = 0;
9447
9448 if (vp == NULL) {
9449 return 0;
9450 }
9451
9452 /* Swap files are special; skip them */
9453 if (vnode_isswap(vp)) {
9454 return 0;
9455 }
9456
9457 if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
9458 // the change time is within this epoch
9459 int error;
9460
9461 error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
9462 if (error == EDEADLK) {
9463 snapshot_error = 0;
9464 } else if (error) {
9465 if (error == EAGAIN) {
9466 printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
9467 } else if (error == EINTR) {
9468 // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
9469 snapshot_error = EINTR;
9470 }
9471 }
9472 }
9473
9474 return snapshot_error;
9475 }
9476
9477 int
9478 get_nspace_item_status(struct vnode *vp, int32_t *status)
9479 {
9480 int i;
9481
9482 lck_mtx_lock(&nspace_handler_lock);
9483 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9484 if (nspace_items[i].vp == vp) {
9485 break;
9486 }
9487 }
9488
9489 if (i >= MAX_NSPACE_ITEMS) {
9490 lck_mtx_unlock(&nspace_handler_lock);
9491 return ENOENT;
9492 }
9493
9494 *status = nspace_items[i].flags;
9495 lck_mtx_unlock(&nspace_handler_lock);
9496 return 0;
9497 }
9498
9499
#if 0
/*
 * (compiled out) Format a "/.vol/<fsid>/<fileid>" path for `vp' into
 * `path'.  On entry *len is the buffer size; on return it holds the
 * snprintf() result plus one.  Returns 0 on success, or -1 (with a
 * placeholder path) when the vnode attributes cannot be fetched.
 */
static int
build_volfs_path(struct vnode *vp, char *path, int *len)
{
	struct vnode_attr va;

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_fileid);

	if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
		*len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
		return -1;
	}

	*len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
	return 0;
}
#endif
9522
9523 //
9524 // Note: this function does NOT check permissions on all of the
9525 // parent directories leading to this vnode. It should only be
9526 // called on behalf of a root process. Otherwise a process may
9527 // get access to a file because the file itself is readable even
9528 // though its parent directories would prevent access.
9529 //
9530 static int
9531 vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
9532 {
9533 int error, action;
9534
9535 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9536 return error;
9537 }
9538
9539 #if CONFIG_MACF
9540 error = mac_vnode_check_open(ctx, vp, fmode);
9541 if (error)
9542 return error;
9543 #endif
9544
9545 /* compute action to be authorized */
9546 action = 0;
9547 if (fmode & FREAD) {
9548 action |= KAUTH_VNODE_READ_DATA;
9549 }
9550 if (fmode & (FWRITE | O_TRUNC)) {
9551 /*
9552 * If we are writing, appending, and not truncating,
9553 * indicate that we are appending so that if the
9554 * UF_APPEND or SF_APPEND bits are set, we do not deny
9555 * the open.
9556 */
9557 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
9558 action |= KAUTH_VNODE_APPEND_DATA;
9559 } else {
9560 action |= KAUTH_VNODE_WRITE_DATA;
9561 }
9562 }
9563
9564 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
9565 return error;
9566
9567
9568 //
9569 // if the vnode is tagged VOPENEVT and the current process
9570 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
9571 // flag to the open mode so that this open won't count against
9572 // the vnode when carbon delete() does a vnode_isinuse() to see
9573 // if a file is currently in use. this allows spotlight
9574 // importers to not interfere with carbon apps that depend on
9575 // the no-delete-if-busy semantics of carbon delete().
9576 //
9577 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
9578 fmode |= O_EVTONLY;
9579 }
9580
9581 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
9582 return error;
9583 }
9584 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
9585 VNOP_CLOSE(vp, fmode, ctx);
9586 return error;
9587 }
9588
9589 /* Call out to allow 3rd party notification of open.
9590 * Ignore result of kauth_authorize_fileop call.
9591 */
9592 #if CONFIG_MACF
9593 mac_vnode_notify_open(ctx, vp, fmode);
9594 #endif
9595 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
9596 (uintptr_t)vp, 0);
9597
9598
9599 return 0;
9600 }
9601
9602 static int
9603 wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
9604 {
9605 int i;
9606 int error = 0;
9607 int unblock = 0;
9608 task_t curtask;
9609
9610 lck_mtx_lock(&nspace_handler_exclusion_lock);
9611 if (nspace_handlers[nspace_type].handler_busy) {
9612 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9613 return EBUSY;
9614 }
9615
9616 nspace_handlers[nspace_type].handler_busy = 1;
9617 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9618
9619 /*
9620 * Any process that gets here will be one of the namespace handlers.
9621 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
9622 * as we can cause deadlocks to occur, because the namespace handler may prevent
9623 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
9624 * process.
9625 */
9626 curtask = current_task();
9627 bsd_set_dependency_capable (curtask);
9628
9629 lck_mtx_lock(&nspace_handler_lock);
9630 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9631 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
9632 nspace_handlers[nspace_type].handler_proc = current_proc();
9633 }
9634
9635 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
9636 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9637 error = EINVAL;
9638 }
9639
9640 while (error == 0) {
9641
9642 /* Try to find matching namespace item */
9643 for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
9644 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9645 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9646 break;
9647 }
9648 }
9649 }
9650
9651 if (i >= MAX_NSPACE_ITEMS) {
9652 /* Nothing is there yet. Wait for wake up and retry */
9653 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
9654 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9655 /* Prevent infinite loop if snapshot handler exited */
9656 error = EINVAL;
9657 break;
9658 }
9659 continue;
9660 }
9661
9662 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
9663 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
9664 nspace_items[i].token = ++nspace_token_id;
9665
9666 assert(nspace_items[i].vp);
9667 struct fileproc *fp;
9668 int32_t indx;
9669 int32_t fmode;
9670 struct proc *p = current_proc();
9671 vfs_context_t ctx = vfs_context_current();
9672 struct vnode_attr va;
9673 bool vn_get_succsessful = false;
9674 bool vn_open_successful = false;
9675 bool fp_alloc_successful = false;
9676
9677 /*
9678 * Use vnode pointer to acquire a file descriptor for
9679 * hand-off to userland
9680 */
9681 fmode = nspace_open_flags_for_type(nspace_type);
9682 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
9683 if (error) goto cleanup;
9684 vn_get_succsessful = true;
9685
9686 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
9687 if (error) goto cleanup;
9688 vn_open_successful = true;
9689
9690 error = falloc(p, &fp, &indx, ctx);
9691 if (error) goto cleanup;
9692 fp_alloc_successful = true;
9693
9694 fp->f_fglob->fg_flag = fmode;
9695 fp->f_fglob->fg_ops = &vnops;
9696 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
9697
9698 proc_fdlock(p);
9699 procfdtbl_releasefd(p, indx, NULL);
9700 fp_drop(p, indx, fp, 1);
9701 proc_fdunlock(p);
9702
9703 /*
9704 * All variants of the namespace handler struct support these three fields:
9705 * token, flags, and the FD pointer
9706 */
9707 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
9708 if (error) goto cleanup;
9709 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
9710 if (error) goto cleanup;
9711 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
9712 if (error) goto cleanup;
9713
9714 /*
9715 * Handle optional fields:
9716 * extended version support an info ptr (offset, length), and the
9717 *
9718 * namedata version supports a unique per-link object ID
9719 *
9720 */
9721 if (nhd->infoptr) {
9722 uio_t uio = (uio_t)nspace_items[i].arg;
9723 uint64_t u_offset, u_length;
9724
9725 if (uio) {
9726 u_offset = uio_offset(uio);
9727 u_length = uio_resid(uio);
9728 } else {
9729 u_offset = 0;
9730 u_length = 0;
9731 }
9732 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
9733 if (error) goto cleanup;
9734 error = copyout(&u_length, nhd->infoptr + sizeof(uint64_t), sizeof(uint64_t));
9735 if (error) goto cleanup;
9736 }
9737
9738 if (nhd->objid) {
9739 VATTR_INIT(&va);
9740 VATTR_WANTED(&va, va_linkid);
9741 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
9742 if (error) goto cleanup;
9743
9744 uint64_t linkid = 0;
9745 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
9746 linkid = (uint64_t)va.va_linkid;
9747 }
9748 error = copyout(&linkid, nhd->objid, sizeof(uint64_t));
9749 }
9750 cleanup:
9751 if (error) {
9752 if (fp_alloc_successful) fp_free(p, indx, fp);
9753 if (vn_open_successful) vn_close(nspace_items[i].vp, fmode, ctx);
9754 unblock = 1;
9755 }
9756
9757 if (vn_get_succsessful) vnode_put(nspace_items[i].vp);
9758
9759 break;
9760 }
9761
9762 if (unblock) {
9763 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9764 vnode_lock_spin(nspace_items[i].vp);
9765 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9766 vnode_unlock(nspace_items[i].vp);
9767 }
9768 nspace_items[i].vp = NULL;
9769 nspace_items[i].vid = 0;
9770 nspace_items[i].flags = NSPACE_ITEM_DONE;
9771 nspace_items[i].token = 0;
9772
9773 wakeup((caddr_t)&(nspace_items[i].vp));
9774 }
9775
9776 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
9777 // just go through every snapshot event and unblock it immediately.
9778 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9779 for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
9780 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9781 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9782 nspace_items[i].vp = NULL;
9783 nspace_items[i].vid = 0;
9784 nspace_items[i].flags = NSPACE_ITEM_DONE;
9785 nspace_items[i].token = 0;
9786
9787 wakeup((caddr_t)&(nspace_items[i].vp));
9788 }
9789 }
9790 }
9791 }
9792 }
9793
9794 lck_mtx_unlock(&nspace_handler_lock);
9795
9796 lck_mtx_lock(&nspace_handler_exclusion_lock);
9797 nspace_handlers[nspace_type].handler_busy = 0;
9798 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9799
9800 return error;
9801 }
9802
9803 static inline int validate_namespace_args (int is64bit, int size) {
9804
9805 if (is64bit) {
9806 /* Must be one of these */
9807 if (size == sizeof(user64_namespace_handler_info)) {
9808 goto sizeok;
9809 }
9810 if (size == sizeof(user64_namespace_handler_info_ext)) {
9811 goto sizeok;
9812 }
9813 if (size == sizeof(user64_namespace_handler_data)) {
9814 goto sizeok;
9815 }
9816 return EINVAL;
9817 }
9818 else {
9819 /* 32 bit -- must be one of these */
9820 if (size == sizeof(user32_namespace_handler_info)) {
9821 goto sizeok;
9822 }
9823 if (size == sizeof(user32_namespace_handler_info_ext)) {
9824 goto sizeok;
9825 }
9826 if (size == sizeof(user32_namespace_handler_data)) {
9827 goto sizeok;
9828 }
9829 return EINVAL;
9830 }
9831
9832 sizeok:
9833
9834 return 0;
9835
9836 }
9837
9838 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
9839 {
9840 int error = 0;
9841 namespace_handler_data nhd;
9842
9843 bzero (&nhd, sizeof(namespace_handler_data));
9844
9845 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9846 return error;
9847 }
9848
9849 error = validate_namespace_args (is64bit, size);
9850 if (error) {
9851 return error;
9852 }
9853
9854 /* Copy in the userland pointers into our kernel-only struct */
9855
9856 if (is64bit) {
9857 /* 64 bit userland structures */
9858 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
9859 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
9860 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
9861
9862 /* If the size is greater than the standard info struct, add in extra fields */
9863 if (size > (sizeof(user64_namespace_handler_info))) {
9864 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
9865 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
9866 }
9867 if (size == (sizeof(user64_namespace_handler_data))) {
9868 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
9869 }
9870 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9871 }
9872 }
9873 else {
9874 /* 32 bit userland structures */
9875 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
9876 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
9877 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
9878
9879 if (size > (sizeof(user32_namespace_handler_info))) {
9880 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
9881 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
9882 }
9883 if (size == (sizeof(user32_namespace_handler_data))) {
9884 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
9885 }
9886 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9887 }
9888 }
9889
9890 return wait_for_namespace_event(&nhd, nspace_type);
9891 }
9892
9893 /*
9894 * Make a filesystem-specific control call:
9895 */
9896 /* ARGSUSED */
9897 static int
9898 fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
9899 {
9900 int error=0;
9901 boolean_t is64bit;
9902 u_int size;
9903 #define STK_PARAMS 128
9904 char stkbuf[STK_PARAMS] = {0};
9905 caddr_t data, memp;
9906 vnode_t vp = *arg_vp;
9907
9908 size = IOCPARM_LEN(cmd);
9909 if (size > IOCPARM_MAX) return (EINVAL);
9910
9911 is64bit = proc_is64bit(p);
9912
9913 memp = NULL;
9914
9915
9916 /*
9917 * ensure the buffer is large enough for underlying calls
9918 */
9919 #ifndef HFSIOC_GETPATH
9920 typedef char pn_t[MAXPATHLEN];
9921 #define HFSIOC_GETPATH _IOWR('h', 13, pn_t)
9922 #endif
9923
9924 #ifndef HFS_GETPATH
9925 #define HFS_GETPATH IOCBASECMD(HFSIOC_GETPATH)
9926 #endif
9927 if (IOCBASECMD(cmd) == HFS_GETPATH) {
9928 /* Round up to MAXPATHLEN regardless of user input */
9929 size = MAXPATHLEN;
9930 }
9931 else if (vp->v_tag == VT_CIFS) {
9932 /*
9933 * XXX Until fsctl's length encoding can be
9934 * XXX fixed properly.
9935 */
9936 if (IOCBASECMD(cmd) == _IOWR('z', 19, 0) && size < 1432) {
9937 size = 1432; /* sizeof(struct UniqueSMBShareID) */
9938 } else if (IOCBASECMD(cmd) == _IOWR('z', 28, 0) && size < 308) {
9939 size = 308; /* sizeof(struct smbDebugTestPB) */
9940 }
9941 }
9942
9943 if (size > sizeof (stkbuf)) {
9944 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
9945 data = memp;
9946 } else {
9947 data = &stkbuf[0];
9948 };
9949
9950 if (cmd & IOC_IN) {
9951 if (size) {
9952 error = copyin(udata, data, size);
9953 if (error) {
9954 if (memp) {
9955 kfree (memp, size);
9956 }
9957 return error;
9958 }
9959 } else {
9960 if (is64bit) {
9961 *(user_addr_t *)data = udata;
9962 }
9963 else {
9964 *(uint32_t *)data = (uint32_t)udata;
9965 }
9966 };
9967 } else if ((cmd & IOC_OUT) && size) {
9968 /*
9969 * Zero the buffer so the user always
9970 * gets back something deterministic.
9971 */
9972 bzero(data, size);
9973 } else if (cmd & IOC_VOID) {
9974 if (is64bit) {
9975 *(user_addr_t *)data = udata;
9976 }
9977 else {
9978 *(uint32_t *)data = (uint32_t)udata;
9979 }
9980 }
9981
9982 /* Check to see if it's a generic command */
9983 switch (IOCBASECMD(cmd)) {
9984
9985 case FSCTL_SYNC_VOLUME: {
9986 mount_t mp = vp->v_mount;
9987 int arg = *(uint32_t*)data;
9988
9989 /* record vid of vp so we can drop it below. */
9990 uint32_t vvid = vp->v_id;
9991
9992 /*
9993 * Then grab mount_iterref so that we can release the vnode.
9994 * Without this, a thread may call vnode_iterate_prepare then
9995 * get into a deadlock because we've never released the root vp
9996 */
9997 error = mount_iterref (mp, 0);
9998 if (error) {
9999 break;
10000 }
10001 vnode_put(vp);
10002
10003 /* issue the sync for this volume */
10004 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
10005
10006 /*
10007 * Then release the mount_iterref once we're done syncing; it's not
10008 * needed for the VNOP_IOCTL below
10009 */
10010 mount_iterdrop(mp);
10011
10012 if (arg & FSCTL_SYNC_FULLSYNC) {
10013 /* re-obtain vnode iocount on the root vp, if possible */
10014 error = vnode_getwithvid (vp, vvid);
10015 if (error == 0) {
10016 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
10017 vnode_put (vp);
10018 }
10019 }
10020 /* mark the argument VP as having been released */
10021 *arg_vp = NULL;
10022 }
10023 break;
10024
10025 case FSCTL_ROUTEFS_SETROUTEID: {
10026 #if ROUTEFS
10027 char routepath[MAXPATHLEN];
10028 size_t len = 0;
10029
10030 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10031 break;
10032 }
10033 bzero(routepath, MAXPATHLEN);
10034 error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
10035 if (error) {
10036 break;
10037 }
10038 error = routefs_kernel_mount(routepath);
10039 if (error) {
10040 break;
10041 }
10042 #endif
10043 }
10044 break;
10045
10046 case FSCTL_SET_PACKAGE_EXTS: {
10047 user_addr_t ext_strings;
10048 uint32_t num_entries;
10049 uint32_t max_width;
10050
10051 if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0)))
10052 break;
10053
10054 if ( (is64bit && size != sizeof(user64_package_ext_info))
10055 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
10056
10057 // either you're 64-bit and passed a 64-bit struct or
10058 // you're 32-bit and passed a 32-bit struct. otherwise
10059 // it's not ok.
10060 error = EINVAL;
10061 break;
10062 }
10063
10064 if (is64bit) {
10065 ext_strings = ((user64_package_ext_info *)data)->strings;
10066 num_entries = ((user64_package_ext_info *)data)->num_entries;
10067 max_width = ((user64_package_ext_info *)data)->max_width;
10068 } else {
10069 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
10070 num_entries = ((user32_package_ext_info *)data)->num_entries;
10071 max_width = ((user32_package_ext_info *)data)->max_width;
10072 }
10073 error = set_package_extensions_table(ext_strings, num_entries, max_width);
10074 }
10075 break;
10076
10077 /* namespace handlers */
10078 case FSCTL_NAMESPACE_HANDLER_GET: {
10079 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
10080 }
10081 break;
10082
10083 /* Snapshot handlers */
10084 case FSCTL_OLD_SNAPSHOT_HANDLER_GET: {
10085 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
10086 }
10087 break;
10088
10089 case FSCTL_SNAPSHOT_HANDLER_GET_EXT: {
10090 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
10091 }
10092 break;
10093
10094 case FSCTL_NAMESPACE_HANDLER_UPDATE: {
10095 uint32_t token, val;
10096 int i;
10097
10098 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10099 break;
10100 }
10101
10102 if (!nspace_is_special_process(p)) {
10103 error = EINVAL;
10104 break;
10105 }
10106
10107 token = ((uint32_t *)data)[0];
10108 val = ((uint32_t *)data)[1];
10109
10110 lck_mtx_lock(&nspace_handler_lock);
10111
10112 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10113 if (nspace_items[i].token == token) {
10114 break; /* exit for loop, not case stmt */
10115 }
10116 }
10117
10118 if (i >= MAX_NSPACE_ITEMS) {
10119 error = ENOENT;
10120 } else {
10121 //
10122 // if this bit is set, when resolve_nspace_item() times out
10123 // it will loop and go back to sleep.
10124 //
10125 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
10126 }
10127
10128 lck_mtx_unlock(&nspace_handler_lock);
10129
10130 if (error) {
10131 printf("nspace-handler-update: did not find token %u\n", token);
10132 }
10133 }
10134 break;
10135
10136 case FSCTL_NAMESPACE_HANDLER_UNBLOCK: {
10137 uint32_t token, val;
10138 int i;
10139
10140 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10141 break;
10142 }
10143
10144 if (!nspace_is_special_process(p)) {
10145 error = EINVAL;
10146 break;
10147 }
10148
10149 token = ((uint32_t *)data)[0];
10150 val = ((uint32_t *)data)[1];
10151
10152 lck_mtx_lock(&nspace_handler_lock);
10153
10154 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10155 if (nspace_items[i].token == token) {
10156 break; /* exit for loop, not case statement */
10157 }
10158 }
10159
10160 if (i >= MAX_NSPACE_ITEMS) {
10161 printf("nspace-handler-unblock: did not find token %u\n", token);
10162 error = ENOENT;
10163 } else {
10164 if (val == 0 && nspace_items[i].vp) {
10165 vnode_lock_spin(nspace_items[i].vp);
10166 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10167 vnode_unlock(nspace_items[i].vp);
10168 }
10169
10170 nspace_items[i].vp = NULL;
10171 nspace_items[i].arg = NULL;
10172 nspace_items[i].op = 0;
10173 nspace_items[i].vid = 0;
10174 nspace_items[i].flags = NSPACE_ITEM_DONE;
10175 nspace_items[i].token = 0;
10176
10177 wakeup((caddr_t)&(nspace_items[i].vp));
10178 }
10179
10180 lck_mtx_unlock(&nspace_handler_lock);
10181 }
10182 break;
10183
10184 case FSCTL_NAMESPACE_HANDLER_CANCEL: {
10185 uint32_t token, val;
10186 int i;
10187
10188 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10189 break;
10190 }
10191
10192 if (!nspace_is_special_process(p)) {
10193 error = EINVAL;
10194 break;
10195 }
10196
10197 token = ((uint32_t *)data)[0];
10198 val = ((uint32_t *)data)[1];
10199
10200 lck_mtx_lock(&nspace_handler_lock);
10201
10202 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10203 if (nspace_items[i].token == token) {
10204 break; /* exit for loop, not case stmt */
10205 }
10206 }
10207
10208 if (i >= MAX_NSPACE_ITEMS) {
10209 printf("nspace-handler-cancel: did not find token %u\n", token);
10210 error = ENOENT;
10211 } else {
10212 if (nspace_items[i].vp) {
10213 vnode_lock_spin(nspace_items[i].vp);
10214 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10215 vnode_unlock(nspace_items[i].vp);
10216 }
10217
10218 nspace_items[i].vp = NULL;
10219 nspace_items[i].arg = NULL;
10220 nspace_items[i].vid = 0;
10221 nspace_items[i].token = val;
10222 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
10223 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
10224
10225 wakeup((caddr_t)&(nspace_items[i].vp));
10226 }
10227
10228 lck_mtx_unlock(&nspace_handler_lock);
10229 }
10230 break;
10231
10232 case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
10233 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10234 break;
10235 }
10236
10237 // we explicitly do not do the namespace_handler_proc check here
10238
10239 lck_mtx_lock(&nspace_handler_lock);
10240 snapshot_timestamp = ((uint32_t *)data)[0];
10241 wakeup(&nspace_item_idx);
10242 lck_mtx_unlock(&nspace_handler_lock);
10243 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
10244
10245 }
10246 break;
10247
10248 case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
10249 {
10250 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10251 break;
10252 }
10253
10254 lck_mtx_lock(&nspace_handler_lock);
10255 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
10256 lck_mtx_unlock(&nspace_handler_lock);
10257 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
10258 nspace_allow_virtual_devs ? "" : " NOT");
10259 error = 0;
10260
10261 }
10262 break;
10263
10264 case FSCTL_SET_FSTYPENAME_OVERRIDE:
10265 {
10266 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10267 break;
10268 }
10269 if (vp->v_mount) {
10270 mount_lock(vp->v_mount);
10271 if (data[0] != 0) {
10272 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
10273 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
10274 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10275 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
10276 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
10277 }
10278 } else {
10279 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10280 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
10281 }
10282 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
10283 vp->v_mount->fstypename_override[0] = '\0';
10284 }
10285 mount_unlock(vp->v_mount);
10286 }
10287 }
10288 break;
10289
10290 default: {
10291 /* Invoke the filesystem-specific code */
10292 error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
10293 }
10294
10295 } /* end switch stmt */
10296
10297 /*
10298 * if no errors, copy any data to user. Size was
10299 * already set and checked above.
10300 */
10301 if (error == 0 && (cmd & IOC_OUT) && size)
10302 error = copyout(data, udata, size);
10303
10304 if (memp) {
10305 kfree(memp, size);
10306 }
10307
10308 return error;
10309 }
10310
10311 /* ARGSUSED */
10312 int
10313 fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
10314 {
10315 int error;
10316 struct nameidata nd;
10317 u_long nameiflags;
10318 vnode_t vp = NULL;
10319 vfs_context_t ctx = vfs_context_current();
10320
10321 AUDIT_ARG(cmd, uap->cmd);
10322 AUDIT_ARG(value32, uap->options);
10323 /* Get the vnode for the file we are getting info on: */
10324 nameiflags = 0;
10325 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
10326 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
10327 UIO_USERSPACE, uap->path, ctx);
10328 if ((error = namei(&nd))) goto done;
10329 vp = nd.ni_vp;
10330 nameidone(&nd);
10331
10332 #if CONFIG_MACF
10333 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
10334 if (error) {
10335 goto done;
10336 }
10337 #endif
10338
10339 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10340
10341 done:
10342 if (vp)
10343 vnode_put(vp);
10344 return error;
10345 }
10346 /* ARGSUSED */
10347 int
10348 ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
10349 {
10350 int error;
10351 vnode_t vp = NULL;
10352 vfs_context_t ctx = vfs_context_current();
10353 int fd = -1;
10354
10355 AUDIT_ARG(fd, uap->fd);
10356 AUDIT_ARG(cmd, uap->cmd);
10357 AUDIT_ARG(value32, uap->options);
10358
10359 /* Get the vnode for the file we are getting info on: */
10360 if ((error = file_vnode(uap->fd, &vp)))
10361 return error;
10362 fd = uap->fd;
10363 if ((error = vnode_getwithref(vp))) {
10364 file_drop(fd);
10365 return error;
10366 }
10367
10368 #if CONFIG_MACF
10369 if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
10370 file_drop(fd);
10371 vnode_put(vp);
10372 return error;
10373 }
10374 #endif
10375
10376 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10377
10378 file_drop(fd);
10379
10380 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
10381 if (vp) {
10382 vnode_put(vp);
10383 }
10384
10385 return error;
10386 }
10387 /* end of fsctl system call */
10388
10389 /*
10390 * Retrieve the data of an extended attribute.
10391 */
10392 int
10393 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
10394 {
10395 vnode_t vp;
10396 struct nameidata nd;
10397 char attrname[XATTR_MAXNAMELEN+1];
10398 vfs_context_t ctx = vfs_context_current();
10399 uio_t auio = NULL;
10400 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10401 size_t attrsize = 0;
10402 size_t namelen;
10403 u_int32_t nameiflags;
10404 int error;
10405 char uio_buf[ UIO_SIZEOF(1) ];
10406
10407 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10408 return (EINVAL);
10409
10410 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10411 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
10412 if ((error = namei(&nd))) {
10413 return (error);
10414 }
10415 vp = nd.ni_vp;
10416 nameidone(&nd);
10417
10418 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10419 goto out;
10420 }
10421 if (xattr_protected(attrname)) {
10422 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
10423 error = EPERM;
10424 goto out;
10425 }
10426 }
10427 /*
10428 * the specific check for 0xffffffff is a hack to preserve
10429 * binaray compatibilty in K64 with applications that discovered
10430 * that passing in a buf pointer and a size of -1 resulted in
10431 * just the size of the indicated extended attribute being returned.
10432 * this isn't part of the documented behavior, but because of the
10433 * original implemtation's check for "uap->size > 0", this behavior
10434 * was allowed. In K32 that check turned into a signed comparison
10435 * even though uap->size is unsigned... in K64, we blow by that
10436 * check because uap->size is unsigned and doesn't get sign smeared
10437 * in the munger for a 32 bit user app. we also need to add a
10438 * check to limit the maximum size of the buffer being passed in...
10439 * unfortunately, the underlying fileystems seem to just malloc
10440 * the requested size even if the actual extended attribute is tiny.
10441 * because that malloc is for kernel wired memory, we have to put a
10442 * sane limit on it.
10443 *
10444 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
10445 * U64 running on K64 will yield -1 (64 bits wide)
10446 * U32/U64 running on K32 will yield -1 (32 bits wide)
10447 */
10448 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
10449 goto no_uio;
10450
10451 if (uap->value) {
10452 if (uap->size > (size_t)XATTR_MAXSIZE)
10453 uap->size = XATTR_MAXSIZE;
10454
10455 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10456 &uio_buf[0], sizeof(uio_buf));
10457 uio_addiov(auio, uap->value, uap->size);
10458 }
10459 no_uio:
10460 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
10461 out:
10462 vnode_put(vp);
10463
10464 if (auio) {
10465 *retval = uap->size - uio_resid(auio);
10466 } else {
10467 *retval = (user_ssize_t)attrsize;
10468 }
10469
10470 return (error);
10471 }
10472
10473 /*
10474 * Retrieve the data of an extended attribute.
10475 */
10476 int
10477 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
10478 {
10479 vnode_t vp;
10480 char attrname[XATTR_MAXNAMELEN+1];
10481 uio_t auio = NULL;
10482 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10483 size_t attrsize = 0;
10484 size_t namelen;
10485 int error;
10486 char uio_buf[ UIO_SIZEOF(1) ];
10487
10488 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10489 return (EINVAL);
10490
10491 if ( (error = file_vnode(uap->fd, &vp)) ) {
10492 return (error);
10493 }
10494 if ( (error = vnode_getwithref(vp)) ) {
10495 file_drop(uap->fd);
10496 return(error);
10497 }
10498 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10499 goto out;
10500 }
10501 if (xattr_protected(attrname)) {
10502 error = EPERM;
10503 goto out;
10504 }
10505 if (uap->value && uap->size > 0) {
10506 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10507 &uio_buf[0], sizeof(uio_buf));
10508 uio_addiov(auio, uap->value, uap->size);
10509 }
10510
10511 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
10512 out:
10513 (void)vnode_put(vp);
10514 file_drop(uap->fd);
10515
10516 if (auio) {
10517 *retval = uap->size - uio_resid(auio);
10518 } else {
10519 *retval = (user_ssize_t)attrsize;
10520 }
10521 return (error);
10522 }
10523
10524 /*
10525 * Set the data of an extended attribute.
10526 */
10527 int
10528 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
10529 {
10530 vnode_t vp;
10531 struct nameidata nd;
10532 char attrname[XATTR_MAXNAMELEN+1];
10533 vfs_context_t ctx = vfs_context_current();
10534 uio_t auio = NULL;
10535 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10536 size_t namelen;
10537 u_int32_t nameiflags;
10538 int error;
10539 char uio_buf[ UIO_SIZEOF(1) ];
10540
10541 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10542 return (EINVAL);
10543
10544 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10545 if (error == EPERM) {
10546 /* if the string won't fit in attrname, copyinstr emits EPERM */
10547 return (ENAMETOOLONG);
10548 }
10549 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10550 return error;
10551 }
10552 if (xattr_protected(attrname))
10553 return(EPERM);
10554 if (uap->size != 0 && uap->value == 0) {
10555 return (EINVAL);
10556 }
10557
10558 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10559 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
10560 if ((error = namei(&nd))) {
10561 return (error);
10562 }
10563 vp = nd.ni_vp;
10564 nameidone(&nd);
10565
10566 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10567 &uio_buf[0], sizeof(uio_buf));
10568 uio_addiov(auio, uap->value, uap->size);
10569
10570 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
10571 #if CONFIG_FSE
10572 if (error == 0) {
10573 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10574 FSE_ARG_VNODE, vp,
10575 FSE_ARG_DONE);
10576 }
10577 #endif
10578 vnode_put(vp);
10579 *retval = 0;
10580 return (error);
10581 }
10582
10583 /*
10584 * Set the data of an extended attribute.
10585 */
10586 int
10587 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
10588 {
10589 vnode_t vp;
10590 char attrname[XATTR_MAXNAMELEN+1];
10591 uio_t auio = NULL;
10592 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10593 size_t namelen;
10594 int error;
10595 char uio_buf[ UIO_SIZEOF(1) ];
10596 #if CONFIG_FSE
10597 vfs_context_t ctx = vfs_context_current();
10598 #endif
10599
10600 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10601 return (EINVAL);
10602
10603 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10604 if (error == EPERM) {
10605 /* if the string won't fit in attrname, copyinstr emits EPERM */
10606 return (ENAMETOOLONG);
10607 }
10608 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10609 return error;
10610 }
10611 if (xattr_protected(attrname))
10612 return(EPERM);
10613 if (uap->size != 0 && uap->value == 0) {
10614 return (EINVAL);
10615 }
10616 if ( (error = file_vnode(uap->fd, &vp)) ) {
10617 return (error);
10618 }
10619 if ( (error = vnode_getwithref(vp)) ) {
10620 file_drop(uap->fd);
10621 return(error);
10622 }
10623 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10624 &uio_buf[0], sizeof(uio_buf));
10625 uio_addiov(auio, uap->value, uap->size);
10626
10627 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
10628 #if CONFIG_FSE
10629 if (error == 0) {
10630 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10631 FSE_ARG_VNODE, vp,
10632 FSE_ARG_DONE);
10633 }
10634 #endif
10635 vnode_put(vp);
10636 file_drop(uap->fd);
10637 *retval = 0;
10638 return (error);
10639 }
10640
10641 /*
10642 * Remove an extended attribute.
10643 * XXX Code duplication here.
10644 */
10645 int
10646 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
10647 {
10648 vnode_t vp;
10649 struct nameidata nd;
10650 char attrname[XATTR_MAXNAMELEN+1];
10651 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10652 vfs_context_t ctx = vfs_context_current();
10653 size_t namelen;
10654 u_int32_t nameiflags;
10655 int error;
10656
10657 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10658 return (EINVAL);
10659
10660 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10661 if (error != 0) {
10662 return (error);
10663 }
10664 if (xattr_protected(attrname))
10665 return(EPERM);
10666 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10667 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
10668 if ((error = namei(&nd))) {
10669 return (error);
10670 }
10671 vp = nd.ni_vp;
10672 nameidone(&nd);
10673
10674 error = vn_removexattr(vp, attrname, uap->options, ctx);
10675 #if CONFIG_FSE
10676 if (error == 0) {
10677 add_fsevent(FSE_XATTR_REMOVED, ctx,
10678 FSE_ARG_VNODE, vp,
10679 FSE_ARG_DONE);
10680 }
10681 #endif
10682 vnode_put(vp);
10683 *retval = 0;
10684 return (error);
10685 }
10686
10687 /*
10688 * Remove an extended attribute.
10689 * XXX Code duplication here.
10690 */
10691 int
10692 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
10693 {
10694 vnode_t vp;
10695 char attrname[XATTR_MAXNAMELEN+1];
10696 size_t namelen;
10697 int error;
10698 #if CONFIG_FSE
10699 vfs_context_t ctx = vfs_context_current();
10700 #endif
10701
10702 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10703 return (EINVAL);
10704
10705 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10706 if (error != 0) {
10707 return (error);
10708 }
10709 if (xattr_protected(attrname))
10710 return(EPERM);
10711 if ( (error = file_vnode(uap->fd, &vp)) ) {
10712 return (error);
10713 }
10714 if ( (error = vnode_getwithref(vp)) ) {
10715 file_drop(uap->fd);
10716 return(error);
10717 }
10718
10719 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
10720 #if CONFIG_FSE
10721 if (error == 0) {
10722 add_fsevent(FSE_XATTR_REMOVED, ctx,
10723 FSE_ARG_VNODE, vp,
10724 FSE_ARG_DONE);
10725 }
10726 #endif
10727 vnode_put(vp);
10728 file_drop(uap->fd);
10729 *retval = 0;
10730 return (error);
10731 }
10732
10733 /*
10734 * Retrieve the list of extended attribute names.
10735 * XXX Code duplication here.
10736 */
10737 int
10738 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
10739 {
10740 vnode_t vp;
10741 struct nameidata nd;
10742 vfs_context_t ctx = vfs_context_current();
10743 uio_t auio = NULL;
10744 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10745 size_t attrsize = 0;
10746 u_int32_t nameiflags;
10747 int error;
10748 char uio_buf[ UIO_SIZEOF(1) ];
10749
10750 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10751 return (EINVAL);
10752
10753 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10754 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
10755 if ((error = namei(&nd))) {
10756 return (error);
10757 }
10758 vp = nd.ni_vp;
10759 nameidone(&nd);
10760 if (uap->namebuf != 0 && uap->bufsize > 0) {
10761 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
10762 &uio_buf[0], sizeof(uio_buf));
10763 uio_addiov(auio, uap->namebuf, uap->bufsize);
10764 }
10765
10766 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
10767
10768 vnode_put(vp);
10769 if (auio) {
10770 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10771 } else {
10772 *retval = (user_ssize_t)attrsize;
10773 }
10774 return (error);
10775 }
10776
10777 /*
10778 * Retrieve the list of extended attribute names.
10779 * XXX Code duplication here.
10780 */
10781 int
10782 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
10783 {
10784 vnode_t vp;
10785 uio_t auio = NULL;
10786 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10787 size_t attrsize = 0;
10788 int error;
10789 char uio_buf[ UIO_SIZEOF(1) ];
10790
10791 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10792 return (EINVAL);
10793
10794 if ( (error = file_vnode(uap->fd, &vp)) ) {
10795 return (error);
10796 }
10797 if ( (error = vnode_getwithref(vp)) ) {
10798 file_drop(uap->fd);
10799 return(error);
10800 }
10801 if (uap->namebuf != 0 && uap->bufsize > 0) {
10802 auio = uio_createwithbuffer(1, 0, spacetype,
10803 UIO_READ, &uio_buf[0], sizeof(uio_buf));
10804 uio_addiov(auio, uap->namebuf, uap->bufsize);
10805 }
10806
10807 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
10808
10809 vnode_put(vp);
10810 file_drop(uap->fd);
10811 if (auio) {
10812 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10813 } else {
10814 *retval = (user_ssize_t)attrsize;
10815 }
10816 return (error);
10817 }
10818
10819 static int fsgetpath_internal(
10820 vfs_context_t ctx, int volfs_id, uint64_t objid,
10821 vm_size_t bufsize, caddr_t buf, int *pathlen)
10822 {
10823 int error;
10824 struct mount *mp = NULL;
10825 vnode_t vp;
10826 int length;
10827 int bpflags;
10828 /* maximum number of times to retry build_path */
10829 unsigned int retries = 0x10;
10830
10831 if (bufsize > PAGE_SIZE) {
10832 return (EINVAL);
10833 }
10834
10835 if (buf == NULL) {
10836 return (ENOMEM);
10837 }
10838
10839 retry:
10840 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
10841 error = ENOTSUP; /* unexpected failure */
10842 return ENOTSUP;
10843 }
10844
10845 unionget:
10846 if (objid == 2) {
10847 error = VFS_ROOT(mp, &vp, ctx);
10848 } else {
10849 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
10850 }
10851
10852 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
10853 /*
10854 * If the fileid isn't found and we're in a union
10855 * mount volume, then see if the fileid is in the
10856 * mounted-on volume.
10857 */
10858 struct mount *tmp = mp;
10859 mp = vnode_mount(tmp->mnt_vnodecovered);
10860 vfs_unbusy(tmp);
10861 if (vfs_busy(mp, LK_NOWAIT) == 0)
10862 goto unionget;
10863 } else {
10864 vfs_unbusy(mp);
10865 }
10866
10867 if (error) {
10868 return error;
10869 }
10870
10871 #if CONFIG_MACF
10872 error = mac_vnode_check_fsgetpath(ctx, vp);
10873 if (error) {
10874 vnode_put(vp);
10875 return error;
10876 }
10877 #endif
10878
10879 /* Obtain the absolute path to this vnode. */
10880 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
10881 bpflags |= BUILDPATH_CHECK_MOVED;
10882 error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
10883 vnode_put(vp);
10884
10885 if (error) {
10886 /* there was a race building the path, try a few more times */
10887 if (error == EAGAIN) {
10888 --retries;
10889 if (retries > 0)
10890 goto retry;
10891
10892 error = ENOENT;
10893 }
10894 goto out;
10895 }
10896
10897 AUDIT_ARG(text, buf);
10898
10899 if (kdebug_enable) {
10900 long dbg_parms[NUMPARMS];
10901 int dbg_namelen;
10902
10903 dbg_namelen = (int)sizeof(dbg_parms);
10904
10905 if (length < dbg_namelen) {
10906 memcpy((char *)dbg_parms, buf, length);
10907 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
10908
10909 dbg_namelen = length;
10910 } else {
10911 memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
10912 }
10913
10914 kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
10915 }
10916
10917 *pathlen = (user_ssize_t)length; /* may be superseded by error */
10918
10919 out:
10920 return (error);
10921 }
10922
10923 /*
10924 * Obtain the full pathname of a file system object by id.
10925 *
10926 * This is a private SPI used by the File Manager.
10927 */
10928 __private_extern__
10929 int
10930 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
10931 {
10932 vfs_context_t ctx = vfs_context_current();
10933 fsid_t fsid;
10934 char *realpath;
10935 int length;
10936 int error;
10937
10938 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
10939 return (error);
10940 }
10941 AUDIT_ARG(value32, fsid.val[0]);
10942 AUDIT_ARG(value64, uap->objid);
10943 /* Restrict output buffer size for now. */
10944
10945 if (uap->bufsize > PAGE_SIZE) {
10946 return (EINVAL);
10947 }
10948 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
10949 if (realpath == NULL) {
10950 return (ENOMEM);
10951 }
10952
10953 error = fsgetpath_internal(
10954 ctx, fsid.val[0], uap->objid,
10955 uap->bufsize, realpath, &length);
10956
10957 if (error) {
10958 goto out;
10959 }
10960
10961 error = copyout((caddr_t)realpath, uap->buf, length);
10962
10963 *retval = (user_ssize_t)length; /* may be superseded by error */
10964 out:
10965 if (realpath) {
10966 FREE(realpath, M_TEMP);
10967 }
10968 return (error);
10969 }
10970
10971 /*
10972 * Common routine to handle various flavors of statfs data heading out
10973 * to user space.
10974 *
10975 * Returns: 0 Success
10976 * EFAULT
10977 */
10978 static int
10979 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
10980 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
10981 boolean_t partial_copy)
10982 {
10983 int error;
10984 int my_size, copy_size;
10985
10986 if (is_64_bit) {
10987 struct user64_statfs sfs;
10988 my_size = copy_size = sizeof(sfs);
10989 bzero(&sfs, my_size);
10990 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
10991 sfs.f_type = mp->mnt_vtable->vfc_typenum;
10992 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
10993 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
10994 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
10995 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
10996 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
10997 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
10998 sfs.f_files = (user64_long_t)sfsp->f_files;
10999 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
11000 sfs.f_fsid = sfsp->f_fsid;
11001 sfs.f_owner = sfsp->f_owner;
11002 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
11003 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
11004 } else {
11005 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
11006 }
11007 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
11008 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
11009
11010 if (partial_copy) {
11011 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
11012 }
11013 error = copyout((caddr_t)&sfs, bufp, copy_size);
11014 }
11015 else {
11016 struct user32_statfs sfs;
11017
11018 my_size = copy_size = sizeof(sfs);
11019 bzero(&sfs, my_size);
11020
11021 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
11022 sfs.f_type = mp->mnt_vtable->vfc_typenum;
11023 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
11024
11025 /*
11026 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
11027 * have to fudge the numbers here in that case. We inflate the blocksize in order
11028 * to reflect the filesystem size as best we can.
11029 */
11030 if ((sfsp->f_blocks > INT_MAX)
11031 /* Hack for 4061702 . I think the real fix is for Carbon to
11032 * look for some volume capability and not depend on hidden
11033 * semantics agreed between a FS and carbon.
11034 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
11035 * for Carbon to set bNoVolumeSizes volume attribute.
11036 * Without this the webdavfs files cannot be copied onto
11037 * disk as they look huge. This change should not affect
11038 * XSAN as they should not setting these to -1..
11039 */
11040 && (sfsp->f_blocks != 0xffffffffffffffffULL)
11041 && (sfsp->f_bfree != 0xffffffffffffffffULL)
11042 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
11043 int shift;
11044
11045 /*
11046 * Work out how far we have to shift the block count down to make it fit.
11047 * Note that it's possible to have to shift so far that the resulting
11048 * blocksize would be unreportably large. At that point, we will clip
11049 * any values that don't fit.
11050 *
11051 * For safety's sake, we also ensure that f_iosize is never reported as
11052 * being smaller than f_bsize.
11053 */
11054 for (shift = 0; shift < 32; shift++) {
11055 if ((sfsp->f_blocks >> shift) <= INT_MAX)
11056 break;
11057 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
11058 break;
11059 }
11060 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
11061 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
11062 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
11063 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
11064 #undef __SHIFT_OR_CLIP
11065 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
11066 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
11067 } else {
11068 /* filesystem is small enough to be reported honestly */
11069 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
11070 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
11071 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
11072 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
11073 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
11074 }
11075 sfs.f_files = (user32_long_t)sfsp->f_files;
11076 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
11077 sfs.f_fsid = sfsp->f_fsid;
11078 sfs.f_owner = sfsp->f_owner;
11079 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
11080 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
11081 } else {
11082 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
11083 }
11084 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
11085 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
11086
11087 if (partial_copy) {
11088 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
11089 }
11090 error = copyout((caddr_t)&sfs, bufp, copy_size);
11091 }
11092
11093 if (sizep != NULL) {
11094 *sizep = my_size;
11095 }
11096 return(error);
11097 }
11098
11099 /*
11100 * copy stat structure into user_stat structure.
11101 */
11102 void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
11103 {
11104 bzero(usbp, sizeof(*usbp));
11105
11106 usbp->st_dev = sbp->st_dev;
11107 usbp->st_ino = sbp->st_ino;
11108 usbp->st_mode = sbp->st_mode;
11109 usbp->st_nlink = sbp->st_nlink;
11110 usbp->st_uid = sbp->st_uid;
11111 usbp->st_gid = sbp->st_gid;
11112 usbp->st_rdev = sbp->st_rdev;
11113 #ifndef _POSIX_C_SOURCE
11114 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11115 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11116 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11117 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11118 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11119 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11120 #else
11121 usbp->st_atime = sbp->st_atime;
11122 usbp->st_atimensec = sbp->st_atimensec;
11123 usbp->st_mtime = sbp->st_mtime;
11124 usbp->st_mtimensec = sbp->st_mtimensec;
11125 usbp->st_ctime = sbp->st_ctime;
11126 usbp->st_ctimensec = sbp->st_ctimensec;
11127 #endif
11128 usbp->st_size = sbp->st_size;
11129 usbp->st_blocks = sbp->st_blocks;
11130 usbp->st_blksize = sbp->st_blksize;
11131 usbp->st_flags = sbp->st_flags;
11132 usbp->st_gen = sbp->st_gen;
11133 usbp->st_lspare = sbp->st_lspare;
11134 usbp->st_qspare[0] = sbp->st_qspare[0];
11135 usbp->st_qspare[1] = sbp->st_qspare[1];
11136 }
11137
11138 void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
11139 {
11140 bzero(usbp, sizeof(*usbp));
11141
11142 usbp->st_dev = sbp->st_dev;
11143 usbp->st_ino = sbp->st_ino;
11144 usbp->st_mode = sbp->st_mode;
11145 usbp->st_nlink = sbp->st_nlink;
11146 usbp->st_uid = sbp->st_uid;
11147 usbp->st_gid = sbp->st_gid;
11148 usbp->st_rdev = sbp->st_rdev;
11149 #ifndef _POSIX_C_SOURCE
11150 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11151 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11152 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11153 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11154 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11155 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11156 #else
11157 usbp->st_atime = sbp->st_atime;
11158 usbp->st_atimensec = sbp->st_atimensec;
11159 usbp->st_mtime = sbp->st_mtime;
11160 usbp->st_mtimensec = sbp->st_mtimensec;
11161 usbp->st_ctime = sbp->st_ctime;
11162 usbp->st_ctimensec = sbp->st_ctimensec;
11163 #endif
11164 usbp->st_size = sbp->st_size;
11165 usbp->st_blocks = sbp->st_blocks;
11166 usbp->st_blksize = sbp->st_blksize;
11167 usbp->st_flags = sbp->st_flags;
11168 usbp->st_gen = sbp->st_gen;
11169 usbp->st_lspare = sbp->st_lspare;
11170 usbp->st_qspare[0] = sbp->st_qspare[0];
11171 usbp->st_qspare[1] = sbp->st_qspare[1];
11172 }
11173
11174 /*
11175 * copy stat64 structure into user_stat64 structure.
11176 */
11177 void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
11178 {
11179 bzero(usbp, sizeof(*usbp));
11180
11181 usbp->st_dev = sbp->st_dev;
11182 usbp->st_ino = sbp->st_ino;
11183 usbp->st_mode = sbp->st_mode;
11184 usbp->st_nlink = sbp->st_nlink;
11185 usbp->st_uid = sbp->st_uid;
11186 usbp->st_gid = sbp->st_gid;
11187 usbp->st_rdev = sbp->st_rdev;
11188 #ifndef _POSIX_C_SOURCE
11189 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11190 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11191 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11192 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11193 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11194 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11195 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11196 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
11197 #else
11198 usbp->st_atime = sbp->st_atime;
11199 usbp->st_atimensec = sbp->st_atimensec;
11200 usbp->st_mtime = sbp->st_mtime;
11201 usbp->st_mtimensec = sbp->st_mtimensec;
11202 usbp->st_ctime = sbp->st_ctime;
11203 usbp->st_ctimensec = sbp->st_ctimensec;
11204 usbp->st_birthtime = sbp->st_birthtime;
11205 usbp->st_birthtimensec = sbp->st_birthtimensec;
11206 #endif
11207 usbp->st_size = sbp->st_size;
11208 usbp->st_blocks = sbp->st_blocks;
11209 usbp->st_blksize = sbp->st_blksize;
11210 usbp->st_flags = sbp->st_flags;
11211 usbp->st_gen = sbp->st_gen;
11212 usbp->st_lspare = sbp->st_lspare;
11213 usbp->st_qspare[0] = sbp->st_qspare[0];
11214 usbp->st_qspare[1] = sbp->st_qspare[1];
11215 }
11216
11217 void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
11218 {
11219 bzero(usbp, sizeof(*usbp));
11220
11221 usbp->st_dev = sbp->st_dev;
11222 usbp->st_ino = sbp->st_ino;
11223 usbp->st_mode = sbp->st_mode;
11224 usbp->st_nlink = sbp->st_nlink;
11225 usbp->st_uid = sbp->st_uid;
11226 usbp->st_gid = sbp->st_gid;
11227 usbp->st_rdev = sbp->st_rdev;
11228 #ifndef _POSIX_C_SOURCE
11229 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11230 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11231 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11232 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11233 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11234 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11235 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11236 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
11237 #else
11238 usbp->st_atime = sbp->st_atime;
11239 usbp->st_atimensec = sbp->st_atimensec;
11240 usbp->st_mtime = sbp->st_mtime;
11241 usbp->st_mtimensec = sbp->st_mtimensec;
11242 usbp->st_ctime = sbp->st_ctime;
11243 usbp->st_ctimensec = sbp->st_ctimensec;
11244 usbp->st_birthtime = sbp->st_birthtime;
11245 usbp->st_birthtimensec = sbp->st_birthtimensec;
11246 #endif
11247 usbp->st_size = sbp->st_size;
11248 usbp->st_blocks = sbp->st_blocks;
11249 usbp->st_blksize = sbp->st_blksize;
11250 usbp->st_flags = sbp->st_flags;
11251 usbp->st_gen = sbp->st_gen;
11252 usbp->st_lspare = sbp->st_lspare;
11253 usbp->st_qspare[0] = sbp->st_qspare[0];
11254 usbp->st_qspare[1] = sbp->st_qspare[1];
11255 }
11256
11257 /*
11258 * Purge buffer cache for simulating cold starts
11259 */
11260 static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
11261 {
11262 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
11263
11264 return VNODE_RETURNED;
11265 }
11266
11267 static int vfs_purge_callback(mount_t mp, __unused void * arg)
11268 {
11269 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
11270
11271 return VFS_RETURNED;
11272 }
11273
11274 int
11275 vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
11276 {
11277 if (!kauth_cred_issuser(kauth_cred_get()))
11278 return EPERM;
11279
11280 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
11281
11282 return 0;
11283 }
11284
11285 /*
11286 * gets the vnode associated with the (unnamed) snapshot directory
11287 * for a Filesystem. The snapshot directory vnode is returned with
11288 * an iocount on it.
11289 */
11290 int
11291 vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx)
11292 {
11293 return (VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx));
11294 }
11295
11296 /*
11297 * Get the snapshot vnode.
11298 *
11299 * If successful, the call returns with an iocount on *rvpp ,*sdvpp and
11300 * needs nameidone() on ndp.
11301 *
11302 * If the snapshot vnode exists it is returned in ndp->ni_vp.
11303 *
11304 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
11305 * not needed.
11306 */
11307 static int
11308 vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp,
11309 user_addr_t name, struct nameidata *ndp, int32_t op,
11310 #if !CONFIG_TRIGGERS
11311 __unused
11312 #endif
11313 enum path_operation pathop,
11314 vfs_context_t ctx)
11315 {
11316 int error, i;
11317 caddr_t name_buf;
11318 size_t name_len;
11319 struct vfs_attr vfa;
11320
11321 *sdvpp = NULLVP;
11322 *rvpp = NULLVP;
11323
11324 error = vnode_getfromfd(ctx, dirfd, rvpp);
11325 if (error)
11326 return (error);
11327
11328 if (!vnode_isvroot(*rvpp)) {
11329 error = EINVAL;
11330 goto out;
11331 }
11332
11333 /* Make sure the filesystem supports snapshots */
11334 VFSATTR_INIT(&vfa);
11335 VFSATTR_WANTED(&vfa, f_capabilities);
11336 if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) ||
11337 !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) ||
11338 !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] &
11339 VOL_CAP_INT_SNAPSHOT)) ||
11340 !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &
11341 VOL_CAP_INT_SNAPSHOT))) {
11342 error = ENOTSUP;
11343 goto out;
11344 }
11345
11346 error = vnode_get_snapdir(*rvpp, sdvpp, ctx);
11347 if (error)
11348 goto out;
11349
11350 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11351 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11352 if (error)
11353 goto out1;
11354
11355 /*
11356 * Some sanity checks- name can't be empty, "." or ".." or have slashes.
11357 * (the length returned by copyinstr includes the terminating NUL)
11358 */
11359 if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
11360 (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
11361 error = EINVAL;
11362 goto out1;
11363 }
11364 for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++);
11365 if (i < (int)name_len) {
11366 error = EINVAL;
11367 goto out1;
11368 }
11369
11370 #if CONFIG_MACF
11371 if (op == CREATE) {
11372 error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp),
11373 name_buf);
11374 } else if (op == DELETE) {
11375 error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp),
11376 name_buf);
11377 }
11378 if (error)
11379 goto out1;
11380 #endif
11381
11382 /* Check if the snapshot already exists ... */
11383 NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1,
11384 UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx);
11385 ndp->ni_dvp = *sdvpp;
11386
11387 error = namei(ndp);
11388 out1:
11389 FREE(name_buf, M_TEMP);
11390 out:
11391 if (error) {
11392 if (*sdvpp) {
11393 vnode_put(*sdvpp);
11394 *sdvpp = NULLVP;
11395 }
11396 if (*rvpp) {
11397 vnode_put(*rvpp);
11398 *rvpp = NULLVP;
11399 }
11400 }
11401 return (error);
11402 }
11403
11404 /*
11405 * create a filesystem snapshot (for supporting filesystems)
11406 *
11407 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
11408 * We get to the (unnamed) snapshot directory vnode and create the vnode
11409 * for the snapshot in it.
11410 *
11411 * Restrictions:
11412 *
11413 * a) Passed in name for snapshot cannot have slashes.
11414 * b) name can't be "." or ".."
11415 *
11416 * Since this requires superuser privileges, vnode_authorize calls are not
11417 * made.
11418 */
11419 static int
11420 snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
11421 vfs_context_t ctx)
11422 {
11423 vnode_t rvp, snapdvp;
11424 int error;
11425 struct nameidata namend;
11426
11427 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE,
11428 OP_LINK, ctx);
11429 if (error)
11430 return (error);
11431
11432 if (namend.ni_vp) {
11433 vnode_put(namend.ni_vp);
11434 error = EEXIST;
11435 } else {
11436 struct vnode_attr va;
11437 vnode_t vp = NULLVP;
11438
11439 VATTR_INIT(&va);
11440 VATTR_SET(&va, va_type, VREG);
11441 VATTR_SET(&va, va_mode, 0);
11442
11443 error = vn_create(snapdvp, &vp, &namend, &va,
11444 VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
11445 if (!error && vp)
11446 vnode_put(vp);
11447 }
11448
11449 nameidone(&namend);
11450 vnode_put(snapdvp);
11451 vnode_put(rvp);
11452 return (error);
11453 }
11454
11455 /*
11456 * Delete a Filesystem snapshot
11457 *
11458 * get the vnode for the unnamed snapshot directory and the snapshot and
11459 * delete the snapshot.
11460 */
11461 static int
11462 snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
11463 vfs_context_t ctx)
11464 {
11465 vnode_t rvp, snapdvp;
11466 int error;
11467 struct nameidata namend;
11468
11469 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE,
11470 OP_UNLINK, ctx);
11471 if (error)
11472 goto out;
11473
11474 error = VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd,
11475 VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
11476
11477 vnode_put(namend.ni_vp);
11478 nameidone(&namend);
11479 vnode_put(snapdvp);
11480 vnode_put(rvp);
11481 out:
11482 return (error);
11483 }
11484
11485 /*
11486 * Revert a filesystem to a snapshot
11487 *
11488 * Marks the filesystem to revert to the given snapshot on next mount.
11489 */
11490 static int
11491 snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
11492 vfs_context_t ctx)
11493 {
11494 int error;
11495 vnode_t rvp;
11496 mount_t mp;
11497 struct fs_snapshot_revert_args revert_data;
11498 struct componentname cnp;
11499 caddr_t name_buf;
11500 size_t name_len;
11501
11502 error = vnode_getfromfd(ctx, dirfd, &rvp);
11503 if (error) {
11504 return (error);
11505 }
11506 mp = vnode_mount(rvp);
11507
11508 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11509 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11510 if (error) {
11511 FREE(name_buf, M_TEMP);
11512 vnode_put(rvp);
11513 return (error);
11514 }
11515
11516 #if CONFIG_MACF
11517 error = mac_mount_check_snapshot_revert(ctx, mp, name_buf);
11518 if (error) {
11519 FREE(name_buf, M_TEMP);
11520 vnode_put(rvp);
11521 return (error);
11522 }
11523 #endif
11524
11525 /*
11526 * Grab mount_iterref so that we can release the vnode,
11527 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
11528 */
11529 error = mount_iterref (mp, 0);
11530 vnode_put(rvp);
11531 if (error) {
11532 FREE(name_buf, M_TEMP);
11533 return (error);
11534 }
11535
11536 memset(&cnp, 0, sizeof(cnp));
11537 cnp.cn_pnbuf = (char *)name_buf;
11538 cnp.cn_nameiop = LOOKUP;
11539 cnp.cn_flags = ISLASTCN | HASBUF;
11540 cnp.cn_pnlen = MAXPATHLEN;
11541 cnp.cn_nameptr = cnp.cn_pnbuf;
11542 cnp.cn_namelen = (int)name_len;
11543 revert_data.sr_cnp = &cnp;
11544
11545 error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
11546 mount_iterdrop(mp);
11547 FREE(name_buf, M_TEMP);
11548
11549 if (error) {
11550 /* If there was any error, try again using VNOP_IOCTL */
11551
11552 vnode_t snapdvp;
11553 struct nameidata namend;
11554
11555 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP,
11556 OP_LOOKUP, ctx);
11557 if (error) {
11558 return (error);
11559 }
11560
11561
11562 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
11563 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
11564 #endif
11565
11566 #ifndef APFS_REVERT_TO_SNAPSHOT
11567 #define APFS_REVERT_TO_SNAPSHOT IOCBASECMD(APFSIOC_REVERT_TO_SNAPSHOT)
11568 #endif
11569
11570 error = VNOP_IOCTL(namend.ni_vp, APFS_REVERT_TO_SNAPSHOT, (caddr_t) NULL,
11571 0, ctx);
11572
11573 vnode_put(namend.ni_vp);
11574 nameidone(&namend);
11575 vnode_put(snapdvp);
11576 vnode_put(rvp);
11577 }
11578
11579 return (error);
11580 }
11581
11582 /*
11583 * rename a Filesystem snapshot
11584 *
11585 * get the vnode for the unnamed snapshot directory and the snapshot and
11586 * rename the snapshot. This is a very specialised (and simple) case of
11587 * rename(2) (which has to deal with a lot more complications). It differs
11588 * slightly from rename(2) in that EEXIST is returned if the new name exists.
11589 */
11590 static int
11591 snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
11592 __unused uint32_t flags, vfs_context_t ctx)
11593 {
11594 vnode_t rvp, snapdvp;
11595 int error, i;
11596 caddr_t newname_buf;
11597 size_t name_len;
11598 vnode_t fvp;
11599 struct nameidata *fromnd, *tond;
11600 /* carving out a chunk for structs that are too big to be on stack. */
11601 struct {
11602 struct nameidata from_node;
11603 struct nameidata to_node;
11604 } * __rename_data;
11605
11606 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
11607 fromnd = &__rename_data->from_node;
11608 tond = &__rename_data->to_node;
11609
11610 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE,
11611 OP_UNLINK, ctx);
11612 if (error)
11613 goto out;
11614 fvp = fromnd->ni_vp;
11615
11616 MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11617 error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
11618 if (error)
11619 goto out1;
11620
11621 /*
11622 * Some sanity checks- new name can't be empty, "." or ".." or have
11623 * slashes.
11624 * (the length returned by copyinstr includes the terminating NUL)
11625 *
11626 * The FS rename VNOP is suppossed to handle this but we'll pick it
11627 * off here itself.
11628 */
11629 if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') ||
11630 (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) {
11631 error = EINVAL;
11632 goto out1;
11633 }
11634 for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++);
11635 if (i < (int)name_len) {
11636 error = EINVAL;
11637 goto out1;
11638 }
11639
11640 #if CONFIG_MACF
11641 error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp),
11642 newname_buf);
11643 if (error)
11644 goto out1;
11645 #endif
11646
11647 NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
11648 UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx);
11649 tond->ni_dvp = snapdvp;
11650
11651 error = namei(tond);
11652 if (error) {
11653 goto out2;
11654 } else if (tond->ni_vp) {
11655 /*
11656 * snapshot rename behaves differently than rename(2) - if the
11657 * new name exists, EEXIST is returned.
11658 */
11659 vnode_put(tond->ni_vp);
11660 error = EEXIST;
11661 goto out2;
11662 }
11663
11664 error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
11665 &tond->ni_cnd, ctx);
11666
11667 out2:
11668 nameidone(tond);
11669 out1:
11670 FREE(newname_buf, M_TEMP);
11671 vnode_put(fvp);
11672 vnode_put(snapdvp);
11673 vnode_put(rvp);
11674 nameidone(fromnd);
11675 out:
11676 FREE(__rename_data, M_TEMP);
11677 return (error);
11678 }
11679
11680 /*
11681 * Mount a Filesystem snapshot
11682 *
11683 * get the vnode for the unnamed snapshot directory and the snapshot and
11684 * mount the snapshot.
11685 */
11686 static int
11687 snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
11688 __unused user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
11689 {
11690 vnode_t rvp, snapdvp, snapvp, vp, pvp;
11691 int error;
11692 struct nameidata *snapndp, *dirndp;
11693 /* carving out a chunk for structs that are too big to be on stack. */
11694 struct {
11695 struct nameidata snapnd;
11696 struct nameidata dirnd;
11697 } * __snapshot_mount_data;
11698
11699 MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data),
11700 M_TEMP, M_WAITOK);
11701 snapndp = &__snapshot_mount_data->snapnd;
11702 dirndp = &__snapshot_mount_data->dirnd;
11703
11704 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP,
11705 OP_LOOKUP, ctx);
11706 if (error)
11707 goto out;
11708
11709 snapvp = snapndp->ni_vp;
11710 if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) {
11711 error = EIO;
11712 goto out1;
11713 }
11714
11715 /* Get the vnode to be covered */
11716 NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
11717 UIO_USERSPACE, directory, ctx);
11718 error = namei(dirndp);
11719 if (error)
11720 goto out1;
11721
11722 vp = dirndp->ni_vp;
11723 pvp = dirndp->ni_dvp;
11724
11725 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
11726 error = EINVAL;
11727 } else {
11728 mount_t mp = vnode_mount(rvp);
11729 struct fs_snapshot_mount_args smnt_data;
11730
11731 smnt_data.sm_mp = mp;
11732 smnt_data.sm_cnp = &snapndp->ni_cnd;
11733 error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
11734 &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), 0,
11735 KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
11736 }
11737
11738 vnode_put(vp);
11739 vnode_put(pvp);
11740 nameidone(dirndp);
11741 out1:
11742 vnode_put(snapvp);
11743 vnode_put(snapdvp);
11744 vnode_put(rvp);
11745 nameidone(snapndp);
11746 out:
11747 FREE(__snapshot_mount_data, M_TEMP);
11748 return (error);
11749 }
11750
11751 /*
11752 * Root from a snapshot of the filesystem
11753 *
11754 * Marks the filesystem to root from the given snapshot on next boot.
11755 */
11756 static int
11757 snapshot_root(int dirfd, user_addr_t name, __unused uint32_t flags,
11758 vfs_context_t ctx)
11759 {
11760 int error;
11761 vnode_t rvp;
11762 mount_t mp;
11763 struct fs_snapshot_root_args root_data;
11764 struct componentname cnp;
11765 caddr_t name_buf;
11766 size_t name_len;
11767
11768 error = vnode_getfromfd(ctx, dirfd, &rvp);
11769 if (error) {
11770 return (error);
11771 }
11772 mp = vnode_mount(rvp);
11773
11774 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11775 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11776 if (error) {
11777 FREE(name_buf, M_TEMP);
11778 vnode_put(rvp);
11779 return (error);
11780 }
11781
11782 // XXX MAC checks ?
11783
11784 /*
11785 * Grab mount_iterref so that we can release the vnode,
11786 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
11787 */
11788 error = mount_iterref (mp, 0);
11789 vnode_put(rvp);
11790 if (error) {
11791 FREE(name_buf, M_TEMP);
11792 return (error);
11793 }
11794
11795 memset(&cnp, 0, sizeof(cnp));
11796 cnp.cn_pnbuf = (char *)name_buf;
11797 cnp.cn_nameiop = LOOKUP;
11798 cnp.cn_flags = ISLASTCN | HASBUF;
11799 cnp.cn_pnlen = MAXPATHLEN;
11800 cnp.cn_nameptr = cnp.cn_pnbuf;
11801 cnp.cn_namelen = (int)name_len;
11802 root_data.sr_cnp = &cnp;
11803
11804 error = VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, (caddr_t)&root_data, 0, ctx);
11805
11806 mount_iterdrop(mp);
11807 FREE(name_buf, M_TEMP);
11808
11809 return (error);
11810 }
11811
11812 /*
11813 * FS snapshot operations dispatcher
11814 */
11815 int
11816 fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap,
11817 __unused int32_t *retval)
11818 {
11819 int error;
11820 vfs_context_t ctx = vfs_context_current();
11821
11822 AUDIT_ARG(fd, uap->dirfd);
11823 AUDIT_ARG(value32, uap->op);
11824
11825 error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0);
11826 if (error)
11827 return (error);
11828
11829 switch (uap->op) {
11830 case SNAPSHOT_OP_CREATE:
11831 error = snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx);
11832 break;
11833 case SNAPSHOT_OP_DELETE:
11834 error = snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx);
11835 break;
11836 case SNAPSHOT_OP_RENAME:
11837 error = snapshot_rename(uap->dirfd, uap->name1, uap->name2,
11838 uap->flags, ctx);
11839 break;
11840 case SNAPSHOT_OP_MOUNT:
11841 error = snapshot_mount(uap->dirfd, uap->name1, uap->name2,
11842 uap->data, uap->flags, ctx);
11843 break;
11844 case SNAPSHOT_OP_REVERT:
11845 error = snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx);
11846 break;
11847 case SNAPSHOT_OP_ROOT:
11848 error = snapshot_root(uap->dirfd, uap->name1, uap->flags, ctx);
11849 break;
11850 default:
11851 error = ENOSYS;
11852 }
11853
11854 return (error);
11855 }