]> git.saurik.com Git - apple/xnu.git/blob - bsd/vfs/vfs_syscalls.c
xnu-3789.31.2.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
1 /*
2 * Copyright (c) 1995-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
80 #include <sys/stat.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
87 #include <sys/mman.h>
88 #include <sys/dirent.h>
89 #include <sys/attr.h>
90 #include <sys/sysctl.h>
91 #include <sys/ubc.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <sys/content_protection.h>
103 #include <sys/clonefile.h>
104 #include <sys/snapshot.h>
105 #include <sys/priv.h>
106 #include <machine/cons.h>
107 #include <machine/limits.h>
108 #include <miscfs/specfs/specdev.h>
109
110 #include <security/audit/audit.h>
111 #include <bsm/audit_kevents.h>
112
113 #include <mach/mach_types.h>
114 #include <kern/kern_types.h>
115 #include <kern/kalloc.h>
116 #include <kern/task.h>
117
118 #include <vm/vm_pageout.h>
119 #include <vm/vm_protos.h>
120
121 #include <libkern/OSAtomic.h>
122 #include <pexpert/pexpert.h>
123 #include <IOKit/IOBSD.h>
124
125 #if ROUTEFS
126 #include <miscfs/routefs/routefs.h>
127 #endif /* ROUTEFS */
128
129 #if CONFIG_MACF
130 #include <security/mac.h>
131 #include <security/mac_framework.h>
132 #endif
133
134 #if CONFIG_FSE
135 #define GET_PATH(x) \
136 (x) = get_pathbuff();
137 #define RELEASE_PATH(x) \
138 release_pathbuff(x);
139 #else
140 #define GET_PATH(x) \
141 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
142 #define RELEASE_PATH(x) \
143 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
144 #endif /* CONFIG_FSE */
145
146 /* struct for checkdirs iteration */
147 struct cdirargs {
148 vnode_t olddp;
149 vnode_t newdp;
150 };
151 /* callback for checkdirs iteration */
152 static int checkdirs_callback(proc_t p, void * arg);
153
154 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
155 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
156 void enablequotas(struct mount *mp, vfs_context_t ctx);
157 static int getfsstat_callback(mount_t mp, void * arg);
158 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
159 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
160 static int sync_callback(mount_t, void *);
161 static void sync_thread(void *, __unused wait_result_t);
162 static int sync_async(int);
163 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
164 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
165 boolean_t partial_copy);
166 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
167 user_addr_t bufp);
168 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
169 static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
170 struct componentname *cnp, user_addr_t fsmountargs,
171 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
172 vfs_context_t ctx);
173 void vfs_notify_mount(vnode_t pdvp);
174
175 int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
176
177 struct fd_vn_data * fg_vn_data_alloc(void);
178
179 /*
180 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
181 * Concurrent lookups (or lookups by ids) on hard links can cause the
182 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
183 * does) to return ENOENT as the path cannot be returned from the name cache
184 * alone. We have no option but to retry and hope to get one namei->reverse path
185 * generation done without an intervening lookup, lookup by id on the hard link
186 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
187 * which currently are the MAC hooks for rename, unlink and rmdir.
188 */
189 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
190
191 static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
192
193 static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
194
195 #ifdef CONFIG_IMGSRC_ACCESS
196 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
197 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
198 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
199 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
200 static void mount_end_update(mount_t mp);
201 static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
202 #endif /* CONFIG_IMGSRC_ACCESS */
203
204 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
205
206 __private_extern__
207 int sync_internal(void);
208
209 __private_extern__
210 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
211
212 extern lck_grp_t *fd_vn_lck_grp;
213 extern lck_grp_attr_t *fd_vn_lck_grp_attr;
214 extern lck_attr_t *fd_vn_lck_attr;
215
216 /*
217 * incremented each time a mount or unmount operation occurs
218 * used to invalidate the cached value of the rootvp in the
219 * mount structure utilized by cache_lookup_path
220 */
221 uint32_t mount_generation = 0;
222
223 /* counts number of mount and unmount operations */
224 unsigned int vfs_nummntops=0;
225
226 extern const struct fileops vnops;
227 #if CONFIG_APPLEDOUBLE
228 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
229 #endif /* CONFIG_APPLEDOUBLE */
230
231 /*
232 * Virtual File System System Calls
233 */
234
235 #if NFSCLIENT || DEVFS || ROUTEFS
236 /*
237 * Private in-kernel mounting spi (NFS only, not exported)
238 */
239 __private_extern__
240 boolean_t
241 vfs_iskernelmount(mount_t mp)
242 {
243 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
244 }
245
/*
 * kernel_mount:
 *   Private in-kernel mounting SPI (NFS/devfs/routefs only, not exported).
 *
 * Parameters:
 *   fstype         filesystem type name, handed to mount_common()
 *   pvp            parent of the vnode to be covered, or NULLVP to look it up
 *   vp             vnode to be covered, or NULLVP to look it up via 'path'
 *   path           kernel-space path of the mount point
 *   data           filesystem-specific mount arguments (kernel address,
 *                  cast to user_addr_t for mount_common())
 *   datalen        unused
 *   syscall_flags  generic MNT_* mount flags
 *   kern_flags     internal KERNEL_MOUNT_* flags
 *   ctx            caller's vfs context
 *
 * Returns: 0 on success, otherwise an errno from namei() or mount_common().
 *
 * NOTE(review): when the caller supplies vp, 'path' is installed directly
 * as the componentname path buffer (no copy), so it must remain valid for
 * the duration of the call.
 */
__private_extern__
int
kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
    void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
{
	struct nameidata nd;
	boolean_t did_namei;
	int error;

	NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
	       UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);

	/*
	 * Get the vnode to be covered if it's not supplied
	 */
	if (vp == NULLVP) {
		error = namei(&nd);
		if (error)
			return (error);
		/* namei() returned iocounts on both vnodes; dropped below */
		vp = nd.ni_vp;
		pvp = nd.ni_dvp;
		did_namei = TRUE;
	} else {
		char *pnbuf = CAST_DOWN(char *, path);

		/* caller supplied the vnodes; fake up just enough of the
		 * componentname (path buffer + length) for mount_common() */
		nd.ni_cnd.cn_pnbuf = pnbuf;
		nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
		did_namei = FALSE;
	}

	error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
	                     syscall_flags, kern_flags, NULL, TRUE, ctx);

	if (did_namei) {
		/* release the iocounts and path buffer acquired by namei() */
		vnode_put(vp);
		vnode_put(pvp);
		nameidone(&nd);
	}

	return (error);
}
#endif /* NFSCLIENT || DEVFS || ROUTEFS */
288
289 /*
290 * Mount a file system.
291 */
292 /* ARGSUSED */
293 int
294 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
295 {
296 struct __mac_mount_args muap;
297
298 muap.type = uap->type;
299 muap.path = uap->path;
300 muap.flags = uap->flags;
301 muap.data = uap->data;
302 muap.mac_p = USER_ADDR_NULL;
303 return (__mac_mount(p, &muap, retval));
304 }
305
/*
 * vfs_notify_mount:
 *   Notify interested parties that a mount has occurred.
 *
 * Parameters:
 *   pdvp    parent directory of the newly covered vnode
 */
void
vfs_notify_mount(vnode_t pdvp)
{
	/* system-wide VQ_MOUNT vfs event (no specific mount attached) */
	vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
	/* post NOTE_WRITE on the parent dir so kevent watchers see the change */
	lock_vnode_and_post(pdvp, NOTE_WRITE);
}
312
313 /*
314 * __mac_mount:
315 * Mount a file system taking into account MAC label behavior.
316 * See mount(2) man page for more information
317 *
318 * Parameters: p Process requesting the mount
319 * uap User argument descriptor (see below)
320 * retval (ignored)
321 *
322 * Indirect: uap->type Filesystem type
323 * uap->path Path to mount
324 * uap->data Mount arguments
325 * uap->mac_p MAC info
326 * uap->flags Mount flags
327 *
328 *
329 * Returns: 0 Success
330 * !0 Not success
331 */
/*
 * Set to TRUE when something attempts to mount the root filesystem
 * read/write; consulted by the codesign validation bitmap support
 * (see the CHECK_CS_VALIDATION_BITMAP block below).
 */
boolean_t root_fs_upgrade_try = FALSE;

int
__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
{
	vnode_t pvp = NULL;		/* parent of the vnode to be covered */
	vnode_t vp = NULL;		/* vnode to be covered by the mount */
	int need_nameidone = 0;
	vfs_context_t ctx = vfs_context_current();
	char fstypename[MFSNAMELEN];
	struct nameidata nd;
	size_t dummy=0;
	char *labelstr = NULL;		/* MAC label string copied in from user space */
	int flags = uap->flags;
	int error;
#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
	boolean_t is_64bit = IS_64BIT_PROCESS(p);
#else
#pragma unused(p)
#endif
	/*
	 * Get the fs type name from user space
	 */
	error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
	if (error)
		return (error);

	/*
	 * Get the vnode to be covered
	 */
	NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
	       UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error) {
		goto out;
	}
	/* namei() took iocounts on vp and pvp; both are dropped at 'out' */
	need_nameidone = 1;
	vp = nd.ni_vp;
	pvp = nd.ni_dvp;

#ifdef CONFIG_IMGSRC_ACCESS
	/* Mounting image source cannot be batched with other operations */
	if (flags == MNT_IMGSRC_BY_INDEX) {
		error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
		    ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
		goto out;
	}
#endif /* CONFIG_IMGSRC_ACCESS */

#if CONFIG_MACF
	/*
	 * Get the label string (if any) from user space
	 */
	if (uap->mac_p != USER_ADDR_NULL) {
		struct user_mac mac;
		size_t ulen = 0;

		/* user_mac layout differs between 32- and 64-bit callers */
		if (is_64bit) {
			struct user64_mac mac64;
			error = copyin(uap->mac_p, &mac64, sizeof(mac64));
			mac.m_buflen = mac64.m_buflen;
			mac.m_string = mac64.m_string;
		} else {
			struct user32_mac mac32;
			error = copyin(uap->mac_p, &mac32, sizeof(mac32));
			mac.m_buflen = mac32.m_buflen;
			mac.m_string = mac32.m_string;
		}
		if (error)
			goto out;
		/* label must fit the cap and hold at least one char + NUL */
		if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
		    (mac.m_buflen < 2)) {
			error = EINVAL;
			goto out;
		}
		MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
		error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
		if (error) {
			goto out;
		}
		AUDIT_ARG(mac_string, labelstr);
	}
#endif /* CONFIG_MACF */

	AUDIT_ARG(fflags, flags);

#if SECURE_KERNEL
	if (flags & MNT_UNION) {
		/* No union mounts on release kernels */
		error = EPERM;
		goto out;
	}
#endif

	/* special handling when the target is the root of the root filesystem */
	if ((vp->v_flag & VROOT) &&
	    (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
		if (!(flags & MNT_UNION)) {
			flags |= MNT_UPDATE;
		}
		else {
			/*
			 * For a union mount on '/', treat it as fresh
			 * mount instead of update.
			 * Otherwise, union mounting on '/' used to panic the
			 * system before, since mnt_vnodecovered was found to
			 * be NULL for '/' which is required for unionlookup
			 * after it gets ENOENT on union mount.
			 */
			flags = (flags & ~(MNT_UPDATE));
		}

#if SECURE_KERNEL
		if ((flags & MNT_RDONLY) == 0) {
			/* Release kernels are not allowed to mount "/" as rw */
			error = EPERM;
			goto out;
		}
#endif
		/*
		 * See 7392553 for more details on why this check exists.
		 * Suffice to say: If this check is ON and something tries
		 * to mount the rootFS RW, we'll turn off the codesign
		 * bitmap optimization.
		 */
#if CHECK_CS_VALIDATION_BITMAP
		if ((flags & MNT_RDONLY) == 0 ) {
			root_fs_upgrade_try = TRUE;
		}
#endif
	}

	error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
	                     labelstr, FALSE, ctx);

out:

#if CONFIG_MACF
	if (labelstr)
		FREE(labelstr, M_MACTEMP);
#endif /* CONFIG_MACF */

	/* drop the iocounts taken by namei(), then release its path buffer */
	if (vp) {
		vnode_put(vp);
	}
	if (pvp) {
		vnode_put(pvp);
	}
	if (need_nameidone) {
		nameidone(&nd);
	}

	return (error);
}
485
/*
 * common mount implementation (final stage of mounting)
 *
 * Arguments:
 *  fstypename	file system type (ie it's vfs name)
 *  pvp		parent of covered vnode
 *  vp		covered vnode
 *  cnp		component name (ie path) of covered vnode
 *  flags	generic mount flags
 *  fsmountargs	file system specific data
 *  internal_flags	KERNEL_MOUNT_* flags for in-kernel callers
 *  labelstr	optional MAC label
 *  kernelmount	TRUE for mounts initiated from inside the kernel
 *  ctx		caller's context
 */
500 static int
501 mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
502 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
503 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
504 {
505 #if !CONFIG_MACF
506 #pragma unused(labelstr)
507 #endif
508 struct vnode *devvp = NULLVP;
509 struct vnode *device_vnode = NULLVP;
510 #if CONFIG_MACF
511 struct vnode *rvp;
512 #endif
513 struct mount *mp;
514 struct vfstable *vfsp = (struct vfstable *)0;
515 struct proc *p = vfs_context_proc(ctx);
516 int error, flag = 0;
517 user_addr_t devpath = USER_ADDR_NULL;
518 int ronly = 0;
519 int mntalloc = 0;
520 boolean_t vfsp_ref = FALSE;
521 boolean_t is_rwlock_locked = FALSE;
522 boolean_t did_rele = FALSE;
523 boolean_t have_usecount = FALSE;
524
525 /*
526 * Process an update for an existing mount
527 */
528 if (flags & MNT_UPDATE) {
529 if ((vp->v_flag & VROOT) == 0) {
530 error = EINVAL;
531 goto out1;
532 }
533 mp = vp->v_mount;
534
535 /* unmount in progress return error */
536 mount_lock_spin(mp);
537 if (mp->mnt_lflag & MNT_LUNMOUNT) {
538 mount_unlock(mp);
539 error = EBUSY;
540 goto out1;
541 }
542 mount_unlock(mp);
543 lck_rw_lock_exclusive(&mp->mnt_rwlock);
544 is_rwlock_locked = TRUE;
545 /*
546 * We only allow the filesystem to be reloaded if it
547 * is currently mounted read-only.
548 */
549 if ((flags & MNT_RELOAD) &&
550 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
551 error = ENOTSUP;
552 goto out1;
553 }
554
555 /*
556 * If content protection is enabled, update mounts are not
557 * allowed to turn it off.
558 */
559 if ((mp->mnt_flag & MNT_CPROTECT) &&
560 ((flags & MNT_CPROTECT) == 0)) {
561 error = EINVAL;
562 goto out1;
563 }
564
565 #ifdef CONFIG_IMGSRC_ACCESS
566 /* Can't downgrade the backer of the root FS */
567 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
568 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
569 error = ENOTSUP;
570 goto out1;
571 }
572 #endif /* CONFIG_IMGSRC_ACCESS */
573
574 /*
575 * Only root, or the user that did the original mount is
576 * permitted to update it.
577 */
578 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
579 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
580 goto out1;
581 }
582 #if CONFIG_MACF
583 error = mac_mount_check_remount(ctx, mp);
584 if (error != 0) {
585 goto out1;
586 }
587 #endif
588 /*
589 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
590 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
591 */
592 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
593 flags |= MNT_NOSUID | MNT_NODEV;
594 if (mp->mnt_flag & MNT_NOEXEC)
595 flags |= MNT_NOEXEC;
596 }
597 flag = mp->mnt_flag;
598
599
600
601 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
602
603 vfsp = mp->mnt_vtable;
604 goto update;
605 }
606 /*
607 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
608 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
609 */
610 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
611 flags |= MNT_NOSUID | MNT_NODEV;
612 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
613 flags |= MNT_NOEXEC;
614 }
615
616 /* XXXAUDIT: Should we capture the type on the error path as well? */
617 AUDIT_ARG(text, fstypename);
618 mount_list_lock();
619 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
620 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
621 vfsp->vfc_refcount++;
622 vfsp_ref = TRUE;
623 break;
624 }
625 mount_list_unlock();
626 if (vfsp == NULL) {
627 error = ENODEV;
628 goto out1;
629 }
630
631 /*
632 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
633 */
634 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
635 error = EINVAL; /* unsupported request */
636 goto out1;
637 }
638
639 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
640 if (error != 0) {
641 goto out1;
642 }
643
644 /*
645 * Allocate and initialize the filesystem (mount_t)
646 */
647 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
648 M_MOUNT, M_WAITOK);
649 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
650 mntalloc = 1;
651
652 /* Initialize the default IO constraints */
653 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
654 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
655 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
656 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
657 mp->mnt_devblocksize = DEV_BSIZE;
658 mp->mnt_alignmentmask = PAGE_MASK;
659 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
660 mp->mnt_ioscale = 1;
661 mp->mnt_ioflags = 0;
662 mp->mnt_realrootvp = NULLVP;
663 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
664
665 TAILQ_INIT(&mp->mnt_vnodelist);
666 TAILQ_INIT(&mp->mnt_workerqueue);
667 TAILQ_INIT(&mp->mnt_newvnodes);
668 mount_lock_init(mp);
669 lck_rw_lock_exclusive(&mp->mnt_rwlock);
670 is_rwlock_locked = TRUE;
671 mp->mnt_op = vfsp->vfc_vfsops;
672 mp->mnt_vtable = vfsp;
673 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
674 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
675 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
676 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
677 mp->mnt_vnodecovered = vp;
678 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
679 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
680 mp->mnt_devbsdunit = 0;
681
682 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
683 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
684
685 #if NFSCLIENT || DEVFS || ROUTEFS
686 if (kernelmount)
687 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
688 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
689 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
#endif /* NFSCLIENT || DEVFS || ROUTEFS */
691
692 update:
693 /*
694 * Set the mount level flags.
695 */
696 if (flags & MNT_RDONLY)
697 mp->mnt_flag |= MNT_RDONLY;
698 else if (mp->mnt_flag & MNT_RDONLY) {
699 // disallow read/write upgrades of file systems that
700 // had the TYPENAME_OVERRIDE feature set.
701 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
702 error = EPERM;
703 goto out1;
704 }
705 mp->mnt_kern_flag |= MNTK_WANTRDWR;
706 }
707 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
708 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
709 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
710 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
711 MNT_QUARANTINE | MNT_CPROTECT);
712 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
713 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
714 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
715 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
716 MNT_QUARANTINE | MNT_CPROTECT);
717
718 #if CONFIG_MACF
719 if (flags & MNT_MULTILABEL) {
720 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
721 error = EINVAL;
722 goto out1;
723 }
724 mp->mnt_flag |= MNT_MULTILABEL;
725 }
726 #endif
727 /*
728 * Process device path for local file systems if requested
729 */
730 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS &&
731 !(internal_flags & KERNEL_MOUNT_SNAPSHOT)) {
732 if (vfs_context_is64bit(ctx)) {
733 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
734 goto out1;
735 fsmountargs += sizeof(devpath);
736 } else {
737 user32_addr_t tmp;
738 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
739 goto out1;
740 /* munge into LP64 addr */
741 devpath = CAST_USER_ADDR_T(tmp);
742 fsmountargs += sizeof(tmp);
743 }
744
745 /* Lookup device and authorize access to it */
746 if ((devpath)) {
747 struct nameidata nd;
748
749 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
750 if ( (error = namei(&nd)) )
751 goto out1;
752
753 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
754 devvp = nd.ni_vp;
755
756 nameidone(&nd);
757
758 if (devvp->v_type != VBLK) {
759 error = ENOTBLK;
760 goto out2;
761 }
762 if (major(devvp->v_rdev) >= nblkdev) {
763 error = ENXIO;
764 goto out2;
765 }
766 /*
767 * If mount by non-root, then verify that user has necessary
768 * permissions on the device.
769 */
770 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
771 mode_t accessmode = KAUTH_VNODE_READ_DATA;
772
773 if ((mp->mnt_flag & MNT_RDONLY) == 0)
774 accessmode |= KAUTH_VNODE_WRITE_DATA;
775 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
776 goto out2;
777 }
778 }
779 /* On first mount, preflight and open device */
780 if (devpath && ((flags & MNT_UPDATE) == 0)) {
781 if ( (error = vnode_ref(devvp)) )
782 goto out2;
783 /*
784 * Disallow multiple mounts of the same device.
785 * Disallow mounting of a device that is currently in use
786 * (except for root, which might share swap device for miniroot).
787 * Flush out any old buffers remaining from a previous use.
788 */
789 if ( (error = vfs_mountedon(devvp)) )
790 goto out3;
791
792 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
793 error = EBUSY;
794 goto out3;
795 }
796 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
797 error = ENOTBLK;
798 goto out3;
799 }
800 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
801 goto out3;
802
803 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
804 #if CONFIG_MACF
805 error = mac_vnode_check_open(ctx,
806 devvp,
807 ronly ? FREAD : FREAD|FWRITE);
808 if (error)
809 goto out3;
810 #endif /* MAC */
811 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
812 goto out3;
813
814 mp->mnt_devvp = devvp;
815 device_vnode = devvp;
816
817 } else if ((mp->mnt_flag & MNT_RDONLY) &&
818 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
819 (device_vnode = mp->mnt_devvp)) {
820 dev_t dev;
821 int maj;
822 /*
823 * If upgrade to read-write by non-root, then verify
824 * that user has necessary permissions on the device.
825 */
826 vnode_getalways(device_vnode);
827
828 if (suser(vfs_context_ucred(ctx), NULL) &&
829 (error = vnode_authorize(device_vnode, NULL,
830 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
831 ctx)) != 0) {
832 vnode_put(device_vnode);
833 goto out2;
834 }
835
836 /* Tell the device that we're upgrading */
837 dev = (dev_t)device_vnode->v_rdev;
838 maj = major(dev);
839
840 if ((u_int)maj >= (u_int)nblkdev)
841 panic("Volume mounted on a device with invalid major number.");
842
843 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
844 vnode_put(device_vnode);
845 device_vnode = NULLVP;
846 if (error != 0) {
847 goto out2;
848 }
849 }
850 }
851 #if CONFIG_MACF
852 if ((flags & MNT_UPDATE) == 0) {
853 mac_mount_label_init(mp);
854 mac_mount_label_associate(ctx, mp);
855 }
856 if (labelstr) {
857 if ((flags & MNT_UPDATE) != 0) {
858 error = mac_mount_check_label_update(ctx, mp);
859 if (error != 0)
860 goto out3;
861 }
862 }
863 #endif
864 /*
865 * Mount the filesystem.
866 */
867 if (internal_flags & KERNEL_MOUNT_SNAPSHOT) {
868 error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT,
869 (caddr_t)fsmountargs, 0, ctx);
870 } else {
871 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
872 }
873
874 if (flags & MNT_UPDATE) {
875 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
876 mp->mnt_flag &= ~MNT_RDONLY;
877 mp->mnt_flag &=~
878 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
879 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
880 if (error)
881 mp->mnt_flag = flag; /* restore flag value */
882 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
883 lck_rw_done(&mp->mnt_rwlock);
884 is_rwlock_locked = FALSE;
885 if (!error)
886 enablequotas(mp, ctx);
887 goto exit;
888 }
889
890 /*
891 * Put the new filesystem on the mount list after root.
892 */
893 if (error == 0) {
894 struct vfs_attr vfsattr;
895 #if CONFIG_MACF
896 if (vfs_flags(mp) & MNT_MULTILABEL) {
897 error = VFS_ROOT(mp, &rvp, ctx);
898 if (error) {
899 printf("%s() VFS_ROOT returned %d\n", __func__, error);
900 goto out3;
901 }
902 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
903 /*
904 * drop reference provided by VFS_ROOT
905 */
906 vnode_put(rvp);
907
908 if (error)
909 goto out3;
910 }
911 #endif /* MAC */
912
913 vnode_lock_spin(vp);
914 CLR(vp->v_flag, VMOUNT);
915 vp->v_mountedhere = mp;
916 vnode_unlock(vp);
917
918 /*
919 * taking the name_cache_lock exclusively will
920 * insure that everyone is out of the fast path who
921 * might be trying to use a now stale copy of
922 * vp->v_mountedhere->mnt_realrootvp
923 * bumping mount_generation causes the cached values
924 * to be invalidated
925 */
926 name_cache_lock();
927 mount_generation++;
928 name_cache_unlock();
929
930 error = vnode_ref(vp);
931 if (error != 0) {
932 goto out4;
933 }
934
935 have_usecount = TRUE;
936
937 error = checkdirs(vp, ctx);
938 if (error != 0) {
939 /* Unmount the filesystem as cdir/rdirs cannot be updated */
940 goto out4;
941 }
942 /*
943 * there is no cleanup code here so I have made it void
944 * we need to revisit this
945 */
946 (void)VFS_START(mp, 0, ctx);
947
948 if (mount_list_add(mp) != 0) {
949 /*
950 * The system is shutting down trying to umount
951 * everything, so fail with a plausible errno.
952 */
953 error = EBUSY;
954 goto out4;
955 }
956 lck_rw_done(&mp->mnt_rwlock);
957 is_rwlock_locked = FALSE;
958
959 /* Check if this mounted file system supports EAs or named streams. */
960 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
961 VFSATTR_INIT(&vfsattr);
962 VFSATTR_WANTED(&vfsattr, f_capabilities);
963 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
964 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
965 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
966 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
967 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
968 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
969 }
970 #if NAMEDSTREAMS
971 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
972 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
973 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
974 }
975 #endif
976 /* Check if this file system supports path from id lookups. */
977 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
978 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
979 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
980 } else if (mp->mnt_flag & MNT_DOVOLFS) {
981 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
982 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
983 }
984
985 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) &&
986 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) {
987 mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS;
988 }
989 }
990 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
991 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
992 }
993 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
994 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
995 }
996 /* increment the operations count */
997 OSAddAtomic(1, &vfs_nummntops);
998 enablequotas(mp, ctx);
999
1000 if (device_vnode) {
1001 device_vnode->v_specflags |= SI_MOUNTEDON;
1002
1003 /*
1004 * cache the IO attributes for the underlying physical media...
1005 * an error return indicates the underlying driver doesn't
1006 * support all the queries necessary... however, reasonable
1007 * defaults will have been set, so no reason to bail or care
1008 */
1009 vfs_init_io_attributes(device_vnode, mp);
1010 }
1011
1012 /* Now that mount is setup, notify the listeners */
1013 vfs_notify_mount(pvp);
1014 IOBSDMountChange(mp, kIOMountChangeMount);
1015
1016 } else {
1017 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1018 if (mp->mnt_vnodelist.tqh_first != NULL) {
1019 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1020 mp->mnt_vtable->vfc_name, error);
1021 }
1022
1023 vnode_lock_spin(vp);
1024 CLR(vp->v_flag, VMOUNT);
1025 vnode_unlock(vp);
1026 mount_list_lock();
1027 mp->mnt_vtable->vfc_refcount--;
1028 mount_list_unlock();
1029
1030 if (device_vnode ) {
1031 vnode_rele(device_vnode);
1032 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
1033 }
1034 lck_rw_done(&mp->mnt_rwlock);
1035 is_rwlock_locked = FALSE;
1036
1037 /*
1038 * if we get here, we have a mount structure that needs to be freed,
1039 * but since the coveredvp hasn't yet been updated to point at it,
1040 * no need to worry about other threads holding a crossref on this mp
1041 * so it's ok to just free it
1042 */
1043 mount_lock_destroy(mp);
1044 #if CONFIG_MACF
1045 mac_mount_label_destroy(mp);
1046 #endif
1047 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1048 }
1049 exit:
1050 /*
1051 * drop I/O count on the device vp if there was one
1052 */
1053 if (devpath && devvp)
1054 vnode_put(devvp);
1055
1056 return(error);
1057
1058 /* Error condition exits */
1059 out4:
1060 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
1061
1062 /*
1063 * If the mount has been placed on the covered vp,
1064 * it may have been discovered by now, so we have
1065 * to treat this just like an unmount
1066 */
1067 mount_lock_spin(mp);
1068 mp->mnt_lflag |= MNT_LDEAD;
1069 mount_unlock(mp);
1070
1071 if (device_vnode != NULLVP) {
1072 vnode_rele(device_vnode);
1073 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1074 ctx);
1075 did_rele = TRUE;
1076 }
1077
1078 vnode_lock_spin(vp);
1079
1080 mp->mnt_crossref++;
1081 vp->v_mountedhere = (mount_t) 0;
1082
1083 vnode_unlock(vp);
1084
1085 if (have_usecount) {
1086 vnode_rele(vp);
1087 }
1088 out3:
1089 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
1090 vnode_rele(devvp);
1091 out2:
1092 if (devpath && devvp)
1093 vnode_put(devvp);
1094 out1:
1095 /* Release mnt_rwlock only when it was taken */
1096 if (is_rwlock_locked == TRUE) {
1097 lck_rw_done(&mp->mnt_rwlock);
1098 }
1099
1100 if (mntalloc) {
1101 if (mp->mnt_crossref)
1102 mount_dropcrossref(mp, vp, 0);
1103 else {
1104 mount_lock_destroy(mp);
1105 #if CONFIG_MACF
1106 mac_mount_label_destroy(mp);
1107 #endif
1108 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1109 }
1110 }
1111 if (vfsp_ref) {
1112 mount_list_lock();
1113 vfsp->vfc_refcount--;
1114 mount_list_unlock();
1115 }
1116
1117 return(error);
1118 }
1119
1120 /*
1121 * Flush in-core data, check for competing mount attempts,
1122 * and set VMOUNT
1123 */
/*
 * Flush in-core data, check for competing mount attempts,
 * and set VMOUNT on the covered vnode.
 *
 * On success VMOUNT is set on 'vp'; the caller is responsible for
 * clearing it again if the mount subsequently fails.
 * Errors: EPERM (ownership/MAC denial), ENOTDIR, EBUSY (already
 * covered), or an error from VNOP_FSYNC()/buf_invalidateblks().
 */
int
prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
{
#if !CONFIG_MACF
#pragma unused(cnp,fsname)
#endif
	struct vnode_attr va;
	int error;

	if (!skip_auth) {
		/*
		 * If the user is not root, ensure that they own the directory
		 * onto which we are attempting to mount.
		 */
		VATTR_INIT(&va);
		VATTR_WANTED(&va, va_uid);
		if ((error = vnode_getattr(vp, &va, ctx)) ||
		    (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
		    (!vfs_context_issuser(ctx)))) {
			error = EPERM;
			goto out;
		}
	}

	/* Push dirty data and drop cached buffers before covering the vnode. */
	if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
		goto out;

	if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
		goto out;

	/* Only directories may be mounted on. */
	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

	/* Reject if a mount already covers this vnode. */
	if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
		error = EBUSY;
		goto out;
	}

#if CONFIG_MACF
	error = mac_mount_check_mount(ctx, vp,
	    cnp, fsname);
	if (error != 0)
		goto out;
#endif

	/* Mark the vnode as having a mount in progress. */
	vnode_lock_spin(vp);
	SET(vp->v_flag, VMOUNT);
	vnode_unlock(vp);

out:
	return error;
}
1178
1179 #if CONFIG_IMGSRC_ACCESS
1180
#if DEBUG
/* Debug builds: route imageboot-relocation tracing to the kernel log. */
#define IMGSRC_DEBUG(args...) printf(args)
#else
/* Release builds: compile the tracing away entirely. */
#define IMGSRC_DEBUG(args...) do { } while(0)
#endif
1186
1187 static int
1188 authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1189 {
1190 struct nameidata nd;
1191 vnode_t vp, realdevvp;
1192 mode_t accessmode;
1193 int error;
1194
1195 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1196 if ( (error = namei(&nd)) ) {
1197 IMGSRC_DEBUG("namei() failed with %d\n", error);
1198 return error;
1199 }
1200
1201 vp = nd.ni_vp;
1202
1203 if (!vnode_isblk(vp)) {
1204 IMGSRC_DEBUG("Not block device.\n");
1205 error = ENOTBLK;
1206 goto out;
1207 }
1208
1209 realdevvp = mp->mnt_devvp;
1210 if (realdevvp == NULLVP) {
1211 IMGSRC_DEBUG("No device backs the mount.\n");
1212 error = ENXIO;
1213 goto out;
1214 }
1215
1216 error = vnode_getwithref(realdevvp);
1217 if (error != 0) {
1218 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1219 goto out;
1220 }
1221
1222 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1223 IMGSRC_DEBUG("Wrong dev_t.\n");
1224 error = ENXIO;
1225 goto out1;
1226 }
1227
1228 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1229
1230 /*
1231 * If mount by non-root, then verify that user has necessary
1232 * permissions on the device.
1233 */
1234 if (!vfs_context_issuser(ctx)) {
1235 accessmode = KAUTH_VNODE_READ_DATA;
1236 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1237 accessmode |= KAUTH_VNODE_WRITE_DATA;
1238 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1239 IMGSRC_DEBUG("Access denied.\n");
1240 goto out1;
1241 }
1242 }
1243
1244 *devvpp = vp;
1245
1246 out1:
1247 vnode_put(realdevvp);
1248 out:
1249 nameidone(&nd);
1250 if (error) {
1251 vnode_put(vp);
1252 }
1253
1254 return error;
1255 }
1256
/*
 * Clear VMOUNT, set v_mountedhere and mnt_vnodecovered, ref the vnode,
 * and call checkdirs()
 */
static int
place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
{
	int error;

	mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */

	/* Publish the mount on the covered vnode; drop the in-progress mark. */
	vnode_lock_spin(vp);
	CLR(vp->v_flag, VMOUNT);
	vp->v_mountedhere = mp;
	vnode_unlock(vp);

	/*
	 * taking the name_cache_lock exclusively will
	 * insure that everyone is out of the fast path who
	 * might be trying to use a now stale copy of
	 * vp->v_mountedhere->mnt_realrootvp
	 * bumping mount_generation causes the cached values
	 * to be invalidated
	 */
	name_cache_lock();
	mount_generation++;
	name_cache_unlock();

	/* Hold a usecount on the covered vnode for the life of the mount. */
	error = vnode_ref(vp);
	if (error != 0) {
		goto out;
	}

	error = checkdirs(vp, ctx);
	if (error != 0) {
		/* Unmount the filesystem as cdir/rdirs cannot be updated */
		vnode_rele(vp);
		goto out;
	}

out:
	if (error != 0) {
		/*
		 * NOTE(review): only mnt_vnodecovered is undone here;
		 * vp->v_mountedhere appears to remain pointing at mp on
		 * this error path -- confirm the caller's cleanup
		 * (undo_place_on_covered_vp or equivalent) covers it.
		 */
		mp->mnt_vnodecovered = NULLVP;
	}
	return error;
}
1303
/*
 * Reverse place_mount_and_checkdirs(): drop the covered vnode's
 * usecount and detach the mount from it on both sides.
 */
static void
undo_place_on_covered_vp(mount_t mp, vnode_t vp)
{
	vnode_rele(vp);
	vnode_lock_spin(vp);
	vp->v_mountedhere = (mount_t)NULL;
	vnode_unlock(vp);

	mp->mnt_vnodecovered = NULLVP;
}
1314
1315 static int
1316 mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1317 {
1318 int error;
1319
1320 /* unmount in progress return error */
1321 mount_lock_spin(mp);
1322 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1323 mount_unlock(mp);
1324 return EBUSY;
1325 }
1326 mount_unlock(mp);
1327 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1328
1329 /*
1330 * We only allow the filesystem to be reloaded if it
1331 * is currently mounted read-only.
1332 */
1333 if ((flags & MNT_RELOAD) &&
1334 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1335 error = ENOTSUP;
1336 goto out;
1337 }
1338
1339 /*
1340 * Only root, or the user that did the original mount is
1341 * permitted to update it.
1342 */
1343 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1344 (!vfs_context_issuser(ctx))) {
1345 error = EPERM;
1346 goto out;
1347 }
1348 #if CONFIG_MACF
1349 error = mac_mount_check_remount(ctx, mp);
1350 if (error != 0) {
1351 goto out;
1352 }
1353 #endif
1354
1355 out:
1356 if (error) {
1357 lck_rw_done(&mp->mnt_rwlock);
1358 }
1359
1360 return error;
1361 }
1362
/* Release the exclusive mnt_rwlock taken by mount_begin_update(). */
static void
mount_end_update(mount_t mp)
{
	lck_rw_done(&mp->mnt_rwlock);
}
1368
1369 static int
1370 get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1371 {
1372 vnode_t vp;
1373
1374 if (height >= MAX_IMAGEBOOT_NESTING) {
1375 return EINVAL;
1376 }
1377
1378 vp = imgsrc_rootvnodes[height];
1379 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1380 *rvpp = vp;
1381 return 0;
1382 } else {
1383 return ENOENT;
1384 }
1385 }
1386
1387 static int
1388 relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
1389 const char *fsname, vfs_context_t ctx,
1390 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
1391 {
1392 int error;
1393 mount_t mp;
1394 boolean_t placed = FALSE;
1395 vnode_t devvp = NULLVP;
1396 struct vfstable *vfsp;
1397 user_addr_t devpath;
1398 char *old_mntonname;
1399 vnode_t rvp;
1400 uint32_t height;
1401 uint32_t flags;
1402
1403 /* If we didn't imageboot, nothing to move */
1404 if (imgsrc_rootvnodes[0] == NULLVP) {
1405 return EINVAL;
1406 }
1407
1408 /* Only root can do this */
1409 if (!vfs_context_issuser(ctx)) {
1410 return EPERM;
1411 }
1412
1413 IMGSRC_DEBUG("looking for root vnode.\n");
1414
1415 /*
1416 * Get root vnode of filesystem we're moving.
1417 */
1418 if (by_index) {
1419 if (is64bit) {
1420 struct user64_mnt_imgsrc_args mia64;
1421 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1422 if (error != 0) {
1423 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1424 return error;
1425 }
1426
1427 height = mia64.mi_height;
1428 flags = mia64.mi_flags;
1429 devpath = mia64.mi_devpath;
1430 } else {
1431 struct user32_mnt_imgsrc_args mia32;
1432 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1433 if (error != 0) {
1434 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1435 return error;
1436 }
1437
1438 height = mia32.mi_height;
1439 flags = mia32.mi_flags;
1440 devpath = mia32.mi_devpath;
1441 }
1442 } else {
1443 /*
1444 * For binary compatibility--assumes one level of nesting.
1445 */
1446 if (is64bit) {
1447 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1448 return error;
1449 } else {
1450 user32_addr_t tmp;
1451 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1452 return error;
1453
1454 /* munge into LP64 addr */
1455 devpath = CAST_USER_ADDR_T(tmp);
1456 }
1457
1458 height = 0;
1459 flags = 0;
1460 }
1461
1462 if (flags != 0) {
1463 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1464 return EINVAL;
1465 }
1466
1467 error = get_imgsrc_rootvnode(height, &rvp);
1468 if (error != 0) {
1469 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
1470 return error;
1471 }
1472
1473 IMGSRC_DEBUG("got root vnode.\n");
1474
1475 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1476
1477 /* Can only move once */
1478 mp = vnode_mount(rvp);
1479 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1480 IMGSRC_DEBUG("Already moved.\n");
1481 error = EBUSY;
1482 goto out0;
1483 }
1484
1485 IMGSRC_DEBUG("Starting updated.\n");
1486
1487 /* Get exclusive rwlock on mount, authorize update on mp */
1488 error = mount_begin_update(mp , ctx, 0);
1489 if (error != 0) {
1490 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
1491 goto out0;
1492 }
1493
1494 /*
1495 * It can only be moved once. Flag is set under the rwlock,
1496 * so we're now safe to proceed.
1497 */
1498 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1499 IMGSRC_DEBUG("Already moved [2]\n");
1500 goto out1;
1501 }
1502
1503
1504 IMGSRC_DEBUG("Preparing coveredvp.\n");
1505
1506 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1507 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
1508 if (error != 0) {
1509 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
1510 goto out1;
1511 }
1512
1513 IMGSRC_DEBUG("Covered vp OK.\n");
1514
1515 /* Sanity check the name caller has provided */
1516 vfsp = mp->mnt_vtable;
1517 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
1518 IMGSRC_DEBUG("Wrong fs name.\n");
1519 error = EINVAL;
1520 goto out2;
1521 }
1522
1523 /* Check the device vnode and update mount-from name, for local filesystems */
1524 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
1525 IMGSRC_DEBUG("Local, doing device validation.\n");
1526
1527 if (devpath != USER_ADDR_NULL) {
1528 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1529 if (error) {
1530 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1531 goto out2;
1532 }
1533
1534 vnode_put(devvp);
1535 }
1536 }
1537
1538 /*
1539 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1540 * and increment the name cache's mount generation
1541 */
1542
1543 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1544 error = place_mount_and_checkdirs(mp, vp, ctx);
1545 if (error != 0) {
1546 goto out2;
1547 }
1548
1549 placed = TRUE;
1550
1551 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1552 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1553
1554 /* Forbid future moves */
1555 mount_lock(mp);
1556 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1557 mount_unlock(mp);
1558
1559 /* Finally, add to mount list, completely ready to go */
1560 if (mount_list_add(mp) != 0) {
1561 /*
1562 * The system is shutting down trying to umount
1563 * everything, so fail with a plausible errno.
1564 */
1565 error = EBUSY;
1566 goto out3;
1567 }
1568
1569 mount_end_update(mp);
1570 vnode_put(rvp);
1571 FREE(old_mntonname, M_TEMP);
1572
1573 vfs_notify_mount(pvp);
1574
1575 return 0;
1576 out3:
1577 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1578
1579 mount_lock(mp);
1580 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1581 mount_unlock(mp);
1582
1583 out2:
1584 /*
1585 * Placing the mp on the vnode clears VMOUNT,
1586 * so cleanup is different after that point
1587 */
1588 if (placed) {
1589 /* Rele the vp, clear VMOUNT and v_mountedhere */
1590 undo_place_on_covered_vp(mp, vp);
1591 } else {
1592 vnode_lock_spin(vp);
1593 CLR(vp->v_flag, VMOUNT);
1594 vnode_unlock(vp);
1595 }
1596 out1:
1597 mount_end_update(mp);
1598
1599 out0:
1600 vnode_put(rvp);
1601 FREE(old_mntonname, M_TEMP);
1602 return error;
1603 }
1604
1605 #endif /* CONFIG_IMGSRC_ACCESS */
1606
/*
 * Enable disk quotas on 'mp' for each quota type whose trigger file
 * exists in the mount's root directory.  HFS-only; errors are ignored
 * so quota setup never interferes with completing the mount.
 */
void
enablequotas(struct mount *mp, vfs_context_t ctx)
{
	struct nameidata qnd;
	int type;
	char qfpath[MAXPATHLEN];
	const char *qfname = QUOTAFILENAME;
	const char *qfopsname = QUOTAOPSNAME;
	const char *qfextension[] = INITQFNAMES;

	/* XXX Should be an MNTK_ flag, instead of strncmp()'s */
	if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
		return;
	}
	/*
	 * Enable filesystem disk quotas if necessary.
	 * We ignore errors as this should not interfere with final mount
	 */
	for (type=0; type < MAXQUOTAS; type++) {
		/* Look for the per-type trigger ("ops") file; skip type if absent. */
		snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
		NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
		       CAST_USER_ADDR_T(qfpath), ctx);
		if (namei(&qnd) != 0)
			continue; 	    /* option file to trigger quotas is not present */
		vnode_put(qnd.ni_vp);
		nameidone(&qnd);
		/* Trigger file present: turn quotas on using the actual quota file. */
		snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);

		(void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
	}
	return;
}
1639
1640
/*
 * Per-process callback for checkdirs(): if the process's current or
 * root directory is 'olddp' (the vnode just covered by a new mount),
 * swap it for 'newdp' (the new mount's root), transferring the
 * usecount accordingly.
 */
static int
checkdirs_callback(proc_t p, void * arg)
{
	struct cdirargs * cdrp = (struct cdirargs * )arg;
	vnode_t olddp = cdrp->olddp;
	vnode_t newdp = cdrp->newdp;
	struct filedesc *fdp;
	vnode_t tvp;
	vnode_t fdp_cvp;
	vnode_t fdp_rvp;
	int cdir_changed = 0;
	int rdir_changed = 0;

	/*
	 * XXX Also needs to iterate each thread in the process to see if it
	 * XXX is using a per-thread current working directory, and, if so,
	 * XXX update that as well.
	 */

	/* Snapshot cdir/rdir under the fd lock. */
	proc_fdlock(p);
	fdp = p->p_fd;
	if (fdp == (struct filedesc *)0) {
		proc_fdunlock(p);
		return(PROC_RETURNED);
	}
	fdp_cvp = fdp->fd_cdir;
	fdp_rvp = fdp->fd_rdir;
	proc_fdunlock(p);

	/*
	 * NOTE(review): tvp below re-reads fdp->fd_cdir / fd_rdir after
	 * proc_fdunlock(), outside the lock -- presumably safe because
	 * the covered vnode cannot go away mid-mount, but confirm this
	 * cannot race a concurrent chdir()/chroot().
	 */
	if (fdp_cvp == olddp) {
		vnode_ref(newdp);
		tvp = fdp->fd_cdir;
		fdp_cvp = newdp;
		cdir_changed = 1;
		vnode_rele(tvp);
	}
	if (fdp_rvp == olddp) {
		vnode_ref(newdp);
		tvp = fdp->fd_rdir;
		fdp_rvp = newdp;
		rdir_changed = 1;
		vnode_rele(tvp);
	}
	/* Publish the replacements back under the fd lock. */
	if (cdir_changed || rdir_changed) {
		proc_fdlock(p);
		fdp->fd_cdir = fdp_cvp;
		fdp->fd_rdir = fdp_rvp;
		proc_fdunlock(p);
	}
	return(PROC_RETURNED);
}
1692
1693
1694
/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted. If so, replace them with the new mount point.
 */
static int
checkdirs(vnode_t olddp, vfs_context_t ctx)
{
	vnode_t newdp;
	vnode_t tvp;
	int err;
	struct cdirargs cdr;

	/* Only the mount's own reference exists: no process can be using olddp. */
	if (olddp->v_usecount == 1)
		return(0);
	err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);

	if (err != 0) {
#if DIAGNOSTIC
		panic("mount: lost mount: error %d", err);
#endif
		return(err);
	}

	cdr.olddp = olddp;
	cdr.newdp = newdp;
	/* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
	proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);

	/* The system root itself may have been mounted over. */
	if (rootvnode == olddp) {
		vnode_ref(newdp);
		tvp = rootvnode;
		rootvnode = newdp;
		vnode_rele(tvp);
	}

	vnode_put(newdp);
	return(0);
}
1734
/*
 * Unmount a file system.
 *
 * Note: unmount takes a path to the vnode mounted on as argument,
 * not special file (as before).
 */
/* ARGSUSED */
int
unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct mount *mp;
	int error;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();

	NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
	       UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;
	mp = vp->v_mount;
	nameidone(&nd);

#if CONFIG_MACF
	error = mac_mount_check_umount(ctx, mp);
	if (error != 0) {
		vnode_put(vp);
		return (error);
	}
#endif
	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & VROOT) == 0) {
		vnode_put(vp);
		return (EINVAL);
	}
	/* Trade our iocount on vp for a mount ref before handing off. */
	mount_ref(mp, 0);
	vnode_put(vp);
	/* safedounmount consumes the mount ref */
	return (safedounmount(mp, uap->flags, ctx));
}
1779
/*
 * Unmount the filesystem identified by 'fsid'.
 * Returns ENOENT if no mounted filesystem matches.
 */
int
vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx)
{
	mount_t mp;

	mp = mount_list_lookupby_fsid(fsid, 0, 1);
	if (mp == (mount_t)0) {
		return(ENOENT);
	}
	/* Swap the iteration reference for a mount ref. */
	mount_ref(mp, 0);
	mount_iterdrop(mp);
	/* safedounmount consumes the mount ref */
	return(safedounmount(mp, flags, ctx));
}
1794
1795
1796 /*
1797 * The mount struct comes with a mount ref which will be consumed.
1798 * Do the actual file system unmount, prevent some common foot shooting.
1799 */
1800 int
1801 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1802 {
1803 int error;
1804 proc_t p = vfs_context_proc(ctx);
1805
1806 /*
1807 * If the file system is not responding and MNT_NOBLOCK
1808 * is set and not a forced unmount then return EBUSY.
1809 */
1810 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1811 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1812 error = EBUSY;
1813 goto out;
1814 }
1815
1816 /*
1817 * Skip authorization if the mount is tagged as permissive and
1818 * this is not a forced-unmount attempt.
1819 */
1820 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1821 /*
1822 * Only root, or the user that did the original mount is
1823 * permitted to unmount this filesystem.
1824 */
1825 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1826 (error = suser(kauth_cred_get(), &p->p_acflag)))
1827 goto out;
1828 }
1829 /*
1830 * Don't allow unmounting the root file system.
1831 */
1832 if (mp->mnt_flag & MNT_ROOTFS) {
1833 error = EBUSY; /* the root is always busy */
1834 goto out;
1835 }
1836
1837 #ifdef CONFIG_IMGSRC_ACCESS
1838 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1839 error = EBUSY;
1840 goto out;
1841 }
1842 #endif /* CONFIG_IMGSRC_ACCESS */
1843
1844 return (dounmount(mp, flags, 1, ctx));
1845
1846 out:
1847 mount_drop(mp, 0);
1848 return(error);
1849 }
1850
/*
 * Do the actual file system unmount.
 *
 * 'withref' non-zero means the caller passed in a mount ref which is
 * consumed here.  Returns EBUSY if an unmount is already in progress,
 * or the error from VFS_SYNC/vflush/VFS_UNMOUNT; on error the
 * MNTK_UNMOUNT / MNT_LUNMOUNT / MNT_LFORCE state is rolled back.
 */
int
dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
{
	vnode_t coveredvp = (vnode_t)0;
	int error;
	int needwakeup = 0;
	int forcedunmount = 0;
	int lflags = 0;
	struct vnode *devvp = NULLVP;
#if CONFIG_TRIGGERS
	proc_t p = vfs_context_proc(ctx);
	int did_vflush = 0;
	int pflags_save = 0;
#endif /* CONFIG_TRIGGERS */

	mount_lock(mp);

	/*
	 * If already an unmount in progress just return EBUSY.
	 * Even a forced unmount cannot override.
	 */
	if (mp->mnt_lflag & MNT_LUNMOUNT) {
		if (withref != 0)
			mount_drop(mp, 1);
		mount_unlock(mp);
		return (EBUSY);
	}

	if (flags & MNT_FORCE) {
		forcedunmount = 1;
		mp->mnt_lflag |= MNT_LFORCE;
	}

#if CONFIG_TRIGGERS
	/* Keep this process from hanging on unresponsive remote filesystems. */
	if (flags & MNT_NOBLOCK && p != kernproc)
		pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
#endif

	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	mp->mnt_lflag |= MNT_LUNMOUNT;
	mp->mnt_flag &=~ MNT_ASYNC;
	/*
	 * anyone currently in the fast path that
	 * trips over the cached rootvp will be
	 * dumped out and forced into the slow path
	 * to regenerate a new cached value
	 */
	mp->mnt_realrootvp = NULLVP;
	mount_unlock(mp);

	if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
		/*
		 * Force unmount any mounts in this filesystem.
		 * If any unmounts fail - just leave them dangling.
		 * Avoids recursion.
		 */
		(void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
	}

	/*
	 * taking the name_cache_lock exclusively will
	 * insure that everyone is out of the fast path who
	 * might be trying to use a now stale copy of
	 * vp->v_mountedhere->mnt_realrootvp
	 * bumping mount_generation causes the cached values
	 * to be invalidated
	 */
	name_cache_lock();
	mount_generation++;
	name_cache_unlock();


	lck_rw_lock_exclusive(&mp->mnt_rwlock);
	if (withref != 0)
		mount_drop(mp, 0);
#if CONFIG_FSE
	fsevent_unmount(mp);  /* has to come first! */
#endif
	error = 0;
	if (forcedunmount == 0) {
		ubc_umount(mp);	/* release cached vnodes */
		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
			error = VFS_SYNC(mp, MNT_WAIT, ctx);
			if (error) {
				/* Sync failed: roll back the unmount-in-progress state. */
				mount_lock(mp);
				mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
				mp->mnt_lflag &= ~MNT_LUNMOUNT;
				mp->mnt_lflag &= ~MNT_LFORCE;
				goto out;
			}
		}
	}

	IOBSDMountChange(mp, kIOMountChangeUnmount);

#if CONFIG_TRIGGERS
	vfs_nested_trigger_unmounts(mp, flags, ctx);
	did_vflush = 1;
#endif
	if (forcedunmount)
		lflags |= FORCECLOSE;
	error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM  | SKIPROOT | lflags);
	if ((forcedunmount == 0) && error) {
		/* Busy vnodes remain: roll back and report the error. */
		mount_lock(mp);
		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
		mp->mnt_lflag &= ~MNT_LUNMOUNT;
		mp->mnt_lflag &= ~MNT_LFORCE;
		goto out;
	}

	/* make sure there are no one in the mount iterations or lookup */
	mount_iterdrain(mp);

	error = VFS_UNMOUNT(mp, flags, ctx);
	if (error) {
		mount_iterreset(mp);
		mount_lock(mp);
		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
		mp->mnt_lflag &= ~MNT_LUNMOUNT;
		mp->mnt_lflag &= ~MNT_LFORCE;
		goto out;
	}

	/* increment the operations count */
	if (!error)
		OSAddAtomic(1, &vfs_nummntops);

	if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
		/* hold an io reference and drop the usecount before close */
		devvp = mp->mnt_devvp;
		vnode_getalways(devvp);
		vnode_rele(devvp);
		VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
		       ctx);
		vnode_clearmountedon(devvp);
		vnode_put(devvp);
	}
	lck_rw_done(&mp->mnt_rwlock);
	mount_list_remove(mp);
	lck_rw_lock_exclusive(&mp->mnt_rwlock);

	/* mark the mount point hook in the vp but not drop the ref yet */
	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
		/*
		 * The covered vnode needs special handling. Trying to get an
		 * iocount must not block here as this may lead to deadlocks
		 * if the Filesystem to which the covered vnode belongs is
		 * undergoing forced unmounts. Since we hold a usecount, the
		 * vnode cannot be reused (it can, however, still be terminated)
		 */
		vnode_getalways(coveredvp);
		vnode_lock_spin(coveredvp);

		mp->mnt_crossref++;
		coveredvp->v_mountedhere = (struct mount *)0;
		CLR(coveredvp->v_flag, VMOUNT);

		vnode_unlock(coveredvp);
		vnode_put(coveredvp);
	}

	mount_list_lock();
	mp->mnt_vtable->vfc_refcount--;
	mount_list_unlock();

	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
	mount_lock(mp);
	mp->mnt_lflag |= MNT_LDEAD;

	if (mp->mnt_lflag & MNT_LWAIT) {
	        /*
		 * do the wakeup here
		 * in case we block in mount_refdrain
		 * which will drop the mount lock
		 * and allow anyone blocked in vfs_busy
		 * to wakeup and see the LDEAD state
		 */
		mp->mnt_lflag &= ~MNT_LWAIT;
		wakeup((caddr_t)mp);
	}
	mount_refdrain(mp);
out:
	if (mp->mnt_lflag & MNT_LWAIT) {
		mp->mnt_lflag &= ~MNT_LWAIT;
		needwakeup = 1;
	}

#if CONFIG_TRIGGERS
	if (flags & MNT_NOBLOCK && p != kernproc) {
		// Restore P_NOREMOTEHANG bit to its previous value
		if ((pflags_save & P_NOREMOTEHANG) == 0)
			OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
	}

	/*
	 * Callback and context are set together under the mount lock, and
	 * never cleared, so we're safe to examine them here, drop the lock,
	 * and call out.
	 */
	if (mp->mnt_triggercallback != NULL) {
		mount_unlock(mp);
		if (error == 0) {
			mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
		} else if (did_vflush) {
			mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
		}
	} else {
		mount_unlock(mp);
	}
#else
	mount_unlock(mp);
#endif /* CONFIG_TRIGGERS */

	lck_rw_done(&mp->mnt_rwlock);

	if (needwakeup)
		wakeup((caddr_t)mp);

	if (!error) {
		if ((coveredvp != NULLVP)) {
			vnode_t pvp = NULLVP;

			/*
			 * The covered vnode needs special handling. Trying to
			 * get an iocount must not block here as this may lead
			 * to deadlocks if the Filesystem to which the covered
			 * vnode belongs is undergoing forced unmounts. Since we
			 * hold a usecount, the vnode cannot be reused
			 * (it can, however, still be terminated).
			 */
			vnode_getalways(coveredvp);

			mount_dropcrossref(mp, coveredvp, 0);
			/*
			 * We'll _try_ to detect if this really needs to be
			 * done. The coveredvp can only be in termination (or
			 * terminated) if the coveredvp's mount point is in a
			 * forced unmount (or has been) since we still hold the
			 * ref.
			 */
			if (!vnode_isrecycled(coveredvp)) {
				pvp = vnode_getparent(coveredvp);
#if CONFIG_TRIGGERS
				if (coveredvp->v_resolve) {
					vnode_trigger_rearm(coveredvp, ctx);
				}
#endif
			}

			vnode_rele(coveredvp);
			vnode_put(coveredvp);
			coveredvp = NULLVP;

			if (pvp) {
				/* Let watchers of the parent directory see the change. */
				lock_vnode_and_post(pvp, NOTE_WRITE);
				vnode_put(pvp);
			}
		} else if (mp->mnt_flag & MNT_ROOTFS) {
			mount_lock_destroy(mp);
#if CONFIG_MACF
			mac_mount_label_destroy(mp);
#endif
			FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
		} else
			panic("dounmount: no coveredvp");
	}
	return (error);
}
2123
/*
 * Unmount any mounts in this filesystem.
 *
 * Collects the fsids of every mount nested (transitively) under 'mp',
 * then unmounts them in reverse (deepest-first) order, ignoring
 * individual failures.
 */
void
dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
{
	mount_t	smp;
	fsid_t *fsids, fsid;
	int fsids_sz;
	int count = 0, i, m = 0;
	vnode_t vp;

	mount_list_lock();

	// Get an array to hold the submounts fsids.
	TAILQ_FOREACH(smp, &mountlist, mnt_list)
		count++;
	fsids_sz = count * sizeof(fsid_t);
	/* M_NOWAIT: we hold mount_list_lock and must not block here. */
	MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
	if (fsids == NULL) {
		mount_list_unlock();
		goto out;
	}
	fsids[0] = mp->mnt_vfsstat.f_fsid;	// Prime the pump

	/*
	 * Fill the array with submount fsids.
	 * Since mounts are always added to the tail of the mount list, the
	 * list is always in mount order.
	 * For each mount check if the mounted-on vnode belongs to a
	 * mount that's already added to our array of mounts to be unmounted.
	 */
	for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
		vp = smp->mnt_vnodecovered;
		if (vp == NULL)
			continue;
		fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid;	// Underlying fsid
		for (i = 0; i <= m; i++) {
			if (fsids[i].val[0] == fsid.val[0] &&
			    fsids[i].val[1] == fsid.val[1]) {
				fsids[++m] = smp->mnt_vfsstat.f_fsid;
				break;
			}
		}
	}
	mount_list_unlock();

	// Unmount the submounts in reverse order. Ignore errors.
	for (i = m; i > 0; i--) {
		smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
		if (smp) {
			mount_ref(smp, 0);
			mount_iterdrop(smp);
			/* dounmount consumes the mount ref taken above. */
			(void) dounmount(smp, flags, 1, ctx);
		}
	}
out:
	if (fsids)
		FREE(fsids, M_TEMP);
}
2184
/*
 * Drop one crossref on 'mp' taken against covered vnode 'dp'.
 * When the last crossref goes away and the mount is no longer attached
 * to dp, the mount structure itself is destroyed and freed.
 * 'need_put' additionally drops an iocount on dp (while still locked).
 */
void
mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
{
	vnode_lock(dp);
	mp->mnt_crossref--;

	if (mp->mnt_crossref < 0)
		panic("mount cross refs -ve");

	if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {

		if (need_put)
			vnode_put_locked(dp);
		vnode_unlock(dp);

		/* Last reference and detached: tear the mount down. */
		mount_lock_destroy(mp);
#if CONFIG_MACF
		mac_mount_label_destroy(mp);
#endif
		FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
		return;
	}
	if (need_put)
		vnode_put_locked(dp);
	vnode_unlock(dp);
}
2211
2212
/*
 * Sync each mounted filesystem.
 */
#if DIAGNOSTIC
int syncprt = 0;		/* when set, dump vfs_bufstats() after each sync */
#endif

int print_vmpage_stat=0;	/* when set, dump dirty-page counts after each sync */
int sync_timeout = 60;  // Sync time limit (sec)
2222
2223 static int
2224 sync_callback(mount_t mp, __unused void *arg)
2225 {
2226 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2227 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2228
2229 mp->mnt_flag &= ~MNT_ASYNC;
2230 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
2231 if (asyncflag)
2232 mp->mnt_flag |= MNT_ASYNC;
2233 }
2234
2235 return (VFS_RETURNED);
2236 }
2237
2238 /* ARGSUSED */
2239 int
2240 sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
2241 {
2242 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2243
2244 if (print_vmpage_stat) {
2245 vm_countdirtypages();
2246 }
2247
2248 #if DIAGNOSTIC
2249 if (syncprt)
2250 vfs_bufstats();
2251 #endif /* DIAGNOSTIC */
2252 return 0;
2253 }
2254
/*
 * Body of the timed-sync helper thread started by sync_async(): flush all
 * mounts, then wake the waiter (if any) sleeping on the timeout address.
 */
static void
sync_thread(void *arg, __unused wait_result_t wr)
{
	int *timeout = (int *) arg;	/* wakeup channel shared with sync_async() */

	vfs_iterate(LK_NOWAIT, sync_callback, NULL);

	/* Wake the sync_async() waiter before any optional reporting. */
	if (timeout)
		wakeup((caddr_t) timeout);
	if (print_vmpage_stat) {
		vm_countdirtypages();
	}

#if DIAGNOSTIC
	if (syncprt)
		vfs_bufstats();
#endif /* DIAGNOSTIC */
}
2273
2274 /*
2275 * Sync in a separate thread so we can time out if it blocks.
2276 */
static int
sync_async(int timeout)
{
	thread_t thd;
	int error;
	struct timespec ts = {timeout, 0};	/* overall time limit, seconds */

	lck_mtx_lock(sync_mtx_lck);
	/* The thread receives &timeout both as its argument and as the wakeup channel. */
	if (kernel_thread_start(sync_thread, &timeout, &thd) != KERN_SUCCESS) {
		printf("sync_thread failed\n");
		lck_mtx_unlock(sync_mtx_lck);
		return (0);
	}

	/* PDROP releases sync_mtx_lck; PCATCH allows signal interruption. */
	error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "sync_thread", &ts);
	if (error) {
		/*
		 * NOTE(review): on timeout, sync_thread may still be running and
		 * will later wakeup() on &timeout, which is this function's stack
		 * slot — apparently benign (spurious wakeup on a dead channel),
		 * but worth confirming against later xnu revisions.
		 */
		printf("sync timed out: %d sec\n", timeout);
	}
	thread_deallocate(thd);

	/* Always reports success; a timeout is logged but not propagated. */
	return (0);
}
2299
2300 /*
2301 * An in-kernel sync for power management to call.
2302 */
__private_extern__ int
sync_internal(void)
{
	/* Bounded sync: give up waiting after sync_timeout seconds. */
	(void) sync_async(sync_timeout);

	return 0;
} /* end of sync_internal call */
2310
2311 /*
2312 * Change filesystem quotas.
2313 */
2314 #if QUOTA
int
quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
{
	struct mount *mp;
	int error, quota_cmd, quota_status;
	caddr_t datap;		/* kernel-side buffer handed to VFS_QUOTACTL */
	size_t fnamelen;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();
	struct dqblk my_dqblk;

	AUDIT_ARG(uid, uap->uid);
	AUDIT_ARG(cmd, uap->cmd);
	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	       uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	/* Only the mount is needed; drop the vnode iocount right away. */
	mp = nd.ni_vp->v_mount;
	vnode_put(nd.ni_vp);
	nameidone(&nd);

	/* copyin any data we will need for downstream code */
	quota_cmd = uap->cmd >> SUBCMDSHIFT;

	switch (quota_cmd) {
	case Q_QUOTAON:
		/* uap->arg specifies a file from which to take the quotas */
		fnamelen = MAXPATHLEN;
		datap = kalloc(MAXPATHLEN);
		error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
		break;
	case Q_GETQUOTA:
		/* uap->arg is a pointer to a dqblk structure. */
		datap = (caddr_t) &my_dqblk;
		break;
	case Q_SETQUOTA:
	case Q_SETUSE:
		/* uap->arg is a pointer to a dqblk structure. */
		datap = (caddr_t) &my_dqblk;
		if (proc_is64bit(p)) {
			/* 64-bit callers pass a user_dqblk; munge to the kernel layout. */
			struct user_dqblk my_dqblk64;
			error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
			if (error == 0) {
				munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
			}
		}
		else {
			error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
		}
		break;
	case Q_QUOTASTAT:
		/* uap->arg is a pointer to an integer */
		datap = (caddr_t) &quota_status;
		break;
	default:
		datap = NULL;
		break;
	} /* switch */

	/* error is still 0 from namei unless a copyin above failed. */
	if (error == 0) {
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
	}

	/* Post-processing: free Q_QUOTAON's path buffer, copy results out. */
	switch (quota_cmd) {
	case Q_QUOTAON:
		if (datap != NULL)
			kfree(datap, MAXPATHLEN);
		break;
	case Q_GETQUOTA:
		/* uap->arg is a pointer to a dqblk structure we need to copy out to */
		if (error == 0) {
			if (proc_is64bit(p)) {
				struct user_dqblk my_dqblk64 = {.dqb_bhardlimit = 0};
				munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
				error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
			}
			else {
				error = copyout(datap, uap->arg, sizeof (struct dqblk));
			}
		}
		break;
	case Q_QUOTASTAT:
		/* uap->arg is a pointer to an integer */
		if (error == 0) {
			error = copyout(datap, uap->arg, sizeof(quota_status));
		}
		break;
	default:
		break;
	} /* switch */

	return (error);
}
2409 #else
/* Quota support compiled out: the syscall is present but unsupported. */
int
quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
{
	return (EOPNOTSUPP);
}
2415 #endif /* QUOTA */
2416
2417 /*
2418 * Get filesystem statistics.
2419 *
2420 * Returns: 0 Success
2421 * namei:???
2422 * vfs_update_vfsstat:???
2423 * munge_statfs:EFAULT
2424 */
2425 /* ARGSUSED */
2426 int
2427 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
2428 {
2429 struct mount *mp;
2430 struct vfsstatfs *sp;
2431 int error;
2432 struct nameidata nd;
2433 vfs_context_t ctx = vfs_context_current();
2434 vnode_t vp;
2435
2436 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2437 UIO_USERSPACE, uap->path, ctx);
2438 error = namei(&nd);
2439 if (error != 0)
2440 return (error);
2441 vp = nd.ni_vp;
2442 mp = vp->v_mount;
2443 sp = &mp->mnt_vfsstat;
2444 nameidone(&nd);
2445
2446 #if CONFIG_MACF
2447 error = mac_mount_check_stat(ctx, mp);
2448 if (error != 0)
2449 return (error);
2450 #endif
2451
2452 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
2453 if (error != 0) {
2454 vnode_put(vp);
2455 return (error);
2456 }
2457
2458 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2459 vnode_put(vp);
2460 return (error);
2461 }
2462
2463 /*
2464 * Get filesystem statistics.
2465 */
2466 /* ARGSUSED */
int
fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;

	AUDIT_ARG(fd, uap->fd);

	/* Resolve the fd to a vnode, then take an iocount on it. */
	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	error = vnode_getwithref(vp);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);

	mp = vp->v_mount;
	if (!mp) {
		/* Vnode with no mount (e.g. dead) — treat as a bad descriptor. */
		error = EBADF;
		goto out;
	}

#if CONFIG_MACF
	error = mac_mount_check_stat(vfs_context_current(), mp);
	if (error != 0)
		goto out;
#endif

	/* Refresh the cached statistics before copying them out. */
	sp = &mp->mnt_vfsstat;
	if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
		goto out;
	}

	error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);

out:
	/* Release the fd reference and the vnode iocount on all paths. */
	file_drop(uap->fd);
	vnode_put(vp);

	return (error);
}
2513
2514 /*
2515 * Common routine to handle copying of statfs64 data to user space
2516 */
2517 static int
2518 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2519 {
2520 int error;
2521 struct statfs64 sfs;
2522
2523 bzero(&sfs, sizeof(sfs));
2524
2525 sfs.f_bsize = sfsp->f_bsize;
2526 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2527 sfs.f_blocks = sfsp->f_blocks;
2528 sfs.f_bfree = sfsp->f_bfree;
2529 sfs.f_bavail = sfsp->f_bavail;
2530 sfs.f_files = sfsp->f_files;
2531 sfs.f_ffree = sfsp->f_ffree;
2532 sfs.f_fsid = sfsp->f_fsid;
2533 sfs.f_owner = sfsp->f_owner;
2534 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2535 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2536 sfs.f_fssubtype = sfsp->f_fssubtype;
2537 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2538 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2539 } else {
2540 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2541 }
2542 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2543 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2544
2545 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2546
2547 return(error);
2548 }
2549
2550 /*
2551 * Get file system statistics in 64-bit mode
2552 */
2553 int
2554 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2555 {
2556 struct mount *mp;
2557 struct vfsstatfs *sp;
2558 int error;
2559 struct nameidata nd;
2560 vfs_context_t ctxp = vfs_context_current();
2561 vnode_t vp;
2562
2563 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2564 UIO_USERSPACE, uap->path, ctxp);
2565 error = namei(&nd);
2566 if (error != 0)
2567 return (error);
2568 vp = nd.ni_vp;
2569 mp = vp->v_mount;
2570 sp = &mp->mnt_vfsstat;
2571 nameidone(&nd);
2572
2573 #if CONFIG_MACF
2574 error = mac_mount_check_stat(ctxp, mp);
2575 if (error != 0)
2576 return (error);
2577 #endif
2578
2579 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
2580 if (error != 0) {
2581 vnode_put(vp);
2582 return (error);
2583 }
2584
2585 error = statfs64_common(mp, sp, uap->buf);
2586 vnode_put(vp);
2587
2588 return (error);
2589 }
2590
2591 /*
2592 * Get file system statistics in 64-bit mode
2593 */
int
fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
{
	struct vnode *vp;
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;

	AUDIT_ARG(fd, uap->fd);

	/* Resolve the fd to a vnode, then take an iocount on it. */
	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	error = vnode_getwithref(vp);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);

	mp = vp->v_mount;
	if (!mp) {
		/* Vnode with no mount (e.g. dead) — treat as a bad descriptor. */
		error = EBADF;
		goto out;
	}

#if CONFIG_MACF
	error = mac_mount_check_stat(vfs_context_current(), mp);
	if (error != 0)
		goto out;
#endif

	/* Refresh the cached statistics before copying them out. */
	sp = &mp->mnt_vfsstat;
	if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
		goto out;
	}

	error = statfs64_common(mp, sp, uap->buf);

out:
	/* Release the fd reference and the vnode iocount on all paths. */
	file_drop(uap->fd);
	vnode_put(vp);

	return (error);
}
2640
/* Shared iteration state for the getfsstat family of callbacks. */
struct getfsstat_struct {
	user_addr_t sfsp;	/* user buffer cursor; advanced per entry written */
	user_addr_t *mp;	/* optional array of user MAC-label buffers (or NULL) */
	int count;		/* mounts visited so far */
	int maxcount;		/* capacity of the user buffer, in entries */
	int flags;		/* caller's MNT_NOWAIT / MNT_WAIT / MNT_DWAIT */
	int error;		/* first error hit by the callback, if any */
};
2649
2650
/*
 * Per-mount worker for getfsstat(2)/__mac_getfsstat(2): copy one mount's
 * statistics (and optionally its MAC label) into the user buffer tracked
 * by the getfsstat_struct in arg.  Counts every mount even when the
 * buffer is full, so the caller can report the total.
 */
static int
getfsstat_callback(mount_t mp, void * arg)
{

	struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
	struct vfsstatfs *sp;
	int error, my_size;
	vfs_context_t ctx = vfs_context_current();

	if (fstp->sfsp && fstp->count < fstp->maxcount) {
#if CONFIG_MACF
		error = mac_mount_check_stat(ctx, mp);
		if (error != 0) {
			fstp->error = error;
			return(VFS_RETURNED_DONE);
		}
#endif
		sp = &mp->mnt_vfsstat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh the
		 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
		 */
		if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
			(error = vfs_update_vfsstat(mp, ctx,
			    VFS_USER_EVENT))) {
			KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
			/* Skip this mount but keep iterating. */
			return(VFS_RETURNED);
		}

		/*
		 * Need to handle LP64 version of struct statfs
		 */
		error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
		if (error) {
			fstp->error = error;
			return(VFS_RETURNED_DONE);
		}
		/* my_size reflects the 32/64-bit entry size actually written. */
		fstp->sfsp += my_size;

		if (fstp->mp) {
#if CONFIG_MACF
			error = mac_mount_label_get(mp, *fstp->mp);
			if (error) {
				fstp->error = error;
				return(VFS_RETURNED_DONE);
			}
#endif
			fstp->mp++;
		}
	}
	fstp->count++;
	return(VFS_RETURNED);
}
2704
2705 /*
2706 * Get statistics on all filesystems.
2707 */
2708 int
2709 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2710 {
2711 struct __mac_getfsstat_args muap;
2712
2713 muap.buf = uap->buf;
2714 muap.bufsize = uap->bufsize;
2715 muap.mac = USER_ADDR_NULL;
2716 muap.macsize = 0;
2717 muap.flags = uap->flags;
2718
2719 return (__mac_getfsstat(p, &muap, retval));
2720 }
2721
2722 /*
2723 * __mac_getfsstat: Get MAC-related file system statistics
2724 *
2725 * Parameters: p (ignored)
2726 * uap User argument descriptor (see below)
2727 * retval Count of file system statistics (N stats)
2728 *
2729 * Indirect: uap->bufsize Buffer size
2730 * uap->macsize MAC info size
2731 * uap->buf Buffer where information will be returned
2732 * uap->mac MAC info
2733 * uap->flags File system flags
2734 *
2735 *
2736 * Returns: 0 Success
2737 * !0 Not success
2738 *
2739 */
int
__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
{
	user_addr_t sfsp;
	user_addr_t *mp;	/* normalized array of user label addresses, or NULL */
	size_t count, maxcount, bufsize, macsize;
	struct getfsstat_struct fst;

	bufsize = (size_t) uap->bufsize;
	macsize = (size_t) uap->macsize;

	/* Entry size depends on the caller's pointer width. */
	if (IS_64BIT_PROCESS(p)) {
		maxcount = bufsize / sizeof(struct user64_statfs);
	}
	else {
		maxcount = bufsize / sizeof(struct user32_statfs);
	}
	sfsp = uap->buf;
	count = 0;

	mp = NULL;

#if CONFIG_MACF
	if (uap->mac != USER_ADDR_NULL) {
		u_int32_t *mp0;
		int error;
		unsigned int i;

		/* The label array must have exactly one slot per statfs slot. */
		count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
		if (count != maxcount)
			return (EINVAL);

		/* Copy in the array */
		MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
		if (mp0 == NULL) {
			return (ENOMEM);
		}

		error = copyin(uap->mac, mp0, macsize);
		if (error) {
			FREE(mp0, M_MACTEMP);
			return (error);
		}

		/* Normalize to an array of user_addr_t */
		MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
		if (mp == NULL) {
			FREE(mp0, M_MACTEMP);
			return (ENOMEM);
		}

		/* Widen 32-bit user pointers; copy 64-bit ones through. */
		for (i = 0; i < count; i++) {
			if (IS_64BIT_PROCESS(p))
				mp[i] = ((user_addr_t *)mp0)[i];
			else
				mp[i] = (user_addr_t)mp0[i];
		}
		FREE(mp0, M_MACTEMP);
	}
#endif


	fst.sfsp = sfsp;
	fst.mp = mp;
	fst.flags = uap->flags;
	fst.count = 0;
	fst.error = 0;
	fst.maxcount = maxcount;


	vfs_iterate(0, getfsstat_callback, &fst);

	if (mp)
		FREE(mp, M_MACTEMP);

	if (fst.error ) {
		KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
		return(fst.error);
	}

	/*
	 * Report the number of entries written, or the total number of
	 * mounts when more exist than fit in the caller's buffer.
	 */
	if (fst.sfsp && fst.count > fst.maxcount)
		*retval = fst.maxcount;
	else
		*retval = fst.count;
	return (0);
}
2826
/*
 * Per-mount worker for getfsstat64(2): copy one mount's statistics in the
 * fixed 64-bit layout into the user buffer tracked by arg.  Counts every
 * mount even when the buffer is full, so the caller can report the total.
 */
static int
getfsstat64_callback(mount_t mp, void * arg)
{
	struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
	struct vfsstatfs *sp;
	int error;

	if (fstp->sfsp && fstp->count < fstp->maxcount) {
#if CONFIG_MACF
		error = mac_mount_check_stat(vfs_context_current(), mp);
		if (error != 0) {
			fstp->error = error;
			return(VFS_RETURNED_DONE);
		}
#endif
		sp = &mp->mnt_vfsstat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh the fsstat
		 * cache. MNT_WAIT overrides MNT_NOWAIT.
		 *
		 * We treat MNT_DWAIT as MNT_WAIT for all instances of
		 * getfsstat, since the constants are out of the same
		 * namespace.
		 */
		if (((fstp->flags & MNT_NOWAIT) == 0 ||
		     (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
		    (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
			KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
			/* Skip this mount but keep iterating. */
			return(VFS_RETURNED);
		}

		error = statfs64_common(mp, sp, fstp->sfsp);
		if (error) {
			fstp->error = error;
			return(VFS_RETURNED_DONE);
		}
		fstp->sfsp += sizeof(struct statfs64);
	}
	fstp->count++;
	return(VFS_RETURNED);
}
2868
2869 /*
2870 * Get statistics on all file systems in 64 bit mode.
2871 */
2872 int
2873 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2874 {
2875 user_addr_t sfsp;
2876 int count, maxcount;
2877 struct getfsstat_struct fst;
2878
2879 maxcount = uap->bufsize / sizeof(struct statfs64);
2880
2881 sfsp = uap->buf;
2882 count = 0;
2883
2884 fst.sfsp = sfsp;
2885 fst.flags = uap->flags;
2886 fst.count = 0;
2887 fst.error = 0;
2888 fst.maxcount = maxcount;
2889
2890 vfs_iterate(0, getfsstat64_callback, &fst);
2891
2892 if (fst.error ) {
2893 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2894 return(fst.error);
2895 }
2896
2897 if (fst.sfsp && fst.count > fst.maxcount)
2898 *retval = fst.maxcount;
2899 else
2900 *retval = fst.count;
2901
2902 return (0);
2903 }
2904
2905 /*
2906 * gets the associated vnode with the file descriptor passed.
2907 * as input
2908 *
2909 * INPUT
2910 * ctx - vfs context of caller
2911 * fd - file descriptor for which vnode is required.
2912 * vpp - Pointer to pointer to vnode to be returned.
2913 *
2914 * The vnode is returned with an iocount so any vnode obtained
2915 * by this call needs a vnode_put
2916 *
2917 */
2918 int
2919 vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
2920 {
2921 int error;
2922 vnode_t vp;
2923 struct fileproc *fp;
2924 proc_t p = vfs_context_proc(ctx);
2925
2926 *vpp = NULLVP;
2927
2928 error = fp_getfvp(p, fd, &fp, &vp);
2929 if (error)
2930 return (error);
2931
2932 error = vnode_getwithref(vp);
2933 if (error) {
2934 (void)fp_drop(p, fd, fp, 0);
2935 return (error);
2936 }
2937
2938 (void)fp_drop(p, fd, fp, 0);
2939 *vpp = vp;
2940 return (error);
2941 }
2942
2943 /*
2944 * Wrapper function around namei to start lookup from a directory
2945 * specified by a file descriptor ni_dirfd.
2946 *
2947 * In addition to all the errors returned by namei, this call can
2948 * return ENOTDIR if the file descriptor does not refer to a directory.
2949 * and EBADF if the file descriptor is not valid.
2950 */
int
nameiat(struct nameidata *ndp, int dirfd)
{
	/*
	 * Only use dirfd for a fresh lookup of a relative path; skip it for
	 * AT_FDCWD, continued lookups, and callers that already supplied a
	 * starting directory via USEDVP.
	 */
	if ((dirfd != AT_FDCWD) &&
	    !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
	    !(ndp->ni_cnd.cn_flags & USEDVP)) {
		int error = 0;
		char c;

		/* Peek at the first byte of the path to detect absolute paths. */
		if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
			error = copyin(ndp->ni_dirp, &c, sizeof(char));
			if (error)
				return (error);
		} else {
			c = *((char *)(ndp->ni_dirp));
		}

		if (c != '/') {
			vnode_t dvp_at;

			error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
			                        &dvp_at);
			if (error)
				return (error);

			if (vnode_vtype(dvp_at) != VDIR) {
				vnode_put(dvp_at);
				return (ENOTDIR);
			}

			/* Start the lookup at dirfd's vnode; USEDVP is scoped to this call. */
			ndp->ni_dvp = dvp_at;
			ndp->ni_cnd.cn_flags |= USEDVP;
			error = namei(ndp);
			ndp->ni_cnd.cn_flags &= ~USEDVP;
			vnode_put(dvp_at);
			return (error);
		}
	}

	/* Absolute path or no usable dirfd: plain namei. */
	return (namei(ndp));
}
2992
2993 /*
2994 * Change current working directory to a given file descriptor.
2995 */
2996 /* ARGSUSED */
static int
common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
{
	struct filedesc *fdp = p->p_fd;
	vnode_t vp;
	vnode_t tdp;	/* root of a covering mount during traversal */
	vnode_t tvp;	/* previous cwd, released at the end */
	struct mount *mp;
	int error;
	vfs_context_t ctx = vfs_context_current();

	AUDIT_ARG(fd, uap->fd);
	if (per_thread && uap->fd == -1) {
		/*
		 * Switching back from per-thread to per process CWD; verify we
		 * in fact have one before proceeding. The only success case
		 * for this code path is to return 0 preemptively after zapping
		 * the thread structure contents.
		 */
		thread_t th = vfs_context_thread(ctx);
		if (th) {
			uthread_t uth = get_bsdthread_info(th);
			tvp = uth->uu_cdir;
			uth->uu_cdir = NULLVP;
			if (tvp != NULLVP) {
				vnode_rele(tvp);
				return (0);
			}
		}
		return (EBADF);
	}

	/* Resolve the fd to a vnode and take an iocount. */
	if ( (error = file_vnode(uap->fd, &vp)) )
		return(error);
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_chdir(ctx, vp);
	if (error)
		goto out;
#endif
	error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
	if (error)
		goto out;

	/*
	 * If the directory is a mount point, descend through the chain of
	 * covering mounts to the root of the topmost mounted filesystem.
	 */
	while (!error && (mp = vp->v_mountedhere) != NULL) {
		if (vfs_busy(mp, LK_NOWAIT)) {
			error = EACCES;
			goto out;
		}
		error = VFS_ROOT(mp, &tdp, ctx);
		vfs_unbusy(mp);
		if (error)
			break;
		vnode_put(vp);
		vp = tdp;
	}
	if (error)
		goto out;
	/* Convert the iocount to a long-lived usecount for the cwd. */
	if ( (error = vnode_ref(vp)) )
		goto out;
	vnode_put(vp);

	if (per_thread) {
		thread_t th = vfs_context_thread(ctx);
		if (th) {
			uthread_t uth = get_bsdthread_info(th);
			tvp = uth->uu_cdir;
			uth->uu_cdir = vp;
			OSBitOrAtomic(P_THCWD, &p->p_flag);
		} else {
			/*
			 * NOTE(review): this path returns without file_drop();
			 * vfs_context_thread() of the current context should never
			 * be NULL here, so it appears unreachable — confirm.
			 */
			vnode_rele(vp);
			return (ENOENT);
		}
	} else {
		proc_fdlock(p);
		tvp = fdp->fd_cdir;
		fdp->fd_cdir = vp;
		proc_fdunlock(p);
	}

	/* Drop the usecount on the previous cwd, if there was one. */
	if (tvp)
		vnode_rele(tvp);
	file_drop(uap->fd);

	return (0);
out:
	vnode_put(vp);
	file_drop(uap->fd);

	return(error);
}
3099
/* fchdir(2): change the per-process working directory to an open fd. */
int
fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
{
	return common_fchdir(p, uap, 0);
}
3105
/* Per-thread variant of fchdir; fd == -1 reverts to the process cwd. */
int
__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
{
	return common_fchdir(p, (void *)uap, 1);
}
3111
3112 /*
3113 * Change current working directory (".").
3114 *
3115 * Returns: 0 Success
3116 * change_dir:ENOTDIR
3117 * change_dir:???
3118 * vnode_ref:ENOENT No such file or directory
3119 */
3120 /* ARGSUSED */
static int
common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
{
	struct filedesc *fdp = p->p_fd;
	int error;
	struct nameidata nd;
	vnode_t tvp;	/* previous cwd, released at the end */
	vfs_context_t ctx = vfs_context_current();

	NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
		UIO_USERSPACE, uap->path, ctx);
	/* change_dir returns nd.ni_vp with an iocount on success. */
	error = change_dir(&nd, ctx);
	if (error)
		return (error);
	/* Take a long-lived usecount for the cwd before dropping the iocount. */
	if ( (error = vnode_ref(nd.ni_vp)) ) {
		vnode_put(nd.ni_vp);
		return (error);
	}
	/*
	 * drop the iocount we picked up in change_dir
	 */
	vnode_put(nd.ni_vp);

	if (per_thread) {
		thread_t th = vfs_context_thread(ctx);
		if (th) {
			uthread_t uth = get_bsdthread_info(th);
			tvp = uth->uu_cdir;
			uth->uu_cdir = nd.ni_vp;
			OSBitOrAtomic(P_THCWD, &p->p_flag);
		} else {
			vnode_rele(nd.ni_vp);
			return (ENOENT);
		}
	} else {
		proc_fdlock(p);
		tvp = fdp->fd_cdir;
		fdp->fd_cdir = nd.ni_vp;
		proc_fdunlock(p);
	}

	/* Drop the usecount on the previous cwd, if there was one. */
	if (tvp)
		vnode_rele(tvp);

	return (0);
}
3167
3168
3169 /*
3170 * chdir
3171 *
3172 * Change current working directory (".") for the entire process
3173 *
3174 * Parameters: p Process requesting the call
3175 * uap User argument descriptor (see below)
3176 * retval (ignored)
3177 *
3178 * Indirect parameters: uap->path Directory path
3179 *
3180 * Returns: 0 Success
3181 * common_chdir: ENOTDIR
3182 * common_chdir: ENOENT No such file or directory
3183 * common_chdir: ???
3184 *
3185 */
/* chdir(2): see the block comment above for the full contract. */
int
chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
{
	return common_chdir(p, (void *)uap, 0);
}
3191
3192 /*
3193 * __pthread_chdir
3194 *
3195 * Change current working directory (".") for a single thread
3196 *
3197 * Parameters: p Process requesting the call
3198 * uap User argument descriptor (see below)
3199 * retval (ignored)
3200 *
3201 * Indirect parameters: uap->path Directory path
3202 *
3203 * Returns: 0 Success
3204 * common_chdir: ENOTDIR
3205 * common_chdir: ENOENT No such file or directory
3206 * common_chdir: ???
3207 *
3208 */
/* Per-thread chdir: see the block comment above for the full contract. */
int
__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
{
	return common_chdir(p, (void *)uap, 1);
}
3214
3215
3216 /*
3217 * Change notion of root (``/'') directory.
3218 */
3219 /* ARGSUSED */
int
chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	int error;
	struct nameidata nd;
	vnode_t tvp;	/* previous root, released at the end */
	vfs_context_t ctx = vfs_context_current();

	/* chroot requires superuser privilege. */
	if ((error = suser(kauth_cred_get(), &p->p_acflag)))
		return (error);

	NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
		UIO_USERSPACE, uap->path, ctx);
	/* change_dir returns nd.ni_vp with an iocount on success. */
	error = change_dir(&nd, ctx);
	if (error)
		return (error);

#if CONFIG_MACF
	error = mac_vnode_check_chroot(ctx, nd.ni_vp,
	    &nd.ni_cnd);
	if (error) {
		vnode_put(nd.ni_vp);
		return (error);
	}
#endif

	/* Take a long-lived usecount for the root before dropping the iocount. */
	if ( (error = vnode_ref(nd.ni_vp)) ) {
		vnode_put(nd.ni_vp);
		return (error);
	}
	vnode_put(nd.ni_vp);

	proc_fdlock(p);
	tvp = fdp->fd_rdir;
	fdp->fd_rdir = nd.ni_vp;
	fdp->fd_flags |= FD_CHROOT;
	proc_fdunlock(p);

	if (tvp != NULL)
		vnode_rele(tvp);

	return (0);
}
3264
3265 /*
3266 * Common routine for chroot and chdir.
3267 *
3268 * Returns: 0 Success
3269 * ENOTDIR Not a directory
3270 * namei:??? [anything namei can return]
3271 * vnode_authorize:??? [anything vnode_authorize can return]
3272 */
3273 static int
3274 change_dir(struct nameidata *ndp, vfs_context_t ctx)
3275 {
3276 vnode_t vp;
3277 int error;
3278
3279 if ((error = namei(ndp)))
3280 return (error);
3281 nameidone(ndp);
3282 vp = ndp->ni_vp;
3283
3284 if (vp->v_type != VDIR) {
3285 vnode_put(vp);
3286 return (ENOTDIR);
3287 }
3288
3289 #if CONFIG_MACF
3290 error = mac_vnode_check_chdir(ctx, vp);
3291 if (error) {
3292 vnode_put(vp);
3293 return (error);
3294 }
3295 #endif
3296
3297 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3298 if (error) {
3299 vnode_put(vp);
3300 return (error);
3301 }
3302
3303 return (error);
3304 }
3305
3306 /*
3307 * Free the vnode data (for directories) associated with the file glob.
3308 */
struct fd_vn_data *
fg_vn_data_alloc(void)
{
	struct fd_vn_data *fvdata;

	/* Allocate per fd vnode data; M_WAITOK | M_ZERO blocks until it succeeds. */
	MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
	       M_FD_VN_DATA, M_WAITOK | M_ZERO);
	/* fv_lock serializes directory-read state hung off this fd. */
	lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
	return fvdata;
}
3320
3321 /*
3322 * Free the vnode data (for directories) associated with the file glob.
3323 */
void
fg_vn_data_free(void *fgvndata)
{
	struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;

	/* Release the directory read-ahead buffer, if one was allocated. */
	if (fvdata->fv_buf)
		FREE(fvdata->fv_buf, M_FD_DIRBUF);
	lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
	FREE(fvdata, M_FD_VN_DATA);
}
3334
3335 /*
3336 * Check permissions, allocate an open file structure,
3337 * and call the device open routine if any.
3338 *
3339 * Returns: 0 Success
3340 * EINVAL
3341 * EINTR
3342 * falloc:ENFILE
3343 * falloc:EMFILE
3344 * falloc:ENOMEM
3345 * vn_open_auth:???
3346 * dupfdopen:???
3347 * VNOP_ADVLOCK:???
3348 * vnode_setsize:???
3349 *
3350 * XXX Need to implement uid, gid
3351 */
int
open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
    struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
    int32_t *retval)
{
	proc_t p = vfs_context_proc(ctx);
	uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
	struct fileproc *fp;
	vnode_t vp;
	int flags, oflags;
	int type, indx, error;
	struct flock lf;
	struct vfs_context context;

	oflags = uflags;

	/* O_RDWR|O_WRONLY together is invalid. */
	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return(EINVAL);

	/* Convert open(2) flags to kernel fflags; strip encryption hints. */
	flags = FFLAGS(uflags);
	CLR(flags, FENCRYPTED);
	CLR(flags, FUNENCRYPTED);

	AUDIT_ARG(fflags, oflags);
	AUDIT_ARG(mode, vap->va_mode);

	/* Reserve a descriptor slot and fileproc up front. */
	if ((error = falloc_withalloc(p,
	    &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
		return (error);
	}
	/* Encode the reserved index for the fdopen() back-channel below. */
	uu->uu_dupfd = -indx - 1;

	if ((error = vn_open_auth(ndp, &flags, vap))) {
		/*
		 * /dev/fd "fdopen" support: a driver open may redirect to an
		 * existing descriptor via uu_dupfd instead of a new vnode.
		 */
		if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){	/* XXX from fdopen */
			if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
				fp_drop(p, indx, NULL, 0);
				*retval = indx;
				return (0);
			}
		}
		if (error == ERESTART)
			error = EINTR;
		fp_free(p, indx, fp);
		return (error);
	}
	uu->uu_dupfd = 0;
	/* vn_open_auth returned vp with an iocount held. */
	vp = ndp->ni_vp;

	fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
	fp->f_fglob->fg_ops = &vnops;
	fp->f_fglob->fg_data = (caddr_t)vp;

	/* Apply an advisory whole-file lock when O_EXLOCK/O_SHLOCK requested. */
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
#if CONFIG_MACF
		error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
		    F_SETLK, &lf);
		if (error)
			goto bad;
#endif
		if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
			goto bad;
		fp->f_fglob->fg_flag |= FHASLOCK;
	}

	/* try to truncate by setting the size attribute */
	if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
		goto bad;

	/*
	 * For directories we hold some additional information in the fd.
	 */
	if (vnode_vtype(vp) == VDIR) {
		fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
	} else {
		fp->f_fglob->fg_vn_data = NULL;
	}

	/* Drop the iocount; fg_data retains the long-lived reference. */
	vnode_put(vp);

	/*
	 * The first terminal open (without a O_NOCTTY) by a session leader
	 * results in it being set as the controlling terminal.
	 */
	/* NOTE(review): vp is consulted after vnode_put above — confirm this is safe here. */
	if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
	    !(flags & O_NOCTTY)) {
		int tmp = 0;

		(void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
		    (caddr_t)&tmp, ctx);
	}

	/* Publish the descriptor: set close-on-exec/fork flags, then release the slot. */
	proc_fdlock(p);
	if (flags & O_CLOEXEC)
		*fdflags(p, indx) |= UF_EXCLOSE;
	if (flags & O_CLOFORK)
		*fdflags(p, indx) |= UF_FORKCLOSE;
	procfdtbl_releasefd(p, indx, NULL);

#if CONFIG_SECLUDED_MEMORY
	/*
	 * Decide whether this file's pages may live in the secluded pool,
	 * based on writability and heuristics about the path/name.
	 */
	if (secluded_for_filecache &&
	    FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE &&
	    vnode_vtype(vp) == VREG) {
		memory_object_control_t moc;

		moc = ubc_getobject(vp, UBC_FLAGS_NONE);

		if (moc == MEMORY_OBJECT_CONTROL_NULL) {
			/* nothing to do... */
		} else if (fp->f_fglob->fg_flag & FWRITE) {
			/* writable -> no longer eligible for secluded pages */
			memory_object_mark_eligible_for_secluded(moc,
								 FALSE);
		} else if (secluded_for_filecache == 1) {
			char pathname[32] = { 0, };
			size_t copied;
			/* XXX FBDP: better way to detect /Applications/ ? */
			if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
				copyinstr(ndp->ni_dirp,
					  pathname,
					  sizeof (pathname),
					  &copied);
			} else {
				copystr(CAST_DOWN(void *, ndp->ni_dirp),
					pathname,
					sizeof (pathname),
					&copied);
			}
			pathname[sizeof (pathname) - 1] = '\0';
			if (strncmp(pathname,
				    "/Applications/",
				    strlen("/Applications/")) == 0 &&
			    strncmp(pathname,
				    "/Applications/Camera.app/",
				    strlen("/Applications/Camera.app/")) != 0) {
				/*
				 * not writable
				 * AND from "/Applications/"
				 * AND not from "/Applications/Camera.app/"
				 * ==> eligible for secluded
				 */
				memory_object_mark_eligible_for_secluded(moc,
									 TRUE);
			}
		} else if (secluded_for_filecache == 2) {
			/* not implemented... */
			if (!strncmp(vp->v_name,
				     DYLD_SHARED_CACHE_NAME,
				     strlen(DYLD_SHARED_CACHE_NAME)) ||
			    !strncmp(vp->v_name,
				     "dyld",
				     strlen(vp->v_name)) ||
			    !strncmp(vp->v_name,
				     "launchd",
				     strlen(vp->v_name)) ||
			    !strncmp(vp->v_name,
				     "Camera",
				     strlen(vp->v_name)) ||
			    !strncmp(vp->v_name,
				     "mediaserverd",
				     strlen(vp->v_name))) {
				/*
				 * This file matters when launching Camera:
				 * do not store its contents in the secluded
				 * pool that will be drained on Camera launch.
				 */
				memory_object_mark_eligible_for_secluded(moc,
									 FALSE);
			}
		}
	}
#endif /* CONFIG_SECLUDED_MEMORY */

	fp_drop(p, indx, fp, 1);
	proc_fdunlock(p);

	*retval = indx;

	return (0);
bad:
	/* Error cleanup: undo the advisory lock (if set), close, free the slot. */
	context = *vfs_context_current();
	context.vc_ucred = fp->f_fglob->fg_cred;

	if ((fp->f_fglob->fg_flag & FHASLOCK) &&
	    (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;

		(void)VNOP_ADVLOCK(
			vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
	}

	vn_close(vp, fp->f_fglob->fg_flag, &context);
	vnode_put(vp);
	fp_free(p, indx, fp);

	return (error);
}
3561
/*
 * While most of the *at syscall handlers can call nameiat() which
 * is a wrapper around namei, the use of namei and initialisation
 * of nameidata are far removed and in different functions  - namei
 * gets called in vn_open_auth for open1. So we'll just do here what
 * nameiat() does.
 */
static int
open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
    struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
    int dirfd)
{
	if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
		int error;
		char c;

		/*
		 * Peek at the first byte of the path: only relative paths
		 * are interpreted against dirfd; absolute paths fall
		 * through to the plain open1() call below.
		 */
		if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
			error = copyin(ndp->ni_dirp, &c, sizeof(char));
			if (error)
				return (error);
		} else {
			/*
			 * NOTE(review): kernel-space paths are dereferenced
			 * directly here, while other call sites use
			 * CAST_DOWN on ni_dirp -- confirm these are
			 * equivalent for all kernel callers.
			 */
			c = *((char *)(ndp->ni_dirp));
		}

		if (c != '/') {
			vnode_t dvp_at;

			/* Resolve dirfd to the vnode used as lookup base. */
			error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
			    &dvp_at);
			if (error)
				return (error);

			/* dirfd must refer to a directory. */
			if (vnode_vtype(dvp_at) != VDIR) {
				vnode_put(dvp_at);
				return (ENOTDIR);
			}

			/* Hand the starting directory to open1 via USEDVP. */
			ndp->ni_dvp = dvp_at;
			ndp->ni_cnd.cn_flags |= USEDVP;
			error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
			    retval);
			vnode_put(dvp_at);
			return (error);
		}
	}

	return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
}
3610
/*
 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
 *
 * Parameters:	p			Process requesting the open
 *		uap			User argument descriptor (see below)
 *		retval			Pointer to an area to receive the
 *					return value from the system call
 *
 * Indirect:	uap->path		Path to open (same as 'open')
 *		uap->flags		Flags to open (same as 'open')
 *		uap->uid		UID to set, if creating
 *		uap->gid		GID to set, if creating
 *		uap->mode		File mode, if creating (same as 'open')
 *		uap->xsecurity		ACL to set, if creating
 *
 * Returns:	0			Success
 *		!0			errno value
 *
 * Notes:	The kauth_filesec_t in 'va', if any, is in host byte order.
 *
 * XXX:	We should enumerate the possible errno values here, and where
 *		in the code they originated.
 */
int
open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	int ciferror;
	kauth_filesec_t xsecdst;
	struct vnode_attr va;
	struct nameidata nd;
	int cmode;

	AUDIT_ARG(owner, uap->uid, uap->gid);

	/* Copy in the caller's filesec (ACL) blob, if one was supplied. */
	xsecdst = NULL;
	if ((uap->xsecurity != USER_ADDR_NULL) &&
	    ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
		return ciferror;

	/* Creation mode honors the process umask; sticky bit is stripped. */
	VATTR_INIT(&va);
	cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
	VATTR_SET(&va, va_mode, cmode);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);
	if (xsecdst != NULL)
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);

	NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	       uap->path, vfs_context_current());

	ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
	    fileproc_alloc_init, NULL, retval);
	if (xsecdst != NULL)
		kauth_filesec_free(xsecdst);

	return ciferror;
}
3671
3672 /*
3673 * Go through the data-protected atomically controlled open (2)
3674 *
3675 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3676 */
3677 int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3678 int flags = uap->flags;
3679 int class = uap->class;
3680 int dpflags = uap->dpflags;
3681
3682 /*
3683 * Follow the same path as normal open(2)
3684 * Look up the item if it exists, and acquire the vnode.
3685 */
3686 struct filedesc *fdp = p->p_fd;
3687 struct vnode_attr va;
3688 struct nameidata nd;
3689 int cmode;
3690 int error;
3691
3692 VATTR_INIT(&va);
3693 /* Mask off all but regular access permissions */
3694 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3695 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3696
3697 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3698 uap->path, vfs_context_current());
3699
3700 /*
3701 * Initialize the extra fields in vnode_attr to pass down our
3702 * extra fields.
3703 * 1. target cprotect class.
3704 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3705 */
3706 if (flags & O_CREAT) {
3707 /* lower level kernel code validates that the class is valid before applying it. */
3708 if (class != PROTECTION_CLASS_DEFAULT) {
3709 /*
3710 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3711 * file behave the same as open (2)
3712 */
3713 VATTR_SET(&va, va_dataprotect_class, class);
3714 }
3715 }
3716
3717 if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
3718 if ( flags & (O_RDWR | O_WRONLY)) {
3719 /* Not allowed to write raw encrypted bytes */
3720 return EINVAL;
3721 }
3722 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
3723 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3724 }
3725 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
3726 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
3727 }
3728 }
3729
3730 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3731 fileproc_alloc_init, NULL, retval);
3732
3733 return error;
3734 }
3735
/*
 * Common implementation behind open(2)/openat(2): builds the creation
 * vnode_attr (mode masked by the process umask, sticky bit stripped)
 * and the nameidata, then defers to open1at() so relative paths can be
 * resolved against 'fd'.
 */
static int
openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
    int fd, enum uio_seg segflg, int *retval)
{
	struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
	struct vnode_attr va;
	struct nameidata nd;
	int cmode;

	VATTR_INIT(&va);
	/* Mask off all but regular access permissions */
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
	VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);

	NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
	    segflg, path, ctx);

	return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
	    retval, fd));
}
3756
3757 int
3758 open(proc_t p, struct open_args *uap, int32_t *retval)
3759 {
3760 __pthread_testcancel(1);
3761 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3762 }
3763
3764 int
3765 open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3766 int32_t *retval)
3767 {
3768 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3769 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3770 }
3771
3772 int
3773 openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3774 int32_t *retval)
3775 {
3776 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3777 uap->mode, uap->fd, UIO_USERSPACE, retval));
3778 }
3779
3780 int
3781 openat(proc_t p, struct openat_args *uap, int32_t *retval)
3782 {
3783 __pthread_testcancel(1);
3784 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3785 }
3786
/*
 * openbyid_np: open a file given a file system id and a file system object id
 *	the hfs file system object id is an fsobj_id_t {uint32, uint32}
 *	for file systems that don't support object ids it is a node id (uint64_t).
 *
 * Parameters:	p			Process requesting the open
 *		uap			User argument descriptor (see below)
 *		retval			Pointer to an area to receive the
 *					return value from the system call
 *
 * Indirect:	uap->path		Path to open (same as 'open')
 *
 *		uap->fsid		id of target file system
 *		uap->objid		id of target file system object
 *		uap->flags		Flags to open (same as 'open')
 *
 * Returns:	0			Success
 *		!0			errno value
 *
 *
 * XXX:	We should enumerate the possible errno values here, and where
 *		in the code they originated.
 */
int
openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
{
	fsid_t fsid;
	uint64_t objid;
	int error;
	char *buf = NULL;
	int buflen = MAXPATHLEN;
	int pathlen = 0;
	vfs_context_t ctx = vfs_context_current();

	/* Open-by-id is a privileged operation. */
	if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
		return (error);
	}

	if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
		return (error);
	}

	/*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
	if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
		return (error);
	}

	AUDIT_ARG(value32, fsid.val[0]);
	AUDIT_ARG(value64, objid);

	/*
	 * Resolve a path from fsid/objid, growing the buffer by MAXPATHLEN
	 * each time fsgetpath_internal() reports ENOSPC.
	 * NOTE(review): the retry loop has no upper bound on buflen --
	 * confirm fsgetpath_internal() bounds path lengths so buflen
	 * cannot grow (or overflow as an int) indefinitely.
	 */
	do {
		MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
		if (buf == NULL) {
			return (ENOMEM);
		}

		error = fsgetpath_internal(
			ctx, fsid.val[0], objid,
			buflen, buf, &pathlen);

		if (error) {
			FREE(buf, M_TEMP);
			buf = NULL;
		}
	} while (error == ENOSPC && (buflen += MAXPATHLEN));

	if (error) {
		return error;
	}

	buf[pathlen] = 0;

	/* Re-open by the resolved path; buf is a kernel-space string. */
	error = openat_internal(
		ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);

	FREE(buf, M_TEMP);

	return error;
}
3867
3868
3869 /*
3870 * Create a special file.
3871 */
3872 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3873
/*
 * mknod(2): create a character or block special file.  A request for a
 * FIFO (S_IFIFO) is redirected to mkfifo1(); creating device nodes
 * requires superuser privilege.
 */
int
mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
{
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;
	vnode_t	vp, dvp;

	VATTR_INIT(&va);
	/* Mode is masked by the process umask. */
	VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
	VATTR_SET(&va, va_rdev, uap->dev);

	/* If it's a mknod() of a FIFO, call mkfifo1() instead */
	if ((uap->mode & S_IFMT) == S_IFIFO)
		return(mkfifo1(ctx, uap->path, &va));

	AUDIT_ARG(mode, uap->mode);
	AUDIT_ARG(value32, uap->dev);

	/* Only the superuser may create device nodes. */
	if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
		return (error);
	NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
		UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* Target must not already exist. */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}

	switch (uap->mode & S_IFMT) {
	case S_IFCHR:
		VATTR_SET(&va, va_type, VCHR);
		break;
	case S_IFBLK:
		VATTR_SET(&va, va_type, VBLK);
		break;
	default:
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);
	if (error)
		goto out;
#endif

	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out;

	if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
		goto out;

	if (vp) {
		int	update_flags = 0;

		// Make sure the name & parent pointers are hooked up
		if (vp->v_name == NULL)
			update_flags |= VNODE_UPDATE_NAME;
		if (vp->v_parent == NULLVP)
			update_flags |= VNODE_UPDATE_PARENT;

		if (update_flags)
			vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
		add_fsevent(FSE_CREATE_FILE, ctx,
		    FSE_ARG_VNODE, vp,
		    FSE_ARG_DONE);
#endif
	}

out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	vnode_put(dvp);

	return (error);
}
3966
/*
 * Create a named pipe.
 *
 * Common worker for mkfifo(2)/mkfifo_extended(2)/mknod(2)-of-a-FIFO.
 * The caller supplies the vnode_attr; this routine stamps va_type to
 * VFIFO before creation.  'upath' is always a user-space path.
 *
 * Returns:	0			Success
 *		EEXIST
 *	namei:???
 *	vnode_authorize:???
 *	vn_create:???
 */
static int
mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
{
	vnode_t	vp, dvp;
	int error;
	struct nameidata nd;

	NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
		UIO_USERSPACE, upath, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* check that this is a new file and authorize addition */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}
	VATTR_SET(vap, va_type, VFIFO);

	if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
		goto out;

	error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	vnode_put(dvp);

	return error;
}
4015
4016
/*
 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
 *
 * Parameters:	p			Process requesting the open
 *		uap			User argument descriptor (see below)
 *		retval			(Ignored)
 *
 * Indirect:	uap->path		Path to fifo (same as 'mkfifo')
 *		uap->uid		UID to set
 *		uap->gid		GID to set
 *		uap->mode		File mode to set (same as 'mkfifo')
 *		uap->xsecurity		ACL to set, if creating
 *
 * Returns:	0			Success
 *		!0			errno value
 *
 * Notes:	The kauth_filesec_t in 'va', if any, is in host byte order.
 *
 * XXX:	We should enumerate the possible errno values here, and where
 *		in the code they originated.
 */
int
mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
{
	int ciferror;
	kauth_filesec_t xsecdst;
	struct vnode_attr va;

	AUDIT_ARG(owner, uap->uid, uap->gid);

	/* Copy in the caller's filesec (ACL) blob, if one was supplied. */
	xsecdst = KAUTH_FILESEC_NONE;
	if (uap->xsecurity != USER_ADDR_NULL) {
		if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
			return ciferror;
	}

	/* Mode is masked by the process umask, as for plain mkfifo(2). */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);
	if (xsecdst != KAUTH_FILESEC_NONE)
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);

	ciferror = mkfifo1(vfs_context_current(), uap->path, &va);

	if (xsecdst != KAUTH_FILESEC_NONE)
		kauth_filesec_free(xsecdst);
	return ciferror;
}
4068
4069 /* ARGSUSED */
4070 int
4071 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
4072 {
4073 struct vnode_attr va;
4074
4075 VATTR_INIT(&va);
4076 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4077
4078 return(mkfifo1(vfs_context_current(), uap->path, &va));
4079 }
4080
4081
/*
 * Return a pointer to the last occurrence of 'ch' in the NUL-terminated
 * string 'p', or NULL if it never appears.  Searching for '\0' yields a
 * pointer to the terminator, matching strrchr() semantics (kernel-local
 * replacement for strrchr()).
 */
static char *
my_strrchr(char *p, int ch)
{
	char *last = NULL;

	do {
		if (*p == ch)
			last = p;
	} while (*p++);

	return (last);
}
4095
4096 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4097
/*
 * Build an absolute path for 'dvp' (optionally with 'leafname' appended)
 * into 'path', a buffer of '_len' bytes (MAXPATHLEN at call sites).
 * This routine never fails: when the real path cannot be obtained or
 * does not fit, *truncated_path is set and the nearest obtainable
 * ancestor path (or the mount point, or "/") is stored instead.
 *
 * Returns the length of the string in 'path' including the NUL.
 */
int
safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
{
	int ret, len = _len;

	*truncated_path = 0;
	ret = vn_getpath(dvp, path, &len);
	if (ret == 0 && len < (MAXPATHLEN - 1)) {
		if (leafname) {
			/* Overwrite the NUL with '/' and append the leaf. */
			path[len-1] = '/';
			/*
			 * NOTE(review): strlcpy returns the length of the
			 * source, so 'len' can exceed MAXPATHLEN on
			 * truncation -- which the branch below relies on.
			 */
			len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
			if (len > MAXPATHLEN) {
				char *ptr;

				// the string got truncated!
				*truncated_path = 1;
				ptr = my_strrchr(path, '/');
				if (ptr) {
					*ptr = '\0';   // chop off the string at the last directory component
				}
				len = strlen(path) + 1;
			}
		}
	} else if (ret == 0) {
		/* Path fit only barely; report it as truncated. */
		*truncated_path = 1;
	} else if (ret != 0) {
		struct vnode *mydvp=dvp;

		if (ret != ENOSPC) {
			printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
			       dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
		}
		*truncated_path = 1;

		/* Walk up the parent chain until some ancestor's path fits. */
		do {
			if (mydvp->v_parent != NULL) {
				mydvp = mydvp->v_parent;
			} else if (mydvp->v_mount) {
				strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
				break;
			} else {
				// no parent and no mount point?  only thing is to punt and say "/" changed
				strlcpy(path, "/", _len);
				len = 2;
				mydvp = NULL;
			}

			if (mydvp == NULL) {
				break;
			}

			len = _len;
			ret = vn_getpath(mydvp, path, &len);
		} while (ret == ENOSPC);
	}

	return len;
}
4156
4157
/*
 * Make a hard file link.
 *
 * Common worker for link(2)/linkat(2): looks up the existing object
 * (relative to fd1), then the new name (relative to fd2), authorizes
 * and performs VNOP_LINK, and emits kauth/fsevent notifications.
 *
 * Returns:	0			Success
 *		EPERM
 *		EEXIST
 *		EXDEV
 *	namei:???
 *	vnode_authorize:???
 *	VNOP_LINK:???
 */
/* ARGSUSED */
static int
linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
    user_addr_t link, int flag, enum uio_seg segflg)
{
	vnode_t	vp, dvp, lvp;
	struct nameidata nd;
	int follow;
	int error;
#if CONFIG_FSE
	fse_info finfo;
#endif
	int need_event, has_listeners;
	char *target_path = NULL;
	int truncated=0;

	vp = dvp = lvp = NULLVP;

	/* look up the object we are linking to */
	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
	NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
	    segflg, path, ctx);

	error = nameiat(&nd, fd1);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/*
	 * Normally, linking to directories is not supported.
	 * However, some file systems may have limited support.
	 */
	if (vp->v_type == VDIR) {
		if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
			error = EPERM;   /* POSIX */
			goto out;
		}

		/* Linking to a directory requires ownership. */
		if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
			struct vnode_attr dva;

			VATTR_INIT(&dva);
			VATTR_WANTED(&dva, va_uid);
			if (vnode_getattr(vp, &dva, ctx) != 0 ||
			    !VATTR_IS_SUPPORTED(&dva, va_uid) ||
			    (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
				error = EACCES;
				goto out;
			}
		}
	}

	/*
	 * lookup the target node: 'nd' is reused, switched to a CREATE
	 * lookup on the new link name, resolved relative to fd2.
	 */
#if CONFIG_TRIGGERS
	nd.ni_op = OP_LINK;
#endif
	nd.ni_cnd.cn_nameiop = CREATE;
	nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
	nd.ni_dirp = link;
	error = nameiat(&nd, fd2);
	if (error != 0)
		goto out;
	dvp = nd.ni_dvp;
	lvp = nd.ni_vp;

#if CONFIG_MACF
	if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
		goto out2;
#endif

	/* or to anything that kauth doesn't want us to (eg. immutable items) */
	if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
		goto out2;

	/* target node must not exist */
	if (lvp != NULLVP) {
		error = EEXIST;
		goto out2;
	}
	/* cannot link across mountpoints */
	if (vnode_mount(vp) != vnode_mount(dvp)) {
		error = EXDEV;
		goto out2;
	}

	/* authorize creation of the target node */
	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out2;

	/* and finally make the link */
	error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
	if (error)
		goto out2;

#if CONFIG_MACF
	(void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
#endif

#if CONFIG_FSE
	need_event = need_fsevent(FSE_CREATE_FILE, dvp);
#else
	need_event = 0;
#endif
	has_listeners = kauth_authorize_fileop_has_listeners();

	if (need_event || has_listeners) {
		char *link_to_path = NULL;
		int len, link_name_len;

		/* build the path to the new link file */
		GET_PATH(target_path);
		if (target_path == NULL) {
			error = ENOMEM;
			goto out2;
		}

		len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);

		if (has_listeners) {
			/* build the path to file we are linking to */
			GET_PATH(link_to_path);
			if (link_to_path == NULL) {
				error = ENOMEM;
				goto out2;
			}

			link_name_len = MAXPATHLEN;
			if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
				/*
				 * Call out to allow 3rd party notification of rename.
				 * Ignore result of kauth_authorize_fileop call.
				 */
				kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
						       (uintptr_t)link_to_path,
						       (uintptr_t)target_path);
			}
			if (link_to_path != NULL) {
				RELEASE_PATH(link_to_path);
			}
		}
#if CONFIG_FSE
		if (need_event) {
			/* construct fsevent */
			if (get_fse_info(vp, &finfo, ctx) == 0) {
				if (truncated) {
					finfo.mode |= FSE_TRUNCATED_PATH;
				}

				// build the path to the destination of the link
				add_fsevent(FSE_CREATE_FILE, ctx,
					    FSE_ARG_STRING, len, target_path,
					    FSE_ARG_FINFO, &finfo,
					    FSE_ARG_DONE);
			}
			if (vp->v_parent) {
				add_fsevent(FSE_STAT_CHANGED, ctx,
					    FSE_ARG_VNODE, vp->v_parent,
					    FSE_ARG_DONE);
			}
		}
#endif
	}
out2:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);
	if (target_path != NULL) {
		RELEASE_PATH(target_path);
	}
out:
	if (lvp)
		vnode_put(lvp);
	if (dvp)
		vnode_put(dvp);
	vnode_put(vp);
	return (error);
}
4351
4352 int
4353 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4354 {
4355 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4356 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4357 }
4358
4359 int
4360 linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4361 {
4362 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4363 return (EINVAL);
4364
4365 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4366 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4367 }
4368
/*
 * Make a symbolic link.
 *
 * Common worker for symlink(2)/symlinkat(2): 'path_data' is the link
 * target string, 'link' is the name to create (resolved relative to
 * 'fd').  We could add support for ACLs here too...
 */
/* ARGSUSED */
static int
symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
    user_addr_t link, enum uio_seg segflg)
{
	struct vnode_attr va;
	char *path;
	int error;
	struct nameidata nd;
	vnode_t	vp, dvp;
	size_t dummy=0;
	proc_t p;

	error = 0;
	/* Copy the link target string into a kernel buffer if needed. */
	if (UIO_SEG_IS_USER_SPACE(segflg)) {
		MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
		error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
	} else {
		path = (char *)path_data;
	}
	if (error)
		goto out;
	AUDIT_ARG(text, path);	/* This is the link string */

	NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
	    segflg, link, ctx);

	error = nameiat(&nd, fd);
	if (error)
		goto out;
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* Symlink mode: ACCESSPERMS masked by the process umask. */
	p = vfs_context_proc(ctx);
	VATTR_INIT(&va);
	VATTR_SET(&va, va_type, VLNK);
	VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);

#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    dvp, &nd.ni_cnd, &va);
#endif
	if (error != 0) {
		goto skipit;
	}

	/* Link name must not already exist. */
	if (vp != NULL) {
		error = EEXIST;
		goto skipit;
	}

	/* authorize */
	if (error == 0)
		error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
	/* get default ownership, etc. */
	if (error == 0)
		error = vnode_authattr_new(dvp, &va, 0, ctx);
	if (error == 0)
		error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);

#if CONFIG_MACF
	if (error == 0 && vp)
		error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
#endif

	/* do fallback attribute handling */
	if (error == 0 && vp)
		error = vnode_setattr_fallback(vp, &va, ctx);

	if (error == 0) {
		int update_flags = 0;

		/*check if a new vnode was created, else try to get one*/
		if (vp == NULL) {
			nd.ni_cnd.cn_nameiop = LOOKUP;
#if CONFIG_TRIGGERS
			nd.ni_op = OP_LOOKUP;
#endif
			nd.ni_cnd.cn_flags = 0;
			error = nameiat(&nd, fd);
			vp = nd.ni_vp;

			if (vp == NULL)
				goto skipit;
		}

#if 0  /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
		/* call out to allow 3rd party notification of rename.
		 * Ignore result of kauth_authorize_fileop call.
		 */
		if (kauth_authorize_fileop_has_listeners() &&
		    namei(&nd) == 0) {
			char *new_link_path = NULL;
			int len;

			/* build the path to the new link file */
			new_link_path = get_pathbuff();
			len = MAXPATHLEN;
			vn_getpath(dvp, new_link_path, &len);
			if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
				new_link_path[len - 1] = '/';
				strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
			}

			kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
					   (uintptr_t)path, (uintptr_t)new_link_path);
			if (new_link_path != NULL)
				release_pathbuff(new_link_path);
		}
#endif
		// Make sure the name & parent pointers are hooked up
		if (vp->v_name == NULL)
			update_flags |= VNODE_UPDATE_NAME;
		if (vp->v_parent == NULLVP)
			update_flags |= VNODE_UPDATE_PARENT;

		if (update_flags)
			vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
		add_fsevent(FSE_CREATE_FILE, ctx,
		    FSE_ARG_VNODE, vp,
		    FSE_ARG_DONE);
#endif
	}

skipit:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	vnode_put(dvp);
out:
	/* Only free 'path' when we allocated it (user-space case). */
	if (path && (path != (char *)path_data))
		FREE_ZONE(path, MAXPATHLEN, M_NAMEI);

	return (error);
}
4516
4517 int
4518 symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4519 {
4520 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4521 uap->link, UIO_USERSPACE));
4522 }
4523
4524 int
4525 symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4526 __unused int32_t *retval)
4527 {
4528 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4529 uap->path2, UIO_USERSPACE));
4530 }
4531
4532 /*
4533 * Delete a whiteout from the filesystem.
4534 * No longer supported.
4535 */
4536 int
4537 undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
4538 {
4539 return (ENOTSUP);
4540 }
4541
4542 /*
4543 * Delete a name from the filesystem.
4544 */
4545 /* ARGSUSED */
4546 static int
4547 unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
4548 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
4549 {
4550 struct nameidata nd;
4551 vnode_t vp, dvp;
4552 int error;
4553 struct componentname *cnp;
4554 char *path = NULL;
4555 int len=0;
4556 #if CONFIG_FSE
4557 fse_info finfo;
4558 struct vnode_attr va;
4559 #endif
4560 int flags;
4561 int need_event;
4562 int has_listeners;
4563 int truncated_path;
4564 int batched;
4565 struct vnode_attr *vap;
4566 int do_retry;
4567 int retry_count = 0;
4568 int cn_flags;
4569
4570 cn_flags = LOCKPARENT;
4571 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
4572 cn_flags |= AUDITVNPATH1;
4573 /* If a starting dvp is passed, it trumps any fd passed. */
4574 if (start_dvp)
4575 cn_flags |= USEDVP;
4576
4577 #if NAMEDRSRCFORK
4578 /* unlink or delete is allowed on rsrc forks and named streams */
4579 cn_flags |= CN_ALLOWRSRCFORK;
4580 #endif
4581
4582 retry:
4583 do_retry = 0;
4584 flags = 0;
4585 need_event = 0;
4586 has_listeners = 0;
4587 truncated_path = 0;
4588 vap = NULL;
4589
4590 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
4591
4592 nd.ni_dvp = start_dvp;
4593 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
4594 cnp = &nd.ni_cnd;
4595
4596 lookup_continue:
4597 error = nameiat(&nd, fd);
4598 if (error)
4599 return (error);
4600
4601 dvp = nd.ni_dvp;
4602 vp = nd.ni_vp;
4603
4604
4605 /* With Carbon delete semantics, busy files cannot be deleted */
4606 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
4607 flags |= VNODE_REMOVE_NODELETEBUSY;
4608 }
4609
4610 /* Skip any potential upcalls if told to. */
4611 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4612 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4613 }
4614
4615 if (vp) {
4616 batched = vnode_compound_remove_available(vp);
4617 /*
4618 * The root of a mounted filesystem cannot be deleted.
4619 */
4620 if (vp->v_flag & VROOT) {
4621 error = EBUSY;
4622 }
4623
4624 if (!batched) {
4625 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4626 if (error) {
4627 if (error == ENOENT) {
4628 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4629 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4630 do_retry = 1;
4631 retry_count++;
4632 }
4633 }
4634 goto out;
4635 }
4636 }
4637 } else {
4638 batched = 1;
4639
4640 if (!vnode_compound_remove_available(dvp)) {
4641 panic("No vp, but no compound remove?");
4642 }
4643 }
4644
4645 #if CONFIG_FSE
4646 need_event = need_fsevent(FSE_DELETE, dvp);
4647 if (need_event) {
4648 if (!batched) {
4649 if ((vp->v_flag & VISHARDLINK) == 0) {
4650 /* XXX need to get these data in batched VNOP */
4651 get_fse_info(vp, &finfo, ctx);
4652 }
4653 } else {
4654 error = vfs_get_notify_attributes(&va);
4655 if (error) {
4656 goto out;
4657 }
4658
4659 vap = &va;
4660 }
4661 }
4662 #endif
4663 has_listeners = kauth_authorize_fileop_has_listeners();
4664 if (need_event || has_listeners) {
4665 if (path == NULL) {
4666 GET_PATH(path);
4667 if (path == NULL) {
4668 error = ENOMEM;
4669 goto out;
4670 }
4671 }
4672 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
4673 }
4674
4675 #if NAMEDRSRCFORK
4676 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
4677 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4678 else
4679 #endif
4680 {
4681 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
4682 vp = nd.ni_vp;
4683 if (error == EKEEPLOOKING) {
4684 if (!batched) {
4685 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4686 }
4687
4688 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
4689 panic("EKEEPLOOKING, but continue flag not set?");
4690 }
4691
4692 if (vnode_isdir(vp)) {
4693 error = EISDIR;
4694 goto out;
4695 }
4696 goto lookup_continue;
4697 } else if (error == ENOENT && batched) {
4698 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4699 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4700 /*
4701 * For compound VNOPs, the authorization callback may
4702 * return ENOENT in case of racing hardlink lookups
4703 * hitting the name cache, redrive the lookup.
4704 */
4705 do_retry = 1;
4706 retry_count += 1;
4707 goto out;
4708 }
4709 }
4710 }
4711
4712 /*
4713 * Call out to allow 3rd party notification of delete.
4714 * Ignore result of kauth_authorize_fileop call.
4715 */
4716 if (!error) {
4717 if (has_listeners) {
4718 kauth_authorize_fileop(vfs_context_ucred(ctx),
4719 KAUTH_FILEOP_DELETE,
4720 (uintptr_t)vp,
4721 (uintptr_t)path);
4722 }
4723
4724 if (vp->v_flag & VISHARDLINK) {
4725 //
4726 // if a hardlink gets deleted we want to blow away the
4727 // v_parent link because the path that got us to this
4728 // instance of the link is no longer valid. this will
4729 // force the next call to get the path to ask the file
4730 // system instead of just following the v_parent link.
4731 //
4732 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
4733 }
4734
4735 #if CONFIG_FSE
4736 if (need_event) {
4737 if (vp->v_flag & VISHARDLINK) {
4738 get_fse_info(vp, &finfo, ctx);
4739 } else if (vap) {
4740 vnode_get_fse_info_from_vap(vp, &finfo, vap);
4741 }
4742 if (truncated_path) {
4743 finfo.mode |= FSE_TRUNCATED_PATH;
4744 }
4745 add_fsevent(FSE_DELETE, ctx,
4746 FSE_ARG_STRING, len, path,
4747 FSE_ARG_FINFO, &finfo,
4748 FSE_ARG_DONE);
4749 }
4750 #endif
4751 }
4752
4753 out:
4754 if (path != NULL)
4755 RELEASE_PATH(path);
4756
4757 #if NAMEDRSRCFORK
4758 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4759 * will cause its shadow file to go away if necessary.
4760 */
4761 if (vp && (vnode_isnamedstream(vp)) &&
4762 (vp->v_parent != NULLVP) &&
4763 vnode_isshadow(vp)) {
4764 vnode_recycle(vp);
4765 }
4766 #endif
4767 /*
4768 * nameidone has to happen before we vnode_put(dvp)
4769 * since it may need to release the fs_nodelock on the dvp
4770 */
4771 nameidone(&nd);
4772 vnode_put(dvp);
4773 if (vp) {
4774 vnode_put(vp);
4775 }
4776
4777 if (do_retry) {
4778 goto retry;
4779 }
4780
4781 return (error);
4782 }
4783
4784 int
4785 unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
4786 enum uio_seg segflg, int unlink_flags)
4787 {
4788 return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
4789 unlink_flags));
4790 }
4791
4792 /*
4793 * Delete a name from the filesystem using Carbon semantics.
4794 */
4795 int
4796 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
4797 {
4798 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4799 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
4800 }
4801
4802 /*
4803 * Delete a name from the filesystem using POSIX semantics.
4804 */
4805 int
4806 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
4807 {
4808 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4809 uap->path, UIO_USERSPACE, 0));
4810 }
4811
4812 int
4813 unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
4814 {
4815 if (uap->flag & ~AT_REMOVEDIR)
4816 return (EINVAL);
4817
4818 if (uap->flag & AT_REMOVEDIR)
4819 return (rmdirat_internal(vfs_context_current(), uap->fd,
4820 uap->path, UIO_USERSPACE));
4821 else
4822 return (unlinkat_internal(vfs_context_current(), uap->fd,
4823 NULLVP, uap->path, UIO_USERSPACE, 0));
4824 }
4825
4826 /*
4827 * Reposition read/write file offset.
4828 */
int
lseek(proc_t p, struct lseek_args *uap, off_t *retval)
{
	struct fileproc *fp;
	vnode_t vp;
	struct vfs_context *ctx;
	off_t offset = uap->offset, file_size;
	int error;

	/*
	 * Resolve the fd to its backing vnode; fp_getfvp() reports ENOTSUP
	 * for descriptors that are not vnode-backed, which we translate to
	 * ESPIPE as required for non-seekable objects.
	 */
	if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
		if (error == ENOTSUP)
			return (ESPIPE);
		return (error);
	}
	/* Seeking a FIFO is meaningless; also ESPIPE. */
	if (vnode_isfifo(vp)) {
		file_drop(uap->fd);
		return(ESPIPE);
	}


	ctx = vfs_context_current();
#if CONFIG_MACF
	/*
	 * lseek(fd, 0, SEEK_CUR) only observes the current offset, so it is
	 * vetted with the weaker "get offset" MAC hook; anything else can
	 * move the offset and uses the "change offset" hook.
	 */
	if (uap->whence == L_INCR && uap->offset == 0)
		error = mac_file_check_get_offset(vfs_context_ucred(ctx),
		    fp->f_fglob);
	else
		error = mac_file_check_change_offset(vfs_context_ucred(ctx),
		    fp->f_fglob);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}
#endif
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}

	/* Compute the tentative new absolute offset. */
	switch (uap->whence) {
	case L_INCR:		/* SEEK_CUR: relative to current position */
		offset += fp->f_fglob->fg_offset;
		break;
	case L_XTND:		/* SEEK_END: relative to end of file */
		if ((error = vnode_size(vp, &file_size, ctx)) != 0)
			break;
		offset += file_size;
		break;
	case L_SET:		/* SEEK_SET: absolute */
		break;
	default:
		error = EINVAL;
	}
	if (error == 0) {
		if (uap->offset > 0 && offset < 0) {
			/* Incremented/relative move past max size */
			error = EOVERFLOW;
		} else {
			/*
			 * Allow negative offsets on character devices, per
			 * POSIX 1003.1-2001. Most likely for writing disk
			 * labels.
			 */
			if (offset < 0 && vp->v_type != VCHR) {
				/* Decremented/relative move before start */
				error = EINVAL;
			} else {
				/* Success */
				fp->f_fglob->fg_offset = offset;
				*retval = fp->f_fglob->fg_offset;
			}
		}
	}

	/*
	 * An lseek can affect whether data is "available to read."  Use
	 * hint of NOTE_NONE so no EVFILT_VNODE events fire
	 */
	post_event_if_success(vp, error, NOTE_NONE);
	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}
4911
4912
4913 /*
4914 * Check access permissions.
4915 *
4916 * Returns: 0 Success
4917 * vnode_authorize:???
4918 */
4919 static int
4920 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
4921 {
4922 kauth_action_t action;
4923 int error;
4924
4925 /*
4926 * If just the regular access bits, convert them to something
4927 * that vnode_authorize will understand.
4928 */
4929 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4930 action = 0;
4931 if (uflags & R_OK)
4932 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4933 if (uflags & W_OK) {
4934 if (vnode_isdir(vp)) {
4935 action |= KAUTH_VNODE_ADD_FILE |
4936 KAUTH_VNODE_ADD_SUBDIRECTORY;
4937 /* might want delete rights here too */
4938 } else {
4939 action |= KAUTH_VNODE_WRITE_DATA;
4940 }
4941 }
4942 if (uflags & X_OK) {
4943 if (vnode_isdir(vp)) {
4944 action |= KAUTH_VNODE_SEARCH;
4945 } else {
4946 action |= KAUTH_VNODE_EXECUTE;
4947 }
4948 }
4949 } else {
4950 /* take advantage of definition of uflags */
4951 action = uflags >> 8;
4952 }
4953
4954 #if CONFIG_MACF
4955 error = mac_vnode_check_access(ctx, vp, uflags);
4956 if (error)
4957 return (error);
4958 #endif /* MAC */
4959
4960 /* action == 0 means only check for existence */
4961 if (action != 0) {
4962 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
4963 } else {
4964 error = 0;
4965 }
4966
4967 return(error);
4968 }
4969
4970
4971
4972 /*
4973 * access_extended: Check access permissions in bulk.
4974 *
4975 * Description: uap->entries Pointer to an array of accessx
4976 * descriptor structs, plus one or
4977 * more NULL terminated strings (see
4978 * "Notes" section below).
4979 * uap->size Size of the area pointed to by
4980 * uap->entries.
4981 * uap->results Pointer to the results array.
4982 *
4983 * Returns: 0 Success
4984 * ENOMEM Insufficient memory
4985 * EINVAL Invalid arguments
4986 * namei:EFAULT Bad address
4987 * namei:ENAMETOOLONG Filename too long
4988 * namei:ENOENT No such file or directory
4989 * namei:ELOOP Too many levels of symbolic links
4990 * namei:EBADF Bad file descriptor
4991 * namei:ENOTDIR Not a directory
4992 * namei:???
4993 * access1:
4994 *
4995 * Implicit returns:
4996 * uap->results Array contents modified
4997 *
4998 * Notes: The uap->entries are structured as an arbitrary length array
4999 * of accessx descriptors, followed by one or more NULL terminated
5000 * strings
5001 *
5002 * struct accessx_descriptor[0]
5003 * ...
5004 * struct accessx_descriptor[n]
5005 * char name_data[0];
5006 *
5007 * We determine the entry count by walking the buffer containing
5008 * the uap->entries argument descriptor. For each descriptor we
5009 * see, the valid values for the offset ad_name_offset will be
5010 * in the byte range:
5011 *
5012 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5013 * to
5014 * [ uap->entries + uap->size - 2 ]
5015 *
5016 * since we must have at least one string, and the string must
5017 * be at least one character plus the NULL terminator in length.
5018 *
5019 * XXX: Need to support the check-as uid argument
5020 */
int
access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
{
	struct accessx_descriptor *input = NULL;
	errno_t *result = NULL;
	errno_t error = 0;
	int wantdelete = 0;
	unsigned int desc_max, desc_actual, i, j;
	struct vfs_context context;
	struct nameidata nd;
	int niopts;
	vnode_t vp = NULL;
	vnode_t dvp = NULL;
#define ACCESSX_MAX_DESCR_ON_STACK 10
	struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];

	/* NULL marks "not yet allocated" for the credential cleanup below. */
	context.vc_ucred = NULL;

	/*
	 * Validate parameters; if valid, copy the descriptor array and string
	 * arguments into local memory.  Before proceeding, the following
	 * conditions must have been met:
	 *
	 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
	 * o There must be sufficient room in the request for at least one
	 *   descriptor and a one byte NUL terminated string.
	 * o The allocation of local storage must not fail.
	 */
	if (uap->size > ACCESSX_MAX_TABLESIZE)
		return(ENOMEM);
	if (uap->size < (sizeof(struct accessx_descriptor) + 2))
		return(EINVAL);
	/* Small requests use the stack buffer; larger ones are heap-allocated. */
	if (uap->size <= sizeof (stack_input)) {
		input = stack_input;
	} else {
		MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
		if (input == NULL) {
			error = ENOMEM;
			goto out;
		}
	}
	error = copyin(uap->entries, input, uap->size);
	if (error)
		goto out;

	AUDIT_ARG(opaque, input, uap->size);

	/*
	 * Force NUL termination of the copyin buffer to avoid namei() running
	 * off the end.  If the caller passes us bogus data, they may get a
	 * bogus result.
	 */
	((char *)input)[uap->size - 1] = 0;

	/*
	 * Access is defined as checking against the process' real identity,
	 * even if operations are checking the effective identity.  This
	 * requires that we use a local vfs context.
	 */
	context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
	context.vc_thread = current_thread();

	/*
	 * Find out how many entries we have, so we can allocate the result
	 * array by walking the list and adjusting the count downward by the
	 * earliest string offset we see.
	 */
	desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
	desc_actual = desc_max;
	for (i = 0; i < desc_actual; i++) {
		/*
		 * Take the offset to the name string for this entry and
		 * convert to an input array index, which would be one off
		 * the end of the array if this entry was the lowest-addressed
		 * name string.
		 */
		j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);

		/*
		 * An offset greater than the max allowable offset is an error.
		 * It is also an error for any valid entry to point
		 * to a location prior to the end of the current entry, if
		 * it's not a reference to the string of the previous entry.
		 */
		if (j > desc_max || (j != 0 && j <= i)) {
			error = EINVAL;
			goto out;
		}

		/* Also do not let ad_name_offset point to something beyond the size of the input */
		if (input[i].ad_name_offset >= uap->size) {
			error = EINVAL;
			goto out;
		}

		/*
		 * An offset of 0 means use the previous descriptor's offset;
		 * this is used to chain multiple requests for the same file
		 * to avoid multiple lookups.
		 */
		if (j == 0) {
			/* This is not valid for the first entry */
			if (i == 0) {
				error = EINVAL;
				goto out;
			}
			continue;
		}

		/*
		 * If the offset of the string for this descriptor is before
		 * what we believe is the current actual last descriptor,
		 * then we need to adjust our estimate downward; this permits
		 * the string table following the last descriptor to be out
		 * of order relative to the descriptor list.
		 */
		if (j < desc_actual)
			desc_actual = j;
	}

	/*
	 * We limit the actual number of descriptors we are willing to process
	 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS.  If the number being
	 * requested exceeds this limit, the request is rejected outright.
	 */
	if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
		error = ENOMEM;
		goto out;
	}
	MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
	if (result == NULL) {
		error = ENOMEM;
		goto out;
	}

	/*
	 * Do the work by iterating over the descriptor entries we know to
	 * at least appear to contain valid data.
	 */
	error = 0;
	for (i = 0; i < desc_actual; i++) {
		/*
		 * If the ad_name_offset is 0, then we use the previous
		 * results to make the check; otherwise, we are looking up
		 * a new file name.
		 */
		if (input[i].ad_name_offset != 0) {
			/* discard old vnodes */
			if (vp) {
				vnode_put(vp);
				vp = NULL;
			}
			if (dvp) {
				vnode_put(dvp);
				dvp = NULL;
			}

			/*
			 * Scan forward in the descriptor list to see if we
			 * need the parent vnode.  We will need it if we are
			 * deleting, since we must have rights  to remove
			 * entries in the parent directory, as well as the
			 * rights to delete the object itself.
			 */
			wantdelete = input[i].ad_flags & _DELETE_OK;
			for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
				if (input[j].ad_flags & _DELETE_OK)
					wantdelete = 1;

			niopts = FOLLOW | AUDITVNPATH1;

			/* need parent for vnode_authorize for deletion test */
			if (wantdelete)
				niopts |= WANTPARENT;

			/* do the lookup */
			NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
			       CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
			       &context);
			error = namei(&nd);
			if (!error) {
				vp = nd.ni_vp;
				if (wantdelete)
					dvp = nd.ni_dvp;
			}
			nameidone(&nd);
		}

		/*
		 * Handle lookup errors.  Per-file failures are reported in
		 * the result array; anything else aborts the whole call.
		 */
		switch(error) {
		case ENOENT:
		case EACCES:
		case EPERM:
		case ENOTDIR:
			result[i] = error;
			break;
		case 0:
			/* run this access check */
			result[i] = access1(vp, dvp, input[i].ad_flags, &context);
			break;
		default:
			/* fatal lookup error */

			goto out;
		}
	}

	AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);

	/* copy out results */
	error = copyout(result, uap->results, desc_actual * sizeof(errno_t));

out:
	if (input && input != stack_input)
		FREE(input, M_TEMP);
	if (result)
		FREE(result, M_TEMP);
	if (vp)
		vnode_put(vp);
	if (dvp)
		vnode_put(dvp);
	if (IS_VALID_CRED(context.vc_ucred))
 		kauth_cred_unref(&context.vc_ucred);
	return(error);
}
5248
5249
5250 /*
5251 * Returns: 0 Success
5252 * namei:EFAULT Bad address
5253 * namei:ENAMETOOLONG Filename too long
5254 * namei:ENOENT No such file or directory
5255 * namei:ELOOP Too many levels of symbolic links
5256 * namei:EBADF Bad file descriptor
5257 * namei:ENOTDIR Not a directory
5258 * namei:???
5259 * access1:
5260 */
static int
faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
    int flag, enum uio_seg segflg)
{
	int error;
	struct nameidata nd;
 	int niopts;
	struct vfs_context context;
#if NAMEDRSRCFORK
	int is_namedstream = 0;
#endif

	/*
	 * Unless the AT_EACCESS option is used, Access is defined as checking
	 * against the process' real identity, even if operations are checking
	 * the effective identity.  So we need to tweak the credential
	 * in the context for that case.
	 */
	if (!(flag & AT_EACCESS))
		context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
	else
		context.vc_ucred = ctx->vc_ucred;
	context.vc_thread = ctx->vc_thread;


	niopts = FOLLOW | AUDITVNPATH1;
 	/* need parent for vnode_authorize for deletion test */
	if (amode & _DELETE_OK)
 		niopts |= WANTPARENT;
	NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
	       path, &context);

#if NAMEDRSRCFORK
	/* access(F_OK) calls are allowed for resource forks. */
	if (amode == F_OK)
		nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
#endif
 	error = nameiat(&nd, fd);
 	if (error)
 		goto out;

#if NAMEDRSRCFORK
	/* Grab reference on the shadow stream file vnode to
	 * force an inactive on release which will mark it
	 * for recycle.
	 */
	if (vnode_isnamedstream(nd.ni_vp) &&
	    (nd.ni_vp->v_parent != NULLVP) &&
	    vnode_isshadow(nd.ni_vp)) {
		is_namedstream = 1;
		vnode_ref(nd.ni_vp);
	}
#endif

	/* The actual permission check. */
	error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);

#if NAMEDRSRCFORK
	if (is_namedstream) {
		vnode_rele(nd.ni_vp);
	}
#endif

	/* Release lookup iocounts; dvp was only taken for _DELETE_OK. */
	vnode_put(nd.ni_vp);
	if (amode & _DELETE_OK)
 		vnode_put(nd.ni_dvp);
 	nameidone(&nd);

out:
	/* Only drop the cred if we allocated a real-identity copy above. */
 	if (!(flag & AT_EACCESS))
 		kauth_cred_unref(&context.vc_ucred);
 	return (error);
}
5333
5334 int
5335 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5336 {
5337 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5338 uap->path, uap->flags, 0, UIO_USERSPACE));
5339 }
5340
5341 int
5342 faccessat(__unused proc_t p, struct faccessat_args *uap,
5343 __unused int32_t *retval)
5344 {
5345 if (uap->flag & ~AT_EACCESS)
5346 return (EINVAL);
5347
5348 return (faccessat_internal(vfs_context_current(), uap->fd,
5349 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5350 }
5351
5352 /*
5353 * Returns: 0 Success
5354 * EFAULT
5355 * copyout:EFAULT
5356 * namei:???
5357 * vn_stat:???
5358 */
5359 static int
5360 fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5361 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5362 enum uio_seg segflg, int fd, int flag)
5363 {
5364 struct nameidata nd;
5365 int follow;
5366 union {
5367 struct stat sb;
5368 struct stat64 sb64;
5369 } source;
5370 union {
5371 struct user64_stat user64_sb;
5372 struct user32_stat user32_sb;
5373 struct user64_stat64 user64_sb64;
5374 struct user32_stat64 user32_sb64;
5375 } dest;
5376 caddr_t sbp;
5377 int error, my_size;
5378 kauth_filesec_t fsec;
5379 size_t xsecurity_bufsize;
5380 void * statptr;
5381
5382 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5383 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5384 segflg, path, ctx);
5385
5386 #if NAMEDRSRCFORK
5387 int is_namedstream = 0;
5388 /* stat calls are allowed for resource forks. */
5389 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5390 #endif
5391 error = nameiat(&nd, fd);
5392 if (error)
5393 return (error);
5394 fsec = KAUTH_FILESEC_NONE;
5395
5396 statptr = (void *)&source;
5397
5398 #if NAMEDRSRCFORK
5399 /* Grab reference on the shadow stream file vnode to
5400 * force an inactive on release which will mark it
5401 * for recycle.
5402 */
5403 if (vnode_isnamedstream(nd.ni_vp) &&
5404 (nd.ni_vp->v_parent != NULLVP) &&
5405 vnode_isshadow(nd.ni_vp)) {
5406 is_namedstream = 1;
5407 vnode_ref(nd.ni_vp);
5408 }
5409 #endif
5410
5411 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
5412
5413 #if NAMEDRSRCFORK
5414 if (is_namedstream) {
5415 vnode_rele(nd.ni_vp);
5416 }
5417 #endif
5418 vnode_put(nd.ni_vp);
5419 nameidone(&nd);
5420
5421 if (error)
5422 return (error);
5423 /* Zap spare fields */
5424 if (isstat64 != 0) {
5425 source.sb64.st_lspare = 0;
5426 source.sb64.st_qspare[0] = 0LL;
5427 source.sb64.st_qspare[1] = 0LL;
5428 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5429 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
5430 my_size = sizeof(dest.user64_sb64);
5431 sbp = (caddr_t)&dest.user64_sb64;
5432 } else {
5433 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
5434 my_size = sizeof(dest.user32_sb64);
5435 sbp = (caddr_t)&dest.user32_sb64;
5436 }
5437 /*
5438 * Check if we raced (post lookup) against the last unlink of a file.
5439 */
5440 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5441 source.sb64.st_nlink = 1;
5442 }
5443 } else {
5444 source.sb.st_lspare = 0;
5445 source.sb.st_qspare[0] = 0LL;
5446 source.sb.st_qspare[1] = 0LL;
5447 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5448 munge_user64_stat(&source.sb, &dest.user64_sb);
5449 my_size = sizeof(dest.user64_sb);
5450 sbp = (caddr_t)&dest.user64_sb;
5451 } else {
5452 munge_user32_stat(&source.sb, &dest.user32_sb);
5453 my_size = sizeof(dest.user32_sb);
5454 sbp = (caddr_t)&dest.user32_sb;
5455 }
5456
5457 /*
5458 * Check if we raced (post lookup) against the last unlink of a file.
5459 */
5460 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5461 source.sb.st_nlink = 1;
5462 }
5463 }
5464 if ((error = copyout(sbp, ub, my_size)) != 0)
5465 goto out;
5466
5467 /* caller wants extended security information? */
5468 if (xsecurity != USER_ADDR_NULL) {
5469
5470 /* did we get any? */
5471 if (fsec == KAUTH_FILESEC_NONE) {
5472 if (susize(xsecurity_size, 0) != 0) {
5473 error = EFAULT;
5474 goto out;
5475 }
5476 } else {
5477 /* find the user buffer size */
5478 xsecurity_bufsize = fusize(xsecurity_size);
5479
5480 /* copy out the actual data size */
5481 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5482 error = EFAULT;
5483 goto out;
5484 }
5485
5486 /* if the caller supplied enough room, copy out to it */
5487 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5488 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5489 }
5490 }
5491 out:
5492 if (fsec != KAUTH_FILESEC_NONE)
5493 kauth_filesec_free(fsec);
5494 return (error);
5495 }
5496
5497 /*
5498 * stat_extended: Get file status; with extended security (ACL).
5499 *
5500 * Parameters: p (ignored)
5501 * uap User argument descriptor (see below)
5502 * retval (ignored)
5503 *
5504 * Indirect: uap->path Path of file to get status from
5505 * uap->ub User buffer (holds file status info)
5506 * uap->xsecurity ACL to get (extended security)
5507 * uap->xsecurity_size Size of ACL
5508 *
5509 * Returns: 0 Success
5510 * !0 errno value
5511 *
5512 */
5513 int
5514 stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5515 __unused int32_t *retval)
5516 {
5517 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5518 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5519 0));
5520 }
5521
5522 /*
5523 * Returns: 0 Success
5524 * fstatat_internal:??? [see fstatat_internal() in this file]
5525 */
5526 int
5527 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
5528 {
5529 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5530 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
5531 }
5532
5533 int
5534 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
5535 {
5536 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5537 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
5538 }
5539
5540 /*
5541 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5542 *
5543 * Parameters: p (ignored)
5544 * uap User argument descriptor (see below)
5545 * retval (ignored)
5546 *
5547 * Indirect: uap->path Path of file to get status from
5548 * uap->ub User buffer (holds file status info)
5549 * uap->xsecurity ACL to get (extended security)
5550 * uap->xsecurity_size Size of ACL
5551 *
5552 * Returns: 0 Success
5553 * !0 errno value
5554 *
5555 */
5556 int
5557 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
5558 {
5559 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5560 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5561 0));
5562 }
5563
5564 /*
5565 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5566 *
5567 * Parameters: p (ignored)
5568 * uap User argument descriptor (see below)
5569 * retval (ignored)
5570 *
5571 * Indirect: uap->path Path of file to get status from
5572 * uap->ub User buffer (holds file status info)
5573 * uap->xsecurity ACL to get (extended security)
5574 * uap->xsecurity_size Size of ACL
5575 *
5576 * Returns: 0 Success
5577 * !0 errno value
5578 *
5579 */
5580 int
5581 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
5582 {
5583 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5584 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5585 AT_SYMLINK_NOFOLLOW));
5586 }
5587
5588 /*
5589 * Get file status; this version does not follow links.
5590 */
5591 int
5592 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
5593 {
5594 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5595 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5596 }
5597
5598 int
5599 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
5600 {
5601 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5602 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5603 }
5604
5605 /*
5606 * lstat64_extended: Get file status; can handle large inode numbers; does not
5607 * follow links; with extended security (ACL).
5608 *
5609 * Parameters: p (ignored)
5610 * uap User argument descriptor (see below)
5611 * retval (ignored)
5612 *
5613 * Indirect: uap->path Path of file to get status from
5614 * uap->ub User buffer (holds file status info)
5615 * uap->xsecurity ACL to get (extended security)
5616 * uap->xsecurity_size Size of ACL
5617 *
5618 * Returns: 0 Success
5619 * !0 errno value
5620 *
5621 */
5622 int
5623 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
5624 {
5625 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5626 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5627 AT_SYMLINK_NOFOLLOW));
5628 }
5629
5630 int
5631 fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5632 {
5633 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5634 return (EINVAL);
5635
5636 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5637 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5638 }
5639
5640 int
5641 fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5642 __unused int32_t *retval)
5643 {
5644 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5645 return (EINVAL);
5646
5647 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5648 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
5649 }
5650
5651 /*
5652 * Get configurable pathname variables.
5653 *
5654 * Returns: 0 Success
5655 * namei:???
5656 * vn_pathconf:???
5657 *
5658 * Notes: Global implementation constants are intended to be
5659 * implemented in this function directly; all other constants
5660 * are per-FS implementation, and therefore must be handled in
5661 * each respective FS, instead.
5662 *
5663 * XXX We implement some things globally right now that should actually be
5664 * XXX per-FS; we will need to deal with this at some point.
5665 */
5666 /* ARGSUSED */
5667 int
5668 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
5669 {
5670 int error;
5671 struct nameidata nd;
5672 vfs_context_t ctx = vfs_context_current();
5673
5674 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
5675 UIO_USERSPACE, uap->path, ctx);
5676 error = namei(&nd);
5677 if (error)
5678 return (error);
5679
5680 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
5681
5682 vnode_put(nd.ni_vp);
5683 nameidone(&nd);
5684 return (error);
5685 }
5686
5687 /*
5688 * Return target name of a symbolic link.
5689 */
5690 /* ARGSUSED */
static int
readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
    enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
    int *retval)
{
	vnode_t vp;
	uio_t auio;
	int error;
	struct nameidata nd;
	char uio_buf[ UIO_SIZEOF(1) ];

	/* NOFOLLOW: we want the link object itself, not its target. */
	NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
	    seg, path, ctx);

	error = nameiat(&nd, fd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* Describe the caller's buffer as a single-iovec uio. */
	auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
		                    &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, buf, bufsize);
	if (vp->v_type != VLNK) {
		error = EINVAL;
	} else {
		/* MAC check, then kauth read authorization, then the read. */
#if CONFIG_MACF
		error = mac_vnode_check_readlink(ctx, vp);
#endif
		if (error == 0)
			error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
			    ctx);
		if (error == 0)
			error = VNOP_READLINK(vp, auio, ctx);
	}
	vnode_put(vp);

	/* Bytes produced (computed even on error, matching historical behavior). */
	*retval = bufsize - (int)uio_resid(auio);
	return (error);
}
5732
5733 int
5734 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5735 {
5736 enum uio_seg procseg;
5737
5738 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5739 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5740 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5741 uap->count, procseg, retval));
5742 }
5743
5744 int
5745 readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5746 {
5747 enum uio_seg procseg;
5748
5749 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5750 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5751 procseg, uap->buf, uap->bufsize, procseg, retval));
5752 }
5753
5754 /*
5755 * Change file flags.
5756 */
/*
 * Common worker for chflags(2)/fchflags(2): set va_flags on vp.
 * NB: consumes the caller's iocount on vp on ALL paths (vnode_put at out:).
 */
static int
chflags1(vnode_t vp, int flags, vfs_context_t ctx)
{
	struct vnode_attr va;
 	kauth_action_t action;
	int error;

	VATTR_INIT(&va);
	VATTR_SET(&va, va_flags, flags);

#if CONFIG_MACF
	error = mac_vnode_check_setflags(ctx, vp, flags);
	if (error)
		goto out;
#endif

	/* request authorisation, disregard immutability */
 	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	/*
	 * Request that the auth layer disregard those file flags it's allowed to when
	 * authorizing this operation; we need to do this in order to be able to
	 * clear immutable flags.
	 */
	if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
		goto out;
	error = vnode_setattr(vp, &va, ctx);

#if CONFIG_MACF
	if (error == 0)
		mac_vnode_notify_setflags(ctx, vp, flags);
#endif

	/* Filesystem accepted the setattr but does not support flags. */
	if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
		error = ENOTSUP;
	}
out:
	vnode_put(vp);
	return(error);
}
5797
5798 /*
5799 * Change flags of a file given a path name.
5800 */
5801 /* ARGSUSED */
5802 int
5803 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
5804 {
5805 vnode_t vp;
5806 vfs_context_t ctx = vfs_context_current();
5807 int error;
5808 struct nameidata nd;
5809
5810 AUDIT_ARG(fflags, uap->flags);
5811 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5812 UIO_USERSPACE, uap->path, ctx);
5813 error = namei(&nd);
5814 if (error)
5815 return (error);
5816 vp = nd.ni_vp;
5817 nameidone(&nd);
5818
5819 error = chflags1(vp, uap->flags, ctx);
5820
5821 return(error);
5822 }
5823
5824 /*
5825 * Change flags of a file given a file descriptor.
5826 */
5827 /* ARGSUSED */
5828 int
5829 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
5830 {
5831 vnode_t vp;
5832 int error;
5833
5834 AUDIT_ARG(fd, uap->fd);
5835 AUDIT_ARG(fflags, uap->flags);
5836 if ( (error = file_vnode(uap->fd, &vp)) )
5837 return (error);
5838
5839 if ((error = vnode_getwithref(vp))) {
5840 file_drop(uap->fd);
5841 return(error);
5842 }
5843
5844 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5845
5846 error = chflags1(vp, uap->flags, vfs_context_current());
5847
5848 file_drop(uap->fd);
5849 return (error);
5850 }
5851
5852 /*
5853 * Change security information on a filesystem object.
5854 *
5855 * Returns: 0 Success
5856 * EPERM Operation not permitted
5857 * vnode_authattr:??? [anything vnode_authattr can return]
5858 * vnode_authorize:??? [anything vnode_authorize can return]
5859 * vnode_setattr:??? [anything vnode_setattr can return]
5860 *
5861 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5862 * translated to EPERM before being returned.
5863 */
static int
chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
{
	kauth_action_t action;
	int error;

	AUDIT_ARG(mode, vap->va_mode);
	/* XXX audit new args */

#if NAMEDSTREAMS
	/* chmod calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
		return (EPERM);
	}
#endif

#if CONFIG_MACF
	/*
	 * Let MAC policies veto each class of change (mode, ownership, ACL)
	 * before any kauth authorization is attempted.
	 */
	if (VATTR_IS_ACTIVE(vap, va_mode) &&
	    (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
		return (error);

	if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
		if ((error = mac_vnode_check_setowner(ctx, vp,
		    VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
		    VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1)))
			return (error);
	}

	if (VATTR_IS_ACTIVE(vap, va_acl) &&
	    (error = mac_vnode_check_setacl(ctx, vp, vap->va_acl)))
		return (error);
#endif

	/* make sure that the caller is allowed to set this security information */
	if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
	    ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		/* permission failures here are reported as EPERM, not EACCES */
		if (error == EACCES)
			error = EPERM;
		return(error);
	}

	if ((error = vnode_setattr(vp, vap, ctx)) != 0)
		return (error);

#if CONFIG_MACF
	/* Mirror the checks above: notify policies of the applied changes. */
	if (VATTR_IS_ACTIVE(vap, va_mode))
		mac_vnode_notify_setmode(ctx, vp, (mode_t)vap->va_mode);

	if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))
		mac_vnode_notify_setowner(ctx, vp,
		    VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
		    VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1);

	if (VATTR_IS_ACTIVE(vap, va_acl))
		mac_vnode_notify_setacl(ctx, vp, vap->va_acl);
#endif

	return (error);
}
5923
5924
5925 /*
5926 * Change mode of a file given a path name.
5927 *
5928 * Returns: 0 Success
5929 * namei:??? [anything namei can return]
5930 * chmod_vnode:??? [anything chmod_vnode can return]
5931 */
5932 static int
5933 chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
5934 int fd, int flag, enum uio_seg segflg)
5935 {
5936 struct nameidata nd;
5937 int follow, error;
5938
5939 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5940 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
5941 segflg, path, ctx);
5942 if ((error = nameiat(&nd, fd)))
5943 return (error);
5944 error = chmod_vnode(ctx, nd.ni_vp, vap);
5945 vnode_put(nd.ni_vp);
5946 nameidone(&nd);
5947 return(error);
5948 }
5949
5950 /*
5951 * chmod_extended: Change the mode of a file given a path name; with extended
5952 * argument list (including extended security (ACL)).
5953 *
5954 * Parameters: p Process requesting the open
5955 * uap User argument descriptor (see below)
5956 * retval (ignored)
5957 *
5958 * Indirect: uap->path Path to object (same as 'chmod')
5959 * uap->uid UID to set
5960 * uap->gid GID to set
5961 * uap->mode File mode to set (same as 'chmod')
5962 * uap->xsecurity ACL to set (or delete)
5963 *
5964 * Returns: 0 Success
5965 * !0 errno value
5966 *
5967 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5968 *
5969 * XXX: We should enummerate the possible errno values here, and where
5970 * in the code they originated.
5971 */
int
chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
{
	int error;
	struct vnode_attr va;
	kauth_filesec_t xsecdst;

	AUDIT_ARG(owner, uap->uid, uap->gid);

	/* Only request changes for the fields the caller actually supplied. */
	VATTR_INIT(&va);
	if (uap->mode != -1)
		VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);

	xsecdst = NULL;
	switch(uap->xsecurity) {
	/* explicit remove request */
	case CAST_USER_ADDR_T((void *)1):	/* _FILESEC_REMOVE_ACL */
		VATTR_SET(&va, va_acl, NULL);
		break;
	/* not being set */
	case USER_ADDR_NULL:
		break;
	default:
		/*
		 * Copy the caller's filesec in; va_acl then points into
		 * xsecdst, so it must stay allocated until chmodat() returns.
		 */
		if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
			return(error);
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
		KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
	}

	error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
	    UIO_USERSPACE);

	/* Free the copied-in filesec, if one was allocated above. */
	if (xsecdst != NULL)
		kauth_filesec_free(xsecdst);
	return(error);
}
6012
6013 /*
6014 * Returns: 0 Success
6015 * chmodat:??? [anything chmodat can return]
6016 */
6017 static int
6018 fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
6019 int flag, enum uio_seg segflg)
6020 {
6021 struct vnode_attr va;
6022
6023 VATTR_INIT(&va);
6024 VATTR_SET(&va, va_mode, mode & ALLPERMS);
6025
6026 return (chmodat(ctx, path, &va, fd, flag, segflg));
6027 }
6028
6029 int
6030 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
6031 {
6032 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6033 AT_FDCWD, 0, UIO_USERSPACE));
6034 }
6035
6036 int
6037 fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
6038 {
6039 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6040 return (EINVAL);
6041
6042 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6043 uap->fd, uap->flag, UIO_USERSPACE));
6044 }
6045
6046 /*
6047 * Change mode of a file given a file descriptor.
6048 */
6049 static int
6050 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
6051 {
6052 vnode_t vp;
6053 int error;
6054
6055 AUDIT_ARG(fd, fd);
6056
6057 if ((error = file_vnode(fd, &vp)) != 0)
6058 return (error);
6059 if ((error = vnode_getwithref(vp)) != 0) {
6060 file_drop(fd);
6061 return(error);
6062 }
6063 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6064
6065 error = chmod_vnode(vfs_context_current(), vp, vap);
6066 (void)vnode_put(vp);
6067 file_drop(fd);
6068
6069 return (error);
6070 }
6071
6072 /*
6073 * fchmod_extended: Change mode of a file given a file descriptor; with
6074 * extended argument list (including extended security (ACL)).
6075 *
6076 * Parameters: p Process requesting to change file mode
6077 * uap User argument descriptor (see below)
6078 * retval (ignored)
6079 *
6080 * Indirect: uap->mode File mode to set (same as 'chmod')
6081 * uap->uid UID to set
6082 * uap->gid GID to set
6083 * uap->xsecurity ACL to set (or delete)
6084 * uap->fd File descriptor of file to change mode
6085 *
6086 * Returns: 0 Success
6087 * !0 errno value
6088 *
6089 */
int
fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
{
	int error;
	struct vnode_attr va;
	kauth_filesec_t xsecdst;

	AUDIT_ARG(owner, uap->uid, uap->gid);

	/* Only request changes for the fields the caller actually supplied. */
	VATTR_INIT(&va);
	if (uap->mode != -1)
		VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);

	xsecdst = NULL;
	switch(uap->xsecurity) {
	/*
	 * NOTE(review): unlike chmod_extended(), a NULL xsecurity here is
	 * treated as an ACL-removal request; "leave the ACL alone" is
	 * expressed as (user_addr_t)-1 instead.
	 */
	case USER_ADDR_NULL:
		VATTR_SET(&va, va_acl, NULL);
		break;
	case CAST_USER_ADDR_T((void *)1):	/* _FILESEC_REMOVE_ACL */
		VATTR_SET(&va, va_acl, NULL);
		break;
	/* not being set */
	case CAST_USER_ADDR_T(-1):
		break;
	default:
		/*
		 * va_acl points into xsecdst, which must remain allocated
		 * until fchmod1() has consumed it.
		 */
		if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
			return(error);
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
	}

	error = fchmod1(p, uap->fd, &va);


	/* Free the copied-in filesec, if one was allocated above. */
	switch(uap->xsecurity) {
	case USER_ADDR_NULL:
	case CAST_USER_ADDR_T(-1):
		break;
	default:
		if (xsecdst != NULL)
			kauth_filesec_free(xsecdst);
	}
	return(error);
}
6137
6138 int
6139 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
6140 {
6141 struct vnode_attr va;
6142
6143 VATTR_INIT(&va);
6144 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6145
6146 return(fchmod1(p, uap->fd, &va));
6147 }
6148
6149
6150 /*
6151 * Set ownership given a path name.
6152 */
6153 /* ARGSUSED */
static int
fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
    gid_t gid, int flag, enum uio_seg segflg)
{
	vnode_t vp;
	struct vnode_attr va;
	int error;
	struct nameidata nd;
	int follow;
	kauth_action_t action;

	AUDIT_ARG(owner, uid, gid);

	/* AT_SYMLINK_NOFOLLOW selects lchown()-style symlink handling. */
	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
	NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
	    path, ctx);
	error = nameiat(&nd, fd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* VNOVAL for either id means "leave that id unchanged". */
	VATTR_INIT(&va);
	if (uid != (uid_t)VNOVAL)
		VATTR_SET(&va, va_uid, uid);
	if (gid != (gid_t)VNOVAL)
		VATTR_SET(&va, va_gid, gid);

#if CONFIG_MACF
	error = mac_vnode_check_setowner(ctx, vp, uid, gid);
	if (error)
		goto out;
#endif

	/* preflight and authorize attribute changes */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
		goto out;
	error = vnode_setattr(vp, &va, ctx);

#if CONFIG_MACF
	if (error == 0)
		mac_vnode_notify_setowner(ctx, vp, uid, gid);
#endif

out:
	/*
	 * EACCES is only allowed from namei(); permissions failure should
	 * return EPERM, so we need to translate the error code.
	 */
	if (error == EACCES)
		error = EPERM;

	vnode_put(vp);
	return (error);
}
6212
6213 int
6214 chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
6215 {
6216 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6217 uap->uid, uap->gid, 0, UIO_USERSPACE));
6218 }
6219
6220 int
6221 lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
6222 {
6223 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6224 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6225 }
6226
6227 int
6228 fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6229 {
6230 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6231 return (EINVAL);
6232
6233 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6234 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
6235 }
6236
6237 /*
6238 * Set ownership given a file descriptor.
6239 */
6240 /* ARGSUSED */
int
fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
{
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	vnode_t vp;
	int error;
	kauth_action_t action;

	AUDIT_ARG(owner, uap->uid, uap->gid);
	AUDIT_ARG(fd, uap->fd);

	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* VNOVAL for either id means "leave that id unchanged". */
	VATTR_INIT(&va);
	if (uap->uid != VNOVAL)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != VNOVAL)
		VATTR_SET(&va, va_gid, uap->gid);

#if NAMEDSTREAMS
	/* chown calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
		error = EPERM;
		goto out;
	}
#endif

#if CONFIG_MACF
	error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
	if (error)
		goto out;
#endif

	/* preflight and authorize attribute changes */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		/* permission failures on chown are reported as EPERM */
		if (error == EACCES)
			error = EPERM;
		goto out;
	}
	error = vnode_setattr(vp, &va, ctx);

#if CONFIG_MACF
	if (error == 0)
		mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid);
#endif

out:
	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}
6302
/*
 * Load the {access, modify} timestamp pair for the *utimes() calls.
 *
 * If usrtvp is USER_ADDR_NULL, the current time is used for both entries;
 * otherwise two struct timevals are copied in from user space using the
 * 32- or 64-bit layout matching the calling process, then converted.
 */
static int
getutimes(user_addr_t usrtvp, struct timespec *tsp)
{
	int error;

	if (usrtvp == USER_ADDR_NULL) {
		struct timeval old_tv;
		/* XXX Y2038 bug because of microtime argument */
		microtime(&old_tv);
		TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
		tsp[1] = tsp[0];
	} else {
		/* The user-space timeval layout depends on the caller's ABI. */
		if (IS_64BIT_PROCESS(current_proc())) {
			struct user64_timeval tv[2];
			error = copyin(usrtvp, (void *)tv, sizeof(tv));
			if (error)
				return (error);
			TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
			TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
		} else {
			struct user32_timeval tv[2];
			error = copyin(usrtvp, (void *)tv, sizeof(tv));
			if (error)
				return (error);
			TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
			TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
		}
	}
	return 0;
}
6333
/*
 * Apply an {access, modify} timestamp pair to a vnode.  'nullflag' is set
 * when the caller passed a NULL times pointer ("set to now"); it relaxes
 * the permission requirements via VA_UTIMES_NULL and suppresses the
 * EACCES -> EPERM translation below.
 */
static int
setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
    int nullflag)
{
	int error;
	struct vnode_attr va;
	kauth_action_t action;

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	VATTR_INIT(&va);
	VATTR_SET(&va, va_access_time, ts[0]);
	VATTR_SET(&va, va_modify_time, ts[1]);
	if (nullflag)
		va.va_vaflags |= VA_UTIMES_NULL;

#if NAMEDSTREAMS
	/* utimes calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
		error = EPERM;
		goto out;
	}
#endif

#if CONFIG_MACF
	error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
	if (error)
		goto out;
#endif
	/* explicit-times requests that fail permission checks return EPERM */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
		if (!nullflag && error == EACCES)
			error = EPERM;
		goto out;
	}

	/* since we may not need to auth anything, check here */
	if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		if (!nullflag && error == EACCES)
			error = EPERM;
		goto out;
	}
	error = vnode_setattr(vp, &va, ctx);

#if CONFIG_MACF
	if (error == 0)
		mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]);
#endif

out:
	return error;
}
6385
6386 /*
6387 * Set the access and modification times of a file.
6388 */
6389 /* ARGSUSED */
int
utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
{
	struct timespec ts[2];
	user_addr_t usrtvp;
	int error;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();

	/*
	 * AUDIT: Needed to change the order of operations to do the
	 * name lookup first because auditing wants the path.
	 */
	NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	nameidone(&nd);

	/*
	 * Fetch the user-supplied time.  If usrtvp is USER_ADDR_NULL, we fetch
	 * the current time instead.
	 */
	usrtvp = uap->tptr;
	if ((error = getutimes(usrtvp, ts)) != 0)
		goto out;

	error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);

out:
	/* Drop the iocount taken by namei() on all paths. */
	vnode_put(nd.ni_vp);
	return (error);
}
6424
6425 /*
6426 * Set the access and modification times of a file.
6427 */
6428 /* ARGSUSED */
6429 int
6430 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
6431 {
6432 struct timespec ts[2];
6433 vnode_t vp;
6434 user_addr_t usrtvp;
6435 int error;
6436
6437 AUDIT_ARG(fd, uap->fd);
6438 usrtvp = uap->tptr;
6439 if ((error = getutimes(usrtvp, ts)) != 0)
6440 return (error);
6441 if ((error = file_vnode(uap->fd, &vp)) != 0)
6442 return (error);
6443 if((error = vnode_getwithref(vp))) {
6444 file_drop(uap->fd);
6445 return(error);
6446 }
6447
6448 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
6449 vnode_put(vp);
6450 file_drop(uap->fd);
6451 return(error);
6452 }
6453
6454 /*
6455 * Truncate a file given its path name.
6456 */
6457 /* ARGSUSED */
int
truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;
	kauth_action_t action;

	/* Negative lengths are rejected before any lookup happens. */
	if (uap->length < 0)
		return(EINVAL);
	NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	if ((error = namei(&nd)))
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* Truncation is expressed as a va_data_size attribute change. */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_data_size, uap->length);

#if CONFIG_MACF
	/* NOCRED: a path-based truncate carries no file-descriptor credential. */
	error = mac_vnode_check_truncate(ctx, NOCRED, vp);
	if (error)
		goto out;
#endif

	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
		goto out;
	error = vnode_setattr(vp, &va, ctx);

#if CONFIG_MACF
	if (error == 0)
		mac_vnode_notify_truncate(ctx, NOCRED, vp);
#endif

out:
	vnode_put(vp);
	return (error);
}
6502
6503 /*
6504 * Truncate a file given a file descriptor.
6505 */
6506 /* ARGSUSED */
int
ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
{
	vfs_context_t ctx = vfs_context_current();
	struct vnode_attr va;
	vnode_t vp;
	struct fileproc *fp;
	int error ;
	int fd = uap->fd;

	AUDIT_ARG(fd, uap->fd);
	if (uap->length < 0)
		return(EINVAL);

	if ( (error = fp_lookup(p,fd,&fp,0)) ) {
		return(error);
	}

	/* POSIX shared memory objects get their own truncate path. */
	switch (FILEGLOB_DTYPE(fp->f_fglob)) {
	case DTYPE_PSXSHM:
		error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
		goto out;
	case DTYPE_VNODE:
		break;
	default:
		error = EINVAL;
		goto out;
	}

	vp = (vnode_t)fp->f_fglob->fg_data;

	/* the descriptor must have been opened for writing */
	if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
		error = EINVAL;
		goto out;
	}

	if ((error = vnode_getwithref(vp)) != 0) {
		goto out;
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

#if CONFIG_MACF
	/* the descriptor's opening credential is used for the MAC check */
	error = mac_vnode_check_truncate(ctx,
	    fp->f_fglob->fg_cred, vp);
	if (error) {
		(void)vnode_put(vp);
		goto out;
	}
#endif
	VATTR_INIT(&va);
	VATTR_SET(&va, va_data_size, uap->length);
	error = vnode_setattr(vp, &va, ctx);

#if CONFIG_MACF
	if (error == 0)
		mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp);
#endif

	(void)vnode_put(vp);
out:
	/* balances the fp_lookup() above on every path */
	file_drop(fd);
	return (error);
}
6572
6573
6574 /*
6575 * Sync an open file with synchronized I/O _file_ integrity completion
6576 */
6577 /* ARGSUSED */
6578 int
6579 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
6580 {
6581 __pthread_testcancel(1);
6582 return(fsync_common(p, uap, MNT_WAIT));
6583 }
6584
6585
6586 /*
6587 * Sync an open file with synchronized I/O _file_ integrity completion
6588 *
6589 * Notes: This is a legacy support function that does not test for
6590 * thread cancellation points.
6591 */
6592 /* ARGSUSED */
6593 int
6594 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6595 {
6596 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
6597 }
6598
6599
6600 /*
6601 * Sync an open file with synchronized I/O _data_ integrity completion
6602 */
6603 /* ARGSUSED */
6604 int
6605 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6606 {
6607 __pthread_testcancel(1);
6608 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6609 }
6610
6611
6612 /*
6613 * fsync_common
6614 *
6615 * Common fsync code to support both synchronized I/O file integrity completion
6616 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6617 *
6618 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6619 * will only guarantee that the file data contents are retrievable. If
6620 * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
6621 * includes additional metadata unnecessary for retrieving the file data
6622 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6623 * storage.
6624 *
6625 * Parameters: p The process
6626 * uap->fd The descriptor to synchronize
6627 * flags The data integrity flags
6628 *
6629 * Returns: int Success
6630 * fp_getfvp:EBADF Bad file descriptor
6631 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6632 * VNOP_FSYNC:??? unspecified
6633 *
6634 * Notes: We use struct fsync_args because it is a short name, and all
6635 * caller argument structures are otherwise identical.
6636 */
static int
fsync_common(proc_t p, struct fsync_args *uap, int flags)
{
	vnode_t vp;
	struct fileproc *fp;
	vfs_context_t ctx = vfs_context_current();
	int error;

	AUDIT_ARG(fd, uap->fd);

	if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
		return (error);
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* flags is MNT_WAIT (fsync) or MNT_DWAIT (fdatasync) */
	error = VNOP_FSYNC(vp, flags, ctx);

#if NAMEDRSRCFORK
	/* Sync resource fork shadow file if necessary. */
	if ((error == 0) &&
	    (vp->v_flag & VISNAMEDSTREAM) &&
	    (vp->v_parent != NULLVP) &&
	    vnode_isshadow(vp) &&
	    (fp->f_flags & FP_WRITTEN)) {
		(void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
	}
#endif

	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}
6673
6674 /*
6675 * Duplicate files. Source must be a file, target must be a file or
6676 * must not exist.
6677 *
6678 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6679 * perform inheritance correctly.
6680 */
6681 /* ARGSUSED */
int
copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
{
	vnode_t tvp, fvp, tdvp, sdvp;
	struct nameidata fromnd, tond;
	int error;
	vfs_context_t ctx = vfs_context_current();
#if CONFIG_MACF
	struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
	struct vnode_attr va;
#endif

	/* Check that the flags are valid. */

	if (uap->flags & ~CPF_MASK) {
		return(EINVAL);
	}

	NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
	    UIO_USERSPACE, uap->from, ctx);
	if ((error = namei(&fromnd)))
		return (error);
	fvp = fromnd.ni_vp;

	NDINIT(&tond, CREATE, OP_LINK,
	    LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
	    UIO_USERSPACE, uap->to, ctx);
	if ((error = namei(&tond))) {
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;

	/* An existing target is only permitted with CPF_OVERWRITE. */
	if (tvp != NULL) {
		if (!(uap->flags & CPF_OVERWRITE)) {
			error = EEXIST;
			goto out;
		}
	}

	/* Directories may not be copied, on either side. */
	if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
		error = EISDIR;
		goto out;
	}

	/* This calls existing MAC hooks for open */
	if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx,
	    NULL))) {
		goto out;
	}

	if (tvp) {
		/*
		 * See unlinkat_internal for an explanation of the potential
		 * ENOENT from the MAC hook but the gist is that the MAC hook
		 * can fail because vn_getpath isn't able to return the full
		 * path. We choose to ignore this failure.
		 */
		error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL);
		if (error && error != ENOENT)
			goto out;
		error = 0;
	}

#if CONFIG_MACF
	VATTR_INIT(&va);
	VATTR_SET(&va, va_type, fvp->v_type);
	/* Mask off all but regular access permissions */
	VATTR_SET(&va, va_mode,
	    ((((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
	error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
	if (error)
		goto out;
#endif /* CONFIG_MACF */

	if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out;

	/* copying a file onto its own parent directory is an error */
	if (fvp == tdvp)
		error = EINVAL;
	/*
	 * If source is the same as the destination (that is the
	 * same inode number) then there is nothing to do.
	 * (fixed to have POSIX semantics - CSM 3/2/98)
	 */
	if (fvp == tvp)
		error = -1;
	if (!error)
		error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
out:
	sdvp = tond.ni_startdir;
	/*
	 * nameidone has to happen before we vnode_put(tdvp)
	 * since it may need to release the fs_nodelock on the tdvp
	 */
	nameidone(&tond);

	if (tvp)
		vnode_put(tvp);
	vnode_put(tdvp);
	vnode_put(sdvp);
out1:
	vnode_put(fvp);

	nameidone(&fromnd);

	/* -1 is the internal "nothing to do" marker, reported as success */
	if (error == -1)
		return (0);
	return (error);
}
6792
6793 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
6794
6795 /*
6796 * Helper function for doing clones. The caller is expected to provide an
6797 * iocounted source vnode and release it.
6798 */
static int
clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
    user_addr_t dst, uint32_t flags, vfs_context_t ctx)
{
	vnode_t tvp, tdvp;
	struct nameidata tond;
	int error;
	int follow;
	boolean_t free_acl;
	boolean_t attr_cleanup;
	enum vtype v_type;
	kauth_action_t action;
	struct componentname *cnp;
	uint32_t defaulted;
	struct vnode_attr va;

	/* Only regular files, symlinks and (non-root, non-mount) dirs clone. */
	v_type = vnode_vtype(fvp);
	switch (v_type) {
	case VLNK:
		/* FALLTHRU */
	case VREG:
		action = KAUTH_VNODE_ADD_FILE;
		break;
	case VDIR:
		if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
		    fvp->v_mountedhere) {
			return (EINVAL);
		}
		action = KAUTH_VNODE_ADD_SUBDIRECTORY;
		break;
	default:
		return (EINVAL);
	}

	AUDIT_ARG(fd2, dst_dirfd);
	AUDIT_ARG(value32, flags);

	follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
	NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
	    UIO_USERSPACE, dst, ctx);
	if ((error = nameiat(&tond, dst_dirfd)))
		return (error);
	cnp = &tond.ni_cnd;
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;

	free_acl = FALSE;
	attr_cleanup = FALSE;

	/* The destination must not exist. */
	if (tvp != NULL) {
		error = EEXIST;
		goto out;
	}

	/* Cloning only works within a single filesystem. */
	if (vnode_mount(tdvp) != vnode_mount(fvp)) {
		error = EXDEV;
		goto out;
	}

#if CONFIG_MACF
	if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp)))
		goto out;
#endif
	if ((error = vnode_authorize(tdvp, NULL, action, ctx)))
		goto out;

	/*
	 * The caller may have already authorized data reads (e.g. via an
	 * FREAD descriptor); in that case skip re-checking READ_DATA.
	 */
	action = KAUTH_VNODE_GENERIC_READ_BITS;
	if (data_read_authorised)
		action &= ~KAUTH_VNODE_READ_DATA;
	if ((error = vnode_authorize(fvp, NULL, action, ctx)))
		goto out;

	/*
	 * certain attributes may need to be changed from the source, we ask for
	 * those here.
	 */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_type);
	VATTR_WANTED(&va, va_mode);
	VATTR_WANTED(&va, va_flags);
	VATTR_WANTED(&va, va_acl);

	if ((error = vnode_getattr(fvp, &va, ctx)) != 0)
		goto out;

	/* vnode_getattr() allocated the ACL; remember to free it at 'out'. */
	if (!VATTR_IS_SUPPORTED(&va, va_acl))
		VATTR_CLEAR_ACTIVE(&va, va_acl);
	else if (va.va_acl != NULL)
		free_acl = TRUE;

	if (!VATTR_IS_SUPPORTED(&va, va_mode)) {
		VATTR_CLEAR_ACTIVE(&va, va_mode);
	} else {
		proc_t p = vfs_context_proc(ctx);

		/* the clone gets the source's permissions filtered by umask */
		VATTR_SET(&va, va_mode,
		    (va.va_mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
	}

	if (!VATTR_IS_SUPPORTED(&va, va_flags)) {
		VATTR_CLEAR_ACTIVE(&va, va_flags);
	} else if (va.va_flags & SF_RESTRICTED) {
		/*
		 * Turn off SF_RESTRICTED from source, if the destination needs
		 * it, it will be handled in vnode_authattr_new.
		 */
		VATTR_SET(&va, va_flags, (va.va_flags & ~SF_RESTRICTED));
	}

	/* Handle ACL inheritance, initialize vap. */
	if (v_type == VLNK) {
		error = vnode_authattr_new(tdvp, &va, 0, ctx);
	} else {
		error = vn_attribute_prepare(tdvp, &va, &defaulted, ctx);
		attr_cleanup = TRUE;
	}

	if (error) {
		/* vn_attribute_prepare() failed; nothing to clean up. */
		attr_cleanup = FALSE;
		goto out;
	}

	error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &va, flags, ctx);

	if (!error && tvp) {
		int update_flags = 0;
#if CONFIG_FSE
		int fsevent;
#endif /* CONFIG_FSE */

#if CONFIG_MACF
		(void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
		    VNODE_LABEL_CREATE, ctx);
#endif
		/*
		 * If some of the requested attributes weren't handled by the
		 * VNOP, use our fallback code.
		 */
		if (!VATTR_ALL_SUPPORTED(&va))
			(void)vnode_setattr_fallback(tvp, &va, ctx);

		// Make sure the name & parent pointers are hooked up
		if (tvp->v_name == NULL)
			update_flags |= VNODE_UPDATE_NAME;
		if (tvp->v_parent == NULLVP)
			update_flags |= VNODE_UPDATE_PARENT;

		if (update_flags) {
			(void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
			    cnp->cn_namelen, cnp->cn_hash, update_flags);
		}

#if CONFIG_FSE
		switch (vnode_vtype(tvp)) {
		case VLNK:
			/* FALLTHRU */
		case VREG:
			fsevent = FSE_CREATE_FILE;
			break;
		case VDIR:
			fsevent = FSE_CREATE_DIR;
			break;
		default:
			goto out;
		}

		if (need_fsevent(fsevent, tvp)) {
			add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
			    FSE_ARG_DONE);
		}
#endif /* CONFIG_FSE */
	}
#if CLONE_SNAPSHOT_FALLBACKS_ENABLED
	else if (error == ENOTSUP) {
		struct vfs_attr vfa;

		/*
		 * Fallback to VNOP_COPYFILE but check first that the
		 * filesystem supports cloning.
		 */
		VFSATTR_INIT(&vfa);
		VFSATTR_WANTED(&vfa, f_capabilities);
		if ((vfs_getattr(vnode_mount(tdvp), &vfa, ctx) == 0) &&
		    VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) &&
		    (vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_CLONE) &&
		    (vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_CLONE)) {

			error = VNOP_COPYFILE(fvp, tdvp, tvp, cnp, 0,
			    0, ctx);
		}
	}
#endif /* CLONE_SNAPSHOT_FALLBACKS_ENABLED */

out:
	/* Unwind in reverse order of acquisition; see the flags set above. */
	if (attr_cleanup)
		vn_attribute_cleanup(&va, defaulted);
	if (free_acl && va.va_acl)
		kauth_acl_free(va.va_acl);
	nameidone(&tond);
	if (tvp)
		vnode_put(tvp);
	vnode_put(tdvp);
	return (error);
}
7003
7004 /*
7005 * clone files or directories, target must not exist.
7006 */
7007 /* ARGSUSED */
7008 int
7009 clonefileat(__unused proc_t p, struct clonefileat_args *uap,
7010 __unused int32_t *retval)
7011 {
7012 vnode_t fvp;
7013 struct nameidata fromnd;
7014 int follow;
7015 int error;
7016 vfs_context_t ctx = vfs_context_current();
7017
7018 /* Check that the flags are valid. */
7019 if (uap->flags & ~CLONE_NOFOLLOW)
7020 return (EINVAL);
7021
7022 AUDIT_ARG(fd, uap->src_dirfd);
7023
7024 follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7025 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
7026 UIO_USERSPACE, uap->src, ctx);
7027 if ((error = nameiat(&fromnd, uap->src_dirfd)))
7028 return (error);
7029
7030 fvp = fromnd.ni_vp;
7031 nameidone(&fromnd);
7032
7033 error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
7034 uap->flags, ctx);
7035
7036 vnode_put(fvp);
7037 return (error);
7038 }
7039
7040 int
7041 fclonefileat(__unused proc_t p, struct fclonefileat_args *uap,
7042 __unused int32_t *retval)
7043 {
7044 vnode_t fvp;
7045 struct fileproc *fp;
7046 int error;
7047 vfs_context_t ctx = vfs_context_current();
7048
7049 AUDIT_ARG(fd, uap->src_fd);
7050 error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
7051 if (error)
7052 return (error);
7053
7054 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7055 AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
7056 error = EBADF;
7057 goto out;
7058 }
7059
7060 if ((error = vnode_getwithref(fvp)))
7061 goto out;
7062
7063 AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
7064
7065 error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
7066 uap->flags, ctx);
7067
7068 vnode_put(fvp);
7069 out:
7070 file_drop(uap->src_fd);
7071 return (error);
7072 }
7073
/*
 * Rename files. Source and destination must either both be directories,
 * or both not be directories. If target is a directory, it must be empty.
 *
 * Common backend for rename(2), renameat(2) and renameatx_np(2).
 * 'from' and 'to' (in segment 'segflg') are resolved relative to
 * 'fromfd'/'tofd' respectively.  'flags' may carry VFS_RENAME_SWAP or
 * VFS_RENAME_EXCL (mutually exclusive).  The whole lookup may be
 * re-driven from "retry:" after races (ENOENT from authorization,
 * ERECYCLE from the filesystem); directory renames that reshape the
 * tree are serialized under the per-mount rename lock.
 */
/* ARGSUSED */
static int
renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
    int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
{
	if (flags & ~VFS_RENAME_FLAGS_MASK)
		return EINVAL;

	/* Swap and exclusive-target are contradictory requests. */
	if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL))
		return EINVAL;

	vnode_t tvp, tdvp;
	vnode_t fvp, fdvp;
	struct nameidata *fromnd, *tond;
	int error;
	int do_retry;
	int retry_count;
	int mntrename;
	int need_event;
	const char *oname = NULL;
	char *from_name = NULL, *to_name = NULL;
	int from_len=0, to_len=0;
	int holding_mntlock;
	mount_t locked_mp = NULL;
	vnode_t oparent = NULLVP;
#if CONFIG_FSE
	fse_info from_finfo, to_finfo;
#endif
	int from_truncated=0, to_truncated;
	int batched = 0;
	struct vnode_attr *fvap, *tvap;
	/*
	 * NOTE(review): 'continuing' is initialized to 0 and never updated,
	 * so the "|| !continuing" lookup conditions below are always taken
	 * on entry — confirm whether this is intentional.
	 */
	int continuing = 0;
	/* carving out a chunk for structs that are too big to be on stack. */
	struct {
		struct nameidata from_node, to_node;
		struct vnode_attr fv_attr, tv_attr;
	} * __rename_data;
	MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
	fromnd = &__rename_data->from_node;
	tond = &__rename_data->to_node;

	holding_mntlock = 0;
	do_retry = 0;
	retry_count = 0;
retry:
	/* Reset per-attempt state; a retry re-does both lookups from scratch. */
	fvp = tvp = NULL;
	fdvp = tdvp = NULL;
	fvap = tvap = NULL;
	mntrename = FALSE;

	/* Both lookups are set up as compound-rename capable. */
	NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
	    segflg, from, ctx);
	fromnd->ni_flag = NAMEI_COMPOUNDRENAME;

	NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
	    segflg, to, ctx);
	tond->ni_flag = NAMEI_COMPOUNDRENAME;

continue_lookup:
	if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
		if ( (error = nameiat(fromnd, fromfd)) )
			goto out1;
		fdvp = fromnd->ni_dvp;
		fvp = fromnd->ni_vp;

		if (fvp && fvp->v_type == VDIR)
			tond->ni_cnd.cn_flags |= WILLBEDIR;
	}

	if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
		if ( (error = nameiat(tond, tofd)) ) {
			/*
			 * Translate error code for rename("dir1", "dir2/.").
			 */
			if (error == EISDIR && fvp->v_type == VDIR)
				error = EINVAL;
			goto out1;
		}
		tdvp = tond->ni_dvp;
		tvp = tond->ni_vp;
	}

	/* Swap requires both names to exist; EXCL requires the target not to. */
	if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
		error = ENOENT;
		goto out1;
	}

	if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
		error = EEXIST;
		goto out1;
	}

	/* A compound-rename capable FS performs authorization in the VNOP. */
	batched = vnode_compound_rename_available(fdvp);
	if (!fvp) {
		/*
		 * Claim: this check will never reject a valid rename.
		 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
		 * Suppose fdvp and tdvp are not on the same mount.
		 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
		 * then you can't move it to within another dir on the same mountpoint.
		 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
		 *
		 * If this check passes, then we are safe to pass these vnodes to the same FS.
		 */
		if (fdvp->v_mount != tdvp->v_mount) {
			error = EXDEV;
			goto out1;
		}
		goto skipped_lookup;
	}

	if (!batched) {
		error = vn_authorize_renamex(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, flags, NULL);
		if (error) {
			if (error == ENOENT) {
				assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
				if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
					/*
					 * We encountered a race where after doing the namei, tvp stops
					 * being valid. If so, simply re-drive the rename call from the
					 * top.
					 */
					do_retry = 1;
					retry_count += 1;
				}
			}
			goto out1;
		}
	}

	/*
	 * If the source and destination are the same (i.e. they're
	 * links to the same vnode) and the target file system is
	 * case sensitive, then there is nothing to do.
	 *
	 * XXX Come back to this.
	 */
	if (fvp == tvp) {
		int pathconf_val;

		/*
		 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
		 * then assume that this file system is case sensitive.
		 */
		if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
		    pathconf_val != 0) {
			goto out1;
		}
	}

	/*
	 * Allow the renaming of mount points.
	 * - target must not exist
	 * - target must reside in the same directory as source
	 * - union mounts cannot be renamed
	 * - "/" cannot be renamed
	 *
	 * XXX Handle this in VFS after a continued lookup (if we missed
	 * in the cache to start off)
	 *
	 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
	 * we'll skip past here.  The file system is responsible for
	 * checking that @tvp is not a descendent of @fvp and vice versa
	 * so it should always return EINVAL if either @tvp or @fvp is the
	 * root of a volume.
	 */
	if ((fvp->v_flag & VROOT) &&
	    (fvp->v_type == VDIR) &&
	    (tvp == NULL)  &&
	    (fvp->v_mountedhere == NULL)  &&
	    (fdvp == tdvp)  &&
	    ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0)  &&
	    (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
		vnode_t coveredvp;

		/* switch fvp to the covered vnode */
		coveredvp = fvp->v_mount->mnt_vnodecovered;
		if ( (vnode_getwithref(coveredvp)) ) {
			error = ENOENT;
			goto out1;
		}
		vnode_put(fvp);

		fvp = coveredvp;
		mntrename = TRUE;
	}
	/*
	 * Check for cross-device rename.
	 */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		goto out1;
	}

	/*
	 * If source is the same as the destination (that is the
	 * same inode number) then there is nothing to do...
	 * EXCEPT if the underlying file system supports case
	 * insensitivity and is case preserving.  In this case
	 * the file system needs to handle the special case of
	 * getting the same vnode as target (fvp) and source (tvp).
	 *
	 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
	 * and _PC_CASE_PRESERVING can have this exception, and they need to
	 * handle the special case of getting the same vnode as target and
	 * source.  NOTE: Then the target is unlocked going into vnop_rename,
	 * so not to cause locking problems. There is a single reference on tvp.
	 *
	 * NOTE - that fvp == tvp also occurs if they are hard linked and
	 * that correct behaviour then is just to return success without doing
	 * anything.
	 *
	 * XXX filesystem should take care of this itself, perhaps...
	 */
	if (fvp == tvp && fdvp == tdvp) {
		if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
		    !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
			  fromnd->ni_cnd.cn_namelen)) {
			goto out1;
		}
	}

	if (holding_mntlock && fvp->v_mount != locked_mp) {
		/*
		 * we're holding a reference and lock
		 * on locked_mp, but it no longer matches
		 * what we want to do... so drop our hold
		 */
		mount_unlock_renames(locked_mp);
		mount_drop(locked_mp, 0);
		holding_mntlock = 0;
	}
	if (tdvp != fdvp && fvp->v_type == VDIR) {
		/*
		 * serialize renames that re-shape
		 * the tree... if holding_mntlock is
		 * set, then we're ready to go...
		 * otherwise we
		 * first need to drop the iocounts
		 * we picked up, second take the
		 * lock to serialize the access,
		 * then finally start the lookup
		 * process over with the lock held
		 */
		if (!holding_mntlock) {
			/*
			 * need to grab a reference on
			 * the mount point before we
			 * drop all the iocounts... once
			 * the iocounts are gone, the mount
			 * could follow
			 */
			locked_mp = fvp->v_mount;
			mount_ref(locked_mp, 0);

			/*
			 * nameidone has to happen before we vnode_put(tvp)
			 * since it may need to release the fs_nodelock on the tvp
			 */
			nameidone(tond);

			if (tvp)
				vnode_put(tvp);
			vnode_put(tdvp);

			/*
			 * nameidone has to happen before we vnode_put(fdvp)
			 * since it may need to release the fs_nodelock on the fvp
			 */
			nameidone(fromnd);

			vnode_put(fvp);
			vnode_put(fdvp);

			mount_lock_renames(locked_mp);
			holding_mntlock = 1;

			goto retry;
		}
	} else {
		/*
		 * when we dropped the iocounts to take
		 * the lock, we allowed the identity of
		 * the various vnodes to change... if they did,
		 * we may no longer be dealing with a rename
		 * that reshapes the tree... once we're holding
		 * the iocounts, the vnodes can't change type
		 * so we're free to drop the lock at this point
		 * and continue on
		 */
		if (holding_mntlock) {
			mount_unlock_renames(locked_mp);
			mount_drop(locked_mp, 0);
			holding_mntlock = 0;
		}
	}

	// save these off so we can later verify that fvp is the same
	oname   = fvp->v_name;
	oparent = fvp->v_parent;

skipped_lookup:
#if CONFIG_FSE
	need_event = need_fsevent(FSE_RENAME, fdvp);
	if (need_event) {
		if (fvp) {
			get_fse_info(fvp, &from_finfo, ctx);
		} else {
			error = vfs_get_notify_attributes(&__rename_data->fv_attr);
			if (error) {
				goto out1;
			}

			fvap = &__rename_data->fv_attr;
		}

		if (tvp) {
			get_fse_info(tvp, &to_finfo, ctx);
		} else if (batched) {
			error = vfs_get_notify_attributes(&__rename_data->tv_attr);
			if (error) {
				goto out1;
			}

			tvap = &__rename_data->tv_attr;
		}
	}
#else
	need_event = 0;
#endif /* CONFIG_FSE */

	/* Capture both pathnames up front for fsevents / fileop listeners. */
	if (need_event || kauth_authorize_fileop_has_listeners()) {
		if (from_name == NULL) {
			GET_PATH(from_name);
			if (from_name == NULL) {
				error = ENOMEM;
				goto out1;
			}
		}

		from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);

		if (to_name == NULL) {
			GET_PATH(to_name);
			if (to_name == NULL) {
				error = ENOMEM;
				goto out1;
			}
		}

		to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
	}
	error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
			    tdvp, &tvp, &tond->ni_cnd, tvap,
			    flags, ctx);

	if (holding_mntlock) {
		/*
		 * we can drop our serialization
		 * lock now
		 */
		mount_unlock_renames(locked_mp);
		mount_drop(locked_mp, 0);
		holding_mntlock = 0;
	}
	if (error) {
		if (error == EKEEPLOOKING) {
			if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
				if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
					panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
				}
			}

			fromnd->ni_vp = fvp;
			tond->ni_vp = tvp;

			goto continue_lookup;
		}

		/*
		 * We may encounter a race in the VNOP where the destination didn't
		 * exist when we did the namei, but it does by the time we go and
		 * try to create the entry. In this case, we should re-drive this rename
		 * call from the top again.  Currently, only HFS bubbles out ERECYCLE,
		 * but other filesystems susceptible to this race could return it, too.
		 */
		if (error == ERECYCLE) {
			do_retry = 1;
		}

		/*
		 * For compound VNOPs, the authorization callback may return
		 * ENOENT in case of racing hardlink lookups hitting the name
		 * cache, redrive the lookup.
		 */
		if (batched && error == ENOENT) {
			assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
			if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
				do_retry = 1;
				retry_count += 1;
			}
		}

		goto out1;
	}

	/* call out to allow 3rd party notification of rename.
	 * Ignore result of kauth_authorize_fileop call.
	 */
	kauth_authorize_fileop(vfs_context_ucred(ctx),
			KAUTH_FILEOP_RENAME,
			(uintptr_t)from_name, (uintptr_t)to_name);
	if (flags & VFS_RENAME_SWAP) {
		kauth_authorize_fileop(vfs_context_ucred(ctx),
				       KAUTH_FILEOP_RENAME,
				       (uintptr_t)to_name, (uintptr_t)from_name);
	}

#if CONFIG_FSE
	if (from_name != NULL && to_name != NULL) {
		if (from_truncated || to_truncated) {
			// set it here since only the from_finfo gets reported up to user space
			from_finfo.mode |= FSE_TRUNCATED_PATH;
		}

		if (tvap && tvp) {
			vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
		}
		if (fvap) {
			vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
		}

		if (tvp) {
			add_fsevent(FSE_RENAME, ctx,
				    FSE_ARG_STRING, from_len, from_name,
				    FSE_ARG_FINFO, &from_finfo,
				    FSE_ARG_STRING, to_len, to_name,
				    FSE_ARG_FINFO, &to_finfo,
				    FSE_ARG_DONE);
			if (flags & VFS_RENAME_SWAP) {
				/*
				 * Strictly speaking, swap is the equivalent of
				 * *three* renames.  FSEvents clients should only take
				 * the events as a hint, so we only bother reporting
				 * two.
				 */
				add_fsevent(FSE_RENAME, ctx,
					    FSE_ARG_STRING, to_len, to_name,
					    FSE_ARG_FINFO, &to_finfo,
					    FSE_ARG_STRING, from_len, from_name,
					    FSE_ARG_FINFO, &from_finfo,
					    FSE_ARG_DONE);
			}
		} else {
			add_fsevent(FSE_RENAME, ctx,
				    FSE_ARG_STRING, from_len, from_name,
				    FSE_ARG_FINFO, &from_finfo,
				    FSE_ARG_STRING, to_len, to_name,
				    FSE_ARG_DONE);
		}
	}
#endif /* CONFIG_FSE */

	/*
	 * update filesystem's mount point data
	 * (rewrite the tail of f_mntonname with the mount point's new name)
	 */
	if (mntrename) {
		char *cp, *pathend, *mpname;
		char * tobuf;
		struct mount *mp;
		int maxlen;
		size_t len = 0;

		mp = fvp->v_mountedhere;

		if (vfs_busy(mp, LK_NOWAIT)) {
			error = EBUSY;
			goto out1;
		}
		MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);

		if (UIO_SEG_IS_USER_SPACE(segflg))
			error = copyinstr(to, tobuf, MAXPATHLEN, &len);
		else
			error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
		if (!error) {
			/* find current mount point prefix */
			pathend = &mp->mnt_vfsstat.f_mntonname[0];
			for (cp = pathend; *cp != '\0'; ++cp) {
				if (*cp == '/')
					pathend = cp + 1;
			}
			/* find last component of target name */
			for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
				if (*cp == '/')
					mpname = cp + 1;
			}
			/* append name to prefix */
			maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
			bzero(pathend, maxlen);
			strlcpy(pathend, mpname, maxlen);
		}
		FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);

		vfs_unbusy(mp);
	}
	/*
	 * fix up name & parent pointers.  note that we first
	 * check that fvp has the same name/parent pointers it
	 * had before the rename call... this is a 'weak' check
	 * at best...
	 *
	 * XXX oparent and oname may not be set in the compound vnop case
	 */
	if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
		int update_flags;

		update_flags = VNODE_UPDATE_NAME;

		if (fdvp != tdvp)
			update_flags |= VNODE_UPDATE_PARENT;

		vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
	}
out1:
	if (to_name != NULL) {
		RELEASE_PATH(to_name);
		to_name = NULL;
	}
	if (from_name != NULL) {
		RELEASE_PATH(from_name);
		from_name = NULL;
	}
	if (holding_mntlock) {
		mount_unlock_renames(locked_mp);
		mount_drop(locked_mp, 0);
		holding_mntlock = 0;
	}
	if (tdvp) {
		/*
		 * nameidone has to happen before we vnode_put(tdvp)
		 * since it may need to release the fs_nodelock on the tdvp
		 */
		nameidone(tond);

		if (tvp)
			vnode_put(tvp);
		vnode_put(tdvp);
	}
	if (fdvp) {
		/*
		 * nameidone has to happen before we vnode_put(fdvp)
		 * since it may need to release the fs_nodelock on the fdvp
		 */
		nameidone(fromnd);

		if (fvp)
			vnode_put(fvp);
		vnode_put(fdvp);
	}

	/*
	 * If things changed after we did the namei, then we will re-drive
	 * this rename call from the top.
	 */
	if (do_retry) {
		do_retry = 0;
		goto retry;
	}

	FREE(__rename_data, M_TEMP);
	return (error);
}
7652
7653 int
7654 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7655 {
7656 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7657 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7658 }
7659
7660 int renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
7661 {
7662 return renameat_internal(
7663 vfs_context_current(),
7664 uap->fromfd, uap->from,
7665 uap->tofd, uap->to,
7666 UIO_USERSPACE, uap->flags);
7667 }
7668
7669 int
7670 renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7671 {
7672 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7673 uap->tofd, uap->to, UIO_USERSPACE, 0));
7674 }
7675
/*
 * Make a directory file.
 *
 * 'path' (in segment 'segflg') is resolved relative to 'fd'; 'vap'
 * supplies the attributes (va_type is forced to VDIR here).  Uses a
 * compound-mkdir lookup when the filesystem supports it, continuing
 * the lookup on EKEEPLOOKING.
 *
 * Returns:	0			Success
 *		EEXIST
 *	namei:???
 *	vnode_authorize:???
 *	vn_create:???
 */
/* ARGSUSED */
static int
mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
    enum uio_seg segflg)
{
	vnode_t	vp, dvp;
	int error;
	int update_flags = 0;
	int batched;
	struct nameidata nd;

	AUDIT_ARG(mode, vap->va_mode);
	NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
	    path, ctx);
	nd.ni_cnd.cn_flags |= WILLBEDIR;
	nd.ni_flag = NAMEI_COMPOUNDMKDIR;

continue_lookup:
	error = nameiat(&nd, fd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* A populated ni_vp means the name already exists. */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}

	/*
	 * NOTE(review): 'batched' is computed here but never consulted in
	 * this function — confirm whether the availability probe is needed
	 * for a side effect or is simply dead.
	 */
	batched = vnode_compound_mkdir_available(dvp);

	VATTR_SET(vap, va_type, VDIR);

	/*
	 * XXX
	 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
	 * only get EXISTS or EISDIR for existing path components, and not that it could see
	 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
	 * it will fail in a spurious  manner.  Need to figure out if this is valid behavior.
	 */
	if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
		if (error == EACCES || error == EPERM) {
			int error2;

			nameidone(&nd);
			vnode_put(dvp);
			dvp = NULLVP;

			/*
			 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
			 * rather than EACCESS if the target exists.
			 */
			NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
			    path, ctx);
			error2 = nameiat(&nd, fd);
			if (error2) {
				goto out;
			} else {
				vp = nd.ni_vp;
				error = EEXIST;
				goto out;
			}
		}

		goto out;
	}

	/*
	 * make the directory
	 */
	if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
		/* Compound VNOP asked us to finish the lookup and try again. */
		if (error == EKEEPLOOKING) {
			nd.ni_vp = vp;
			goto continue_lookup;
		}

		goto out;
	}

	// Make sure the name & parent pointers are hooked up
	if (vp->v_name == NULL)
		update_flags |= VNODE_UPDATE_NAME;
	if (vp->v_parent == NULLVP)
		update_flags |= VNODE_UPDATE_PARENT;

	if (update_flags)
		vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
	add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
#endif

out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	if (dvp)
		vnode_put(dvp);

	return (error);
}
7791
7792 /*
7793 * mkdir_extended: Create a directory; with extended security (ACL).
7794 *
7795 * Parameters: p Process requesting to create the directory
7796 * uap User argument descriptor (see below)
7797 * retval (ignored)
7798 *
7799 * Indirect: uap->path Path of directory to create
7800 * uap->mode Access permissions to set
7801 * uap->xsecurity ACL to set
7802 *
7803 * Returns: 0 Success
7804 * !0 Not success
7805 *
7806 */
7807 int
7808 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
7809 {
7810 int ciferror;
7811 kauth_filesec_t xsecdst;
7812 struct vnode_attr va;
7813
7814 AUDIT_ARG(owner, uap->uid, uap->gid);
7815
7816 xsecdst = NULL;
7817 if ((uap->xsecurity != USER_ADDR_NULL) &&
7818 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
7819 return ciferror;
7820
7821 VATTR_INIT(&va);
7822 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7823 if (xsecdst != NULL)
7824 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
7825
7826 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7827 UIO_USERSPACE);
7828 if (xsecdst != NULL)
7829 kauth_filesec_free(xsecdst);
7830 return ciferror;
7831 }
7832
7833 int
7834 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
7835 {
7836 struct vnode_attr va;
7837
7838 VATTR_INIT(&va);
7839 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7840
7841 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7842 UIO_USERSPACE));
7843 }
7844
7845 int
7846 mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
7847 {
7848 struct vnode_attr va;
7849
7850 VATTR_INIT(&va);
7851 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7852
7853 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
7854 UIO_USERSPACE));
7855 }
7856
/*
 * Remove the directory named by 'dirpath' (segment 'segflg'), resolved
 * relative to 'fd'.  Backend for rmdir(2).  Uses a compound-rmdir lookup
 * when available and may restart the whole operation (the outer do/while)
 * after authorization races or after clearing orphaned AppleDouble files.
 */
static int
rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
    enum uio_seg segflg)
{
	vnode_t vp, dvp;
	int error;
	struct nameidata nd;
	char     *path = NULL;
	int       len=0;
	int has_listeners = 0;
	int need_event = 0;
	int truncated = 0;
#if CONFIG_FSE
	struct vnode_attr va;
#endif /* CONFIG_FSE */
	struct vnode_attr *vap = NULL;
	int restart_count = 0;
	int batched;

	int restart_flag;

	/*
	 * This loop exists to restart rmdir in the unlikely case that two
	 * processes are simultaneously trying to remove the same directory
	 * containing orphaned appleDouble files.
	 */
	do {
		NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
		    segflg, dirpath, ctx);
		nd.ni_flag = NAMEI_COMPOUNDRMDIR;
continue_lookup:
		restart_flag = 0;
		vap = NULL;

		error = nameiat(&nd, fd);
		if (error)
			return (error);

		dvp = nd.ni_dvp;
		vp = nd.ni_vp;

		if (vp) {
			batched = vnode_compound_rmdir_available(vp);

			if (vp->v_flag & VROOT) {
				/*
				 * The root of a mounted filesystem cannot be deleted.
				 */
				error = EBUSY;
				goto out;
			}

			/*
			 * Removed a check here; we used to abort if vp's vid
			 * was not the same as what we'd seen the last time around.
			 * I do not think that check was valid, because if we retry
			 * and all dirents are gone, the directory could legitimately
			 * be recycled but still be present in a situation where we would
			 * have had permission to delete.  Therefore, we won't make
			 * an effort to preserve that check now that we may not have a
			 * vp here.
			 */

			if (!batched) {
				error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
				if (error) {
					/*
					 * Authorization can race a concurrent lookup; bounded
					 * number of full restarts before giving up with ENOENT.
					 */
					if (error == ENOENT) {
						assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
						if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
							restart_flag = 1;
							restart_count += 1;
						}
					}
					goto out;
				}
			}
		} else {
			batched = 1;

			if (!vnode_compound_rmdir_available(dvp)) {
				panic("No error, but no compound rmdir?");
			}
		}

#if CONFIG_FSE
		fse_info  finfo;

		need_event = need_fsevent(FSE_DELETE, dvp);
		if (need_event) {
			if (!batched) {
				get_fse_info(vp, &finfo, ctx);
			} else {
				error = vfs_get_notify_attributes(&va);
				if (error) {
					goto out;
				}

				vap = &va;
			}
		}
#endif
		has_listeners = kauth_authorize_fileop_has_listeners();
		if (need_event || has_listeners) {
			if (path == NULL) {
				GET_PATH(path);
				if (path == NULL) {
					error = ENOMEM;
					goto out;
				}
			}

			len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
#if CONFIG_FSE
			if (truncated) {
				finfo.mode |= FSE_TRUNCATED_PATH;
			}
#endif
		}

		error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
		nd.ni_vp = vp;
		if (vp == NULLVP) {
			/* Couldn't find a vnode */
			goto out;
		}

		if (error == EKEEPLOOKING) {
			goto continue_lookup;
		} else if (batched && error == ENOENT) {
			assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
			if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
				/*
				 * For compound VNOPs, the authorization callback
				 * may return ENOENT in case of racing hard link lookups
				 * redrive the lookup.
				 */
				restart_flag = 1;
				restart_count += 1;
				goto out;
			}
		}
#if CONFIG_APPLEDOUBLE
		/*
		 * Special case to remove orphaned AppleDouble
		 * files. I don't like putting this in the kernel,
		 * but carbon does not like putting this in carbon either,
		 * so here we are.
		 */
		if (error == ENOTEMPTY) {
			error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
			if (error == EBUSY) {
				goto out;
			}


			/*
			 * Assuming everything went well, we will try the RMDIR again
			 */
			if (!error)
				error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
		}
#endif /* CONFIG_APPLEDOUBLE */
		/*
		 * Call out to allow 3rd party notification of delete.
		 * Ignore result of kauth_authorize_fileop call.
		 */
		if (!error) {
			if (has_listeners) {
				kauth_authorize_fileop(vfs_context_ucred(ctx),
				    KAUTH_FILEOP_DELETE,
				    (uintptr_t)vp,
				    (uintptr_t)path);
			}

			if (vp->v_flag & VISHARDLINK) {
				// see the comment in unlink1() about why we update
				// the parent of a hard link when it is removed
				vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
			}

#if CONFIG_FSE
			if (need_event) {
				if (vap) {
					vnode_get_fse_info_from_vap(vp, &finfo, vap);
				}
				add_fsevent(FSE_DELETE, ctx,
				    FSE_ARG_STRING, len, path,
				    FSE_ARG_FINFO, &finfo,
				    FSE_ARG_DONE);
			}
#endif
		}

out:
		if (path != NULL) {
			RELEASE_PATH(path);
			path = NULL;
		}
		/*
		 * nameidone has to happen before we vnode_put(dvp)
		 * since it may need to release the fs_nodelock on the dvp
		 */
		nameidone(&nd);
		vnode_put(dvp);

		if (vp)
			vnode_put(vp);

		/*
		 * NOTE(review): vp has been vnode_put() above; from here on it
		 * is used only as an opaque address for the sleep/wakeup channel
		 * that pairs with the AppleDouble restart path — confirm this is
		 * intentional, since the pointer itself is no longer held.
		 */
		if (restart_flag == 0) {
			wakeup_one((caddr_t)vp);
			return (error);
		}
		tsleep(vp, PVFS, "rm AD", 1);

	} while (restart_flag != 0);

	return (error);

}
8076
8077 /*
8078 * Remove a directory file.
8079 */
8080 /* ARGSUSED */
8081 int
8082 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
8083 {
8084 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
8085 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
8086 }
8087
/* Get direntry length padded to 8 byte alignment */
/*
 * Trims the unused tail of struct direntry's fixed-size d_name for a
 * name of 'namlen' bytes, then rounds the record up to the next 8-byte
 * boundary (assumes d_name is MAXPATHLEN bytes — TODO confirm).
 */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
8091
/*
 * Read directory entries from 'vp' into 'uio' in the extended
 * (struct direntry) format.  If the filesystem natively supports
 * VNODE_READDIR_EXTENDED (and the mount does not deny it), the request
 * is passed straight through; otherwise legacy struct dirent records
 * are read into a wired kernel buffer and re-packed one entry at a time
 * into struct direntry before being copied out to the caller's uio.
 */
errno_t
vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
                int *numdirent, vfs_context_t ctxp)
{
	/* Check if fs natively supports VNODE_READDIR_EXTENDED */
	if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
		   ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0))	{
		return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
	} else {
		size_t bufsize;
		void * bufptr;
		uio_t auio;
		struct direntry *entry64;
		struct dirent *dep;
		int bytesread;
		int error;

		/*
		 * Our kernel buffer needs to be smaller since re-packing
		 * will expand each dirent.  The worse case (when the name
		 * length is 3) corresponds to a struct direntry size of 32
		 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
		 * (4-byte aligned).  So having a buffer that is 3/8 the size
		 * will prevent us from reading more than we can pack.
		 *
		 * Since this buffer is wired memory, we will limit the
		 * buffer size to a maximum of 32K. We would really like to
		 * use 32K in the MIN(), but we use magic number 87371 to
		 * prevent uio_resid() * 3 / 8 from overflowing.
		 */
		bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
		MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
		if (bufptr == NULL) {
			return ENOMEM;
		}

		/* Read legacy-format entries into the kernel buffer. */
		auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufptr, bufsize);
		auio->uio_offset = uio->uio_offset;

		error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);

		dep = (struct dirent *)bufptr;
		bytesread = bufsize - uio_resid(auio);

		/* One scratch direntry, reused for every converted record. */
		MALLOC(entry64, struct direntry *, sizeof(struct direntry),
		       M_TEMP, M_WAITOK);
		/*
		 * Convert all the entries and copy them out to user's buffer.
		 * (Skipped entirely if VNOP_READDIR above failed.)
		 *
		 * NOTE(review): advancement relies on the FS returning a
		 * nonzero d_reclen for every entry — confirm this invariant,
		 * as a zero reclen would prevent the loop from progressing.
		 */
		while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
			size_t	enbufsize = DIRENT64_LEN(dep->d_namlen);

			bzero(entry64, enbufsize);
			/* Convert a dirent to a dirent64. */
			entry64->d_ino = dep->d_ino;
			entry64->d_seekoff = 0;
			entry64->d_reclen = enbufsize;
			entry64->d_namlen = dep->d_namlen;
			entry64->d_type = dep->d_type;
			bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);

			/* Move to next entry. */
			dep = (struct dirent *)((char *)dep + dep->d_reclen);

			/* Copy entry64 to user's buffer. */
			error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
		}

		/* Update the real offset using the offset we got from VNOP_READDIR. */
		if (error == 0) {
			uio->uio_offset = auio->uio_offset;
		}
		uio_free(auio);
		FREE(bufptr, M_TEMP);
		FREE(entry64, M_TEMP);
		return (error);
	}
}
8171
8172 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
8173
/*
 * Read a block of directory entries in a file system independent format.
 *
 * Shared implementation for getdirentries() and getdirentries64().  Reads
 * entries from the directory open on 'fd' into the user buffer described by
 * 'bufp'/'bufsize'.  On success, *bytesread is the number of bytes produced
 * and *offset (if non-NULL) is the directory offset *before* the read.
 * When 'flags' contains VNODE_READDIR_EXTENDED, entries are delivered in
 * the extended (struct direntry) format via vnode_readdir64().
 */
static int
getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
                     off_t *offset, int flags)
{
	vnode_t vp;
	struct vfs_context context = *vfs_context_current();	/* local copy */
	struct fileproc *fp;
	uio_t auio;
	int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	off_t loff;
	int error, eofflag, numdirent;
	char uio_buf[ UIO_SIZEOF(1) ];

	/* Resolve fd to its fileproc and vnode; takes a reference on fp. */
	error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
	if (error) {
		return (error);
	}
	/* The descriptor must have been opened for reading. */
	if ((fp->f_fglob->fg_flag & FREAD) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
		error = EBADF;
		goto out;
	}

	/* Clamp the request; the uio buffer is not wired, but keep it sane. */
	if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
		bufsize = GETDIRENTRIES_MAXBUFSIZE;

#if CONFIG_MACF
	error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
	if (error)
		goto out;
#endif
	if ( (error = vnode_getwithref(vp)) ) {
		goto out;
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

unionread:
	/* Re-checked on each union-mount iteration: vp may have changed. */
	if (vp->v_type != VDIR) {
		(void)vnode_put(vp);
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_readdir(&context, vp);
	if (error != 0) {
		(void)vnode_put(vp);
		goto out;
	}
#endif /* MAC */

	/* Build a uio over the caller's buffer, starting at the file offset. */
	loff = fp->f_fglob->fg_offset;
	auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, bufp, bufsize);

	if (flags & VNODE_READDIR_EXTENDED) {
		error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
		fp->f_fglob->fg_offset = uio_offset(auio);
	} else {
		error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
		fp->f_fglob->fg_offset = uio_offset(auio);
	}
	if (error) {
		(void)vnode_put(vp);
		goto out;
	}

	/*
	 * Nothing was read.  For union mounts this may just mean we've hit
	 * the end of the top layer: drop down to the covered directory and
	 * retry the read there.
	 */
	if ((user_ssize_t)bufsize == uio_resid(auio)){
		if (union_dircheckp) {
			/* -1 means "switched layers, retry"; vp was swapped. */
			error = union_dircheckp(&vp, fp, &context);
			if (error == -1)
				goto unionread;
			if (error)
				goto out;
		}

		if ((vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *tvp = vp;
			if (lookup_traverse_union(tvp, &vp, &context) == 0) {
				/* Point the open file at the lower vnode. */
				vnode_ref(vp);
				fp->f_fglob->fg_data = (caddr_t) vp;
				fp->f_fglob->fg_offset = 0;
				vnode_rele(tvp);
				vnode_put(tvp);
				goto unionread;
			}
			vp = tvp;
		}
	}

	vnode_put(vp);
	if (offset) {
		*offset = loff;		/* pre-read offset for the caller's basep */
	}

	*bytesread = bufsize - uio_resid(auio);
out:
	file_drop(fd);
	return (error);
}
8277
8278
8279 int
8280 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
8281 {
8282 off_t offset;
8283 ssize_t bytesread;
8284 int error;
8285
8286 AUDIT_ARG(fd, uap->fd);
8287 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
8288
8289 if (error == 0) {
8290 if (proc_is64bit(p)) {
8291 user64_long_t base = (user64_long_t)offset;
8292 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
8293 } else {
8294 user32_long_t base = (user32_long_t)offset;
8295 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
8296 }
8297 *retval = bytesread;
8298 }
8299 return (error);
8300 }
8301
8302 int
8303 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
8304 {
8305 off_t offset;
8306 ssize_t bytesread;
8307 int error;
8308
8309 AUDIT_ARG(fd, uap->fd);
8310 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
8311
8312 if (error == 0) {
8313 *retval = bytesread;
8314 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
8315 }
8316 return (error);
8317 }
8318
8319
8320 /*
8321 * Set the mode mask for creation of filesystem nodes.
8322 * XXX implement xsecurity
8323 */
8324 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
8325 static int
8326 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
8327 {
8328 struct filedesc *fdp;
8329
8330 AUDIT_ARG(mask, newmask);
8331 proc_fdlock(p);
8332 fdp = p->p_fd;
8333 *retval = fdp->fd_cmask;
8334 fdp->fd_cmask = newmask & ALLPERMS;
8335 proc_fdunlock(p);
8336 return (0);
8337 }
8338
8339 /*
8340 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
8341 *
8342 * Parameters: p Process requesting to set the umask
8343 * uap User argument descriptor (see below)
8344 * retval umask of the process (parameter p)
8345 *
8346 * Indirect: uap->newmask umask to set
8347 * uap->xsecurity ACL to set
8348 *
8349 * Returns: 0 Success
8350 * !0 Not success
8351 *
8352 */
8353 int
8354 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
8355 {
8356 int ciferror;
8357 kauth_filesec_t xsecdst;
8358
8359 xsecdst = KAUTH_FILESEC_NONE;
8360 if (uap->xsecurity != USER_ADDR_NULL) {
8361 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
8362 return ciferror;
8363 } else {
8364 xsecdst = KAUTH_FILESEC_NONE;
8365 }
8366
8367 ciferror = umask1(p, uap->newmask, xsecdst, retval);
8368
8369 if (xsecdst != KAUTH_FILESEC_NONE)
8370 kauth_filesec_free(xsecdst);
8371 return ciferror;
8372 }
8373
8374 int
8375 umask(proc_t p, struct umask_args *uap, int32_t *retval)
8376 {
8377 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
8378 }
8379
/*
 * revoke(2): Void all references to a file by ripping the underlying
 * filesystem away from the vnode.
 *
 * Only character and block special files are supported (ENOTSUP
 * otherwise), and a block device that is currently mounted-on is refused
 * with EBUSY.  The caller must either own the node or be superuser.
 */
/* ARGSUSED */
int
revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;

	NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	       uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* revoke() is only meaningful for device special files. */
	if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
		error = ENOTSUP;
		goto out;
	}

	/* Refuse to revoke a block device that is backing a mount. */
	if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
		error = EBUSY;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_revoke(ctx, vp);
	if (error)
		goto out;
#endif

	/* Owner or superuser only. */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	if ((error = vnode_getattr(vp, &va, ctx)))
		goto out;
	if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
	    (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
		goto out;
	/* Only bother if someone actually holds the node (or an alias). */
	if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
		VNOP_REVOKE(vp, REVOKEALL, ctx);
out:
	vnode_put(vp);		/* drop the iocount taken by namei() */
	return (error);
}
8432
8433
8434 /*
8435 * HFS/HFS PlUS SPECIFIC SYSTEM CALLS
8436 * The following system calls are designed to support features
8437 * which are specific to the HFS & HFS Plus volume formats
8438 */
8439
8440
/*
 * getdirentriesattr(2): Obtain attribute information on objects in a
 * directory while enumerating the directory.
 *
 * Copies in an attrlist and a max-entry count, then calls
 * VNOP_READDIRATTR() to fill the user buffer.  On return, *uap->count is
 * the number of entries actually produced, *uap->newstate is the
 * directory's change cookie, *uap->basep is the pre-read offset, and
 * *retval is the eof flag (0 or 1).  Union mounts are handled by
 * transparently descending to the covered directory when the top layer
 * is exhausted.
 */
/* ARGSUSED */
int
getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
{
	vnode_t vp;
	struct fileproc *fp;
	uio_t auio = NULL;
	int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	uint32_t count, savecount;
	uint32_t newstate;
	int error, eofflag;
	uint32_t loff;
	struct attrlist attributelist;
	vfs_context_t ctx = vfs_context_current();
	int fd = uap->fd;
	char uio_buf[ UIO_SIZEOF(1) ];
	kauth_action_t action;

	AUDIT_ARG(fd, fd);

	/* Get the attributes into kernel space */
	if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
		return(error);
	}
	if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
		return(error);
	}
	/* Remember the original count so it can be restored on layer switch. */
	savecount = count;
	if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
		return (error);
	}
	/* Descriptor must be open for reading. */
	if ((fp->f_fglob->fg_flag & FREAD) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
		error = EBADF;
		goto out;
	}


#if CONFIG_MACF
	error = mac_file_check_change_offset(vfs_context_ucred(ctx),
	    fp->f_fglob);
	if (error)
		goto out;
#endif


	if ( (error = vnode_getwithref(vp)) )
		goto out;

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

unionread:
	/* Re-checked each union-mount iteration: vp may have been swapped. */
	if (vp->v_type != VDIR) {
		(void)vnode_put(vp);
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_readdir(ctx, vp);
	if (error != 0) {
		(void)vnode_put(vp);
		goto out;
	}
#endif /* MAC */

	/* set up the uio structure which will contain the users return buffer */
	loff = fp->f_fglob->fg_offset;
	auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, uap->buffer, uap->buffersize);

	/*
	 * If the only item requested is file names, we can let that past with
	 * just LIST_DIRECTORY. If they want any other attributes, that means
	 * they need SEARCH as well.
	 */
	action = KAUTH_VNODE_LIST_DIRECTORY;
	if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
	    attributelist.fileattr || attributelist.dirattr)
		action |= KAUTH_VNODE_SEARCH;

	if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {

		/* Believe it or not, uap->options only has 32-bits of valid
		 * info, so truncate before extending again */

		error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
		    (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
	}

	if (error) {
		(void) vnode_put(vp);
		goto out;
	}

	/*
	 * If we've got the last entry of a directory in a union mount
	 * then reset the eofflag and pretend there's still more to come.
	 * The next call will again set eofflag and the buffer will be empty,
	 * so traverse to the underlying directory and do the directory
	 * read there.
	 */
	if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
		if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
			eofflag = 0;
		} else { // Empty buffer
			struct vnode *tvp = vp;
			if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
				/* Repoint the open file at the lower layer. */
				vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
				fp->f_fglob->fg_data = (caddr_t) vp;
				fp->f_fglob->fg_offset = 0; // reset index for new dir
				count = savecount;
				vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
				vnode_put(tvp);
				goto unionread;
			}
			vp = tvp;
		}
	}

	(void)vnode_put(vp);

	if (error)
		goto out;
	fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */

	/* Copy the per-call results back to the caller. */
	if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
		goto out;
	if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
		goto out;
	if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
		goto out;

	*retval = eofflag;  /* similar to getdirentries */
	error = 0;
out:
	file_drop(fd);
	return (error); /* return error earlier, an retval of 0 or 1 now */

} /* end of getdirentriesattr system call */
8585
/*
 * exchangedata(2): Exchange data between two files.
 *
 * Atomically swaps the data forks of path1 and path2 via VNOP_EXCHANGE.
 * Both objects must be regular files on the same volume and distinct
 * vnodes; the caller needs read/write data rights on both.  On success
 * the cached v_name/v_parent of the two vnodes are also swapped, and an
 * FSE_EXCHANGE fsevent plus a KAUTH_FILEOP_EXCHANGE callout are issued
 * when anyone is listening.
 */

/* ARGSUSED */
int
exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
{

	struct nameidata fnd, snd;
	vfs_context_t ctx = vfs_context_current();
	vnode_t fvp;
	vnode_t svp;
	int error;
	u_int32_t nameiflags;
	char *fpath = NULL;
	char *spath = NULL;
	int flen=0, slen=0;
	int from_truncated=0, to_truncated=0;
#if CONFIG_FSE
	fse_info f_finfo, s_finfo;
#endif

	nameiflags = 0;
	/* FSOPT_NOFOLLOW suppresses symlink traversal on both paths. */
	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;

	NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
	       UIO_USERSPACE, uap->path1, ctx);

	error = namei(&fnd);
	if (error)
		goto out2;

	nameidone(&fnd);
	fvp = fnd.ni_vp;

	NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
	       UIO_USERSPACE, uap->path2, ctx);

	error = namei(&snd);
	if (error) {
		vnode_put(fvp);
		goto out2;
	}
	nameidone(&snd);
	svp = snd.ni_vp;

	/*
	 * if the files are the same, return an inval error
	 */
	if (svp == fvp) {
		error = EINVAL;
		goto out;
	}

	/*
	 * if the files are on different volumes, return an error
	 */
	if (svp->v_mount != fvp->v_mount) {
		error = EXDEV;
		goto out;
	}

	/* If they're not files, return an error */
	if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_exchangedata(ctx,
	    fvp, svp);
	if (error)
		goto out;
#endif
	if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
	    ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
		goto out;

	/*
	 * Only resolve the full paths (expensive) if someone will actually
	 * consume them: an fsevent watcher or a fileop listener.
	 */
	if (
#if CONFIG_FSE
	need_fsevent(FSE_EXCHANGE, fvp) ||
#endif
	kauth_authorize_fileop_has_listeners()) {
		GET_PATH(fpath);
		GET_PATH(spath);
		if (fpath == NULL || spath == NULL) {
			error = ENOMEM;
			goto out;
		}

		flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
		slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);

#if CONFIG_FSE
		get_fse_info(fvp, &f_finfo, ctx);
		get_fse_info(svp, &s_finfo, ctx);
		if (from_truncated || to_truncated) {
			// set it here since only the f_finfo gets reported up to user space
			f_finfo.mode |= FSE_TRUNCATED_PATH;
		}
#endif
	}
	/* Ok, make the call */
	error = VNOP_EXCHANGE(fvp, svp, 0, ctx);

	if (error == 0) {
		const char *tmpname;

		if (fpath != NULL && spath != NULL) {
			/* call out to allow 3rd party notification of exchangedata.
			 * Ignore result of kauth_authorize_fileop call.
			 */
			kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
					       (uintptr_t)fpath, (uintptr_t)spath);
		}
		/* Swap the cached names/parents so namecache stays consistent. */
		name_cache_lock();

		tmpname = fvp->v_name;
		fvp->v_name = svp->v_name;
		svp->v_name = tmpname;

		if (fvp->v_parent != svp->v_parent) {
			vnode_t tmp;

			tmp = fvp->v_parent;
			fvp->v_parent = svp->v_parent;
			svp->v_parent = tmp;
		}
		name_cache_unlock();

#if CONFIG_FSE
		if (fpath != NULL && spath != NULL) {
			add_fsevent(FSE_EXCHANGE, ctx,
				    FSE_ARG_STRING, flen, fpath,
				    FSE_ARG_FINFO, &f_finfo,
				    FSE_ARG_STRING, slen, spath,
				    FSE_ARG_FINFO, &s_finfo,
				    FSE_ARG_DONE);
		}
#endif
	}

out:
	if (fpath != NULL)
		RELEASE_PATH(fpath);
	if (spath != NULL)
		RELEASE_PATH(spath);
	vnode_put(svp);
	vnode_put(fvp);
out2:
	return (error);
}
8739
8740 /*
8741 * Return (in MB) the amount of freespace on the given vnode's volume.
8742 */
8743 uint32_t freespace_mb(vnode_t vp);
8744
8745 uint32_t
8746 freespace_mb(vnode_t vp)
8747 {
8748 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
8749 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
8750 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
8751 }
8752
8753 #if CONFIG_SEARCHFS
8754
8755 /* ARGSUSED */
8756
8757 int
8758 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
8759 {
8760 vnode_t vp, tvp;
8761 int i, error=0;
8762 int fserror = 0;
8763 struct nameidata nd;
8764 struct user64_fssearchblock searchblock;
8765 struct searchstate *state;
8766 struct attrlist *returnattrs;
8767 struct timeval timelimit;
8768 void *searchparams1,*searchparams2;
8769 uio_t auio = NULL;
8770 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8771 uint32_t nummatches;
8772 int mallocsize;
8773 uint32_t nameiflags;
8774 vfs_context_t ctx = vfs_context_current();
8775 char uio_buf[ UIO_SIZEOF(1) ];
8776
8777 /* Start by copying in fsearchblock parameter list */
8778 if (IS_64BIT_PROCESS(p)) {
8779 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
8780 timelimit.tv_sec = searchblock.timelimit.tv_sec;
8781 timelimit.tv_usec = searchblock.timelimit.tv_usec;
8782 }
8783 else {
8784 struct user32_fssearchblock tmp_searchblock;
8785
8786 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
8787 // munge into 64-bit version
8788 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
8789 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
8790 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
8791 searchblock.maxmatches = tmp_searchblock.maxmatches;
8792 /*
8793 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8794 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8795 */
8796 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
8797 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
8798 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
8799 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
8800 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
8801 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
8802 searchblock.searchattrs = tmp_searchblock.searchattrs;
8803 }
8804 if (error)
8805 return(error);
8806
8807 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
8808 */
8809 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
8810 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
8811 return(EINVAL);
8812
8813 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8814 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
8815 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8816 /* block. */
8817 /* */
8818 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8819 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8820 /* assumes the size is still 556 bytes it will continue to work */
8821
8822 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
8823 sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
8824
8825 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
8826
8827 /* Now set up the various pointers to the correct place in our newly allocated memory */
8828
8829 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
8830 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
8831 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
8832
8833 /* Now copy in the stuff given our local variables. */
8834
8835 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
8836 goto freeandexit;
8837
8838 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
8839 goto freeandexit;
8840
8841 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
8842 goto freeandexit;
8843
8844 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
8845 goto freeandexit;
8846
8847 /*
8848 * When searching a union mount, need to set the
8849 * start flag at the first call on each layer to
8850 * reset state for the new volume.
8851 */
8852 if (uap->options & SRCHFS_START)
8853 state->ss_union_layer = 0;
8854 else
8855 uap->options |= state->ss_union_flags;
8856 state->ss_union_flags = 0;
8857
8858 /*
8859 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8860 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
8861 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8862 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
8863 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8864 */
8865
8866 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
8867 attrreference_t* string_ref;
8868 u_int32_t* start_length;
8869 user64_size_t param_length;
8870
8871 /* validate searchparams1 */
8872 param_length = searchblock.sizeofsearchparams1;
8873 /* skip the word that specifies length of the buffer */
8874 start_length= (u_int32_t*) searchparams1;
8875 start_length= start_length+1;
8876 string_ref= (attrreference_t*) start_length;
8877
8878 /* ensure no negative offsets or too big offsets */
8879 if (string_ref->attr_dataoffset < 0 ) {
8880 error = EINVAL;
8881 goto freeandexit;
8882 }
8883 if (string_ref->attr_length > MAXPATHLEN) {
8884 error = EINVAL;
8885 goto freeandexit;
8886 }
8887
8888 /* Check for pointer overflow in the string ref */
8889 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
8890 error = EINVAL;
8891 goto freeandexit;
8892 }
8893
8894 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
8895 error = EINVAL;
8896 goto freeandexit;
8897 }
8898 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
8899 error = EINVAL;
8900 goto freeandexit;
8901 }
8902 }
8903
8904 /* set up the uio structure which will contain the users return buffer */
8905 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8906 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
8907
8908 nameiflags = 0;
8909 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8910 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
8911 UIO_USERSPACE, uap->path, ctx);
8912
8913 error = namei(&nd);
8914 if (error)
8915 goto freeandexit;
8916 vp = nd.ni_vp;
8917 nameidone(&nd);
8918
8919 /*
8920 * Switch to the root vnode for the volume
8921 */
8922 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
8923 vnode_put(vp);
8924 if (error)
8925 goto freeandexit;
8926 vp = tvp;
8927
8928 /*
8929 * If it's a union mount, the path lookup takes
8930 * us to the top layer. But we may need to descend
8931 * to a lower layer. For non-union mounts the layer
8932 * is always zero.
8933 */
8934 for (i = 0; i < (int) state->ss_union_layer; i++) {
8935 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
8936 break;
8937 tvp = vp;
8938 vp = vp->v_mount->mnt_vnodecovered;
8939 if (vp == NULL) {
8940 vnode_put(tvp);
8941 error = ENOENT;
8942 goto freeandexit;
8943 }
8944 vnode_getwithref(vp);
8945 vnode_put(tvp);
8946 }
8947
8948 #if CONFIG_MACF
8949 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
8950 if (error) {
8951 vnode_put(vp);
8952 goto freeandexit;
8953 }
8954 #endif
8955
8956
8957 /*
8958 * If searchblock.maxmatches == 0, then skip the search. This has happened
8959 * before and sometimes the underlying code doesnt deal with it well.
8960 */
8961 if (searchblock.maxmatches == 0) {
8962 nummatches = 0;
8963 goto saveandexit;
8964 }
8965
8966 /*
8967 * Allright, we have everything we need, so lets make that call.
8968 *
8969 * We keep special track of the return value from the file system:
8970 * EAGAIN is an acceptable error condition that shouldn't keep us
8971 * from copying out any results...
8972 */
8973
8974 fserror = VNOP_SEARCHFS(vp,
8975 searchparams1,
8976 searchparams2,
8977 &searchblock.searchattrs,
8978 (u_long)searchblock.maxmatches,
8979 &timelimit,
8980 returnattrs,
8981 &nummatches,
8982 (u_long)uap->scriptcode,
8983 (u_long)uap->options,
8984 auio,
8985 (struct searchstate *) &state->ss_fsstate,
8986 ctx);
8987
8988 /*
8989 * If it's a union mount we need to be called again
8990 * to search the mounted-on filesystem.
8991 */
8992 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
8993 state->ss_union_flags = SRCHFS_START;
8994 state->ss_union_layer++; // search next layer down
8995 fserror = EAGAIN;
8996 }
8997
8998 saveandexit:
8999
9000 vnode_put(vp);
9001
9002 /* Now copy out the stuff that needs copying out. That means the number of matches, the
9003 search state. Everything was already put into he return buffer by the vop call. */
9004
9005 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
9006 goto freeandexit;
9007
9008 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
9009 goto freeandexit;
9010
9011 error = fserror;
9012
9013 freeandexit:
9014
9015 FREE(searchparams1,M_TEMP);
9016
9017 return(error);
9018
9019
9020 } /* end of searchfs system call */
9021
9022 #else /* CONFIG_SEARCHFS */
9023
9024 int
9025 searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
9026 {
9027 return (ENOTSUP);
9028 }
9029
9030 #endif /* CONFIG_SEARCHFS */
9031
9032
/*
 * Namespace-handler ("nspace") support state.
 *
 * Lock machinery protecting the nspace_items table and handler records;
 * initialized by nspace_handler_init().
 */
lck_grp_attr_t *  nspace_group_attr;
lck_attr_t *      nspace_lock_attr;
lck_grp_t *       nspace_mutex_group;

lck_mtx_t         nspace_handler_lock;
lck_mtx_t         nspace_handler_exclusion_lock;

time_t snapshot_timestamp=0;
/* Allow snapshot events on virtual (disk-image) devices when non-zero. */
int nspace_allow_virtual_devs=0;

void nspace_handler_init(void);

/* One pending namespace event awaiting resolution by a handler process. */
typedef struct nspace_item_info {
	struct vnode *vp;	/* vnode the event refers to */
	void         *arg;	/* handler-private argument */
	uint64_t      op;	/* event operation bits */
	uint32_t      vid;	/* vnode id, to detect recycling */
	uint32_t      flags;	/* NSPACE_ITEM_* state bits below */
	uint32_t      token;	/* token handed to the handler */
	uint32_t      refcount;
} nspace_item_info;

#define MAX_NSPACE_ITEMS   128
nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
uint32_t      nspace_item_idx=0;              // also used as the sleep/wakeup rendezvous address
uint32_t      nspace_token_id=0;
uint32_t      nspace_handler_timeout = 15;    // seconds

/* nspace_item_info.flags state bits. */
#define NSPACE_ITEM_NEW              0x0001
#define NSPACE_ITEM_PROCESSING      0x0002
#define NSPACE_ITEM_DEAD            0x0004
#define NSPACE_ITEM_CANCELLED       0x0008
#define NSPACE_ITEM_DONE            0x0010
#define NSPACE_ITEM_RESET_TIMER     0x0020

/* Event-type bits (at most one set per item). */
#define NSPACE_ITEM_NSPACE_EVENT    0x0040
#define NSPACE_ITEM_SNAPSHOT_EVENT  0x0080

#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)

//#pragma optimization_level 0

/* The two classes of handler a userspace daemon may register as. */
typedef enum {
	NSPACE_HANDLER_NSPACE = 0,
	NSPACE_HANDLER_SNAPSHOT = 1,

	NSPACE_HANDLER_COUNT,
} nspace_type_t;

/* Per-handler registration record. */
typedef struct {
	uint64_t      handler_tid;	/* thread id of the registered handler */
	struct proc  *handler_proc;	/* handler process, NULL if none */
	int           handler_busy;
} nspace_handler_t;

nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];

/* namespace fsctl functions */
static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
static int nspace_item_flags_for_type(nspace_type_t nspace_type);
static int nspace_open_flags_for_type(nspace_type_t nspace_type);
static nspace_type_t nspace_type_for_op(uint64_t op);
static int nspace_is_special_process(struct proc *proc);
static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
static int validate_namespace_args (int is64bit, int size);
static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
9101
9102 static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
9103 {
9104 switch(nspace_type) {
9105 case NSPACE_HANDLER_NSPACE:
9106 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
9107 case NSPACE_HANDLER_SNAPSHOT:
9108 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
9109 default:
9110 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
9111 return 0;
9112 }
9113 }
9114
9115 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
9116 {
9117 switch(nspace_type) {
9118 case NSPACE_HANDLER_NSPACE:
9119 return NSPACE_ITEM_NSPACE_EVENT;
9120 case NSPACE_HANDLER_SNAPSHOT:
9121 return NSPACE_ITEM_SNAPSHOT_EVENT;
9122 default:
9123 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
9124 return 0;
9125 }
9126 }
9127
9128 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
9129 {
9130 switch(nspace_type) {
9131 case NSPACE_HANDLER_NSPACE:
9132 return FREAD | FWRITE | O_EVTONLY;
9133 case NSPACE_HANDLER_SNAPSHOT:
9134 return FREAD | O_EVTONLY;
9135 default:
9136 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
9137 return 0;
9138 }
9139 }
9140
9141 static inline nspace_type_t nspace_type_for_op(uint64_t op)
9142 {
9143 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
9144 case NAMESPACE_HANDLER_NSPACE_EVENT:
9145 return NSPACE_HANDLER_NSPACE;
9146 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
9147 return NSPACE_HANDLER_SNAPSHOT;
9148 default:
9149 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
9150 return NSPACE_HANDLER_NSPACE;
9151 }
9152 }
9153
9154 static inline int nspace_is_special_process(struct proc *proc)
9155 {
9156 int i;
9157 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9158 if (proc == nspace_handlers[i].handler_proc)
9159 return 1;
9160 }
9161 return 0;
9162 }
9163
9164 void
9165 nspace_handler_init(void)
9166 {
9167 nspace_lock_attr = lck_attr_alloc_init();
9168 nspace_group_attr = lck_grp_attr_alloc_init();
9169 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
9170 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
9171 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
9172 memset(&nspace_items[0], 0, sizeof(nspace_items));
9173 }
9174
/*
 * Called on process exit: if the exiting process was a registered
 * namespace handler, deregister it and complete any events of its class
 * that are still pending, waking their waiters so they don't block on a
 * handler that no longer exists.
 */
void
nspace_proc_exit(struct proc *p)
{
	int i, event_mask = 0;

	/* Deregister p from every handler slot it occupies. */
	for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
		if (p == nspace_handlers[i].handler_proc) {
			event_mask |= nspace_item_flags_for_type(i);
			nspace_handlers[i].handler_tid = 0;
			nspace_handlers[i].handler_proc = NULL;
		}
	}

	/* Not a handler process: nothing else to do. */
	if (event_mask == 0) {
		return;
	}

	lck_mtx_lock(&nspace_handler_lock);
	if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
		// if this process was the snapshot handler, zero snapshot_timeout
		snapshot_timestamp = 0;
	}

	//
	// unblock anyone that's waiting for the handler that died
	//
	for(i=0; i < MAX_NSPACE_ITEMS; i++) {
		if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {

			if ( nspace_items[i].flags & event_mask ) {

				/* Clear the pending-snapshot mark on the vnode, if set. */
				if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
					vnode_lock_spin(nspace_items[i].vp);
					nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
					vnode_unlock(nspace_items[i].vp);
				}
				nspace_items[i].vp = NULL;
				nspace_items[i].vid = 0;
				nspace_items[i].flags = NSPACE_ITEM_DONE;
				nspace_items[i].token = 0;

				/* Waiters sleep on the item's vp field address. */
				wakeup((caddr_t)&(nspace_items[i].vp));
			}
		}
	}

	/* Also wake anyone waiting for a free slot / new work. */
	wakeup((caddr_t)&nspace_item_idx);
	lck_mtx_unlock(&nspace_handler_lock);
}
9224
9225
/*
 * Queue a namespace event of type `op` for vnode `vp` and wait for the
 * user-space handler to service it.  Convenience wrapper around
 * resolve_nspace_item_ext() with no handler-specific argument.
 */
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	return resolve_nspace_item_ext(vp, op, NULL);
}
9231
/*
 * Queue a namespace event (op) for `vp` in the global nspace_items table
 * and block until the user-space handler marks it done/cancelled or the
 * wait times out.
 *
 * Returns 0 on success (or when the event is not applicable), EDEADLK if
 * the caller is itself a namespace handler, ETIMEDOUT on handler timeout,
 * or the token value set by the handler on cancellation.
 *
 * `arg` is {NULL, NSPACE_REARM_NO_ARG, or a uio *}; only a uio is passed
 * through to userland (see wait_for_namespace_event()).
 */
int
resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
{
	int i, error, keep_waiting;
	struct timespec ts;
	nspace_type_t nspace_type = nspace_type_for_op(op);

	// only allow namespace events on regular files, directories and symlinks.
	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
		return 0;
	}

	//
	// if this is a snapshot event and the vnode is on a
	// disk image just pretend nothing happened since any
	// change to the disk image will cause the disk image
	// itself to get backed up and this avoids multi-way
	// deadlocks between the snapshot handler and the ever
	// popular diskimages-helper process.  the variable
	// nspace_allow_virtual_devs allows this behavior to
	// be overridden (for use by the Mobile TimeMachine
	// testing infrastructure which uses disk images)
	//
	if (   (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
	    && (vp->v_mount != NULL)
	    && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
	    && !nspace_allow_virtual_devs) {

		return 0;
	}

	// if (thread_tid(current_thread()) == namespace_handler_tid) {
	/* No handler registered for this event type: nothing to wait for. */
	if (nspace_handlers[nspace_type].handler_proc == NULL) {
		return 0;
	}

	/* A handler must never wait on itself. */
	if (nspace_is_special_process(current_proc())) {
		return EDEADLK;
	}

	lck_mtx_lock(&nspace_handler_lock);

retry:
	/* First look for an existing item for this (vp, op) pair. */
	for(i=0; i < MAX_NSPACE_ITEMS; i++) {
		if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
			break;
		}
	}

	if (i >= MAX_NSPACE_ITEMS) {
		/* No match: find a free slot instead. */
		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].flags == 0) {
				break;
			}
		}
	} else {
		/* Piggy-back on the existing item. */
		nspace_items[i].refcount++;
	}

	if (i >= MAX_NSPACE_ITEMS) {
		/* Table is full: sleep until a slot frees up (bounded wait). */
		ts.tv_sec = nspace_handler_timeout;
		ts.tv_nsec = 0;

		error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
		if (error == 0) {
			// an entry got free'd up, go see if we can get a slot
			goto retry;
		} else {
			lck_mtx_unlock(&nspace_handler_lock);
			return error;
		}
	}

	//
	// if it didn't already exist, add it.  if it did exist
	// we'll get woken up when someone does a wakeup() on
	// the slot in the nspace_items table.
	//
	if (vp != nspace_items[i].vp) {
		nspace_items[i].vp = vp;
		nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
		nspace_items[i].op = op;
		nspace_items[i].vid = vnode_vid(vp);
		nspace_items[i].flags = NSPACE_ITEM_NEW;
		nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
		if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
			if (arg) {
				/* Tag the vnode so stale snapshot state can be
				 * cleared if the handler dies (nspace_proc_exit). */
				vnode_lock_spin(vp);
				vp->v_flag |= VNEEDSSNAPSHOT;
				vnode_unlock(vp);
			}
		}

		nspace_items[i].token = 0;
		nspace_items[i].refcount = 1;

		/* Wake the handler thread waiting for new items. */
		wakeup((caddr_t)&nspace_item_idx);
	}

	//
	// Now go to sleep until the handler does a wakeup on this
	// slot in the nspace_items table (or we timeout).
	//
	keep_waiting = 1;
	while(keep_waiting) {
		ts.tv_sec = nspace_handler_timeout;
		ts.tv_nsec = 0;
		error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);

		if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
			error = 0;
		} else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
			/* Handler cancelled us; token carries the error to return. */
			error = nspace_items[i].token;
		} else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
			if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
				/* Handler asked for more time: re-arm and sleep again. */
				nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
				continue;
			} else {
				error = ETIMEDOUT;
			}
		} else if (error == 0) {
			// hmmm, why did we get woken up?
			printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
			       nspace_items[i].token);
		}

		/* Last waiter out clears the slot for re-use. */
		if (--nspace_items[i].refcount == 0) {
			nspace_items[i].vp = NULL;     // clear this so that no one will match on it again
			nspace_items[i].arg = NULL;
			nspace_items[i].token = 0;     // clear this so that the handler will not find it anymore
			nspace_items[i].flags = 0;     // this clears it for re-use
		}
		/* Wake anyone sleeping in the "table full" path above. */
		wakeup(&nspace_token_id);
		keep_waiting = 0;
	}

	lck_mtx_unlock(&nspace_handler_lock);

	return error;
}
9372
9373 int nspace_snapshot_event(vnode_t vp, time_t ctime, uint64_t op_type, void *arg)
9374 {
9375 int snapshot_error = 0;
9376
9377 if (vp == NULL) {
9378 return 0;
9379 }
9380
9381 /* Swap files are special; skip them */
9382 if (vnode_isswap(vp)) {
9383 return 0;
9384 }
9385
9386 if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
9387 // the change time is within this epoch
9388 int error;
9389
9390 error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
9391 if (error == EDEADLK) {
9392 snapshot_error = 0;
9393 } else if (error) {
9394 if (error == EAGAIN) {
9395 printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
9396 } else if (error == EINTR) {
9397 // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
9398 snapshot_error = EINTR;
9399 }
9400 }
9401 }
9402
9403 return snapshot_error;
9404 }
9405
9406 int
9407 get_nspace_item_status(struct vnode *vp, int32_t *status)
9408 {
9409 int i;
9410
9411 lck_mtx_lock(&nspace_handler_lock);
9412 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9413 if (nspace_items[i].vp == vp) {
9414 break;
9415 }
9416 }
9417
9418 if (i >= MAX_NSPACE_ITEMS) {
9419 lck_mtx_unlock(&nspace_handler_lock);
9420 return ENOENT;
9421 }
9422
9423 *status = nspace_items[i].flags;
9424 lck_mtx_unlock(&nspace_handler_lock);
9425 return 0;
9426 }
9427
9428
/*
 * NOTE(review): compiled out (#if 0) — retained for reference only.
 * Builds a /.vol/<fsid>/<fileid> style path for `vp`; on getattr
 * failure it writes a sentinel path and returns -1.
 */
#if 0
static int
build_volfs_path(struct vnode *vp, char *path, int *len)
{
	struct vnode_attr va;
	int ret;

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_fileid);

	if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
		*len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
		ret = -1;
	} else {
		*len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
		ret = 0;
	}

	return ret;
}
#endif
9451
9452 //
9453 // Note: this function does NOT check permissions on all of the
9454 // parent directories leading to this vnode. It should only be
9455 // called on behalf of a root process. Otherwise a process may
9456 // get access to a file because the file itself is readable even
9457 // though its parent directories would prevent access.
9458 //
/*
 * Open an already-resolved vnode with mode `fmode` on behalf of a root
 * process (caller must pass the suser() check below).  Performs MAC and
 * kauth authorization, calls VNOP_OPEN, and takes a usecount reference
 * via vnode_ref_ext(); on success the caller owns that reference.
 * Returns 0 or an errno.
 */
static int
vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
{
	int error, action;

	/* Root-only: parent directory permissions are NOT checked here. */
	if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
		return error;
	}

#if CONFIG_MACF
	error = mac_vnode_check_open(ctx, vp, fmode);
	if (error)
		return error;
#endif

	/* compute action to be authorized */
	action = 0;
	if (fmode & FREAD) {
		action |= KAUTH_VNODE_READ_DATA;
	}
	if (fmode & (FWRITE | O_TRUNC)) {
		/*
		 * If we are writing, appending, and not truncating,
		 * indicate that we are appending so that if the
		 * UF_APPEND or SF_APPEND bits are set, we do not deny
		 * the open.
		 */
		if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
			action |= KAUTH_VNODE_APPEND_DATA;
		} else {
			action |= KAUTH_VNODE_WRITE_DATA;
		}
	}

	if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
		return error;


	//
	// if the vnode is tagged VOPENEVT and the current process
	// has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
	// flag to the open mode so that this open won't count against
	// the vnode when carbon delete() does a vnode_isinuse() to see
	// if a file is currently in use.  this allows spotlight
	// importers to not interfere with carbon apps that depend on
	// the no-delete-if-busy semantics of carbon delete().
	//
	if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
		fmode |= O_EVTONLY;
	}

	if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
		return error;
	}
	/* Take a usecount ref; undo the open if that fails. */
	if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
		VNOP_CLOSE(vp, fmode, ctx);
		return error;
	}

	/* Call out to allow 3rd party notification of open.
	 * Ignore result of kauth_authorize_fileop call.
	 */
#if CONFIG_MACF
	mac_vnode_notify_open(ctx, vp, fmode);
#endif
	kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
	    (uintptr_t)vp, 0);


	return 0;
}
9530
/*
 * Called by a user-space namespace handler (via fsctl) to wait for the
 * next namespace event of `nspace_type`.  Registers the caller as the
 * handler if none exists, blocks for a NEW item, opens the item's vnode
 * and hands a file descriptor plus token/flags (and optional info/objid
 * fields) back to userland through the pointers in `nhd`.
 *
 * Only one handler per type may be inside this function at a time
 * (handler_busy / nspace_handler_exclusion_lock); returns EBUSY if
 * another thread is already waiting.
 */
static int
wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
{
	int i;
	int error = 0;
	int unblock = 0;
	task_t curtask;

	lck_mtx_lock(&nspace_handler_exclusion_lock);
	if (nspace_handlers[nspace_type].handler_busy) {
		lck_mtx_unlock(&nspace_handler_exclusion_lock);
		return EBUSY;
	}

	nspace_handlers[nspace_type].handler_busy = 1;
	lck_mtx_unlock(&nspace_handler_exclusion_lock);

	/*
	 * Any process that gets here will be one of the namespace handlers.
	 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
	 * as we can cause deadlocks to occur, because the namespace handler may prevent
	 * VNOP_INACTIVE from proceeding.  Mark the current task as a P_DEPENDENCY_CAPABLE
	 * process.
	 */
	curtask = current_task();
	bsd_set_dependency_capable (curtask);

	lck_mtx_lock(&nspace_handler_lock);
	if (nspace_handlers[nspace_type].handler_proc == NULL) {
		/* First caller becomes the registered handler for this type. */
		nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
		nspace_handlers[nspace_type].handler_proc = current_proc();
	}

	/* Snapshot handlers need a valid epoch set via
	 * FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME first. */
	if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
	    (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
		error = EINVAL;
	}

	while (error == 0) {

		/* Try to find matching namespace item */
		for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
				if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
					break;
				}
			}
		}

		if (i >= MAX_NSPACE_ITEMS) {
			/* Nothing is there yet. Wait for wake up and retry */
			error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
			if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
				/* Prevent infinite loop if snapshot handler exited */
				error = EINVAL;
				break;
			}
			continue;
		}

		/* Claim the item and stamp it with a fresh token. */
		nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
		nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
		nspace_items[i].token = ++nspace_token_id;

		assert(nspace_items[i].vp);
		struct fileproc *fp;
		int32_t indx;
		int32_t fmode;
		struct proc *p = current_proc();
		vfs_context_t ctx = vfs_context_current();
		struct vnode_attr va;
		bool vn_get_succsessful = false;
		bool vn_open_successful = false;
		bool fp_alloc_successful = false;

		/*
		 * Use vnode pointer to acquire a file descriptor for
		 * hand-off to userland
		 */
		fmode = nspace_open_flags_for_type(nspace_type);
		error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
		if (error) goto cleanup;
		vn_get_succsessful = true;

		error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
		if (error) goto cleanup;
		vn_open_successful = true;

		error = falloc(p, &fp, &indx, ctx);
		if (error) goto cleanup;
		fp_alloc_successful = true;

		fp->f_fglob->fg_flag = fmode;
		fp->f_fglob->fg_ops = &vnops;
		fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;

		/* Publish the new fd in the caller's descriptor table. */
		proc_fdlock(p);
		procfdtbl_releasefd(p, indx, NULL);
		fp_drop(p, indx, fp, 1);
		proc_fdunlock(p);

		/*
		 * All variants of the namespace handler struct support these three fields:
		 * token, flags, and the FD pointer
		 */
		error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
		if (error) goto cleanup;
		error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
		if (error) goto cleanup;
		error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
		if (error) goto cleanup;

		/*
		 * Handle optional fields:
		 * extended version support an info ptr (offset, length), and the
		 *
		 * namedata version supports a unique per-link object ID
		 *
		 */
		if (nhd->infoptr) {
			uio_t uio = (uio_t)nspace_items[i].arg;
			uint64_t u_offset, u_length;

			if (uio) {
				u_offset = uio_offset(uio);
				u_length = uio_resid(uio);
			} else {
				u_offset = 0;
				u_length = 0;
			}
			error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
			if (error) goto cleanup;
			error = copyout(&u_length, nhd->infoptr + sizeof(uint64_t), sizeof(uint64_t));
			if (error) goto cleanup;
		}

		if (nhd->objid) {
			VATTR_INIT(&va);
			VATTR_WANTED(&va, va_linkid);
			error = vnode_getattr(nspace_items[i].vp, &va, ctx);
			if (error) goto cleanup;

			uint64_t linkid = 0;
			if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
				linkid = (uint64_t)va.va_linkid;
			}
			error = copyout(&linkid, nhd->objid, sizeof(uint64_t));
		}
cleanup:
		/* Unwind in reverse order of acquisition on any failure. */
		if (error) {
			if (fp_alloc_successful) fp_free(p, indx, fp);
			if (vn_open_successful) vn_close(nspace_items[i].vp, fmode, ctx);
			unblock = 1;
		}

		if (vn_get_succsessful) vnode_put(nspace_items[i].vp);

		break;
	}

	/* On failure, mark item i done and wake its waiters (i still
	 * indexes the claimed item from the loop above). */
	if (unblock) {
		if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
			vnode_lock_spin(nspace_items[i].vp);
			nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
			vnode_unlock(nspace_items[i].vp);
		}
		nspace_items[i].vp = NULL;
		nspace_items[i].vid = 0;
		nspace_items[i].flags = NSPACE_ITEM_DONE;
		nspace_items[i].token = 0;

		wakeup((caddr_t)&(nspace_items[i].vp));
	}

	if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
		// just go through every snapshot event and unblock it immediately.
		if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
			for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
				if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
					if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
						nspace_items[i].vp = NULL;
						nspace_items[i].vid = 0;
						nspace_items[i].flags = NSPACE_ITEM_DONE;
						nspace_items[i].token = 0;

						wakeup((caddr_t)&(nspace_items[i].vp));
					}
				}
			}
		}
	}

	lck_mtx_unlock(&nspace_handler_lock);

	lck_mtx_lock(&nspace_handler_exclusion_lock);
	nspace_handlers[nspace_type].handler_busy = 0;
	lck_mtx_unlock(&nspace_handler_exclusion_lock);

	return error;
}
9731
9732 static inline int validate_namespace_args (int is64bit, int size) {
9733
9734 if (is64bit) {
9735 /* Must be one of these */
9736 if (size == sizeof(user64_namespace_handler_info)) {
9737 goto sizeok;
9738 }
9739 if (size == sizeof(user64_namespace_handler_info_ext)) {
9740 goto sizeok;
9741 }
9742 if (size == sizeof(user64_namespace_handler_data)) {
9743 goto sizeok;
9744 }
9745 return EINVAL;
9746 }
9747 else {
9748 /* 32 bit -- must be one of these */
9749 if (size == sizeof(user32_namespace_handler_info)) {
9750 goto sizeok;
9751 }
9752 if (size == sizeof(user32_namespace_handler_info_ext)) {
9753 goto sizeok;
9754 }
9755 if (size == sizeof(user32_namespace_handler_data)) {
9756 goto sizeok;
9757 }
9758 return EINVAL;
9759 }
9760
9761 sizeok:
9762
9763 return 0;
9764
9765 }
9766
/*
 * Marshal the userland namespace-handler argument struct (pointed to by
 * `data`, already copied into the kernel) into a kernel-only
 * namespace_handler_data of user_addr_t fields, then block in
 * wait_for_namespace_event().  Root only.  `size` selects which struct
 * variant (info / info_ext / data) the caller passed; optional fields
 * absent from smaller variants stay zero from the bzero below.
 */
static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
{
	int error = 0;
	namespace_handler_data nhd;

	bzero (&nhd, sizeof(namespace_handler_data));

	if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
		return error;
	}

	error = validate_namespace_args (is64bit, size);
	if (error) {
		return error;
	}

	/* Copy in the userland pointers into our kernel-only struct */

	if (is64bit) {
		/* 64 bit userland structures */
		nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
		nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
		nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;

		/* If the size is greater than the standard info struct, add in extra fields */
		if (size > (sizeof(user64_namespace_handler_info))) {
			if (size >= (sizeof(user64_namespace_handler_info_ext))) {
				nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
			}
			if (size == (sizeof(user64_namespace_handler_data))) {
				nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
			}
			/* Otherwise the fields were pre-zeroed when we did the bzero above. */
		}
	}
	else {
		/* 32 bit userland structures */
		nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
		nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
		nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);

		if (size > (sizeof(user32_namespace_handler_info))) {
			if (size >= (sizeof(user32_namespace_handler_info_ext))) {
				nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
			}
			if (size == (sizeof(user32_namespace_handler_data))) {
				nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
			}
			/* Otherwise the fields were pre-zeroed when we did the bzero above. */
		}
	}

	return wait_for_namespace_event(&nhd, nspace_type);
}
9821
9822 /*
9823 * Make a filesystem-specific control call:
9824 */
9825 /* ARGSUSED */
/*
 * Common implementation for fsctl()/ffsctl(): marshal the ioctl-style
 * argument between userland and a kernel buffer, handle the generic
 * FSCTL_* commands inline, and forward anything else to the filesystem
 * via VNOP_IOCTL().
 *
 * NOTE: FSCTL_SYNC_VOLUME drops the iocount on *arg_vp and sets it to
 * NULL; callers must re-check *arg_vp before calling vnode_put().
 */
static int
fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
{
	int error=0;
	boolean_t is64bit;
	u_int size;
#define STK_PARAMS 128
	char stkbuf[STK_PARAMS] = {0};
	caddr_t data, memp;
	vnode_t vp = *arg_vp;

	size = IOCPARM_LEN(cmd);
	if (size > IOCPARM_MAX) return (EINVAL);

	is64bit = proc_is64bit(p);

	memp = NULL;


	/*
	 * ensure the buffer is large enough for underlying calls
	 */
#ifndef HFSIOC_GETPATH
	typedef char pn_t[MAXPATHLEN];
#define HFSIOC_GETPATH _IOWR('h', 13, pn_t)
#endif

#ifndef HFS_GETPATH
#define HFS_GETPATH IOCBASECMD(HFSIOC_GETPATH)
#endif
	if (IOCBASECMD(cmd) == HFS_GETPATH) {
		/* Round up to MAXPATHLEN regardless of user input */
		size = MAXPATHLEN;
	}

	/* Large arguments go to a heap buffer; small ones use the stack. */
	if (size > sizeof (stkbuf)) {
		if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
		data = memp;
	} else {
		data = &stkbuf[0];
	};

	if (cmd & IOC_IN) {
		if (size) {
			/* Input command with a payload: copy it in. */
			error = copyin(udata, data, size);
			if (error) {
				if (memp) {
					kfree (memp, size);
				}
				return error;
			}
		} else {
			/* Zero-length input: the "argument" is the pointer itself. */
			if (is64bit) {
				*(user_addr_t *)data = udata;
			}
			else {
				*(uint32_t *)data = (uint32_t)udata;
			}
		};
	} else if ((cmd & IOC_OUT) && size) {
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	} else if (cmd & IOC_VOID) {
		if (is64bit) {
			*(user_addr_t *)data = udata;
		}
		else {
			*(uint32_t *)data = (uint32_t)udata;
		}
	}

	/* Check to see if it's a generic command */
	switch (IOCBASECMD(cmd)) {

		case FSCTL_SYNC_VOLUME: {
			mount_t mp = vp->v_mount;
			int arg = *(uint32_t*)data;

			/* record vid of vp so we can drop it below. */
			uint32_t vvid = vp->v_id;

			/*
			 * Then grab mount_iterref so that we can release the vnode.
			 * Without this, a thread may call vnode_iterate_prepare then
			 * get into a deadlock because we've never released the root vp
			 */
			error = mount_iterref (mp, 0);
			if (error)  {
				break;
			}
			vnode_put(vp);

			/* issue the sync for this volume */
			(void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);

			/*
			 * Then release the mount_iterref once we're done syncing; it's not
			 * needed for the VNOP_IOCTL below
			 */
			mount_iterdrop(mp);

			if (arg & FSCTL_SYNC_FULLSYNC) {
				/* re-obtain vnode iocount on the root vp, if possible */
				error = vnode_getwithvid (vp, vvid);
				if (error == 0) {
					error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
					vnode_put (vp);
				}
			}
			/* mark the argument VP as having been released */
			*arg_vp = NULL;
		}
		break;

		case FSCTL_ROUTEFS_SETROUTEID: {
#if ROUTEFS
			char routepath[MAXPATHLEN];
			size_t len = 0;

			/* Root only: mounts the routefs at the given path. */
			if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
				break;
			}
			bzero(routepath, MAXPATHLEN);
			error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
			if (error) {
				break;
			}
			error = routefs_kernel_mount(routepath);
			if (error) {
				break;
			}
#endif
		}
		break;

		case FSCTL_SET_PACKAGE_EXTS: {
			user_addr_t ext_strings;
			uint32_t    num_entries;
			uint32_t    max_width;

			if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0)))
				break;

			if (   (is64bit && size != sizeof(user64_package_ext_info))
			    || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {

				// either you're 64-bit and passed a 64-bit struct or
				// you're 32-bit and passed a 32-bit struct.  otherwise
				// it's not ok.
				error = EINVAL;
				break;
			}

			if (is64bit) {
				ext_strings = ((user64_package_ext_info *)data)->strings;
				num_entries = ((user64_package_ext_info *)data)->num_entries;
				max_width = ((user64_package_ext_info *)data)->max_width;
			} else {
				ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
				num_entries = ((user32_package_ext_info *)data)->num_entries;
				max_width = ((user32_package_ext_info *)data)->max_width;
			}
			error = set_package_extensions_table(ext_strings, num_entries, max_width);
		}
		break;

		/* namespace handlers */
		case FSCTL_NAMESPACE_HANDLER_GET: {
			error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
		}
		break;

		/* Snapshot handlers */
		case FSCTL_OLD_SNAPSHOT_HANDLER_GET: {
			error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
		}
		break;

		case FSCTL_SNAPSHOT_HANDLER_GET_EXT: {
			error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
		}
		break;

		case FSCTL_NAMESPACE_HANDLER_UPDATE: {
			/* Handler asks for more time on an in-flight event:
			 * set the RESET_TIMER flag so the waiter re-arms its
			 * timeout instead of giving up (see
			 * resolve_nspace_item_ext()). */
			uint32_t token, val;
			int i;

			if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
				break;
			}

			if (!nspace_is_special_process(p)) {
				error = EINVAL;
				break;
			}

			token = ((uint32_t *)data)[0];
			val   = ((uint32_t *)data)[1];

			lck_mtx_lock(&nspace_handler_lock);

			for(i=0; i < MAX_NSPACE_ITEMS; i++) {
				if (nspace_items[i].token == token) {
					break;  /* exit for loop, not case stmt */
				}
			}

			if (i >= MAX_NSPACE_ITEMS) {
				error = ENOENT;
			} else {
				//
				// if this bit is set, when resolve_nspace_item() times out
				// it will loop and go back to sleep.
				//
				nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
			}

			lck_mtx_unlock(&nspace_handler_lock);

			if (error) {
				printf("nspace-handler-update: did not find token %u\n", token);
			}
		}
		break;

		case FSCTL_NAMESPACE_HANDLER_UNBLOCK: {
			/* Handler finished an event: mark the item DONE and
			 * wake the blocked thread. */
			uint32_t token, val;
			int i;

			if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
				break;
			}

			if (!nspace_is_special_process(p)) {
				error = EINVAL;
				break;
			}

			token = ((uint32_t *)data)[0];
			val   = ((uint32_t *)data)[1];

			lck_mtx_lock(&nspace_handler_lock);

			for(i=0; i < MAX_NSPACE_ITEMS; i++) {
				if (nspace_items[i].token == token) {
					break;  /* exit for loop, not case statement */
				}
			}

			if (i >= MAX_NSPACE_ITEMS) {
				printf("nspace-handler-unblock: did not find token %u\n", token);
				error = ENOENT;
			} else {
				if (val == 0 && nspace_items[i].vp) {
					vnode_lock_spin(nspace_items[i].vp);
					nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
					vnode_unlock(nspace_items[i].vp);
				}

				nspace_items[i].vp = NULL;
				nspace_items[i].arg = NULL;
				nspace_items[i].op = 0;
				nspace_items[i].vid = 0;
				nspace_items[i].flags = NSPACE_ITEM_DONE;
				nspace_items[i].token = 0;

				wakeup((caddr_t)&(nspace_items[i].vp));
			}

			lck_mtx_unlock(&nspace_handler_lock);
		}
		break;

		case FSCTL_NAMESPACE_HANDLER_CANCEL: {
			/* Handler cancels an event: the waiter will return
			 * `val` (stored in the token field) as its error. */
			uint32_t token, val;
			int i;

			if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
				break;
			}

			if (!nspace_is_special_process(p)) {
				error = EINVAL;
				break;
			}

			token = ((uint32_t *)data)[0];
			val   = ((uint32_t *)data)[1];

			lck_mtx_lock(&nspace_handler_lock);

			for(i=0; i < MAX_NSPACE_ITEMS; i++) {
				if (nspace_items[i].token == token) {
					break;  /* exit for loop, not case stmt */
				}
			}

			if (i >= MAX_NSPACE_ITEMS) {
				printf("nspace-handler-cancel: did not find token %u\n", token);
				error = ENOENT;
			} else {
				if (nspace_items[i].vp) {
					vnode_lock_spin(nspace_items[i].vp);
					nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
					vnode_unlock(nspace_items[i].vp);
				}

				nspace_items[i].vp = NULL;
				nspace_items[i].arg = NULL;
				nspace_items[i].vid = 0;
				nspace_items[i].token = val;
				nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
				nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;

				wakeup((caddr_t)&(nspace_items[i].vp));
			}

			lck_mtx_unlock(&nspace_handler_lock);
		}
		break;

		case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
			if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
				break;
			}

			// we explicitly do not do the namespace_handler_proc check here

			lck_mtx_lock(&nspace_handler_lock);
			snapshot_timestamp = ((uint32_t *)data)[0];
			wakeup(&nspace_item_idx);
			lck_mtx_unlock(&nspace_handler_lock);
			printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);

		}
		break;

		case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
		{
			/* Root-only toggle: let snapshot events fire for vnodes
			 * on virtual (disk-image) devices. */
			if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
				break;
			}

			lck_mtx_lock(&nspace_handler_lock);
			nspace_allow_virtual_devs = ((uint32_t *)data)[0];
			lck_mtx_unlock(&nspace_handler_lock);
			printf("nspace-snapshot-handler will%s allow events on disk-images\n",
			       nspace_allow_virtual_devs ? "" : " NOT");
			error = 0;

		}
		break;

		case FSCTL_SET_FSTYPENAME_OVERRIDE:
		{
			/* Root-only: override the reported filesystem type name
			 * for this mount; the empty string clears the override. */
			if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
				break;
			}
			if (vp->v_mount) {
				mount_lock(vp->v_mount);
				if (data[0] != 0) {
					strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
					vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
					if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
						vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
						vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
					}
				} else {
					if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
						vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
					}
					vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
					vp->v_mount->fstypename_override[0] = '\0';
				}
				mount_unlock(vp->v_mount);
			}
		}
		break;

		default: {
			/* Invoke the filesystem-specific code */
			error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
		}

	} /* end switch stmt */

	/*
	 * if no errors, copy any data to user. Size was
	 * already set and checked above.
	 */
	if (error == 0 && (cmd & IOC_OUT) && size)
		error = copyout(data, udata, size);

	if (memp) {
		kfree(memp, size);
	}

	return error;
}
10228
10229 /* ARGSUSED */
10230 int
10231 fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
10232 {
10233 int error;
10234 struct nameidata nd;
10235 u_long nameiflags;
10236 vnode_t vp = NULL;
10237 vfs_context_t ctx = vfs_context_current();
10238
10239 AUDIT_ARG(cmd, uap->cmd);
10240 AUDIT_ARG(value32, uap->options);
10241 /* Get the vnode for the file we are getting info on: */
10242 nameiflags = 0;
10243 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
10244 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
10245 UIO_USERSPACE, uap->path, ctx);
10246 if ((error = namei(&nd))) goto done;
10247 vp = nd.ni_vp;
10248 nameidone(&nd);
10249
10250 #if CONFIG_MACF
10251 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
10252 if (error) {
10253 goto done;
10254 }
10255 #endif
10256
10257 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10258
10259 done:
10260 if (vp)
10261 vnode_put(vp);
10262 return error;
10263 }
10264 /* ARGSUSED */
10265 int
10266 ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
10267 {
10268 int error;
10269 vnode_t vp = NULL;
10270 vfs_context_t ctx = vfs_context_current();
10271 int fd = -1;
10272
10273 AUDIT_ARG(fd, uap->fd);
10274 AUDIT_ARG(cmd, uap->cmd);
10275 AUDIT_ARG(value32, uap->options);
10276
10277 /* Get the vnode for the file we are getting info on: */
10278 if ((error = file_vnode(uap->fd, &vp)))
10279 return error;
10280 fd = uap->fd;
10281 if ((error = vnode_getwithref(vp))) {
10282 file_drop(fd);
10283 return error;
10284 }
10285
10286 #if CONFIG_MACF
10287 if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
10288 file_drop(fd);
10289 vnode_put(vp);
10290 return error;
10291 }
10292 #endif
10293
10294 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10295
10296 file_drop(fd);
10297
10298 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
10299 if (vp) {
10300 vnode_put(vp);
10301 }
10302
10303 return error;
10304 }
10305 /* end of fsctl system call */
10306
10307 /*
10308 * Retrieve the data of an extended attribute.
10309 */
10310 int
10311 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
10312 {
10313 vnode_t vp;
10314 struct nameidata nd;
10315 char attrname[XATTR_MAXNAMELEN+1];
10316 vfs_context_t ctx = vfs_context_current();
10317 uio_t auio = NULL;
10318 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10319 size_t attrsize = 0;
10320 size_t namelen;
10321 u_int32_t nameiflags;
10322 int error;
10323 char uio_buf[ UIO_SIZEOF(1) ];
10324
10325 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10326 return (EINVAL);
10327
10328 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10329 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
10330 if ((error = namei(&nd))) {
10331 return (error);
10332 }
10333 vp = nd.ni_vp;
10334 nameidone(&nd);
10335
10336 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10337 goto out;
10338 }
10339 if (xattr_protected(attrname)) {
10340 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
10341 error = EPERM;
10342 goto out;
10343 }
10344 }
10345 /*
10346 * the specific check for 0xffffffff is a hack to preserve
10347 * binaray compatibilty in K64 with applications that discovered
10348 * that passing in a buf pointer and a size of -1 resulted in
10349 * just the size of the indicated extended attribute being returned.
10350 * this isn't part of the documented behavior, but because of the
10351 * original implemtation's check for "uap->size > 0", this behavior
10352 * was allowed. In K32 that check turned into a signed comparison
10353 * even though uap->size is unsigned... in K64, we blow by that
10354 * check because uap->size is unsigned and doesn't get sign smeared
10355 * in the munger for a 32 bit user app. we also need to add a
10356 * check to limit the maximum size of the buffer being passed in...
10357 * unfortunately, the underlying fileystems seem to just malloc
10358 * the requested size even if the actual extended attribute is tiny.
10359 * because that malloc is for kernel wired memory, we have to put a
10360 * sane limit on it.
10361 *
10362 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
10363 * U64 running on K64 will yield -1 (64 bits wide)
10364 * U32/U64 running on K32 will yield -1 (32 bits wide)
10365 */
10366 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
10367 goto no_uio;
10368
10369 if (uap->value) {
10370 if (uap->size > (size_t)XATTR_MAXSIZE)
10371 uap->size = XATTR_MAXSIZE;
10372
10373 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10374 &uio_buf[0], sizeof(uio_buf));
10375 uio_addiov(auio, uap->value, uap->size);
10376 }
10377 no_uio:
10378 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
10379 out:
10380 vnode_put(vp);
10381
10382 if (auio) {
10383 *retval = uap->size - uio_resid(auio);
10384 } else {
10385 *retval = (user_ssize_t)attrsize;
10386 }
10387
10388 return (error);
10389 }
10390
10391 /*
10392 * Retrieve the data of an extended attribute.
10393 */
10394 int
10395 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
10396 {
10397 vnode_t vp;
10398 char attrname[XATTR_MAXNAMELEN+1];
10399 uio_t auio = NULL;
10400 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10401 size_t attrsize = 0;
10402 size_t namelen;
10403 int error;
10404 char uio_buf[ UIO_SIZEOF(1) ];
10405
10406 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10407 return (EINVAL);
10408
10409 if ( (error = file_vnode(uap->fd, &vp)) ) {
10410 return (error);
10411 }
10412 if ( (error = vnode_getwithref(vp)) ) {
10413 file_drop(uap->fd);
10414 return(error);
10415 }
10416 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10417 goto out;
10418 }
10419 if (xattr_protected(attrname)) {
10420 error = EPERM;
10421 goto out;
10422 }
10423 if (uap->value && uap->size > 0) {
10424 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10425 &uio_buf[0], sizeof(uio_buf));
10426 uio_addiov(auio, uap->value, uap->size);
10427 }
10428
10429 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
10430 out:
10431 (void)vnode_put(vp);
10432 file_drop(uap->fd);
10433
10434 if (auio) {
10435 *retval = uap->size - uio_resid(auio);
10436 } else {
10437 *retval = (user_ssize_t)attrsize;
10438 }
10439 return (error);
10440 }
10441
10442 /*
10443 * Set the data of an extended attribute.
10444 */
10445 int
10446 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
10447 {
10448 vnode_t vp;
10449 struct nameidata nd;
10450 char attrname[XATTR_MAXNAMELEN+1];
10451 vfs_context_t ctx = vfs_context_current();
10452 uio_t auio = NULL;
10453 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10454 size_t namelen;
10455 u_int32_t nameiflags;
10456 int error;
10457 char uio_buf[ UIO_SIZEOF(1) ];
10458
10459 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10460 return (EINVAL);
10461
10462 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10463 if (error == EPERM) {
10464 /* if the string won't fit in attrname, copyinstr emits EPERM */
10465 return (ENAMETOOLONG);
10466 }
10467 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10468 return error;
10469 }
10470 if (xattr_protected(attrname))
10471 return(EPERM);
10472 if (uap->size != 0 && uap->value == 0) {
10473 return (EINVAL);
10474 }
10475
10476 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10477 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
10478 if ((error = namei(&nd))) {
10479 return (error);
10480 }
10481 vp = nd.ni_vp;
10482 nameidone(&nd);
10483
10484 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10485 &uio_buf[0], sizeof(uio_buf));
10486 uio_addiov(auio, uap->value, uap->size);
10487
10488 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
10489 #if CONFIG_FSE
10490 if (error == 0) {
10491 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10492 FSE_ARG_VNODE, vp,
10493 FSE_ARG_DONE);
10494 }
10495 #endif
10496 vnode_put(vp);
10497 *retval = 0;
10498 return (error);
10499 }
10500
10501 /*
10502 * Set the data of an extended attribute.
10503 */
10504 int
10505 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
10506 {
10507 vnode_t vp;
10508 char attrname[XATTR_MAXNAMELEN+1];
10509 uio_t auio = NULL;
10510 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10511 size_t namelen;
10512 int error;
10513 char uio_buf[ UIO_SIZEOF(1) ];
10514 #if CONFIG_FSE
10515 vfs_context_t ctx = vfs_context_current();
10516 #endif
10517
10518 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10519 return (EINVAL);
10520
10521 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10522 if (error == EPERM) {
10523 /* if the string won't fit in attrname, copyinstr emits EPERM */
10524 return (ENAMETOOLONG);
10525 }
10526 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10527 return error;
10528 }
10529 if (xattr_protected(attrname))
10530 return(EPERM);
10531 if (uap->size != 0 && uap->value == 0) {
10532 return (EINVAL);
10533 }
10534 if ( (error = file_vnode(uap->fd, &vp)) ) {
10535 return (error);
10536 }
10537 if ( (error = vnode_getwithref(vp)) ) {
10538 file_drop(uap->fd);
10539 return(error);
10540 }
10541 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10542 &uio_buf[0], sizeof(uio_buf));
10543 uio_addiov(auio, uap->value, uap->size);
10544
10545 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
10546 #if CONFIG_FSE
10547 if (error == 0) {
10548 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10549 FSE_ARG_VNODE, vp,
10550 FSE_ARG_DONE);
10551 }
10552 #endif
10553 vnode_put(vp);
10554 file_drop(uap->fd);
10555 *retval = 0;
10556 return (error);
10557 }
10558
10559 /*
10560 * Remove an extended attribute.
10561 * XXX Code duplication here.
10562 */
10563 int
10564 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
10565 {
10566 vnode_t vp;
10567 struct nameidata nd;
10568 char attrname[XATTR_MAXNAMELEN+1];
10569 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10570 vfs_context_t ctx = vfs_context_current();
10571 size_t namelen;
10572 u_int32_t nameiflags;
10573 int error;
10574
10575 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10576 return (EINVAL);
10577
10578 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10579 if (error != 0) {
10580 return (error);
10581 }
10582 if (xattr_protected(attrname))
10583 return(EPERM);
10584 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10585 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
10586 if ((error = namei(&nd))) {
10587 return (error);
10588 }
10589 vp = nd.ni_vp;
10590 nameidone(&nd);
10591
10592 error = vn_removexattr(vp, attrname, uap->options, ctx);
10593 #if CONFIG_FSE
10594 if (error == 0) {
10595 add_fsevent(FSE_XATTR_REMOVED, ctx,
10596 FSE_ARG_VNODE, vp,
10597 FSE_ARG_DONE);
10598 }
10599 #endif
10600 vnode_put(vp);
10601 *retval = 0;
10602 return (error);
10603 }
10604
10605 /*
10606 * Remove an extended attribute.
10607 * XXX Code duplication here.
10608 */
10609 int
10610 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
10611 {
10612 vnode_t vp;
10613 char attrname[XATTR_MAXNAMELEN+1];
10614 size_t namelen;
10615 int error;
10616 #if CONFIG_FSE
10617 vfs_context_t ctx = vfs_context_current();
10618 #endif
10619
10620 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10621 return (EINVAL);
10622
10623 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10624 if (error != 0) {
10625 return (error);
10626 }
10627 if (xattr_protected(attrname))
10628 return(EPERM);
10629 if ( (error = file_vnode(uap->fd, &vp)) ) {
10630 return (error);
10631 }
10632 if ( (error = vnode_getwithref(vp)) ) {
10633 file_drop(uap->fd);
10634 return(error);
10635 }
10636
10637 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
10638 #if CONFIG_FSE
10639 if (error == 0) {
10640 add_fsevent(FSE_XATTR_REMOVED, ctx,
10641 FSE_ARG_VNODE, vp,
10642 FSE_ARG_DONE);
10643 }
10644 #endif
10645 vnode_put(vp);
10646 file_drop(uap->fd);
10647 *retval = 0;
10648 return (error);
10649 }
10650
10651 /*
10652 * Retrieve the list of extended attribute names.
10653 * XXX Code duplication here.
10654 */
10655 int
10656 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
10657 {
10658 vnode_t vp;
10659 struct nameidata nd;
10660 vfs_context_t ctx = vfs_context_current();
10661 uio_t auio = NULL;
10662 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10663 size_t attrsize = 0;
10664 u_int32_t nameiflags;
10665 int error;
10666 char uio_buf[ UIO_SIZEOF(1) ];
10667
10668 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10669 return (EINVAL);
10670
10671 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10672 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
10673 if ((error = namei(&nd))) {
10674 return (error);
10675 }
10676 vp = nd.ni_vp;
10677 nameidone(&nd);
10678 if (uap->namebuf != 0 && uap->bufsize > 0) {
10679 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
10680 &uio_buf[0], sizeof(uio_buf));
10681 uio_addiov(auio, uap->namebuf, uap->bufsize);
10682 }
10683
10684 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
10685
10686 vnode_put(vp);
10687 if (auio) {
10688 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10689 } else {
10690 *retval = (user_ssize_t)attrsize;
10691 }
10692 return (error);
10693 }
10694
10695 /*
10696 * Retrieve the list of extended attribute names.
10697 * XXX Code duplication here.
10698 */
10699 int
10700 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
10701 {
10702 vnode_t vp;
10703 uio_t auio = NULL;
10704 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10705 size_t attrsize = 0;
10706 int error;
10707 char uio_buf[ UIO_SIZEOF(1) ];
10708
10709 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10710 return (EINVAL);
10711
10712 if ( (error = file_vnode(uap->fd, &vp)) ) {
10713 return (error);
10714 }
10715 if ( (error = vnode_getwithref(vp)) ) {
10716 file_drop(uap->fd);
10717 return(error);
10718 }
10719 if (uap->namebuf != 0 && uap->bufsize > 0) {
10720 auio = uio_createwithbuffer(1, 0, spacetype,
10721 UIO_READ, &uio_buf[0], sizeof(uio_buf));
10722 uio_addiov(auio, uap->namebuf, uap->bufsize);
10723 }
10724
10725 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
10726
10727 vnode_put(vp);
10728 file_drop(uap->fd);
10729 if (auio) {
10730 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10731 } else {
10732 *retval = (user_ssize_t)attrsize;
10733 }
10734 return (error);
10735 }
10736
/*
 * fsgetpath_internal: resolve a (volfs id, object id) pair to the
 * object's absolute path.
 *
 * Looks the mount up by volfs id (taking a busy reference), vgets the
 * vnode by file id -- objid 2 conventionally names the volume root --
 * and builds the path into the caller-supplied 'buf' (at most
 * PAGE_SIZE bytes).  On success *pathlen is the path length.
 */
static int fsgetpath_internal(
    vfs_context_t ctx, int volfs_id, uint64_t objid,
    vm_size_t bufsize, caddr_t buf, int *pathlen)
{
    int error;
    struct mount *mp = NULL;
    vnode_t vp;
    int length;
    int bpflags;

    if (bufsize > PAGE_SIZE) {
        return (EINVAL);
    }

    if (buf == NULL) {
        return (ENOMEM);
    }

    /* Takes a busy reference on the mount; released via vfs_unbusy() below. */
    if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
        error = ENOTSUP;  /* unexpected failure */
        return ENOTSUP;
    }

unionget:
    /* objid 2 is the conventional root-directory file id. */
    if (objid == 2) {
        error = VFS_ROOT(mp, &vp, ctx);
    } else {
        error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
    }

    if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
        /*
         * If the fileid isn't found and we're in a union
         * mount volume, then see if the fileid is in the
         * mounted-on volume.
         */
        struct mount *tmp = mp;
        mp = vnode_mount(tmp->mnt_vnodecovered);
        vfs_unbusy(tmp);
        /* If the covered mount is busy, give up rather than block. */
        if (vfs_busy(mp, LK_NOWAIT) == 0)
            goto unionget;
    } else {
        vfs_unbusy(mp);
    }

    if (error) {
        return error;
    }

#if CONFIG_MACF
    error = mac_vnode_check_fsgetpath(ctx, vp);
    if (error) {
        vnode_put(vp);
        return error;
    }
#endif

    /* Obtain the absolute path to this vnode. */
    bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
    bpflags |= BUILDPATH_CHECK_MOVED;
    error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
    vnode_put(vp);

    if (error) {
        goto out;
    }

    AUDIT_ARG(text, buf);

    if (kdebug_enable) {
        long dbg_parms[NUMPARMS];
        int dbg_namelen;

        dbg_namelen = (int)sizeof(dbg_parms);

        if (length < dbg_namelen) {
            /* Short path: copy it whole and zero-pad the remainder. */
            memcpy((char *)dbg_parms, buf, length);
            memset((char *)dbg_parms + length, 0, dbg_namelen - length);

            dbg_namelen = length;
        } else {
            /* Long path: keep only the trailing dbg_namelen bytes. */
            memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
        }

        /*
         * NOTE(review): vp's iocount was dropped above; it appears to be
         * passed here only as an opaque tag for the trace record --
         * confirm kdebug_lookup_gen_events never dereferences it.
         */
        kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
    }

    *pathlen = (user_ssize_t)length;  /* may be superseded by error */

out:
    return (error);
}
10829
10830 /*
10831 * Obtain the full pathname of a file system object by id.
10832 *
10833 * This is a private SPI used by the File Manager.
10834 */
10835 __private_extern__
10836 int
10837 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
10838 {
10839 vfs_context_t ctx = vfs_context_current();
10840 fsid_t fsid;
10841 char *realpath;
10842 int length;
10843 int error;
10844
10845 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
10846 return (error);
10847 }
10848 AUDIT_ARG(value32, fsid.val[0]);
10849 AUDIT_ARG(value64, uap->objid);
10850 /* Restrict output buffer size for now. */
10851
10852 if (uap->bufsize > PAGE_SIZE) {
10853 return (EINVAL);
10854 }
10855 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
10856 if (realpath == NULL) {
10857 return (ENOMEM);
10858 }
10859
10860 error = fsgetpath_internal(
10861 ctx, fsid.val[0], uap->objid,
10862 uap->bufsize, realpath, &length);
10863
10864 if (error) {
10865 goto out;
10866 }
10867
10868 error = copyout((caddr_t)realpath, uap->buf, length);
10869
10870 *retval = (user_ssize_t)length; /* may be superseded by error */
10871 out:
10872 if (realpath) {
10873 FREE(realpath, M_TEMP);
10874 }
10875 return (error);
10876 }
10877
/*
 * Common routine to handle various flavors of statfs data heading out
 *	to user space.
 *
 * Fills either a user64_statfs or user32_statfs image from the mount's
 * vfsstatfs and copies it out to 'bufp'.  When 'partial_copy' is set,
 * the trailing reserved fields are omitted from the copyout (legacy
 * callers use the shorter layout).  *sizep, if non-NULL, receives the
 * full (non-partial) structure size.
 *
 * Returns:	0			Success
 *		EFAULT
 */
static int
munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
    user_addr_t bufp, int *sizep, boolean_t is_64_bit,
    boolean_t partial_copy)
{
    int error;
    int my_size, copy_size;

    if (is_64_bit) {
        struct user64_statfs sfs;
        my_size = copy_size = sizeof(sfs);
        bzero(&sfs, my_size);
        sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
        sfs.f_type = mp->mnt_vtable->vfc_typenum;
        sfs.f_reserved1 = (short)sfsp->f_fssubtype;
        sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
        sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
        sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
        sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
        sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
        sfs.f_files = (user64_long_t)sfsp->f_files;
        sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
        sfs.f_fsid = sfsp->f_fsid;
        sfs.f_owner = sfsp->f_owner;
        /* Some mounts advertise an overridden filesystem type name. */
        if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
            strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
        } else {
            strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
        }
        strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
        strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);

        if (partial_copy) {
            copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
        }
        error = copyout((caddr_t)&sfs, bufp, copy_size);
    }
    else {
        struct user32_statfs sfs;

        my_size = copy_size = sizeof(sfs);
        bzero(&sfs, my_size);

        sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
        sfs.f_type = mp->mnt_vtable->vfc_typenum;
        sfs.f_reserved1 = (short)sfsp->f_fssubtype;

        /*
         * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
         * have to fudge the numbers here in that case.   We inflate the blocksize in order
         * to reflect the filesystem size as best we can.
         */
        if ((sfsp->f_blocks > INT_MAX)
            /* Hack for 4061702 . I think the real fix is for Carbon to
             * look for some volume capability and not depend on hidden
             * semantics agreed between a FS and carbon.
             * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
             * for Carbon to set bNoVolumeSizes volume attribute.
             * Without this the webdavfs files cannot be copied onto
             * disk as they look huge. This change should not affect
             * XSAN as they should not setting these to -1..
             */
             && (sfsp->f_blocks != 0xffffffffffffffffULL)
             && (sfsp->f_bfree != 0xffffffffffffffffULL)
             && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
            int		shift;

            /*
             * Work out how far we have to shift the block count down to make it fit.
             * Note that it's possible to have to shift so far that the resulting
             * blocksize would be unreportably large.  At that point, we will clip
             * any values that don't fit.
             *
             * For safety's sake, we also ensure that f_iosize is never reported as
             * being smaller than f_bsize.
             */
            for (shift = 0; shift < 32; shift++) {
                if ((sfsp->f_blocks >> shift) <= INT_MAX)
                    break;
                if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
                    break;
            }
#define __SHIFT_OR_CLIP(x, s)	((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
            sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
            sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
            sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
#undef __SHIFT_OR_CLIP
            sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
            sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
        } else {
            /* filesystem is small enough to be reported honestly */
            sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
            sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
            sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
            sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
            sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
        }
        sfs.f_files = (user32_long_t)sfsp->f_files;
        sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
        sfs.f_fsid = sfsp->f_fsid;
        sfs.f_owner = sfsp->f_owner;
        if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
            strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
        } else {
            strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
        }
        strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
        strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);

        if (partial_copy) {
            copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
        }
        error = copyout((caddr_t)&sfs, bufp, copy_size);
    }

    if (sizep != NULL) {
        *sizep = my_size;
    }
    return(error);
}
11005
/*
 * copy stat structure into user_stat structure.
 *
 * Field-for-field ABI copy from the kernel's struct stat into the
 * 64-bit-user layout.  The destination is zeroed first so padding and
 * reserved fields never leak kernel stack contents.  Timestamps are
 * copied either as timespecs or as split sec/nsec fields depending on
 * whether _POSIX_C_SOURCE is defined.
 */
void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
{
    bzero(usbp, sizeof(*usbp));

    usbp->st_dev = sbp->st_dev;
    usbp->st_ino = sbp->st_ino;
    usbp->st_mode = sbp->st_mode;
    usbp->st_nlink = sbp->st_nlink;
    usbp->st_uid = sbp->st_uid;
    usbp->st_gid = sbp->st_gid;
    usbp->st_rdev = sbp->st_rdev;
#ifndef _POSIX_C_SOURCE
    usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
    usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
    usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
    usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
    usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
    usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
#else
    usbp->st_atime = sbp->st_atime;
    usbp->st_atimensec = sbp->st_atimensec;
    usbp->st_mtime = sbp->st_mtime;
    usbp->st_mtimensec = sbp->st_mtimensec;
    usbp->st_ctime = sbp->st_ctime;
    usbp->st_ctimensec = sbp->st_ctimensec;
#endif
    usbp->st_size = sbp->st_size;
    usbp->st_blocks = sbp->st_blocks;
    usbp->st_blksize = sbp->st_blksize;
    usbp->st_flags = sbp->st_flags;
    usbp->st_gen = sbp->st_gen;
    usbp->st_lspare = sbp->st_lspare;
    usbp->st_qspare[0] = sbp->st_qspare[0];
    usbp->st_qspare[1] = sbp->st_qspare[1];
}
11044
/*
 * Field-for-field ABI copy from the kernel's struct stat into the
 * 32-bit-user layout (sizes may narrow per the user32 field types).
 * The destination is zeroed first so padding never leaks kernel data.
 */
void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
{
    bzero(usbp, sizeof(*usbp));

    usbp->st_dev = sbp->st_dev;
    usbp->st_ino = sbp->st_ino;
    usbp->st_mode = sbp->st_mode;
    usbp->st_nlink = sbp->st_nlink;
    usbp->st_uid = sbp->st_uid;
    usbp->st_gid = sbp->st_gid;
    usbp->st_rdev = sbp->st_rdev;
#ifndef _POSIX_C_SOURCE
    usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
    usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
    usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
    usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
    usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
    usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
#else
    usbp->st_atime = sbp->st_atime;
    usbp->st_atimensec = sbp->st_atimensec;
    usbp->st_mtime = sbp->st_mtime;
    usbp->st_mtimensec = sbp->st_mtimensec;
    usbp->st_ctime = sbp->st_ctime;
    usbp->st_ctimensec = sbp->st_ctimensec;
#endif
    usbp->st_size = sbp->st_size;
    usbp->st_blocks = sbp->st_blocks;
    usbp->st_blksize = sbp->st_blksize;
    usbp->st_flags = sbp->st_flags;
    usbp->st_gen = sbp->st_gen;
    usbp->st_lspare = sbp->st_lspare;
    usbp->st_qspare[0] = sbp->st_qspare[0];
    usbp->st_qspare[1] = sbp->st_qspare[1];
}
11080
/*
 * copy stat64 structure into user_stat64 structure.
 *
 * Same field-for-field ABI copy as munge_user64_stat() but for the
 * stat64 layout, which adds the birthtime fields.  Destination is
 * zeroed first so padding never leaks kernel data.
 */
void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
{
    bzero(usbp, sizeof(*usbp));

    usbp->st_dev = sbp->st_dev;
    usbp->st_ino = sbp->st_ino;
    usbp->st_mode = sbp->st_mode;
    usbp->st_nlink = sbp->st_nlink;
    usbp->st_uid = sbp->st_uid;
    usbp->st_gid = sbp->st_gid;
    usbp->st_rdev = sbp->st_rdev;
#ifndef _POSIX_C_SOURCE
    usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
    usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
    usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
    usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
    usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
    usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
    usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
    usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
#else
    usbp->st_atime = sbp->st_atime;
    usbp->st_atimensec = sbp->st_atimensec;
    usbp->st_mtime = sbp->st_mtime;
    usbp->st_mtimensec = sbp->st_mtimensec;
    usbp->st_ctime = sbp->st_ctime;
    usbp->st_ctimensec = sbp->st_ctimensec;
    usbp->st_birthtime = sbp->st_birthtime;
    usbp->st_birthtimensec = sbp->st_birthtimensec;
#endif
    usbp->st_size = sbp->st_size;
    usbp->st_blocks = sbp->st_blocks;
    usbp->st_blksize = sbp->st_blksize;
    usbp->st_flags = sbp->st_flags;
    usbp->st_gen = sbp->st_gen;
    usbp->st_lspare = sbp->st_lspare;
    usbp->st_qspare[0] = sbp->st_qspare[0];
    usbp->st_qspare[1] = sbp->st_qspare[1];
}
11123
/*
 * Field-for-field ABI copy of the stat64 layout for 32-bit user
 * processes (includes birthtime).  Destination is zeroed first so
 * padding never leaks kernel data.
 */
void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
{
    bzero(usbp, sizeof(*usbp));

    usbp->st_dev = sbp->st_dev;
    usbp->st_ino = sbp->st_ino;
    usbp->st_mode = sbp->st_mode;
    usbp->st_nlink = sbp->st_nlink;
    usbp->st_uid = sbp->st_uid;
    usbp->st_gid = sbp->st_gid;
    usbp->st_rdev = sbp->st_rdev;
#ifndef _POSIX_C_SOURCE
    usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
    usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
    usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
    usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
    usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
    usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
    usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
    usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
#else
    usbp->st_atime = sbp->st_atime;
    usbp->st_atimensec = sbp->st_atimensec;
    usbp->st_mtime = sbp->st_mtime;
    usbp->st_mtimensec = sbp->st_mtimensec;
    usbp->st_ctime = sbp->st_ctime;
    usbp->st_ctimensec = sbp->st_ctimensec;
    usbp->st_birthtime = sbp->st_birthtime;
    usbp->st_birthtimensec = sbp->st_birthtimensec;
#endif
    usbp->st_size = sbp->st_size;
    usbp->st_blocks = sbp->st_blocks;
    usbp->st_blksize = sbp->st_blksize;
    usbp->st_flags = sbp->st_flags;
    usbp->st_gen = sbp->st_gen;
    usbp->st_lspare = sbp->st_lspare;
    usbp->st_qspare[0] = sbp->st_qspare[0];
    usbp->st_qspare[1] = sbp->st_qspare[1];
}
11163
11164 /*
11165 * Purge buffer cache for simulating cold starts
11166 */
11167 static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
11168 {
11169 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
11170
11171 return VNODE_RETURNED;
11172 }
11173
11174 static int vfs_purge_callback(mount_t mp, __unused void * arg)
11175 {
11176 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
11177
11178 return VFS_RETURNED;
11179 }
11180
11181 int
11182 vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
11183 {
11184 if (!kauth_cred_issuser(kauth_cred_get()))
11185 return EPERM;
11186
11187 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
11188
11189 return 0;
11190 }
11191
/*
 * gets the vnode associated with the (unnamed) snapshot directory
 * for a Filesystem. The snapshot directory vnode is returned with
 * an iocount on it.
 */
int
vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx)
{
    int error;

    /* Ask the filesystem for its snapshot directory first. */
    error = VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx);

#if CLONE_SNAPSHOT_FALLBACKS_ENABLED
    if (error == ENOTSUP) {
        struct nameidata snapnd;

        /*
         * Temporary fallback to <mountpoint>/.snaps lookup
         * XXX: To be removed.
         */
        NDINIT(&snapnd, LOOKUP, OP_LOOKUP, USEDVP,
               UIO_SYSSPACE, CAST_USER_ADDR_T(".snaps"), ctx);
        snapnd.ni_dvp = rvp;

        /* Any lookup failure is still reported as "not supported". */
        if ((error = namei(&snapnd))) {
            error = ENOTSUP;
            *sdvpp = NULLVP;
        } else {
            *sdvpp = snapnd.ni_vp;
            nameidone(&snapnd);
        }
    }
#endif /* CLONE_SNAPSHOT_FALLBACKS_ENABLED */
    return (error);
}
11227
/*
 * Get the snapshot vnode.
 *
 * If successful, the call returns with an iocount on *rvpp ,*sdvpp and
 * needs nameidone() on ndp.
 *
 * If the snapshot vnode exists it is returned in ndp->ni_vp.
 *
 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
 * not needed.
 */
static int
vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp,
    user_addr_t name, struct nameidata *ndp, int32_t op,
#if !CONFIG_TRIGGERS
    __unused
#endif
    enum path_operation pathop,
    vfs_context_t ctx)
{
    int error, i;
    caddr_t name_buf;
    size_t name_len;
    struct vfs_attr vfa;

    *sdvpp = NULLVP;
    *rvpp = NULLVP;

    /* dirfd must name the root of the volume being snapshotted. */
    error = vnode_getfromfd(ctx, dirfd, rvpp);
    if (error)
        return (error);

    if (!vnode_isvroot(*rvpp)) {
        error = EINVAL;
        goto out;
    }

    /* Make sure the filesystem supports snapshots */
    VFSATTR_INIT(&vfa);
    VFSATTR_WANTED(&vfa, f_capabilities);
    if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) ||
        !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) ||
        !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] &
        VOL_CAP_INT_SNAPSHOT)) ||
        !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &
        VOL_CAP_INT_SNAPSHOT))) {
        error = ENOTSUP;
        goto out;
    }

    error = vnode_get_snapdir(*rvpp, sdvpp, ctx);
    if (error)
        goto out;

    /* M_WAITOK: allocation blocks until memory is available. */
    MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
    error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
    if (error)
        goto out1;

    /*
     * Some sanity checks- name can't be empty, "." or ".." or have slashes.
     * (the length returned by copyinstr includes the terminating NUL)
     */
    if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
        (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
        error = EINVAL;
        goto out1;
    }
    /* Scan for a '/'; if the loop stops early, one was found. */
    for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++);
    if (i < (int)name_len) {
        error = EINVAL;
        goto out1;
    }

#if CONFIG_MACF
    if (op == CREATE) {
        error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp),
            name_buf);
    } else if (op == DELETE) {
        error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp),
            name_buf);
    }
    if (error)
        goto out1;
#endif

    /* Check if the snapshot already exists ... */
    NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1,
           UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx);
    ndp->ni_dvp = *sdvpp;

    error = namei(ndp);
out1:
    FREE(name_buf, M_TEMP);
out:
    /* On any failure, drop both iocounts and NULL the out-params. */
    if (error) {
        if (*sdvpp) {
            vnode_put(*sdvpp);
            *sdvpp = NULLVP;
        }
        if (*rvpp) {
            vnode_put(*rvpp);
            *rvpp = NULLVP;
        }
    }
    return (error);
}
11335
11336 /*
11337 * create a filesystem snapshot (for supporting filesystems)
11338 *
11339 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
11340 * We get to the (unnamed) snapshot directory vnode and create the vnode
11341 * for the snapshot in it.
11342 *
11343 * Restrictions:
11344 *
11345 * a) Passed in name for snapshot cannot have slashes.
11346 * b) name can't be "." or ".."
11347 *
11348 * Since this requires superuser privileges, vnode_authorize calls are not
11349 * made.
11350 */
static int
snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
    vfs_context_t ctx)
{
	vnode_t rvp, snapdvp;
	int error;
	struct nameidata namend;

	/*
	 * Resolve the filesystem root (from dirfd), its snapshot directory
	 * and attempt a CREATE-intent lookup of the snapshot name.  On
	 * success we hold iocounts on rvp and snapdvp, and namend describes
	 * the (possibly pre-existing) snapshot entry.
	 */
	error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE,
	    OP_LINK, ctx);
	if (error)
		return (error);

	if (namend.ni_vp) {
		/* Lookup found an existing snapshot with this name. */
		vnode_put(namend.ni_vp);
		error = EEXIST;
	} else {
		struct vnode_attr va;
		vnode_t vp = NULLVP;

		/*
		 * Create the snapshot as a zero-mode regular file.  The
		 * filesystem recognizes creation in the snapshot directory
		 * as a snapshot request.
		 */
		VATTR_INIT(&va);
		VATTR_SET(&va, va_type, VREG);
		VATTR_SET(&va, va_mode, 0);

		/*
		 * Caller already passed the superuser privilege check (see
		 * fs_snapshot()), so authorization and inheritance are
		 * skipped here.
		 */
		error = vn_create(snapdvp, &vp, &namend, &va,
		    VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
		if (!error && vp)
			vnode_put(vp);
#if CLONE_SNAPSHOT_FALLBACKS_ENABLED
		else if (error) {
			/* Fall back to asking the FS to clone the root. */
			error = VNOP_COPYFILE(rvp, rvp, NULLVP, &namend.ni_cnd,
			    0, 0, ctx);
		}
#endif /* CLONE_SNAPSHOT_FALLBACKS_ENABLED */
	}

	/* Release namei state and the iocounts taken by vnode_get_snapshot(). */
	nameidone(&namend);
	vnode_put(snapdvp);
	vnode_put(rvp);
	return (error);
}
11392
11393 /*
11394 * Delete a Filesystem snapshot
11395 *
11396 * get the vnode for the unnamed snapshot directory and the snapshot and
11397 * delete the snapshot.
11398 */
11399 static int
11400 snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
11401 vfs_context_t ctx)
11402 {
11403 vnode_t rvp, snapdvp;
11404 int error;
11405 struct nameidata namend;
11406
11407 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE,
11408 OP_UNLINK, ctx);
11409 if (error)
11410 goto out;
11411
11412 error = VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd,
11413 VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
11414
11415 vnode_put(namend.ni_vp);
11416 nameidone(&namend);
11417 vnode_put(snapdvp);
11418 vnode_put(rvp);
11419 out:
11420 return (error);
11421 }
11422
11423 /*
11424 * Revert a filesystem to a snapshot
11425 *
11426 * Marks the filesystem to revert to the given snapshot on next mount.
11427 */
static int
snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
    vfs_context_t ctx)
{
	int error;
	vnode_t rvp;
	mount_t mp;
	struct fs_snapshot_revert_args revert_data;
	struct componentname cnp;
	caddr_t name_buf;
	size_t name_len;

	error = vnode_getfromfd(ctx, dirfd, &rvp);
	if (error) {
		return (error);
	}
	mp = vnode_mount(rvp);

	/*
	 * Grab mount_iterref so that we can release the vnode,
	 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
	 */
	error = mount_iterref (mp, 0);
	vnode_put(rvp);
	if (error) {
		return (error);
	}

	/* Copy the user-supplied snapshot name into a kernel buffer. */
	MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
	error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
	if (error) {
		mount_iterdrop(mp);
		FREE(name_buf, M_TEMP);
		return (error);
	}

	/*
	 * Build a minimal componentname describing the snapshot name for the
	 * filesystem-level revert ioctl.  HASBUF marks cn_pnbuf as ours.
	 */
	memset(&cnp, 0, sizeof(cnp));
	cnp.cn_pnbuf = (char *)name_buf;
	cnp.cn_nameiop = LOOKUP;
	cnp.cn_flags = ISLASTCN | HASBUF;
	cnp.cn_pnlen = MAXPATHLEN;
	cnp.cn_nameptr = cnp.cn_pnbuf;
	cnp.cn_namelen = (int)name_len;
	revert_data.sr_cnp = &cnp;

	/* First attempt: ask the mount itself to schedule the revert. */
	error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
	mount_iterdrop(mp);
	FREE(name_buf, M_TEMP);

	if (error) {
		/* If there was any error, try again using VNOP_IOCTL */

		vnode_t snapdvp;
		struct nameidata namend;

		/* Re-resolve the snapshot vnode so we can ioctl it directly. */
		error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP,
		    OP_LOOKUP, ctx);
		if (error) {
			return (error);
		}


		/*
		 * Local fallback definitions of the APFS revert ioctl, used
		 * when the APFS headers are not available at build time.
		 */
#ifndef APFSIOC_REVERT_TO_SNAPSHOT
#define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
#endif

#ifndef APFS_REVERT_TO_SNAPSHOT
#define APFS_REVERT_TO_SNAPSHOT IOCBASECMD(APFSIOC_REVERT_TO_SNAPSHOT)
#endif

		/* Second attempt: issue the revert directly on the snapshot vnode. */
		error = VNOP_IOCTL(namend.ni_vp, APFS_REVERT_TO_SNAPSHOT, (caddr_t) NULL,
		    0, ctx);

		/* Release namei state and all iocounts taken above. */
		vnode_put(namend.ni_vp);
		nameidone(&namend);
		vnode_put(snapdvp);
		vnode_put(rvp);
	}

	return (error);
}
11509
11510 /*
11511 * rename a Filesystem snapshot
11512 *
11513 * get the vnode for the unnamed snapshot directory and the snapshot and
11514 * rename the snapshot. This is a very specialised (and simple) case of
11515 * rename(2) (which has to deal with a lot more complications). It differs
11516 * slightly from rename(2) in that EEXIST is returned if the new name exists.
11517 */
static int
snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
    __unused uint32_t flags, vfs_context_t ctx)
{
	vnode_t rvp, snapdvp;
	int error, i;
	caddr_t newname_buf;
	size_t name_len;
	vnode_t fvp;
	struct nameidata *fromnd, *tond;
	/* carving out a chunk for structs that are too big to be on stack. */
	struct {
		struct nameidata from_node;
		struct nameidata to_node;
	} * __rename_data;

	MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
	fromnd = &__rename_data->from_node;
	tond = &__rename_data->to_node;

	/*
	 * Resolve the root, the snapshot directory and the existing (old)
	 * snapshot with DELETE intent; on success we hold iocounts on rvp,
	 * snapdvp and fromnd->ni_vp.
	 */
	error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE,
	    OP_UNLINK, ctx);
	if (error)
		goto out;
	fvp = fromnd->ni_vp;

	MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
	error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
	if (error)
		goto out1;

	/*
	 * Some sanity checks - new name can't be empty, "." or ".." or have
	 * slashes.
	 * (the length returned by copyinstr includes the terminating NUL)
	 *
	 * The FS rename VNOP is supposed to handle this but we'll pick it
	 * off here itself.
	 */
	if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') ||
	    (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) {
		error = EINVAL;
		goto out1;
	}
	/* Scan for '/'; if the loop stops early, a slash was found. */
	for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++);
	if (i < (int)name_len) {
		error = EINVAL;
		goto out1;
	}

#if CONFIG_MACF
	/* Renaming to a new name is treated as creating that name for MAC. */
	error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp),
	    newname_buf);
	if (error)
		goto out1;
#endif

	/* Look up the destination name inside the snapshot directory. */
	NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
	    UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx);
	tond->ni_dvp = snapdvp;

	error = namei(tond);
	if (error) {
		goto out2;
	} else if (tond->ni_vp) {
		/*
		 * snapshot rename behaves differently than rename(2) - if the
		 * new name exists, EEXIST is returned.
		 */
		vnode_put(tond->ni_vp);
		error = EEXIST;
		goto out2;
	}

	/* Both source and destination live in the same snapshot directory. */
	error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
	    &tond->ni_cnd, ctx);

	/* Cascading cleanup: each label releases what was acquired before it. */
out2:
	nameidone(tond);
out1:
	FREE(newname_buf, M_TEMP);
	vnode_put(fvp);
	vnode_put(snapdvp);
	vnode_put(rvp);
	nameidone(fromnd);
out:
	FREE(__rename_data, M_TEMP);
	return (error);
}
11607
11608 /*
11609 * Mount a Filesystem snapshot
11610 *
11611 * get the vnode for the unnamed snapshot directory and the snapshot and
11612 * mount the snapshot.
11613 */
static int
snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
    user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
{
	vnode_t rvp, snapdvp, snapvp, vp, pvp;
	int error;
	struct nameidata *snapndp, *dirndp;
	/* carving out a chunk for structs that are too big to be on stack. */
	struct {
		struct nameidata snapnd;
		struct nameidata dirnd;
	} * __snapshot_mount_data;

	MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data),
	    M_TEMP, M_WAITOK);
	snapndp = &__snapshot_mount_data->snapnd;
	dirndp = &__snapshot_mount_data->dirnd;

	/*
	 * Resolve the root, the snapshot directory and the named snapshot;
	 * on success we hold iocounts on rvp, snapdvp and snapndp->ni_vp.
	 */
	error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP,
	    OP_LOOKUP, ctx);
	if (error)
		goto out;

	snapvp = snapndp->ni_vp;
	/* Refuse if the source filesystem has been unmounted underneath us. */
	if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) {
		error = EIO;
		goto out1;
	}

	/* Get the vnode to be covered */
	NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
	    UIO_USERSPACE, directory, ctx);
	error = namei(dirndp);
	if (error)
		goto out1;

	vp = dirndp->ni_vp;
	pvp = dirndp->ni_dvp;

	/* Mounting over the root of the root filesystem is not allowed. */
	if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
		error = EINVAL;
	} else {
		mount_t mp = vnode_mount(rvp);
		struct fs_snapshot_mount_args smnt_data;

		/*
		 * First try a kernel-initiated snapshot mount, handing the
		 * FS the source mount and the snapshot's componentname.
		 */
		smnt_data.sm_mp = mp;
		smnt_data.sm_cnp = &snapndp->ni_cnd;
		error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
		    &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), 0,
		    KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
		if (error) {
			/* Retry with user passed args */
			error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp,
			    vp, &dirndp->ni_cnd, CAST_USER_ADDR_T(mnt_data), 0,
			    0, NULL, FALSE, ctx);
		}
	}

	/* Release the cover vnode, its parent and the directory namei state. */
	vnode_put(vp);
	vnode_put(pvp);
	nameidone(dirndp);
out1:
	vnode_put(snapvp);
	vnode_put(snapdvp);
	vnode_put(rvp);
	nameidone(snapndp);
out:
	FREE(__snapshot_mount_data, M_TEMP);
	return (error);
}
11684
11685 /*
11686 * FS snapshot operations dispatcher
11687 */
11688 int
11689 fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap,
11690 __unused int32_t *retval)
11691 {
11692 int error;
11693 vfs_context_t ctx = vfs_context_current();
11694
11695 error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0);
11696 if (error)
11697 return (error);
11698
11699 switch (uap->op) {
11700 case SNAPSHOT_OP_CREATE:
11701 error = snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx);
11702 break;
11703 case SNAPSHOT_OP_DELETE:
11704 error = snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx);
11705 break;
11706 case SNAPSHOT_OP_RENAME:
11707 error = snapshot_rename(uap->dirfd, uap->name1, uap->name2,
11708 uap->flags, ctx);
11709 break;
11710 case SNAPSHOT_OP_MOUNT:
11711 error = snapshot_mount(uap->dirfd, uap->name1, uap->name2,
11712 uap->data, uap->flags, ctx);
11713 break;
11714 case SNAPSHOT_OP_REVERT:
11715 error = snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx);
11716 break;
11717 default:
11718 error = ENOSYS;
11719 }
11720
11721 return (error);
11722 }