apple/xnu.git: bsd/vfs/vfs_syscalls.c (blob 70f798e860bb3effc1b16050ffef63e3159e89de)
1 /*
2 * Copyright (c) 1995-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
80 #include <sys/stat.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
87 #include <sys/mman.h>
88 #include <sys/dirent.h>
89 #include <sys/attr.h>
90 #include <sys/sysctl.h>
91 #include <sys/ubc.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <sys/content_protection.h>
103 #include <sys/clonefile.h>
104 #include <sys/snapshot.h>
105 #include <sys/priv.h>
106 #include <machine/cons.h>
107 #include <machine/limits.h>
108 #include <miscfs/specfs/specdev.h>
109
110 #include <security/audit/audit.h>
111 #include <bsm/audit_kevents.h>
112
113 #include <mach/mach_types.h>
114 #include <kern/kern_types.h>
115 #include <kern/kalloc.h>
116 #include <kern/task.h>
117
118 #include <vm/vm_pageout.h>
119 #include <vm/vm_protos.h>
120
121 #include <libkern/OSAtomic.h>
122 #include <pexpert/pexpert.h>
123 #include <IOKit/IOBSD.h>
124
125 #if ROUTEFS
126 #include <miscfs/routefs/routefs.h>
127 #endif /* ROUTEFS */
128
129 #if CONFIG_MACF
130 #include <security/mac.h>
131 #include <security/mac_framework.h>
132 #endif
133
134 #if CONFIG_FSE
135 #define GET_PATH(x) \
136 (x) = get_pathbuff();
137 #define RELEASE_PATH(x) \
138 release_pathbuff(x);
139 #else
140 #define GET_PATH(x) \
141 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
142 #define RELEASE_PATH(x) \
143 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
144 #endif /* CONFIG_FSE */
145
146 /* struct for checkdirs iteration */
147 struct cdirargs {
148 vnode_t olddp;
149 vnode_t newdp;
150 };
151 /* callback for checkdirs iteration */
152 static int checkdirs_callback(proc_t p, void * arg);
153
154 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
155 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
156 void enablequotas(struct mount *mp, vfs_context_t ctx);
157 static int getfsstat_callback(mount_t mp, void * arg);
158 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
159 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
160 static int sync_callback(mount_t, void *);
161 static void sync_thread(void *, __unused wait_result_t);
162 static int sync_async(int);
163 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
164 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
165 boolean_t partial_copy);
166 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
167 user_addr_t bufp);
168 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
169 static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
170 struct componentname *cnp, user_addr_t fsmountargs,
171 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
172 vfs_context_t ctx);
173 void vfs_notify_mount(vnode_t pdvp);
174
175 int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
176
177 struct fd_vn_data * fg_vn_data_alloc(void);
178
179 /*
180 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
181 * Concurrent lookups (or lookups by ids) on hard links can cause the
182 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
183 * does) to return ENOENT as the path cannot be returned from the name cache
184 * alone. We have no option but to retry and hope to get one namei->reverse path
185 * generation done without an intervening lookup (or lookup by id) on the hard link
186 * item. This is only an issue for MAC hooks that cannot re-enter the filesystem,
187 * which currently are the MAC hooks for rename, unlink, and rmdir.
188 */
189 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
190
191 static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
192
193 static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
194
195 #ifdef CONFIG_IMGSRC_ACCESS
196 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
197 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
198 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
199 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
200 static void mount_end_update(mount_t mp);
201 static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
202 #endif /* CONFIG_IMGSRC_ACCESS */
203
204 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
205
206 __private_extern__
207 int sync_internal(void);
208
209 __private_extern__
210 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
211
212 extern lck_grp_t *fd_vn_lck_grp;
213 extern lck_grp_attr_t *fd_vn_lck_grp_attr;
214 extern lck_attr_t *fd_vn_lck_attr;
215
216 /*
217 * incremented each time a mount or unmount operation occurs;
218 * used to invalidate the cached value of the rootvp in the
219 * mount structure utilized by cache_lookup_path
220 */
221 uint32_t mount_generation = 0;
222
223 /* counts number of mount and unmount operations */
224 unsigned int vfs_nummntops=0;
225
226 extern const struct fileops vnops;
227 #if CONFIG_APPLEDOUBLE
228 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
229 #endif /* CONFIG_APPLEDOUBLE */
230
231 /*
232 * Virtual File System System Calls
233 */
234
235 #if NFSCLIENT || DEVFS || ROUTEFS
236 /*
237 * Private in-kernel mounting SPI (for NFS, devfs, and routefs; not exported)
238 */
239 __private_extern__
240 boolean_t
241 vfs_iskernelmount(mount_t mp)
242 {
243 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
244 }
245
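/*
 * In-kernel mount entry point: if no covered vnode is supplied, resolve
 * 'path' with namei() to find it (and its parent), then hand off to
 * mount_common() with kernelmount == TRUE.
 */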
246 __private_extern__
247 int
248 kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
249 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
250 {
251 struct nameidata nd;
252 boolean_t did_namei;
253 int error;
254
255 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
256 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
257
258 /*
259 * Get the vnode to be covered if it's not supplied
260 */
261 if (vp == NULLVP) {
262 error = namei(&nd);
263 if (error)
264 return (error);
265 vp = nd.ni_vp;
266 pvp = nd.ni_dvp;
267 did_namei = TRUE;
268 } else {
269 char *pnbuf = CAST_DOWN(char *, path);
270
271 nd.ni_cnd.cn_pnbuf = pnbuf;
272 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
273 did_namei = FALSE;
274 }
275
276 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
277 syscall_flags, kern_flags, NULL, TRUE, ctx);
278
279 if (did_namei) {
280 vnode_put(vp);
281 vnode_put(pvp);
282 nameidone(&nd);
283 }
284
285 return (error);
286 }
287 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
288
289 /*
290 * Mount a file system.
291 */
292 /* ARGSUSED */
293 int
294 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
295 {
296 struct __mac_mount_args muap;
297
298 muap.type = uap->type;
299 muap.path = uap->path;
300 muap.flags = uap->flags;
301 muap.data = uap->data;
302 muap.mac_p = USER_ADDR_NULL;
303 return (__mac_mount(p, &muap, retval));
304 }
305
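/*
 * Notify interested parties of a new mount: post a VQ_MOUNT vfs event
 * and a NOTE_WRITE knote on the parent of the covered vnode.
 */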
306 void
307 vfs_notify_mount(vnode_t pdvp)
308 {
309 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
310 lock_vnode_and_post(pdvp, NOTE_WRITE);
311 }
312
313 /*
314 * __mac_mount:
315 * Mount a file system taking into account MAC label behavior.
316 * See mount(2) man page for more information
317 *
318 * Parameters: p Process requesting the mount
319 * uap User argument descriptor (see below)
320 * retval (ignored)
321 *
322 * Indirect: uap->type Filesystem type
323 * uap->path Path to mount
324 * uap->data Mount arguments
325 * uap->mac_p MAC info
326 * uap->flags Mount flags
327 *
328 *
329 * Returns: 0 Success
330 * !0 Not success
331 */
332 boolean_t root_fs_upgrade_try = FALSE;
333
334 int
335 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
336 {
337 vnode_t pvp = NULL;
338 vnode_t vp = NULL;
339 int need_nameidone = 0;
340 vfs_context_t ctx = vfs_context_current();
341 char fstypename[MFSNAMELEN];
342 struct nameidata nd;
343 size_t dummy=0;
344 char *labelstr = NULL;
345 int flags = uap->flags;
346 int error;
347 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
348 boolean_t is_64bit = IS_64BIT_PROCESS(p);
349 #else
350 #pragma unused(p)
351 #endif
352 /*
353 * Get the fs type name from user space
354 */
355 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
356 if (error)
357 return (error);
358
359 /*
360 * Get the vnode to be covered
361 */
362 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
363 UIO_USERSPACE, uap->path, ctx);
364 error = namei(&nd);
365 if (error) {
366 goto out;
367 }
368 need_nameidone = 1;
369 vp = nd.ni_vp;
370 pvp = nd.ni_dvp;
371
372 #ifdef CONFIG_IMGSRC_ACCESS
373 /* Mounting image source cannot be batched with other operations */
374 if (flags == MNT_IMGSRC_BY_INDEX) {
375 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
376 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
377 goto out;
378 }
379 #endif /* CONFIG_IMGSRC_ACCESS */
380
381 #if CONFIG_MACF
382 /*
383 * Get the label string (if any) from user space
384 */
385 if (uap->mac_p != USER_ADDR_NULL) {
386 struct user_mac mac;
387 size_t ulen = 0;
388
389 if (is_64bit) {
390 struct user64_mac mac64;
391 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
392 mac.m_buflen = mac64.m_buflen;
393 mac.m_string = mac64.m_string;
394 } else {
395 struct user32_mac mac32;
396 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
397 mac.m_buflen = mac32.m_buflen;
398 mac.m_string = mac32.m_string;
399 }
400 if (error)
401 goto out;
402 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
403 (mac.m_buflen < 2)) {
404 error = EINVAL;
405 goto out;
406 }
407 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
408 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
409 if (error) {
410 goto out;
411 }
412 AUDIT_ARG(mac_string, labelstr);
413 }
414 #endif /* CONFIG_MACF */
415
416 AUDIT_ARG(fflags, flags);
417
418 #if SECURE_KERNEL
419 if (flags & MNT_UNION) {
420 /* No union mounts on release kernels */
421 error = EPERM;
422 goto out;
423 }
424 #endif
425
426 if ((vp->v_flag & VROOT) &&
427 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
428 if (!(flags & MNT_UNION)) {
429 flags |= MNT_UPDATE;
430 }
431 else {
432 /*
433 * For a union mount on '/', treat it as a fresh
434 * mount instead of an update.
435 * Otherwise, union mounting on '/' used to panic the
436 * system, since mnt_vnodecovered was found to
437 * be NULL for '/', which unionlookup requires
438 * after it gets ENOENT on the union mount.
439 */
440 flags = (flags & ~(MNT_UPDATE));
441 }
442
443 #if SECURE_KERNEL
444 if ((flags & MNT_RDONLY) == 0) {
445 /* Release kernels are not allowed to mount "/" as rw */
446 error = EPERM;
447 goto out;
448 }
449 #endif
450 /*
451 * See 7392553 for more details on why this check exists.
452 * Suffice to say: If this check is ON and something tries
453 * to mount the rootFS RW, we'll turn off the codesign
454 * bitmap optimization.
455 */
456 #if CHECK_CS_VALIDATION_BITMAP
457 if ((flags & MNT_RDONLY) == 0 ) {
458 root_fs_upgrade_try = TRUE;
459 }
460 #endif
461 }
462
463 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
464 labelstr, FALSE, ctx);
465
466 out:
467
468 #if CONFIG_MACF
469 if (labelstr)
470 FREE(labelstr, M_MACTEMP);
471 #endif /* CONFIG_MACF */
472
473 if (vp) {
474 vnode_put(vp);
475 }
476 if (pvp) {
477 vnode_put(pvp);
478 }
479 if (need_nameidone) {
480 nameidone(&nd);
481 }
482
483 return (error);
484 }
485
486 /*
487 * common mount implementation (final stage of mounting)
488 *
489 * Arguments:
490 * fstypename file system type (i.e., its VFS name)
491 * pvp parent of covered vnode
492 * vp covered vnode
493 * cnp component name (i.e., path) of covered vnode
494 * flags generic mount flags
495 * fsmountargs file system specific data
496 * labelstr optional MAC label
497 * kernelmount TRUE for mounts initiated from inside the kernel
498 * ctx caller's context
499 */
500 static int
501 mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
502 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
503 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
504 {
505 #if !CONFIG_MACF
506 #pragma unused(labelstr)
507 #endif
508 struct vnode *devvp = NULLVP;
509 struct vnode *device_vnode = NULLVP;
510 #if CONFIG_MACF
511 struct vnode *rvp;
512 #endif
513 struct mount *mp;
514 struct vfstable *vfsp = (struct vfstable *)0;
515 struct proc *p = vfs_context_proc(ctx);
516 int error, flag = 0;
517 user_addr_t devpath = USER_ADDR_NULL;
518 int ronly = 0;
519 int mntalloc = 0;
520 boolean_t vfsp_ref = FALSE;
521 boolean_t is_rwlock_locked = FALSE;
522 boolean_t did_rele = FALSE;
523 boolean_t have_usecount = FALSE;
524
525 /*
526 * Process an update for an existing mount
527 */
528 if (flags & MNT_UPDATE) {
529 if ((vp->v_flag & VROOT) == 0) {
530 error = EINVAL;
531 goto out1;
532 }
533 mp = vp->v_mount;
534
535 /* if an unmount is in progress, return an error */
536 mount_lock_spin(mp);
537 if (mp->mnt_lflag & MNT_LUNMOUNT) {
538 mount_unlock(mp);
539 error = EBUSY;
540 goto out1;
541 }
542 mount_unlock(mp);
543 lck_rw_lock_exclusive(&mp->mnt_rwlock);
544 is_rwlock_locked = TRUE;
545 /*
546 * We only allow the filesystem to be reloaded if it
547 * is currently mounted read-only.
548 */
549 if ((flags & MNT_RELOAD) &&
550 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
551 error = ENOTSUP;
552 goto out1;
553 }
554
555 /*
556 * If content protection is enabled, update mounts are not
557 * allowed to turn it off.
558 */
559 if ((mp->mnt_flag & MNT_CPROTECT) &&
560 ((flags & MNT_CPROTECT) == 0)) {
561 error = EINVAL;
562 goto out1;
563 }
564
565 #ifdef CONFIG_IMGSRC_ACCESS
566 /* Can't downgrade the backer of the root FS */
567 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
568 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
569 error = ENOTSUP;
570 goto out1;
571 }
572 #endif /* CONFIG_IMGSRC_ACCESS */
573
574 /*
575 * Only root, or the user that did the original mount is
576 * permitted to update it.
577 */
578 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
579 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
580 goto out1;
581 }
582 #if CONFIG_MACF
583 error = mac_mount_check_remount(ctx, mp);
584 if (error != 0) {
585 goto out1;
586 }
587 #endif
588 /*
589 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
590 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
591 */
592 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
593 flags |= MNT_NOSUID | MNT_NODEV;
594 if (mp->mnt_flag & MNT_NOEXEC)
595 flags |= MNT_NOEXEC;
596 }
597 flag = mp->mnt_flag;
598
599
600
601 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
602
603 vfsp = mp->mnt_vtable;
604 goto update;
605 }
606 /*
607 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
608 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
609 */
610 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
611 flags |= MNT_NOSUID | MNT_NODEV;
612 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
613 flags |= MNT_NOEXEC;
614 }
615
616 /* XXXAUDIT: Should we capture the type on the error path as well? */
617 AUDIT_ARG(text, fstypename);
618 mount_list_lock();
619 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
620 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
621 vfsp->vfc_refcount++;
622 vfsp_ref = TRUE;
623 break;
624 }
625 mount_list_unlock();
626 if (vfsp == NULL) {
627 error = ENODEV;
628 goto out1;
629 }
630
631 /*
632 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
633 */
634 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
635 error = EINVAL; /* unsupported request */
636 goto out1;
637 }
638
639 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
640 if (error != 0) {
641 goto out1;
642 }
643
644 /*
645 * Allocate and initialize the filesystem (mount_t)
646 */
647 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
648 M_MOUNT, M_WAITOK);
649 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
650 mntalloc = 1;
651
652 /* Initialize the default IO constraints */
653 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
654 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
655 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
656 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
657 mp->mnt_devblocksize = DEV_BSIZE;
658 mp->mnt_alignmentmask = PAGE_MASK;
659 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
660 mp->mnt_ioscale = 1;
661 mp->mnt_ioflags = 0;
662 mp->mnt_realrootvp = NULLVP;
663 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
664
665 TAILQ_INIT(&mp->mnt_vnodelist);
666 TAILQ_INIT(&mp->mnt_workerqueue);
667 TAILQ_INIT(&mp->mnt_newvnodes);
668 mount_lock_init(mp);
669 lck_rw_lock_exclusive(&mp->mnt_rwlock);
670 is_rwlock_locked = TRUE;
671 mp->mnt_op = vfsp->vfc_vfsops;
672 mp->mnt_vtable = vfsp;
673 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
674 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
675 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
676 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
677 mp->mnt_vnodecovered = vp;
678 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
679 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
680 mp->mnt_devbsdunit = 0;
681
682 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
683 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
684
685 #if NFSCLIENT || DEVFS || ROUTEFS
686 if (kernelmount)
687 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
688 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
689 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
690 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
691
692 update:
693 /*
694 * Set the mount level flags.
695 */
696 if (flags & MNT_RDONLY)
697 mp->mnt_flag |= MNT_RDONLY;
698 else if (mp->mnt_flag & MNT_RDONLY) {
699 // disallow read/write upgrades of file systems that
700 // had the TYPENAME_OVERRIDE feature set.
701 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
702 error = EPERM;
703 goto out1;
704 }
705 mp->mnt_kern_flag |= MNTK_WANTRDWR;
706 }
707 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
708 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
709 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
710 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
711 MNT_QUARANTINE | MNT_CPROTECT);
712
713 #if SECURE_KERNEL
714 #if !CONFIG_MNT_SUID
715 /*
716 * On release builds of iOS based platforms, always enforce NOSUID and NODEV on
717 * all mounts. We do this here because we can catch update mounts as well as
718 * non-update mounts in this case.
719 */
720 mp->mnt_flag |= (MNT_NOSUID);
721 #endif
722 #endif
723
724 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
725 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
726 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
727 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
728 MNT_QUARANTINE | MNT_CPROTECT);
729
730 #if CONFIG_MACF
731 if (flags & MNT_MULTILABEL) {
732 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
733 error = EINVAL;
734 goto out1;
735 }
736 mp->mnt_flag |= MNT_MULTILABEL;
737 }
738 #endif
739 /*
740 * Process device path for local file systems if requested
741 */
742 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS &&
743 !(internal_flags & KERNEL_MOUNT_SNAPSHOT)) {
744 if (vfs_context_is64bit(ctx)) {
745 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
746 goto out1;
747 fsmountargs += sizeof(devpath);
748 } else {
749 user32_addr_t tmp;
750 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
751 goto out1;
752 /* munge into LP64 addr */
753 devpath = CAST_USER_ADDR_T(tmp);
754 fsmountargs += sizeof(tmp);
755 }
756
757 /* Lookup device and authorize access to it */
758 if ((devpath)) {
759 struct nameidata nd;
760
761 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
762 if ( (error = namei(&nd)) )
763 goto out1;
764
765 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
766 devvp = nd.ni_vp;
767
768 nameidone(&nd);
769
770 if (devvp->v_type != VBLK) {
771 error = ENOTBLK;
772 goto out2;
773 }
774 if (major(devvp->v_rdev) >= nblkdev) {
775 error = ENXIO;
776 goto out2;
777 }
778 /*
779 * If mount by non-root, then verify that user has necessary
780 * permissions on the device.
781 */
782 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
783 mode_t accessmode = KAUTH_VNODE_READ_DATA;
784
785 if ((mp->mnt_flag & MNT_RDONLY) == 0)
786 accessmode |= KAUTH_VNODE_WRITE_DATA;
787 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
788 goto out2;
789 }
790 }
791 /* On first mount, preflight and open device */
792 if (devpath && ((flags & MNT_UPDATE) == 0)) {
793 if ( (error = vnode_ref(devvp)) )
794 goto out2;
795 /*
796 * Disallow multiple mounts of the same device.
797 * Disallow mounting of a device that is currently in use
798 * (except for root, which might share swap device for miniroot).
799 * Flush out any old buffers remaining from a previous use.
800 */
801 if ( (error = vfs_mountedon(devvp)) )
802 goto out3;
803
804 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
805 error = EBUSY;
806 goto out3;
807 }
808 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
809 error = ENOTBLK;
810 goto out3;
811 }
812 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
813 goto out3;
814
815 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
816 #if CONFIG_MACF
817 error = mac_vnode_check_open(ctx,
818 devvp,
819 ronly ? FREAD : FREAD|FWRITE);
820 if (error)
821 goto out3;
822 #endif /* MAC */
823 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
824 goto out3;
825
826 mp->mnt_devvp = devvp;
827 device_vnode = devvp;
828
829 } else if ((mp->mnt_flag & MNT_RDONLY) &&
830 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
831 (device_vnode = mp->mnt_devvp)) {
832 dev_t dev;
833 int maj;
834 /*
835 * If upgrade to read-write by non-root, then verify
836 * that user has necessary permissions on the device.
837 */
838 vnode_getalways(device_vnode);
839
840 if (suser(vfs_context_ucred(ctx), NULL) &&
841 (error = vnode_authorize(device_vnode, NULL,
842 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
843 ctx)) != 0) {
844 vnode_put(device_vnode);
845 goto out2;
846 }
847
848 /* Tell the device that we're upgrading */
849 dev = (dev_t)device_vnode->v_rdev;
850 maj = major(dev);
851
852 if ((u_int)maj >= (u_int)nblkdev)
853 panic("Volume mounted on a device with invalid major number.");
854
855 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
856 vnode_put(device_vnode);
857 device_vnode = NULLVP;
858 if (error != 0) {
859 goto out2;
860 }
861 }
862 }
863 #if CONFIG_MACF
864 if ((flags & MNT_UPDATE) == 0) {
865 mac_mount_label_init(mp);
866 mac_mount_label_associate(ctx, mp);
867 }
868 if (labelstr) {
869 if ((flags & MNT_UPDATE) != 0) {
870 error = mac_mount_check_label_update(ctx, mp);
871 if (error != 0)
872 goto out3;
873 }
874 }
875 #endif
876 /*
877 * Mount the filesystem.
878 */
879 if (internal_flags & KERNEL_MOUNT_SNAPSHOT) {
880 error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT,
881 (caddr_t)fsmountargs, 0, ctx);
882 } else {
883 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
884 }
885
886 if (flags & MNT_UPDATE) {
887 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
888 mp->mnt_flag &= ~MNT_RDONLY;
889 mp->mnt_flag &=~
890 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
891 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
892 if (error)
893 mp->mnt_flag = flag; /* restore flag value */
894 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
895 lck_rw_done(&mp->mnt_rwlock);
896 is_rwlock_locked = FALSE;
897 if (!error)
898 enablequotas(mp, ctx);
899 goto exit;
900 }
901
902 /*
903 * Put the new filesystem on the mount list after root.
904 */
905 if (error == 0) {
906 struct vfs_attr vfsattr;
907 #if CONFIG_MACF
908 if (vfs_flags(mp) & MNT_MULTILABEL) {
909 error = VFS_ROOT(mp, &rvp, ctx);
910 if (error) {
911 printf("%s() VFS_ROOT returned %d\n", __func__, error);
912 goto out3;
913 }
914 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
915 /*
916 * drop reference provided by VFS_ROOT
917 */
918 vnode_put(rvp);
919
920 if (error)
921 goto out3;
922 }
923 #endif /* MAC */
924
925 vnode_lock_spin(vp);
926 CLR(vp->v_flag, VMOUNT);
927 vp->v_mountedhere = mp;
928 vnode_unlock(vp);
929
930 /*
931 * taking the name_cache_lock exclusively will
932 * ensure that everyone is out of the fast path who
933 * might be trying to use a now stale copy of
934 * vp->v_mountedhere->mnt_realrootvp
935 * bumping mount_generation causes the cached values
936 * to be invalidated
937 */
938 name_cache_lock();
939 mount_generation++;
940 name_cache_unlock();
941
942 error = vnode_ref(vp);
943 if (error != 0) {
944 goto out4;
945 }
946
947 have_usecount = TRUE;
948
949 error = checkdirs(vp, ctx);
950 if (error != 0) {
951 /* Unmount the filesystem as cdir/rdirs cannot be updated */
952 goto out4;
953 }
954 /*
955 * there is no cleanup code here, so the return value is ignored (cast to void);
956 * we need to revisit this
957 */
958 (void)VFS_START(mp, 0, ctx);
959
960 if (mount_list_add(mp) != 0) {
961 /*
962 * The system is shutting down trying to umount
963 * everything, so fail with a plausible errno.
964 */
965 error = EBUSY;
966 goto out4;
967 }
968 lck_rw_done(&mp->mnt_rwlock);
969 is_rwlock_locked = FALSE;
970
971 /* Check if this mounted file system supports EAs or named streams. */
972 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
973 VFSATTR_INIT(&vfsattr);
974 VFSATTR_WANTED(&vfsattr, f_capabilities);
975 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
976 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
977 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
978 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
979 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
980 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
981 }
982 #if NAMEDSTREAMS
983 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
984 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
985 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
986 }
987 #endif
988 /* Check if this file system supports path from id lookups. */
989 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
990 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
991 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
992 } else if (mp->mnt_flag & MNT_DOVOLFS) {
993 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
994 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
995 }
996
997 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) &&
998 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) {
999 mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS;
1000 }
1001 }
1002 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
1003 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1004 }
1005 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
1006 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
1007 }
1008 /* increment the operations count */
1009 OSAddAtomic(1, &vfs_nummntops);
1010 enablequotas(mp, ctx);
1011
1012 if (device_vnode) {
1013 device_vnode->v_specflags |= SI_MOUNTEDON;
1014
1015 /*
1016 * cache the IO attributes for the underlying physical media...
1017 * an error return indicates the underlying driver doesn't
1018 * support all the queries necessary... however, reasonable
1019 * defaults will have been set, so no reason to bail or care
1020 */
1021 vfs_init_io_attributes(device_vnode, mp);
1022 }
1023
1024 /* Now that mount is setup, notify the listeners */
1025 vfs_notify_mount(pvp);
1026 IOBSDMountChange(mp, kIOMountChangeMount);
1027
1028 } else {
1029 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1030 if (mp->mnt_vnodelist.tqh_first != NULL) {
1031 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1032 mp->mnt_vtable->vfc_name, error);
1033 }
1034
1035 vnode_lock_spin(vp);
1036 CLR(vp->v_flag, VMOUNT);
1037 vnode_unlock(vp);
1038 mount_list_lock();
1039 mp->mnt_vtable->vfc_refcount--;
1040 mount_list_unlock();
1041
1042 if (device_vnode ) {
1043 vnode_rele(device_vnode);
1044 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
1045 }
1046 lck_rw_done(&mp->mnt_rwlock);
1047 is_rwlock_locked = FALSE;
1048
1049 /*
1050 * if we get here, we have a mount structure that needs to be freed,
1051 * but since the coveredvp hasn't yet been updated to point at it,
1052 * no need to worry about other threads holding a crossref on this mp,
1053 * so it's OK to just free it
1054 */
1055 mount_lock_destroy(mp);
1056 #if CONFIG_MACF
1057 mac_mount_label_destroy(mp);
1058 #endif
1059 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1060 }
1061 exit:
1062 /*
1063 * drop I/O count on the device vp if there was one
1064 */
1065 if (devpath && devvp)
1066 vnode_put(devvp);
1067
1068 return(error);
1069
1070 /* Error condition exits */
1071 out4:
1072 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
1073
1074 /*
1075 * If the mount has been placed on the covered vp,
1076 * it may have been discovered by now, so we have
1077 * to treat this just like an unmount
1078 */
1079 mount_lock_spin(mp);
1080 mp->mnt_lflag |= MNT_LDEAD;
1081 mount_unlock(mp);
1082
1083 if (device_vnode != NULLVP) {
1084 vnode_rele(device_vnode);
1085 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1086 ctx);
1087 did_rele = TRUE;
1088 }
1089
1090 vnode_lock_spin(vp);
1091
1092 mp->mnt_crossref++;
1093 vp->v_mountedhere = (mount_t) 0;
1094
1095 vnode_unlock(vp);
1096
1097 if (have_usecount) {
1098 vnode_rele(vp);
1099 }
1100 out3:
1101 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
1102 vnode_rele(devvp);
1103 out2:
1104 if (devpath && devvp)
1105 vnode_put(devvp);
1106 out1:
1107 /* Release mnt_rwlock only when it was taken */
1108 if (is_rwlock_locked == TRUE) {
1109 lck_rw_done(&mp->mnt_rwlock);
1110 }
1111
1112 if (mntalloc) {
1113 if (mp->mnt_crossref)
1114 mount_dropcrossref(mp, vp, 0);
1115 else {
1116 mount_lock_destroy(mp);
1117 #if CONFIG_MACF
1118 mac_mount_label_destroy(mp);
1119 #endif
1120 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1121 }
1122 }
1123 if (vfsp_ref) {
1124 mount_list_lock();
1125 vfsp->vfc_refcount--;
1126 mount_list_unlock();
1127 }
1128
1129 return(error);
1130 }
1131
1132 /*
1133 * Flush in-core data, check for competing mount attempts,
1134 * and set VMOUNT
1135 */
1136 int
1137 prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
1138 {
1139 #if !CONFIG_MACF
1140 #pragma unused(cnp,fsname)
1141 #endif
1142 struct vnode_attr va;
1143 int error;
1144
1145 if (!skip_auth) {
1146 /*
1147 * If the user is not root, ensure that they own the directory
1148 * onto which we are attempting to mount.
1149 */
1150 VATTR_INIT(&va);
1151 VATTR_WANTED(&va, va_uid);
1152 if ((error = vnode_getattr(vp, &va, ctx)) ||
1153 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1154 (!vfs_context_issuser(ctx)))) {
1155 error = EPERM;
1156 goto out;
1157 }
1158 }
1159
1160 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1161 goto out;
1162
1163 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1164 goto out;
1165
1166 if (vp->v_type != VDIR) {
1167 error = ENOTDIR;
1168 goto out;
1169 }
1170
1171 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1172 error = EBUSY;
1173 goto out;
1174 }
1175
1176 #if CONFIG_MACF
1177 error = mac_mount_check_mount(ctx, vp,
1178 cnp, fsname);
1179 if (error != 0)
1180 goto out;
1181 #endif
1182
1183 vnode_lock_spin(vp);
1184 SET(vp->v_flag, VMOUNT);
1185 vnode_unlock(vp);
1186
1187 out:
1188 return error;
1189 }
1190
1191 #if CONFIG_IMGSRC_ACCESS
1192
1193 #if DEBUG
1194 #define IMGSRC_DEBUG(args...) printf(args)
1195 #else
1196 #define IMGSRC_DEBUG(args...) do { } while(0)
1197 #endif
1198
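/*
 * Look up 'devpath', verify it is a block device matching the dev_t that
 * already backs the mount, check the (non-root) caller's access to it, and
 * copy its path into f_mntfromname.  On success *devvpp is returned with
 * an iocount held; the caller is responsible for dropping it.
 */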
1199 static int
1200 authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1201 {
1202 struct nameidata nd;
1203 vnode_t vp, realdevvp;
1204 mode_t accessmode;
1205 int error;
1206
1207 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1208 if ( (error = namei(&nd)) ) {
1209 IMGSRC_DEBUG("namei() failed with %d\n", error);
1210 return error;
1211 }
1212
1213 vp = nd.ni_vp;
1214
1215 if (!vnode_isblk(vp)) {
1216 IMGSRC_DEBUG("Not block device.\n");
1217 error = ENOTBLK;
1218 goto out;
1219 }
1220
1221 realdevvp = mp->mnt_devvp;
1222 if (realdevvp == NULLVP) {
1223 IMGSRC_DEBUG("No device backs the mount.\n");
1224 error = ENXIO;
1225 goto out;
1226 }
1227
1228 error = vnode_getwithref(realdevvp);
1229 if (error != 0) {
1230 IMGSRC_DEBUG("Couldn't get iocount on device.\n");
1231 goto out;
1232 }
1233
1234 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1235 IMGSRC_DEBUG("Wrong dev_t.\n");
1236 error = ENXIO;
1237 goto out1;
1238 }
1239
1240 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1241
1242 /*
1243 * If mount by non-root, then verify that user has necessary
1244 * permissions on the device.
1245 */
1246 if (!vfs_context_issuser(ctx)) {
1247 accessmode = KAUTH_VNODE_READ_DATA;
1248 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1249 accessmode |= KAUTH_VNODE_WRITE_DATA;
1250 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1251 IMGSRC_DEBUG("Access denied.\n");
1252 goto out1;
1253 }
1254 }
1255
1256 *devvpp = vp;
1257
1258 out1:
1259 vnode_put(realdevvp);
1260 out:
1261 nameidone(&nd);
1262 if (error) {
1263 vnode_put(vp);
1264 }
1265
1266 return error;
1267 }
1268
1269 /*
1270 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1271 * and call checkdirs()
1272 */
1273 static int
1274 place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1275 {
1276 int error;
1277
1278 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1279
1280 vnode_lock_spin(vp);
1281 CLR(vp->v_flag, VMOUNT);
1282 vp->v_mountedhere = mp;
1283 vnode_unlock(vp);
1284
1285 /*
1286 * taking the name_cache_lock exclusively will
1287 * ensure that everyone is out of the fast path who
1288 * might be trying to use a now stale copy of
1289 * vp->v_mountedhere->mnt_realrootvp
1290 * bumping mount_generation causes the cached values
1291 * to be invalidated
1292 */
1293 name_cache_lock();
1294 mount_generation++;
1295 name_cache_unlock();
1296
1297 error = vnode_ref(vp);
1298 if (error != 0) {
1299 goto out;
1300 }
1301
1302 error = checkdirs(vp, ctx);
1303 if (error != 0) {
1304 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1305 vnode_rele(vp);
1306 goto out;
1307 }
1308
1309 out:
1310 if (error != 0) {
1311 mp->mnt_vnodecovered = NULLVP;
1312 }
1313 return error;
1314 }
1315
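/*
 * Undo place_mount_and_checkdirs(): drop the usecount taken on the covered
 * vnode, clear its v_mountedhere, and detach mnt_vnodecovered.
 */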
1316 static void
1317 undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1318 {
1319 vnode_rele(vp);
1320 vnode_lock_spin(vp);
1321 vp->v_mountedhere = (mount_t)NULL;
1322 vnode_unlock(vp);
1323
1324 mp->mnt_vnodecovered = NULLVP;
1325 }
1326
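/*
 * Take the mount's rwlock exclusive and authorize an update-style remount.
 * Fails if an unmount is in progress, if MNT_RELOAD is requested on a
 * read-write mount, or if the caller is neither root nor the original
 * mounter; on error the rwlock is not left held.
 */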
1327 static int
1328 mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1329 {
1330 int error;
1331
1332 /* if an unmount is in progress, return an error */
1333 mount_lock_spin(mp);
1334 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1335 mount_unlock(mp);
1336 return EBUSY;
1337 }
1338 mount_unlock(mp);
1339 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1340
1341 /*
1342 * We only allow the filesystem to be reloaded if it
1343 * is currently mounted read-only.
1344 */
1345 if ((flags & MNT_RELOAD) &&
1346 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1347 error = ENOTSUP;
1348 goto out;
1349 }
1350
1351 /*
1352 * Only root, or the user that did the original mount is
1353 * permitted to update it.
1354 */
1355 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1356 (!vfs_context_issuser(ctx))) {
1357 error = EPERM;
1358 goto out;
1359 }
1360 #if CONFIG_MACF
1361 error = mac_mount_check_remount(ctx, mp);
1362 if (error != 0) {
1363 goto out;
1364 }
1365 #endif
1366
1367 out:
1368 if (error) {
1369 lck_rw_done(&mp->mnt_rwlock);
1370 }
1371
1372 return error;
1373 }
1374
1375 static void
1376 mount_end_update(mount_t mp)
1377 {
1378 lck_rw_done(&mp->mnt_rwlock);
1379 }
1380
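/*
 * Return the saved imageboot root vnode at the given nesting height with an
 * iocount held.  EINVAL if the height is out of range, ENOENT if no usable
 * root vnode is recorded at that height.
 */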
1381 static int
1382 get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1383 {
1384 vnode_t vp;
1385
1386 if (height >= MAX_IMAGEBOOT_NESTING) {
1387 return EINVAL;
1388 }
1389
1390 vp = imgsrc_rootvnodes[height];
1391 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1392 *rvpp = vp;
1393 return 0;
1394 } else {
1395 return ENOENT;
1396 }
1397 }
1398
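/*
 * Relocate a file system mounted at imageboot time onto the covered vnode
 * 'vp': revalidate the backing device (for local filesystems), place the
 * mount on 'vp', update the mount-on name, and add it to the mount list.
 * Only root may do this, and a given mount can be moved only once.
 */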
1399 static int
1400 relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
1401 const char *fsname, vfs_context_t ctx,
1402 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
1403 {
1404 int error;
1405 mount_t mp;
1406 boolean_t placed = FALSE;
1407 vnode_t devvp = NULLVP;
1408 struct vfstable *vfsp;
1409 user_addr_t devpath;
1410 char *old_mntonname;
1411 vnode_t rvp;
1412 uint32_t height;
1413 uint32_t flags;
1414
1415 /* If we didn't imageboot, nothing to move */
1416 if (imgsrc_rootvnodes[0] == NULLVP) {
1417 return EINVAL;
1418 }
1419
1420 /* Only root can do this */
1421 if (!vfs_context_issuser(ctx)) {
1422 return EPERM;
1423 }
1424
1425 IMGSRC_DEBUG("looking for root vnode.\n");
1426
1427 /*
1428 * Get root vnode of filesystem we're moving.
1429 */
1430 if (by_index) {
1431 if (is64bit) {
1432 struct user64_mnt_imgsrc_args mia64;
1433 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1434 if (error != 0) {
1435 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1436 return error;
1437 }
1438
1439 height = mia64.mi_height;
1440 flags = mia64.mi_flags;
1441 devpath = mia64.mi_devpath;
1442 } else {
1443 struct user32_mnt_imgsrc_args mia32;
1444 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1445 if (error != 0) {
1446 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1447 return error;
1448 }
1449
1450 height = mia32.mi_height;
1451 flags = mia32.mi_flags;
1452 devpath = mia32.mi_devpath;
1453 }
1454 } else {
1455 /*
1456 * For binary compatibility--assumes one level of nesting.
1457 */
1458 if (is64bit) {
1459 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1460 return error;
1461 } else {
1462 user32_addr_t tmp;
1463 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1464 return error;
1465
1466 /* munge into LP64 addr */
1467 devpath = CAST_USER_ADDR_T(tmp);
1468 }
1469
1470 height = 0;
1471 flags = 0;
1472 }
1473
1474 if (flags != 0) {
1475 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1476 return EINVAL;
1477 }
1478
1479 error = get_imgsrc_rootvnode(height, &rvp);
1480 if (error != 0) {
1481 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
1482 return error;
1483 }
1484
1485 IMGSRC_DEBUG("got root vnode.\n");
1486
1487 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1488
1489 /* Can only move once */
1490 mp = vnode_mount(rvp);
1491 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1492 IMGSRC_DEBUG("Already moved.\n");
1493 error = EBUSY;
1494 goto out0;
1495 }
1496
1497 IMGSRC_DEBUG("Starting update.\n");
1498
1499 /* Get exclusive rwlock on mount, authorize update on mp */
1500 error = mount_begin_update(mp, ctx, 0);
1501 if (error != 0) {
1502 IMGSRC_DEBUG("Starting update failed with %d\n", error);
1503 goto out0;
1504 }
1505
1506 /*
1507 * It can only be moved once. Flag is set under the rwlock,
1508 * so we're now safe to proceed.
1509 */
1510 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1511 IMGSRC_DEBUG("Already moved [2]\n");
1512 goto out1;
1513 }
1514
1515
1516 IMGSRC_DEBUG("Preparing coveredvp.\n");
1517
1518 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1519 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
1520 if (error != 0) {
1521 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
1522 goto out1;
1523 }
1524
1525 IMGSRC_DEBUG("Covered vp OK.\n");
1526
1527 /* Sanity-check the name the caller has provided */
1528 vfsp = mp->mnt_vtable;
1529 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
1530 IMGSRC_DEBUG("Wrong fs name.\n");
1531 error = EINVAL;
1532 goto out2;
1533 }
1534
1535 /* Check the device vnode and update mount-from name, for local filesystems */
1536 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
1537 IMGSRC_DEBUG("Local, doing device validation.\n");
1538
1539 if (devpath != USER_ADDR_NULL) {
1540 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1541 if (error) {
1542 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1543 goto out2;
1544 }
1545
1546 vnode_put(devvp);
1547 }
1548 }
1549
1550 /*
1551 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1552 * and increment the name cache's mount generation
1553 */
1554
1555 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1556 error = place_mount_and_checkdirs(mp, vp, ctx);
1557 if (error != 0) {
1558 goto out2;
1559 }
1560
1561 placed = TRUE;
1562
1563 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1564 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1565
1566 /* Forbid future moves */
1567 mount_lock(mp);
1568 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1569 mount_unlock(mp);
1570
1571 /* Finally, add to mount list, completely ready to go */
1572 if (mount_list_add(mp) != 0) {
1573 /*
1574 * The system is shutting down trying to umount
1575 * everything, so fail with a plausible errno.
1576 */
1577 error = EBUSY;
1578 goto out3;
1579 }
1580
1581 mount_end_update(mp);
1582 vnode_put(rvp);
1583 FREE(old_mntonname, M_TEMP);
1584
1585 vfs_notify_mount(pvp);
1586
1587 return 0;
1588 out3:
1589 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1590
1591 mount_lock(mp);
1592 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1593 mount_unlock(mp);
1594
1595 out2:
1596 /*
1597 * Placing the mp on the vnode clears VMOUNT,
1598 * so cleanup is different after that point
1599 */
1600 if (placed) {
1601 /* Rele the vp, clear VMOUNT and v_mountedhere */
1602 undo_place_on_covered_vp(mp, vp);
1603 } else {
1604 vnode_lock_spin(vp);
1605 CLR(vp->v_flag, VMOUNT);
1606 vnode_unlock(vp);
1607 }
1608 out1:
1609 mount_end_update(mp);
1610
1611 out0:
1612 vnode_put(rvp);
1613 FREE(old_mntonname, M_TEMP);
1614 return error;
1615 }
1616
1617 #endif /* CONFIG_IMGSRC_ACCESS */
1618
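/*
 * Turn on disk quotas for an HFS mount after a (re)mount: for each quota
 * type whose quota-ops trigger file exists at the mount root, issue a
 * Q_QUOTAON quotactl for the corresponding quota file.  Errors are ignored.
 */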
1619 void
1620 enablequotas(struct mount *mp, vfs_context_t ctx)
1621 {
1622 struct nameidata qnd;
1623 int type;
1624 char qfpath[MAXPATHLEN];
1625 const char *qfname = QUOTAFILENAME;
1626 const char *qfopsname = QUOTAOPSNAME;
1627 const char *qfextension[] = INITQFNAMES;
1628
1629 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1630 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1631 return;
1632 }
1633 /*
1634 * Enable filesystem disk quotas if necessary.
1635 * We ignore errors, as this should not interfere with the final mount.
1636 */
1637 for (type=0; type < MAXQUOTAS; type++) {
1638 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
1639 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1640 CAST_USER_ADDR_T(qfpath), ctx);
1641 if (namei(&qnd) != 0)
1642 continue; /* option file to trigger quotas is not present */
1643 vnode_put(qnd.ni_vp);
1644 nameidone(&qnd);
1645 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
1646
1647 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
1648 }
1649 return;
1650 }
1651
1652
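/*
 * Per-process callback for checkdirs(): if the process's current or root
 * directory is the newly covered vnode (olddp), retarget it to the root of
 * the new mount (newdp), taking a reference on newdp and releasing the old
 * vnode's reference.
 */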
1653 static int
1654 checkdirs_callback(proc_t p, void * arg)
1655 {
1656 struct cdirargs * cdrp = (struct cdirargs * )arg;
1657 vnode_t olddp = cdrp->olddp;
1658 vnode_t newdp = cdrp->newdp;
1659 struct filedesc *fdp;
1660 vnode_t tvp;
1661 vnode_t fdp_cvp;
1662 vnode_t fdp_rvp;
1663 int cdir_changed = 0;
1664 int rdir_changed = 0;
1665
1666 /*
1667 * XXX Also needs to iterate each thread in the process to see if it
1668 * XXX is using a per-thread current working directory, and, if so,
1669 * XXX update that as well.
1670 */
1671
1672 proc_fdlock(p);
1673 fdp = p->p_fd;
1674 if (fdp == (struct filedesc *)0) {
1675 proc_fdunlock(p);
1676 return(PROC_RETURNED);
1677 }
1678 fdp_cvp = fdp->fd_cdir;
1679 fdp_rvp = fdp->fd_rdir;
1680 proc_fdunlock(p);
1681
1682 if (fdp_cvp == olddp) {
1683 vnode_ref(newdp);
1684 tvp = fdp->fd_cdir;
1685 fdp_cvp = newdp;
1686 cdir_changed = 1;
1687 vnode_rele(tvp);
1688 }
1689 if (fdp_rvp == olddp) {
1690 vnode_ref(newdp);
1691 tvp = fdp->fd_rdir;
1692 fdp_rvp = newdp;
1693 rdir_changed = 1;
1694 vnode_rele(tvp);
1695 }
1696 if (cdir_changed || rdir_changed) {
1697 proc_fdlock(p);
1698 fdp->fd_cdir = fdp_cvp;
1699 fdp->fd_rdir = fdp_rvp;
1700 proc_fdunlock(p);
1701 }
1702 return(PROC_RETURNED);
1703 }
1704
1705
1706
1707 /*
1708 * Scan all active processes to see if any of them have a current
1709 * or root directory onto which the new filesystem has just been
1710 * mounted. If so, replace them with the new mount point.
1711 */
1712 static int
1713 checkdirs(vnode_t olddp, vfs_context_t ctx)
1714 {
1715 vnode_t newdp;
1716 vnode_t tvp;
1717 int err;
1718 struct cdirargs cdr;
1719
1720 if (olddp->v_usecount == 1)
1721 return(0);
1722 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
1723
1724 if (err != 0) {
1725 #if DIAGNOSTIC
1726 panic("mount: lost mount: error %d", err);
1727 #endif
1728 return(err);
1729 }
1730
1731 cdr.olddp = olddp;
1732 cdr.newdp = newdp;
1733 /* do not block for exec/fork trans as the vps for cwd & rootdir are not changing */
1734 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
1735
1736 if (rootvnode == olddp) {
1737 vnode_ref(newdp);
1738 tvp = rootvnode;
1739 rootvnode = newdp;
1740 vnode_rele(tvp);
1741 }
1742
1743 vnode_put(newdp);
1744 return(0);
1745 }
1746
1747 /*
1748 * Unmount a file system.
1749 *
1750 * Note: unmount takes a path to the vnode mounted on as argument,
1751 * not the special file (as before).
1752 */
1753 /* ARGSUSED */
1754 int
1755 unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1756 {
1757 vnode_t vp;
1758 struct mount *mp;
1759 int error;
1760 struct nameidata nd;
1761 vfs_context_t ctx = vfs_context_current();
1762
1763 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
1764 UIO_USERSPACE, uap->path, ctx);
1765 error = namei(&nd);
1766 if (error)
1767 return (error);
1768 vp = nd.ni_vp;
1769 mp = vp->v_mount;
1770 nameidone(&nd);
1771
1772 #if CONFIG_MACF
1773 error = mac_mount_check_umount(ctx, mp);
1774 if (error != 0) {
1775 vnode_put(vp);
1776 return (error);
1777 }
1778 #endif
1779 /*
1780 * Must be the root of the filesystem
1781 */
1782 if ((vp->v_flag & VROOT) == 0) {
1783 vnode_put(vp);
1784 return (EINVAL);
1785 }
1786 mount_ref(mp, 0);
1787 vnode_put(vp);
1788 /* safedounmount consumes the mount ref */
1789 return (safedounmount(mp, uap->flags, ctx));
1790 }
1791
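/*
 * Unmount the file system identified by 'fsid': look the mount up by fsid,
 * take a mount ref, and pass it to safedounmount(), which consumes the ref.
 */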
1792 int
1793 vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx)
1794 {
1795 mount_t mp;
1796
1797 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1798 if (mp == (mount_t)0) {
1799 return(ENOENT);
1800 }
1801 mount_ref(mp, 0);
1802 mount_iterdrop(mp);
1803 /* safedounmount consumes the mount ref */
1804 return(safedounmount(mp, flags, ctx));
1805 }
1806
1807
1808 /*
1809 * The mount struct comes with a mount ref which will be consumed.
1810 * Do the actual file system unmount, preventing some common foot-shooting.
1811 */
1812 int
1813 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1814 {
1815 int error;
1816 proc_t p = vfs_context_proc(ctx);
1817
1818 /*
1819 * If the file system is not responding and MNT_NOBLOCK
1820 * is set and not a forced unmount then return EBUSY.
1821 */
1822 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1823 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1824 error = EBUSY;
1825 goto out;
1826 }
1827
1828 /*
1829 * Skip authorization if the mount is tagged as permissive and
1830 * this is not a forced-unmount attempt.
1831 */
1832 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1833 /*
1834 * Only root, or the user that did the original mount is
1835 * permitted to unmount this filesystem.
1836 */
1837 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1838 (error = suser(kauth_cred_get(), &p->p_acflag)))
1839 goto out;
1840 }
1841 /*
1842 * Don't allow unmounting the root file system.
1843 */
1844 if (mp->mnt_flag & MNT_ROOTFS) {
1845 error = EBUSY; /* the root is always busy */
1846 goto out;
1847 }
1848
1849 #ifdef CONFIG_IMGSRC_ACCESS
1850 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1851 error = EBUSY;
1852 goto out;
1853 }
1854 #endif /* CONFIG_IMGSRC_ACCESS */
1855
1856 return (dounmount(mp, flags, 1, ctx));
1857
1858 out:
1859 mount_drop(mp, 0);
1860 return(error);
1861 }
1862
1863 /*
1864 * Do the actual file system unmount.
1865 */
1866 int
1867 dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1868 {
1869 vnode_t coveredvp = (vnode_t)0;
1870 int error;
1871 int needwakeup = 0;
1872 int forcedunmount = 0;
1873 int lflags = 0;
1874 struct vnode *devvp = NULLVP;
1875 #if CONFIG_TRIGGERS
1876 proc_t p = vfs_context_proc(ctx);
1877 int did_vflush = 0;
1878 int pflags_save = 0;
1879 #endif /* CONFIG_TRIGGERS */
1880
1881 #if CONFIG_FSE
1882 if (!(flags & MNT_FORCE)) {
1883 fsevent_unmount(mp, ctx); /* has to come first! */
1884 }
1885 #endif
1886
1887 mount_lock(mp);
1888
1889 /*
1890 * If an unmount is already in progress, just return EBUSY.
1891 * Even a forced unmount cannot override.
1892 */
1893 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1894 if (withref != 0)
1895 mount_drop(mp, 1);
1896 mount_unlock(mp);
1897 return (EBUSY);
1898 }
1899
1900 if (flags & MNT_FORCE) {
1901 forcedunmount = 1;
1902 mp->mnt_lflag |= MNT_LFORCE;
1903 }
1904
1905 #if CONFIG_TRIGGERS
1906 if (flags & MNT_NOBLOCK && p != kernproc)
1907 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
1908 #endif
1909
1910 mp->mnt_kern_flag |= MNTK_UNMOUNT;
1911 mp->mnt_lflag |= MNT_LUNMOUNT;
1912 mp->mnt_flag &=~ MNT_ASYNC;
1913 /*
1914 * anyone currently in the fast path that
1915 * trips over the cached rootvp will be
1916 * dumped out and forced into the slow path
1917 * to regenerate a new cached value
1918 */
1919 mp->mnt_realrootvp = NULLVP;
1920 mount_unlock(mp);
1921
1922 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
1923 /*
1924 * Force unmount any mounts in this filesystem.
1925 * If any unmounts fail - just leave them dangling.
1926 * Avoids recursion.
1927 */
1928 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
1929 }
1930
1931 /*
1932 * taking the name_cache_lock exclusively will
1933 * ensure that everyone is out of the fast path who
1934 * might be trying to use a now stale copy of
1935 * vp->v_mountedhere->mnt_realrootvp
1936 * bumping mount_generation causes the cached values
1937 * to be invalidated
1938 */
1939 name_cache_lock();
1940 mount_generation++;
1941 name_cache_unlock();
1942
1943
1944 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1945 if (withref != 0)
1946 mount_drop(mp, 0);
1947 error = 0;
1948 if (forcedunmount == 0) {
1949 ubc_umount(mp); /* release cached vnodes */
1950 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1951 error = VFS_SYNC(mp, MNT_WAIT, ctx);
1952 if (error) {
1953 mount_lock(mp);
1954 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1955 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1956 mp->mnt_lflag &= ~MNT_LFORCE;
1957 goto out;
1958 }
1959 }
1960 }
1961
1962 IOBSDMountChange(mp, kIOMountChangeUnmount);
1963
1964 #if CONFIG_TRIGGERS
1965 vfs_nested_trigger_unmounts(mp, flags, ctx);
1966 did_vflush = 1;
1967 #endif
1968 if (forcedunmount)
1969 lflags |= FORCECLOSE;
1970 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
1971 if ((forcedunmount == 0) && error) {
1972 mount_lock(mp);
1973 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1974 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1975 mp->mnt_lflag &= ~MNT_LFORCE;
1976 goto out;
1977 }
1978
1979 /* make sure no one is in the mount iterations or lookups */
1980 mount_iterdrain(mp);
1981
1982 error = VFS_UNMOUNT(mp, flags, ctx);
1983 if (error) {
1984 mount_iterreset(mp);
1985 mount_lock(mp);
1986 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1987 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1988 mp->mnt_lflag &= ~MNT_LFORCE;
1989 goto out;
1990 }
1991
1992 /* increment the operations count */
1993 if (!error)
1994 OSAddAtomic(1, &vfs_nummntops);
1995
1996 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
1997 /* hold an io reference and drop the usecount before close */
1998 devvp = mp->mnt_devvp;
1999 vnode_getalways(devvp);
2000 vnode_rele(devvp);
2001 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
2002 ctx);
2003 vnode_clearmountedon(devvp);
2004 vnode_put(devvp);
2005 }
2006 lck_rw_done(&mp->mnt_rwlock);
2007 mount_list_remove(mp);
2008 lck_rw_lock_exclusive(&mp->mnt_rwlock);
2009
2010 /* mark the mount point hook in the vp but do not drop the ref yet */
2011 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
2012 /*
2013 * The covered vnode needs special handling. Trying to get an
2014 * iocount must not block here as this may lead to deadlocks
2015 * if the Filesystem to which the covered vnode belongs is
2016 * undergoing forced unmounts. Since we hold a usecount, the
2017 * vnode cannot be reused (it can, however, still be terminated)
2018 */
2019 vnode_getalways(coveredvp);
2020 vnode_lock_spin(coveredvp);
2021
2022 mp->mnt_crossref++;
2023 coveredvp->v_mountedhere = (struct mount *)0;
2024 CLR(coveredvp->v_flag, VMOUNT);
2025
2026 vnode_unlock(coveredvp);
2027 vnode_put(coveredvp);
2028 }
2029
2030 mount_list_lock();
2031 mp->mnt_vtable->vfc_refcount--;
2032 mount_list_unlock();
2033
2034 cache_purgevfs(mp); /* remove cache entries for this file sys */
2035 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
2036 mount_lock(mp);
2037 mp->mnt_lflag |= MNT_LDEAD;
2038
2039 if (mp->mnt_lflag & MNT_LWAIT) {
2040 /*
2041 * do the wakeup here
2042 * in case we block in mount_refdrain
2043 * which will drop the mount lock
2044 * and allow anyone blocked in vfs_busy
2045 * to wakeup and see the LDEAD state
2046 */
2047 mp->mnt_lflag &= ~MNT_LWAIT;
2048 wakeup((caddr_t)mp);
2049 }
2050 mount_refdrain(mp);
2051 out:
2052 if (mp->mnt_lflag & MNT_LWAIT) {
2053 mp->mnt_lflag &= ~MNT_LWAIT;
2054 needwakeup = 1;
2055 }
2056
2057 #if CONFIG_TRIGGERS
2058 if (flags & MNT_NOBLOCK && p != kernproc) {
2059 // Restore P_NOREMOTEHANG bit to its previous value
2060 if ((pflags_save & P_NOREMOTEHANG) == 0)
2061 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2062 }
2063
2064 /*
2065 * Callback and context are set together under the mount lock, and
2066 * never cleared, so we're safe to examine them here, drop the lock,
2067 * and call out.
2068 */
2069 if (mp->mnt_triggercallback != NULL) {
2070 mount_unlock(mp);
2071 if (error == 0) {
2072 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2073 } else if (did_vflush) {
2074 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2075 }
2076 } else {
2077 mount_unlock(mp);
2078 }
2079 #else
2080 mount_unlock(mp);
2081 #endif /* CONFIG_TRIGGERS */
2082
2083 lck_rw_done(&mp->mnt_rwlock);
2084
2085 if (needwakeup)
2086 wakeup((caddr_t)mp);
2087
2088 if (!error) {
2089 if ((coveredvp != NULLVP)) {
2090 vnode_t pvp = NULLVP;
2091
2092 /*
2093 * The covered vnode needs special handling. Trying to
2094 * get an iocount must not block here as this may lead
2095 * to deadlocks if the Filesystem to which the covered
2096 * vnode belongs is undergoing forced unmounts. Since we
2097 * hold a usecount, the vnode cannot be reused
2098 * (it can, however, still be terminated).
2099 */
2100 vnode_getalways(coveredvp);
2101
2102 mount_dropcrossref(mp, coveredvp, 0);
2103 /*
2104 * We'll _try_ to detect if this really needs to be
2105 * done. The coveredvp can only be in termination (or
2106 * terminated) if the coveredvp's mount point is in a
2107 * forced unmount (or has been) since we still hold the
2108 * ref.
2109 */
2110 if (!vnode_isrecycled(coveredvp)) {
2111 pvp = vnode_getparent(coveredvp);
2112 #if CONFIG_TRIGGERS
2113 if (coveredvp->v_resolve) {
2114 vnode_trigger_rearm(coveredvp, ctx);
2115 }
2116 #endif
2117 }
2118
2119 vnode_rele(coveredvp);
2120 vnode_put(coveredvp);
2121 coveredvp = NULLVP;
2122
2123 if (pvp) {
2124 lock_vnode_and_post(pvp, NOTE_WRITE);
2125 vnode_put(pvp);
2126 }
2127 } else if (mp->mnt_flag & MNT_ROOTFS) {
2128 mount_lock_destroy(mp);
2129 #if CONFIG_MACF
2130 mac_mount_label_destroy(mp);
2131 #endif
2132 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2133 } else
2134 panic("dounmount: no coveredvp");
2135 }
2136 return (error);
2137 }
2138
2139 /*
2140 * Unmount any mounts in this filesystem.
2141 */
2142 void
2143 dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2144 {
2145 mount_t smp;
2146 fsid_t *fsids, fsid;
2147 int fsids_sz;
2148 int count = 0, i, m = 0;
2149 vnode_t vp;
2150
2151 mount_list_lock();
2152
2153 // Get an array to hold the submount fsids.
2154 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2155 count++;
2156 fsids_sz = count * sizeof(fsid_t);
2157 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2158 if (fsids == NULL) {
2159 mount_list_unlock();
2160 goto out;
2161 }
2162 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2163
2164 /*
2165 * Fill the array with submount fsids.
2166 * Since mounts are always added to the tail of the mount list, the
2167 * list is always in mount order.
2168 * For each mount check if the mounted-on vnode belongs to a
2169 * mount that's already added to our array of mounts to be unmounted.
2170 */
2171 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2172 vp = smp->mnt_vnodecovered;
2173 if (vp == NULL)
2174 continue;
2175 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2176 for (i = 0; i <= m; i++) {
2177 if (fsids[i].val[0] == fsid.val[0] &&
2178 fsids[i].val[1] == fsid.val[1]) {
2179 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2180 break;
2181 }
2182 }
2183 }
2184 mount_list_unlock();
2185
2186 // Unmount the submounts in reverse order. Ignore errors.
2187 for (i = m; i > 0; i--) {
2188 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2189 if (smp) {
2190 mount_ref(smp, 0);
2191 mount_iterdrop(smp);
2192 (void) dounmount(smp, flags, 1, ctx);
2193 }
2194 }
2195 out:
2196 if (fsids)
2197 FREE(fsids, M_TEMP);
2198 }
2199
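/*
 * Drop one covered-vnode cross reference on a mount. When the count
 * reaches zero and the vnode no longer points back at this mount, the
 * mount structure itself is torn down and freed.
 */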
2200 void
2201 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2202 {
2203 vnode_lock(dp);
2204 mp->mnt_crossref--;
2205
2206 if (mp->mnt_crossref < 0)
2207 panic("mount cross refs -ve");
2208
2209 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
2210
2211 if (need_put)
2212 vnode_put_locked(dp);
2213 vnode_unlock(dp);
2214
2215 mount_lock_destroy(mp);
2216 #if CONFIG_MACF
2217 mac_mount_label_destroy(mp);
2218 #endif
2219 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2220 return;
2221 }
2222 if (need_put)
2223 vnode_put_locked(dp);
2224 vnode_unlock(dp);
2225 }
2226
2227
2228 /*
2229 * Sync each mounted filesystem.
2230 */
2231 #if DIAGNOSTIC
2232 int syncprt = 0;
2233 #endif
2234
2235 int print_vmpage_stat = 0;
2236 int sync_timeout = 60; // Sync time limit (sec)
2237
2238 static int
2239 sync_callback(mount_t mp, __unused void *arg)
2240 {
2241 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2242 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2243
2244 mp->mnt_flag &= ~MNT_ASYNC;
2245 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
2246 if (asyncflag)
2247 mp->mnt_flag |= MNT_ASYNC;
2248 }
2249
2250 return (VFS_RETURNED);
2251 }
2252
2253 /* ARGSUSED */
2254 int
2255 sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
2256 {
2257 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2258
2259 if (print_vmpage_stat) {
2260 vm_countdirtypages();
2261 }
2262
2263 #if DIAGNOSTIC
2264 if (syncprt)
2265 vfs_bufstats();
2266 #endif /* DIAGNOSTIC */
2267 return 0;
2268 }
2269
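/*
 * Body of the helper thread started by sync_async(): sync every mounted
 * filesystem, then wake the caller that is sleeping on the timeout.
 */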
2270 static void
2271 sync_thread(void *arg, __unused wait_result_t wr)
2272 {
2273 int *timeout = (int *) arg;
2274
2275 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2276
2277 if (timeout)
2278 wakeup((caddr_t) timeout);
2279 if (print_vmpage_stat) {
2280 vm_countdirtypages();
2281 }
2282
2283 #if DIAGNOSTIC
2284 if (syncprt)
2285 vfs_bufstats();
2286 #endif /* DIAGNOSTIC */
2287 }
2288
2289 /*
2290 * Sync in a separate thread so we can time out if it blocks.
2291 */
2292 static int
2293 sync_async(int timeout)
2294 {
2295 thread_t thd;
2296 int error;
2297 struct timespec ts = {timeout, 0};
2298
2299 lck_mtx_lock(sync_mtx_lck);
2300 if (kernel_thread_start(sync_thread, &timeout, &thd) != KERN_SUCCESS) {
2301 printf("sync_thread failed\n");
2302 lck_mtx_unlock(sync_mtx_lck);
2303 return (0);
2304 }
2305
2306 error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "sync_thread", &ts);
2307 if (error) {
2308 printf("sync timed out: %d sec\n", timeout);
2309 }
2310 thread_deallocate(thd);
2311
2312 return (0);
2313 }
2314
2315 /*
2316 * An in-kernel sync for power management to call.
2317 */
2318 __private_extern__ int
2319 sync_internal(void)
2320 {
2321 (void) sync_async(sync_timeout);
2322
2323 return 0;
2324 } /* end of sync_internal call */
2325
2326 /*
2327 * Change filesystem quotas.
2328 */
2329 #if QUOTA
2330 int
2331 quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
2332 {
2333 struct mount *mp;
2334 int error, quota_cmd, quota_status;
2335 caddr_t datap;
2336 size_t fnamelen;
2337 struct nameidata nd;
2338 vfs_context_t ctx = vfs_context_current();
2339 struct dqblk my_dqblk;
2340
2341 AUDIT_ARG(uid, uap->uid);
2342 AUDIT_ARG(cmd, uap->cmd);
2343 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2344 uap->path, ctx);
2345 error = namei(&nd);
2346 if (error)
2347 return (error);
2348 mp = nd.ni_vp->v_mount;
2349 vnode_put(nd.ni_vp);
2350 nameidone(&nd);
2351
2352 /* copyin any data we will need for downstream code */
2353 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2354
2355 switch (quota_cmd) {
2356 case Q_QUOTAON:
2357 /* uap->arg specifies a file from which to take the quotas */
2358 fnamelen = MAXPATHLEN;
2359 datap = kalloc(MAXPATHLEN);
2360 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2361 break;
2362 case Q_GETQUOTA:
2363 /* uap->arg is a pointer to a dqblk structure. */
2364 datap = (caddr_t) &my_dqblk;
2365 break;
2366 case Q_SETQUOTA:
2367 case Q_SETUSE:
2368 /* uap->arg is a pointer to a dqblk structure. */
2369 datap = (caddr_t) &my_dqblk;
2370 if (proc_is64bit(p)) {
2371 struct user_dqblk my_dqblk64;
2372 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
2373 if (error == 0) {
2374 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2375 }
2376 }
2377 else {
2378 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
2379 }
2380 break;
2381 case Q_QUOTASTAT:
2382 /* uap->arg is a pointer to an integer */
2383 datap = (caddr_t) &quota_status;
2384 break;
2385 default:
2386 datap = NULL;
2387 break;
2388 } /* switch */
2389
2390 if (error == 0) {
2391 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
2392 }
2393
2394 switch (quota_cmd) {
2395 case Q_QUOTAON:
2396 if (datap != NULL)
2397 kfree(datap, MAXPATHLEN);
2398 break;
2399 case Q_GETQUOTA:
2400 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2401 if (error == 0) {
2402 if (proc_is64bit(p)) {
2403 struct user_dqblk my_dqblk64 = {.dqb_bhardlimit = 0};
2404 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2405 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
2406 }
2407 else {
2408 error = copyout(datap, uap->arg, sizeof (struct dqblk));
2409 }
2410 }
2411 break;
2412 case Q_QUOTASTAT:
2413 /* uap->arg is a pointer to an integer */
2414 if (error == 0) {
2415 error = copyout(datap, uap->arg, sizeof(quota_status));
2416 }
2417 break;
2418 default:
2419 break;
2420 } /* switch */
2421
2422 return (error);
2423 }
2424 #else
2425 int
2426 quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2427 {
2428 return (EOPNOTSUPP);
2429 }
2430 #endif /* QUOTA */
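/*
 * A minimal userland sketch of a quotactl(2) query handled by the code
 * above, assuming quotas are enabled on the volume mounted at "/".
 * QCMD() packs the command and quota type, mirroring the SUBCMDSHIFT
 * decoding done in the kernel handler.
 */
#if 0	/* illustrative userland sketch; not compiled with the kernel */
#include <sys/types.h>
#include <sys/quota.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct dqblk dq;

	if (quotactl("/", QCMD(Q_GETQUOTA, USRQUOTA), getuid(),
	    (caddr_t)&dq) == -1) {
		perror("quotactl");	/* commonly fails when quotas are off */
		return 1;
	}
	printf("hard limit: %llu bytes\n",
	    (unsigned long long)dq.dqb_bhardlimit);
	return 0;
}
#endif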
2431
2432 /*
2433 * Get filesystem statistics.
2434 *
2435 * Returns: 0 Success
2436 * namei:???
2437 * vfs_update_vfsstat:???
2438 * munge_statfs:EFAULT
2439 */
2440 /* ARGSUSED */
2441 int
2442 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
2443 {
2444 struct mount *mp;
2445 struct vfsstatfs *sp;
2446 int error;
2447 struct nameidata nd;
2448 vfs_context_t ctx = vfs_context_current();
2449 vnode_t vp;
2450
2451 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2452 UIO_USERSPACE, uap->path, ctx);
2453 error = namei(&nd);
2454 if (error != 0)
2455 return (error);
2456 vp = nd.ni_vp;
2457 mp = vp->v_mount;
2458 sp = &mp->mnt_vfsstat;
2459 nameidone(&nd);
2460
2461 #if CONFIG_MACF
2462 error = mac_mount_check_stat(ctx, mp);
2463 if (error != 0)
2464 return (error);
2465 #endif
2466
2467 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
2468 if (error != 0) {
2469 vnode_put(vp);
2470 return (error);
2471 }
2472
2473 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2474 vnode_put(vp);
2475 return (error);
2476 }
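/*
 * A minimal userland sketch of statfs(2), which the handler above
 * implements: look up the path, refresh the per-mount vfsstat cache and
 * copy the result out. The root volume is used as the example path.
 */
#if 0	/* illustrative userland sketch; not compiled with the kernel */
#include <sys/param.h>
#include <sys/mount.h>
#include <stdio.h>

int
main(void)
{
	struct statfs sfs;

	if (statfs("/", &sfs) == -1) {
		perror("statfs");
		return 1;
	}
	printf("%s on %s: %llu of %llu blocks free\n",
	    sfs.f_mntfromname, sfs.f_mntonname,
	    (unsigned long long)sfs.f_bfree,
	    (unsigned long long)sfs.f_blocks);
	return 0;
}
#endif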
2477
2478 /*
2479 * Get filesystem statistics.
2480 */
2481 /* ARGSUSED */
2482 int
2483 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
2484 {
2485 vnode_t vp;
2486 struct mount *mp;
2487 struct vfsstatfs *sp;
2488 int error;
2489
2490 AUDIT_ARG(fd, uap->fd);
2491
2492 if ( (error = file_vnode(uap->fd, &vp)) )
2493 return (error);
2494
2495 error = vnode_getwithref(vp);
2496 if (error) {
2497 file_drop(uap->fd);
2498 return (error);
2499 }
2500
2501 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2502
2503 mp = vp->v_mount;
2504 if (!mp) {
2505 error = EBADF;
2506 goto out;
2507 }
2508
2509 #if CONFIG_MACF
2510 error = mac_mount_check_stat(vfs_context_current(), mp);
2511 if (error != 0)
2512 goto out;
2513 #endif
2514
2515 sp = &mp->mnt_vfsstat;
2516 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2517 goto out;
2518 }
2519
2520 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2521
2522 out:
2523 file_drop(uap->fd);
2524 vnode_put(vp);
2525
2526 return (error);
2527 }
2528
2529 /*
2530 * Common routine to handle copying of statfs64 data to user space
2531 */
2532 static int
2533 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2534 {
2535 int error;
2536 struct statfs64 sfs;
2537
2538 bzero(&sfs, sizeof(sfs));
2539
2540 sfs.f_bsize = sfsp->f_bsize;
2541 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2542 sfs.f_blocks = sfsp->f_blocks;
2543 sfs.f_bfree = sfsp->f_bfree;
2544 sfs.f_bavail = sfsp->f_bavail;
2545 sfs.f_files = sfsp->f_files;
2546 sfs.f_ffree = sfsp->f_ffree;
2547 sfs.f_fsid = sfsp->f_fsid;
2548 sfs.f_owner = sfsp->f_owner;
2549 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2550 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2551 sfs.f_fssubtype = sfsp->f_fssubtype;
2552 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2553 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2554 } else {
2555 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2556 }
2557 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2558 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2559
2560 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2561
2562 return(error);
2563 }
2564
2565 /*
2566 * Get file system statistics in 64-bit mode
2567 */
2568 int
2569 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2570 {
2571 struct mount *mp;
2572 struct vfsstatfs *sp;
2573 int error;
2574 struct nameidata nd;
2575 vfs_context_t ctxp = vfs_context_current();
2576 vnode_t vp;
2577
2578 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2579 UIO_USERSPACE, uap->path, ctxp);
2580 error = namei(&nd);
2581 if (error != 0)
2582 return (error);
2583 vp = nd.ni_vp;
2584 mp = vp->v_mount;
2585 sp = &mp->mnt_vfsstat;
2586 nameidone(&nd);
2587
2588 #if CONFIG_MACF
2589 error = mac_mount_check_stat(ctxp, mp);
2590 if (error != 0)
2591 return (error);
2592 #endif
2593
2594 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
2595 if (error != 0) {
2596 vnode_put(vp);
2597 return (error);
2598 }
2599
2600 error = statfs64_common(mp, sp, uap->buf);
2601 vnode_put(vp);
2602
2603 return (error);
2604 }
2605
2606 /*
2607 * Get file system statistics in 64-bit mode
2608 */
2609 int
2610 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2611 {
2612 struct vnode *vp;
2613 struct mount *mp;
2614 struct vfsstatfs *sp;
2615 int error;
2616
2617 AUDIT_ARG(fd, uap->fd);
2618
2619 if ( (error = file_vnode(uap->fd, &vp)) )
2620 return (error);
2621
2622 error = vnode_getwithref(vp);
2623 if (error) {
2624 file_drop(uap->fd);
2625 return (error);
2626 }
2627
2628 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2629
2630 mp = vp->v_mount;
2631 if (!mp) {
2632 error = EBADF;
2633 goto out;
2634 }
2635
2636 #if CONFIG_MACF
2637 error = mac_mount_check_stat(vfs_context_current(), mp);
2638 if (error != 0)
2639 goto out;
2640 #endif
2641
2642 sp = &mp->mnt_vfsstat;
2643 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2644 goto out;
2645 }
2646
2647 error = statfs64_common(mp, sp, uap->buf);
2648
2649 out:
2650 file_drop(uap->fd);
2651 vnode_put(vp);
2652
2653 return (error);
2654 }
2655
2656 struct getfsstat_struct {
2657 user_addr_t sfsp;
2658 user_addr_t *mp;
2659 int count;
2660 int maxcount;
2661 int flags;
2662 int error;
2663 };
2664
2665
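/*
 * Per-mount callback for getfsstat(2)/__mac_getfsstat(2): optionally
 * refresh the cached vfsstat, copy one (32- or 64-bit process sized)
 * statfs record out to user space and advance the user buffer pointer.
 */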
2666 static int
2667 getfsstat_callback(mount_t mp, void * arg)
2668 {
2669
2670 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2671 struct vfsstatfs *sp;
2672 int error, my_size;
2673 vfs_context_t ctx = vfs_context_current();
2674
2675 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2676 #if CONFIG_MACF
2677 error = mac_mount_check_stat(ctx, mp);
2678 if (error != 0) {
2679 fstp->error = error;
2680 return(VFS_RETURNED_DONE);
2681 }
2682 #endif
2683 sp = &mp->mnt_vfsstat;
2684 /*
2685 * If MNT_NOWAIT is specified, do not refresh the
2686 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2687 */
2688 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2689 (error = vfs_update_vfsstat(mp, ctx,
2690 VFS_USER_EVENT))) {
2691 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2692 return(VFS_RETURNED);
2693 }
2694
2695 /*
2696 * Need to handle LP64 version of struct statfs
2697 */
2698 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
2699 if (error) {
2700 fstp->error = error;
2701 return(VFS_RETURNED_DONE);
2702 }
2703 fstp->sfsp += my_size;
2704
2705 if (fstp->mp) {
2706 #if CONFIG_MACF
2707 error = mac_mount_label_get(mp, *fstp->mp);
2708 if (error) {
2709 fstp->error = error;
2710 return(VFS_RETURNED_DONE);
2711 }
2712 #endif
2713 fstp->mp++;
2714 }
2715 }
2716 fstp->count++;
2717 return(VFS_RETURNED);
2718 }
2719
2720 /*
2721 * Get statistics on all filesystems.
2722 */
2723 int
2724 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2725 {
2726 struct __mac_getfsstat_args muap;
2727
2728 muap.buf = uap->buf;
2729 muap.bufsize = uap->bufsize;
2730 muap.mac = USER_ADDR_NULL;
2731 muap.macsize = 0;
2732 muap.flags = uap->flags;
2733
2734 return (__mac_getfsstat(p, &muap, retval));
2735 }
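/*
 * A minimal userland sketch of getfsstat(2), whose kernel side is the
 * wrapper above plus getfsstat_callback(): a NULL buffer returns only the
 * mount count, and MNT_NOWAIT skips refreshing the per-mount statistics.
 */
#if 0	/* illustrative userland sketch; not compiled with the kernel */
#include <sys/param.h>
#include <sys/mount.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	struct statfs *buf;
	int i, n;

	n = getfsstat(NULL, 0, MNT_NOWAIT);	/* just count the mounts */
	if (n <= 0)
		return 1;
	buf = calloc((size_t)n, sizeof(*buf));
	if (buf == NULL)
		return 1;
	n = getfsstat(buf, n * (int)sizeof(*buf), MNT_NOWAIT);
	for (i = 0; i < n; i++)
		printf("%-16s %s\n", buf[i].f_fstypename, buf[i].f_mntonname);
	free(buf);
	return 0;
}
#endif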
2736
2737 /*
2738 * __mac_getfsstat: Get MAC-related file system statistics
2739 *
2740 * Parameters: p (ignored)
2741 * uap User argument descriptor (see below)
2742 * retval Count of file system statistics (N stats)
2743 *
2744 * Indirect: uap->bufsize Buffer size
2745 * uap->macsize MAC info size
2746 * uap->buf Buffer where information will be returned
2747 * uap->mac MAC info
2748 * uap->flags File system flags
2749 *
2750 *
2751 * Returns: 0 Success
2752 * !0 Not success
2753 *
2754 */
2755 int
2756 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
2757 {
2758 user_addr_t sfsp;
2759 user_addr_t *mp;
2760 size_t count, maxcount, bufsize, macsize;
2761 struct getfsstat_struct fst;
2762
2763 bufsize = (size_t) uap->bufsize;
2764 macsize = (size_t) uap->macsize;
2765
2766 if (IS_64BIT_PROCESS(p)) {
2767 maxcount = bufsize / sizeof(struct user64_statfs);
2768 }
2769 else {
2770 maxcount = bufsize / sizeof(struct user32_statfs);
2771 }
2772 sfsp = uap->buf;
2773 count = 0;
2774
2775 mp = NULL;
2776
2777 #if CONFIG_MACF
2778 if (uap->mac != USER_ADDR_NULL) {
2779 u_int32_t *mp0;
2780 int error;
2781 unsigned int i;
2782
2783 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2784 if (count != maxcount)
2785 return (EINVAL);
2786
2787 /* Copy in the array */
2788 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2789 if (mp0 == NULL) {
2790 return (ENOMEM);
2791 }
2792
2793 error = copyin(uap->mac, mp0, macsize);
2794 if (error) {
2795 FREE(mp0, M_MACTEMP);
2796 return (error);
2797 }
2798
2799 /* Normalize to an array of user_addr_t */
2800 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
2801 if (mp == NULL) {
2802 FREE(mp0, M_MACTEMP);
2803 return (ENOMEM);
2804 }
2805
2806 for (i = 0; i < count; i++) {
2807 if (IS_64BIT_PROCESS(p))
2808 mp[i] = ((user_addr_t *)mp0)[i];
2809 else
2810 mp[i] = (user_addr_t)mp0[i];
2811 }
2812 FREE(mp0, M_MACTEMP);
2813 }
2814 #endif
2815
2816
2817 fst.sfsp = sfsp;
2818 fst.mp = mp;
2819 fst.flags = uap->flags;
2820 fst.count = 0;
2821 fst.error = 0;
2822 fst.maxcount = maxcount;
2823
2824
2825 vfs_iterate(0, getfsstat_callback, &fst);
2826
2827 if (mp)
2828 FREE(mp, M_MACTEMP);
2829
2830 if (fst.error ) {
2831 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2832 return(fst.error);
2833 }
2834
2835 if (fst.sfsp && fst.count > fst.maxcount)
2836 *retval = fst.maxcount;
2837 else
2838 *retval = fst.count;
2839 return (0);
2840 }
2841
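/*
 * Per-mount callback for getfsstat64(2): same flow as getfsstat_callback()
 * but always copies out a fixed-size struct statfs64.
 */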
2842 static int
2843 getfsstat64_callback(mount_t mp, void * arg)
2844 {
2845 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2846 struct vfsstatfs *sp;
2847 int error;
2848
2849 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2850 #if CONFIG_MACF
2851 error = mac_mount_check_stat(vfs_context_current(), mp);
2852 if (error != 0) {
2853 fstp->error = error;
2854 return(VFS_RETURNED_DONE);
2855 }
2856 #endif
2857 sp = &mp->mnt_vfsstat;
2858 /*
2859 * If MNT_NOWAIT is specified, do not refresh the fsstat
2860 * cache. MNT_WAIT overrides MNT_NOWAIT.
2861 *
2862 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2863 * getfsstat, since the constants are out of the same
2864 * namespace.
2865 */
2866 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2867 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2868 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2869 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2870 return(VFS_RETURNED);
2871 }
2872
2873 error = statfs64_common(mp, sp, fstp->sfsp);
2874 if (error) {
2875 fstp->error = error;
2876 return(VFS_RETURNED_DONE);
2877 }
2878 fstp->sfsp += sizeof(struct statfs64);
2879 }
2880 fstp->count++;
2881 return(VFS_RETURNED);
2882 }
2883
2884 /*
2885 * Get statistics on all file systems in 64 bit mode.
2886 */
2887 int
2888 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2889 {
2890 user_addr_t sfsp;
2891 int count, maxcount;
2892 struct getfsstat_struct fst;
2893
2894 maxcount = uap->bufsize / sizeof(struct statfs64);
2895
2896 sfsp = uap->buf;
2897 count = 0;
2898
2899 fst.sfsp = sfsp;
2900 fst.flags = uap->flags;
2901 fst.count = 0;
2902 fst.error = 0;
2903 fst.maxcount = maxcount;
2904
2905 vfs_iterate(0, getfsstat64_callback, &fst);
2906
2907 if (fst.error ) {
2908 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2909 return(fst.error);
2910 }
2911
2912 if (fst.sfsp && fst.count > fst.maxcount)
2913 *retval = fst.maxcount;
2914 else
2915 *retval = fst.count;
2916
2917 return (0);
2918 }
2919
2920 /*
2921 * Get the vnode associated with the file descriptor passed
2922 * as input.
2923 *
2924 * INPUT
2925 * ctx - vfs context of caller
2926 * fd - file descriptor for which vnode is required.
2927 * vpp - Pointer to pointer to vnode to be returned.
2928 *
2929 * The vnode is returned with an iocount so any vnode obtained
2930 * by this call needs a vnode_put
2931 *
2932 */
2933 int
2934 vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
2935 {
2936 int error;
2937 vnode_t vp;
2938 struct fileproc *fp;
2939 proc_t p = vfs_context_proc(ctx);
2940
2941 *vpp = NULLVP;
2942
2943 error = fp_getfvp(p, fd, &fp, &vp);
2944 if (error)
2945 return (error);
2946
2947 error = vnode_getwithref(vp);
2948 if (error) {
2949 (void)fp_drop(p, fd, fp, 0);
2950 return (error);
2951 }
2952
2953 (void)fp_drop(p, fd, fp, 0);
2954 *vpp = vp;
2955 return (error);
2956 }
2957
2958 /*
2959 * Wrapper function around namei to start lookup from a directory
2960 * specified by a file descriptor ni_dirfd.
2961 *
2962 * In addition to all the errors returned by namei, this call can
2963 * return ENOTDIR if the file descriptor does not refer to a directory,
2964 * and EBADF if the file descriptor is not valid.
2965 */
2966 int
2967 nameiat(struct nameidata *ndp, int dirfd)
2968 {
2969 if ((dirfd != AT_FDCWD) &&
2970 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
2971 !(ndp->ni_cnd.cn_flags & USEDVP)) {
2972 int error = 0;
2973 char c;
2974
2975 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
2976 error = copyin(ndp->ni_dirp, &c, sizeof(char));
2977 if (error)
2978 return (error);
2979 } else {
2980 c = *((char *)(ndp->ni_dirp));
2981 }
2982
2983 if (c != '/') {
2984 vnode_t dvp_at;
2985
2986 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
2987 &dvp_at);
2988 if (error)
2989 return (error);
2990
2991 if (vnode_vtype(dvp_at) != VDIR) {
2992 vnode_put(dvp_at);
2993 return (ENOTDIR);
2994 }
2995
2996 ndp->ni_dvp = dvp_at;
2997 ndp->ni_cnd.cn_flags |= USEDVP;
2998 error = namei(ndp);
2999 ndp->ni_cnd.cn_flags &= ~USEDVP;
3000 vnode_put(dvp_at);
3001 return (error);
3002 }
3003 }
3004
3005 return (namei(ndp));
3006 }
3007
3008 /*
3009 * Change current working directory to a given file descriptor.
3010 */
3011 /* ARGSUSED */
3012 static int
3013 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
3014 {
3015 struct filedesc *fdp = p->p_fd;
3016 vnode_t vp;
3017 vnode_t tdp;
3018 vnode_t tvp;
3019 struct mount *mp;
3020 int error;
3021 vfs_context_t ctx = vfs_context_current();
3022
3023 AUDIT_ARG(fd, uap->fd);
3024 if (per_thread && uap->fd == -1) {
3025 /*
3026 * Switching back from per-thread to per-process CWD; verify that we
3027 * in fact have one before proceeding. The only success case
3028 * for this code path is to return 0 preemptively after zapping
3029 * the thread structure contents.
3030 */
3031 thread_t th = vfs_context_thread(ctx);
3032 if (th) {
3033 uthread_t uth = get_bsdthread_info(th);
3034 tvp = uth->uu_cdir;
3035 uth->uu_cdir = NULLVP;
3036 if (tvp != NULLVP) {
3037 vnode_rele(tvp);
3038 return (0);
3039 }
3040 }
3041 return (EBADF);
3042 }
3043
3044 if ( (error = file_vnode(uap->fd, &vp)) )
3045 return(error);
3046 if ( (error = vnode_getwithref(vp)) ) {
3047 file_drop(uap->fd);
3048 return(error);
3049 }
3050
3051 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3052
3053 if (vp->v_type != VDIR) {
3054 error = ENOTDIR;
3055 goto out;
3056 }
3057
3058 #if CONFIG_MACF
3059 error = mac_vnode_check_chdir(ctx, vp);
3060 if (error)
3061 goto out;
3062 #endif
3063 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3064 if (error)
3065 goto out;
3066
3067 while (!error && (mp = vp->v_mountedhere) != NULL) {
3068 if (vfs_busy(mp, LK_NOWAIT)) {
3069 error = EACCES;
3070 goto out;
3071 }
3072 error = VFS_ROOT(mp, &tdp, ctx);
3073 vfs_unbusy(mp);
3074 if (error)
3075 break;
3076 vnode_put(vp);
3077 vp = tdp;
3078 }
3079 if (error)
3080 goto out;
3081 if ( (error = vnode_ref(vp)) )
3082 goto out;
3083 vnode_put(vp);
3084
3085 if (per_thread) {
3086 thread_t th = vfs_context_thread(ctx);
3087 if (th) {
3088 uthread_t uth = get_bsdthread_info(th);
3089 tvp = uth->uu_cdir;
3090 uth->uu_cdir = vp;
3091 OSBitOrAtomic(P_THCWD, &p->p_flag);
3092 } else {
3093 vnode_rele(vp);
3094 return (ENOENT);
3095 }
3096 } else {
3097 proc_fdlock(p);
3098 tvp = fdp->fd_cdir;
3099 fdp->fd_cdir = vp;
3100 proc_fdunlock(p);
3101 }
3102
3103 if (tvp)
3104 vnode_rele(tvp);
3105 file_drop(uap->fd);
3106
3107 return (0);
3108 out:
3109 vnode_put(vp);
3110 file_drop(uap->fd);
3111
3112 return(error);
3113 }
3114
3115 int
3116 fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
3117 {
3118 return common_fchdir(p, uap, 0);
3119 }
3120
3121 int
3122 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
3123 {
3124 return common_fchdir(p, (void *)uap, 1);
3125 }
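/*
 * A minimal userland sketch of fchdir(2), handled by common_fchdir()
 * above: the descriptor must reference a directory, otherwise the call
 * fails with ENOTDIR. "/tmp" is an assumed example directory.
 */
#if 0	/* illustrative userland sketch; not compiled with the kernel */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/tmp", O_RDONLY);

	if (fd == -1 || fchdir(fd) == -1) {
		perror("fchdir");
		return 1;
	}
	close(fd);	/* the working directory stays changed */
	return 0;
}
#endif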
3126
3127 /*
3128 * Change current working directory (".").
3129 *
3130 * Returns: 0 Success
3131 * change_dir:ENOTDIR
3132 * change_dir:???
3133 * vnode_ref:ENOENT No such file or directory
3134 */
3135 /* ARGSUSED */
3136 static int
3137 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
3138 {
3139 struct filedesc *fdp = p->p_fd;
3140 int error;
3141 struct nameidata nd;
3142 vnode_t tvp;
3143 vfs_context_t ctx = vfs_context_current();
3144
3145 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
3146 UIO_USERSPACE, uap->path, ctx);
3147 error = change_dir(&nd, ctx);
3148 if (error)
3149 return (error);
3150 if ( (error = vnode_ref(nd.ni_vp)) ) {
3151 vnode_put(nd.ni_vp);
3152 return (error);
3153 }
3154 /*
3155 * drop the iocount we picked up in change_dir
3156 */
3157 vnode_put(nd.ni_vp);
3158
3159 if (per_thread) {
3160 thread_t th = vfs_context_thread(ctx);
3161 if (th) {
3162 uthread_t uth = get_bsdthread_info(th);
3163 tvp = uth->uu_cdir;
3164 uth->uu_cdir = nd.ni_vp;
3165 OSBitOrAtomic(P_THCWD, &p->p_flag);
3166 } else {
3167 vnode_rele(nd.ni_vp);
3168 return (ENOENT);
3169 }
3170 } else {
3171 proc_fdlock(p);
3172 tvp = fdp->fd_cdir;
3173 fdp->fd_cdir = nd.ni_vp;
3174 proc_fdunlock(p);
3175 }
3176
3177 if (tvp)
3178 vnode_rele(tvp);
3179
3180 return (0);
3181 }
3182
3183
3184 /*
3185 * chdir
3186 *
3187 * Change current working directory (".") for the entire process
3188 *
3189 * Parameters: p Process requesting the call
3190 * uap User argument descriptor (see below)
3191 * retval (ignored)
3192 *
3193 * Indirect parameters: uap->path Directory path
3194 *
3195 * Returns: 0 Success
3196 * common_chdir: ENOTDIR
3197 * common_chdir: ENOENT No such file or directory
3198 * common_chdir: ???
3199 *
3200 */
3201 int
3202 chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
3203 {
3204 return common_chdir(p, (void *)uap, 0);
3205 }
3206
3207 /*
3208 * __pthread_chdir
3209 *
3210 * Change current working directory (".") for a single thread
3211 *
3212 * Parameters: p Process requesting the call
3213 * uap User argument descriptor (see below)
3214 * retval (ignored)
3215 *
3216 * Indirect parameters: uap->path Directory path
3217 *
3218 * Returns: 0 Success
3219 * common_chdir: ENOTDIR
3220 * common_chdir: ENOENT No such file or directory
3221 * common_chdir: ???
3222 *
3223 */
3224 int
3225 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
3226 {
3227 return common_chdir(p, (void *)uap, 1);
3228 }
3229
3230
3231 /*
3232 * Change notion of root (``/'') directory.
3233 */
3234 /* ARGSUSED */
3235 int
3236 chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
3237 {
3238 struct filedesc *fdp = p->p_fd;
3239 int error;
3240 struct nameidata nd;
3241 vnode_t tvp;
3242 vfs_context_t ctx = vfs_context_current();
3243
3244 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
3245 return (error);
3246
3247 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
3248 UIO_USERSPACE, uap->path, ctx);
3249 error = change_dir(&nd, ctx);
3250 if (error)
3251 return (error);
3252
3253 #if CONFIG_MACF
3254 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3255 &nd.ni_cnd);
3256 if (error) {
3257 vnode_put(nd.ni_vp);
3258 return (error);
3259 }
3260 #endif
3261
3262 if ( (error = vnode_ref(nd.ni_vp)) ) {
3263 vnode_put(nd.ni_vp);
3264 return (error);
3265 }
3266 vnode_put(nd.ni_vp);
3267
3268 proc_fdlock(p);
3269 tvp = fdp->fd_rdir;
3270 fdp->fd_rdir = nd.ni_vp;
3271 fdp->fd_flags |= FD_CHROOT;
3272 proc_fdunlock(p);
3273
3274 if (tvp != NULL)
3275 vnode_rele(tvp);
3276
3277 return (0);
3278 }
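/*
 * A minimal userland sketch of chroot(2), handled above: it requires
 * superuser privileges and is conventionally followed by chdir("/") so
 * the old working directory cannot be used to escape. The path is an
 * assumed placeholder.
 */
#if 0	/* illustrative userland sketch; not compiled with the kernel */
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	if (chroot("/private/var/empty") == -1 || chdir("/") == -1) {
		perror("chroot");	/* EPERM when not run as root */
		return 1;
	}
	/* All further path lookups are relative to the new root. */
	return 0;
}
#endif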
3279
3280 /*
3281 * Common routine for chroot and chdir.
3282 *
3283 * Returns: 0 Success
3284 * ENOTDIR Not a directory
3285 * namei:??? [anything namei can return]
3286 * vnode_authorize:??? [anything vnode_authorize can return]
3287 */
3288 static int
3289 change_dir(struct nameidata *ndp, vfs_context_t ctx)
3290 {
3291 vnode_t vp;
3292 int error;
3293
3294 if ((error = namei(ndp)))
3295 return (error);
3296 nameidone(ndp);
3297 vp = ndp->ni_vp;
3298
3299 if (vp->v_type != VDIR) {
3300 vnode_put(vp);
3301 return (ENOTDIR);
3302 }
3303
3304 #if CONFIG_MACF
3305 error = mac_vnode_check_chdir(ctx, vp);
3306 if (error) {
3307 vnode_put(vp);
3308 return (error);
3309 }
3310 #endif
3311
3312 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3313 if (error) {
3314 vnode_put(vp);
3315 return (error);
3316 }
3317
3318 return (error);
3319 }
3320
3321 /*
3322 * Allocate the per-fd vnode data (for directories) associated with the file glob.
3323 */
3324 struct fd_vn_data *
3325 fg_vn_data_alloc(void)
3326 {
3327 struct fd_vn_data *fvdata;
3328
3329 /* Allocate per fd vnode data */
3330 MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
3331 M_FD_VN_DATA, M_WAITOK | M_ZERO);
3332 lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
3333 return fvdata;
3334 }
3335
3336 /*
3337 * Free the vnode data (for directories) associated with the file glob.
3338 */
3339 void
3340 fg_vn_data_free(void *fgvndata)
3341 {
3342 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3343
3344 if (fvdata->fv_buf)
3345 FREE(fvdata->fv_buf, M_FD_DIRBUF);
3346 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3347 FREE(fvdata, M_FD_VN_DATA);
3348 }
3349
3350 /*
3351 * Check permissions, allocate an open file structure,
3352 * and call the device open routine if any.
3353 *
3354 * Returns: 0 Success
3355 * EINVAL
3356 * EINTR
3357 * falloc:ENFILE
3358 * falloc:EMFILE
3359 * falloc:ENOMEM
3360 * vn_open_auth:???
3361 * dupfdopen:???
3362 * VNOP_ADVLOCK:???
3363 * vnode_setsize:???
3364 *
3365 * XXX Need to implement uid, gid
3366 */
3367 int
3368 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3369 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3370 int32_t *retval)
3371 {
3372 proc_t p = vfs_context_proc(ctx);
3373 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
3374 struct fileproc *fp;
3375 vnode_t vp;
3376 int flags, oflags;
3377 int type, indx, error;
3378 struct flock lf;
3379 struct vfs_context context;
3380
3381 oflags = uflags;
3382
3383 if ((oflags & O_ACCMODE) == O_ACCMODE)
3384 return(EINVAL);
3385
3386 flags = FFLAGS(uflags);
3387 CLR(flags, FENCRYPTED);
3388 CLR(flags, FUNENCRYPTED);
3389
3390 AUDIT_ARG(fflags, oflags);
3391 AUDIT_ARG(mode, vap->va_mode);
3392
3393 if ((error = falloc_withalloc(p,
3394 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
3395 return (error);
3396 }
3397 uu->uu_dupfd = -indx - 1;
3398
3399 if ((error = vn_open_auth(ndp, &flags, vap))) {
3400 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
3401 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
3402 fp_drop(p, indx, NULL, 0);
3403 *retval = indx;
3404 return (0);
3405 }
3406 }
3407 if (error == ERESTART)
3408 error = EINTR;
3409 fp_free(p, indx, fp);
3410 return (error);
3411 }
3412 uu->uu_dupfd = 0;
3413 vp = ndp->ni_vp;
3414
3415 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
3416 fp->f_fglob->fg_ops = &vnops;
3417 fp->f_fglob->fg_data = (caddr_t)vp;
3418
3419 if (flags & (O_EXLOCK | O_SHLOCK)) {
3420 lf.l_whence = SEEK_SET;
3421 lf.l_start = 0;
3422 lf.l_len = 0;
3423 if (flags & O_EXLOCK)
3424 lf.l_type = F_WRLCK;
3425 else
3426 lf.l_type = F_RDLCK;
3427 type = F_FLOCK;
3428 if ((flags & FNONBLOCK) == 0)
3429 type |= F_WAIT;
3430 #if CONFIG_MACF
3431 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3432 F_SETLK, &lf);
3433 if (error)
3434 goto bad;
3435 #endif
3436 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
3437 goto bad;
3438 fp->f_fglob->fg_flag |= FHASLOCK;
3439 }
3440
3441 /* try to truncate by setting the size attribute */
3442 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3443 goto bad;
3444
3445 /*
3446 * For directories we hold some additional information in the fd.
3447 */
3448 if (vnode_vtype(vp) == VDIR) {
3449 fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
3450 } else {
3451 fp->f_fglob->fg_vn_data = NULL;
3452 }
3453
3454 vnode_put(vp);
3455
3456 /*
3457 * The first terminal open (without O_NOCTTY) by a session leader
3458 * results in it being set as the controlling terminal.
3459 */
3460 if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
3461 !(flags & O_NOCTTY)) {
3462 int tmp = 0;
3463
3464 (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
3465 (caddr_t)&tmp, ctx);
3466 }
3467
3468 proc_fdlock(p);
3469 if (flags & O_CLOEXEC)
3470 *fdflags(p, indx) |= UF_EXCLOSE;
3471 if (flags & O_CLOFORK)
3472 *fdflags(p, indx) |= UF_FORKCLOSE;
3473 procfdtbl_releasefd(p, indx, NULL);
3474
3475 #if CONFIG_SECLUDED_MEMORY
3476 if (secluded_for_filecache &&
3477 FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE &&
3478 vnode_vtype(vp) == VREG) {
3479 memory_object_control_t moc;
3480
3481 moc = ubc_getobject(vp, UBC_FLAGS_NONE);
3482
3483 if (moc == MEMORY_OBJECT_CONTROL_NULL) {
3484 /* nothing to do... */
3485 } else if (fp->f_fglob->fg_flag & FWRITE) {
3486 /* writable -> no longer eligible for secluded pages */
3487 memory_object_mark_eligible_for_secluded(moc,
3488 FALSE);
3489 } else if (secluded_for_filecache == 1) {
3490 char pathname[32] = { 0, };
3491 size_t copied;
3492 /* XXX FBDP: better way to detect /Applications/ ? */
3493 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3494 copyinstr(ndp->ni_dirp,
3495 pathname,
3496 sizeof (pathname),
3497 &copied);
3498 } else {
3499 copystr(CAST_DOWN(void *, ndp->ni_dirp),
3500 pathname,
3501 sizeof (pathname),
3502 &copied);
3503 }
3504 pathname[sizeof (pathname) - 1] = '\0';
3505 if (strncmp(pathname,
3506 "/Applications/",
3507 strlen("/Applications/")) == 0 &&
3508 strncmp(pathname,
3509 "/Applications/Camera.app/",
3510 strlen("/Applications/Camera.app/")) != 0) {
3511 /*
3512 * not writable
3513 * AND from "/Applications/"
3514 * AND not from "/Applications/Camera.app/"
3515 * ==> eligible for secluded
3516 */
3517 memory_object_mark_eligible_for_secluded(moc,
3518 TRUE);
3519 }
3520 } else if (secluded_for_filecache == 2) {
3521 /* not implemented... */
3522 if (!strncmp(vp->v_name,
3523 DYLD_SHARED_CACHE_NAME,
3524 strlen(DYLD_SHARED_CACHE_NAME)) ||
3525 !strncmp(vp->v_name,
3526 "dyld",
3527 strlen(vp->v_name)) ||
3528 !strncmp(vp->v_name,
3529 "launchd",
3530 strlen(vp->v_name)) ||
3531 !strncmp(vp->v_name,
3532 "Camera",
3533 strlen(vp->v_name)) ||
3534 !strncmp(vp->v_name,
3535 "mediaserverd",
3536 strlen(vp->v_name))) {
3537 /*
3538 * This file matters when launching Camera:
3539 * do not store its contents in the secluded
3540 * pool that will be drained on Camera launch.
3541 */
3542 memory_object_mark_eligible_for_secluded(moc,
3543 FALSE);
3544 }
3545 }
3546 }
3547 #endif /* CONFIG_SECLUDED_MEMORY */
3548
3549 fp_drop(p, indx, fp, 1);
3550 proc_fdunlock(p);
3551
3552 *retval = indx;
3553
3554 return (0);
3555 bad:
3556 context = *vfs_context_current();
3557 context.vc_ucred = fp->f_fglob->fg_cred;
3558
3559 if ((fp->f_fglob->fg_flag & FHASLOCK) &&
3560 (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
3561 lf.l_whence = SEEK_SET;
3562 lf.l_start = 0;
3563 lf.l_len = 0;
3564 lf.l_type = F_UNLCK;
3565
3566 (void)VNOP_ADVLOCK(
3567 vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
3568 }
3569
3570 vn_close(vp, fp->f_fglob->fg_flag, &context);
3571 vnode_put(vp);
3572 fp_free(p, indx, fp);
3573
3574 return (error);
3575 }
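/*
 * A minimal userland sketch of the open(2) flags open1() handles above:
 * O_EXLOCK takes an advisory exclusive lock at open time (FHASLOCK),
 * O_NONBLOCK turns a contended lock into an error instead of a wait, and
 * O_CLOEXEC sets UF_EXCLOSE on the new descriptor. The path is an assumed
 * placeholder.
 */
#if 0	/* illustrative userland sketch; not compiled with the kernel */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/tmp/example.lock",
	    O_CREAT | O_RDWR | O_EXLOCK | O_NONBLOCK | O_CLOEXEC, 0644);

	if (fd == -1) {
		perror("open");		/* EAGAIN if someone else holds the lock */
		return 1;
	}
	/* ... exclusive access while the descriptor stays open ... */
	close(fd);
	return 0;
}
#endif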
3576
3577 /*
3578 * While most of the *at syscall handlers can call nameiat(), which is
3579 * a wrapper around namei, open1 is different: its nameidata is set up
3580 * here but namei itself is not called until vn_open_auth. So we simply
3581 * do here what nameiat() would have done.
3583 */
3584 static int
3585 open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3586 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
3587 int dirfd)
3588 {
3589 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
3590 int error;
3591 char c;
3592
3593 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3594 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3595 if (error)
3596 return (error);
3597 } else {
3598 c = *((char *)(ndp->ni_dirp));
3599 }
3600
3601 if (c != '/') {
3602 vnode_t dvp_at;
3603
3604 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3605 &dvp_at);
3606 if (error)
3607 return (error);
3608
3609 if (vnode_vtype(dvp_at) != VDIR) {
3610 vnode_put(dvp_at);
3611 return (ENOTDIR);
3612 }
3613
3614 ndp->ni_dvp = dvp_at;
3615 ndp->ni_cnd.cn_flags |= USEDVP;
3616 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
3617 retval);
3618 vnode_put(dvp_at);
3619 return (error);
3620 }
3621 }
3622
3623 return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
3624 }
3625
3626 /*
3627 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3628 *
3629 * Parameters: p Process requesting the open
3630 * uap User argument descriptor (see below)
3631 * retval Pointer to an area to receive the
3632 * return value from the system call
3633 *
3634 * Indirect: uap->path Path to open (same as 'open')
3635 * uap->flags Flags to open (same as 'open')
3636 * uap->uid UID to set, if creating
3637 * uap->gid GID to set, if creating
3638 * uap->mode File mode, if creating (same as 'open')
3639 * uap->xsecurity ACL to set, if creating
3640 *
3641 * Returns: 0 Success
3642 * !0 errno value
3643 *
3644 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3645 *
3646 * XXX: We should enumerate the possible errno values here, and where
3647 * in the code they originated.
3648 */
3649 int
3650 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
3651 {
3652 struct filedesc *fdp = p->p_fd;
3653 int ciferror;
3654 kauth_filesec_t xsecdst;
3655 struct vnode_attr va;
3656 struct nameidata nd;
3657 int cmode;
3658
3659 AUDIT_ARG(owner, uap->uid, uap->gid);
3660
3661 xsecdst = NULL;
3662 if ((uap->xsecurity != USER_ADDR_NULL) &&
3663 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3664 return ciferror;
3665
3666 VATTR_INIT(&va);
3667 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3668 VATTR_SET(&va, va_mode, cmode);
3669 if (uap->uid != KAUTH_UID_NONE)
3670 VATTR_SET(&va, va_uid, uap->uid);
3671 if (uap->gid != KAUTH_GID_NONE)
3672 VATTR_SET(&va, va_gid, uap->gid);
3673 if (xsecdst != NULL)
3674 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3675
3676 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3677 uap->path, vfs_context_current());
3678
3679 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3680 fileproc_alloc_init, NULL, retval);
3681 if (xsecdst != NULL)
3682 kauth_filesec_free(xsecdst);
3683
3684 return ciferror;
3685 }
3686
3687 /*
3688 * Go through the data-protected atomically controlled open (2)
3689 *
3690 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3691 */
3692 int open_dprotected_np (proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3693 int flags = uap->flags;
3694 int class = uap->class;
3695 int dpflags = uap->dpflags;
3696
3697 /*
3698 * Follow the same path as normal open(2)
3699 * Look up the item if it exists, and acquire the vnode.
3700 */
3701 struct filedesc *fdp = p->p_fd;
3702 struct vnode_attr va;
3703 struct nameidata nd;
3704 int cmode;
3705 int error;
3706
3707 VATTR_INIT(&va);
3708 /* Mask off all but regular access permissions */
3709 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3710 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3711
3712 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3713 uap->path, vfs_context_current());
3714
3715 /*
3716 * Initialize the extra fields in vnode_attr to pass down our
3717 * extra fields.
3718 * 1. target cprotect class.
3719 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3720 */
3721 if (flags & O_CREAT) {
3722 /* lower level kernel code validates that the class is valid before applying it. */
3723 if (class != PROTECTION_CLASS_DEFAULT) {
3724 /*
3725 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3726 * file behave the same as open (2)
3727 */
3728 VATTR_SET(&va, va_dataprotect_class, class);
3729 }
3730 }
3731
3732 if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
3733 if ( flags & (O_RDWR | O_WRONLY)) {
3734 /* Not allowed to write raw encrypted bytes */
3735 return EINVAL;
3736 }
3737 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
3738 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3739 }
3740 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
3741 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
3742 }
3743 }
3744
3745 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3746 fileproc_alloc_init, NULL, retval);
3747
3748 return error;
3749 }
3750
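/*
 * Common back end for open(2), openat(2) and openbyid_np(2): set up the
 * vnode_attr and nameidata for the lookup, then hand off to open1at(),
 * which honours the directory descriptor for relative paths.
 */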
3751 static int
3752 openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
3753 int fd, enum uio_seg segflg, int *retval)
3754 {
3755 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
3756 struct vnode_attr va;
3757 struct nameidata nd;
3758 int cmode;
3759
3760 VATTR_INIT(&va);
3761 /* Mask off all but regular access permissions */
3762 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3763 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3764
3765 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
3766 segflg, path, ctx);
3767
3768 return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
3769 retval, fd));
3770 }
3771
3772 int
3773 open(proc_t p, struct open_args *uap, int32_t *retval)
3774 {
3775 __pthread_testcancel(1);
3776 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3777 }
3778
3779 int
3780 open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3781 int32_t *retval)
3782 {
3783 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3784 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3785 }
3786
3787 int
3788 openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3789 int32_t *retval)
3790 {
3791 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3792 uap->mode, uap->fd, UIO_USERSPACE, retval));
3793 }
3794
3795 int
3796 openat(proc_t p, struct openat_args *uap, int32_t *retval)
3797 {
3798 __pthread_testcancel(1);
3799 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3800 }
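/*
 * A minimal userland sketch of openat(2), serviced by openat_internal()
 * and open1at() above: a relative path is resolved against the directory
 * descriptor instead of the current working directory, while AT_FDCWD
 * restores plain open(2) behaviour. Paths are assumed placeholders.
 */
#if 0	/* illustrative userland sketch; not compiled with the kernel */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int dirfd, fd;

	dirfd = open("/tmp", O_RDONLY);
	if (dirfd == -1)
		return 1;
	fd = openat(dirfd, "example.txt", O_CREAT | O_RDWR, 0644);
	if (fd == -1) {
		perror("openat");
		close(dirfd);
		return 1;
	}
	close(fd);
	close(dirfd);
	return 0;
}
#endif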
3801
3802 /*
3803 * openbyid_np: open a file given a file system id and a file system object id.
3804 * For HFS the file system object id is an fsobj_id_t {uint32, uint32}; for
3805 * file systems that don't support object ids it is a node id (uint64_t).
3806 *
3807 * Parameters: p Process requesting the open
3808 * uap User argument descriptor (see below)
3809 * retval Pointer to an area to receive the
3810 * return value from the system call
3811 *
3812 * Indirect: uap->path Path to open (same as 'open')
3813 *
3814 * uap->fsid id of target file system
3815 * uap->objid id of target file system object
3816 * uap->flags Flags to open (same as 'open')
3817 *
3818 * Returns: 0 Success
3819 * !0 errno value
3820 *
3821 *
3822 * XXX: We should enumerate the possible errno values here, and where
3823 * in the code they originated.
3824 */
3825 int
3826 openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
3827 {
3828 fsid_t fsid;
3829 uint64_t objid;
3830 int error;
3831 char *buf = NULL;
3832 int buflen = MAXPATHLEN;
3833 int pathlen = 0;
3834 vfs_context_t ctx = vfs_context_current();
3835
3836 if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
3837 return (error);
3838 }
3839
3840 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
3841 return (error);
3842 }
3843
3844 /* uap->objid is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3845 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
3846 return (error);
3847 }
3848
3849 AUDIT_ARG(value32, fsid.val[0]);
3850 AUDIT_ARG(value64, objid);
3851
3852 /* resolve path from fsid, objid */
3853 do {
3854 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
3855 if (buf == NULL) {
3856 return (ENOMEM);
3857 }
3858
3859 error = fsgetpath_internal(
3860 ctx, fsid.val[0], objid,
3861 buflen, buf, &pathlen);
3862
3863 if (error) {
3864 FREE(buf, M_TEMP);
3865 buf = NULL;
3866 }
3867 } while (error == ENOSPC && (buflen += MAXPATHLEN));
3868
3869 if (error) {
3870 return error;
3871 }
3872
3873 buf[pathlen] = 0;
3874
3875 error = openat_internal(
3876 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
3877
3878 FREE(buf, M_TEMP);
3879
3880 return error;
3881 }
3882
3883
3884 /*
3885 * Create a special file.
3886 */
3887 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3888
3889 int
3890 mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
3891 {
3892 struct vnode_attr va;
3893 vfs_context_t ctx = vfs_context_current();
3894 int error;
3895 struct nameidata nd;
3896 vnode_t vp, dvp;
3897
3898 VATTR_INIT(&va);
3899 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3900 VATTR_SET(&va, va_rdev, uap->dev);
3901
3902 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3903 if ((uap->mode & S_IFMT) == S_IFIFO)
3904 return(mkfifo1(ctx, uap->path, &va));
3905
3906 AUDIT_ARG(mode, uap->mode);
3907 AUDIT_ARG(value32, uap->dev);
3908
3909 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
3910 return (error);
3911 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
3912 UIO_USERSPACE, uap->path, ctx);
3913 error = namei(&nd);
3914 if (error)
3915 return (error);
3916 dvp = nd.ni_dvp;
3917 vp = nd.ni_vp;
3918
3919 if (vp != NULL) {
3920 error = EEXIST;
3921 goto out;
3922 }
3923
3924 switch (uap->mode & S_IFMT) {
3925 case S_IFCHR:
3926 VATTR_SET(&va, va_type, VCHR);
3927 break;
3928 case S_IFBLK:
3929 VATTR_SET(&va, va_type, VBLK);
3930 break;
3931 default:
3932 error = EINVAL;
3933 goto out;
3934 }
3935
3936 #if CONFIG_MACF
3937 error = mac_vnode_check_create(ctx,
3938 nd.ni_dvp, &nd.ni_cnd, &va);
3939 if (error)
3940 goto out;
3941 #endif
3942
3943 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3944 goto out;
3945
3946 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
3947 goto out;
3948
3949 if (vp) {
3950 int update_flags = 0;
3951
3952 // Make sure the name & parent pointers are hooked up
3953 if (vp->v_name == NULL)
3954 update_flags |= VNODE_UPDATE_NAME;
3955 if (vp->v_parent == NULLVP)
3956 update_flags |= VNODE_UPDATE_PARENT;
3957
3958 if (update_flags)
3959 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3960
3961 #if CONFIG_FSE
3962 add_fsevent(FSE_CREATE_FILE, ctx,
3963 FSE_ARG_VNODE, vp,
3964 FSE_ARG_DONE);
3965 #endif
3966 }
3967
3968 out:
3969 /*
3970 * nameidone has to happen before we vnode_put(dvp)
3971 * since it may need to release the fs_nodelock on the dvp
3972 */
3973 nameidone(&nd);
3974
3975 if (vp)
3976 vnode_put(vp);
3977 vnode_put(dvp);
3978
3979 return (error);
3980 }
3981
3982 /*
3983 * Create a named pipe.
3984 *
3985 * Returns: 0 Success
3986 * EEXIST
3987 * namei:???
3988 * vnode_authorize:???
3989 * vn_create:???
3990 */
3991 static int
3992 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
3993 {
3994 vnode_t vp, dvp;
3995 int error;
3996 struct nameidata nd;
3997
3998 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
3999 UIO_USERSPACE, upath, ctx);
4000 error = namei(&nd);
4001 if (error)
4002 return (error);
4003 dvp = nd.ni_dvp;
4004 vp = nd.ni_vp;
4005
4006 /* check that this is a new file and authorize addition */
4007 if (vp != NULL) {
4008 error = EEXIST;
4009 goto out;
4010 }
4011 VATTR_SET(vap, va_type, VFIFO);
4012
4013 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
4014 goto out;
4015
4016 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
4017 out:
4018 /*
4019 * nameidone has to happen before we vnode_put(dvp)
4020 * since it may need to release the fs_nodelock on the dvp
4021 */
4022 nameidone(&nd);
4023
4024 if (vp)
4025 vnode_put(vp);
4026 vnode_put(dvp);
4027
4028 return error;
4029 }
4030
4031
4032 /*
4033 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
4034 *
4035 * Parameters: p Process requesting the open
4036 * uap User argument descriptor (see below)
4037 * retval (Ignored)
4038 *
4039 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4040 * uap->uid UID to set
4041 * uap->gid GID to set
4042 * uap->mode File mode to set (same as 'mkfifo')
4043 * uap->xsecurity ACL to set, if creating
4044 *
4045 * Returns: 0 Success
4046 * !0 errno value
4047 *
4048 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4049 *
4050 * XXX: We should enumerate the possible errno values here, and where
4051 * in the code they originated.
4052 */
4053 int
4054 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
4055 {
4056 int ciferror;
4057 kauth_filesec_t xsecdst;
4058 struct vnode_attr va;
4059
4060 AUDIT_ARG(owner, uap->uid, uap->gid);
4061
4062 xsecdst = KAUTH_FILESEC_NONE;
4063 if (uap->xsecurity != USER_ADDR_NULL) {
4064 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4065 return ciferror;
4066 }
4067
4068 VATTR_INIT(&va);
4069 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4070 if (uap->uid != KAUTH_UID_NONE)
4071 VATTR_SET(&va, va_uid, uap->uid);
4072 if (uap->gid != KAUTH_GID_NONE)
4073 VATTR_SET(&va, va_gid, uap->gid);
4074 if (xsecdst != KAUTH_FILESEC_NONE)
4075 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4076
4077 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
4078
4079 if (xsecdst != KAUTH_FILESEC_NONE)
4080 kauth_filesec_free(xsecdst);
4081 return ciferror;
4082 }
4083
4084 /* ARGSUSED */
4085 int
4086 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
4087 {
4088 struct vnode_attr va;
4089
4090 VATTR_INIT(&va);
4091 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4092
4093 return(mkfifo1(vfs_context_current(), uap->path, &va));
4094 }
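/*
 * A minimal userland sketch of mkfifo(2), which lands in mkfifo1() above
 * and fails with EEXIST if the node already exists. The path is an
 * assumed placeholder.
 */
#if 0	/* illustrative userland sketch; not compiled with the kernel */
#include <sys/stat.h>
#include <stdio.h>

int
main(void)
{
	if (mkfifo("/tmp/example.fifo", 0600) == -1) {
		perror("mkfifo");
		return 1;
	}
	return 0;
}
#endif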
4095
4096
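/*
 * Local strrchr(): return a pointer to the last occurrence of 'ch' in the
 * NUL-terminated string 'p', or NULL if it does not occur.
 */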
4097 static char *
4098 my_strrchr(char *p, int ch)
4099 {
4100 char *save;
4101
4102 for (save = NULL;; ++p) {
4103 if (*p == ch)
4104 save = p;
4105 if (!*p)
4106 return(save);
4107 }
4108 /* NOTREACHED */
4109 }
4110
4111 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4112
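/*
 * Build the path to 'dvp' (plus an optional leaf name) into 'path'.
 * This never fails outright: if the path cannot be obtained or does not
 * fit, *truncated_path is set and the nearest ancestor path (ultimately
 * the mount point or "/") is used instead. Returns the length of the
 * string stored in 'path', including the terminating NUL.
 */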
4113 int
4114 safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4115 {
4116 int ret, len = _len;
4117
4118 *truncated_path = 0;
4119 ret = vn_getpath(dvp, path, &len);
4120 if (ret == 0 && len < (MAXPATHLEN - 1)) {
4121 if (leafname) {
4122 path[len-1] = '/';
4123 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
4124 if (len > MAXPATHLEN) {
4125 char *ptr;
4126
4127 // the string got truncated!
4128 *truncated_path = 1;
4129 ptr = my_strrchr(path, '/');
4130 if (ptr) {
4131 *ptr = '\0'; // chop off the string at the last directory component
4132 }
4133 len = strlen(path) + 1;
4134 }
4135 }
4136 } else if (ret == 0) {
4137 *truncated_path = 1;
4138 } else if (ret != 0) {
4139 struct vnode *mydvp=dvp;
4140
4141 if (ret != ENOSPC) {
4142 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4143 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
4144 }
4145 *truncated_path = 1;
4146
4147 do {
4148 if (mydvp->v_parent != NULL) {
4149 mydvp = mydvp->v_parent;
4150 } else if (mydvp->v_mount) {
4151 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4152 break;
4153 } else {
4154 // no parent and no mount point? only thing is to punt and say "/" changed
4155 strlcpy(path, "/", _len);
4156 len = 2;
4157 mydvp = NULL;
4158 }
4159
4160 if (mydvp == NULL) {
4161 break;
4162 }
4163
4164 len = _len;
4165 ret = vn_getpath(mydvp, path, &len);
4166 } while (ret == ENOSPC);
4167 }
4168
4169 return len;
4170 }
4171
4172
4173 /*
4174 * Make a hard file link.
4175 *
4176 * Returns: 0 Success
4177 * EPERM
4178 * EEXIST
4179 * EXDEV
4180 * namei:???
4181 * vnode_authorize:???
4182 * VNOP_LINK:???
4183 */
4184 /* ARGSUSED */
4185 static int
4186 linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
4187 user_addr_t link, int flag, enum uio_seg segflg)
4188 {
4189 vnode_t vp, dvp, lvp;
4190 struct nameidata nd;
4191 int follow;
4192 int error;
4193 #if CONFIG_FSE
4194 fse_info finfo;
4195 #endif
4196 int need_event, has_listeners;
4197 char *target_path = NULL;
4198 int truncated=0;
4199
4200 vp = dvp = lvp = NULLVP;
4201
4202 /* look up the object we are linking to */
4203 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
4204 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
4205 segflg, path, ctx);
4206
4207 error = nameiat(&nd, fd1);
4208 if (error)
4209 return (error);
4210 vp = nd.ni_vp;
4211
4212 nameidone(&nd);
4213
4214 /*
4215 * Normally, linking to directories is not supported.
4216 * However, some file systems may have limited support.
4217 */
4218 if (vp->v_type == VDIR) {
4219 if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
4220 error = EPERM; /* POSIX */
4221 goto out;
4222 }
4223
4224 /* Linking to a directory requires ownership. */
4225 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
4226 struct vnode_attr dva;
4227
4228 VATTR_INIT(&dva);
4229 VATTR_WANTED(&dva, va_uid);
4230 if (vnode_getattr(vp, &dva, ctx) != 0 ||
4231 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
4232 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
4233 error = EACCES;
4234 goto out;
4235 }
4236 }
4237 }
4238
4239 /* lookup the target node */
4240 #if CONFIG_TRIGGERS
4241 nd.ni_op = OP_LINK;
4242 #endif
4243 nd.ni_cnd.cn_nameiop = CREATE;
4244 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
4245 nd.ni_dirp = link;
4246 error = nameiat(&nd, fd2);
4247 if (error != 0)
4248 goto out;
4249 dvp = nd.ni_dvp;
4250 lvp = nd.ni_vp;
4251
4252 #if CONFIG_MACF
4253 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
4254 goto out2;
4255 #endif
4256
4257 /* reject links to anything kauth does not allow as a link target (e.g. immutable items) */
4258 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
4259 goto out2;
4260
4261 /* target node must not exist */
4262 if (lvp != NULLVP) {
4263 error = EEXIST;
4264 goto out2;
4265 }
4266 /* cannot link across mountpoints */
4267 if (vnode_mount(vp) != vnode_mount(dvp)) {
4268 error = EXDEV;
4269 goto out2;
4270 }
4271
4272 /* authorize creation of the target node */
4273 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4274 goto out2;
4275
4276 /* and finally make the link */
4277 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
4278 if (error)
4279 goto out2;
4280
4281 #if CONFIG_MACF
4282 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
4283 #endif
4284
4285 #if CONFIG_FSE
4286 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
4287 #else
4288 need_event = 0;
4289 #endif
4290 has_listeners = kauth_authorize_fileop_has_listeners();
4291
4292 if (need_event || has_listeners) {
4293 char *link_to_path = NULL;
4294 int len, link_name_len;
4295
4296 /* build the path to the new link file */
4297 GET_PATH(target_path);
4298 if (target_path == NULL) {
4299 error = ENOMEM;
4300 goto out2;
4301 }
4302
4303 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
4304
4305 if (has_listeners) {
4306 /* build the path to file we are linking to */
4307 GET_PATH(link_to_path);
4308 if (link_to_path == NULL) {
4309 error = ENOMEM;
4310 goto out2;
4311 }
4312
4313 link_name_len = MAXPATHLEN;
4314 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
4315 /*
4316 * Call out to allow 3rd party notification of the link operation.
4317 * Ignore result of kauth_authorize_fileop call.
4318 */
4319 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
4320 (uintptr_t)link_to_path,
4321 (uintptr_t)target_path);
4322 }
4323 if (link_to_path != NULL) {
4324 RELEASE_PATH(link_to_path);
4325 }
4326 }
4327 #if CONFIG_FSE
4328 if (need_event) {
4329 /* construct fsevent */
4330 if (get_fse_info(vp, &finfo, ctx) == 0) {
4331 if (truncated) {
4332 finfo.mode |= FSE_TRUNCATED_PATH;
4333 }
4334
4335 // build the path to the destination of the link
4336 add_fsevent(FSE_CREATE_FILE, ctx,
4337 FSE_ARG_STRING, len, target_path,
4338 FSE_ARG_FINFO, &finfo,
4339 FSE_ARG_DONE);
4340 }
4341 if (vp->v_parent) {
4342 add_fsevent(FSE_STAT_CHANGED, ctx,
4343 FSE_ARG_VNODE, vp->v_parent,
4344 FSE_ARG_DONE);
4345 }
4346 }
4347 #endif
4348 }
4349 out2:
4350 /*
4351 * nameidone has to happen before we vnode_put(dvp)
4352 * since it may need to release the fs_nodelock on the dvp
4353 */
4354 nameidone(&nd);
4355 if (target_path != NULL) {
4356 RELEASE_PATH(target_path);
4357 }
4358 out:
4359 if (lvp)
4360 vnode_put(lvp);
4361 if (dvp)
4362 vnode_put(dvp);
4363 vnode_put(vp);
4364 return (error);
4365 }
4366
4367 int
4368 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4369 {
4370 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4371 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4372 }
4373
4374 int
4375 linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4376 {
4377 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4378 return (EINVAL);
4379
4380 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4381 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4382 }
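
/*
 * Illustrative userspace sketch (not part of this file): creating a hard
 * link with linkat().  The file names are hypothetical.  As the flag
 * handling above shows, AT_SYMLINK_FOLLOW links to the target of a trailing
 * symlink, whereas a flag of 0 links to the symlink node itself.
 *
 *	#include <unistd.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		if (linkat(AT_FDCWD, "data.txt", AT_FDCWD, "data-link.txt",
 *		    AT_SYMLINK_FOLLOW) == -1) {
 *			perror("linkat");	// e.g. EEXIST or EXDEV, as documented above
 *			return 1;
 *		}
 *		return 0;
 *	}
 */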
4383
4384 /*
4385 * Make a symbolic link.
4386 *
4387 * We could add support for ACLs here too...
4388 */
4389 /* ARGSUSED */
4390 static int
4391 symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
4392 user_addr_t link, enum uio_seg segflg)
4393 {
4394 struct vnode_attr va;
4395 char *path;
4396 int error;
4397 struct nameidata nd;
4398 vnode_t vp, dvp;
4399 size_t dummy=0;
4400 proc_t p;
4401
4402 error = 0;
4403 if (UIO_SEG_IS_USER_SPACE(segflg)) {
4404 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
4405 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
4406 } else {
4407 path = (char *)path_data;
4408 }
4409 if (error)
4410 goto out;
4411 AUDIT_ARG(text, path); /* This is the link string */
4412
4413 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
4414 segflg, link, ctx);
4415
4416 error = nameiat(&nd, fd);
4417 if (error)
4418 goto out;
4419 dvp = nd.ni_dvp;
4420 vp = nd.ni_vp;
4421
4422 p = vfs_context_proc(ctx);
4423 VATTR_INIT(&va);
4424 VATTR_SET(&va, va_type, VLNK);
4425 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
4426
4427 #if CONFIG_MACF
4428 error = mac_vnode_check_create(ctx,
4429 dvp, &nd.ni_cnd, &va);
4430 #endif
4431 if (error != 0) {
4432 goto skipit;
4433 }
4434
4435 if (vp != NULL) {
4436 error = EEXIST;
4437 goto skipit;
4438 }
4439
4440 /* authorize */
4441 if (error == 0)
4442 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
4443 /* get default ownership, etc. */
4444 if (error == 0)
4445 error = vnode_authattr_new(dvp, &va, 0, ctx);
4446 if (error == 0)
4447 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
4448
4449 #if CONFIG_MACF
4450 if (error == 0 && vp)
4451 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
4452 #endif
4453
4454 /* do fallback attribute handling */
4455 if (error == 0 && vp)
4456 error = vnode_setattr_fallback(vp, &va, ctx);
4457
4458 if (error == 0) {
4459 int update_flags = 0;
4460
4461 /* check if a new vnode was created, else try to get one */
4462 if (vp == NULL) {
4463 nd.ni_cnd.cn_nameiop = LOOKUP;
4464 #if CONFIG_TRIGGERS
4465 nd.ni_op = OP_LOOKUP;
4466 #endif
4467 nd.ni_cnd.cn_flags = 0;
4468 error = nameiat(&nd, fd);
4469 vp = nd.ni_vp;
4470
4471 if (vp == NULL)
4472 goto skipit;
4473 }
4474
4475 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
4476 /* call out to allow 3rd party notification of the symlink creation.
4477 * Ignore result of kauth_authorize_fileop call.
4478 */
4479 if (kauth_authorize_fileop_has_listeners() &&
4480 namei(&nd) == 0) {
4481 char *new_link_path = NULL;
4482 int len;
4483
4484 /* build the path to the new link file */
4485 new_link_path = get_pathbuff();
4486 len = MAXPATHLEN;
4487 vn_getpath(dvp, new_link_path, &len);
4488 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
4489 new_link_path[len - 1] = '/';
4490 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
4491 }
4492
4493 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
4494 (uintptr_t)path, (uintptr_t)new_link_path);
4495 if (new_link_path != NULL)
4496 release_pathbuff(new_link_path);
4497 }
4498 #endif
4499 // Make sure the name & parent pointers are hooked up
4500 if (vp->v_name == NULL)
4501 update_flags |= VNODE_UPDATE_NAME;
4502 if (vp->v_parent == NULLVP)
4503 update_flags |= VNODE_UPDATE_PARENT;
4504
4505 if (update_flags)
4506 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
4507
4508 #if CONFIG_FSE
4509 add_fsevent(FSE_CREATE_FILE, ctx,
4510 FSE_ARG_VNODE, vp,
4511 FSE_ARG_DONE);
4512 #endif
4513 }
4514
4515 skipit:
4516 /*
4517 * nameidone has to happen before we vnode_put(dvp)
4518 * since it may need to release the fs_nodelock on the dvp
4519 */
4520 nameidone(&nd);
4521
4522 if (vp)
4523 vnode_put(vp);
4524 vnode_put(dvp);
4525 out:
4526 if (path && (path != (char *)path_data))
4527 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
4528
4529 return (error);
4530 }
4531
4532 int
4533 symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4534 {
4535 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4536 uap->link, UIO_USERSPACE));
4537 }
4538
4539 int
4540 symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4541 __unused int32_t *retval)
4542 {
4543 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4544 uap->path2, UIO_USERSPACE));
4545 }
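
/*
 * Illustrative userspace sketch (not part of this file): creating a symbolic
 * link with symlinkat().  The names are hypothetical.  The first argument is
 * the link's contents and is stored verbatim; it is not required to name an
 * existing file.
 *
 *	#include <unistd.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		if (symlinkat("../shared/config.plist", AT_FDCWD, "config.plist") == -1) {
 *			perror("symlinkat");	// e.g. EEXIST if the name is taken
 *			return 1;
 *		}
 *		return 0;
 *	}
 */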
4546
4547 /*
4548 * Delete a whiteout from the filesystem.
4549 * No longer supported.
4550 */
4551 int
4552 undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
4553 {
4554 return (ENOTSUP);
4555 }
4556
4557 /*
4558 * Delete a name from the filesystem.
4559 */
4560 /* ARGSUSED */
4561 static int
4562 unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
4563 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
4564 {
4565 struct nameidata nd;
4566 vnode_t vp, dvp;
4567 int error;
4568 struct componentname *cnp;
4569 char *path = NULL;
4570 int len=0;
4571 #if CONFIG_FSE
4572 fse_info finfo;
4573 struct vnode_attr va;
4574 #endif
4575 int flags;
4576 int need_event;
4577 int has_listeners;
4578 int truncated_path;
4579 int batched;
4580 struct vnode_attr *vap;
4581 int do_retry;
4582 int retry_count = 0;
4583 int cn_flags;
4584
4585 cn_flags = LOCKPARENT;
4586 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
4587 cn_flags |= AUDITVNPATH1;
4588 /* If a starting dvp is passed, it trumps any fd passed. */
4589 if (start_dvp)
4590 cn_flags |= USEDVP;
4591
4592 #if NAMEDRSRCFORK
4593 /* unlink or delete is allowed on rsrc forks and named streams */
4594 cn_flags |= CN_ALLOWRSRCFORK;
4595 #endif
4596
4597 retry:
4598 do_retry = 0;
4599 flags = 0;
4600 need_event = 0;
4601 has_listeners = 0;
4602 truncated_path = 0;
4603 vap = NULL;
4604
4605 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
4606
4607 nd.ni_dvp = start_dvp;
4608 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
4609 cnp = &nd.ni_cnd;
4610
4611 continue_lookup:
4612 error = nameiat(&nd, fd);
4613 if (error)
4614 return (error);
4615
4616 dvp = nd.ni_dvp;
4617 vp = nd.ni_vp;
4618
4619
4620 /* With Carbon delete semantics, busy files cannot be deleted */
4621 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
4622 flags |= VNODE_REMOVE_NODELETEBUSY;
4623 }
4624
4625 /* Skip any potential upcalls if told to. */
4626 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4627 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4628 }
4629
4630 if (vp) {
4631 batched = vnode_compound_remove_available(vp);
4632 /*
4633 * The root of a mounted filesystem cannot be deleted.
4634 */
4635 if (vp->v_flag & VROOT) {
4636 error = EBUSY;
4637 }
4638
4639 if (!batched) {
4640 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4641 if (error) {
4642 if (error == ENOENT) {
4643 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4644 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4645 do_retry = 1;
4646 retry_count++;
4647 }
4648 }
4649 goto out;
4650 }
4651 }
4652 } else {
4653 batched = 1;
4654
4655 if (!vnode_compound_remove_available(dvp)) {
4656 panic("No vp, but no compound remove?");
4657 }
4658 }
4659
4660 #if CONFIG_FSE
4661 need_event = need_fsevent(FSE_DELETE, dvp);
4662 if (need_event) {
4663 if (!batched) {
4664 if ((vp->v_flag & VISHARDLINK) == 0) {
4665 /* XXX need to get these data in batched VNOP */
4666 get_fse_info(vp, &finfo, ctx);
4667 }
4668 } else {
4669 error = vfs_get_notify_attributes(&va);
4670 if (error) {
4671 goto out;
4672 }
4673
4674 vap = &va;
4675 }
4676 }
4677 #endif
4678 has_listeners = kauth_authorize_fileop_has_listeners();
4679 if (need_event || has_listeners) {
4680 if (path == NULL) {
4681 GET_PATH(path);
4682 if (path == NULL) {
4683 error = ENOMEM;
4684 goto out;
4685 }
4686 }
4687 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
4688 }
4689
4690 #if NAMEDRSRCFORK
4691 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
4692 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4693 else
4694 #endif
4695 {
4696 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
4697 vp = nd.ni_vp;
4698 if (error == EKEEPLOOKING) {
4699 if (!batched) {
4700 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4701 }
4702
4703 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
4704 panic("EKEEPLOOKING, but continue flag not set?");
4705 }
4706
4707 if (vnode_isdir(vp)) {
4708 error = EISDIR;
4709 goto out;
4710 }
4711 goto continue_lookup;
4712 } else if (error == ENOENT && batched) {
4713 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4714 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4715 /*
4716 * For compound VNOPs, the authorization callback may
4717 * return ENOENT in case of racing hardlink lookups
4718 * hitting the name cache; redrive the lookup.
4719 */
4720 do_retry = 1;
4721 retry_count += 1;
4722 goto out;
4723 }
4724 }
4725 }
4726
4727 /*
4728 * Call out to allow 3rd party notification of delete.
4729 * Ignore result of kauth_authorize_fileop call.
4730 */
4731 if (!error) {
4732 if (has_listeners) {
4733 kauth_authorize_fileop(vfs_context_ucred(ctx),
4734 KAUTH_FILEOP_DELETE,
4735 (uintptr_t)vp,
4736 (uintptr_t)path);
4737 }
4738
4739 if (vp->v_flag & VISHARDLINK) {
4740 //
4741 // if a hardlink gets deleted we want to blow away the
4742 // v_parent link because the path that got us to this
4743 // instance of the link is no longer valid. this will
4744 // force the next call to get the path to ask the file
4745 // system instead of just following the v_parent link.
4746 //
4747 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
4748 }
4749
4750 #if CONFIG_FSE
4751 if (need_event) {
4752 if (vp->v_flag & VISHARDLINK) {
4753 get_fse_info(vp, &finfo, ctx);
4754 } else if (vap) {
4755 vnode_get_fse_info_from_vap(vp, &finfo, vap);
4756 }
4757 if (truncated_path) {
4758 finfo.mode |= FSE_TRUNCATED_PATH;
4759 }
4760 add_fsevent(FSE_DELETE, ctx,
4761 FSE_ARG_STRING, len, path,
4762 FSE_ARG_FINFO, &finfo,
4763 FSE_ARG_DONE);
4764 }
4765 #endif
4766 }
4767
4768 out:
4769 if (path != NULL)
4770 RELEASE_PATH(path);
4771
4772 #if NAMEDRSRCFORK
4773 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4774 * will cause its shadow file to go away if necessary.
4775 */
4776 if (vp && (vnode_isnamedstream(vp)) &&
4777 (vp->v_parent != NULLVP) &&
4778 vnode_isshadow(vp)) {
4779 vnode_recycle(vp);
4780 }
4781 #endif
4782 /*
4783 * nameidone has to happen before we vnode_put(dvp)
4784 * since it may need to release the fs_nodelock on the dvp
4785 */
4786 nameidone(&nd);
4787 vnode_put(dvp);
4788 if (vp) {
4789 vnode_put(vp);
4790 }
4791
4792 if (do_retry) {
4793 goto retry;
4794 }
4795
4796 return (error);
4797 }
4798
4799 int
4800 unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
4801 enum uio_seg segflg, int unlink_flags)
4802 {
4803 return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
4804 unlink_flags));
4805 }
4806
4807 /*
4808 * Delete a name from the filesystem using Carbon semantics.
4809 */
4810 int
4811 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
4812 {
4813 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4814 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
4815 }
4816
4817 /*
4818 * Delete a name from the filesystem using POSIX semantics.
4819 */
4820 int
4821 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
4822 {
4823 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4824 uap->path, UIO_USERSPACE, 0));
4825 }
4826
4827 int
4828 unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
4829 {
4830 if (uap->flag & ~AT_REMOVEDIR)
4831 return (EINVAL);
4832
4833 if (uap->flag & AT_REMOVEDIR)
4834 return (rmdirat_internal(vfs_context_current(), uap->fd,
4835 uap->path, UIO_USERSPACE));
4836 else
4837 return (unlinkat_internal(vfs_context_current(), uap->fd,
4838 NULLVP, uap->path, UIO_USERSPACE, 0));
4839 }
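
/*
 * Illustrative userspace sketch (not part of this file): removing a file and
 * a directory with unlinkat().  The names are hypothetical.  As the flag
 * check above shows, AT_REMOVEDIR routes the call to the rmdir path and is
 * the only flag accepted.
 *
 *	#include <unistd.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		if (unlinkat(AT_FDCWD, "scratch.dat", 0) == -1)
 *			perror("unlinkat file");
 *		if (unlinkat(AT_FDCWD, "scratchdir", AT_REMOVEDIR) == -1)
 *			perror("unlinkat dir");	// e.g. ENOTEMPTY if not empty
 *		return 0;
 *	}
 */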
4840
4841 /*
4842 * Reposition read/write file offset.
4843 */
4844 int
4845 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
4846 {
4847 struct fileproc *fp;
4848 vnode_t vp;
4849 struct vfs_context *ctx;
4850 off_t offset = uap->offset, file_size;
4851 int error;
4852
4853 if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
4854 if (error == ENOTSUP)
4855 return (ESPIPE);
4856 return (error);
4857 }
4858 if (vnode_isfifo(vp)) {
4859 file_drop(uap->fd);
4860 return(ESPIPE);
4861 }
4862
4863
4864 ctx = vfs_context_current();
4865 #if CONFIG_MACF
4866 if (uap->whence == L_INCR && uap->offset == 0)
4867 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
4868 fp->f_fglob);
4869 else
4870 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
4871 fp->f_fglob);
4872 if (error) {
4873 file_drop(uap->fd);
4874 return (error);
4875 }
4876 #endif
4877 if ( (error = vnode_getwithref(vp)) ) {
4878 file_drop(uap->fd);
4879 return(error);
4880 }
4881
4882 switch (uap->whence) {
4883 case L_INCR:
4884 offset += fp->f_fglob->fg_offset;
4885 break;
4886 case L_XTND:
4887 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
4888 break;
4889 offset += file_size;
4890 break;
4891 case L_SET:
4892 break;
4893 case SEEK_HOLE:
4894 error = VNOP_IOCTL(vp, FSCTL_FIOSEEKHOLE, (caddr_t)&offset, 0, ctx);
4895 break;
4896 case SEEK_DATA:
4897 error = VNOP_IOCTL(vp, FSCTL_FIOSEEKDATA, (caddr_t)&offset, 0, ctx);
4898 break;
4899 default:
4900 error = EINVAL;
4901 }
4902 if (error == 0) {
4903 if (uap->offset > 0 && offset < 0) {
4904 /* Incremented/relative move past max size */
4905 error = EOVERFLOW;
4906 } else {
4907 /*
4908 * Allow negative offsets on character devices, per
4909 * POSIX 1003.1-2001. Most likely for writing disk
4910 * labels.
4911 */
4912 if (offset < 0 && vp->v_type != VCHR) {
4913 /* Decremented/relative move before start */
4914 error = EINVAL;
4915 } else {
4916 /* Success */
4917 fp->f_fglob->fg_offset = offset;
4918 *retval = fp->f_fglob->fg_offset;
4919 }
4920 }
4921 }
4922
4923 /*
4924 * An lseek can affect whether data is "available to read." Use
4925 * a hint of NOTE_NONE so that no EVFILT_VNODE events fire.
4926 */
4927 post_event_if_success(vp, error, NOTE_NONE);
4928 (void)vnode_put(vp);
4929 file_drop(uap->fd);
4930 return (error);
4931 }
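
/*
 * Illustrative userspace sketch (not part of this file): repositioning a
 * file offset with lseek().  The file name is hypothetical.  SEEK_HOLE and
 * SEEK_DATA are forwarded to the filesystem via VNOP_IOCTL above and may not
 * be supported everywhere, so they are only used here if the headers define
 * them.
 *
 *	#include <unistd.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int fd = open("data.bin", O_RDONLY);
 *		if (fd == -1) {
 *			perror("open");
 *			return 1;
 *		}
 *		off_t end = lseek(fd, 0, SEEK_END);	// file size
 *		printf("size: %lld\n", (long long)end);
 *	#ifdef SEEK_HOLE
 *		off_t hole = lseek(fd, 0, SEEK_HOLE);	// first hole at or after offset 0
 *		if (hole != -1)
 *			printf("first hole: %lld\n", (long long)hole);
 *	#endif
 *		close(fd);
 *		return 0;
 *	}
 */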
4932
4933
4934 /*
4935 * Check access permissions.
4936 *
4937 * Returns: 0 Success
4938 * vnode_authorize:???
4939 */
4940 static int
4941 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
4942 {
4943 kauth_action_t action;
4944 int error;
4945
4946 /*
4947 * If just the regular access bits, convert them to something
4948 * that vnode_authorize will understand.
4949 */
4950 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4951 action = 0;
4952 if (uflags & R_OK)
4953 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4954 if (uflags & W_OK) {
4955 if (vnode_isdir(vp)) {
4956 action |= KAUTH_VNODE_ADD_FILE |
4957 KAUTH_VNODE_ADD_SUBDIRECTORY;
4958 /* might want delete rights here too */
4959 } else {
4960 action |= KAUTH_VNODE_WRITE_DATA;
4961 }
4962 }
4963 if (uflags & X_OK) {
4964 if (vnode_isdir(vp)) {
4965 action |= KAUTH_VNODE_SEARCH;
4966 } else {
4967 action |= KAUTH_VNODE_EXECUTE;
4968 }
4969 }
4970 } else {
4971 /* take advantage of definition of uflags */
4972 action = uflags >> 8;
4973 }
4974
4975 #if CONFIG_MACF
4976 error = mac_vnode_check_access(ctx, vp, uflags);
4977 if (error)
4978 return (error);
4979 #endif /* MAC */
4980
4981 /* action == 0 means only check for existence */
4982 if (action != 0) {
4983 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
4984 } else {
4985 error = 0;
4986 }
4987
4988 return(error);
4989 }
4990
4991
4992
4993 /*
4994 * access_extended: Check access permissions in bulk.
4995 *
4996 * Description: uap->entries Pointer to an array of accessx
4997 * descriptor structs, plus one or
4998 * more NULL terminated strings (see
4999 * "Notes" section below).
5000 * uap->size Size of the area pointed to by
5001 * uap->entries.
5002 * uap->results Pointer to the results array.
5003 *
5004 * Returns: 0 Success
5005 * ENOMEM Insufficient memory
5006 * EINVAL Invalid arguments
5007 * namei:EFAULT Bad address
5008 * namei:ENAMETOOLONG Filename too long
5009 * namei:ENOENT No such file or directory
5010 * namei:ELOOP Too many levels of symbolic links
5011 * namei:EBADF Bad file descriptor
5012 * namei:ENOTDIR Not a directory
5013 * namei:???
5014 * access1:
5015 *
5016 * Implicit returns:
5017 * uap->results Array contents modified
5018 *
5019 * Notes: The uap->entries are structured as an arbitrary length array
5020 * of accessx descriptors, followed by one or more NULL terminated
5021 * strings
5022 *
5023 * struct accessx_descriptor[0]
5024 * ...
5025 * struct accessx_descriptor[n]
5026 * char name_data[0];
5027 *
5028 * We determine the entry count by walking the buffer containing
5029 * the uap->entries argument descriptor. For each descriptor we
5030 * see, the valid values for the offset ad_name_offset will be
5031 * in the byte range:
5032 *
5033 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5034 * to
5035 * [ uap->entries + uap->size - 2 ]
5036 *
5037 * since we must have at least one string, and the string must
5038 * be at least one character plus the NULL terminator in length.
5039 *
5040 * XXX: Need to support the check-as uid argument
5041 */
5042 int
5043 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
5044 {
5045 struct accessx_descriptor *input = NULL;
5046 errno_t *result = NULL;
5047 errno_t error = 0;
5048 int wantdelete = 0;
5049 unsigned int desc_max, desc_actual, i, j;
5050 struct vfs_context context;
5051 struct nameidata nd;
5052 int niopts;
5053 vnode_t vp = NULL;
5054 vnode_t dvp = NULL;
5055 #define ACCESSX_MAX_DESCR_ON_STACK 10
5056 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
5057
5058 context.vc_ucred = NULL;
5059
5060 /*
5061 * Validate parameters; if valid, copy the descriptor array and string
5062 * arguments into local memory. Before proceeding, the following
5063 * conditions must have been met:
5064 *
5065 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5066 * o There must be sufficient room in the request for at least one
5067 * descriptor and a one byte NUL-terminated string.
5068 * o The allocation of local storage must not fail.
5069 */
5070 if (uap->size > ACCESSX_MAX_TABLESIZE)
5071 return(ENOMEM);
5072 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
5073 return(EINVAL);
5074 if (uap->size <= sizeof (stack_input)) {
5075 input = stack_input;
5076 } else {
5077 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
5078 if (input == NULL) {
5079 error = ENOMEM;
5080 goto out;
5081 }
5082 }
5083 error = copyin(uap->entries, input, uap->size);
5084 if (error)
5085 goto out;
5086
5087 AUDIT_ARG(opaque, input, uap->size);
5088
5089 /*
5090 * Force NUL termination of the copyin buffer to avoid namei() running
5091 * off the end. If the caller passes us bogus data, they may get a
5092 * bogus result.
5093 */
5094 ((char *)input)[uap->size - 1] = 0;
5095
5096 /*
5097 * Access is defined as checking against the process' real identity,
5098 * even if operations are checking the effective identity. This
5099 * requires that we use a local vfs context.
5100 */
5101 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5102 context.vc_thread = current_thread();
5103
5104 /*
5105 * Find out how many entries we have, so we can allocate the result
5106 * array by walking the list and adjusting the count downward by the
5107 * earliest string offset we see.
5108 */
5109 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
5110 desc_actual = desc_max;
5111 for (i = 0; i < desc_actual; i++) {
5112 /*
5113 * Take the offset to the name string for this entry and
5114 * convert to an input array index, which would be one off
5115 * the end of the array if this entry was the lowest-addressed
5116 * name string.
5117 */
5118 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
5119
5120 /*
5121 * An offset greater than the max allowable offset is an error.
5122 * It is also an error for any valid entry to point
5123 * to a location prior to the end of the current entry, if
5124 * it's not a reference to the string of the previous entry.
5125 */
5126 if (j > desc_max || (j != 0 && j <= i)) {
5127 error = EINVAL;
5128 goto out;
5129 }
5130
5131 /* Also do not let ad_name_offset point to something beyond the size of the input */
5132 if (input[i].ad_name_offset >= uap->size) {
5133 error = EINVAL;
5134 goto out;
5135 }
5136
5137 /*
5138 * An offset of 0 means use the previous descriptor's offset;
5139 * this is used to chain multiple requests for the same file
5140 * to avoid multiple lookups.
5141 */
5142 if (j == 0) {
5143 /* This is not valid for the first entry */
5144 if (i == 0) {
5145 error = EINVAL;
5146 goto out;
5147 }
5148 continue;
5149 }
5150
5151 /*
5152 * If the offset of the string for this descriptor is before
5153 * what we believe is the current actual last descriptor,
5154 * then we need to adjust our estimate downward; this permits
5155 * the string table following the last descriptor to be out
5156 * of order relative to the descriptor list.
5157 */
5158 if (j < desc_actual)
5159 desc_actual = j;
5160 }
5161
5162 /*
5163 * We limit the actual number of descriptors we are willing to process
5164 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS; if the number being
5165 * requested exceeds this limit, the request fails with ENOMEM.
5166 */
5167 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
5168 error = ENOMEM;
5169 goto out;
5170 }
5171 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
5172 if (result == NULL) {
5173 error = ENOMEM;
5174 goto out;
5175 }
5176
5177 /*
5178 * Do the work by iterating over the descriptor entries we know to
5179 * at least appear to contain valid data.
5180 */
5181 error = 0;
5182 for (i = 0; i < desc_actual; i++) {
5183 /*
5184 * If the ad_name_offset is 0, then we use the previous
5185 * results to make the check; otherwise, we are looking up
5186 * a new file name.
5187 */
5188 if (input[i].ad_name_offset != 0) {
5189 /* discard old vnodes */
5190 if (vp) {
5191 vnode_put(vp);
5192 vp = NULL;
5193 }
5194 if (dvp) {
5195 vnode_put(dvp);
5196 dvp = NULL;
5197 }
5198
5199 /*
5200 * Scan forward in the descriptor list to see if we
5201 * need the parent vnode. We will need it if we are
5202 * deleting, since we must have rights to remove
5203 * entries in the parent directory, as well as the
5204 * rights to delete the object itself.
5205 */
5206 wantdelete = input[i].ad_flags & _DELETE_OK;
5207 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
5208 if (input[j].ad_flags & _DELETE_OK)
5209 wantdelete = 1;
5210
5211 niopts = FOLLOW | AUDITVNPATH1;
5212
5213 /* need parent for vnode_authorize for deletion test */
5214 if (wantdelete)
5215 niopts |= WANTPARENT;
5216
5217 /* do the lookup */
5218 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
5219 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
5220 &context);
5221 error = namei(&nd);
5222 if (!error) {
5223 vp = nd.ni_vp;
5224 if (wantdelete)
5225 dvp = nd.ni_dvp;
5226 }
5227 nameidone(&nd);
5228 }
5229
5230 /*
5231 * Handle lookup errors.
5232 */
5233 switch(error) {
5234 case ENOENT:
5235 case EACCES:
5236 case EPERM:
5237 case ENOTDIR:
5238 result[i] = error;
5239 break;
5240 case 0:
5241 /* run this access check */
5242 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
5243 break;
5244 default:
5245 /* fatal lookup error */
5246
5247 goto out;
5248 }
5249 }
5250
5251 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
5252
5253 /* copy out results */
5254 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
5255
5256 out:
5257 if (input && input != stack_input)
5258 FREE(input, M_TEMP);
5259 if (result)
5260 FREE(result, M_TEMP);
5261 if (vp)
5262 vnode_put(vp);
5263 if (dvp)
5264 vnode_put(dvp);
5265 if (IS_VALID_CRED(context.vc_ucred))
5266 kauth_cred_unref(&context.vc_ucred);
5267 return(error);
5268 }
5269
5270
5271 /*
5272 * Returns: 0 Success
5273 * namei:EFAULT Bad address
5274 * namei:ENAMETOOLONG Filename too long
5275 * namei:ENOENT No such file or directory
5276 * namei:ELOOP Too many levels of symbolic links
5277 * namei:EBADF Bad file descriptor
5278 * namei:ENOTDIR Not a directory
5279 * namei:???
5280 * access1:
5281 */
5282 static int
5283 faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5284 int flag, enum uio_seg segflg)
5285 {
5286 int error;
5287 struct nameidata nd;
5288 int niopts;
5289 struct vfs_context context;
5290 #if NAMEDRSRCFORK
5291 int is_namedstream = 0;
5292 #endif
5293
5294 /*
5295 * Unless the AT_EACCESS option is used, access is defined as checking
5296 * against the process' real identity, even if operations are checking
5297 * the effective identity. So we need to tweak the credential
5298 * in the context for that case.
5299 */
5300 if (!(flag & AT_EACCESS))
5301 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5302 else
5303 context.vc_ucred = ctx->vc_ucred;
5304 context.vc_thread = ctx->vc_thread;
5305
5306
5307 niopts = FOLLOW | AUDITVNPATH1;
5308 /* need parent for vnode_authorize for deletion test */
5309 if (amode & _DELETE_OK)
5310 niopts |= WANTPARENT;
5311 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5312 path, &context);
5313
5314 #if NAMEDRSRCFORK
5315 /* access(F_OK) calls are allowed for resource forks. */
5316 if (amode == F_OK)
5317 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5318 #endif
5319 error = nameiat(&nd, fd);
5320 if (error)
5321 goto out;
5322
5323 #if NAMEDRSRCFORK
5324 /* Grab reference on the shadow stream file vnode to
5325 * force an inactive on release which will mark it
5326 * for recycle.
5327 */
5328 if (vnode_isnamedstream(nd.ni_vp) &&
5329 (nd.ni_vp->v_parent != NULLVP) &&
5330 vnode_isshadow(nd.ni_vp)) {
5331 is_namedstream = 1;
5332 vnode_ref(nd.ni_vp);
5333 }
5334 #endif
5335
5336 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
5337
5338 #if NAMEDRSRCFORK
5339 if (is_namedstream) {
5340 vnode_rele(nd.ni_vp);
5341 }
5342 #endif
5343
5344 vnode_put(nd.ni_vp);
5345 if (amode & _DELETE_OK)
5346 vnode_put(nd.ni_dvp);
5347 nameidone(&nd);
5348
5349 out:
5350 if (!(flag & AT_EACCESS))
5351 kauth_cred_unref(&context.vc_ucred);
5352 return (error);
5353 }
5354
5355 int
5356 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5357 {
5358 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5359 uap->path, uap->flags, 0, UIO_USERSPACE));
5360 }
5361
5362 int
5363 faccessat(__unused proc_t p, struct faccessat_args *uap,
5364 __unused int32_t *retval)
5365 {
5366 if (uap->flag & ~AT_EACCESS)
5367 return (EINVAL);
5368
5369 return (faccessat_internal(vfs_context_current(), uap->fd,
5370 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5371 }
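
/*
 * Illustrative userspace sketch (not part of this file): checking
 * permissions with faccessat().  The path is hypothetical.  Without
 * AT_EACCESS the check is made against the real uid/gid, as the credential
 * handling above shows; with AT_EACCESS the effective identity is used.
 *
 *	#include <unistd.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		if (faccessat(AT_FDCWD, "/usr/local/bin", W_OK | X_OK, 0) == 0)
 *			printf("writable and searchable (real ids)\n");
 *		if (faccessat(AT_FDCWD, "/usr/local/bin", W_OK | X_OK, AT_EACCESS) == 0)
 *			printf("writable and searchable (effective ids)\n");
 *		return 0;
 *	}
 */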
5372
5373 /*
5374 * Returns: 0 Success
5375 * EFAULT
5376 * copyout:EFAULT
5377 * namei:???
5378 * vn_stat:???
5379 */
5380 static int
5381 fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5382 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5383 enum uio_seg segflg, int fd, int flag)
5384 {
5385 struct nameidata nd;
5386 int follow;
5387 union {
5388 struct stat sb;
5389 struct stat64 sb64;
5390 } source;
5391 union {
5392 struct user64_stat user64_sb;
5393 struct user32_stat user32_sb;
5394 struct user64_stat64 user64_sb64;
5395 struct user32_stat64 user32_sb64;
5396 } dest;
5397 caddr_t sbp;
5398 int error, my_size;
5399 kauth_filesec_t fsec;
5400 size_t xsecurity_bufsize;
5401 void * statptr;
5402
5403 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5404 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5405 segflg, path, ctx);
5406
5407 #if NAMEDRSRCFORK
5408 int is_namedstream = 0;
5409 /* stat calls are allowed for resource forks. */
5410 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5411 #endif
5412 error = nameiat(&nd, fd);
5413 if (error)
5414 return (error);
5415 fsec = KAUTH_FILESEC_NONE;
5416
5417 statptr = (void *)&source;
5418
5419 #if NAMEDRSRCFORK
5420 /* Grab reference on the shadow stream file vnode to
5421 * force an inactive on release which will mark it
5422 * for recycle.
5423 */
5424 if (vnode_isnamedstream(nd.ni_vp) &&
5425 (nd.ni_vp->v_parent != NULLVP) &&
5426 vnode_isshadow(nd.ni_vp)) {
5427 is_namedstream = 1;
5428 vnode_ref(nd.ni_vp);
5429 }
5430 #endif
5431
5432 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
5433
5434 #if NAMEDRSRCFORK
5435 if (is_namedstream) {
5436 vnode_rele(nd.ni_vp);
5437 }
5438 #endif
5439 vnode_put(nd.ni_vp);
5440 nameidone(&nd);
5441
5442 if (error)
5443 return (error);
5444 /* Zap spare fields */
5445 if (isstat64 != 0) {
5446 source.sb64.st_lspare = 0;
5447 source.sb64.st_qspare[0] = 0LL;
5448 source.sb64.st_qspare[1] = 0LL;
5449 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5450 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
5451 my_size = sizeof(dest.user64_sb64);
5452 sbp = (caddr_t)&dest.user64_sb64;
5453 } else {
5454 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
5455 my_size = sizeof(dest.user32_sb64);
5456 sbp = (caddr_t)&dest.user32_sb64;
5457 }
5458 /*
5459 * Check if we raced (post lookup) against the last unlink of a file.
5460 */
5461 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5462 source.sb64.st_nlink = 1;
5463 }
5464 } else {
5465 source.sb.st_lspare = 0;
5466 source.sb.st_qspare[0] = 0LL;
5467 source.sb.st_qspare[1] = 0LL;
5468 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5469 munge_user64_stat(&source.sb, &dest.user64_sb);
5470 my_size = sizeof(dest.user64_sb);
5471 sbp = (caddr_t)&dest.user64_sb;
5472 } else {
5473 munge_user32_stat(&source.sb, &dest.user32_sb);
5474 my_size = sizeof(dest.user32_sb);
5475 sbp = (caddr_t)&dest.user32_sb;
5476 }
5477
5478 /*
5479 * Check if we raced (post lookup) against the last unlink of a file.
5480 */
5481 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5482 source.sb.st_nlink = 1;
5483 }
5484 }
5485 if ((error = copyout(sbp, ub, my_size)) != 0)
5486 goto out;
5487
5488 /* caller wants extended security information? */
5489 if (xsecurity != USER_ADDR_NULL) {
5490
5491 /* did we get any? */
5492 if (fsec == KAUTH_FILESEC_NONE) {
5493 if (susize(xsecurity_size, 0) != 0) {
5494 error = EFAULT;
5495 goto out;
5496 }
5497 } else {
5498 /* find the user buffer size */
5499 xsecurity_bufsize = fusize(xsecurity_size);
5500
5501 /* copy out the actual data size */
5502 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5503 error = EFAULT;
5504 goto out;
5505 }
5506
5507 /* if the caller supplied enough room, copy out to it */
5508 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5509 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5510 }
5511 }
5512 out:
5513 if (fsec != KAUTH_FILESEC_NONE)
5514 kauth_filesec_free(fsec);
5515 return (error);
5516 }
5517
5518 /*
5519 * stat_extended: Get file status; with extended security (ACL).
5520 *
5521 * Parameters: p (ignored)
5522 * uap User argument descriptor (see below)
5523 * retval (ignored)
5524 *
5525 * Indirect: uap->path Path of file to get status from
5526 * uap->ub User buffer (holds file status info)
5527 * uap->xsecurity ACL to get (extended security)
5528 * uap->xsecurity_size Size of ACL
5529 *
5530 * Returns: 0 Success
5531 * !0 errno value
5532 *
5533 */
5534 int
5535 stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5536 __unused int32_t *retval)
5537 {
5538 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5539 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5540 0));
5541 }
5542
5543 /*
5544 * Returns: 0 Success
5545 * fstatat_internal:??? [see fstatat_internal() in this file]
5546 */
5547 int
5548 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
5549 {
5550 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5551 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
5552 }
5553
5554 int
5555 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
5556 {
5557 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5558 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
5559 }
5560
5561 /*
5562 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5563 *
5564 * Parameters: p (ignored)
5565 * uap User argument descriptor (see below)
5566 * retval (ignored)
5567 *
5568 * Indirect: uap->path Path of file to get status from
5569 * uap->ub User buffer (holds file status info)
5570 * uap->xsecurity ACL to get (extended security)
5571 * uap->xsecurity_size Size of ACL
5572 *
5573 * Returns: 0 Success
5574 * !0 errno value
5575 *
5576 */
5577 int
5578 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
5579 {
5580 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5581 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5582 0));
5583 }
5584
5585 /*
5586 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5587 *
5588 * Parameters: p (ignored)
5589 * uap User argument descriptor (see below)
5590 * retval (ignored)
5591 *
5592 * Indirect: uap->path Path of file to get status from
5593 * uap->ub User buffer (holds file status info)
5594 * uap->xsecurity ACL to get (extended security)
5595 * uap->xsecurity_size Size of ACL
5596 *
5597 * Returns: 0 Success
5598 * !0 errno value
5599 *
5600 */
5601 int
5602 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
5603 {
5604 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5605 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5606 AT_SYMLINK_NOFOLLOW));
5607 }
5608
5609 /*
5610 * Get file status; this version does not follow links.
5611 */
5612 int
5613 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
5614 {
5615 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5616 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5617 }
5618
5619 int
5620 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
5621 {
5622 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5623 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5624 }
5625
5626 /*
5627 * lstat64_extended: Get file status; can handle large inode numbers; does not
5628 * follow links; with extended security (ACL).
5629 *
5630 * Parameters: p (ignored)
5631 * uap User argument descriptor (see below)
5632 * retval (ignored)
5633 *
5634 * Indirect: uap->path Path of file to get status from
5635 * uap->ub User buffer (holds file status info)
5636 * uap->xsecurity ACL to get (extended security)
5637 * uap->xsecurity_size Size of ACL
5638 *
5639 * Returns: 0 Success
5640 * !0 errno value
5641 *
5642 */
5643 int
5644 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
5645 {
5646 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5647 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5648 AT_SYMLINK_NOFOLLOW));
5649 }
5650
5651 int
5652 fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5653 {
5654 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5655 return (EINVAL);
5656
5657 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5658 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5659 }
5660
5661 int
5662 fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5663 __unused int32_t *retval)
5664 {
5665 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5666 return (EINVAL);
5667
5668 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5669 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
5670 }
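
/*
 * Illustrative userspace sketch (not part of this file): retrieving file
 * status with fstatat().  The path is only an example.  AT_SYMLINK_NOFOLLOW
 * gives lstat()-like behaviour, matching the flag handling in
 * fstatat_internal() above.
 *
 *	#include <sys/stat.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct stat sb;
 *
 *		if (fstatat(AT_FDCWD, "/etc/hosts", &sb, AT_SYMLINK_NOFOLLOW) == -1) {
 *			perror("fstatat");
 *			return 1;
 *		}
 *		if (S_ISLNK(sb.st_mode))
 *			printf("symlink, %lld bytes\n", (long long)sb.st_size);
 *		else
 *			printf("mode %o, %lld bytes\n", sb.st_mode & 07777,
 *			    (long long)sb.st_size);
 *		return 0;
 *	}
 */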
5671
5672 /*
5673 * Get configurable pathname variables.
5674 *
5675 * Returns: 0 Success
5676 * namei:???
5677 * vn_pathconf:???
5678 *
5679 * Notes: Global implementation constants are intended to be
5680 * implemented in this function directly; all other constants
5681 * are implemented per-FS, and therefore must be handled by
5682 * each respective FS instead.
5683 *
5684 * XXX We implement some things globally right now that should actually be
5685 * XXX per-FS; we will need to deal with this at some point.
5686 */
5687 /* ARGSUSED */
5688 int
5689 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
5690 {
5691 int error;
5692 struct nameidata nd;
5693 vfs_context_t ctx = vfs_context_current();
5694
5695 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
5696 UIO_USERSPACE, uap->path, ctx);
5697 error = namei(&nd);
5698 if (error)
5699 return (error);
5700
5701 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
5702
5703 vnode_put(nd.ni_vp);
5704 nameidone(&nd);
5705 return (error);
5706 }
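
/*
 * Illustrative userspace sketch (not part of this file): querying a pathname
 * variable with pathconf().  A return of -1 with errno left unchanged means
 * the variable has no limit, so errno must be cleared before the call.
 *
 *	#include <unistd.h>
 *	#include <errno.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		errno = 0;
 *		long name_max = pathconf("/tmp", _PC_NAME_MAX);
 *		if (name_max == -1 && errno != 0)
 *			perror("pathconf");
 *		else
 *			printf("_PC_NAME_MAX for /tmp: %ld\n", name_max);
 *		return 0;
 *	}
 */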
5707
5708 /*
5709 * Return target name of a symbolic link.
5710 */
5711 /* ARGSUSED */
5712 static int
5713 readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
5714 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
5715 int *retval)
5716 {
5717 vnode_t vp;
5718 uio_t auio;
5719 int error;
5720 struct nameidata nd;
5721 char uio_buf[ UIO_SIZEOF(1) ];
5722
5723 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5724 seg, path, ctx);
5725
5726 error = nameiat(&nd, fd);
5727 if (error)
5728 return (error);
5729 vp = nd.ni_vp;
5730
5731 nameidone(&nd);
5732
5733 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
5734 &uio_buf[0], sizeof(uio_buf));
5735 uio_addiov(auio, buf, bufsize);
5736 if (vp->v_type != VLNK) {
5737 error = EINVAL;
5738 } else {
5739 #if CONFIG_MACF
5740 error = mac_vnode_check_readlink(ctx, vp);
5741 #endif
5742 if (error == 0)
5743 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
5744 ctx);
5745 if (error == 0)
5746 error = VNOP_READLINK(vp, auio, ctx);
5747 }
5748 vnode_put(vp);
5749
5750 *retval = bufsize - (int)uio_resid(auio);
5751 return (error);
5752 }
5753
5754 int
5755 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5756 {
5757 enum uio_seg procseg;
5758
5759 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5760 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5761 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5762 uap->count, procseg, retval));
5763 }
5764
5765 int
5766 readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5767 {
5768 enum uio_seg procseg;
5769
5770 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5771 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5772 procseg, uap->buf, uap->bufsize, procseg, retval));
5773 }
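
/*
 * Illustrative userspace sketch (not part of this file): reading a symlink
 * target with readlink().  The path is hypothetical.  As the uio handling
 * above implies, the returned buffer is not NUL-terminated, so the caller
 * must terminate it using the returned length.
 *
 *	#include <unistd.h>
 *	#include <limits.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		char buf[PATH_MAX];
 *		ssize_t n = readlink("/tmp/config.plist", buf, sizeof(buf) - 1);
 *
 *		if (n == -1) {
 *			perror("readlink");	// e.g. EINVAL if not a symlink
 *			return 1;
 *		}
 *		buf[n] = '\0';
 *		printf("-> %s\n", buf);
 *		return 0;
 *	}
 */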
5774
5775 /*
5776 * Change file flags.
5777 *
5778 * NOTE: this will vnode_put() `vp'
5779 */
5780 static int
5781 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5782 {
5783 struct vnode_attr va;
5784 kauth_action_t action;
5785 int error;
5786
5787 VATTR_INIT(&va);
5788 VATTR_SET(&va, va_flags, flags);
5789
5790 #if CONFIG_MACF
5791 error = mac_vnode_check_setflags(ctx, vp, flags);
5792 if (error)
5793 goto out;
5794 #endif
5795
5796 /* request authorisation, disregard immutability */
5797 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5798 goto out;
5799 /*
5800 * Request that the auth layer disregard those file flags it's allowed to when
5801 * authorizing this operation; we need to do this in order to be able to
5802 * clear immutable flags.
5803 */
5804 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
5805 goto out;
5806 error = vnode_setattr(vp, &va, ctx);
5807
5808 #if CONFIG_MACF
5809 if (error == 0)
5810 mac_vnode_notify_setflags(ctx, vp, flags);
5811 #endif
5812
5813 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5814 error = ENOTSUP;
5815 }
5816 out:
5817 vnode_put(vp);
5818 return(error);
5819 }
5820
5821 /*
5822 * Change flags of a file given a path name.
5823 */
5824 /* ARGSUSED */
5825 int
5826 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
5827 {
5828 vnode_t vp;
5829 vfs_context_t ctx = vfs_context_current();
5830 int error;
5831 struct nameidata nd;
5832
5833 AUDIT_ARG(fflags, uap->flags);
5834 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5835 UIO_USERSPACE, uap->path, ctx);
5836 error = namei(&nd);
5837 if (error)
5838 return (error);
5839 vp = nd.ni_vp;
5840 nameidone(&nd);
5841
5842 /* we don't vnode_put() here because chflags1 does internally */
5843 error = chflags1(vp, uap->flags, ctx);
5844
5845 return(error);
5846 }
5847
5848 /*
5849 * Change flags of a file given a file descriptor.
5850 */
5851 /* ARGSUSED */
5852 int
5853 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
5854 {
5855 vnode_t vp;
5856 int error;
5857
5858 AUDIT_ARG(fd, uap->fd);
5859 AUDIT_ARG(fflags, uap->flags);
5860 if ( (error = file_vnode(uap->fd, &vp)) )
5861 return (error);
5862
5863 if ((error = vnode_getwithref(vp))) {
5864 file_drop(uap->fd);
5865 return(error);
5866 }
5867
5868 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5869
5870 /* we don't vnode_put() here because chflags1 does internally */
5871 error = chflags1(vp, uap->flags, vfs_context_current());
5872
5873 file_drop(uap->fd);
5874 return (error);
5875 }
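
/*
 * Illustrative userspace sketch (not part of this file): changing BSD file
 * flags with chflags().  The path is hypothetical.  Clearing a flag goes
 * through the same vnode_authattr()/vnode_setattr() path as setting one,
 * with KAUTH_VNODE_NOIMMUTABLE allowing immutable bits to be cleared, as
 * noted above.
 *
 *	#include <sys/stat.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		if (chflags("notes.txt", UF_HIDDEN) == -1) {	// hide from Finder
 *			perror("chflags set");
 *			return 1;
 *		}
 *		if (chflags("notes.txt", 0) == -1) {		// clear the user flags again
 *			perror("chflags clear");
 *			return 1;
 *		}
 *		return 0;
 *	}
 */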
5876
5877 /*
5878 * Change security information on a filesystem object.
5879 *
5880 * Returns: 0 Success
5881 * EPERM Operation not permitted
5882 * vnode_authattr:??? [anything vnode_authattr can return]
5883 * vnode_authorize:??? [anything vnode_authorize can return]
5884 * vnode_setattr:??? [anything vnode_setattr can return]
5885 *
5886 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5887 * translated to EPERM before being returned.
5888 */
5889 static int
5890 chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
5891 {
5892 kauth_action_t action;
5893 int error;
5894
5895 AUDIT_ARG(mode, vap->va_mode);
5896 /* XXX audit new args */
5897
5898 #if NAMEDSTREAMS
5899 /* chmod calls are not allowed for resource forks. */
5900 if (vp->v_flag & VISNAMEDSTREAM) {
5901 return (EPERM);
5902 }
5903 #endif
5904
5905 #if CONFIG_MACF
5906 if (VATTR_IS_ACTIVE(vap, va_mode) &&
5907 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
5908 return (error);
5909
5910 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
5911 if ((error = mac_vnode_check_setowner(ctx, vp,
5912 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
5913 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1)))
5914 return (error);
5915 }
5916
5917 if (VATTR_IS_ACTIVE(vap, va_acl) &&
5918 (error = mac_vnode_check_setacl(ctx, vp, vap->va_acl)))
5919 return (error);
5920 #endif
5921
5922 /* make sure that the caller is allowed to set this security information */
5923 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
5924 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5925 if (error == EACCES)
5926 error = EPERM;
5927 return(error);
5928 }
5929
5930 if ((error = vnode_setattr(vp, vap, ctx)) != 0)
5931 return (error);
5932
5933 #if CONFIG_MACF
5934 if (VATTR_IS_ACTIVE(vap, va_mode))
5935 mac_vnode_notify_setmode(ctx, vp, (mode_t)vap->va_mode);
5936
5937 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))
5938 mac_vnode_notify_setowner(ctx, vp,
5939 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
5940 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1);
5941
5942 if (VATTR_IS_ACTIVE(vap, va_acl))
5943 mac_vnode_notify_setacl(ctx, vp, vap->va_acl);
5944 #endif
5945
5946 return (error);
5947 }
5948
5949
5950 /*
5951 * Change mode of a file given a path name.
5952 *
5953 * Returns: 0 Success
5954 * namei:??? [anything namei can return]
5955 * chmod_vnode:??? [anything chmod_vnode can return]
5956 */
5957 static int
5958 chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
5959 int fd, int flag, enum uio_seg segflg)
5960 {
5961 struct nameidata nd;
5962 int follow, error;
5963
5964 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5965 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
5966 segflg, path, ctx);
5967 if ((error = nameiat(&nd, fd)))
5968 return (error);
5969 error = chmod_vnode(ctx, nd.ni_vp, vap);
5970 vnode_put(nd.ni_vp);
5971 nameidone(&nd);
5972 return(error);
5973 }
5974
5975 /*
5976 * chmod_extended: Change the mode of a file given a path name; with extended
5977 * argument list (including extended security (ACL)).
5978 *
5979 * Parameters: p Process requesting the mode change
5980 * uap User argument descriptor (see below)
5981 * retval (ignored)
5982 *
5983 * Indirect: uap->path Path to object (same as 'chmod')
5984 * uap->uid UID to set
5985 * uap->gid GID to set
5986 * uap->mode File mode to set (same as 'chmod')
5987 * uap->xsecurity ACL to set (or delete)
5988 *
5989 * Returns: 0 Success
5990 * !0 errno value
5991 *
5992 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5993 *
5994 * XXX: We should enumerate the possible errno values here, and where
5995 * in the code they originated.
5996 */
5997 int
5998 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
5999 {
6000 int error;
6001 struct vnode_attr va;
6002 kauth_filesec_t xsecdst;
6003
6004 AUDIT_ARG(owner, uap->uid, uap->gid);
6005
6006 VATTR_INIT(&va);
6007 if (uap->mode != -1)
6008 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6009 if (uap->uid != KAUTH_UID_NONE)
6010 VATTR_SET(&va, va_uid, uap->uid);
6011 if (uap->gid != KAUTH_GID_NONE)
6012 VATTR_SET(&va, va_gid, uap->gid);
6013
6014 xsecdst = NULL;
6015 switch(uap->xsecurity) {
6016 /* explicit remove request */
6017 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6018 VATTR_SET(&va, va_acl, NULL);
6019 break;
6020 /* not being set */
6021 case USER_ADDR_NULL:
6022 break;
6023 default:
6024 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6025 return(error);
6026 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6027 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
6028 }
6029
6030 error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
6031 UIO_USERSPACE);
6032
6033 if (xsecdst != NULL)
6034 kauth_filesec_free(xsecdst);
6035 return(error);
6036 }
6037
6038 /*
6039 * Returns: 0 Success
6040 * chmodat:??? [anything chmodat can return]
6041 */
6042 static int
6043 fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
6044 int flag, enum uio_seg segflg)
6045 {
6046 struct vnode_attr va;
6047
6048 VATTR_INIT(&va);
6049 VATTR_SET(&va, va_mode, mode & ALLPERMS);
6050
6051 return (chmodat(ctx, path, &va, fd, flag, segflg));
6052 }
6053
6054 int
6055 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
6056 {
6057 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6058 AT_FDCWD, 0, UIO_USERSPACE));
6059 }
6060
6061 int
6062 fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
6063 {
6064 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6065 return (EINVAL);
6066
6067 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6068 uap->fd, uap->flag, UIO_USERSPACE));
6069 }
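
/*
 * Illustrative userspace sketch (not part of this file): changing a file
 * mode with fchmodat().  The path is hypothetical.  A flag of 0 follows
 * symlinks, as the AT_SYMLINK_NOFOLLOW handling in chmodat() above shows;
 * passing AT_SYMLINK_NOFOLLOW may fail on filesystems that cannot change the
 * mode of a symlink itself.
 *
 *	#include <sys/stat.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		if (fchmodat(AT_FDCWD, "deploy.sh", 0755, 0) == -1) {
 *			perror("fchmodat");	// EPERM if not permitted, per chmod_vnode()
 *			return 1;
 *		}
 *		return 0;
 *	}
 */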
6070
6071 /*
6072 * Change mode of a file given a file descriptor.
6073 */
6074 static int
6075 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
6076 {
6077 vnode_t vp;
6078 int error;
6079
6080 AUDIT_ARG(fd, fd);
6081
6082 if ((error = file_vnode(fd, &vp)) != 0)
6083 return (error);
6084 if ((error = vnode_getwithref(vp)) != 0) {
6085 file_drop(fd);
6086 return(error);
6087 }
6088 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6089
6090 error = chmod_vnode(vfs_context_current(), vp, vap);
6091 (void)vnode_put(vp);
6092 file_drop(fd);
6093
6094 return (error);
6095 }
6096
6097 /*
6098 * fchmod_extended: Change mode of a file given a file descriptor; with
6099 * extended argument list (including extended security (ACL)).
6100 *
6101 * Parameters: p Process requesting to change file mode
6102 * uap User argument descriptor (see below)
6103 * retval (ignored)
6104 *
6105 * Indirect: uap->mode File mode to set (same as 'chmod')
6106 * uap->uid UID to set
6107 * uap->gid GID to set
6108 * uap->xsecurity ACL to set (or delete)
6109 * uap->fd File descriptor of file to change mode
6110 *
6111 * Returns: 0 Success
6112 * !0 errno value
6113 *
6114 */
6115 int
6116 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
6117 {
6118 int error;
6119 struct vnode_attr va;
6120 kauth_filesec_t xsecdst;
6121
6122 AUDIT_ARG(owner, uap->uid, uap->gid);
6123
6124 VATTR_INIT(&va);
6125 if (uap->mode != -1)
6126 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6127 if (uap->uid != KAUTH_UID_NONE)
6128 VATTR_SET(&va, va_uid, uap->uid);
6129 if (uap->gid != KAUTH_GID_NONE)
6130 VATTR_SET(&va, va_gid, uap->gid);
6131
6132 xsecdst = NULL;
6133 switch(uap->xsecurity) {
6134 case USER_ADDR_NULL:
6135 VATTR_SET(&va, va_acl, NULL);
6136 break;
6137 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6138 VATTR_SET(&va, va_acl, NULL);
6139 break;
6140 /* not being set */
6141 case CAST_USER_ADDR_T(-1):
6142 break;
6143 default:
6144 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6145 return(error);
6146 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6147 }
6148
6149 error = fchmod1(p, uap->fd, &va);
6150
6151
6152 switch(uap->xsecurity) {
6153 case USER_ADDR_NULL:
6154 case CAST_USER_ADDR_T(-1):
6155 break;
6156 default:
6157 if (xsecdst != NULL)
6158 kauth_filesec_free(xsecdst);
6159 }
6160 return(error);
6161 }
6162
6163 int
6164 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
6165 {
6166 struct vnode_attr va;
6167
6168 VATTR_INIT(&va);
6169 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6170
6171 return(fchmod1(p, uap->fd, &va));
6172 }
6173
6174
6175 /*
6176 * Set ownership given a path name.
6177 */
6178 /* ARGSUSED */
6179 static int
6180 fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
6181 gid_t gid, int flag, enum uio_seg segflg)
6182 {
6183 vnode_t vp;
6184 struct vnode_attr va;
6185 int error;
6186 struct nameidata nd;
6187 int follow;
6188 kauth_action_t action;
6189
6190 AUDIT_ARG(owner, uid, gid);
6191
6192 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6193 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
6194 path, ctx);
6195 error = nameiat(&nd, fd);
6196 if (error)
6197 return (error);
6198 vp = nd.ni_vp;
6199
6200 nameidone(&nd);
6201
6202 VATTR_INIT(&va);
6203 if (uid != (uid_t)VNOVAL)
6204 VATTR_SET(&va, va_uid, uid);
6205 if (gid != (gid_t)VNOVAL)
6206 VATTR_SET(&va, va_gid, gid);
6207
6208 #if CONFIG_MACF
6209 error = mac_vnode_check_setowner(ctx, vp, uid, gid);
6210 if (error)
6211 goto out;
6212 #endif
6213
6214 /* preflight and authorize attribute changes */
6215 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6216 goto out;
6217 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6218 goto out;
6219 error = vnode_setattr(vp, &va, ctx);
6220
6221 #if CONFIG_MACF
6222 if (error == 0)
6223 mac_vnode_notify_setowner(ctx, vp, uid, gid);
6224 #endif
6225
6226 out:
6227 /*
6228 * EACCES is only allowed from namei(); permissions failure should
6229 * return EPERM, so we need to translate the error code.
6230 */
6231 if (error == EACCES)
6232 error = EPERM;
6233
6234 vnode_put(vp);
6235 return (error);
6236 }
6237
6238 int
6239 chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
6240 {
6241 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6242 uap->uid, uap->gid, 0, UIO_USERSPACE));
6243 }
6244
6245 int
6246 lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
6247 {
6248 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6249 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6250 }
6251
6252 int
6253 fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6254 {
6255 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6256 return (EINVAL);
6257
6258 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6259 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
6260 }
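
/*
 * Illustrative userspace usage (not part of this file): the only flag
 * accepted above is AT_SYMLINK_NOFOLLOW, which selects whether a trailing
 * symlink itself or its target is affected. Path, uid and gid below are
 * hypothetical; error handling elided.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	// change the link target (same effect as chown("alias", ...))
 *	(void)fchownat(AT_FDCWD, "alias", 501, 20, 0);
 *	// change the symlink itself (same effect as lchown("alias", ...))
 *	(void)fchownat(AT_FDCWD, "alias", 501, 20, AT_SYMLINK_NOFOLLOW);
 */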
6261
6262 /*
6263 * Set ownership given a file descriptor.
6264 */
6265 /* ARGSUSED */
6266 int
6267 fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
6268 {
6269 struct vnode_attr va;
6270 vfs_context_t ctx = vfs_context_current();
6271 vnode_t vp;
6272 int error;
6273 kauth_action_t action;
6274
6275 AUDIT_ARG(owner, uap->uid, uap->gid);
6276 AUDIT_ARG(fd, uap->fd);
6277
6278 if ( (error = file_vnode(uap->fd, &vp)) )
6279 return (error);
6280
6281 if ( (error = vnode_getwithref(vp)) ) {
6282 file_drop(uap->fd);
6283 return(error);
6284 }
6285 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6286
6287 VATTR_INIT(&va);
6288 if (uap->uid != VNOVAL)
6289 VATTR_SET(&va, va_uid, uap->uid);
6290 if (uap->gid != VNOVAL)
6291 VATTR_SET(&va, va_gid, uap->gid);
6292
6293 #if NAMEDSTREAMS
6294 /* chown calls are not allowed for resource forks. */
6295 if (vp->v_flag & VISNAMEDSTREAM) {
6296 error = EPERM;
6297 goto out;
6298 }
6299 #endif
6300
6301 #if CONFIG_MACF
6302 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
6303 if (error)
6304 goto out;
6305 #endif
6306
6307 /* preflight and authorize attribute changes */
6308 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6309 goto out;
6310 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6311 if (error == EACCES)
6312 error = EPERM;
6313 goto out;
6314 }
6315 error = vnode_setattr(vp, &va, ctx);
6316
6317 #if CONFIG_MACF
6318 if (error == 0)
6319 mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid);
6320 #endif
6321
6322 out:
6323 (void)vnode_put(vp);
6324 file_drop(uap->fd);
6325 return (error);
6326 }
6327
6328 static int
6329 getutimes(user_addr_t usrtvp, struct timespec *tsp)
6330 {
6331 int error;
6332
6333 if (usrtvp == USER_ADDR_NULL) {
6334 struct timeval old_tv;
6335 /* XXX Y2038 bug because of microtime argument */
6336 microtime(&old_tv);
6337 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
6338 tsp[1] = tsp[0];
6339 } else {
6340 if (IS_64BIT_PROCESS(current_proc())) {
6341 struct user64_timeval tv[2];
6342 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6343 if (error)
6344 return (error);
6345 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6346 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6347 } else {
6348 struct user32_timeval tv[2];
6349 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6350 if (error)
6351 return (error);
6352 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6353 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6354 }
6355 }
6356 return 0;
6357 }
6358
6359 static int
6360 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
6361 int nullflag)
6362 {
6363 int error;
6364 struct vnode_attr va;
6365 kauth_action_t action;
6366
6367 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6368
6369 VATTR_INIT(&va);
6370 VATTR_SET(&va, va_access_time, ts[0]);
6371 VATTR_SET(&va, va_modify_time, ts[1]);
6372 if (nullflag)
6373 va.va_vaflags |= VA_UTIMES_NULL;
6374
6375 #if NAMEDSTREAMS
6376 /* utimes calls are not allowed for resource forks. */
6377 if (vp->v_flag & VISNAMEDSTREAM) {
6378 error = EPERM;
6379 goto out;
6380 }
6381 #endif
6382
6383 #if CONFIG_MACF
6384 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
6385 if (error)
6386 goto out;
6387 #endif
6388 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
6389 if (!nullflag && error == EACCES)
6390 error = EPERM;
6391 goto out;
6392 }
6393
6394 /* since we may not need to auth anything, check here */
6395 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6396 if (!nullflag && error == EACCES)
6397 error = EPERM;
6398 goto out;
6399 }
6400 error = vnode_setattr(vp, &va, ctx);
6401
6402 #if CONFIG_MACF
6403 if (error == 0)
6404 mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]);
6405 #endif
6406
6407 out:
6408 return error;
6409 }
6410
6411 /*
6412 * Set the access and modification times of a file.
6413 */
6414 /* ARGSUSED */
6415 int
6416 utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
6417 {
6418 struct timespec ts[2];
6419 user_addr_t usrtvp;
6420 int error;
6421 struct nameidata nd;
6422 vfs_context_t ctx = vfs_context_current();
6423
6424 /*
6425 * AUDIT: Needed to change the order of operations to do the
6426 * name lookup first because auditing wants the path.
6427 */
6428 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
6429 UIO_USERSPACE, uap->path, ctx);
6430 error = namei(&nd);
6431 if (error)
6432 return (error);
6433 nameidone(&nd);
6434
6435 /*
6436 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6437 * the current time instead.
6438 */
6439 usrtvp = uap->tptr;
6440 if ((error = getutimes(usrtvp, ts)) != 0)
6441 goto out;
6442
6443 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
6444
6445 out:
6446 vnode_put(nd.ni_vp);
6447 return (error);
6448 }
6449
6450 /*
6451 * Set the access and modification times of a file.
6452 */
6453 /* ARGSUSED */
6454 int
6455 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
6456 {
6457 struct timespec ts[2];
6458 vnode_t vp;
6459 user_addr_t usrtvp;
6460 int error;
6461
6462 AUDIT_ARG(fd, uap->fd);
6463 usrtvp = uap->tptr;
6464 if ((error = getutimes(usrtvp, ts)) != 0)
6465 return (error);
6466 if ((error = file_vnode(uap->fd, &vp)) != 0)
6467 return (error);
6468 if((error = vnode_getwithref(vp))) {
6469 file_drop(uap->fd);
6470 return(error);
6471 }
6472
6473 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
6474 vnode_put(vp);
6475 file_drop(uap->fd);
6476 return(error);
6477 }
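
/*
 * Illustrative userspace usage (not part of this file): getutimes() above
 * treats a NULL times pointer as "use the current time" and setutimes() then
 * sets VA_UTIMES_NULL, under which write access generally suffices; explicit
 * times generally require ownership (hence the EACCES-to-EPERM translation
 * above). A minimal sketch with a hypothetical path; error handling elided.
 *
 *	#include <sys/time.h>
 *
 *	(void)utimes("notes.txt", NULL);		// atime = mtime = now
 *
 *	struct timeval tv[2] = {
 *		{ .tv_sec = 1000000000, .tv_usec = 0 },	// access time
 *		{ .tv_sec = 1000000000, .tv_usec = 0 },	// modification time
 *	};
 *	(void)utimes("notes.txt", tv);			// explicit times
 */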
6478
6479 /*
6480 * Truncate a file given its path name.
6481 */
6482 /* ARGSUSED */
6483 int
6484 truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
6485 {
6486 vnode_t vp;
6487 struct vnode_attr va;
6488 vfs_context_t ctx = vfs_context_current();
6489 int error;
6490 struct nameidata nd;
6491 kauth_action_t action;
6492
6493 if (uap->length < 0)
6494 return(EINVAL);
6495 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
6496 UIO_USERSPACE, uap->path, ctx);
6497 if ((error = namei(&nd)))
6498 return (error);
6499 vp = nd.ni_vp;
6500
6501 nameidone(&nd);
6502
6503 VATTR_INIT(&va);
6504 VATTR_SET(&va, va_data_size, uap->length);
6505
6506 #if CONFIG_MACF
6507 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
6508 if (error)
6509 goto out;
6510 #endif
6511
6512 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6513 goto out;
6514 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6515 goto out;
6516 error = vnode_setattr(vp, &va, ctx);
6517
6518 #if CONFIG_MACF
6519 if (error == 0)
6520 mac_vnode_notify_truncate(ctx, NOCRED, vp);
6521 #endif
6522
6523 out:
6524 vnode_put(vp);
6525 return (error);
6526 }
6527
6528 /*
6529 * Truncate a file given a file descriptor.
6530 */
6531 /* ARGSUSED */
6532 int
6533 ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
6534 {
6535 vfs_context_t ctx = vfs_context_current();
6536 struct vnode_attr va;
6537 vnode_t vp;
6538 struct fileproc *fp;
6539 int error;
6540 int fd = uap->fd;
6541
6542 AUDIT_ARG(fd, uap->fd);
6543 if (uap->length < 0)
6544 return(EINVAL);
6545
6546 if ( (error = fp_lookup(p,fd,&fp,0)) ) {
6547 return(error);
6548 }
6549
6550 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
6551 case DTYPE_PSXSHM:
6552 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
6553 goto out;
6554 case DTYPE_VNODE:
6555 break;
6556 default:
6557 error = EINVAL;
6558 goto out;
6559 }
6560
6561 vp = (vnode_t)fp->f_fglob->fg_data;
6562
6563 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
6564 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6565 error = EINVAL;
6566 goto out;
6567 }
6568
6569 if ((error = vnode_getwithref(vp)) != 0) {
6570 goto out;
6571 }
6572
6573 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6574
6575 #if CONFIG_MACF
6576 error = mac_vnode_check_truncate(ctx,
6577 fp->f_fglob->fg_cred, vp);
6578 if (error) {
6579 (void)vnode_put(vp);
6580 goto out;
6581 }
6582 #endif
6583 VATTR_INIT(&va);
6584 VATTR_SET(&va, va_data_size, uap->length);
6585 error = vnode_setattr(vp, &va, ctx);
6586
6587 #if CONFIG_MACF
6588 if (error == 0)
6589 mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp);
6590 #endif
6591
6592 (void)vnode_put(vp);
6593 out:
6594 file_drop(fd);
6595 return (error);
6596 }
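
/*
 * Illustrative userspace usage (not part of this file): ftruncate() requires
 * a descriptor opened for writing (the FWRITE check above), while truncate()
 * authorizes against the path. A minimal sketch with a hypothetical file;
 * error handling elided.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("scratch.bin", O_RDWR | O_CREAT, 0644);
 *	(void)ftruncate(fd, 4096);		// grow or shrink to 4 KiB
 *	(void)truncate("scratch.bin", 0);	// empty it by path
 */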
6597
6598
6599 /*
6600 * Sync an open file with synchronized I/O _file_ integrity completion
6601 */
6602 /* ARGSUSED */
6603 int
6604 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
6605 {
6606 __pthread_testcancel(1);
6607 return(fsync_common(p, uap, MNT_WAIT));
6608 }
6609
6610
6611 /*
6612 * Sync an open file with synchronized I/O _file_ integrity completion
6613 *
6614 * Notes: This is a legacy support function that does not test for
6615 * thread cancellation points.
6616 */
6617 /* ARGSUSED */
6618 int
6619 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6620 {
6621 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
6622 }
6623
6624
6625 /*
6626 * Sync an open file with synchronized I/O _data_ integrity completion
6627 */
6628 /* ARGSUSED */
6629 int
6630 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6631 {
6632 __pthread_testcancel(1);
6633 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6634 }
6635
6636
6637 /*
6638 * fsync_common
6639 *
6640 * Common fsync code to support both synchronized I/O file integrity completion
6641 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6642 *
6643 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6644 * will only guarantee that the file data contents are retrievable. If
6645 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which
6646 * additionally requires that metadata unnecessary for retrieving the file
6647 * data contents, such as atime, mtime, ctime, etc., also be committed to
6648 * stable storage.
6649 *
6650 * Parameters: p The process
6651 * uap->fd The descriptor to synchronize
6652 * flags The data integrity flags
6653 *
6654 * Returns: int Success
6655 * fp_getfvp:EBADF Bad file descriptor
6656 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6657 * VNOP_FSYNC:??? unspecified
6658 *
6659 * Notes: We use struct fsync_args because it is a short name, and all
6660 * caller argument structures are otherwise identical.
6661 */
6662 static int
6663 fsync_common(proc_t p, struct fsync_args *uap, int flags)
6664 {
6665 vnode_t vp;
6666 struct fileproc *fp;
6667 vfs_context_t ctx = vfs_context_current();
6668 int error;
6669
6670 AUDIT_ARG(fd, uap->fd);
6671
6672 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
6673 return (error);
6674 if ( (error = vnode_getwithref(vp)) ) {
6675 file_drop(uap->fd);
6676 return(error);
6677 }
6678
6679 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6680
6681 error = VNOP_FSYNC(vp, flags, ctx);
6682
6683 #if NAMEDRSRCFORK
6684 /* Sync resource fork shadow file if necessary. */
6685 if ((error == 0) &&
6686 (vp->v_flag & VISNAMEDSTREAM) &&
6687 (vp->v_parent != NULLVP) &&
6688 vnode_isshadow(vp) &&
6689 (fp->f_flags & FP_WRITTEN)) {
6690 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
6691 }
6692 #endif
6693
6694 (void)vnode_put(vp);
6695 file_drop(uap->fd);
6696 return (error);
6697 }
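
/*
 * Illustrative userspace usage (not part of this file): fdatasync() reaches
 * here with MNT_DWAIT (data integrity only) and fsync() with MNT_WAIT (data
 * plus metadata such as timestamps). A minimal sketch, assuming fd is an open
 * descriptor and buf/len describe data already prepared by the caller; error
 * handling elided. The final fcntl(F_FULLFSYNC) is the macOS-specific request
 * to also flush the drive's own write cache.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	(void)write(fd, buf, len);
 *	(void)fdatasync(fd);		// file contents retrievable after a crash
 *	(void)fsync(fd);		// contents plus file metadata
 *	(void)fcntl(fd, F_FULLFSYNC);	// push past the device write cache
 */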
6698
6699 /*
6700 * Duplicate files. Source must be a file, target must be a file or
6701 * must not exist.
6702 *
6703 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6704 * perform inheritance correctly.
6705 */
6706 /* ARGSUSED */
6707 int
6708 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
6709 {
6710 vnode_t tvp, fvp, tdvp, sdvp;
6711 struct nameidata fromnd, tond;
6712 int error;
6713 vfs_context_t ctx = vfs_context_current();
6714 #if CONFIG_MACF
6715 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
6716 struct vnode_attr va;
6717 #endif
6718
6719 /* Check that the flags are valid. */
6720
6721 if (uap->flags & ~CPF_MASK) {
6722 return(EINVAL);
6723 }
6724
6725 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
6726 UIO_USERSPACE, uap->from, ctx);
6727 if ((error = namei(&fromnd)))
6728 return (error);
6729 fvp = fromnd.ni_vp;
6730
6731 NDINIT(&tond, CREATE, OP_LINK,
6732 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6733 UIO_USERSPACE, uap->to, ctx);
6734 if ((error = namei(&tond))) {
6735 goto out1;
6736 }
6737 tdvp = tond.ni_dvp;
6738 tvp = tond.ni_vp;
6739
6740 if (tvp != NULL) {
6741 if (!(uap->flags & CPF_OVERWRITE)) {
6742 error = EEXIST;
6743 goto out;
6744 }
6745 }
6746
6747 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
6748 error = EISDIR;
6749 goto out;
6750 }
6751
6752 /* This calls existing MAC hooks for open */
6753 if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx,
6754 NULL))) {
6755 goto out;
6756 }
6757
6758 if (tvp) {
6759 /*
6760 * See unlinkat_internal for an explanation of the potential
6761 * ENOENT from the MAC hook, but the gist is that the MAC hook
6762 * can fail because vn_getpath isn't able to return the full
6763 * path. We choose to ignore this failure.
6764 */
6765 error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL);
6766 if (error && error != ENOENT)
6767 goto out;
6768 error = 0;
6769 }
6770
6771 #if CONFIG_MACF
6772 VATTR_INIT(&va);
6773 VATTR_SET(&va, va_type, fvp->v_type);
6774 /* Mask off all but regular access permissions */
6775 VATTR_SET(&va, va_mode,
6776 ((((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
6777 error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
6778 if (error)
6779 goto out;
6780 #endif /* CONFIG_MACF */
6781
6782 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
6783 goto out;
6784
6785 if (fvp == tdvp)
6786 error = EINVAL;
6787 /*
6788 * If source is the same as the destination (that is the
6789 * same inode number) then there is nothing to do.
6790 * (fixed to have POSIX semantics - CSM 3/2/98)
6791 */
6792 if (fvp == tvp)
6793 error = -1;
6794 if (!error)
6795 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
6796 out:
6797 sdvp = tond.ni_startdir;
6798 /*
6799 * nameidone has to happen before we vnode_put(tdvp)
6800 * since it may need to release the fs_nodelock on the tdvp
6801 */
6802 nameidone(&tond);
6803
6804 if (tvp)
6805 vnode_put(tvp);
6806 vnode_put(tdvp);
6807 vnode_put(sdvp);
6808 out1:
6809 vnode_put(fvp);
6810
6811 nameidone(&fromnd);
6812
6813 if (error == -1)
6814 return (0);
6815 return (error);
6816 }
6817
6818 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
6819
6820 /*
6821 * Helper function for doing clones. The caller is expected to provide an
6822 * iocounted source vnode and release it.
6823 */
6824 static int
6825 clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
6826 user_addr_t dst, uint32_t flags, vfs_context_t ctx)
6827 {
6828 vnode_t tvp, tdvp;
6829 struct nameidata tond;
6830 int error;
6831 int follow;
6832 boolean_t free_src_acl;
6833 boolean_t attr_cleanup;
6834 enum vtype v_type;
6835 kauth_action_t action;
6836 struct componentname *cnp;
6837 uint32_t defaulted;
6838 struct vnode_attr va;
6839 struct vnode_attr nva;
6840
6841 v_type = vnode_vtype(fvp);
6842 switch (v_type) {
6843 case VLNK:
6844 /* FALLTHRU */
6845 case VREG:
6846 action = KAUTH_VNODE_ADD_FILE;
6847 break;
6848 case VDIR:
6849 if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
6850 fvp->v_mountedhere) {
6851 return (EINVAL);
6852 }
6853 action = KAUTH_VNODE_ADD_SUBDIRECTORY;
6854 break;
6855 default:
6856 return (EINVAL);
6857 }
6858
6859 AUDIT_ARG(fd2, dst_dirfd);
6860 AUDIT_ARG(value32, flags);
6861
6862 follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6863 NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
6864 UIO_USERSPACE, dst, ctx);
6865 if ((error = nameiat(&tond, dst_dirfd)))
6866 return (error);
6867 cnp = &tond.ni_cnd;
6868 tdvp = tond.ni_dvp;
6869 tvp = tond.ni_vp;
6870
6871 free_src_acl = FALSE;
6872 attr_cleanup = FALSE;
6873
6874 if (tvp != NULL) {
6875 error = EEXIST;
6876 goto out;
6877 }
6878
6879 if (vnode_mount(tdvp) != vnode_mount(fvp)) {
6880 error = EXDEV;
6881 goto out;
6882 }
6883
6884 #if CONFIG_MACF
6885 if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp)))
6886 goto out;
6887 #endif
6888 if ((error = vnode_authorize(tdvp, NULL, action, ctx)))
6889 goto out;
6890
6891 action = KAUTH_VNODE_GENERIC_READ_BITS;
6892 if (data_read_authorised)
6893 action &= ~KAUTH_VNODE_READ_DATA;
6894 if ((error = vnode_authorize(fvp, NULL, action, ctx)))
6895 goto out;
6896
6897 /*
6898 * Certain attributes may need to be changed from the source; we ask for
6899 * those here.
6900 */
6901 VATTR_INIT(&va);
6902 VATTR_WANTED(&va, va_uid);
6903 VATTR_WANTED(&va, va_gid);
6904 VATTR_WANTED(&va, va_mode);
6905 VATTR_WANTED(&va, va_flags);
6906 VATTR_WANTED(&va, va_acl);
6907
6908 if ((error = vnode_getattr(fvp, &va, ctx)) != 0)
6909 goto out;
6910
6911 VATTR_INIT(&nva);
6912 VATTR_SET(&nva, va_type, v_type);
6913 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) {
6914 VATTR_SET(&nva, va_acl, va.va_acl);
6915 free_src_acl = TRUE;
6916 }
6917
6918 /* Handle ACL inheritance, initialize vap. */
6919 if (v_type == VLNK) {
6920 error = vnode_authattr_new(tdvp, &nva, 0, ctx);
6921 } else {
6922 error = vn_attribute_prepare(tdvp, &nva, &defaulted, ctx);
6923 if (error)
6924 goto out;
6925 attr_cleanup = TRUE;
6926 }
6927
6928 /*
6929 * We've got initial values for all security parameters.
6930 * If we are the superuser, then we can change the owners to be the
6931 * same as the source. Both the superuser and the owner have default
6932 * WRITE_SECURITY privileges, so all other fields can be taken
6933 * from the source as well.
6934 */
6935 if (vfs_context_issuser(ctx)) {
6936 if (VATTR_IS_SUPPORTED(&va, va_uid))
6937 VATTR_SET(&nva, va_uid, va.va_uid);
6938 if (VATTR_IS_SUPPORTED(&va, va_gid))
6939 VATTR_SET(&nva, va_gid, va.va_gid);
6940 }
6941 if (VATTR_IS_SUPPORTED(&va, va_mode))
6942 VATTR_SET(&nva, va_mode, va.va_mode);
6943 if (VATTR_IS_SUPPORTED(&va, va_flags)) {
6944 VATTR_SET(&nva, va_flags,
6945 ((va.va_flags & ~SF_RESTRICTED) | /* Turn off from source */
6946 (nva.va_flags & SF_RESTRICTED)));
6947 }
6948
6949 error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &nva,
6950 VNODE_CLONEFILE_DEFAULT, ctx);
6951
6952 if (!error && tvp) {
6953 int update_flags = 0;
6954 #if CONFIG_FSE
6955 int fsevent;
6956 #endif /* CONFIG_FSE */
6957
6958 #if CONFIG_MACF
6959 (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
6960 VNODE_LABEL_CREATE, ctx);
6961 #endif
6962 /*
6963 * If some of the requested attributes weren't handled by the
6964 * VNOP, use our fallback code.
6965 */
6966 if (!VATTR_ALL_SUPPORTED(&va))
6967 (void)vnode_setattr_fallback(tvp, &nva, ctx);
6968
6969 // Make sure the name & parent pointers are hooked up
6970 if (tvp->v_name == NULL)
6971 update_flags |= VNODE_UPDATE_NAME;
6972 if (tvp->v_parent == NULLVP)
6973 update_flags |= VNODE_UPDATE_PARENT;
6974
6975 if (update_flags) {
6976 (void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
6977 cnp->cn_namelen, cnp->cn_hash, update_flags);
6978 }
6979
6980 #if CONFIG_FSE
6981 switch (vnode_vtype(tvp)) {
6982 case VLNK:
6983 /* FALLTHRU */
6984 case VREG:
6985 fsevent = FSE_CREATE_FILE;
6986 break;
6987 case VDIR:
6988 fsevent = FSE_CREATE_DIR;
6989 break;
6990 default:
6991 goto out;
6992 }
6993
6994 if (need_fsevent(fsevent, tvp)) {
6995 add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
6996 FSE_ARG_DONE);
6997 }
6998 #endif /* CONFIG_FSE */
6999 }
7000
7001 out:
7002 if (attr_cleanup)
7003 vn_attribute_cleanup(&nva, defaulted);
7004 if (free_src_acl && va.va_acl)
7005 kauth_acl_free(va.va_acl);
7006 nameidone(&tond);
7007 if (tvp)
7008 vnode_put(tvp);
7009 vnode_put(tdvp);
7010 return (error);
7011 }
7012
7013 /*
7014 * clone files or directories, target must not exist.
7015 */
7016 /* ARGSUSED */
7017 int
7018 clonefileat(__unused proc_t p, struct clonefileat_args *uap,
7019 __unused int32_t *retval)
7020 {
7021 vnode_t fvp;
7022 struct nameidata fromnd;
7023 int follow;
7024 int error;
7025 vfs_context_t ctx = vfs_context_current();
7026
7027 /* Check that the flags are valid. */
7028 if (uap->flags & ~CLONE_NOFOLLOW)
7029 return (EINVAL);
7030
7031 AUDIT_ARG(fd, uap->src_dirfd);
7032
7033 follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7034 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
7035 UIO_USERSPACE, uap->src, ctx);
7036 if ((error = nameiat(&fromnd, uap->src_dirfd)))
7037 return (error);
7038
7039 fvp = fromnd.ni_vp;
7040 nameidone(&fromnd);
7041
7042 error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
7043 uap->flags, ctx);
7044
7045 vnode_put(fvp);
7046 return (error);
7047 }
7048
7049 int
7050 fclonefileat(__unused proc_t p, struct fclonefileat_args *uap,
7051 __unused int32_t *retval)
7052 {
7053 vnode_t fvp;
7054 struct fileproc *fp;
7055 int error;
7056 vfs_context_t ctx = vfs_context_current();
7057
7058 AUDIT_ARG(fd, uap->src_fd);
7059 error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
7060 if (error)
7061 return (error);
7062
7063 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7064 AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
7065 error = EBADF;
7066 goto out;
7067 }
7068
7069 if ((error = vnode_getwithref(fvp)))
7070 goto out;
7071
7072 AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
7073
7074 error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
7075 uap->flags, ctx);
7076
7077 vnode_put(fvp);
7078 out:
7079 file_drop(uap->src_fd);
7080 return (error);
7081 }
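
/*
 * Illustrative userspace usage (not part of this file): the public wrappers
 * for these syscalls are declared in <sys/clonefile.h>; treat the exact
 * prototypes as an assumption to verify against the SDK. Hypothetical paths;
 * error handling elided.
 *
 *	#include <fcntl.h>
 *	#include <sys/clonefile.h>
 *
 *	// Clone src to dst on the same volume. Fails with EEXIST if dst
 *	// already exists and EXDEV if the two paths are on different mounts.
 *	(void)clonefileat(AT_FDCWD, "template.img", AT_FDCWD, "copy.img", 0);
 *	// CLONE_NOFOLLOW clones a trailing symlink itself rather than its target.
 */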
7082
7083 /*
7084 * Rename files. Source and destination must either both be directories,
7085 * or both not be directories. If target is a directory, it must be empty.
7086 */
7087 /* ARGSUSED */
7088 static int
7089 renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
7090 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
7091 {
7092 if (flags & ~VFS_RENAME_FLAGS_MASK)
7093 return EINVAL;
7094
7095 if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL))
7096 return EINVAL;
7097
7098 vnode_t tvp, tdvp;
7099 vnode_t fvp, fdvp;
7100 struct nameidata *fromnd, *tond;
7101 int error;
7102 int do_retry;
7103 int retry_count;
7104 int mntrename;
7105 int need_event;
7106 const char *oname = NULL;
7107 char *from_name = NULL, *to_name = NULL;
7108 int from_len=0, to_len=0;
7109 int holding_mntlock;
7110 mount_t locked_mp = NULL;
7111 vnode_t oparent = NULLVP;
7112 #if CONFIG_FSE
7113 fse_info from_finfo, to_finfo;
7114 #endif
7115 int from_truncated=0, to_truncated;
7116 int batched = 0;
7117 struct vnode_attr *fvap, *tvap;
7118 int continuing = 0;
7119 /* Carving out a chunk for structs that are too big to be on the stack. */
7120 struct {
7121 struct nameidata from_node, to_node;
7122 struct vnode_attr fv_attr, tv_attr;
7123 } * __rename_data;
7124 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
7125 fromnd = &__rename_data->from_node;
7126 tond = &__rename_data->to_node;
7127
7128 holding_mntlock = 0;
7129 do_retry = 0;
7130 retry_count = 0;
7131 retry:
7132 fvp = tvp = NULL;
7133 fdvp = tdvp = NULL;
7134 fvap = tvap = NULL;
7135 mntrename = FALSE;
7136
7137 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
7138 segflg, from, ctx);
7139 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
7140
7141 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
7142 segflg, to, ctx);
7143 tond->ni_flag = NAMEI_COMPOUNDRENAME;
7144
7145 continue_lookup:
7146 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
7147 if ( (error = nameiat(fromnd, fromfd)) )
7148 goto out1;
7149 fdvp = fromnd->ni_dvp;
7150 fvp = fromnd->ni_vp;
7151
7152 if (fvp && fvp->v_type == VDIR)
7153 tond->ni_cnd.cn_flags |= WILLBEDIR;
7154 }
7155
7156 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
7157 if ( (error = nameiat(tond, tofd)) ) {
7158 /*
7159 * Translate error code for rename("dir1", "dir2/.").
7160 */
7161 if (error == EISDIR && fvp->v_type == VDIR)
7162 error = EINVAL;
7163 goto out1;
7164 }
7165 tdvp = tond->ni_dvp;
7166 tvp = tond->ni_vp;
7167 }
7168
7169 if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
7170 error = ENOENT;
7171 goto out1;
7172 }
7173
7174 if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
7175 error = EEXIST;
7176 goto out1;
7177 }
7178
7179 batched = vnode_compound_rename_available(fdvp);
7180 if (!fvp) {
7181 /*
7182 * Claim: this check will never reject a valid rename.
7183 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
7184 * Suppose fdvp and tdvp are not on the same mount.
7185 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
7186 * then you can't move it to within another dir on the same mountpoint.
7187 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
7188 *
7189 * If this check passes, then we are safe to pass these vnodes to the same FS.
7190 */
7191 if (fdvp->v_mount != tdvp->v_mount) {
7192 error = EXDEV;
7193 goto out1;
7194 }
7195 goto skipped_lookup;
7196 }
7197
7198 if (!batched) {
7199 error = vn_authorize_renamex(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, flags, NULL);
7200 if (error) {
7201 if (error == ENOENT) {
7202 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7203 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7204 /*
7205 * We encountered a race where after doing the namei, tvp stops
7206 * being valid. If so, simply re-drive the rename call from the
7207 * top.
7208 */
7209 do_retry = 1;
7210 retry_count += 1;
7211 }
7212 }
7213 goto out1;
7214 }
7215 }
7216
7217 /*
7218 * If the source and destination are the same (i.e. they're
7219 * links to the same vnode) and the target file system is
7220 * case sensitive, then there is nothing to do.
7221 *
7222 * XXX Come back to this.
7223 */
7224 if (fvp == tvp) {
7225 int pathconf_val;
7226
7227 /*
7228 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
7229 * then assume that this file system is case sensitive.
7230 */
7231 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
7232 pathconf_val != 0) {
7233 goto out1;
7234 }
7235 }
7236
7237 /*
7238 * Allow the renaming of mount points.
7239 * - target must not exist
7240 * - target must reside in the same directory as source
7241 * - union mounts cannot be renamed
7242 * - "/" cannot be renamed
7243 *
7244 * XXX Handle this in VFS after a continued lookup (if we missed
7245 * in the cache to start off)
7246 *
7247 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
7248 * we'll skip past here. The file system is responsible for
7249 * checking that @tvp is not a descendent of @fvp and vice versa
7250 * so it should always return EINVAL if either @tvp or @fvp is the
7251 * root of a volume.
7252 */
7253 if ((fvp->v_flag & VROOT) &&
7254 (fvp->v_type == VDIR) &&
7255 (tvp == NULL) &&
7256 (fvp->v_mountedhere == NULL) &&
7257 (fdvp == tdvp) &&
7258 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
7259 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
7260 vnode_t coveredvp;
7261
7262 /* switch fvp to the covered vnode */
7263 coveredvp = fvp->v_mount->mnt_vnodecovered;
7264 if ( (vnode_getwithref(coveredvp)) ) {
7265 error = ENOENT;
7266 goto out1;
7267 }
7268 vnode_put(fvp);
7269
7270 fvp = coveredvp;
7271 mntrename = TRUE;
7272 }
7273 /*
7274 * Check for cross-device rename.
7275 */
7276 if ((fvp->v_mount != tdvp->v_mount) ||
7277 (tvp && (fvp->v_mount != tvp->v_mount))) {
7278 error = EXDEV;
7279 goto out1;
7280 }
7281
7282 /*
7283 * If source is the same as the destination (that is the
7284 * same inode number) then there is nothing to do...
7285 * EXCEPT if the underlying file system supports case
7286 * insensitivity and is case preserving. In this case
7287 * the file system needs to handle the special case of
7288 * getting the same vnode as target (fvp) and source (tvp).
7289 *
7290 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
7291 * and _PC_CASE_PRESERVING can have this exception, and they need to
7292 * handle the special case of getting the same vnode as target and
7293 * source. NOTE: Then the target is unlocked going into vnop_rename,
7294 * so not to cause locking problems. There is a single reference on tvp.
7295 *
7296 * NOTE - that fvp == tvp also occurs if they are hard linked and
7297 * that correct behaviour then is just to return success without doing
7298 * anything.
7299 *
7300 * XXX filesystem should take care of this itself, perhaps...
7301 */
7302 if (fvp == tvp && fdvp == tdvp) {
7303 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
7304 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
7305 fromnd->ni_cnd.cn_namelen)) {
7306 goto out1;
7307 }
7308 }
7309
7310 if (holding_mntlock && fvp->v_mount != locked_mp) {
7311 /*
7312 * we're holding a reference and lock
7313 * on locked_mp, but it no longer matches
7314 * what we want to do... so drop our hold
7315 */
7316 mount_unlock_renames(locked_mp);
7317 mount_drop(locked_mp, 0);
7318 holding_mntlock = 0;
7319 }
7320 if (tdvp != fdvp && fvp->v_type == VDIR) {
7321 /*
7322 * serialize renames that re-shape
7323 * the tree... if holding_mntlock is
7324 * set, then we're ready to go...
7325 * otherwise we
7326 * first need to drop the iocounts
7327 * we picked up, second take the
7328 * lock to serialize the access,
7329 * then finally start the lookup
7330 * process over with the lock held
7331 */
7332 if (!holding_mntlock) {
7333 /*
7334 * need to grab a reference on
7335 * the mount point before we
7336 * drop all the iocounts... once
7337 * the iocounts are gone, the mount
7338 * could follow
7339 */
7340 locked_mp = fvp->v_mount;
7341 mount_ref(locked_mp, 0);
7342
7343 /*
7344 * nameidone has to happen before we vnode_put(tvp)
7345 * since it may need to release the fs_nodelock on the tvp
7346 */
7347 nameidone(tond);
7348
7349 if (tvp)
7350 vnode_put(tvp);
7351 vnode_put(tdvp);
7352
7353 /*
7354 * nameidone has to happen before we vnode_put(fdvp)
7355 * since it may need to release the fs_nodelock on the fvp
7356 */
7357 nameidone(fromnd);
7358
7359 vnode_put(fvp);
7360 vnode_put(fdvp);
7361
7362 mount_lock_renames(locked_mp);
7363 holding_mntlock = 1;
7364
7365 goto retry;
7366 }
7367 } else {
7368 /*
7369 * when we dropped the iocounts to take
7370 * the lock, we allowed the identity of
7371 * the various vnodes to change... if they did,
7372 * we may no longer be dealing with a rename
7373 * that reshapes the tree... once we're holding
7374 * the iocounts, the vnodes can't change type
7375 * so we're free to drop the lock at this point
7376 * and continue on
7377 */
7378 if (holding_mntlock) {
7379 mount_unlock_renames(locked_mp);
7380 mount_drop(locked_mp, 0);
7381 holding_mntlock = 0;
7382 }
7383 }
7384
7385 // save these off so we can later verify that fvp is the same
7386 oname = fvp->v_name;
7387 oparent = fvp->v_parent;
7388
7389 skipped_lookup:
7390 #if CONFIG_FSE
7391 need_event = need_fsevent(FSE_RENAME, fdvp);
7392 if (need_event) {
7393 if (fvp) {
7394 get_fse_info(fvp, &from_finfo, ctx);
7395 } else {
7396 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
7397 if (error) {
7398 goto out1;
7399 }
7400
7401 fvap = &__rename_data->fv_attr;
7402 }
7403
7404 if (tvp) {
7405 get_fse_info(tvp, &to_finfo, ctx);
7406 } else if (batched) {
7407 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
7408 if (error) {
7409 goto out1;
7410 }
7411
7412 tvap = &__rename_data->tv_attr;
7413 }
7414 }
7415 #else
7416 need_event = 0;
7417 #endif /* CONFIG_FSE */
7418
7419 if (need_event || kauth_authorize_fileop_has_listeners()) {
7420 if (from_name == NULL) {
7421 GET_PATH(from_name);
7422 if (from_name == NULL) {
7423 error = ENOMEM;
7424 goto out1;
7425 }
7426 }
7427
7428 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
7429
7430 if (to_name == NULL) {
7431 GET_PATH(to_name);
7432 if (to_name == NULL) {
7433 error = ENOMEM;
7434 goto out1;
7435 }
7436 }
7437
7438 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
7439 }
7440 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
7441 tdvp, &tvp, &tond->ni_cnd, tvap,
7442 flags, ctx);
7443
7444 if (holding_mntlock) {
7445 /*
7446 * we can drop our serialization
7447 * lock now
7448 */
7449 mount_unlock_renames(locked_mp);
7450 mount_drop(locked_mp, 0);
7451 holding_mntlock = 0;
7452 }
7453 if (error) {
7454 if (error == EKEEPLOOKING) {
7455 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
7456 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
7457 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
7458 }
7459 }
7460
7461 fromnd->ni_vp = fvp;
7462 tond->ni_vp = tvp;
7463
7464 goto continue_lookup;
7465 }
7466
7467 /*
7468 * We may encounter a race in the VNOP where the destination didn't
7469 * exist when we did the namei, but it does by the time we go and
7470 * try to create the entry. In this case, we should re-drive this rename
7471 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
7472 * but other filesystems susceptible to this race could return it, too.
7473 */
7474 if (error == ERECYCLE) {
7475 do_retry = 1;
7476 }
7477
7478 /*
7479 * For compound VNOPs, the authorization callback may return
7480 * ENOENT in case of racing hardlink lookups hitting the name
7481 * cache; redrive the lookup.
7482 */
7483 if (batched && error == ENOENT) {
7484 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7485 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7486 do_retry = 1;
7487 retry_count += 1;
7488 }
7489 }
7490
7491 goto out1;
7492 }
7493
7494 /* Call out to allow 3rd party notification of rename.
7495 * Ignore result of kauth_authorize_fileop call.
7496 */
7497 kauth_authorize_fileop(vfs_context_ucred(ctx),
7498 KAUTH_FILEOP_RENAME,
7499 (uintptr_t)from_name, (uintptr_t)to_name);
7500 if (flags & VFS_RENAME_SWAP) {
7501 kauth_authorize_fileop(vfs_context_ucred(ctx),
7502 KAUTH_FILEOP_RENAME,
7503 (uintptr_t)to_name, (uintptr_t)from_name);
7504 }
7505
7506 #if CONFIG_FSE
7507 if (from_name != NULL && to_name != NULL) {
7508 if (from_truncated || to_truncated) {
7509 // set it here since only the from_finfo gets reported up to user space
7510 from_finfo.mode |= FSE_TRUNCATED_PATH;
7511 }
7512
7513 if (tvap && tvp) {
7514 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
7515 }
7516 if (fvap) {
7517 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
7518 }
7519
7520 if (tvp) {
7521 add_fsevent(FSE_RENAME, ctx,
7522 FSE_ARG_STRING, from_len, from_name,
7523 FSE_ARG_FINFO, &from_finfo,
7524 FSE_ARG_STRING, to_len, to_name,
7525 FSE_ARG_FINFO, &to_finfo,
7526 FSE_ARG_DONE);
7527 if (flags & VFS_RENAME_SWAP) {
7528 /*
7529 * Strictly speaking, swap is the equivalent of
7530 * *three* renames. FSEvents clients should only take
7531 * the events as a hint, so we only bother reporting
7532 * two.
7533 */
7534 add_fsevent(FSE_RENAME, ctx,
7535 FSE_ARG_STRING, to_len, to_name,
7536 FSE_ARG_FINFO, &to_finfo,
7537 FSE_ARG_STRING, from_len, from_name,
7538 FSE_ARG_FINFO, &from_finfo,
7539 FSE_ARG_DONE);
7540 }
7541 } else {
7542 add_fsevent(FSE_RENAME, ctx,
7543 FSE_ARG_STRING, from_len, from_name,
7544 FSE_ARG_FINFO, &from_finfo,
7545 FSE_ARG_STRING, to_len, to_name,
7546 FSE_ARG_DONE);
7547 }
7548 }
7549 #endif /* CONFIG_FSE */
7550
7551 /*
7552 * update filesystem's mount point data
7553 */
7554 if (mntrename) {
7555 char *cp, *pathend, *mpname;
7556 char * tobuf;
7557 struct mount *mp;
7558 int maxlen;
7559 size_t len = 0;
7560
7561 mp = fvp->v_mountedhere;
7562
7563 if (vfs_busy(mp, LK_NOWAIT)) {
7564 error = EBUSY;
7565 goto out1;
7566 }
7567 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
7568
7569 if (UIO_SEG_IS_USER_SPACE(segflg))
7570 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
7571 else
7572 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
7573 if (!error) {
7574 /* find current mount point prefix */
7575 pathend = &mp->mnt_vfsstat.f_mntonname[0];
7576 for (cp = pathend; *cp != '\0'; ++cp) {
7577 if (*cp == '/')
7578 pathend = cp + 1;
7579 }
7580 /* find last component of target name */
7581 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
7582 if (*cp == '/')
7583 mpname = cp + 1;
7584 }
7585 /* append name to prefix */
7586 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
7587 bzero(pathend, maxlen);
7588 strlcpy(pathend, mpname, maxlen);
7589 }
7590 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
7591
7592 vfs_unbusy(mp);
7593 }
7594 /*
7595 * fix up name & parent pointers. note that we first
7596 * check that fvp has the same name/parent pointers it
7597 * had before the rename call... this is a 'weak' check
7598 * at best...
7599 *
7600 * XXX oparent and oname may not be set in the compound vnop case
7601 */
7602 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
7603 int update_flags;
7604
7605 update_flags = VNODE_UPDATE_NAME;
7606
7607 if (fdvp != tdvp)
7608 update_flags |= VNODE_UPDATE_PARENT;
7609
7610 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
7611 }
7612 out1:
7613 if (to_name != NULL) {
7614 RELEASE_PATH(to_name);
7615 to_name = NULL;
7616 }
7617 if (from_name != NULL) {
7618 RELEASE_PATH(from_name);
7619 from_name = NULL;
7620 }
7621 if (holding_mntlock) {
7622 mount_unlock_renames(locked_mp);
7623 mount_drop(locked_mp, 0);
7624 holding_mntlock = 0;
7625 }
7626 if (tdvp) {
7627 /*
7628 * nameidone has to happen before we vnode_put(tdvp)
7629 * since it may need to release the fs_nodelock on the tdvp
7630 */
7631 nameidone(tond);
7632
7633 if (tvp)
7634 vnode_put(tvp);
7635 vnode_put(tdvp);
7636 }
7637 if (fdvp) {
7638 /*
7639 * nameidone has to happen before we vnode_put(fdvp)
7640 * since it may need to release the fs_nodelock on the fdvp
7641 */
7642 nameidone(fromnd);
7643
7644 if (fvp)
7645 vnode_put(fvp);
7646 vnode_put(fdvp);
7647 }
7648
7649 /*
7650 * If things changed after we did the namei, then we will re-drive
7651 * this rename call from the top.
7652 */
7653 if (do_retry) {
7654 do_retry = 0;
7655 goto retry;
7656 }
7657
7658 FREE(__rename_data, M_TEMP);
7659 return (error);
7660 }
7661
7662 int
7663 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7664 {
7665 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7666 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7667 }
7668
7669 int renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
7670 {
7671 return renameat_internal(
7672 vfs_context_current(),
7673 uap->fromfd, uap->from,
7674 uap->tofd, uap->to,
7675 UIO_USERSPACE, uap->flags);
7676 }
7677
7678 int
7679 renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7680 {
7681 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7682 uap->tofd, uap->to, UIO_USERSPACE, 0));
7683 }
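
/*
 * Illustrative userspace usage (not part of this file): renameatx_np() is
 * reached from the renamex_np()/renameatx_np() wrappers, which on macOS are
 * declared in <stdio.h> together with the RENAME_SWAP and RENAME_EXCL flags;
 * treat the exact declarations as an assumption to verify. Hypothetical
 * paths; error handling elided.
 *
 *	#include <stdio.h>
 *
 *	// atomically swap the two names (both must exist, cf. VFS_RENAME_SWAP)
 *	(void)renamex_np("config.plist", "config.plist.new", RENAME_SWAP);
 *	// fail with EEXIST instead of replacing the destination (VFS_RENAME_EXCL)
 *	(void)renamex_np("staging.db", "live.db", RENAME_EXCL);
 */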
7684
7685 /*
7686 * Make a directory file.
7687 *
7688 * Returns: 0 Success
7689 * EEXIST
7690 * namei:???
7691 * vnode_authorize:???
7692 * vn_create:???
7693 */
7694 /* ARGSUSED */
7695 static int
7696 mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
7697 enum uio_seg segflg)
7698 {
7699 vnode_t vp, dvp;
7700 int error;
7701 int update_flags = 0;
7702 int batched;
7703 struct nameidata nd;
7704
7705 AUDIT_ARG(mode, vap->va_mode);
7706 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
7707 path, ctx);
7708 nd.ni_cnd.cn_flags |= WILLBEDIR;
7709 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
7710
7711 continue_lookup:
7712 error = nameiat(&nd, fd);
7713 if (error)
7714 return (error);
7715 dvp = nd.ni_dvp;
7716 vp = nd.ni_vp;
7717
7718 if (vp != NULL) {
7719 error = EEXIST;
7720 goto out;
7721 }
7722
7723 batched = vnode_compound_mkdir_available(dvp);
7724
7725 VATTR_SET(vap, va_type, VDIR);
7726
7727 /*
7728 * XXX
7729 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7730 * only get EXISTS or EISDIR for existing path components, and not that it could see
7731 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7732 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7733 */
7734 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
7735 if (error == EACCES || error == EPERM) {
7736 int error2;
7737
7738 nameidone(&nd);
7739 vnode_put(dvp);
7740 dvp = NULLVP;
7741
7742 /*
7743 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
7744 * rather than EACCES if the target exists.
7745 */
7746 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
7747 path, ctx);
7748 error2 = nameiat(&nd, fd);
7749 if (error2) {
7750 goto out;
7751 } else {
7752 vp = nd.ni_vp;
7753 error = EEXIST;
7754 goto out;
7755 }
7756 }
7757
7758 goto out;
7759 }
7760
7761 /*
7762 * make the directory
7763 */
7764 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
7765 if (error == EKEEPLOOKING) {
7766 nd.ni_vp = vp;
7767 goto continue_lookup;
7768 }
7769
7770 goto out;
7771 }
7772
7773 // Make sure the name & parent pointers are hooked up
7774 if (vp->v_name == NULL)
7775 update_flags |= VNODE_UPDATE_NAME;
7776 if (vp->v_parent == NULLVP)
7777 update_flags |= VNODE_UPDATE_PARENT;
7778
7779 if (update_flags)
7780 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
7781
7782 #if CONFIG_FSE
7783 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
7784 #endif
7785
7786 out:
7787 /*
7788 * nameidone has to happen before we vnode_put(dvp)
7789 * since it may need to release the fs_nodelock on the dvp
7790 */
7791 nameidone(&nd);
7792
7793 if (vp)
7794 vnode_put(vp);
7795 if (dvp)
7796 vnode_put(dvp);
7797
7798 return (error);
7799 }
7800
7801 /*
7802 * mkdir_extended: Create a directory; with extended security (ACL).
7803 *
7804 * Parameters: p Process requesting to create the directory
7805 * uap User argument descriptor (see below)
7806 * retval (ignored)
7807 *
7808 * Indirect: uap->path Path of directory to create
7809 * uap->mode Access permissions to set
7810 * uap->xsecurity ACL to set
7811 *
7812 * Returns: 0 Success
7813 * !0 Not success
7814 *
7815 */
7816 int
7817 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
7818 {
7819 int ciferror;
7820 kauth_filesec_t xsecdst;
7821 struct vnode_attr va;
7822
7823 AUDIT_ARG(owner, uap->uid, uap->gid);
7824
7825 xsecdst = NULL;
7826 if ((uap->xsecurity != USER_ADDR_NULL) &&
7827 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
7828 return ciferror;
7829
7830 VATTR_INIT(&va);
7831 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7832 if (xsecdst != NULL)
7833 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
7834
7835 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7836 UIO_USERSPACE);
7837 if (xsecdst != NULL)
7838 kauth_filesec_free(xsecdst);
7839 return ciferror;
7840 }
7841
7842 int
7843 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
7844 {
7845 struct vnode_attr va;
7846
7847 VATTR_INIT(&va);
7848 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7849
7850 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7851 UIO_USERSPACE));
7852 }
7853
7854 int
7855 mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
7856 {
7857 struct vnode_attr va;
7858
7859 VATTR_INIT(&va);
7860 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7861
7862 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
7863 UIO_USERSPACE));
7864 }
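
/*
 * Illustrative userspace usage (not part of this file): all three entry
 * points above mask the requested mode with ACCESSPERMS and the process
 * umask (fd_cmask) before calling mkdir1at(). A minimal sketch with
 * hypothetical names; error handling elided.
 *
 *	#include <fcntl.h>
 *	#include <sys/stat.h>
 *
 *	(void)umask(022);
 *	(void)mkdir("build", 0777);			// created as 0755
 *	int dirfd = open("build", O_RDONLY | O_DIRECTORY);
 *	(void)mkdirat(dirfd, "obj", 0777);		// also 0755
 */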
7865
7866 static int
7867 rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
7868 enum uio_seg segflg)
7869 {
7870 vnode_t vp, dvp;
7871 int error;
7872 struct nameidata nd;
7873 char *path = NULL;
7874 int len=0;
7875 int has_listeners = 0;
7876 int need_event = 0;
7877 int truncated = 0;
7878 #if CONFIG_FSE
7879 struct vnode_attr va;
7880 #endif /* CONFIG_FSE */
7881 struct vnode_attr *vap = NULL;
7882 int restart_count = 0;
7883 int batched;
7884
7885 int restart_flag;
7886
7887 /*
7888 * This loop exists to restart rmdir in the unlikely case that two
7889 * processes are simultaneously trying to remove the same directory
7890 * containing orphaned appleDouble files.
7891 */
7892 do {
7893 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
7894 segflg, dirpath, ctx);
7895 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
7896 continue_lookup:
7897 restart_flag = 0;
7898 vap = NULL;
7899
7900 error = nameiat(&nd, fd);
7901 if (error)
7902 return (error);
7903
7904 dvp = nd.ni_dvp;
7905 vp = nd.ni_vp;
7906
7907 if (vp) {
7908 batched = vnode_compound_rmdir_available(vp);
7909
7910 if (vp->v_flag & VROOT) {
7911 /*
7912 * The root of a mounted filesystem cannot be deleted.
7913 */
7914 error = EBUSY;
7915 goto out;
7916 }
7917
7918 /*
7919 * Removed a check here; we used to abort if vp's vid
7920 * was not the same as what we'd seen the last time around.
7921 * I do not think that check was valid, because if we retry
7922 * and all dirents are gone, the directory could legitimately
7923 * be recycled but still be present in a situation where we would
7924 * have had permission to delete. Therefore, we won't make
7925 * an effort to preserve that check now that we may not have a
7926 * vp here.
7927 */
7928
7929 if (!batched) {
7930 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
7931 if (error) {
7932 if (error == ENOENT) {
7933 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7934 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7935 restart_flag = 1;
7936 restart_count += 1;
7937 }
7938 }
7939 goto out;
7940 }
7941 }
7942 } else {
7943 batched = 1;
7944
7945 if (!vnode_compound_rmdir_available(dvp)) {
7946 panic("No error, but no compound rmdir?");
7947 }
7948 }
7949
7950 #if CONFIG_FSE
7951 fse_info finfo;
7952
7953 need_event = need_fsevent(FSE_DELETE, dvp);
7954 if (need_event) {
7955 if (!batched) {
7956 get_fse_info(vp, &finfo, ctx);
7957 } else {
7958 error = vfs_get_notify_attributes(&va);
7959 if (error) {
7960 goto out;
7961 }
7962
7963 vap = &va;
7964 }
7965 }
7966 #endif
7967 has_listeners = kauth_authorize_fileop_has_listeners();
7968 if (need_event || has_listeners) {
7969 if (path == NULL) {
7970 GET_PATH(path);
7971 if (path == NULL) {
7972 error = ENOMEM;
7973 goto out;
7974 }
7975 }
7976
7977 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
7978 #if CONFIG_FSE
7979 if (truncated) {
7980 finfo.mode |= FSE_TRUNCATED_PATH;
7981 }
7982 #endif
7983 }
7984
7985 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
7986 nd.ni_vp = vp;
7987 if (vp == NULLVP) {
7988 /* Couldn't find a vnode */
7989 goto out;
7990 }
7991
7992 if (error == EKEEPLOOKING) {
7993 goto continue_lookup;
7994 } else if (batched && error == ENOENT) {
7995 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7996 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7997 /*
7998 * For compound VNOPs, the authorization callback
7999 * may return ENOENT in case of racing hard link lookups;
8000 * redrive the lookup.
8001 */
8002 restart_flag = 1;
8003 restart_count += 1;
8004 goto out;
8005 }
8006 }
8007 #if CONFIG_APPLEDOUBLE
8008 /*
8009 * Special case to remove orphaned AppleDouble
8010 * files. I don't like putting this in the kernel,
8011 * but carbon does not like putting this in carbon either,
8012 * so here we are.
8013 */
8014 if (error == ENOTEMPTY) {
8015 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
8016 if (error == EBUSY) {
8017 goto out;
8018 }
8019
8020
8021 /*
8022 * Assuming everything went well, we will try the RMDIR again
8023 */
8024 if (!error)
8025 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
8026 }
8027 #endif /* CONFIG_APPLEDOUBLE */
8028 /*
8029 * Call out to allow 3rd party notification of delete.
8030 * Ignore result of kauth_authorize_fileop call.
8031 */
8032 if (!error) {
8033 if (has_listeners) {
8034 kauth_authorize_fileop(vfs_context_ucred(ctx),
8035 KAUTH_FILEOP_DELETE,
8036 (uintptr_t)vp,
8037 (uintptr_t)path);
8038 }
8039
8040 if (vp->v_flag & VISHARDLINK) {
8041 // see the comment in unlink1() about why we update
8042 // the parent of a hard link when it is removed
8043 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
8044 }
8045
8046 #if CONFIG_FSE
8047 if (need_event) {
8048 if (vap) {
8049 vnode_get_fse_info_from_vap(vp, &finfo, vap);
8050 }
8051 add_fsevent(FSE_DELETE, ctx,
8052 FSE_ARG_STRING, len, path,
8053 FSE_ARG_FINFO, &finfo,
8054 FSE_ARG_DONE);
8055 }
8056 #endif
8057 }
8058
8059 out:
8060 if (path != NULL) {
8061 RELEASE_PATH(path);
8062 path = NULL;
8063 }
8064 /*
8065 * nameidone has to happen before we vnode_put(dvp)
8066 * since it may need to release the fs_nodelock on the dvp
8067 */
8068 nameidone(&nd);
8069 vnode_put(dvp);
8070
8071 if (vp)
8072 vnode_put(vp);
8073
8074 if (restart_flag == 0) {
8075 wakeup_one((caddr_t)vp);
8076 return (error);
8077 }
8078 tsleep(vp, PVFS, "rm AD", 1);
8079
8080 } while (restart_flag != 0);
8081
8082 return (error);
8083
8084 }
8085
8086 /*
8087 * Remove a directory file.
8088 */
8089 /* ARGSUSED */
8090 int
8091 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
8092 {
8093 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
8094 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
8095 }
8096
8097 /* Get direntry length padded to 8 byte alignment */
8098 #define DIRENT64_LEN(namlen) \
8099 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
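
/*
 * Worked example for the macro above (a sketch, assuming sizeof(struct
 * direntry) is 1048 bytes -- the fixed fields plus the MAXPATHLEN-sized
 * d_name, padded to 8 bytes -- which is consistent with the "32 bytes" figure
 * quoted in the vnode_readdir64() comment below):
 *
 *	DIRENT64_LEN(3) = (1048 + 3 - 1023 + 7) & ~7
 *	                = 35 & ~7
 *	                = 32
 *
 * i.e. the fixed header, the name and its terminating NUL, rounded up to the
 * next multiple of 8.
 */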
8100
8101 errno_t
8102 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
8103 int *numdirent, vfs_context_t ctxp)
8104 {
8105 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
8106 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
8107 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
8108 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
8109 } else {
8110 size_t bufsize;
8111 void * bufptr;
8112 uio_t auio;
8113 struct direntry *entry64;
8114 struct dirent *dep;
8115 int bytesread;
8116 int error;
8117
8118 /*
8119 * Our kernel buffer needs to be smaller since re-packing
8120 * will expand each dirent. The worst case (when the name
8121 * length is 3) corresponds to a struct direntry size of 32
8122 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
8123 * (4-byte aligned). So having a buffer that is 3/8 the size
8124 * will prevent us from reading more than we can pack.
8125 *
8126 * Since this buffer is wired memory, we will limit the
8127 * buffer size to a maximum of 32K. We would really like to
8128 * use 32K in the MIN(), but we use magic number 87371 to
8129 * prevent uio_resid() * 3 / 8 from overflowing.
8130 */
8131 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
8132 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
8133 if (bufptr == NULL) {
8134 return ENOMEM;
8135 }
8136
8137 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
8138 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
8139 auio->uio_offset = uio->uio_offset;
8140
8141 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
8142
8143 dep = (struct dirent *)bufptr;
8144 bytesread = bufsize - uio_resid(auio);
8145
8146 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
8147 M_TEMP, M_WAITOK);
8148 /*
8149 * Convert all the entries and copy them out to user's buffer.
8150 */
8151 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
8152 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
8153
8154 bzero(entry64, enbufsize);
8155 /* Convert a dirent to a dirent64. */
8156 entry64->d_ino = dep->d_ino;
8157 entry64->d_seekoff = 0;
8158 entry64->d_reclen = enbufsize;
8159 entry64->d_namlen = dep->d_namlen;
8160 entry64->d_type = dep->d_type;
8161 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
8162
8163 /* Move to next entry. */
8164 dep = (struct dirent *)((char *)dep + dep->d_reclen);
8165
8166 /* Copy entry64 to user's buffer. */
8167 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
8168 }
8169
8170 /* Update the real offset using the offset we got from VNOP_READDIR. */
8171 if (error == 0) {
8172 uio->uio_offset = auio->uio_offset;
8173 }
8174 uio_free(auio);
8175 FREE(bufptr, M_TEMP);
8176 FREE(entry64, M_TEMP);
8177 return (error);
8178 }
8179 }
8180
8181 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
8182
8183 /*
8184 * Read a block of directory entries in a file system independent format.
8185 */
8186 static int
8187 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
8188 off_t *offset, int flags)
8189 {
8190 vnode_t vp;
8191 struct vfs_context context = *vfs_context_current(); /* local copy */
8192 struct fileproc *fp;
8193 uio_t auio;
8194 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8195 off_t loff;
8196 int error, eofflag, numdirent;
8197 char uio_buf[ UIO_SIZEOF(1) ];
8198
8199 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
8200 if (error) {
8201 return (error);
8202 }
8203 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8204 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8205 error = EBADF;
8206 goto out;
8207 }
8208
8209 if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
8210 bufsize = GETDIRENTRIES_MAXBUFSIZE;
8211
8212 #if CONFIG_MACF
8213 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
8214 if (error)
8215 goto out;
8216 #endif
8217 if ( (error = vnode_getwithref(vp)) ) {
8218 goto out;
8219 }
8220 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
8221
8222 unionread:
8223 if (vp->v_type != VDIR) {
8224 (void)vnode_put(vp);
8225 error = EINVAL;
8226 goto out;
8227 }
8228
8229 #if CONFIG_MACF
8230 error = mac_vnode_check_readdir(&context, vp);
8231 if (error != 0) {
8232 (void)vnode_put(vp);
8233 goto out;
8234 }
8235 #endif /* MAC */
8236
8237 loff = fp->f_fglob->fg_offset;
8238 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8239 uio_addiov(auio, bufp, bufsize);
8240
8241 if (flags & VNODE_READDIR_EXTENDED) {
8242 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
8243 fp->f_fglob->fg_offset = uio_offset(auio);
8244 } else {
8245 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
8246 fp->f_fglob->fg_offset = uio_offset(auio);
8247 }
8248 if (error) {
8249 (void)vnode_put(vp);
8250 goto out;
8251 }
8252
8253 if ((user_ssize_t)bufsize == uio_resid(auio)){
8254 if (union_dircheckp) {
8255 error = union_dircheckp(&vp, fp, &context);
8256 if (error == -1)
8257 goto unionread;
8258 if (error) {
8259 (void)vnode_put(vp);
8260 goto out;
8261 }
8262 }
8263
8264 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
8265 struct vnode *tvp = vp;
8266 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
8267 vnode_ref(vp);
8268 fp->f_fglob->fg_data = (caddr_t) vp;
8269 fp->f_fglob->fg_offset = 0;
8270 vnode_rele(tvp);
8271 vnode_put(tvp);
8272 goto unionread;
8273 }
8274 vp = tvp;
8275 }
8276 }
8277
8278 vnode_put(vp);
8279 if (offset) {
8280 *offset = loff;
8281 }
8282
8283 *bytesread = bufsize - uio_resid(auio);
8284 out:
8285 file_drop(fd);
8286 return (error);
8287 }
8288
8289
8290 int
8291 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
8292 {
8293 off_t offset;
8294 ssize_t bytesread;
8295 int error;
8296
8297 AUDIT_ARG(fd, uap->fd);
8298 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
8299
8300 if (error == 0) {
8301 if (proc_is64bit(p)) {
8302 user64_long_t base = (user64_long_t)offset;
8303 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
8304 } else {
8305 user32_long_t base = (user32_long_t)offset;
8306 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
8307 }
8308 *retval = bytesread;
8309 }
8310 return (error);
8311 }
8312
8313 int
8314 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
8315 {
8316 off_t offset;
8317 ssize_t bytesread;
8318 int error;
8319
8320 AUDIT_ARG(fd, uap->fd);
8321 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
8322
8323 if (error == 0) {
8324 *retval = bytesread;
8325 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
8326 }
8327 return (error);
8328 }
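/*
 * Both variants above report the directory offset at which the transfer
 * began (loff): getdirentries() narrows it to the caller's long size and
 * returns the byte count through a 32-bit retval, while getdirentries64()
 * copies the offset out as a full off_t and returns a 64-bit byte count.
 */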
8329
8330
8331 /*
8332 * Set the mode mask for creation of filesystem nodes.
8333 * XXX implement xsecurity
8334 */
8335 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
8336 static int
8337 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
8338 {
8339 struct filedesc *fdp;
8340
8341 AUDIT_ARG(mask, newmask);
8342 proc_fdlock(p);
8343 fdp = p->p_fd;
8344 *retval = fdp->fd_cmask;
8345 fdp->fd_cmask = newmask & ALLPERMS;
8346 proc_fdunlock(p);
8347 return (0);
8348 }
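/*
 * Worked example of the mask handling above, as seen from userland: the
 * previous mask comes back through *retval and the new one is clamped to
 * ALLPERMS.  A minimal sketch; the file name is hypothetical.
 */
#if 0	/* illustrative userland sketch */
#include <sys/stat.h>
#include <fcntl.h>

static int
create_with_umask(void)
{
	mode_t old = umask(022);	/* umask() hands back the previous mask */

	/* Requested 0666; the effective mode is 0666 & ~022 = 0644. */
	int fd = open("/tmp/example-file", O_CREAT | O_WRONLY, 0666);

	(void)umask(old);		/* restore the saved mask */
	return fd;
}
#endif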
8349
8350 /*
8351 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
8352 *
8353 * Parameters: p Process requesting to set the umask
8354 * uap User argument descriptor (see below)
8355 * retval umask of the process (parameter p)
8356 *
8357 * Indirect: uap->newmask umask to set
8358 * uap->xsecurity ACL to set
8359 *
8360 * Returns: 0 Success
8361 * !0 Not success
8362 *
8363 */
8364 int
8365 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
8366 {
8367 int ciferror;
8368 kauth_filesec_t xsecdst;
8369
8370 xsecdst = KAUTH_FILESEC_NONE;
8371 if (uap->xsecurity != USER_ADDR_NULL) {
8372 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
8373 return ciferror;
8374 } else {
8375 xsecdst = KAUTH_FILESEC_NONE;
8376 }
8377
8378 ciferror = umask1(p, uap->newmask, xsecdst, retval);
8379
8380 if (xsecdst != KAUTH_FILESEC_NONE)
8381 kauth_filesec_free(xsecdst);
8382 return ciferror;
8383 }
8384
8385 int
8386 umask(proc_t p, struct umask_args *uap, int32_t *retval)
8387 {
8388 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
8389 }
8390
8391 /*
8392 * Void all references to file by ripping underlying filesystem
8393 * away from vnode.
8394 */
8395 /* ARGSUSED */
8396 int
8397 revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
8398 {
8399 vnode_t vp;
8400 struct vnode_attr va;
8401 vfs_context_t ctx = vfs_context_current();
8402 int error;
8403 struct nameidata nd;
8404
8405 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
8406 uap->path, ctx);
8407 error = namei(&nd);
8408 if (error)
8409 return (error);
8410 vp = nd.ni_vp;
8411
8412 nameidone(&nd);
8413
8414 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
8415 error = ENOTSUP;
8416 goto out;
8417 }
8418
8419 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
8420 error = EBUSY;
8421 goto out;
8422 }
8423
8424 #if CONFIG_MACF
8425 error = mac_vnode_check_revoke(ctx, vp);
8426 if (error)
8427 goto out;
8428 #endif
8429
8430 VATTR_INIT(&va);
8431 VATTR_WANTED(&va, va_uid);
8432 if ((error = vnode_getattr(vp, &va, ctx)))
8433 goto out;
8434 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
8435 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
8436 goto out;
8437 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
8438 VNOP_REVOKE(vp, REVOKEALL, ctx);
8439 out:
8440 vnode_put(vp);
8441 return (error);
8442 }
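/*
 * revoke() is only supported on character or block devices (ENOTSUP
 * otherwise, EBUSY for a mounted block device), and the caller must own the
 * node or be superuser; these are exactly the checks above.  A minimal
 * sketch; the tty path is hypothetical.
 */
#if 0	/* illustrative userland sketch */
#include <unistd.h>
#include <stdio.h>

static void
revoke_stale_tty(void)
{
	if (revoke("/dev/ttys003") == -1)
		perror("revoke");	/* e.g. ENOTSUP for a regular file */
}
#endif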
8443
8444
8445 /*
8446 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
8447 * The following system calls are designed to support features
8448 * which are specific to the HFS & HFS Plus volume formats
8449 */
8450
8451
8452 /*
8453 * Obtain attribute information on objects in a directory while enumerating
8454 * the directory.
8455 */
8456 /* ARGSUSED */
8457 int
8458 getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
8459 {
8460 vnode_t vp;
8461 struct fileproc *fp;
8462 uio_t auio = NULL;
8463 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8464 uint32_t count, savecount;
8465 uint32_t newstate;
8466 int error, eofflag;
8467 uint32_t loff;
8468 struct attrlist attributelist;
8469 vfs_context_t ctx = vfs_context_current();
8470 int fd = uap->fd;
8471 char uio_buf[ UIO_SIZEOF(1) ];
8472 kauth_action_t action;
8473
8474 AUDIT_ARG(fd, fd);
8475
8476 /* Get the attributes into kernel space */
8477 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
8478 return(error);
8479 }
8480 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
8481 return(error);
8482 }
8483 savecount = count;
8484 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
8485 return (error);
8486 }
8487 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8488 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8489 error = EBADF;
8490 goto out;
8491 }
8492
8493
8494 #if CONFIG_MACF
8495 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
8496 fp->f_fglob);
8497 if (error)
8498 goto out;
8499 #endif
8500
8501
8502 if ( (error = vnode_getwithref(vp)) )
8503 goto out;
8504
8505 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
8506
8507 unionread:
8508 if (vp->v_type != VDIR) {
8509 (void)vnode_put(vp);
8510 error = EINVAL;
8511 goto out;
8512 }
8513
8514 #if CONFIG_MACF
8515 error = mac_vnode_check_readdir(ctx, vp);
8516 if (error != 0) {
8517 (void)vnode_put(vp);
8518 goto out;
8519 }
8520 #endif /* MAC */
8521
8522 /* set up the uio structure which will contain the user's return buffer */
8523 loff = fp->f_fglob->fg_offset;
8524 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8525 uio_addiov(auio, uap->buffer, uap->buffersize);
8526
8527 /*
8528 * If the only item requested is file names, we can let that past with
8529 * just LIST_DIRECTORY. If they want any other attributes, that means
8530 * they need SEARCH as well.
8531 */
8532 action = KAUTH_VNODE_LIST_DIRECTORY;
8533 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
8534 attributelist.fileattr || attributelist.dirattr)
8535 action |= KAUTH_VNODE_SEARCH;
8536
8537 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
8538
8539 /* Believe it or not, uap->options only has 32 bits of valid
8540 * info, so truncate before extending again */
8541
8542 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
8543 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
8544 }
8545
8546 if (error) {
8547 (void) vnode_put(vp);
8548 goto out;
8549 }
8550
8551 /*
8552 * If we've got the last entry of a directory in a union mount
8553 * then reset the eofflag and pretend there's still more to come.
8554 * The next call will again set eofflag and the buffer will be empty,
8555 * so traverse to the underlying directory and do the directory
8556 * read there.
8557 */
8558 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
8559 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
8560 eofflag = 0;
8561 } else { // Empty buffer
8562 struct vnode *tvp = vp;
8563 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
8564 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
8565 fp->f_fglob->fg_data = (caddr_t) vp;
8566 fp->f_fglob->fg_offset = 0; // reset index for new dir
8567 count = savecount;
8568 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
8569 vnode_put(tvp);
8570 goto unionread;
8571 }
8572 vp = tvp;
8573 }
8574 }
8575
8576 (void)vnode_put(vp);
8577
8578 if (error)
8579 goto out;
8580 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
8581
8582 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
8583 goto out;
8584 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
8585 goto out;
8586 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
8587 goto out;
8588
8589 *retval = eofflag; /* similar to getdirentries */
8590 error = 0;
8591 out:
8592 file_drop(fd);
8593 return (error); /* errors are returned earlier; retval is 0 or 1 here */
8594
8595 } /* end of getdirentriesattr system call */
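/*
 * Sketch of the attrlist setup that drives the authorization choice above:
 * a name-only request needs just KAUTH_VNODE_LIST_DIRECTORY, while any
 * other attribute adds KAUTH_VNODE_SEARCH.  Assumes the usual <sys/attr.h>
 * definitions; it does not issue the system call itself.
 */
#if 0	/* illustrative userland sketch */
#include <sys/attr.h>
#include <string.h>

static void
names_only_attrlist(struct attrlist *al)
{
	memset(al, 0, sizeof(*al));
	al->bitmapcount = ATTR_BIT_MAP_COUNT;
	al->commonattr = ATTR_CMN_NAME;		/* names only: LIST_DIRECTORY suffices */
	/*
	 * Adding e.g. ATTR_CMN_OBJTYPE, or any fileattr/dirattr bit, would
	 * require SEARCH on the directory as well.
	 */
}
#endif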
8596
8597 /*
8598 * Exchange data between two files
8599 */
8600
8601 /* ARGSUSED */
8602 int
8603 exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
8604 {
8605
8606 struct nameidata fnd, snd;
8607 vfs_context_t ctx = vfs_context_current();
8608 vnode_t fvp;
8609 vnode_t svp;
8610 int error;
8611 u_int32_t nameiflags;
8612 char *fpath = NULL;
8613 char *spath = NULL;
8614 int flen=0, slen=0;
8615 int from_truncated=0, to_truncated=0;
8616 #if CONFIG_FSE
8617 fse_info f_finfo, s_finfo;
8618 #endif
8619
8620 nameiflags = 0;
8621 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8622
8623 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
8624 UIO_USERSPACE, uap->path1, ctx);
8625
8626 error = namei(&fnd);
8627 if (error)
8628 goto out2;
8629
8630 nameidone(&fnd);
8631 fvp = fnd.ni_vp;
8632
8633 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
8634 UIO_USERSPACE, uap->path2, ctx);
8635
8636 error = namei(&snd);
8637 if (error) {
8638 vnode_put(fvp);
8639 goto out2;
8640 }
8641 nameidone(&snd);
8642 svp = snd.ni_vp;
8643
8644 /*
8645 * if the files are the same, return EINVAL
8646 */
8647 if (svp == fvp) {
8648 error = EINVAL;
8649 goto out;
8650 }
8651
8652 /*
8653 * if the files are on different volumes, return an error
8654 */
8655 if (svp->v_mount != fvp->v_mount) {
8656 error = EXDEV;
8657 goto out;
8658 }
8659
8660 /* If they're not files, return an error */
8661 if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
8662 error = EINVAL;
8663 goto out;
8664 }
8665
8666 #if CONFIG_MACF
8667 error = mac_vnode_check_exchangedata(ctx,
8668 fvp, svp);
8669 if (error)
8670 goto out;
8671 #endif
8672 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
8673 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
8674 goto out;
8675
8676 if (
8677 #if CONFIG_FSE
8678 need_fsevent(FSE_EXCHANGE, fvp) ||
8679 #endif
8680 kauth_authorize_fileop_has_listeners()) {
8681 GET_PATH(fpath);
8682 GET_PATH(spath);
8683 if (fpath == NULL || spath == NULL) {
8684 error = ENOMEM;
8685 goto out;
8686 }
8687
8688 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
8689 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
8690
8691 #if CONFIG_FSE
8692 get_fse_info(fvp, &f_finfo, ctx);
8693 get_fse_info(svp, &s_finfo, ctx);
8694 if (from_truncated || to_truncated) {
8695 // set it here since only the f_finfo gets reported up to user space
8696 f_finfo.mode |= FSE_TRUNCATED_PATH;
8697 }
8698 #endif
8699 }
8700 /* Ok, make the call */
8701 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
8702
8703 if (error == 0) {
8704 const char *tmpname;
8705
8706 if (fpath != NULL && spath != NULL) {
8707 /* call out to allow 3rd party notification of exchangedata.
8708 * Ignore result of kauth_authorize_fileop call.
8709 */
8710 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
8711 (uintptr_t)fpath, (uintptr_t)spath);
8712 }
8713 name_cache_lock();
8714
8715 tmpname = fvp->v_name;
8716 fvp->v_name = svp->v_name;
8717 svp->v_name = tmpname;
8718
8719 if (fvp->v_parent != svp->v_parent) {
8720 vnode_t tmp;
8721
8722 tmp = fvp->v_parent;
8723 fvp->v_parent = svp->v_parent;
8724 svp->v_parent = tmp;
8725 }
8726 name_cache_unlock();
8727
8728 #if CONFIG_FSE
8729 if (fpath != NULL && spath != NULL) {
8730 add_fsevent(FSE_EXCHANGE, ctx,
8731 FSE_ARG_STRING, flen, fpath,
8732 FSE_ARG_FINFO, &f_finfo,
8733 FSE_ARG_STRING, slen, spath,
8734 FSE_ARG_FINFO, &s_finfo,
8735 FSE_ARG_DONE);
8736 }
8737 #endif
8738 }
8739
8740 out:
8741 if (fpath != NULL)
8742 RELEASE_PATH(fpath);
8743 if (spath != NULL)
8744 RELEASE_PATH(spath);
8745 vnode_put(svp);
8746 vnode_put(fvp);
8747 out2:
8748 return (error);
8749 }
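/*
 * From userland both paths must name regular files on the same volume, or
 * the checks above fail with EINVAL / EXDEV.  A minimal sketch assuming the
 * exchangedata() prototype from <unistd.h>; the file names are hypothetical.
 */
#if 0	/* illustrative userland sketch */
#include <unistd.h>
#include <sys/attr.h>	/* FSOPT_NOFOLLOW */
#include <stdio.h>

static void
swap_file_contents(void)
{
	if (exchangedata("/tmp/a.dat", "/tmp/b.dat", FSOPT_NOFOLLOW) == -1)
		perror("exchangedata");
}
#endif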
8750
8751 /*
8752 * Return (in MB) the amount of freespace on the given vnode's volume.
8753 */
8754 uint32_t freespace_mb(vnode_t vp);
8755
8756 uint32_t
8757 freespace_mb(vnode_t vp)
8758 {
8759 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
8760 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
8761 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
8762 }
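/*
 * Worked example of the conversion above: with f_bavail = 262144 free
 * blocks of f_bsize = 4096 bytes, the product is 1073741824 bytes (1 GiB),
 * and shifting right by 20 yields 1024 MB.
 */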
8763
8764 #if CONFIG_SEARCHFS
8765
8766 /* ARGSUSED */
8767
8768 int
8769 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
8770 {
8771 vnode_t vp, tvp;
8772 int i, error=0;
8773 int fserror = 0;
8774 struct nameidata nd;
8775 struct user64_fssearchblock searchblock;
8776 struct searchstate *state;
8777 struct attrlist *returnattrs;
8778 struct timeval timelimit;
8779 void *searchparams1,*searchparams2;
8780 uio_t auio = NULL;
8781 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8782 uint32_t nummatches;
8783 int mallocsize;
8784 uint32_t nameiflags;
8785 vfs_context_t ctx = vfs_context_current();
8786 char uio_buf[ UIO_SIZEOF(1) ];
8787
8788 /* Start by copying in fsearchblock parameter list */
8789 if (IS_64BIT_PROCESS(p)) {
8790 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
8791 timelimit.tv_sec = searchblock.timelimit.tv_sec;
8792 timelimit.tv_usec = searchblock.timelimit.tv_usec;
8793 }
8794 else {
8795 struct user32_fssearchblock tmp_searchblock;
8796
8797 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
8798 // munge into 64-bit version
8799 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
8800 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
8801 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
8802 searchblock.maxmatches = tmp_searchblock.maxmatches;
8803 /*
8804 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8805 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8806 */
8807 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
8808 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
8809 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
8810 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
8811 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
8812 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
8813 searchblock.searchattrs = tmp_searchblock.searchattrs;
8814 }
8815 if (error)
8816 return(error);
8817
8818 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
8819 */
8820 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
8821 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
8822 return(EINVAL);
8823
8824 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8825 /* It all has to go into local memory and it's not that big, so we might as well put it all together. */
8826 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8827 /* block. */
8828 /* */
8829 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8830 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8831 /* assumes the size is still 556 bytes, it will continue to work */
8832
8833 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
8834 sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
8835
8836 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
8837
8838 /* Now set up the various pointers to the correct place in our newly allocated memory */
8839
8840 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
8841 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
8842 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
8843
8844 /* Now copy in the stuff given our local variables. */
8845
8846 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
8847 goto freeandexit;
8848
8849 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
8850 goto freeandexit;
8851
8852 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
8853 goto freeandexit;
8854
8855 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
8856 goto freeandexit;
8857
8858 /*
8859 * When searching a union mount, need to set the
8860 * start flag at the first call on each layer to
8861 * reset state for the new volume.
8862 */
8863 if (uap->options & SRCHFS_START)
8864 state->ss_union_layer = 0;
8865 else
8866 uap->options |= state->ss_union_flags;
8867 state->ss_union_flags = 0;
8868
8869 /*
8870 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8871 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
8872 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8873 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
8874 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8875 */
8876
8877 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
8878 attrreference_t* string_ref;
8879 u_int32_t* start_length;
8880 user64_size_t param_length;
8881
8882 /* validate searchparams1 */
8883 param_length = searchblock.sizeofsearchparams1;
8884 /* skip the word that specifies length of the buffer */
8885 start_length= (u_int32_t*) searchparams1;
8886 start_length= start_length+1;
8887 string_ref= (attrreference_t*) start_length;
8888
8889 /* ensure no negative offsets or too big offsets */
8890 if (string_ref->attr_dataoffset < 0 ) {
8891 error = EINVAL;
8892 goto freeandexit;
8893 }
8894 if (string_ref->attr_length > MAXPATHLEN) {
8895 error = EINVAL;
8896 goto freeandexit;
8897 }
8898
8899 /* Check for pointer overflow in the string ref */
8900 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
8901 error = EINVAL;
8902 goto freeandexit;
8903 }
8904
8905 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
8906 error = EINVAL;
8907 goto freeandexit;
8908 }
8909 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
8910 error = EINVAL;
8911 goto freeandexit;
8912 }
8913 }
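/*
 * Buffer layout assumed by the checks above when ATTR_CMN_NAME is among the
 * search attributes (a sketch reconstructed from the validation logic,
 * offsets relative to the start of searchparams1):
 *
 *	[ u_int32_t buffer length ]
 *	[ attrreference_t { attr_dataoffset, attr_length } ]   <- string_ref
 *	[ ... name bytes at (char *)string_ref + attr_dataoffset ... ]
 *
 * attr_dataoffset must be non-negative, attr_length no more than MAXPATHLEN,
 * and both the offset and offset + length must land inside
 * sizeofsearchparams1.
 */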
8914
8915 /* set up the uio structure which will contain the user's return buffer */
8916 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8917 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
8918
8919 nameiflags = 0;
8920 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8921 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
8922 UIO_USERSPACE, uap->path, ctx);
8923
8924 error = namei(&nd);
8925 if (error)
8926 goto freeandexit;
8927 vp = nd.ni_vp;
8928 nameidone(&nd);
8929
8930 /*
8931 * Switch to the root vnode for the volume
8932 */
8933 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
8934 vnode_put(vp);
8935 if (error)
8936 goto freeandexit;
8937 vp = tvp;
8938
8939 /*
8940 * If it's a union mount, the path lookup takes
8941 * us to the top layer. But we may need to descend
8942 * to a lower layer. For non-union mounts the layer
8943 * is always zero.
8944 */
8945 for (i = 0; i < (int) state->ss_union_layer; i++) {
8946 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
8947 break;
8948 tvp = vp;
8949 vp = vp->v_mount->mnt_vnodecovered;
8950 if (vp == NULL) {
8951 vnode_put(tvp);
8952 error = ENOENT;
8953 goto freeandexit;
8954 }
8955 error = vnode_getwithref(vp);
8956 vnode_put(tvp);
8957 if (error)
8958 goto freeandexit;
8959 }
8960
8961 #if CONFIG_MACF
8962 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
8963 if (error) {
8964 vnode_put(vp);
8965 goto freeandexit;
8966 }
8967 #endif
8968
8969
8970 /*
8971 * If searchblock.maxmatches == 0, then skip the search. This has happened
8972 * before and sometimes the underlying code doesn't deal with it well.
8973 */
8974 if (searchblock.maxmatches == 0) {
8975 nummatches = 0;
8976 goto saveandexit;
8977 }
8978
8979 /*
8980 * All right, we have everything we need, so let's make that call.
8981 *
8982 * We keep special track of the return value from the file system:
8983 * EAGAIN is an acceptable error condition that shouldn't keep us
8984 * from copying out any results...
8985 */
8986
8987 fserror = VNOP_SEARCHFS(vp,
8988 searchparams1,
8989 searchparams2,
8990 &searchblock.searchattrs,
8991 (u_long)searchblock.maxmatches,
8992 &timelimit,
8993 returnattrs,
8994 &nummatches,
8995 (u_long)uap->scriptcode,
8996 (u_long)uap->options,
8997 auio,
8998 (struct searchstate *) &state->ss_fsstate,
8999 ctx);
9000
9001 /*
9002 * If it's a union mount we need to be called again
9003 * to search the mounted-on filesystem.
9004 */
9005 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
9006 state->ss_union_flags = SRCHFS_START;
9007 state->ss_union_layer++; // search next layer down
9008 fserror = EAGAIN;
9009 }
9010
9011 saveandexit:
9012
9013 vnode_put(vp);
9014
9015 /* Now copy out the stuff that needs copying out. That means the number of matches, the
9016 search state. Everything else was already put into the return buffer by the vop call. */
9017
9018 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
9019 goto freeandexit;
9020
9021 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
9022 goto freeandexit;
9023
9024 error = fserror;
9025
9026 freeandexit:
9027
9028 FREE(searchparams1,M_TEMP);
9029
9030 return(error);
9031
9032
9033 } /* end of searchfs system call */
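/*
 * Continuation protocol as seen from userland, a sketch based on the
 * SRCHFS_START / EAGAIN handling above (assuming the searchfs() wrapper
 * described in searchfs(2)): pass SRCHFS_START on the first call, treat the
 * copied-out searchstate as opaque, and while the call fails with EAGAIN
 * call it again without SRCHFS_START; matches already copied out remain
 * valid.
 */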
9034
9035 #else /* CONFIG_SEARCHFS */
9036
9037 int
9038 searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
9039 {
9040 return (ENOTSUP);
9041 }
9042
9043 #endif /* CONFIG_SEARCHFS */
9044
9045
9046 lck_grp_attr_t * nspace_group_attr;
9047 lck_attr_t * nspace_lock_attr;
9048 lck_grp_t * nspace_mutex_group;
9049
9050 lck_mtx_t nspace_handler_lock;
9051 lck_mtx_t nspace_handler_exclusion_lock;
9052
9053 time_t snapshot_timestamp=0;
9054 int nspace_allow_virtual_devs=0;
9055
9056 void nspace_handler_init(void);
9057
9058 typedef struct nspace_item_info {
9059 struct vnode *vp;
9060 void *arg;
9061 uint64_t op;
9062 uint32_t vid;
9063 uint32_t flags;
9064 uint32_t token;
9065 uint32_t refcount;
9066 } nspace_item_info;
9067
9068 #define MAX_NSPACE_ITEMS 128
9069 nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
9070 uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
9071 uint32_t nspace_token_id=0;
9072 uint32_t nspace_handler_timeout = 15; // seconds
9073
9074 #define NSPACE_ITEM_NEW 0x0001
9075 #define NSPACE_ITEM_PROCESSING 0x0002
9076 #define NSPACE_ITEM_DEAD 0x0004
9077 #define NSPACE_ITEM_CANCELLED 0x0008
9078 #define NSPACE_ITEM_DONE 0x0010
9079 #define NSPACE_ITEM_RESET_TIMER 0x0020
9080
9081 #define NSPACE_ITEM_NSPACE_EVENT 0x0040
9082 #define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
9083
9084 #define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
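/*
 * Rough life cycle of an nspace_items[] slot, as implemented below:
 *
 *	resolve_nspace_item_ext()	flags = NSPACE_ITEM_NEW | <event type bit>
 *	wait_for_namespace_event()	NEW cleared, NSPACE_ITEM_PROCESSING set,
 *					a token is assigned and handed to the handler
 *	handler UNBLOCK / CANCEL	flags become NSPACE_ITEM_DONE, or
 *					NSPACE_ITEM_CANCELLED replaces PROCESSING
 *	waiter wakes up			refcount drops to zero and flags = 0,
 *					freeing the slot for re-use
 */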
9085
9086 //#pragma optimization_level 0
9087
9088 typedef enum {
9089 NSPACE_HANDLER_NSPACE = 0,
9090 NSPACE_HANDLER_SNAPSHOT = 1,
9091
9092 NSPACE_HANDLER_COUNT,
9093 } nspace_type_t;
9094
9095 typedef struct {
9096 uint64_t handler_tid;
9097 struct proc *handler_proc;
9098 int handler_busy;
9099 } nspace_handler_t;
9100
9101 nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
9102
9103 /* namespace fsctl functions */
9104 static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
9105 static int nspace_item_flags_for_type(nspace_type_t nspace_type);
9106 static int nspace_open_flags_for_type(nspace_type_t nspace_type);
9107 static nspace_type_t nspace_type_for_op(uint64_t op);
9108 static int nspace_is_special_process(struct proc *proc);
9109 static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
9110 static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
9111 static int validate_namespace_args (int is64bit, int size);
9112 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
9113
9114
9115 static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
9116 {
9117 switch(nspace_type) {
9118 case NSPACE_HANDLER_NSPACE:
9119 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
9120 case NSPACE_HANDLER_SNAPSHOT:
9121 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
9122 default:
9123 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
9124 return 0;
9125 }
9126 }
9127
9128 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
9129 {
9130 switch(nspace_type) {
9131 case NSPACE_HANDLER_NSPACE:
9132 return NSPACE_ITEM_NSPACE_EVENT;
9133 case NSPACE_HANDLER_SNAPSHOT:
9134 return NSPACE_ITEM_SNAPSHOT_EVENT;
9135 default:
9136 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
9137 return 0;
9138 }
9139 }
9140
9141 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
9142 {
9143 switch(nspace_type) {
9144 case NSPACE_HANDLER_NSPACE:
9145 return FREAD | FWRITE | O_EVTONLY;
9146 case NSPACE_HANDLER_SNAPSHOT:
9147 return FREAD | O_EVTONLY;
9148 default:
9149 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
9150 return 0;
9151 }
9152 }
9153
9154 static inline nspace_type_t nspace_type_for_op(uint64_t op)
9155 {
9156 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
9157 case NAMESPACE_HANDLER_NSPACE_EVENT:
9158 return NSPACE_HANDLER_NSPACE;
9159 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
9160 return NSPACE_HANDLER_SNAPSHOT;
9161 default:
9162 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
9163 return NSPACE_HANDLER_NSPACE;
9164 }
9165 }
9166
9167 static inline int nspace_is_special_process(struct proc *proc)
9168 {
9169 int i;
9170 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9171 if (proc == nspace_handlers[i].handler_proc)
9172 return 1;
9173 }
9174 return 0;
9175 }
9176
9177 void
9178 nspace_handler_init(void)
9179 {
9180 nspace_lock_attr = lck_attr_alloc_init();
9181 nspace_group_attr = lck_grp_attr_alloc_init();
9182 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
9183 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
9184 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
9185 memset(&nspace_items[0], 0, sizeof(nspace_items));
9186 }
9187
9188 void
9189 nspace_proc_exit(struct proc *p)
9190 {
9191 int i, event_mask = 0;
9192
9193 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9194 if (p == nspace_handlers[i].handler_proc) {
9195 event_mask |= nspace_item_flags_for_type(i);
9196 nspace_handlers[i].handler_tid = 0;
9197 nspace_handlers[i].handler_proc = NULL;
9198 }
9199 }
9200
9201 if (event_mask == 0) {
9202 return;
9203 }
9204
9205 lck_mtx_lock(&nspace_handler_lock);
9206 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
9207 // if this process was the snapshot handler, zero snapshot_timestamp
9208 snapshot_timestamp = 0;
9209 }
9210
9211 //
9212 // unblock anyone that's waiting for the handler that died
9213 //
9214 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9215 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
9216
9217 if ( nspace_items[i].flags & event_mask ) {
9218
9219 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9220 vnode_lock_spin(nspace_items[i].vp);
9221 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9222 vnode_unlock(nspace_items[i].vp);
9223 }
9224 nspace_items[i].vp = NULL;
9225 nspace_items[i].vid = 0;
9226 nspace_items[i].flags = NSPACE_ITEM_DONE;
9227 nspace_items[i].token = 0;
9228
9229 wakeup((caddr_t)&(nspace_items[i].vp));
9230 }
9231 }
9232 }
9233
9234 wakeup((caddr_t)&nspace_item_idx);
9235 lck_mtx_unlock(&nspace_handler_lock);
9236 }
9237
9238
9239 int
9240 resolve_nspace_item(struct vnode *vp, uint64_t op)
9241 {
9242 return resolve_nspace_item_ext(vp, op, NULL);
9243 }
9244
9245 int
9246 resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
9247 {
9248 int i, error, keep_waiting;
9249 struct timespec ts;
9250 nspace_type_t nspace_type = nspace_type_for_op(op);
9251
9252 // only allow namespace events on regular files, directories and symlinks.
9253 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
9254 return 0;
9255 }
9256
9257 //
9258 // if this is a snapshot event and the vnode is on a
9259 // disk image just pretend nothing happened since any
9260 // change to the disk image will cause the disk image
9261 // itself to get backed up and this avoids multi-way
9262 // deadlocks between the snapshot handler and the ever
9263 // popular diskimages-helper process. the variable
9264 // nspace_allow_virtual_devs allows this behavior to
9265 // be overridden (for use by the Mobile TimeMachine
9266 // testing infrastructure which uses disk images)
9267 //
9268 if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
9269 && (vp->v_mount != NULL)
9270 && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
9271 && !nspace_allow_virtual_devs) {
9272
9273 return 0;
9274 }
9275
9276 // if (thread_tid(current_thread()) == namespace_handler_tid) {
9277 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9278 return 0;
9279 }
9280
9281 if (nspace_is_special_process(current_proc())) {
9282 return EDEADLK;
9283 }
9284
9285 lck_mtx_lock(&nspace_handler_lock);
9286
9287 retry:
9288 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9289 if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
9290 break;
9291 }
9292 }
9293
9294 if (i >= MAX_NSPACE_ITEMS) {
9295 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9296 if (nspace_items[i].flags == 0) {
9297 break;
9298 }
9299 }
9300 } else {
9301 nspace_items[i].refcount++;
9302 }
9303
9304 if (i >= MAX_NSPACE_ITEMS) {
9305 ts.tv_sec = nspace_handler_timeout;
9306 ts.tv_nsec = 0;
9307
9308 error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
9309 if (error == 0) {
9310 // an entry got free'd up, go see if we can get a slot
9311 goto retry;
9312 } else {
9313 lck_mtx_unlock(&nspace_handler_lock);
9314 return error;
9315 }
9316 }
9317
9318 //
9319 // if it didn't already exist, add it. if it did exist
9320 // we'll get woken up when someone does a wakeup() on
9321 // the slot in the nspace_items table.
9322 //
9323 if (vp != nspace_items[i].vp) {
9324 nspace_items[i].vp = vp;
9325 nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
9326 nspace_items[i].op = op;
9327 nspace_items[i].vid = vnode_vid(vp);
9328 nspace_items[i].flags = NSPACE_ITEM_NEW;
9329 nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
9330 if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
9331 if (arg) {
9332 vnode_lock_spin(vp);
9333 vp->v_flag |= VNEEDSSNAPSHOT;
9334 vnode_unlock(vp);
9335 }
9336 }
9337
9338 nspace_items[i].token = 0;
9339 nspace_items[i].refcount = 1;
9340
9341 wakeup((caddr_t)&nspace_item_idx);
9342 }
9343
9344 //
9345 // Now go to sleep until the handler does a wakeup on this
9346 // slot in the nspace_items table (or we timeout).
9347 //
9348 keep_waiting = 1;
9349 while(keep_waiting) {
9350 ts.tv_sec = nspace_handler_timeout;
9351 ts.tv_nsec = 0;
9352 error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
9353
9354 if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
9355 error = 0;
9356 } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
9357 error = nspace_items[i].token;
9358 } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
9359 if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
9360 nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
9361 continue;
9362 } else {
9363 error = ETIMEDOUT;
9364 }
9365 } else if (error == 0) {
9366 // hmmm, why did we get woken up?
9367 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
9368 nspace_items[i].token);
9369 }
9370
9371 if (--nspace_items[i].refcount == 0) {
9372 nspace_items[i].vp = NULL; // clear this so that no one will match on it again
9373 nspace_items[i].arg = NULL;
9374 nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
9375 nspace_items[i].flags = 0; // this clears it for re-use
9376 }
9377 wakeup(&nspace_token_id);
9378 keep_waiting = 0;
9379 }
9380
9381 lck_mtx_unlock(&nspace_handler_lock);
9382
9383 return error;
9384 }
9385
9386 int nspace_snapshot_event(vnode_t vp, time_t ctime, uint64_t op_type, void *arg)
9387 {
9388 int snapshot_error = 0;
9389
9390 if (vp == NULL) {
9391 return 0;
9392 }
9393
9394 /* Swap files are special; skip them */
9395 if (vnode_isswap(vp)) {
9396 return 0;
9397 }
9398
9399 if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
9400 // the change time is within this epoch
9401 int error;
9402
9403 error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
9404 if (error == EDEADLK) {
9405 snapshot_error = 0;
9406 } else if (error) {
9407 if (error == EAGAIN) {
9408 printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
9409 } else if (error == EINTR) {
9410 // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
9411 snapshot_error = EINTR;
9412 }
9413 }
9414 }
9415
9416 return snapshot_error;
9417 }
9418
9419 int
9420 get_nspace_item_status(struct vnode *vp, int32_t *status)
9421 {
9422 int i;
9423
9424 lck_mtx_lock(&nspace_handler_lock);
9425 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9426 if (nspace_items[i].vp == vp) {
9427 break;
9428 }
9429 }
9430
9431 if (i >= MAX_NSPACE_ITEMS) {
9432 lck_mtx_unlock(&nspace_handler_lock);
9433 return ENOENT;
9434 }
9435
9436 *status = nspace_items[i].flags;
9437 lck_mtx_unlock(&nspace_handler_lock);
9438 return 0;
9439 }
9440
9441
9442 #if 0
9443 static int
9444 build_volfs_path(struct vnode *vp, char *path, int *len)
9445 {
9446 struct vnode_attr va;
9447 int ret;
9448
9449 VATTR_INIT(&va);
9450 VATTR_WANTED(&va, va_fsid);
9451 VATTR_WANTED(&va, va_fileid);
9452
9453 if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
9454 *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
9455 ret = -1;
9456 } else {
9457 *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
9458 ret = 0;
9459 }
9460
9461 return ret;
9462 }
9463 #endif
9464
9465 //
9466 // Note: this function does NOT check permissions on all of the
9467 // parent directories leading to this vnode. It should only be
9468 // called on behalf of a root process. Otherwise a process may
9469 // get access to a file because the file itself is readable even
9470 // though its parent directories would prevent access.
9471 //
9472 static int
9473 vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
9474 {
9475 int error, action;
9476
9477 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9478 return error;
9479 }
9480
9481 #if CONFIG_MACF
9482 error = mac_vnode_check_open(ctx, vp, fmode);
9483 if (error)
9484 return error;
9485 #endif
9486
9487 /* compute action to be authorized */
9488 action = 0;
9489 if (fmode & FREAD) {
9490 action |= KAUTH_VNODE_READ_DATA;
9491 }
9492 if (fmode & (FWRITE | O_TRUNC)) {
9493 /*
9494 * If we are writing, appending, and not truncating,
9495 * indicate that we are appending so that if the
9496 * UF_APPEND or SF_APPEND bits are set, we do not deny
9497 * the open.
9498 */
9499 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
9500 action |= KAUTH_VNODE_APPEND_DATA;
9501 } else {
9502 action |= KAUTH_VNODE_WRITE_DATA;
9503 }
9504 }
9505
9506 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
9507 return error;
9508
9509
9510 //
9511 // if the vnode is tagged VOPENEVT and the current process
9512 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
9513 // flag to the open mode so that this open won't count against
9514 // the vnode when carbon delete() does a vnode_isinuse() to see
9515 // if a file is currently in use. this allows spotlight
9516 // importers to not interfere with carbon apps that depend on
9517 // the no-delete-if-busy semantics of carbon delete().
9518 //
9519 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
9520 fmode |= O_EVTONLY;
9521 }
9522
9523 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
9524 return error;
9525 }
9526 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
9527 VNOP_CLOSE(vp, fmode, ctx);
9528 return error;
9529 }
9530
9531 /* Call out to allow 3rd party notification of open.
9532 * Ignore result of kauth_authorize_fileop call.
9533 */
9534 #if CONFIG_MACF
9535 mac_vnode_notify_open(ctx, vp, fmode);
9536 #endif
9537 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
9538 (uintptr_t)vp, 0);
9539
9540
9541 return 0;
9542 }
9543
9544 static int
9545 wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
9546 {
9547 int i;
9548 int error = 0;
9549 int unblock = 0;
9550 task_t curtask;
9551
9552 lck_mtx_lock(&nspace_handler_exclusion_lock);
9553 if (nspace_handlers[nspace_type].handler_busy) {
9554 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9555 return EBUSY;
9556 }
9557
9558 nspace_handlers[nspace_type].handler_busy = 1;
9559 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9560
9561 /*
9562 * Any process that gets here will be one of the namespace handlers.
9563 * As such, it should be prevented from acquiring DMG vnodes during vnode reclamation,
9564 * since that can cause deadlocks: the namespace handler may prevent
9565 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
9566 * process.
9567 */
9568 curtask = current_task();
9569 bsd_set_dependency_capable (curtask);
9570
9571 lck_mtx_lock(&nspace_handler_lock);
9572 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9573 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
9574 nspace_handlers[nspace_type].handler_proc = current_proc();
9575 }
9576
9577 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
9578 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9579 error = EINVAL;
9580 }
9581
9582 while (error == 0) {
9583
9584 /* Try to find matching namespace item */
9585 for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
9586 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9587 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9588 break;
9589 }
9590 }
9591 }
9592
9593 if (i >= MAX_NSPACE_ITEMS) {
9594 /* Nothing is there yet. Wait for wake up and retry */
9595 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
9596 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9597 /* Prevent infinite loop if snapshot handler exited */
9598 error = EINVAL;
9599 break;
9600 }
9601 continue;
9602 }
9603
9604 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
9605 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
9606 nspace_items[i].token = ++nspace_token_id;
9607
9608 assert(nspace_items[i].vp);
9609 struct fileproc *fp;
9610 int32_t indx;
9611 int32_t fmode;
9612 struct proc *p = current_proc();
9613 vfs_context_t ctx = vfs_context_current();
9614 struct vnode_attr va;
9615 bool vn_get_successful = false;
9616 bool vn_open_successful = false;
9617 bool fp_alloc_successful = false;
9618
9619 /*
9620 * Use vnode pointer to acquire a file descriptor for
9621 * hand-off to userland
9622 */
9623 fmode = nspace_open_flags_for_type(nspace_type);
9624 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
9625 if (error) goto cleanup;
9626 vn_get_successful = true;
9627
9628 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
9629 if (error) goto cleanup;
9630 vn_open_successful = true;
9631
9632 error = falloc(p, &fp, &indx, ctx);
9633 if (error) goto cleanup;
9634 fp_alloc_successful = true;
9635
9636 fp->f_fglob->fg_flag = fmode;
9637 fp->f_fglob->fg_ops = &vnops;
9638 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
9639
9640 proc_fdlock(p);
9641 procfdtbl_releasefd(p, indx, NULL);
9642 fp_drop(p, indx, fp, 1);
9643 proc_fdunlock(p);
9644
9645 /*
9646 * All variants of the namespace handler struct support these three fields:
9647 * token, flags, and the FD pointer
9648 */
9649 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
9650 if (error) goto cleanup;
9651 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
9652 if (error) goto cleanup;
9653 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
9654 if (error) goto cleanup;
9655
9656 /*
9657 * Handle optional fields:
9658 * the extended version supports an info ptr (offset, length), and the
9659 * namedata version supports a unique per-link object ID
9661 *
9662 */
9663 if (nhd->infoptr) {
9664 uio_t uio = (uio_t)nspace_items[i].arg;
9665 uint64_t u_offset, u_length;
9666
9667 if (uio) {
9668 u_offset = uio_offset(uio);
9669 u_length = uio_resid(uio);
9670 } else {
9671 u_offset = 0;
9672 u_length = 0;
9673 }
9674 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
9675 if (error) goto cleanup;
9676 error = copyout(&u_length, nhd->infoptr + sizeof(uint64_t), sizeof(uint64_t));
9677 if (error) goto cleanup;
9678 }
9679
9680 if (nhd->objid) {
9681 VATTR_INIT(&va);
9682 VATTR_WANTED(&va, va_linkid);
9683 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
9684 if (error) goto cleanup;
9685
9686 uint64_t linkid = 0;
9687 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
9688 linkid = (uint64_t)va.va_linkid;
9689 }
9690 error = copyout(&linkid, nhd->objid, sizeof(uint64_t));
9691 }
9692 cleanup:
9693 if (error) {
9694 if (fp_alloc_successful) fp_free(p, indx, fp);
9695 if (vn_open_successful) vn_close(nspace_items[i].vp, fmode, ctx);
9696 unblock = 1;
9697 }
9698
9699 if (vn_get_successful) vnode_put(nspace_items[i].vp);
9700
9701 break;
9702 }
9703
9704 if (unblock) {
9705 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9706 vnode_lock_spin(nspace_items[i].vp);
9707 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9708 vnode_unlock(nspace_items[i].vp);
9709 }
9710 nspace_items[i].vp = NULL;
9711 nspace_items[i].vid = 0;
9712 nspace_items[i].flags = NSPACE_ITEM_DONE;
9713 nspace_items[i].token = 0;
9714
9715 wakeup((caddr_t)&(nspace_items[i].vp));
9716 }
9717
9718 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
9719 // just go through every snapshot event and unblock it immediately.
9720 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9721 for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
9722 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9723 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9724 nspace_items[i].vp = NULL;
9725 nspace_items[i].vid = 0;
9726 nspace_items[i].flags = NSPACE_ITEM_DONE;
9727 nspace_items[i].token = 0;
9728
9729 wakeup((caddr_t)&(nspace_items[i].vp));
9730 }
9731 }
9732 }
9733 }
9734 }
9735
9736 lck_mtx_unlock(&nspace_handler_lock);
9737
9738 lck_mtx_lock(&nspace_handler_exclusion_lock);
9739 nspace_handlers[nspace_type].handler_busy = 0;
9740 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9741
9742 return error;
9743 }
9744
9745 static inline int validate_namespace_args (int is64bit, int size) {
9746
9747 if (is64bit) {
9748 /* Must be one of these */
9749 if (size == sizeof(user64_namespace_handler_info)) {
9750 goto sizeok;
9751 }
9752 if (size == sizeof(user64_namespace_handler_info_ext)) {
9753 goto sizeok;
9754 }
9755 if (size == sizeof(user64_namespace_handler_data)) {
9756 goto sizeok;
9757 }
9758 return EINVAL;
9759 }
9760 else {
9761 /* 32 bit -- must be one of these */
9762 if (size == sizeof(user32_namespace_handler_info)) {
9763 goto sizeok;
9764 }
9765 if (size == sizeof(user32_namespace_handler_info_ext)) {
9766 goto sizeok;
9767 }
9768 if (size == sizeof(user32_namespace_handler_data)) {
9769 goto sizeok;
9770 }
9771 return EINVAL;
9772 }
9773
9774 sizeok:
9775
9776 return 0;
9777
9778 }
9779
9780 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
9781 {
9782 int error = 0;
9783 namespace_handler_data nhd;
9784
9785 bzero (&nhd, sizeof(namespace_handler_data));
9786
9787 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9788 return error;
9789 }
9790
9791 error = validate_namespace_args (is64bit, size);
9792 if (error) {
9793 return error;
9794 }
9795
9796 /* Copy in the userland pointers into our kernel-only struct */
9797
9798 if (is64bit) {
9799 /* 64 bit userland structures */
9800 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
9801 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
9802 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
9803
9804 /* If the size is greater than the standard info struct, add in extra fields */
9805 if (size > (sizeof(user64_namespace_handler_info))) {
9806 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
9807 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
9808 }
9809 if (size == (sizeof(user64_namespace_handler_data))) {
9810 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
9811 }
9812 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9813 }
9814 }
9815 else {
9816 /* 32 bit userland structures */
9817 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
9818 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
9819 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
9820
9821 if (size > (sizeof(user32_namespace_handler_info))) {
9822 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
9823 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
9824 }
9825 if (size == (sizeof(user32_namespace_handler_data))) {
9826 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
9827 }
9828 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9829 }
9830 }
9831
9832 return wait_for_namespace_event(&nhd, nspace_type);
9833 }
9834
9835 /*
9836 * Make a filesystem-specific control call:
9837 */
9838 /* ARGSUSED */
9839 static int
9840 fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
9841 {
9842 int error=0;
9843 boolean_t is64bit;
9844 u_int size;
9845 #define STK_PARAMS 128
9846 char stkbuf[STK_PARAMS] = {0};
9847 caddr_t data, memp;
9848 vnode_t vp = *arg_vp;
9849
9850 size = IOCPARM_LEN(cmd);
9851 if (size > IOCPARM_MAX) return (EINVAL);
9852
9853 is64bit = proc_is64bit(p);
9854
9855 memp = NULL;
9856
9857
9858 /*
9859 * ensure the buffer is large enough for underlying calls
9860 */
9861 #ifndef HFSIOC_GETPATH
9862 typedef char pn_t[MAXPATHLEN];
9863 #define HFSIOC_GETPATH _IOWR('h', 13, pn_t)
9864 #endif
9865
9866 #ifndef HFS_GETPATH
9867 #define HFS_GETPATH IOCBASECMD(HFSIOC_GETPATH)
9868 #endif
9869 if (IOCBASECMD(cmd) == HFS_GETPATH) {
9870 /* Round up to MAXPATHLEN regardless of user input */
9871 size = MAXPATHLEN;
9872 }
9873
9874 if (size > sizeof (stkbuf)) {
9875 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
9876 data = memp;
9877 } else {
9878 data = &stkbuf[0];
9879 };
9880
9881 if (cmd & IOC_IN) {
9882 if (size) {
9883 error = copyin(udata, data, size);
9884 if (error) {
9885 if (memp) {
9886 kfree (memp, size);
9887 }
9888 return error;
9889 }
9890 } else {
9891 if (is64bit) {
9892 *(user_addr_t *)data = udata;
9893 }
9894 else {
9895 *(uint32_t *)data = (uint32_t)udata;
9896 }
9897 };
9898 } else if ((cmd & IOC_OUT) && size) {
9899 /*
9900 * Zero the buffer so the user always
9901 * gets back something deterministic.
9902 */
9903 bzero(data, size);
9904 } else if (cmd & IOC_VOID) {
9905 if (is64bit) {
9906 *(user_addr_t *)data = udata;
9907 }
9908 else {
9909 *(uint32_t *)data = (uint32_t)udata;
9910 }
9911 }
9912
9913 /* Check to see if it's a generic command */
9914 switch (IOCBASECMD(cmd)) {
9915
9916 case FSCTL_SYNC_VOLUME: {
9917 mount_t mp = vp->v_mount;
9918 int arg = *(uint32_t*)data;
9919
9920 /* record vid of vp so we can drop it below. */
9921 uint32_t vvid = vp->v_id;
9922
9923 /*
9924 * Then grab mount_iterref so that we can release the vnode.
9925 * Without this, a thread may call vnode_iterate_prepare then
9926 * get into a deadlock because we've never released the root vp
9927 */
9928 error = mount_iterref (mp, 0);
9929 if (error) {
9930 break;
9931 }
9932 vnode_put(vp);
9933
9934 /* issue the sync for this volume */
9935 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
9936
9937 /*
9938 * Then release the mount_iterref once we're done syncing; it's not
9939 * needed for the VNOP_IOCTL below
9940 */
9941 mount_iterdrop(mp);
9942
9943 if (arg & FSCTL_SYNC_FULLSYNC) {
9944 /* re-obtain vnode iocount on the root vp, if possible */
9945 error = vnode_getwithvid (vp, vvid);
9946 if (error == 0) {
9947 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
9948 vnode_put (vp);
9949 }
9950 }
9951 /* mark the argument VP as having been released */
9952 *arg_vp = NULL;
9953 }
9954 break;
9955
9956 case FSCTL_ROUTEFS_SETROUTEID: {
9957 #if ROUTEFS
9958 char routepath[MAXPATHLEN];
9959 size_t len = 0;
9960
9961 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9962 break;
9963 }
9964 bzero(routepath, MAXPATHLEN);
9965 error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
9966 if (error) {
9967 break;
9968 }
9969 error = routefs_kernel_mount(routepath);
9970 if (error) {
9971 break;
9972 }
9973 #endif
9974 }
9975 break;
9976
9977 case FSCTL_SET_PACKAGE_EXTS: {
9978 user_addr_t ext_strings;
9979 uint32_t num_entries;
9980 uint32_t max_width;
9981
9982 if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0)))
9983 break;
9984
9985 if ( (is64bit && size != sizeof(user64_package_ext_info))
9986 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
9987
9988 // either you're 64-bit and passed a 64-bit struct or
9989 // you're 32-bit and passed a 32-bit struct. otherwise
9990 // it's not ok.
9991 error = EINVAL;
9992 break;
9993 }
9994
9995 if (is64bit) {
9996 ext_strings = ((user64_package_ext_info *)data)->strings;
9997 num_entries = ((user64_package_ext_info *)data)->num_entries;
9998 max_width = ((user64_package_ext_info *)data)->max_width;
9999 } else {
10000 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
10001 num_entries = ((user32_package_ext_info *)data)->num_entries;
10002 max_width = ((user32_package_ext_info *)data)->max_width;
10003 }
10004 error = set_package_extensions_table(ext_strings, num_entries, max_width);
10005 }
10006 break;
10007
10008 /* namespace handlers */
10009 case FSCTL_NAMESPACE_HANDLER_GET: {
10010 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
10011 }
10012 break;
10013
10014 /* Snapshot handlers */
10015 case FSCTL_OLD_SNAPSHOT_HANDLER_GET: {
10016 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
10017 }
10018 break;
10019
10020 case FSCTL_SNAPSHOT_HANDLER_GET_EXT: {
10021 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
10022 }
10023 break;
10024
10025 case FSCTL_NAMESPACE_HANDLER_UPDATE: {
10026 uint32_t token, val;
10027 int i;
10028
10029 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10030 break;
10031 }
10032
10033 if (!nspace_is_special_process(p)) {
10034 error = EINVAL;
10035 break;
10036 }
10037
10038 token = ((uint32_t *)data)[0];
10039 val = ((uint32_t *)data)[1];
10040
10041 lck_mtx_lock(&nspace_handler_lock);
10042
10043 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10044 if (nspace_items[i].token == token) {
10045 break; /* exit for loop, not case stmt */
10046 }
10047 }
10048
10049 if (i >= MAX_NSPACE_ITEMS) {
10050 error = ENOENT;
10051 } else {
10052 //
10053 // if this bit is set, when resolve_nspace_item() times out
10054 // it will loop and go back to sleep.
10055 //
10056 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
10057 }
10058
10059 lck_mtx_unlock(&nspace_handler_lock);
10060
10061 if (error) {
10062 printf("nspace-handler-update: did not find token %u\n", token);
10063 }
10064 }
10065 break;
10066
10067 case FSCTL_NAMESPACE_HANDLER_UNBLOCK: {
10068 uint32_t token, val;
10069 int i;
10070
10071 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10072 break;
10073 }
10074
10075 if (!nspace_is_special_process(p)) {
10076 error = EINVAL;
10077 break;
10078 }
10079
10080 token = ((uint32_t *)data)[0];
10081 val = ((uint32_t *)data)[1];
10082
10083 lck_mtx_lock(&nspace_handler_lock);
10084
10085 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10086 if (nspace_items[i].token == token) {
10087 break; /* exit for loop, not case statement */
10088 }
10089 }
10090
10091 if (i >= MAX_NSPACE_ITEMS) {
10092 printf("nspace-handler-unblock: did not find token %u\n", token);
10093 error = ENOENT;
10094 } else {
10095 if (val == 0 && nspace_items[i].vp) {
10096 vnode_lock_spin(nspace_items[i].vp);
10097 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10098 vnode_unlock(nspace_items[i].vp);
10099 }
10100
10101 nspace_items[i].vp = NULL;
10102 nspace_items[i].arg = NULL;
10103 nspace_items[i].op = 0;
10104 nspace_items[i].vid = 0;
10105 nspace_items[i].flags = NSPACE_ITEM_DONE;
10106 nspace_items[i].token = 0;
10107
10108 wakeup((caddr_t)&(nspace_items[i].vp));
10109 }
10110
10111 lck_mtx_unlock(&nspace_handler_lock);
10112 }
10113 break;
10114
10115 case FSCTL_NAMESPACE_HANDLER_CANCEL: {
10116 uint32_t token, val;
10117 int i;
10118
10119 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10120 break;
10121 }
10122
10123 if (!nspace_is_special_process(p)) {
10124 error = EINVAL;
10125 break;
10126 }
10127
10128 token = ((uint32_t *)data)[0];
10129 val = ((uint32_t *)data)[1];
10130
10131 lck_mtx_lock(&nspace_handler_lock);
10132
10133 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10134 if (nspace_items[i].token == token) {
10135 break; /* exit for loop, not case stmt */
10136 }
10137 }
10138
10139 if (i >= MAX_NSPACE_ITEMS) {
10140 printf("nspace-handler-cancel: did not find token %u\n", token);
10141 error = ENOENT;
10142 } else {
10143 if (nspace_items[i].vp) {
10144 vnode_lock_spin(nspace_items[i].vp);
10145 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10146 vnode_unlock(nspace_items[i].vp);
10147 }
10148
10149 nspace_items[i].vp = NULL;
10150 nspace_items[i].arg = NULL;
10151 nspace_items[i].vid = 0;
10152 nspace_items[i].token = val;
10153 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
10154 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
10155
10156 wakeup((caddr_t)&(nspace_items[i].vp));
10157 }
10158
10159 lck_mtx_unlock(&nspace_handler_lock);
10160 }
10161 break;
10162
10163 case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
10164 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10165 break;
10166 }
10167
10168 // we explicitly do not do the namespace_handler_proc check here
10169
10170 lck_mtx_lock(&nspace_handler_lock);
10171 snapshot_timestamp = ((uint32_t *)data)[0];
10172 wakeup(&nspace_item_idx);
10173 lck_mtx_unlock(&nspace_handler_lock);
10174 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
10175
10176 }
10177 break;
10178
10179 case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
10180 {
10181 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10182 break;
10183 }
10184
10185 lck_mtx_lock(&nspace_handler_lock);
10186 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
10187 lck_mtx_unlock(&nspace_handler_lock);
10188 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
10189 nspace_allow_virtual_devs ? "" : " NOT");
10190 error = 0;
10191
10192 }
10193 break;
10194
10195 case FSCTL_SET_FSTYPENAME_OVERRIDE:
10196 {
10197 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10198 break;
10199 }
10200 if (vp->v_mount) {
10201 mount_lock(vp->v_mount);
10202 if (data[0] != 0) {
10203 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
10204 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
10205 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10206 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
10207 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
10208 }
10209 } else {
10210 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10211 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
10212 }
10213 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
10214 vp->v_mount->fstypename_override[0] = '\0';
10215 }
10216 mount_unlock(vp->v_mount);
10217 }
10218 }
10219 break;
10220
10221 default: {
10222 /* Invoke the filesystem-specific code */
10223 error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
10224 }
10225
10226 } /* end switch stmt */
10227
10228 /*
10229 * if no errors, copy any data to user. Size was
10230 * already set and checked above.
10231 */
10232 if (error == 0 && (cmd & IOC_OUT) && size)
10233 error = copyout(data, udata, size);
10234
10235 if (memp) {
10236 kfree(memp, size);
10237 }
10238
10239 return error;
10240 }
10241
10242 /* ARGSUSED */
10243 int
10244 fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
10245 {
10246 int error;
10247 struct nameidata nd;
10248 u_long nameiflags;
10249 vnode_t vp = NULL;
10250 vfs_context_t ctx = vfs_context_current();
10251
10252 AUDIT_ARG(cmd, uap->cmd);
10253 AUDIT_ARG(value32, uap->options);
10254 /* Get the vnode for the file we are getting info on: */
10255 nameiflags = 0;
10256 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
10257 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
10258 UIO_USERSPACE, uap->path, ctx);
10259 if ((error = namei(&nd))) goto done;
10260 vp = nd.ni_vp;
10261 nameidone(&nd);
10262
10263 #if CONFIG_MACF
10264 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
10265 if (error) {
10266 goto done;
10267 }
10268 #endif
10269
10270 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10271
10272 done:
10273 if (vp)
10274 vnode_put(vp);
10275 return error;
10276 }
10277 /* ARGSUSED */
10278 int
10279 ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
10280 {
10281 int error;
10282 vnode_t vp = NULL;
10283 vfs_context_t ctx = vfs_context_current();
10284 int fd = -1;
10285
10286 AUDIT_ARG(fd, uap->fd);
10287 AUDIT_ARG(cmd, uap->cmd);
10288 AUDIT_ARG(value32, uap->options);
10289
10290 /* Get the vnode for the file we are getting info on: */
10291 if ((error = file_vnode(uap->fd, &vp)))
10292 return error;
10293 fd = uap->fd;
10294 if ((error = vnode_getwithref(vp))) {
10295 file_drop(fd);
10296 return error;
10297 }
10298
10299 #if CONFIG_MACF
10300 if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
10301 file_drop(fd);
10302 vnode_put(vp);
10303 return error;
10304 }
10305 #endif
10306
10307 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10308
10309 file_drop(fd);
10310
10311 /* validate vp; fsctl_internal() can drop iocount and reset vp to NULL */
10312 if (vp) {
10313 vnode_put(vp);
10314 }
10315
10316 return error;
10317 }
10318 /* end of fsctl system call */
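/*
 * Illustrative userspace sketch (not compiled here): how a caller reaches the
 * switch above through the fsctl()/ffsctl() wrappers declared in
 * <sys/fsctl.h>.  The helper name and the request variable are placeholders;
 * a real caller would pass a command defined by <sys/fsctl.h> or by the
 * target filesystem's headers.
 */
#if 0
#include <sys/attr.h>		/* FSOPT_NOFOLLOW */
#include <sys/fsctl.h>		/* fsctl(), ffsctl() */
#include <stdio.h>

static int
issue_fsctl(const char *path, unsigned long request, void *data)
{
	/* FSOPT_NOFOLLOW keeps namei() from following a trailing symlink,
	 * mirroring the nameiflags handling in fsctl() above. */
	if (fsctl(path, request, data, FSOPT_NOFOLLOW) == -1) {
		perror("fsctl");
		return (-1);
	}
	return (0);
}
#endif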
10319
10320 /*
10321 * Retrieve the data of an extended attribute.
10322 */
10323 int
10324 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
10325 {
10326 vnode_t vp;
10327 struct nameidata nd;
10328 char attrname[XATTR_MAXNAMELEN+1];
10329 vfs_context_t ctx = vfs_context_current();
10330 uio_t auio = NULL;
10331 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10332 size_t attrsize = 0;
10333 size_t namelen;
10334 u_int32_t nameiflags;
10335 int error;
10336 char uio_buf[ UIO_SIZEOF(1) ];
10337
10338 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10339 return (EINVAL);
10340
10341 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10342 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
10343 if ((error = namei(&nd))) {
10344 return (error);
10345 }
10346 vp = nd.ni_vp;
10347 nameidone(&nd);
10348
10349 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10350 goto out;
10351 }
10352 if (xattr_protected(attrname)) {
10353 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
10354 error = EPERM;
10355 goto out;
10356 }
10357 }
10358 /*
10359 * the specific check for 0xffffffff is a hack to preserve
10360 * binary compatibility in K64 with applications that discovered
10361 * that passing in a buf pointer and a size of -1 resulted in
10362 * just the size of the indicated extended attribute being returned.
10363 * this isn't part of the documented behavior, but because of the
10364 * original implementation's check for "uap->size > 0", this behavior
10365 * was allowed. In K32 that check turned into a signed comparison
10366 * even though uap->size is unsigned... in K64, we blow by that
10367 * check because uap->size is unsigned and doesn't get sign smeared
10368 * in the munger for a 32 bit user app. we also need to add a
10369 * check to limit the maximum size of the buffer being passed in...
10370 * unfortunately, the underlying filesystems seem to just malloc
10371 * the requested size even if the actual extended attribute is tiny.
10372 * because that malloc is for kernel wired memory, we have to put a
10373 * sane limit on it.
10374 *
10375 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
10376 * U64 running on K64 will yield -1 (64 bits wide)
10377 * U32/U64 running on K32 will yield -1 (32 bits wide)
10378 */
10379 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
10380 goto no_uio;
10381
10382 if (uap->value) {
10383 if (uap->size > (size_t)XATTR_MAXSIZE)
10384 uap->size = XATTR_MAXSIZE;
10385
10386 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10387 &uio_buf[0], sizeof(uio_buf));
10388 uio_addiov(auio, uap->value, uap->size);
10389 }
10390 no_uio:
10391 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
10392 out:
10393 vnode_put(vp);
10394
10395 if (auio) {
10396 *retval = uap->size - uio_resid(auio);
10397 } else {
10398 *retval = (user_ssize_t)attrsize;
10399 }
10400
10401 return (error);
10402 }
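/*
 * Illustrative userspace sketch (not compiled here) of the two-call pattern
 * the code above supports: ask for the attribute size with a NULL buffer
 * (the no_uio path), then read the value.  Assumes the getxattr() wrapper
 * from <sys/xattr.h>; the helper name is made up.
 */
#if 0
#include <sys/types.h>
#include <sys/xattr.h>
#include <stdlib.h>

static void *
read_xattr(const char *path, const char *name, ssize_t *lenp)
{
	/* With value == NULL the kernel skips the uio and just reports
	 * attrsize.  The size can change between the two calls, so a
	 * robust caller would retry on a short result. */
	ssize_t len = getxattr(path, name, NULL, 0, 0, XATTR_NOFOLLOW);
	if (len < 0)
		return (NULL);

	void *buf = malloc((size_t)len);
	if (buf == NULL)
		return (NULL);

	len = getxattr(path, name, buf, (size_t)len, 0, XATTR_NOFOLLOW);
	if (len < 0) {
		free(buf);
		return (NULL);
	}
	*lenp = len;
	return (buf);
}
#endif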
10403
10404 /*
10405 * Retrieve the data of an extended attribute.
10406 */
10407 int
10408 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
10409 {
10410 vnode_t vp;
10411 char attrname[XATTR_MAXNAMELEN+1];
10412 uio_t auio = NULL;
10413 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10414 size_t attrsize = 0;
10415 size_t namelen;
10416 int error;
10417 char uio_buf[ UIO_SIZEOF(1) ];
10418
10419 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10420 return (EINVAL);
10421
10422 if ( (error = file_vnode(uap->fd, &vp)) ) {
10423 return (error);
10424 }
10425 if ( (error = vnode_getwithref(vp)) ) {
10426 file_drop(uap->fd);
10427 return(error);
10428 }
10429 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10430 goto out;
10431 }
10432 if (xattr_protected(attrname)) {
10433 error = EPERM;
10434 goto out;
10435 }
10436 if (uap->value && uap->size > 0) {
10437 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10438 &uio_buf[0], sizeof(uio_buf));
10439 uio_addiov(auio, uap->value, uap->size);
10440 }
10441
10442 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
10443 out:
10444 (void)vnode_put(vp);
10445 file_drop(uap->fd);
10446
10447 if (auio) {
10448 *retval = uap->size - uio_resid(auio);
10449 } else {
10450 *retval = (user_ssize_t)attrsize;
10451 }
10452 return (error);
10453 }
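/*
 * Illustrative sketch (userspace, not compiled here) for the fd-based
 * variant.  Note that the option check above rejects XATTR_NOFOLLOW for the
 * f* calls, so options is simply 0 here.  Assumes <sys/xattr.h>; the helper
 * name is made up.
 */
#if 0
#include <sys/types.h>
#include <sys/xattr.h>

static ssize_t
fd_xattr_size(int fd, const char *name)
{
	/* NULL/0 again returns just the attribute's size. */
	return (fgetxattr(fd, name, NULL, 0, 0, 0));
}
#endif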
10454
10455 /*
10456 * Set the data of an extended attribute.
10457 */
10458 int
10459 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
10460 {
10461 vnode_t vp;
10462 struct nameidata nd;
10463 char attrname[XATTR_MAXNAMELEN+1];
10464 vfs_context_t ctx = vfs_context_current();
10465 uio_t auio = NULL;
10466 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10467 size_t namelen;
10468 u_int32_t nameiflags;
10469 int error;
10470 char uio_buf[ UIO_SIZEOF(1) ];
10471
10472 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10473 return (EINVAL);
10474
10475 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10476 if (error == EPERM) {
10477 /* if the string won't fit in attrname, copyinstr emits EPERM */
10478 return (ENAMETOOLONG);
10479 }
10480 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10481 return error;
10482 }
10483 if (xattr_protected(attrname))
10484 return(EPERM);
10485 if (uap->size != 0 && uap->value == 0) {
10486 return (EINVAL);
10487 }
10488
10489 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10490 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
10491 if ((error = namei(&nd))) {
10492 return (error);
10493 }
10494 vp = nd.ni_vp;
10495 nameidone(&nd);
10496
10497 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10498 &uio_buf[0], sizeof(uio_buf));
10499 uio_addiov(auio, uap->value, uap->size);
10500
10501 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
10502 #if CONFIG_FSE
10503 if (error == 0) {
10504 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10505 FSE_ARG_VNODE, vp,
10506 FSE_ARG_DONE);
10507 }
10508 #endif
10509 vnode_put(vp);
10510 *retval = 0;
10511 return (error);
10512 }
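/*
 * Illustrative userspace sketch (not compiled here), assuming the setxattr()
 * wrapper from <sys/xattr.h>.  The attribute name is arbitrary;
 * XATTR_CREATE makes the call fail with EEXIST if the attribute already
 * exists, and protected names are refused with EPERM as above.
 */
#if 0
#include <sys/xattr.h>
#include <string.h>

static int
tag_file(const char *path)
{
	static const char value[] = "example";

	return (setxattr(path, "com.example.tag", value, strlen(value),
	    0, XATTR_CREATE));
}
#endif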
10513
10514 /*
10515 * Set the data of an extended attribute.
10516 */
10517 int
10518 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
10519 {
10520 vnode_t vp;
10521 char attrname[XATTR_MAXNAMELEN+1];
10522 uio_t auio = NULL;
10523 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10524 size_t namelen;
10525 int error;
10526 char uio_buf[ UIO_SIZEOF(1) ];
10527 #if CONFIG_FSE
10528 vfs_context_t ctx = vfs_context_current();
10529 #endif
10530
10531 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10532 return (EINVAL);
10533
10534 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10535 if (error == EPERM) {
10536 /* if the string won't fit in attrname, copyinstr emits EPERM */
10537 return (ENAMETOOLONG);
10538 }
10539 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10540 return error;
10541 }
10542 if (xattr_protected(attrname))
10543 return(EPERM);
10544 if (uap->size != 0 && uap->value == 0) {
10545 return (EINVAL);
10546 }
10547 if ( (error = file_vnode(uap->fd, &vp)) ) {
10548 return (error);
10549 }
10550 if ( (error = vnode_getwithref(vp)) ) {
10551 file_drop(uap->fd);
10552 return(error);
10553 }
10554 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10555 &uio_buf[0], sizeof(uio_buf));
10556 uio_addiov(auio, uap->value, uap->size);
10557
10558 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
10559 #if CONFIG_FSE
10560 if (error == 0) {
10561 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10562 FSE_ARG_VNODE, vp,
10563 FSE_ARG_DONE);
10564 }
10565 #endif
10566 vnode_put(vp);
10567 file_drop(uap->fd);
10568 *retval = 0;
10569 return (error);
10570 }
10571
10572 /*
10573 * Remove an extended attribute.
10574 * XXX Code duplication here.
10575 */
10576 int
10577 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
10578 {
10579 vnode_t vp;
10580 struct nameidata nd;
10581 char attrname[XATTR_MAXNAMELEN+1];
10582 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10583 vfs_context_t ctx = vfs_context_current();
10584 size_t namelen;
10585 u_int32_t nameiflags;
10586 int error;
10587
10588 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10589 return (EINVAL);
10590
10591 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10592 if (error != 0) {
10593 return (error);
10594 }
10595 if (xattr_protected(attrname))
10596 return(EPERM);
10597 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10598 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
10599 if ((error = namei(&nd))) {
10600 return (error);
10601 }
10602 vp = nd.ni_vp;
10603 nameidone(&nd);
10604
10605 error = vn_removexattr(vp, attrname, uap->options, ctx);
10606 #if CONFIG_FSE
10607 if (error == 0) {
10608 add_fsevent(FSE_XATTR_REMOVED, ctx,
10609 FSE_ARG_VNODE, vp,
10610 FSE_ARG_DONE);
10611 }
10612 #endif
10613 vnode_put(vp);
10614 *retval = 0;
10615 return (error);
10616 }
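/*
 * Illustrative userspace sketch (not compiled here), assuming the
 * removexattr() wrapper from <sys/xattr.h>.  The attribute name is the
 * arbitrary one used in the earlier sketch; protected names
 * (xattr_protected() above) are rejected with EPERM.
 */
#if 0
#include <sys/xattr.h>

static int
untag_file(const char *path)
{
	return (removexattr(path, "com.example.tag", XATTR_NOFOLLOW));
}
#endif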
10617
10618 /*
10619 * Remove an extended attribute.
10620 * XXX Code duplication here.
10621 */
10622 int
10623 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
10624 {
10625 vnode_t vp;
10626 char attrname[XATTR_MAXNAMELEN+1];
10627 size_t namelen;
10628 int error;
10629 #if CONFIG_FSE
10630 vfs_context_t ctx = vfs_context_current();
10631 #endif
10632
10633 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10634 return (EINVAL);
10635
10636 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10637 if (error != 0) {
10638 return (error);
10639 }
10640 if (xattr_protected(attrname))
10641 return(EPERM);
10642 if ( (error = file_vnode(uap->fd, &vp)) ) {
10643 return (error);
10644 }
10645 if ( (error = vnode_getwithref(vp)) ) {
10646 file_drop(uap->fd);
10647 return(error);
10648 }
10649
10650 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
10651 #if CONFIG_FSE
10652 if (error == 0) {
10653 add_fsevent(FSE_XATTR_REMOVED, ctx,
10654 FSE_ARG_VNODE, vp,
10655 FSE_ARG_DONE);
10656 }
10657 #endif
10658 vnode_put(vp);
10659 file_drop(uap->fd);
10660 *retval = 0;
10661 return (error);
10662 }
10663
10664 /*
10665 * Retrieve the list of extended attribute names.
10666 * XXX Code duplication here.
10667 */
10668 int
10669 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
10670 {
10671 vnode_t vp;
10672 struct nameidata nd;
10673 vfs_context_t ctx = vfs_context_current();
10674 uio_t auio = NULL;
10675 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10676 size_t attrsize = 0;
10677 u_int32_t nameiflags;
10678 int error;
10679 char uio_buf[ UIO_SIZEOF(1) ];
10680
10681 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10682 return (EINVAL);
10683
10684 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10685 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
10686 if ((error = namei(&nd))) {
10687 return (error);
10688 }
10689 vp = nd.ni_vp;
10690 nameidone(&nd);
10691 if (uap->namebuf != 0 && uap->bufsize > 0) {
10692 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
10693 &uio_buf[0], sizeof(uio_buf));
10694 uio_addiov(auio, uap->namebuf, uap->bufsize);
10695 }
10696
10697 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
10698
10699 vnode_put(vp);
10700 if (auio) {
10701 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10702 } else {
10703 *retval = (user_ssize_t)attrsize;
10704 }
10705 return (error);
10706 }
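/*
 * Illustrative userspace sketch (not compiled here): walking the
 * NUL-separated name list that vn_listxattr() fills in.  As with getxattr(),
 * a NULL buffer returns the size needed.  Assumes <sys/xattr.h>; the helper
 * name is made up.
 */
#if 0
#include <sys/xattr.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void
print_xattr_names(const char *path)
{
	ssize_t len = listxattr(path, NULL, 0, XATTR_NOFOLLOW);
	if (len <= 0)
		return;

	char *names = malloc((size_t)len);
	if (names == NULL)
		return;

	len = listxattr(path, names, (size_t)len, XATTR_NOFOLLOW);
	for (char *p = names; len > 0 && p < names + len; p += strlen(p) + 1)
		printf("%s\n", p);
	free(names);
}
#endif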
10707
10708 /*
10709 * Retrieve the list of extended attribute names.
10710 * XXX Code duplication here.
10711 */
10712 int
10713 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
10714 {
10715 vnode_t vp;
10716 uio_t auio = NULL;
10717 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10718 size_t attrsize = 0;
10719 int error;
10720 char uio_buf[ UIO_SIZEOF(1) ];
10721
10722 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10723 return (EINVAL);
10724
10725 if ( (error = file_vnode(uap->fd, &vp)) ) {
10726 return (error);
10727 }
10728 if ( (error = vnode_getwithref(vp)) ) {
10729 file_drop(uap->fd);
10730 return(error);
10731 }
10732 if (uap->namebuf != 0 && uap->bufsize > 0) {
10733 auio = uio_createwithbuffer(1, 0, spacetype,
10734 UIO_READ, &uio_buf[0], sizeof(uio_buf));
10735 uio_addiov(auio, uap->namebuf, uap->bufsize);
10736 }
10737
10738 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
10739
10740 vnode_put(vp);
10741 file_drop(uap->fd);
10742 if (auio) {
10743 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10744 } else {
10745 *retval = (user_ssize_t)attrsize;
10746 }
10747 return (error);
10748 }
10749
10750 static int fsgetpath_internal(
10751 vfs_context_t ctx, int volfs_id, uint64_t objid,
10752 vm_size_t bufsize, caddr_t buf, int *pathlen)
10753 {
10754 int error;
10755 struct mount *mp = NULL;
10756 vnode_t vp;
10757 int length;
10758 int bpflags;
10759 /* maximum number of times to retry build_path */
10760 unsigned int retries = 0x10;
10761
10762 if (bufsize > PAGE_SIZE) {
10763 return (EINVAL);
10764 }
10765
10766 if (buf == NULL) {
10767 return (ENOMEM);
10768 }
10769
10770 retry:
10771 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
10772 error = ENOTSUP; /* unexpected failure */
10773 return ENOTSUP;
10774 }
10775
10776 unionget:
10777 if (objid == 2) {
10778 error = VFS_ROOT(mp, &vp, ctx);
10779 } else {
10780 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
10781 }
10782
10783 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
10784 /*
10785 * If the fileid isn't found and we're in a union
10786 * mount volume, then see if the fileid is in the
10787 * mounted-on volume.
10788 */
10789 struct mount *tmp = mp;
10790 mp = vnode_mount(tmp->mnt_vnodecovered);
10791 vfs_unbusy(tmp);
10792 if (vfs_busy(mp, LK_NOWAIT) == 0)
10793 goto unionget;
10794 } else {
10795 vfs_unbusy(mp);
10796 }
10797
10798 if (error) {
10799 return error;
10800 }
10801
10802 #if CONFIG_MACF
10803 error = mac_vnode_check_fsgetpath(ctx, vp);
10804 if (error) {
10805 vnode_put(vp);
10806 return error;
10807 }
10808 #endif
10809
10810 /* Obtain the absolute path to this vnode. */
10811 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
10812 bpflags |= BUILDPATH_CHECK_MOVED;
10813 error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
10814 vnode_put(vp);
10815
10816 if (error) {
10817 /* there was a race building the path, try a few more times */
10818 if (error == EAGAIN) {
10819 --retries;
10820 if (retries > 0)
10821 goto retry;
10822
10823 error = ENOENT;
10824 }
10825 goto out;
10826 }
10827
10828 AUDIT_ARG(text, buf);
10829
10830 if (kdebug_enable) {
10831 long dbg_parms[NUMPARMS];
10832 int dbg_namelen;
10833
10834 dbg_namelen = (int)sizeof(dbg_parms);
10835
10836 if (length < dbg_namelen) {
10837 memcpy((char *)dbg_parms, buf, length);
10838 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
10839
10840 dbg_namelen = length;
10841 } else {
10842 memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
10843 }
10844
10845 kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
10846 }
10847
10848 *pathlen = (user_ssize_t)length; /* may be superseded by error */
10849
10850 out:
10851 return (error);
10852 }
10853
10854 /*
10855 * Obtain the full pathname of a file system object by id.
10856 *
10857 * This is a private SPI used by the File Manager.
10858 */
10859 __private_extern__
10860 int
10861 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
10862 {
10863 vfs_context_t ctx = vfs_context_current();
10864 fsid_t fsid;
10865 char *realpath;
10866 int length;
10867 int error;
10868
10869 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
10870 return (error);
10871 }
10872 AUDIT_ARG(value32, fsid.val[0]);
10873 AUDIT_ARG(value64, uap->objid);
10874 /* Restrict output buffer size for now. */
10875
10876 if (uap->bufsize > PAGE_SIZE) {
10877 return (EINVAL);
10878 }
10879 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
10880 if (realpath == NULL) {
10881 return (ENOMEM);
10882 }
10883
10884 error = fsgetpath_internal(
10885 ctx, fsid.val[0], uap->objid,
10886 uap->bufsize, realpath, &length);
10887
10888 if (error) {
10889 goto out;
10890 }
10891
10892 error = copyout((caddr_t)realpath, uap->buf, length);
10893
10894 *retval = (user_ssize_t)length; /* may be superseded by error */
10895 out:
10896 if (realpath) {
10897 FREE(realpath, M_TEMP);
10898 }
10899 return (error);
10900 }
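/*
 * Illustrative userspace sketch (not compiled here): resolving a path from a
 * volume id + object id, the way File Manager uses this SPI.  It assumes the
 * fsgetpath() wrapper that later SDKs export (the <sys/fsgetpath.h> header
 * location and availability are assumptions).  The fsid comes from
 * statfs()'s f_fsid and the object id from stat()'s st_ino.
 */
#if 0
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/fsgetpath.h>	/* assumed location of the wrapper */
#include <stdint.h>

static ssize_t
path_from_ids(fsid_t fsid, uint64_t obj_id, char *buf, size_t buflen)
{
	/* bufsize is capped at PAGE_SIZE by the checks above. */
	return (fsgetpath(buf, buflen, &fsid, obj_id));
}
#endif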
10901
10902 /*
10903 * Common routine to handle various flavors of statfs data heading out
10904 * to user space.
10905 *
10906 * Returns: 0 Success
10907 * EFAULT
10908 */
10909 static int
10910 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
10911 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
10912 boolean_t partial_copy)
10913 {
10914 int error;
10915 int my_size, copy_size;
10916
10917 if (is_64_bit) {
10918 struct user64_statfs sfs;
10919 my_size = copy_size = sizeof(sfs);
10920 bzero(&sfs, my_size);
10921 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
10922 sfs.f_type = mp->mnt_vtable->vfc_typenum;
10923 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
10924 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
10925 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
10926 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
10927 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
10928 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
10929 sfs.f_files = (user64_long_t)sfsp->f_files;
10930 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
10931 sfs.f_fsid = sfsp->f_fsid;
10932 sfs.f_owner = sfsp->f_owner;
10933 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
10934 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
10935 } else {
10936 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
10937 }
10938 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
10939 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
10940
10941 if (partial_copy) {
10942 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
10943 }
10944 error = copyout((caddr_t)&sfs, bufp, copy_size);
10945 }
10946 else {
10947 struct user32_statfs sfs;
10948
10949 my_size = copy_size = sizeof(sfs);
10950 bzero(&sfs, my_size);
10951
10952 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
10953 sfs.f_type = mp->mnt_vtable->vfc_typenum;
10954 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
10955
10956 /*
10957 * It's possible for there to be more than 2^31 blocks in the filesystem, so we
10958 * have to fudge the numbers here in that case. We inflate the blocksize in order
10959 * to reflect the filesystem size as best we can.
10960 */
10961 if ((sfsp->f_blocks > INT_MAX)
10962 /* Hack for 4061702. I think the real fix is for Carbon to
10963 * look for some volume capability and not depend on hidden
10964 * semantics agreed between a FS and Carbon.
10965 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
10966 * for Carbon to set the bNoVolumeSizes volume attribute.
10967 * Without this, webdavfs files cannot be copied onto
10968 * disk because they look huge. This change should not affect
10969 * XSAN, which should not be setting these to -1.
10970 */
10971 && (sfsp->f_blocks != 0xffffffffffffffffULL)
10972 && (sfsp->f_bfree != 0xffffffffffffffffULL)
10973 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
10974 int shift;
10975
10976 /*
10977 * Work out how far we have to shift the block count down to make it fit.
10978 * Note that it's possible to have to shift so far that the resulting
10979 * blocksize would be unreportably large. At that point, we will clip
10980 * any values that don't fit.
10981 *
10982 * For safety's sake, we also ensure that f_iosize is never reported as
10983 * being smaller than f_bsize.
10984 */
10985 for (shift = 0; shift < 32; shift++) {
10986 if ((sfsp->f_blocks >> shift) <= INT_MAX)
10987 break;
10988 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
10989 break;
10990 }
10991 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
10992 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
10993 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
10994 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
10995 #undef __SHIFT_OR_CLIP
10996 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
10997 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
10998 } else {
10999 /* filesystem is small enough to be reported honestly */
11000 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
11001 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
11002 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
11003 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
11004 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
11005 }
11006 sfs.f_files = (user32_long_t)sfsp->f_files;
11007 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
11008 sfs.f_fsid = sfsp->f_fsid;
11009 sfs.f_owner = sfsp->f_owner;
11010 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
11011 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
11012 } else {
11013 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
11014 }
11015 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
11016 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
11017
11018 if (partial_copy) {
11019 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
11020 }
11021 error = copyout((caddr_t)&sfs, bufp, copy_size);
11022 }
11023
11024 if (sizep != NULL) {
11025 *sizep = my_size;
11026 }
11027 return(error);
11028 }
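/*
 * Stand-alone sketch of the shift-or-clip scaling used above (illustrative
 * only, not part of the build): inflate the reported block size by 2^shift
 * and shrink the counts by the same shift, clipping anything that still does
 * not fit in the 32-bit statfs fields.
 */
#if 0
#include <limits.h>
#include <stdint.h>

static void
scale_for_user32(uint64_t blocks, uint64_t bsize,
    uint32_t *blocks32, uint32_t *bsize32)
{
	int shift;

	for (shift = 0; shift < 32; shift++) {
		if ((blocks >> shift) <= INT_MAX)
			break;
		if ((bsize << (shift + 1)) > INT_MAX)
			break;
	}
	/* e.g. blocks = 3 * 2^31 with bsize = 4096 gives shift = 2:
	 * reported blocks = 3 * 2^29, reported bsize = 16384. */
	*blocks32 = (uint32_t)(((blocks >> shift) > INT_MAX) ?
	    INT_MAX : (blocks >> shift));
	*bsize32 = (uint32_t)(bsize << shift);
}
#endif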
11029
11030 /*
11031 * copy stat structure into user_stat structure.
11032 */
11033 void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
11034 {
11035 bzero(usbp, sizeof(*usbp));
11036
11037 usbp->st_dev = sbp->st_dev;
11038 usbp->st_ino = sbp->st_ino;
11039 usbp->st_mode = sbp->st_mode;
11040 usbp->st_nlink = sbp->st_nlink;
11041 usbp->st_uid = sbp->st_uid;
11042 usbp->st_gid = sbp->st_gid;
11043 usbp->st_rdev = sbp->st_rdev;
11044 #ifndef _POSIX_C_SOURCE
11045 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11046 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11047 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11048 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11049 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11050 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11051 #else
11052 usbp->st_atime = sbp->st_atime;
11053 usbp->st_atimensec = sbp->st_atimensec;
11054 usbp->st_mtime = sbp->st_mtime;
11055 usbp->st_mtimensec = sbp->st_mtimensec;
11056 usbp->st_ctime = sbp->st_ctime;
11057 usbp->st_ctimensec = sbp->st_ctimensec;
11058 #endif
11059 usbp->st_size = sbp->st_size;
11060 usbp->st_blocks = sbp->st_blocks;
11061 usbp->st_blksize = sbp->st_blksize;
11062 usbp->st_flags = sbp->st_flags;
11063 usbp->st_gen = sbp->st_gen;
11064 usbp->st_lspare = sbp->st_lspare;
11065 usbp->st_qspare[0] = sbp->st_qspare[0];
11066 usbp->st_qspare[1] = sbp->st_qspare[1];
11067 }
11068
11069 void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
11070 {
11071 bzero(usbp, sizeof(*usbp));
11072
11073 usbp->st_dev = sbp->st_dev;
11074 usbp->st_ino = sbp->st_ino;
11075 usbp->st_mode = sbp->st_mode;
11076 usbp->st_nlink = sbp->st_nlink;
11077 usbp->st_uid = sbp->st_uid;
11078 usbp->st_gid = sbp->st_gid;
11079 usbp->st_rdev = sbp->st_rdev;
11080 #ifndef _POSIX_C_SOURCE
11081 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11082 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11083 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11084 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11085 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11086 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11087 #else
11088 usbp->st_atime = sbp->st_atime;
11089 usbp->st_atimensec = sbp->st_atimensec;
11090 usbp->st_mtime = sbp->st_mtime;
11091 usbp->st_mtimensec = sbp->st_mtimensec;
11092 usbp->st_ctime = sbp->st_ctime;
11093 usbp->st_ctimensec = sbp->st_ctimensec;
11094 #endif
11095 usbp->st_size = sbp->st_size;
11096 usbp->st_blocks = sbp->st_blocks;
11097 usbp->st_blksize = sbp->st_blksize;
11098 usbp->st_flags = sbp->st_flags;
11099 usbp->st_gen = sbp->st_gen;
11100 usbp->st_lspare = sbp->st_lspare;
11101 usbp->st_qspare[0] = sbp->st_qspare[0];
11102 usbp->st_qspare[1] = sbp->st_qspare[1];
11103 }
11104
11105 /*
11106 * copy stat64 structure into user_stat64 structure.
11107 */
11108 void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
11109 {
11110 bzero(usbp, sizeof(*usbp));
11111
11112 usbp->st_dev = sbp->st_dev;
11113 usbp->st_ino = sbp->st_ino;
11114 usbp->st_mode = sbp->st_mode;
11115 usbp->st_nlink = sbp->st_nlink;
11116 usbp->st_uid = sbp->st_uid;
11117 usbp->st_gid = sbp->st_gid;
11118 usbp->st_rdev = sbp->st_rdev;
11119 #ifndef _POSIX_C_SOURCE
11120 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11121 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11122 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11123 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11124 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11125 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11126 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11127 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
11128 #else
11129 usbp->st_atime = sbp->st_atime;
11130 usbp->st_atimensec = sbp->st_atimensec;
11131 usbp->st_mtime = sbp->st_mtime;
11132 usbp->st_mtimensec = sbp->st_mtimensec;
11133 usbp->st_ctime = sbp->st_ctime;
11134 usbp->st_ctimensec = sbp->st_ctimensec;
11135 usbp->st_birthtime = sbp->st_birthtime;
11136 usbp->st_birthtimensec = sbp->st_birthtimensec;
11137 #endif
11138 usbp->st_size = sbp->st_size;
11139 usbp->st_blocks = sbp->st_blocks;
11140 usbp->st_blksize = sbp->st_blksize;
11141 usbp->st_flags = sbp->st_flags;
11142 usbp->st_gen = sbp->st_gen;
11143 usbp->st_lspare = sbp->st_lspare;
11144 usbp->st_qspare[0] = sbp->st_qspare[0];
11145 usbp->st_qspare[1] = sbp->st_qspare[1];
11146 }
11147
11148 void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
11149 {
11150 bzero(usbp, sizeof(*usbp));
11151
11152 usbp->st_dev = sbp->st_dev;
11153 usbp->st_ino = sbp->st_ino;
11154 usbp->st_mode = sbp->st_mode;
11155 usbp->st_nlink = sbp->st_nlink;
11156 usbp->st_uid = sbp->st_uid;
11157 usbp->st_gid = sbp->st_gid;
11158 usbp->st_rdev = sbp->st_rdev;
11159 #ifndef _POSIX_C_SOURCE
11160 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11161 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11162 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11163 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11164 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11165 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11166 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11167 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
11168 #else
11169 usbp->st_atime = sbp->st_atime;
11170 usbp->st_atimensec = sbp->st_atimensec;
11171 usbp->st_mtime = sbp->st_mtime;
11172 usbp->st_mtimensec = sbp->st_mtimensec;
11173 usbp->st_ctime = sbp->st_ctime;
11174 usbp->st_ctimensec = sbp->st_ctimensec;
11175 usbp->st_birthtime = sbp->st_birthtime;
11176 usbp->st_birthtimensec = sbp->st_birthtimensec;
11177 #endif
11178 usbp->st_size = sbp->st_size;
11179 usbp->st_blocks = sbp->st_blocks;
11180 usbp->st_blksize = sbp->st_blksize;
11181 usbp->st_flags = sbp->st_flags;
11182 usbp->st_gen = sbp->st_gen;
11183 usbp->st_lspare = sbp->st_lspare;
11184 usbp->st_qspare[0] = sbp->st_qspare[0];
11185 usbp->st_qspare[1] = sbp->st_qspare[1];
11186 }
11187
11188 /*
11189 * Purge buffer cache for simulating cold starts
11190 */
11191 static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
11192 {
11193 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
11194
11195 return VNODE_RETURNED;
11196 }
11197
11198 static int vfs_purge_callback(mount_t mp, __unused void * arg)
11199 {
11200 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
11201
11202 return VFS_RETURNED;
11203 }
11204
11205 int
11206 vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
11207 {
11208 if (!kauth_cred_issuser(kauth_cred_get()))
11209 return EPERM;
11210
11211 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
11212
11213 return 0;
11214 }
11215
11216 /*
11217 * gets the vnode associated with the (unnamed) snapshot directory
11218 * for a Filesystem. The snapshot directory vnode is returned with
11219 * an iocount on it.
11220 */
11221 int
11222 vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx)
11223 {
11224 return (VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx));
11225 }
11226
11227 /*
11228 * Get the snapshot vnode.
11229 *
11230 * If successful, the call returns with an iocount on *rvpp and *sdvpp,
11231 * and needs nameidone() on ndp.
11232 *
11233 * If the snapshot vnode exists it is returned in ndp->ni_vp.
11234 *
11235 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
11236 * not needed.
11237 */
11238 static int
11239 vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp,
11240 user_addr_t name, struct nameidata *ndp, int32_t op,
11241 #if !CONFIG_TRIGGERS
11242 __unused
11243 #endif
11244 enum path_operation pathop,
11245 vfs_context_t ctx)
11246 {
11247 int error, i;
11248 caddr_t name_buf;
11249 size_t name_len;
11250 struct vfs_attr vfa;
11251
11252 *sdvpp = NULLVP;
11253 *rvpp = NULLVP;
11254
11255 error = vnode_getfromfd(ctx, dirfd, rvpp);
11256 if (error)
11257 return (error);
11258
11259 if (!vnode_isvroot(*rvpp)) {
11260 error = EINVAL;
11261 goto out;
11262 }
11263
11264 /* Make sure the filesystem supports snapshots */
11265 VFSATTR_INIT(&vfa);
11266 VFSATTR_WANTED(&vfa, f_capabilities);
11267 if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) ||
11268 !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) ||
11269 !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] &
11270 VOL_CAP_INT_SNAPSHOT)) ||
11271 !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &
11272 VOL_CAP_INT_SNAPSHOT))) {
11273 error = ENOTSUP;
11274 goto out;
11275 }
11276
11277 error = vnode_get_snapdir(*rvpp, sdvpp, ctx);
11278 if (error)
11279 goto out;
11280
11281 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11282 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11283 if (error)
11284 goto out1;
11285
11286 /*
11287 * Some sanity checks: the name can't be empty, "." or "..", or contain slashes.
11288 * (the length returned by copyinstr includes the terminating NUL)
11289 */
11290 if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
11291 (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
11292 error = EINVAL;
11293 goto out1;
11294 }
11295 for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++);
11296 if (i < (int)name_len) {
11297 error = EINVAL;
11298 goto out1;
11299 }
11300
11301 #if CONFIG_MACF
11302 if (op == CREATE) {
11303 error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp),
11304 name_buf);
11305 } else if (op == DELETE) {
11306 error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp),
11307 name_buf);
11308 }
11309 if (error)
11310 goto out1;
11311 #endif
11312
11313 /* Check if the snapshot already exists ... */
11314 NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1,
11315 UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx);
11316 ndp->ni_dvp = *sdvpp;
11317
11318 error = namei(ndp);
11319 out1:
11320 FREE(name_buf, M_TEMP);
11321 out:
11322 if (error) {
11323 if (*sdvpp) {
11324 vnode_put(*sdvpp);
11325 *sdvpp = NULLVP;
11326 }
11327 if (*rvpp) {
11328 vnode_put(*rvpp);
11329 *rvpp = NULLVP;
11330 }
11331 }
11332 return (error);
11333 }
11334
11335 /*
11336 * create a filesystem snapshot (for supporting filesystems)
11337 *
11338 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
11339 * We get to the (unnamed) snapshot directory vnode and create the vnode
11340 * for the snapshot in it.
11341 *
11342 * Restrictions:
11343 *
11344 * a) Passed in name for snapshot cannot have slashes.
11345 * b) name can't be "." or ".."
11346 *
11347 * Since this requires superuser privileges, vnode_authorize calls are not
11348 * made.
11349 */
11350 static int
11351 snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
11352 vfs_context_t ctx)
11353 {
11354 vnode_t rvp, snapdvp;
11355 int error;
11356 struct nameidata namend;
11357
11358 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE,
11359 OP_LINK, ctx);
11360 if (error)
11361 return (error);
11362
11363 if (namend.ni_vp) {
11364 vnode_put(namend.ni_vp);
11365 error = EEXIST;
11366 } else {
11367 struct vnode_attr va;
11368 vnode_t vp = NULLVP;
11369
11370 VATTR_INIT(&va);
11371 VATTR_SET(&va, va_type, VREG);
11372 VATTR_SET(&va, va_mode, 0);
11373
11374 error = vn_create(snapdvp, &vp, &namend, &va,
11375 VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
11376 if (!error && vp)
11377 vnode_put(vp);
11378 }
11379
11380 nameidone(&namend);
11381 vnode_put(snapdvp);
11382 vnode_put(rvp);
11383 return (error);
11384 }
11385
11386 /*
11387 * Delete a Filesystem snapshot
11388 *
11389 * get the vnode for the unnamed snapshot directory and the snapshot and
11390 * delete the snapshot.
11391 */
11392 static int
11393 snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
11394 vfs_context_t ctx)
11395 {
11396 vnode_t rvp, snapdvp;
11397 int error;
11398 struct nameidata namend;
11399
11400 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE,
11401 OP_UNLINK, ctx);
11402 if (error)
11403 goto out;
11404
11405 error = VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd,
11406 VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
11407
11408 vnode_put(namend.ni_vp);
11409 nameidone(&namend);
11410 vnode_put(snapdvp);
11411 vnode_put(rvp);
11412 out:
11413 return (error);
11414 }
11415
11416 /*
11417 * Revert a filesystem to a snapshot
11418 *
11419 * Marks the filesystem to revert to the given snapshot on next mount.
11420 */
11421 static int
11422 snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
11423 vfs_context_t ctx)
11424 {
11425 int error;
11426 vnode_t rvp;
11427 mount_t mp;
11428 struct fs_snapshot_revert_args revert_data;
11429 struct componentname cnp;
11430 caddr_t name_buf;
11431 size_t name_len;
11432
11433 error = vnode_getfromfd(ctx, dirfd, &rvp);
11434 if (error) {
11435 return (error);
11436 }
11437 mp = vnode_mount(rvp);
11438
11439 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11440 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11441 if (error) {
11442 FREE(name_buf, M_TEMP);
11443 vnode_put(rvp);
11444 return (error);
11445 }
11446
11447 #if CONFIG_MACF
11448 error = mac_mount_check_snapshot_revert(ctx, mp, name_buf);
11449 if (error) {
11450 FREE(name_buf, M_TEMP);
11451 vnode_put(rvp);
11452 return (error);
11453 }
11454 #endif
11455
11456 /*
11457 * Grab mount_iterref so that we can release the vnode,
11458 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
11459 */
11460 error = mount_iterref (mp, 0);
11461 vnode_put(rvp);
11462 if (error) {
11463 FREE(name_buf, M_TEMP);
11464 return (error);
11465 }
11466
11467 memset(&cnp, 0, sizeof(cnp));
11468 cnp.cn_pnbuf = (char *)name_buf;
11469 cnp.cn_nameiop = LOOKUP;
11470 cnp.cn_flags = ISLASTCN | HASBUF;
11471 cnp.cn_pnlen = MAXPATHLEN;
11472 cnp.cn_nameptr = cnp.cn_pnbuf;
11473 cnp.cn_namelen = (int)name_len;
11474 revert_data.sr_cnp = &cnp;
11475
11476 error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
11477 mount_iterdrop(mp);
11478 FREE(name_buf, M_TEMP);
11479
11480 if (error) {
11481 /* If there was any error, try again using VNOP_IOCTL */
11482
11483 vnode_t snapdvp;
11484 struct nameidata namend;
11485
11486 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP,
11487 OP_LOOKUP, ctx);
11488 if (error) {
11489 return (error);
11490 }
11491
11492
11493 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
11494 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
11495 #endif
11496
11497 #ifndef APFS_REVERT_TO_SNAPSHOT
11498 #define APFS_REVERT_TO_SNAPSHOT IOCBASECMD(APFSIOC_REVERT_TO_SNAPSHOT)
11499 #endif
11500
11501 error = VNOP_IOCTL(namend.ni_vp, APFS_REVERT_TO_SNAPSHOT, (caddr_t) NULL,
11502 0, ctx);
11503
11504 vnode_put(namend.ni_vp);
11505 nameidone(&namend);
11506 vnode_put(snapdvp);
11507 vnode_put(rvp);
11508 }
11509
11510 return (error);
11511 }
11512
11513 /*
11514 * rename a Filesystem snapshot
11515 *
11516 * get the vnode for the unnamed snapshot directory and the snapshot and
11517 * rename the snapshot. This is a very specialised (and simple) case of
11518 * rename(2) (which has to deal with a lot more complications). It differs
11519 * slightly from rename(2) in that EEXIST is returned if the new name exists.
11520 */
11521 static int
11522 snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
11523 __unused uint32_t flags, vfs_context_t ctx)
11524 {
11525 vnode_t rvp, snapdvp;
11526 int error, i;
11527 caddr_t newname_buf;
11528 size_t name_len;
11529 vnode_t fvp;
11530 struct nameidata *fromnd, *tond;
11531 /* carving out a chunk for structs that are too big to be on the stack. */
11532 struct {
11533 struct nameidata from_node;
11534 struct nameidata to_node;
11535 } * __rename_data;
11536
11537 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
11538 fromnd = &__rename_data->from_node;
11539 tond = &__rename_data->to_node;
11540
11541 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE,
11542 OP_UNLINK, ctx);
11543 if (error)
11544 goto out;
11545 fvp = fromnd->ni_vp;
11546
11547 MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11548 error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
11549 if (error)
11550 goto out1;
11551
11552 /*
11553 * Some sanity checks: the new name can't be empty, "." or "..", or
11554 * contain slashes.
11555 * (the length returned by copyinstr includes the terminating NUL)
11556 *
11557 * The FS rename VNOP is supposed to handle this, but we catch it
11558 * here ourselves as well.
11559 */
11560 if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') ||
11561 (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) {
11562 error = EINVAL;
11563 goto out1;
11564 }
11565 for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++);
11566 if (i < (int)name_len) {
11567 error = EINVAL;
11568 goto out1;
11569 }
11570
11571 #if CONFIG_MACF
11572 error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp),
11573 newname_buf);
11574 if (error)
11575 goto out1;
11576 #endif
11577
11578 NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
11579 UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx);
11580 tond->ni_dvp = snapdvp;
11581
11582 error = namei(tond);
11583 if (error) {
11584 goto out2;
11585 } else if (tond->ni_vp) {
11586 /*
11587 * snapshot rename behaves differently than rename(2) - if the
11588 * new name exists, EEXIST is returned.
11589 */
11590 vnode_put(tond->ni_vp);
11591 error = EEXIST;
11592 goto out2;
11593 }
11594
11595 error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
11596 &tond->ni_cnd, ctx);
11597
11598 out2:
11599 nameidone(tond);
11600 out1:
11601 FREE(newname_buf, M_TEMP);
11602 vnode_put(fvp);
11603 vnode_put(snapdvp);
11604 vnode_put(rvp);
11605 nameidone(fromnd);
11606 out:
11607 FREE(__rename_data, M_TEMP);
11608 return (error);
11609 }
11610
11611 /*
11612 * Mount a Filesystem snapshot
11613 *
11614 * get the vnode for the unnamed snapshot directory and the snapshot and
11615 * mount the snapshot.
11616 */
11617 static int
11618 snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
11619 __unused user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
11620 {
11621 vnode_t rvp, snapdvp, snapvp, vp, pvp;
11622 int error;
11623 struct nameidata *snapndp, *dirndp;
11624 /* carving out a chunk for structs that are too big to be on the stack. */
11625 struct {
11626 struct nameidata snapnd;
11627 struct nameidata dirnd;
11628 } * __snapshot_mount_data;
11629
11630 MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data),
11631 M_TEMP, M_WAITOK);
11632 snapndp = &__snapshot_mount_data->snapnd;
11633 dirndp = &__snapshot_mount_data->dirnd;
11634
11635 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP,
11636 OP_LOOKUP, ctx);
11637 if (error)
11638 goto out;
11639
11640 snapvp = snapndp->ni_vp;
11641 if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) {
11642 error = EIO;
11643 goto out1;
11644 }
11645
11646 /* Get the vnode to be covered */
11647 NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
11648 UIO_USERSPACE, directory, ctx);
11649 error = namei(dirndp);
11650 if (error)
11651 goto out1;
11652
11653 vp = dirndp->ni_vp;
11654 pvp = dirndp->ni_dvp;
11655
11656 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
11657 error = EINVAL;
11658 } else {
11659 mount_t mp = vnode_mount(rvp);
11660 struct fs_snapshot_mount_args smnt_data;
11661
11662 smnt_data.sm_mp = mp;
11663 smnt_data.sm_cnp = &snapndp->ni_cnd;
11664 error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
11665 &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), 0,
11666 KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
11667 }
11668
11669 vnode_put(vp);
11670 vnode_put(pvp);
11671 nameidone(dirndp);
11672 out1:
11673 vnode_put(snapvp);
11674 vnode_put(snapdvp);
11675 vnode_put(rvp);
11676 nameidone(snapndp);
11677 out:
11678 FREE(__snapshot_mount_data, M_TEMP);
11679 return (error);
11680 }
11681
11682 /*
11683 * Root from a snapshot of the filesystem
11684 *
11685 * Marks the filesystem to root from the given snapshot on next boot.
11686 */
11687 static int
11688 snapshot_root(int dirfd, user_addr_t name, __unused uint32_t flags,
11689 vfs_context_t ctx)
11690 {
11691 int error;
11692 vnode_t rvp;
11693 mount_t mp;
11694 struct fs_snapshot_root_args root_data;
11695 struct componentname cnp;
11696 caddr_t name_buf;
11697 size_t name_len;
11698
11699 error = vnode_getfromfd(ctx, dirfd, &rvp);
11700 if (error) {
11701 return (error);
11702 }
11703 mp = vnode_mount(rvp);
11704
11705 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11706 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11707 if (error) {
11708 FREE(name_buf, M_TEMP);
11709 vnode_put(rvp);
11710 return (error);
11711 }
11712
11713 // XXX MAC checks ?
11714
11715 /*
11716 * Grab mount_iterref so that we can release the vnode,
11717 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
11718 */
11719 error = mount_iterref (mp, 0);
11720 vnode_put(rvp);
11721 if (error) {
11722 FREE(name_buf, M_TEMP);
11723 return (error);
11724 }
11725
11726 memset(&cnp, 0, sizeof(cnp));
11727 cnp.cn_pnbuf = (char *)name_buf;
11728 cnp.cn_nameiop = LOOKUP;
11729 cnp.cn_flags = ISLASTCN | HASBUF;
11730 cnp.cn_pnlen = MAXPATHLEN;
11731 cnp.cn_nameptr = cnp.cn_pnbuf;
11732 cnp.cn_namelen = (int)name_len;
11733 root_data.sr_cnp = &cnp;
11734
11735 error = VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, (caddr_t)&root_data, 0, ctx);
11736
11737 mount_iterdrop(mp);
11738 FREE(name_buf, M_TEMP);
11739
11740 return (error);
11741 }
11742
11743 /*
11744 * FS snapshot operations dispatcher
11745 */
11746 int
11747 fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap,
11748 __unused int32_t *retval)
11749 {
11750 int error;
11751 vfs_context_t ctx = vfs_context_current();
11752
11753 AUDIT_ARG(fd, uap->dirfd);
11754 AUDIT_ARG(value32, uap->op);
11755
11756 error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0);
11757 if (error)
11758 return (error);
11759
11760 switch (uap->op) {
11761 case SNAPSHOT_OP_CREATE:
11762 error = snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx);
11763 break;
11764 case SNAPSHOT_OP_DELETE:
11765 error = snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx);
11766 break;
11767 case SNAPSHOT_OP_RENAME:
11768 error = snapshot_rename(uap->dirfd, uap->name1, uap->name2,
11769 uap->flags, ctx);
11770 break;
11771 case SNAPSHOT_OP_MOUNT:
11772 error = snapshot_mount(uap->dirfd, uap->name1, uap->name2,
11773 uap->data, uap->flags, ctx);
11774 break;
11775 case SNAPSHOT_OP_REVERT:
11776 error = snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx);
11777 break;
11778 case SNAPSHOT_OP_ROOT:
11779 error = snapshot_root(uap->dirfd, uap->name1, uap->flags, ctx);
11780 break;
11781 default:
11782 error = ENOSYS;
11783 }
11784
11785 return (error);
11786 }
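/*
 * Illustrative userspace sketch (not compiled here): driving the dispatcher
 * above through the fs_snapshot_create()/fs_snapshot_delete() wrappers that
 * later SDKs declare (the <sys/snapshot.h> header and exact signatures are
 * assumptions here).  dirfd must reference the volume root, since
 * vnode_isvroot() is checked above, and the caller needs the
 * PRIV_VFS_SNAPSHOT privilege, so this only works for suitably
 * privileged/entitled processes.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>
#include <sys/snapshot.h>	/* assumed location of the wrappers */

static int
make_and_drop_snapshot(const char *volume_root, const char *name)
{
	int dirfd = open(volume_root, O_RDONLY);
	if (dirfd < 0)
		return (-1);

	int error = fs_snapshot_create(dirfd, name, 0);
	if (error == 0)
		error = fs_snapshot_delete(dirfd, name, 0);

	close(dirfd);
	return (error);
}
#endif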