apple/xnu.git — bsd/vfs/vfs_syscalls.c (xnu-2422.110.17)
1 /*
2 * Copyright (c) 1995-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
80 #include <sys/stat.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
87 #include <sys/mman.h>
88 #include <sys/dirent.h>
89 #include <sys/attr.h>
90 #include <sys/sysctl.h>
91 #include <sys/ubc.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <machine/cons.h>
103 #include <machine/limits.h>
104 #include <miscfs/specfs/specdev.h>
105
106 #include <security/audit/audit.h>
107 #include <bsm/audit_kevents.h>
108
109 #include <mach/mach_types.h>
110 #include <kern/kern_types.h>
111 #include <kern/kalloc.h>
112 #include <kern/task.h>
113
114 #include <vm/vm_pageout.h>
115
116 #include <libkern/OSAtomic.h>
117 #include <pexpert/pexpert.h>
118
119 #if CONFIG_MACF
120 #include <security/mac.h>
121 #include <security/mac_framework.h>
122 #endif
123
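/*
 * Path-buffer helpers: with CONFIG_FSE the buffers come from the fsevents
 * path-buffer pool; otherwise they fall back to MAXPATHLEN allocations
 * from the M_NAMEI zone.
 */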
124 #if CONFIG_FSE
125 #define GET_PATH(x) \
126 (x) = get_pathbuff();
127 #define RELEASE_PATH(x) \
128 release_pathbuff(x);
129 #else
130 #define GET_PATH(x) \
131 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
132 #define RELEASE_PATH(x) \
133 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
134 #endif /* CONFIG_FSE */
135
136 /* struct for checkdirs iteration */
137 struct cdirargs {
138 vnode_t olddp;
139 vnode_t newdp;
140 };
141 /* callback for checkdirs iteration */
142 static int checkdirs_callback(proc_t p, void * arg);
143
144 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
145 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
146 void enablequotas(struct mount *mp, vfs_context_t ctx);
147 static int getfsstat_callback(mount_t mp, void * arg);
148 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
149 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
150 static int sync_callback(mount_t, void *);
151 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
152 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
153 boolean_t partial_copy);
154 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
155 user_addr_t bufp);
156 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
157 static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
158 struct componentname *cnp, user_addr_t fsmountargs,
159 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
160 vfs_context_t ctx);
161 void vfs_notify_mount(vnode_t pdvp);
162
163 int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
164
165 #ifdef CONFIG_IMGSRC_ACCESS
166 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
167 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
168 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
169 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
170 static void mount_end_update(mount_t mp);
171 static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
172 #endif /* CONFIG_IMGSRC_ACCESS */
173
174 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
175
176 __private_extern__
177 int sync_internal(void);
178
179 __private_extern__
180 int unlink1(vfs_context_t, struct nameidata *, int);
181
182 /*
183 * incremented each time a mount or unmount operation occurs;
184 * used to invalidate the cached value of the rootvp in the
185 * mount structure utilized by cache_lookup_path
186 */
187 uint32_t mount_generation = 0;
188
189 /* counts number of mount and unmount operations */
190 unsigned int vfs_nummntops=0;
191
192 extern const struct fileops vnops;
193 #if CONFIG_APPLEDOUBLE
194 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
195 #endif /* CONFIG_APPLEDOUBLE */
196
197 /*
198 * Virtual File System System Calls
199 */
200
201 #if NFSCLIENT
202 /*
203 * Private in-kernel mounting SPI (NFS only, not exported)
204 */
205 __private_extern__
206 boolean_t
207 vfs_iskernelmount(mount_t mp)
208 {
209 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
210 }
211
212 __private_extern__
213 int
214 kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
215 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
216 {
217 struct nameidata nd;
218 boolean_t did_namei;
219 int error;
220
221 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
222 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
223
224 /*
225 * Get the vnode to be covered if it's not supplied
226 */
227 if (vp == NULLVP) {
228 error = namei(&nd);
229 if (error)
230 return (error);
231 vp = nd.ni_vp;
232 pvp = nd.ni_dvp;
233 did_namei = TRUE;
234 } else {
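/*
 * The caller supplied the covered vnode (and its parent); just fill in
 * the component name from the kernel path so mount_common() can record
 * it as f_mntonname.
 */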
235 char *pnbuf = CAST_DOWN(char *, path);
236
237 nd.ni_cnd.cn_pnbuf = pnbuf;
238 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
239 did_namei = FALSE;
240 }
241
242 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
243 syscall_flags, kern_flags, NULL, TRUE, ctx);
244
245 if (did_namei) {
246 vnode_put(vp);
247 vnode_put(pvp);
248 nameidone(&nd);
249 }
250
251 return (error);
252 }
253 #endif /* NFSCLIENT */
254
255 /*
256 * Mount a file system.
257 */
258 /* ARGSUSED */
259 int
260 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
261 {
262 struct __mac_mount_args muap;
263
264 muap.type = uap->type;
265 muap.path = uap->path;
266 muap.flags = uap->flags;
267 muap.data = uap->data;
268 muap.mac_p = USER_ADDR_NULL;
269 return (__mac_mount(p, &muap, retval));
270 }
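/*
 * Illustrative sketch: a userspace caller reaches the wrapper above through
 * the mount(2) libc stub. The filesystem type and mount point below are
 * placeholders, and the data argument is filesystem-specific, so treat this
 * as a hedged example rather than a recipe for any particular filesystem:
 *
 *	#include <stdio.h>
 *	#include <sys/mount.h>
 *
 *	// hypothetical: read-only mount with no fs-specific arguments
 *	if (mount("somefs", "/mnt/point", MNT_RDONLY, NULL) == -1)
 *		perror("mount");
 */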
271
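/*
 * Notify interested parties that a mount has appeared: broadcast a
 * VQ_MOUNT event on the VFS event queue and post NOTE_WRITE on the
 * parent of the covered vnode so directory watchers see the change.
 */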
272 void
273 vfs_notify_mount(vnode_t pdvp)
274 {
275 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
276 lock_vnode_and_post(pdvp, NOTE_WRITE);
277 }
278
279 /*
280 * __mac_mount:
281 * Mount a file system taking into account MAC label behavior.
282 * See mount(2) man page for more information
283 *
284 * Parameters: p Process requesting the mount
285 * uap User argument descriptor (see below)
286 * retval (ignored)
287 *
288 * Indirect: uap->type Filesystem type
289 * uap->path Path to mount
290 * uap->data Mount arguments
291 * uap->mac_p MAC info
292 * uap->flags Mount flags
293 *
294 *
295 * Returns: 0 Success
296 * !0 Not success
297 */
298 boolean_t root_fs_upgrade_try = FALSE;
299
300 int
301 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
302 {
303 vnode_t pvp = NULL;
304 vnode_t vp = NULL;
305 int need_nameidone = 0;
306 vfs_context_t ctx = vfs_context_current();
307 char fstypename[MFSNAMELEN];
308 struct nameidata nd;
309 size_t dummy=0;
310 char *labelstr = NULL;
311 int flags = uap->flags;
312 int error;
313 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
314 boolean_t is_64bit = IS_64BIT_PROCESS(p);
315 #else
316 #pragma unused(p)
317 #endif
318 /*
319 * Get the fs type name from user space
320 */
321 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
322 if (error)
323 return (error);
324
325 /*
326 * Get the vnode to be covered
327 */
328 NDINIT(&nd, LOOKUP, OP_MOUNT, NOTRIGGER | FOLLOW | AUDITVNPATH1 | WANTPARENT,
329 UIO_USERSPACE, uap->path, ctx);
330 error = namei(&nd);
331 if (error) {
332 goto out;
333 }
334 need_nameidone = 1;
335 vp = nd.ni_vp;
336 pvp = nd.ni_dvp;
337
338 #ifdef CONFIG_IMGSRC_ACCESS
339 /* Mounting image source cannot be batched with other operations */
340 if (flags == MNT_IMGSRC_BY_INDEX) {
341 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
342 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
343 goto out;
344 }
345 #endif /* CONFIG_IMGSRC_ACCESS */
346
347 #if CONFIG_MACF
348 /*
349 * Get the label string (if any) from user space
350 */
351 if (uap->mac_p != USER_ADDR_NULL) {
352 struct user_mac mac;
353 size_t ulen = 0;
354
355 if (is_64bit) {
356 struct user64_mac mac64;
357 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
358 mac.m_buflen = mac64.m_buflen;
359 mac.m_string = mac64.m_string;
360 } else {
361 struct user32_mac mac32;
362 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
363 mac.m_buflen = mac32.m_buflen;
364 mac.m_string = mac32.m_string;
365 }
366 if (error)
367 goto out;
368 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
369 (mac.m_buflen < 2)) {
370 error = EINVAL;
371 goto out;
372 }
373 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
374 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
375 if (error) {
376 goto out;
377 }
378 AUDIT_ARG(mac_string, labelstr);
379 }
380 #endif /* CONFIG_MACF */
381
382 AUDIT_ARG(fflags, flags);
383
384 if ((vp->v_flag & VROOT) &&
385 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
386 if (!(flags & MNT_UNION)) {
387 flags |= MNT_UPDATE;
388 }
389 else {
390 /*
391 * For a union mount on '/', treat it as fresh
392 * mount instead of update.
393 * Otherwise, union mounting on '/' used to panic the
394 * system, because mnt_vnodecovered is NULL for '/',
395 * and unionlookup requires it after getting ENOENT
396 * on the union mount.
397 */
398 flags = (flags & ~(MNT_UPDATE));
399 }
400
401 #if 0
402 //#ifdef SECURE_KERNEL
403 if ((flags & MNT_RDONLY) == 0) {
404 /* Release kernels are not allowed to mount "/" as rw */
405 error = EPERM;
406 goto out;
407 }
408 //#endif
409 #endif
410 /*
411 * See 7392553 for more details on why this check exists.
412 * Suffice it to say: if this check is ON and something tries
413 * to mount the rootFS RW, we'll turn off the codesign
414 * bitmap optimization.
415 */
416 #if CHECK_CS_VALIDATION_BITMAP
417 if ((flags & MNT_RDONLY) == 0 ) {
418 root_fs_upgrade_try = TRUE;
419 }
420 #endif
421 }
422
423 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
424 labelstr, FALSE, ctx);
425
426 out:
427
428 #if CONFIG_MACF
429 if (labelstr)
430 FREE(labelstr, M_MACTEMP);
431 #endif /* CONFIG_MACF */
432
433 if (vp) {
434 vnode_put(vp);
435 }
436 if (pvp) {
437 vnode_put(pvp);
438 }
439 if (need_nameidone) {
440 nameidone(&nd);
441 }
442
443 return (error);
444 }
445
446 /*
447 * common mount implementation (final stage of mounting)
448 *
449 * Arguments:
450 * fstypename file system type (i.e., its vfs name)
451 * pvp parent of covered vnode
452 * vp covered vnode
453 * cnp component name (i.e., path) of covered vnode
454 * flags generic mount flags
455 * fsmountargs file system specific data
456 * labelstr optional MAC label
457 * kernelmount TRUE for mounts initiated from inside the kernel
458 * ctx caller's context
459 */
460 static int
461 mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
462 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
463 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
464 {
465 #if !CONFIG_MACF
466 #pragma unused(labelstr)
467 #endif
468 struct vnode *devvp = NULLVP;
469 struct vnode *device_vnode = NULLVP;
470 #if CONFIG_MACF
471 struct vnode *rvp;
472 #endif
473 struct mount *mp;
474 struct vfstable *vfsp = (struct vfstable *)0;
475 struct proc *p = vfs_context_proc(ctx);
476 int error, flag = 0;
477 user_addr_t devpath = USER_ADDR_NULL;
478 int ronly = 0;
479 int mntalloc = 0;
480 boolean_t vfsp_ref = FALSE;
481 boolean_t is_rwlock_locked = FALSE;
482 boolean_t did_rele = FALSE;
483 boolean_t have_usecount = FALSE;
484
485 /*
486 * Process an update for an existing mount
487 */
488 if (flags & MNT_UPDATE) {
489 if ((vp->v_flag & VROOT) == 0) {
490 error = EINVAL;
491 goto out1;
492 }
493 mp = vp->v_mount;
494
495 /* unmount in progress; return error */
496 mount_lock_spin(mp);
497 if (mp->mnt_lflag & MNT_LUNMOUNT) {
498 mount_unlock(mp);
499 error = EBUSY;
500 goto out1;
501 }
502 mount_unlock(mp);
503 lck_rw_lock_exclusive(&mp->mnt_rwlock);
504 is_rwlock_locked = TRUE;
505 /*
506 * We only allow the filesystem to be reloaded if it
507 * is currently mounted read-only.
508 */
509 if ((flags & MNT_RELOAD) &&
510 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
511 error = ENOTSUP;
512 goto out1;
513 }
514
515 /*
516 * If content protection is enabled, update mounts are not
517 * allowed to turn it off.
518 */
519 if ((mp->mnt_flag & MNT_CPROTECT) &&
520 ((flags & MNT_CPROTECT) == 0)) {
521 error = EINVAL;
522 goto out1;
523 }
524
525 #ifdef CONFIG_IMGSRC_ACCESS
526 /* Can't downgrade the backer of the root FS */
527 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
528 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
529 error = ENOTSUP;
530 goto out1;
531 }
532 #endif /* CONFIG_IMGSRC_ACCESS */
533
534 /*
535 * Only root, or the user that did the original mount is
536 * permitted to update it.
537 */
538 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
539 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
540 goto out1;
541 }
542 #if CONFIG_MACF
543 error = mac_mount_check_remount(ctx, mp);
544 if (error != 0) {
545 goto out1;
546 }
547 #endif
548 /*
549 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
550 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
551 */
552 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
553 flags |= MNT_NOSUID | MNT_NODEV;
554 if (mp->mnt_flag & MNT_NOEXEC)
555 flags |= MNT_NOEXEC;
556 }
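/*
 * Remember the current mount flags so they can be restored if the
 * update attempt fails (see the MNT_UPDATE handling after VFS_MOUNT()
 * below).
 */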
557 flag = mp->mnt_flag;
558
559
560
561 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
562
563 vfsp = mp->mnt_vtable;
564 goto update;
565 }
566 /*
567 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
568 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
569 */
570 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
571 flags |= MNT_NOSUID | MNT_NODEV;
572 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
573 flags |= MNT_NOEXEC;
574 }
575
576 /* XXXAUDIT: Should we capture the type on the error path as well? */
577 AUDIT_ARG(text, fstypename);
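/*
 * Find the vfstable entry for the requested filesystem type and take a
 * reference on it so it stays valid while we mount.
 */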
578 mount_list_lock();
579 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
580 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
581 vfsp->vfc_refcount++;
582 vfsp_ref = TRUE;
583 break;
584 }
585 mount_list_unlock();
586 if (vfsp == NULL) {
587 error = ENODEV;
588 goto out1;
589 }
590
591 /*
592 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
593 */
594 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
595 error = EINVAL; /* unsupported request */
596 goto out1;
597 }
598
599 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
600 if (error != 0) {
601 goto out1;
602 }
603
604 /*
605 * Allocate and initialize the filesystem (mount_t)
606 */
607 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
608 M_MOUNT, M_WAITOK);
609 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
610 mntalloc = 1;
611
612 /* Initialize the default IO constraints */
613 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
614 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
615 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
616 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
617 mp->mnt_devblocksize = DEV_BSIZE;
618 mp->mnt_alignmentmask = PAGE_MASK;
619 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
620 mp->mnt_ioscale = 1;
621 mp->mnt_ioflags = 0;
622 mp->mnt_realrootvp = NULLVP;
623 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
624
625 TAILQ_INIT(&mp->mnt_vnodelist);
626 TAILQ_INIT(&mp->mnt_workerqueue);
627 TAILQ_INIT(&mp->mnt_newvnodes);
628 mount_lock_init(mp);
629 lck_rw_lock_exclusive(&mp->mnt_rwlock);
630 is_rwlock_locked = TRUE;
631 mp->mnt_op = vfsp->vfc_vfsops;
632 mp->mnt_vtable = vfsp;
633 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
634 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
635 strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
636 strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
637 mp->mnt_vnodecovered = vp;
638 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
639 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
640 mp->mnt_devbsdunit = 0;
641
642 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
643 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
644
645 #if NFSCLIENT
646 if (kernelmount)
647 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
648 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
649 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
650 #endif /* NFSCLIENT */
651
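/*
 * Both fresh mounts and MNT_UPDATE re-mounts continue from here.
 */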
652 update:
653 /*
654 * Set the mount level flags.
655 */
656 if (flags & MNT_RDONLY)
657 mp->mnt_flag |= MNT_RDONLY;
658 else if (mp->mnt_flag & MNT_RDONLY) {
659 // disallow read/write upgrades of file systems that
660 // had the TYPENAME_OVERRIDE feature set.
661 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
662 error = EPERM;
663 goto out1;
664 }
665 mp->mnt_kern_flag |= MNTK_WANTRDWR;
666 }
667 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
668 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
669 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
670 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
671 MNT_QUARANTINE | MNT_CPROTECT);
672 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
673 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
674 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
675 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
676 MNT_QUARANTINE | MNT_CPROTECT);
677
678 #if CONFIG_MACF
679 if (flags & MNT_MULTILABEL) {
680 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
681 error = EINVAL;
682 goto out1;
683 }
684 mp->mnt_flag |= MNT_MULTILABEL;
685 }
686 #endif
687 /*
688 * Process device path for local file systems if requested
689 */
690 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
691 if (vfs_context_is64bit(ctx)) {
692 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
693 goto out1;
694 fsmountargs += sizeof(devpath);
695 } else {
696 user32_addr_t tmp;
697 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
698 goto out1;
699 /* munge into LP64 addr */
700 devpath = CAST_USER_ADDR_T(tmp);
701 fsmountargs += sizeof(tmp);
702 }
703
704 /* Lookup device and authorize access to it */
705 if ((devpath)) {
706 struct nameidata nd;
707
708 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
709 if ( (error = namei(&nd)) )
710 goto out1;
711
712 strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
713 devvp = nd.ni_vp;
714
715 nameidone(&nd);
716
717 if (devvp->v_type != VBLK) {
718 error = ENOTBLK;
719 goto out2;
720 }
721 if (major(devvp->v_rdev) >= nblkdev) {
722 error = ENXIO;
723 goto out2;
724 }
725 /*
726 * If mount by non-root, then verify that user has necessary
727 * permissions on the device.
728 */
729 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
730 mode_t accessmode = KAUTH_VNODE_READ_DATA;
731
732 if ((mp->mnt_flag & MNT_RDONLY) == 0)
733 accessmode |= KAUTH_VNODE_WRITE_DATA;
734 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
735 goto out2;
736 }
737 }
738 /* On first mount, preflight and open device */
739 if (devpath && ((flags & MNT_UPDATE) == 0)) {
740 if ( (error = vnode_ref(devvp)) )
741 goto out2;
742 /*
743 * Disallow multiple mounts of the same device.
744 * Disallow mounting of a device that is currently in use
745 * (except for root, which might share swap device for miniroot).
746 * Flush out any old buffers remaining from a previous use.
747 */
748 if ( (error = vfs_mountedon(devvp)) )
749 goto out3;
750
751 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
752 error = EBUSY;
753 goto out3;
754 }
755 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
756 error = ENOTBLK;
757 goto out3;
758 }
759 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
760 goto out3;
761
762 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
763 #if CONFIG_MACF
764 error = mac_vnode_check_open(ctx,
765 devvp,
766 ronly ? FREAD : FREAD|FWRITE);
767 if (error)
768 goto out3;
769 #endif /* MAC */
770 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
771 goto out3;
772
773 mp->mnt_devvp = devvp;
774 device_vnode = devvp;
775
776 } else if ((mp->mnt_flag & MNT_RDONLY) &&
777 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
778 (device_vnode = mp->mnt_devvp)) {
779 dev_t dev;
780 int maj;
781 /*
782 * If upgrade to read-write by non-root, then verify
783 * that user has necessary permissions on the device.
784 */
785 vnode_getalways(device_vnode);
786
787 if (suser(vfs_context_ucred(ctx), NULL) &&
788 (error = vnode_authorize(device_vnode, NULL,
789 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
790 ctx)) != 0) {
791 vnode_put(device_vnode);
792 goto out2;
793 }
794
795 /* Tell the device that we're upgrading */
796 dev = (dev_t)device_vnode->v_rdev;
797 maj = major(dev);
798
799 if ((u_int)maj >= (u_int)nblkdev)
800 panic("Volume mounted on a device with invalid major number.");
801
802 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
803 vnode_put(device_vnode);
804 device_vnode = NULLVP;
805 if (error != 0) {
806 goto out2;
807 }
808 }
809 }
810 #if CONFIG_MACF
811 if ((flags & MNT_UPDATE) == 0) {
812 mac_mount_label_init(mp);
813 mac_mount_label_associate(ctx, mp);
814 }
815 if (labelstr) {
816 if ((flags & MNT_UPDATE) != 0) {
817 error = mac_mount_check_label_update(ctx, mp);
818 if (error != 0)
819 goto out3;
820 }
821 }
822 #endif
823 /*
824 * Mount the filesystem.
825 */
826 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
827
828 if (flags & MNT_UPDATE) {
829 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
830 mp->mnt_flag &= ~MNT_RDONLY;
831 mp->mnt_flag &=~
832 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
833 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
834 if (error)
835 mp->mnt_flag = flag; /* restore flag value */
836 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
837 lck_rw_done(&mp->mnt_rwlock);
838 is_rwlock_locked = FALSE;
839 if (!error)
840 enablequotas(mp, ctx);
841 goto exit;
842 }
843
844 /*
845 * Put the new filesystem on the mount list after root.
846 */
847 if (error == 0) {
848 struct vfs_attr vfsattr;
849 #if CONFIG_MACF
850 if (vfs_flags(mp) & MNT_MULTILABEL) {
851 error = VFS_ROOT(mp, &rvp, ctx);
852 if (error) {
853 printf("%s() VFS_ROOT returned %d\n", __func__, error);
854 goto out3;
855 }
856 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
857 /*
858 * drop reference provided by VFS_ROOT
859 */
860 vnode_put(rvp);
861
862 if (error)
863 goto out3;
864 }
865 #endif /* MAC */
866
867 vnode_lock_spin(vp);
868 CLR(vp->v_flag, VMOUNT);
869 vp->v_mountedhere = mp;
870 vnode_unlock(vp);
871
872 /*
873 * taking the name_cache_lock exclusively will
874 * ensure that everyone is out of the fast path who
875 * might be trying to use a now-stale copy of
876 * vp->v_mountedhere->mnt_realrootvp;
877 * bumping mount_generation causes the cached values
878 * to be invalidated
879 */
880 name_cache_lock();
881 mount_generation++;
882 name_cache_unlock();
883
884 error = vnode_ref(vp);
885 if (error != 0) {
886 goto out4;
887 }
888
889 have_usecount = TRUE;
890
891 error = checkdirs(vp, ctx);
892 if (error != 0) {
893 /* Unmount the filesystem as cdir/rdirs cannot be updated */
894 goto out4;
895 }
896 /*
897 * there is no cleanup code here, so the return value is cast to void;
898 * this needs to be revisited
899 */
900 (void)VFS_START(mp, 0, ctx);
901
902 if (mount_list_add(mp) != 0) {
903 /*
904 * The system is shutting down, trying to unmount
905 * everything, so fail with a plausible errno.
906 */
907 error = EBUSY;
908 goto out4;
909 }
910 lck_rw_done(&mp->mnt_rwlock);
911 is_rwlock_locked = FALSE;
912
913 /* Check if this mounted file system supports EAs or named streams. */
914 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
915 VFSATTR_INIT(&vfsattr);
916 VFSATTR_WANTED(&vfsattr, f_capabilities);
917 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
918 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
919 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
920 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
921 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
922 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
923 }
924 #if NAMEDSTREAMS
925 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
926 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
927 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
928 }
929 #endif
930 /* Check if this file system supports path from id lookups. */
931 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
932 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
933 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
934 } else if (mp->mnt_flag & MNT_DOVOLFS) {
935 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
936 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
937 }
938 }
939 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
940 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
941 }
942 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
943 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
944 }
945 /* increment the operations count */
946 OSAddAtomic(1, &vfs_nummntops);
947 enablequotas(mp, ctx);
948
949 if (device_vnode) {
950 device_vnode->v_specflags |= SI_MOUNTEDON;
951
952 /*
953 * cache the IO attributes for the underlying physical media...
954 * an error return indicates the underlying driver doesn't
955 * support all the queries necessary... however, reasonable
956 * defaults will have been set, so no reason to bail or care
957 */
958 vfs_init_io_attributes(device_vnode, mp);
959 }
960
961 /* Now that mount is setup, notify the listeners */
962 vfs_notify_mount(pvp);
963 } else {
964 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
965 if (mp->mnt_vnodelist.tqh_first != NULL) {
966 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
967 mp->mnt_vtable->vfc_name, error);
968 }
969
970 vnode_lock_spin(vp);
971 CLR(vp->v_flag, VMOUNT);
972 vnode_unlock(vp);
973 mount_list_lock();
974 mp->mnt_vtable->vfc_refcount--;
975 mount_list_unlock();
976
977 if (device_vnode ) {
978 vnode_rele(device_vnode);
979 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
980 }
981 lck_rw_done(&mp->mnt_rwlock);
982 is_rwlock_locked = FALSE;
983
984 /*
985 * if we get here, we have a mount structure that needs to be freed,
986 * but since the coveredvp hasn't yet been updated to point at it,
987 * no need to worry about other threads holding a crossref on this mp
988 * so it's ok to just free it
989 */
990 mount_lock_destroy(mp);
991 #if CONFIG_MACF
992 mac_mount_label_destroy(mp);
993 #endif
994 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
995 }
996 exit:
997 /*
998 * drop I/O count on the device vp if there was one
999 */
1000 if (devpath && devvp)
1001 vnode_put(devvp);
1002
1003 return(error);
1004
1005 /* Error condition exits */
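/*
 * out4: undo placement on the covered vnode and force-unmount the
 *       partially constructed mount
 * out3: drop the usecount taken on the device vnode for a fresh mount
 * out2: drop the iocount on the device vnode
 * out1: release mnt_rwlock if held, free the mount structure if it was
 *       allocated here, and drop the vfstable reference
 */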
1006 out4:
1007 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
1008
1009 /*
1010 * If the mount has been placed on the covered vp,
1011 * it may have been discovered by now, so we have
1012 * to treat this just like an unmount
1013 */
1014 mount_lock_spin(mp);
1015 mp->mnt_lflag |= MNT_LDEAD;
1016 mount_unlock(mp);
1017
1018 if (device_vnode != NULLVP) {
1019 vnode_rele(device_vnode);
1020 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1021 ctx);
1022 did_rele = TRUE;
1023 }
1024
1025 vnode_lock_spin(vp);
1026
1027 mp->mnt_crossref++;
1028 vp->v_mountedhere = (mount_t) 0;
1029
1030 vnode_unlock(vp);
1031
1032 if (have_usecount) {
1033 vnode_rele(vp);
1034 }
1035 out3:
1036 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
1037 vnode_rele(devvp);
1038 out2:
1039 if (devpath && devvp)
1040 vnode_put(devvp);
1041 out1:
1042 /* Release mnt_rwlock only when it was taken */
1043 if (is_rwlock_locked == TRUE) {
1044 lck_rw_done(&mp->mnt_rwlock);
1045 }
1046
1047 if (mntalloc) {
1048 if (mp->mnt_crossref)
1049 mount_dropcrossref(mp, vp, 0);
1050 else {
1051 mount_lock_destroy(mp);
1052 #if CONFIG_MACF
1053 mac_mount_label_destroy(mp);
1054 #endif
1055 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1056 }
1057 }
1058 if (vfsp_ref) {
1059 mount_list_lock();
1060 vfsp->vfc_refcount--;
1061 mount_list_unlock();
1062 }
1063
1064 return(error);
1065 }
1066
1067 /*
1068 * Flush in-core data, check for competing mount attempts,
1069 * and set VMOUNT
1070 */
1071 int
1072 prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
1073 {
1074 #if !CONFIG_MACF
1075 #pragma unused(cnp,fsname)
1076 #endif
1077 struct vnode_attr va;
1078 int error;
1079
1080 if (!skip_auth) {
1081 /*
1082 * If the user is not root, ensure that they own the directory
1083 * onto which we are attempting to mount.
1084 */
1085 VATTR_INIT(&va);
1086 VATTR_WANTED(&va, va_uid);
1087 if ((error = vnode_getattr(vp, &va, ctx)) ||
1088 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1089 (!vfs_context_issuser(ctx)))) {
1090 error = EPERM;
1091 goto out;
1092 }
1093 }
1094
1095 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1096 goto out;
1097
1098 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1099 goto out;
1100
1101 if (vp->v_type != VDIR) {
1102 error = ENOTDIR;
1103 goto out;
1104 }
1105
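/* the vnode is already involved in another mount; refuse with EBUSY */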
1106 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1107 error = EBUSY;
1108 goto out;
1109 }
1110
1111 #if CONFIG_MACF
1112 error = mac_mount_check_mount(ctx, vp,
1113 cnp, fsname);
1114 if (error != 0)
1115 goto out;
1116 #endif
1117
1118 vnode_lock_spin(vp);
1119 SET(vp->v_flag, VMOUNT);
1120 vnode_unlock(vp);
1121
1122 out:
1123 return error;
1124 }
1125
1126 #if CONFIG_IMGSRC_ACCESS
1127
1128 #if DEBUG
1129 #define IMGSRC_DEBUG(args...) printf(args)
1130 #else
1131 #define IMGSRC_DEBUG(args...) do { } while(0)
1132 #endif
1133
1134 static int
1135 authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1136 {
1137 struct nameidata nd;
1138 vnode_t vp, realdevvp;
1139 mode_t accessmode;
1140 int error;
1141
1142 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1143 if ( (error = namei(&nd)) ) {
1144 IMGSRC_DEBUG("namei() failed with %d\n", error);
1145 return error;
1146 }
1147
1148 vp = nd.ni_vp;
1149
1150 if (!vnode_isblk(vp)) {
1151 IMGSRC_DEBUG("Not block device.\n");
1152 error = ENOTBLK;
1153 goto out;
1154 }
1155
1156 realdevvp = mp->mnt_devvp;
1157 if (realdevvp == NULLVP) {
1158 IMGSRC_DEBUG("No device backs the mount.\n");
1159 error = ENXIO;
1160 goto out;
1161 }
1162
1163 error = vnode_getwithref(realdevvp);
1164 if (error != 0) {
1165 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1166 goto out;
1167 }
1168
1169 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1170 IMGSRC_DEBUG("Wrong dev_t.\n");
1171 error = ENXIO;
1172 goto out1;
1173 }
1174
1175 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1176
1177 /*
1178 * If mount by non-root, then verify that user has necessary
1179 * permissions on the device.
1180 */
1181 if (!vfs_context_issuser(ctx)) {
1182 accessmode = KAUTH_VNODE_READ_DATA;
1183 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1184 accessmode |= KAUTH_VNODE_WRITE_DATA;
1185 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1186 IMGSRC_DEBUG("Access denied.\n");
1187 goto out1;
1188 }
1189 }
1190
1191 *devvpp = vp;
1192
1193 out1:
1194 vnode_put(realdevvp);
1195 out:
1196 nameidone(&nd);
1197 if (error) {
1198 vnode_put(vp);
1199 }
1200
1201 return error;
1202 }
1203
1204 /*
1205 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1206 * and call checkdirs()
1207 */
1208 static int
1209 place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1210 {
1211 int error;
1212
1213 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1214
1215 vnode_lock_spin(vp);
1216 CLR(vp->v_flag, VMOUNT);
1217 vp->v_mountedhere = mp;
1218 vnode_unlock(vp);
1219
1220 /*
1221 * taking the name_cache_lock exclusively will
1222 * ensure that everyone is out of the fast path who
1223 * might be trying to use a now-stale copy of
1224 * vp->v_mountedhere->mnt_realrootvp;
1225 * bumping mount_generation causes the cached values
1226 * to be invalidated
1227 */
1228 name_cache_lock();
1229 mount_generation++;
1230 name_cache_unlock();
1231
1232 error = vnode_ref(vp);
1233 if (error != 0) {
1234 goto out;
1235 }
1236
1237 error = checkdirs(vp, ctx);
1238 if (error != 0) {
1239 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1240 vnode_rele(vp);
1241 goto out;
1242 }
1243
1244 out:
1245 if (error != 0) {
1246 mp->mnt_vnodecovered = NULLVP;
1247 }
1248 return error;
1249 }
1250
1251 static void
1252 undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1253 {
1254 vnode_rele(vp);
1255 vnode_lock_spin(vp);
1256 vp->v_mountedhere = (mount_t)NULL;
1257 vnode_unlock(vp);
1258
1259 mp->mnt_vnodecovered = NULLVP;
1260 }
1261
1262 static int
1263 mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1264 {
1265 int error;
1266
1267 /* unmount in progress; return error */
1268 mount_lock_spin(mp);
1269 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1270 mount_unlock(mp);
1271 return EBUSY;
1272 }
1273 mount_unlock(mp);
1274 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1275
1276 /*
1277 * We only allow the filesystem to be reloaded if it
1278 * is currently mounted read-only.
1279 */
1280 if ((flags & MNT_RELOAD) &&
1281 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1282 error = ENOTSUP;
1283 goto out;
1284 }
1285
1286 /*
1287 * Only root, or the user that did the original mount is
1288 * permitted to update it.
1289 */
1290 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1291 (!vfs_context_issuser(ctx))) {
1292 error = EPERM;
1293 goto out;
1294 }
1295 #if CONFIG_MACF
1296 error = mac_mount_check_remount(ctx, mp);
1297 if (error != 0) {
1298 goto out;
1299 }
1300 #endif
1301
1302 out:
1303 if (error) {
1304 lck_rw_done(&mp->mnt_rwlock);
1305 }
1306
1307 return error;
1308 }
1309
1310 static void
1311 mount_end_update(mount_t mp)
1312 {
1313 lck_rw_done(&mp->mnt_rwlock);
1314 }
1315
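/*
 * Return, with an iocount held, the imageboot source root vnode recorded
 * at the given nesting height, if there is one.
 */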
1316 static int
1317 get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1318 {
1319 vnode_t vp;
1320
1321 if (height >= MAX_IMAGEBOOT_NESTING) {
1322 return EINVAL;
1323 }
1324
1325 vp = imgsrc_rootvnodes[height];
1326 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1327 *rvpp = vp;
1328 return 0;
1329 } else {
1330 return ENOENT;
1331 }
1332 }
1333
1334 static int
1335 relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
1336 const char *fsname, vfs_context_t ctx,
1337 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
1338 {
1339 int error;
1340 mount_t mp;
1341 boolean_t placed = FALSE;
1342 vnode_t devvp = NULLVP;
1343 struct vfstable *vfsp;
1344 user_addr_t devpath;
1345 char *old_mntonname;
1346 vnode_t rvp;
1347 uint32_t height;
1348 uint32_t flags;
1349
1350 /* If we didn't imageboot, nothing to move */
1351 if (imgsrc_rootvnodes[0] == NULLVP) {
1352 return EINVAL;
1353 }
1354
1355 /* Only root can do this */
1356 if (!vfs_context_issuser(ctx)) {
1357 return EPERM;
1358 }
1359
1360 IMGSRC_DEBUG("looking for root vnode.\n");
1361
1362 /*
1363 * Get root vnode of filesystem we're moving.
1364 */
1365 if (by_index) {
1366 if (is64bit) {
1367 struct user64_mnt_imgsrc_args mia64;
1368 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1369 if (error != 0) {
1370 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1371 return error;
1372 }
1373
1374 height = mia64.mi_height;
1375 flags = mia64.mi_flags;
1376 devpath = mia64.mi_devpath;
1377 } else {
1378 struct user32_mnt_imgsrc_args mia32;
1379 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1380 if (error != 0) {
1381 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1382 return error;
1383 }
1384
1385 height = mia32.mi_height;
1386 flags = mia32.mi_flags;
1387 devpath = mia32.mi_devpath;
1388 }
1389 } else {
1390 /*
1391 * For binary compatibility--assumes one level of nesting.
1392 */
1393 if (is64bit) {
1394 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1395 return error;
1396 } else {
1397 user32_addr_t tmp;
1398 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1399 return error;
1400
1401 /* munge into LP64 addr */
1402 devpath = CAST_USER_ADDR_T(tmp);
1403 }
1404
1405 height = 0;
1406 flags = 0;
1407 }
1408
1409 if (flags != 0) {
1410 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1411 return EINVAL;
1412 }
1413
1414 error = get_imgsrc_rootvnode(height, &rvp);
1415 if (error != 0) {
1416 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
1417 return error;
1418 }
1419
1420 IMGSRC_DEBUG("got root vnode.\n");
1421
1422 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1423
1424 /* Can only move once */
1425 mp = vnode_mount(rvp);
1426 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1427 IMGSRC_DEBUG("Already moved.\n");
1428 error = EBUSY;
1429 goto out0;
1430 }
1431
1432 IMGSRC_DEBUG("Starting updated.\n");
1433
1434 /* Get exclusive rwlock on mount, authorize update on mp */
1435 error = mount_begin_update(mp , ctx, 0);
1436 if (error != 0) {
1437 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
1438 goto out0;
1439 }
1440
1441 /*
1442 * It can only be moved once. Flag is set under the rwlock,
1443 * so we're now safe to proceed.
1444 */
1445 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1446 IMGSRC_DEBUG("Already moved [2]\n");
1447 goto out1;
1448 }
1449
1450
1451 IMGSRC_DEBUG("Preparing coveredvp.\n");
1452
1453 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1454 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
1455 if (error != 0) {
1456 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
1457 goto out1;
1458 }
1459
1460 IMGSRC_DEBUG("Covered vp OK.\n");
1461
1462 /* Sanity check the name the caller has provided */
1463 vfsp = mp->mnt_vtable;
1464 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
1465 IMGSRC_DEBUG("Wrong fs name.\n");
1466 error = EINVAL;
1467 goto out2;
1468 }
1469
1470 /* Check the device vnode and update mount-from name, for local filesystems */
1471 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
1472 IMGSRC_DEBUG("Local, doing device validation.\n");
1473
1474 if (devpath != USER_ADDR_NULL) {
1475 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1476 if (error) {
1477 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1478 goto out2;
1479 }
1480
1481 vnode_put(devvp);
1482 }
1483 }
1484
1485 /*
1486 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1487 * and increment the name cache's mount generation
1488 */
1489
1490 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1491 error = place_mount_and_checkdirs(mp, vp, ctx);
1492 if (error != 0) {
1493 goto out2;
1494 }
1495
1496 placed = TRUE;
1497
1498 strncpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1499 strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1500
1501 /* Forbid future moves */
1502 mount_lock(mp);
1503 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1504 mount_unlock(mp);
1505
1506 /* Finally, add to mount list, completely ready to go */
1507 if (mount_list_add(mp) != 0) {
1508 /*
1509 * The system is shutting down, trying to unmount
1510 * everything, so fail with a plausible errno.
1511 */
1512 error = EBUSY;
1513 goto out3;
1514 }
1515
1516 mount_end_update(mp);
1517 vnode_put(rvp);
1518 FREE(old_mntonname, M_TEMP);
1519
1520 vfs_notify_mount(pvp);
1521
1522 return 0;
1523 out3:
1524 strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1525
1526 mount_lock(mp);
1527 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1528 mount_unlock(mp);
1529
1530 out2:
1531 /*
1532 * Placing the mp on the vnode clears VMOUNT,
1533 * so cleanup is different after that point
1534 */
1535 if (placed) {
1536 /* Rele the vp, clear VMOUNT and v_mountedhere */
1537 undo_place_on_covered_vp(mp, vp);
1538 } else {
1539 vnode_lock_spin(vp);
1540 CLR(vp->v_flag, VMOUNT);
1541 vnode_unlock(vp);
1542 }
1543 out1:
1544 mount_end_update(mp);
1545
1546 out0:
1547 vnode_put(rvp);
1548 FREE(old_mntonname, M_TEMP);
1549 return error;
1550 }
1551
1552 #endif /* CONFIG_IMGSRC_ACCESS */
1553
1554 void
1555 enablequotas(struct mount *mp, vfs_context_t ctx)
1556 {
1557 struct nameidata qnd;
1558 int type;
1559 char qfpath[MAXPATHLEN];
1560 const char *qfname = QUOTAFILENAME;
1561 const char *qfopsname = QUOTAOPSNAME;
1562 const char *qfextension[] = INITQFNAMES;
1563
1564 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1565 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1566 return;
1567 }
1568 /*
1569 * Enable filesystem disk quotas if necessary.
1570 * Errors are ignored, as they should not interfere with the final mount.
1571 */
1572 for (type=0; type < MAXQUOTAS; type++) {
1573 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
1574 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1575 CAST_USER_ADDR_T(qfpath), ctx);
1576 if (namei(&qnd) != 0)
1577 continue; /* option file to trigger quotas is not present */
1578 vnode_put(qnd.ni_vp);
1579 nameidone(&qnd);
1580 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
1581
1582 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
1583 }
1584 return;
1585 }
1586
1587
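/*
 * Per-process callback for checkdirs(): if this process's current or
 * root directory is the vnode that was just covered, replace it with
 * the root of the newly mounted filesystem.
 */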
1588 static int
1589 checkdirs_callback(proc_t p, void * arg)
1590 {
1591 struct cdirargs * cdrp = (struct cdirargs * )arg;
1592 vnode_t olddp = cdrp->olddp;
1593 vnode_t newdp = cdrp->newdp;
1594 struct filedesc *fdp;
1595 vnode_t tvp;
1596 vnode_t fdp_cvp;
1597 vnode_t fdp_rvp;
1598 int cdir_changed = 0;
1599 int rdir_changed = 0;
1600
1601 /*
1602 * XXX Also needs to iterate each thread in the process to see if it
1603 * XXX is using a per-thread current working directory, and, if so,
1604 * XXX update that as well.
1605 */
1606
1607 proc_fdlock(p);
1608 fdp = p->p_fd;
1609 if (fdp == (struct filedesc *)0) {
1610 proc_fdunlock(p);
1611 return(PROC_RETURNED);
1612 }
1613 fdp_cvp = fdp->fd_cdir;
1614 fdp_rvp = fdp->fd_rdir;
1615 proc_fdunlock(p);
1616
1617 if (fdp_cvp == olddp) {
1618 vnode_ref(newdp);
1619 tvp = fdp->fd_cdir;
1620 fdp_cvp = newdp;
1621 cdir_changed = 1;
1622 vnode_rele(tvp);
1623 }
1624 if (fdp_rvp == olddp) {
1625 vnode_ref(newdp);
1626 tvp = fdp->fd_rdir;
1627 fdp_rvp = newdp;
1628 rdir_changed = 1;
1629 vnode_rele(tvp);
1630 }
1631 if (cdir_changed || rdir_changed) {
1632 proc_fdlock(p);
1633 fdp->fd_cdir = fdp_cvp;
1634 fdp->fd_rdir = fdp_rvp;
1635 proc_fdunlock(p);
1636 }
1637 return(PROC_RETURNED);
1638 }
1639
1640
1641
1642 /*
1643 * Scan all active processes to see if any of them have a current
1644 * or root directory onto which the new filesystem has just been
1645 * mounted. If so, replace them with the new mount point.
1646 */
1647 static int
1648 checkdirs(vnode_t olddp, vfs_context_t ctx)
1649 {
1650 vnode_t newdp;
1651 vnode_t tvp;
1652 int err;
1653 struct cdirargs cdr;
1654 struct uthread * uth = get_bsdthread_info(current_thread());
1655
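/*
 * If the only usecount on the covered vnode is the one the mount itself
 * holds, no process can have it as a cwd or root directory, so there is
 * nothing to fix up.
 */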
1656 if (olddp->v_usecount == 1)
1657 return(0);
1658 if (uth != (struct uthread *)0)
1659 uth->uu_notrigger = 1;
1660 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
1661 if (uth != (struct uthread *)0)
1662 uth->uu_notrigger = 0;
1663
1664 if (err != 0) {
1665 #if DIAGNOSTIC
1666 panic("mount: lost mount: error %d", err);
1667 #endif
1668 return(err);
1669 }
1670
1671 cdr.olddp = olddp;
1672 cdr.newdp = newdp;
1673 /* do not block for exec/fork trans as the vnodes in cwd & rootdir are not changing */
1674 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
1675
1676 if (rootvnode == olddp) {
1677 vnode_ref(newdp);
1678 tvp = rootvnode;
1679 rootvnode = newdp;
1680 vnode_rele(tvp);
1681 }
1682
1683 vnode_put(newdp);
1684 return(0);
1685 }
1686
1687 /*
1688 * Unmount a file system.
1689 *
1690 * Note: unmount takes a path to the vnode mounted on as argument,
1691 * not the special file (as it did historically).
1692 */
1693 /* ARGSUSED */
1694 int
1695 unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1696 {
1697 vnode_t vp;
1698 struct mount *mp;
1699 int error;
1700 struct nameidata nd;
1701 vfs_context_t ctx = vfs_context_current();
1702
1703 NDINIT(&nd, LOOKUP, OP_UNMOUNT, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1704 UIO_USERSPACE, uap->path, ctx);
1705 error = namei(&nd);
1706 if (error)
1707 return (error);
1708 vp = nd.ni_vp;
1709 mp = vp->v_mount;
1710 nameidone(&nd);
1711
1712 #if CONFIG_MACF
1713 error = mac_mount_check_umount(ctx, mp);
1714 if (error != 0) {
1715 vnode_put(vp);
1716 return (error);
1717 }
1718 #endif
1719 /*
1720 * Must be the root of the filesystem
1721 */
1722 if ((vp->v_flag & VROOT) == 0) {
1723 vnode_put(vp);
1724 return (EINVAL);
1725 }
1726 mount_ref(mp, 0);
1727 vnode_put(vp);
1728 /* safedounmount consumes the mount ref */
1729 return (safedounmount(mp, uap->flags, ctx));
1730 }
1731
1732 int
1733 vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
1734 {
1735 mount_t mp;
1736
1737 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1738 if (mp == (mount_t)0) {
1739 return(ENOENT);
1740 }
1741 mount_ref(mp, 0);
1742 mount_iterdrop(mp);
1743 /* safedounmount consumes the mount ref */
1744 return(safedounmount(mp, flags, ctx));
1745 }
1746
1747
1748 /*
1749 * The mount struct comes with a mount ref which will be consumed.
1750 * Do the actual file system unmount and prevent some common foot-shooting.
1751 */
1752 int
1753 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1754 {
1755 int error;
1756 proc_t p = vfs_context_proc(ctx);
1757
1758 /*
1759 * If the file system is not responding, MNT_NOBLOCK is set,
1760 * and this is not a forced unmount, then return EBUSY.
1761 */
1762 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1763 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1764 error = EBUSY;
1765 goto out;
1766 }
1767
1768 /*
1769 * Skip authorization if the mount is tagged as permissive and
1770 * this is not a forced-unmount attempt.
1771 */
1772 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1773 /*
1774 * Only root, or the user that did the original mount is
1775 * permitted to unmount this filesystem.
1776 */
1777 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1778 (error = suser(kauth_cred_get(), &p->p_acflag)))
1779 goto out;
1780 }
1781 /*
1782 * Don't allow unmounting the root file system.
1783 */
1784 if (mp->mnt_flag & MNT_ROOTFS) {
1785 error = EBUSY; /* the root is always busy */
1786 goto out;
1787 }
1788
1789 #ifdef CONFIG_IMGSRC_ACCESS
1790 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1791 error = EBUSY;
1792 goto out;
1793 }
1794 #endif /* CONFIG_IMGSRC_ACCESS */
1795
1796 return (dounmount(mp, flags, 1, ctx));
1797
1798 out:
1799 mount_drop(mp, 0);
1800 return(error);
1801 }
1802
1803 /*
1804 * Do the actual file system unmount.
1805 */
1806 int
1807 dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1808 {
1809 vnode_t coveredvp = (vnode_t)0;
1810 int error;
1811 int needwakeup = 0;
1812 int forcedunmount = 0;
1813 int lflags = 0;
1814 struct vnode *devvp = NULLVP;
1815 #if CONFIG_TRIGGERS
1816 proc_t p = vfs_context_proc(ctx);
1817 int did_vflush = 0;
1818 int pflags_save = 0;
1819 #endif /* CONFIG_TRIGGERS */
1820
1821 if (flags & MNT_FORCE)
1822 forcedunmount = 1;
1823
1824 mount_lock(mp);
1825 /* XXX post jaguar fix LK_DRAIN - then clean this up */
1826 if ((flags & MNT_FORCE)) {
1827 mp->mnt_kern_flag |= MNTK_FRCUNMOUNT;
1828 mp->mnt_lflag |= MNT_LFORCE;
1829 }
1830 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1831 mp->mnt_lflag |= MNT_LWAIT;
1832 if(withref != 0)
1833 mount_drop(mp, 1);
1834 msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "dounmount", NULL);
1835 /*
1836 * The prior unmount attempt has probably succeeded.
1837 * Do not dereference mp here - returning EBUSY is safest.
1838 */
1839 return (EBUSY);
1840 }
1841
1842 #if CONFIG_TRIGGERS
1843 if (flags & MNT_NOBLOCK && p != kernproc)
1844 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
1845 #endif
1846
1847 mp->mnt_kern_flag |= MNTK_UNMOUNT;
1848 mp->mnt_lflag |= MNT_LUNMOUNT;
1849 mp->mnt_flag &=~ MNT_ASYNC;
1850 /*
1851 * anyone currently in the fast path that
1852 * trips over the cached rootvp will be
1853 * dumped out and forced into the slow path
1854 * to regenerate a new cached value
1855 */
1856 mp->mnt_realrootvp = NULLVP;
1857 mount_unlock(mp);
1858
1859 /*
1860 * taking the name_cache_lock exclusively will
1861 * ensure that everyone is out of the fast path who
1862 * might be trying to use a now-stale copy of
1863 * vp->v_mountedhere->mnt_realrootvp;
1864 * bumping mount_generation causes the cached values
1865 * to be invalidated
1866 */
1867 name_cache_lock();
1868 mount_generation++;
1869 name_cache_unlock();
1870
1871
1872 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1873 if (withref != 0)
1874 mount_drop(mp, 0);
1875 #if CONFIG_FSE
1876 fsevent_unmount(mp); /* has to come first! */
1877 #endif
1878 error = 0;
1879 if (forcedunmount == 0) {
1880 ubc_umount(mp); /* release cached vnodes */
1881 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1882 error = VFS_SYNC(mp, MNT_WAIT, ctx);
1883 if (error) {
1884 mount_lock(mp);
1885 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1886 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1887 mp->mnt_lflag &= ~MNT_LFORCE;
1888 goto out;
1889 }
1890 }
1891 }
1892
1893 #if CONFIG_TRIGGERS
1894 vfs_nested_trigger_unmounts(mp, flags, ctx);
1895 did_vflush = 1;
1896 #endif
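/*
 * Reclaim the vnodes on this mount (skipping swap, system, and root
 * vnodes); a forced unmount additionally closes out vnodes that are
 * still in use.
 */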
1897 if (forcedunmount)
1898 lflags |= FORCECLOSE;
1899 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
1900 if ((forcedunmount == 0) && error) {
1901 mount_lock(mp);
1902 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1903 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1904 mp->mnt_lflag &= ~MNT_LFORCE;
1905 goto out;
1906 }
1907
1908 /* make sure no one is in the mount iterations or lookups */
1909 mount_iterdrain(mp);
1910
1911 error = VFS_UNMOUNT(mp, flags, ctx);
1912 if (error) {
1913 mount_iterreset(mp);
1914 mount_lock(mp);
1915 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1916 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1917 mp->mnt_lflag &= ~MNT_LFORCE;
1918 goto out;
1919 }
1920
1921 /* increment the operations count */
1922 if (!error)
1923 OSAddAtomic(1, &vfs_nummntops);
1924
1925 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
1926 /* hold an io reference and drop the usecount before close */
1927 devvp = mp->mnt_devvp;
1928 vnode_getalways(devvp);
1929 vnode_rele(devvp);
1930 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1931 ctx);
1932 vnode_clearmountedon(devvp);
1933 vnode_put(devvp);
1934 }
1935 lck_rw_done(&mp->mnt_rwlock);
1936 mount_list_remove(mp);
1937 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1938
1939 /* mark the mount point hook in the vp but do not drop the ref yet */
1940 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
1941 vnode_getwithref(coveredvp);
1942 vnode_lock_spin(coveredvp);
1943
1944 mp->mnt_crossref++;
1945 coveredvp->v_mountedhere = (struct mount *)0;
1946
1947 vnode_unlock(coveredvp);
1948 vnode_put(coveredvp);
1949 }
1950
1951 mount_list_lock();
1952 mp->mnt_vtable->vfc_refcount--;
1953 mount_list_unlock();
1954
1955 cache_purgevfs(mp); /* remove cache entries for this file sys */
1956 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
1957 mount_lock(mp);
1958 mp->mnt_lflag |= MNT_LDEAD;
1959
1960 if (mp->mnt_lflag & MNT_LWAIT) {
1961 /*
1962 * do the wakeup here
1963 * in case we block in mount_refdrain
1964 * which will drop the mount lock
1965 * and allow anyone blocked in vfs_busy
1966 * to wakeup and see the LDEAD state
1967 */
1968 mp->mnt_lflag &= ~MNT_LWAIT;
1969 wakeup((caddr_t)mp);
1970 }
1971 mount_refdrain(mp);
1972 out:
1973 if (mp->mnt_lflag & MNT_LWAIT) {
1974 mp->mnt_lflag &= ~MNT_LWAIT;
1975 needwakeup = 1;
1976 }
1977
1978 #if CONFIG_TRIGGERS
1979 if (flags & MNT_NOBLOCK && p != kernproc) {
1980 // Restore P_NOREMOTEHANG bit to its previous value
1981 if ((pflags_save & P_NOREMOTEHANG) == 0)
1982 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
1983 }
1984
1985 /*
1986 * Callback and context are set together under the mount lock, and
1987 * never cleared, so we're safe to examine them here, drop the lock,
1988 * and call out.
1989 */
1990 if (mp->mnt_triggercallback != NULL) {
1991 mount_unlock(mp);
1992 if (error == 0) {
1993 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
1994 } else if (did_vflush) {
1995 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
1996 }
1997 } else {
1998 mount_unlock(mp);
1999 }
2000 #else
2001 mount_unlock(mp);
2002 #endif /* CONFIG_TRIGGERS */
2003
2004 lck_rw_done(&mp->mnt_rwlock);
2005
2006 if (needwakeup)
2007 wakeup((caddr_t)mp);
2008
2009 if (!error) {
2010 if ((coveredvp != NULLVP)) {
2011 vnode_t pvp;
2012
2013 vnode_getwithref(coveredvp);
2014 pvp = vnode_getparent(coveredvp);
2015 vnode_rele(coveredvp);
2016
2017 mount_dropcrossref(mp, coveredvp, 0);
2018 #if CONFIG_TRIGGERS
2019 if (coveredvp->v_resolve)
2020 vnode_trigger_rearm(coveredvp, ctx);
2021 #endif
2022 vnode_put(coveredvp);
2023
2024 if (pvp) {
2025 lock_vnode_and_post(pvp, NOTE_WRITE);
2026 vnode_put(pvp);
2027 }
2028 } else if (mp->mnt_flag & MNT_ROOTFS) {
2029 mount_lock_destroy(mp);
2030 #if CONFIG_MACF
2031 mac_mount_label_destroy(mp);
2032 #endif
2033 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2034 } else
2035 panic("dounmount: no coveredvp");
2036 }
2037 return (error);
2038 }
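/*
 * dounmount() above is the kernel side of the unmount(2) system call.
 * A minimal userspace sketch of how a forced unmount reaches this path
 * (the mount point path is illustrative):
 *
 *    #include <sys/param.h>
 *    #include <sys/mount.h>
 *    #include <stdio.h>
 *
 *    if (unmount("/Volumes/Example", MNT_FORCE) == -1)
 *        perror("unmount");
 */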
2039
2040 void
2041 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2042 {
2043 vnode_lock(dp);
2044 mp->mnt_crossref--;
2045
2046 if (mp->mnt_crossref < 0)
2047 panic("mount cross refs -ve");
2048
2049 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
2050
2051 if (need_put)
2052 vnode_put_locked(dp);
2053 vnode_unlock(dp);
2054
2055 mount_lock_destroy(mp);
2056 #if CONFIG_MACF
2057 mac_mount_label_destroy(mp);
2058 #endif
2059 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2060 return;
2061 }
2062 if (need_put)
2063 vnode_put_locked(dp);
2064 vnode_unlock(dp);
2065 }
2066
2067
2068 /*
2069 * Sync each mounted filesystem.
2070 */
2071 #if DIAGNOSTIC
2072 int syncprt = 0;
2073 struct ctldebug debug0 = { "syncprt", &syncprt };
2074 #endif
2075
2076 int print_vmpage_stat=0;
2077
2078 static int
2079 sync_callback(mount_t mp, void * arg)
2080 {
2081 int asyncflag;
2082
2083 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2084 asyncflag = mp->mnt_flag & MNT_ASYNC;
2085 mp->mnt_flag &= ~MNT_ASYNC;
2086 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_current());
2087 if (asyncflag)
2088 mp->mnt_flag |= MNT_ASYNC;
2089 }
2090 return(VFS_RETURNED);
2091 }
2092
2093
2094 /* ARGSUSED */
2095 int
2096 sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
2097 {
2098 vfs_iterate(LK_NOWAIT, sync_callback, (void *)0);
2099
2100 if(print_vmpage_stat) {
2101 vm_countdirtypages();
2102 }
2103
2104 #if DIAGNOSTIC
2105 if (syncprt)
2106 vfs_bufstats();
2107 #endif /* DIAGNOSTIC */
2108 return (0);
2109 }
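/*
 * Note that sync(2) passes a NULL arg to sync_callback() above, so each
 * writable filesystem is pushed with MNT_NOWAIT: the flush is scheduled
 * but not waited for.  A minimal userspace sketch:
 *
 *    #include <unistd.h>
 *
 *    sync();    // schedule a flush of dirty data on all writable mounts
 */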
2110
2111 /*
2112 * Change filesystem quotas.
2113 */
2114 #if QUOTA
2115 static int quotactl_funneled(proc_t p, struct quotactl_args *uap, int32_t *retval);
2116
2117 int
2118 quotactl(proc_t p, struct quotactl_args *uap, int32_t *retval)
2119 {
2120 boolean_t funnel_state;
2121 int error;
2122
2123 funnel_state = thread_funnel_set(kernel_flock, TRUE);
2124 error = quotactl_funneled(p, uap, retval);
2125 thread_funnel_set(kernel_flock, funnel_state);
2126 return(error);
2127 }
2128
2129 static int
2130 quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
2131 {
2132 struct mount *mp;
2133 int error, quota_cmd, quota_status;
2134 caddr_t datap;
2135 size_t fnamelen;
2136 struct nameidata nd;
2137 vfs_context_t ctx = vfs_context_current();
2138 struct dqblk my_dqblk;
2139
2140 AUDIT_ARG(uid, uap->uid);
2141 AUDIT_ARG(cmd, uap->cmd);
2142 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2143 uap->path, ctx);
2144 error = namei(&nd);
2145 if (error)
2146 return (error);
2147 mp = nd.ni_vp->v_mount;
2148 vnode_put(nd.ni_vp);
2149 nameidone(&nd);
2150
2151 /* copyin any data we will need for downstream code */
2152 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2153
2154 switch (quota_cmd) {
2155 case Q_QUOTAON:
2156 /* uap->arg specifies a file from which to take the quotas */
2157 fnamelen = MAXPATHLEN;
2158 datap = kalloc(MAXPATHLEN);
2159 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2160 break;
2161 case Q_GETQUOTA:
2162 /* uap->arg is a pointer to a dqblk structure. */
2163 datap = (caddr_t) &my_dqblk;
2164 break;
2165 case Q_SETQUOTA:
2166 case Q_SETUSE:
2167 /* uap->arg is a pointer to a dqblk structure. */
2168 datap = (caddr_t) &my_dqblk;
2169 if (proc_is64bit(p)) {
2170 struct user_dqblk my_dqblk64;
2171 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
2172 if (error == 0) {
2173 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2174 }
2175 }
2176 else {
2177 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
2178 }
2179 break;
2180 case Q_QUOTASTAT:
2181 /* uap->arg is a pointer to an integer */
2182 datap = (caddr_t) &quota_status;
2183 break;
2184 default:
2185 datap = NULL;
2186 break;
2187 } /* switch */
2188
2189 if (error == 0) {
2190 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
2191 }
2192
2193 switch (quota_cmd) {
2194 case Q_QUOTAON:
2195 if (datap != NULL)
2196 kfree(datap, MAXPATHLEN);
2197 break;
2198 case Q_GETQUOTA:
2199 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2200 if (error == 0) {
2201 if (proc_is64bit(p)) {
2202 struct user_dqblk my_dqblk64;
2203 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2204 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
2205 }
2206 else {
2207 error = copyout(datap, uap->arg, sizeof (struct dqblk));
2208 }
2209 }
2210 break;
2211 case Q_QUOTASTAT:
2212 /* uap->arg is a pointer to an integer */
2213 if (error == 0) {
2214 error = copyout(datap, uap->arg, sizeof(quota_status));
2215 }
2216 break;
2217 default:
2218 break;
2219 } /* switch */
2220
2221 return (error);
2222 }
2223 #else
2224 int
2225 quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2226 {
2227 return (EOPNOTSUPP);
2228 }
2229 #endif /* QUOTA */
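/*
 * The quota_cmd/SUBCMDSHIFT split above mirrors the userspace QCMD()
 * encoding from <sys/quota.h>.  A minimal sketch of querying the
 * current user's quota, assuming quotas are configured on the target
 * filesystem:
 *
 *    #include <sys/types.h>
 *    #include <sys/quota.h>
 *    #include <unistd.h>
 *    #include <stdio.h>
 *
 *    struct dqblk dq;
 *
 *    if (quotactl("/", QCMD(Q_GETQUOTA, USRQUOTA), getuid(), (caddr_t)&dq) == -1)
 *        perror("quotactl");
 */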
2230
2231 /*
2232 * Get filesystem statistics.
2233 *
2234 * Returns: 0 Success
2235 * namei:???
2236 * vfs_update_vfsstat:???
2237 * munge_statfs:EFAULT
2238 */
2239 /* ARGSUSED */
2240 int
2241 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
2242 {
2243 struct mount *mp;
2244 struct vfsstatfs *sp;
2245 int error;
2246 struct nameidata nd;
2247 vfs_context_t ctx = vfs_context_current();
2248 vnode_t vp;
2249
2250 NDINIT(&nd, LOOKUP, OP_STATFS, NOTRIGGER | FOLLOW | AUDITVNPATH1,
2251 UIO_USERSPACE, uap->path, ctx);
2252 error = namei(&nd);
2253 if (error)
2254 return (error);
2255 vp = nd.ni_vp;
2256 mp = vp->v_mount;
2257 sp = &mp->mnt_vfsstat;
2258 nameidone(&nd);
2259
2260 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
2261 if (error != 0) {
2262 vnode_put(vp);
2263 return (error);
2264 }
2265
2266 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2267 vnode_put(vp);
2268 return (error);
2269 }
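/*
 * A minimal userspace sketch of statfs(2); the fields printed below are
 * members of the per-mount vfsstatfs that the copy-out above exposes:
 *
 *    #include <sys/param.h>
 *    #include <sys/mount.h>
 *    #include <stdio.h>
 *
 *    struct statfs sf;
 *
 *    if (statfs("/", &sf) == -1)
 *        perror("statfs");
 *    else
 *        printf("%s on %s: %llu of %llu blocks free\n",
 *            sf.f_mntfromname, sf.f_mntonname,
 *            (unsigned long long)sf.f_bfree,
 *            (unsigned long long)sf.f_blocks);
 */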
2270
2271 /*
2272 * Get filesystem statistics.
2273 */
2274 /* ARGSUSED */
2275 int
2276 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
2277 {
2278 vnode_t vp;
2279 struct mount *mp;
2280 struct vfsstatfs *sp;
2281 int error;
2282
2283 AUDIT_ARG(fd, uap->fd);
2284
2285 if ( (error = file_vnode(uap->fd, &vp)) )
2286 return (error);
2287
2288 error = vnode_getwithref(vp);
2289 if (error) {
2290 file_drop(uap->fd);
2291 return (error);
2292 }
2293
2294 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2295
2296 mp = vp->v_mount;
2297 if (!mp) {
2298 error = EBADF;
2299 goto out;
2300 }
2301 sp = &mp->mnt_vfsstat;
2302 if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
2303 goto out;
2304 }
2305
2306 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2307
2308 out:
2309 file_drop(uap->fd);
2310 vnode_put(vp);
2311
2312 return (error);
2313 }
2314
2315 /*
2316 * Common routine to handle copying of statfs64 data to user space
2317 */
2318 static int
2319 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2320 {
2321 int error;
2322 struct statfs64 sfs;
2323
2324 bzero(&sfs, sizeof(sfs));
2325
2326 sfs.f_bsize = sfsp->f_bsize;
2327 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2328 sfs.f_blocks = sfsp->f_blocks;
2329 sfs.f_bfree = sfsp->f_bfree;
2330 sfs.f_bavail = sfsp->f_bavail;
2331 sfs.f_files = sfsp->f_files;
2332 sfs.f_ffree = sfsp->f_ffree;
2333 sfs.f_fsid = sfsp->f_fsid;
2334 sfs.f_owner = sfsp->f_owner;
2335 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2336 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2337 sfs.f_fssubtype = sfsp->f_fssubtype;
2338 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2339 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2340 } else {
2341 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2342 }
2343 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2344 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2345
2346 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2347
2348 return(error);
2349 }
2350
2351 /*
2352 * Get file system statistics in 64-bit mode
2353 */
2354 int
2355 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2356 {
2357 struct mount *mp;
2358 struct vfsstatfs *sp;
2359 int error;
2360 struct nameidata nd;
2361 vfs_context_t ctxp = vfs_context_current();
2362 vnode_t vp;
2363
2364 NDINIT(&nd, LOOKUP, OP_STATFS, NOTRIGGER | FOLLOW | AUDITVNPATH1,
2365 UIO_USERSPACE, uap->path, ctxp);
2366 error = namei(&nd);
2367 if (error)
2368 return (error);
2369 vp = nd.ni_vp;
2370 mp = vp->v_mount;
2371 sp = &mp->mnt_vfsstat;
2372 nameidone(&nd);
2373
2374 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
2375 if (error != 0) {
2376 vnode_put(vp);
2377 return (error);
2378 }
2379
2380 error = statfs64_common(mp, sp, uap->buf);
2381 vnode_put(vp);
2382
2383 return (error);
2384 }
2385
2386 /*
2387 * Get file system statistics in 64-bit mode
2388 */
2389 int
2390 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2391 {
2392 struct vnode *vp;
2393 struct mount *mp;
2394 struct vfsstatfs *sp;
2395 int error;
2396
2397 AUDIT_ARG(fd, uap->fd);
2398
2399 if ( (error = file_vnode(uap->fd, &vp)) )
2400 return (error);
2401
2402 error = vnode_getwithref(vp);
2403 if (error) {
2404 file_drop(uap->fd);
2405 return (error);
2406 }
2407
2408 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2409
2410 mp = vp->v_mount;
2411 if (!mp) {
2412 error = EBADF;
2413 goto out;
2414 }
2415 sp = &mp->mnt_vfsstat;
2416 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2417 goto out;
2418 }
2419
2420 error = statfs64_common(mp, sp, uap->buf);
2421
2422 out:
2423 file_drop(uap->fd);
2424 vnode_put(vp);
2425
2426 return (error);
2427 }
2428
2429 struct getfsstat_struct {
2430 user_addr_t sfsp;
2431 user_addr_t *mp;
2432 int count;
2433 int maxcount;
2434 int flags;
2435 int error;
2436 };
2437
2438
2439 static int
2440 getfsstat_callback(mount_t mp, void * arg)
2441 {
2442
2443 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2444 struct vfsstatfs *sp;
2445 int error, my_size;
2446 vfs_context_t ctx = vfs_context_current();
2447
2448 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2449 sp = &mp->mnt_vfsstat;
2450 /*
2451 * If MNT_NOWAIT is specified, do not refresh the
2452 * fsstat cache. MNT_WAIT/MNT_DWAIT override MNT_NOWAIT.
2453 */
2454 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2455 (error = vfs_update_vfsstat(mp, ctx,
2456 VFS_USER_EVENT))) {
2457 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2458 return(VFS_RETURNED);
2459 }
2460
2461 /*
2462 * Need to handle LP64 version of struct statfs
2463 */
2464 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
2465 if (error) {
2466 fstp->error = error;
2467 return(VFS_RETURNED_DONE);
2468 }
2469 fstp->sfsp += my_size;
2470
2471 if (fstp->mp) {
2472 #if CONFIG_MACF
2473 error = mac_mount_label_get(mp, *fstp->mp);
2474 if (error) {
2475 fstp->error = error;
2476 return(VFS_RETURNED_DONE);
2477 }
2478 #endif
2479 fstp->mp++;
2480 }
2481 }
2482 fstp->count++;
2483 return(VFS_RETURNED);
2484 }
2485
2486 /*
2487 * Get statistics on all filesystems.
2488 */
2489 int
2490 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2491 {
2492 struct __mac_getfsstat_args muap;
2493
2494 muap.buf = uap->buf;
2495 muap.bufsize = uap->bufsize;
2496 muap.mac = USER_ADDR_NULL;
2497 muap.macsize = 0;
2498 muap.flags = uap->flags;
2499
2500 return (__mac_getfsstat(p, &muap, retval));
2501 }
2502
2503 /*
2504 * __mac_getfsstat: Get MAC-related file system statistics
2505 *
2506 * Parameters: p (ignored)
2507 * uap User argument descriptor (see below)
2508 * retval Count of file system statistics (N stats)
2509 *
2510 * Indirect: uap->bufsize Buffer size
2511 * uap->macsize MAC info size
2512 * uap->buf Buffer where information will be returned
2513 * uap->mac MAC info
2514 * uap->flags File system flags
2515 *
2516 *
2517 * Returns: 0 Success
2518 * !0 Not success
2519 *
2520 */
2521 int
2522 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
2523 {
2524 user_addr_t sfsp;
2525 user_addr_t *mp;
2526 size_t count, maxcount, bufsize, macsize;
2527 struct getfsstat_struct fst;
2528
2529 bufsize = (size_t) uap->bufsize;
2530 macsize = (size_t) uap->macsize;
2531
2532 if (IS_64BIT_PROCESS(p)) {
2533 maxcount = bufsize / sizeof(struct user64_statfs);
2534 }
2535 else {
2536 maxcount = bufsize / sizeof(struct user32_statfs);
2537 }
2538 sfsp = uap->buf;
2539 count = 0;
2540
2541 mp = NULL;
2542
2543 #if CONFIG_MACF
2544 if (uap->mac != USER_ADDR_NULL) {
2545 u_int32_t *mp0;
2546 int error;
2547 unsigned int i;
2548
2549 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2550 if (count != maxcount)
2551 return (EINVAL);
2552
2553 /* Copy in the array */
2554 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2555 if (mp0 == NULL) {
2556 return (ENOMEM);
2557 }
2558
2559 error = copyin(uap->mac, mp0, macsize);
2560 if (error) {
2561 FREE(mp0, M_MACTEMP);
2562 return (error);
2563 }
2564
2565 /* Normalize to an array of user_addr_t */
2566 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
2567 if (mp == NULL) {
2568 FREE(mp0, M_MACTEMP);
2569 return (ENOMEM);
2570 }
2571
2572 for (i = 0; i < count; i++) {
2573 if (IS_64BIT_PROCESS(p))
2574 mp[i] = ((user_addr_t *)mp0)[i];
2575 else
2576 mp[i] = (user_addr_t)mp0[i];
2577 }
2578 FREE(mp0, M_MACTEMP);
2579 }
2580 #endif
2581
2582
2583 fst.sfsp = sfsp;
2584 fst.mp = mp;
2585 fst.flags = uap->flags;
2586 fst.count = 0;
2587 fst.error = 0;
2588 fst.maxcount = maxcount;
2589
2590
2591 vfs_iterate(0, getfsstat_callback, &fst);
2592
2593 if (mp)
2594 FREE(mp, M_MACTEMP);
2595
2596 if (fst.error ) {
2597 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2598 return(fst.error);
2599 }
2600
2601 if (fst.sfsp && fst.count > fst.maxcount)
2602 *retval = fst.maxcount;
2603 else
2604 *retval = fst.count;
2605 return (0);
2606 }
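/*
 * A common userspace pattern for getfsstat(2), which the plain and MAC
 * variants above both serve: call once with a NULL buffer so the
 * callback only counts mounts, then call again with MNT_NOWAIT so the
 * cached vfsstatfs is returned without refreshing each filesystem:
 *
 *    #include <sys/param.h>
 *    #include <sys/mount.h>
 *    #include <stdio.h>
 *    #include <stdlib.h>
 *
 *    int i, n = getfsstat(NULL, 0, MNT_NOWAIT);    // count only
 *    struct statfs *buf;
 *
 *    if (n > 0 && (buf = calloc(n, sizeof(*buf))) != NULL) {
 *        n = getfsstat(buf, (int)(n * sizeof(*buf)), MNT_NOWAIT);
 *        for (i = 0; i < n; i++)
 *            printf("%s on %s (%s)\n", buf[i].f_mntfromname,
 *                buf[i].f_mntonname, buf[i].f_fstypename);
 *        free(buf);
 *    }
 */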
2607
2608 static int
2609 getfsstat64_callback(mount_t mp, void * arg)
2610 {
2611 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2612 struct vfsstatfs *sp;
2613 int error;
2614
2615 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2616 sp = &mp->mnt_vfsstat;
2617 /*
2618 * If MNT_NOWAIT is specified, do not refresh the fsstat
2619 * cache. MNT_WAIT overrides MNT_NOWAIT.
2620 *
2621 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2622 * getfsstat, since the constants are out of the same
2623 * namespace.
2624 */
2625 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2626 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2627 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2628 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2629 return(VFS_RETURNED);
2630 }
2631
2632 error = statfs64_common(mp, sp, fstp->sfsp);
2633 if (error) {
2634 fstp->error = error;
2635 return(VFS_RETURNED_DONE);
2636 }
2637 fstp->sfsp += sizeof(struct statfs64);
2638 }
2639 fstp->count++;
2640 return(VFS_RETURNED);
2641 }
2642
2643 /*
2644 * Get statistics on all file systems in 64 bit mode.
2645 */
2646 int
2647 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2648 {
2649 user_addr_t sfsp;
2650 int count, maxcount;
2651 struct getfsstat_struct fst;
2652
2653 maxcount = uap->bufsize / sizeof(struct statfs64);
2654
2655 sfsp = uap->buf;
2656 count = 0;
2657
2658 fst.sfsp = sfsp;
2659 fst.flags = uap->flags;
2660 fst.count = 0;
2661 fst.error = 0;
2662 fst.maxcount = maxcount;
2663
2664 vfs_iterate(0, getfsstat64_callback, &fst);
2665
2666 if (fst.error ) {
2667 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2668 return(fst.error);
2669 }
2670
2671 if (fst.sfsp && fst.count > fst.maxcount)
2672 *retval = fst.maxcount;
2673 else
2674 *retval = fst.count;
2675
2676 return (0);
2677 }
2678
2679 /*
2680 * Change current working directory to a given file descriptor.
2681 */
2682 /* ARGSUSED */
2683 static int
2684 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
2685 {
2686 struct filedesc *fdp = p->p_fd;
2687 vnode_t vp;
2688 vnode_t tdp;
2689 vnode_t tvp;
2690 struct mount *mp;
2691 int error;
2692 vfs_context_t ctx = vfs_context_current();
2693
2694 AUDIT_ARG(fd, uap->fd);
2695 if (per_thread && uap->fd == -1) {
2696 /*
2697 * Switching back from per-thread to per-process CWD; verify we
2698 * in fact have one before proceeding. The only success case
2699 * for this code path is to return 0 preemptively after zapping
2700 * the thread structure contents.
2701 */
2702 thread_t th = vfs_context_thread(ctx);
2703 if (th) {
2704 uthread_t uth = get_bsdthread_info(th);
2705 tvp = uth->uu_cdir;
2706 uth->uu_cdir = NULLVP;
2707 if (tvp != NULLVP) {
2708 vnode_rele(tvp);
2709 return (0);
2710 }
2711 }
2712 return (EBADF);
2713 }
2714
2715 if ( (error = file_vnode(uap->fd, &vp)) )
2716 return(error);
2717 if ( (error = vnode_getwithref(vp)) ) {
2718 file_drop(uap->fd);
2719 return(error);
2720 }
2721
2722 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
2723
2724 if (vp->v_type != VDIR) {
2725 error = ENOTDIR;
2726 goto out;
2727 }
2728
2729 #if CONFIG_MACF
2730 error = mac_vnode_check_chdir(ctx, vp);
2731 if (error)
2732 goto out;
2733 #endif
2734 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2735 if (error)
2736 goto out;
2737
2738 while (!error && (mp = vp->v_mountedhere) != NULL) {
2739 if (vfs_busy(mp, LK_NOWAIT)) {
2740 error = EACCES;
2741 goto out;
2742 }
2743 error = VFS_ROOT(mp, &tdp, ctx);
2744 vfs_unbusy(mp);
2745 if (error)
2746 break;
2747 vnode_put(vp);
2748 vp = tdp;
2749 }
2750 if (error)
2751 goto out;
2752 if ( (error = vnode_ref(vp)) )
2753 goto out;
2754 vnode_put(vp);
2755
2756 if (per_thread) {
2757 thread_t th = vfs_context_thread(ctx);
2758 if (th) {
2759 uthread_t uth = get_bsdthread_info(th);
2760 tvp = uth->uu_cdir;
2761 uth->uu_cdir = vp;
2762 OSBitOrAtomic(P_THCWD, &p->p_flag);
2763 } else {
2764 vnode_rele(vp);
2765 return (ENOENT);
2766 }
2767 } else {
2768 proc_fdlock(p);
2769 tvp = fdp->fd_cdir;
2770 fdp->fd_cdir = vp;
2771 proc_fdunlock(p);
2772 }
2773
2774 if (tvp)
2775 vnode_rele(tvp);
2776 file_drop(uap->fd);
2777
2778 return (0);
2779 out:
2780 vnode_put(vp);
2781 file_drop(uap->fd);
2782
2783 return(error);
2784 }
2785
2786 int
2787 fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
2788 {
2789 return common_fchdir(p, uap, 0);
2790 }
2791
2792 int
2793 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
2794 {
2795 return common_fchdir(p, (void *)uap, 1);
2796 }
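/*
 * A minimal userspace sketch of fchdir(2); the descriptor must refer to
 * a directory the caller can search, per the VDIR and KAUTH_VNODE_SEARCH
 * checks in common_fchdir() above (the path is illustrative):
 *
 *    #include <fcntl.h>
 *    #include <unistd.h>
 *    #include <stdio.h>
 *
 *    int dfd = open("/usr/local", O_RDONLY);
 *
 *    if (dfd == -1 || fchdir(dfd) == -1)
 *        perror("fchdir");
 *    if (dfd != -1)
 *        close(dfd);
 */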
2797
2798 /*
2799 * Change current working directory (".").
2800 *
2801 * Returns: 0 Success
2802 * change_dir:ENOTDIR
2803 * change_dir:???
2804 * vnode_ref:ENOENT No such file or directory
2805 */
2806 /* ARGSUSED */
2807 static int
2808 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
2809 {
2810 struct filedesc *fdp = p->p_fd;
2811 int error;
2812 struct nameidata nd;
2813 vnode_t tvp;
2814 vfs_context_t ctx = vfs_context_current();
2815
2816 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
2817 UIO_USERSPACE, uap->path, ctx);
2818 error = change_dir(&nd, ctx);
2819 if (error)
2820 return (error);
2821 if ( (error = vnode_ref(nd.ni_vp)) ) {
2822 vnode_put(nd.ni_vp);
2823 return (error);
2824 }
2825 /*
2826 * drop the iocount we picked up in change_dir
2827 */
2828 vnode_put(nd.ni_vp);
2829
2830 if (per_thread) {
2831 thread_t th = vfs_context_thread(ctx);
2832 if (th) {
2833 uthread_t uth = get_bsdthread_info(th);
2834 tvp = uth->uu_cdir;
2835 uth->uu_cdir = nd.ni_vp;
2836 OSBitOrAtomic(P_THCWD, &p->p_flag);
2837 } else {
2838 vnode_rele(nd.ni_vp);
2839 return (ENOENT);
2840 }
2841 } else {
2842 proc_fdlock(p);
2843 tvp = fdp->fd_cdir;
2844 fdp->fd_cdir = nd.ni_vp;
2845 proc_fdunlock(p);
2846 }
2847
2848 if (tvp)
2849 vnode_rele(tvp);
2850
2851 return (0);
2852 }
2853
2854
2855 /*
2856 * chdir
2857 *
2858 * Change current working directory (".") for the entire process
2859 *
2860 * Parameters: p Process requesting the call
2861 * uap User argument descriptor (see below)
2862 * retval (ignored)
2863 *
2864 * Indirect parameters: uap->path Directory path
2865 *
2866 * Returns: 0 Success
2867 * common_chdir: ENOTDIR
2868 * common_chdir: ENOENT No such file or directory
2869 * common_chdir: ???
2870 *
2871 */
2872 int
2873 chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
2874 {
2875 return common_chdir(p, (void *)uap, 0);
2876 }
2877
2878 /*
2879 * __pthread_chdir
2880 *
2881 * Change current working directory (".") for a single thread
2882 *
2883 * Parameters: p Process requesting the call
2884 * uap User argument descriptor (see below)
2885 * retval (ignored)
2886 *
2887 * Indirect parameters: uap->path Directory path
2888 *
2889 * Returns: 0 Success
2890 * common_chdir: ENOTDIR
2891 * common_chdir: ENOENT No such file or directory
2892 * common_chdir: ???
2893 *
2894 */
2895 int
2896 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
2897 {
2898 return common_chdir(p, (void *)uap, 1);
2899 }
2900
2901
2902 /*
2903 * Change notion of root (``/'') directory.
2904 */
2905 /* ARGSUSED */
2906 int
2907 chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
2908 {
2909 struct filedesc *fdp = p->p_fd;
2910 int error;
2911 struct nameidata nd;
2912 vnode_t tvp;
2913 vfs_context_t ctx = vfs_context_current();
2914
2915 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
2916 return (error);
2917
2918 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
2919 UIO_USERSPACE, uap->path, ctx);
2920 error = change_dir(&nd, ctx);
2921 if (error)
2922 return (error);
2923
2924 #if CONFIG_MACF
2925 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
2926 &nd.ni_cnd);
2927 if (error) {
2928 vnode_put(nd.ni_vp);
2929 return (error);
2930 }
2931 #endif
2932
2933 if ( (error = vnode_ref(nd.ni_vp)) ) {
2934 vnode_put(nd.ni_vp);
2935 return (error);
2936 }
2937 vnode_put(nd.ni_vp);
2938
2939 proc_fdlock(p);
2940 tvp = fdp->fd_rdir;
2941 fdp->fd_rdir = nd.ni_vp;
2942 fdp->fd_flags |= FD_CHROOT;
2943 proc_fdunlock(p);
2944
2945 if (tvp != NULL)
2946 vnode_rele(tvp);
2947
2948 return (0);
2949 }
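/*
 * A minimal sketch of chroot(2); the call requires superuser (see the
 * suser() check above), and the conventional follow-up chdir("/") keeps
 * the working directory inside the new root (the path is illustrative):
 *
 *    #include <unistd.h>
 *    #include <stdio.h>
 *
 *    if (chroot("/var/empty") == -1 || chdir("/") == -1)
 *        perror("chroot");
 */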
2950
2951 /*
2952 * Common routine for chroot and chdir.
2953 *
2954 * Returns: 0 Success
2955 * ENOTDIR Not a directory
2956 * namei:??? [anything namei can return]
2957 * vnode_authorize:??? [anything vnode_authorize can return]
2958 */
2959 static int
2960 change_dir(struct nameidata *ndp, vfs_context_t ctx)
2961 {
2962 vnode_t vp;
2963 int error;
2964
2965 if ((error = namei(ndp)))
2966 return (error);
2967 nameidone(ndp);
2968 vp = ndp->ni_vp;
2969
2970 if (vp->v_type != VDIR) {
2971 vnode_put(vp);
2972 return (ENOTDIR);
2973 }
2974
2975 #if CONFIG_MACF
2976 error = mac_vnode_check_chdir(ctx, vp);
2977 if (error) {
2978 vnode_put(vp);
2979 return (error);
2980 }
2981 #endif
2982
2983 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2984 if (error) {
2985 vnode_put(vp);
2986 return (error);
2987 }
2988
2989 return (error);
2990 }
2991
2992 /*
2993 * Check permissions, allocate an open file structure,
2994 * and call the device open routine if any.
2995 *
2996 * Returns: 0 Success
2997 * EINVAL
2998 * EINTR
2999 * falloc:ENFILE
3000 * falloc:EMFILE
3001 * falloc:ENOMEM
3002 * vn_open_auth:???
3003 * dupfdopen:???
3004 * VNOP_ADVLOCK:???
3005 * vnode_setsize:???
3006 *
3007 * XXX Need to implement uid, gid
3008 */
3009 int
3010 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3011 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3012 int32_t *retval)
3013 {
3014 proc_t p = vfs_context_proc(ctx);
3015 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
3016 struct fileproc *fp;
3017 vnode_t vp;
3018 int flags, oflags;
3019 int type, indx, error;
3020 struct flock lf;
3021 int no_controlling_tty = 0;
3022 int deny_controlling_tty = 0;
3023 struct session *sessp = SESSION_NULL;
3024
3025 oflags = uflags;
3026
3027 if ((oflags & O_ACCMODE) == O_ACCMODE)
3028 return(EINVAL);
3029 flags = FFLAGS(uflags);
3030
3031 AUDIT_ARG(fflags, oflags);
3032 AUDIT_ARG(mode, vap->va_mode);
3033
3034 if ((error = falloc_withalloc(p,
3035 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
3036 return (error);
3037 }
3038 uu->uu_dupfd = -indx - 1;
3039
3040 if (!(p->p_flag & P_CONTROLT)) {
3041 sessp = proc_session(p);
3042 no_controlling_tty = 1;
3043 /*
3044 * If conditions would warrant getting a controlling tty if
3045 * the device being opened is a tty (see ttyopen in tty.c),
3046 * but the open flags deny it, set a flag in the session to
3047 * prevent it.
3048 */
3049 if (SESS_LEADER(p, sessp) &&
3050 sessp->s_ttyvp == NULL &&
3051 (flags & O_NOCTTY)) {
3052 session_lock(sessp);
3053 sessp->s_flags |= S_NOCTTY;
3054 session_unlock(sessp);
3055 deny_controlling_tty = 1;
3056 }
3057 }
3058
3059 if ((error = vn_open_auth(ndp, &flags, vap))) {
3060 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
3061 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
3062 fp_drop(p, indx, NULL, 0);
3063 *retval = indx;
3064 if (deny_controlling_tty) {
3065 session_lock(sessp);
3066 sessp->s_flags &= ~S_NOCTTY;
3067 session_unlock(sessp);
3068 }
3069 if (sessp != SESSION_NULL)
3070 session_rele(sessp);
3071 return (0);
3072 }
3073 }
3074 if (error == ERESTART)
3075 error = EINTR;
3076 fp_free(p, indx, fp);
3077
3078 if (deny_controlling_tty) {
3079 session_lock(sessp);
3080 sessp->s_flags &= ~S_NOCTTY;
3081 session_unlock(sessp);
3082 }
3083 if (sessp != SESSION_NULL)
3084 session_rele(sessp);
3085 return (error);
3086 }
3087 uu->uu_dupfd = 0;
3088 vp = ndp->ni_vp;
3089
3090 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
3091 fp->f_fglob->fg_ops = &vnops;
3092 fp->f_fglob->fg_data = (caddr_t)vp;
3093
3094 #if CONFIG_PROTECT
3095 if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) {
3096 if (vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) {
3097 fp->f_fglob->fg_flag |= FENCRYPTED;
3098 }
3099 }
3100 #endif
3101
3102 if (flags & (O_EXLOCK | O_SHLOCK)) {
3103 lf.l_whence = SEEK_SET;
3104 lf.l_start = 0;
3105 lf.l_len = 0;
3106 if (flags & O_EXLOCK)
3107 lf.l_type = F_WRLCK;
3108 else
3109 lf.l_type = F_RDLCK;
3110 type = F_FLOCK;
3111 if ((flags & FNONBLOCK) == 0)
3112 type |= F_WAIT;
3113 #if CONFIG_MACF
3114 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3115 F_SETLK, &lf);
3116 if (error)
3117 goto bad;
3118 #endif
3119 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
3120 goto bad;
3121 fp->f_fglob->fg_flag |= FHASLOCK;
3122 }
3123
3124 /* try to truncate by setting the size attribute */
3125 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3126 goto bad;
3127
3128 /*
3129 * If the open flags denied the acquisition of a controlling tty,
3130 * clear the flag in the session structure that prevented the lower
3131 * level code from assigning one.
3132 */
3133 if (deny_controlling_tty) {
3134 session_lock(sessp);
3135 sessp->s_flags &= ~S_NOCTTY;
3136 session_unlock(sessp);
3137 }
3138
3139 /*
3140 * If a controlling tty was set by the tty line discipline, then we
3141 * want to set the vp of the tty into the session structure. We have
3142 * a race here because we can't get to the vp for the tp in ttyopen,
3143 * because it's not passed as a parameter in the open path.
3144 */
3145 if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
3146 vnode_t ttyvp;
3147
3148 /*
3149 * We already have a ref from vn_open_auth(), so we can demand another reference.
3150 */
3151 error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE);
3152 if (error != 0) {
3153 panic("vnode_ref_ext() with VNODE_REF_FORCE failed?!");
3154 }
3155
3156 session_lock(sessp);
3157 ttyvp = sessp->s_ttyvp;
3158 sessp->s_ttyvp = vp;
3159 sessp->s_ttyvid = vnode_vid(vp);
3160 session_unlock(sessp);
3161 if (ttyvp != NULLVP)
3162 vnode_rele(ttyvp);
3163 }
3164
3165 vnode_put(vp);
3166
3167 proc_fdlock(p);
3168 if (flags & O_CLOEXEC)
3169 *fdflags(p, indx) |= UF_EXCLOSE;
3170 if (flags & O_CLOFORK)
3171 *fdflags(p, indx) |= UF_FORKCLOSE;
3172 procfdtbl_releasefd(p, indx, NULL);
3173 fp_drop(p, indx, fp, 1);
3174 proc_fdunlock(p);
3175
3176 *retval = indx;
3177
3178 if (sessp != SESSION_NULL)
3179 session_rele(sessp);
3180 return (0);
3181 bad:
3182 if (deny_controlling_tty) {
3183 session_lock(sessp);
3184 sessp->s_flags &= ~S_NOCTTY;
3185 session_unlock(sessp);
3186 }
3187 if (sessp != SESSION_NULL)
3188 session_rele(sessp);
3189
3190 struct vfs_context context = *vfs_context_current();
3191 context.vc_ucred = fp->f_fglob->fg_cred;
3192
3193 vn_close(vp, fp->f_fglob->fg_flag, &context);
3194 vnode_put(vp);
3195 fp_free(p, indx, fp);
3196
3197 return (error);
3198 }
3199
3200 /*
3201 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3202 *
3203 * Parameters: p Process requesting the open
3204 * uap User argument descriptor (see below)
3205 * retval Pointer to an area to receive the
3206 * return value from the system call
3207 *
3208 * Indirect: uap->path Path to open (same as 'open')
3209 * uap->flags Flags to open (same as 'open')
3210 * uap->uid UID to set, if creating
3211 * uap->gid GID to set, if creating
3212 * uap->mode File mode, if creating (same as 'open')
3213 * uap->xsecurity ACL to set, if creating
3214 *
3215 * Returns: 0 Success
3216 * !0 errno value
3217 *
3218 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3219 *
3220 * XXX: We should enumerate the possible errno values here, and where
3221 * in the code they originated.
3222 */
3223 int
3224 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
3225 {
3226 struct filedesc *fdp = p->p_fd;
3227 int ciferror;
3228 kauth_filesec_t xsecdst;
3229 struct vnode_attr va;
3230 struct nameidata nd;
3231 int cmode;
3232
3233 AUDIT_ARG(owner, uap->uid, uap->gid);
3234
3235 xsecdst = NULL;
3236 if ((uap->xsecurity != USER_ADDR_NULL) &&
3237 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3238 return ciferror;
3239
3240 VATTR_INIT(&va);
3241 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3242 VATTR_SET(&va, va_mode, cmode);
3243 if (uap->uid != KAUTH_UID_NONE)
3244 VATTR_SET(&va, va_uid, uap->uid);
3245 if (uap->gid != KAUTH_GID_NONE)
3246 VATTR_SET(&va, va_gid, uap->gid);
3247 if (xsecdst != NULL)
3248 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3249
3250 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3251 uap->path, vfs_context_current());
3252
3253 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3254 fileproc_alloc_init, NULL, retval);
3255 if (xsecdst != NULL)
3256 kauth_filesec_free(xsecdst);
3257
3258 return ciferror;
3259 }
3260
3261 /*
3262 * Go through the data-protected atomically controlled open (2)
3263 *
3264 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3265 */
3266 int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3267 int flags = uap->flags;
3268 int class = uap->class;
3269 int dpflags = uap->dpflags;
3270
3271 /*
3272 * Follow the same path as normal open(2)
3273 * Look up the item if it exists, and acquire the vnode.
3274 */
3275 struct filedesc *fdp = p->p_fd;
3276 struct vnode_attr va;
3277 struct nameidata nd;
3278 int cmode;
3279 int error;
3280
3281 VATTR_INIT(&va);
3282 /* Mask off all but regular access permissions */
3283 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3284 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3285
3286 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3287 uap->path, vfs_context_current());
3288
3289 /*
3290 * Initialize the extra fields in vnode_attr so we can pass our
3291 * additional information down to the filesystem:
3292 * 1. target cprotect class.
3293 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3294 */
3295 if (flags & O_CREAT) {
3296 VATTR_SET(&va, va_dataprotect_class, class);
3297 }
3298
3299 if (dpflags & O_DP_GETRAWENCRYPTED) {
3300 if ( flags & (O_RDWR | O_WRONLY)) {
3301 /* Not allowed to write raw encrypted bytes */
3302 return EINVAL;
3303 }
3304 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3305 }
3306
3307 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3308 fileproc_alloc_init, NULL, retval);
3309
3310 return error;
3311 }
3312
3313
3314 int
3315 open(proc_t p, struct open_args *uap, int32_t *retval)
3316 {
3317 __pthread_testcancel(1);
3318 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3319 }
3320
3321 int
3322 open_nocancel(proc_t p, struct open_nocancel_args *uap, int32_t *retval)
3323 {
3324 struct filedesc *fdp = p->p_fd;
3325 struct vnode_attr va;
3326 struct nameidata nd;
3327 int cmode;
3328
3329 VATTR_INIT(&va);
3330 /* Mask off all but regular access permissions */
3331 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3332 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3333
3334 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3335 uap->path, vfs_context_current());
3336
3337 return (open1(vfs_context_current(), &nd, uap->flags, &va,
3338 fileproc_alloc_init, NULL, retval));
3339 }
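/*
 * In both open paths above, the requested creation mode is filtered
 * through the process umask (fd_cmask) and ACCESSPERMS before it
 * reaches the filesystem.  A small sketch of the visible effect (the
 * path is illustrative):
 *
 *    #include <fcntl.h>
 *    #include <sys/stat.h>
 *    #include <unistd.h>
 *    #include <stdio.h>
 *
 *    umask(022);    // request 0666 below, get 0644 on disk
 *    int fd = open("/tmp/example.txt", O_WRONLY | O_CREAT | O_EXCL, 0666);
 *
 *    if (fd == -1)
 *        perror("open");
 *    else
 *        close(fd);
 */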
3340
3341
3342 /*
3343 * Create a special file.
3344 */
3345 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3346
3347 int
3348 mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
3349 {
3350 struct vnode_attr va;
3351 vfs_context_t ctx = vfs_context_current();
3352 int error;
3353 struct nameidata nd;
3354 vnode_t vp, dvp;
3355
3356 VATTR_INIT(&va);
3357 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3358 VATTR_SET(&va, va_rdev, uap->dev);
3359
3360 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3361 if ((uap->mode & S_IFMT) == S_IFIFO)
3362 return(mkfifo1(ctx, uap->path, &va));
3363
3364 AUDIT_ARG(mode, uap->mode);
3365 AUDIT_ARG(value32, uap->dev);
3366
3367 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
3368 return (error);
3369 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
3370 UIO_USERSPACE, uap->path, ctx);
3371 error = namei(&nd);
3372 if (error)
3373 return (error);
3374 dvp = nd.ni_dvp;
3375 vp = nd.ni_vp;
3376
3377 if (vp != NULL) {
3378 error = EEXIST;
3379 goto out;
3380 }
3381
3382 switch (uap->mode & S_IFMT) {
3383 case S_IFMT: /* used by badsect to flag bad sectors */
3384 VATTR_SET(&va, va_type, VBAD);
3385 break;
3386 case S_IFCHR:
3387 VATTR_SET(&va, va_type, VCHR);
3388 break;
3389 case S_IFBLK:
3390 VATTR_SET(&va, va_type, VBLK);
3391 break;
3392 default:
3393 error = EINVAL;
3394 goto out;
3395 }
3396
3397 #if CONFIG_MACF
3398 error = mac_vnode_check_create(ctx,
3399 nd.ni_dvp, &nd.ni_cnd, &va);
3400 if (error)
3401 goto out;
3402 #endif
3403
3404 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3405 goto out;
3406
3407 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
3408 goto out;
3409
3410 if (vp) {
3411 int update_flags = 0;
3412
3413 // Make sure the name & parent pointers are hooked up
3414 if (vp->v_name == NULL)
3415 update_flags |= VNODE_UPDATE_NAME;
3416 if (vp->v_parent == NULLVP)
3417 update_flags |= VNODE_UPDATE_PARENT;
3418
3419 if (update_flags)
3420 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3421
3422 #if CONFIG_FSE
3423 add_fsevent(FSE_CREATE_FILE, ctx,
3424 FSE_ARG_VNODE, vp,
3425 FSE_ARG_DONE);
3426 #endif
3427 }
3428
3429 out:
3430 /*
3431 * nameidone has to happen before we vnode_put(dvp)
3432 * since it may need to release the fs_nodelock on the dvp
3433 */
3434 nameidone(&nd);
3435
3436 if (vp)
3437 vnode_put(vp);
3438 vnode_put(dvp);
3439
3440 return (error);
3441 }
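/*
 * A minimal sketch of mknod(2) for a character special file; it
 * requires superuser per the suser() check above, and a mode of
 * S_IFIFO is diverted to mkfifo1() instead.  The path and device
 * numbers are illustrative:
 *
 *    #include <sys/types.h>
 *    #include <sys/stat.h>
 *    #include <stdio.h>
 *
 *    if (mknod("/tmp/mychardev", S_IFCHR | 0600, makedev(3, 2)) == -1)
 *        perror("mknod");
 */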
3442
3443 /*
3444 * Create a named pipe.
3445 *
3446 * Returns: 0 Success
3447 * EEXIST
3448 * namei:???
3449 * vnode_authorize:???
3450 * vn_create:???
3451 */
3452 static int
3453 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
3454 {
3455 vnode_t vp, dvp;
3456 int error;
3457 struct nameidata nd;
3458
3459 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
3460 UIO_USERSPACE, upath, ctx);
3461 error = namei(&nd);
3462 if (error)
3463 return (error);
3464 dvp = nd.ni_dvp;
3465 vp = nd.ni_vp;
3466
3467 /* check that this is a new file and authorize addition */
3468 if (vp != NULL) {
3469 error = EEXIST;
3470 goto out;
3471 }
3472 VATTR_SET(vap, va_type, VFIFO);
3473
3474 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
3475 goto out;
3476
3477 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
3478 out:
3479 /*
3480 * nameidone has to happen before we vnode_put(dvp)
3481 * since it may need to release the fs_nodelock on the dvp
3482 */
3483 nameidone(&nd);
3484
3485 if (vp)
3486 vnode_put(vp);
3487 vnode_put(dvp);
3488
3489 return error;
3490 }
3491
3492
3493 /*
3494 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
3495 *
3496 * Parameters: p Process requesting the open
3497 * uap User argument descriptor (see below)
3498 * retval (Ignored)
3499 *
3500 * Indirect: uap->path Path to fifo (same as 'mkfifo')
3501 * uap->uid UID to set
3502 * uap->gid GID to set
3503 * uap->mode File mode to set (same as 'mkfifo')
3504 * uap->xsecurity ACL to set, if creating
3505 *
3506 * Returns: 0 Success
3507 * !0 errno value
3508 *
3509 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3510 *
3511 * XXX: We should enumerate the possible errno values here, and where
3512 * in the code they originated.
3513 */
3514 int
3515 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
3516 {
3517 int ciferror;
3518 kauth_filesec_t xsecdst;
3519 struct vnode_attr va;
3520
3521 AUDIT_ARG(owner, uap->uid, uap->gid);
3522
3523 xsecdst = KAUTH_FILESEC_NONE;
3524 if (uap->xsecurity != USER_ADDR_NULL) {
3525 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
3526 return ciferror;
3527 }
3528
3529 VATTR_INIT(&va);
3530 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3531 if (uap->uid != KAUTH_UID_NONE)
3532 VATTR_SET(&va, va_uid, uap->uid);
3533 if (uap->gid != KAUTH_GID_NONE)
3534 VATTR_SET(&va, va_gid, uap->gid);
3535 if (xsecdst != KAUTH_FILESEC_NONE)
3536 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3537
3538 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
3539
3540 if (xsecdst != KAUTH_FILESEC_NONE)
3541 kauth_filesec_free(xsecdst);
3542 return ciferror;
3543 }
3544
3545 /* ARGSUSED */
3546 int
3547 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
3548 {
3549 struct vnode_attr va;
3550
3551 VATTR_INIT(&va);
3552 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3553
3554 return(mkfifo1(vfs_context_current(), uap->path, &va));
3555 }
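/*
 * A minimal userspace sketch of mkfifo(2); as with open(2), the mode is
 * masked by the process umask (fd_cmask) above.  The path is
 * illustrative:
 *
 *    #include <sys/types.h>
 *    #include <sys/stat.h>
 *    #include <stdio.h>
 *
 *    if (mkfifo("/tmp/example.fifo", 0600) == -1)
 *        perror("mkfifo");
 */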
3556
3557
3558 static char *
3559 my_strrchr(char *p, int ch)
3560 {
3561 char *save;
3562
3563 for (save = NULL;; ++p) {
3564 if (*p == ch)
3565 save = p;
3566 if (!*p)
3567 return(save);
3568 }
3569 /* NOTREACHED */
3570 }
3571
3572 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
3573
3574 int
3575 safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
3576 {
3577 int ret, len = _len;
3578
3579 *truncated_path = 0;
3580 ret = vn_getpath(dvp, path, &len);
3581 if (ret == 0 && len < (MAXPATHLEN - 1)) {
3582 if (leafname) {
3583 path[len-1] = '/';
3584 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
3585 if (len > MAXPATHLEN) {
3586 char *ptr;
3587
3588 // the string got truncated!
3589 *truncated_path = 1;
3590 ptr = my_strrchr(path, '/');
3591 if (ptr) {
3592 *ptr = '\0'; // chop off the string at the last directory component
3593 }
3594 len = strlen(path) + 1;
3595 }
3596 }
3597 } else if (ret == 0) {
3598 *truncated_path = 1;
3599 } else if (ret != 0) {
3600 struct vnode *mydvp=dvp;
3601
3602 if (ret != ENOSPC) {
3603 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
3604 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
3605 }
3606 *truncated_path = 1;
3607
3608 do {
3609 if (mydvp->v_parent != NULL) {
3610 mydvp = mydvp->v_parent;
3611 } else if (mydvp->v_mount) {
3612 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
3613 break;
3614 } else {
3615 // no parent and no mount point? only thing is to punt and say "/" changed
3616 strlcpy(path, "/", _len);
3617 len = 2;
3618 mydvp = NULL;
3619 }
3620
3621 if (mydvp == NULL) {
3622 break;
3623 }
3624
3625 len = _len;
3626 ret = vn_getpath(mydvp, path, &len);
3627 } while (ret == ENOSPC);
3628 }
3629
3630 return len;
3631 }
3632
3633
3634 /*
3635 * Make a hard file link.
3636 *
3637 * Returns: 0 Success
3638 * EPERM
3639 * EEXIST
3640 * EXDEV
3641 * namei:???
3642 * vnode_authorize:???
3643 * VNOP_LINK:???
3644 */
3645 /* ARGSUSED */
3646 int
3647 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
3648 {
3649 vnode_t vp, dvp, lvp;
3650 struct nameidata nd;
3651 vfs_context_t ctx = vfs_context_current();
3652 int error;
3653 #if CONFIG_FSE
3654 fse_info finfo;
3655 #endif
3656 int need_event, has_listeners;
3657 char *target_path = NULL;
3658 int truncated=0;
3659
3660 vp = dvp = lvp = NULLVP;
3661
3662 /* look up the object we are linking to */
3663 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1,
3664 UIO_USERSPACE, uap->path, ctx);
3665 error = namei(&nd);
3666 if (error)
3667 return (error);
3668 vp = nd.ni_vp;
3669
3670 nameidone(&nd);
3671
3672 /*
3673 * Normally, linking to directories is not supported.
3674 * However, some file systems may have limited support.
3675 */
3676 if (vp->v_type == VDIR) {
3677 if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
3678 error = EPERM; /* POSIX */
3679 goto out;
3680 }
3681 /* Linking to a directory requires ownership. */
3682 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
3683 struct vnode_attr dva;
3684
3685 VATTR_INIT(&dva);
3686 VATTR_WANTED(&dva, va_uid);
3687 if (vnode_getattr(vp, &dva, ctx) != 0 ||
3688 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
3689 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
3690 error = EACCES;
3691 goto out;
3692 }
3693 }
3694 }
3695
3696 /* lookup the target node */
3697 #if CONFIG_TRIGGERS
3698 nd.ni_op = OP_LINK;
3699 #endif
3700 nd.ni_cnd.cn_nameiop = CREATE;
3701 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
3702 nd.ni_dirp = uap->link;
3703 error = namei(&nd);
3704 if (error != 0)
3705 goto out;
3706 dvp = nd.ni_dvp;
3707 lvp = nd.ni_vp;
3708
3709 #if CONFIG_MACF
3710 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
3711 goto out2;
3712 #endif
3713
3714 /* or to anything that kauth doesn't want us to link to (e.g. immutable items) */
3715 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
3716 goto out2;
3717
3718 /* target node must not exist */
3719 if (lvp != NULLVP) {
3720 error = EEXIST;
3721 goto out2;
3722 }
3723 /* cannot link across mountpoints */
3724 if (vnode_mount(vp) != vnode_mount(dvp)) {
3725 error = EXDEV;
3726 goto out2;
3727 }
3728
3729 /* authorize creation of the target node */
3730 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3731 goto out2;
3732
3733 /* and finally make the link */
3734 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
3735 if (error)
3736 goto out2;
3737
3738 #if CONFIG_MACF
3739 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
3740 #endif
3741
3742 #if CONFIG_FSE
3743 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
3744 #else
3745 need_event = 0;
3746 #endif
3747 has_listeners = kauth_authorize_fileop_has_listeners();
3748
3749 if (need_event || has_listeners) {
3750 char *link_to_path = NULL;
3751 int len, link_name_len;
3752
3753 /* build the path to the new link file */
3754 GET_PATH(target_path);
3755 if (target_path == NULL) {
3756 error = ENOMEM;
3757 goto out2;
3758 }
3759
3760 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
3761
3762 if (has_listeners) {
3763 /* build the path to file we are linking to */
3764 GET_PATH(link_to_path);
3765 if (link_to_path == NULL) {
3766 error = ENOMEM;
3767 goto out2;
3768 }
3769
3770 link_name_len = MAXPATHLEN;
3771 vn_getpath(vp, link_to_path, &link_name_len);
3772
3773 /*
3774 * Call out to allow 3rd party notification of the link creation.
3775 * Ignore result of kauth_authorize_fileop call.
3776 */
3777 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
3778 (uintptr_t)link_to_path, (uintptr_t)target_path);
3779 if (link_to_path != NULL) {
3780 RELEASE_PATH(link_to_path);
3781 }
3782 }
3783 #if CONFIG_FSE
3784 if (need_event) {
3785 /* construct fsevent */
3786 if (get_fse_info(vp, &finfo, ctx) == 0) {
3787 if (truncated) {
3788 finfo.mode |= FSE_TRUNCATED_PATH;
3789 }
3790
3791 // build the path to the destination of the link
3792 add_fsevent(FSE_CREATE_FILE, ctx,
3793 FSE_ARG_STRING, len, target_path,
3794 FSE_ARG_FINFO, &finfo,
3795 FSE_ARG_DONE);
3796 }
3797 if (vp->v_parent) {
3798 add_fsevent(FSE_STAT_CHANGED, ctx,
3799 FSE_ARG_VNODE, vp->v_parent,
3800 FSE_ARG_DONE);
3801 }
3802 }
3803 #endif
3804 }
3805 out2:
3806 /*
3807 * nameidone has to happen before we vnode_put(dvp)
3808 * since it may need to release the fs_nodelock on the dvp
3809 */
3810 nameidone(&nd);
3811 if (target_path != NULL) {
3812 RELEASE_PATH(target_path);
3813 }
3814 out:
3815 if (lvp)
3816 vnode_put(lvp);
3817 if (dvp)
3818 vnode_put(dvp);
3819 vnode_put(vp);
3820 return (error);
3821 }
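/*
 * A minimal sketch of link(2); both names must live on the same mounted
 * filesystem (the vnode_mount() comparison above returns EXDEV
 * otherwise), and the new name must not already exist (EEXIST).  Paths
 * are illustrative:
 *
 *    #include <unistd.h>
 *    #include <stdio.h>
 *
 *    if (link("/tmp/original.txt", "/tmp/hardlink.txt") == -1)
 *        perror("link");
 */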
3822
3823 /*
3824 * Make a symbolic link.
3825 *
3826 * We could add support for ACLs here too...
3827 */
3828 /* ARGSUSED */
3829 int
3830 symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval)
3831 {
3832 struct vnode_attr va;
3833 char *path;
3834 int error;
3835 struct nameidata nd;
3836 vfs_context_t ctx = vfs_context_current();
3837 vnode_t vp, dvp;
3838 size_t dummy=0;
3839
3840 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
3841 error = copyinstr(uap->path, path, MAXPATHLEN, &dummy);
3842 if (error)
3843 goto out;
3844 AUDIT_ARG(text, path); /* This is the link string */
3845
3846 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
3847 UIO_USERSPACE, uap->link, ctx);
3848 error = namei(&nd);
3849 if (error)
3850 goto out;
3851 dvp = nd.ni_dvp;
3852 vp = nd.ni_vp;
3853
3854 VATTR_INIT(&va);
3855 VATTR_SET(&va, va_type, VLNK);
3856 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
3857 #if CONFIG_MACF
3858 error = mac_vnode_check_create(ctx,
3859 dvp, &nd.ni_cnd, &va);
3860 #endif
3861 if (error != 0) {
3862 goto skipit;
3863 }
3864
3865 if (vp != NULL) {
3866 error = EEXIST;
3867 goto skipit;
3868 }
3869
3870 /* authorize */
3871 if (error == 0)
3872 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
3873 /* get default ownership, etc. */
3874 if (error == 0)
3875 error = vnode_authattr_new(dvp, &va, 0, ctx);
3876 if (error == 0)
3877 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
3878
3879 #if CONFIG_MACF
3880 if (error == 0)
3881 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
3882 #endif
3883
3884 /* do fallback attribute handling */
3885 if (error == 0)
3886 error = vnode_setattr_fallback(vp, &va, ctx);
3887
3888 if (error == 0) {
3889 int update_flags = 0;
3890
3891 if (vp == NULL) {
3892 nd.ni_cnd.cn_nameiop = LOOKUP;
3893 #if CONFIG_TRIGGERS
3894 nd.ni_op = OP_LOOKUP;
3895 #endif
3896 nd.ni_cnd.cn_flags = 0;
3897 error = namei(&nd);
3898 vp = nd.ni_vp;
3899
3900 if (vp == NULL)
3901 goto skipit;
3902 }
3903
3904 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
3905 /* call out to allow 3rd party notification of the symlink creation.
3906 * Ignore result of kauth_authorize_fileop call.
3907 */
3908 if (kauth_authorize_fileop_has_listeners() &&
3909 namei(&nd) == 0) {
3910 char *new_link_path = NULL;
3911 int len;
3912
3913 /* build the path to the new link file */
3914 new_link_path = get_pathbuff();
3915 len = MAXPATHLEN;
3916 vn_getpath(dvp, new_link_path, &len);
3917 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
3918 new_link_path[len - 1] = '/';
3919 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
3920 }
3921
3922 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
3923 (uintptr_t)path, (uintptr_t)new_link_path);
3924 if (new_link_path != NULL)
3925 release_pathbuff(new_link_path);
3926 }
3927 #endif
3928 // Make sure the name & parent pointers are hooked up
3929 if (vp->v_name == NULL)
3930 update_flags |= VNODE_UPDATE_NAME;
3931 if (vp->v_parent == NULLVP)
3932 update_flags |= VNODE_UPDATE_PARENT;
3933
3934 if (update_flags)
3935 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3936
3937 #if CONFIG_FSE
3938 add_fsevent(FSE_CREATE_FILE, ctx,
3939 FSE_ARG_VNODE, vp,
3940 FSE_ARG_DONE);
3941 #endif
3942 }
3943
3944 skipit:
3945 /*
3946 * nameidone has to happen before we vnode_put(dvp)
3947 * since it may need to release the fs_nodelock on the dvp
3948 */
3949 nameidone(&nd);
3950
3951 if (vp)
3952 vnode_put(vp);
3953 vnode_put(dvp);
3954 out:
3955 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
3956
3957 return (error);
3958 }
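/*
 * A minimal sketch of symlink(2) paired with readlink(2); the link
 * target string is exactly the path copied in above and is not resolved
 * at creation time.  Paths are illustrative:
 *
 *    #include <unistd.h>
 *    #include <stdio.h>
 *
 *    char buf[1024];
 *    ssize_t n;
 *
 *    if (symlink("/etc/hosts", "/tmp/hosts.link") == -1)
 *        perror("symlink");
 *    else if ((n = readlink("/tmp/hosts.link", buf, sizeof(buf) - 1)) != -1) {
 *        buf[n] = '\0';
 *        printf("-> %s\n", buf);
 *    }
 */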
3959
3960 /*
3961 * Delete a whiteout from the filesystem.
3962 * XXX authorization not implemented for whiteouts
3963 */
3964 int
3965 undelete(__unused proc_t p, struct undelete_args *uap, __unused int32_t *retval)
3966 {
3967 int error;
3968 struct nameidata nd;
3969 vfs_context_t ctx = vfs_context_current();
3970 vnode_t vp, dvp;
3971
3972 NDINIT(&nd, DELETE, OP_UNLINK, LOCKPARENT | DOWHITEOUT | AUDITVNPATH1,
3973 UIO_USERSPACE, uap->path, ctx);
3974 error = namei(&nd);
3975 if (error)
3976 return (error);
3977 dvp = nd.ni_dvp;
3978 vp = nd.ni_vp;
3979
3980 if (vp == NULLVP && (nd.ni_cnd.cn_flags & ISWHITEOUT)) {
3981 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, DELETE, ctx);
3982 } else
3983 error = EEXIST;
3984
3985 /*
3986 * nameidone has to happen before we vnode_put(dvp)
3987 * since it may need to release the fs_nodelock on the dvp
3988 */
3989 nameidone(&nd);
3990
3991 if (vp)
3992 vnode_put(vp);
3993 vnode_put(dvp);
3994
3995 return (error);
3996 }
3997
3998
3999 /*
4000 * Delete a name from the filesystem.
4001 */
4002 /* ARGSUSED */
4003 int
4004 unlink1(vfs_context_t ctx, struct nameidata *ndp, int unlink_flags)
4005 {
4006 vnode_t vp, dvp;
4007 int error;
4008 struct componentname *cnp;
4009 char *path = NULL;
4010 int len=0;
4011 #if CONFIG_FSE
4012 fse_info finfo;
4013 struct vnode_attr va;
4014 #endif
4015 int flags = 0;
4016 int need_event = 0;
4017 int has_listeners = 0;
4018 int truncated_path=0;
4019 int batched;
4020 struct vnode_attr *vap = NULL;
4021
4022 #if NAMEDRSRCFORK
4023 /* unlink or delete is allowed on rsrc forks and named streams */
4024 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4025 #endif
4026
4027 ndp->ni_cnd.cn_flags |= LOCKPARENT;
4028 ndp->ni_flag |= NAMEI_COMPOUNDREMOVE;
4029 cnp = &ndp->ni_cnd;
4030
4031 lookup_continue:
4032 error = namei(ndp);
4033 if (error)
4034 return (error);
4035
4036 dvp = ndp->ni_dvp;
4037 vp = ndp->ni_vp;
4038
4039
4040 /* With Carbon delete semantics, busy files cannot be deleted */
4041 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
4042 flags |= VNODE_REMOVE_NODELETEBUSY;
4043 }
4044
4045 /* Skip any potential upcalls if told to. */
4046 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4047 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4048 }
4049
4050 if (vp) {
4051 batched = vnode_compound_remove_available(vp);
4052 /*
4053 * The root of a mounted filesystem cannot be deleted.
4054 */
4055 if (vp->v_flag & VROOT) {
4056 error = EBUSY;
4057 }
4058
4059 if (!batched) {
4060 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4061 if (error) {
4062 goto out;
4063 }
4064 }
4065 } else {
4066 batched = 1;
4067
4068 if (!vnode_compound_remove_available(dvp)) {
4069 panic("No vp, but no compound remove?");
4070 }
4071 }
4072
4073 #if CONFIG_FSE
4074 need_event = need_fsevent(FSE_DELETE, dvp);
4075 if (need_event) {
4076 if (!batched) {
4077 if ((vp->v_flag & VISHARDLINK) == 0) {
4078 /* XXX need to get these data in batched VNOP */
4079 get_fse_info(vp, &finfo, ctx);
4080 }
4081 } else {
4082 error = vfs_get_notify_attributes(&va);
4083 if (error) {
4084 goto out;
4085 }
4086
4087 vap = &va;
4088 }
4089 }
4090 #endif
4091 has_listeners = kauth_authorize_fileop_has_listeners();
4092 if (need_event || has_listeners) {
4093 if (path == NULL) {
4094 GET_PATH(path);
4095 if (path == NULL) {
4096 error = ENOMEM;
4097 goto out;
4098 }
4099 }
4100 len = safe_getpath(dvp, ndp->ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
4101 }
4102
4103 #if NAMEDRSRCFORK
4104 if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK)
4105 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4106 else
4107 #endif
4108 {
4109 error = vn_remove(dvp, &ndp->ni_vp, ndp, flags, vap, ctx);
4110 vp = ndp->ni_vp;
4111 if (error == EKEEPLOOKING) {
4112 if (!batched) {
4113 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4114 }
4115
4116 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
4117 panic("EKEEPLOOKING, but continue flag not set?");
4118 }
4119
4120 if (vnode_isdir(vp)) {
4121 error = EISDIR;
4122 goto out;
4123 }
4124 goto lookup_continue;
4125 }
4126 }
4127
4128 /*
4129 * Call out to allow 3rd party notification of delete.
4130 * Ignore result of kauth_authorize_fileop call.
4131 */
4132 if (!error) {
4133 if (has_listeners) {
4134 kauth_authorize_fileop(vfs_context_ucred(ctx),
4135 KAUTH_FILEOP_DELETE,
4136 (uintptr_t)vp,
4137 (uintptr_t)path);
4138 }
4139
4140 if (vp->v_flag & VISHARDLINK) {
4141 //
4142 // if a hardlink gets deleted we want to blow away the
4143 // v_parent link because the path that got us to this
4144 // instance of the link is no longer valid. this will
4145 // force the next call to get the path to ask the file
4146 // system instead of just following the v_parent link.
4147 //
4148 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
4149 }
4150
4151 #if CONFIG_FSE
4152 if (need_event) {
4153 if (vp->v_flag & VISHARDLINK) {
4154 get_fse_info(vp, &finfo, ctx);
4155 } else if (vap) {
4156 vnode_get_fse_info_from_vap(vp, &finfo, vap);
4157 }
4158 if (truncated_path) {
4159 finfo.mode |= FSE_TRUNCATED_PATH;
4160 }
4161 add_fsevent(FSE_DELETE, ctx,
4162 FSE_ARG_STRING, len, path,
4163 FSE_ARG_FINFO, &finfo,
4164 FSE_ARG_DONE);
4165 }
4166 #endif
4167 }
4168
4169 out:
4170 if (path != NULL)
4171 RELEASE_PATH(path);
4172
4173 #if NAMEDRSRCFORK
4174 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4175 * will cause its shadow file to go away if necessary.
4176 */
4177 if (vp && (vnode_isnamedstream(vp)) &&
4178 (vp->v_parent != NULLVP) &&
4179 vnode_isshadow(vp)) {
4180 vnode_recycle(vp);
4181 }
4182 #endif
4183 /*
4184 * nameidone has to happen before we vnode_put(dvp)
4185 * since it may need to release the fs_nodelock on the dvp
4186 */
4187 nameidone(ndp);
4188 vnode_put(dvp);
4189 if (vp) {
4190 vnode_put(vp);
4191 }
4192 return (error);
4193 }
4194
4195 /*
4196 * Delete a name from the filesystem using POSIX semantics.
4197 */
4198 int
4199 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
4200 {
4201 struct nameidata nd;
4202 vfs_context_t ctx = vfs_context_current();
4203
4204 NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
4205 uap->path, ctx);
4206 return unlink1(ctx, &nd, 0);
4207 }
4208
4209 /*
4210 * Delete a name from the filesystem using Carbon semantics.
4211 */
4212 int
4213 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
4214 {
4215 struct nameidata nd;
4216 vfs_context_t ctx = vfs_context_current();
4217
4218 NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
4219 uap->path, ctx);
4220 return unlink1(ctx, &nd, VNODE_REMOVE_NODELETEBUSY);
4221 }
4222
4223 /*
4224 * Reposition read/write file offset.
4225 */
4226 int
4227 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
4228 {
4229 struct fileproc *fp;
4230 vnode_t vp;
4231 struct vfs_context *ctx;
4232 off_t offset = uap->offset, file_size;
4233 int error;
4234
4235 if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
4236 if (error == ENOTSUP)
4237 return (ESPIPE);
4238 return (error);
4239 }
4240 if (vnode_isfifo(vp)) {
4241 file_drop(uap->fd);
4242 return(ESPIPE);
4243 }
4244
4245
4246 ctx = vfs_context_current();
4247 #if CONFIG_MACF
4248 if (uap->whence == L_INCR && uap->offset == 0)
4249 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
4250 fp->f_fglob);
4251 else
4252 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
4253 fp->f_fglob);
4254 if (error) {
4255 file_drop(uap->fd);
4256 return (error);
4257 }
4258 #endif
4259 if ( (error = vnode_getwithref(vp)) ) {
4260 file_drop(uap->fd);
4261 return(error);
4262 }
4263
4264 switch (uap->whence) {
4265 case L_INCR:
4266 offset += fp->f_fglob->fg_offset;
4267 break;
4268 case L_XTND:
4269 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
4270 break;
4271 offset += file_size;
4272 break;
4273 case L_SET:
4274 break;
4275 default:
4276 error = EINVAL;
4277 }
4278 if (error == 0) {
4279 if (uap->offset > 0 && offset < 0) {
4280 /* Incremented/relative move past max size */
4281 error = EOVERFLOW;
4282 } else {
4283 /*
4284 * Allow negative offsets on character devices, per
4285 * POSIX 1003.1-2001. Most likely for writing disk
4286 * labels.
4287 */
4288 if (offset < 0 && vp->v_type != VCHR) {
4289 /* Decremented/relative move before start */
4290 error = EINVAL;
4291 } else {
4292 /* Success */
4293 fp->f_fglob->fg_offset = offset;
4294 *retval = fp->f_fglob->fg_offset;
4295 }
4296 }
4297 }
4298
4299 /*
4300 * An lseek can affect whether data is "available to read." Use
4301 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4302 */
4303 post_event_if_success(vp, error, NOTE_NONE);
4304 (void)vnode_put(vp);
4305 file_drop(uap->fd);
4306 return (error);
4307 }
4308
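/*
 * Editorial sketch (not part of the original xnu source): the whence
 * handling above mirrors the userspace SEEK_SET/SEEK_CUR/SEEK_END
 * semantics.  A minimal userspace illustration, assuming a readable
 * file at the hypothetical path "/tmp/example":
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/tmp/example", O_RDONLY);
	if (fd < 0)
		return 1;

	off_t end = lseek(fd, 0, SEEK_END);	/* L_XTND: offset relative to file size */
	off_t cur = lseek(fd, 0, SEEK_CUR);	/* L_INCR: offset relative to fg_offset */
	off_t set = lseek(fd, 0, SEEK_SET);	/* L_SET: absolute offset */

	printf("end=%lld cur=%lld set=%lld\n",
	    (long long)end, (long long)cur, (long long)set);
	close(fd);
	return 0;
}
#endif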
4309
4310 /*
4311 * Check access permissions.
4312 *
4313 * Returns: 0 Success
4314 * vnode_authorize:???
4315 */
4316 static int
4317 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
4318 {
4319 kauth_action_t action;
4320 int error;
4321
4322 /*
4323 * If just the regular access bits, convert them to something
4324 * that vnode_authorize will understand.
4325 */
4326 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4327 action = 0;
4328 if (uflags & R_OK)
4329 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4330 if (uflags & W_OK) {
4331 if (vnode_isdir(vp)) {
4332 action |= KAUTH_VNODE_ADD_FILE |
4333 KAUTH_VNODE_ADD_SUBDIRECTORY;
4334 /* might want delete rights here too */
4335 } else {
4336 action |= KAUTH_VNODE_WRITE_DATA;
4337 }
4338 }
4339 if (uflags & X_OK) {
4340 if (vnode_isdir(vp)) {
4341 action |= KAUTH_VNODE_SEARCH;
4342 } else {
4343 action |= KAUTH_VNODE_EXECUTE;
4344 }
4345 }
4346 } else {
4347 /* take advantage of definition of uflags */
4348 action = uflags >> 8;
4349 }
4350
4351 #if CONFIG_MACF
4352 error = mac_vnode_check_access(ctx, vp, uflags);
4353 if (error)
4354 return (error);
4355 #endif /* MAC */
4356
4357 /* action == 0 means only check for existence */
4358 if (action != 0) {
4359 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
4360 } else {
4361 error = 0;
4362 }
4363
4364 return(error);
4365 }
4366
4367
4368
4369 /*
4370 * access_extended: Check access permissions in bulk.
4371 *
4372 * Description: uap->entries Pointer to an array of accessx
4373 * descriptor structs, plus one or
4374 * more NULL terminated strings (see
4375 * "Notes" section below).
4376 * uap->size Size of the area pointed to by
4377 * uap->entries.
4378 * uap->results Pointer to the results array.
4379 *
4380 * Returns: 0 Success
4381 * ENOMEM Insufficient memory
4382 * EINVAL Invalid arguments
4383 * namei:EFAULT Bad address
4384 * namei:ENAMETOOLONG Filename too long
4385 * namei:ENOENT No such file or directory
4386 * namei:ELOOP Too many levels of symbolic links
4387 * namei:EBADF Bad file descriptor
4388 * namei:ENOTDIR Not a directory
4389 * namei:???
4390 * access1:
4391 *
4392 * Implicit returns:
4393 * uap->results Array contents modified
4394 *
4395 * Notes: The uap->entries are structured as an arbitrary length array
4396 * of accessx descriptors, followed by one or more NULL terminated
4397 * strings
4398 *
4399 * struct accessx_descriptor[0]
4400 * ...
4401 * struct accessx_descriptor[n]
4402 * char name_data[0];
4403 *
4404 * We determine the entry count by walking the buffer containing
4405 * the uap->entries argument descriptor. For each descriptor we
4406 * see, the valid values for the offset ad_name_offset will be
4407 * in the byte range:
4408 *
4409 * [ uap->entries + sizeof(struct accessx_descriptor) ]
4410 * to
4411 * [ uap->entries + uap->size - 2 ]
4412 *
4413 * since we must have at least one string, and the string must
4414 * be at least one character plus the NULL terminator in length.
4415 *
4416 * XXX: Need to support the check-as uid argument
4417 */
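/*
 * Editorial illustration (not part of the original source): a request
 * containing two descriptors that share one path and one descriptor
 * with its own path could be laid out as follows, with ad_name_offset
 * measured in bytes from the start of uap->entries and an offset of 0
 * meaning "reuse the previous descriptor's path":
 *
 *	descriptor[0]	ad_name_offset = 3 * sizeof(struct accessx_descriptor)
 *	descriptor[1]	ad_name_offset = 0   (same file as descriptor[0])
 *	descriptor[2]	ad_name_offset = descriptor[0].ad_name_offset + strlen("/tmp/a") + 1
 *	"/tmp/a\0"
 *	"/tmp/b\0"
 *
 * The paths "/tmp/a" and "/tmp/b" are hypothetical.
 */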
4418 int
4419 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
4420 {
4421 struct accessx_descriptor *input = NULL;
4422 errno_t *result = NULL;
4423 errno_t error = 0;
4424 int wantdelete = 0;
4425 unsigned int desc_max, desc_actual, i, j;
4426 struct vfs_context context;
4427 struct nameidata nd;
4428 int niopts;
4429 vnode_t vp = NULL;
4430 vnode_t dvp = NULL;
4431 #define ACCESSX_MAX_DESCR_ON_STACK 10
4432 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
4433
4434 context.vc_ucred = NULL;
4435
4436 /*
4437 * Validate parameters; if valid, copy the descriptor array and string
4438 * arguments into local memory. Before proceeding, the following
4439 * conditions must have been met:
4440 *
4441 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
4442 * o There must be sufficient room in the request for at least one
4443 * descriptor and a one byte NUL terminated string.
4444 * o The allocation of local storage must not fail.
4445 */
4446 if (uap->size > ACCESSX_MAX_TABLESIZE)
4447 return(ENOMEM);
4448 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
4449 return(EINVAL);
4450 if (uap->size <= sizeof (stack_input)) {
4451 input = stack_input;
4452 } else {
4453 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
4454 if (input == NULL) {
4455 error = ENOMEM;
4456 goto out;
4457 }
4458 }
4459 error = copyin(uap->entries, input, uap->size);
4460 if (error)
4461 goto out;
4462
4463 AUDIT_ARG(opaque, input, uap->size);
4464
4465 /*
4466 * Force NUL termination of the copyin buffer to avoid namei() running
4467 * off the end. If the caller passes us bogus data, they may get a
4468 * bogus result.
4469 */
4470 ((char *)input)[uap->size - 1] = 0;
4471
4472 /*
4473 * Access is defined as checking against the process' real identity,
4474 * even if operations are checking the effective identity. This
4475 * requires that we use a local vfs context.
4476 */
4477 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
4478 context.vc_thread = current_thread();
4479
4480 /*
4481 * Find out how many entries we have, so we can allocate the result
4482 * array by walking the list and adjusting the count downward by the
4483 * earliest string offset we see.
4484 */
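/*
 * Illustrative arithmetic (editorial, hypothetical numbers): with
 * uap->size = 100 and sizeof(struct accessx_descriptor) = 16, desc_max
 * is (100 - 2) / 16 = 6.  If the earliest non-zero ad_name_offset seen
 * converts to index j = 2, desc_actual is adjusted down from 6 to 2.
 */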
4485 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
4486 desc_actual = desc_max;
4487 for (i = 0; i < desc_actual; i++) {
4488 /*
4489 * Take the offset to the name string for this entry and
4490 * convert to an input array index, which would be one off
4491 * the end of the array if this entry was the lowest-addressed
4492 * name string.
4493 */
4494 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
4495
4496 /*
4497 * An offset greater than the max allowable offset is an error.
4498 * It is also an error for any valid entry to point
4499 * to a location prior to the end of the current entry, if
4500 * it's not a reference to the string of the previous entry.
4501 */
4502 if (j > desc_max || (j != 0 && j <= i)) {
4503 error = EINVAL;
4504 goto out;
4505 }
4506
4507 /*
4508 * An offset of 0 means use the previous descriptor's offset;
4509 * this is used to chain multiple requests for the same file
4510 * to avoid multiple lookups.
4511 */
4512 if (j == 0) {
4513 /* This is not valid for the first entry */
4514 if (i == 0) {
4515 error = EINVAL;
4516 goto out;
4517 }
4518 continue;
4519 }
4520
4521 /*
4522 * If the offset of the string for this descriptor is before
4523 * what we believe is the current actual last descriptor,
4524 * then we need to adjust our estimate downward; this permits
4525 * the string table following the last descriptor to be out
4526 * of order relative to the descriptor list.
4527 */
4528 if (j < desc_actual)
4529 desc_actual = j;
4530 }
4531
4532 /*
4533 * We limit the actual number of descriptors we are willing to process
4534 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
4535 * requested exceeds this limit, the request fails with ENOMEM.
4536 */
4537 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
4538 error = ENOMEM;
4539 goto out;
4540 }
4541 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
4542 if (result == NULL) {
4543 error = ENOMEM;
4544 goto out;
4545 }
4546
4547 /*
4548 * Do the work by iterating over the descriptor entries we know to
4549 * at least appear to contain valid data.
4550 */
4551 error = 0;
4552 for (i = 0; i < desc_actual; i++) {
4553 /*
4554 * If the ad_name_offset is 0, then we use the previous
4555 * results to make the check; otherwise, we are looking up
4556 * a new file name.
4557 */
4558 if (input[i].ad_name_offset != 0) {
4559 /* discard old vnodes */
4560 if (vp) {
4561 vnode_put(vp);
4562 vp = NULL;
4563 }
4564 if (dvp) {
4565 vnode_put(dvp);
4566 dvp = NULL;
4567 }
4568
4569 /*
4570 * Scan forward in the descriptor list to see if we
4571 * need the parent vnode. We will need it if we are
4572 * deleting, since we must have rights to remove
4573 * entries in the parent directory, as well as the
4574 * rights to delete the object itself.
4575 */
4576 wantdelete = input[i].ad_flags & _DELETE_OK;
4577 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
4578 if (input[j].ad_flags & _DELETE_OK)
4579 wantdelete = 1;
4580
4581 niopts = FOLLOW | AUDITVNPATH1;
4582
4583 /* need parent for vnode_authorize for deletion test */
4584 if (wantdelete)
4585 niopts |= WANTPARENT;
4586
4587 /* do the lookup */
4588 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
4589 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
4590 &context);
4591 error = namei(&nd);
4592 if (!error) {
4593 vp = nd.ni_vp;
4594 if (wantdelete)
4595 dvp = nd.ni_dvp;
4596 }
4597 nameidone(&nd);
4598 }
4599
4600 /*
4601 * Handle lookup errors.
4602 */
4603 switch(error) {
4604 case ENOENT:
4605 case EACCES:
4606 case EPERM:
4607 case ENOTDIR:
4608 result[i] = error;
4609 break;
4610 case 0:
4611 /* run this access check */
4612 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
4613 break;
4614 default:
4615 /* fatal lookup error */
4616
4617 goto out;
4618 }
4619 }
4620
4621 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
4622
4623 /* copy out results */
4624 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
4625
4626 out:
4627 if (input && input != stack_input)
4628 FREE(input, M_TEMP);
4629 if (result)
4630 FREE(result, M_TEMP);
4631 if (vp)
4632 vnode_put(vp);
4633 if (dvp)
4634 vnode_put(dvp);
4635 if (IS_VALID_CRED(context.vc_ucred))
4636 kauth_cred_unref(&context.vc_ucred);
4637 return(error);
4638 }
4639
4640
4641 /*
4642 * Returns: 0 Success
4643 * namei:EFAULT Bad address
4644 * namei:ENAMETOOLONG Filename too long
4645 * namei:ENOENT No such file or directory
4646 * namei:ELOOP Too many levels of symbolic links
4647 * namei:EBADF Bad file descriptor
4648 * namei:ENOTDIR Not a directory
4649 * namei:???
4650 * access1:
4651 */
4652 int
4653 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
4654 {
4655 int error;
4656 struct nameidata nd;
4657 int niopts;
4658 struct vfs_context context;
4659 #if NAMEDRSRCFORK
4660 int is_namedstream = 0;
4661 #endif
4662
4663 /*
4664 * Access is defined as checking against the process'
4665 * real identity, even if operations are checking the
4666 * effective identity. So we need to tweak the credential
4667 * in the context.
4668 */
4669 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
4670 context.vc_thread = current_thread();
4671
4672 niopts = FOLLOW | AUDITVNPATH1;
4673 /* need parent for vnode_authorize for deletion test */
4674 if (uap->flags & _DELETE_OK)
4675 niopts |= WANTPARENT;
4676 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_USERSPACE,
4677 uap->path, &context);
4678
4679 #if NAMEDRSRCFORK
4680 /* access(F_OK) calls are allowed for resource forks. */
4681 if (uap->flags == F_OK)
4682 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4683 #endif
4684 error = namei(&nd);
4685 if (error)
4686 goto out;
4687
4688 #if NAMEDRSRCFORK
4689 /* Grab reference on the shadow stream file vnode to
4690 * force an inactive on release which will mark it
4691 * for recycle.
4692 */
4693 if (vnode_isnamedstream(nd.ni_vp) &&
4694 (nd.ni_vp->v_parent != NULLVP) &&
4695 vnode_isshadow(nd.ni_vp)) {
4696 is_namedstream = 1;
4697 vnode_ref(nd.ni_vp);
4698 }
4699 #endif
4700
4701 error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context);
4702
4703 #if NAMEDRSRCFORK
4704 if (is_namedstream) {
4705 vnode_rele(nd.ni_vp);
4706 }
4707 #endif
4708
4709 vnode_put(nd.ni_vp);
4710 if (uap->flags & _DELETE_OK)
4711 vnode_put(nd.ni_dvp);
4712 nameidone(&nd);
4713
4714 out:
4715 kauth_cred_unref(&context.vc_ucred);
4716 return(error);
4717 }
4718
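/*
 * Editorial sketch (not part of the original source): a plain userspace
 * access(2) check; R_OK/W_OK/X_OK are the "regular access bits" that
 * access1() converts to kauth actions.  The path is hypothetical.
 */
#if 0
#include <unistd.h>

static int
can_read_and_write(const char *path)
{
	/* 0 means both checks passed against the real uid/gid */
	return access(path, R_OK | W_OK) == 0;
}
#endif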
4719
4720 /*
4721 * Returns: 0 Success
4722 * EFAULT
4723 * copyout:EFAULT
4724 * namei:???
4725 * vn_stat:???
4726 */
4727 static int
4728 stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4729 {
4730 union {
4731 struct stat sb;
4732 struct stat64 sb64;
4733 } source;
4734 union {
4735 struct user64_stat user64_sb;
4736 struct user32_stat user32_sb;
4737 struct user64_stat64 user64_sb64;
4738 struct user32_stat64 user32_sb64;
4739 } dest;
4740 caddr_t sbp;
4741 int error, my_size;
4742 kauth_filesec_t fsec;
4743 size_t xsecurity_bufsize;
4744 void * statptr;
4745
4746 #if NAMEDRSRCFORK
4747 int is_namedstream = 0;
4748 /* stat calls are allowed for resource forks. */
4749 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4750 #endif
4751 error = namei(ndp);
4752 if (error)
4753 return (error);
4754 fsec = KAUTH_FILESEC_NONE;
4755
4756 statptr = (void *)&source;
4757
4758 #if NAMEDRSRCFORK
4759 /* Grab reference on the shadow stream file vnode to
4760 * force an inactive on release which will mark it
4761 * for recycle.
4762 */
4763 if (vnode_isnamedstream(ndp->ni_vp) &&
4764 (ndp->ni_vp->v_parent != NULLVP) &&
4765 vnode_isshadow(ndp->ni_vp)) {
4766 is_namedstream = 1;
4767 vnode_ref(ndp->ni_vp);
4768 }
4769 #endif
4770
4771 error = vn_stat(ndp->ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
4772
4773 #if NAMEDRSRCFORK
4774 if (is_namedstream) {
4775 vnode_rele(ndp->ni_vp);
4776 }
4777 #endif
4778 vnode_put(ndp->ni_vp);
4779 nameidone(ndp);
4780
4781 if (error)
4782 return (error);
4783 /* Zap spare fields */
4784 if (isstat64 != 0) {
4785 source.sb64.st_lspare = 0;
4786 source.sb64.st_qspare[0] = 0LL;
4787 source.sb64.st_qspare[1] = 0LL;
4788 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
4789 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
4790 my_size = sizeof(dest.user64_sb64);
4791 sbp = (caddr_t)&dest.user64_sb64;
4792 } else {
4793 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
4794 my_size = sizeof(dest.user32_sb64);
4795 sbp = (caddr_t)&dest.user32_sb64;
4796 }
4797 /*
4798 * Check if we raced (post lookup) against the last unlink of a file.
4799 */
4800 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
4801 source.sb64.st_nlink = 1;
4802 }
4803 } else {
4804 source.sb.st_lspare = 0;
4805 source.sb.st_qspare[0] = 0LL;
4806 source.sb.st_qspare[1] = 0LL;
4807 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
4808 munge_user64_stat(&source.sb, &dest.user64_sb);
4809 my_size = sizeof(dest.user64_sb);
4810 sbp = (caddr_t)&dest.user64_sb;
4811 } else {
4812 munge_user32_stat(&source.sb, &dest.user32_sb);
4813 my_size = sizeof(dest.user32_sb);
4814 sbp = (caddr_t)&dest.user32_sb;
4815 }
4816
4817 /*
4818 * Check if we raced (post lookup) against the last unlink of a file.
4819 */
4820 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
4821 source.sb.st_nlink = 1;
4822 }
4823 }
4824 if ((error = copyout(sbp, ub, my_size)) != 0)
4825 goto out;
4826
4827 /* caller wants extended security information? */
4828 if (xsecurity != USER_ADDR_NULL) {
4829
4830 /* did we get any? */
4831 if (fsec == KAUTH_FILESEC_NONE) {
4832 if (susize(xsecurity_size, 0) != 0) {
4833 error = EFAULT;
4834 goto out;
4835 }
4836 } else {
4837 /* find the user buffer size */
4838 xsecurity_bufsize = fusize(xsecurity_size);
4839
4840 /* copy out the actual data size */
4841 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
4842 error = EFAULT;
4843 goto out;
4844 }
4845
4846 /* if the caller supplied enough room, copy out to it */
4847 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
4848 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
4849 }
4850 }
4851 out:
4852 if (fsec != KAUTH_FILESEC_NONE)
4853 kauth_filesec_free(fsec);
4854 return (error);
4855 }
4856
4857 /*
4858 * Get file status; this version follows links.
4859 *
4860 * Returns: 0 Success
4861 * stat2:??? [see stat2() in this file]
4862 */
4863 static int
4864 stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4865 {
4866 struct nameidata nd;
4867 vfs_context_t ctx = vfs_context_current();
4868
4869 NDINIT(&nd, LOOKUP, OP_GETATTR, NOTRIGGER | FOLLOW | AUDITVNPATH1,
4870 UIO_USERSPACE, path, ctx);
4871 return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
4872 }
4873
4874 /*
4875 * stat_extended: Get file status; with extended security (ACL).
4876 *
4877 * Parameters: p (ignored)
4878 * uap User argument descriptor (see below)
4879 * retval (ignored)
4880 *
4881 * Indirect: uap->path Path of file to get status from
4882 * uap->ub User buffer (holds file status info)
4883 * uap->xsecurity ACL to get (extended security)
4884 * uap->xsecurity_size Size of ACL
4885 *
4886 * Returns: 0 Success
4887 * !0 errno value
4888 *
4889 */
4890 int
4891 stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused int32_t *retval)
4892 {
4893 return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
4894 }
4895
4896 /*
4897 * Returns: 0 Success
4898 * stat1:??? [see stat1() in this file]
4899 */
4900 int
4901 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
4902 {
4903 return(stat1(uap->path, uap->ub, 0, 0, 0));
4904 }
4905
4906 int
4907 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
4908 {
4909 return(stat1(uap->path, uap->ub, 0, 0, 1));
4910 }
4911
4912 /*
4913 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
4914 *
4915 * Parameters: p (ignored)
4916 * uap User argument descriptor (see below)
4917 * retval (ignored)
4918 *
4919 * Indirect: uap->path Path of file to get status from
4920 * uap->ub User buffer (holds file status info)
4921 * uap->xsecurity ACL to get (extended security)
4922 * uap->xsecurity_size Size of ACL
4923 *
4924 * Returns: 0 Success
4925 * !0 errno value
4926 *
4927 */
4928 int
4929 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
4930 {
4931 return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
4932 }
4933 /*
4934 * Get file status; this version does not follow links.
4935 */
4936 static int
4937 lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4938 {
4939 struct nameidata nd;
4940 vfs_context_t ctx = vfs_context_current();
4941
4942 NDINIT(&nd, LOOKUP, OP_GETATTR, NOTRIGGER | NOFOLLOW | AUDITVNPATH1,
4943 UIO_USERSPACE, path, ctx);
4944
4945 return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
4946 }
4947
4948 /*
4949 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
4950 *
4951 * Parameters: p (ignored)
4952 * uap User argument descriptor (see below)
4953 * retval (ignored)
4954 *
4955 * Indirect: uap->path Path of file to get status from
4956 * uap->ub User buffer (holds file status info)
4957 * uap->xsecurity ACL to get (extended security)
4958 * uap->xsecurity_size Size of ACL
4959 *
4960 * Returns: 0 Success
4961 * !0 errno value
4962 *
4963 */
4964 int
4965 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
4966 {
4967 return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
4968 }
4969
4970 int
4971 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
4972 {
4973 return(lstat1(uap->path, uap->ub, 0, 0, 0));
4974 }
4975
4976 int
4977 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
4978 {
4979 return(lstat1(uap->path, uap->ub, 0, 0, 1));
4980 }
4981
4982 /*
4983 * lstat64_extended: Get file status; can handle large inode numbers; does not
4984 * follow links; with extended security (ACL).
4985 *
4986 * Parameters: p (ignored)
4987 * uap User argument descriptor (see below)
4988 * retval (ignored)
4989 *
4990 * Indirect: uap->path Path of file to get status from
4991 * uap->ub User buffer (holds file status info)
4992 * uap->xsecurity ACL to get (extended security)
4993 * uap->xsecurity_size Size of ACL
4994 *
4995 * Returns: 0 Success
4996 * !0 errno value
4997 *
4998 */
4999 int
5000 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
5001 {
5002 return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
5003 }
5004
5005 /*
5006 * Get configurable pathname variables.
5007 *
5008 * Returns: 0 Success
5009 * namei:???
5010 * vn_pathconf:???
5011 *
5012 * Notes: Global implementation constants are intended to be
5013 * implemented in this function directly; all other constants
5014 * are per-FS implementation, and therefore must be handled in
5015 * each respective FS, instead.
5016 *
5017 * XXX We implement some things globally right now that should actually be
5018 * XXX per-FS; we will need to deal with this at some point.
5019 */
5020 /* ARGSUSED */
5021 int
5022 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
5023 {
5024 int error;
5025 struct nameidata nd;
5026 vfs_context_t ctx = vfs_context_current();
5027
5028 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
5029 UIO_USERSPACE, uap->path, ctx);
5030 error = namei(&nd);
5031 if (error)
5032 return (error);
5033
5034 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
5035
5036 vnode_put(nd.ni_vp);
5037 nameidone(&nd);
5038 return (error);
5039 }
5040
5041 /*
5042 * Return target name of a symbolic link.
5043 */
5044 /* ARGSUSED */
5045 int
5046 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5047 {
5048 vnode_t vp;
5049 uio_t auio;
5050 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5051 int error;
5052 struct nameidata nd;
5053 vfs_context_t ctx = vfs_context_current();
5054 char uio_buf[ UIO_SIZEOF(1) ];
5055
5056 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5057 UIO_USERSPACE, uap->path, ctx);
5058 error = namei(&nd);
5059 if (error)
5060 return (error);
5061 vp = nd.ni_vp;
5062
5063 nameidone(&nd);
5064
5065 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
5066 &uio_buf[0], sizeof(uio_buf));
5067 uio_addiov(auio, uap->buf, uap->count);
5068 if (vp->v_type != VLNK)
5069 error = EINVAL;
5070 else {
5071 #if CONFIG_MACF
5072 error = mac_vnode_check_readlink(ctx,
5073 vp);
5074 #endif
5075 if (error == 0)
5076 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx);
5077 if (error == 0)
5078 error = VNOP_READLINK(vp, auio, ctx);
5079 }
5080 vnode_put(vp);
5081
5082 /* Safe: uio_resid() is bounded above by "count", and "count" is an int */
5083 *retval = uap->count - (int)uio_resid(auio);
5084 return (error);
5085 }
5086
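/*
 * Editorial sketch (not part of the original source): readlink(2) does
 * not NUL-terminate the buffer, so userspace must terminate it using
 * the returned length.  The path is hypothetical.
 */
#if 0
#include <stdio.h>
#include <unistd.h>

static void
print_link_target(const char *path)
{
	char buf[1024];
	ssize_t len = readlink(path, buf, sizeof(buf) - 1);

	if (len >= 0) {
		buf[len] = '\0';
		printf("%s -> %s\n", path, buf);
	}
}
#endif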
5087 /*
5088 * Change file flags.
5089 */
5090 static int
5091 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5092 {
5093 struct vnode_attr va;
5094 kauth_action_t action;
5095 int error;
5096
5097 VATTR_INIT(&va);
5098 VATTR_SET(&va, va_flags, flags);
5099
5100 #if CONFIG_MACF
5101 error = mac_vnode_check_setflags(ctx, vp, flags);
5102 if (error)
5103 goto out;
5104 #endif
5105
5106 /* request authorisation, disregard immutability */
5107 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5108 goto out;
5109 /*
5110 * Request that the auth layer disregard those file flags it's allowed to when
5111 * authorizing this operation; we need to do this in order to be able to
5112 * clear immutable flags.
5113 */
5114 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
5115 goto out;
5116 error = vnode_setattr(vp, &va, ctx);
5117
5118 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5119 error = ENOTSUP;
5120 }
5121 out:
5122 vnode_put(vp);
5123 return(error);
5124 }
5125
5126 /*
5127 * Change flags of a file given a path name.
5128 */
5129 /* ARGSUSED */
5130 int
5131 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
5132 {
5133 vnode_t vp;
5134 vfs_context_t ctx = vfs_context_current();
5135 int error;
5136 struct nameidata nd;
5137
5138 AUDIT_ARG(fflags, uap->flags);
5139 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5140 UIO_USERSPACE, uap->path, ctx);
5141 error = namei(&nd);
5142 if (error)
5143 return (error);
5144 vp = nd.ni_vp;
5145 nameidone(&nd);
5146
5147 error = chflags1(vp, uap->flags, ctx);
5148
5149 return(error);
5150 }
5151
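/*
 * Editorial sketch (not part of the original source): setting BSD file
 * flags from userspace, assuming the BSD chflags(2) interface.  The
 * path and helper name are hypothetical.
 */
#if 0
#include <sys/stat.h>
#include <unistd.h>

static int
make_immutable(const char *path)
{
	struct stat st;

	if (stat(path, &st) != 0)
		return -1;
	/* preserve any existing flags and add user-immutable */
	return chflags(path, st.st_flags | UF_IMMUTABLE);
}
#endif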
5152 /*
5153 * Change flags of a file given a file descriptor.
5154 */
5155 /* ARGSUSED */
5156 int
5157 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
5158 {
5159 vnode_t vp;
5160 int error;
5161
5162 AUDIT_ARG(fd, uap->fd);
5163 AUDIT_ARG(fflags, uap->flags);
5164 if ( (error = file_vnode(uap->fd, &vp)) )
5165 return (error);
5166
5167 if ((error = vnode_getwithref(vp))) {
5168 file_drop(uap->fd);
5169 return(error);
5170 }
5171
5172 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5173
5174 error = chflags1(vp, uap->flags, vfs_context_current());
5175
5176 file_drop(uap->fd);
5177 return (error);
5178 }
5179
5180 /*
5181 * Change security information on a filesystem object.
5182 *
5183 * Returns: 0 Success
5184 * EPERM Operation not permitted
5185 * vnode_authattr:??? [anything vnode_authattr can return]
5186 * vnode_authorize:??? [anything vnode_authorize can return]
5187 * vnode_setattr:??? [anything vnode_setattr can return]
5188 *
5189 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5190 * translated to EPERM before being returned.
5191 */
5192 static int
5193 chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
5194 {
5195 kauth_action_t action;
5196 int error;
5197
5198 AUDIT_ARG(mode, vap->va_mode);
5199 /* XXX audit new args */
5200
5201 #if NAMEDSTREAMS
5202 /* chmod calls are not allowed for resource forks. */
5203 if (vp->v_flag & VISNAMEDSTREAM) {
5204 return (EPERM);
5205 }
5206 #endif
5207
5208 #if CONFIG_MACF
5209 if (VATTR_IS_ACTIVE(vap, va_mode) &&
5210 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
5211 return (error);
5212 #endif
5213
5214 /* make sure that the caller is allowed to set this security information */
5215 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
5216 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5217 if (error == EACCES)
5218 error = EPERM;
5219 return(error);
5220 }
5221
5222 error = vnode_setattr(vp, vap, ctx);
5223
5224 return (error);
5225 }
5226
5227
5228 /*
5229 * Change mode of a file given a path name.
5230 *
5231 * Returns: 0 Success
5232 * namei:??? [anything namei can return]
5233 * chmod2:??? [anything chmod2 can return]
5234 */
5235 static int
5236 chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
5237 {
5238 struct nameidata nd;
5239 int error;
5240
5241 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5242 UIO_USERSPACE, path, ctx);
5243 if ((error = namei(&nd)))
5244 return (error);
5245 error = chmod2(ctx, nd.ni_vp, vap);
5246 vnode_put(nd.ni_vp);
5247 nameidone(&nd);
5248 return(error);
5249 }
5250
5251 /*
5252 * chmod_extended: Change the mode of a file given a path name; with extended
5253 * argument list (including extended security (ACL)).
5254 *
5255 * Parameters: p Process requesting the open
5256 * uap User argument descriptor (see below)
5257 * retval (ignored)
5258 *
5259 * Indirect: uap->path Path to object (same as 'chmod')
5260 * uap->uid UID to set
5261 * uap->gid GID to set
5262 * uap->mode File mode to set (same as 'chmod')
5263 * uap->xsecurity ACL to set (or delete)
5264 *
5265 * Returns: 0 Success
5266 * !0 errno value
5267 *
5268 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5269 *
5270 * XXX: We should enumerate the possible errno values here, and where
5271 * in the code they originated.
5272 */
5273 int
5274 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
5275 {
5276 int error;
5277 struct vnode_attr va;
5278 kauth_filesec_t xsecdst;
5279
5280 AUDIT_ARG(owner, uap->uid, uap->gid);
5281
5282 VATTR_INIT(&va);
5283 if (uap->mode != -1)
5284 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5285 if (uap->uid != KAUTH_UID_NONE)
5286 VATTR_SET(&va, va_uid, uap->uid);
5287 if (uap->gid != KAUTH_GID_NONE)
5288 VATTR_SET(&va, va_gid, uap->gid);
5289
5290 xsecdst = NULL;
5291 switch(uap->xsecurity) {
5292 /* explicit remove request */
5293 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5294 VATTR_SET(&va, va_acl, NULL);
5295 break;
5296 /* not being set */
5297 case USER_ADDR_NULL:
5298 break;
5299 default:
5300 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5301 return(error);
5302 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5303 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
5304 }
5305
5306 error = chmod1(vfs_context_current(), uap->path, &va);
5307
5308 if (xsecdst != NULL)
5309 kauth_filesec_free(xsecdst);
5310 return(error);
5311 }
5312
5313 /*
5314 * Returns: 0 Success
5315 * chmod1:??? [anything chmod1 can return]
5316 */
5317 int
5318 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
5319 {
5320 struct vnode_attr va;
5321
5322 VATTR_INIT(&va);
5323 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5324
5325 return(chmod1(vfs_context_current(), uap->path, &va));
5326 }
5327
5328 /*
5329 * Change mode of a file given a file descriptor.
5330 */
5331 static int
5332 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
5333 {
5334 vnode_t vp;
5335 int error;
5336
5337 AUDIT_ARG(fd, fd);
5338
5339 if ((error = file_vnode(fd, &vp)) != 0)
5340 return (error);
5341 if ((error = vnode_getwithref(vp)) != 0) {
5342 file_drop(fd);
5343 return(error);
5344 }
5345 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5346
5347 error = chmod2(vfs_context_current(), vp, vap);
5348 (void)vnode_put(vp);
5349 file_drop(fd);
5350
5351 return (error);
5352 }
5353
5354 /*
5355 * fchmod_extended: Change mode of a file given a file descriptor; with
5356 * extended argument list (including extended security (ACL)).
5357 *
5358 * Parameters: p Process requesting to change file mode
5359 * uap User argument descriptor (see below)
5360 * retval (ignored)
5361 *
5362 * Indirect: uap->mode File mode to set (same as 'chmod')
5363 * uap->uid UID to set
5364 * uap->gid GID to set
5365 * uap->xsecurity ACL to set (or delete)
5366 * uap->fd File descriptor of file to change mode
5367 *
5368 * Returns: 0 Success
5369 * !0 errno value
5370 *
5371 */
5372 int
5373 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
5374 {
5375 int error;
5376 struct vnode_attr va;
5377 kauth_filesec_t xsecdst;
5378
5379 AUDIT_ARG(owner, uap->uid, uap->gid);
5380
5381 VATTR_INIT(&va);
5382 if (uap->mode != -1)
5383 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5384 if (uap->uid != KAUTH_UID_NONE)
5385 VATTR_SET(&va, va_uid, uap->uid);
5386 if (uap->gid != KAUTH_GID_NONE)
5387 VATTR_SET(&va, va_gid, uap->gid);
5388
5389 xsecdst = NULL;
5390 switch(uap->xsecurity) {
5391 case USER_ADDR_NULL:
5392 VATTR_SET(&va, va_acl, NULL);
5393 break;
5394 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5395 VATTR_SET(&va, va_acl, NULL);
5396 break;
5397 /* not being set */
5398 case CAST_USER_ADDR_T(-1):
5399 break;
5400 default:
5401 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5402 return(error);
5403 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5404 }
5405
5406 error = fchmod1(p, uap->fd, &va);
5407
5408
5409 switch(uap->xsecurity) {
5410 case USER_ADDR_NULL:
5411 case CAST_USER_ADDR_T(-1):
5412 break;
5413 default:
5414 if (xsecdst != NULL)
5415 kauth_filesec_free(xsecdst);
5416 }
5417 return(error);
5418 }
5419
5420 int
5421 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
5422 {
5423 struct vnode_attr va;
5424
5425 VATTR_INIT(&va);
5426 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5427
5428 return(fchmod1(p, uap->fd, &va));
5429 }
5430
5431
5432 /*
5433 * Set ownership given a path name.
5434 */
5435 /* ARGSUSED */
5436 static int
5437 chown1(vfs_context_t ctx, struct chown_args *uap, __unused int32_t *retval, int follow)
5438 {
5439 vnode_t vp;
5440 struct vnode_attr va;
5441 int error;
5442 struct nameidata nd;
5443 kauth_action_t action;
5444
5445 AUDIT_ARG(owner, uap->uid, uap->gid);
5446
5447 NDINIT(&nd, LOOKUP, OP_SETATTR,
5448 (follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1,
5449 UIO_USERSPACE, uap->path, ctx);
5450 error = namei(&nd);
5451 if (error)
5452 return (error);
5453 vp = nd.ni_vp;
5454
5455 nameidone(&nd);
5456
5457 VATTR_INIT(&va);
5458 if (uap->uid != VNOVAL)
5459 VATTR_SET(&va, va_uid, uap->uid);
5460 if (uap->gid != VNOVAL)
5461 VATTR_SET(&va, va_gid, uap->gid);
5462
5463 #if CONFIG_MACF
5464 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
5465 if (error)
5466 goto out;
5467 #endif
5468
5469 /* preflight and authorize attribute changes */
5470 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5471 goto out;
5472 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
5473 goto out;
5474 error = vnode_setattr(vp, &va, ctx);
5475
5476 out:
5477 /*
5478 * EACCES is only allowed from namei(); permissions failure should
5479 * return EPERM, so we need to translate the error code.
5480 */
5481 if (error == EACCES)
5482 error = EPERM;
5483
5484 vnode_put(vp);
5485 return (error);
5486 }
5487
5488 int
5489 chown(__unused proc_t p, struct chown_args *uap, int32_t *retval)
5490 {
5491 return chown1(vfs_context_current(), uap, retval, 1);
5492 }
5493
5494 int
5495 lchown(__unused proc_t p, struct lchown_args *uap, int32_t *retval)
5496 {
5497 /* Argument list identical, but machine generated; cast for chown1() */
5498 return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0);
5499 }
5500
5501 /*
5502 * Set ownership given a file descriptor.
5503 */
5504 /* ARGSUSED */
5505 int
5506 fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
5507 {
5508 struct vnode_attr va;
5509 vfs_context_t ctx = vfs_context_current();
5510 vnode_t vp;
5511 int error;
5512 kauth_action_t action;
5513
5514 AUDIT_ARG(owner, uap->uid, uap->gid);
5515 AUDIT_ARG(fd, uap->fd);
5516
5517 if ( (error = file_vnode(uap->fd, &vp)) )
5518 return (error);
5519
5520 if ( (error = vnode_getwithref(vp)) ) {
5521 file_drop(uap->fd);
5522 return(error);
5523 }
5524 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5525
5526 VATTR_INIT(&va);
5527 if (uap->uid != VNOVAL)
5528 VATTR_SET(&va, va_uid, uap->uid);
5529 if (uap->gid != VNOVAL)
5530 VATTR_SET(&va, va_gid, uap->gid);
5531
5532 #if NAMEDSTREAMS
5533 /* chown calls are not allowed for resource forks. */
5534 if (vp->v_flag & VISNAMEDSTREAM) {
5535 error = EPERM;
5536 goto out;
5537 }
5538 #endif
5539
5540 #if CONFIG_MACF
5541 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
5542 if (error)
5543 goto out;
5544 #endif
5545
5546 /* preflight and authorize attribute changes */
5547 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5548 goto out;
5549 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5550 if (error == EACCES)
5551 error = EPERM;
5552 goto out;
5553 }
5554 error = vnode_setattr(vp, &va, ctx);
5555
5556 out:
5557 (void)vnode_put(vp);
5558 file_drop(uap->fd);
5559 return (error);
5560 }
5561
5562 static int
5563 getutimes(user_addr_t usrtvp, struct timespec *tsp)
5564 {
5565 int error;
5566
5567 if (usrtvp == USER_ADDR_NULL) {
5568 struct timeval old_tv;
5569 /* XXX Y2038 bug because of microtime argument */
5570 microtime(&old_tv);
5571 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
5572 tsp[1] = tsp[0];
5573 } else {
5574 if (IS_64BIT_PROCESS(current_proc())) {
5575 struct user64_timeval tv[2];
5576 error = copyin(usrtvp, (void *)tv, sizeof(tv));
5577 if (error)
5578 return (error);
5579 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
5580 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
5581 } else {
5582 struct user32_timeval tv[2];
5583 error = copyin(usrtvp, (void *)tv, sizeof(tv));
5584 if (error)
5585 return (error);
5586 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
5587 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
5588 }
5589 }
5590 return 0;
5591 }
5592
5593 static int
5594 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
5595 int nullflag)
5596 {
5597 int error;
5598 struct vnode_attr va;
5599 kauth_action_t action;
5600
5601 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5602
5603 VATTR_INIT(&va);
5604 VATTR_SET(&va, va_access_time, ts[0]);
5605 VATTR_SET(&va, va_modify_time, ts[1]);
5606 if (nullflag)
5607 va.va_vaflags |= VA_UTIMES_NULL;
5608
5609 #if NAMEDSTREAMS
5610 /* utimes calls are not allowed for resource forks. */
5611 if (vp->v_flag & VISNAMEDSTREAM) {
5612 error = EPERM;
5613 goto out;
5614 }
5615 #endif
5616
5617 #if CONFIG_MACF
5618 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
5619 if (error)
5620 goto out;
5621 #endif
5622 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
5623 if (!nullflag && error == EACCES)
5624 error = EPERM;
5625 goto out;
5626 }
5627
5628 /* since we may not need to auth anything, check here */
5629 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5630 if (!nullflag && error == EACCES)
5631 error = EPERM;
5632 goto out;
5633 }
5634 error = vnode_setattr(vp, &va, ctx);
5635
5636 out:
5637 return error;
5638 }
5639
5640 /*
5641 * Set the access and modification times of a file.
5642 */
5643 /* ARGSUSED */
5644 int
5645 utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
5646 {
5647 struct timespec ts[2];
5648 user_addr_t usrtvp;
5649 int error;
5650 struct nameidata nd;
5651 vfs_context_t ctx = vfs_context_current();
5652
5653 /*
5654 * AUDIT: Needed to change the order of operations to do the
5655 * name lookup first because auditing wants the path.
5656 */
5657 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5658 UIO_USERSPACE, uap->path, ctx);
5659 error = namei(&nd);
5660 if (error)
5661 return (error);
5662 nameidone(&nd);
5663
5664 /*
5665 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
5666 * the current time instead.
5667 */
5668 usrtvp = uap->tptr;
5669 if ((error = getutimes(usrtvp, ts)) != 0)
5670 goto out;
5671
5672 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
5673
5674 out:
5675 vnode_put(nd.ni_vp);
5676 return (error);
5677 }
5678
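/*
 * Editorial sketch (not part of the original source): setting explicit
 * access/modification times from userspace; passing NULL instead of
 * the array sets both to the current time, matching the USER_ADDR_NULL
 * case in getutimes() above.  The path and helper are hypothetical.
 */
#if 0
#include <sys/time.h>

static int
set_times_to_epoch_plus_one(const char *path)
{
	struct timeval tv[2];

	tv[0].tv_sec = 1;	/* access time */
	tv[0].tv_usec = 0;
	tv[1].tv_sec = 1;	/* modification time */
	tv[1].tv_usec = 0;
	return utimes(path, tv);
}
#endif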
5679 /*
5680 * Set the access and modification times of a file.
5681 */
5682 /* ARGSUSED */
5683 int
5684 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
5685 {
5686 struct timespec ts[2];
5687 vnode_t vp;
5688 user_addr_t usrtvp;
5689 int error;
5690
5691 AUDIT_ARG(fd, uap->fd);
5692 usrtvp = uap->tptr;
5693 if ((error = getutimes(usrtvp, ts)) != 0)
5694 return (error);
5695 if ((error = file_vnode(uap->fd, &vp)) != 0)
5696 return (error);
5697 if((error = vnode_getwithref(vp))) {
5698 file_drop(uap->fd);
5699 return(error);
5700 }
5701
5702 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
5703 vnode_put(vp);
5704 file_drop(uap->fd);
5705 return(error);
5706 }
5707
5708 /*
5709 * Truncate a file given its path name.
5710 */
5711 /* ARGSUSED */
5712 int
5713 truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
5714 {
5715 vnode_t vp;
5716 struct vnode_attr va;
5717 vfs_context_t ctx = vfs_context_current();
5718 int error;
5719 struct nameidata nd;
5720 kauth_action_t action;
5721
5722 if (uap->length < 0)
5723 return(EINVAL);
5724 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
5725 UIO_USERSPACE, uap->path, ctx);
5726 if ((error = namei(&nd)))
5727 return (error);
5728 vp = nd.ni_vp;
5729
5730 nameidone(&nd);
5731
5732 VATTR_INIT(&va);
5733 VATTR_SET(&va, va_data_size, uap->length);
5734
5735 #if CONFIG_MACF
5736 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
5737 if (error)
5738 goto out;
5739 #endif
5740
5741 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5742 goto out;
5743 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
5744 goto out;
5745 error = vnode_setattr(vp, &va, ctx);
5746 out:
5747 vnode_put(vp);
5748 return (error);
5749 }
5750
5751 /*
5752 * Truncate a file given a file descriptor.
5753 */
5754 /* ARGSUSED */
5755 int
5756 ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
5757 {
5758 vfs_context_t ctx = vfs_context_current();
5759 struct vnode_attr va;
5760 vnode_t vp;
5761 struct fileproc *fp;
5762 int error ;
5763 int fd = uap->fd;
5764
5765 AUDIT_ARG(fd, uap->fd);
5766 if (uap->length < 0)
5767 return(EINVAL);
5768
5769 if ( (error = fp_lookup(p,fd,&fp,0)) ) {
5770 return(error);
5771 }
5772
5773 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
5774 case DTYPE_PSXSHM:
5775 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
5776 goto out;
5777 case DTYPE_VNODE:
5778 break;
5779 default:
5780 error = EINVAL;
5781 goto out;
5782 }
5783
5784 vp = (vnode_t)fp->f_fglob->fg_data;
5785
5786 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
5787 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
5788 error = EINVAL;
5789 goto out;
5790 }
5791
5792 if ((error = vnode_getwithref(vp)) != 0) {
5793 goto out;
5794 }
5795
5796 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5797
5798 #if CONFIG_MACF
5799 error = mac_vnode_check_truncate(ctx,
5800 fp->f_fglob->fg_cred, vp);
5801 if (error) {
5802 (void)vnode_put(vp);
5803 goto out;
5804 }
5805 #endif
5806 VATTR_INIT(&va);
5807 VATTR_SET(&va, va_data_size, uap->length);
5808 error = vnode_setattr(vp, &va, ctx);
5809 (void)vnode_put(vp);
5810 out:
5811 file_drop(fd);
5812 return (error);
5813 }
5814
5815
5816 /*
5817 * Sync an open file with synchronized I/O _file_ integrity completion
5818 */
5819 /* ARGSUSED */
5820 int
5821 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
5822 {
5823 __pthread_testcancel(1);
5824 return(fsync_common(p, uap, MNT_WAIT));
5825 }
5826
5827
5828 /*
5829 * Sync an open file with synchronized I/O _file_ integrity completion
5830 *
5831 * Notes: This is a legacy support function that does not test for
5832 * thread cancellation points.
5833 */
5834 /* ARGSUSED */
5835 int
5836 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
5837 {
5838 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
5839 }
5840
5841
5842 /*
5843 * Sync an open file with synchronized I/O _data_ integrity completion
5844 */
5845 /* ARGSUSED */
5846 int
5847 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
5848 {
5849 __pthread_testcancel(1);
5850 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
5851 }
5852
5853
5854 /*
5855 * fsync_common
5856 *
5857 * Common fsync code to support both synchronized I/O file integrity completion
5858 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
5859 *
5860 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
5861 * will only guarantee that the file data contents are retrievable. If
5862 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
5863 * requires that additional metadata unnecessary for retrieving the file data
5864 * contents, such as atime, mtime, ctime, etc., be committed to stable
5865 * storage.
5866 *
5867 * Parameters: p The process
5868 * uap->fd The descriptor to synchronize
5869 * flags The data integrity flags
5870 *
5871 * Returns: int Success
5872 * fp_getfvp:EBADF Bad file descriptor
5873 * fp_getfvp:ENOTSUP fd does not refer to a vnode
5874 * VNOP_FSYNC:??? unspecified
5875 *
5876 * Notes: We use struct fsync_args because it is a short name, and all
5877 * caller argument structures are otherwise identical.
5878 */
5879 static int
5880 fsync_common(proc_t p, struct fsync_args *uap, int flags)
5881 {
5882 vnode_t vp;
5883 struct fileproc *fp;
5884 vfs_context_t ctx = vfs_context_current();
5885 int error;
5886
5887 AUDIT_ARG(fd, uap->fd);
5888
5889 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
5890 return (error);
5891 if ( (error = vnode_getwithref(vp)) ) {
5892 file_drop(uap->fd);
5893 return(error);
5894 }
5895
5896 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5897
5898 error = VNOP_FSYNC(vp, flags, ctx);
5899
5900 #if NAMEDRSRCFORK
5901 /* Sync resource fork shadow file if necessary. */
5902 if ((error == 0) &&
5903 (vp->v_flag & VISNAMEDSTREAM) &&
5904 (vp->v_parent != NULLVP) &&
5905 vnode_isshadow(vp) &&
5906 (fp->f_flags & FP_WRITTEN)) {
5907 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
5908 }
5909 #endif
5910
5911 (void)vnode_put(vp);
5912 file_drop(uap->fd);
5913 return (error);
5914 }
5915
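/*
 * Editorial sketch (not part of the original source): from userspace,
 * fsync(2) maps to the MNT_WAIT (file integrity) path above and
 * fdatasync(2) to the MNT_DWAIT (data integrity) path; the helper name
 * is hypothetical.
 */
#if 0
#include <unistd.h>

static int
flush_file(int fd, int need_metadata)
{
	/* fdatasync() is sufficient when only the file data must be durable */
	return need_metadata ? fsync(fd) : fdatasync(fd);
}
#endif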
5916 /*
5917 * Duplicate files. Source must be a file, target must be a file or
5918 * must not exist.
5919 *
5920 * XXX Copyfile authorisation checking is woefully inadequate, and will not
5921 * perform inheritance correctly.
5922 */
5923 /* ARGSUSED */
5924 int
5925 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
5926 {
5927 vnode_t tvp, fvp, tdvp, sdvp;
5928 struct nameidata fromnd, tond;
5929 int error;
5930 vfs_context_t ctx = vfs_context_current();
5931
5932 /* Check that the flags are valid. */
5933
5934 if (uap->flags & ~CPF_MASK) {
5935 return(EINVAL);
5936 }
5937
5938 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1,
5939 UIO_USERSPACE, uap->from, ctx);
5940 if ((error = namei(&fromnd)))
5941 return (error);
5942 fvp = fromnd.ni_vp;
5943
5944 NDINIT(&tond, CREATE, OP_LINK,
5945 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
5946 UIO_USERSPACE, uap->to, ctx);
5947 if ((error = namei(&tond))) {
5948 goto out1;
5949 }
5950 tdvp = tond.ni_dvp;
5951 tvp = tond.ni_vp;
5952
5953 if (tvp != NULL) {
5954 if (!(uap->flags & CPF_OVERWRITE)) {
5955 error = EEXIST;
5956 goto out;
5957 }
5958 }
5959 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
5960 error = EISDIR;
5961 goto out;
5962 }
5963
5964 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
5965 goto out;
5966
5967 if (fvp == tdvp)
5968 error = EINVAL;
5969 /*
5970 * If source is the same as the destination (that is the
5971 * same inode number) then there is nothing to do.
5972 * (fixed to have POSIX semantics - CSM 3/2/98)
5973 */
5974 if (fvp == tvp)
5975 error = -1;
5976 if (!error)
5977 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
5978 out:
5979 sdvp = tond.ni_startdir;
5980 /*
5981 * nameidone has to happen before we vnode_put(tdvp)
5982 * since it may need to release the fs_nodelock on the tdvp
5983 */
5984 nameidone(&tond);
5985
5986 if (tvp)
5987 vnode_put(tvp);
5988 vnode_put(tdvp);
5989 vnode_put(sdvp);
5990 out1:
5991 vnode_put(fvp);
5992
5993 if (fromnd.ni_startdir)
5994 vnode_put(fromnd.ni_startdir);
5995 nameidone(&fromnd);
5996
5997 if (error == -1)
5998 return (0);
5999 return (error);
6000 }
6001
6002
6003 /*
6004 * Rename files. Source and destination must either both be directories,
6005 * or both not be directories. If target is a directory, it must be empty.
6006 */
6007 /* ARGSUSED */
6008 int
6009 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
6010 {
6011 vnode_t tvp, tdvp;
6012 vnode_t fvp, fdvp;
6013 struct nameidata *fromnd, *tond;
6014 vfs_context_t ctx = vfs_context_current();
6015 int error;
6016 int do_retry;
6017 int mntrename;
6018 int need_event;
6019 const char *oname = NULL;
6020 char *from_name = NULL, *to_name = NULL;
6021 int from_len=0, to_len=0;
6022 int holding_mntlock;
6023 mount_t locked_mp = NULL;
6024 vnode_t oparent = NULLVP;
6025 #if CONFIG_FSE
6026 fse_info from_finfo, to_finfo;
6027 #endif
6028 int from_truncated=0, to_truncated;
6029 int batched = 0;
6030 struct vnode_attr *fvap, *tvap;
6031 int continuing = 0;
6032 /* carving out a chunk for structs that are too big to be on stack. */
6033 struct {
6034 struct nameidata from_node, to_node;
6035 struct vnode_attr fv_attr, tv_attr;
6036 } * __rename_data;
6037 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
6038 fromnd = &__rename_data->from_node;
6039 tond = &__rename_data->to_node;
6040
6041 holding_mntlock = 0;
6042 do_retry = 0;
6043 retry:
6044 fvp = tvp = NULL;
6045 fdvp = tdvp = NULL;
6046 fvap = tvap = NULL;
6047 mntrename = FALSE;
6048
6049 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
6050 UIO_USERSPACE, uap->from, ctx);
6051 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
6052
6053 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6054 UIO_USERSPACE, uap->to, ctx);
6055 tond->ni_flag = NAMEI_COMPOUNDRENAME;
6056
6057 continue_lookup:
6058 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6059 if ( (error = namei(fromnd)) )
6060 goto out1;
6061 fdvp = fromnd->ni_dvp;
6062 fvp = fromnd->ni_vp;
6063
6064 if (fvp && fvp->v_type == VDIR)
6065 tond->ni_cnd.cn_flags |= WILLBEDIR;
6066 }
6067
6068 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6069 if ( (error = namei(tond)) ) {
6070 /*
6071 * Translate error code for rename("dir1", "dir2/.").
6072 */
6073 if (error == EISDIR && fvp->v_type == VDIR)
6074 error = EINVAL;
6075 goto out1;
6076 }
6077 tdvp = tond->ni_dvp;
6078 tvp = tond->ni_vp;
6079 }
6080
6081 batched = vnode_compound_rename_available(fdvp);
6082 if (!fvp) {
6083 /*
6084 * Claim: this check will never reject a valid rename.
6085 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
6086 * Suppose fdvp and tdvp are not on the same mount.
6087 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6088 * then you can't move it to within another dir on the same mountpoint.
6089 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
6090 *
6091 * If this check passes, then we are safe to pass these vnodes to the same FS.
6092 */
6093 if (fdvp->v_mount != tdvp->v_mount) {
6094 error = EXDEV;
6095 goto out1;
6096 }
6097 goto skipped_lookup;
6098 }
6099
6100 if (!batched) {
6101 error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
6102 if (error) {
6103 if (error == ENOENT) {
6104 /*
6105 * We encountered a race where after doing the namei, tvp stops
6106 * being valid. If so, simply re-drive the rename call from the
6107 * top.
6108 */
6109 do_retry = 1;
6110 }
6111 goto out1;
6112 }
6113 }
6114
6115 /*
6116 * If the source and destination are the same (i.e. they're
6117 * links to the same vnode) and the target file system is
6118 * case sensitive, then there is nothing to do.
6119 *
6120 * XXX Come back to this.
6121 */
6122 if (fvp == tvp) {
6123 int pathconf_val;
6124
6125 /*
6126 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
6127 * then assume that this file system is case sensitive.
6128 */
6129 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
6130 pathconf_val != 0) {
6131 goto out1;
6132 }
6133 }
6134
6135 /*
6136 * Allow the renaming of mount points.
6137 * - target must not exist
6138 * - target must reside in the same directory as source
6139 * - union mounts cannot be renamed
6140 * - "/" cannot be renamed
6141 *
6142 * XXX Handle this in VFS after a continued lookup (if we missed
6143 * in the cache to start off)
6144 */
6145 if ((fvp->v_flag & VROOT) &&
6146 (fvp->v_type == VDIR) &&
6147 (tvp == NULL) &&
6148 (fvp->v_mountedhere == NULL) &&
6149 (fdvp == tdvp) &&
6150 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
6151 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
6152 vnode_t coveredvp;
6153
6154 /* switch fvp to the covered vnode */
6155 coveredvp = fvp->v_mount->mnt_vnodecovered;
6156 if ( (vnode_getwithref(coveredvp)) ) {
6157 error = ENOENT;
6158 goto out1;
6159 }
6160 vnode_put(fvp);
6161
6162 fvp = coveredvp;
6163 mntrename = TRUE;
6164 }
6165 /*
6166 * Check for cross-device rename.
6167 */
6168 if ((fvp->v_mount != tdvp->v_mount) ||
6169 (tvp && (fvp->v_mount != tvp->v_mount))) {
6170 error = EXDEV;
6171 goto out1;
6172 }
6173
6174 /*
6175 * If source is the same as the destination (that is the
6176 * same inode number) then there is nothing to do...
6177 * EXCEPT if the underlying file system supports case
6178 * insensitivity and is case preserving. In this case
6179 * the file system needs to handle the special case of
6180 * getting the same vnode as target (fvp) and source (tvp).
6181 *
6182 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
6183 * and _PC_CASE_PRESERVING can have this exception, and they need to
6184 * handle the special case of getting the same vnode as target and
6185 * source. NOTE: Then the target is unlocked going into vnop_rename,
6186 * so not to cause locking problems. There is a single reference on tvp.
6187 *
6188 * NOTE - that fvp == tvp also occurs if they are hard linked and
6189 * that correct behaviour then is just to return success without doing
6190 * anything.
6191 *
6192 * XXX filesystem should take care of this itself, perhaps...
6193 */
6194 if (fvp == tvp && fdvp == tdvp) {
6195 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
6196 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
6197 fromnd->ni_cnd.cn_namelen)) {
6198 goto out1;
6199 }
6200 }
6201
6202 if (holding_mntlock && fvp->v_mount != locked_mp) {
6203 /*
6204 * we're holding a reference and lock
6205 * on locked_mp, but it no longer matches
6206 * what we want to do... so drop our hold
6207 */
6208 mount_unlock_renames(locked_mp);
6209 mount_drop(locked_mp, 0);
6210 holding_mntlock = 0;
6211 }
6212 if (tdvp != fdvp && fvp->v_type == VDIR) {
6213 /*
6214 * serialize renames that re-shape
6215 * the tree... if holding_mntlock is
6216 * set, then we're ready to go...
6217 * otherwise we
6218 * first need to drop the iocounts
6219 * we picked up, second take the
6220 * lock to serialize the access,
6221 * then finally start the lookup
6222 * process over with the lock held
6223 */
6224 if (!holding_mntlock) {
6225 /*
6226 * need to grab a reference on
6227 * the mount point before we
6228 * drop all the iocounts... once
6229 * the iocounts are gone, the mount
6230 * could follow
6231 */
6232 locked_mp = fvp->v_mount;
6233 mount_ref(locked_mp, 0);
6234
6235 /*
6236 * nameidone has to happen before we vnode_put(tvp)
6237 * since it may need to release the fs_nodelock on the tvp
6238 */
6239 nameidone(tond);
6240
6241 if (tvp)
6242 vnode_put(tvp);
6243 vnode_put(tdvp);
6244
6245 /*
6246 * nameidone has to happen before we vnode_put(fdvp)
6247 * since it may need to release the fs_nodelock on the fvp
6248 */
6249 nameidone(fromnd);
6250
6251 vnode_put(fvp);
6252 vnode_put(fdvp);
6253
6254 mount_lock_renames(locked_mp);
6255 holding_mntlock = 1;
6256
6257 goto retry;
6258 }
6259 } else {
6260 /*
6261 * when we dropped the iocounts to take
6262 * the lock, we allowed the identity of
6263 * the various vnodes to change... if they did,
6264 * we may no longer be dealing with a rename
6265 * that reshapes the tree... once we're holding
6266 * the iocounts, the vnodes can't change type
6267 * so we're free to drop the lock at this point
6268 * and continue on
6269 */
6270 if (holding_mntlock) {
6271 mount_unlock_renames(locked_mp);
6272 mount_drop(locked_mp, 0);
6273 holding_mntlock = 0;
6274 }
6275 }
6276
6277 // save these off so we can later verify that fvp is the same
6278 oname = fvp->v_name;
6279 oparent = fvp->v_parent;
6280
6281 skipped_lookup:
6282 #if CONFIG_FSE
6283 need_event = need_fsevent(FSE_RENAME, fdvp);
6284 if (need_event) {
6285 if (fvp) {
6286 get_fse_info(fvp, &from_finfo, ctx);
6287 } else {
6288 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
6289 if (error) {
6290 goto out1;
6291 }
6292
6293 fvap = &__rename_data->fv_attr;
6294 }
6295
6296 if (tvp) {
6297 get_fse_info(tvp, &to_finfo, ctx);
6298 } else if (batched) {
6299 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
6300 if (error) {
6301 goto out1;
6302 }
6303
6304 tvap = &__rename_data->tv_attr;
6305 }
6306 }
6307 #else
6308 need_event = 0;
6309 #endif /* CONFIG_FSE */
6310
6311 if (need_event || kauth_authorize_fileop_has_listeners()) {
6312 if (from_name == NULL) {
6313 GET_PATH(from_name);
6314 if (from_name == NULL) {
6315 error = ENOMEM;
6316 goto out1;
6317 }
6318 }
6319
6320 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
6321
6322 if (to_name == NULL) {
6323 GET_PATH(to_name);
6324 if (to_name == NULL) {
6325 error = ENOMEM;
6326 goto out1;
6327 }
6328 }
6329
6330 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
6331 }
6332
6333 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
6334 tdvp, &tvp, &tond->ni_cnd, tvap,
6335 0, ctx);
6336
6337 if (holding_mntlock) {
6338 /*
6339 * we can drop our serialization
6340 * lock now
6341 */
6342 mount_unlock_renames(locked_mp);
6343 mount_drop(locked_mp, 0);
6344 holding_mntlock = 0;
6345 }
6346 if (error) {
6347 if (error == EKEEPLOOKING) {
6348 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6349 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6350 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
6351 }
6352 }
6353
6354 fromnd->ni_vp = fvp;
6355 tond->ni_vp = tvp;
6356
6357 goto continue_lookup;
6358 }
6359
6360 /*
6361 * We may encounter a race in the VNOP where the destination didn't
6362 * exist when we did the namei, but it does by the time we go and
6363 * try to create the entry. In this case, we should re-drive this rename
6364 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
6365 * but other filesystems susceptible to this race could return it, too.
6366 */
6367 if (error == ERECYCLE) {
6368 do_retry = 1;
6369 }
6370
6371 goto out1;
6372 }
6373
6374 /* call out to allow 3rd party notification of rename.
6375 * Ignore result of kauth_authorize_fileop call.
6376 */
6377 kauth_authorize_fileop(vfs_context_ucred(ctx),
6378 KAUTH_FILEOP_RENAME,
6379 (uintptr_t)from_name, (uintptr_t)to_name);
6380
6381 #if CONFIG_FSE
6382 if (from_name != NULL && to_name != NULL) {
6383 if (from_truncated || to_truncated) {
6384 // set it here since only the from_finfo gets reported up to user space
6385 from_finfo.mode |= FSE_TRUNCATED_PATH;
6386 }
6387
6388 if (tvap && tvp) {
6389 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
6390 }
6391 if (fvap) {
6392 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
6393 }
6394
6395 if (tvp) {
6396 add_fsevent(FSE_RENAME, ctx,
6397 FSE_ARG_STRING, from_len, from_name,
6398 FSE_ARG_FINFO, &from_finfo,
6399 FSE_ARG_STRING, to_len, to_name,
6400 FSE_ARG_FINFO, &to_finfo,
6401 FSE_ARG_DONE);
6402 } else {
6403 add_fsevent(FSE_RENAME, ctx,
6404 FSE_ARG_STRING, from_len, from_name,
6405 FSE_ARG_FINFO, &from_finfo,
6406 FSE_ARG_STRING, to_len, to_name,
6407 FSE_ARG_DONE);
6408 }
6409 }
6410 #endif /* CONFIG_FSE */
6411
6412 /*
6413 * update filesystem's mount point data
6414 */
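/*
 * Hypothetical illustration of the block below: if a volume mounted on
 * "/Volumes/Backup" is renamed to "/Volumes/Backup2", pathend is left
 * pointing just past the last '/' of f_mntonname, mpname points at the
 * final component ("Backup2") of the copied-in target path, and the
 * strlcpy() rewrites f_mntonname to "/Volumes/Backup2".
 */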
6415 if (mntrename) {
6416 char *cp, *pathend, *mpname;
6417 char * tobuf;
6418 struct mount *mp;
6419 int maxlen;
6420 size_t len = 0;
6421
6422 mp = fvp->v_mountedhere;
6423
6424 if (vfs_busy(mp, LK_NOWAIT)) {
6425 error = EBUSY;
6426 goto out1;
6427 }
6428 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
6429
6430 error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len);
6431 if (!error) {
6432 /* find current mount point prefix */
6433 pathend = &mp->mnt_vfsstat.f_mntonname[0];
6434 for (cp = pathend; *cp != '\0'; ++cp) {
6435 if (*cp == '/')
6436 pathend = cp + 1;
6437 }
6438 /* find last component of target name */
6439 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
6440 if (*cp == '/')
6441 mpname = cp + 1;
6442 }
6443 /* append name to prefix */
6444 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
6445 bzero(pathend, maxlen);
6446 strlcpy(pathend, mpname, maxlen);
6447 }
6448 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
6449
6450 vfs_unbusy(mp);
6451 }
6452 /*
6453 * fix up name & parent pointers. note that we first
6454 * check that fvp has the same name/parent pointers it
6455 * had before the rename call... this is a 'weak' check
6456 * at best...
6457 *
6458 * XXX oparent and oname may not be set in the compound vnop case
6459 */
6460 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
6461 int update_flags;
6462
6463 update_flags = VNODE_UPDATE_NAME;
6464
6465 if (fdvp != tdvp)
6466 update_flags |= VNODE_UPDATE_PARENT;
6467
6468 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
6469 }
6470 out1:
6471 if (to_name != NULL) {
6472 RELEASE_PATH(to_name);
6473 to_name = NULL;
6474 }
6475 if (from_name != NULL) {
6476 RELEASE_PATH(from_name);
6477 from_name = NULL;
6478 }
6479 if (holding_mntlock) {
6480 mount_unlock_renames(locked_mp);
6481 mount_drop(locked_mp, 0);
6482 holding_mntlock = 0;
6483 }
6484 if (tdvp) {
6485 /*
6486 * nameidone has to happen before we vnode_put(tdvp)
6487 * since it may need to release the fs_nodelock on the tdvp
6488 */
6489 nameidone(tond);
6490
6491 if (tvp)
6492 vnode_put(tvp);
6493 vnode_put(tdvp);
6494 }
6495 if (fdvp) {
6496 /*
6497 * nameidone has to happen before we vnode_put(fdvp)
6498 * since it may need to release the fs_nodelock on the fdvp
6499 */
6500 nameidone(fromnd);
6501
6502 if (fvp)
6503 vnode_put(fvp);
6504 vnode_put(fdvp);
6505 }
6506
6507
6508 /*
6509 * If things changed after we did the namei, then we will re-drive
6510 * this rename call from the top.
6511 */
6512 if (do_retry) {
6513 do_retry = 0;
6514 goto retry;
6515 }
6516
6517 FREE(__rename_data, M_TEMP);
6518 return (error);
6519 }
6520
6521 /*
6522 * Make a directory file.
6523 *
6524 * Returns: 0 Success
6525 * EEXIST
6526 * namei:???
6527 * vnode_authorize:???
6528 * vn_create:???
6529 */
6530 /* ARGSUSED */
6531 static int
6532 mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
6533 {
6534 vnode_t vp, dvp;
6535 int error;
6536 int update_flags = 0;
6537 int batched;
6538 struct nameidata nd;
6539
6540 AUDIT_ARG(mode, vap->va_mode);
6541 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, UIO_USERSPACE,
6542 path, ctx);
6543 nd.ni_cnd.cn_flags |= WILLBEDIR;
6544 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
6545
6546 continue_lookup:
6547 error = namei(&nd);
6548 if (error)
6549 return (error);
6550 dvp = nd.ni_dvp;
6551 vp = nd.ni_vp;
6552
6553 if (vp != NULL) {
6554 error = EEXIST;
6555 goto out;
6556 }
6557
6558 batched = vnode_compound_mkdir_available(dvp);
6559
6560 VATTR_SET(vap, va_type, VDIR);
6561
6562 /*
6563 * XXX
6564 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
6565 * only get EEXIST or EISDIR for existing path components, and not that it could see
6566 * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
6567 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
6568 */
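/*
 * Hypothetical example of the fallback below: mkdir("/tmp/foo") where
 * "/tmp/foo" already exists but the caller lacks write access to "/tmp"
 * may get EACCES from the authorization above; the plain lookup that
 * follows finds the existing entry, so we report EEXIST instead, which
 * "mkdir -p" style callers tolerate.
 */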
6569 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
6570 if (error == EACCES || error == EPERM) {
6571 int error2;
6572
6573 nameidone(&nd);
6574 vnode_put(dvp);
6575 dvp = NULLVP;
6576
6577 /*
6578 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
6579 * rather than EACCES if the target exists.
6580 */
6581 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, UIO_USERSPACE,
6582 path, ctx);
6583 error2 = namei(&nd);
6584 if (error2) {
6585 goto out;
6586 } else {
6587 vp = nd.ni_vp;
6588 error = EEXIST;
6589 goto out;
6590 }
6591 }
6592
6593 goto out;
6594 }
6595
6596 /*
6597 * make the directory
6598 */
6599 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
6600 if (error == EKEEPLOOKING) {
6601 nd.ni_vp = vp;
6602 goto continue_lookup;
6603 }
6604
6605 goto out;
6606 }
6607
6608 // Make sure the name & parent pointers are hooked up
6609 if (vp->v_name == NULL)
6610 update_flags |= VNODE_UPDATE_NAME;
6611 if (vp->v_parent == NULLVP)
6612 update_flags |= VNODE_UPDATE_PARENT;
6613
6614 if (update_flags)
6615 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
6616
6617 #if CONFIG_FSE
6618 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
6619 #endif
6620
6621 out:
6622 /*
6623 * nameidone has to happen before we vnode_put(dvp)
6624 * since it may need to release the fs_nodelock on the dvp
6625 */
6626 nameidone(&nd);
6627
6628 if (vp)
6629 vnode_put(vp);
6630 if (dvp)
6631 vnode_put(dvp);
6632
6633 return (error);
6634 }
6635
6636 /*
6637 * mkdir_extended: Create a directory; with extended security (ACL).
6638 *
6639 * Parameters: p Process requesting to create the directory
6640 * uap User argument descriptor (see below)
6641 * retval (ignored)
6642 *
6643 * Indirect: uap->path Path of directory to create
6644 * uap->mode Access permissions to set
6645 * uap->xsecurity ACL to set
6646 *
6647 * Returns: 0 Success
6648 * !0 Not success
6649 *
6650 */
6651 int
6652 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
6653 {
6654 int ciferror;
6655 kauth_filesec_t xsecdst;
6656 struct vnode_attr va;
6657
6658 AUDIT_ARG(owner, uap->uid, uap->gid);
6659
6660 xsecdst = NULL;
6661 if ((uap->xsecurity != USER_ADDR_NULL) &&
6662 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
6663 return ciferror;
6664
6665 VATTR_INIT(&va);
6666 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
6667 if (xsecdst != NULL)
6668 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6669
6670 ciferror = mkdir1(vfs_context_current(), uap->path, &va);
6671 if (xsecdst != NULL)
6672 kauth_filesec_free(xsecdst);
6673 return ciferror;
6674 }
6675
6676 int
6677 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
6678 {
6679 struct vnode_attr va;
6680
6681 VATTR_INIT(&va);
6682 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
6683
6684 return(mkdir1(vfs_context_current(), uap->path, &va));
6685 }
6686
6687 /*
6688 * Remove a directory file.
6689 */
6690 /* ARGSUSED */
6691 int
6692 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
6693 {
6694 vnode_t vp, dvp;
6695 int error;
6696 struct nameidata nd;
6697 char *path = NULL;
6698 int len=0;
6699 int has_listeners = 0;
6700 int need_event = 0;
6701 int truncated = 0;
6702 vfs_context_t ctx = vfs_context_current();
6703 #if CONFIG_FSE
6704 struct vnode_attr va;
6705 #endif /* CONFIG_FSE */
6706 struct vnode_attr *vap = NULL;
6707 int batched;
6708
6709 int restart_flag;
6710
6711 /*
6712 * This loop exists to restart rmdir in the unlikely case that two
6713 * processes are simultaneously trying to remove the same directory
6714 * containing orphaned appleDouble files.
6715 */
6716 do {
6717 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
6718 UIO_USERSPACE, uap->path, ctx);
6719 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
6720 continue_lookup:
6721 restart_flag = 0;
6722 vap = NULL;
6723
6724 error = namei(&nd);
6725 if (error)
6726 return (error);
6727
6728 dvp = nd.ni_dvp;
6729 vp = nd.ni_vp;
6730
6731 if (vp) {
6732 batched = vnode_compound_rmdir_available(vp);
6733
6734 if (vp->v_flag & VROOT) {
6735 /*
6736 * The root of a mounted filesystem cannot be deleted.
6737 */
6738 error = EBUSY;
6739 goto out;
6740 }
6741
6742 /*
6743 * Removed a check here; we used to abort if vp's vid
6744 * was not the same as what we'd seen the last time around.
6745 * I do not think that check was valid, because if we retry
6746 * and all dirents are gone, the directory could legitimately
6747 * be recycled but still be present in a situation where we would
6748 * have had permission to delete. Therefore, we won't make
6749 * an effort to preserve that check now that we may not have a
6750 * vp here.
6751 */
6752
6753 if (!batched) {
6754 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
6755 if (error) {
6756 goto out;
6757 }
6758 }
6759 } else {
6760 batched = 1;
6761
6762 if (!vnode_compound_rmdir_available(dvp)) {
6763 panic("No error, but no compound rmdir?");
6764 }
6765 }
6766
6767 #if CONFIG_FSE
6768 fse_info finfo;
6769
6770 need_event = need_fsevent(FSE_DELETE, dvp);
6771 if (need_event) {
6772 if (!batched) {
6773 get_fse_info(vp, &finfo, ctx);
6774 } else {
6775 error = vfs_get_notify_attributes(&va);
6776 if (error) {
6777 goto out;
6778 }
6779
6780 vap = &va;
6781 }
6782 }
6783 #endif
6784 has_listeners = kauth_authorize_fileop_has_listeners();
6785 if (need_event || has_listeners) {
6786 if (path == NULL) {
6787 GET_PATH(path);
6788 if (path == NULL) {
6789 error = ENOMEM;
6790 goto out;
6791 }
6792 }
6793
6794 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
6795 #if CONFIG_FSE
6796 if (truncated) {
6797 finfo.mode |= FSE_TRUNCATED_PATH;
6798 }
6799 #endif
6800 }
6801
6802 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
6803 nd.ni_vp = vp;
6804 if (vp == NULLVP) {
6805 /* Couldn't find a vnode */
6806 goto out;
6807 }
6808
6809 if (error == EKEEPLOOKING) {
6810 goto continue_lookup;
6811 }
6812 #if CONFIG_APPLEDOUBLE
6813 /*
6814 * Special case to remove orphaned AppleDouble
6815 * files. I don't like putting this in the kernel,
6816 * but carbon does not like putting this in carbon either,
6817 * so here we are.
6818 */
6819 if (error == ENOTEMPTY) {
6820 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
6821 if (error == EBUSY) {
6822 goto out;
6823 }
6824
6825
6826 /*
6827 * Assuming everything went well, we will try the RMDIR again
6828 */
6829 if (!error)
6830 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
6831 }
6832 #endif /* CONFIG_APPLEDOUBLE */
6833 /*
6834 * Call out to allow 3rd party notification of delete.
6835 * Ignore result of kauth_authorize_fileop call.
6836 */
6837 if (!error) {
6838 if (has_listeners) {
6839 kauth_authorize_fileop(vfs_context_ucred(ctx),
6840 KAUTH_FILEOP_DELETE,
6841 (uintptr_t)vp,
6842 (uintptr_t)path);
6843 }
6844
6845 if (vp->v_flag & VISHARDLINK) {
6846 // see the comment in unlink1() about why we update
6847 // the parent of a hard link when it is removed
6848 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
6849 }
6850
6851 #if CONFIG_FSE
6852 if (need_event) {
6853 if (vap) {
6854 vnode_get_fse_info_from_vap(vp, &finfo, vap);
6855 }
6856 add_fsevent(FSE_DELETE, ctx,
6857 FSE_ARG_STRING, len, path,
6858 FSE_ARG_FINFO, &finfo,
6859 FSE_ARG_DONE);
6860 }
6861 #endif
6862 }
6863
6864 out:
6865 if (path != NULL) {
6866 RELEASE_PATH(path);
6867 path = NULL;
6868 }
6869 /*
6870 * nameidone has to happen before we vnode_put(dvp)
6871 * since it may need to release the fs_nodelock on the dvp
6872 */
6873 nameidone(&nd);
6874 vnode_put(dvp);
6875
6876 if (vp)
6877 vnode_put(vp);
6878
6879 if (restart_flag == 0) {
6880 wakeup_one((caddr_t)vp);
6881 return (error);
6882 }
6883 tsleep(vp, PVFS, "rm AD", 1);
6884
6885 } while (restart_flag != 0);
6886
6887 return (error);
6888
6889 }
6890
6891 /* Get direntry length padded to 8 byte alignment */
6892 #define DIRENT64_LEN(namlen) \
6893 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
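/*
 * Illustrative note: the macro keeps the fixed portion of struct direntry
 * plus namlen name bytes and a terminating NUL (the "- (MAXPATHLEN-1)"
 * discards the unused remainder of d_name), rounded up to a multiple of 8.
 * For a 3-character name this rounds to the 32-byte worst case cited in the
 * buffer-sizing comment in vnode_readdir64() below.
 */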
6894
6895 static errno_t
6896 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
6897 int *numdirent, vfs_context_t ctxp)
6898 {
6899 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
6900 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
6901 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
6902 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
6903 } else {
6904 size_t bufsize;
6905 void * bufptr;
6906 uio_t auio;
6907 struct direntry *entry64;
6908 struct dirent *dep;
6909 int bytesread;
6910 int error;
6911
6912 /*
6913 * Our kernel buffer needs to be smaller since re-packing
6914 * will expand each dirent. The worst case (when the name
6915 * length is 3) corresponds to a struct direntry size of 32
6916 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
6917 * (4-byte aligned). So having a buffer that is 3/8 the size
6918 * will prevent us from reading more than we can pack.
6919 *
6920 * Since this buffer is wired memory, we will limit the
6921 * buffer size to a maximum of 32K. We would really like to
6922 * use 32K in the MIN(), but we use magic number 87371 to
6923 * prevent uio_resid() * 3 / 8 from overflowing.
6924 */
6925 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
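/* Concretely, the MIN() caps this at 3 * 87371 / 8 = 32764 bytes, just
 * under the 32K ceiling mentioned above. */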
6926 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
6927 if (bufptr == NULL) {
6928 return ENOMEM;
6929 }
6930
6931 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
6932 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
6933 auio->uio_offset = uio->uio_offset;
6934
6935 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
6936
6937 dep = (struct dirent *)bufptr;
6938 bytesread = bufsize - uio_resid(auio);
6939
6940 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
6941 M_TEMP, M_WAITOK);
6942 /*
6943 * Convert all the entries and copy them out to user's buffer.
6944 */
6945 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
6946 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
6947
6948 bzero(entry64, enbufsize);
6949 /* Convert a dirent to a dirent64. */
6950 entry64->d_ino = dep->d_ino;
6951 entry64->d_seekoff = 0;
6952 entry64->d_reclen = enbufsize;
6953 entry64->d_namlen = dep->d_namlen;
6954 entry64->d_type = dep->d_type;
6955 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
6956
6957 /* Move to next entry. */
6958 dep = (struct dirent *)((char *)dep + dep->d_reclen);
6959
6960 /* Copy entry64 to user's buffer. */
6961 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
6962 }
6963
6964 /* Update the real offset using the offset we got from VNOP_READDIR. */
6965 if (error == 0) {
6966 uio->uio_offset = auio->uio_offset;
6967 }
6968 uio_free(auio);
6969 FREE(bufptr, M_TEMP);
6970 FREE(entry64, M_TEMP);
6971 return (error);
6972 }
6973 }
6974
6975 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
6976
6977 /*
6978 * Read a block of directory entries in a file system independent format.
6979 */
6980 static int
6981 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
6982 off_t *offset, int flags)
6983 {
6984 vnode_t vp;
6985 struct vfs_context context = *vfs_context_current(); /* local copy */
6986 struct fileproc *fp;
6987 uio_t auio;
6988 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6989 off_t loff;
6990 int error, eofflag, numdirent;
6991 char uio_buf[ UIO_SIZEOF(1) ];
6992
6993 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
6994 if (error) {
6995 return (error);
6996 }
6997 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
6998 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6999 error = EBADF;
7000 goto out;
7001 }
7002
7003 if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
7004 bufsize = GETDIRENTRIES_MAXBUFSIZE;
7005
7006 #if CONFIG_MACF
7007 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
7008 if (error)
7009 goto out;
7010 #endif
7011 if ( (error = vnode_getwithref(vp)) ) {
7012 goto out;
7013 }
7014 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7015
7016 unionread:
7017 if (vp->v_type != VDIR) {
7018 (void)vnode_put(vp);
7019 error = EINVAL;
7020 goto out;
7021 }
7022
7023 #if CONFIG_MACF
7024 error = mac_vnode_check_readdir(&context, vp);
7025 if (error != 0) {
7026 (void)vnode_put(vp);
7027 goto out;
7028 }
7029 #endif /* MAC */
7030
7031 loff = fp->f_fglob->fg_offset;
7032 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
7033 uio_addiov(auio, bufp, bufsize);
7034
7035 if (flags & VNODE_READDIR_EXTENDED) {
7036 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
7037 fp->f_fglob->fg_offset = uio_offset(auio);
7038 } else {
7039 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
7040 fp->f_fglob->fg_offset = uio_offset(auio);
7041 }
7042 if (error) {
7043 (void)vnode_put(vp);
7044 goto out;
7045 }
7046
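/*
 * If nothing was read (the residual count is unchanged), we may have hit
 * the end of the top layer of a union mount: give union_dircheckp a chance
 * to substitute a vnode, or drop down to the covered directory, reset the
 * offset and restart the read there.
 */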
7047 if ((user_ssize_t)bufsize == uio_resid(auio)){
7048 if (union_dircheckp) {
7049 error = union_dircheckp(&vp, fp, &context);
7050 if (error == -1)
7051 goto unionread;
7052 if (error)
7053 goto out;
7054 }
7055
7056 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
7057 struct vnode *tvp = vp;
7058 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
7059 vnode_ref(vp);
7060 fp->f_fglob->fg_data = (caddr_t) vp;
7061 fp->f_fglob->fg_offset = 0;
7062 vnode_rele(tvp);
7063 vnode_put(tvp);
7064 goto unionread;
7065 }
7066 vp = tvp;
7067 }
7068 }
7069
7070 vnode_put(vp);
7071 if (offset) {
7072 *offset = loff;
7073 }
7074
7075 *bytesread = bufsize - uio_resid(auio);
7076 out:
7077 file_drop(fd);
7078 return (error);
7079 }
7080
7081
7082 int
7083 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
7084 {
7085 off_t offset;
7086 ssize_t bytesread;
7087 int error;
7088
7089 AUDIT_ARG(fd, uap->fd);
7090 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
7091
7092 if (error == 0) {
7093 if (proc_is64bit(p)) {
7094 user64_long_t base = (user64_long_t)offset;
7095 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
7096 } else {
7097 user32_long_t base = (user32_long_t)offset;
7098 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
7099 }
7100 *retval = bytesread;
7101 }
7102 return (error);
7103 }
7104
7105 int
7106 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
7107 {
7108 off_t offset;
7109 ssize_t bytesread;
7110 int error;
7111
7112 AUDIT_ARG(fd, uap->fd);
7113 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
7114
7115 if (error == 0) {
7116 *retval = bytesread;
7117 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
7118 }
7119 return (error);
7120 }
7121
7122
7123 /*
7124 * Set the mode mask for creation of filesystem nodes.
7125 * XXX implement xsecurity
7126 */
7127 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
7128 static int
7129 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
7130 {
7131 struct filedesc *fdp;
7132
7133 AUDIT_ARG(mask, newmask);
7134 proc_fdlock(p);
7135 fdp = p->p_fd;
7136 *retval = fdp->fd_cmask;
7137 fdp->fd_cmask = newmask & ALLPERMS;
7138 proc_fdunlock(p);
7139 return (0);
7140 }
7141
7142 /*
7143 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
7144 *
7145 * Parameters: p Process requesting to set the umask
7146 * uap User argument descriptor (see below)
7147 * retval umask of the process (parameter p)
7148 *
7149 * Indirect: uap->newmask umask to set
7150 * uap->xsecurity ACL to set
7151 *
7152 * Returns: 0 Success
7153 * !0 Not success
7154 *
7155 */
7156 int
7157 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
7158 {
7159 int ciferror;
7160 kauth_filesec_t xsecdst;
7161
7162 xsecdst = KAUTH_FILESEC_NONE;
7163 if (uap->xsecurity != USER_ADDR_NULL) {
7164 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
7165 return ciferror;
7166 } else {
7167 xsecdst = KAUTH_FILESEC_NONE;
7168 }
7169
7170 ciferror = umask1(p, uap->newmask, xsecdst, retval);
7171
7172 if (xsecdst != KAUTH_FILESEC_NONE)
7173 kauth_filesec_free(xsecdst);
7174 return ciferror;
7175 }
7176
7177 int
7178 umask(proc_t p, struct umask_args *uap, int32_t *retval)
7179 {
7180 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
7181 }
7182
7183 /*
7184 * Void all references to file by ripping underlying filesystem
7185 * away from vnode.
7186 */
7187 /* ARGSUSED */
7188 int
7189 revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
7190 {
7191 vnode_t vp;
7192 struct vnode_attr va;
7193 vfs_context_t ctx = vfs_context_current();
7194 int error;
7195 struct nameidata nd;
7196
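/*
 * Only character and block special files may be revoked; a block device
 * with a filesystem mounted on it is reported busy, and the caller must
 * either own the vnode or pass the superuser check before VNOP_REVOKE()
 * is issued.
 */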
7197 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
7198 uap->path, ctx);
7199 error = namei(&nd);
7200 if (error)
7201 return (error);
7202 vp = nd.ni_vp;
7203
7204 nameidone(&nd);
7205
7206 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
7207 error = ENOTSUP;
7208 goto out;
7209 }
7210
7211 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
7212 error = EBUSY;
7213 goto out;
7214 }
7215
7216 #if CONFIG_MACF
7217 error = mac_vnode_check_revoke(ctx, vp);
7218 if (error)
7219 goto out;
7220 #endif
7221
7222 VATTR_INIT(&va);
7223 VATTR_WANTED(&va, va_uid);
7224 if ((error = vnode_getattr(vp, &va, ctx)))
7225 goto out;
7226 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
7227 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
7228 goto out;
7229 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
7230 VNOP_REVOKE(vp, REVOKEALL, ctx);
7231 out:
7232 vnode_put(vp);
7233 return (error);
7234 }
7235
7236
7237 /*
7238 * HFS/HFS Plus SPECIFIC SYSTEM CALLS
7239 * The following system calls are designed to support features
7240 * which are specific to the HFS & HFS Plus volume formats
7241 */
7242
7243
7244 /*
7245 * Obtain attribute information on objects in a directory while enumerating
7246 * the directory.
7247 */
7248 /* ARGSUSED */
7249 int
7250 getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
7251 {
7252 vnode_t vp;
7253 struct fileproc *fp;
7254 uio_t auio = NULL;
7255 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7256 uint32_t count, savecount;
7257 uint32_t newstate;
7258 int error, eofflag;
7259 uint32_t loff;
7260 struct attrlist attributelist;
7261 vfs_context_t ctx = vfs_context_current();
7262 int fd = uap->fd;
7263 char uio_buf[ UIO_SIZEOF(1) ];
7264 kauth_action_t action;
7265
7266 AUDIT_ARG(fd, fd);
7267
7268 /* Get the attributes into kernel space */
7269 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
7270 return(error);
7271 }
7272 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
7273 return(error);
7274 }
7275 savecount = count;
7276 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
7277 return (error);
7278 }
7279 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7280 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7281 error = EBADF;
7282 goto out;
7283 }
7284
7285
7286 #if CONFIG_MACF
7287 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
7288 fp->f_fglob);
7289 if (error)
7290 goto out;
7291 #endif
7292
7293
7294 if ( (error = vnode_getwithref(vp)) )
7295 goto out;
7296
7297 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7298
7299 unionread:
7300 if (vp->v_type != VDIR) {
7301 (void)vnode_put(vp);
7302 error = EINVAL;
7303 goto out;
7304 }
7305
7306 #if CONFIG_MACF
7307 error = mac_vnode_check_readdir(ctx, vp);
7308 if (error != 0) {
7309 (void)vnode_put(vp);
7310 goto out;
7311 }
7312 #endif /* MAC */
7313
7314 /* set up the uio structure which will contain the user's return buffer */
7315 loff = fp->f_fglob->fg_offset;
7316 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
7317 uio_addiov(auio, uap->buffer, uap->buffersize);
7318
7319 /*
7320 * If the only item requested is file names, we can let that past with
7321 * just LIST_DIRECTORY. If they want any other attributes, that means
7322 * they need SEARCH as well.
7323 */
7324 action = KAUTH_VNODE_LIST_DIRECTORY;
7325 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
7326 attributelist.fileattr || attributelist.dirattr)
7327 action |= KAUTH_VNODE_SEARCH;
7328
7329 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
7330
7331 /* Believe it or not, uap->options only has 32 bits of valid
7332 * info, so truncate before extending again */
7333
7334 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
7335 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
7336 }
7337
7338 if (error) {
7339 (void) vnode_put(vp);
7340 goto out;
7341 }
7342
7343 /*
7344 * If we've got the last entry of a directory in a union mount
7345 * then reset the eofflag and pretend there's still more to come.
7346 * The next call will again set eofflag and the buffer will be empty,
7347 * so traverse to the underlying directory and do the directory
7348 * read there.
7349 */
7350 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
7351 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
7352 eofflag = 0;
7353 } else { // Empty buffer
7354 struct vnode *tvp = vp;
7355 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
7356 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
7357 fp->f_fglob->fg_data = (caddr_t) vp;
7358 fp->f_fglob->fg_offset = 0; // reset index for new dir
7359 count = savecount;
7360 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
7361 vnode_put(tvp);
7362 goto unionread;
7363 }
7364 vp = tvp;
7365 }
7366 }
7367
7368 (void)vnode_put(vp);
7369
7370 if (error)
7371 goto out;
7372 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
7373
7374 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
7375 goto out;
7376 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
7377 goto out;
7378 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
7379 goto out;
7380
7381 *retval = eofflag; /* similar to getdirentries */
7382 error = 0;
7383 out:
7384 file_drop(fd);
7385 return (error); /* errors were returned earlier; a retval of 0 or 1 now */
7386
7387 } /* end of getdirentriesattr system call */
7388
7389 /*
7390 * Exchange data between two files
7391 */
7392
7393 /* ARGSUSED */
7394 int
7395 exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
7396 {
7397
7398 struct nameidata fnd, snd;
7399 vfs_context_t ctx = vfs_context_current();
7400 vnode_t fvp;
7401 vnode_t svp;
7402 int error;
7403 u_int32_t nameiflags;
7404 char *fpath = NULL;
7405 char *spath = NULL;
7406 int flen=0, slen=0;
7407 int from_truncated=0, to_truncated=0;
7408 #if CONFIG_FSE
7409 fse_info f_finfo, s_finfo;
7410 #endif
7411
7412 nameiflags = 0;
7413 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
7414
7415 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
7416 UIO_USERSPACE, uap->path1, ctx);
7417
7418 error = namei(&fnd);
7419 if (error)
7420 goto out2;
7421
7422 nameidone(&fnd);
7423 fvp = fnd.ni_vp;
7424
7425 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
7426 UIO_USERSPACE, uap->path2, ctx);
7427
7428 error = namei(&snd);
7429 if (error) {
7430 vnode_put(fvp);
7431 goto out2;
7432 }
7433 nameidone(&snd);
7434 svp = snd.ni_vp;
7435
7436 /*
7437 * if the files are the same, return an inval error
7438 */
7439 if (svp == fvp) {
7440 error = EINVAL;
7441 goto out;
7442 }
7443
7444 /*
7445 * if the files are on different volumes, return an error
7446 */
7447 if (svp->v_mount != fvp->v_mount) {
7448 error = EXDEV;
7449 goto out;
7450 }
7451
7452 /* If they're not files, return an error */
7453 if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
7454 error = EINVAL;
7455 goto out;
7456 }
7457
7458 #if CONFIG_MACF
7459 error = mac_vnode_check_exchangedata(ctx,
7460 fvp, svp);
7461 if (error)
7462 goto out;
7463 #endif
7464 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
7465 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
7466 goto out;
7467
7468 if (
7469 #if CONFIG_FSE
7470 need_fsevent(FSE_EXCHANGE, fvp) ||
7471 #endif
7472 kauth_authorize_fileop_has_listeners()) {
7473 GET_PATH(fpath);
7474 GET_PATH(spath);
7475 if (fpath == NULL || spath == NULL) {
7476 error = ENOMEM;
7477 goto out;
7478 }
7479
7480 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
7481 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
7482
7483 #if CONFIG_FSE
7484 get_fse_info(fvp, &f_finfo, ctx);
7485 get_fse_info(svp, &s_finfo, ctx);
7486 if (from_truncated || to_truncated) {
7487 // set it here since only the f_finfo gets reported up to user space
7488 f_finfo.mode |= FSE_TRUNCATED_PATH;
7489 }
7490 #endif
7491 }
7492 /* Ok, make the call */
7493 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
7494
7495 if (error == 0) {
7496 const char *tmpname;
7497
7498 if (fpath != NULL && spath != NULL) {
7499 /* call out to allow 3rd party notification of exchangedata.
7500 * Ignore result of kauth_authorize_fileop call.
7501 */
7502 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
7503 (uintptr_t)fpath, (uintptr_t)spath);
7504 }
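/*
 * After a successful exchange the two vnodes have effectively traded
 * places in the namespace, so swap their cached v_name/v_parent pointers
 * under the name cache lock to keep the cache consistent.
 */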
7505 name_cache_lock();
7506
7507 tmpname = fvp->v_name;
7508 fvp->v_name = svp->v_name;
7509 svp->v_name = tmpname;
7510
7511 if (fvp->v_parent != svp->v_parent) {
7512 vnode_t tmp;
7513
7514 tmp = fvp->v_parent;
7515 fvp->v_parent = svp->v_parent;
7516 svp->v_parent = tmp;
7517 }
7518 name_cache_unlock();
7519
7520 #if CONFIG_FSE
7521 if (fpath != NULL && spath != NULL) {
7522 add_fsevent(FSE_EXCHANGE, ctx,
7523 FSE_ARG_STRING, flen, fpath,
7524 FSE_ARG_FINFO, &f_finfo,
7525 FSE_ARG_STRING, slen, spath,
7526 FSE_ARG_FINFO, &s_finfo,
7527 FSE_ARG_DONE);
7528 }
7529 #endif
7530 }
7531
7532 out:
7533 if (fpath != NULL)
7534 RELEASE_PATH(fpath);
7535 if (spath != NULL)
7536 RELEASE_PATH(spath);
7537 vnode_put(svp);
7538 vnode_put(fvp);
7539 out2:
7540 return (error);
7541 }
7542
7543 /*
7544 * Return (in MB) the amount of free space on the given vnode's volume.
7545 */
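/* f_bavail * f_bsize is the free byte count; shifting right by 20 converts bytes to MB. */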
7546 uint32_t freespace_mb(vnode_t vp);
7547
7548 uint32_t
7549 freespace_mb(vnode_t vp)
7550 {
7551 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
7552 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
7553 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
7554 }
7555
7556 #if CONFIG_SEARCHFS
7557
7558 /* ARGSUSED */
7559
7560 int
7561 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
7562 {
7563 vnode_t vp, tvp;
7564 int i, error=0;
7565 int fserror = 0;
7566 struct nameidata nd;
7567 struct user64_fssearchblock searchblock;
7568 struct searchstate *state;
7569 struct attrlist *returnattrs;
7570 struct timeval timelimit;
7571 void *searchparams1,*searchparams2;
7572 uio_t auio = NULL;
7573 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7574 uint32_t nummatches;
7575 int mallocsize;
7576 uint32_t nameiflags;
7577 vfs_context_t ctx = vfs_context_current();
7578 char uio_buf[ UIO_SIZEOF(1) ];
7579
7580 /* Start by copying in fsearchblock parameter list */
7581 if (IS_64BIT_PROCESS(p)) {
7582 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
7583 timelimit.tv_sec = searchblock.timelimit.tv_sec;
7584 timelimit.tv_usec = searchblock.timelimit.tv_usec;
7585 }
7586 else {
7587 struct user32_fssearchblock tmp_searchblock;
7588
7589 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
7590 // munge into 64-bit version
7591 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
7592 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
7593 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
7594 searchblock.maxmatches = tmp_searchblock.maxmatches;
7595 /*
7596 * These casts are safe. We will promote tv_sec from a 32 bit long to a 64 bit
7597 * long if necessary, and tv_usec is already a signed 32 bit int.
7598 */
7599 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
7600 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
7601 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
7602 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
7603 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
7604 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
7605 searchblock.searchattrs = tmp_searchblock.searchattrs;
7606 }
7607 if (error)
7608 return(error);
7609
7610 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
7611 */
7612 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
7613 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
7614 return(EINVAL);
7615
7616 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
7617 /* It all has to go into local memory and it's not that big so we might as well put it all together. */
7618 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
7619 /* block. */
7620
7621 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
7622 sizeof(struct attrlist) + sizeof(struct searchstate);
7623
7624 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
7625
7626 /* Now set up the various pointers to the correct place in our newly allocated memory */
7627
7628 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
7629 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
7630 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
7631
7632 /* Now copy in the stuff given our local variables. */
7633
7634 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
7635 goto freeandexit;
7636
7637 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
7638 goto freeandexit;
7639
7640 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
7641 goto freeandexit;
7642
7643 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
7644 goto freeandexit;
7645
7646 /*
7647 * When searching a union mount, we need to set the
7648 * start flag at the first call on each layer to
7649 * reset state for the new volume.
7650 */
7651 if (uap->options & SRCHFS_START)
7652 state->ss_union_layer = 0;
7653 else
7654 uap->options |= state->ss_union_flags;
7655 state->ss_union_flags = 0;
7656
7657 /*
7658 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
7659 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
7660 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
7661 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
7662 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
7663 */
7664
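/*
 * Sketch of the expected layout (sizes are illustrative): searchparams1
 * starts with a u_int32_t buffer-length word, followed by an
 * attrreference_t whose attr_dataoffset is relative to the start of that
 * attrreference_t and whose attr_length covers the name bytes. The checks
 * below reject any reference that points outside the copied-in buffer.
 */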
7665 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
7666 attrreference_t* string_ref;
7667 u_int32_t* start_length;
7668 user64_size_t param_length;
7669
7670 /* validate searchparams1 */
7671 param_length = searchblock.sizeofsearchparams1;
7672 /* skip the word that specifies length of the buffer */
7673 start_length= (u_int32_t*) searchparams1;
7674 start_length= start_length+1;
7675 string_ref= (attrreference_t*) start_length;
7676
7677 /* ensure no negative offsets or too big offsets */
7678 if (string_ref->attr_dataoffset < 0 ) {
7679 error = EINVAL;
7680 goto freeandexit;
7681 }
7682 if (string_ref->attr_length > MAXPATHLEN) {
7683 error = EINVAL;
7684 goto freeandexit;
7685 }
7686
7687 /* Check for pointer overflow in the string ref */
7688 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
7689 error = EINVAL;
7690 goto freeandexit;
7691 }
7692
7693 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
7694 error = EINVAL;
7695 goto freeandexit;
7696 }
7697 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
7698 error = EINVAL;
7699 goto freeandexit;
7700 }
7701 }
7702
7703 /* set up the uio structure which will contain the user's return buffer */
7704 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
7705 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
7706
7707 nameiflags = 0;
7708 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
7709 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
7710 UIO_USERSPACE, uap->path, ctx);
7711
7712 error = namei(&nd);
7713 if (error)
7714 goto freeandexit;
7715 vp = nd.ni_vp;
7716 nameidone(&nd);
7717
7718 /*
7719 * Switch to the root vnode for the volume
7720 */
7721 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
7722 if (error)
7723 goto freeandexit;
7724 vnode_put(vp);
7725 vp = tvp;
7726
7727 /*
7728 * If it's a union mount, the path lookup takes
7729 * us to the top layer. But we may need to descend
7730 * to a lower layer. For non-union mounts the layer
7731 * is always zero.
7732 */
7733 for (i = 0; i < (int) state->ss_union_layer; i++) {
7734 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
7735 break;
7736 tvp = vp;
7737 vp = vp->v_mount->mnt_vnodecovered;
7738 if (vp == NULL) {
7739 vp = tvp;
7740 error = ENOENT;
7741 goto freeandexit;
7742 }
7743 vnode_getwithref(vp);
7744 vnode_put(tvp);
7745 }
7746
7747 #if CONFIG_MACF
7748 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
7749 if (error) {
7750 vnode_put(vp);
7751 goto freeandexit;
7752 }
7753 #endif
7754
7755
7756 /*
7757 * If searchblock.maxmatches == 0, then skip the search. This has happened
7758 * before and sometimes the underlying code doesn't deal with it well.
7759 */
7760 if (searchblock.maxmatches == 0) {
7761 nummatches = 0;
7762 goto saveandexit;
7763 }
7764
7765 /*
7766 * All right, we have everything we need, so let's make that call.
7767 *
7768 * We keep special track of the return value from the file system:
7769 * EAGAIN is an acceptable error condition that shouldn't keep us
7770 * from copying out any results...
7771 */
7772
7773 fserror = VNOP_SEARCHFS(vp,
7774 searchparams1,
7775 searchparams2,
7776 &searchblock.searchattrs,
7777 (u_long)searchblock.maxmatches,
7778 &timelimit,
7779 returnattrs,
7780 &nummatches,
7781 (u_long)uap->scriptcode,
7782 (u_long)uap->options,
7783 auio,
7784 (struct searchstate *) &state->ss_fsstate,
7785 ctx);
7786
7787 /*
7788 * If it's a union mount we need to be called again
7789 * to search the mounted-on filesystem.
7790 */
7791 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
7792 state->ss_union_flags = SRCHFS_START;
7793 state->ss_union_layer++; // search next layer down
7794 fserror = EAGAIN;
7795 }
7796
7797 saveandexit:
7798
7799 vnode_put(vp);
7800
7801 /* Now copy out the stuff that needs copying out. That means the number of matches and the
7802 search state. Everything was already put into the return buffer by the VNOP call. */
7803
7804 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
7805 goto freeandexit;
7806
7807 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
7808 goto freeandexit;
7809
7810 error = fserror;
7811
7812 freeandexit:
7813
7814 FREE(searchparams1,M_TEMP);
7815
7816 return(error);
7817
7818
7819 } /* end of searchfs system call */
7820
7821 #else /* CONFIG_SEARCHFS */
7822
7823 int
7824 searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
7825 {
7826 return (ENOTSUP);
7827 }
7828
7829 #endif /* CONFIG_SEARCHFS */
7830
7831
7832 lck_grp_attr_t * nspace_group_attr;
7833 lck_attr_t * nspace_lock_attr;
7834 lck_grp_t * nspace_mutex_group;
7835
7836 lck_mtx_t nspace_handler_lock;
7837 lck_mtx_t nspace_handler_exclusion_lock;
7838
7839 time_t snapshot_timestamp=0;
7840 int nspace_allow_virtual_devs=0;
7841
7842 void nspace_handler_init(void);
7843
7844 typedef struct nspace_item_info {
7845 struct vnode *vp;
7846 void *arg;
7847 uint64_t op;
7848 uint32_t vid;
7849 uint32_t flags;
7850 uint32_t token;
7851 uint32_t refcount;
7852 } nspace_item_info;
7853
7854 #define MAX_NSPACE_ITEMS 128
7855 nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
7856 uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
7857 uint32_t nspace_token_id=0;
7858 uint32_t nspace_handler_timeout = 15; // seconds
7859
7860 #define NSPACE_ITEM_NEW 0x0001
7861 #define NSPACE_ITEM_PROCESSING 0x0002
7862 #define NSPACE_ITEM_DEAD 0x0004
7863 #define NSPACE_ITEM_CANCELLED 0x0008
7864 #define NSPACE_ITEM_DONE 0x0010
7865 #define NSPACE_ITEM_RESET_TIMER 0x0020
7866
7867 #define NSPACE_ITEM_NSPACE_EVENT 0x0040
7868 #define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
7869 #define NSPACE_ITEM_TRACK_EVENT 0x0100
7870
7871 #define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT | NSPACE_ITEM_TRACK_EVENT)
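// Rough lifecycle of an nspace_items[] slot, as used below: a slot is filled
// and marked NSPACE_ITEM_NEW by resolve_nspace_item_ext(); the handler side
// (not shown here) presumably moves it to NSPACE_ITEM_PROCESSING and finally
// to NSPACE_ITEM_DONE or NSPACE_ITEM_CANCELLED, while NSPACE_ITEM_RESET_TIMER
// lets the handler extend a waiter's timeout.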
7872
7873 //#pragma optimization_level 0
7874
7875 typedef enum {
7876 NSPACE_HANDLER_NSPACE = 0,
7877 NSPACE_HANDLER_SNAPSHOT = 1,
7878 NSPACE_HANDLER_TRACK = 2,
7879
7880 NSPACE_HANDLER_COUNT,
7881 } nspace_type_t;
7882
7883 typedef struct {
7884 uint64_t handler_tid;
7885 struct proc *handler_proc;
7886 int handler_busy;
7887 } nspace_handler_t;
7888
7889 nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
7890
7891 /* namespace fsctl functions */
7892 static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
7893 static int nspace_item_flags_for_type(nspace_type_t nspace_type);
7894 static int nspace_open_flags_for_type(nspace_type_t nspace_type);
7895 static nspace_type_t nspace_type_for_op(uint64_t op);
7896 static int nspace_is_special_process(struct proc *proc);
7897 static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
7898 static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
7899 static int validate_namespace_args (int is64bit, int size);
7900 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
7901
7902
7903 static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
7904 {
7905 switch(nspace_type) {
7906 case NSPACE_HANDLER_NSPACE:
7907 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
7908 case NSPACE_HANDLER_SNAPSHOT:
7909 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
7910 case NSPACE_HANDLER_TRACK:
7911 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_TRACK_EVENT;
7912 default:
7913 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
7914 return 0;
7915 }
7916 }
7917
7918 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
7919 {
7920 switch(nspace_type) {
7921 case NSPACE_HANDLER_NSPACE:
7922 return NSPACE_ITEM_NSPACE_EVENT;
7923 case NSPACE_HANDLER_SNAPSHOT:
7924 return NSPACE_ITEM_SNAPSHOT_EVENT;
7925 case NSPACE_HANDLER_TRACK:
7926 return NSPACE_ITEM_TRACK_EVENT;
7927 default:
7928 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
7929 return 0;
7930 }
7931 }
7932
7933 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
7934 {
7935 switch(nspace_type) {
7936 case NSPACE_HANDLER_NSPACE:
7937 return FREAD | FWRITE | O_EVTONLY;
7938 case NSPACE_HANDLER_SNAPSHOT:
7939 case NSPACE_HANDLER_TRACK:
7940 return FREAD | O_EVTONLY;
7941 default:
7942 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
7943 return 0;
7944 }
7945 }
7946
7947 static inline nspace_type_t nspace_type_for_op(uint64_t op)
7948 {
7949 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
7950 case NAMESPACE_HANDLER_NSPACE_EVENT:
7951 return NSPACE_HANDLER_NSPACE;
7952 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
7953 return NSPACE_HANDLER_SNAPSHOT;
7954 case NAMESPACE_HANDLER_TRACK_EVENT:
7955 return NSPACE_HANDLER_TRACK;
7956 default:
7957 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
7958 return NSPACE_HANDLER_NSPACE;
7959 }
7960 }
7961
7962 static inline int nspace_is_special_process(struct proc *proc)
7963 {
7964 int i;
7965 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
7966 if (proc == nspace_handlers[i].handler_proc)
7967 return 1;
7968 }
7969 return 0;
7970 }
7971
7972 void
7973 nspace_handler_init(void)
7974 {
7975 nspace_lock_attr = lck_attr_alloc_init();
7976 nspace_group_attr = lck_grp_attr_alloc_init();
7977 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
7978 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
7979 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
7980 memset(&nspace_items[0], 0, sizeof(nspace_items));
7981 }
7982
7983 void
7984 nspace_proc_exit(struct proc *p)
7985 {
7986 int i, event_mask = 0;
7987
7988 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
7989 if (p == nspace_handlers[i].handler_proc) {
7990 event_mask |= nspace_item_flags_for_type(i);
7991 nspace_handlers[i].handler_tid = 0;
7992 nspace_handlers[i].handler_proc = NULL;
7993 }
7994 }
7995
7996 if (event_mask == 0) {
7997 return;
7998 }
7999
8000 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
8001 // if this process was the snapshot handler, zero snapshot_timestamp
8002 snapshot_timestamp = 0;
8003 }
8004
8005 //
8006 // unblock anyone that's waiting for the handler that died
8007 //
8008 lck_mtx_lock(&nspace_handler_lock);
8009 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8010 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
8011
8012 if ( nspace_items[i].flags & event_mask ) {
8013
8014 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
8015 vnode_lock_spin(nspace_items[i].vp);
8016 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8017 vnode_unlock(nspace_items[i].vp);
8018 }
8019 nspace_items[i].vp = NULL;
8020 nspace_items[i].vid = 0;
8021 nspace_items[i].flags = NSPACE_ITEM_DONE;
8022 nspace_items[i].token = 0;
8023
8024 wakeup((caddr_t)&(nspace_items[i].vp));
8025 }
8026 }
8027 }
8028
8029 wakeup((caddr_t)&nspace_item_idx);
8030 lck_mtx_unlock(&nspace_handler_lock);
8031 }
8032
8033
8034 int
8035 resolve_nspace_item(struct vnode *vp, uint64_t op)
8036 {
8037 return resolve_nspace_item_ext(vp, op, NULL);
8038 }
8039
8040 int
8041 resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
8042 {
8043 int i, error, keep_waiting;
8044 struct timespec ts;
8045 nspace_type_t nspace_type = nspace_type_for_op(op);
8046
8047 // only allow namespace events on regular files, directories and symlinks.
8048 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
8049 return 0;
8050 }
8051
8052 //
8053 // if this is a snapshot event and the vnode is on a
8054 // disk image just pretend nothing happened since any
8055 // change to the disk image will cause the disk image
8056 // itself to get backed up and this avoids multi-way
8057 // deadlocks between the snapshot handler and the ever
8058 // popular diskimages-helper process. the variable
8059 // nspace_allow_virtual_devs allows this behavior to
8060 // be overridden (for use by the Mobile TimeMachine
8061 // testing infrastructure which uses disk images)
8062 //
8063 if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
8064 && (vp->v_mount != NULL)
8065 && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
8066 && !nspace_allow_virtual_devs) {
8067
8068 return 0;
8069 }
8070
8071 // if (thread_tid(current_thread()) == namespace_handler_tid) {
8072 if (nspace_handlers[nspace_type].handler_proc == NULL) {
8073 return 0;
8074 }
8075
8076 if (nspace_is_special_process(current_proc())) {
8077 return EDEADLK;
8078 }
8079
8080 lck_mtx_lock(&nspace_handler_lock);
8081
8082 retry:
8083 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8084 if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
8085 break;
8086 }
8087 }
8088
8089 if (i >= MAX_NSPACE_ITEMS) {
8090 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8091 if (nspace_items[i].flags == 0) {
8092 break;
8093 }
8094 }
8095 } else {
8096 nspace_items[i].refcount++;
8097 }
8098
8099 if (i >= MAX_NSPACE_ITEMS) {
8100 ts.tv_sec = nspace_handler_timeout;
8101 ts.tv_nsec = 0;
8102
8103 error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
8104 if (error == 0) {
8105 // an entry got free'd up, go see if we can get a slot
8106 goto retry;
8107 } else {
8108 lck_mtx_unlock(&nspace_handler_lock);
8109 return error;
8110 }
8111 }
8112
8113 //
8114 // if it didn't already exist, add it. if it did exist
8115 // we'll get woken up when someone does a wakeup() on
8116 // the slot in the nspace_items table.
8117 //
8118 if (vp != nspace_items[i].vp) {
8119 nspace_items[i].vp = vp;
8120 nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
8121 nspace_items[i].op = op;
8122 nspace_items[i].vid = vnode_vid(vp);
8123 nspace_items[i].flags = NSPACE_ITEM_NEW;
8124 nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
8125 if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
8126 if (arg) {
8127 vnode_lock_spin(vp);
8128 vp->v_flag |= VNEEDSSNAPSHOT;
8129 vnode_unlock(vp);
8130 }
8131 }
8132
8133 nspace_items[i].token = 0;
8134 nspace_items[i].refcount = 1;
8135
8136 wakeup((caddr_t)&nspace_item_idx);
8137 }
8138
8139 //
8140 // Now go to sleep until the handler does a wakeup on this
8141 // slot in the nspace_items table (or we timeout).
8142 //
8143 keep_waiting = 1;
8144 while(keep_waiting) {
8145 ts.tv_sec = nspace_handler_timeout;
8146 ts.tv_nsec = 0;
8147 error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
8148
8149 if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
8150 error = 0;
8151 } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
8152 error = nspace_items[i].token;
8153 } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
8154 if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
8155 nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
8156 continue;
8157 } else {
8158 error = ETIMEDOUT;
8159 }
8160 } else if (error == 0) {
8161 // hmmm, why did we get woken up?
8162 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
8163 nspace_items[i].token);
8164 }
8165
8166 if (--nspace_items[i].refcount == 0) {
8167 nspace_items[i].vp = NULL; // clear this so that no one will match on it again
8168 nspace_items[i].arg = NULL;
8169 nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
8170 nspace_items[i].flags = 0; // this clears it for re-use
8171 }
8172 wakeup(&nspace_token_id);
8173 keep_waiting = 0;
8174 }
8175
8176 lck_mtx_unlock(&nspace_handler_lock);
8177
8178 return error;
8179 }
8180
8181
8182 int
8183 get_nspace_item_status(struct vnode *vp, int32_t *status)
8184 {
8185 int i;
8186
8187 lck_mtx_lock(&nspace_handler_lock);
8188 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8189 if (nspace_items[i].vp == vp) {
8190 break;
8191 }
8192 }
8193
8194 if (i >= MAX_NSPACE_ITEMS) {
8195 lck_mtx_unlock(&nspace_handler_lock);
8196 return ENOENT;
8197 }
8198
8199 *status = nspace_items[i].flags;
8200 lck_mtx_unlock(&nspace_handler_lock);
8201 return 0;
8202 }
8203
8204
8205 #if 0
8206 static int
8207 build_volfs_path(struct vnode *vp, char *path, int *len)
8208 {
8209 struct vnode_attr va;
8210 int ret;
8211
8212 VATTR_INIT(&va);
8213 VATTR_WANTED(&va, va_fsid);
8214 VATTR_WANTED(&va, va_fileid);
8215
8216 if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
8217 *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
8218 ret = -1;
8219 } else {
8220 *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
8221 ret = 0;
8222 }
8223
8224 return ret;
8225 }
8226 #endif
8227
8228 //
8229 // Note: this function does NOT check permissions on all of the
8230 // parent directories leading to this vnode. It should only be
8231 // called on behalf of a root process. Otherwise a process may
8232 // get access to a file because the file itself is readable even
8233 // though its parent directories would prevent access.
8234 //
8235 static int
8236 vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
8237 {
8238 int error, action;
8239
8240 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8241 return error;
8242 }
8243
8244 #if CONFIG_MACF
8245 error = mac_vnode_check_open(ctx, vp, fmode);
8246 if (error)
8247 return error;
8248 #endif
8249
8250 /* compute action to be authorized */
8251 action = 0;
8252 if (fmode & FREAD) {
8253 action |= KAUTH_VNODE_READ_DATA;
8254 }
8255 if (fmode & (FWRITE | O_TRUNC)) {
8256 /*
8257 * If we are writing, appending, and not truncating,
8258 * indicate that we are appending so that if the
8259 * UF_APPEND or SF_APPEND bits are set, we do not deny
8260 * the open.
8261 */
8262 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
8263 action |= KAUTH_VNODE_APPEND_DATA;
8264 } else {
8265 action |= KAUTH_VNODE_WRITE_DATA;
8266 }
8267 }
8268
8269 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
8270 return error;
8271
8272
8273 //
8274 // if the vnode is tagged VOPENEVT and the current process
8275 // has the P_CHECKOPENEVT flag set, then we OR the O_EVTONLY flag
8276 // into the open mode so that this open won't count against
8277 // the vnode when Carbon delete() does a vnode_isinuse() to see
8278 // if a file is currently in use. This allows Spotlight
8279 // importers to avoid interfering with Carbon apps that depend on
8280 // the no-delete-if-busy semantics of Carbon delete().
8281 //
8282 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
8283 fmode |= O_EVTONLY;
8284 }
8285
8286 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
8287 return error;
8288 }
8289 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
8290 VNOP_CLOSE(vp, fmode, ctx);
8291 return error;
8292 }
8293
8294 /* Call out to allow 3rd party notification of open.
8295 * Ignore result of kauth_authorize_fileop call.
8296 */
8297 #if CONFIG_MACF
8298 mac_vnode_notify_open(ctx, vp, fmode);
8299 #endif
8300 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
8301 (uintptr_t)vp, 0);
8302
8303
8304 return 0;
8305 }
8306
8307 static int
8308 wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
8309 {
8310 int i, error=0, unblock=0;
8311 task_t curtask;
8312
8313 lck_mtx_lock(&nspace_handler_exclusion_lock);
8314 if (nspace_handlers[nspace_type].handler_busy) {
8315 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8316 return EBUSY;
8317 }
8318 nspace_handlers[nspace_type].handler_busy = 1;
8319 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8320
8321 /*
8322 * Any process that gets here will be one of the namespace handlers.
8323 * As such, it should be prevented from acquiring DMG vnodes during vnode reclamation,
8324 * since that can cause deadlocks: the namespace handler may prevent
8325 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
8326 * process.
8327 */
8328 curtask = current_task();
8329 bsd_set_dependency_capable (curtask);
8330
8331 lck_mtx_lock(&nspace_handler_lock);
8332 if (nspace_handlers[nspace_type].handler_proc == NULL) {
8333 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
8334 nspace_handlers[nspace_type].handler_proc = current_proc();
8335 }
8336
8337 while (error == 0) {
8338
8339 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8340 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
8341 if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
8342 continue;
8343 }
8344 break;
8345 }
8346 }
8347
8348 if (i < MAX_NSPACE_ITEMS) {
8349 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
8350 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
8351 nspace_items[i].token = ++nspace_token_id;
8352
8353 if (nspace_items[i].vp) {
8354 struct fileproc *fp;
8355 int32_t indx, fmode;
8356 struct proc *p = current_proc();
8357 vfs_context_t ctx = vfs_context_current();
8358 struct vnode_attr va;
8359
8360
8361 /*
8362 * Use vnode pointer to acquire a file descriptor for
8363 * hand-off to userland
8364 */
8365 fmode = nspace_open_flags_for_type(nspace_type);
8366 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
8367 if (error) {
8368 unblock = 1;
8369 break;
8370 }
8371 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
8372 if (error) {
8373 unblock = 1;
8374 vnode_put(nspace_items[i].vp);
8375 break;
8376 }
8377
8378 if ((error = falloc(p, &fp, &indx, ctx))) {
8379 vn_close(nspace_items[i].vp, fmode, ctx);
8380 vnode_put(nspace_items[i].vp);
8381 unblock = 1;
8382 break;
8383 }
8384
8385 fp->f_fglob->fg_flag = fmode;
8386 fp->f_fglob->fg_ops = &vnops;
8387 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
8388
8389 proc_fdlock(p);
8390 procfdtbl_releasefd(p, indx, NULL);
8391 fp_drop(p, indx, fp, 1);
8392 proc_fdunlock(p);
8393
8394 /*
8395 * All variants of the namespace handler struct support these three fields:
8396 * token, flags, and the FD pointer
8397 */
8398 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
8399 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
8400 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
8401
8402 /*
8403 * Handle optional fields:
8404 * the extended version supports an info ptr (offset, length), and the
8405 *
8406 * namedata version supports a unique per-link object ID
8407 *
8408 */
8409 if (nhd->infoptr) {
8410 uio_t uio = (uio_t)nspace_items[i].arg;
8411 uint64_t u_offset, u_length;
8412
8413 if (uio) {
8414 u_offset = uio_offset(uio);
8415 u_length = uio_resid(uio);
8416 } else {
8417 u_offset = 0;
8418 u_length = 0;
8419 }
8420 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
8421 error = copyout(&u_length, nhd->infoptr+sizeof(uint64_t), sizeof(uint64_t));
8422 }
8423
8424 if (nhd->objid) {
8425 VATTR_INIT(&va);
8426 VATTR_WANTED(&va, va_linkid);
8427 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
8428 if (error == 0 ) {
8429 uint64_t linkid = 0;
8430 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
8431 linkid = (uint64_t)va.va_linkid;
8432 }
8433 error = copyout (&linkid, nhd->objid, sizeof(uint64_t));
8434 }
8435 }
8436
8437 if (error) {
8438 vn_close(nspace_items[i].vp, fmode, ctx);
8439 fp_free(p, indx, fp);
8440 unblock = 1;
8441 }
8442
8443 vnode_put(nspace_items[i].vp);
8444
8445 break;
8446 } else {
8447 printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
8448 i, nspace_items[i].vp, error, nspace_items[i].vp->v_name);
8449 }
8450
8451 } else {
8452 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
8453 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
8454 error = EINVAL;
8455 break;
8456 }
8457
8458 }
8459 }
8460
8461 if (unblock) {
8462 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
8463 vnode_lock_spin(nspace_items[i].vp);
8464 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8465 vnode_unlock(nspace_items[i].vp);
8466 }
8467 nspace_items[i].vp = NULL;
8468 nspace_items[i].vid = 0;
8469 nspace_items[i].flags = NSPACE_ITEM_DONE;
8470 nspace_items[i].token = 0;
8471
8472 wakeup((caddr_t)&(nspace_items[i].vp));
8473 }
8474
8475 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
8476 // just go through every snapshot event and unblock it immediately.
8477 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
8478 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8479 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
8480 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
8481 nspace_items[i].vp = NULL;
8482 nspace_items[i].vid = 0;
8483 nspace_items[i].flags = NSPACE_ITEM_DONE;
8484 nspace_items[i].token = 0;
8485
8486 wakeup((caddr_t)&(nspace_items[i].vp));
8487 }
8488 }
8489 }
8490 }
8491 }
8492
8493 lck_mtx_unlock(&nspace_handler_lock);
8494
8495 lck_mtx_lock(&nspace_handler_exclusion_lock);
8496 nspace_handlers[nspace_type].handler_busy = 0;
8497 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8498
8499 return error;
8500 }
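
/*
 * A rough sketch of the hand-off protocol implemented by the routine above and
 * by the FSCTL_NAMESPACE_HANDLER_* cases in fsctl_internal() below (all names
 * are taken from this file):
 *
 *   1. A privileged handler process issues FSCTL_NAMESPACE_HANDLER_GET (or one
 *      of the snapshot/tracked-file variants).  process_namespace_fsctl()
 *      copies in the userland pointers and calls wait_for_namespace_event(),
 *      which sleeps on nspace_item_idx until resolve_nspace_item() queues an
 *      item.
 *   2. When an item arrives, the kernel opens the target vnode, wires it to a
 *      fresh file descriptor, and copies the token, op/flags and fd (plus the
 *      optional offset/length pair and per-link object ID) out to the handler.
 *   3. After servicing the file, the handler issues
 *      FSCTL_NAMESPACE_HANDLER_UNBLOCK with the token, which wakes the thread
 *      sleeping on nspace_items[i].vp; FSCTL_NAMESPACE_HANDLER_UPDATE instead
 *      resets that waiter's timeout, and FSCTL_NAMESPACE_HANDLER_CANCEL fails
 *      the request.
 */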
8501
8502 static inline int validate_namespace_args (int is64bit, int size) {
8503
8504 if (is64bit) {
8505 /* Must be one of these */
8506 if (size == sizeof(user64_namespace_handler_info)) {
8507 goto sizeok;
8508 }
8509 if (size == sizeof(user64_namespace_handler_info_ext)) {
8510 goto sizeok;
8511 }
8512 if (size == sizeof(user64_namespace_handler_data)) {
8513 goto sizeok;
8514 }
8515 return EINVAL;
8516 }
8517 else {
8518 /* 32 bit -- must be one of these */
8519 if (size == sizeof(user32_namespace_handler_info)) {
8520 goto sizeok;
8521 }
8522 if (size == sizeof(user32_namespace_handler_info_ext)) {
8523 goto sizeok;
8524 }
8525 if (size == sizeof(user32_namespace_handler_data)) {
8526 goto sizeok;
8527 }
8528 return EINVAL;
8529 }
8530
8531 sizeok:
8532
8533 return 0;
8534
8535 }
8536
8537 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
8538 {
8539 int error = 0;
8540 namespace_handler_data nhd;
8541
8542 bzero (&nhd, sizeof(namespace_handler_data));
8543
8544 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
8545 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
8546 return EINVAL;
8547 }
8548
8549 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8550 return error;
8551 }
8552
8553 error = validate_namespace_args (is64bit, size);
8554 if (error) {
8555 return error;
8556 }
8557
8558 /* Copy in the userland pointers into our kernel-only struct */
8559
8560 if (is64bit) {
8561 /* 64 bit userland structures */
8562 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
8563 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
8564 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
8565
8566 /* If the size is greater than the standard info struct, add in extra fields */
8567 if (size > (sizeof(user64_namespace_handler_info))) {
8568 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
8569 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
8570 }
8571 if (size == (sizeof(user64_namespace_handler_data))) {
8572 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
8573 }
8574 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
8575 }
8576 }
8577 else {
8578 /* 32 bit userland structures */
8579 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
8580 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
8581 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
8582
8583 if (size > (sizeof(user32_namespace_handler_info))) {
8584 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
8585 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
8586 }
8587 if (size == (sizeof(user32_namespace_handler_data))) {
8588 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
8589 }
8590 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
8591 }
8592 }
8593
8594 return wait_for_namespace_event(&nhd, nspace_type);
8595 }
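
/*
 * For orientation: the userland argument to this fsctl is one of three layouts
 * whose exact definitions live in the namespace-handler headers (not shown
 * here).  Judging from the copy-in code above, they nest as follows, each
 * field being a user pointer the kernel writes through:
 *
 *     namespace_handler_info      { token, flags, fdptr }
 *     namespace_handler_info_ext  = info + { infoptr }   (offset/length pair)
 *     namespace_handler_data      = info_ext + { objid } (per-link object ID)
 */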
8596
8597 /*
8598 * Make a filesystem-specific control call:
8599 */
8600 /* ARGSUSED */
8601 static int
8602 fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
8603 {
8604 int error=0;
8605 boolean_t is64bit;
8606 u_int size;
8607 #define STK_PARAMS 128
8608 char stkbuf[STK_PARAMS];
8609 caddr_t data, memp;
8610 vnode_t vp = *arg_vp;
8611
8612 size = IOCPARM_LEN(cmd);
8613 if (size > IOCPARM_MAX) return (EINVAL);
8614
8615 is64bit = proc_is64bit(p);
8616
8617 memp = NULL;
8618 if (size > sizeof (stkbuf)) {
8619 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
8620 data = memp;
8621 } else {
8622 data = &stkbuf[0];
8623 };
8624
8625 if (cmd & IOC_IN) {
8626 if (size) {
8627 error = copyin(udata, data, size);
8628 if (error) goto FSCtl_Exit;
8629 } else {
8630 if (is64bit) {
8631 *(user_addr_t *)data = udata;
8632 }
8633 else {
8634 *(uint32_t *)data = (uint32_t)udata;
8635 }
8636 };
8637 } else if ((cmd & IOC_OUT) && size) {
8638 /*
8639 * Zero the buffer so the user always
8640 * gets back something deterministic.
8641 */
8642 bzero(data, size);
8643 } else if (cmd & IOC_VOID) {
8644 if (is64bit) {
8645 *(user_addr_t *)data = udata;
8646 }
8647 else {
8648 *(uint32_t *)data = (uint32_t)udata;
8649 }
8650 }
8651
8652 /* Check to see if it's a generic command */
8653 if (IOCBASECMD(cmd) == FSCTL_SYNC_VOLUME) {
8654 mount_t mp = vp->v_mount;
8655 int arg = *(uint32_t*)data;
8656
8657 /* record vid of vp so we can drop it below. */
8658 uint32_t vvid = vp->v_id;
8659
8660 /*
8661 * Then grab mount_iterref so that we can release the vnode.
8662 * Without this, a thread may call vnode_iterate_prepare then
8663 * get into a deadlock because we've never released the root vp
8664 */
8665 error = mount_iterref (mp, 0);
8666 if (error) {
8667 goto FSCtl_Exit;
8668 }
8669 vnode_put(vp);
8670
8671 /* issue the sync for this volume */
8672 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
8673
8674 /*
8675 * Then release the mount_iterref once we're done syncing; it's not
8676 * needed for the VNOP_IOCTL below
8677 */
8678 mount_iterdrop(mp);
8679
8680 if (arg & FSCTL_SYNC_FULLSYNC) {
8681 /* re-obtain vnode iocount on the root vp, if possible */
8682 error = vnode_getwithvid (vp, vvid);
8683 if (error == 0) {
8684 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
8685 vnode_put (vp);
8686 }
8687 }
8688 /* mark the argument VP as having been released */
8689 *arg_vp = NULL;
8690
8691 } else if (IOCBASECMD(cmd) == FSCTL_SET_PACKAGE_EXTS) {
8692 user_addr_t ext_strings;
8693 uint32_t num_entries;
8694 uint32_t max_width;
8695
8696 if ( (is64bit && size != sizeof(user64_package_ext_info))
8697 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
8698
8699 // either you're 64-bit and passed a 64-bit struct or
8700 // you're 32-bit and passed a 32-bit struct. otherwise
8701 // it's not ok.
8702 error = EINVAL;
8703 goto FSCtl_Exit;
8704 }
8705
8706 if (is64bit) {
8707 ext_strings = ((user64_package_ext_info *)data)->strings;
8708 num_entries = ((user64_package_ext_info *)data)->num_entries;
8709 max_width = ((user64_package_ext_info *)data)->max_width;
8710 } else {
8711 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
8712 num_entries = ((user32_package_ext_info *)data)->num_entries;
8713 max_width = ((user32_package_ext_info *)data)->max_width;
8714 }
8715
8716 error = set_package_extensions_table(ext_strings, num_entries, max_width);
8717
8718
8719 }
8720
8721 /* namespace handlers */
8722 else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_GET) {
8723 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
8724 }
8725
8726 /* Snapshot handlers */
8727 else if (IOCBASECMD(cmd) == FSCTL_OLD_SNAPSHOT_HANDLER_GET) {
8728 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
8729 } else if (IOCBASECMD(cmd) == FSCTL_SNAPSHOT_HANDLER_GET_EXT) {
8730 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
8731 }
8732
8733 /* Tracked File Handlers */
8734 else if (IOCBASECMD(cmd) == FSCTL_TRACKED_HANDLER_GET) {
8735 error = process_namespace_fsctl(NSPACE_HANDLER_TRACK, is64bit, size, data);
8736 }
8737 else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_GETDATA) {
8738 error = process_namespace_fsctl(NSPACE_HANDLER_TRACK, is64bit, size, data);
8739 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_UPDATE) {
8740 uint32_t token, val;
8741 int i;
8742
8743 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
8744 goto FSCtl_Exit;
8745 }
8746
8747 if (!nspace_is_special_process(p)) {
8748 error = EINVAL;
8749 goto FSCtl_Exit;
8750 }
8751
8752 token = ((uint32_t *)data)[0];
8753 val = ((uint32_t *)data)[1];
8754
8755 lck_mtx_lock(&nspace_handler_lock);
8756
8757 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8758 if (nspace_items[i].token == token) {
8759 break;
8760 }
8761 }
8762
8763 if (i >= MAX_NSPACE_ITEMS) {
8764 error = ENOENT;
8765 } else {
8766 //
8767 // if this bit is set, when resolve_nspace_item() times out
8768 // it will loop and go back to sleep.
8769 //
8770 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
8771 }
8772
8773 lck_mtx_unlock(&nspace_handler_lock);
8774
8775 if (error) {
8776 printf("nspace-handler-update: did not find token %u\n", token);
8777 }
8778
8779 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_UNBLOCK) {
8780 uint32_t token, val;
8781 int i;
8782
8783 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
8784 goto FSCtl_Exit;
8785 }
8786
8787 if (!nspace_is_special_process(p)) {
8788 error = EINVAL;
8789 goto FSCtl_Exit;
8790 }
8791
8792 token = ((uint32_t *)data)[0];
8793 val = ((uint32_t *)data)[1];
8794
8795 lck_mtx_lock(&nspace_handler_lock);
8796
8797 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8798 if (nspace_items[i].token == token) {
8799 break;
8800 }
8801 }
8802
8803 if (i >= MAX_NSPACE_ITEMS) {
8804 printf("nspace-handler-unblock: did not find token %u\n", token);
8805 error = ENOENT;
8806 } else {
8807 if (val == 0 && nspace_items[i].vp) {
8808 vnode_lock_spin(nspace_items[i].vp);
8809 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8810 vnode_unlock(nspace_items[i].vp);
8811 }
8812
8813 nspace_items[i].vp = NULL;
8814 nspace_items[i].arg = NULL;
8815 nspace_items[i].op = 0;
8816 nspace_items[i].vid = 0;
8817 nspace_items[i].flags = NSPACE_ITEM_DONE;
8818 nspace_items[i].token = 0;
8819
8820 wakeup((caddr_t)&(nspace_items[i].vp));
8821 }
8822
8823 lck_mtx_unlock(&nspace_handler_lock);
8824
8825 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_CANCEL) {
8826 uint32_t token, val;
8827 int i;
8828
8829 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
8830 goto FSCtl_Exit;
8831 }
8832
8833 if (!nspace_is_special_process(p)) {
8834 error = EINVAL;
8835 goto FSCtl_Exit;
8836 }
8837
8838 token = ((uint32_t *)data)[0];
8839 val = ((uint32_t *)data)[1];
8840
8841 lck_mtx_lock(&nspace_handler_lock);
8842
8843 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8844 if (nspace_items[i].token == token) {
8845 break;
8846 }
8847 }
8848
8849 if (i >= MAX_NSPACE_ITEMS) {
8850 printf("nspace-handler-cancel: did not find token %u\n", token);
8851 error = ENOENT;
8852 } else {
8853 if (nspace_items[i].vp) {
8854 vnode_lock_spin(nspace_items[i].vp);
8855 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8856 vnode_unlock(nspace_items[i].vp);
8857 }
8858
8859 nspace_items[i].vp = NULL;
8860 nspace_items[i].arg = NULL;
8861 nspace_items[i].vid = 0;
8862 nspace_items[i].token = val;
8863 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
8864 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
8865
8866 wakeup((caddr_t)&(nspace_items[i].vp));
8867 }
8868
8869 lck_mtx_unlock(&nspace_handler_lock);
8870 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME) {
8871 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8872 goto FSCtl_Exit;
8873 }
8874
8875 // we explicitly do not do the namespace_handler_proc check here
8876
8877 lck_mtx_lock(&nspace_handler_lock);
8878 snapshot_timestamp = ((uint32_t *)data)[0];
8879 wakeup(&nspace_item_idx);
8880 lck_mtx_unlock(&nspace_handler_lock);
8881 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
8882
8883 } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS) {
8884 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8885 goto FSCtl_Exit;
8886 }
8887
8888 lck_mtx_lock(&nspace_handler_lock);
8889 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
8890 lck_mtx_unlock(&nspace_handler_lock);
8891 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
8892 nspace_allow_virtual_devs ? "" : " NOT");
8893 error = 0;
8894
8895 } else if (IOCBASECMD(cmd) == FSCTL_SET_FSTYPENAME_OVERRIDE) {
8896 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8897 goto FSCtl_Exit;
8898 }
8899 if (vp->v_mount) {
8900 mount_lock(vp->v_mount);
8901 if (data[0] != 0) {
8902 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
8903 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
8904 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
8905 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
8906 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
8907 }
8908 } else {
8909 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
8910 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
8911 }
8912 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
8913 vp->v_mount->fstypename_override[0] = '\0';
8914 }
8915 mount_unlock(vp->v_mount);
8916 }
8917 } else {
8918 /* Invoke the filesystem-specific code */
8919 error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
8920 }
8921
8922
8923 /*
8924 * Copy any data to user, size was
8925 * already set and checked above.
8926 */
8927 if (error == 0 && (cmd & IOC_OUT) && size)
8928 error = copyout(data, udata, size);
8929
8930 FSCtl_Exit:
8931 if (memp) kfree(memp, size);
8932
8933 return error;
8934 }
8935
8936 /* ARGSUSED */
8937 int
8938 fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
8939 {
8940 int error;
8941 struct nameidata nd;
8942 u_long nameiflags;
8943 vnode_t vp = NULL;
8944 vfs_context_t ctx = vfs_context_current();
8945
8946 AUDIT_ARG(cmd, uap->cmd);
8947 AUDIT_ARG(value32, uap->options);
8948 /* Get the vnode for the file we are getting info on: */
8949 nameiflags = 0;
8950 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8951 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
8952 UIO_USERSPACE, uap->path, ctx);
8953 if ((error = namei(&nd))) goto done;
8954 vp = nd.ni_vp;
8955 nameidone(&nd);
8956
8957 #if CONFIG_MACF
8958 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
8959 if (error) {
8960 goto done;
8961 }
8962 #endif
8963
8964 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
8965
8966 done:
8967 if (vp)
8968 vnode_put(vp);
8969 return error;
8970 }
8971 /* ARGSUSED */
8972 int
8973 ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
8974 {
8975 int error;
8976 vnode_t vp = NULL;
8977 vfs_context_t ctx = vfs_context_current();
8978 int fd = -1;
8979
8980 AUDIT_ARG(fd, uap->fd);
8981 AUDIT_ARG(cmd, uap->cmd);
8982 AUDIT_ARG(value32, uap->options);
8983
8984 /* Get the vnode for the file we are getting info on: */
8985 if ((error = file_vnode(uap->fd, &vp)))
8986 goto done;
8987 fd = uap->fd;
8988 if ((error = vnode_getwithref(vp))) {
8989 goto done;
8990 }
8991
8992 #if CONFIG_MACF
8993 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
8994 if (error) {
8995 goto done;
8996 }
8997 #endif
8998
8999 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
9000
9001 done:
9002 if (fd != -1)
9003 file_drop(fd);
9004
9005 if (vp)
9006 vnode_put(vp);
9007 return error;
9008 }
9009 /* end of fsctl system call */
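
/*
 * A minimal userland sketch of driving the FSCTL_SYNC_VOLUME path handled in
 * fsctl_internal() above.  It assumes the fsctl() wrapper plus the
 * FSIOC_SYNC_VOLUME request and FSCTL_SYNC_WAIT flag from <sys/fsctl.h>;
 * check that header for the exact names on a given release.
 */
#if 0	/* illustrative only -- userland code, not part of the kernel */
#include <sys/fsctl.h>
#include <stdint.h>
#include <stdio.h>

static int
sync_one_volume(const char *path)
{
	uint32_t arg = FSCTL_SYNC_WAIT;	/* wait for the writes to complete */

	/* IOC_IN request: the kernel copies 'arg' in before dispatching. */
	if (fsctl(path, FSIOC_SYNC_VOLUME, &arg, 0) == -1) {
		perror("fsctl(FSIOC_SYNC_VOLUME)");
		return -1;
	}
	return 0;
}
#endif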
9010
9011 /*
9012 * An in-kernel sync for power management to call.
9013 */
9014 __private_extern__ int
9015 sync_internal(void)
9016 {
9017 int error;
9018
9019 struct sync_args data;
9020
9021 int retval[2];
9022
9023
9024 error = sync(current_proc(), &data, &retval[0]);
9025
9026
9027 return (error);
9028 } /* end of sync_internal call */
9029
9030
9031 /*
9032 * Retrieve the data of an extended attribute.
9033 */
9034 int
9035 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
9036 {
9037 vnode_t vp;
9038 struct nameidata nd;
9039 char attrname[XATTR_MAXNAMELEN+1];
9040 vfs_context_t ctx = vfs_context_current();
9041 uio_t auio = NULL;
9042 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9043 size_t attrsize = 0;
9044 size_t namelen;
9045 u_int32_t nameiflags;
9046 int error;
9047 char uio_buf[ UIO_SIZEOF(1) ];
9048
9049 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9050 return (EINVAL);
9051
9052 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9053 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
9054 if ((error = namei(&nd))) {
9055 return (error);
9056 }
9057 vp = nd.ni_vp;
9058 nameidone(&nd);
9059
9060 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9061 goto out;
9062 }
9063 if (xattr_protected(attrname)) {
9064 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
9065 error = EPERM;
9066 goto out;
9067 }
9068 }
9069 /*
9070 * the specific check for 0xffffffff is a hack to preserve
9071 * binary compatibility in K64 with applications that discovered
9072 * that passing in a buf pointer and a size of -1 resulted in
9073 * just the size of the indicated extended attribute being returned.
9074 * this isn't part of the documented behavior, but because of the
9075 * original implementation's check for "uap->size > 0", this behavior
9076 * was allowed. In K32 that check turned into a signed comparison
9077 * even though uap->size is unsigned... in K64, we blow by that
9078 * check because uap->size is unsigned and doesn't get sign smeared
9079 * in the munger for a 32 bit user app. we also need to add a
9080 * check to limit the maximum size of the buffer being passed in...
9081 * unfortunately, the underlying filesystems seem to just malloc
9082 * the requested size even if the actual extended attribute is tiny.
9083 * because that malloc is for kernel wired memory, we have to put a
9084 * sane limit on it.
9085 *
9086 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
9087 * U64 running on K64 will yield -1 (64 bits wide)
9088 * U32/U64 running on K32 will yield -1 (32 bits wide)
9089 */
9090 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
9091 goto no_uio;
9092
9093 if (uap->value) {
9094 if (uap->size > (size_t)XATTR_MAXSIZE)
9095 uap->size = XATTR_MAXSIZE;
9096
9097 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9098 &uio_buf[0], sizeof(uio_buf));
9099 uio_addiov(auio, uap->value, uap->size);
9100 }
9101 no_uio:
9102 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
9103 out:
9104 vnode_put(vp);
9105
9106 if (auio) {
9107 *retval = uap->size - uio_resid(auio);
9108 } else {
9109 *retval = (user_ssize_t)attrsize;
9110 }
9111
9112 return (error);
9113 }
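
/*
 * A userland sketch of the size-probe-then-fetch pattern supported above:
 * calling getxattr() with a NULL value buffer takes the auio == NULL path and
 * returns only the attribute's size.  Assumes the wrapper from <sys/xattr.h>.
 */
#if 0	/* illustrative only -- userland code, not part of the kernel */
#include <sys/types.h>
#include <sys/xattr.h>
#include <stdlib.h>

static void *
read_xattr(const char *path, const char *name, size_t *lenp)
{
	void *buf;
	ssize_t len;

	/* First call: no buffer, just ask how big the attribute is. */
	len = getxattr(path, name, NULL, 0, 0, XATTR_NOFOLLOW);
	if (len < 0)
		return NULL;

	if ((buf = malloc(len ? (size_t)len : 1)) == NULL)
		return NULL;

	/* Second call: fetch the data into the buffer we just sized. */
	len = getxattr(path, name, buf, (size_t)len, 0, XATTR_NOFOLLOW);
	if (len < 0) {
		free(buf);
		return NULL;
	}
	*lenp = (size_t)len;
	return buf;
}
#endif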
9114
9115 /*
9116 * Retrieve the data of an extended attribute.
9117 */
9118 int
9119 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
9120 {
9121 vnode_t vp;
9122 char attrname[XATTR_MAXNAMELEN+1];
9123 uio_t auio = NULL;
9124 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9125 size_t attrsize = 0;
9126 size_t namelen;
9127 int error;
9128 char uio_buf[ UIO_SIZEOF(1) ];
9129
9130 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9131 return (EINVAL);
9132
9133 if ( (error = file_vnode(uap->fd, &vp)) ) {
9134 return (error);
9135 }
9136 if ( (error = vnode_getwithref(vp)) ) {
9137 file_drop(uap->fd);
9138 return(error);
9139 }
9140 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9141 goto out;
9142 }
9143 if (xattr_protected(attrname)) {
9144 error = EPERM;
9145 goto out;
9146 }
9147 if (uap->value && uap->size > 0) {
9148 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9149 &uio_buf[0], sizeof(uio_buf));
9150 uio_addiov(auio, uap->value, uap->size);
9151 }
9152
9153 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
9154 out:
9155 (void)vnode_put(vp);
9156 file_drop(uap->fd);
9157
9158 if (auio) {
9159 *retval = uap->size - uio_resid(auio);
9160 } else {
9161 *retval = (user_ssize_t)attrsize;
9162 }
9163 return (error);
9164 }
9165
9166 /*
9167 * Set the data of an extended attribute.
9168 */
9169 int
9170 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
9171 {
9172 vnode_t vp;
9173 struct nameidata nd;
9174 char attrname[XATTR_MAXNAMELEN+1];
9175 vfs_context_t ctx = vfs_context_current();
9176 uio_t auio = NULL;
9177 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9178 size_t namelen;
9179 u_int32_t nameiflags;
9180 int error;
9181 char uio_buf[ UIO_SIZEOF(1) ];
9182
9183 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9184 return (EINVAL);
9185
9186 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9187 if (error == EPERM) {
9188 /* if the string won't fit in attrname, copyinstr emits EPERM */
9189 return (ENAMETOOLONG);
9190 }
9191 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9192 return error;
9193 }
9194 if (xattr_protected(attrname))
9195 return(EPERM);
9196 if (uap->size != 0 && uap->value == 0) {
9197 return (EINVAL);
9198 }
9199
9200 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9201 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
9202 if ((error = namei(&nd))) {
9203 return (error);
9204 }
9205 vp = nd.ni_vp;
9206 nameidone(&nd);
9207
9208 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9209 &uio_buf[0], sizeof(uio_buf));
9210 uio_addiov(auio, uap->value, uap->size);
9211
9212 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
9213 #if CONFIG_FSE
9214 if (error == 0) {
9215 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9216 FSE_ARG_VNODE, vp,
9217 FSE_ARG_DONE);
9218 }
9219 #endif
9220 vnode_put(vp);
9221 *retval = 0;
9222 return (error);
9223 }
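
/*
 * The matching userland call, for completeness -- a sketch assuming the
 * setxattr() wrapper from <sys/xattr.h>.  The attribute name used here is
 * made up for the example.  On success the kernel also posts an
 * FSE_XATTR_MODIFIED fsevent (see the CONFIG_FSE block above).
 */
#if 0	/* illustrative only -- userland code, not part of the kernel */
#include <sys/xattr.h>
#include <string.h>

static int
tag_file(const char *path)
{
	const char *val = "1";

	/* Passing XATTR_CREATE instead of 0 would fail if the attribute exists. */
	return setxattr(path, "com.example.tagged", val, strlen(val), 0, 0);
}
#endif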
9224
9225 /*
9226 * Set the data of an extended attribute.
9227 */
9228 int
9229 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
9230 {
9231 vnode_t vp;
9232 char attrname[XATTR_MAXNAMELEN+1];
9233 uio_t auio = NULL;
9234 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9235 size_t namelen;
9236 int error;
9237 char uio_buf[ UIO_SIZEOF(1) ];
9238 #if CONFIG_FSE
9239 vfs_context_t ctx = vfs_context_current();
9240 #endif
9241
9242 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9243 return (EINVAL);
9244
9245 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9246 return (error);
9247 }
9248 if (xattr_protected(attrname))
9249 return(EPERM);
9250 if (uap->size != 0 && uap->value == 0) {
9251 return (EINVAL);
9252 }
9253 if ( (error = file_vnode(uap->fd, &vp)) ) {
9254 return (error);
9255 }
9256 if ( (error = vnode_getwithref(vp)) ) {
9257 file_drop(uap->fd);
9258 return(error);
9259 }
9260 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9261 &uio_buf[0], sizeof(uio_buf));
9262 uio_addiov(auio, uap->value, uap->size);
9263
9264 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
9265 #if CONFIG_FSE
9266 if (error == 0) {
9267 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9268 FSE_ARG_VNODE, vp,
9269 FSE_ARG_DONE);
9270 }
9271 #endif
9272 vnode_put(vp);
9273 file_drop(uap->fd);
9274 *retval = 0;
9275 return (error);
9276 }
9277
9278 /*
9279 * Remove an extended attribute.
9280 * XXX Code duplication here.
9281 */
9282 int
9283 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
9284 {
9285 vnode_t vp;
9286 struct nameidata nd;
9287 char attrname[XATTR_MAXNAMELEN+1];
9288 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9289 vfs_context_t ctx = vfs_context_current();
9290 size_t namelen;
9291 u_int32_t nameiflags;
9292 int error;
9293
9294 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9295 return (EINVAL);
9296
9297 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
9298 if (error != 0) {
9299 return (error);
9300 }
9301 if (xattr_protected(attrname))
9302 return(EPERM);
9303 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9304 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
9305 if ((error = namei(&nd))) {
9306 return (error);
9307 }
9308 vp = nd.ni_vp;
9309 nameidone(&nd);
9310
9311 error = vn_removexattr(vp, attrname, uap->options, ctx);
9312 #if CONFIG_FSE
9313 if (error == 0) {
9314 add_fsevent(FSE_XATTR_REMOVED, ctx,
9315 FSE_ARG_VNODE, vp,
9316 FSE_ARG_DONE);
9317 }
9318 #endif
9319 vnode_put(vp);
9320 *retval = 0;
9321 return (error);
9322 }
9323
9324 /*
9325 * Remove an extended attribute.
9326 * XXX Code duplication here.
9327 */
9328 int
9329 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
9330 {
9331 vnode_t vp;
9332 char attrname[XATTR_MAXNAMELEN+1];
9333 size_t namelen;
9334 int error;
9335 #if CONFIG_FSE
9336 vfs_context_t ctx = vfs_context_current();
9337 #endif
9338
9339 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9340 return (EINVAL);
9341
9342 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
9343 if (error != 0) {
9344 return (error);
9345 }
9346 if (xattr_protected(attrname))
9347 return(EPERM);
9348 if ( (error = file_vnode(uap->fd, &vp)) ) {
9349 return (error);
9350 }
9351 if ( (error = vnode_getwithref(vp)) ) {
9352 file_drop(uap->fd);
9353 return(error);
9354 }
9355
9356 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
9357 #if CONFIG_FSE
9358 if (error == 0) {
9359 add_fsevent(FSE_XATTR_REMOVED, ctx,
9360 FSE_ARG_VNODE, vp,
9361 FSE_ARG_DONE);
9362 }
9363 #endif
9364 vnode_put(vp);
9365 file_drop(uap->fd);
9366 *retval = 0;
9367 return (error);
9368 }
9369
9370 /*
9371 * Retrieve the list of extended attribute names.
9372 * XXX Code duplication here.
9373 */
9374 int
9375 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
9376 {
9377 vnode_t vp;
9378 struct nameidata nd;
9379 vfs_context_t ctx = vfs_context_current();
9380 uio_t auio = NULL;
9381 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9382 size_t attrsize = 0;
9383 u_int32_t nameiflags;
9384 int error;
9385 char uio_buf[ UIO_SIZEOF(1) ];
9386
9387 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9388 return (EINVAL);
9389
9390 nameiflags = ((uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW) | NOTRIGGER;
9391 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
9392 if ((error = namei(&nd))) {
9393 return (error);
9394 }
9395 vp = nd.ni_vp;
9396 nameidone(&nd);
9397 if (uap->namebuf != 0 && uap->bufsize > 0) {
9398 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
9399 &uio_buf[0], sizeof(uio_buf));
9400 uio_addiov(auio, uap->namebuf, uap->bufsize);
9401 }
9402
9403 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
9404
9405 vnode_put(vp);
9406 if (auio) {
9407 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
9408 } else {
9409 *retval = (user_ssize_t)attrsize;
9410 }
9411 return (error);
9412 }
9413
9414 /*
9415 * Retrieve the list of extended attribute names.
9416 * XXX Code duplication here.
9417 */
9418 int
9419 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
9420 {
9421 vnode_t vp;
9422 uio_t auio = NULL;
9423 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9424 size_t attrsize = 0;
9425 int error;
9426 char uio_buf[ UIO_SIZEOF(1) ];
9427
9428 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9429 return (EINVAL);
9430
9431 if ( (error = file_vnode(uap->fd, &vp)) ) {
9432 return (error);
9433 }
9434 if ( (error = vnode_getwithref(vp)) ) {
9435 file_drop(uap->fd);
9436 return(error);
9437 }
9438 if (uap->namebuf != 0 && uap->bufsize > 0) {
9439 auio = uio_createwithbuffer(1, 0, spacetype,
9440 UIO_READ, &uio_buf[0], sizeof(uio_buf));
9441 uio_addiov(auio, uap->namebuf, uap->bufsize);
9442 }
9443
9444 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
9445
9446 vnode_put(vp);
9447 file_drop(uap->fd);
9448 if (auio) {
9449 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
9450 } else {
9451 *retval = (user_ssize_t)attrsize;
9452 }
9453 return (error);
9454 }
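
/*
 * A userland sketch of consuming the list produced above: vn_listxattr()
 * fills the buffer with consecutive NUL-terminated names.  Assumes the
 * listxattr() wrapper from <sys/xattr.h>.
 */
#if 0	/* illustrative only -- userland code, not part of the kernel */
#include <sys/types.h>
#include <sys/xattr.h>
#include <stdio.h>
#include <string.h>

static void
print_xattr_names(const char *path)
{
	char names[4096];
	ssize_t len, off;

	len = listxattr(path, names, sizeof(names), XATTR_NOFOLLOW);
	for (off = 0; len > 0 && off < len; off += strlen(&names[off]) + 1) {
		printf("%s\n", &names[off]);
	}
}
#endif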
9455
9456 /*
9457 * Obtain the full pathname of a file system object by id.
9458 *
9459 * This is a private SPI used by the File Manager.
9460 */
9461 __private_extern__
9462 int
9463 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
9464 {
9465 vnode_t vp;
9466 struct mount *mp = NULL;
9467 vfs_context_t ctx = vfs_context_current();
9468 fsid_t fsid;
9469 char *realpath;
9470 int bpflags;
9471 int length;
9472 int error;
9473
9474 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
9475 return (error);
9476 }
9477 AUDIT_ARG(value32, fsid.val[0]);
9478 AUDIT_ARG(value64, uap->objid);
9479 /* Restrict output buffer size for now. */
9480 if (uap->bufsize > PAGE_SIZE) {
9481 return (EINVAL);
9482 }
9483 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
9484 if (realpath == NULL) {
9485 return (ENOMEM);
9486 }
9487 /* Find the target mountpoint. */
9488 if ((mp = mount_lookupby_volfsid(fsid.val[0], 1)) == NULL) {
9489 error = ENOTSUP; /* unexpected failure */
9490 goto out;
9491 }
9492 unionget:
9493 /* Find the target vnode. */
9494 if (uap->objid == 2) {
9495 error = VFS_ROOT(mp, &vp, ctx);
9496 } else {
9497 error = VFS_VGET(mp, (ino64_t)uap->objid, &vp, ctx);
9498 }
9499
9500 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
9501 /*
9502 * If the fileid isn't found and we're in a union
9503 * mount volume, then see if the fileid is in the
9504 * mounted-on volume.
9505 */
9506 struct mount *tmp = mp;
9507 mp = vnode_mount(tmp->mnt_vnodecovered);
9508 vfs_unbusy(tmp);
9509 if (vfs_busy(mp, LK_NOWAIT) == 0)
9510 goto unionget;
9511 } else
9512 vfs_unbusy(mp);
9513
9514 if (error) {
9515 goto out;
9516 }
9517 #if CONFIG_MACF
9518 error = mac_vnode_check_fsgetpath(ctx, vp);
9519 if (error) {
9520 vnode_put(vp);
9521 goto out;
9522 }
9523 #endif
9524 /* Obtain the absolute path to this vnode. */
9525 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
9526 bpflags |= BUILDPATH_CHECK_MOVED;
9527 error = build_path(vp, realpath, uap->bufsize, &length, bpflags, ctx);
9528 vnode_put(vp);
9529 if (error) {
9530 goto out;
9531 }
9532 AUDIT_ARG(text, realpath);
9533
9534 if (kdebug_enable) {
9535 long dbg_parms[NUMPARMS];
9536 int dbg_namelen;
9537
9538 dbg_namelen = (int)sizeof(dbg_parms);
9539
9540 if (length < dbg_namelen) {
9541 memcpy((char *)dbg_parms, realpath, length);
9542 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
9543
9544 dbg_namelen = length;
9545 } else
9546 memcpy((char *)dbg_parms, realpath + (length - dbg_namelen), dbg_namelen);
9547
9548 kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
9549 }
9550 error = copyout((caddr_t)realpath, uap->buf, length);
9551
9552 *retval = (user_ssize_t)length; /* may be superseded by error */
9553 out:
9554 if (realpath) {
9555 FREE(realpath, M_TEMP);
9556 }
9557 return (error);
9558 }
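
/*
 * A userland sketch of this SPI.  The argument order mirrors the
 * fsgetpath_args fields used above (buf, bufsize, fsid, objid); because the
 * call is private here, the wrapper prototype below is an assumption (later
 * SDKs expose a public fsgetpath() of this shape).
 */
#if 0	/* illustrative only -- userland code, not part of the kernel */
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <stdint.h>

ssize_t fsgetpath(char *buf, size_t bufsize, fsid_t *fsid, uint64_t objid);

static ssize_t
path_for(const char *anypath, char *buf, size_t bufsize)
{
	struct statfs sfs;
	struct stat st;

	/* f_fsid names the volume; st_ino names the object on that volume. */
	if (statfs(anypath, &sfs) == -1 || stat(anypath, &st) == -1)
		return -1;

	return fsgetpath(buf, bufsize, &sfs.f_fsid, (uint64_t)st.st_ino);
}
#endif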
9559
9560 /*
9561 * Common routine to handle various flavors of statfs data heading out
9562 * to user space.
9563 *
9564 * Returns: 0 Success
9565 * EFAULT
9566 */
9567 static int
9568 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
9569 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
9570 boolean_t partial_copy)
9571 {
9572 int error;
9573 int my_size, copy_size;
9574
9575 if (is_64_bit) {
9576 struct user64_statfs sfs;
9577 my_size = copy_size = sizeof(sfs);
9578 bzero(&sfs, my_size);
9579 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
9580 sfs.f_type = mp->mnt_vtable->vfc_typenum;
9581 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
9582 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
9583 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
9584 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
9585 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
9586 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
9587 sfs.f_files = (user64_long_t)sfsp->f_files;
9588 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
9589 sfs.f_fsid = sfsp->f_fsid;
9590 sfs.f_owner = sfsp->f_owner;
9591 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
9592 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
9593 } else {
9594 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
9595 }
9596 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
9597 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
9598
9599 if (partial_copy) {
9600 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
9601 }
9602 error = copyout((caddr_t)&sfs, bufp, copy_size);
9603 }
9604 else {
9605 struct user32_statfs sfs;
9606
9607 my_size = copy_size = sizeof(sfs);
9608 bzero(&sfs, my_size);
9609
9610 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
9611 sfs.f_type = mp->mnt_vtable->vfc_typenum;
9612 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
9613
9614 /*
9615 * It's possible for there to be more than 2^31 blocks in the filesystem, so we
9616 * have to fudge the numbers here in that case. We inflate the blocksize in order
9617 * to reflect the filesystem size as best we can.
9618 */
9619 if ((sfsp->f_blocks > INT_MAX)
9620 /* Hack for 4061702. I think the real fix is for Carbon to
9621 * look for some volume capability and not depend on hidden
9622 * semantics agreed between a FS and Carbon.
9623 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
9624 * for Carbon to set bNoVolumeSizes volume attribute.
9625 * Without this the webdavfs files cannot be copied onto
9626 * disk as they look huge. This change should not affect
9627 * XSAN, as it should not be setting these to -1.
9628 */
9629 && (sfsp->f_blocks != 0xffffffffffffffffULL)
9630 && (sfsp->f_bfree != 0xffffffffffffffffULL)
9631 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
9632 int shift;
9633
9634 /*
9635 * Work out how far we have to shift the block count down to make it fit.
9636 * Note that it's possible to have to shift so far that the resulting
9637 * blocksize would be unreportably large. At that point, we will clip
9638 * any values that don't fit.
9639 *
9640 * For safety's sake, we also ensure that f_iosize is never reported as
9641 * being smaller than f_bsize.
9642 */
9643 for (shift = 0; shift < 32; shift++) {
9644 if ((sfsp->f_blocks >> shift) <= INT_MAX)
9645 break;
9646 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
9647 break;
9648 }
9649 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
9650 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
9651 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
9652 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
9653 #undef __SHIFT_OR_CLIP
9654 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
9655 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
9656 } else {
9657 /* filesystem is small enough to be reported honestly */
9658 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
9659 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
9660 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
9661 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
9662 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
9663 }
9664 sfs.f_files = (user32_long_t)sfsp->f_files;
9665 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
9666 sfs.f_fsid = sfsp->f_fsid;
9667 sfs.f_owner = sfsp->f_owner;
9668 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
9669 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
9670 } else {
9671 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
9672 }
9673 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
9674 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
9675
9676 if (partial_copy) {
9677 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
9678 }
9679 error = copyout((caddr_t)&sfs, bufp, copy_size);
9680 }
9681
9682 if (sizep != NULL) {
9683 *sizep = my_size;
9684 }
9685 return(error);
9686 }
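
/*
 * A standalone worked example of the scaling done in the 32-bit branch above
 * (not used by the kernel): with f_blocks = 3 * 2^31 and f_bsize = 4096 the
 * loop settles on shift = 2, so the reported blocksize becomes 16384 and the
 * block count becomes 3 * 2^29, which fits in a signed 32-bit field.
 */
#if 0	/* illustrative only */
static void
scale_block_counts(uint64_t f_blocks, uint64_t f_bsize,
    int32_t *blocks_out, int32_t *bsize_out)
{
	int shift;

	for (shift = 0; shift < 32; shift++) {
		if ((f_blocks >> shift) <= INT_MAX)
			break;
		if ((f_bsize << (shift + 1)) > INT_MAX)
			break;
	}
	/* Clip anything that still will not fit after shifting. */
	*blocks_out = (int32_t)(((f_blocks >> shift) > INT_MAX) ? INT_MAX
	    : (f_blocks >> shift));
	*bsize_out = (int32_t)(f_bsize << shift);
}
#endif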
9687
9688 /*
9689 * copy stat structure into user_stat structure.
9690 */
9691 void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
9692 {
9693 bzero(usbp, sizeof(*usbp));
9694
9695 usbp->st_dev = sbp->st_dev;
9696 usbp->st_ino = sbp->st_ino;
9697 usbp->st_mode = sbp->st_mode;
9698 usbp->st_nlink = sbp->st_nlink;
9699 usbp->st_uid = sbp->st_uid;
9700 usbp->st_gid = sbp->st_gid;
9701 usbp->st_rdev = sbp->st_rdev;
9702 #ifndef _POSIX_C_SOURCE
9703 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9704 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9705 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9706 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9707 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9708 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9709 #else
9710 usbp->st_atime = sbp->st_atime;
9711 usbp->st_atimensec = sbp->st_atimensec;
9712 usbp->st_mtime = sbp->st_mtime;
9713 usbp->st_mtimensec = sbp->st_mtimensec;
9714 usbp->st_ctime = sbp->st_ctime;
9715 usbp->st_ctimensec = sbp->st_ctimensec;
9716 #endif
9717 usbp->st_size = sbp->st_size;
9718 usbp->st_blocks = sbp->st_blocks;
9719 usbp->st_blksize = sbp->st_blksize;
9720 usbp->st_flags = sbp->st_flags;
9721 usbp->st_gen = sbp->st_gen;
9722 usbp->st_lspare = sbp->st_lspare;
9723 usbp->st_qspare[0] = sbp->st_qspare[0];
9724 usbp->st_qspare[1] = sbp->st_qspare[1];
9725 }
9726
9727 void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
9728 {
9729 bzero(usbp, sizeof(*usbp));
9730
9731 usbp->st_dev = sbp->st_dev;
9732 usbp->st_ino = sbp->st_ino;
9733 usbp->st_mode = sbp->st_mode;
9734 usbp->st_nlink = sbp->st_nlink;
9735 usbp->st_uid = sbp->st_uid;
9736 usbp->st_gid = sbp->st_gid;
9737 usbp->st_rdev = sbp->st_rdev;
9738 #ifndef _POSIX_C_SOURCE
9739 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9740 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9741 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9742 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9743 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9744 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9745 #else
9746 usbp->st_atime = sbp->st_atime;
9747 usbp->st_atimensec = sbp->st_atimensec;
9748 usbp->st_mtime = sbp->st_mtime;
9749 usbp->st_mtimensec = sbp->st_mtimensec;
9750 usbp->st_ctime = sbp->st_ctime;
9751 usbp->st_ctimensec = sbp->st_ctimensec;
9752 #endif
9753 usbp->st_size = sbp->st_size;
9754 usbp->st_blocks = sbp->st_blocks;
9755 usbp->st_blksize = sbp->st_blksize;
9756 usbp->st_flags = sbp->st_flags;
9757 usbp->st_gen = sbp->st_gen;
9758 usbp->st_lspare = sbp->st_lspare;
9759 usbp->st_qspare[0] = sbp->st_qspare[0];
9760 usbp->st_qspare[1] = sbp->st_qspare[1];
9761 }
9762
9763 /*
9764 * copy stat64 structure into user_stat64 structure.
9765 */
9766 void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
9767 {
9768 bzero(usbp, sizeof(*usbp));
9769
9770 usbp->st_dev = sbp->st_dev;
9771 usbp->st_ino = sbp->st_ino;
9772 usbp->st_mode = sbp->st_mode;
9773 usbp->st_nlink = sbp->st_nlink;
9774 usbp->st_uid = sbp->st_uid;
9775 usbp->st_gid = sbp->st_gid;
9776 usbp->st_rdev = sbp->st_rdev;
9777 #ifndef _POSIX_C_SOURCE
9778 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9779 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9780 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9781 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9782 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9783 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9784 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
9785 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
9786 #else
9787 usbp->st_atime = sbp->st_atime;
9788 usbp->st_atimensec = sbp->st_atimensec;
9789 usbp->st_mtime = sbp->st_mtime;
9790 usbp->st_mtimensec = sbp->st_mtimensec;
9791 usbp->st_ctime = sbp->st_ctime;
9792 usbp->st_ctimensec = sbp->st_ctimensec;
9793 usbp->st_birthtime = sbp->st_birthtime;
9794 usbp->st_birthtimensec = sbp->st_birthtimensec;
9795 #endif
9796 usbp->st_size = sbp->st_size;
9797 usbp->st_blocks = sbp->st_blocks;
9798 usbp->st_blksize = sbp->st_blksize;
9799 usbp->st_flags = sbp->st_flags;
9800 usbp->st_gen = sbp->st_gen;
9801 usbp->st_lspare = sbp->st_lspare;
9802 usbp->st_qspare[0] = sbp->st_qspare[0];
9803 usbp->st_qspare[1] = sbp->st_qspare[1];
9804 }
9805
9806 void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
9807 {
9808 bzero(usbp, sizeof(*usbp));
9809
9810 usbp->st_dev = sbp->st_dev;
9811 usbp->st_ino = sbp->st_ino;
9812 usbp->st_mode = sbp->st_mode;
9813 usbp->st_nlink = sbp->st_nlink;
9814 usbp->st_uid = sbp->st_uid;
9815 usbp->st_gid = sbp->st_gid;
9816 usbp->st_rdev = sbp->st_rdev;
9817 #ifndef _POSIX_C_SOURCE
9818 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
9819 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
9820 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
9821 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
9822 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
9823 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
9824 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
9825 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
9826 #else
9827 usbp->st_atime = sbp->st_atime;
9828 usbp->st_atimensec = sbp->st_atimensec;
9829 usbp->st_mtime = sbp->st_mtime;
9830 usbp->st_mtimensec = sbp->st_mtimensec;
9831 usbp->st_ctime = sbp->st_ctime;
9832 usbp->st_ctimensec = sbp->st_ctimensec;
9833 usbp->st_birthtime = sbp->st_birthtime;
9834 usbp->st_birthtimensec = sbp->st_birthtimensec;
9835 #endif
9836 usbp->st_size = sbp->st_size;
9837 usbp->st_blocks = sbp->st_blocks;
9838 usbp->st_blksize = sbp->st_blksize;
9839 usbp->st_flags = sbp->st_flags;
9840 usbp->st_gen = sbp->st_gen;
9841 usbp->st_lspare = sbp->st_lspare;
9842 usbp->st_qspare[0] = sbp->st_qspare[0];
9843 usbp->st_qspare[1] = sbp->st_qspare[1];
9844 }
9845
9846 /*
9847 * Purge buffer cache for simulating cold starts
9848 */
9849 static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
9850 {
9851 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
9852
9853 return VNODE_RETURNED;
9854 }
9855
9856 static int vfs_purge_callback(mount_t mp, __unused void * arg)
9857 {
9858 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
9859
9860 return VFS_RETURNED;
9861 }
9862
9863 int
9864 vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
9865 {
9866 if (!kauth_cred_issuser(kauth_cred_get()))
9867 return EPERM;
9868
9869 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
9870
9871 return 0;
9872 }
9873