]> git.saurik.com Git - apple/xnu.git/blob - bsd/vfs/vfs_syscalls.c
a949a717d8753b67a5fdf51f7c589e42883feeec
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
1 /*
2 * Copyright (c) 1995-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
80 #include <sys/stat.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
87 #include <sys/mman.h>
88 #include <sys/dirent.h>
89 #include <sys/attr.h>
90 #include <sys/sysctl.h>
91 #include <sys/ubc.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <sys/content_protection.h>
103 #include <machine/cons.h>
104 #include <machine/limits.h>
105 #include <miscfs/specfs/specdev.h>
106
107 #include <security/audit/audit.h>
108 #include <bsm/audit_kevents.h>
109
110 #include <mach/mach_types.h>
111 #include <kern/kern_types.h>
112 #include <kern/kalloc.h>
113 #include <kern/task.h>
114
115 #include <vm/vm_pageout.h>
116
117 #include <libkern/OSAtomic.h>
118 #include <pexpert/pexpert.h>
119 #include <IOKit/IOBSD.h>
120
121 #if CONFIG_MACF
122 #include <security/mac.h>
123 #include <security/mac_framework.h>
124 #endif
125
126 #if CONFIG_FSE
127 #define GET_PATH(x) \
128 (x) = get_pathbuff();
129 #define RELEASE_PATH(x) \
130 release_pathbuff(x);
131 #else
132 #define GET_PATH(x) \
133 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
134 #define RELEASE_PATH(x) \
135 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
136 #endif /* CONFIG_FSE */
137
138 /* struct for checkdirs iteration */
139 struct cdirargs {
140 vnode_t olddp;
141 vnode_t newdp;
142 };
143 /* callback for checkdirs iteration */
144 static int checkdirs_callback(proc_t p, void * arg);
145
146 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
147 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
148 void enablequotas(struct mount *mp, vfs_context_t ctx);
149 static int getfsstat_callback(mount_t mp, void * arg);
150 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
151 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
152 static int sync_callback(mount_t, void *);
153 static void sync_thread(void *, __unused wait_result_t);
154 static int sync_async(int);
155 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
156 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
157 boolean_t partial_copy);
158 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
159 user_addr_t bufp);
160 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
161 static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
162 struct componentname *cnp, user_addr_t fsmountargs,
163 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
164 vfs_context_t ctx);
165 void vfs_notify_mount(vnode_t pdvp);
166
167 int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
168
169 struct fd_vn_data * fg_vn_data_alloc(void);
170
171 /*
172 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
173 * Concurrent lookups (or lookups by ids) on hard links can cause the
174 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
175 * does) to return ENOENT as the path cannot be returned from the name cache
176 * alone. We have no option but to retry and hope to get one namei->reverse path
177 * generation done without an intervening lookup, lookup by id on the hard link
178 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
179 * which currently are the MAC hooks for rename, unlink and rmdir.
180 */
181 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
182
183 static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
184
185 static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
186
187 #ifdef CONFIG_IMGSRC_ACCESS
188 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
189 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
190 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
191 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
192 static void mount_end_update(mount_t mp);
193 static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
194 #endif /* CONFIG_IMGSRC_ACCESS */
195
196 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
197
198 __private_extern__
199 int sync_internal(void);
200
201 __private_extern__
202 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
203
204 extern lck_grp_t *fd_vn_lck_grp;
205 extern lck_grp_attr_t *fd_vn_lck_grp_attr;
206 extern lck_attr_t *fd_vn_lck_attr;
207
208 /*
209 * incremented each time a mount or unmount operation occurs
210 * used to invalidate the cached value of the rootvp in the
211 * mount structure utilized by cache_lookup_path
212 */
213 uint32_t mount_generation = 0;
214
215 /* counts number of mount and unmount operations */
216 unsigned int vfs_nummntops=0;
217
218 extern const struct fileops vnops;
219 #if CONFIG_APPLEDOUBLE
220 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
221 #endif /* CONFIG_APPLEDOUBLE */
222
223 typedef uint32_t vfs_rename_flags_t;
224 #if CONFIG_SECLUDED_RENAME
225 enum {
226 VFS_SECLUDE_RENAME = 0x00000001
227 };
228 #endif
229
230 /*
231 * Virtual File System System Calls
232 */
233
234 #if NFSCLIENT || DEVFS
235 /*
236 * Private in-kernel mounting spi (NFS only, not exported)
237 */
238 __private_extern__
239 boolean_t
240 vfs_iskernelmount(mount_t mp)
241 {
242 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
243 }
244
/*
 * kernel_mount:
 * Mount a filesystem on behalf of in-kernel callers (NFS/devfs only;
 * not exported to user space).
 *
 * Parameters:
 *	fstype		filesystem type name (its vfs name)
 *	pvp		parent of the covered vnode, or looked up when vp is NULLVP
 *	vp		vnode to be covered; NULLVP means "resolve from 'path'"
 *	path		kernel-space path to the mount point
 *	data		filesystem-specific mount arguments
 *	datalen		unused
 *	syscall_flags	generic MNT_* mount flags
 *	kern_flags	internal KERNEL_MOUNT_* flags
 *			(NOTE(review): declared __unused but passed through to
 *			mount_common() below — annotation looks stale; confirm)
 *	ctx		caller's vfs context
 *
 * Returns:	0		Success
 *		!0		errno from namei() or mount_common()
 */
__private_extern__
int
kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
    void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
{
	struct nameidata nd;
	boolean_t did_namei;	/* TRUE iff we took the iocounts via namei() */
	int error;

	NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
	    UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);

	/*
	 * Get the vnode to be covered if it's not supplied
	 */
	if (vp == NULLVP) {
		error = namei(&nd);
		if (error)
			return (error);
		vp = nd.ni_vp;
		pvp = nd.ni_dvp;
		did_namei = TRUE;
	} else {
		/*
		 * Caller supplied the covered vnode and its parent; fill in
		 * only the componentname path fields that mount_common()
		 * consumes (cn_pnbuf/cn_pnlen).
		 */
		char *pnbuf = CAST_DOWN(char *, path);

		nd.ni_cnd.cn_pnbuf = pnbuf;
		nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
		did_namei = FALSE;
	}

	error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
	    syscall_flags, kern_flags, NULL, TRUE, ctx);

	/* Only drop references we ourselves acquired through namei(). */
	if (did_namei) {
		vnode_put(vp);
		vnode_put(pvp);
		nameidone(&nd);
	}

	return (error);
}
286 #endif /* NFSCLIENT || DEVFS */
287
288 /*
289 * Mount a file system.
290 */
291 /* ARGSUSED */
292 int
293 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
294 {
295 struct __mac_mount_args muap;
296
297 muap.type = uap->type;
298 muap.path = uap->path;
299 muap.flags = uap->flags;
300 muap.data = uap->data;
301 muap.mac_p = USER_ADDR_NULL;
302 return (__mac_mount(p, &muap, retval));
303 }
304
/*
 * Notify interested parties that a mount has occurred beneath 'pdvp':
 * broadcast a VQ_MOUNT vfs event (no specific mount argument) and post
 * NOTE_WRITE on the parent directory vnode so kqueue/knote watchers of
 * that directory observe the change.
 */
void
vfs_notify_mount(vnode_t pdvp)
{
	vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
	lock_vnode_and_post(pdvp, NOTE_WRITE);
}
311
312 /*
313 * __mac_mount:
314 * Mount a file system taking into account MAC label behavior.
315 * See mount(2) man page for more information
316 *
317 * Parameters: p Process requesting the mount
318 * uap User argument descriptor (see below)
319 * retval (ignored)
320 *
321 * Indirect: uap->type Filesystem type
322 * uap->path Path to mount
323 * uap->data Mount arguments
324 * uap->mac_p MAC info
325 * uap->flags Mount flags
326 *
327 *
328 * Returns: 0 Success
329 * !0 Not success
330 */
331 boolean_t root_fs_upgrade_try = FALSE;
332
int
__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
{
	vnode_t pvp = NULL;		/* parent of the covered vnode */
	vnode_t vp = NULL;		/* vnode to be covered by the mount */
	int need_nameidone = 0;		/* nonzero once namei() succeeded */
	vfs_context_t ctx = vfs_context_current();
	char fstypename[MFSNAMELEN];
	struct nameidata nd;
	size_t dummy=0;
	char *labelstr = NULL;		/* MAC label string copied from user space */
	int flags = uap->flags;
	int error;
#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
	boolean_t is_64bit = IS_64BIT_PROCESS(p);
#else
#pragma unused(p)
#endif
	/*
	 * Get the fs type name from user space
	 */
	error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
	if (error)
		return (error);

	/*
	 * Get the vnode to be covered
	 */
	NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
	    UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error) {
		goto out;
	}
	need_nameidone = 1;
	vp = nd.ni_vp;
	pvp = nd.ni_dvp;

#ifdef CONFIG_IMGSRC_ACCESS
	/* Mounting image source cannot be batched with other operations */
	if (flags == MNT_IMGSRC_BY_INDEX) {
		/*
		 * NOTE(review): the final 'by_index' argument repeats the
		 * condition just tested and is therefore always TRUE on this
		 * path — looks intentional (single entry point) but confirm.
		 */
		error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
		    ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
		goto out;
	}
#endif /* CONFIG_IMGSRC_ACCESS */

#if CONFIG_MACF
	/*
	 * Get the label string (if any) from user space.
	 * A struct user_mac is copied in with the layout appropriate to the
	 * caller's word size, then normalized into 'mac'.
	 */
	if (uap->mac_p != USER_ADDR_NULL) {
		struct user_mac mac;
		size_t ulen = 0;

		if (is_64bit) {
			struct user64_mac mac64;
			error = copyin(uap->mac_p, &mac64, sizeof(mac64));
			mac.m_buflen = mac64.m_buflen;
			mac.m_string = mac64.m_string;
		} else {
			struct user32_mac mac32;
			error = copyin(uap->mac_p, &mac32, sizeof(mac32));
			mac.m_buflen = mac32.m_buflen;
			mac.m_string = mac32.m_string;
		}
		/* copyin failure checked only here, before 'mac' is used */
		if (error)
			goto out;
		/* Label must be at least one character plus NUL, and bounded */
		if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
		    (mac.m_buflen < 2)) {
			error = EINVAL;
			goto out;
		}
		MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
		error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
		if (error) {
			goto out;
		}
		AUDIT_ARG(mac_string, labelstr);
	}
#endif /* CONFIG_MACF */

	AUDIT_ARG(fflags, flags);

	/* Special handling when the covered vnode is the root of the root FS */
	if ((vp->v_flag & VROOT) &&
	    (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
		if (!(flags & MNT_UNION)) {
			/* Re-mounting '/' itself is treated as an update */
			flags |= MNT_UPDATE;
		}
		else {
			/*
			 * For a union mount on '/', treat it as fresh
			 * mount instead of update.
			 * Otherwise, union mouting on '/' used to panic the
			 * system before, since mnt_vnodecovered was found to
			 * be NULL for '/' which is required for unionlookup
			 * after it gets ENOENT on union mount.
			 */
			flags = (flags & ~(MNT_UPDATE));
		}

#ifdef SECURE_KERNEL
		if ((flags & MNT_RDONLY) == 0) {
			/* Release kernels are not allowed to mount "/" as rw */
			error = EPERM;
			goto out;
		}
#endif
		/*
		 * See 7392553 for more details on why this check exists.
		 * Suffice to say: If this check is ON and something tries
		 * to mount the rootFS RW, we'll turn off the codesign
		 * bitmap optimization.
		 */
#if CHECK_CS_VALIDATION_BITMAP
		if ((flags & MNT_RDONLY) == 0 ) {
			root_fs_upgrade_try = TRUE;
		}
#endif
	}

	error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
	    labelstr, FALSE, ctx);

out:

#if CONFIG_MACF
	if (labelstr)
		FREE(labelstr, M_MACTEMP);
#endif /* CONFIG_MACF */

	/* Drop the iocounts and nameidata state taken by namei(), if any */
	if (vp) {
		vnode_put(vp);
	}
	if (pvp) {
		vnode_put(pvp);
	}
	if (need_nameidone) {
		nameidone(&nd);
	}

	return (error);
}
476
477 /*
478 * common mount implementation (final stage of mounting)
 *
480 * Arguments:
481 * fstypename file system type (ie it's vfs name)
482 * pvp parent of covered vnode
483 * vp covered vnode
484 * cnp component name (ie path) of covered vnode
485 * flags generic mount flags
486 * fsmountargs file system specific data
487 * labelstr optional MAC label
488 * kernelmount TRUE for mounts initiated from inside the kernel
489 * ctx caller's context
490 */
491 static int
492 mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
493 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
494 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
495 {
496 #if !CONFIG_MACF
497 #pragma unused(labelstr)
498 #endif
499 struct vnode *devvp = NULLVP;
500 struct vnode *device_vnode = NULLVP;
501 #if CONFIG_MACF
502 struct vnode *rvp;
503 #endif
504 struct mount *mp;
505 struct vfstable *vfsp = (struct vfstable *)0;
506 struct proc *p = vfs_context_proc(ctx);
507 int error, flag = 0;
508 user_addr_t devpath = USER_ADDR_NULL;
509 int ronly = 0;
510 int mntalloc = 0;
511 boolean_t vfsp_ref = FALSE;
512 boolean_t is_rwlock_locked = FALSE;
513 boolean_t did_rele = FALSE;
514 boolean_t have_usecount = FALSE;
515
516 /*
517 * Process an update for an existing mount
518 */
519 if (flags & MNT_UPDATE) {
520 if ((vp->v_flag & VROOT) == 0) {
521 error = EINVAL;
522 goto out1;
523 }
524 mp = vp->v_mount;
525
526 /* unmount in progress return error */
527 mount_lock_spin(mp);
528 if (mp->mnt_lflag & MNT_LUNMOUNT) {
529 mount_unlock(mp);
530 error = EBUSY;
531 goto out1;
532 }
533 mount_unlock(mp);
534 lck_rw_lock_exclusive(&mp->mnt_rwlock);
535 is_rwlock_locked = TRUE;
536 /*
537 * We only allow the filesystem to be reloaded if it
538 * is currently mounted read-only.
539 */
540 if ((flags & MNT_RELOAD) &&
541 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
542 error = ENOTSUP;
543 goto out1;
544 }
545
546 /*
547 * If content protection is enabled, update mounts are not
548 * allowed to turn it off.
549 */
550 if ((mp->mnt_flag & MNT_CPROTECT) &&
551 ((flags & MNT_CPROTECT) == 0)) {
552 error = EINVAL;
553 goto out1;
554 }
555
556 #ifdef CONFIG_IMGSRC_ACCESS
557 /* Can't downgrade the backer of the root FS */
558 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
559 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
560 error = ENOTSUP;
561 goto out1;
562 }
563 #endif /* CONFIG_IMGSRC_ACCESS */
564
565 /*
566 * Only root, or the user that did the original mount is
567 * permitted to update it.
568 */
569 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
570 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
571 goto out1;
572 }
573 #if CONFIG_MACF
574 error = mac_mount_check_remount(ctx, mp);
575 if (error != 0) {
576 goto out1;
577 }
578 #endif
579 /*
580 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
581 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
582 */
583 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
584 flags |= MNT_NOSUID | MNT_NODEV;
585 if (mp->mnt_flag & MNT_NOEXEC)
586 flags |= MNT_NOEXEC;
587 }
588 flag = mp->mnt_flag;
589
590
591
592 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
593
594 vfsp = mp->mnt_vtable;
595 goto update;
596 }
597 /*
598 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
599 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
600 */
601 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
602 flags |= MNT_NOSUID | MNT_NODEV;
603 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
604 flags |= MNT_NOEXEC;
605 }
606
607 /* XXXAUDIT: Should we capture the type on the error path as well? */
608 AUDIT_ARG(text, fstypename);
609 mount_list_lock();
610 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
611 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
612 vfsp->vfc_refcount++;
613 vfsp_ref = TRUE;
614 break;
615 }
616 mount_list_unlock();
617 if (vfsp == NULL) {
618 error = ENODEV;
619 goto out1;
620 }
621
622 /*
623 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
624 */
625 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
626 error = EINVAL; /* unsupported request */
627 goto out1;
628 }
629
630 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
631 if (error != 0) {
632 goto out1;
633 }
634
635 /*
636 * Allocate and initialize the filesystem (mount_t)
637 */
638 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
639 M_MOUNT, M_WAITOK);
640 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
641 mntalloc = 1;
642
643 /* Initialize the default IO constraints */
644 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
645 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
646 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
647 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
648 mp->mnt_devblocksize = DEV_BSIZE;
649 mp->mnt_alignmentmask = PAGE_MASK;
650 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
651 mp->mnt_ioscale = 1;
652 mp->mnt_ioflags = 0;
653 mp->mnt_realrootvp = NULLVP;
654 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
655
656 TAILQ_INIT(&mp->mnt_vnodelist);
657 TAILQ_INIT(&mp->mnt_workerqueue);
658 TAILQ_INIT(&mp->mnt_newvnodes);
659 mount_lock_init(mp);
660 lck_rw_lock_exclusive(&mp->mnt_rwlock);
661 is_rwlock_locked = TRUE;
662 mp->mnt_op = vfsp->vfc_vfsops;
663 mp->mnt_vtable = vfsp;
664 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
665 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
666 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
667 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
668 mp->mnt_vnodecovered = vp;
669 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
670 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
671 mp->mnt_devbsdunit = 0;
672
673 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
674 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
675
676 #if NFSCLIENT || DEVFS
677 if (kernelmount)
678 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
679 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
680 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
681 #endif /* NFSCLIENT || DEVFS */
682
683 update:
684 /*
685 * Set the mount level flags.
686 */
687 if (flags & MNT_RDONLY)
688 mp->mnt_flag |= MNT_RDONLY;
689 else if (mp->mnt_flag & MNT_RDONLY) {
690 // disallow read/write upgrades of file systems that
691 // had the TYPENAME_OVERRIDE feature set.
692 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
693 error = EPERM;
694 goto out1;
695 }
696 mp->mnt_kern_flag |= MNTK_WANTRDWR;
697 }
698 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
699 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
700 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
701 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
702 MNT_QUARANTINE | MNT_CPROTECT);
703 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
704 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
705 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
706 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
707 MNT_QUARANTINE | MNT_CPROTECT);
708
709 #if CONFIG_MACF
710 if (flags & MNT_MULTILABEL) {
711 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
712 error = EINVAL;
713 goto out1;
714 }
715 mp->mnt_flag |= MNT_MULTILABEL;
716 }
717 #endif
718 /*
719 * Process device path for local file systems if requested
720 */
721 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
722 if (vfs_context_is64bit(ctx)) {
723 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
724 goto out1;
725 fsmountargs += sizeof(devpath);
726 } else {
727 user32_addr_t tmp;
728 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
729 goto out1;
730 /* munge into LP64 addr */
731 devpath = CAST_USER_ADDR_T(tmp);
732 fsmountargs += sizeof(tmp);
733 }
734
735 /* Lookup device and authorize access to it */
736 if ((devpath)) {
737 struct nameidata nd;
738
739 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
740 if ( (error = namei(&nd)) )
741 goto out1;
742
743 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
744 devvp = nd.ni_vp;
745
746 nameidone(&nd);
747
748 if (devvp->v_type != VBLK) {
749 error = ENOTBLK;
750 goto out2;
751 }
752 if (major(devvp->v_rdev) >= nblkdev) {
753 error = ENXIO;
754 goto out2;
755 }
756 /*
757 * If mount by non-root, then verify that user has necessary
758 * permissions on the device.
759 */
760 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
761 mode_t accessmode = KAUTH_VNODE_READ_DATA;
762
763 if ((mp->mnt_flag & MNT_RDONLY) == 0)
764 accessmode |= KAUTH_VNODE_WRITE_DATA;
765 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
766 goto out2;
767 }
768 }
769 /* On first mount, preflight and open device */
770 if (devpath && ((flags & MNT_UPDATE) == 0)) {
771 if ( (error = vnode_ref(devvp)) )
772 goto out2;
773 /*
774 * Disallow multiple mounts of the same device.
775 * Disallow mounting of a device that is currently in use
776 * (except for root, which might share swap device for miniroot).
777 * Flush out any old buffers remaining from a previous use.
778 */
779 if ( (error = vfs_mountedon(devvp)) )
780 goto out3;
781
782 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
783 error = EBUSY;
784 goto out3;
785 }
786 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
787 error = ENOTBLK;
788 goto out3;
789 }
790 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
791 goto out3;
792
793 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
794 #if CONFIG_MACF
795 error = mac_vnode_check_open(ctx,
796 devvp,
797 ronly ? FREAD : FREAD|FWRITE);
798 if (error)
799 goto out3;
800 #endif /* MAC */
801 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
802 goto out3;
803
804 mp->mnt_devvp = devvp;
805 device_vnode = devvp;
806
807 } else if ((mp->mnt_flag & MNT_RDONLY) &&
808 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
809 (device_vnode = mp->mnt_devvp)) {
810 dev_t dev;
811 int maj;
812 /*
813 * If upgrade to read-write by non-root, then verify
814 * that user has necessary permissions on the device.
815 */
816 vnode_getalways(device_vnode);
817
818 if (suser(vfs_context_ucred(ctx), NULL) &&
819 (error = vnode_authorize(device_vnode, NULL,
820 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
821 ctx)) != 0) {
822 vnode_put(device_vnode);
823 goto out2;
824 }
825
826 /* Tell the device that we're upgrading */
827 dev = (dev_t)device_vnode->v_rdev;
828 maj = major(dev);
829
830 if ((u_int)maj >= (u_int)nblkdev)
831 panic("Volume mounted on a device with invalid major number.");
832
833 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
834 vnode_put(device_vnode);
835 device_vnode = NULLVP;
836 if (error != 0) {
837 goto out2;
838 }
839 }
840 }
841 #if CONFIG_MACF
842 if ((flags & MNT_UPDATE) == 0) {
843 mac_mount_label_init(mp);
844 mac_mount_label_associate(ctx, mp);
845 }
846 if (labelstr) {
847 if ((flags & MNT_UPDATE) != 0) {
848 error = mac_mount_check_label_update(ctx, mp);
849 if (error != 0)
850 goto out3;
851 }
852 }
853 #endif
854 /*
855 * Mount the filesystem.
856 */
857 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
858
859 if (flags & MNT_UPDATE) {
860 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
861 mp->mnt_flag &= ~MNT_RDONLY;
862 mp->mnt_flag &=~
863 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
864 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
865 if (error)
866 mp->mnt_flag = flag; /* restore flag value */
867 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
868 lck_rw_done(&mp->mnt_rwlock);
869 is_rwlock_locked = FALSE;
870 if (!error)
871 enablequotas(mp, ctx);
872 goto exit;
873 }
874
875 /*
876 * Put the new filesystem on the mount list after root.
877 */
878 if (error == 0) {
879 struct vfs_attr vfsattr;
880 #if CONFIG_MACF
881 if (vfs_flags(mp) & MNT_MULTILABEL) {
882 error = VFS_ROOT(mp, &rvp, ctx);
883 if (error) {
884 printf("%s() VFS_ROOT returned %d\n", __func__, error);
885 goto out3;
886 }
887 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
888 /*
889 * drop reference provided by VFS_ROOT
890 */
891 vnode_put(rvp);
892
893 if (error)
894 goto out3;
895 }
896 #endif /* MAC */
897
898 vnode_lock_spin(vp);
899 CLR(vp->v_flag, VMOUNT);
900 vp->v_mountedhere = mp;
901 vnode_unlock(vp);
902
903 /*
904 * taking the name_cache_lock exclusively will
905 * insure that everyone is out of the fast path who
906 * might be trying to use a now stale copy of
907 * vp->v_mountedhere->mnt_realrootvp
908 * bumping mount_generation causes the cached values
909 * to be invalidated
910 */
911 name_cache_lock();
912 mount_generation++;
913 name_cache_unlock();
914
915 error = vnode_ref(vp);
916 if (error != 0) {
917 goto out4;
918 }
919
920 have_usecount = TRUE;
921
922 error = checkdirs(vp, ctx);
923 if (error != 0) {
924 /* Unmount the filesystem as cdir/rdirs cannot be updated */
925 goto out4;
926 }
927 /*
928 * there is no cleanup code here so I have made it void
929 * we need to revisit this
930 */
931 (void)VFS_START(mp, 0, ctx);
932
933 if (mount_list_add(mp) != 0) {
934 /*
935 * The system is shutting down trying to umount
936 * everything, so fail with a plausible errno.
937 */
938 error = EBUSY;
939 goto out4;
940 }
941 lck_rw_done(&mp->mnt_rwlock);
942 is_rwlock_locked = FALSE;
943
944 /* Check if this mounted file system supports EAs or named streams. */
945 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
946 VFSATTR_INIT(&vfsattr);
947 VFSATTR_WANTED(&vfsattr, f_capabilities);
948 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
949 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
950 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
951 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
952 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
953 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
954 }
955 #if NAMEDSTREAMS
956 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
957 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
958 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
959 }
960 #endif
961 /* Check if this file system supports path from id lookups. */
962 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
963 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
964 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
965 } else if (mp->mnt_flag & MNT_DOVOLFS) {
966 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
967 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
968 }
969 }
970 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
971 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
972 }
973 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
974 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
975 }
976 /* increment the operations count */
977 OSAddAtomic(1, &vfs_nummntops);
978 enablequotas(mp, ctx);
979
980 if (device_vnode) {
981 device_vnode->v_specflags |= SI_MOUNTEDON;
982
983 /*
984 * cache the IO attributes for the underlying physical media...
985 * an error return indicates the underlying driver doesn't
986 * support all the queries necessary... however, reasonable
987 * defaults will have been set, so no reason to bail or care
988 */
989 vfs_init_io_attributes(device_vnode, mp);
990 }
991
992 /* Now that mount is setup, notify the listeners */
993 vfs_notify_mount(pvp);
994 IOBSDMountChange(mp, kIOMountChangeMount);
995
996 } else {
997 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
998 if (mp->mnt_vnodelist.tqh_first != NULL) {
999 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1000 mp->mnt_vtable->vfc_name, error);
1001 }
1002
1003 vnode_lock_spin(vp);
1004 CLR(vp->v_flag, VMOUNT);
1005 vnode_unlock(vp);
1006 mount_list_lock();
1007 mp->mnt_vtable->vfc_refcount--;
1008 mount_list_unlock();
1009
1010 if (device_vnode ) {
1011 vnode_rele(device_vnode);
1012 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
1013 }
1014 lck_rw_done(&mp->mnt_rwlock);
1015 is_rwlock_locked = FALSE;
1016
1017 /*
1018 * if we get here, we have a mount structure that needs to be freed,
1019 * but since the coveredvp hasn't yet been updated to point at it,
1020 * no need to worry about other threads holding a crossref on this mp
1021 * so it's ok to just free it
1022 */
1023 mount_lock_destroy(mp);
1024 #if CONFIG_MACF
1025 mac_mount_label_destroy(mp);
1026 #endif
1027 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1028 }
1029 exit:
1030 /*
1031 * drop I/O count on the device vp if there was one
1032 */
1033 if (devpath && devvp)
1034 vnode_put(devvp);
1035
1036 return(error);
1037
1038 /* Error condition exits */
1039 out4:
1040 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
1041
1042 /*
1043 * If the mount has been placed on the covered vp,
1044 * it may have been discovered by now, so we have
1045 * to treat this just like an unmount
1046 */
1047 mount_lock_spin(mp);
1048 mp->mnt_lflag |= MNT_LDEAD;
1049 mount_unlock(mp);
1050
1051 if (device_vnode != NULLVP) {
1052 vnode_rele(device_vnode);
1053 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1054 ctx);
1055 did_rele = TRUE;
1056 }
1057
1058 vnode_lock_spin(vp);
1059
1060 mp->mnt_crossref++;
1061 vp->v_mountedhere = (mount_t) 0;
1062
1063 vnode_unlock(vp);
1064
1065 if (have_usecount) {
1066 vnode_rele(vp);
1067 }
1068 out3:
1069 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
1070 vnode_rele(devvp);
1071 out2:
1072 if (devpath && devvp)
1073 vnode_put(devvp);
1074 out1:
1075 /* Release mnt_rwlock only when it was taken */
1076 if (is_rwlock_locked == TRUE) {
1077 lck_rw_done(&mp->mnt_rwlock);
1078 }
1079
1080 if (mntalloc) {
1081 if (mp->mnt_crossref)
1082 mount_dropcrossref(mp, vp, 0);
1083 else {
1084 mount_lock_destroy(mp);
1085 #if CONFIG_MACF
1086 mac_mount_label_destroy(mp);
1087 #endif
1088 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1089 }
1090 }
1091 if (vfsp_ref) {
1092 mount_list_lock();
1093 vfsp->vfc_refcount--;
1094 mount_list_unlock();
1095 }
1096
1097 return(error);
1098 }
1099
/*
 * Flush in-core data, check for competing mount attempts,
 * and set VMOUNT on the covered vnode.
 *
 * Called with an iocount held on 'vp'.  On success the vnode is marked
 * VMOUNT so concurrent mount attempts on the same directory lose the
 * race; the caller is responsible for clearing the flag on any later
 * failure.  Returns 0 or an errno (EPERM/ENOTDIR/EBUSY/...).
 */
int
prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
{
#if !CONFIG_MACF
#pragma unused(cnp,fsname)
#endif
	struct vnode_attr va;
	int error;

	if (!skip_auth) {
		/*
		 * If the user is not root, ensure that they own the directory
		 * onto which we are attempting to mount.
		 */
		VATTR_INIT(&va);
		VATTR_WANTED(&va, va_uid);
		if ((error = vnode_getattr(vp, &va, ctx)) ||
		    (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
		     (!vfs_context_issuser(ctx)))) {
			/* any getattr failure is folded into EPERM here */
			error = EPERM;
			goto out;
		}
	}

	/* push dirty data and invalidate cached buffers before covering vp */
	if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
		goto out;

	if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
		goto out;

	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

	/* busy only when another mount has fully landed on this vnode */
	if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
		error = EBUSY;
		goto out;
	}

#if CONFIG_MACF
	error = mac_mount_check_mount(ctx, vp,
	    cnp, fsname);
	if (error != 0)
		goto out;
#endif

	vnode_lock_spin(vp);
	SET(vp->v_flag, VMOUNT);
	vnode_unlock(vp);

out:
	return error;
}
1158
1159 #if CONFIG_IMGSRC_ACCESS
1160
1161 #if DEBUG
1162 #define IMGSRC_DEBUG(args...) printf(args)
1163 #else
1164 #define IMGSRC_DEBUG(args...) do { } while(0)
1165 #endif
1166
/*
 * Resolve the user-supplied device path, verify it names the same block
 * device that backs 'mp', and (for non-root callers) authorize
 * read[/write] access to it.
 *
 * On success, f_mntfromname is updated to the new path and *devvpp holds
 * the device vnode with an iocount that the caller must vnode_put().
 * On failure the lookup's iocount is dropped here.
 */
static int
authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
{
	struct nameidata nd;
	vnode_t vp, realdevvp;
	mode_t accessmode;
	int error;

	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
	if ( (error = namei(&nd)) ) {
		IMGSRC_DEBUG("namei() failed with %d\n", error);
		return error;
	}

	vp = nd.ni_vp;	/* iocount held; transferred to caller on success */

	if (!vnode_isblk(vp)) {
		IMGSRC_DEBUG("Not block device.\n");
		error = ENOTBLK;
		goto out;
	}

	realdevvp = mp->mnt_devvp;
	if (realdevvp == NULLVP) {
		IMGSRC_DEBUG("No device backs the mount.\n");
		error = ENXIO;
		goto out;
	}

	error = vnode_getwithref(realdevvp);
	if (error != 0) {
		IMGSRC_DEBUG("Coudn't get iocount on device.\n");
		goto out;
	}

	/* the supplied path must name the very device backing the mount */
	if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
		IMGSRC_DEBUG("Wrong dev_t.\n");
		error = ENXIO;
		goto out1;
	}

	strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	if (!vfs_context_issuser(ctx)) {
		accessmode = KAUTH_VNODE_READ_DATA;
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			accessmode |= KAUTH_VNODE_WRITE_DATA;
		if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
			IMGSRC_DEBUG("Access denied.\n");
			goto out1;
		}
	}

	*devvpp = vp;

out1:
	vnode_put(realdevvp);
out:
	nameidone(&nd);
	if (error) {
		/* error paths drop the lookup iocount; success keeps it for the caller */
		vnode_put(vp);
	}

	return error;
}
1236
/*
 * Clear VMOUNT, set v_mountedhere and mnt_vnodecovered, take a usecount
 * on the covered vnode, and call checkdirs() so any process whose
 * cwd/root was 'vp' is moved onto the new mount's root.
 *
 * On failure mnt_vnodecovered is reset, but VMOUNT has already been
 * cleared and v_mountedhere set — so the caller must clean up with
 * undo_place_on_covered_vp()-style logic, not by just clearing VMOUNT.
 */
static int
place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
{
	int error;

	mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */

	vnode_lock_spin(vp);
	CLR(vp->v_flag, VMOUNT);
	vp->v_mountedhere = mp;
	vnode_unlock(vp);

	/*
	 * taking the name_cache_lock exclusively will
	 * insure that everyone is out of the fast path who
	 * might be trying to use a now stale copy of
	 * vp->v_mountedhere->mnt_realrootvp
	 * bumping mount_generation causes the cached values
	 * to be invalidated
	 */
	name_cache_lock();
	mount_generation++;
	name_cache_unlock();

	/* usecount keeps the covered vnode alive while the mount sits on it */
	error = vnode_ref(vp);
	if (error != 0) {
		goto out;
	}

	error = checkdirs(vp, ctx);
	if (error != 0) {
		/* Unmount the filesystem as cdir/rdirs cannot be updated */
		vnode_rele(vp);
		goto out;
	}

out:
	if (error != 0) {
		mp->mnt_vnodecovered = NULLVP;
	}
	return error;
}
1283
/*
 * Reverse place_mount_and_checkdirs(): drop the usecount taken on the
 * covered vnode, detach the mount from it, and clear the mount's
 * back-pointer.
 */
static void
undo_place_on_covered_vp(mount_t mp, vnode_t vp)
{
	/* release the usecount taken when the mount was placed */
	vnode_rele(vp);
	vnode_lock_spin(vp);
	vp->v_mountedhere = (mount_t)NULL;
	vnode_unlock(vp);

	mp->mnt_vnodecovered = NULLVP;
}
1294
1295 static int
1296 mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1297 {
1298 int error;
1299
1300 /* unmount in progress return error */
1301 mount_lock_spin(mp);
1302 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1303 mount_unlock(mp);
1304 return EBUSY;
1305 }
1306 mount_unlock(mp);
1307 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1308
1309 /*
1310 * We only allow the filesystem to be reloaded if it
1311 * is currently mounted read-only.
1312 */
1313 if ((flags & MNT_RELOAD) &&
1314 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1315 error = ENOTSUP;
1316 goto out;
1317 }
1318
1319 /*
1320 * Only root, or the user that did the original mount is
1321 * permitted to update it.
1322 */
1323 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1324 (!vfs_context_issuser(ctx))) {
1325 error = EPERM;
1326 goto out;
1327 }
1328 #if CONFIG_MACF
1329 error = mac_mount_check_remount(ctx, mp);
1330 if (error != 0) {
1331 goto out;
1332 }
1333 #endif
1334
1335 out:
1336 if (error) {
1337 lck_rw_done(&mp->mnt_rwlock);
1338 }
1339
1340 return error;
1341 }
1342
/*
 * Release the exclusive mount rwlock taken by mount_begin_update().
 */
static void
mount_end_update(mount_t mp)
{
	lck_rw_done(&mp->mnt_rwlock);
}
1348
1349 static int
1350 get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1351 {
1352 vnode_t vp;
1353
1354 if (height >= MAX_IMAGEBOOT_NESTING) {
1355 return EINVAL;
1356 }
1357
1358 vp = imgsrc_rootvnodes[height];
1359 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1360 *rvpp = vp;
1361 return 0;
1362 } else {
1363 return ENOENT;
1364 }
1365 }
1366
/*
 * Relocate the imageboot source filesystem (recorded at boot in
 * imgsrc_rootvnodes[]) so that it covers 'vp' instead of its original
 * mount point.  Root-only.  A given mount may be moved at most once,
 * enforced via MNTK_HAS_MOVED under the mount rwlock.
 *
 * 'by_index' selects the new-style mnt_imgsrc_args layout (nesting
 * height + flags + device path); the legacy path assumes height 0 and
 * reads only a device-path pointer from 'fsmountargs'.
 */
static int
relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
		const char *fsname, vfs_context_t ctx,
		boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
{
	int error;
	mount_t mp;
	boolean_t placed = FALSE;
	vnode_t devvp = NULLVP;
	struct vfstable *vfsp;
	user_addr_t devpath;
	char *old_mntonname;
	vnode_t rvp;
	uint32_t height;
	uint32_t flags;

	/* If we didn't imageboot, nothing to move */
	if (imgsrc_rootvnodes[0] == NULLVP) {
		return EINVAL;
	}

	/* Only root can do this */
	if (!vfs_context_issuser(ctx)) {
		return EPERM;
	}

	IMGSRC_DEBUG("looking for root vnode.\n");

	/*
	 * Get root vnode of filesystem we're moving.
	 */
	if (by_index) {
		if (is64bit) {
			struct user64_mnt_imgsrc_args mia64;
			error = copyin(fsmountargs, &mia64, sizeof(mia64));
			if (error != 0) {
				IMGSRC_DEBUG("Failed to copy in arguments.\n");
				return error;
			}

			height = mia64.mi_height;
			flags = mia64.mi_flags;
			devpath = mia64.mi_devpath;
		} else {
			struct user32_mnt_imgsrc_args mia32;
			error = copyin(fsmountargs, &mia32, sizeof(mia32));
			if (error != 0) {
				IMGSRC_DEBUG("Failed to copy in arguments.\n");
				return error;
			}

			height = mia32.mi_height;
			flags = mia32.mi_flags;
			devpath = mia32.mi_devpath;
		}
	} else {
		/*
		 * For binary compatibility--assumes one level of nesting.
		 */
		if (is64bit) {
			if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
				return error;
		} else {
			user32_addr_t tmp;
			if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
				return error;

			/* munge into LP64 addr */
			devpath = CAST_USER_ADDR_T(tmp);
		}

		height = 0;
		flags = 0;
	}

	if (flags != 0) {
		IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
		return EINVAL;
	}

	error = get_imgsrc_rootvnode(height, &rvp);
	if (error != 0) {
		IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
		return error;
	}

	IMGSRC_DEBUG("got root vnode.\n");

	/* scratch copy so f_mntonname can be restored if we fail late */
	MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);

	/* Can only move once */
	mp = vnode_mount(rvp);
	if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
		IMGSRC_DEBUG("Already moved.\n");
		error = EBUSY;
		goto out0;
	}

	IMGSRC_DEBUG("Starting updated.\n");

	/* Get exclusive rwlock on mount, authorize update on mp */
	error = mount_begin_update(mp , ctx, 0);
	if (error != 0) {
		IMGSRC_DEBUG("Starting updated failed with %d\n", error);
		goto out0;
	}

	/*
	 * It can only be moved once. Flag is set under the rwlock,
	 * so we're now safe to proceed.
	 */
	if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
		IMGSRC_DEBUG("Already moved [2]\n");
		goto out1;
	}


	IMGSRC_DEBUG("Preparing coveredvp.\n");

	/* Mark covered vnode as mount in progress, authorize placing mount on top */
	error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
	if (error != 0) {
		IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
		goto out1;
	}

	IMGSRC_DEBUG("Covered vp OK.\n");

	/* Sanity check the name caller has provided */
	vfsp = mp->mnt_vtable;
	if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
		IMGSRC_DEBUG("Wrong fs name.\n");
		error = EINVAL;
		goto out2;
	}

	/* Check the device vnode and update mount-from name, for local filesystems */
	if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
		IMGSRC_DEBUG("Local, doing device validation.\n");

		if (devpath != USER_ADDR_NULL) {
			error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
			if (error) {
				IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
				goto out2;
			}

			/* only needed for validation; drop the iocount now */
			vnode_put(devvp);
		}
	}

	/*
	 * Place mp on top of vnode, ref the vnode, call checkdirs(),
	 * and increment the name cache's mount generation
	 */

	IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
	error = place_mount_and_checkdirs(mp, vp, ctx);
	if (error != 0) {
		goto out2;
	}

	placed = TRUE;

	strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
	strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);

	/* Forbid future moves */
	mount_lock(mp);
	mp->mnt_kern_flag |= MNTK_HAS_MOVED;
	mount_unlock(mp);

	/* Finally, add to mount list, completely ready to go */
	if (mount_list_add(mp) != 0) {
		/*
		 * The system is shutting down trying to umount
		 * everything, so fail with a plausible errno.
		 */
		error = EBUSY;
		goto out3;
	}

	mount_end_update(mp);
	vnode_put(rvp);
	FREE(old_mntonname, M_TEMP);

	vfs_notify_mount(pvp);

	return 0;
out3:
	/* restore the original mount-on name and re-allow future moves */
	strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);

	mount_lock(mp);
	mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
	mount_unlock(mp);

out2:
	/*
	 * Placing the mp on the vnode clears VMOUNT,
	 * so cleanup is different after that point
	 */
	if (placed) {
		/* Rele the vp, clear VMOUNT and v_mountedhere */
		undo_place_on_covered_vp(mp, vp);
	} else {
		vnode_lock_spin(vp);
		CLR(vp->v_flag, VMOUNT);
		vnode_unlock(vp);
	}
out1:
	mount_end_update(mp);

out0:
	vnode_put(rvp);
	FREE(old_mntonname, M_TEMP);
	return error;
}
1584
1585 #endif /* CONFIG_IMGSRC_ACCESS */
1586
/*
 * Turn on disk quotas for 'mp' if the per-type quota trigger files are
 * present.  Errors are deliberately ignored: quota setup must not
 * interfere with completing the mount.
 */
void
enablequotas(struct mount *mp, vfs_context_t ctx)
{
	struct nameidata qnd;
	int type;
	char qfpath[MAXPATHLEN];
	const char *qfname = QUOTAFILENAME;
	const char *qfopsname = QUOTAOPSNAME;
	const char *qfextension[] = INITQFNAMES;

	/* XXX Should be an MNTK_ flag, instead of strncmp()'s */
	if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
		return;
	}
	/*
	 * Enable filesystem disk quotas if necessary.
	 * We ignore errors as this should not interfere with final mount
	 */
	for (type=0; type < MAXQUOTAS; type++) {
		/* look for the per-type "quota on" trigger file first */
		snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
		NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
		       CAST_USER_ADDR_T(qfpath), ctx);
		if (namei(&qnd) != 0)
			continue; 	    /* option file to trigger quotas is not present */
		vnode_put(qnd.ni_vp);
		nameidone(&qnd);
		/* then point quotaon at the actual quota data file */
		snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);

		(void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
	}
	return;
}
1619
1620
/*
 * proc_iterate() callback for checkdirs(): if this process's current or
 * root directory is the newly-covered vnode (olddp), swap it for the
 * root of the mount now covering it (newdp), fixing refcounts.
 */
static int
checkdirs_callback(proc_t p, void * arg)
{
	struct cdirargs * cdrp = (struct cdirargs * )arg;
	vnode_t olddp = cdrp->olddp;
	vnode_t newdp = cdrp->newdp;
	struct filedesc *fdp;
	vnode_t tvp;
	vnode_t fdp_cvp;
	vnode_t fdp_rvp;
	int cdir_changed = 0;
	int rdir_changed = 0;

	/*
	 * XXX Also needs to iterate each thread in the process to see if it
	 * XXX is using a per-thread current working directory, and, if so,
	 * XXX update that as well.
	 */

	/* snapshot cwd/root under the fd lock */
	proc_fdlock(p);
	fdp = p->p_fd;
	if (fdp == (struct filedesc *)0) {
		proc_fdunlock(p);
		return(PROC_RETURNED);
	}
	fdp_cvp = fdp->fd_cdir;
	fdp_rvp = fdp->fd_rdir;
	proc_fdunlock(p);

	if (fdp_cvp == olddp) {
		/* take the new ref before dropping the old one */
		vnode_ref(newdp);
		/*
		 * NOTE(review): fd_cdir is re-read here without the fd lock;
		 * it is presumably still equal to the fdp_cvp snapshot, but
		 * a concurrent chdir could race -- confirm this is benign.
		 */
		tvp = fdp->fd_cdir;
		fdp_cvp = newdp;
		cdir_changed = 1;
		vnode_rele(tvp);
	}
	if (fdp_rvp == olddp) {
		vnode_ref(newdp);
		/* NOTE(review): same unlocked re-read as fd_cdir above. */
		tvp = fdp->fd_rdir;
		fdp_rvp = newdp;
		rdir_changed = 1;
		vnode_rele(tvp);
	}
	if (cdir_changed || rdir_changed) {
		/* publish the updated directories back under the fd lock */
		proc_fdlock(p);
		fdp->fd_cdir = fdp_cvp;
		fdp->fd_rdir = fdp_rvp;
		proc_fdunlock(p);
	}
	return(PROC_RETURNED);
}
1672
1673
1674
/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted. If so, replace them with the new mount point.
 *
 * 'olddp' is the covered vnode; the replacement is the root vnode of
 * olddp->v_mountedhere.  Also migrates the system-wide rootvnode.
 */
static int
checkdirs(vnode_t olddp, vfs_context_t ctx)
{
	vnode_t newdp;
	vnode_t tvp;
	int err;
	struct cdirargs cdr;

	/* usecount of 1 means nothing but the mount references olddp */
	if (olddp->v_usecount == 1)
		return(0);
	err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);

	if (err != 0) {
#if DIAGNOSTIC
		panic("mount: lost mount: error %d", err);
#endif
		return(err);
	}

	cdr.olddp = olddp;
	cdr.newdp = newdp;
	/* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
	proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);

	/* if the system root itself was covered, move it too */
	if (rootvnode == olddp) {
		vnode_ref(newdp);
		tvp = rootvnode;
		rootvnode = newdp;
		vnode_rele(tvp);
	}

	vnode_put(newdp);
	return(0);
}
1714
/*
 * Unmount a file system.
 *
 * Note: unmount takes a path to the vnode mounted on as argument,
 * not special file (as before).
 */
/* ARGSUSED */
int
unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct mount *mp;
	int error;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();

	NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
	       UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;
	mp = vp->v_mount;
	nameidone(&nd);

#if CONFIG_MACF
	error = mac_mount_check_umount(ctx, mp);
	if (error != 0) {
		vnode_put(vp);
		return (error);
	}
#endif
	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & VROOT) == 0) {
		vnode_put(vp);
		return (EINVAL);
	}
	/* swap the lookup's iocount for a mount ref before handing off */
	mount_ref(mp, 0);
	vnode_put(vp);
	/* safedounmount consumes the mount ref */
	return (safedounmount(mp, uap->flags, ctx));
}
1759
1760 int
1761 vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
1762 {
1763 mount_t mp;
1764
1765 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1766 if (mp == (mount_t)0) {
1767 return(ENOENT);
1768 }
1769 mount_ref(mp, 0);
1770 mount_iterdrop(mp);
1771 /* safedounmount consumes the mount ref */
1772 return(safedounmount(mp, flags, ctx));
1773 }
1774
1775
1776 /*
1777 * The mount struct comes with a mount ref which will be consumed.
1778 * Do the actual file system unmount, prevent some common foot shooting.
1779 */
1780 int
1781 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1782 {
1783 int error;
1784 proc_t p = vfs_context_proc(ctx);
1785
1786 /*
1787 * If the file system is not responding and MNT_NOBLOCK
1788 * is set and not a forced unmount then return EBUSY.
1789 */
1790 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1791 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1792 error = EBUSY;
1793 goto out;
1794 }
1795
1796 /*
1797 * Skip authorization if the mount is tagged as permissive and
1798 * this is not a forced-unmount attempt.
1799 */
1800 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1801 /*
1802 * Only root, or the user that did the original mount is
1803 * permitted to unmount this filesystem.
1804 */
1805 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1806 (error = suser(kauth_cred_get(), &p->p_acflag)))
1807 goto out;
1808 }
1809 /*
1810 * Don't allow unmounting the root file system.
1811 */
1812 if (mp->mnt_flag & MNT_ROOTFS) {
1813 error = EBUSY; /* the root is always busy */
1814 goto out;
1815 }
1816
1817 #ifdef CONFIG_IMGSRC_ACCESS
1818 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1819 error = EBUSY;
1820 goto out;
1821 }
1822 #endif /* CONFIG_IMGSRC_ACCESS */
1823
1824 return (dounmount(mp, flags, 1, ctx));
1825
1826 out:
1827 mount_drop(mp, 0);
1828 return(error);
1829 }
1830
/*
 * Do the actual file system unmount.
 *
 * 'flags' carries the MNT_* unmount flags (MNT_FORCE, MNT_NOBLOCK and
 * the internal MNT_LNOSUB recursion guard); 'withref' says the caller
 * passed in a mount ref to be dropped here.  On success the mount is
 * torn down (freed directly for the rootfs case, otherwise via the
 * crossref machinery).  On failure the mount is returned to service
 * with MNTK_UNMOUNT/MNT_LUNMOUNT/MNT_LFORCE cleared.
 */
int
dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
{
	vnode_t coveredvp = (vnode_t)0;
	int error;
	int needwakeup = 0;
	int forcedunmount = 0;
	int lflags = 0;
	struct vnode *devvp = NULLVP;
#if CONFIG_TRIGGERS
	proc_t p = vfs_context_proc(ctx);
	int did_vflush = 0;
	int pflags_save = 0;
#endif /* CONFIG_TRIGGERS */

	mount_lock(mp);

	/*
	 * If already an unmount in progress just return EBUSY.
	 * Even a forced unmount cannot override.
	 */
	if (mp->mnt_lflag & MNT_LUNMOUNT) {
		if (withref != 0)
			mount_drop(mp, 1);
		mount_unlock(mp);
		return (EBUSY);
	}

	if (flags & MNT_FORCE) {
		forcedunmount = 1;
		mp->mnt_lflag |= MNT_LFORCE;
	}

#if CONFIG_TRIGGERS
	/* keep this process from hanging on unresponsive remote FSes */
	if (flags & MNT_NOBLOCK && p != kernproc)
		pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
#endif

	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	mp->mnt_lflag |= MNT_LUNMOUNT;
	mp->mnt_flag &=~ MNT_ASYNC;
	/*
	 * anyone currently in the fast path that
	 * trips over the cached rootvp will be
	 * dumped out and forced into the slow path
	 * to regenerate a new cached value
	 */
	mp->mnt_realrootvp = NULLVP;
	mount_unlock(mp);

	if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
		/*
		 * Force unmount any mounts in this filesystem.
		 * If any unmounts fail - just leave them dangling.
		 * Avoids recursion.
		 */
		(void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
	}

	/*
	 * taking the name_cache_lock exclusively will
	 * insure that everyone is out of the fast path who
	 * might be trying to use a now stale copy of
	 * vp->v_mountedhere->mnt_realrootvp
	 * bumping mount_generation causes the cached values
	 * to be invalidated
	 */
	name_cache_lock();
	mount_generation++;
	name_cache_unlock();


	lck_rw_lock_exclusive(&mp->mnt_rwlock);
	if (withref != 0)
		mount_drop(mp, 0);
#if CONFIG_FSE
	fsevent_unmount(mp); /* has to come first! */
#endif
	error = 0;
	if (forcedunmount == 0) {
		ubc_umount(mp); /* release cached vnodes */
		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
			error = VFS_SYNC(mp, MNT_WAIT, ctx);
			if (error) {
				/* sync failed: back out the unmount state and bail */
				mount_lock(mp);
				mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
				mp->mnt_lflag &= ~MNT_LUNMOUNT;
				mp->mnt_lflag &= ~MNT_LFORCE;
				goto out;
			}
		}
	}

	IOBSDMountChange(mp, kIOMountChangeUnmount);

#if CONFIG_TRIGGERS
	vfs_nested_trigger_unmounts(mp, flags, ctx);
	did_vflush = 1;
#endif
	if (forcedunmount)
		lflags |= FORCECLOSE;
	error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
	if ((forcedunmount == 0) && error) {
		/* busy vnodes remain: back out the unmount state and bail */
		mount_lock(mp);
		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
		mp->mnt_lflag &= ~MNT_LUNMOUNT;
		mp->mnt_lflag &= ~MNT_LFORCE;
		goto out;
	}

	/* make sure there are no one in the mount iterations or lookup */
	mount_iterdrain(mp);

	error = VFS_UNMOUNT(mp, flags, ctx);
	if (error) {
		/* the filesystem refused: re-enable iteration and back out */
		mount_iterreset(mp);
		mount_lock(mp);
		mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
		mp->mnt_lflag &= ~MNT_LUNMOUNT;
		mp->mnt_lflag &= ~MNT_LFORCE;
		goto out;
	}

	/* increment the operations count */
	if (!error)
		OSAddAtomic(1, &vfs_nummntops);

	if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
		/* hold an io reference and drop the usecount before close */
		devvp = mp->mnt_devvp;
		vnode_getalways(devvp);
		vnode_rele(devvp);
		VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
		    ctx);
		vnode_clearmountedon(devvp);
		vnode_put(devvp);
	}
	lck_rw_done(&mp->mnt_rwlock);
	mount_list_remove(mp);
	lck_rw_lock_exclusive(&mp->mnt_rwlock);

	/* mark the mount point hook in the vp but not drop the ref yet */
	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
		/*
		 * The covered vnode needs special handling. Trying to get an
		 * iocount must not block here as this may lead to deadlocks
		 * if the Filesystem to which the covered vnode belongs is
		 * undergoing forced unmounts. Since we hold a usecount, the
		 * vnode cannot be reused (it can, however, still be terminated)
		 */
		vnode_getalways(coveredvp);
		vnode_lock_spin(coveredvp);

		mp->mnt_crossref++;
		coveredvp->v_mountedhere = (struct mount *)0;
		CLR(coveredvp->v_flag, VMOUNT);

		vnode_unlock(coveredvp);
		vnode_put(coveredvp);
	}

	mount_list_lock();
	mp->mnt_vtable->vfc_refcount--;
	mount_list_unlock();

	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
	mount_lock(mp);
	mp->mnt_lflag |= MNT_LDEAD;

	if (mp->mnt_lflag & MNT_LWAIT) {
		/*
		 * do the wakeup here
		 * in case we block in mount_refdrain
		 * which will drop the mount lock
		 * and allow anyone blocked in vfs_busy
		 * to wakeup and see the LDEAD state
		 */
		mp->mnt_lflag &= ~MNT_LWAIT;
		wakeup((caddr_t)mp);
	}
	mount_refdrain(mp);
out:
	if (mp->mnt_lflag & MNT_LWAIT) {
		mp->mnt_lflag &= ~MNT_LWAIT;
		needwakeup = 1;
	}

#if CONFIG_TRIGGERS
	if (flags & MNT_NOBLOCK && p != kernproc) {
		// Restore P_NOREMOTEHANG bit to its previous value
		if ((pflags_save & P_NOREMOTEHANG) == 0)
			OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
	}

	/*
	 * Callback and context are set together under the mount lock, and
	 * never cleared, so we're safe to examine them here, drop the lock,
	 * and call out.
	 */
	if (mp->mnt_triggercallback != NULL) {
		mount_unlock(mp);
		if (error == 0) {
			mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
		} else if (did_vflush) {
			mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
		}
	} else {
		mount_unlock(mp);
	}
#else
	mount_unlock(mp);
#endif /* CONFIG_TRIGGERS */

	lck_rw_done(&mp->mnt_rwlock);

	if (needwakeup)
		wakeup((caddr_t)mp);

	if (!error) {
		if ((coveredvp != NULLVP)) {
			vnode_t pvp = NULLVP;

			/*
			 * The covered vnode needs special handling. Trying to
			 * get an iocount must not block here as this may lead
			 * to deadlocks if the Filesystem to which the covered
			 * vnode belongs is undergoing forced unmounts. Since we
			 * hold a usecount, the vnode cannot be reused
			 * (it can, however, still be terminated).
			 */
			vnode_getalways(coveredvp);

			mount_dropcrossref(mp, coveredvp, 0);
			/*
			 * We'll _try_ to detect if this really needs to be
			 * done. The coveredvp can only be in termination (or
			 * terminated) if the coveredvp's mount point is in a
			 * forced unmount (or has been) since we still hold the
			 * ref.
			 */
			if (!vnode_isrecycled(coveredvp)) {
				pvp = vnode_getparent(coveredvp);
#if CONFIG_TRIGGERS
				if (coveredvp->v_resolve) {
					vnode_trigger_rearm(coveredvp, ctx);
				}
#endif
			}

			vnode_rele(coveredvp);
			vnode_put(coveredvp);
			coveredvp = NULLVP;

			if (pvp) {
				/* notify watchers of the parent directory */
				lock_vnode_and_post(pvp, NOTE_WRITE);
				vnode_put(pvp);
			}
		} else if (mp->mnt_flag & MNT_ROOTFS) {
			/* rootfs has no covered vnode; free the mount here */
			mount_lock_destroy(mp);
#if CONFIG_MACF
			mac_mount_label_destroy(mp);
#endif
			FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
		} else
			panic("dounmount: no coveredvp");
	}
	return (error);
}
2103
/*
 * Unmount any mounts in this filesystem.
 *
 * Builds an array of the fsids of every mount that (transitively) sits
 * on top of 'mp', then unmounts them deepest-first.  Best-effort:
 * allocation failure or individual unmount errors are ignored.
 */
void
dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
{
	mount_t smp;
	fsid_t *fsids, fsid;
	int fsids_sz;
	int count = 0, i, m = 0;
	vnode_t vp;

	mount_list_lock();

	// Get an array to hold the submounts fsids.
	TAILQ_FOREACH(smp, &mountlist, mnt_list)
		count++;
	fsids_sz = count * sizeof(fsid_t);
	/* M_NOWAIT: we hold mount_list_lock and must not sleep */
	MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
	if (fsids == NULL) {
		mount_list_unlock();
		goto out;
	}
	fsids[0] = mp->mnt_vfsstat.f_fsid;	// Prime the pump

	/*
	 * Fill the array with submount fsids.
	 * Since mounts are always added to the tail of the mount list, the
	 * list is always in mount order.
	 * For each mount check if the mounted-on vnode belongs to a
	 * mount that's already added to our array of mounts to be unmounted.
	 */
	for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
		vp = smp->mnt_vnodecovered;
		if (vp == NULL)
			continue;
		fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid;	// Underlying fsid
		for (i = 0; i <= m; i++) {
			if (fsids[i].val[0] == fsid.val[0] &&
			    fsids[i].val[1] == fsid.val[1]) {
				/* transitively covered by mp: remember it */
				fsids[++m] = smp->mnt_vfsstat.f_fsid;
				break;
			}
		}
	}
	mount_list_unlock();

	// Unmount the submounts in reverse order. Ignore errors.
	for (i = m; i > 0; i--) {
		smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
		if (smp) {
			mount_ref(smp, 0);
			mount_iterdrop(smp);
			(void) dounmount(smp, flags, 1, ctx);
		}
	}
out:
	if (fsids)
		FREE(fsids, M_TEMP);
}
2164
/*
 * Drop one crossref held on 'mp' via its covered vnode 'dp'.  When the
 * last crossref goes and the mount is no longer attached to 'dp'
 * (v_mountedhere), the mount structure is destroyed and freed here.
 * 'need_put' additionally drops an iocount on 'dp'.
 */
void
mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
{
	vnode_lock(dp);
	mp->mnt_crossref--;

	if (mp->mnt_crossref < 0)
		panic("mount cross refs -ve");

	if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {

		if (need_put)
			vnode_put_locked(dp);
		vnode_unlock(dp);

		/* last crossref and detached: tear the mount down */
		mount_lock_destroy(mp);
#if CONFIG_MACF
		mac_mount_label_destroy(mp);
#endif
		FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
		return;
	}
	if (need_put)
		vnode_put_locked(dp);
	vnode_unlock(dp);
}
2191
2192
/*
 * Sync each mounted filesystem.
 */
#if DIAGNOSTIC
int syncprt = 0;	/* when set, sync paths dump buffer statistics */
#endif

int print_vmpage_stat=0;	/* when set, sync paths report dirty VM page counts */
int sync_timeout = 60; // Sync time limit (sec)
2202
2203 static int
2204 sync_callback(mount_t mp, __unused void *arg)
2205 {
2206 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2207 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2208
2209 mp->mnt_flag &= ~MNT_ASYNC;
2210 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
2211 if (asyncflag)
2212 mp->mnt_flag |= MNT_ASYNC;
2213 }
2214
2215 return (VFS_RETURNED);
2216 }
2217
/* ARGSUSED */
/*
 * sync() system call: best-effort, non-blocking sync of every mounted
 * filesystem, plus optional diagnostics.  Always returns 0.
 */
int
sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
{
	/* NULL arg => MNT_NOWAIT in sync_callback */
	vfs_iterate(LK_NOWAIT, sync_callback, NULL);

	if (print_vmpage_stat) {
		vm_countdirtypages();
	}

#if DIAGNOSTIC
	if (syncprt)
		vfs_bufstats();
#endif /* DIAGNOSTIC */
	return 0;
}
2234
/*
 * Body of the helper thread spawned by sync_async(): run the same
 * best-effort sync as sync(), then wake the waiter sleeping on 'arg'
 * (the address of sync_async()'s timeout variable), if any.
 */
static void
sync_thread(void *arg, __unused wait_result_t wr)
{
	int *timeout = (int *) arg;

	vfs_iterate(LK_NOWAIT, sync_callback, NULL);

	/* wake the sync_async() caller sleeping on this address */
	if (timeout)
		wakeup((caddr_t) timeout);
	if (print_vmpage_stat) {
		vm_countdirtypages();
	}

#if DIAGNOSTIC
	if (syncprt)
		vfs_bufstats();
#endif /* DIAGNOSTIC */
}
2253
/*
 * Sync in a separate thread so we can time out if it blocks.
 *
 * Spawns sync_thread() and sleeps on &timeout for at most 'timeout'
 * seconds.  The stack address is used purely as a wait channel (never
 * dereferenced by the helper thread), so it is safe even if this frame
 * unwinds before the helper finishes.
 *
 * NOTE(review): sync_thread() calls wakeup() without taking
 * sync_mtx_lck, so a wakeup issued between kernel_thread_start() and
 * msleep() going to sleep could be lost, making the caller wait the
 * full timeout — confirm whether this race is acceptable here.
 *
 * Always returns 0; a timeout is only logged.
 */
static int
sync_async(int timeout)
{
	thread_t thd;
	int error;
	struct timespec ts = {timeout, 0};

	lck_mtx_lock(sync_mtx_lck);
	if (kernel_thread_start(sync_thread, &timeout, &thd) != KERN_SUCCESS) {
		printf("sync_thread failed\n");
		lck_mtx_unlock(sync_mtx_lck);
		return (0);
	}

	/* PDROP releases sync_mtx_lck when msleep returns. */
	error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "sync_thread", &ts);
	if (error) {
		printf("sync timed out: %d sec\n", timeout);
	}
	thread_deallocate(thd);

	return (0);
}
2279
2280 /*
2281 * An in-kernel sync for power management to call.
2282 */
2283 __private_extern__ int
2284 sync_internal(void)
2285 {
2286 (void) sync_async(sync_timeout);
2287
2288 return 0;
2289 } /* end of sync_internal call */
2290
/*
 * Change filesystem quotas.
 *
 * Looks up uap->path to find the mount, copies in whatever data the
 * quota subcommand needs, calls VFS_QUOTACTL, then copies results back
 * out for the query subcommands.  Note that 'error' is 0 after a
 * successful namei(), so subcommands that do no copyin rely on that
 * value reaching the VFS_QUOTACTL gate below.
 */
#if QUOTA
int
quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
{
	struct mount *mp;
	int error, quota_cmd, quota_status;
	caddr_t datap;
	size_t fnamelen;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();
	struct dqblk my_dqblk;

	AUDIT_ARG(uid, uap->uid);
	AUDIT_ARG(cmd, uap->cmd);
	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	       uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	/* Only the mount is needed; release the vnode immediately. */
	mp = nd.ni_vp->v_mount;
	vnode_put(nd.ni_vp);
	nameidone(&nd);

	/* copyin any data we will need for downstream code */
	quota_cmd = uap->cmd >> SUBCMDSHIFT;

	switch (quota_cmd) {
	case Q_QUOTAON:
		/* uap->arg specifies a file from which to take the quotas */
		fnamelen = MAXPATHLEN;
		/* NOTE(review): kalloc() result is assumed non-NULL here
		 * (blocking allocation) — confirm. */
		datap = kalloc(MAXPATHLEN);
		error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
		break;
	case Q_GETQUOTA:
		/* uap->arg is a pointer to a dqblk structure. */
		datap = (caddr_t) &my_dqblk;
		break;
	case Q_SETQUOTA:
	case Q_SETUSE:
		/* uap->arg is a pointer to a dqblk structure. */
		datap = (caddr_t) &my_dqblk;
		if (proc_is64bit(p)) {
			struct user_dqblk my_dqblk64;
			error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
			if (error == 0) {
				/* Convert the 64-bit user layout to the kernel dqblk. */
				munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
			}
		}
		else {
			error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
		}
		break;
	case Q_QUOTASTAT:
		/* uap->arg is a pointer to an integer */
		datap = (caddr_t) &quota_status;
		break;
	default:
		datap = NULL;
		break;
	} /* switch */

	if (error == 0) {
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
	}

	/* Post-processing: free temporary buffers / copy results out. */
	switch (quota_cmd) {
	case Q_QUOTAON:
		if (datap != NULL)
			kfree(datap, MAXPATHLEN);
		break;
	case Q_GETQUOTA:
		/* uap->arg is a pointer to a dqblk structure we need to copy out to */
		if (error == 0) {
			if (proc_is64bit(p)) {
				struct user_dqblk my_dqblk64 = {.dqb_bhardlimit = 0};
				munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
				error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
			}
			else {
				error = copyout(datap, uap->arg, sizeof (struct dqblk));
			}
		}
		break;
	case Q_QUOTASTAT:
		/* uap->arg is a pointer to an integer */
		if (error == 0) {
			error = copyout(datap, uap->arg, sizeof(quota_status));
		}
		break;
	default:
		break;
	} /* switch */

	return (error);
}
#else
int
quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
{
	return (EOPNOTSUPP);
}
#endif /* QUOTA */
2396
2397 /*
2398 * Get filesystem statistics.
2399 *
2400 * Returns: 0 Success
2401 * namei:???
2402 * vfs_update_vfsstat:???
2403 * munge_statfs:EFAULT
2404 */
2405 /* ARGSUSED */
2406 int
2407 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
2408 {
2409 struct mount *mp;
2410 struct vfsstatfs *sp;
2411 int error;
2412 struct nameidata nd;
2413 vfs_context_t ctx = vfs_context_current();
2414 vnode_t vp;
2415
2416 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2417 UIO_USERSPACE, uap->path, ctx);
2418 error = namei(&nd);
2419 if (error)
2420 return (error);
2421 vp = nd.ni_vp;
2422 mp = vp->v_mount;
2423 sp = &mp->mnt_vfsstat;
2424 nameidone(&nd);
2425
2426 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
2427 if (error != 0) {
2428 vnode_put(vp);
2429 return (error);
2430 }
2431
2432 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2433 vnode_put(vp);
2434 return (error);
2435 }
2436
2437 /*
2438 * Get filesystem statistics.
2439 */
2440 /* ARGSUSED */
2441 int
2442 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
2443 {
2444 vnode_t vp;
2445 struct mount *mp;
2446 struct vfsstatfs *sp;
2447 int error;
2448
2449 AUDIT_ARG(fd, uap->fd);
2450
2451 if ( (error = file_vnode(uap->fd, &vp)) )
2452 return (error);
2453
2454 error = vnode_getwithref(vp);
2455 if (error) {
2456 file_drop(uap->fd);
2457 return (error);
2458 }
2459
2460 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2461
2462 mp = vp->v_mount;
2463 if (!mp) {
2464 error = EBADF;
2465 goto out;
2466 }
2467 sp = &mp->mnt_vfsstat;
2468 if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
2469 goto out;
2470 }
2471
2472 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2473
2474 out:
2475 file_drop(uap->fd);
2476 vnode_put(vp);
2477
2478 return (error);
2479 }
2480
2481 /*
2482 * Common routine to handle copying of statfs64 data to user space
2483 */
2484 static int
2485 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2486 {
2487 int error;
2488 struct statfs64 sfs;
2489
2490 bzero(&sfs, sizeof(sfs));
2491
2492 sfs.f_bsize = sfsp->f_bsize;
2493 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2494 sfs.f_blocks = sfsp->f_blocks;
2495 sfs.f_bfree = sfsp->f_bfree;
2496 sfs.f_bavail = sfsp->f_bavail;
2497 sfs.f_files = sfsp->f_files;
2498 sfs.f_ffree = sfsp->f_ffree;
2499 sfs.f_fsid = sfsp->f_fsid;
2500 sfs.f_owner = sfsp->f_owner;
2501 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2502 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2503 sfs.f_fssubtype = sfsp->f_fssubtype;
2504 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2505 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2506 } else {
2507 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2508 }
2509 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2510 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2511
2512 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2513
2514 return(error);
2515 }
2516
2517 /*
2518 * Get file system statistics in 64-bit mode
2519 */
2520 int
2521 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2522 {
2523 struct mount *mp;
2524 struct vfsstatfs *sp;
2525 int error;
2526 struct nameidata nd;
2527 vfs_context_t ctxp = vfs_context_current();
2528 vnode_t vp;
2529
2530 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2531 UIO_USERSPACE, uap->path, ctxp);
2532 error = namei(&nd);
2533 if (error)
2534 return (error);
2535 vp = nd.ni_vp;
2536 mp = vp->v_mount;
2537 sp = &mp->mnt_vfsstat;
2538 nameidone(&nd);
2539
2540 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
2541 if (error != 0) {
2542 vnode_put(vp);
2543 return (error);
2544 }
2545
2546 error = statfs64_common(mp, sp, uap->buf);
2547 vnode_put(vp);
2548
2549 return (error);
2550 }
2551
2552 /*
2553 * Get file system statistics in 64-bit mode
2554 */
2555 int
2556 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2557 {
2558 struct vnode *vp;
2559 struct mount *mp;
2560 struct vfsstatfs *sp;
2561 int error;
2562
2563 AUDIT_ARG(fd, uap->fd);
2564
2565 if ( (error = file_vnode(uap->fd, &vp)) )
2566 return (error);
2567
2568 error = vnode_getwithref(vp);
2569 if (error) {
2570 file_drop(uap->fd);
2571 return (error);
2572 }
2573
2574 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2575
2576 mp = vp->v_mount;
2577 if (!mp) {
2578 error = EBADF;
2579 goto out;
2580 }
2581 sp = &mp->mnt_vfsstat;
2582 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2583 goto out;
2584 }
2585
2586 error = statfs64_common(mp, sp, uap->buf);
2587
2588 out:
2589 file_drop(uap->fd);
2590 vnode_put(vp);
2591
2592 return (error);
2593 }
2594
/*
 * Iteration state shared with the getfsstat*_callback routines while
 * walking the mount list via vfs_iterate().
 */
struct getfsstat_struct {
	user_addr_t sfsp;	/* cursor into the user statfs buffer */
	user_addr_t *mp;	/* optional array of user MAC-label buffers */
	int count;		/* mounts visited so far */
	int maxcount;	/* capacity of the user buffer, in entries */
	int flags;		/* MNT_NOWAIT / MNT_WAIT / MNT_DWAIT from caller */
	int error;		/* first error encountered, if any */
};
2603
2604
/*
 * vfs_iterate() callback for getfsstat(2): copy one mount's statistics
 * (and optionally its MAC label) out to the user buffers tracked in the
 * getfsstat_struct, advancing the cursors.  A failed stat refresh skips
 * the mount; a copyout/label failure stops the iteration with the error
 * recorded in fstp->error.  fstp->count is incremented for every mount
 * regardless, so the caller can report the total number of mounts.
 */
static int
getfsstat_callback(mount_t mp, void * arg)
{

	struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
	struct vfsstatfs *sp;
	int error, my_size;
	vfs_context_t ctx = vfs_context_current();

	if (fstp->sfsp && fstp->count < fstp->maxcount) {
		sp = &mp->mnt_vfsstat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh the
		 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
		 */
		if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
			(error = vfs_update_vfsstat(mp, ctx,
			    VFS_USER_EVENT))) {
			KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
			/* Skip this mount but keep iterating. */
			return(VFS_RETURNED);
		}

		/*
		 * Need to handle LP64 version of struct statfs
		 */
		error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
		if (error) {
			fstp->error = error;
			return(VFS_RETURNED_DONE);
		}
		/* my_size is the per-entry size munge_statfs actually wrote. */
		fstp->sfsp += my_size;

		if (fstp->mp) {
#if CONFIG_MACF
			error = mac_mount_label_get(mp, *fstp->mp);
			if (error) {
				fstp->error = error;
				return(VFS_RETURNED_DONE);
			}
#endif
			fstp->mp++;
		}
	}
	fstp->count++;
	return(VFS_RETURNED);
}
2651
2652 /*
2653 * Get statistics on all filesystems.
2654 */
2655 int
2656 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2657 {
2658 struct __mac_getfsstat_args muap;
2659
2660 muap.buf = uap->buf;
2661 muap.bufsize = uap->bufsize;
2662 muap.mac = USER_ADDR_NULL;
2663 muap.macsize = 0;
2664 muap.flags = uap->flags;
2665
2666 return (__mac_getfsstat(p, &muap, retval));
2667 }
2668
2669 /*
2670 * __mac_getfsstat: Get MAC-related file system statistics
2671 *
2672 * Parameters: p (ignored)
2673 * uap User argument descriptor (see below)
2674 * retval Count of file system statistics (N stats)
2675 *
2676 * Indirect: uap->bufsize Buffer size
2677 * uap->macsize MAC info size
2678 * uap->buf Buffer where information will be returned
2679 * uap->mac MAC info
2680 * uap->flags File system flags
2681 *
2682 *
2683 * Returns: 0 Success
2684 * !0 Not success
2685 *
2686 */
2687 int
2688 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
2689 {
2690 user_addr_t sfsp;
2691 user_addr_t *mp;
2692 size_t count, maxcount, bufsize, macsize;
2693 struct getfsstat_struct fst;
2694
2695 bufsize = (size_t) uap->bufsize;
2696 macsize = (size_t) uap->macsize;
2697
2698 if (IS_64BIT_PROCESS(p)) {
2699 maxcount = bufsize / sizeof(struct user64_statfs);
2700 }
2701 else {
2702 maxcount = bufsize / sizeof(struct user32_statfs);
2703 }
2704 sfsp = uap->buf;
2705 count = 0;
2706
2707 mp = NULL;
2708
2709 #if CONFIG_MACF
2710 if (uap->mac != USER_ADDR_NULL) {
2711 u_int32_t *mp0;
2712 int error;
2713 unsigned int i;
2714
2715 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2716 if (count != maxcount)
2717 return (EINVAL);
2718
2719 /* Copy in the array */
2720 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2721 if (mp0 == NULL) {
2722 return (ENOMEM);
2723 }
2724
2725 error = copyin(uap->mac, mp0, macsize);
2726 if (error) {
2727 FREE(mp0, M_MACTEMP);
2728 return (error);
2729 }
2730
2731 /* Normalize to an array of user_addr_t */
2732 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
2733 if (mp == NULL) {
2734 FREE(mp0, M_MACTEMP);
2735 return (ENOMEM);
2736 }
2737
2738 for (i = 0; i < count; i++) {
2739 if (IS_64BIT_PROCESS(p))
2740 mp[i] = ((user_addr_t *)mp0)[i];
2741 else
2742 mp[i] = (user_addr_t)mp0[i];
2743 }
2744 FREE(mp0, M_MACTEMP);
2745 }
2746 #endif
2747
2748
2749 fst.sfsp = sfsp;
2750 fst.mp = mp;
2751 fst.flags = uap->flags;
2752 fst.count = 0;
2753 fst.error = 0;
2754 fst.maxcount = maxcount;
2755
2756
2757 vfs_iterate(0, getfsstat_callback, &fst);
2758
2759 if (mp)
2760 FREE(mp, M_MACTEMP);
2761
2762 if (fst.error ) {
2763 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2764 return(fst.error);
2765 }
2766
2767 if (fst.sfsp && fst.count > fst.maxcount)
2768 *retval = fst.maxcount;
2769 else
2770 *retval = fst.count;
2771 return (0);
2772 }
2773
/*
 * vfs_iterate() callback for getfsstat64(2): copy one mount's statistics
 * out in struct statfs64 form, advancing the user-buffer cursor.  A
 * failed stat refresh skips the mount; a copyout failure stops the
 * iteration with the error recorded in fstp->error.  fstp->count is
 * incremented for every mount regardless, so the caller can report the
 * total number of mounts.
 */
static int
getfsstat64_callback(mount_t mp, void * arg)
{
	struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
	struct vfsstatfs *sp;
	int error;

	if (fstp->sfsp && fstp->count < fstp->maxcount) {
		sp = &mp->mnt_vfsstat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh the fsstat
		 * cache. MNT_WAIT overrides MNT_NOWAIT.
		 *
		 * We treat MNT_DWAIT as MNT_WAIT for all instances of
		 * getfsstat, since the constants are out of the same
		 * namespace.
		 */
		if (((fstp->flags & MNT_NOWAIT) == 0 ||
		     (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
		    (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
			KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
			/* Skip this mount but keep iterating. */
			return(VFS_RETURNED);
		}

		error = statfs64_common(mp, sp, fstp->sfsp);
		if (error) {
			fstp->error = error;
			return(VFS_RETURNED_DONE);
		}
		fstp->sfsp += sizeof(struct statfs64);
	}
	fstp->count++;
	return(VFS_RETURNED);
}
2808
2809 /*
2810 * Get statistics on all file systems in 64 bit mode.
2811 */
2812 int
2813 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2814 {
2815 user_addr_t sfsp;
2816 int count, maxcount;
2817 struct getfsstat_struct fst;
2818
2819 maxcount = uap->bufsize / sizeof(struct statfs64);
2820
2821 sfsp = uap->buf;
2822 count = 0;
2823
2824 fst.sfsp = sfsp;
2825 fst.flags = uap->flags;
2826 fst.count = 0;
2827 fst.error = 0;
2828 fst.maxcount = maxcount;
2829
2830 vfs_iterate(0, getfsstat64_callback, &fst);
2831
2832 if (fst.error ) {
2833 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2834 return(fst.error);
2835 }
2836
2837 if (fst.sfsp && fst.count > fst.maxcount)
2838 *retval = fst.maxcount;
2839 else
2840 *retval = fst.count;
2841
2842 return (0);
2843 }
2844
2845 /*
2846 * gets the associated vnode with the file descriptor passed.
2847 * as input
2848 *
2849 * INPUT
2850 * ctx - vfs context of caller
2851 * fd - file descriptor for which vnode is required.
2852 * vpp - Pointer to pointer to vnode to be returned.
2853 *
2854 * The vnode is returned with an iocount so any vnode obtained
2855 * by this call needs a vnode_put
2856 *
2857 */
2858 static int
2859 vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
2860 {
2861 int error;
2862 vnode_t vp;
2863 struct fileproc *fp;
2864 proc_t p = vfs_context_proc(ctx);
2865
2866 *vpp = NULLVP;
2867
2868 error = fp_getfvp(p, fd, &fp, &vp);
2869 if (error)
2870 return (error);
2871
2872 error = vnode_getwithref(vp);
2873 if (error) {
2874 (void)fp_drop(p, fd, fp, 0);
2875 return (error);
2876 }
2877
2878 (void)fp_drop(p, fd, fp, 0);
2879 *vpp = vp;
2880 return (error);
2881 }
2882
2883 /*
2884 * Wrapper function around namei to start lookup from a directory
2885 * specified by a file descriptor ni_dirfd.
2886 *
2887 * In addition to all the errors returned by namei, this call can
2888 * return ENOTDIR if the file descriptor does not refer to a directory.
2889 * and EBADF if the file descriptor is not valid.
2890 */
2891 int
2892 nameiat(struct nameidata *ndp, int dirfd)
2893 {
2894 if ((dirfd != AT_FDCWD) &&
2895 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
2896 !(ndp->ni_cnd.cn_flags & USEDVP)) {
2897 int error = 0;
2898 char c;
2899
2900 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
2901 error = copyin(ndp->ni_dirp, &c, sizeof(char));
2902 if (error)
2903 return (error);
2904 } else {
2905 c = *((char *)(ndp->ni_dirp));
2906 }
2907
2908 if (c != '/') {
2909 vnode_t dvp_at;
2910
2911 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
2912 &dvp_at);
2913 if (error)
2914 return (error);
2915
2916 if (vnode_vtype(dvp_at) != VDIR) {
2917 vnode_put(dvp_at);
2918 return (ENOTDIR);
2919 }
2920
2921 ndp->ni_dvp = dvp_at;
2922 ndp->ni_cnd.cn_flags |= USEDVP;
2923 error = namei(ndp);
2924 ndp->ni_cnd.cn_flags &= ~USEDVP;
2925 vnode_put(dvp_at);
2926 return (error);
2927 }
2928 }
2929
2930 return (namei(ndp));
2931 }
2932
2933 /*
2934 * Change current working directory to a given file descriptor.
2935 */
2936 /* ARGSUSED */
2937 static int
2938 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
2939 {
2940 struct filedesc *fdp = p->p_fd;
2941 vnode_t vp;
2942 vnode_t tdp;
2943 vnode_t tvp;
2944 struct mount *mp;
2945 int error;
2946 vfs_context_t ctx = vfs_context_current();
2947
2948 AUDIT_ARG(fd, uap->fd);
2949 if (per_thread && uap->fd == -1) {
2950 /*
2951 * Switching back from per-thread to per process CWD; verify we
2952 * in fact have one before proceeding. The only success case
2953 * for this code path is to return 0 preemptively after zapping
2954 * the thread structure contents.
2955 */
2956 thread_t th = vfs_context_thread(ctx);
2957 if (th) {
2958 uthread_t uth = get_bsdthread_info(th);
2959 tvp = uth->uu_cdir;
2960 uth->uu_cdir = NULLVP;
2961 if (tvp != NULLVP) {
2962 vnode_rele(tvp);
2963 return (0);
2964 }
2965 }
2966 return (EBADF);
2967 }
2968
2969 if ( (error = file_vnode(uap->fd, &vp)) )
2970 return(error);
2971 if ( (error = vnode_getwithref(vp)) ) {
2972 file_drop(uap->fd);
2973 return(error);
2974 }
2975
2976 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
2977
2978 if (vp->v_type != VDIR) {
2979 error = ENOTDIR;
2980 goto out;
2981 }
2982
2983 #if CONFIG_MACF
2984 error = mac_vnode_check_chdir(ctx, vp);
2985 if (error)
2986 goto out;
2987 #endif
2988 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2989 if (error)
2990 goto out;
2991
2992 while (!error && (mp = vp->v_mountedhere) != NULL) {
2993 if (vfs_busy(mp, LK_NOWAIT)) {
2994 error = EACCES;
2995 goto out;
2996 }
2997 error = VFS_ROOT(mp, &tdp, ctx);
2998 vfs_unbusy(mp);
2999 if (error)
3000 break;
3001 vnode_put(vp);
3002 vp = tdp;
3003 }
3004 if (error)
3005 goto out;
3006 if ( (error = vnode_ref(vp)) )
3007 goto out;
3008 vnode_put(vp);
3009
3010 if (per_thread) {
3011 thread_t th = vfs_context_thread(ctx);
3012 if (th) {
3013 uthread_t uth = get_bsdthread_info(th);
3014 tvp = uth->uu_cdir;
3015 uth->uu_cdir = vp;
3016 OSBitOrAtomic(P_THCWD, &p->p_flag);
3017 } else {
3018 vnode_rele(vp);
3019 return (ENOENT);
3020 }
3021 } else {
3022 proc_fdlock(p);
3023 tvp = fdp->fd_cdir;
3024 fdp->fd_cdir = vp;
3025 proc_fdunlock(p);
3026 }
3027
3028 if (tvp)
3029 vnode_rele(tvp);
3030 file_drop(uap->fd);
3031
3032 return (0);
3033 out:
3034 vnode_put(vp);
3035 file_drop(uap->fd);
3036
3037 return(error);
3038 }
3039
/* Change the per-process current working directory to an open fd. */
int
fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
{
	return common_fchdir(p, uap, 0);
}
3045
/* Change the calling thread's private working directory to an open fd. */
int
__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
{
	return common_fchdir(p, (void *)uap, 1);
}
3051
/*
 * Change current working directory (".").
 *
 * With per_thread set, installs the new directory as the calling
 * thread's private cwd instead of the process-wide one.
 *
 * Returns:	0			Success
 *	change_dir:ENOTDIR
 *	change_dir:???
 *	vnode_ref:ENOENT		No such file or directory
 */
/* ARGSUSED */
static int
common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
{
	struct filedesc *fdp = p->p_fd;
	int error;
	struct nameidata nd;
	vnode_t tvp;
	vfs_context_t ctx = vfs_context_current();

	NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
	       UIO_USERSPACE, uap->path, ctx);
	error = change_dir(&nd, ctx);
	if (error)
		return (error);
	/* Take a usecount so nd.ni_vp stays valid after the put below. */
	if ( (error = vnode_ref(nd.ni_vp)) ) {
		vnode_put(nd.ni_vp);
		return (error);
	}
	/*
	 * drop the iocount we picked up in change_dir
	 */
	vnode_put(nd.ni_vp);

	if (per_thread) {
		thread_t th = vfs_context_thread(ctx);
		if (th) {
			uthread_t uth = get_bsdthread_info(th);
			tvp = uth->uu_cdir;
			uth->uu_cdir = nd.ni_vp;
			OSBitOrAtomic(P_THCWD, &p->p_flag);
		} else {
			vnode_rele(nd.ni_vp);
			return (ENOENT);
		}
	} else {
		proc_fdlock(p);
		tvp = fdp->fd_cdir;
		fdp->fd_cdir = nd.ni_vp;
		proc_fdunlock(p);
	}

	/* Release the previous cwd, if any. */
	if (tvp)
		vnode_rele(tvp);

	return (0);
}
3107
3108
/*
 * chdir
 *
 * Change current working directory (".") for the entire process
 *
 * Parameters:  p       Process requesting the call
 *              uap     User argument descriptor (see below)
 *              retval  (ignored)
 *
 * Indirect parameters: uap->path   Directory path
 *
 * Returns:     0       Success
 *              common_chdir: ENOTDIR
 *              common_chdir: ENOENT    No such file or directory
 *              common_chdir: ???
 *
 */
int
chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
{
	/* per_thread == 0: affects the whole process */
	return common_chdir(p, (void *)uap, 0);
}
3131
/*
 * __pthread_chdir
 *
 * Change current working directory (".") for a single thread
 *
 * Parameters:  p       Process requesting the call
 *              uap     User argument descriptor (see below)
 *              retval  (ignored)
 *
 * Indirect parameters: uap->path   Directory path
 *
 * Returns:     0       Success
 *              common_chdir: ENOTDIR
 *              common_chdir: ENOENT    No such file or directory
 *              common_chdir: ???
 *
 */
int
__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
{
	/* per_thread == 1: affects only the calling thread */
	return common_chdir(p, (void *)uap, 1);
}
3154
3155
/*
 * Change notion of root (``/'') directory.
 *
 * Requires superuser.  Takes a usecount on the new root via vnode_ref
 * and installs it as fdp->fd_rdir under the fd lock, releasing the
 * previous root's usecount (if any).
 */
/* ARGSUSED */
int
chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	int error;
	struct nameidata nd;
	vnode_t tvp;
	vfs_context_t ctx = vfs_context_current();

	if ((error = suser(kauth_cred_get(), &p->p_acflag)))
		return (error);

	NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
	       UIO_USERSPACE, uap->path, ctx);
	error = change_dir(&nd, ctx);
	if (error)
		return (error);

#if CONFIG_MACF
	error = mac_vnode_check_chroot(ctx, nd.ni_vp,
	    &nd.ni_cnd);
	if (error) {
		vnode_put(nd.ni_vp);
		return (error);
	}
#endif

	/* Take a usecount so nd.ni_vp stays valid after the put below. */
	if ( (error = vnode_ref(nd.ni_vp)) ) {
		vnode_put(nd.ni_vp);
		return (error);
	}
	vnode_put(nd.ni_vp);

	proc_fdlock(p);
	tvp = fdp->fd_rdir;
	fdp->fd_rdir = nd.ni_vp;
	fdp->fd_flags |= FD_CHROOT;
	proc_fdunlock(p);

	/* Release the previous root, if any. */
	if (tvp != NULL)
		vnode_rele(tvp);

	return (0);
}
3204
3205 /*
3206 * Common routine for chroot and chdir.
3207 *
3208 * Returns: 0 Success
3209 * ENOTDIR Not a directory
3210 * namei:??? [anything namei can return]
3211 * vnode_authorize:??? [anything vnode_authorize can return]
3212 */
3213 static int
3214 change_dir(struct nameidata *ndp, vfs_context_t ctx)
3215 {
3216 vnode_t vp;
3217 int error;
3218
3219 if ((error = namei(ndp)))
3220 return (error);
3221 nameidone(ndp);
3222 vp = ndp->ni_vp;
3223
3224 if (vp->v_type != VDIR) {
3225 vnode_put(vp);
3226 return (ENOTDIR);
3227 }
3228
3229 #if CONFIG_MACF
3230 error = mac_vnode_check_chdir(ctx, vp);
3231 if (error) {
3232 vnode_put(vp);
3233 return (error);
3234 }
3235 #endif
3236
3237 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3238 if (error) {
3239 vnode_put(vp);
3240 return (error);
3241 }
3242
3243 return (error);
3244 }
3245
/*
 * Allocate the vnode data (for directories) associated with the file glob.
 * (Previous comment incorrectly said "Free".)
 */
struct fd_vn_data *
fg_vn_data_alloc(void)
{
	struct fd_vn_data *fvdata;

	/* Allocate per fd vnode data */
	MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
	       M_FD_VN_DATA, M_WAITOK | M_ZERO);
	lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
	return fvdata;
}
3260
3261 /*
3262 * Free the vnode data (for directories) associated with the file glob.
3263 */
3264 void
3265 fg_vn_data_free(void *fgvndata)
3266 {
3267 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3268
3269 if (fvdata->fv_buf)
3270 FREE(fvdata->fv_buf, M_FD_DIRBUF);
3271 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3272 FREE(fvdata, M_FD_VN_DATA);
3273 }
3274
/*
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 *
 * Returns:	0			Success
 *		EINVAL
 *		EINTR
 *	falloc:ENFILE
 *	falloc:EMFILE
 *	falloc:ENOMEM
 *	vn_open_auth:???
 *	dupfdopen:???
 *	VNOP_ADVLOCK:???
 *	vnode_setsize:???
 *
 * XXX Need to implement uid, gid
 */
int
open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
    struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
    int32_t *retval)
{
	proc_t p = vfs_context_proc(ctx);
	uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
	struct fileproc *fp;
	vnode_t vp;
	int flags, oflags;
	int type, indx, error;
	struct flock lf;
	struct vfs_context context;

	oflags = uflags;

	/* O_ACCMODE fully set (both read and write bits) is invalid. */
	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return(EINVAL);

	flags = FFLAGS(uflags);
	/* These are kernel-internal flags callers may not request directly. */
	CLR(flags, FENCRYPTED);
	CLR(flags, FUNENCRYPTED);

	AUDIT_ARG(fflags, oflags);
	AUDIT_ARG(mode, vap->va_mode);

	/* Reserve a descriptor slot and fileproc up front. */
	if ((error = falloc_withalloc(p,
	    &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
		return (error);
	}
	/* Encode the reserved index for the fdopen() special case below. */
	uu->uu_dupfd = -indx - 1;

	if ((error = vn_open_auth(ndp, &flags, vap))) {
		if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){	/* XXX from fdopen */
			if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
				fp_drop(p, indx, NULL, 0);
				*retval = indx;
				return (0);
			}
		}
		if (error == ERESTART)
			error = EINTR;
		fp_free(p, indx, fp);
		return (error);
	}
	uu->uu_dupfd = 0;
	vp = ndp->ni_vp;

	/* Wire the fileglob to the vnode; fg_data holds the usecount. */
	fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
	fp->f_fglob->fg_ops = &vnops;
	fp->f_fglob->fg_data = (caddr_t)vp;

	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file advisory lock, flock(2)-style, keyed by fileglob. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
#if CONFIG_MACF
		error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
		    F_SETLK, &lf);
		if (error)
			goto bad;
#endif
		if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
			goto bad;
		fp->f_fglob->fg_flag |= FHASLOCK;
	}

	/* try to truncate by setting the size attribute */
	if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
		goto bad;

	/*
	 * For directories we hold some additional information in the fd.
	 */
	if (vnode_vtype(vp) == VDIR) {
		fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
	} else {
		fp->f_fglob->fg_vn_data = NULL;
	}

	vnode_put(vp);

	/*
	 * The first terminal open (without a O_NOCTTY) by a session leader
	 * results in it being set as the controlling terminal.
	 *
	 * NOTE(review): vp is referenced here after the vnode_put above;
	 * presumably the fileglob's fg_data usecount keeps it valid —
	 * confirm.
	 */
	if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
	    !(flags & O_NOCTTY)) {
		int tmp = 0;

		(void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
		    (caddr_t)&tmp, ctx);
	}

	/* Publish the descriptor: apply close-on-exec/fork and release it. */
	proc_fdlock(p);
	if (flags & O_CLOEXEC)
		*fdflags(p, indx) |= UF_EXCLOSE;
	if (flags & O_CLOFORK)
		*fdflags(p, indx) |= UF_FORKCLOSE;
	procfdtbl_releasefd(p, indx, NULL);
	fp_drop(p, indx, fp, 1);
	proc_fdunlock(p);

	*retval = indx;

	return (0);
bad:
	/* Back out: undo any lock we took, close the vnode, free the fd. */
	context = *vfs_context_current();
	context.vc_ucred = fp->f_fglob->fg_cred;

	if ((fp->f_fglob->fg_flag & FHASLOCK) &&
	    (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;

		(void)VNOP_ADVLOCK(
		    vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
	}

	vn_close(vp, fp->f_fglob->fg_flag, &context);
	vnode_put(vp);
	fp_free(p, indx, fp);

	return (error);
}
3426
/*
 * While most of the *at syscall handlers can call nameiat() which
 * is a wrapper around namei, the use of namei and initialisation
 * of nameidata are far removed and in different functions - namei
 * gets called in vn_open_auth for open1. So we'll just do here what
 * nameiat() does.
 *
 * For a relative path and dirfd != AT_FDCWD, resolves dirfd to a
 * directory vnode, installs it via the USEDVP protocol, and delegates
 * to open1(); otherwise calls open1() directly.
 */
static int
open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
    struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
    int dirfd)
{
	if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
		int error;
		char c;

		/* Peek at the first byte of the path to see if it is absolute. */
		if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
			error = copyin(ndp->ni_dirp, &c, sizeof(char));
			if (error)
				return (error);
		} else {
			c = *((char *)(ndp->ni_dirp));
		}

		if (c != '/') {
			vnode_t dvp_at;

			error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
			    &dvp_at);
			if (error)
				return (error);

			if (vnode_vtype(dvp_at) != VDIR) {
				vnode_put(dvp_at);
				return (ENOTDIR);
			}

			/* Start the open's lookup at the fd's directory. */
			ndp->ni_dvp = dvp_at;
			ndp->ni_cnd.cn_flags |= USEDVP;
			error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
			    retval);
			vnode_put(dvp_at);
			return (error);
		}
	}

	return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
}
3475
3476 /*
3477 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3478 *
3479 * Parameters: p Process requesting the open
3480 * uap User argument descriptor (see below)
3481 * retval Pointer to an area to receive the
3482 * return calue from the system call
3483 *
3484 * Indirect: uap->path Path to open (same as 'open')
3485 * uap->flags Flags to open (same as 'open'
3486 * uap->uid UID to set, if creating
3487 * uap->gid GID to set, if creating
3488 * uap->mode File mode, if creating (same as 'open')
3489 * uap->xsecurity ACL to set, if creating
3490 *
3491 * Returns: 0 Success
3492 * !0 errno value
3493 *
3494 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3495 *
3496 * XXX: We should enummerate the possible errno values here, and where
3497 * in the code they originated.
3498 */
int
open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	int ciferror;
	kauth_filesec_t xsecdst;
	struct vnode_attr va;
	struct nameidata nd;
	int cmode;

	AUDIT_ARG(owner, uap->uid, uap->gid);

	/*
	 * Copy in the optional ACL first; it is freed at the end regardless
	 * of whether the open succeeds.
	 */
	xsecdst = NULL;
	if ((uap->xsecurity != USER_ADDR_NULL) &&
	    ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
		return ciferror;

	VATTR_INIT(&va);
	/* Requested mode filtered through the process umask; sticky bit stripped. */
	cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
	VATTR_SET(&va, va_mode, cmode);
	/* Only apply uid/gid/ACL when the caller actually supplied them. */
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);
	if (xsecdst != NULL)
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);

	NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	    uap->path, vfs_context_current());

	ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
	    fileproc_alloc_init, NULL, retval);
	/* The filesec was only needed to seed va_acl; release it now. */
	if (xsecdst != NULL)
		kauth_filesec_free(xsecdst);

	return ciferror;
}
3536
3537 /*
3538 * Go through the data-protected atomically controlled open (2)
3539 *
3540 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3541 */
3542 int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3543 int flags = uap->flags;
3544 int class = uap->class;
3545 int dpflags = uap->dpflags;
3546
3547 /*
3548 * Follow the same path as normal open(2)
3549 * Look up the item if it exists, and acquire the vnode.
3550 */
3551 struct filedesc *fdp = p->p_fd;
3552 struct vnode_attr va;
3553 struct nameidata nd;
3554 int cmode;
3555 int error;
3556
3557 VATTR_INIT(&va);
3558 /* Mask off all but regular access permissions */
3559 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3560 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3561
3562 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3563 uap->path, vfs_context_current());
3564
3565 /*
3566 * Initialize the extra fields in vnode_attr to pass down our
3567 * extra fields.
3568 * 1. target cprotect class.
3569 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3570 */
3571 if (flags & O_CREAT) {
3572 /* lower level kernel code validates that the class is valid before applying it. */
3573 if (class != PROTECTION_CLASS_DEFAULT) {
3574 /*
3575 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3576 * file behave the same as open (2)
3577 */
3578 VATTR_SET(&va, va_dataprotect_class, class);
3579 }
3580 }
3581
3582 if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
3583 if ( flags & (O_RDWR | O_WRONLY)) {
3584 /* Not allowed to write raw encrypted bytes */
3585 return EINVAL;
3586 }
3587 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
3588 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3589 }
3590 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
3591 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
3592 }
3593 }
3594
3595 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3596 fileproc_alloc_init, NULL, retval);
3597
3598 return error;
3599 }
3600
/*
 * Common helper for open(2)/openat(2): builds the vnode_attr (creation
 * mode filtered through the umask) and nameidata, then hands off to
 * open1at() which resolves 'path' relative to 'fd' (or the cwd for
 * AT_FDCWD).  'segflg' says whether 'path' is a user or kernel address.
 */
static int
openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
    int fd, enum uio_seg segflg, int *retval)
{
	struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
	struct vnode_attr va;
	struct nameidata nd;
	int cmode;

	VATTR_INIT(&va);
	/* Mask off all but regular access permissions */
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
	VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);

	NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
	    segflg, path, ctx);

	return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
	    retval, fd));
}
3621
3622 int
3623 open(proc_t p, struct open_args *uap, int32_t *retval)
3624 {
3625 __pthread_testcancel(1);
3626 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3627 }
3628
3629 int
3630 open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3631 int32_t *retval)
3632 {
3633 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3634 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3635 }
3636
3637 int
3638 openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3639 int32_t *retval)
3640 {
3641 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3642 uap->mode, uap->fd, UIO_USERSPACE, retval));
3643 }
3644
3645 int
3646 openat(proc_t p, struct openat_args *uap, int32_t *retval)
3647 {
3648 __pthread_testcancel(1);
3649 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3650 }
3651
3652 /*
3653 * openbyid_np: open a file given a file system id and a file system object id
3654 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3655 * for file systems that don't support object ids, it is a node id (uint64_t).
3656 *
3657 * Parameters: p Process requesting the open
3658 * uap User argument descriptor (see below)
3659 * retval Pointer to an area to receive the
3660 * return value from the system call
3661 *
3662 * Indirect: uap->path Path to open (same as 'open')
3663 *
3664 * uap->fsid id of target file system
3665 * uap->objid id of target file system object
3666 * uap->flags Flags to open (same as 'open')
3667 *
3668 * Returns: 0 Success
3669 * !0 errno value
3670 *
3671 *
3672 * XXX: We should enumerate the possible errno values here, and where
3673 * in the code they originated.
3674 */
int
openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
{
	fsid_t fsid;
	uint64_t objid;
	int error;
	char *buf = NULL;
	int buflen = MAXPATHLEN;
	int pathlen = 0;
	vfs_context_t ctx = vfs_context_current();

	/* Copy the target file system id in from user space. */
	if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
		return (error);
	}

	/*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
	if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
		return (error);
	}

	AUDIT_ARG(value32, fsid.val[0]);
	AUDIT_ARG(value64, objid);

	/*resolve path from fsid, objid*/
	do {
		MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
		if (buf == NULL) {
			return (ENOMEM);
		}

		error = fsgetpath_internal(
			ctx, fsid.val[0], objid,
			buflen, buf, &pathlen);

		if (error) {
			FREE(buf, M_TEMP);
			buf = NULL;
		}
		/*
		 * NOTE(review): on ENOSPC the buffer grows by MAXPATHLEN and
		 * the lookup is retried with no upper bound — presumably path
		 * lengths are bounded in practice; confirm no cap is needed.
		 */
	} while (error == ENOSPC && (buflen += MAXPATHLEN));

	if (error) {
		return error;
	}

	/* NUL-terminate at the length fsgetpath_internal reported. */
	buf[pathlen] = 0;

	/* Re-drive as a path-based open; buf is a kernel address (UIO_SYSSPACE). */
	error = openat_internal(
		ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);

	FREE(buf, M_TEMP);

	return error;
}
3728
3729
3730 /*
3731 * Create a special file.
3732 */
3733 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3734
int
mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
{
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;
	vnode_t	vp, dvp;

	VATTR_INIT(&va);
	/* Requested mode is filtered through the process umask. */
	VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
	VATTR_SET(&va, va_rdev, uap->dev);

	/* If it's a mknod() of a FIFO, call mkfifo1() instead */
	if ((uap->mode & S_IFMT) == S_IFIFO)
		return(mkfifo1(ctx, uap->path, &va));

	AUDIT_ARG(mode, uap->mode);
	AUDIT_ARG(value32, uap->dev);

	/* Creating device special files requires superuser privileges. */
	if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
		return (error);
	NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* The target name must not already exist. */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}

	/* Only character and block special files may be created here. */
	switch (uap->mode & S_IFMT) {
	case S_IFCHR:
		VATTR_SET(&va, va_type, VCHR);
		break;
	case S_IFBLK:
		VATTR_SET(&va, va_type, VBLK);
		break;
	default:
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);
	if (error)
		goto out;
#endif

	/* Authorize adding an entry to the parent directory, then create. */
	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out;

	if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
		goto out;

	if (vp) {
		int	update_flags = 0;

		// Make sure the name & parent pointers are hooked up
		if (vp->v_name == NULL)
			update_flags |= VNODE_UPDATE_NAME;
		if (vp->v_parent == NULLVP)
			update_flags |= VNODE_UPDATE_PARENT;

		if (update_flags)
			vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
		add_fsevent(FSE_CREATE_FILE, ctx,
		    FSE_ARG_VNODE, vp,
		    FSE_ARG_DONE);
#endif
	}

out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	vnode_put(dvp);

	return (error);
}
3827
3828 /*
3829 * Create a named pipe.
3830 *
3831 * Returns: 0 Success
3832 * EEXIST
3833 * namei:???
3834 * vnode_authorize:???
3835 * vn_create:???
3836 */
/*
 * Common FIFO-creation helper for mkfifo(2), mkfifo_extended(2) and
 * mknod(2) with S_IFIFO.  'upath' is always a user-space path; 'vap'
 * carries the caller-prepared creation attributes (mode, uid/gid, ACL).
 */
static int
mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
{
	vnode_t	vp, dvp;
	int error;
	struct nameidata nd;

	NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
	    UIO_USERSPACE, upath, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* check that this is a new file and authorize addition */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}
	VATTR_SET(vap, va_type, VFIFO);

	if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
		goto out;

	error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	vnode_put(dvp);

	return error;
}
3876
3877
3878 /*
3879 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
3880 *
3881 * Parameters: p Process requesting the open
3882 * uap User argument descriptor (see below)
3883 * retval (Ignored)
3884 *
3885 * Indirect: uap->path Path to fifo (same as 'mkfifo')
3886 * uap->uid UID to set
3887 * uap->gid GID to set
3888 * uap->mode File mode to set (same as 'mkfifo')
3889 * uap->xsecurity ACL to set, if creating
3890 *
3891 * Returns: 0 Success
3892 * !0 errno value
3893 *
3894 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3895 *
3896 * XXX: We should enumerate the possible errno values here, and where
3897 * in the code they originated.
3898 */
int
mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
{
	int ciferror;
	kauth_filesec_t xsecdst;
	struct vnode_attr va;

	AUDIT_ARG(owner, uap->uid, uap->gid);

	/* Copy in the optional ACL; freed after mkfifo1() regardless of outcome. */
	xsecdst = KAUTH_FILESEC_NONE;
	if (uap->xsecurity != USER_ADDR_NULL) {
		if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
			return ciferror;
	}

	VATTR_INIT(&va);
	/* Requested mode is filtered through the process umask. */
	VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
	/* Only apply uid/gid/ACL when the caller actually supplied them. */
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);
	if (xsecdst != KAUTH_FILESEC_NONE)
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);

	ciferror = mkfifo1(vfs_context_current(), uap->path, &va);

	if (xsecdst != KAUTH_FILESEC_NONE)
		kauth_filesec_free(xsecdst);
	return ciferror;
}
3929
3930 /* ARGSUSED */
3931 int
3932 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
3933 {
3934 struct vnode_attr va;
3935
3936 VATTR_INIT(&va);
3937 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3938
3939 return(mkfifo1(vfs_context_current(), uap->path, &va));
3940 }
3941
3942
/*
 * Return a pointer to the last occurrence of 'ch' in the NUL-terminated
 * string 'p', or NULL if it does not occur.  Like strrchr(3), asking for
 * ch == '\0' yields a pointer to the terminator.
 */
static char *
my_strrchr(char *p, int ch)
{
	char *last = NULL;

	do {
		if (*p == ch)
			last = p;
	} while (*p++);

	return (last);
}
3956
3957 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
3958
/*
 * Best-effort construction of the path to 'dvp' (with optional trailing
 * component 'leafname') into 'path' of capacity '_len'.  Sets
 * *truncated_path when the result is incomplete, falling back to an
 * ancestor's path or the mount point when vn_getpath() fails.  Returns
 * the length of the string produced, including the NUL.
 */
int
safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
{
	int ret, len = _len;

	*truncated_path = 0;
	ret = vn_getpath(dvp, path, &len);
	if (ret == 0 && len < (MAXPATHLEN - 1)) {
		if (leafname) {
			/* vn_getpath's len counts the NUL; overwrite it with '/'. */
			path[len-1] = '/';
			len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
			if (len > MAXPATHLEN) {
				char *ptr;

				// the string got truncated!
				*truncated_path = 1;
				ptr = my_strrchr(path, '/');
				if (ptr) {
					*ptr = '\0';   // chop off the string at the last directory component
				}
				len = strlen(path) + 1;
			}
		}
	} else if (ret == 0) {
		*truncated_path = 1;
	} else if (ret != 0) {
		struct vnode *mydvp=dvp;

		if (ret != ENOSPC) {
			printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
			    dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
		}
		*truncated_path = 1;

		/* Walk up toward the root until some ancestor's path fits. */
		do {
			if (mydvp->v_parent != NULL) {
				mydvp = mydvp->v_parent;
			} else if (mydvp->v_mount) {
				strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
				break;
			} else {
				// no parent and no mount point? only thing is to punt and say "/" changed
				strlcpy(path, "/", _len);
				len = 2;
				mydvp = NULL;
			}

			if (mydvp == NULL) {
				break;
			}

			len = _len;
			ret = vn_getpath(mydvp, path, &len);
		} while (ret == ENOSPC);
	}

	return len;
}
4017
4018
4019 /*
4020 * Make a hard file link.
4021 *
4022 * Returns: 0 Success
4023 * EPERM
4024 * EEXIST
4025 * EXDEV
4026 * namei:???
4027 * vnode_authorize:???
4028 * VNOP_LINK:???
4029 */
4030 /* ARGSUSED */
/*
 * Common implementation of link(2)/linkat(2): creates a hard link named
 * 'link' (resolved relative to fd2) to the object at 'path' (resolved
 * relative to fd1).  AT_SYMLINK_FOLLOW in 'flag' controls whether a
 * trailing symlink in 'path' is followed.
 */
static int
linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
    user_addr_t link, int flag, enum uio_seg segflg)
{
	vnode_t	vp, dvp, lvp;
	struct nameidata nd;
	int follow;
	int error;
#if CONFIG_FSE
	fse_info finfo;
#endif
	int need_event, has_listeners;
	char *target_path = NULL;
	int truncated=0;

	vp = dvp = lvp = NULLVP;

	/* look up the object we are linking to */
	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
	NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
	    segflg, path, ctx);

	error = nameiat(&nd, fd1);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/*
	 * Normally, linking to directories is not supported.
	 * However, some file systems may have limited support.
	 */
	if (vp->v_type == VDIR) {
		if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
			error = EPERM;   /* POSIX */
			goto out;
		}
		/* Linking to a directory requires ownership. */
		if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
			struct vnode_attr dva;

			VATTR_INIT(&dva);
			VATTR_WANTED(&dva, va_uid);
			if (vnode_getattr(vp, &dva, ctx) != 0 ||
			    !VATTR_IS_SUPPORTED(&dva, va_uid) ||
			    (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
				error = EACCES;
				goto out;
			}
		}
	}

	/* lookup the target node -- the nameidata is reused for the 2nd lookup */
#if CONFIG_TRIGGERS
	nd.ni_op = OP_LINK;
#endif
	nd.ni_cnd.cn_nameiop = CREATE;
	nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
	nd.ni_dirp = link;
	error = nameiat(&nd, fd2);
	if (error != 0)
		goto out;
	dvp = nd.ni_dvp;
	lvp = nd.ni_vp;

#if CONFIG_MACF
	if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
		goto out2;
#endif

	/* or to anything that kauth doesn't want us to (eg. immutable items) */
	if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
		goto out2;

	/* target node must not exist */
	if (lvp != NULLVP) {
		error = EEXIST;
		goto out2;
	}
	/* cannot link across mountpoints */
	if (vnode_mount(vp) != vnode_mount(dvp)) {
		error = EXDEV;
		goto out2;
	}

	/* authorize creation of the target node */
	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out2;

	/* and finally make the link */
	error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
	if (error)
		goto out2;

#if CONFIG_MACF
	(void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
#endif

#if CONFIG_FSE
	need_event = need_fsevent(FSE_CREATE_FILE, dvp);
#else
	need_event = 0;
#endif
	has_listeners = kauth_authorize_fileop_has_listeners();

	/* Notification paths are only built when someone will consume them. */
	if (need_event || has_listeners) {
		char *link_to_path = NULL;
		int len, link_name_len;

		/* build the path to the new link file */
		GET_PATH(target_path);
		if (target_path == NULL) {
			error = ENOMEM;
			goto out2;
		}

		len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);

		if (has_listeners) {
			/* build the path to file we are linking to */
			GET_PATH(link_to_path);
			if (link_to_path == NULL) {
				error = ENOMEM;
				goto out2;
			}

			link_name_len = MAXPATHLEN;
			if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
				/*
				 * Call out to allow 3rd party notification of rename.
				 * Ignore result of kauth_authorize_fileop call.
				 */
				kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
				    (uintptr_t)link_to_path,
				    (uintptr_t)target_path);
			}
			if (link_to_path != NULL) {
				RELEASE_PATH(link_to_path);
			}
		}
#if CONFIG_FSE
		if (need_event) {
			/* construct fsevent */
			if (get_fse_info(vp, &finfo, ctx) == 0) {
				if (truncated) {
					finfo.mode |= FSE_TRUNCATED_PATH;
				}

				// build the path to the destination of the link
				add_fsevent(FSE_CREATE_FILE, ctx,
				    FSE_ARG_STRING, len, target_path,
				    FSE_ARG_FINFO, &finfo,
				    FSE_ARG_DONE);
			}
			if (vp->v_parent) {
				add_fsevent(FSE_STAT_CHANGED, ctx,
				    FSE_ARG_VNODE, vp->v_parent,
				    FSE_ARG_DONE);
			}
		}
#endif
	}
out2:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);
	if (target_path != NULL) {
		RELEASE_PATH(target_path);
	}
out:
	if (lvp)
		vnode_put(lvp);
	if (dvp)
		vnode_put(dvp);
	vnode_put(vp);
	return (error);
}
4211
4212 int
4213 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4214 {
4215 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4216 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4217 }
4218
4219 int
4220 linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4221 {
4222 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4223 return (EINVAL);
4224
4225 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4226 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4227 }
4228
4229 /*
4230 * Make a symbolic link.
4231 *
4232 * We could add support for ACLs here too...
4233 */
4234 /* ARGSUSED */
/*
 * Common implementation of symlink(2)/symlinkat(2): creates a symlink at
 * 'link' (resolved relative to fd) whose contents are the string at
 * 'path_data'.  'segflg' applies to both addresses; for user space the
 * link contents are copied into a kernel MALLOC_ZONE buffer first.
 */
static int
symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
    user_addr_t link, enum uio_seg segflg)
{
	struct vnode_attr va;
	char *path;
	int error;
	struct nameidata nd;
	vnode_t	vp, dvp;
	uint32_t dfflags;	// Directory file flags
	size_t dummy=0;
	proc_t p;

	error = 0;
	if (UIO_SEG_IS_USER_SPACE(segflg)) {
		MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
		error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
	} else {
		/* Kernel address: use the caller's buffer directly, no copy. */
		path = (char *)path_data;
	}
	if (error)
		goto out;
	AUDIT_ARG(text, path);	/* This is the link string */

	NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
	    segflg, link, ctx);

	error = nameiat(&nd, fd);
	if (error)
		goto out;
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	p = vfs_context_proc(ctx);
	VATTR_INIT(&va);
	VATTR_SET(&va, va_type, VLNK);
	/* Symlink mode is ACCESSPERMS filtered through the umask. */
	VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);

	/*
	 * Handle inheritance of restricted flag
	 */
	error = vnode_flags(dvp, &dfflags, ctx);
	if (error)
		goto skipit;
	if (dfflags & SF_RESTRICTED)
		VATTR_SET(&va, va_flags, SF_RESTRICTED);

#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    dvp, &nd.ni_cnd, &va);
#endif
	if (error != 0) {
		goto skipit;
	}

	/* The target name must not already exist. */
	if (vp != NULL) {
		error = EEXIST;
		goto skipit;
	}

	/* authorize */
	if (error == 0)
		error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
	/* get default ownership, etc. */
	if (error == 0)
		error = vnode_authattr_new(dvp, &va, 0, ctx);
	if (error == 0)
		error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);

#if CONFIG_MACF
	if (error == 0 && vp)
		error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
#endif

	/* do fallback attribute handling */
	if (error == 0 && vp)
		error = vnode_setattr_fallback(vp, &va, ctx);

	if (error == 0) {
		int update_flags = 0;

		/*check if a new vnode was created, else try to get one*/
		if (vp == NULL) {
			/* VNOP_SYMLINK may not return the vnode; look it up. */
			nd.ni_cnd.cn_nameiop = LOOKUP;
#if CONFIG_TRIGGERS
			nd.ni_op = OP_LOOKUP;
#endif
			nd.ni_cnd.cn_flags = 0;
			error = nameiat(&nd, fd);
			vp = nd.ni_vp;

			if (vp == NULL)
				goto skipit;
		}

#if 0  /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
		/* call out to allow 3rd party notification of rename.
		 * Ignore result of kauth_authorize_fileop call.
		 */
		if (kauth_authorize_fileop_has_listeners() &&
		    namei(&nd) == 0) {
			char *new_link_path = NULL;
			int len;

			/* build the path to the new link file */
			new_link_path = get_pathbuff();
			len = MAXPATHLEN;
			vn_getpath(dvp, new_link_path, &len);
			if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
				new_link_path[len - 1] = '/';
				strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
			}

			kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
			    (uintptr_t)path, (uintptr_t)new_link_path);
			if (new_link_path != NULL)
				release_pathbuff(new_link_path);
		}
#endif
		// Make sure the name & parent pointers are hooked up
		if (vp->v_name == NULL)
			update_flags |= VNODE_UPDATE_NAME;
		if (vp->v_parent == NULLVP)
			update_flags |= VNODE_UPDATE_PARENT;

		if (update_flags)
			vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
		add_fsevent(FSE_CREATE_FILE, ctx,
		    FSE_ARG_VNODE, vp,
		    FSE_ARG_DONE);
#endif
	}

skipit:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	vnode_put(dvp);
out:
	/* Free the copyin buffer only if we allocated it above. */
	if (path && (path != (char *)path_data))
		FREE_ZONE(path, MAXPATHLEN, M_NAMEI);

	return (error);
}
4386
4387 int
4388 symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4389 {
4390 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4391 uap->link, UIO_USERSPACE));
4392 }
4393
4394 int
4395 symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4396 __unused int32_t *retval)
4397 {
4398 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4399 uap->path2, UIO_USERSPACE));
4400 }
4401
4402 /*
4403 * Delete a whiteout from the filesystem.
4404 * No longer supported.
4405 */
4406 int
4407 undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
4408 {
4409 return (ENOTSUP);
4410 }
4411
4412 /*
4413 * Delete a name from the filesystem.
4414 */
4415 /* ARGSUSED */
/*
 * Common implementation of unlink(2)/unlinkat(2)/delete(2): removes the
 * name at 'path_arg', resolved relative to 'start_dvp' if supplied,
 * otherwise relative to 'fd'.  'unlink_flags' carries VNODE_REMOVE_*
 * modifiers (Carbon no-delete-busy, audit suppression, namespace-event
 * suppression).  Supports compound-remove filesystems via EKEEPLOOKING
 * continuation and redrives the whole lookup on ENOENT races.
 */
static int
unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
    user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
{
	struct nameidata nd;
	vnode_t	vp, dvp;
	int error;
	struct componentname *cnp;
	char  *path = NULL;
	int  len=0;
#if CONFIG_FSE
	fse_info  finfo;
	struct vnode_attr va;
#endif
	int flags;
	int need_event;
	int has_listeners;
	int truncated_path;
	int batched;
	struct vnode_attr *vap;
	int do_retry;
	int retry_count = 0;
	int cn_flags;

	cn_flags = LOCKPARENT;
	if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
		cn_flags |= AUDITVNPATH1;
	/* If a starting dvp is passed, it trumps any fd passed. */
	if (start_dvp)
		cn_flags |= USEDVP;

#if NAMEDRSRCFORK
	/* unlink or delete is allowed on rsrc forks and named streams */
	cn_flags |= CN_ALLOWRSRCFORK;
#endif

retry:
	/* Per-attempt state is reset here; 'path' survives across retries. */
	do_retry = 0;
	flags = 0;
	need_event = 0;
	has_listeners = 0;
	truncated_path = 0;
	vap = NULL;

	NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);

	nd.ni_dvp = start_dvp;
	nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
	cnp = &nd.ni_cnd;

lookup_continue:
	error = nameiat(&nd, fd);
	if (error)
		return (error);

	dvp = nd.ni_dvp;
	vp = nd.ni_vp;


	/* With Carbon delete semantics, busy files cannot be deleted */
	if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
		flags |= VNODE_REMOVE_NODELETEBUSY;
	}

	/* Skip any potential upcalls if told to. */
	if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
		flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
	}

	if (vp) {
		batched = vnode_compound_remove_available(vp);
		/*
		 * The root of a mounted filesystem cannot be deleted.
		 */
		if (vp->v_flag & VROOT) {
			error = EBUSY;
		}

		if (!batched) {
			error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
			if (error) {
				if (error == ENOENT) {
					/* Lookup/authorize raced a concurrent remove; retry. */
					assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
					if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
						do_retry = 1;
						retry_count++;
					}
				}
				goto out;
			}
		}
	} else {
		/* No vp: the filesystem must support compound remove. */
		batched = 1;

		if (!vnode_compound_remove_available(dvp)) {
			panic("No vp, but no compound remove?");
		}
	}

#if CONFIG_FSE
	need_event = need_fsevent(FSE_DELETE, dvp);
	if (need_event) {
		if (!batched) {
			if ((vp->v_flag & VISHARDLINK) == 0) {
				/* XXX need to get these data in batched VNOP */
				get_fse_info(vp, &finfo, ctx);
			}
		} else {
			error = vfs_get_notify_attributes(&va);
			if (error) {
				goto out;
			}

			vap = &va;
		}
	}
#endif
	has_listeners = kauth_authorize_fileop_has_listeners();
	if (need_event || has_listeners) {
		if (path == NULL) {
			GET_PATH(path);
			if (path == NULL) {
				error = ENOMEM;
				goto out;
			}
		}
		len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
	}

#if NAMEDRSRCFORK
	if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
		error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
	else
#endif
	{
		error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
		vp = nd.ni_vp;
		if (error == EKEEPLOOKING) {
			/* Compound-remove FS needs the lookup continued. */
			if (!batched) {
				panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
			}

			if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
				panic("EKEEPLOOKING, but continue flag not set?");
			}

			if (vnode_isdir(vp)) {
				error = EISDIR;
				goto out;
			}
			goto lookup_continue;
		} else if (error == ENOENT && batched) {
			assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
			if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
				/*
				 * For compound VNOPs, the authorization callback may
				 * return ENOENT in case of racing hardlink lookups
				 * hitting the name cache, redrive the lookup.
				 */
				do_retry = 1;
				retry_count += 1;
				goto out;
			}
		}
	}

	/*
	 * Call out to allow 3rd party notification of delete.
	 * Ignore result of kauth_authorize_fileop call.
	 */
	if (!error) {
		if (has_listeners) {
			kauth_authorize_fileop(vfs_context_ucred(ctx),
			    KAUTH_FILEOP_DELETE,
			    (uintptr_t)vp,
			    (uintptr_t)path);
		}

		if (vp->v_flag & VISHARDLINK) {
			//
			// if a hardlink gets deleted we want to blow away the
			// v_parent link because the path that got us to this
			// instance of the link is no longer valid. this will
			// force the next call to get the path to ask the file
			// system instead of just following the v_parent link.
			//
			vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
		}

#if CONFIG_FSE
		if (need_event) {
			if (vp->v_flag & VISHARDLINK) {
				get_fse_info(vp, &finfo, ctx);
			} else if (vap) {
				vnode_get_fse_info_from_vap(vp, &finfo, vap);
			}
			if (truncated_path) {
				finfo.mode |= FSE_TRUNCATED_PATH;
			}
			add_fsevent(FSE_DELETE, ctx,
			    FSE_ARG_STRING, len, path,
			    FSE_ARG_FINFO, &finfo,
			    FSE_ARG_DONE);
		}
#endif
	}

out:
	if (path != NULL)
		RELEASE_PATH(path);

#if NAMEDRSRCFORK
	/* recycle the deleted rsrc fork vnode to force a reclaim, which
	 * will cause its shadow file to go away if necessary.
	 */
	if (vp && (vnode_isnamedstream(vp)) &&
	    (vp->v_parent != NULLVP) &&
	    vnode_isshadow(vp)) {
		vnode_recycle(vp);
	}
#endif
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);
	vnode_put(dvp);
	if (vp) {
		vnode_put(vp);
	}

	if (do_retry) {
		goto retry;
	}

	return (error);
}
4653
4654 int
4655 unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
4656 enum uio_seg segflg, int unlink_flags)
4657 {
4658 return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
4659 unlink_flags));
4660 }
4661
4662 /*
4663 * Delete a name from the filesystem using Carbon semantics.
4664 */
4665 int
4666 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
4667 {
4668 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4669 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
4670 }
4671
4672 /*
4673 * Delete a name from the filesystem using POSIX semantics.
4674 */
4675 int
4676 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
4677 {
4678 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4679 uap->path, UIO_USERSPACE, 0));
4680 }
4681
4682 int
4683 unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
4684 {
4685 if (uap->flag & ~AT_REMOVEDIR)
4686 return (EINVAL);
4687
4688 if (uap->flag & AT_REMOVEDIR)
4689 return (rmdirat_internal(vfs_context_current(), uap->fd,
4690 uap->path, UIO_USERSPACE));
4691 else
4692 return (unlinkat_internal(vfs_context_current(), uap->fd,
4693 NULLVP, uap->path, UIO_USERSPACE, 0));
4694 }
4695
4696 /*
4697 * Reposition read/write file offset.
4698 */
int
lseek(proc_t p, struct lseek_args *uap, off_t *retval)
{
	struct fileproc *fp;
	vnode_t vp;
	struct vfs_context *ctx;
	off_t offset = uap->offset, file_size;
	int error;

	if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
		/* fp_getfvp returns ENOTSUP for non-vnode fds; map to ESPIPE. */
		if (error == ENOTSUP)
			return (ESPIPE);
		return (error);
	}
	if (vnode_isfifo(vp)) {
		file_drop(uap->fd);
		return(ESPIPE);
	}


	ctx = vfs_context_current();
#if CONFIG_MACF
	/* An L_INCR seek by 0 only queries the offset; check "get" not "change". */
	if (uap->whence == L_INCR && uap->offset == 0)
		error = mac_file_check_get_offset(vfs_context_ucred(ctx),
		    fp->f_fglob);
	else
		error = mac_file_check_change_offset(vfs_context_ucred(ctx),
		    fp->f_fglob);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}
#endif
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}

	/* L_INCR/L_XTND/L_SET correspond to SEEK_CUR/SEEK_END/SEEK_SET. */
	switch (uap->whence) {
	case L_INCR:
		offset += fp->f_fglob->fg_offset;
		break;
	case L_XTND:
		if ((error = vnode_size(vp, &file_size, ctx)) != 0)
			break;
		offset += file_size;
		break;
	case L_SET:
		break;
	default:
		error = EINVAL;
	}
	if (error == 0) {
		if (uap->offset > 0 && offset < 0) {
			/* Incremented/relative move past max size */
			error = EOVERFLOW;
		} else {
			/*
			 * Allow negative offsets on character devices, per
			 * POSIX 1003.1-2001. Most likely for writing disk
			 * labels.
			 */
			if (offset < 0 && vp->v_type != VCHR) {
				/* Decremented/relative move before start */
				error = EINVAL;
			} else {
				/* Success */
				fp->f_fglob->fg_offset = offset;
				*retval = fp->f_fglob->fg_offset;
			}
		}
	}

	/*
	 * An lseek can affect whether data is "available to read." Use
	 * hint of NOTE_NONE so no EVFILT_VNODE events fire
	 */
	post_event_if_success(vp, error, NOTE_NONE);
	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}
4781
4782
4783 /*
4784 * Check access permissions.
4785 *
4786 * Returns: 0 Success
4787 * vnode_authorize:???
4788 */
4789 static int
4790 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
4791 {
4792 kauth_action_t action;
4793 int error;
4794
4795 /*
4796 * If just the regular access bits, convert them to something
4797 * that vnode_authorize will understand.
4798 */
4799 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4800 action = 0;
4801 if (uflags & R_OK)
4802 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4803 if (uflags & W_OK) {
4804 if (vnode_isdir(vp)) {
4805 action |= KAUTH_VNODE_ADD_FILE |
4806 KAUTH_VNODE_ADD_SUBDIRECTORY;
4807 /* might want delete rights here too */
4808 } else {
4809 action |= KAUTH_VNODE_WRITE_DATA;
4810 }
4811 }
4812 if (uflags & X_OK) {
4813 if (vnode_isdir(vp)) {
4814 action |= KAUTH_VNODE_SEARCH;
4815 } else {
4816 action |= KAUTH_VNODE_EXECUTE;
4817 }
4818 }
4819 } else {
4820 /* take advantage of definition of uflags */
4821 action = uflags >> 8;
4822 }
4823
4824 #if CONFIG_MACF
4825 error = mac_vnode_check_access(ctx, vp, uflags);
4826 if (error)
4827 return (error);
4828 #endif /* MAC */
4829
4830 /* action == 0 means only check for existence */
4831 if (action != 0) {
4832 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
4833 } else {
4834 error = 0;
4835 }
4836
4837 return(error);
4838 }
4839
4840
4841
/*
 * access_extended: Check access permissions in bulk.
 *
 * Description:	uap->entries		Pointer to an array of accessx
 *					descriptor structs, plus one or
 *					more NULL terminated strings (see
 *					"Notes" section below).
 *		uap->size		Size of the area pointed to by
 *					uap->entries.
 *		uap->results		Pointer to the results array.
 *
 * Returns:	0			Success
 *		ENOMEM			Insufficient memory
 *		EINVAL			Invalid arguments
 *		namei:EFAULT		Bad address
 *		namei:ENAMETOOLONG	Filename too long
 *		namei:ENOENT		No such file or directory
 *		namei:ELOOP		Too many levels of symbolic links
 *		namei:EBADF		Bad file descriptor
 *		namei:ENOTDIR		Not a directory
 *		namei:???
 *		access1:
 *
 * Implicit returns:
 *		uap->results		Array contents modified
 *
 * Notes:	The uap->entries are structured as an arbitrary length array
 *		of accessx descriptors, followed by one or more NULL terminated
 *		strings
 *
 *			struct accessx_descriptor[0]
 *			...
 *			struct accessx_descriptor[n]
 *			char name_data[0];
 *
 *		We determine the entry count by walking the buffer containing
 *		the uap->entries argument descriptor.  For each descriptor we
 *		see, the valid values for the offset ad_name_offset will be
 *		in the byte range:
 *
 *			[ uap->entries + sizeof(struct accessx_descriptor) ]
 *						to
 *				[ uap->entries + uap->size - 2 ]
 *
 *		since we must have at least one string, and the string must
 *		be at least one character plus the NULL terminator in length.
 *
 * XXX:		Need to support the check-as uid argument
 */
int
access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
{
	struct accessx_descriptor *input = NULL;
	errno_t *result = NULL;
	errno_t error = 0;
	int wantdelete = 0;
	unsigned int desc_max, desc_actual, i, j;
	struct vfs_context context;
	struct nameidata nd;
	int niopts;
	vnode_t vp = NULL;
	vnode_t dvp = NULL;
#define ACCESSX_MAX_DESCR_ON_STACK 10
	struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];

	/* NULL cred marks "not yet allocated" for the cleanup path below */
	context.vc_ucred = NULL;

	/*
	 * Validate parameters; if valid, copy the descriptor array and string
	 * arguments into local memory.  Before proceeding, the following
	 * conditions must have been met:
	 *
	 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
	 * o There must be sufficient room in the request for at least one
	 *   descriptor and a one byte NUL terminated string.
	 * o The allocation of local storage must not fail.
	 */
	if (uap->size > ACCESSX_MAX_TABLESIZE)
		return(ENOMEM);
	if (uap->size < (sizeof(struct accessx_descriptor) + 2))
		return(EINVAL);
	/* small requests are served from the stack to avoid an allocation */
	if (uap->size <= sizeof (stack_input)) {
		input = stack_input;
	} else {
		MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
		if (input == NULL) {
			error = ENOMEM;
			goto out;
		}
	}
	error = copyin(uap->entries, input, uap->size);
	if (error)
		goto out;

	AUDIT_ARG(opaque, input, uap->size);

	/*
	 * Force NUL termination of the copyin buffer to avoid nami() running
	 * off the end.  If the caller passes us bogus data, they may get a
	 * bogus result.
	 */
	((char *)input)[uap->size - 1] = 0;

	/*
	 * Access is defined as checking against the process' real identity,
	 * even if operations are checking the effective identity.  This
	 * requires that we use a local vfs context.
	 */
	context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
	context.vc_thread = current_thread();

	/*
	 * Find out how many entries we have, so we can allocate the result
	 * array by walking the list and adjusting the count downward by the
	 * earliest string offset we see.
	 */
	desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
	desc_actual = desc_max;
	for (i = 0; i < desc_actual; i++) {
		/*
		 * Take the offset to the name string for this entry and
		 * convert to an input array index, which would be one off
		 * the end of the array if this entry was the lowest-addressed
		 * name string.
		 */
		j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);

		/*
		 * An offset greater than the max allowable offset is an error.
		 * It is also an error for any valid entry to point
		 * to a location prior to the end of the current entry, if
		 * it's not a reference to the string of the previous entry.
		 */
		if (j > desc_max || (j != 0 && j <= i)) {
			error = EINVAL;
			goto out;
		}

		/*
		 * An offset of 0 means use the previous descriptor's offset;
		 * this is used to chain multiple requests for the same file
		 * to avoid multiple lookups.
		 */
		if (j == 0) {
			/* This is not valid for the first entry */
			if (i == 0) {
				error = EINVAL;
				goto out;
			}
			continue;
		}

		/*
		 * If the offset of the string for this descriptor is before
		 * what we believe is the current actual last descriptor,
		 * then we need to adjust our estimate downward; this permits
		 * the string table following the last descriptor to be out
		 * of order relative to the descriptor list.
		 */
		if (j < desc_actual)
			desc_actual = j;
	}

	/*
	 * We limit the actual number of descriptors we are willing to process
	 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS.  If the number being
	 * requested does not exceed this limit,
	 */
	if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
		error = ENOMEM;
		goto out;
	}
	MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
	if (result == NULL) {
		error = ENOMEM;
		goto out;
	}

	/*
	 * Do the work by iterating over the descriptor entries we know to
	 * at least appear to contain valid data.
	 */
	error = 0;
	for (i = 0; i < desc_actual; i++) {
		/*
		 * If the ad_name_offset is 0, then we use the previous
		 * results to make the check; otherwise, we are looking up
		 * a new file name.
		 */
		if (input[i].ad_name_offset != 0) {
			/* discard old vnodes */
			if (vp) {
				vnode_put(vp);
				vp = NULL;
			}
			if (dvp) {
				vnode_put(dvp);
				dvp = NULL;
			}

			/*
			 * Scan forward in the descriptor list to see if we
			 * need the parent vnode.  We will need it if we are
			 * deleting, since we must have rights  to remove
			 * entries in the parent directory, as well as the
			 * rights to delete the object itself.
			 */
			wantdelete = input[i].ad_flags & _DELETE_OK;
			for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
				if (input[j].ad_flags & _DELETE_OK)
					wantdelete = 1;

			niopts = FOLLOW | AUDITVNPATH1;

			/* need parent for vnode_authorize for deletion test */
			if (wantdelete)
				niopts |= WANTPARENT;

			/* do the lookup */
			NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
			       CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
			       &context);
			error = namei(&nd);
			if (!error) {
				vp = nd.ni_vp;
				if (wantdelete)
					dvp = nd.ni_dvp;
			}
			nameidone(&nd);
		}

		/*
		 * Handle lookup errors.  Per-file errors are recorded in
		 * the results array; anything else aborts the whole call.
		 */
		switch(error) {
		case ENOENT:
		case EACCES:
		case EPERM:
		case ENOTDIR:
			result[i] = error;
			break;
		case 0:
			/* run this access check */
			result[i] = access1(vp, dvp, input[i].ad_flags, &context);
			break;
		default:
			/* fatal lookup error */

			goto out;
		}
	}

	AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);

	/* copy out results */
	error = copyout(result, uap->results, desc_actual * sizeof(errno_t));

out:
	if (input && input != stack_input)
		FREE(input, M_TEMP);
	if (result)
		FREE(result, M_TEMP);
	if (vp)
		vnode_put(vp);
	if (dvp)
		vnode_put(dvp);
	if (IS_VALID_CRED(context.vc_ucred))
		kauth_cred_unref(&context.vc_ucred);
	return(error);
}
5112
5113
/*
 * Common implementation of access(2)/faccessat(2).
 *
 * Returns:	0			Success
 *	namei:EFAULT			Bad address
 *	namei:ENAMETOOLONG		Filename too long
 *	namei:ENOENT			No such file or directory
 *	namei:ELOOP			Too many levels of symbolic links
 *	namei:EBADF			Bad file descriptor
 *	namei:ENOTDIR			Not a directory
 *	namei:???
 *	access1:
 */
static int
faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
    int flag, enum uio_seg segflg)
{
	int error;
	struct nameidata nd;
	int niopts;
	struct vfs_context context;
#if NAMEDRSRCFORK
	int is_namedstream = 0;
#endif

	/*
	 * Unless the AT_EACCESS option is used, Access is defined as checking
	 * against the process' real identity, even if operations are checking
	 * the effective identity.  So we need to tweak the credential
	 * in the context for that case.
	 */
	if (!(flag & AT_EACCESS))
		context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
	else
		context.vc_ucred = ctx->vc_ucred;
	context.vc_thread = ctx->vc_thread;


	niopts = FOLLOW | AUDITVNPATH1;
	/* need parent for vnode_authorize for deletion test */
	if (amode & _DELETE_OK)
		niopts |= WANTPARENT;
	NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
	       path, &context);

#if NAMEDRSRCFORK
	/* access(F_OK) calls are allowed for resource forks. */
	if (amode == F_OK)
		nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
#endif
	error = nameiat(&nd, fd);
	if (error)
		goto out;

#if NAMEDRSRCFORK
	/* Grab reference on the shadow stream file vnode to
	 * force an inactive on release which will mark it
	 * for recycle.
	 */
	if (vnode_isnamedstream(nd.ni_vp) &&
	    (nd.ni_vp->v_parent != NULLVP) &&
	    vnode_isshadow(nd.ni_vp)) {
		is_namedstream = 1;
		vnode_ref(nd.ni_vp);
	}
#endif

	error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);

#if NAMEDRSRCFORK
	if (is_namedstream) {
		vnode_rele(nd.ni_vp);
	}
#endif

	/* drop the iocounts taken by the lookup; dvp only if WANTPARENT */
	vnode_put(nd.ni_vp);
	if (amode & _DELETE_OK)
		vnode_put(nd.ni_dvp);
	nameidone(&nd);

out:
	/* only unref the cred if we allocated a real-identity copy above */
	if (!(flag & AT_EACCESS))
		kauth_cred_unref(&context.vc_ucred);
	return (error);
}
5197
5198 int
5199 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5200 {
5201 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5202 uap->path, uap->flags, 0, UIO_USERSPACE));
5203 }
5204
5205 int
5206 faccessat(__unused proc_t p, struct faccessat_args *uap,
5207 __unused int32_t *retval)
5208 {
5209 if (uap->flag & ~AT_EACCESS)
5210 return (EINVAL);
5211
5212 return (faccessat_internal(vfs_context_current(), uap->fd,
5213 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5214 }
5215
5216 /*
5217 * Returns: 0 Success
5218 * EFAULT
5219 * copyout:EFAULT
5220 * namei:???
5221 * vn_stat:???
5222 */
5223 static int
5224 fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5225 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5226 enum uio_seg segflg, int fd, int flag)
5227 {
5228 struct nameidata nd;
5229 int follow;
5230 union {
5231 struct stat sb;
5232 struct stat64 sb64;
5233 } source;
5234 union {
5235 struct user64_stat user64_sb;
5236 struct user32_stat user32_sb;
5237 struct user64_stat64 user64_sb64;
5238 struct user32_stat64 user32_sb64;
5239 } dest;
5240 caddr_t sbp;
5241 int error, my_size;
5242 kauth_filesec_t fsec;
5243 size_t xsecurity_bufsize;
5244 void * statptr;
5245
5246 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5247 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5248 segflg, path, ctx);
5249
5250 #if NAMEDRSRCFORK
5251 int is_namedstream = 0;
5252 /* stat calls are allowed for resource forks. */
5253 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5254 #endif
5255 error = nameiat(&nd, fd);
5256 if (error)
5257 return (error);
5258 fsec = KAUTH_FILESEC_NONE;
5259
5260 statptr = (void *)&source;
5261
5262 #if NAMEDRSRCFORK
5263 /* Grab reference on the shadow stream file vnode to
5264 * force an inactive on release which will mark it
5265 * for recycle.
5266 */
5267 if (vnode_isnamedstream(nd.ni_vp) &&
5268 (nd.ni_vp->v_parent != NULLVP) &&
5269 vnode_isshadow(nd.ni_vp)) {
5270 is_namedstream = 1;
5271 vnode_ref(nd.ni_vp);
5272 }
5273 #endif
5274
5275 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
5276
5277 #if NAMEDRSRCFORK
5278 if (is_namedstream) {
5279 vnode_rele(nd.ni_vp);
5280 }
5281 #endif
5282 vnode_put(nd.ni_vp);
5283 nameidone(&nd);
5284
5285 if (error)
5286 return (error);
5287 /* Zap spare fields */
5288 if (isstat64 != 0) {
5289 source.sb64.st_lspare = 0;
5290 source.sb64.st_qspare[0] = 0LL;
5291 source.sb64.st_qspare[1] = 0LL;
5292 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5293 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
5294 my_size = sizeof(dest.user64_sb64);
5295 sbp = (caddr_t)&dest.user64_sb64;
5296 } else {
5297 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
5298 my_size = sizeof(dest.user32_sb64);
5299 sbp = (caddr_t)&dest.user32_sb64;
5300 }
5301 /*
5302 * Check if we raced (post lookup) against the last unlink of a file.
5303 */
5304 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5305 source.sb64.st_nlink = 1;
5306 }
5307 } else {
5308 source.sb.st_lspare = 0;
5309 source.sb.st_qspare[0] = 0LL;
5310 source.sb.st_qspare[1] = 0LL;
5311 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5312 munge_user64_stat(&source.sb, &dest.user64_sb);
5313 my_size = sizeof(dest.user64_sb);
5314 sbp = (caddr_t)&dest.user64_sb;
5315 } else {
5316 munge_user32_stat(&source.sb, &dest.user32_sb);
5317 my_size = sizeof(dest.user32_sb);
5318 sbp = (caddr_t)&dest.user32_sb;
5319 }
5320
5321 /*
5322 * Check if we raced (post lookup) against the last unlink of a file.
5323 */
5324 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5325 source.sb.st_nlink = 1;
5326 }
5327 }
5328 if ((error = copyout(sbp, ub, my_size)) != 0)
5329 goto out;
5330
5331 /* caller wants extended security information? */
5332 if (xsecurity != USER_ADDR_NULL) {
5333
5334 /* did we get any? */
5335 if (fsec == KAUTH_FILESEC_NONE) {
5336 if (susize(xsecurity_size, 0) != 0) {
5337 error = EFAULT;
5338 goto out;
5339 }
5340 } else {
5341 /* find the user buffer size */
5342 xsecurity_bufsize = fusize(xsecurity_size);
5343
5344 /* copy out the actual data size */
5345 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5346 error = EFAULT;
5347 goto out;
5348 }
5349
5350 /* if the caller supplied enough room, copy out to it */
5351 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5352 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5353 }
5354 }
5355 out:
5356 if (fsec != KAUTH_FILESEC_NONE)
5357 kauth_filesec_free(fsec);
5358 return (error);
5359 }
5360
5361 /*
5362 * stat_extended: Get file status; with extended security (ACL).
5363 *
5364 * Parameters: p (ignored)
5365 * uap User argument descriptor (see below)
5366 * retval (ignored)
5367 *
5368 * Indirect: uap->path Path of file to get status from
5369 * uap->ub User buffer (holds file status info)
5370 * uap->xsecurity ACL to get (extended security)
5371 * uap->xsecurity_size Size of ACL
5372 *
5373 * Returns: 0 Success
5374 * !0 errno value
5375 *
5376 */
5377 int
5378 stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5379 __unused int32_t *retval)
5380 {
5381 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5382 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5383 0));
5384 }
5385
5386 /*
5387 * Returns: 0 Success
5388 * fstatat_internal:??? [see fstatat_internal() in this file]
5389 */
5390 int
5391 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
5392 {
5393 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5394 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
5395 }
5396
5397 int
5398 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
5399 {
5400 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5401 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
5402 }
5403
5404 /*
5405 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5406 *
5407 * Parameters: p (ignored)
5408 * uap User argument descriptor (see below)
5409 * retval (ignored)
5410 *
5411 * Indirect: uap->path Path of file to get status from
5412 * uap->ub User buffer (holds file status info)
5413 * uap->xsecurity ACL to get (extended security)
5414 * uap->xsecurity_size Size of ACL
5415 *
5416 * Returns: 0 Success
5417 * !0 errno value
5418 *
5419 */
5420 int
5421 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
5422 {
5423 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5424 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5425 0));
5426 }
5427
5428 /*
5429 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5430 *
5431 * Parameters: p (ignored)
5432 * uap User argument descriptor (see below)
5433 * retval (ignored)
5434 *
5435 * Indirect: uap->path Path of file to get status from
5436 * uap->ub User buffer (holds file status info)
5437 * uap->xsecurity ACL to get (extended security)
5438 * uap->xsecurity_size Size of ACL
5439 *
5440 * Returns: 0 Success
5441 * !0 errno value
5442 *
5443 */
5444 int
5445 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
5446 {
5447 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5448 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5449 AT_SYMLINK_NOFOLLOW));
5450 }
5451
5452 /*
5453 * Get file status; this version does not follow links.
5454 */
5455 int
5456 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
5457 {
5458 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5459 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5460 }
5461
5462 int
5463 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
5464 {
5465 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5466 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5467 }
5468
5469 /*
5470 * lstat64_extended: Get file status; can handle large inode numbers; does not
5471 * follow links; with extended security (ACL).
5472 *
5473 * Parameters: p (ignored)
5474 * uap User argument descriptor (see below)
5475 * retval (ignored)
5476 *
5477 * Indirect: uap->path Path of file to get status from
5478 * uap->ub User buffer (holds file status info)
5479 * uap->xsecurity ACL to get (extended security)
5480 * uap->xsecurity_size Size of ACL
5481 *
5482 * Returns: 0 Success
5483 * !0 errno value
5484 *
5485 */
5486 int
5487 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
5488 {
5489 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5490 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5491 AT_SYMLINK_NOFOLLOW));
5492 }
5493
5494 int
5495 fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5496 {
5497 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5498 return (EINVAL);
5499
5500 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5501 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5502 }
5503
5504 int
5505 fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5506 __unused int32_t *retval)
5507 {
5508 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5509 return (EINVAL);
5510
5511 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5512 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
5513 }
5514
5515 /*
5516 * Get configurable pathname variables.
5517 *
5518 * Returns: 0 Success
5519 * namei:???
5520 * vn_pathconf:???
5521 *
5522 * Notes: Global implementation constants are intended to be
5523 * implemented in this function directly; all other constants
5524 * are per-FS implementation, and therefore must be handled in
5525 * each respective FS, instead.
5526 *
5527 * XXX We implement some things globally right now that should actually be
5528 * XXX per-FS; we will need to deal with this at some point.
5529 */
5530 /* ARGSUSED */
5531 int
5532 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
5533 {
5534 int error;
5535 struct nameidata nd;
5536 vfs_context_t ctx = vfs_context_current();
5537
5538 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
5539 UIO_USERSPACE, uap->path, ctx);
5540 error = namei(&nd);
5541 if (error)
5542 return (error);
5543
5544 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
5545
5546 vnode_put(nd.ni_vp);
5547 nameidone(&nd);
5548 return (error);
5549 }
5550
/*
 * Return target name of a symbolic link.
 *
 * Reads the link target into 'buf' (at most 'bufsize' bytes, not NUL
 * terminated, per readlink(2) semantics).  On return *retval holds the
 * number of bytes transferred.  NOTE(review): *retval is set even on the
 * error paths (it will be 0 then, since nothing was read) — confirm
 * callers rely only on it when error == 0.
 */
/* ARGSUSED */
static int
readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
    enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
    int *retval)
{
	vnode_t vp;
	uio_t auio;
	int error;
	struct nameidata nd;
	char uio_buf[ UIO_SIZEOF(1) ];

	/* NOFOLLOW: we want the symlink itself, not its target */
	NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
	    seg, path, ctx);

	error = nameiat(&nd, fd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	/* safe: we still hold an iocount on vp after nameidone() */
	nameidone(&nd);

	auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
	    &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, buf, bufsize);
	if (vp->v_type != VLNK) {
		/* not a symlink: EINVAL per POSIX */
		error = EINVAL;
	} else {
#if CONFIG_MACF
		error = mac_vnode_check_readlink(ctx, vp);
#endif
		if (error == 0)
			error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
			    ctx);
		if (error == 0)
			error = VNOP_READLINK(vp, auio, ctx);
	}
	vnode_put(vp);

	/* bytes actually copied = requested - residual */
	*retval = bufsize - (int)uio_resid(auio);
	return (error);
}
5596
5597 int
5598 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5599 {
5600 enum uio_seg procseg;
5601
5602 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5603 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5604 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5605 uap->count, procseg, retval));
5606 }
5607
5608 int
5609 readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5610 {
5611 enum uio_seg procseg;
5612
5613 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5614 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5615 procseg, uap->buf, uap->bufsize, procseg, retval));
5616 }
5617
/*
 * Change file flags.
 *
 * NOTE: vp is expected to carry an iocount from the caller, and this
 * function ALWAYS releases it (vnode_put) before returning — on success
 * and on every error path.  Callers must not touch vp afterwards.
 */
static int
chflags1(vnode_t vp, int flags, vfs_context_t ctx)
{
	struct vnode_attr va;
	kauth_action_t action;
	int error;

	VATTR_INIT(&va);
	VATTR_SET(&va, va_flags, flags);

#if CONFIG_MACF
	error = mac_vnode_check_setflags(ctx, vp, flags);
	if (error)
		goto out;
#endif

	/* request authorisation, disregard immutability */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	/*
	 * Request that the auth layer disregard those file flags it's allowed to when
	 * authorizing this operation; we need to do this in order to be able to
	 * clear immutable flags.
	 */
	if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
		goto out;
	error = vnode_setattr(vp, &va, ctx);

	/* filesystem accepted the setattr but does not support flags */
	if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
		error = ENOTSUP;
	}
out:
	/* consume the caller's iocount unconditionally */
	vnode_put(vp);
	return(error);
}
5656
5657 /*
5658 * Change flags of a file given a path name.
5659 */
5660 /* ARGSUSED */
5661 int
5662 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
5663 {
5664 vnode_t vp;
5665 vfs_context_t ctx = vfs_context_current();
5666 int error;
5667 struct nameidata nd;
5668
5669 AUDIT_ARG(fflags, uap->flags);
5670 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5671 UIO_USERSPACE, uap->path, ctx);
5672 error = namei(&nd);
5673 if (error)
5674 return (error);
5675 vp = nd.ni_vp;
5676 nameidone(&nd);
5677
5678 error = chflags1(vp, uap->flags, ctx);
5679
5680 return(error);
5681 }
5682
5683 /*
5684 * Change flags of a file given a file descriptor.
5685 */
5686 /* ARGSUSED */
5687 int
5688 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
5689 {
5690 vnode_t vp;
5691 int error;
5692
5693 AUDIT_ARG(fd, uap->fd);
5694 AUDIT_ARG(fflags, uap->flags);
5695 if ( (error = file_vnode(uap->fd, &vp)) )
5696 return (error);
5697
5698 if ((error = vnode_getwithref(vp))) {
5699 file_drop(uap->fd);
5700 return(error);
5701 }
5702
5703 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5704
5705 error = chflags1(vp, uap->flags, vfs_context_current());
5706
5707 file_drop(uap->fd);
5708 return (error);
5709 }
5710
5711 /*
5712 * Change security information on a filesystem object.
5713 *
5714 * Returns: 0 Success
5715 * EPERM Operation not permitted
5716 * vnode_authattr:??? [anything vnode_authattr can return]
5717 * vnode_authorize:??? [anything vnode_authorize can return]
5718 * vnode_setattr:??? [anything vnode_setattr can return]
5719 *
5720 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5721 * translated to EPERM before being returned.
5722 */
5723 static int
5724 chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
5725 {
5726 kauth_action_t action;
5727 int error;
5728
5729 AUDIT_ARG(mode, vap->va_mode);
5730 /* XXX audit new args */
5731
5732 #if NAMEDSTREAMS
5733 /* chmod calls are not allowed for resource forks. */
5734 if (vp->v_flag & VISNAMEDSTREAM) {
5735 return (EPERM);
5736 }
5737 #endif
5738
5739 #if CONFIG_MACF
5740 if (VATTR_IS_ACTIVE(vap, va_mode) &&
5741 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
5742 return (error);
5743 #endif
5744
5745 /* make sure that the caller is allowed to set this security information */
5746 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
5747 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5748 if (error == EACCES)
5749 error = EPERM;
5750 return(error);
5751 }
5752
5753 error = vnode_setattr(vp, vap, ctx);
5754
5755 return (error);
5756 }
5757
5758
5759 /*
5760 * Change mode of a file given a path name.
5761 *
5762 * Returns: 0 Success
5763 * namei:??? [anything namei can return]
5764 * chmod_vnode:??? [anything chmod_vnode can return]
5765 */
5766 static int
5767 chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
5768 int fd, int flag, enum uio_seg segflg)
5769 {
5770 struct nameidata nd;
5771 int follow, error;
5772
5773 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5774 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
5775 segflg, path, ctx);
5776 if ((error = nameiat(&nd, fd)))
5777 return (error);
5778 error = chmod_vnode(ctx, nd.ni_vp, vap);
5779 vnode_put(nd.ni_vp);
5780 nameidone(&nd);
5781 return(error);
5782 }
5783
/*
 * chmod_extended: Change the mode of a file given a path name; with extended
 * argument list (including extended security (ACL)).
 *
 * Parameters:	p			Process requesting the open
 *		uap			User argument descriptor (see below)
 *		retval			(ignored)
 *
 * Indirect:	uap->path		Path to object (same as 'chmod')
 *		uap->uid		UID to set
 *		uap->gid		GID to set
 *		uap->mode		File mode to set (same as 'chmod')
 *		uap->xsecurity		ACL to set (or delete)
 *
 * Returns:	0			Success
 *		!0			errno value
 *
 * Notes:	The kauth_filesec_t in 'va', if any, is in host byte order.
 *
 * XXX:		We should enummerate the possible errno values here, and where
 *		in the code they originated.
 */
int
chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
{
	int error;
	struct vnode_attr va;
	kauth_filesec_t xsecdst;

	AUDIT_ARG(owner, uap->uid, uap->gid);

	/* -1 / KAUTH_*_NONE sentinels mean "leave this attribute alone" */
	VATTR_INIT(&va);
	if (uap->mode != -1)
		VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);

	xsecdst = NULL;
	switch(uap->xsecurity) {
	/* explicit remove request: userspace passes _FILESEC_REMOVE_ACL,
	 * which is the sentinel pointer value 1 */
	case CAST_USER_ADDR_T((void *)1):	/* _FILESEC_REMOVE_ACL */
		VATTR_SET(&va, va_acl, NULL);
		break;
	/* not being set */
	case USER_ADDR_NULL:
		break;
	default:
		/* copy the caller's filesec in; xsecdst owns it until freed below */
		if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
			return(error);
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
		KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
	}

	error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
	    UIO_USERSPACE);

	if (xsecdst != NULL)
		kauth_filesec_free(xsecdst);
	return(error);
}
5846
5847 /*
5848 * Returns: 0 Success
5849 * chmodat:??? [anything chmodat can return]
5850 */
5851 static int
5852 fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
5853 int flag, enum uio_seg segflg)
5854 {
5855 struct vnode_attr va;
5856
5857 VATTR_INIT(&va);
5858 VATTR_SET(&va, va_mode, mode & ALLPERMS);
5859
5860 return (chmodat(ctx, path, &va, fd, flag, segflg));
5861 }
5862
5863 int
5864 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
5865 {
5866 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
5867 AT_FDCWD, 0, UIO_USERSPACE));
5868 }
5869
5870 int
5871 fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
5872 {
5873 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5874 return (EINVAL);
5875
5876 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
5877 uap->fd, uap->flag, UIO_USERSPACE));
5878 }
5879
5880 /*
5881 * Change mode of a file given a file descriptor.
5882 */
static int
fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
{
	vnode_t vp;
	int error;

	AUDIT_ARG(fd, fd);

	/* Map the descriptor to its vnode; takes a file reference. */
	if ((error = file_vnode(fd, &vp)) != 0)
		return (error);
	/* Take an iocount on the vnode before using it. */
	if ((error = vnode_getwithref(vp)) != 0) {
		file_drop(fd);
		return(error);
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	error = chmod_vnode(vfs_context_current(), vp, vap);
	/* Drop the iocount first, then the file reference taken above. */
	(void)vnode_put(vp);
	file_drop(fd);

	return (error);
}
5905
5906 /*
5907 * fchmod_extended: Change mode of a file given a file descriptor; with
5908 * extended argument list (including extended security (ACL)).
5909 *
5910 * Parameters: p Process requesting to change file mode
5911 * uap User argument descriptor (see below)
5912 * retval (ignored)
5913 *
5914 * Indirect: uap->mode File mode to set (same as 'chmod')
5915 * uap->uid UID to set
5916 * uap->gid GID to set
5917 * uap->xsecurity ACL to set (or delete)
5918 * uap->fd File descriptor of file to change mode
5919 *
5920 * Returns: 0 Success
5921 * !0 errno value
5922 *
5923 */
5924 int
5925 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
5926 {
5927 int error;
5928 struct vnode_attr va;
5929 kauth_filesec_t xsecdst;
5930
5931 AUDIT_ARG(owner, uap->uid, uap->gid);
5932
5933 VATTR_INIT(&va);
5934 if (uap->mode != -1)
5935 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5936 if (uap->uid != KAUTH_UID_NONE)
5937 VATTR_SET(&va, va_uid, uap->uid);
5938 if (uap->gid != KAUTH_GID_NONE)
5939 VATTR_SET(&va, va_gid, uap->gid);
5940
5941 xsecdst = NULL;
5942 switch(uap->xsecurity) {
5943 case USER_ADDR_NULL:
5944 VATTR_SET(&va, va_acl, NULL);
5945 break;
5946 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5947 VATTR_SET(&va, va_acl, NULL);
5948 break;
5949 /* not being set */
5950 case CAST_USER_ADDR_T(-1):
5951 break;
5952 default:
5953 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5954 return(error);
5955 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5956 }
5957
5958 error = fchmod1(p, uap->fd, &va);
5959
5960
5961 switch(uap->xsecurity) {
5962 case USER_ADDR_NULL:
5963 case CAST_USER_ADDR_T(-1):
5964 break;
5965 default:
5966 if (xsecdst != NULL)
5967 kauth_filesec_free(xsecdst);
5968 }
5969 return(error);
5970 }
5971
5972 int
5973 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
5974 {
5975 struct vnode_attr va;
5976
5977 VATTR_INIT(&va);
5978 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5979
5980 return(fchmod1(p, uap->fd, &va));
5981 }
5982
5983
5984 /*
5985 * Set ownership given a path name.
5986 */
5987 /* ARGSUSED */
static int
fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
    gid_t gid, int flag, enum uio_seg segflg)
{
	vnode_t vp;
	struct vnode_attr va;
	int error;
	struct nameidata nd;
	int follow;
	kauth_action_t action;

	AUDIT_ARG(owner, uid, gid);

	/* AT_SYMLINK_NOFOLLOW gives lchown semantics: act on the link. */
	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
	NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
	    path, ctx);
	error = nameiat(&nd, fd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* VNOVAL in either id means "leave that id unchanged". */
	VATTR_INIT(&va);
	if (uid != (uid_t)VNOVAL)
		VATTR_SET(&va, va_uid, uid);
	if (gid != (gid_t)VNOVAL)
		VATTR_SET(&va, va_gid, gid);

#if CONFIG_MACF
	error = mac_vnode_check_setowner(ctx, vp, uid, gid);
	if (error)
		goto out;
#endif

	/* preflight and authorize attribute changes */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
		goto out;
	error = vnode_setattr(vp, &va, ctx);

out:
	/*
	 * EACCES is only allowed from namei(); permissions failure should
	 * return EPERM, so we need to translate the error code.
	 */
	if (error == EACCES)
		error = EPERM;

	vnode_put(vp);
	return (error);
}
6041
6042 int
6043 chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
6044 {
6045 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6046 uap->uid, uap->gid, 0, UIO_USERSPACE));
6047 }
6048
6049 int
6050 lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
6051 {
6052 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6053 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6054 }
6055
6056 int
6057 fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6058 {
6059 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6060 return (EINVAL);
6061
6062 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6063 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
6064 }
6065
6066 /*
6067 * Set ownership given a file descriptor.
6068 */
6069 /* ARGSUSED */
int
fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
{
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	vnode_t vp;
	int error;
	kauth_action_t action;

	AUDIT_ARG(owner, uap->uid, uap->gid);
	AUDIT_ARG(fd, uap->fd);

	/* Map the descriptor to its vnode; takes a file reference. */
	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	/* Take an iocount before touching the vnode. */
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* VNOVAL in either id means "leave that id unchanged". */
	VATTR_INIT(&va);
	if (uap->uid != VNOVAL)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != VNOVAL)
		VATTR_SET(&va, va_gid, uap->gid);

#if NAMEDSTREAMS
	/* chown calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
		error = EPERM;
		goto out;
	}
#endif

#if CONFIG_MACF
	error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
	if (error)
		goto out;
#endif

	/* preflight and authorize attribute changes */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		/* Permission failure here reports EPERM, not EACCES. */
		if (error == EACCES)
			error = EPERM;
		goto out;
	}
	error = vnode_setattr(vp, &va, ctx);

out:
	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}
6126
6127 static int
6128 getutimes(user_addr_t usrtvp, struct timespec *tsp)
6129 {
6130 int error;
6131
6132 if (usrtvp == USER_ADDR_NULL) {
6133 struct timeval old_tv;
6134 /* XXX Y2038 bug because of microtime argument */
6135 microtime(&old_tv);
6136 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
6137 tsp[1] = tsp[0];
6138 } else {
6139 if (IS_64BIT_PROCESS(current_proc())) {
6140 struct user64_timeval tv[2];
6141 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6142 if (error)
6143 return (error);
6144 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6145 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6146 } else {
6147 struct user32_timeval tv[2];
6148 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6149 if (error)
6150 return (error);
6151 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6152 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6153 }
6154 }
6155 return 0;
6156 }
6157
static int
setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
    int nullflag)
{
	int error;
	struct vnode_attr va;
	kauth_action_t action;

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* ts[0] is the access time, ts[1] the modification time. */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_access_time, ts[0]);
	VATTR_SET(&va, va_modify_time, ts[1]);
	/* nullflag: caller passed a NULL tptr (utimes(path, NULL)). */
	if (nullflag)
		va.va_vaflags |= VA_UTIMES_NULL;

#if NAMEDSTREAMS
	/* utimes calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
		error = EPERM;
		goto out;
	}
#endif

#if CONFIG_MACF
	error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
	if (error)
		goto out;
#endif
	/* Explicit times map an auth refusal to EPERM rather than EACCES. */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
		if (!nullflag && error == EACCES)
			error = EPERM;
		goto out;
	}

	/* since we may not need to auth anything, check here */
	if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		if (!nullflag && error == EACCES)
			error = EPERM;
		goto out;
	}
	error = vnode_setattr(vp, &va, ctx);

out:
	return error;
}
6204
6205 /*
6206 * Set the access and modification times of a file.
6207 */
6208 /* ARGSUSED */
int
utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
{
	struct timespec ts[2];
	user_addr_t usrtvp;
	int error;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();

	/*
	 * AUDIT: Needed to change the order of operations to do the
	 * name lookup first because auditing wants the path.
	 */
	NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	nameidone(&nd);

	/*
	 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
	 * the current time instead.
	 */
	usrtvp = uap->tptr;
	if ((error = getutimes(usrtvp, ts)) != 0)
		goto out;

	/* The last argument flags the utimes(path, NULL) "set to now" case. */
	error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);

out:
	vnode_put(nd.ni_vp);
	return (error);
}
6243
6244 /*
6245 * Set the access and modification times of a file.
6246 */
6247 /* ARGSUSED */
6248 int
6249 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
6250 {
6251 struct timespec ts[2];
6252 vnode_t vp;
6253 user_addr_t usrtvp;
6254 int error;
6255
6256 AUDIT_ARG(fd, uap->fd);
6257 usrtvp = uap->tptr;
6258 if ((error = getutimes(usrtvp, ts)) != 0)
6259 return (error);
6260 if ((error = file_vnode(uap->fd, &vp)) != 0)
6261 return (error);
6262 if((error = vnode_getwithref(vp))) {
6263 file_drop(uap->fd);
6264 return(error);
6265 }
6266
6267 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
6268 vnode_put(vp);
6269 file_drop(uap->fd);
6270 return(error);
6271 }
6272
6273 /*
6274 * Truncate a file given its path name.
6275 */
6276 /* ARGSUSED */
int
truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;
	kauth_action_t action;

	/* Negative lengths are rejected up front. */
	if (uap->length < 0)
		return(EINVAL);
	NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	if ((error = namei(&nd)))
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* Truncation is expressed as a data-size attribute change. */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_data_size, uap->length);

#if CONFIG_MACF
	error = mac_vnode_check_truncate(ctx, NOCRED, vp);
	if (error)
		goto out;
#endif

	/* preflight and authorize the size change */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
		goto out;
	error = vnode_setattr(vp, &va, ctx);
out:
	vnode_put(vp);
	return (error);
}
6315
6316 /*
6317 * Truncate a file given a file descriptor.
6318 */
6319 /* ARGSUSED */
int
ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
{
	vfs_context_t ctx = vfs_context_current();
	struct vnode_attr va;
	vnode_t vp;
	struct fileproc *fp;
	int error ;
	int fd = uap->fd;

	AUDIT_ARG(fd, uap->fd);
	/* Negative lengths are rejected up front. */
	if (uap->length < 0)
		return(EINVAL);

	/* Look up the fileproc; holds a reference until file_drop(). */
	if ( (error = fp_lookup(p,fd,&fp,0)) ) {
		return(error);
	}

	/* POSIX shared memory objects take their own truncate path. */
	switch (FILEGLOB_DTYPE(fp->f_fglob)) {
	case DTYPE_PSXSHM:
		error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
		goto out;
	case DTYPE_VNODE:
		break;
	default:
		error = EINVAL;
		goto out;
	}

	vp = (vnode_t)fp->f_fglob->fg_data;

	/* The descriptor must have been opened for writing. */
	if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
		error = EINVAL;
		goto out;
	}

	if ((error = vnode_getwithref(vp)) != 0) {
		goto out;
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

#if CONFIG_MACF
	error = mac_vnode_check_truncate(ctx,
	    fp->f_fglob->fg_cred, vp);
	if (error) {
		(void)vnode_put(vp);
		goto out;
	}
#endif
	/* Unlike truncate(2): no vnode_authattr() here, FWRITE suffices. */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_data_size, uap->length);
	error = vnode_setattr(vp, &va, ctx);
	(void)vnode_put(vp);
out:
	file_drop(fd);
	return (error);
}
6379
6380
6381 /*
6382 * Sync an open file with synchronized I/O _file_ integrity completion
6383 */
6384 /* ARGSUSED */
6385 int
6386 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
6387 {
6388 __pthread_testcancel(1);
6389 return(fsync_common(p, uap, MNT_WAIT));
6390 }
6391
6392
6393 /*
6394 * Sync an open file with synchronized I/O _file_ integrity completion
6395 *
6396 * Notes: This is a legacy support function that does not test for
6397 * thread cancellation points.
6398 */
6399 /* ARGSUSED */
6400 int
6401 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6402 {
6403 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
6404 }
6405
6406
6407 /*
6408 * Sync an open file with synchronized I/O _data_ integrity completion
6409 */
6410 /* ARGSUSED */
6411 int
6412 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6413 {
6414 __pthread_testcancel(1);
6415 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6416 }
6417
6418
6419 /*
6420 * fsync_common
6421 *
6422 * Common fsync code to support both synchronized I/O file integrity completion
6423 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6424 *
6425 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6426 * will only guarantee that the file data contents are retrievable. If
 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which also
6428 * includes additional metadata unnecessary for retrieving the file data
6429 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6430 * storage.
6431 *
6432 * Parameters: p The process
6433 * uap->fd The descriptor to synchronize
6434 * flags The data integrity flags
6435 *
6436 * Returns: int Success
6437 * fp_getfvp:EBADF Bad file descriptor
6438 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6439 * VNOP_FSYNC:??? unspecified
6440 *
6441 * Notes: We use struct fsync_args because it is a short name, and all
6442 * caller argument structures are otherwise identical.
6443 */
static int
fsync_common(proc_t p, struct fsync_args *uap, int flags)
{
	vnode_t vp;
	struct fileproc *fp;
	vfs_context_t ctx = vfs_context_current();
	int error;

	AUDIT_ARG(fd, uap->fd);

	/* Resolve fd to fileproc and vnode; takes a file reference. */
	if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
		return (error);
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* flags is MNT_WAIT (fsync) or MNT_DWAIT (fdatasync). */
	error = VNOP_FSYNC(vp, flags, ctx);

#if NAMEDRSRCFORK
	/* Sync resource fork shadow file if necessary. */
	if ((error == 0) &&
	    (vp->v_flag & VISNAMEDSTREAM) &&
	    (vp->v_parent != NULLVP) &&
	    vnode_isshadow(vp) &&
	    (fp->f_flags & FP_WRITTEN)) {
		(void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
	}
#endif

	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}
6480
6481 /*
6482 * Duplicate files. Source must be a file, target must be a file or
6483 * must not exist.
6484 *
6485 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6486 * perform inheritance correctly.
6487 */
6488 /* ARGSUSED */
int
copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
{
	vnode_t tvp, fvp, tdvp, sdvp;
	struct nameidata fromnd, tond;
	int error;
	vfs_context_t ctx = vfs_context_current();

	/* Check that the flags are valid. */

	if (uap->flags & ~CPF_MASK) {
		return(EINVAL);
	}

	/* Look up the source; SAVESTART keeps ni_startdir referenced. */
	NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1,
	    UIO_USERSPACE, uap->from, ctx);
	if ((error = namei(&fromnd)))
		return (error);
	fvp = fromnd.ni_vp;

	/* CREATE lookup for the target; parent and leaf both referenced. */
	NDINIT(&tond, CREATE, OP_LINK,
	    LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
	    UIO_USERSPACE, uap->to, ctx);
	if ((error = namei(&tond))) {
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;

	/* An existing target is only replaced when CPF_OVERWRITE is set. */
	if (tvp != NULL) {
		if (!(uap->flags & CPF_OVERWRITE)) {
			error = EEXIST;
			goto out;
		}
	}
	/* Directories may be neither source nor target. */
	if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
		error = EISDIR;
		goto out;
	}

	/* Must be allowed to add an entry to the target directory. */
	if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out;

	if (fvp == tdvp)
		error = EINVAL;
	/*
	 * If source is the same as the destination (that is the
	 * same inode number) then there is nothing to do.
	 * (fixed to have POSIX semantics - CSM 3/2/98)
	 */
	if (fvp == tvp)
		error = -1;	/* -1 is translated to success below */
	if (!error)
		error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
out:
	sdvp = tond.ni_startdir;
	/*
	 * nameidone has to happen before we vnode_put(tdvp)
	 * since it may need to release the fs_nodelock on the tdvp
	 */
	nameidone(&tond);

	if (tvp)
		vnode_put(tvp);
	vnode_put(tdvp);
	vnode_put(sdvp);
out1:
	vnode_put(fvp);

	if (fromnd.ni_startdir)
		vnode_put(fromnd.ni_startdir);
	nameidone(&fromnd);

	/* The fvp == tvp "nothing to do" case reports success. */
	if (error == -1)
		return (0);
	return (error);
}
6566
6567
6568 /*
6569 * Rename files. Source and destination must either both be directories,
6570 * or both not be directories. If target is a directory, it must be empty.
6571 */
6572 /* ARGSUSED */
6573 static int
6574 renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
6575 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
6576 {
6577 vnode_t tvp, tdvp;
6578 vnode_t fvp, fdvp;
6579 struct nameidata *fromnd, *tond;
6580 int error;
6581 int do_retry;
6582 int retry_count;
6583 int mntrename;
6584 int need_event;
6585 const char *oname = NULL;
6586 char *from_name = NULL, *to_name = NULL;
6587 int from_len=0, to_len=0;
6588 int holding_mntlock;
6589 mount_t locked_mp = NULL;
6590 vnode_t oparent = NULLVP;
6591 #if CONFIG_FSE
6592 fse_info from_finfo, to_finfo;
6593 #endif
6594 int from_truncated=0, to_truncated;
6595 int batched = 0;
6596 struct vnode_attr *fvap, *tvap;
6597 int continuing = 0;
6598 /* carving out a chunk for structs that are too big to be on stack. */
6599 struct {
6600 struct nameidata from_node, to_node;
6601 struct vnode_attr fv_attr, tv_attr;
6602 } * __rename_data;
6603 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
6604 fromnd = &__rename_data->from_node;
6605 tond = &__rename_data->to_node;
6606
6607 holding_mntlock = 0;
6608 do_retry = 0;
6609 retry_count = 0;
6610 retry:
6611 fvp = tvp = NULL;
6612 fdvp = tdvp = NULL;
6613 fvap = tvap = NULL;
6614 mntrename = FALSE;
6615
6616 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
6617 segflg, from, ctx);
6618 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
6619
6620 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6621 segflg, to, ctx);
6622 tond->ni_flag = NAMEI_COMPOUNDRENAME;
6623
6624 continue_lookup:
6625 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6626 if ( (error = nameiat(fromnd, fromfd)) )
6627 goto out1;
6628 fdvp = fromnd->ni_dvp;
6629 fvp = fromnd->ni_vp;
6630
6631 if (fvp && fvp->v_type == VDIR)
6632 tond->ni_cnd.cn_flags |= WILLBEDIR;
6633 }
6634
6635 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6636 if ( (error = nameiat(tond, tofd)) ) {
6637 /*
6638 * Translate error code for rename("dir1", "dir2/.").
6639 */
6640 if (error == EISDIR && fvp->v_type == VDIR)
6641 error = EINVAL;
6642 goto out1;
6643 }
6644 tdvp = tond->ni_dvp;
6645 tvp = tond->ni_vp;
6646 }
6647
6648 batched = vnode_compound_rename_available(fdvp);
6649 if (!fvp) {
6650 /*
6651 * Claim: this check will never reject a valid rename.
6652 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
6653 * Suppose fdvp and tdvp are not on the same mount.
6654 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6655 * then you can't move it to within another dir on the same mountpoint.
6656 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
6657 *
6658 * If this check passes, then we are safe to pass these vnodes to the same FS.
6659 */
6660 if (fdvp->v_mount != tdvp->v_mount) {
6661 error = EXDEV;
6662 goto out1;
6663 }
6664 goto skipped_lookup;
6665 }
6666
6667 if (!batched) {
6668 error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
6669 if (error) {
6670 if (error == ENOENT) {
6671 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
6672 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
6673 /*
6674 * We encountered a race where after doing the namei, tvp stops
6675 * being valid. If so, simply re-drive the rename call from the
6676 * top.
6677 */
6678 do_retry = 1;
6679 retry_count += 1;
6680 }
6681 }
6682 goto out1;
6683 }
6684 }
6685
6686 /*
6687 * If the source and destination are the same (i.e. they're
6688 * links to the same vnode) and the target file system is
6689 * case sensitive, then there is nothing to do.
6690 *
6691 * XXX Come back to this.
6692 */
6693 if (fvp == tvp) {
6694 int pathconf_val;
6695
6696 /*
6697 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
6698 * then assume that this file system is case sensitive.
6699 */
6700 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
6701 pathconf_val != 0) {
6702 goto out1;
6703 }
6704 }
6705
6706 /*
6707 * Allow the renaming of mount points.
6708 * - target must not exist
6709 * - target must reside in the same directory as source
6710 * - union mounts cannot be renamed
6711 * - "/" cannot be renamed
6712 *
6713 * XXX Handle this in VFS after a continued lookup (if we missed
6714 * in the cache to start off)
6715 */
6716 if ((fvp->v_flag & VROOT) &&
6717 (fvp->v_type == VDIR) &&
6718 (tvp == NULL) &&
6719 (fvp->v_mountedhere == NULL) &&
6720 (fdvp == tdvp) &&
6721 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
6722 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
6723 vnode_t coveredvp;
6724
6725 /* switch fvp to the covered vnode */
6726 coveredvp = fvp->v_mount->mnt_vnodecovered;
6727 if ( (vnode_getwithref(coveredvp)) ) {
6728 error = ENOENT;
6729 goto out1;
6730 }
6731 vnode_put(fvp);
6732
6733 fvp = coveredvp;
6734 mntrename = TRUE;
6735 }
6736 /*
6737 * Check for cross-device rename.
6738 */
6739 if ((fvp->v_mount != tdvp->v_mount) ||
6740 (tvp && (fvp->v_mount != tvp->v_mount))) {
6741 error = EXDEV;
6742 goto out1;
6743 }
6744
6745 /*
6746 * If source is the same as the destination (that is the
6747 * same inode number) then there is nothing to do...
6748 * EXCEPT if the underlying file system supports case
6749 * insensitivity and is case preserving. In this case
6750 * the file system needs to handle the special case of
6751 * getting the same vnode as target (fvp) and source (tvp).
6752 *
6753 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
6754 * and _PC_CASE_PRESERVING can have this exception, and they need to
6755 * handle the special case of getting the same vnode as target and
6756 * source. NOTE: Then the target is unlocked going into vnop_rename,
6757 * so not to cause locking problems. There is a single reference on tvp.
6758 *
6759 * NOTE - that fvp == tvp also occurs if they are hard linked and
6760 * that correct behaviour then is just to return success without doing
6761 * anything.
6762 *
6763 * XXX filesystem should take care of this itself, perhaps...
6764 */
6765 if (fvp == tvp && fdvp == tdvp) {
6766 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
6767 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
6768 fromnd->ni_cnd.cn_namelen)) {
6769 goto out1;
6770 }
6771 }
6772
6773 if (holding_mntlock && fvp->v_mount != locked_mp) {
6774 /*
6775 * we're holding a reference and lock
6776 * on locked_mp, but it no longer matches
6777 * what we want to do... so drop our hold
6778 */
6779 mount_unlock_renames(locked_mp);
6780 mount_drop(locked_mp, 0);
6781 holding_mntlock = 0;
6782 }
6783 if (tdvp != fdvp && fvp->v_type == VDIR) {
6784 /*
6785 * serialize renames that re-shape
6786 * the tree... if holding_mntlock is
6787 * set, then we're ready to go...
6788 * otherwise we
6789 * first need to drop the iocounts
6790 * we picked up, second take the
6791 * lock to serialize the access,
6792 * then finally start the lookup
6793 * process over with the lock held
6794 */
6795 if (!holding_mntlock) {
6796 /*
6797 * need to grab a reference on
6798 * the mount point before we
6799 * drop all the iocounts... once
6800 * the iocounts are gone, the mount
6801 * could follow
6802 */
6803 locked_mp = fvp->v_mount;
6804 mount_ref(locked_mp, 0);
6805
6806 /*
6807 * nameidone has to happen before we vnode_put(tvp)
6808 * since it may need to release the fs_nodelock on the tvp
6809 */
6810 nameidone(tond);
6811
6812 if (tvp)
6813 vnode_put(tvp);
6814 vnode_put(tdvp);
6815
6816 /*
6817 * nameidone has to happen before we vnode_put(fdvp)
6818 * since it may need to release the fs_nodelock on the fvp
6819 */
6820 nameidone(fromnd);
6821
6822 vnode_put(fvp);
6823 vnode_put(fdvp);
6824
6825 mount_lock_renames(locked_mp);
6826 holding_mntlock = 1;
6827
6828 goto retry;
6829 }
6830 } else {
6831 /*
6832 * when we dropped the iocounts to take
6833 * the lock, we allowed the identity of
6834 * the various vnodes to change... if they did,
6835 * we may no longer be dealing with a rename
6836 * that reshapes the tree... once we're holding
6837 * the iocounts, the vnodes can't change type
6838 * so we're free to drop the lock at this point
6839 * and continue on
6840 */
6841 if (holding_mntlock) {
6842 mount_unlock_renames(locked_mp);
6843 mount_drop(locked_mp, 0);
6844 holding_mntlock = 0;
6845 }
6846 }
6847
6848 // save these off so we can later verify that fvp is the same
6849 oname = fvp->v_name;
6850 oparent = fvp->v_parent;
6851
6852 skipped_lookup:
6853 #if CONFIG_FSE
6854 need_event = need_fsevent(FSE_RENAME, fdvp);
6855 if (need_event) {
6856 if (fvp) {
6857 get_fse_info(fvp, &from_finfo, ctx);
6858 } else {
6859 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
6860 if (error) {
6861 goto out1;
6862 }
6863
6864 fvap = &__rename_data->fv_attr;
6865 }
6866
6867 if (tvp) {
6868 get_fse_info(tvp, &to_finfo, ctx);
6869 } else if (batched) {
6870 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
6871 if (error) {
6872 goto out1;
6873 }
6874
6875 tvap = &__rename_data->tv_attr;
6876 }
6877 }
6878 #else
6879 need_event = 0;
6880 #endif /* CONFIG_FSE */
6881
6882 if (need_event || kauth_authorize_fileop_has_listeners()) {
6883 if (from_name == NULL) {
6884 GET_PATH(from_name);
6885 if (from_name == NULL) {
6886 error = ENOMEM;
6887 goto out1;
6888 }
6889 }
6890
6891 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
6892
6893 if (to_name == NULL) {
6894 GET_PATH(to_name);
6895 if (to_name == NULL) {
6896 error = ENOMEM;
6897 goto out1;
6898 }
6899 }
6900
6901 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
6902 }
6903 #if CONFIG_SECLUDED_RENAME
6904 if (flags & VFS_SECLUDE_RENAME) {
6905 fromnd->ni_cnd.cn_flags |= CN_SECLUDE_RENAME;
6906 }
6907 #else
6908 #pragma unused(flags)
6909 #endif
6910 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
6911 tdvp, &tvp, &tond->ni_cnd, tvap,
6912 0, ctx);
6913
6914 if (holding_mntlock) {
6915 /*
6916 * we can drop our serialization
6917 * lock now
6918 */
6919 mount_unlock_renames(locked_mp);
6920 mount_drop(locked_mp, 0);
6921 holding_mntlock = 0;
6922 }
6923 if (error) {
6924 if (error == EKEEPLOOKING) {
6925 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6926 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6927 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
6928 }
6929 }
6930
6931 fromnd->ni_vp = fvp;
6932 tond->ni_vp = tvp;
6933
6934 goto continue_lookup;
6935 }
6936
6937 /*
6938 * We may encounter a race in the VNOP where the destination didn't
6939 * exist when we did the namei, but it does by the time we go and
6940 * try to create the entry. In this case, we should re-drive this rename
6941 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
6942 * but other filesystems susceptible to this race could return it, too.
6943 */
6944 if (error == ERECYCLE) {
6945 do_retry = 1;
6946 }
6947
6948 /*
6949 * For compound VNOPs, the authorization callback may return
6950 * ENOENT in case of racing hardlink lookups hitting the name
6951 * cache, redrive the lookup.
6952 */
6953 if (batched && error == ENOENT) {
6954 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
6955 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
6956 do_retry = 1;
6957 retry_count += 1;
6958 }
6959 }
6960
6961 goto out1;
6962 }
6963
6964 /* call out to allow 3rd party notification of rename.
6965 * Ignore result of kauth_authorize_fileop call.
6966 */
6967 kauth_authorize_fileop(vfs_context_ucred(ctx),
6968 KAUTH_FILEOP_RENAME,
6969 (uintptr_t)from_name, (uintptr_t)to_name);
6970
6971 #if CONFIG_FSE
6972 if (from_name != NULL && to_name != NULL) {
6973 if (from_truncated || to_truncated) {
6974 // set it here since only the from_finfo gets reported up to user space
6975 from_finfo.mode |= FSE_TRUNCATED_PATH;
6976 }
6977
6978 if (tvap && tvp) {
6979 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
6980 }
6981 if (fvap) {
6982 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
6983 }
6984
6985 if (tvp) {
6986 add_fsevent(FSE_RENAME, ctx,
6987 FSE_ARG_STRING, from_len, from_name,
6988 FSE_ARG_FINFO, &from_finfo,
6989 FSE_ARG_STRING, to_len, to_name,
6990 FSE_ARG_FINFO, &to_finfo,
6991 FSE_ARG_DONE);
6992 } else {
6993 add_fsevent(FSE_RENAME, ctx,
6994 FSE_ARG_STRING, from_len, from_name,
6995 FSE_ARG_FINFO, &from_finfo,
6996 FSE_ARG_STRING, to_len, to_name,
6997 FSE_ARG_DONE);
6998 }
6999 }
7000 #endif /* CONFIG_FSE */
7001
7002 /*
7003 * update filesystem's mount point data
7004 */
7005 if (mntrename) {
7006 char *cp, *pathend, *mpname;
7007 char * tobuf;
7008 struct mount *mp;
7009 int maxlen;
7010 size_t len = 0;
7011
7012 mp = fvp->v_mountedhere;
7013
7014 if (vfs_busy(mp, LK_NOWAIT)) {
7015 error = EBUSY;
7016 goto out1;
7017 }
7018 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
7019
7020 if (UIO_SEG_IS_USER_SPACE(segflg))
7021 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
7022 else
7023 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
7024 if (!error) {
7025 /* find current mount point prefix */
7026 pathend = &mp->mnt_vfsstat.f_mntonname[0];
7027 for (cp = pathend; *cp != '\0'; ++cp) {
7028 if (*cp == '/')
7029 pathend = cp + 1;
7030 }
7031 /* find last component of target name */
7032 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
7033 if (*cp == '/')
7034 mpname = cp + 1;
7035 }
7036 /* append name to prefix */
7037 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
7038 bzero(pathend, maxlen);
7039 strlcpy(pathend, mpname, maxlen);
7040 }
7041 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
7042
7043 vfs_unbusy(mp);
7044 }
7045 /*
7046 * fix up name & parent pointers. note that we first
7047 * check that fvp has the same name/parent pointers it
7048 * had before the rename call... this is a 'weak' check
7049 * at best...
7050 *
7051 * XXX oparent and oname may not be set in the compound vnop case
7052 */
7053 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
7054 int update_flags;
7055
7056 update_flags = VNODE_UPDATE_NAME;
7057
7058 if (fdvp != tdvp)
7059 update_flags |= VNODE_UPDATE_PARENT;
7060
7061 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
7062 }
7063 out1:
7064 if (to_name != NULL) {
7065 RELEASE_PATH(to_name);
7066 to_name = NULL;
7067 }
7068 if (from_name != NULL) {
7069 RELEASE_PATH(from_name);
7070 from_name = NULL;
7071 }
7072 if (holding_mntlock) {
7073 mount_unlock_renames(locked_mp);
7074 mount_drop(locked_mp, 0);
7075 holding_mntlock = 0;
7076 }
7077 if (tdvp) {
7078 /*
7079 * nameidone has to happen before we vnode_put(tdvp)
7080 * since it may need to release the fs_nodelock on the tdvp
7081 */
7082 nameidone(tond);
7083
7084 if (tvp)
7085 vnode_put(tvp);
7086 vnode_put(tdvp);
7087 }
7088 if (fdvp) {
7089 /*
7090 * nameidone has to happen before we vnode_put(fdvp)
7091 * since it may need to release the fs_nodelock on the fdvp
7092 */
7093 nameidone(fromnd);
7094
7095 if (fvp)
7096 vnode_put(fvp);
7097 vnode_put(fdvp);
7098 }
7099
7100 /*
7101 * If things changed after we did the namei, then we will re-drive
7102 * this rename call from the top.
7103 */
7104 if (do_retry) {
7105 do_retry = 0;
7106 goto retry;
7107 }
7108
7109 FREE(__rename_data, M_TEMP);
7110 return (error);
7111 }
7112
7113 int
7114 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7115 {
7116 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7117 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7118 }
7119
#if CONFIG_SECLUDED_RENAME
/*
 * rename_ext() system call: like rename(), but the caller-supplied flags
 * word is passed through verbatim to renameat_internal().
 */
int rename_ext(__unused proc_t p, struct rename_ext_args *uap, __unused int32_t *retval)
{
	vfs_context_t ctx = vfs_context_current();

	return renameat_internal(ctx,
	    AT_FDCWD, uap->from, AT_FDCWD, uap->to,
	    UIO_USERSPACE, uap->flags);
}
#endif
7130
7131 int
7132 renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7133 {
7134 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7135 uap->tofd, uap->to, UIO_USERSPACE, 0));
7136 }
7137
/*
 * Make a directory file.
 *
 * The lookup is initiated as a compound VNOP (NAMEI_COMPOUNDMKDIR) so
 * that filesystems which support it can perform lookup + mkdir in one
 * operation; vn_create() may return EKEEPLOOKING, in which case we loop
 * back and continue the partially-completed lookup.
 *
 * Parameters:	ctx		vfs context under which to authorize/create
 *		path		pathname of the directory to create
 *		vap		attributes (mode, etc.); va_type is forced to VDIR
 *		fd		directory fd anchoring a relative path (or AT_FDCWD)
 *		segflg		whether 'path' is a user or kernel address
 *
 * Returns: 0			Success
 *		EEXIST
 *	namei:???
 *	vnode_authorize:???
 *	vn_create:???
 */
/* ARGSUSED */
static int
mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
    enum uio_seg segflg)
{
	vnode_t	vp, dvp;
	int error;
	int update_flags = 0;
	int batched;
	struct nameidata nd;

	AUDIT_ARG(mode, vap->va_mode);
	NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
	    path, ctx);
	nd.ni_cnd.cn_flags |= WILLBEDIR;
	nd.ni_flag = NAMEI_COMPOUNDMKDIR;

continue_lookup:
	error = nameiat(&nd, fd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* A non-NULL vp means the target already exists. */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}

	/* "batched" == the FS will do authorization + mkdir in one VNOP. */
	batched = vnode_compound_mkdir_available(dvp);

	VATTR_SET(vap, va_type, VDIR);

	/*
	 * XXX
	 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
	 * only get EXISTS or EISDIR for existing path components, and not that it could see
	 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
	 * it will fail in a spurious manner.  Need to figure out if this is valid behavior.
	 */
	if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
		if (error == EACCES || error == EPERM) {
			int error2;

			nameidone(&nd);
			vnode_put(dvp);
			dvp = NULLVP;

			/*
			 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
			 * rather than EACCESS if the target exists.
			 */
			NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
			    path, ctx);
			error2 = nameiat(&nd, fd);
			if (error2) {
				/* Target truly absent: report the original EACCES/EPERM. */
				goto out;
			} else {
				vp = nd.ni_vp;
				error = EEXIST;
				goto out;
			}
		}

		goto out;
	}

	/*
	 * make the directory
	 */
	if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
		/* Compound VNOP wants the lookup continued from where it stopped. */
		if (error == EKEEPLOOKING) {
			nd.ni_vp = vp;
			goto continue_lookup;
		}

		goto out;
	}

	// Make sure the name & parent pointers are hooked up
	if (vp->v_name == NULL)
		update_flags |= VNODE_UPDATE_NAME;
	if (vp->v_parent == NULLVP)
		update_flags |= VNODE_UPDATE_PARENT;

	if (update_flags)
		vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
	add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
#endif

out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	if (dvp)
		vnode_put(dvp);

	return (error);
}
7253
7254 /*
7255 * mkdir_extended: Create a directory; with extended security (ACL).
7256 *
7257 * Parameters: p Process requesting to create the directory
7258 * uap User argument descriptor (see below)
7259 * retval (ignored)
7260 *
7261 * Indirect: uap->path Path of directory to create
7262 * uap->mode Access permissions to set
7263 * uap->xsecurity ACL to set
7264 *
7265 * Returns: 0 Success
7266 * !0 Not success
7267 *
7268 */
7269 int
7270 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
7271 {
7272 int ciferror;
7273 kauth_filesec_t xsecdst;
7274 struct vnode_attr va;
7275
7276 AUDIT_ARG(owner, uap->uid, uap->gid);
7277
7278 xsecdst = NULL;
7279 if ((uap->xsecurity != USER_ADDR_NULL) &&
7280 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
7281 return ciferror;
7282
7283 VATTR_INIT(&va);
7284 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7285 if (xsecdst != NULL)
7286 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
7287
7288 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7289 UIO_USERSPACE);
7290 if (xsecdst != NULL)
7291 kauth_filesec_free(xsecdst);
7292 return ciferror;
7293 }
7294
7295 int
7296 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
7297 {
7298 struct vnode_attr va;
7299
7300 VATTR_INIT(&va);
7301 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7302
7303 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7304 UIO_USERSPACE));
7305 }
7306
7307 int
7308 mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
7309 {
7310 struct vnode_attr va;
7311
7312 VATTR_INIT(&va);
7313 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7314
7315 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
7316 UIO_USERSPACE));
7317 }
7318
/*
 * Remove a directory, shared guts of rmdir()/unlinkat(AT_REMOVEDIR).
 *
 * Uses a compound-VNOP lookup (NAMEI_COMPOUNDRMDIR) where the filesystem
 * supports it, so lookup + authorization + rmdir can happen in one VNOP.
 * The whole operation sits inside a retry loop: the AppleDouble-orphan
 * cleanup and racing compound-authorization ENOENTs can both request a
 * restart via 'restart_flag'.
 *
 * Parameters:	ctx	vfs context for authorization
 *		fd	directory fd anchoring a relative 'dirpath' (or AT_FDCWD)
 *		dirpath	pathname of the directory to remove
 *		segflg	user vs. kernel address space for 'dirpath'
 */
static int
rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
    enum uio_seg segflg)
{
	vnode_t vp, dvp;
	int error;
	struct nameidata nd;
	char *path = NULL;
	int len = 0;
	int has_listeners = 0;	/* kauth fileop listeners registered? */
	int need_event = 0;	/* fsevent watcher wants FSE_DELETE? */
	int truncated = 0;	/* safe_getpath() truncated the path? */
#if CONFIG_FSE
	struct vnode_attr va;
#endif /* CONFIG_FSE */
	struct vnode_attr *vap = NULL;
	int restart_count = 0;	/* bounded by MAX_AUTHORIZE_ENOENT_RETRIES */
	int batched;		/* compound rmdir available on this FS? */

	int restart_flag;

	/*
	 * This loop exists to restart rmdir in the unlikely case that two
	 * processes are simultaneously trying to remove the same directory
	 * containing orphaned appleDouble files.
	 */
	do {
		NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
		    segflg, dirpath, ctx);
		nd.ni_flag = NAMEI_COMPOUNDRMDIR;
continue_lookup:
		restart_flag = 0;
		vap = NULL;

		error = nameiat(&nd, fd);
		if (error)
			return (error);

		dvp = nd.ni_dvp;
		vp = nd.ni_vp;

		if (vp) {
			batched = vnode_compound_rmdir_available(vp);

			if (vp->v_flag & VROOT) {
				/*
				 * The root of a mounted filesystem cannot be deleted.
				 */
				error = EBUSY;
				goto out;
			}

			/*
			 * Removed a check here; we used to abort if vp's vid
			 * was not the same as what we'd seen the last time around.
			 * I do not think that check was valid, because if we retry
			 * and all dirents are gone, the directory could legitimately
			 * be recycled but still be present in a situation where we would
			 * have had permission to delete. Therefore, we won't make
			 * an effort to preserve that check now that we may not have a
			 * vp here.
			 */

			if (!batched) {
				/* Non-compound path: authorize explicitly before the VNOP. */
				error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
				if (error) {
					if (error == ENOENT) {
						/* Racing lookup hit a stale name-cache entry; retry. */
						assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
						if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
							restart_flag = 1;
							restart_count += 1;
						}
					}
					goto out;
				}
			}
		} else {
			/* No vp: the FS must resolve+remove in the compound VNOP. */
			batched = 1;

			if (!vnode_compound_rmdir_available(dvp)) {
				panic("No error, but no compound rmdir?");
			}
		}

#if CONFIG_FSE
		fse_info finfo;

		need_event = need_fsevent(FSE_DELETE, dvp);
		if (need_event) {
			if (!batched) {
				get_fse_info(vp, &finfo, ctx);
			} else {
				/* Batched: ask the FS to fill notify attrs during the VNOP. */
				error = vfs_get_notify_attributes(&va);
				if (error) {
					goto out;
				}

				vap = &va;
			}
		}
#endif
		has_listeners = kauth_authorize_fileop_has_listeners();
		if (need_event || has_listeners) {
			if (path == NULL) {
				GET_PATH(path);
				if (path == NULL) {
					error = ENOMEM;
					goto out;
				}
			}

			len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
#if CONFIG_FSE
			if (truncated) {
				finfo.mode |= FSE_TRUNCATED_PATH;
			}
#endif
		}

		error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
		/* vn_rmdir() may have produced/changed vp; keep nd in sync. */
		nd.ni_vp = vp;
		if (vp == NULLVP) {
			/* Couldn't find a vnode */
			goto out;
		}

		if (error == EKEEPLOOKING) {
			/* Compound VNOP needs the lookup continued. */
			goto continue_lookup;
		} else if (batched && error == ENOENT) {
			assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
			if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
				/*
				 * For compound VNOPs, the authorization callback
				 * may return ENOENT in case of racing hard link lookups
				 * redrive the lookup.
				 */
				restart_flag = 1;
				restart_count += 1;
				goto out;
			}
		}
#if CONFIG_APPLEDOUBLE
		/*
		 * Special case to remove orphaned AppleDouble
		 * files. I don't like putting this in the kernel,
		 * but carbon does not like putting this in carbon either,
		 * so here we are.
		 */
		if (error == ENOTEMPTY) {
			error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
			if (error == EBUSY) {
				goto out;
			}


			/*
			 * Assuming everything went well, we will try the RMDIR again
			 */
			if (!error)
				error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
		}
#endif /* CONFIG_APPLEDOUBLE */
		/*
		 * Call out to allow 3rd party notification of delete.
		 * Ignore result of kauth_authorize_fileop call.
		 */
		if (!error) {
			if (has_listeners) {
				kauth_authorize_fileop(vfs_context_ucred(ctx),
				    KAUTH_FILEOP_DELETE,
				    (uintptr_t)vp,
				    (uintptr_t)path);
			}

			if (vp->v_flag & VISHARDLINK) {
				// see the comment in unlink1() about why we update
				// the parent of a hard link when it is removed
				vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
			}

#if CONFIG_FSE
			if (need_event) {
				if (vap) {
					vnode_get_fse_info_from_vap(vp, &finfo, vap);
				}
				add_fsevent(FSE_DELETE, ctx,
				    FSE_ARG_STRING, len, path,
				    FSE_ARG_FINFO, &finfo,
				    FSE_ARG_DONE);
			}
#endif
		}

out:
		if (path != NULL) {
			RELEASE_PATH(path);
			path = NULL;
		}
		/*
		 * nameidone has to happen before we vnode_put(dvp)
		 * since it may need to release the fs_nodelock on the dvp
		 */
		nameidone(&nd);
		vnode_put(dvp);

		if (vp)
			vnode_put(vp);

		if (restart_flag == 0) {
			/*
			 * NOTE(review): vp is used here only as a wakeup/sleep
			 * channel address after its reference was dropped above;
			 * it may also be NULL on some paths — confirm wakeup_one/
			 * tsleep tolerate both.
			 */
			wakeup_one((caddr_t)vp);
			return (error);
		}
		/* Brief sleep before retrying (AppleDouble / race restart). */
		tsleep(vp, PVFS, "rm AD", 1);

	} while (restart_flag != 0);

	return (error);

}
7538
7539 /*
7540 * Remove a directory file.
7541 */
7542 /* ARGSUSED */
7543 int
7544 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
7545 {
7546 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
7547 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
7548 }
7549
/* Get direntry length padded to 8 byte alignment */
/* (struct direntry embeds a MAXPATHLEN-sized name; this computes the
 * size actually occupied by a record with a 'namlen'-byte name.) */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)

/*
 * Read directory entries from 'vp' in the extended (64-bit inode/offset,
 * struct direntry) format.
 *
 * If the filesystem natively supports VNODE_READDIR_EXTENDED (and the
 * mount does not deny it), the request is passed straight through to
 * VNOP_READDIR.  Otherwise, legacy 'struct dirent' records are read into
 * a wired kernel buffer and re-packed one at a time into 'struct
 * direntry' records which are copied out to the caller's uio.
 */
errno_t
vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
    int *numdirent, vfs_context_t ctxp)
{
	/* Check if fs natively supports VNODE_READDIR_EXTENDED */
	if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
	    ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
		return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
	} else {
		size_t bufsize;
		void * bufptr;
		uio_t auio;
		struct direntry *entry64;
		struct dirent *dep;
		int bytesread;
		int error;

		/*
		 * Our kernel buffer needs to be smaller since re-packing
		 * will expand each dirent. The worse case (when the name
		 * length is 3) corresponds to a struct direntry size of 32
		 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
		 * (4-byte aligned). So having a buffer that is 3/8 the size
		 * will prevent us from reading more than we can pack.
		 *
		 * Since this buffer is wired memory, we will limit the
		 * buffer size to a maximum of 32K. We would really like to
		 * use 32K in the MIN(), but we use magic number 87371 to
		 * prevent uio_resid() * 3 / 8 from overflowing.
		 */
		bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
		MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
		if (bufptr == NULL) {
			return ENOMEM;
		}

		/* Kernel-space uio aimed at our bounce buffer. */
		auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufptr, bufsize);
		auio->uio_offset = uio->uio_offset;

		error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);

		dep = (struct dirent *)bufptr;
		bytesread = bufsize - uio_resid(auio);

		MALLOC(entry64, struct direntry *, sizeof(struct direntry),
		    M_TEMP, M_WAITOK);
		/*
		 * Convert all the entries and copy them out to user's buffer.
		 */
		while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
			size_t enbufsize = DIRENT64_LEN(dep->d_namlen);

			bzero(entry64, enbufsize);
			/* Convert a dirent to a dirent64. */
			entry64->d_ino = dep->d_ino;
			entry64->d_seekoff = 0;
			entry64->d_reclen = enbufsize;
			entry64->d_namlen = dep->d_namlen;
			entry64->d_type = dep->d_type;
			bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);

			/* Move to next entry. */
			/* NOTE(review): a d_reclen of 0 from a misbehaving FS
			 * would loop forever here — confirm filesystems
			 * guarantee d_reclen > 0 for returned entries. */
			dep = (struct dirent *)((char *)dep + dep->d_reclen);

			/* Copy entry64 to user's buffer. */
			error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
		}

		/* Update the real offset using the offset we got from VNOP_READDIR. */
		if (error == 0) {
			uio->uio_offset = auio->uio_offset;
		}
		uio_free(auio);
		FREE(bufptr, M_TEMP);
		FREE(entry64, M_TEMP);
		return (error);
	}
}
7633
/* Upper bound on the user buffer size honored in a single call. */
#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)

/*
 * Read a block of directory entries in a file system independent format.
 *
 * Shared implementation for getdirentries() and getdirentries64().
 * 'flags' selects legacy (0) vs. extended (VNODE_READDIR_EXTENDED)
 * record format.  On success, *bytesread is the number of bytes placed
 * in the user buffer and *offset (if non-NULL) is the directory offset
 * at which this read began.  Handles union mounts by transparently
 * descending to the lower directory when the upper one is exhausted.
 */
static int
getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
    off_t *offset, int flags)
{
	vnode_t vp;
	struct vfs_context context = *vfs_context_current(); /* local copy */
	struct fileproc *fp;
	uio_t auio;
	int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	off_t loff;
	int error, eofflag, numdirent;
	char uio_buf[ UIO_SIZEOF(1) ];

	/* Translate the fd to a fileproc + vnode (takes an fd reference). */
	error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
	if (error) {
		return (error);
	}
	/* The descriptor must have been opened for reading. */
	if ((fp->f_fglob->fg_flag & FREAD) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
		error = EBADF;
		goto out;
	}

	if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
		bufsize = GETDIRENTRIES_MAXBUFSIZE;

#if CONFIG_MACF
	error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
	if (error)
		goto out;
#endif
	if ( (error = vnode_getwithref(vp)) ) {
		goto out;
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

unionread:
	if (vp->v_type != VDIR) {
		(void)vnode_put(vp);
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_readdir(&context, vp);
	if (error != 0) {
		(void)vnode_put(vp);
		goto out;
	}
#endif /* MAC */

	/* Read starting at the file's current offset, into the user buffer. */
	loff = fp->f_fglob->fg_offset;
	auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, bufp, bufsize);

	if (flags & VNODE_READDIR_EXTENDED) {
		error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
		fp->f_fglob->fg_offset = uio_offset(auio);
	} else {
		error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
		fp->f_fglob->fg_offset = uio_offset(auio);
	}
	if (error) {
		(void)vnode_put(vp);
		goto out;
	}

	/* Empty read: may need to fall through to the lower union layer. */
	if ((user_ssize_t)bufsize == uio_resid(auio)){
		if (union_dircheckp) {
			/* -1 means "switched vnodes, re-read from the new vp". */
			error = union_dircheckp(&vp, fp, &context);
			if (error == -1)
				goto unionread;
			if (error)
				goto out;
		}

		if ((vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *tvp = vp;
			if (lookup_traverse_union(tvp, &vp, &context) == 0) {
				/* Retarget the open file at the lower directory. */
				vnode_ref(vp);
				fp->f_fglob->fg_data = (caddr_t) vp;
				fp->f_fglob->fg_offset = 0;
				vnode_rele(tvp);
				vnode_put(tvp);
				goto unionread;
			}
			vp = tvp;
		}
	}

	vnode_put(vp);
	if (offset) {
		*offset = loff;
	}

	*bytesread = bufsize - uio_resid(auio);
out:
	file_drop(fd);
	return (error);
}
7739
7740
7741 int
7742 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
7743 {
7744 off_t offset;
7745 ssize_t bytesread;
7746 int error;
7747
7748 AUDIT_ARG(fd, uap->fd);
7749 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
7750
7751 if (error == 0) {
7752 if (proc_is64bit(p)) {
7753 user64_long_t base = (user64_long_t)offset;
7754 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
7755 } else {
7756 user32_long_t base = (user32_long_t)offset;
7757 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
7758 }
7759 *retval = bytesread;
7760 }
7761 return (error);
7762 }
7763
7764 int
7765 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
7766 {
7767 off_t offset;
7768 ssize_t bytesread;
7769 int error;
7770
7771 AUDIT_ARG(fd, uap->fd);
7772 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
7773
7774 if (error == 0) {
7775 *retval = bytesread;
7776 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
7777 }
7778 return (error);
7779 }
7780
7781
7782 /*
7783 * Set the mode mask for creation of filesystem nodes.
7784 * XXX implement xsecurity
7785 */
7786 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
7787 static int
7788 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
7789 {
7790 struct filedesc *fdp;
7791
7792 AUDIT_ARG(mask, newmask);
7793 proc_fdlock(p);
7794 fdp = p->p_fd;
7795 *retval = fdp->fd_cmask;
7796 fdp->fd_cmask = newmask & ALLPERMS;
7797 proc_fdunlock(p);
7798 return (0);
7799 }
7800
7801 /*
7802 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
7803 *
7804 * Parameters: p Process requesting to set the umask
7805 * uap User argument descriptor (see below)
7806 * retval umask of the process (parameter p)
7807 *
7808 * Indirect: uap->newmask umask to set
7809 * uap->xsecurity ACL to set
7810 *
7811 * Returns: 0 Success
7812 * !0 Not success
7813 *
7814 */
7815 int
7816 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
7817 {
7818 int ciferror;
7819 kauth_filesec_t xsecdst;
7820
7821 xsecdst = KAUTH_FILESEC_NONE;
7822 if (uap->xsecurity != USER_ADDR_NULL) {
7823 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
7824 return ciferror;
7825 } else {
7826 xsecdst = KAUTH_FILESEC_NONE;
7827 }
7828
7829 ciferror = umask1(p, uap->newmask, xsecdst, retval);
7830
7831 if (xsecdst != KAUTH_FILESEC_NONE)
7832 kauth_filesec_free(xsecdst);
7833 return ciferror;
7834 }
7835
7836 int
7837 umask(proc_t p, struct umask_args *uap, int32_t *retval)
7838 {
7839 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
7840 }
7841
/*
 * Void all references to file by ripping underlying filesystem
 * away from vnode.
 *
 * Only character/block special files may be revoked, and the caller
 * must either own the device node or be superuser.
 */
/* ARGSUSED */
int
revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;

	/* Resolve the path (following symlinks) to the target vnode. */
	NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	    uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* Only device special files can be revoked. */
	if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
		error = ENOTSUP;
		goto out;
	}

	/* Refuse to revoke a block device that is currently mounted on. */
	if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
		error = EBUSY;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_revoke(ctx, vp);
	if (error)
		goto out;
#endif

	/* Owner-or-superuser check against the node's uid. */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	if ((error = vnode_getattr(vp, &va, ctx)))
		goto out;
	if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
	    (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
		goto out;
	/* Only bother if someone actually holds a use count or an alias. */
	if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
		VNOP_REVOKE(vp, REVOKEALL, ctx);
out:
	vnode_put(vp);
	return (error);
}
7894
7895
7896 /*
7897 * HFS/HFS PlUS SPECIFIC SYSTEM CALLS
7898 * The following system calls are designed to support features
7899 * which are specific to the HFS & HFS Plus volume formats
7900 */
7901
7902
/*
 * Obtain attribute information on objects in a directory while enumerating
 * the directory.
 *
 * Reads directory entries via VNOP_READDIRATTR, returning for each entry
 * the attributes requested in uap->alist.  *retval is the eof flag
 * (similar to getdirentries); the entry count, a directory state cookie,
 * and the starting offset are copied out through uap->count,
 * uap->newstate and uap->basep respectively.  Union mounts are handled
 * by descending to the lower directory when the upper one is exhausted.
 */
/* ARGSUSED */
int
getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
{
	vnode_t vp;
	struct fileproc *fp;
	uio_t auio = NULL;
	int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	uint32_t count, savecount;	/* savecount: restored on union fallthrough */
	uint32_t newstate;		/* directory-change cookie from the FS */
	int error, eofflag;
	uint32_t loff;			/* directory offset at which this read began */
	struct attrlist attributelist;
	vfs_context_t ctx = vfs_context_current();
	int fd = uap->fd;
	char uio_buf[ UIO_SIZEOF(1) ];
	kauth_action_t action;

	AUDIT_ARG(fd, fd);

	/* Get the attributes into kernel space */
	if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
		return(error);
	}
	if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
		return(error);
	}
	savecount = count;
	if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
		return (error);
	}
	/* The descriptor must have been opened for reading. */
	if ((fp->f_fglob->fg_flag & FREAD) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
		error = EBADF;
		goto out;
	}


#if CONFIG_MACF
	error = mac_file_check_change_offset(vfs_context_ucred(ctx),
	    fp->f_fglob);
	if (error)
		goto out;
#endif


	if ( (error = vnode_getwithref(vp)) )
		goto out;

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

unionread:
	if (vp->v_type != VDIR) {
		(void)vnode_put(vp);
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_readdir(ctx, vp);
	if (error != 0) {
		(void)vnode_put(vp);
		goto out;
	}
#endif /* MAC */

	/* set up the uio structure which will contain the users return buffer */
	loff = fp->f_fglob->fg_offset;
	auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, uap->buffer, uap->buffersize);

	/*
	 * If the only item requested is file names, we can let that past with
	 * just LIST_DIRECTORY. If they want any other attributes, that means
	 * they need SEARCH as well.
	 */
	action = KAUTH_VNODE_LIST_DIRECTORY;
	if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
	    attributelist.fileattr || attributelist.dirattr)
		action |= KAUTH_VNODE_SEARCH;

	if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {

		/* Believe it or not, uap->options only has 32-bits of valid
		 * info, so truncate before extending again */

		error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
		    (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
	}

	if (error) {
		(void) vnode_put(vp);
		goto out;
	}

	/*
	 * If we've got the last entry of a directory in a union mount
	 * then reset the eofflag and pretend there's still more to come.
	 * The next call will again set eofflag and the buffer will be empty,
	 * so traverse to the underlying directory and do the directory
	 * read there.
	 */
	if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
		if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
			eofflag = 0;
		} else { // Empty buffer
			struct vnode *tvp = vp;
			if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
				/* Retarget the open file at the lower directory. */
				vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
				fp->f_fglob->fg_data = (caddr_t) vp;
				fp->f_fglob->fg_offset = 0; // reset index for new dir
				count = savecount;
				vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
				vnode_put(tvp);
				goto unionread;
			}
			vp = tvp;
		}
	}

	(void)vnode_put(vp);

	if (error)
		goto out;
	fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */

	if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
		goto out;
	if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
		goto out;
	if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
		goto out;

	*retval = eofflag;  /* similar to getdirentries */
	error = 0;
out:
	file_drop(fd);
	return (error); /* return error earlier, an retval of 0 or 1 now */

} /* end of getdirentriesattr system call */
8047
/*
 * Exchange data between two files
 *
 * Atomically swaps the data forks of two regular files on the same
 * volume via VNOP_EXCHANGE, then swaps their cached names/parents in the
 * VFS name cache so the identities follow the swap.  Emits FSE_EXCHANGE
 * fsevents and a KAUTH_FILEOP_EXCHANGE callout when anyone is listening.
 */

/* ARGSUSED */
int
exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
{

	struct nameidata fnd, snd;
	vfs_context_t ctx = vfs_context_current();
	vnode_t fvp;
	vnode_t svp;
	int error;
	u_int32_t nameiflags;
	char *fpath = NULL;
	char *spath = NULL;
	int flen=0, slen=0;
	int from_truncated=0, to_truncated=0;
#if CONFIG_FSE
	fse_info f_finfo, s_finfo;
#endif

	nameiflags = 0;
	/* FSOPT_NOFOLLOW suppresses symlink traversal on both paths. */
	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;

	NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path1, ctx);

	error = namei(&fnd);
	if (error)
		goto out2;

	nameidone(&fnd);
	fvp = fnd.ni_vp;

	NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
	    UIO_USERSPACE, uap->path2, ctx);

	error = namei(&snd);
	if (error) {
		vnode_put(fvp);
		goto out2;
	}
	nameidone(&snd);
	svp = snd.ni_vp;

	/*
	 * if the files are the same, return an inval error
	 */
	if (svp == fvp) {
		error = EINVAL;
		goto out;
	}

	/*
	 * if the files are on different volumes, return an error
	 */
	if (svp->v_mount != fvp->v_mount) {
		error = EXDEV;
		goto out;
	}

	/* If they're not files, return an error */
	if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_exchangedata(ctx,
	    fvp, svp);
	if (error)
		goto out;
#endif
	/* Caller needs both read and write access to both files. */
	if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
	    ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
		goto out;

	/* Gather paths/finfo up front only if someone will consume them. */
	if (
#if CONFIG_FSE
	    need_fsevent(FSE_EXCHANGE, fvp) ||
#endif
	    kauth_authorize_fileop_has_listeners()) {
		GET_PATH(fpath);
		GET_PATH(spath);
		if (fpath == NULL || spath == NULL) {
			error = ENOMEM;
			goto out;
		}

		flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
		slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);

#if CONFIG_FSE
		get_fse_info(fvp, &f_finfo, ctx);
		get_fse_info(svp, &s_finfo, ctx);
		if (from_truncated || to_truncated) {
			// set it here since only the f_finfo gets reported up to user space
			f_finfo.mode |= FSE_TRUNCATED_PATH;
		}
#endif
	}
	/* Ok, make the call */
	error = VNOP_EXCHANGE(fvp, svp, 0, ctx);

	if (error == 0) {
		const char *tmpname;

		if (fpath != NULL && spath != NULL) {
			/* call out to allow 3rd party notification of exchangedata.
			 * Ignore result of kauth_authorize_fileop call.
			 */
			kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
			    (uintptr_t)fpath, (uintptr_t)spath);
		}
		/* Swap cached names and parents so identity follows the data. */
		name_cache_lock();

		tmpname = fvp->v_name;
		fvp->v_name = svp->v_name;
		svp->v_name = tmpname;

		if (fvp->v_parent != svp->v_parent) {
			vnode_t tmp;

			tmp = fvp->v_parent;
			fvp->v_parent = svp->v_parent;
			svp->v_parent = tmp;
		}
		name_cache_unlock();

#if CONFIG_FSE
		if (fpath != NULL && spath != NULL) {
			add_fsevent(FSE_EXCHANGE, ctx,
			    FSE_ARG_STRING, flen, fpath,
			    FSE_ARG_FINFO, &f_finfo,
			    FSE_ARG_STRING, slen, spath,
			    FSE_ARG_FINFO, &s_finfo,
			    FSE_ARG_DONE);
		}
#endif
	}

out:
	if (fpath != NULL)
		RELEASE_PATH(fpath);
	if (spath != NULL)
		RELEASE_PATH(spath);
	vnode_put(svp);
	vnode_put(fvp);
out2:
	return (error);
}
8201
8202 /*
8203 * Return (in MB) the amount of freespace on the given vnode's volume.
8204 */
8205 uint32_t freespace_mb(vnode_t vp);
8206
8207 uint32_t
8208 freespace_mb(vnode_t vp)
8209 {
8210 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
8211 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
8212 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
8213 }
8214
8215 #if CONFIG_SEARCHFS
8216
8217 /* ARGSUSED */
8218
8219 int
8220 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
8221 {
8222 vnode_t vp, tvp;
8223 int i, error=0;
8224 int fserror = 0;
8225 struct nameidata nd;
8226 struct user64_fssearchblock searchblock;
8227 struct searchstate *state;
8228 struct attrlist *returnattrs;
8229 struct timeval timelimit;
8230 void *searchparams1,*searchparams2;
8231 uio_t auio = NULL;
8232 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8233 uint32_t nummatches;
8234 int mallocsize;
8235 uint32_t nameiflags;
8236 vfs_context_t ctx = vfs_context_current();
8237 char uio_buf[ UIO_SIZEOF(1) ];
8238
8239 /* Start by copying in fsearchblock parameter list */
8240 if (IS_64BIT_PROCESS(p)) {
8241 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
8242 timelimit.tv_sec = searchblock.timelimit.tv_sec;
8243 timelimit.tv_usec = searchblock.timelimit.tv_usec;
8244 }
8245 else {
8246 struct user32_fssearchblock tmp_searchblock;
8247
8248 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
8249 // munge into 64-bit version
8250 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
8251 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
8252 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
8253 searchblock.maxmatches = tmp_searchblock.maxmatches;
8254 /*
8255 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8256 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8257 */
8258 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
8259 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
8260 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
8261 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
8262 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
8263 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
8264 searchblock.searchattrs = tmp_searchblock.searchattrs;
8265 }
8266 if (error)
8267 return(error);
8268
8269 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
8270 */
8271 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
8272 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
8273 return(EINVAL);
8274
8275 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8276 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
8277 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8278 /* block. */
8279 /* */
8280 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8281 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8282 /* assumes the size is still 556 bytes it will continue to work */
8283
8284 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
8285 sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
8286
8287 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
8288
8289 /* Now set up the various pointers to the correct place in our newly allocated memory */
8290
8291 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
8292 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
8293 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
8294
8295 /* Now copy in the stuff given our local variables. */
8296
8297 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
8298 goto freeandexit;
8299
8300 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
8301 goto freeandexit;
8302
8303 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
8304 goto freeandexit;
8305
8306 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
8307 goto freeandexit;
8308
8309 /*
8310 * When searching a union mount, need to set the
8311 * start flag at the first call on each layer to
8312 * reset state for the new volume.
8313 */
8314 if (uap->options & SRCHFS_START)
8315 state->ss_union_layer = 0;
8316 else
8317 uap->options |= state->ss_union_flags;
8318 state->ss_union_flags = 0;
8319
8320 /*
8321 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8322 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
8323 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8324 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
8325 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8326 */
8327
8328 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
8329 attrreference_t* string_ref;
8330 u_int32_t* start_length;
8331 user64_size_t param_length;
8332
8333 /* validate searchparams1 */
8334 param_length = searchblock.sizeofsearchparams1;
8335 /* skip the word that specifies length of the buffer */
8336 start_length= (u_int32_t*) searchparams1;
8337 start_length= start_length+1;
8338 string_ref= (attrreference_t*) start_length;
8339
8340 /* ensure no negative offsets or too big offsets */
8341 if (string_ref->attr_dataoffset < 0 ) {
8342 error = EINVAL;
8343 goto freeandexit;
8344 }
8345 if (string_ref->attr_length > MAXPATHLEN) {
8346 error = EINVAL;
8347 goto freeandexit;
8348 }
8349
8350 /* Check for pointer overflow in the string ref */
8351 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
8352 error = EINVAL;
8353 goto freeandexit;
8354 }
8355
8356 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
8357 error = EINVAL;
8358 goto freeandexit;
8359 }
8360 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
8361 error = EINVAL;
8362 goto freeandexit;
8363 }
8364 }
8365
8366 /* set up the uio structure which will contain the users return buffer */
8367 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8368 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
8369
8370 nameiflags = 0;
8371 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8372 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
8373 UIO_USERSPACE, uap->path, ctx);
8374
8375 error = namei(&nd);
8376 if (error)
8377 goto freeandexit;
8378 vp = nd.ni_vp;
8379 nameidone(&nd);
8380
8381 /*
8382 * Switch to the root vnode for the volume
8383 */
8384 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
8385 vnode_put(vp);
8386 if (error)
8387 goto freeandexit;
8388 vp = tvp;
8389
8390 /*
8391 * If it's a union mount, the path lookup takes
8392 * us to the top layer. But we may need to descend
8393 * to a lower layer. For non-union mounts the layer
8394 * is always zero.
8395 */
8396 for (i = 0; i < (int) state->ss_union_layer; i++) {
8397 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
8398 break;
8399 tvp = vp;
8400 vp = vp->v_mount->mnt_vnodecovered;
8401 if (vp == NULL) {
8402 vnode_put(tvp);
8403 error = ENOENT;
8404 goto freeandexit;
8405 }
8406 vnode_getwithref(vp);
8407 vnode_put(tvp);
8408 }
8409
8410 #if CONFIG_MACF
8411 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
8412 if (error) {
8413 vnode_put(vp);
8414 goto freeandexit;
8415 }
8416 #endif
8417
8418
8419 /*
8420 * If searchblock.maxmatches == 0, then skip the search. This has happened
8421 * before and sometimes the underlying code doesnt deal with it well.
8422 */
8423 if (searchblock.maxmatches == 0) {
8424 nummatches = 0;
8425 goto saveandexit;
8426 }
8427
8428 /*
8429 * Allright, we have everything we need, so lets make that call.
8430 *
8431 * We keep special track of the return value from the file system:
8432 * EAGAIN is an acceptable error condition that shouldn't keep us
8433 * from copying out any results...
8434 */
8435
8436 fserror = VNOP_SEARCHFS(vp,
8437 searchparams1,
8438 searchparams2,
8439 &searchblock.searchattrs,
8440 (u_long)searchblock.maxmatches,
8441 &timelimit,
8442 returnattrs,
8443 &nummatches,
8444 (u_long)uap->scriptcode,
8445 (u_long)uap->options,
8446 auio,
8447 (struct searchstate *) &state->ss_fsstate,
8448 ctx);
8449
8450 /*
8451 * If it's a union mount we need to be called again
8452 * to search the mounted-on filesystem.
8453 */
8454 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
8455 state->ss_union_flags = SRCHFS_START;
8456 state->ss_union_layer++; // search next layer down
8457 fserror = EAGAIN;
8458 }
8459
8460 saveandexit:
8461
8462 vnode_put(vp);
8463
8464 /* Now copy out the stuff that needs copying out. That means the number of matches, the
8465 search state. Everything was already put into he return buffer by the vop call. */
8466
8467 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
8468 goto freeandexit;
8469
8470 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
8471 goto freeandexit;
8472
8473 error = fserror;
8474
8475 freeandexit:
8476
8477 FREE(searchparams1,M_TEMP);
8478
8479 return(error);
8480
8481
8482 } /* end of searchfs system call */
8483
8484 #else /* CONFIG_SEARCHFS */
8485
8486 int
8487 searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
8488 {
8489 return (ENOTSUP);
8490 }
8491
8492 #endif /* CONFIG_SEARCHFS */
8493
8494
/* Lock machinery backing the namespace-handler event table below. */
lck_grp_attr_t * nspace_group_attr;
lck_attr_t * nspace_lock_attr;
lck_grp_t * nspace_mutex_group;

/* Protects nspace_items[] and the sleep/wakeup protocol around it. */
lck_mtx_t nspace_handler_lock;
/* Serializes entry into wait_for_namespace_event() per handler type. */
lck_mtx_t nspace_handler_exclusion_lock;

/* 0 or ~0 are treated by the handler code as "no snapshot handler active". */
time_t snapshot_timestamp=0;
/* When non-zero, snapshot events are delivered even for virtual devices (disk images). */
int nspace_allow_virtual_devs=0;

void nspace_handler_init(void);

/* One pending namespace/snapshot event, keyed by (vp, op). */
typedef struct nspace_item_info {
	struct vnode *vp;	/* vnode the event refers to; NULL once resolved/cleared */
	void *arg;		/* optional payload (e.g. a uio pointer) -- see resolve_nspace_item_ext() */
	uint64_t op;		/* NAMESPACE_HANDLER_* operation bits */
	uint32_t vid;		/* vnode vid captured when the event was queued */
	uint32_t flags;		/* NSPACE_ITEM_* state bits, defined below */
	uint32_t token;		/* id handed to the userland handler for this event */
	uint32_t refcount;	/* number of kernel threads waiting on this slot */
} nspace_item_info;

#define MAX_NSPACE_ITEMS 128
nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
uint32_t nspace_token_id=0;
uint32_t nspace_handler_timeout = 15; // seconds

/* nspace_item_info.flags -- event life-cycle state bits */
#define NSPACE_ITEM_NEW 0x0001
#define NSPACE_ITEM_PROCESSING 0x0002
#define NSPACE_ITEM_DEAD 0x0004
#define NSPACE_ITEM_CANCELLED 0x0008
#define NSPACE_ITEM_DONE 0x0010
#define NSPACE_ITEM_RESET_TIMER 0x0020

/* nspace_item_info.flags -- which handler type the event targets */
#define NSPACE_ITEM_NSPACE_EVENT 0x0040
#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080

#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)

//#pragma optimization_level 0

/* The kinds of userland namespace handlers this file multiplexes. */
typedef enum {
	NSPACE_HANDLER_NSPACE = 0,
	NSPACE_HANDLER_SNAPSHOT = 1,

	NSPACE_HANDLER_COUNT,
} nspace_type_t;

/* Registration record for one handler process. */
typedef struct {
	uint64_t handler_tid;		/* thread id of the registered handler thread */
	struct proc *handler_proc;	/* process acting as this handler; NULL if none */
	int handler_busy;		/* non-zero while a handler is parked in wait_for_namespace_event() */
} nspace_handler_t;

nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];

/* namespace fsctl functions */
static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
static int nspace_item_flags_for_type(nspace_type_t nspace_type);
static int nspace_open_flags_for_type(nspace_type_t nspace_type);
static nspace_type_t nspace_type_for_op(uint64_t op);
static int nspace_is_special_process(struct proc *proc);
static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
static int validate_namespace_args (int is64bit, int size);
static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
8562
8563
8564 static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
8565 {
8566 switch(nspace_type) {
8567 case NSPACE_HANDLER_NSPACE:
8568 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
8569 case NSPACE_HANDLER_SNAPSHOT:
8570 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
8571 default:
8572 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
8573 return 0;
8574 }
8575 }
8576
8577 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
8578 {
8579 switch(nspace_type) {
8580 case NSPACE_HANDLER_NSPACE:
8581 return NSPACE_ITEM_NSPACE_EVENT;
8582 case NSPACE_HANDLER_SNAPSHOT:
8583 return NSPACE_ITEM_SNAPSHOT_EVENT;
8584 default:
8585 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
8586 return 0;
8587 }
8588 }
8589
8590 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
8591 {
8592 switch(nspace_type) {
8593 case NSPACE_HANDLER_NSPACE:
8594 return FREAD | FWRITE | O_EVTONLY;
8595 case NSPACE_HANDLER_SNAPSHOT:
8596 return FREAD | O_EVTONLY;
8597 default:
8598 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
8599 return 0;
8600 }
8601 }
8602
8603 static inline nspace_type_t nspace_type_for_op(uint64_t op)
8604 {
8605 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
8606 case NAMESPACE_HANDLER_NSPACE_EVENT:
8607 return NSPACE_HANDLER_NSPACE;
8608 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
8609 return NSPACE_HANDLER_SNAPSHOT;
8610 default:
8611 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
8612 return NSPACE_HANDLER_NSPACE;
8613 }
8614 }
8615
8616 static inline int nspace_is_special_process(struct proc *proc)
8617 {
8618 int i;
8619 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
8620 if (proc == nspace_handlers[i].handler_proc)
8621 return 1;
8622 }
8623 return 0;
8624 }
8625
/*
 * One-time setup for the namespace-handler machinery: allocate the lock
 * attributes/group, initialize both mutexes, and clear the event table.
 */
void
nspace_handler_init(void)
{
	nspace_lock_attr = lck_attr_alloc_init();
	nspace_group_attr = lck_grp_attr_alloc_init();
	nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
	lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
	lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
	memset(&nspace_items[0], 0, sizeof(nspace_items));
}
8636
/*
 * Called when a process exits.  If p was registered as one (or more) of
 * the namespace handler processes, deregister it and wake every kernel
 * thread blocked waiting on an event of the corresponding type(s), so
 * callers of resolve_nspace_item_ext() do not hang on a dead handler.
 */
void
nspace_proc_exit(struct proc *p)
{
	int i, event_mask = 0;

	/* Deregister p from any handler slots it owned and collect their event bits. */
	for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
		if (p == nspace_handlers[i].handler_proc) {
			event_mask |= nspace_item_flags_for_type(i);
			nspace_handlers[i].handler_tid = 0;
			nspace_handlers[i].handler_proc = NULL;
		}
	}

	/* Not a handler process: nothing to clean up. */
	if (event_mask == 0) {
		return;
	}

	if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
		// if this process was the snapshot handler, zero snapshot_timeout
		snapshot_timestamp = 0;
	}

	//
	// unblock anyone that's waiting for the handler that died
	//
	lck_mtx_lock(&nspace_handler_lock);
	for(i=0; i < MAX_NSPACE_ITEMS; i++) {
		if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {

			if ( nspace_items[i].flags & event_mask ) {

				/* drop the pending-snapshot mark before clearing the slot */
				if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
					vnode_lock_spin(nspace_items[i].vp);
					nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
					vnode_unlock(nspace_items[i].vp);
				}
				nspace_items[i].vp = NULL;
				nspace_items[i].vid = 0;
				nspace_items[i].flags = NSPACE_ITEM_DONE;
				nspace_items[i].token = 0;

				/* wake the thread sleeping on this slot in resolve_nspace_item_ext() */
				wakeup((caddr_t)&(nspace_items[i].vp));
			}
		}
	}

	/* also wake anything parked on the table-wide rendezvous address */
	wakeup((caddr_t)&nspace_item_idx);
	lck_mtx_unlock(&nspace_handler_lock);
}
8686
8687
/*
 * Queue a namespace event for vp/op and wait for the userland handler to
 * resolve it.  Convenience wrapper around resolve_nspace_item_ext() with
 * no extra argument.
 */
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	return resolve_nspace_item_ext(vp, op, NULL);
}
8693
/*
 * Queue a namespace event for vp/op in nspace_items[] and block until the
 * userland handler marks it done/cancelled, or nspace_handler_timeout
 * expires.
 *
 * Returns 0 when there is nothing to do (unsupported vnode type, snapshot
 * event on a virtual device, or no handler registered), EDEADLK when the
 * caller itself is a handler process, the handler-supplied token value if
 * the event was cancelled, ETIMEDOUT on timeout, or the msleep() error.
 */
int
resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
{
	int i, error, keep_waiting;
	struct timespec ts;
	nspace_type_t nspace_type = nspace_type_for_op(op);

	// only allow namespace events on regular files, directories and symlinks.
	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
		return 0;
	}

	//
	// if this is a snapshot event and the vnode is on a
	// disk image just pretend nothing happened since any
	// change to the disk image will cause the disk image
	// itself to get backed up and this avoids multi-way
	// deadlocks between the snapshot handler and the ever
	// popular diskimages-helper process.  the variable
	// nspace_allow_virtual_devs allows this behavior to
	// be overridden (for use by the Mobile TimeMachine
	// testing infrastructure which uses disk images)
	//
	if (   (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
	    && (vp->v_mount != NULL)
	    && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
	    && !nspace_allow_virtual_devs) {

		return 0;
	}

	// if (thread_tid(current_thread()) == namespace_handler_tid) {
	if (nspace_handlers[nspace_type].handler_proc == NULL) {
		return 0;
	}

	/* a handler waiting on its own event would deadlock */
	if (nspace_is_special_process(current_proc())) {
		return EDEADLK;
	}

	lck_mtx_lock(&nspace_handler_lock);

retry:
	/* first look for an existing entry for this (vp, op) pair */
	for(i=0; i < MAX_NSPACE_ITEMS; i++) {
		if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
			break;
		}
	}

	if (i >= MAX_NSPACE_ITEMS) {
		/* no match: look for a free slot (flags == 0) */
		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].flags == 0) {
				break;
			}
		}
	} else {
		/* join the existing entry as an additional waiter */
		nspace_items[i].refcount++;
	}

	if (i >= MAX_NSPACE_ITEMS) {
		/* table full: wait (bounded) for a slot to free up, then retry */
		ts.tv_sec = nspace_handler_timeout;
		ts.tv_nsec = 0;

		error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
		if (error == 0) {
			// an entry got free'd up, go see if we can get a slot
			goto retry;
		} else {
			lck_mtx_unlock(&nspace_handler_lock);
			return error;
		}
	}

	//
	// if it didn't already exist, add it.  if it did exist
	// we'll get woken up when someone does a wakeup() on
	// the slot in the nspace_items table.
	//
	if (vp != nspace_items[i].vp) {
		nspace_items[i].vp = vp;
		nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg;  // arg is {NULL, true, uio *} - only pass uio thru to the user
		nspace_items[i].op = op;
		nspace_items[i].vid = vnode_vid(vp);
		nspace_items[i].flags = NSPACE_ITEM_NEW;
		nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
		if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
			if (arg) {
				/* mark the vnode so unwind paths know a snapshot is pending */
				vnode_lock_spin(vp);
				vp->v_flag |= VNEEDSSNAPSHOT;
				vnode_unlock(vp);
			}
		}

		nspace_items[i].token = 0;
		nspace_items[i].refcount = 1;

		/* notify the handler parked in wait_for_namespace_event() */
		wakeup((caddr_t)&nspace_item_idx);
	}

	//
	// Now go to sleep until the handler does a wakeup on this
	// slot in the nspace_items table (or we timeout).
	//
	keep_waiting = 1;
	while(keep_waiting) {
		ts.tv_sec = nspace_handler_timeout;
		ts.tv_nsec = 0;
		error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);

		if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
			error = 0;
		} else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
			/* on cancel, the handler stores the error to return in .token */
			error = nspace_items[i].token;
		} else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
			if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
				/* handler asked for more time: restart the timed sleep */
				nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
				continue;
			} else {
				error = ETIMEDOUT;
			}
		} else if (error == 0) {
			// hmmm, why did we get woken up?
			printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
			       nspace_items[i].token);
		}

		/* last waiter out clears the slot so it can be reused */
		if (--nspace_items[i].refcount == 0) {
			nspace_items[i].vp = NULL;     // clear this so that no one will match on it again
			nspace_items[i].arg = NULL;
			nspace_items[i].token = 0;     // clear this so that the handler will not find it anymore
			nspace_items[i].flags = 0;     // this clears it for re-use
		}
		/* wake any thread stalled above waiting for a free slot */
		wakeup(&nspace_token_id);
		keep_waiting = 0;
	}

	lck_mtx_unlock(&nspace_handler_lock);

	return error;
}
8834
8835
8836 int
8837 get_nspace_item_status(struct vnode *vp, int32_t *status)
8838 {
8839 int i;
8840
8841 lck_mtx_lock(&nspace_handler_lock);
8842 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8843 if (nspace_items[i].vp == vp) {
8844 break;
8845 }
8846 }
8847
8848 if (i >= MAX_NSPACE_ITEMS) {
8849 lck_mtx_unlock(&nspace_handler_lock);
8850 return ENOENT;
8851 }
8852
8853 *status = nspace_items[i].flags;
8854 lck_mtx_unlock(&nspace_handler_lock);
8855 return 0;
8856 }
8857
8858
#if 0
/*
 * Currently compiled out.  Formats a volfs-style "/.vol/<fsid>/<fileid>"
 * path for vp into path.  *len is in/out: buffer capacity on entry,
 * formatted length (including the NUL) on exit.  Returns 0 on success,
 * -1 when the vnode attributes could not be fetched (a placeholder path
 * is written in that case).
 */
static int
build_volfs_path(struct vnode *vp, char *path, int *len)
{
	struct vnode_attr va;
	int ret;

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_fileid);

	if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
		*len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
		ret = -1;
	} else {
		*len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
		ret = 0;
	}

	return ret;
}
#endif
8881
8882 //
8883 // Note: this function does NOT check permissions on all of the
8884 // parent directories leading to this vnode. It should only be
8885 // called on behalf of a root process. Otherwise a process may
8886 // get access to a file because the file itself is readable even
8887 // though its parent directories would prevent access.
8888 //
8889 static int
8890 vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
8891 {
8892 int error, action;
8893
8894 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8895 return error;
8896 }
8897
8898 #if CONFIG_MACF
8899 error = mac_vnode_check_open(ctx, vp, fmode);
8900 if (error)
8901 return error;
8902 #endif
8903
8904 /* compute action to be authorized */
8905 action = 0;
8906 if (fmode & FREAD) {
8907 action |= KAUTH_VNODE_READ_DATA;
8908 }
8909 if (fmode & (FWRITE | O_TRUNC)) {
8910 /*
8911 * If we are writing, appending, and not truncating,
8912 * indicate that we are appending so that if the
8913 * UF_APPEND or SF_APPEND bits are set, we do not deny
8914 * the open.
8915 */
8916 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
8917 action |= KAUTH_VNODE_APPEND_DATA;
8918 } else {
8919 action |= KAUTH_VNODE_WRITE_DATA;
8920 }
8921 }
8922
8923 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
8924 return error;
8925
8926
8927 //
8928 // if the vnode is tagged VOPENEVT and the current process
8929 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
8930 // flag to the open mode so that this open won't count against
8931 // the vnode when carbon delete() does a vnode_isinuse() to see
8932 // if a file is currently in use. this allows spotlight
8933 // importers to not interfere with carbon apps that depend on
8934 // the no-delete-if-busy semantics of carbon delete().
8935 //
8936 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
8937 fmode |= O_EVTONLY;
8938 }
8939
8940 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
8941 return error;
8942 }
8943 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
8944 VNOP_CLOSE(vp, fmode, ctx);
8945 return error;
8946 }
8947
8948 /* Call out to allow 3rd party notification of open.
8949 * Ignore result of kauth_authorize_fileop call.
8950 */
8951 #if CONFIG_MACF
8952 mac_vnode_notify_open(ctx, vp, fmode);
8953 #endif
8954 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
8955 (uintptr_t)vp, 0);
8956
8957
8958 return 0;
8959 }
8960
8961 static int
8962 wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
8963 {
8964 int i, error=0, unblock=0;
8965 task_t curtask;
8966
8967 lck_mtx_lock(&nspace_handler_exclusion_lock);
8968 if (nspace_handlers[nspace_type].handler_busy) {
8969 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8970 return EBUSY;
8971 }
8972 nspace_handlers[nspace_type].handler_busy = 1;
8973 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8974
8975 /*
8976 * Any process that gets here will be one of the namespace handlers.
8977 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
8978 * as we can cause deadlocks to occur, because the namespace handler may prevent
8979 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
8980 * process.
8981 */
8982 curtask = current_task();
8983 bsd_set_dependency_capable (curtask);
8984
8985 lck_mtx_lock(&nspace_handler_lock);
8986 if (nspace_handlers[nspace_type].handler_proc == NULL) {
8987 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
8988 nspace_handlers[nspace_type].handler_proc = current_proc();
8989 }
8990
8991 while (error == 0) {
8992
8993 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8994 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
8995 if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
8996 continue;
8997 }
8998 break;
8999 }
9000 }
9001
9002 if (i < MAX_NSPACE_ITEMS) {
9003 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
9004 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
9005 nspace_items[i].token = ++nspace_token_id;
9006
9007 if (nspace_items[i].vp) {
9008 struct fileproc *fp;
9009 int32_t indx, fmode;
9010 struct proc *p = current_proc();
9011 vfs_context_t ctx = vfs_context_current();
9012 struct vnode_attr va;
9013
9014
9015 /*
9016 * Use vnode pointer to acquire a file descriptor for
9017 * hand-off to userland
9018 */
9019 fmode = nspace_open_flags_for_type(nspace_type);
9020 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
9021 if (error) {
9022 unblock = 1;
9023 break;
9024 }
9025 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
9026 if (error) {
9027 unblock = 1;
9028 vnode_put(nspace_items[i].vp);
9029 break;
9030 }
9031
9032 if ((error = falloc(p, &fp, &indx, ctx))) {
9033 vn_close(nspace_items[i].vp, fmode, ctx);
9034 vnode_put(nspace_items[i].vp);
9035 unblock = 1;
9036 break;
9037 }
9038
9039 fp->f_fglob->fg_flag = fmode;
9040 fp->f_fglob->fg_ops = &vnops;
9041 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
9042
9043 proc_fdlock(p);
9044 procfdtbl_releasefd(p, indx, NULL);
9045 fp_drop(p, indx, fp, 1);
9046 proc_fdunlock(p);
9047
9048 /*
9049 * All variants of the namespace handler struct support these three fields:
9050 * token, flags, and the FD pointer
9051 */
9052 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
9053 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
9054 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
9055
9056 /*
9057 * Handle optional fields:
9058 * extended version support an info ptr (offset, length), and the
9059 *
9060 * namedata version supports a unique per-link object ID
9061 *
9062 */
9063 if (nhd->infoptr) {
9064 uio_t uio = (uio_t)nspace_items[i].arg;
9065 uint64_t u_offset, u_length;
9066
9067 if (uio) {
9068 u_offset = uio_offset(uio);
9069 u_length = uio_resid(uio);
9070 } else {
9071 u_offset = 0;
9072 u_length = 0;
9073 }
9074 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
9075 error = copyout(&u_length, nhd->infoptr+sizeof(uint64_t), sizeof(uint64_t));
9076 }
9077
9078 if (nhd->objid) {
9079 VATTR_INIT(&va);
9080 VATTR_WANTED(&va, va_linkid);
9081 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
9082 if (error == 0 ) {
9083 uint64_t linkid = 0;
9084 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
9085 linkid = (uint64_t)va.va_linkid;
9086 }
9087 error = copyout (&linkid, nhd->objid, sizeof(uint64_t));
9088 }
9089 }
9090
9091 if (error) {
9092 vn_close(nspace_items[i].vp, fmode, ctx);
9093 fp_free(p, indx, fp);
9094 unblock = 1;
9095 }
9096
9097 vnode_put(nspace_items[i].vp);
9098
9099 break;
9100 } else {
9101 printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
9102 i, nspace_items[i].vp, error, nspace_items[i].vp->v_name);
9103 }
9104
9105 } else {
9106 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
9107 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9108 error = EINVAL;
9109 break;
9110 }
9111
9112 }
9113 }
9114
9115 if (unblock) {
9116 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9117 vnode_lock_spin(nspace_items[i].vp);
9118 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9119 vnode_unlock(nspace_items[i].vp);
9120 }
9121 nspace_items[i].vp = NULL;
9122 nspace_items[i].vid = 0;
9123 nspace_items[i].flags = NSPACE_ITEM_DONE;
9124 nspace_items[i].token = 0;
9125
9126 wakeup((caddr_t)&(nspace_items[i].vp));
9127 }
9128
9129 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
9130 // just go through every snapshot event and unblock it immediately.
9131 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9132 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9133 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9134 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9135 nspace_items[i].vp = NULL;
9136 nspace_items[i].vid = 0;
9137 nspace_items[i].flags = NSPACE_ITEM_DONE;
9138 nspace_items[i].token = 0;
9139
9140 wakeup((caddr_t)&(nspace_items[i].vp));
9141 }
9142 }
9143 }
9144 }
9145 }
9146
9147 lck_mtx_unlock(&nspace_handler_lock);
9148
9149 lck_mtx_lock(&nspace_handler_exclusion_lock);
9150 nspace_handlers[nspace_type].handler_busy = 0;
9151 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9152
9153 return error;
9154 }
9155
9156 static inline int validate_namespace_args (int is64bit, int size) {
9157
9158 if (is64bit) {
9159 /* Must be one of these */
9160 if (size == sizeof(user64_namespace_handler_info)) {
9161 goto sizeok;
9162 }
9163 if (size == sizeof(user64_namespace_handler_info_ext)) {
9164 goto sizeok;
9165 }
9166 if (size == sizeof(user64_namespace_handler_data)) {
9167 goto sizeok;
9168 }
9169 return EINVAL;
9170 }
9171 else {
9172 /* 32 bit -- must be one of these */
9173 if (size == sizeof(user32_namespace_handler_info)) {
9174 goto sizeok;
9175 }
9176 if (size == sizeof(user32_namespace_handler_info_ext)) {
9177 goto sizeok;
9178 }
9179 if (size == sizeof(user32_namespace_handler_data)) {
9180 goto sizeok;
9181 }
9182 return EINVAL;
9183 }
9184
9185 sizeok:
9186
9187 return 0;
9188
9189 }
9190
9191 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
9192 {
9193 int error = 0;
9194 namespace_handler_data nhd;
9195
9196 bzero (&nhd, sizeof(namespace_handler_data));
9197
9198 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
9199 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9200 return EINVAL;
9201 }
9202
9203 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9204 return error;
9205 }
9206
9207 error = validate_namespace_args (is64bit, size);
9208 if (error) {
9209 return error;
9210 }
9211
9212 /* Copy in the userland pointers into our kernel-only struct */
9213
9214 if (is64bit) {
9215 /* 64 bit userland structures */
9216 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
9217 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
9218 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
9219
9220 /* If the size is greater than the standard info struct, add in extra fields */
9221 if (size > (sizeof(user64_namespace_handler_info))) {
9222 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
9223 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
9224 }
9225 if (size == (sizeof(user64_namespace_handler_data))) {
9226 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
9227 }
9228 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9229 }
9230 }
9231 else {
9232 /* 32 bit userland structures */
9233 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
9234 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
9235 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
9236
9237 if (size > (sizeof(user32_namespace_handler_info))) {
9238 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
9239 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
9240 }
9241 if (size == (sizeof(user32_namespace_handler_data))) {
9242 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
9243 }
9244 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9245 }
9246 }
9247
9248 return wait_for_namespace_event(&nhd, nspace_type);
9249 }
9250
/*
 * Make a filesystem-specific control call:
 *
 * Guts of the fsctl()/ffsctl() system calls.  Marshals the ioctl-style
 * argument described by `cmd' into a kernel buffer, handles the generic
 * FSCTL_* commands inline, and forwards anything else to the filesystem
 * through VNOP_IOCTL().
 *
 * NOTE: on return *arg_vp may have been set to NULL if this routine
 * dropped the vnode's iocount (FSCTL_SYNC_VOLUME does this); callers
 * must re-check *arg_vp before calling vnode_put().
 */
/* ARGSUSED */
static int
fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
{
	int error=0;
	boolean_t is64bit;
	u_int size;
#define STK_PARAMS 128
	char stkbuf[STK_PARAMS];
	caddr_t data, memp;
	vnode_t vp = *arg_vp;

	/* The argument size is encoded in the command word itself. */
	size = IOCPARM_LEN(cmd);
	if (size > IOCPARM_MAX) return (EINVAL);

	is64bit = proc_is64bit(p);

	memp = NULL;


	/*
	 * ensure the buffer is large enough for underlying calls
	 */
#ifndef HFSIOC_GETPATH
	typedef char pn_t[MAXPATHLEN];
#define HFSIOC_GETPATH  _IOWR('h', 13, pn_t)
#endif

#ifndef HFS_GETPATH
#define HFS_GETPATH  IOCBASECMD(HFSIOC_GETPATH)
#endif
	if (IOCBASECMD(cmd) == HFS_GETPATH) {
		/* Round up to MAXPATHLEN regardless of user input */
		size = MAXPATHLEN;
	}

	/* Use the on-stack scratch buffer when it fits, else heap-allocate. */
	if (size > sizeof (stkbuf)) {
		if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
		data = memp;
	} else {
		data = &stkbuf[0];
	};

	if (cmd & IOC_IN) {
		if (size) {
			/* Command takes input: pull the argument in from userland. */
			error = copyin(udata, data, size);
			if (error) {
				if (memp) {
					kfree (memp, size);
				}
				return error;
			}
		} else {
			/* Zero-length IOC_IN: the "argument" is the pointer itself. */
			if (is64bit) {
				*(user_addr_t *)data = udata;
			}
			else {
				*(uint32_t *)data = (uint32_t)udata;
			}
		};
	} else if ((cmd & IOC_OUT) && size) {
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	} else if (cmd & IOC_VOID) {
		if (is64bit) {
			*(user_addr_t *)data = udata;
		}
		else {
			*(uint32_t *)data = (uint32_t)udata;
		}
	}

	/* Check to see if it's a generic command */
	switch (IOCBASECMD(cmd)) {

	case FSCTL_SYNC_VOLUME: {
		mount_t mp = vp->v_mount;
		int arg = *(uint32_t*)data;

		/* record vid of vp so we can drop it below. */
		uint32_t vvid = vp->v_id;

		/*
		 * Then grab mount_iterref so that we can release the vnode.
		 * Without this, a thread may call vnode_iterate_prepare then
		 * get into a deadlock because we've never released the root vp
		 */
		error = mount_iterref (mp, 0);
		if (error)  {
			break;
		}
		vnode_put(vp);

		/* issue the sync for this volume */
		(void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);

		/*
		 * Then release the mount_iterref once we're done syncing; it's not
		 * needed for the VNOP_IOCTL below
		 */
		mount_iterdrop(mp);

		if (arg & FSCTL_SYNC_FULLSYNC) {
			/* re-obtain vnode iocount on the root vp, if possible */
			error = vnode_getwithvid (vp, vvid);
			if (error == 0) {
				error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
				vnode_put (vp);
			}
		}
		/* mark the argument VP as having been released */
		*arg_vp = NULL;
	}
	break;

	case FSCTL_SET_PACKAGE_EXTS: {
		user_addr_t ext_strings;
		uint32_t    num_entries;
		uint32_t    max_width;

		if (   (is64bit && size != sizeof(user64_package_ext_info))
		    || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {

			// either you're 64-bit and passed a 64-bit struct or
			// you're 32-bit and passed a 32-bit struct.  otherwise
			// it's not ok.
			error = EINVAL;
			break;
		}

		if (is64bit) {
			ext_strings = ((user64_package_ext_info *)data)->strings;
			num_entries = ((user64_package_ext_info *)data)->num_entries;
			max_width   = ((user64_package_ext_info *)data)->max_width;
		} else {
			ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
			num_entries = ((user32_package_ext_info *)data)->num_entries;
			max_width   = ((user32_package_ext_info *)data)->max_width;
		}
		error = set_package_extensions_table(ext_strings, num_entries, max_width);
	}
	break;

	/* namespace handlers */
	case FSCTL_NAMESPACE_HANDLER_GET: {
		error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
	}
	break;

	/* Snapshot handlers */
	case FSCTL_OLD_SNAPSHOT_HANDLER_GET: {
		error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
	}
	break;

	case FSCTL_SNAPSHOT_HANDLER_GET_EXT: {
		error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
	}
	break;

	case FSCTL_NAMESPACE_HANDLER_UPDATE: {
		/* Re-arm the timeout of a pending namespace event (handler only). */
		uint32_t token, val;
		int i;

		if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
			break;
		}

		if (!nspace_is_special_process(p)) {
			error = EINVAL;
			break;
		}

		token = ((uint32_t *)data)[0];
		val   = ((uint32_t *)data)[1];

		lck_mtx_lock(&nspace_handler_lock);

		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].token == token) {
				break;  /* exit for loop, not case stmt */
			}
		}

		if (i >= MAX_NSPACE_ITEMS) {
			error = ENOENT;
		} else {
			//
			// if this bit is set, when resolve_nspace_item() times out
			// it will loop and go back to sleep.
			//
			nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
		}

		lck_mtx_unlock(&nspace_handler_lock);

		if (error) {
			printf("nspace-handler-update: did not find token %u\n", token);
		}
	}
	break;

	case FSCTL_NAMESPACE_HANDLER_UNBLOCK: {
		/* Mark a pending namespace event done and wake its waiter. */
		uint32_t token, val;
		int i;

		if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
			break;
		}

		if (!nspace_is_special_process(p)) {
			error = EINVAL;
			break;
		}

		token = ((uint32_t *)data)[0];
		val   = ((uint32_t *)data)[1];

		lck_mtx_lock(&nspace_handler_lock);

		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].token == token) {
				break; /* exit for loop, not case statement */
			}
		}

		if (i >= MAX_NSPACE_ITEMS) {
			printf("nspace-handler-unblock: did not find token %u\n", token);
			error = ENOENT;
		} else {
			/* val == 0 means the snapshot-needed hint should be cleared too */
			if (val == 0 && nspace_items[i].vp) {
				vnode_lock_spin(nspace_items[i].vp);
				nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
				vnode_unlock(nspace_items[i].vp);
			}

			nspace_items[i].vp = NULL;
			nspace_items[i].arg = NULL;
			nspace_items[i].op = 0;
			nspace_items[i].vid = 0;
			nspace_items[i].flags = NSPACE_ITEM_DONE;
			nspace_items[i].token = 0;

			wakeup((caddr_t)&(nspace_items[i].vp));
		}

		lck_mtx_unlock(&nspace_handler_lock);
	}
	break;

	case FSCTL_NAMESPACE_HANDLER_CANCEL: {
		/* Cancel a pending namespace event; waiter sees NSPACE_ITEM_CANCELLED. */
		uint32_t token, val;
		int i;

		if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
			break;
		}

		if (!nspace_is_special_process(p)) {
			error = EINVAL;
			break;
		}

		token = ((uint32_t *)data)[0];
		val   = ((uint32_t *)data)[1];

		lck_mtx_lock(&nspace_handler_lock);

		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].token == token) {
				break;  /* exit for loop, not case stmt */
			}
		}

		if (i >= MAX_NSPACE_ITEMS) {
			printf("nspace-handler-cancel: did not find token %u\n", token);
			error = ENOENT;
		} else {
			if (nspace_items[i].vp) {
				vnode_lock_spin(nspace_items[i].vp);
				nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
				vnode_unlock(nspace_items[i].vp);
			}

			nspace_items[i].vp = NULL;
			nspace_items[i].arg = NULL;
			nspace_items[i].vid = 0;
			/* note: token is replaced with `val', not zeroed, on cancel */
			nspace_items[i].token = val;
			nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
			nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;

			wakeup((caddr_t)&(nspace_items[i].vp));
		}

		lck_mtx_unlock(&nspace_handler_lock);
	}
	break;

	case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
		if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
			break;
		}

		// we explicitly do not do the namespace_handler_proc check here

		lck_mtx_lock(&nspace_handler_lock);
		snapshot_timestamp = ((uint32_t *)data)[0];
		wakeup(&nspace_item_idx);
		lck_mtx_unlock(&nspace_handler_lock);
		printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);

	}
	break;

	case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
	{
		if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
			break;
		}

		lck_mtx_lock(&nspace_handler_lock);
		nspace_allow_virtual_devs = ((uint32_t *)data)[0];
		lck_mtx_unlock(&nspace_handler_lock);
		printf("nspace-snapshot-handler will%s allow events on disk-images\n",
		       nspace_allow_virtual_devs ? "" : " NOT");
		error = 0;

	}
	break;

	case FSCTL_SET_FSTYPENAME_OVERRIDE:
	{
		/* Override (or clear) the fstype name reported for this mount. */
		if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
			break;
		}
		if (vp->v_mount) {
			mount_lock(vp->v_mount);
			if (data[0] != 0) {
				strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
				vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
				/* special-case: read-only "mtmfs" gets extended security */
				if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
					vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
					vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
				}
			} else {
				/* empty string clears the override */
				if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
					vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
				}
				vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
				vp->v_mount->fstypename_override[0] = '\0';
			}
			mount_unlock(vp->v_mount);
		}
	}
	break;

	default: {
		/* Invoke the filesystem-specific code */
		error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
	}

	} /* end switch stmt */

	/*
	 * if no errors, copy any data to user. Size was
	 * already set and checked above.
	 */
	if (error == 0 && (cmd & IOC_OUT) && size)
		error = copyout(data, udata, size);

	if (memp) {
		kfree(memp, size);
	}

	return error;
}
9633
9634 /* ARGSUSED */
9635 int
9636 fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
9637 {
9638 int error;
9639 struct nameidata nd;
9640 u_long nameiflags;
9641 vnode_t vp = NULL;
9642 vfs_context_t ctx = vfs_context_current();
9643
9644 AUDIT_ARG(cmd, uap->cmd);
9645 AUDIT_ARG(value32, uap->options);
9646 /* Get the vnode for the file we are getting info on: */
9647 nameiflags = 0;
9648 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
9649 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
9650 UIO_USERSPACE, uap->path, ctx);
9651 if ((error = namei(&nd))) goto done;
9652 vp = nd.ni_vp;
9653 nameidone(&nd);
9654
9655 #if CONFIG_MACF
9656 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
9657 if (error) {
9658 goto done;
9659 }
9660 #endif
9661
9662 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
9663
9664 done:
9665 if (vp)
9666 vnode_put(vp);
9667 return error;
9668 }
9669 /* ARGSUSED */
9670 int
9671 ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
9672 {
9673 int error;
9674 vnode_t vp = NULL;
9675 vfs_context_t ctx = vfs_context_current();
9676 int fd = -1;
9677
9678 AUDIT_ARG(fd, uap->fd);
9679 AUDIT_ARG(cmd, uap->cmd);
9680 AUDIT_ARG(value32, uap->options);
9681
9682 /* Get the vnode for the file we are getting info on: */
9683 if ((error = file_vnode(uap->fd, &vp)))
9684 return error;
9685 fd = uap->fd;
9686 if ((error = vnode_getwithref(vp))) {
9687 file_drop(fd);
9688 return error;
9689 }
9690
9691 #if CONFIG_MACF
9692 if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
9693 file_drop(fd);
9694 vnode_put(vp);
9695 return error;
9696 }
9697 #endif
9698
9699 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
9700
9701 file_drop(fd);
9702
9703 /*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
9704 if (vp) {
9705 vnode_put(vp);
9706 }
9707
9708 return error;
9709 }
9710 /* end of fsctl system call */
9711
9712 /*
9713 * Retrieve the data of an extended attribute.
9714 */
9715 int
9716 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
9717 {
9718 vnode_t vp;
9719 struct nameidata nd;
9720 char attrname[XATTR_MAXNAMELEN+1];
9721 vfs_context_t ctx = vfs_context_current();
9722 uio_t auio = NULL;
9723 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9724 size_t attrsize = 0;
9725 size_t namelen;
9726 u_int32_t nameiflags;
9727 int error;
9728 char uio_buf[ UIO_SIZEOF(1) ];
9729
9730 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9731 return (EINVAL);
9732
9733 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9734 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
9735 if ((error = namei(&nd))) {
9736 return (error);
9737 }
9738 vp = nd.ni_vp;
9739 nameidone(&nd);
9740
9741 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9742 goto out;
9743 }
9744 if (xattr_protected(attrname)) {
9745 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
9746 error = EPERM;
9747 goto out;
9748 }
9749 }
9750 /*
9751 * the specific check for 0xffffffff is a hack to preserve
9752 * binaray compatibilty in K64 with applications that discovered
9753 * that passing in a buf pointer and a size of -1 resulted in
9754 * just the size of the indicated extended attribute being returned.
9755 * this isn't part of the documented behavior, but because of the
9756 * original implemtation's check for "uap->size > 0", this behavior
9757 * was allowed. In K32 that check turned into a signed comparison
9758 * even though uap->size is unsigned... in K64, we blow by that
9759 * check because uap->size is unsigned and doesn't get sign smeared
9760 * in the munger for a 32 bit user app. we also need to add a
9761 * check to limit the maximum size of the buffer being passed in...
9762 * unfortunately, the underlying fileystems seem to just malloc
9763 * the requested size even if the actual extended attribute is tiny.
9764 * because that malloc is for kernel wired memory, we have to put a
9765 * sane limit on it.
9766 *
9767 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
9768 * U64 running on K64 will yield -1 (64 bits wide)
9769 * U32/U64 running on K32 will yield -1 (32 bits wide)
9770 */
9771 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
9772 goto no_uio;
9773
9774 if (uap->value) {
9775 if (uap->size > (size_t)XATTR_MAXSIZE)
9776 uap->size = XATTR_MAXSIZE;
9777
9778 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9779 &uio_buf[0], sizeof(uio_buf));
9780 uio_addiov(auio, uap->value, uap->size);
9781 }
9782 no_uio:
9783 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
9784 out:
9785 vnode_put(vp);
9786
9787 if (auio) {
9788 *retval = uap->size - uio_resid(auio);
9789 } else {
9790 *retval = (user_ssize_t)attrsize;
9791 }
9792
9793 return (error);
9794 }
9795
9796 /*
9797 * Retrieve the data of an extended attribute.
9798 */
9799 int
9800 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
9801 {
9802 vnode_t vp;
9803 char attrname[XATTR_MAXNAMELEN+1];
9804 uio_t auio = NULL;
9805 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9806 size_t attrsize = 0;
9807 size_t namelen;
9808 int error;
9809 char uio_buf[ UIO_SIZEOF(1) ];
9810
9811 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9812 return (EINVAL);
9813
9814 if ( (error = file_vnode(uap->fd, &vp)) ) {
9815 return (error);
9816 }
9817 if ( (error = vnode_getwithref(vp)) ) {
9818 file_drop(uap->fd);
9819 return(error);
9820 }
9821 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9822 goto out;
9823 }
9824 if (xattr_protected(attrname)) {
9825 error = EPERM;
9826 goto out;
9827 }
9828 if (uap->value && uap->size > 0) {
9829 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9830 &uio_buf[0], sizeof(uio_buf));
9831 uio_addiov(auio, uap->value, uap->size);
9832 }
9833
9834 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
9835 out:
9836 (void)vnode_put(vp);
9837 file_drop(uap->fd);
9838
9839 if (auio) {
9840 *retval = uap->size - uio_resid(auio);
9841 } else {
9842 *retval = (user_ssize_t)attrsize;
9843 }
9844 return (error);
9845 }
9846
9847 /*
9848 * Set the data of an extended attribute.
9849 */
9850 int
9851 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
9852 {
9853 vnode_t vp;
9854 struct nameidata nd;
9855 char attrname[XATTR_MAXNAMELEN+1];
9856 vfs_context_t ctx = vfs_context_current();
9857 uio_t auio = NULL;
9858 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9859 size_t namelen;
9860 u_int32_t nameiflags;
9861 int error;
9862 char uio_buf[ UIO_SIZEOF(1) ];
9863
9864 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9865 return (EINVAL);
9866
9867 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9868 if (error == EPERM) {
9869 /* if the string won't fit in attrname, copyinstr emits EPERM */
9870 return (ENAMETOOLONG);
9871 }
9872 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9873 return error;
9874 }
9875 if (xattr_protected(attrname))
9876 return(EPERM);
9877 if (uap->size != 0 && uap->value == 0) {
9878 return (EINVAL);
9879 }
9880
9881 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9882 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
9883 if ((error = namei(&nd))) {
9884 return (error);
9885 }
9886 vp = nd.ni_vp;
9887 nameidone(&nd);
9888
9889 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9890 &uio_buf[0], sizeof(uio_buf));
9891 uio_addiov(auio, uap->value, uap->size);
9892
9893 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
9894 #if CONFIG_FSE
9895 if (error == 0) {
9896 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9897 FSE_ARG_VNODE, vp,
9898 FSE_ARG_DONE);
9899 }
9900 #endif
9901 vnode_put(vp);
9902 *retval = 0;
9903 return (error);
9904 }
9905
9906 /*
9907 * Set the data of an extended attribute.
9908 */
9909 int
9910 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
9911 {
9912 vnode_t vp;
9913 char attrname[XATTR_MAXNAMELEN+1];
9914 uio_t auio = NULL;
9915 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9916 size_t namelen;
9917 int error;
9918 char uio_buf[ UIO_SIZEOF(1) ];
9919 #if CONFIG_FSE
9920 vfs_context_t ctx = vfs_context_current();
9921 #endif
9922
9923 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9924 return (EINVAL);
9925
9926 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9927 if (error == EPERM) {
9928 /* if the string won't fit in attrname, copyinstr emits EPERM */
9929 return (ENAMETOOLONG);
9930 }
9931 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9932 return error;
9933 }
9934 if (xattr_protected(attrname))
9935 return(EPERM);
9936 if (uap->size != 0 && uap->value == 0) {
9937 return (EINVAL);
9938 }
9939 if ( (error = file_vnode(uap->fd, &vp)) ) {
9940 return (error);
9941 }
9942 if ( (error = vnode_getwithref(vp)) ) {
9943 file_drop(uap->fd);
9944 return(error);
9945 }
9946 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9947 &uio_buf[0], sizeof(uio_buf));
9948 uio_addiov(auio, uap->value, uap->size);
9949
9950 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
9951 #if CONFIG_FSE
9952 if (error == 0) {
9953 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9954 FSE_ARG_VNODE, vp,
9955 FSE_ARG_DONE);
9956 }
9957 #endif
9958 vnode_put(vp);
9959 file_drop(uap->fd);
9960 *retval = 0;
9961 return (error);
9962 }
9963
9964 /*
9965 * Remove an extended attribute.
9966 * XXX Code duplication here.
9967 */
9968 int
9969 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
9970 {
9971 vnode_t vp;
9972 struct nameidata nd;
9973 char attrname[XATTR_MAXNAMELEN+1];
9974 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9975 vfs_context_t ctx = vfs_context_current();
9976 size_t namelen;
9977 u_int32_t nameiflags;
9978 int error;
9979
9980 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9981 return (EINVAL);
9982
9983 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
9984 if (error != 0) {
9985 return (error);
9986 }
9987 if (xattr_protected(attrname))
9988 return(EPERM);
9989 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9990 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
9991 if ((error = namei(&nd))) {
9992 return (error);
9993 }
9994 vp = nd.ni_vp;
9995 nameidone(&nd);
9996
9997 error = vn_removexattr(vp, attrname, uap->options, ctx);
9998 #if CONFIG_FSE
9999 if (error == 0) {
10000 add_fsevent(FSE_XATTR_REMOVED, ctx,
10001 FSE_ARG_VNODE, vp,
10002 FSE_ARG_DONE);
10003 }
10004 #endif
10005 vnode_put(vp);
10006 *retval = 0;
10007 return (error);
10008 }
10009
10010 /*
10011 * Remove an extended attribute.
10012 * XXX Code duplication here.
10013 */
10014 int
10015 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
10016 {
10017 vnode_t vp;
10018 char attrname[XATTR_MAXNAMELEN+1];
10019 size_t namelen;
10020 int error;
10021 #if CONFIG_FSE
10022 vfs_context_t ctx = vfs_context_current();
10023 #endif
10024
10025 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10026 return (EINVAL);
10027
10028 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10029 if (error != 0) {
10030 return (error);
10031 }
10032 if (xattr_protected(attrname))
10033 return(EPERM);
10034 if ( (error = file_vnode(uap->fd, &vp)) ) {
10035 return (error);
10036 }
10037 if ( (error = vnode_getwithref(vp)) ) {
10038 file_drop(uap->fd);
10039 return(error);
10040 }
10041
10042 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
10043 #if CONFIG_FSE
10044 if (error == 0) {
10045 add_fsevent(FSE_XATTR_REMOVED, ctx,
10046 FSE_ARG_VNODE, vp,
10047 FSE_ARG_DONE);
10048 }
10049 #endif
10050 vnode_put(vp);
10051 file_drop(uap->fd);
10052 *retval = 0;
10053 return (error);
10054 }
10055
10056 /*
10057 * Retrieve the list of extended attribute names.
10058 * XXX Code duplication here.
10059 */
10060 int
10061 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
10062 {
10063 vnode_t vp;
10064 struct nameidata nd;
10065 vfs_context_t ctx = vfs_context_current();
10066 uio_t auio = NULL;
10067 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10068 size_t attrsize = 0;
10069 u_int32_t nameiflags;
10070 int error;
10071 char uio_buf[ UIO_SIZEOF(1) ];
10072
10073 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10074 return (EINVAL);
10075
10076 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10077 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
10078 if ((error = namei(&nd))) {
10079 return (error);
10080 }
10081 vp = nd.ni_vp;
10082 nameidone(&nd);
10083 if (uap->namebuf != 0 && uap->bufsize > 0) {
10084 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
10085 &uio_buf[0], sizeof(uio_buf));
10086 uio_addiov(auio, uap->namebuf, uap->bufsize);
10087 }
10088
10089 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
10090
10091 vnode_put(vp);
10092 if (auio) {
10093 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10094 } else {
10095 *retval = (user_ssize_t)attrsize;
10096 }
10097 return (error);
10098 }
10099
10100 /*
10101 * Retrieve the list of extended attribute names.
10102 * XXX Code duplication here.
10103 */
10104 int
10105 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
10106 {
10107 vnode_t vp;
10108 uio_t auio = NULL;
10109 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10110 size_t attrsize = 0;
10111 int error;
10112 char uio_buf[ UIO_SIZEOF(1) ];
10113
10114 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10115 return (EINVAL);
10116
10117 if ( (error = file_vnode(uap->fd, &vp)) ) {
10118 return (error);
10119 }
10120 if ( (error = vnode_getwithref(vp)) ) {
10121 file_drop(uap->fd);
10122 return(error);
10123 }
10124 if (uap->namebuf != 0 && uap->bufsize > 0) {
10125 auio = uio_createwithbuffer(1, 0, spacetype,
10126 UIO_READ, &uio_buf[0], sizeof(uio_buf));
10127 uio_addiov(auio, uap->namebuf, uap->bufsize);
10128 }
10129
10130 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
10131
10132 vnode_put(vp);
10133 file_drop(uap->fd);
10134 if (auio) {
10135 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10136 } else {
10137 *retval = (user_ssize_t)attrsize;
10138 }
10139 return (error);
10140 }
10141
/*
 * Resolve a <volfs_id, object id> pair to the object's absolute pathname.
 *
 * Looks up the mount by volfs id, gets the vnode by object id (objid 2 is
 * the conventional root-directory id), walks union mounts downward when
 * the id isn't found in the upper layer, and builds the path into `buf'.
 * On success the path length (including the NUL) is stored in *pathlen.
 *
 * Returns 0 on success; EINVAL for oversized buffers, ENOMEM for a NULL
 * buffer, ENOTSUP when the volfs id doesn't resolve, or whatever
 * VFS_ROOT/VFS_VGET/build_path report.
 */
static int fsgetpath_internal(
	vfs_context_t ctx, int volfs_id, uint64_t objid,
	vm_size_t bufsize, caddr_t buf, int *pathlen)
{
	int error;
	struct mount *mp = NULL;
	vnode_t vp;
	int length;
	int bpflags;

	if (bufsize > PAGE_SIZE) {
		return (EINVAL);
	}

	if (buf == NULL) {
		return (ENOMEM);
	}

	/* mount_lookupby_volfsid(…, 1) returns the mount vfs_busy'd. */
	if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
		error = ENOTSUP;  /* unexpected failure */
		return ENOTSUP;
	}

unionget:
	/* objid 2 is the conventional root-directory object id. */
	if (objid == 2) {
		error = VFS_ROOT(mp, &vp, ctx);
	} else {
		error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
	}

	if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
		/*
		 * If the fileid isn't found and we're in a union
		 * mount volume, then see if the fileid is in the
		 * mounted-on volume.
		 */
		struct mount *tmp = mp;
		mp = vnode_mount(tmp->mnt_vnodecovered);
		vfs_unbusy(tmp);
		/* Only retry if we can busy the lower mount without blocking. */
		if (vfs_busy(mp, LK_NOWAIT) == 0)
			goto unionget;
	} else {
		vfs_unbusy(mp);
	}

	if (error) {
		return error;
	}

#if CONFIG_MACF
	error = mac_vnode_check_fsgetpath(ctx, vp);
	if (error) {
		vnode_put(vp);
		return error;
	}
#endif

	/* Obtain the absolute path to this vnode. */
	bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
	bpflags |= BUILDPATH_CHECK_MOVED;
	error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
	vnode_put(vp);

	if (error) {
		goto out;
	}

	AUDIT_ARG(text, buf);

	if (kdebug_enable) {
		/* Emit the resolved path into the kdebug trace stream. */
		long dbg_parms[NUMPARMS];
		int  dbg_namelen;

		dbg_namelen = (int)sizeof(dbg_parms);

		if (length < dbg_namelen) {
			memcpy((char *)dbg_parms, buf, length);
			memset((char *)dbg_parms + length, 0, dbg_namelen - length);

			dbg_namelen = length;
		} else {
			/* Path longer than the trace record: keep the tail. */
			memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
		}

		/*
		 * NOTE(review): vp's iocount was dropped above; here it appears
		 * to be used only as an opaque identifying tag for the trace
		 * record, not dereferenced -- confirm against kdebug KPI.
		 */
		kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
	}

	*pathlen = (user_ssize_t)length; /* may be superseded by error */

out:
	return (error);
}
10234
10235 /*
10236 * Obtain the full pathname of a file system object by id.
10237 *
10238 * This is a private SPI used by the File Manager.
10239 */
10240 __private_extern__
10241 int
10242 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
10243 {
10244 vfs_context_t ctx = vfs_context_current();
10245 fsid_t fsid;
10246 char *realpath;
10247 int length;
10248 int error;
10249
10250 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
10251 return (error);
10252 }
10253 AUDIT_ARG(value32, fsid.val[0]);
10254 AUDIT_ARG(value64, uap->objid);
10255 /* Restrict output buffer size for now. */
10256
10257 if (uap->bufsize > PAGE_SIZE) {
10258 return (EINVAL);
10259 }
10260 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
10261 if (realpath == NULL) {
10262 return (ENOMEM);
10263 }
10264
10265 error = fsgetpath_internal(
10266 ctx, fsid.val[0], uap->objid,
10267 uap->bufsize, realpath, &length);
10268
10269 if (error) {
10270 goto out;
10271 }
10272
10273 error = copyout((caddr_t)realpath, uap->buf, length);
10274
10275 *retval = (user_ssize_t)length; /* may be superseded by error */
10276 out:
10277 if (realpath) {
10278 FREE(realpath, M_TEMP);
10279 }
10280 return (error);
10281 }
10282
/*
 * Common routine to handle various flavors of statfs data heading out
 * to user space.
 *
 * Fills in either a user64_statfs or user32_statfs image of *sfsp and
 * copies it out to bufp.  When partial_copy is set, the trailing
 * f_reserved3/f_reserved4 fields are omitted from the copyout (legacy
 * statfs buffer shape), though *sizep still reports the full size.
 *
 * Returns:	0			Success
 *		EFAULT
 */
static int
munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
    user_addr_t bufp, int *sizep, boolean_t is_64_bit,
    boolean_t partial_copy)
{
	int error;
	int my_size, copy_size;

	if (is_64_bit) {
		struct user64_statfs sfs;
		my_size = copy_size = sizeof(sfs);
		bzero(&sfs, my_size);
		sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
		sfs.f_type = mp->mnt_vtable->vfc_typenum;
		/* fssubtype travels in a reserved slot of the user structure */
		sfs.f_reserved1 = (short)sfsp->f_fssubtype;
		sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
		sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
		sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
		sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
		sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
		sfs.f_files = (user64_long_t)sfsp->f_files;
		sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
		sfs.f_fsid = sfsp->f_fsid;
		sfs.f_owner = sfsp->f_owner;
		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
			strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
		} else {
			strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
		}
		strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
		strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);

		if (partial_copy) {
			/* legacy callers get the structure minus the reserved tail */
			copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
		}
		error = copyout((caddr_t)&sfs, bufp, copy_size);
	}
	else {
		struct user32_statfs sfs;

		my_size = copy_size = sizeof(sfs);
		bzero(&sfs, my_size);

		sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
		sfs.f_type = mp->mnt_vtable->vfc_typenum;
		sfs.f_reserved1 = (short)sfsp->f_fssubtype;

		/*
		 * It's possible for there to be more than 2^31 blocks in the filesystem, so we
		 * have to fudge the numbers here in that case. We inflate the blocksize in order
		 * to reflect the filesystem size as best we can.
		 */
		if ((sfsp->f_blocks > INT_MAX)
			/* Hack for 4061702 . I think the real fix is for Carbon to
			 * look for some volume capability and not depend on hidden
			 * semantics agreed between a FS and carbon.
			 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
			 * for Carbon to set bNoVolumeSizes volume attribute.
			 * Without this the webdavfs files cannot be copied onto
			 * disk as they look huge. This change should not affect
			 * XSAN as they should not be setting these to -1..
			 */
			&& (sfsp->f_blocks != 0xffffffffffffffffULL)
			&& (sfsp->f_bfree != 0xffffffffffffffffULL)
			&& (sfsp->f_bavail != 0xffffffffffffffffULL)) {
			int		shift;

			/*
			 * Work out how far we have to shift the block count down to make it fit.
			 * Note that it's possible to have to shift so far that the resulting
			 * blocksize would be unreportably large.  At that point, we will clip
			 * any values that don't fit.
			 *
			 * For safety's sake, we also ensure that f_iosize is never reported as
			 * being smaller than f_bsize.
			 */
			for (shift = 0; shift < 32; shift++) {
				/* shifted block count fits in a 32-bit field: done */
				if ((sfsp->f_blocks >> shift) <= INT_MAX)
					break;
				/* inflated blocksize would itself overflow: stop and clip */
				if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
					break;
			}
#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
			sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
			sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
			sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
#undef __SHIFT_OR_CLIP
			/* inflate the reported blocksize to compensate for the shift */
			sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
			sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
		} else {
			/* filesystem is small enough to be reported honestly */
			sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
			sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
			sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
			sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
			sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
		}
		sfs.f_files = (user32_long_t)sfsp->f_files;
		sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
		sfs.f_fsid = sfsp->f_fsid;
		sfs.f_owner = sfsp->f_owner;
		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
			strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
		} else {
			strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
		}
		strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
		strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);

		if (partial_copy) {
			/* legacy callers get the structure minus the reserved tail */
			copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
		}
		error = copyout((caddr_t)&sfs, bufp, copy_size);
	}

	if (sizep != NULL) {
		/* report the full structure size, even for a partial copy */
		*sizep = my_size;
	}
	return(error);
}
10410
10411 /*
10412 * copy stat structure into user_stat structure.
10413 */
10414 void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
10415 {
10416 bzero(usbp, sizeof(*usbp));
10417
10418 usbp->st_dev = sbp->st_dev;
10419 usbp->st_ino = sbp->st_ino;
10420 usbp->st_mode = sbp->st_mode;
10421 usbp->st_nlink = sbp->st_nlink;
10422 usbp->st_uid = sbp->st_uid;
10423 usbp->st_gid = sbp->st_gid;
10424 usbp->st_rdev = sbp->st_rdev;
10425 #ifndef _POSIX_C_SOURCE
10426 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10427 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10428 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10429 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10430 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10431 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10432 #else
10433 usbp->st_atime = sbp->st_atime;
10434 usbp->st_atimensec = sbp->st_atimensec;
10435 usbp->st_mtime = sbp->st_mtime;
10436 usbp->st_mtimensec = sbp->st_mtimensec;
10437 usbp->st_ctime = sbp->st_ctime;
10438 usbp->st_ctimensec = sbp->st_ctimensec;
10439 #endif
10440 usbp->st_size = sbp->st_size;
10441 usbp->st_blocks = sbp->st_blocks;
10442 usbp->st_blksize = sbp->st_blksize;
10443 usbp->st_flags = sbp->st_flags;
10444 usbp->st_gen = sbp->st_gen;
10445 usbp->st_lspare = sbp->st_lspare;
10446 usbp->st_qspare[0] = sbp->st_qspare[0];
10447 usbp->st_qspare[1] = sbp->st_qspare[1];
10448 }
10449
10450 void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
10451 {
10452 bzero(usbp, sizeof(*usbp));
10453
10454 usbp->st_dev = sbp->st_dev;
10455 usbp->st_ino = sbp->st_ino;
10456 usbp->st_mode = sbp->st_mode;
10457 usbp->st_nlink = sbp->st_nlink;
10458 usbp->st_uid = sbp->st_uid;
10459 usbp->st_gid = sbp->st_gid;
10460 usbp->st_rdev = sbp->st_rdev;
10461 #ifndef _POSIX_C_SOURCE
10462 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10463 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10464 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10465 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10466 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10467 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10468 #else
10469 usbp->st_atime = sbp->st_atime;
10470 usbp->st_atimensec = sbp->st_atimensec;
10471 usbp->st_mtime = sbp->st_mtime;
10472 usbp->st_mtimensec = sbp->st_mtimensec;
10473 usbp->st_ctime = sbp->st_ctime;
10474 usbp->st_ctimensec = sbp->st_ctimensec;
10475 #endif
10476 usbp->st_size = sbp->st_size;
10477 usbp->st_blocks = sbp->st_blocks;
10478 usbp->st_blksize = sbp->st_blksize;
10479 usbp->st_flags = sbp->st_flags;
10480 usbp->st_gen = sbp->st_gen;
10481 usbp->st_lspare = sbp->st_lspare;
10482 usbp->st_qspare[0] = sbp->st_qspare[0];
10483 usbp->st_qspare[1] = sbp->st_qspare[1];
10484 }
10485
10486 /*
10487 * copy stat64 structure into user_stat64 structure.
10488 */
10489 void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
10490 {
10491 bzero(usbp, sizeof(*usbp));
10492
10493 usbp->st_dev = sbp->st_dev;
10494 usbp->st_ino = sbp->st_ino;
10495 usbp->st_mode = sbp->st_mode;
10496 usbp->st_nlink = sbp->st_nlink;
10497 usbp->st_uid = sbp->st_uid;
10498 usbp->st_gid = sbp->st_gid;
10499 usbp->st_rdev = sbp->st_rdev;
10500 #ifndef _POSIX_C_SOURCE
10501 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10502 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10503 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10504 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10505 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10506 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10507 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
10508 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
10509 #else
10510 usbp->st_atime = sbp->st_atime;
10511 usbp->st_atimensec = sbp->st_atimensec;
10512 usbp->st_mtime = sbp->st_mtime;
10513 usbp->st_mtimensec = sbp->st_mtimensec;
10514 usbp->st_ctime = sbp->st_ctime;
10515 usbp->st_ctimensec = sbp->st_ctimensec;
10516 usbp->st_birthtime = sbp->st_birthtime;
10517 usbp->st_birthtimensec = sbp->st_birthtimensec;
10518 #endif
10519 usbp->st_size = sbp->st_size;
10520 usbp->st_blocks = sbp->st_blocks;
10521 usbp->st_blksize = sbp->st_blksize;
10522 usbp->st_flags = sbp->st_flags;
10523 usbp->st_gen = sbp->st_gen;
10524 usbp->st_lspare = sbp->st_lspare;
10525 usbp->st_qspare[0] = sbp->st_qspare[0];
10526 usbp->st_qspare[1] = sbp->st_qspare[1];
10527 }
10528
10529 void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
10530 {
10531 bzero(usbp, sizeof(*usbp));
10532
10533 usbp->st_dev = sbp->st_dev;
10534 usbp->st_ino = sbp->st_ino;
10535 usbp->st_mode = sbp->st_mode;
10536 usbp->st_nlink = sbp->st_nlink;
10537 usbp->st_uid = sbp->st_uid;
10538 usbp->st_gid = sbp->st_gid;
10539 usbp->st_rdev = sbp->st_rdev;
10540 #ifndef _POSIX_C_SOURCE
10541 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10542 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10543 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10544 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10545 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10546 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10547 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
10548 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
10549 #else
10550 usbp->st_atime = sbp->st_atime;
10551 usbp->st_atimensec = sbp->st_atimensec;
10552 usbp->st_mtime = sbp->st_mtime;
10553 usbp->st_mtimensec = sbp->st_mtimensec;
10554 usbp->st_ctime = sbp->st_ctime;
10555 usbp->st_ctimensec = sbp->st_ctimensec;
10556 usbp->st_birthtime = sbp->st_birthtime;
10557 usbp->st_birthtimensec = sbp->st_birthtimensec;
10558 #endif
10559 usbp->st_size = sbp->st_size;
10560 usbp->st_blocks = sbp->st_blocks;
10561 usbp->st_blksize = sbp->st_blksize;
10562 usbp->st_flags = sbp->st_flags;
10563 usbp->st_gen = sbp->st_gen;
10564 usbp->st_lspare = sbp->st_lspare;
10565 usbp->st_qspare[0] = sbp->st_qspare[0];
10566 usbp->st_qspare[1] = sbp->st_qspare[1];
10567 }
10568
10569 /*
10570 * Purge buffer cache for simulating cold starts
10571 */
10572 static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
10573 {
10574 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
10575
10576 return VNODE_RETURNED;
10577 }
10578
10579 static int vfs_purge_callback(mount_t mp, __unused void * arg)
10580 {
10581 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
10582
10583 return VFS_RETURNED;
10584 }
10585
10586 int
10587 vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
10588 {
10589 if (!kauth_cred_issuser(kauth_cred_get()))
10590 return EPERM;
10591
10592 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
10593
10594 return 0;
10595 }
10596