1 /*
2 * Copyright (c) 1995-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
80 #include <sys/stat.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
87 #include <sys/mman.h>
88 #include <sys/dirent.h>
89 #include <sys/attr.h>
90 #include <sys/sysctl.h>
91 #include <sys/ubc.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/sysctl.h>
98 #include <sys/xattr.h>
99 #include <sys/fcntl.h>
100 #include <sys/fsctl.h>
101 #include <sys/ubc_internal.h>
102 #include <sys/disk.h>
103 #include <sys/content_protection.h>
104 #include <sys/clonefile.h>
105 #include <sys/snapshot.h>
106 #include <sys/priv.h>
107 #include <sys/fsgetpath.h>
108 #include <machine/cons.h>
109 #include <machine/limits.h>
110 #include <miscfs/specfs/specdev.h>
111
112 #include <vfs/vfs_disk_conditioner.h>
113
114 #include <security/audit/audit.h>
115 #include <bsm/audit_kevents.h>
116
117 #include <mach/mach_types.h>
118 #include <kern/kern_types.h>
119 #include <kern/kalloc.h>
120 #include <kern/task.h>
121
122 #include <vm/vm_pageout.h>
123 #include <vm/vm_protos.h>
124
125 #include <libkern/OSAtomic.h>
126 #include <pexpert/pexpert.h>
127 #include <IOKit/IOBSD.h>
128
129 // deps for MIG call
130 #include <kern/host.h>
131 #include <kern/ipc_misc.h>
132 #include <mach/host_priv.h>
133 #include <mach/vfs_nspace.h>
134 #include <os/log.h>
135
136 #if ROUTEFS
137 #include <miscfs/routefs/routefs.h>
138 #endif /* ROUTEFS */
139
140 #if CONFIG_MACF
141 #include <security/mac.h>
142 #include <security/mac_framework.h>
143 #endif
144
145 #if CONFIG_FSE
146 #define GET_PATH(x) \
147 (x) = get_pathbuff();
148 #define RELEASE_PATH(x) \
149 release_pathbuff(x);
150 #else
151 #define GET_PATH(x) \
152 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
153 #define RELEASE_PATH(x) \
154 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
155 #endif /* CONFIG_FSE */
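/*
 * Added usage sketch (not original source): callers in this file pair
 * these macros around a MAXPATHLEN scratch buffer, roughly:
 *
 *	char *path = NULL;
 *	GET_PATH(path);
 *	// ... build or copy a path of at most MAXPATHLEN bytes ...
 *	RELEASE_PATH(path);
 *
 * Note that the macro bodies above already end in semicolons.
 */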
156
157 #ifndef HFS_GET_BOOT_INFO
158 #define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
159 #endif
160
161 #ifndef HFS_SET_BOOT_INFO
162 #define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
163 #endif
164
165 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
166 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
167 #endif
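/*
 * Added note: _IOW('J', 1, u_int64_t) encodes an ioctl whose argument
 * (a u_int64_t) is copied from the caller into the kernel, in command
 * group 'J' with command number 1. The fallback definition above is
 * presumably only needed when the APFS header that normally provides
 * it is unavailable at build time.
 */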
168
169 extern void disk_conditioner_unmount(mount_t mp);
170
171 /* struct for checkdirs iteration */
172 struct cdirargs {
173 vnode_t olddp;
174 vnode_t newdp;
175 };
176 /* callback for checkdirs iteration */
177 static int checkdirs_callback(proc_t p, void * arg);
178
179 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
180 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
181 void enablequotas(struct mount *mp, vfs_context_t ctx);
182 static int getfsstat_callback(mount_t mp, void * arg);
183 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
184 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
185 static int sync_callback(mount_t, void *);
186 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
187 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
188 boolean_t partial_copy);
189 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
190 static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
191 struct componentname *cnp, user_addr_t fsmountargs,
192 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
193 vfs_context_t ctx);
194 void vfs_notify_mount(vnode_t pdvp);
195
196 int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
197
198 struct fd_vn_data * fg_vn_data_alloc(void);
199
200 /*
201 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
202 * Concurrent lookups (or lookups by ids) on hard links can cause the
203 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
204 * does) to return ENOENT as the path cannot be returned from the name cache
205 * alone. We have no option but to retry and hope that one namei->reverse-path
206 * generation completes without an intervening lookup (or lookup by id) on the
207 * hard-linked item. This is only an issue for MAC hooks that cannot re-enter
208 * the filesystem, which currently are the MAC hooks for rename, unlink and rmdir.
209 */
210 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
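/*
 * Added illustration (hedged, not upstream code): callers of the
 * vn_authorize_{rmdir, unlink, rename} hooks typically wrap them in a
 * bounded retry loop along these lines:
 *
 *	int retry_count = 0;
 *	retry:
 *	error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
 *	if (error == ENOENT && retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
 *		retry_count++;
 *		goto retry;
 *	}
 *
 * The bound keeps a pathological stream of concurrent lookups from
 * stalling unlink/rmdir/rename forever.
 */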
211
212 static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg,
213 int unlink_flags);
214
215 static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, uint32_t options, int *);
216
217 #ifdef CONFIG_IMGSRC_ACCESS
218 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
219 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
220 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
221 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
222 static void mount_end_update(mount_t mp);
223 static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
224 #endif /* CONFIG_IMGSRC_ACCESS */
225
226 #if CONFIG_LOCKERBOOT
227 int mount_locker_protoboot(const char *fsname, const char *mntpoint,
228 const char *pbdevpath);
229 #endif
230
231 //snapshot functions
232 #if CONFIG_MNT_ROOTSNAP
233 static int snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx);
234 #else
235 static int snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx) __attribute__((unused));
236 #endif
237
238 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
239
240 __private_extern__
241 int sync_internal(void);
242
243 __private_extern__
244 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
245
246 extern lck_grp_t *fd_vn_lck_grp;
247 extern lck_grp_attr_t *fd_vn_lck_grp_attr;
248 extern lck_attr_t *fd_vn_lck_attr;
249
250 /*
251 * Incremented each time a mount or unmount operation occurs;
252 * used to invalidate the cached value of the rootvp in the
253 * mount structure utilized by cache_lookup_path.
254 */
255 uint32_t mount_generation = 0;
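/*
 * Added sketch (hypothetical field names, for illustration only): a
 * consumer that caches a root vnode can pair it with the generation it
 * observed and distrust the cache once the counter moves on:
 *
 *	if (cached_generation != mount_generation) {
 *		// cached rootvp may be stale; re-derive it
 *	}
 */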
256
257 /* counts number of mount and unmount operations */
258 unsigned int vfs_nummntops = 0;
259
260 extern const struct fileops vnops;
261 #if CONFIG_APPLEDOUBLE
262 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
263 #endif /* CONFIG_APPLEDOUBLE */
264
265 /*
266 * Virtual File System System Calls
267 */
268
269 #if NFSCLIENT || DEVFS || ROUTEFS
270 /*
271 * Private in-kernel mounting SPI (kernel-initiated mounts only, e.g. NFS/devfs/routefs; not exported)
272 */
273 __private_extern__
274 boolean_t
275 vfs_iskernelmount(mount_t mp)
276 {
277 return (mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE;
278 }
279
280 __private_extern__
281 int
282 kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
283 void *data, __unused size_t datalen, int syscall_flags, uint32_t kern_flags, vfs_context_t ctx)
284 {
285 struct nameidata nd;
286 boolean_t did_namei;
287 int error;
288
289 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
290 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
291
292 /*
293 * Get the vnode to be covered if it's not supplied
294 */
295 if (vp == NULLVP) {
296 error = namei(&nd);
297 if (error) {
298 if (kern_flags & (KERNEL_MOUNT_SNAPSHOT | KERNEL_MOUNT_VMVOL | KERNEL_MOUNT_DATAVOL)) {
299 printf("failed to locate mount-on path: %s ", path);
300 }
301 return error;
302 }
303 vp = nd.ni_vp;
304 pvp = nd.ni_dvp;
305 did_namei = TRUE;
306 } else {
307 char *pnbuf = CAST_DOWN(char *, path);
308
309 nd.ni_cnd.cn_pnbuf = pnbuf;
310 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
311 did_namei = FALSE;
312 }
313
314 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
315 syscall_flags, kern_flags, NULL, TRUE, ctx);
316
317 if (did_namei) {
318 vnode_put(vp);
319 vnode_put(pvp);
320 nameidone(&nd);
321 }
322
323 return error;
324 }
325 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
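/*
 * Added usage sketch (hedged): an in-kernel client such as devfs might
 * drive this SPI roughly as follows; the filesystem name, path and
 * argument structure are illustrative, not copied from a real caller:
 *
 *	error = kernel_mount("devfs", NULLVP, NULLVP, "/dev",
 *	    &args, sizeof(args), MNT_DONTBROWSE,
 *	    KERNEL_MOUNT_NOAUTH, vfs_context_kernel());
 *
 * Passing NULLVP for vp makes kernel_mount() run the namei() lookup of
 * the mount-on path itself, as in the body above.
 */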
326
327 /*
328 * Mount a file system.
329 */
330 /* ARGSUSED */
331 int
332 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
333 {
334 struct __mac_mount_args muap;
335
336 muap.type = uap->type;
337 muap.path = uap->path;
338 muap.flags = uap->flags;
339 muap.data = uap->data;
340 muap.mac_p = USER_ADDR_NULL;
341 return __mac_mount(p, &muap, retval);
342 }
343
344 int
345 fmount(__unused proc_t p, struct fmount_args *uap, __unused int32_t *retval)
346 {
347 struct componentname cn;
348 vfs_context_t ctx = vfs_context_current();
349 size_t dummy = 0;
350 int error;
351 int flags = uap->flags;
352 char fstypename[MFSNAMELEN];
353 char *labelstr = NULL; /* regular mount call always sets it to NULL for __mac_mount() */
354 vnode_t pvp;
355 vnode_t vp;
356
357 AUDIT_ARG(fd, uap->fd);
358 AUDIT_ARG(fflags, flags);
359 /* fstypename will get audited by mount_common */
360
361 /* Sanity check the flags */
362 if (flags & (MNT_IMGSRC_BY_INDEX | MNT_ROOTFS)) {
363 return ENOTSUP;
364 }
365
366 if (flags & MNT_UNION) {
367 return EPERM;
368 }
369
370 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
371 if (error) {
372 return error;
373 }
374
375 if ((error = file_vnode(uap->fd, &vp)) != 0) {
376 return error;
377 }
378
379 if ((error = vnode_getwithref(vp)) != 0) {
380 file_drop(uap->fd);
381 return error;
382 }
383
384 pvp = vnode_getparent(vp);
385 if (pvp == NULL) {
386 vnode_put(vp);
387 file_drop(uap->fd);
388 return EINVAL;
389 }
390
391 memset(&cn, 0, sizeof(struct componentname));
392 MALLOC(cn.cn_pnbuf, char *, MAXPATHLEN, M_TEMP, M_WAITOK);
393 cn.cn_pnlen = MAXPATHLEN;
394
395 if ((error = vn_getpath(vp, cn.cn_pnbuf, &cn.cn_pnlen)) != 0) {
396 FREE(cn.cn_pnbuf, M_TEMP);
397 vnode_put(pvp);
398 vnode_put(vp);
399 file_drop(uap->fd);
400 return error;
401 }
402
403 error = mount_common(fstypename, pvp, vp, &cn, uap->data, flags, 0, labelstr, FALSE, ctx);
404
405 FREE(cn.cn_pnbuf, M_TEMP);
406 vnode_put(pvp);
407 vnode_put(vp);
408 file_drop(uap->fd);
409
410 return error;
411 }
412
413 void
414 vfs_notify_mount(vnode_t pdvp)
415 {
416 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
417 lock_vnode_and_post(pdvp, NOTE_WRITE);
418 }
419
420 /*
421 * __mac_mount:
422 * Mount a file system taking into account MAC label behavior.
423 * See mount(2) man page for more information
424 *
425 * Parameters: p Process requesting the mount
426 * uap User argument descriptor (see below)
427 * retval (ignored)
428 *
429 * Indirect: uap->type Filesystem type
430 * uap->path Path to mount
431 * uap->data Mount arguments
432 * uap->mac_p MAC info
433 * uap->flags Mount flags
434 *
435 *
436 * Returns: 0 Success
437 * !0 Not success
438 */
439 boolean_t root_fs_upgrade_try = FALSE;
440
441 int
442 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
443 {
444 vnode_t pvp = NULL;
445 vnode_t vp = NULL;
446 int need_nameidone = 0;
447 vfs_context_t ctx = vfs_context_current();
448 char fstypename[MFSNAMELEN];
449 struct nameidata nd;
450 size_t dummy = 0;
451 char *labelstr = NULL;
452 int flags = uap->flags;
453 int error;
454 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
455 boolean_t is_64bit = IS_64BIT_PROCESS(p);
456 #else
457 #pragma unused(p)
458 #endif
459 /*
460 * Get the fs type name from user space
461 */
462 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
463 if (error) {
464 return error;
465 }
466
467 /*
468 * Get the vnode to be covered
469 */
470 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
471 UIO_USERSPACE, uap->path, ctx);
472 error = namei(&nd);
473 if (error) {
474 goto out;
475 }
476 need_nameidone = 1;
477 vp = nd.ni_vp;
478 pvp = nd.ni_dvp;
479
480 #ifdef CONFIG_IMGSRC_ACCESS
481 /* Mounting image source cannot be batched with other operations */
482 if (flags == MNT_IMGSRC_BY_INDEX) {
483 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
484 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
485 goto out;
486 }
487 #endif /* CONFIG_IMGSRC_ACCESS */
488
489 #if CONFIG_MACF
490 /*
491 * Get the label string (if any) from user space
492 */
493 if (uap->mac_p != USER_ADDR_NULL) {
494 struct user_mac mac;
495 size_t ulen = 0;
496
497 if (is_64bit) {
498 struct user64_mac mac64;
499 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
500 mac.m_buflen = mac64.m_buflen;
501 mac.m_string = mac64.m_string;
502 } else {
503 struct user32_mac mac32;
504 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
505 mac.m_buflen = mac32.m_buflen;
506 mac.m_string = mac32.m_string;
507 }
508 if (error) {
509 goto out;
510 }
511 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
512 (mac.m_buflen < 2)) {
513 error = EINVAL;
514 goto out;
515 }
516 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
517 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
518 if (error) {
519 goto out;
520 }
521 AUDIT_ARG(mac_string, labelstr);
522 }
523 #endif /* CONFIG_MACF */
524
525 AUDIT_ARG(fflags, flags);
526
527 #if SECURE_KERNEL
528 if (flags & MNT_UNION) {
529 /* No union mounts on release kernels */
530 error = EPERM;
531 goto out;
532 }
533 #endif
534
535 if ((vp->v_flag & VROOT) &&
536 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
537 if (!(flags & MNT_UNION)) {
538 flags |= MNT_UPDATE;
539 } else {
540 /*
541 * For a union mount on '/', treat it as a fresh
542 * mount instead of an update.
543 * Otherwise, union mounting on '/' used to panic the
544 * system, since mnt_vnodecovered (which unionlookup
545 * requires after it gets ENOENT on a union mount)
546 * was found to be NULL for '/'.
547 */
548 flags = (flags & ~(MNT_UPDATE));
549 }
550
551 #if SECURE_KERNEL
552 if ((flags & MNT_RDONLY) == 0) {
553 /* Release kernels are not allowed to mount "/" as rw */
554 error = EPERM;
555 goto out;
556 }
557 #endif
558 /*
559 * See 7392553 for more details on why this check exists.
560 * Suffice to say: If this check is ON and something tries
561 * to mount the rootFS RW, we'll turn off the codesign
562 * bitmap optimization.
563 */
564 #if CHECK_CS_VALIDATION_BITMAP
565 if ((flags & MNT_RDONLY) == 0) {
566 root_fs_upgrade_try = TRUE;
567 }
568 #endif
569 }
570
571 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
572 labelstr, FALSE, ctx);
573
574 out:
575
576 #if CONFIG_MACF
577 if (labelstr) {
578 FREE(labelstr, M_MACTEMP);
579 }
580 #endif /* CONFIG_MACF */
581
582 if (vp) {
583 vnode_put(vp);
584 }
585 if (pvp) {
586 vnode_put(pvp);
587 }
588 if (need_nameidone) {
589 nameidone(&nd);
590 }
591
592 return error;
593 }
594
595 /*
596 * common mount implementation (final stage of mounting)
597 *
598 * Arguments:
599 * fstypename file system type (i.e., its VFS name)
600 * pvp parent of covered vnode
601 * vp covered vnode
602 * cnp component name (i.e., path) of covered vnode
603 * flags generic mount flags
604 * fsmountargs file system specific data
605 * labelstr optional MAC label
606 * kernelmount TRUE for mounts initiated from inside the kernel
607 * ctx caller's context
608 */
609 static int
610 mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
611 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
612 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
613 {
614 #if !CONFIG_MACF
615 #pragma unused(labelstr)
616 #endif
617 struct vnode *devvp = NULLVP;
618 struct vnode *device_vnode = NULLVP;
619 #if CONFIG_MACF
620 struct vnode *rvp;
621 #endif
622 struct mount *mp;
623 struct vfstable *vfsp = (struct vfstable *)0;
624 struct proc *p = vfs_context_proc(ctx);
625 int error, flag = 0;
626 user_addr_t devpath = USER_ADDR_NULL;
627 int ronly = 0;
628 int mntalloc = 0;
629 boolean_t vfsp_ref = FALSE;
630 boolean_t is_rwlock_locked = FALSE;
631 boolean_t did_rele = FALSE;
632 boolean_t have_usecount = FALSE;
633
634 #if CONFIG_ROSV_STARTUP || CONFIG_MOUNT_VM
635 /* Check for mutually-exclusive flag bits */
636 uint32_t checkflags = (internal_flags & (KERNEL_MOUNT_DATAVOL | KERNEL_MOUNT_VMVOL));
637 int bitcount = 0;
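	/*
	 * Added note: the loop below is the classic clear-lowest-set-bit
	 * trick; each "checkflags &= (checkflags - 1)" pass removes one
	 * set bit, so bitcount ends up holding the number of
	 * mount-by-role flags requested.
	 */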
638 while (checkflags != 0) {
639 checkflags &= (checkflags - 1);
640 bitcount++;
641 }
642
643 if (bitcount > 1) {
644 //not allowed to request multiple mount-by-role flags
645 error = EINVAL;
646 goto out1;
647 }
648 #endif
649
650 /*
651 * Process an update for an existing mount
652 */
653 if (flags & MNT_UPDATE) {
654 if ((vp->v_flag & VROOT) == 0) {
655 error = EINVAL;
656 goto out1;
657 }
658 mp = vp->v_mount;
659
660 /* unmount in progress return error */
661 mount_lock_spin(mp);
662 if (mp->mnt_lflag & MNT_LUNMOUNT) {
663 mount_unlock(mp);
664 error = EBUSY;
665 goto out1;
666 }
667 mount_unlock(mp);
668 lck_rw_lock_exclusive(&mp->mnt_rwlock);
669 is_rwlock_locked = TRUE;
670 /*
671 * We only allow the filesystem to be reloaded if it
672 * is currently mounted read-only.
673 */
674 if ((flags & MNT_RELOAD) &&
675 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
676 error = ENOTSUP;
677 goto out1;
678 }
679
680 /*
681 * If content protection is enabled, update mounts are not
682 * allowed to turn it off.
683 */
684 if ((mp->mnt_flag & MNT_CPROTECT) &&
685 ((flags & MNT_CPROTECT) == 0)) {
686 error = EINVAL;
687 goto out1;
688 }
689
690 /*
691 * MNT_REMOVABLE can't be turned off either, but returning an error
692 * for that would be an unexpected failure, so we just silently
693 * add it back if it is not passed in.
694 */
695 if ((mp->mnt_flag & MNT_REMOVABLE) &&
696 ((flags & MNT_REMOVABLE) == 0)) {
697 flags |= MNT_REMOVABLE;
698 }
699
700 #ifdef CONFIG_IMGSRC_ACCESS
701 /* Can't downgrade the backer of the root FS */
702 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
703 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
704 error = ENOTSUP;
705 goto out1;
706 }
707 #endif /* CONFIG_IMGSRC_ACCESS */
708
709 /*
710 * Only root, or the user that did the original mount is
711 * permitted to update it.
712 */
713 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
714 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
715 goto out1;
716 }
717 #if CONFIG_MACF
718 error = mac_mount_check_remount(ctx, mp);
719 if (error != 0) {
720 goto out1;
721 }
722 #endif
723 /*
724 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
725 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
726 */
727 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
728 flags |= MNT_NOSUID | MNT_NODEV;
729 if (mp->mnt_flag & MNT_NOEXEC) {
730 flags |= MNT_NOEXEC;
731 }
732 }
733 flag = mp->mnt_flag;
734
735
736
737 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
738
739 vfsp = mp->mnt_vtable;
740 goto update;
741 } // MNT_UPDATE
742
743 /*
744 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
745 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
746 */
747 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
748 flags |= MNT_NOSUID | MNT_NODEV;
749 if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
750 flags |= MNT_NOEXEC;
751 }
752 }
753
754 /* XXXAUDIT: Should we capture the type on the error path as well? */
755 AUDIT_ARG(text, fstypename);
756 mount_list_lock();
757 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
758 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
759 vfsp->vfc_refcount++;
760 vfsp_ref = TRUE;
761 break;
762 }
763 }
764 mount_list_unlock();
765 if (vfsp == NULL) {
766 error = ENODEV;
767 goto out1;
768 }
769
770 /*
771 * VFC_VFSLOCALARGS is not currently supported for kernel mounts,
772 * except in ROSV configs.
773 */
774 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) &&
775 ((internal_flags & (KERNEL_MOUNT_DATAVOL | KERNEL_MOUNT_VMVOL)) == 0)) {
776 error = EINVAL; /* unsupported request */
777 goto out1;
778 }
779
780 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
781 if (error != 0) {
782 goto out1;
783 }
784
785 /*
786 * Allocate and initialize the filesystem (mount_t)
787 */
788 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
789 M_MOUNT, M_WAITOK);
790 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
791 mntalloc = 1;
792
793 /* Initialize the default IO constraints */
794 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
795 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
796 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
797 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
798 mp->mnt_devblocksize = DEV_BSIZE;
799 mp->mnt_alignmentmask = PAGE_MASK;
800 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
801 mp->mnt_ioscale = 1;
802 mp->mnt_ioflags = 0;
803 mp->mnt_realrootvp = NULLVP;
804 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
805
806 TAILQ_INIT(&mp->mnt_vnodelist);
807 TAILQ_INIT(&mp->mnt_workerqueue);
808 TAILQ_INIT(&mp->mnt_newvnodes);
809 mount_lock_init(mp);
810 lck_rw_lock_exclusive(&mp->mnt_rwlock);
811 is_rwlock_locked = TRUE;
812 mp->mnt_op = vfsp->vfc_vfsops;
813 mp->mnt_vtable = vfsp;
814 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
815 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
816 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
817 do {
818 int pathlen = MAXPATHLEN;
819
820 if (vn_getpath_ext(vp, pvp, mp->mnt_vfsstat.f_mntonname, &pathlen, VN_GETPATH_FSENTER)) {
821 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
822 }
823 } while (0);
824 mp->mnt_vnodecovered = vp;
825 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
826 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
827 mp->mnt_devbsdunit = 0;
828
829 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
830 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
831
832 #if NFSCLIENT || DEVFS || ROUTEFS
833 if (kernelmount) {
834 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
835 }
836 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0) {
837 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
838 }
839 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
840
841 update:
842
843 /*
844 * Set the mount level flags.
845 */
846 if (flags & MNT_RDONLY) {
847 mp->mnt_flag |= MNT_RDONLY;
848 } else if (mp->mnt_flag & MNT_RDONLY) {
849 // disallow read/write upgrades of file systems that
850 // had the TYPENAME_OVERRIDE feature set.
851 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
852 error = EPERM;
853 goto out1;
854 }
855 mp->mnt_kern_flag |= MNTK_WANTRDWR;
856 }
857 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
858 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
859 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
860 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME | MNT_STRICTATIME |
861 MNT_QUARANTINE | MNT_CPROTECT);
862
863 #if SECURE_KERNEL
864 #if !CONFIG_MNT_SUID
865 /*
866 * On release builds of iOS based platforms, always enforce NOSUID on
867 * all mounts. We do this here because we can catch update mounts as well as
868 * non-update mounts in this case.
869 */
870 mp->mnt_flag |= (MNT_NOSUID);
871 #endif
872 #endif
873
874 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
875 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
876 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
877 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME | MNT_STRICTATIME |
878 MNT_QUARANTINE | MNT_CPROTECT);
879
880 #if CONFIG_MACF
881 if (flags & MNT_MULTILABEL) {
882 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
883 error = EINVAL;
884 goto out1;
885 }
886 mp->mnt_flag |= MNT_MULTILABEL;
887 }
888 #endif
889 /*
890 * Process device path for local file systems if requested
891 */
892 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS &&
893 !(internal_flags & (KERNEL_MOUNT_SNAPSHOT | KERNEL_MOUNT_DATAVOL | KERNEL_MOUNT_VMVOL))) {
894 //snapshot, vm, datavolume mounts are special
895 if (vfs_context_is64bit(ctx)) {
896 if ((error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath)))) {
897 goto out1;
898 }
899 fsmountargs += sizeof(devpath);
900 } else {
901 user32_addr_t tmp;
902 if ((error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp)))) {
903 goto out1;
904 }
905 /* munge into LP64 addr */
906 devpath = CAST_USER_ADDR_T(tmp);
907 fsmountargs += sizeof(tmp);
908 }
909
910 /* Lookup device and authorize access to it */
911 if ((devpath)) {
912 struct nameidata nd;
913
914 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
915 if ((error = namei(&nd))) {
916 goto out1;
917 }
918
919 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
920 devvp = nd.ni_vp;
921
922 nameidone(&nd);
923
924 if (devvp->v_type != VBLK) {
925 error = ENOTBLK;
926 goto out2;
927 }
928 if (major(devvp->v_rdev) >= nblkdev) {
929 error = ENXIO;
930 goto out2;
931 }
932 /*
933 * If mount by non-root, then verify that user has necessary
934 * permissions on the device.
935 */
936 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
937 mode_t accessmode = KAUTH_VNODE_READ_DATA;
938
939 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
940 accessmode |= KAUTH_VNODE_WRITE_DATA;
941 }
942 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0) {
943 goto out2;
944 }
945 }
946 }
947 /* On first mount, preflight and open device */
948 if (devpath && ((flags & MNT_UPDATE) == 0)) {
949 if ((error = vnode_ref(devvp))) {
950 goto out2;
951 }
952 /*
953 * Disallow multiple mounts of the same device.
954 * Disallow mounting of a device that is currently in use
955 * (except for root, which might share swap device for miniroot).
956 * Flush out any old buffers remaining from a previous use.
957 */
958 if ((error = vfs_mountedon(devvp))) {
959 goto out3;
960 }
961
962 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
963 error = EBUSY;
964 goto out3;
965 }
966 if ((error = VNOP_FSYNC(devvp, MNT_WAIT, ctx))) {
967 error = ENOTBLK;
968 goto out3;
969 }
970 if ((error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0))) {
971 goto out3;
972 }
973
974 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
975 #if CONFIG_MACF
976 error = mac_vnode_check_open(ctx,
977 devvp,
978 ronly ? FREAD : FREAD | FWRITE);
979 if (error) {
980 goto out3;
981 }
982 #endif /* MAC */
983 if ((error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD | FWRITE, ctx))) {
984 goto out3;
985 }
986
987 mp->mnt_devvp = devvp;
988 device_vnode = devvp;
989 } else if ((mp->mnt_flag & MNT_RDONLY) &&
990 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
991 (device_vnode = mp->mnt_devvp)) {
992 dev_t dev;
993 int maj;
994 /*
995 * If upgrade to read-write by non-root, then verify
996 * that user has necessary permissions on the device.
997 */
998 vnode_getalways(device_vnode);
999
1000 if (suser(vfs_context_ucred(ctx), NULL) &&
1001 (error = vnode_authorize(device_vnode, NULL,
1002 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
1003 ctx)) != 0) {
1004 vnode_put(device_vnode);
1005 goto out2;
1006 }
1007
1008 /* Tell the device that we're upgrading */
1009 dev = (dev_t)device_vnode->v_rdev;
1010 maj = major(dev);
1011
1012 if ((u_int)maj >= (u_int)nblkdev) {
1013 panic("Volume mounted on a device with invalid major number.");
1014 }
1015
1016 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
1017 vnode_put(device_vnode);
1018 device_vnode = NULLVP;
1019 if (error != 0) {
1020 goto out2;
1021 }
1022 }
1023 } // localargs && !(snapshot | data | vm)
1024
1025 #if CONFIG_MACF
1026 if ((flags & MNT_UPDATE) == 0) {
1027 mac_mount_label_init(mp);
1028 mac_mount_label_associate(ctx, mp);
1029 }
1030 if (labelstr) {
1031 if ((flags & MNT_UPDATE) != 0) {
1032 error = mac_mount_check_label_update(ctx, mp);
1033 if (error != 0) {
1034 goto out3;
1035 }
1036 }
1037 }
1038 #endif
1039 /*
1040 * Mount the filesystem. We already asserted that internal_flags
1041 * cannot have more than one mount-by-role bit set.
1042 */
1043 if (internal_flags & KERNEL_MOUNT_SNAPSHOT) {
1044 error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT,
1045 (caddr_t)fsmountargs, 0, ctx);
1046 } else if (internal_flags & KERNEL_MOUNT_DATAVOL) {
1047 #if CONFIG_ROSV_STARTUP
1048 struct mount *origin_mp = (struct mount*)fsmountargs;
1049 fs_role_mount_args_t frma = {origin_mp, VFS_DATA_ROLE};
1050 error = VFS_IOCTL(mp, VFSIOC_MOUNT_BYROLE, (caddr_t)&frma, 0, ctx);
1051 if (error) {
1052 printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_DATA_ROLE, error);
1053 } else {
1054 /* Mark volume associated with system volume */
1055 mp->mnt_kern_flag |= MNTK_SYSTEM;
1056
1057 /* Attempt to acquire the mnt_devvp and set it up */
1058 struct vnode *mp_devvp = NULL;
1059 if (mp->mnt_vfsstat.f_mntfromname[0] != 0) {
1060 errno_t lerr = vnode_lookup(mp->mnt_vfsstat.f_mntfromname,
1061 0, &mp_devvp, vfs_context_kernel());
1062 if (!lerr) {
1063 mp->mnt_devvp = mp_devvp;
1064 //vnode_lookup took an iocount, need to drop it.
1065 vnode_put(mp_devvp);
1066 // now set `device_vnode` to the devvp that was acquired.
1067 // this is needed in order to ensure vfs_init_io_attributes is invoked.
1068 // note that though the iocount above was dropped, the mount acquires
1069 // an implicit reference against the device.
1070 device_vnode = mp_devvp;
1071 }
1072 }
1073 }
1074 #else
1075 error = EINVAL;
1076 #endif
1077 } else if (internal_flags & KERNEL_MOUNT_VMVOL) {
1078 #if CONFIG_MOUNT_VM
1079 struct mount *origin_mp = (struct mount*)fsmountargs;
1080 fs_role_mount_args_t frma = {origin_mp, VFS_VM_ROLE};
1081 error = VFS_IOCTL(mp, VFSIOC_MOUNT_BYROLE, (caddr_t)&frma, 0, ctx);
1082 if (error) {
1083 printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_VM_ROLE, error);
1084 } else {
1085 /* Mark volume associated with system volume and a swap mount */
1086 mp->mnt_kern_flag |= (MNTK_SYSTEM | MNTK_SWAP_MOUNT);
1087 /* Attempt to acquire the mnt_devvp and set it up */
1088 struct vnode *mp_devvp = NULL;
1089 if (mp->mnt_vfsstat.f_mntfromname[0] != 0) {
1090 errno_t lerr = vnode_lookup(mp->mnt_vfsstat.f_mntfromname,
1091 0, &mp_devvp, vfs_context_kernel());
1092 if (!lerr) {
1093 mp->mnt_devvp = mp_devvp;
1094 //vnode_lookup took an iocount, need to drop it.
1095 vnode_put(mp_devvp);
1096
1097 // now set `device_vnode` to the devvp that was acquired.
1098 // note that though the iocount above was dropped, the mount acquires
1099 // an implicit reference against the device.
1100 device_vnode = mp_devvp;
1101 }
1102 }
1103 }
1104 #else
1105 error = EINVAL;
1106 #endif
1107 } else {
1108 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
1109 }
1110
1111 if (flags & MNT_UPDATE) {
1112 if (mp->mnt_kern_flag & MNTK_WANTRDWR) {
1113 mp->mnt_flag &= ~MNT_RDONLY;
1114 }
1115 mp->mnt_flag &= ~
1116 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
1117 mp->mnt_kern_flag &= ~MNTK_WANTRDWR;
1118 if (error) {
1119 mp->mnt_flag = flag; /* restore flag value */
1120 }
1121 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
1122 lck_rw_done(&mp->mnt_rwlock);
1123 is_rwlock_locked = FALSE;
1124 if (!error) {
1125 enablequotas(mp, ctx);
1126 }
1127 goto exit;
1128 }
1129
1130 /*
1131 * Put the new filesystem on the mount list after root.
1132 */
1133 if (error == 0) {
1134 struct vfs_attr vfsattr;
1135 #if CONFIG_MACF
1136 error = mac_mount_check_mount_late(ctx, mp);
1137 if (error != 0) {
1138 goto out3;
1139 }
1140
1141 if (vfs_flags(mp) & MNT_MULTILABEL) {
1142 error = VFS_ROOT(mp, &rvp, ctx);
1143 if (error) {
1144 printf("%s() VFS_ROOT returned %d\n", __func__, error);
1145 goto out3;
1146 }
1147 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
1148 /*
1149 * drop reference provided by VFS_ROOT
1150 */
1151 vnode_put(rvp);
1152
1153 if (error) {
1154 goto out3;
1155 }
1156 }
1157 #endif /* MAC */
1158
1159 vnode_lock_spin(vp);
1160 CLR(vp->v_flag, VMOUNT);
1161 vp->v_mountedhere = mp;
1162 vnode_unlock(vp);
1163
1164 /*
1165 * taking the name_cache_lock exclusively will
1166 * ensure that everyone who might be trying to use
1167 * a now-stale copy of vp->v_mountedhere->mnt_realrootvp
1168 * is out of the fast path;
1169 * bumping mount_generation causes the cached values
1170 * to be invalidated
1171 */
1172 name_cache_lock();
1173 mount_generation++;
1174 name_cache_unlock();
1175
1176 error = vnode_ref(vp);
1177 if (error != 0) {
1178 goto out4;
1179 }
1180
1181 have_usecount = TRUE;
1182
1183 error = checkdirs(vp, ctx);
1184 if (error != 0) {
1185 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1186 goto out4;
1187 }
1188 /*
1189 * there is no cleanup code here, so the return value is cast to void;
1190 * we need to revisit this
1191 */
1192 (void)VFS_START(mp, 0, ctx);
1193
1194 if (mount_list_add(mp) != 0) {
1195 /*
1196 * The system is shutting down trying to umount
1197 * everything, so fail with a plausible errno.
1198 */
1199 error = EBUSY;
1200 goto out4;
1201 }
1202 lck_rw_done(&mp->mnt_rwlock);
1203 is_rwlock_locked = FALSE;
1204
1205 /* Check if this mounted file system supports EAs or named streams. */
1206 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1207 VFSATTR_INIT(&vfsattr);
1208 VFSATTR_WANTED(&vfsattr, f_capabilities);
1209 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
1210 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
1211 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
1212 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
1213 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
1214 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1215 }
1216 #if NAMEDSTREAMS
1217 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
1218 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
1219 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1220 }
1221 #endif
1222 /* Check if this file system supports path from id lookups. */
1223 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
1224 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
1225 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1226 } else if (mp->mnt_flag & MNT_DOVOLFS) {
1227 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1228 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1229 }
1230
1231 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) &&
1232 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) {
1233 mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS;
1234 }
1235 }
1236 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
1237 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1238 }
1239 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
1240 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
1241 }
1242 /* increment the operations count */
1243 OSAddAtomic(1, &vfs_nummntops);
1244 enablequotas(mp, ctx);
1245
1246 if (device_vnode) {
1247 device_vnode->v_specflags |= SI_MOUNTEDON;
1248
1249 /*
1250 * cache the IO attributes for the underlying physical media...
1251 * an error return indicates the underlying driver doesn't
1252 * support all the queries necessary... however, reasonable
1253 * defaults will have been set, so no reason to bail or care
1254 */
1255 vfs_init_io_attributes(device_vnode, mp);
1256 }
1257
1258 /* Now that mount is setup, notify the listeners */
1259 vfs_notify_mount(pvp);
1260 IOBSDMountChange(mp, kIOMountChangeMount);
1261 } else {
1262 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1263 if (mp->mnt_vnodelist.tqh_first != NULL) {
1264 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1265 mp->mnt_vtable->vfc_name, error);
1266 }
1267
1268 vnode_lock_spin(vp);
1269 CLR(vp->v_flag, VMOUNT);
1270 vnode_unlock(vp);
1271 mount_list_lock();
1272 mp->mnt_vtable->vfc_refcount--;
1273 mount_list_unlock();
1274
1275 if (device_vnode) {
1276 vnode_rele(device_vnode);
1277 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD | FWRITE, ctx);
1278 }
1279 lck_rw_done(&mp->mnt_rwlock);
1280 is_rwlock_locked = FALSE;
1281
1282 /*
1283 * if we get here, we have a mount structure that needs to be freed,
1284 * but since the coveredvp hasn't yet been updated to point at it,
1285 * no need to worry about other threads holding a crossref on this mp
1286 * so it's ok to just free it
1287 */
1288 mount_lock_destroy(mp);
1289 #if CONFIG_MACF
1290 mac_mount_label_destroy(mp);
1291 #endif
1292 FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
1293 }
1294 exit:
1295 /*
1296 * drop I/O count on the device vp if there was one
1297 */
1298 if (devpath && devvp) {
1299 vnode_put(devvp);
1300 }
1301
1302 return error;
1303
1304 /* Error condition exits */
1305 out4:
1306 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
1307
1308 /*
1309 * If the mount has been placed on the covered vp,
1310 * it may have been discovered by now, so we have
1311 * to treat this just like an unmount
1312 */
1313 mount_lock_spin(mp);
1314 mp->mnt_lflag |= MNT_LDEAD;
1315 mount_unlock(mp);
1316
1317 if (device_vnode != NULLVP) {
1318 vnode_rele(device_vnode);
1319 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD | FWRITE,
1320 ctx);
1321 did_rele = TRUE;
1322 }
1323
1324 vnode_lock_spin(vp);
1325
1326 mp->mnt_crossref++;
1327 vp->v_mountedhere = (mount_t) 0;
1328
1329 vnode_unlock(vp);
1330
1331 if (have_usecount) {
1332 vnode_rele(vp);
1333 }
1334 out3:
1335 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele)) {
1336 vnode_rele(devvp);
1337 }
1338 out2:
1339 if (devpath && devvp) {
1340 vnode_put(devvp);
1341 }
1342 out1:
1343 /* Release mnt_rwlock only when it was taken */
1344 if (is_rwlock_locked == TRUE) {
1345 lck_rw_done(&mp->mnt_rwlock);
1346 }
1347
1348 if (mntalloc) {
1349 if (mp->mnt_crossref) {
1350 mount_dropcrossref(mp, vp, 0);
1351 } else {
1352 mount_lock_destroy(mp);
1353 #if CONFIG_MACF
1354 mac_mount_label_destroy(mp);
1355 #endif
1356 FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
1357 }
1358 }
1359 if (vfsp_ref) {
1360 mount_list_lock();
1361 vfsp->vfc_refcount--;
1362 mount_list_unlock();
1363 }
1364
1365 return error;
1366 }
1367
1368 /*
1369 * Flush in-core data, check for competing mount attempts,
1370 * and set VMOUNT
1371 */
1372 int
1373 prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
1374 {
1375 #if !CONFIG_MACF
1376 #pragma unused(cnp,fsname)
1377 #endif
1378 struct vnode_attr va;
1379 int error;
1380
1381 if (!skip_auth) {
1382 /*
1383 * If the user is not root, ensure that they own the directory
1384 * onto which we are attempting to mount.
1385 */
1386 VATTR_INIT(&va);
1387 VATTR_WANTED(&va, va_uid);
1388 if ((error = vnode_getattr(vp, &va, ctx)) ||
1389 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1390 (!vfs_context_issuser(ctx)))) {
1391 error = EPERM;
1392 goto out;
1393 }
1394 }
1395
1396 if ((error = VNOP_FSYNC(vp, MNT_WAIT, ctx))) {
1397 goto out;
1398 }
1399
1400 if ((error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0))) {
1401 goto out;
1402 }
1403
1404 if (vp->v_type != VDIR) {
1405 error = ENOTDIR;
1406 goto out;
1407 }
1408
1409 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1410 error = EBUSY;
1411 goto out;
1412 }
1413
1414 #if CONFIG_MACF
1415 error = mac_mount_check_mount(ctx, vp,
1416 cnp, fsname);
1417 if (error != 0) {
1418 goto out;
1419 }
1420 #endif
1421
1422 vnode_lock_spin(vp);
1423 SET(vp->v_flag, VMOUNT);
1424 vnode_unlock(vp);
1425
1426 out:
1427 return error;
1428 }
1429
1430 #if CONFIG_IMGSRC_ACCESS
1431
1432 #define DEBUG_IMGSRC 0
1433
1434 #if DEBUG_IMGSRC
1435 #define IMGSRC_DEBUG(args...) printf("imgsrc: " args)
1436 #else
1437 #define IMGSRC_DEBUG(args...) do { } while(0)
1438 #endif
1439
1440 static int
1441 authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1442 {
1443 struct nameidata nd;
1444 vnode_t vp, realdevvp;
1445 mode_t accessmode;
1446 int error;
1447 enum uio_seg uio = UIO_USERSPACE;
1448
1449 if (ctx == vfs_context_kernel()) {
1450 uio = UIO_SYSSPACE;
1451 }
1452
1453 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, uio, devpath, ctx);
1454 if ((error = namei(&nd))) {
1455 IMGSRC_DEBUG("namei() failed with %d\n", error);
1456 return error;
1457 }
1458
1459 vp = nd.ni_vp;
1460
1461 if (!vnode_isblk(vp)) {
1462 IMGSRC_DEBUG("Not block device.\n");
1463 error = ENOTBLK;
1464 goto out;
1465 }
1466
1467 realdevvp = mp->mnt_devvp;
1468 if (realdevvp == NULLVP) {
1469 IMGSRC_DEBUG("No device backs the mount.\n");
1470 error = ENXIO;
1471 goto out;
1472 }
1473
1474 error = vnode_getwithref(realdevvp);
1475 if (error != 0) {
1476 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1477 goto out;
1478 }
1479
1480 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1481 IMGSRC_DEBUG("Wrong dev_t.\n");
1482 error = ENXIO;
1483 goto out1;
1484 }
1485
1486 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1487
1488 /*
1489 * If mount by non-root, then verify that user has necessary
1490 * permissions on the device.
1491 */
1492 if (!vfs_context_issuser(ctx)) {
1493 accessmode = KAUTH_VNODE_READ_DATA;
1494 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1495 accessmode |= KAUTH_VNODE_WRITE_DATA;
1496 }
1497 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1498 IMGSRC_DEBUG("Access denied.\n");
1499 goto out1;
1500 }
1501 }
1502
1503 *devvpp = vp;
1504
1505 out1:
1506 vnode_put(realdevvp);
1507
1508 out:
1509 nameidone(&nd);
1510
1511 if (error) {
1512 vnode_put(vp);
1513 }
1514
1515 return error;
1516 }
1517
1518 /*
1519 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1520 * and call checkdirs()
1521 */
1522 static int
1523 place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1524 {
1525 int error;
1526
1527 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1528
1529 IMGSRC_DEBUG("placing: fsname = %s, vp = %s\n",
1530 mp->mnt_vtable->vfc_name, vnode_getname(vp));
1531
1532 vnode_lock_spin(vp);
1533 CLR(vp->v_flag, VMOUNT);
1534 vp->v_mountedhere = mp;
1535 vnode_unlock(vp);
1536
1537 /*
1538 * taking the name_cache_lock exclusively will
1539 * ensure that everyone who might be trying to use
1540 * a now-stale copy of vp->v_mountedhere->mnt_realrootvp
1541 * is out of the fast path;
1542 * bumping mount_generation causes the cached values
1543 * to be invalidated
1544 */
1545 name_cache_lock();
1546 mount_generation++;
1547 name_cache_unlock();
1548
1549 error = vnode_ref(vp);
1550 if (error != 0) {
1551 goto out;
1552 }
1553
1554 error = checkdirs(vp, ctx);
1555 if (error != 0) {
1556 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1557 vnode_rele(vp);
1558 goto out;
1559 }
1560
1561 out:
1562 if (error != 0) {
1563 mp->mnt_vnodecovered = NULLVP;
1564 }
1565 return error;
1566 }
1567
1568 static void
1569 undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1570 {
1571 vnode_rele(vp);
1572 vnode_lock_spin(vp);
1573 vp->v_mountedhere = (mount_t)NULL;
1574 vnode_unlock(vp);
1575
1576 mp->mnt_vnodecovered = NULLVP;
1577 }
1578
1579 static int
1580 mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1581 {
1582 int error;
1583
1584 /* unmount in progress return error */
1585 mount_lock_spin(mp);
1586 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1587 mount_unlock(mp);
1588 return EBUSY;
1589 }
1590 mount_unlock(mp);
1591 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1592
1593 /*
1594 * We only allow the filesystem to be reloaded if it
1595 * is currently mounted read-only.
1596 */
1597 if ((flags & MNT_RELOAD) &&
1598 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1599 error = ENOTSUP;
1600 goto out;
1601 }
1602
1603 /*
1604 * Only root, or the user that did the original mount is
1605 * permitted to update it.
1606 */
1607 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1608 (!vfs_context_issuser(ctx))) {
1609 error = EPERM;
1610 goto out;
1611 }
1612 #if CONFIG_MACF
1613 error = mac_mount_check_remount(ctx, mp);
1614 if (error != 0) {
1615 goto out;
1616 }
1617 #endif
1618
1619 out:
1620 if (error) {
1621 lck_rw_done(&mp->mnt_rwlock);
1622 }
1623
1624 return error;
1625 }
1626
1627 static void
1628 mount_end_update(mount_t mp)
1629 {
1630 lck_rw_done(&mp->mnt_rwlock);
1631 }
1632
1633 static int
1634 get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1635 {
1636 vnode_t vp;
1637
1638 if (height >= MAX_IMAGEBOOT_NESTING) {
1639 return EINVAL;
1640 }
1641
1642 vp = imgsrc_rootvnodes[height];
1643 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1644 *rvpp = vp;
1645 return 0;
1646 } else {
1647 return ENOENT;
1648 }
1649 }
1650
1651 static int
1652 relocate_imageboot_source(vnode_t pvp, vnode_t vp,
1653 struct componentname *cnp, const char *fsname, vfs_context_t ctx,
1654 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
1655 {
1656 int error;
1657 mount_t mp;
1658 boolean_t placed = FALSE;
1659 struct vfstable *vfsp;
1660 user_addr_t devpath;
1661 char *old_mntonname;
1662 vnode_t rvp;
1663 vnode_t devvp;
1664 uint32_t height;
1665 uint32_t flags;
1666
1667 /* If we didn't imageboot, nothing to move */
1668 if (imgsrc_rootvnodes[0] == NULLVP) {
1669 return EINVAL;
1670 }
1671
1672 /* Only root can do this */
1673 if (!vfs_context_issuser(ctx)) {
1674 return EPERM;
1675 }
1676
1677 IMGSRC_DEBUG("looking for root vnode.\n");
1678
1679 /*
1680 * Get root vnode of filesystem we're moving.
1681 */
1682 if (by_index) {
1683 if (is64bit) {
1684 struct user64_mnt_imgsrc_args mia64;
1685 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1686 if (error != 0) {
1687 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1688 return error;
1689 }
1690
1691 height = mia64.mi_height;
1692 flags = mia64.mi_flags;
1693 devpath = mia64.mi_devpath;
1694 } else {
1695 struct user32_mnt_imgsrc_args mia32;
1696 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1697 if (error != 0) {
1698 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1699 return error;
1700 }
1701
1702 height = mia32.mi_height;
1703 flags = mia32.mi_flags;
1704 devpath = mia32.mi_devpath;
1705 }
1706 } else {
1707 /*
1708 * For binary compatibility--assumes one level of nesting.
1709 */
1710 if (is64bit) {
1711 if ((error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath)))) {
1712 return error;
1713 }
1714 } else {
1715 user32_addr_t tmp;
1716 if ((error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp)))) {
1717 return error;
1718 }
1719
1720 /* munge into LP64 addr */
1721 devpath = CAST_USER_ADDR_T(tmp);
1722 }
1723
1724 height = 0;
1725 flags = 0;
1726 }
1727
1728 if (flags != 0) {
1729 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1730 return EINVAL;
1731 }
1732
1733 error = get_imgsrc_rootvnode(height, &rvp);
1734 if (error != 0) {
1735 IMGSRC_DEBUG("getting old root vnode failed with %d\n", error);
1736 return error;
1737 }
1738
1739 IMGSRC_DEBUG("got old root vnode\n");
1740
1741 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1742
1743 /* Can only move once */
1744 mp = vnode_mount(rvp);
1745 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1746 IMGSRC_DEBUG("Already moved.\n");
1747 error = EBUSY;
1748 goto out0;
1749 }
1750
1751 IMGSRC_DEBUG("moving rvp: fsname = %s\n", mp->mnt_vtable->vfc_name);
1752 IMGSRC_DEBUG("Starting updated.\n");
1753
1754 /* Get exclusive rwlock on mount, authorize update on mp */
1755 error = mount_begin_update(mp, ctx, 0);
1756 if (error != 0) {
1757 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
1758 goto out0;
1759 }
1760
1761 /*
1762 * It can only be moved once. Flag is set under the rwlock,
1763 * so we're now safe to proceed.
1764 */
1765 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1766 IMGSRC_DEBUG("Already moved [2]\n");
1767 goto out1;
1768 }
1769
1770 IMGSRC_DEBUG("Preparing coveredvp.\n");
1771
1772 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1773 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
1774 if (error != 0) {
1775 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
1776 goto out1;
1777 }
1778
1779 IMGSRC_DEBUG("Covered vp OK.\n");
1780
1781 /* Sanity check the name caller has provided */
1782 vfsp = mp->mnt_vtable;
1783 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
1784 IMGSRC_DEBUG("Wrong fs name: actual = %s, expected = %s\n",
1785 vfsp->vfc_name, fsname);
1786 error = EINVAL;
1787 goto out2;
1788 }
1789
1790 /* Check the device vnode and update mount-from name, for local filesystems */
1791 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
1792 IMGSRC_DEBUG("Local, doing device validation.\n");
1793
1794 if (devpath != USER_ADDR_NULL) {
1795 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1796 if (error) {
1797 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1798 goto out2;
1799 }
1800
1801 vnode_put(devvp);
1802 }
1803 }
1804
1805 /*
1806 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1807 * and increment the name cache's mount generation
1808 */
1809
1810 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1811 error = place_mount_and_checkdirs(mp, vp, ctx);
1812 if (error != 0) {
1813 goto out2;
1814 }
1815
1816 placed = TRUE;
1817
1818 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1819 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1820
1821 /* Forbid future moves */
1822 mount_lock(mp);
1823 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1824 mount_unlock(mp);
1825
1826 /* Finally, add to mount list, completely ready to go */
1827 if (mount_list_add(mp) != 0) {
1828 /*
1829 * The system is shutting down trying to umount
1830 * everything, so fail with a plausible errno.
1831 */
1832 error = EBUSY;
1833 goto out3;
1834 }
1835
1836 mount_end_update(mp);
1837 vnode_put(rvp);
1838 FREE(old_mntonname, M_TEMP);
1839
1840 vfs_notify_mount(pvp);
1841
1842 return 0;
1843 out3:
1844 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1845
1846 mount_lock(mp);
1847 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1848 mount_unlock(mp);
1849
1850 out2:
1851 /*
1852 * Placing the mp on the vnode clears VMOUNT,
1853 * so cleanup is different after that point
1854 */
1855 if (placed) {
1856 /* Rele the vp, clear VMOUNT and v_mountedhere */
1857 undo_place_on_covered_vp(mp, vp);
1858 } else {
1859 vnode_lock_spin(vp);
1860 CLR(vp->v_flag, VMOUNT);
1861 vnode_unlock(vp);
1862 }
1863 out1:
1864 mount_end_update(mp);
1865
1866 out0:
1867 vnode_put(rvp);
1868 FREE(old_mntonname, M_TEMP);
1869 return error;
1870 }
1871
1872 #if CONFIG_LOCKERBOOT
1873 __private_extern__
1874 int
1875 mount_locker_protoboot(const char *fsname, const char *mntpoint,
1876 const char *pbdevpath)
1877 {
1878 int error = -1;
1879 struct nameidata nd;
1880 boolean_t cleanup_nd = FALSE;
1881 vfs_context_t ctx = vfs_context_kernel();
1882 boolean_t is64 = TRUE;
1883 boolean_t by_index = TRUE;
1884 struct user64_mnt_imgsrc_args mia64 = {
1885 .mi_height = 0,
1886 .mi_flags = 0,
1887 .mi_devpath = CAST_USER_ADDR_T(pbdevpath),
1888 };
1889 user_addr_t mia64addr = CAST_USER_ADDR_T(&mia64);
1890
1891 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
1892 UIO_SYSSPACE, CAST_USER_ADDR_T(mntpoint), ctx);
1893 error = namei(&nd);
1894 if (error) {
1895 IMGSRC_DEBUG("namei: %d\n", error);
1896 goto out;
1897 }
1898
1899 cleanup_nd = TRUE;
1900 error = relocate_imageboot_source(nd.ni_dvp, nd.ni_vp,
1901 &nd.ni_cnd, fsname, ctx, is64, mia64addr, by_index);
1902
1903 out:
1904 if (cleanup_nd) {
1905 int stashed = error;
1906
1907 error = vnode_put(nd.ni_vp);
1908 if (error) {
1909 panic("vnode_put() returned non-zero: %d", error);
1910 }
1911
1912 if (nd.ni_dvp) {
1913 error = vnode_put(nd.ni_dvp);
1914 if (error) {
1915 panic("vnode_put() returned non-zero: %d", error);
1916 }
1917 }
1918 nameidone(&nd);
1919
1920 error = stashed;
1921 }
1922 return error;
1923 }
1924 #endif /* CONFIG_LOCKERBOOT */
1925 #endif /* CONFIG_IMGSRC_ACCESS */
1926
1927 void
1928 enablequotas(struct mount *mp, vfs_context_t ctx)
1929 {
1930 struct nameidata qnd;
1931 int type;
1932 char qfpath[MAXPATHLEN];
1933 const char *qfname = QUOTAFILENAME;
1934 const char *qfopsname = QUOTAOPSNAME;
1935 const char *qfextension[] = INITQFNAMES;
1936
1937 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1938 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0) {
1939 return;
1940 }
1941 /*
1942 * Enable filesystem disk quotas if necessary.
1943 * We ignore errors as this should not interfere with final mount
1944 */
1945 for (type = 0; type < MAXQUOTAS; type++) {
1946 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
1947 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1948 CAST_USER_ADDR_T(qfpath), ctx);
1949 if (namei(&qnd) != 0) {
1950 continue; /* option file to trigger quotas is not present */
1951 }
1952 vnode_put(qnd.ni_vp);
1953 nameidone(&qnd);
1954 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
1955
1956 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
1957 }
1958 return;
1959 }
1960
1961
1962 static int
1963 checkdirs_callback(proc_t p, void * arg)
1964 {
1965 struct cdirargs * cdrp = (struct cdirargs *)arg;
1966 vnode_t olddp = cdrp->olddp;
1967 vnode_t newdp = cdrp->newdp;
1968 struct filedesc *fdp;
1969 vnode_t new_cvp = newdp;
1970 vnode_t new_rvp = newdp;
1971 vnode_t old_cvp = NULL;
1972 vnode_t old_rvp = NULL;
1973
1974 /*
1975 * XXX Also needs to iterate each thread in the process to see if it
1976 * XXX is using a per-thread current working directory, and, if so,
1977 * XXX update that as well.
1978 */
1979
1980 /*
1981 * First, with the proc_fdlock held, check to see if we will need
1982 * to do any work. If not, we will get out fast.
1983 */
1984 proc_fdlock(p);
1985 fdp = p->p_fd;
1986 if (fdp == NULL ||
1987 (fdp->fd_cdir != olddp && fdp->fd_rdir != olddp)) {
1988 proc_fdunlock(p);
1989 return PROC_RETURNED;
1990 }
1991 proc_fdunlock(p);
1992
1993 /*
1994 * Ok, we will have to do some work. Always take two refs
1995 * because we might need that many. We'll dispose of whatever
1996 * we ended up not using.
1997 */
1998 if (vnode_ref(newdp) != 0) {
1999 return PROC_RETURNED;
2000 }
2001 if (vnode_ref(newdp) != 0) {
2002 vnode_rele(newdp);
2003 return PROC_RETURNED;
2004 }
2005
2006 /*
2007 * Now do the work. Note: we dropped the proc_fdlock, so we
2008 * have to do all of the checks again.
2009 */
2010 proc_fdlock(p);
2011 fdp = p->p_fd;
2012 if (fdp != NULL) {
2013 if (fdp->fd_cdir == olddp) {
2014 old_cvp = olddp;
2015 fdp->fd_cdir = newdp;
2016 new_cvp = NULL;
2017 }
2018 if (fdp->fd_rdir == olddp) {
2019 old_rvp = olddp;
2020 fdp->fd_rdir = newdp;
2021 new_rvp = NULL;
2022 }
2023 }
2024 proc_fdunlock(p);
2025
2026 /*
2027 * Dispose of any references that are no longer needed.
2028 */
2029 if (old_cvp != NULL) {
2030 vnode_rele(old_cvp);
2031 }
2032 if (old_rvp != NULL) {
2033 vnode_rele(old_rvp);
2034 }
2035 if (new_cvp != NULL) {
2036 vnode_rele(new_cvp);
2037 }
2038 if (new_rvp != NULL) {
2039 vnode_rele(new_rvp);
2040 }
2041
2042 return PROC_RETURNED;
2043 }
2044
2045
2046
2047 /*
2048 * Scan all active processes to see if any of them have a current
2049 * or root directory onto which the new filesystem has just been
2050 * mounted. If so, replace them with the new mount point.
2051 */
2052 static int
2053 checkdirs(vnode_t olddp, vfs_context_t ctx)
2054 {
2055 vnode_t newdp;
2056 vnode_t tvp;
2057 int err;
2058 struct cdirargs cdr;
2059
2060 if (olddp->v_usecount == 1) {
2061 return 0;
2062 }
2063 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
2064
2065 if (err != 0) {
2066 #if DIAGNOSTIC
2067 panic("mount: lost mount: error %d", err);
2068 #endif
2069 return err;
2070 }
2071
2072 cdr.olddp = olddp;
2073 cdr.newdp = newdp;
2074 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
2075 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
2076
2077 if (rootvnode == olddp) {
2078 vnode_ref(newdp);
2079 tvp = rootvnode;
2080 rootvnode = newdp;
2081 vnode_rele(tvp);
2082 }
2083
2084 vnode_put(newdp);
2085 return 0;
2086 }
2087
2088 /*
2089 * Unmount a file system.
2090 *
2091 * Note: unmount takes a path to the vnode mounted on as argument,
2092 * not a special file (as before).
2093 */
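/*
 * Illustrative userspace sketch of this call (not part of this file; the
 * mount point path "/Volumes/Example" is hypothetical):
 *
 *	#include <sys/mount.h>
 *	#include <stdio.h>
 *
 *	if (unmount("/Volumes/Example", MNT_FORCE) == -1)
 *		perror("unmount");
 */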
2094 /* ARGSUSED */
2095 int
2096 unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
2097 {
2098 vnode_t vp;
2099 struct mount *mp;
2100 int error;
2101 struct nameidata nd;
2102 vfs_context_t ctx = vfs_context_current();
2103
2104 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
2105 UIO_USERSPACE, uap->path, ctx);
2106 error = namei(&nd);
2107 if (error) {
2108 return error;
2109 }
2110 vp = nd.ni_vp;
2111 mp = vp->v_mount;
2112 nameidone(&nd);
2113
2114 #if CONFIG_MACF
2115 error = mac_mount_check_umount(ctx, mp);
2116 if (error != 0) {
2117 vnode_put(vp);
2118 return error;
2119 }
2120 #endif
2121 /*
2122 * Must be the root of the filesystem
2123 */
2124 if ((vp->v_flag & VROOT) == 0) {
2125 vnode_put(vp);
2126 return EINVAL;
2127 }
2128 mount_ref(mp, 0);
2129 vnode_put(vp);
2130 /* safedounmount consumes the mount ref */
2131 return safedounmount(mp, uap->flags, ctx);
2132 }
2133
2134 int
2135 vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx)
2136 {
2137 mount_t mp;
2138
2139 mp = mount_list_lookupby_fsid(fsid, 0, 1);
2140 if (mp == (mount_t)0) {
2141 return ENOENT;
2142 }
2143 mount_ref(mp, 0);
2144 mount_iterdrop(mp);
2145 /* safedounmount consumes the mount ref */
2146 return safedounmount(mp, flags, ctx);
2147 }
2148
2149
2150 /*
2151 * The mount struct comes with a mount ref which will be consumed.
2152 * Do the actual file system unmount and prevent some common foot-shooting.
2153 */
2154 int
2155 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
2156 {
2157 int error;
2158 proc_t p = vfs_context_proc(ctx);
2159
2160 /*
2161 * If the file system is not responding and MNT_NOBLOCK
2162 * is set and not a forced unmount then return EBUSY.
2163 */
2164 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
2165 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
2166 error = EBUSY;
2167 goto out;
2168 }
2169
2170 /*
2171 * Skip authorization if the mount is tagged as permissive and
2172 * this is not a forced-unmount attempt.
2173 */
2174 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
2175 /*
2176 * Only root, or the user that did the original mount is
2177 * permitted to unmount this filesystem.
2178 */
2179 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
2180 (error = suser(kauth_cred_get(), &p->p_acflag))) {
2181 goto out;
2182 }
2183 }
2184 /*
2185 * Don't allow unmounting the root file system (or the associated VM or DATA mounts).
2186 */
2187 if ((mp->mnt_flag & MNT_ROOTFS) || (mp->mnt_kern_flag & MNTK_SYSTEM)) {
2188 error = EBUSY; /* the root (or associated volumes) is always busy */
2189 goto out;
2190 }
2191
2192 #ifdef CONFIG_IMGSRC_ACCESS
2193 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
2194 error = EBUSY;
2195 goto out;
2196 }
2197 #endif /* CONFIG_IMGSRC_ACCESS */
2198
2199 return dounmount(mp, flags, 1, ctx);
2200
2201 out:
2202 mount_drop(mp, 0);
2203 return error;
2204 }
2205
2206 /*
2207 * Do the actual file system unmount.
2208 */
2209 int
2210 dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
2211 {
2212 vnode_t coveredvp = (vnode_t)0;
2213 int error;
2214 int needwakeup = 0;
2215 int forcedunmount = 0;
2216 int lflags = 0;
2217 struct vnode *devvp = NULLVP;
2218 #if CONFIG_TRIGGERS
2219 proc_t p = vfs_context_proc(ctx);
2220 int did_vflush = 0;
2221 int pflags_save = 0;
2222 #endif /* CONFIG_TRIGGERS */
2223
2224 #if CONFIG_FSE
2225 if (!(flags & MNT_FORCE)) {
2226 fsevent_unmount(mp, ctx); /* has to come first! */
2227 }
2228 #endif
2229
2230 mount_lock(mp);
2231
2232 /*
2233 * If already an unmount in progress just return EBUSY.
2234 * Even a forced unmount cannot override.
2235 */
2236 if (mp->mnt_lflag & MNT_LUNMOUNT) {
2237 if (withref != 0) {
2238 mount_drop(mp, 1);
2239 }
2240 mount_unlock(mp);
2241 return EBUSY;
2242 }
2243
2244 if (flags & MNT_FORCE) {
2245 forcedunmount = 1;
2246 mp->mnt_lflag |= MNT_LFORCE;
2247 }
2248
2249 #if CONFIG_TRIGGERS
2250 if (flags & MNT_NOBLOCK && p != kernproc) {
2251 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
2252 }
2253 #endif
2254
2255 mp->mnt_kern_flag |= MNTK_UNMOUNT;
2256 mp->mnt_lflag |= MNT_LUNMOUNT;
2257 mp->mnt_flag &= ~MNT_ASYNC;
2258 /*
2259 * anyone currently in the fast path that
2260 * trips over the cached rootvp will be
2261 * dumped out and forced into the slow path
2262 * to regenerate a new cached value
2263 */
2264 mp->mnt_realrootvp = NULLVP;
2265 mount_unlock(mp);
2266
2267 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
2268 /*
2269 * Force unmount any mounts in this filesystem.
2270 * If any unmounts fail, just leave them dangling.
2271 * Avoids recursion.
2272 */
2273 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
2274 }
2275
2276 /*
2277 * taking the name_cache_lock exclusively will
2278 * ensure that everyone who might be trying to use a
2279 * now-stale copy of vp->v_mountedhere->mnt_realrootvp
2280 * is out of the fast path;
2281 * bumping mount_generation causes the cached values
2282 * to be invalidated
2283 */
2284 name_cache_lock();
2285 mount_generation++;
2286 name_cache_unlock();
2287
2288
2289 lck_rw_lock_exclusive(&mp->mnt_rwlock);
2290 if (withref != 0) {
2291 mount_drop(mp, 0);
2292 }
2293 error = 0;
2294 if (forcedunmount == 0) {
2295 ubc_umount(mp); /* release cached vnodes */
2296 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2297 error = VFS_SYNC(mp, MNT_WAIT, ctx);
2298 if (error) {
2299 mount_lock(mp);
2300 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2301 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2302 mp->mnt_lflag &= ~MNT_LFORCE;
2303 goto out;
2304 }
2305 }
2306 }
2307
2308 IOBSDMountChange(mp, kIOMountChangeUnmount);
2309
2310 #if CONFIG_TRIGGERS
2311 vfs_nested_trigger_unmounts(mp, flags, ctx);
2312 did_vflush = 1;
2313 #endif
2314 if (forcedunmount) {
2315 lflags |= FORCECLOSE;
2316 }
2317 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
2318 if ((forcedunmount == 0) && error) {
2319 mount_lock(mp);
2320 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2321 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2322 mp->mnt_lflag &= ~MNT_LFORCE;
2323 goto out;
2324 }
2325
2326 /* make sure no one is in the mount iterations or lookup */
2327 mount_iterdrain(mp);
2328
2329 error = VFS_UNMOUNT(mp, flags, ctx);
2330 if (error) {
2331 mount_iterreset(mp);
2332 mount_lock(mp);
2333 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2334 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2335 mp->mnt_lflag &= ~MNT_LFORCE;
2336 goto out;
2337 }
2338
2339 /* increment the operations count */
2340 if (!error) {
2341 OSAddAtomic(1, &vfs_nummntops);
2342 }
2343
2344 if (mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
2345 /* hold an io reference and drop the usecount before close */
2346 devvp = mp->mnt_devvp;
2347 vnode_getalways(devvp);
2348 vnode_rele(devvp);
2349 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD | FWRITE,
2350 ctx);
2351 vnode_clearmountedon(devvp);
2352 vnode_put(devvp);
2353 }
2354 lck_rw_done(&mp->mnt_rwlock);
2355 mount_list_remove(mp);
2356 lck_rw_lock_exclusive(&mp->mnt_rwlock);
2357
2358 /* mark the mount point hook in the vp but do not drop the ref yet */
2359 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
2360 /*
2361 * The covered vnode needs special handling. Trying to get an
2362 * iocount must not block here as this may lead to deadlocks
2363 * if the Filesystem to which the covered vnode belongs is
2364 * undergoing forced unmounts. Since we hold a usecount, the
2365 * vnode cannot be reused (it can, however, still be terminated)
2366 */
2367 vnode_getalways(coveredvp);
2368 vnode_lock_spin(coveredvp);
2369
2370 mp->mnt_crossref++;
2371 coveredvp->v_mountedhere = (struct mount *)0;
2372 CLR(coveredvp->v_flag, VMOUNT);
2373
2374 vnode_unlock(coveredvp);
2375 vnode_put(coveredvp);
2376 }
2377
2378 mount_list_lock();
2379 mp->mnt_vtable->vfc_refcount--;
2380 mount_list_unlock();
2381
2382 cache_purgevfs(mp); /* remove cache entries for this file sys */
2383 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
2384 mount_lock(mp);
2385 mp->mnt_lflag |= MNT_LDEAD;
2386
2387 if (mp->mnt_lflag & MNT_LWAIT) {
2388 /*
2389 * do the wakeup here
2390 * in case we block in mount_refdrain
2391 * which will drop the mount lock
2392 * and allow anyone blocked in vfs_busy
2393 * to wakeup and see the LDEAD state
2394 */
2395 mp->mnt_lflag &= ~MNT_LWAIT;
2396 wakeup((caddr_t)mp);
2397 }
2398 mount_refdrain(mp);
2399
2400 /* free disk_conditioner_info structure for this mount */
2401 disk_conditioner_unmount(mp);
2402
2403 out:
2404 if (mp->mnt_lflag & MNT_LWAIT) {
2405 mp->mnt_lflag &= ~MNT_LWAIT;
2406 needwakeup = 1;
2407 }
2408
2409 #if CONFIG_TRIGGERS
2410 if (flags & MNT_NOBLOCK && p != kernproc) {
2411 // Restore P_NOREMOTEHANG bit to its previous value
2412 if ((pflags_save & P_NOREMOTEHANG) == 0) {
2413 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2414 }
2415 }
2416
2417 /*
2418 * Callback and context are set together under the mount lock, and
2419 * never cleared, so we're safe to examine them here, drop the lock,
2420 * and call out.
2421 */
2422 if (mp->mnt_triggercallback != NULL) {
2423 mount_unlock(mp);
2424 if (error == 0) {
2425 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2426 } else if (did_vflush) {
2427 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2428 }
2429 } else {
2430 mount_unlock(mp);
2431 }
2432 #else
2433 mount_unlock(mp);
2434 #endif /* CONFIG_TRIGGERS */
2435
2436 lck_rw_done(&mp->mnt_rwlock);
2437
2438 if (needwakeup) {
2439 wakeup((caddr_t)mp);
2440 }
2441
2442 if (!error) {
2443 if ((coveredvp != NULLVP)) {
2444 vnode_t pvp = NULLVP;
2445
2446 /*
2447 * The covered vnode needs special handling. Trying to
2448 * get an iocount must not block here as this may lead
2449 * to deadlocks if the Filesystem to which the covered
2450 * vnode belongs is undergoing forced unmounts. Since we
2451 * hold a usecount, the vnode cannot be reused
2452 * (it can, however, still be terminated).
2453 */
2454 vnode_getalways(coveredvp);
2455
2456 mount_dropcrossref(mp, coveredvp, 0);
2457 /*
2458 * We'll _try_ to detect if this really needs to be
2459 * done. The coveredvp can only be in termination (or
2460 * terminated) if the coveredvp's mount point is in a
2461 * forced unmount (or has been) since we still hold the
2462 * ref.
2463 */
2464 if (!vnode_isrecycled(coveredvp)) {
2465 pvp = vnode_getparent(coveredvp);
2466 #if CONFIG_TRIGGERS
2467 if (coveredvp->v_resolve) {
2468 vnode_trigger_rearm(coveredvp, ctx);
2469 }
2470 #endif
2471 }
2472
2473 vnode_rele(coveredvp);
2474 vnode_put(coveredvp);
2475 coveredvp = NULLVP;
2476
2477 if (pvp) {
2478 lock_vnode_and_post(pvp, NOTE_WRITE);
2479 vnode_put(pvp);
2480 }
2481 } else if (mp->mnt_flag & MNT_ROOTFS) {
2482 mount_lock_destroy(mp);
2483 #if CONFIG_MACF
2484 mac_mount_label_destroy(mp);
2485 #endif
2486 FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
2487 } else {
2488 panic("dounmount: no coveredvp");
2489 }
2490 }
2491 return error;
2492 }
2493
2494 /*
2495 * Unmount any mounts in this filesystem.
2496 */
2497 void
2498 dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2499 {
2500 mount_t smp;
2501 fsid_t *fsids, fsid;
2502 int fsids_sz;
2503 int count = 0, i, m = 0;
2504 vnode_t vp;
2505
2506 mount_list_lock();
2507
2508 // Get an array to hold the submounts' fsids.
2509 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2510 count++;
2511 fsids_sz = count * sizeof(fsid_t);
2512 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2513 if (fsids == NULL) {
2514 mount_list_unlock();
2515 goto out;
2516 }
2517 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2518
2519 /*
2520 * Fill the array with submount fsids.
2521 * Since mounts are always added to the tail of the mount list, the
2522 * list is always in mount order.
2523 * For each mount check if the mounted-on vnode belongs to a
2524 * mount that's already added to our array of mounts to be unmounted.
2525 */
2526 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2527 vp = smp->mnt_vnodecovered;
2528 if (vp == NULL) {
2529 continue;
2530 }
2531 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2532 for (i = 0; i <= m; i++) {
2533 if (fsids[i].val[0] == fsid.val[0] &&
2534 fsids[i].val[1] == fsid.val[1]) {
2535 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2536 break;
2537 }
2538 }
2539 }
2540 mount_list_unlock();
2541
2542 // Unmount the submounts in reverse order. Ignore errors.
2543 for (i = m; i > 0; i--) {
2544 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2545 if (smp) {
2546 mount_ref(smp, 0);
2547 mount_iterdrop(smp);
2548 (void) dounmount(smp, flags, 1, ctx);
2549 }
2550 }
2551 out:
2552 if (fsids) {
2553 FREE(fsids, M_TEMP);
2554 }
2555 }
2556
2557 void
2558 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2559 {
2560 vnode_lock(dp);
2561 mp->mnt_crossref--;
2562
2563 if (mp->mnt_crossref < 0) {
2564 panic("mount cross refs -ve");
2565 }
2566
2567 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
2568 if (need_put) {
2569 vnode_put_locked(dp);
2570 }
2571 vnode_unlock(dp);
2572
2573 mount_lock_destroy(mp);
2574 #if CONFIG_MACF
2575 mac_mount_label_destroy(mp);
2576 #endif
2577 FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
2578 return;
2579 }
2580 if (need_put) {
2581 vnode_put_locked(dp);
2582 }
2583 vnode_unlock(dp);
2584 }
2585
2586
2587 /*
2588 * Sync each mounted filesystem.
2589 */
2590 #if DIAGNOSTIC
2591 int syncprt = 0;
2592 #endif
2593
2594 int print_vmpage_stat = 0;
2595
2596 /*
2597 * sync_callback: simple wrapper that calls VFS_SYNC() on volumes
2598 * mounted read-write with the passed waitfor value.
2599 *
2600 * Parameters: mp mount-point descriptor per mounted file-system instance.
2601 * arg user argument (please see below)
2602 *
2603 * The user argument is a pointer to a 32-bit unsigned integer which describes
2604 * the type of waitfor value to set for calling VFS_SYNC(). If the user argument is
2605 * passed as NULL, VFS_SYNC() is called with MNT_NOWAIT set as the default
2606 * waitfor value.
2607 *
2608 * Returns: VFS_RETURNED
2609 */
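/*
 * For example, a kernel caller wanting a synchronous flush of every
 * writable volume could drive this callback as follows (illustrative
 * sketch only):
 *
 *	uint32_t waitfor = MNT_WAIT;
 *	vfs_iterate(LK_NOWAIT, sync_callback, &waitfor);
 */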
2610 static int
2611 sync_callback(mount_t mp, void *arg)
2612 {
2613 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2614 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2615 unsigned waitfor = MNT_NOWAIT;
2616
2617 if (arg) {
2618 waitfor = *(uint32_t*)arg;
2619 }
2620
2621 /* Sanity check for flags - these are the only valid combinations for the flag bits */
2622 if (waitfor != MNT_WAIT &&
2623 waitfor != (MNT_WAIT | MNT_VOLUME) &&
2624 waitfor != MNT_NOWAIT &&
2625 waitfor != (MNT_NOWAIT | MNT_VOLUME) &&
2626 waitfor != MNT_DWAIT &&
2627 waitfor != (MNT_DWAIT | MNT_VOLUME)) {
2628 panic("Passed inappropriate waitfor %u to "
2629 "sync_callback()", waitfor);
2630 }
2631
2632 mp->mnt_flag &= ~MNT_ASYNC;
2633 (void)VFS_SYNC(mp, waitfor, vfs_context_kernel());
2634 if (asyncflag) {
2635 mp->mnt_flag |= MNT_ASYNC;
2636 }
2637 }
2638
2639 return VFS_RETURNED;
2640 }
2641
2642 /* ARGSUSED */
2643 int
2644 sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
2645 {
2646 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2647
2648 if (print_vmpage_stat) {
2649 vm_countdirtypages();
2650 }
2651
2652 #if DIAGNOSTIC
2653 if (syncprt) {
2654 vfs_bufstats();
2655 }
2656 #endif /* DIAGNOSTIC */
2657 return 0;
2658 }
2659
2660 typedef enum {
2661 SYNC_ALL = 0,
2662 SYNC_ONLY_RELIABLE_MEDIA = 1,
2663 SYNC_ONLY_UNRELIABLE_MEDIA = 2
2664 } sync_type_t;
2665
2666 static int
2667 sync_internal_callback(mount_t mp, void *arg)
2668 {
2669 if (arg) {
2670 int is_reliable = !(mp->mnt_kern_flag & MNTK_VIRTUALDEV) &&
2671 (mp->mnt_flag & MNT_LOCAL);
2672 sync_type_t sync_type = *((sync_type_t *)arg);
2673
2674 if ((sync_type == SYNC_ONLY_RELIABLE_MEDIA) && !is_reliable) {
2675 return VFS_RETURNED;
2676 } else if ((sync_type == SYNC_ONLY_UNRELIABLE_MEDIA) && is_reliable) {
2677 return VFS_RETURNED;
2678 }
2679 }
2680
2681 (void)sync_callback(mp, NULL);
2682
2683 return VFS_RETURNED;
2684 }
2685
2686 int sync_thread_state = 0;
2687 int sync_timeout_seconds = 5;
2688
2689 #define SYNC_THREAD_RUN 0x0001
2690 #define SYNC_THREAD_RUNNING 0x0002
2691
2692 static void
2693 sync_thread(__unused void *arg, __unused wait_result_t wr)
2694 {
2695 sync_type_t sync_type;
2696
2697 lck_mtx_lock(sync_mtx_lck);
2698 while (sync_thread_state & SYNC_THREAD_RUN) {
2699 sync_thread_state &= ~SYNC_THREAD_RUN;
2700 lck_mtx_unlock(sync_mtx_lck);
2701
2702 sync_type = SYNC_ONLY_RELIABLE_MEDIA;
2703 vfs_iterate(LK_NOWAIT, sync_internal_callback, &sync_type);
2704 sync_type = SYNC_ONLY_UNRELIABLE_MEDIA;
2705 vfs_iterate(LK_NOWAIT, sync_internal_callback, &sync_type);
2706
2707 lck_mtx_lock(sync_mtx_lck);
2708 }
2709 /*
2710 * This wakeup _has_ to be issued before the lock is released otherwise
2711 * we may end up waking up a thread in sync_internal which is
2712 * expecting a wakeup from a thread it just created and not from this
2713 * thread which is about to exit.
2714 */
2715 wakeup(&sync_thread_state);
2716 sync_thread_state &= ~SYNC_THREAD_RUNNING;
2717 lck_mtx_unlock(sync_mtx_lck);
2718
2719 if (print_vmpage_stat) {
2720 vm_countdirtypages();
2721 }
2722
2723 #if DIAGNOSTIC
2724 if (syncprt) {
2725 vfs_bufstats();
2726 }
2727 #endif /* DIAGNOSTIC */
2728 }
2729
2730 struct timeval sync_timeout_last_print = {.tv_sec = 0, .tv_usec = 0};
2731
2732 /*
2733 * An in-kernel sync for power management to call.
2734 * This function always returns within sync_timeout_seconds seconds.
2735 */
2736 __private_extern__ int
2737 sync_internal(void)
2738 {
2739 thread_t thd;
2740 int error;
2741 int thread_created = FALSE;
2742 struct timespec ts = {.tv_sec = sync_timeout_seconds, .tv_nsec = 0};
2743
2744 lck_mtx_lock(sync_mtx_lck);
2745 sync_thread_state |= SYNC_THREAD_RUN;
2746 if (!(sync_thread_state & SYNC_THREAD_RUNNING)) {
2747 int kr;
2748
2749 sync_thread_state |= SYNC_THREAD_RUNNING;
2750 kr = kernel_thread_start(sync_thread, NULL, &thd);
2751 if (kr != KERN_SUCCESS) {
2752 sync_thread_state &= ~SYNC_THREAD_RUNNING;
2753 lck_mtx_unlock(sync_mtx_lck);
2754 printf("sync_thread failed\n");
2755 return 0;
2756 }
2757 thread_created = TRUE;
2758 }
2759
2760 error = msleep((caddr_t)&sync_thread_state, sync_mtx_lck,
2761 (PVFS | PDROP | PCATCH), "sync_thread", &ts);
2762 if (error) {
2763 struct timeval now;
2764
2765 microtime(&now);
2766 if (now.tv_sec - sync_timeout_last_print.tv_sec > 120) {
2767 printf("sync timed out: %d sec\n", sync_timeout_seconds);
2768 sync_timeout_last_print.tv_sec = now.tv_sec;
2769 }
2770 }
2771
2772 if (thread_created) {
2773 thread_deallocate(thd);
2774 }
2775
2776 return 0;
2777 } /* end of sync_internal call */
2778
2779 /*
2780 * Change filesystem quotas.
2781 */
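/*
 * Illustrative userspace sketch (the path "/" and uid 501 are example
 * values); Q_GETQUOTA copies the limits and current usage into a
 * struct dqblk:
 *
 *	#include <sys/quota.h>
 *
 *	struct dqblk dq;
 *	if (quotactl("/", QCMD(Q_GETQUOTA, USRQUOTA), 501, (caddr_t)&dq) == 0)
 *		... dq.dqb_curbytes is the current usage in bytes ...
 */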
2782 #if QUOTA
2783 int
2784 quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
2785 {
2786 struct mount *mp;
2787 int error, quota_cmd, quota_status = 0;
2788 caddr_t datap;
2789 size_t fnamelen;
2790 struct nameidata nd;
2791 vfs_context_t ctx = vfs_context_current();
2792 struct dqblk my_dqblk = {};
2793
2794 AUDIT_ARG(uid, uap->uid);
2795 AUDIT_ARG(cmd, uap->cmd);
2796 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2797 uap->path, ctx);
2798 error = namei(&nd);
2799 if (error) {
2800 return error;
2801 }
2802 mp = nd.ni_vp->v_mount;
2803 mount_ref(mp, 0);
2804 vnode_put(nd.ni_vp);
2805 nameidone(&nd);
2806
2807 /* copyin any data we will need for downstream code */
2808 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2809
2810 switch (quota_cmd) {
2811 case Q_QUOTAON:
2812 /* uap->arg specifies a file from which to take the quotas */
2813 fnamelen = MAXPATHLEN;
2814 datap = kalloc(MAXPATHLEN);
2815 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2816 break;
2817 case Q_GETQUOTA:
2818 /* uap->arg is a pointer to a dqblk structure. */
2819 datap = (caddr_t) &my_dqblk;
2820 break;
2821 case Q_SETQUOTA:
2822 case Q_SETUSE:
2823 /* uap->arg is a pointer to a dqblk structure. */
2824 datap = (caddr_t) &my_dqblk;
2825 if (proc_is64bit(p)) {
2826 struct user_dqblk my_dqblk64;
2827 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof(my_dqblk64));
2828 if (error == 0) {
2829 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2830 }
2831 } else {
2832 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof(my_dqblk));
2833 }
2834 break;
2835 case Q_QUOTASTAT:
2836 /* uap->arg is a pointer to an integer */
2837 datap = (caddr_t) &quota_status;
2838 break;
2839 default:
2840 datap = NULL;
2841 break;
2842 } /* switch */
2843
2844 if (error == 0) {
2845 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
2846 }
2847
2848 switch (quota_cmd) {
2849 case Q_QUOTAON:
2850 if (datap != NULL) {
2851 kfree(datap, MAXPATHLEN);
2852 }
2853 break;
2854 case Q_GETQUOTA:
2855 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2856 if (error == 0) {
2857 if (proc_is64bit(p)) {
2858 struct user_dqblk my_dqblk64;
2859
2860 memset(&my_dqblk64, 0, sizeof(my_dqblk64));
2861 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2862 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof(my_dqblk64));
2863 } else {
2864 error = copyout(datap, uap->arg, sizeof(struct dqblk));
2865 }
2866 }
2867 break;
2868 case Q_QUOTASTAT:
2869 /* uap->arg is a pointer to an integer */
2870 if (error == 0) {
2871 error = copyout(datap, uap->arg, sizeof(quota_status));
2872 }
2873 break;
2874 default:
2875 break;
2876 } /* switch */
2877
2878 mount_drop(mp, 0);
2879 return error;
2880 }
2881 #else
2882 int
2883 quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2884 {
2885 return EOPNOTSUPP;
2886 }
2887 #endif /* QUOTA */
2888
2889 /*
2890 * Get filesystem statistics.
2891 *
2892 * Returns: 0 Success
2893 * namei:???
2894 * vfs_update_vfsstat:???
2895 * munge_statfs:EFAULT
2896 */
2897 /* ARGSUSED */
2898 int
2899 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
2900 {
2901 struct mount *mp;
2902 struct vfsstatfs *sp;
2903 int error;
2904 struct nameidata nd;
2905 vfs_context_t ctx = vfs_context_current();
2906 vnode_t vp;
2907
2908 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2909 UIO_USERSPACE, uap->path, ctx);
2910 error = namei(&nd);
2911 if (error != 0) {
2912 return error;
2913 }
2914 vp = nd.ni_vp;
2915 mp = vp->v_mount;
2916 sp = &mp->mnt_vfsstat;
2917 nameidone(&nd);
2918
2919 #if CONFIG_MACF
2920 error = mac_mount_check_stat(ctx, mp);
2921 if (error != 0) {
2922 vnode_put(vp);
2923 return error;
2924 }
2925 #endif
2926
2927 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
2928 if (error != 0) {
2929 vnode_put(vp);
2930 return error;
2931 }
2932
2933 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2934 vnode_put(vp);
2935 return error;
2936 }
2937
2938 /*
2939 * Get filesystem statistics.
2940 */
2941 /* ARGSUSED */
2942 int
2943 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
2944 {
2945 vnode_t vp;
2946 struct mount *mp;
2947 struct vfsstatfs *sp;
2948 int error;
2949
2950 AUDIT_ARG(fd, uap->fd);
2951
2952 if ((error = file_vnode(uap->fd, &vp))) {
2953 return error;
2954 }
2955
2956 error = vnode_getwithref(vp);
2957 if (error) {
2958 file_drop(uap->fd);
2959 return error;
2960 }
2961
2962 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2963
2964 mp = vp->v_mount;
2965 if (!mp) {
2966 error = EBADF;
2967 goto out;
2968 }
2969
2970 #if CONFIG_MACF
2971 error = mac_mount_check_stat(vfs_context_current(), mp);
2972 if (error != 0) {
2973 goto out;
2974 }
2975 #endif
2976
2977 sp = &mp->mnt_vfsstat;
2978 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2979 goto out;
2980 }
2981
2982 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2983
2984 out:
2985 file_drop(uap->fd);
2986 vnode_put(vp);
2987
2988 return error;
2989 }
2990
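/*
 * Populate a struct statfs64 from the mount's cached vfsstatfs. Only the
 * cached values are copied; callers that need current numbers (for example
 * statfs64() and fstatfs64() below) refresh the cache with
 * vfs_update_vfsstat() first.
 */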
2991 void
2992 vfs_get_statfs64(struct mount *mp, struct statfs64 *sfs)
2993 {
2994 struct vfsstatfs *vsfs = &mp->mnt_vfsstat;
2995
2996 bzero(sfs, sizeof(*sfs));
2997
2998 sfs->f_bsize = vsfs->f_bsize;
2999 sfs->f_iosize = (int32_t)vsfs->f_iosize;
3000 sfs->f_blocks = vsfs->f_blocks;
3001 sfs->f_bfree = vsfs->f_bfree;
3002 sfs->f_bavail = vsfs->f_bavail;
3003 sfs->f_files = vsfs->f_files;
3004 sfs->f_ffree = vsfs->f_ffree;
3005 sfs->f_fsid = vsfs->f_fsid;
3006 sfs->f_owner = vsfs->f_owner;
3007 sfs->f_type = mp->mnt_vtable->vfc_typenum;
3008 sfs->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3009 sfs->f_fssubtype = vsfs->f_fssubtype;
3010 sfs->f_flags_ext = ((mp->mnt_kern_flag & MNTK_SYSTEM) && !(mp->mnt_kern_flag & MNTK_SWAP_MOUNT) && !(mp->mnt_flag & MNT_ROOTFS)) ? MNT_EXT_ROOT_DATA_VOL : 0;
3011 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
3012 strlcpy(&sfs->f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
3013 } else {
3014 strlcpy(&sfs->f_fstypename[0], &vsfs->f_fstypename[0], MFSTYPENAMELEN);
3015 }
3016 strlcpy(&sfs->f_mntonname[0], &vsfs->f_mntonname[0], MAXPATHLEN);
3017 strlcpy(&sfs->f_mntfromname[0], &vsfs->f_mntfromname[0], MAXPATHLEN);
3018 }
3019
3020 /*
3021 * Get file system statistics in 64-bit mode
3022 */
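/*
 * Illustrative userspace sketch (assuming the 64-bit-inode variant of
 * statfs(2), whose struct statfs matches this layout; "/" is an example
 * path):
 *
 *	#include <sys/param.h>
 *	#include <sys/mount.h>
 *	#include <stdio.h>
 *
 *	struct statfs sb;
 *	if (statfs("/", &sb) == 0)
 *		printf("%s on %s (%s)\n", sb.f_mntfromname,
 *		    sb.f_mntonname, sb.f_fstypename);
 */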
3023 int
3024 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
3025 {
3026 struct mount *mp;
3027 int error;
3028 struct nameidata nd;
3029 struct statfs64 sfs;
3030 vfs_context_t ctxp = vfs_context_current();
3031 vnode_t vp;
3032
3033 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
3034 UIO_USERSPACE, uap->path, ctxp);
3035 error = namei(&nd);
3036 if (error != 0) {
3037 return error;
3038 }
3039 vp = nd.ni_vp;
3040 mp = vp->v_mount;
3041 nameidone(&nd);
3042
3043 #if CONFIG_MACF
3044 error = mac_mount_check_stat(ctxp, mp);
3045 if (error != 0) {
3046 vnode_put(vp);
3047 return error;
3048 }
3049 #endif
3050
3051 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
3052 if (error != 0) {
3053 vnode_put(vp);
3054 return error;
3055 }
3056
3057 vfs_get_statfs64(mp, &sfs);
3058 if ((mp->mnt_kern_flag & MNTK_SYSTEM) && !(mp->mnt_kern_flag & MNTK_SWAP_MOUNT) && !(mp->mnt_flag & MNT_ROOTFS) &&
3059 (p->p_vfs_iopolicy & P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME)) {
3060 /* This process does not want to see a separate data volume mountpoint */
3061 strlcpy(&sfs.f_mntonname[0], "/", sizeof("/"));
3062 }
3063 error = copyout(&sfs, uap->buf, sizeof(sfs));
3064 vnode_put(vp);
3065
3066 return error;
3067 }
3068
3069 /*
3070 * Get file system statistics in 64-bit mode
3071 */
3072 int
3073 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
3074 {
3075 struct vnode *vp;
3076 struct mount *mp;
3077 struct statfs64 sfs;
3078 int error;
3079
3080 AUDIT_ARG(fd, uap->fd);
3081
3082 if ((error = file_vnode(uap->fd, &vp))) {
3083 return error;
3084 }
3085
3086 error = vnode_getwithref(vp);
3087 if (error) {
3088 file_drop(uap->fd);
3089 return error;
3090 }
3091
3092 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
3093
3094 mp = vp->v_mount;
3095 if (!mp) {
3096 error = EBADF;
3097 goto out;
3098 }
3099
3100 #if CONFIG_MACF
3101 error = mac_mount_check_stat(vfs_context_current(), mp);
3102 if (error != 0) {
3103 goto out;
3104 }
3105 #endif
3106
3107 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
3108 goto out;
3109 }
3110
3111 vfs_get_statfs64(mp, &sfs);
3112 if ((mp->mnt_kern_flag & MNTK_SYSTEM) && !(mp->mnt_kern_flag & MNTK_SWAP_MOUNT) && !(mp->mnt_flag & MNT_ROOTFS) &&
3113 (p->p_vfs_iopolicy & P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME)) {
3114 /* This process does not want to see a separate data volume mountpoint */
3115 strlcpy(&sfs.f_mntonname[0], "/", sizeof("/"));
3116 }
3117 error = copyout(&sfs, uap->buf, sizeof(sfs));
3118
3119 out:
3120 file_drop(uap->fd);
3121 vnode_put(vp);
3122
3123 return error;
3124 }
3125
3126 struct getfsstat_struct {
3127 user_addr_t sfsp;
3128 user_addr_t *mp;
3129 int count;
3130 int maxcount;
3131 int flags;
3132 int error;
3133 };
3134
3135
3136 static int
3137 getfsstat_callback(mount_t mp, void * arg)
3138 {
3139 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
3140 struct vfsstatfs *sp;
3141 int error, my_size;
3142 vfs_context_t ctx = vfs_context_current();
3143
3144 if (fstp->sfsp && fstp->count < fstp->maxcount) {
3145 #if CONFIG_MACF
3146 error = mac_mount_check_stat(ctx, mp);
3147 if (error != 0) {
3148 fstp->error = error;
3149 return VFS_RETURNED_DONE;
3150 }
3151 #endif
3152 sp = &mp->mnt_vfsstat;
3153 /*
3154 * If MNT_NOWAIT is specified, do not refresh the
3155 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
3156 */
3157 if ((mp->mnt_lflag & MNT_LDEAD) ||
3158 (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
3159 (!(mp->mnt_lflag & MNT_LUNMOUNT)) &&
3160 (error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT)))) {
3161 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
3162 return VFS_RETURNED;
3163 }
3164
3165 /*
3166 * Need to handle LP64 version of struct statfs
3167 */
3168 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
3169 if (error) {
3170 fstp->error = error;
3171 return VFS_RETURNED_DONE;
3172 }
3173 fstp->sfsp += my_size;
3174
3175 if (fstp->mp) {
3176 #if CONFIG_MACF
3177 error = mac_mount_label_get(mp, *fstp->mp);
3178 if (error) {
3179 fstp->error = error;
3180 return VFS_RETURNED_DONE;
3181 }
3182 #endif
3183 fstp->mp++;
3184 }
3185 }
3186 fstp->count++;
3187 return VFS_RETURNED;
3188 }
3189
3190 /*
3191 * Get statistics on all filesystems.
3192 */
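/*
 * Illustrative userspace sketch: pass a NULL buffer first to learn how
 * many mounts exist, then fetch them with MNT_NOWAIT (which uses the
 * cached statistics):
 *
 *	#include <sys/param.h>
 *	#include <sys/ucred.h>
 *	#include <sys/mount.h>
 *	#include <stdlib.h>
 *
 *	int n = getfsstat(NULL, 0, MNT_NOWAIT);
 *	struct statfs *buf = calloc(n, sizeof(*buf));
 *	n = getfsstat(buf, n * sizeof(*buf), MNT_NOWAIT);
 */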
3193 int
3194 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
3195 {
3196 struct __mac_getfsstat_args muap;
3197
3198 muap.buf = uap->buf;
3199 muap.bufsize = uap->bufsize;
3200 muap.mac = USER_ADDR_NULL;
3201 muap.macsize = 0;
3202 muap.flags = uap->flags;
3203
3204 return __mac_getfsstat(p, &muap, retval);
3205 }
3206
3207 /*
3208 * __mac_getfsstat: Get MAC-related file system statistics
3209 *
3210 * Parameters: p (ignored)
3211 * uap User argument descriptor (see below)
3212 * retval Count of file system statistics (N stats)
3213 *
3214 * Indirect: uap->bufsize Buffer size
3215 * uap->macsize MAC info size
3216 * uap->buf Buffer where information will be returned
3217 * uap->mac MAC info
3218 * uap->flags File system flags
3219 *
3220 *
3221 * Returns: 0 Success
3222 * !0 Not success
3223 *
3224 */
3225 int
3226 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
3227 {
3228 user_addr_t sfsp;
3229 user_addr_t *mp;
3230 size_t count, maxcount, bufsize, macsize;
3231 struct getfsstat_struct fst;
3232
3233 if ((unsigned)uap->bufsize > INT_MAX || (unsigned)uap->macsize > INT_MAX) {
3234 return EINVAL;
3235 }
3236
3237 bufsize = (size_t) uap->bufsize;
3238 macsize = (size_t) uap->macsize;
3239
3240 if (IS_64BIT_PROCESS(p)) {
3241 maxcount = bufsize / sizeof(struct user64_statfs);
3242 } else {
3243 maxcount = bufsize / sizeof(struct user32_statfs);
3244 }
3245 sfsp = uap->buf;
3246 count = 0;
3247
3248 mp = NULL;
3249
3250 #if CONFIG_MACF
3251 if (uap->mac != USER_ADDR_NULL) {
3252 u_int32_t *mp0;
3253 int error;
3254 unsigned int i;
3255
3256 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
3257 if (count != maxcount) {
3258 return EINVAL;
3259 }
3260
3261 /* Copy in the array */
3262 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
3263 if (mp0 == NULL) {
3264 return ENOMEM;
3265 }
3266
3267 error = copyin(uap->mac, mp0, macsize);
3268 if (error) {
3269 FREE(mp0, M_MACTEMP);
3270 return error;
3271 }
3272
3273 /* Normalize to an array of user_addr_t */
3274 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
3275 if (mp == NULL) {
3276 FREE(mp0, M_MACTEMP);
3277 return ENOMEM;
3278 }
3279
3280 for (i = 0; i < count; i++) {
3281 if (IS_64BIT_PROCESS(p)) {
3282 mp[i] = ((user_addr_t *)mp0)[i];
3283 } else {
3284 mp[i] = (user_addr_t)mp0[i];
3285 }
3286 }
3287 FREE(mp0, M_MACTEMP);
3288 }
3289 #endif
3290
3291
3292 fst.sfsp = sfsp;
3293 fst.mp = mp;
3294 fst.flags = uap->flags;
3295 fst.count = 0;
3296 fst.error = 0;
3297 fst.maxcount = maxcount;
3298
3299
3300 vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT, getfsstat_callback, &fst);
3301
3302 if (mp) {
3303 FREE(mp, M_MACTEMP);
3304 }
3305
3306 if (fst.error) {
3307 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
3308 return fst.error;
3309 }
3310
3311 if (fst.sfsp && fst.count > fst.maxcount) {
3312 *retval = fst.maxcount;
3313 } else {
3314 *retval = fst.count;
3315 }
3316 return 0;
3317 }
3318
3319 static int
3320 getfsstat64_callback(mount_t mp, void * arg)
3321 {
3322 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
3323 struct vfsstatfs *sp;
3324 struct statfs64 sfs;
3325 int error;
3326
3327 if (fstp->sfsp && fstp->count < fstp->maxcount) {
3328 #if CONFIG_MACF
3329 error = mac_mount_check_stat(vfs_context_current(), mp);
3330 if (error != 0) {
3331 fstp->error = error;
3332 return VFS_RETURNED_DONE;
3333 }
3334 #endif
3335 sp = &mp->mnt_vfsstat;
3336 /*
3337 * If MNT_NOWAIT is specified, do not refresh the fsstat
3338 * cache. MNT_WAIT overrides MNT_NOWAIT.
3339 *
3340 * We treat MNT_DWAIT as MNT_WAIT for all instances of
3341 * getfsstat, since the constants are out of the same
3342 * namespace.
3343 */
3344 if ((mp->mnt_lflag & MNT_LDEAD) ||
3345 ((((fstp->flags & MNT_NOWAIT) == 0) || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
3346 (!(mp->mnt_lflag & MNT_LUNMOUNT)) &&
3347 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)))) {
3348 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
3349 return VFS_RETURNED;
3350 }
3351
3352 vfs_get_statfs64(mp, &sfs);
3353 error = copyout(&sfs, fstp->sfsp, sizeof(sfs));
3354 if (error) {
3355 fstp->error = error;
3356 return VFS_RETURNED_DONE;
3357 }
3358 fstp->sfsp += sizeof(sfs);
3359 }
3360 fstp->count++;
3361 return VFS_RETURNED;
3362 }
3363
3364 /*
3365 * Get statistics on all file systems in 64 bit mode.
3366 */
3367 int
3368 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
3369 {
3370 user_addr_t sfsp;
3371 int count, maxcount;
3372 struct getfsstat_struct fst;
3373
3374 maxcount = uap->bufsize / sizeof(struct statfs64);
3375
3376 sfsp = uap->buf;
3377 count = 0;
3378
3379 fst.sfsp = sfsp;
3380 fst.flags = uap->flags;
3381 fst.count = 0;
3382 fst.error = 0;
3383 fst.maxcount = maxcount;
3384
3385 vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT, getfsstat64_callback, &fst);
3386
3387 if (fst.error) {
3388 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
3389 return fst.error;
3390 }
3391
3392 if (fst.sfsp && fst.count > fst.maxcount) {
3393 *retval = fst.maxcount;
3394 } else {
3395 *retval = fst.count;
3396 }
3397
3398 return 0;
3399 }
3400
3401 /*
3402 * Gets the vnode associated with the file descriptor passed
3403 * as input.
3404 *
3405 * INPUT
3406 * ctx - vfs context of caller
3407 * fd - file descriptor for which vnode is required.
3408 * vpp - Pointer to pointer to vnode to be returned.
3409 *
3410 * The vnode is returned with an iocount, so any vnode obtained
3411 * by this call needs a vnode_put().
3412 *
3413 */
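/*
 * Typical in-kernel usage (illustrative sketch):
 *
 *	vnode_t vp;
 *
 *	if (vnode_getfromfd(ctx, fd, &vp) == 0) {
 *		... operate on vp while holding the iocount ...
 *		vnode_put(vp);
 *	}
 */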
3414 int
3415 vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
3416 {
3417 int error;
3418 vnode_t vp;
3419 struct fileproc *fp;
3420 proc_t p = vfs_context_proc(ctx);
3421
3422 *vpp = NULLVP;
3423
3424 error = fp_getfvp(p, fd, &fp, &vp);
3425 if (error) {
3426 return error;
3427 }
3428
3429 error = vnode_getwithref(vp);
3430 if (error) {
3431 (void)fp_drop(p, fd, fp, 0);
3432 return error;
3433 }
3434
3435 (void)fp_drop(p, fd, fp, 0);
3436 *vpp = vp;
3437 return error;
3438 }
3439
3440 /*
3441 * Wrapper function around namei to start lookup from a directory
3442 * specified by a file descriptor ni_dirfd.
3443 *
3444 * In addition to all the errors returned by namei, this call can
3445 * return ENOTDIR if the file descriptor does not refer to a directory,
3446 * and EBADF if the file descriptor is not valid.
3447 */
3448 int
3449 nameiat(struct nameidata *ndp, int dirfd)
3450 {
3451 if ((dirfd != AT_FDCWD) &&
3452 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
3453 !(ndp->ni_cnd.cn_flags & USEDVP)) {
3454 int error = 0;
3455 char c;
3456
3457 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3458 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3459 if (error) {
3460 return error;
3461 }
3462 } else {
3463 c = *((char *)(ndp->ni_dirp));
3464 }
3465
3466 if (c != '/') {
3467 vnode_t dvp_at;
3468
3469 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3470 &dvp_at);
3471 if (error) {
3472 return error;
3473 }
3474
3475 if (vnode_vtype(dvp_at) != VDIR) {
3476 vnode_put(dvp_at);
3477 return ENOTDIR;
3478 }
3479
3480 ndp->ni_dvp = dvp_at;
3481 ndp->ni_cnd.cn_flags |= USEDVP;
3482 error = namei(ndp);
3483 ndp->ni_cnd.cn_flags &= ~USEDVP;
3484 vnode_put(dvp_at);
3485 return error;
3486 }
3487 }
3488
3489 return namei(ndp);
3490 }
3491
3492 /*
3493 * Change current working directory to a given file descriptor.
3494 */
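/*
 * Illustrative userspace sketch of the fchdir(2) path serviced here
 * ("/tmp" is an example directory):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int dfd = open("/tmp", O_RDONLY | O_DIRECTORY);
 *	if (dfd == -1 || fchdir(dfd) == -1)
 *		perror("fchdir");
 */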
3495 /* ARGSUSED */
3496 static int
3497 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
3498 {
3499 struct filedesc *fdp = p->p_fd;
3500 vnode_t vp;
3501 vnode_t tdp;
3502 vnode_t tvp;
3503 struct mount *mp;
3504 int error;
3505 vfs_context_t ctx = vfs_context_current();
3506
3507 AUDIT_ARG(fd, uap->fd);
3508 if (per_thread && uap->fd == -1) {
3509 /*
3510 * Switching back from per-thread to per-process CWD; verify we
3511 * in fact have one before proceeding. The only success case
3512 * for this code path is to return 0 preemptively after zapping
3513 * the thread structure contents.
3514 */
3515 thread_t th = vfs_context_thread(ctx);
3516 if (th) {
3517 uthread_t uth = get_bsdthread_info(th);
3518 tvp = uth->uu_cdir;
3519 uth->uu_cdir = NULLVP;
3520 if (tvp != NULLVP) {
3521 vnode_rele(tvp);
3522 return 0;
3523 }
3524 }
3525 return EBADF;
3526 }
3527
3528 if ((error = file_vnode(uap->fd, &vp))) {
3529 return error;
3530 }
3531 if ((error = vnode_getwithref(vp))) {
3532 file_drop(uap->fd);
3533 return error;
3534 }
3535
3536 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3537
3538 if (vp->v_type != VDIR) {
3539 error = ENOTDIR;
3540 goto out;
3541 }
3542
3543 #if CONFIG_MACF
3544 error = mac_vnode_check_chdir(ctx, vp);
3545 if (error) {
3546 goto out;
3547 }
3548 #endif
3549 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3550 if (error) {
3551 goto out;
3552 }
3553
3554 while (!error && (mp = vp->v_mountedhere) != NULL) {
3555 if (vfs_busy(mp, LK_NOWAIT)) {
3556 error = EACCES;
3557 goto out;
3558 }
3559 error = VFS_ROOT(mp, &tdp, ctx);
3560 vfs_unbusy(mp);
3561 if (error) {
3562 break;
3563 }
3564 vnode_put(vp);
3565 vp = tdp;
3566 }
3567 if (error) {
3568 goto out;
3569 }
3570 if ((error = vnode_ref(vp))) {
3571 goto out;
3572 }
3573 vnode_put(vp);
3574
3575 if (per_thread) {
3576 thread_t th = vfs_context_thread(ctx);
3577 if (th) {
3578 uthread_t uth = get_bsdthread_info(th);
3579 tvp = uth->uu_cdir;
3580 uth->uu_cdir = vp;
3581 OSBitOrAtomic(P_THCWD, &p->p_flag);
3582 } else {
3583 vnode_rele(vp);
3584 return ENOENT;
3585 }
3586 } else {
3587 proc_fdlock(p);
3588 tvp = fdp->fd_cdir;
3589 fdp->fd_cdir = vp;
3590 proc_fdunlock(p);
3591 }
3592
3593 if (tvp) {
3594 vnode_rele(tvp);
3595 }
3596 file_drop(uap->fd);
3597
3598 return 0;
3599 out:
3600 vnode_put(vp);
3601 file_drop(uap->fd);
3602
3603 return error;
3604 }
3605
3606 int
3607 fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
3608 {
3609 return common_fchdir(p, uap, 0);
3610 }
3611
3612 int
3613 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
3614 {
3615 return common_fchdir(p, (void *)uap, 1);
3616 }
3617
3618
3619 /*
3620 * Change current working directory (".").
3621 *
3622 * Returns: 0 Success
3623 * change_dir:ENOTDIR
3624 * change_dir:???
3625 * vnode_ref:ENOENT No such file or directory
3626 */
3627 /* ARGSUSED */
3628 int
3629 chdir_internal(proc_t p, vfs_context_t ctx, struct nameidata *ndp, int per_thread)
3630 {
3631 struct filedesc *fdp = p->p_fd;
3632 int error;
3633 vnode_t tvp;
3634
3635 error = change_dir(ndp, ctx);
3636 if (error) {
3637 return error;
3638 }
3639 if ((error = vnode_ref(ndp->ni_vp))) {
3640 vnode_put(ndp->ni_vp);
3641 return error;
3642 }
3643 /*
3644 * drop the iocount we picked up in change_dir
3645 */
3646 vnode_put(ndp->ni_vp);
3647
3648 if (per_thread) {
3649 thread_t th = vfs_context_thread(ctx);
3650 if (th) {
3651 uthread_t uth = get_bsdthread_info(th);
3652 tvp = uth->uu_cdir;
3653 uth->uu_cdir = ndp->ni_vp;
3654 OSBitOrAtomic(P_THCWD, &p->p_flag);
3655 } else {
3656 vnode_rele(ndp->ni_vp);
3657 return ENOENT;
3658 }
3659 } else {
3660 proc_fdlock(p);
3661 tvp = fdp->fd_cdir;
3662 fdp->fd_cdir = ndp->ni_vp;
3663 proc_fdunlock(p);
3664 }
3665
3666 if (tvp) {
3667 vnode_rele(tvp);
3668 }
3669
3670 return 0;
3671 }
3672
3673
3674 /*
3675 * Change current working directory (".").
3676 *
3677 * Returns: 0 Success
3678 * chdir_internal:ENOTDIR
3679 * chdir_internal:ENOENT No such file or directory
3680 * chdir_internal:???
3681 */
3682 /* ARGSUSED */
3683 static int
3684 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
3685 {
3686 struct nameidata nd;
3687 vfs_context_t ctx = vfs_context_current();
3688
3689 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
3690 UIO_USERSPACE, uap->path, ctx);
3691
3692 return chdir_internal(p, ctx, &nd, per_thread);
3693 }
3694
3695
3696 /*
3697 * chdir
3698 *
3699 * Change current working directory (".") for the entire process
3700 *
3701 * Parameters: p Process requesting the call
3702 * uap User argument descriptor (see below)
3703 * retval (ignored)
3704 *
3705 * Indirect parameters: uap->path Directory path
3706 *
3707 * Returns: 0 Success
3708 * common_chdir: ENOTDIR
3709 * common_chdir: ENOENT No such file or directory
3710 * common_chdir: ???
3711 *
3712 */
3713 int
3714 chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
3715 {
3716 return common_chdir(p, (void *)uap, 0);
3717 }
3718
3719 /*
3720 * __pthread_chdir
3721 *
3722 * Change current working directory (".") for a single thread
3723 *
3724 * Parameters: p Process requesting the call
3725 * uap User argument descriptor (see below)
3726 * retval (ignored)
3727 *
3728 * Indirect parameters: uap->path Directory path
3729 *
3730 * Returns: 0 Success
3731 * common_chdir: ENOTDIR
3732 * common_chdir: ENOENT No such file or directory
3733 * common_chdir: ???
3734 *
3735 */
3736 int
3737 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
3738 {
3739 return common_chdir(p, (void *)uap, 1);
3740 }
3741
3742
3743 /*
3744 * Change notion of root (``/'') directory.
3745 */
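/*
 * Illustrative userspace sketch (the jail path is hypothetical; the
 * caller must be superuser and typically chdir()s into the new root):
 *
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	if (chroot("/private/var/jail") == -1 || chdir("/") == -1)
 *		perror("chroot");
 */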
3746 /* ARGSUSED */
3747 int
3748 chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
3749 {
3750 struct filedesc *fdp = p->p_fd;
3751 int error;
3752 struct nameidata nd;
3753 vnode_t tvp;
3754 vfs_context_t ctx = vfs_context_current();
3755
3756 if ((error = suser(kauth_cred_get(), &p->p_acflag))) {
3757 return error;
3758 }
3759
3760 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
3761 UIO_USERSPACE, uap->path, ctx);
3762 error = change_dir(&nd, ctx);
3763 if (error) {
3764 return error;
3765 }
3766
3767 #if CONFIG_MACF
3768 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3769 &nd.ni_cnd);
3770 if (error) {
3771 vnode_put(nd.ni_vp);
3772 return error;
3773 }
3774 #endif
3775
3776 if ((error = vnode_ref(nd.ni_vp))) {
3777 vnode_put(nd.ni_vp);
3778 return error;
3779 }
3780 vnode_put(nd.ni_vp);
3781
3782 proc_fdlock(p);
3783 tvp = fdp->fd_rdir;
3784 fdp->fd_rdir = nd.ni_vp;
3785 fdp->fd_flags |= FD_CHROOT;
3786 proc_fdunlock(p);
3787
3788 if (tvp != NULL) {
3789 vnode_rele(tvp);
3790 }
3791
3792 return 0;
3793 }
3794
3795 /*
3796 * Common routine for chroot and chdir.
3797 *
3798 * Returns: 0 Success
3799 * ENOTDIR Not a directory
3800 * namei:??? [anything namei can return]
3801 * vnode_authorize:??? [anything vnode_authorize can return]
3802 */
3803 static int
3804 change_dir(struct nameidata *ndp, vfs_context_t ctx)
3805 {
3806 vnode_t vp;
3807 int error;
3808
3809 if ((error = namei(ndp))) {
3810 return error;
3811 }
3812 nameidone(ndp);
3813 vp = ndp->ni_vp;
3814
3815 if (vp->v_type != VDIR) {
3816 vnode_put(vp);
3817 return ENOTDIR;
3818 }
3819
3820 #if CONFIG_MACF
3821 error = mac_vnode_check_chdir(ctx, vp);
3822 if (error) {
3823 vnode_put(vp);
3824 return error;
3825 }
3826 #endif
3827
3828 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3829 if (error) {
3830 vnode_put(vp);
3831 return error;
3832 }
3833
3834 return error;
3835 }
3836
3837 /*
3838 * Allocate the per-fd vnode data (for directories) associated with the file glob.
3839 */
3840 struct fd_vn_data *
3841 fg_vn_data_alloc(void)
3842 {
3843 struct fd_vn_data *fvdata;
3844
3845 /* Allocate per fd vnode data */
3846 MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
3847 M_FD_VN_DATA, M_WAITOK | M_ZERO);
3848 lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
3849 return fvdata;
3850 }
3851
3852 /*
3853 * Free the vnode data (for directories) associated with the file glob.
3854 */
3855 void
3856 fg_vn_data_free(void *fgvndata)
3857 {
3858 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3859
3860 if (fvdata->fv_buf) {
3861 FREE(fvdata->fv_buf, M_FD_DIRBUF);
3862 }
3863 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3864 FREE(fvdata, M_FD_VN_DATA);
3865 }
3866
3867 /*
3868 * Check permissions, allocate an open file structure,
3869 * and call the device open routine if any.
3870 *
3871 * Returns: 0 Success
3872 * EINVAL
3873 * EINTR
3874 * falloc:ENFILE
3875 * falloc:EMFILE
3876 * falloc:ENOMEM
3877 * vn_open_auth:???
3878 * dupfdopen:???
3879 * VNOP_ADVLOCK:???
3880 * vnode_setsize:???
3881 *
3882 * XXX Need to implement uid, gid
3883 */
3884 int
3885 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3886 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3887 int32_t *retval)
3888 {
3889 proc_t p = vfs_context_proc(ctx);
3890 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
3891 struct fileproc *fp;
3892 vnode_t vp;
3893 int flags, oflags;
3894 int type, indx, error;
3895 struct flock lf;
3896 struct vfs_context context;
3897
3898 oflags = uflags;
3899
3900 if ((oflags & O_ACCMODE) == O_ACCMODE) {
3901 return EINVAL;
3902 }
3903
3904 flags = FFLAGS(uflags);
3905 CLR(flags, FENCRYPTED);
3906 CLR(flags, FUNENCRYPTED);
3907
3908 AUDIT_ARG(fflags, oflags);
3909 AUDIT_ARG(mode, vap->va_mode);
3910
3911 if ((error = falloc_withalloc(p,
3912 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
3913 return error;
3914 }
3915 uu->uu_dupfd = -indx - 1;
3916
3917 if ((error = vn_open_auth(ndp, &flags, vap))) {
3918 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)) { /* XXX from fdopen */
3919 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
3920 fp_drop(p, indx, NULL, 0);
3921 *retval = indx;
3922 return 0;
3923 }
3924 }
3925 if (error == ERESTART) {
3926 error = EINTR;
3927 }
3928 fp_free(p, indx, fp);
3929 return error;
3930 }
3931 uu->uu_dupfd = 0;
3932 vp = ndp->ni_vp;
3933
3934 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
3935 fp->f_fglob->fg_ops = &vnops;
3936 fp->f_fglob->fg_data = (caddr_t)vp;
3937
3938 if (flags & (O_EXLOCK | O_SHLOCK)) {
3939 lf.l_whence = SEEK_SET;
3940 lf.l_start = 0;
3941 lf.l_len = 0;
3942 if (flags & O_EXLOCK) {
3943 lf.l_type = F_WRLCK;
3944 } else {
3945 lf.l_type = F_RDLCK;
3946 }
3947 type = F_FLOCK;
3948 if ((flags & FNONBLOCK) == 0) {
3949 type |= F_WAIT;
3950 }
3951 #if CONFIG_MACF
3952 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3953 F_SETLK, &lf);
3954 if (error) {
3955 goto bad;
3956 }
3957 #endif
3958 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) {
3959 goto bad;
3960 }
3961 fp->f_fglob->fg_flag |= FHASLOCK;
3962 }
3963
3964 /* try to truncate by setting the size attribute */
3965 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0)) {
3966 goto bad;
3967 }
3968
3969 /*
3970 * For directories we hold some additional information in the fd.
3971 */
3972 if (vnode_vtype(vp) == VDIR) {
3973 fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
3974 } else {
3975 fp->f_fglob->fg_vn_data = NULL;
3976 }
3977
3978 vnode_put(vp);
3979
3980 /*
3981 * The first terminal open (without O_NOCTTY) by a session leader
3982 * results in it being set as the controlling terminal.
3983 */
3984 if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
3985 !(flags & O_NOCTTY)) {
3986 int tmp = 0;
3987
3988 (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
3989 (caddr_t)&tmp, ctx);
3990 }
3991
3992 proc_fdlock(p);
3993 if (flags & O_CLOEXEC) {
3994 *fdflags(p, indx) |= UF_EXCLOSE;
3995 }
3996 if (flags & O_CLOFORK) {
3997 *fdflags(p, indx) |= UF_FORKCLOSE;
3998 }
3999 procfdtbl_releasefd(p, indx, NULL);
4000
4001 #if CONFIG_SECLUDED_MEMORY
4002 if (secluded_for_filecache &&
4003 FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE &&
4004 vnode_vtype(vp) == VREG) {
4005 memory_object_control_t moc;
4006
4007 moc = ubc_getobject(vp, UBC_FLAGS_NONE);
4008
4009 if (moc == MEMORY_OBJECT_CONTROL_NULL) {
4010 /* nothing to do... */
4011 } else if (fp->f_fglob->fg_flag & FWRITE) {
4012 /* writable -> no longer eligible for secluded pages */
4013 memory_object_mark_eligible_for_secluded(moc,
4014 FALSE);
4015 } else if (secluded_for_filecache == 1) {
4016 char pathname[32] = { 0, };
4017 size_t copied;
4018 /* XXX FBDP: better way to detect /Applications/ ? */
4019 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
4020 (void)copyinstr(ndp->ni_dirp,
4021 pathname,
4022 sizeof(pathname),
4023 &copied);
4024 } else {
4025 copystr(CAST_DOWN(void *, ndp->ni_dirp),
4026 pathname,
4027 sizeof(pathname),
4028 &copied);
4029 }
4030 pathname[sizeof(pathname) - 1] = '\0';
4031 if (strncmp(pathname,
4032 "/Applications/",
4033 strlen("/Applications/")) == 0 &&
4034 strncmp(pathname,
4035 "/Applications/Camera.app/",
4036 strlen("/Applications/Camera.app/")) != 0) {
4037 /*
4038 * not writable
4039 * AND from "/Applications/"
4040 * AND not from "/Applications/Camera.app/"
4041 * ==> eligible for secluded
4042 */
4043 memory_object_mark_eligible_for_secluded(moc,
4044 TRUE);
4045 }
4046 } else if (secluded_for_filecache == 2) {
4047 #if __arm64__
4048 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
4049 #elif __arm__
4050 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
4051 #else
4052 /* not implemented... */
4053 #endif
4054 size_t len = strlen(vp->v_name);
4055 if (!strncmp(vp->v_name, DYLD_SHARED_CACHE_NAME, len) ||
4056 !strncmp(vp->v_name, "dyld", len) ||
4057 !strncmp(vp->v_name, "launchd", len) ||
4058 !strncmp(vp->v_name, "Camera", len) ||
4059 !strncmp(vp->v_name, "mediaserverd", len) ||
4060 !strncmp(vp->v_name, "SpringBoard", len) ||
4061 !strncmp(vp->v_name, "backboardd", len)) {
4062 /*
4063 * This file matters when launching Camera:
4064 * do not store its contents in the secluded
4065 * pool that will be drained on Camera launch.
4066 */
4067 memory_object_mark_eligible_for_secluded(moc,
4068 FALSE);
4069 }
4070 }
4071 }
4072 #endif /* CONFIG_SECLUDED_MEMORY */
4073
4074 fp_drop(p, indx, fp, 1);
4075 proc_fdunlock(p);
4076
4077 *retval = indx;
4078
4079 return 0;
4080 bad:
4081 context = *vfs_context_current();
4082 context.vc_ucred = fp->f_fglob->fg_cred;
4083
4084 if ((fp->f_fglob->fg_flag & FHASLOCK) &&
4085 (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
4086 lf.l_whence = SEEK_SET;
4087 lf.l_start = 0;
4088 lf.l_len = 0;
4089 lf.l_type = F_UNLCK;
4090
4091 (void)VNOP_ADVLOCK(
4092 vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
4093 }
4094
4095 vn_close(vp, fp->f_fglob->fg_flag, &context);
4096 vnode_put(vp);
4097 fp_free(p, indx, fp);
4098
4099 return error;
4100 }
4101
4102 /*
4103 * While most of the *at syscall handlers can call nameiat() which
4104 * is a wrapper around namei, the use of namei and initialisation
4105 * of nameidata are far removed and in different functions - namei
4106 * gets called in vn_open_auth for open1. So we'll just do here what
4107 * nameiat() does.
4108 */
4109 static int
4110 open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
4111 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
4112 int dirfd)
4113 {
4114 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
4115 int error;
4116 char c;
4117
4118 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
4119 error = copyin(ndp->ni_dirp, &c, sizeof(char));
4120 if (error) {
4121 return error;
4122 }
4123 } else {
4124 c = *((char *)(ndp->ni_dirp));
4125 }
4126
4127 if (c != '/') {
4128 vnode_t dvp_at;
4129
4130 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
4131 &dvp_at);
4132 if (error) {
4133 return error;
4134 }
4135
4136 if (vnode_vtype(dvp_at) != VDIR) {
4137 vnode_put(dvp_at);
4138 return ENOTDIR;
4139 }
4140
4141 ndp->ni_dvp = dvp_at;
4142 ndp->ni_cnd.cn_flags |= USEDVP;
4143 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
4144 retval);
4145 vnode_put(dvp_at);
4146 return error;
4147 }
4148 }
4149
4150 return open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval);
4151 }
4152
4153 /*
4154 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
4155 *
4156 * Parameters: p Process requesting the open
4157 * uap User argument descriptor (see below)
4158 * retval Pointer to an area to receive the
4159 * return value from the system call
4160 *
4161 * Indirect: uap->path Path to open (same as 'open')
4162 * uap->flags Flags to open (same as 'open')
4163 * uap->uid UID to set, if creating
4164 * uap->gid GID to set, if creating
4165 * uap->mode File mode, if creating (same as 'open')
4166 * uap->xsecurity ACL to set, if creating
4167 *
4168 * Returns: 0 Success
4169 * !0 errno value
4170 *
4171 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4172 *
4173 * XXX: We should enumerate the possible errno values here, and where
4174 * in the code they originated.
4175 */
4176 int
4177 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
4178 {
4179 struct filedesc *fdp = p->p_fd;
4180 int ciferror;
4181 kauth_filesec_t xsecdst;
4182 struct vnode_attr va;
4183 struct nameidata nd;
4184 int cmode;
4185
4186 AUDIT_ARG(owner, uap->uid, uap->gid);
4187
4188 xsecdst = NULL;
4189 if ((uap->xsecurity != USER_ADDR_NULL) &&
4190 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)) {
4191 return ciferror;
4192 }
4193
4194 VATTR_INIT(&va);
4195 cmode = ((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
4196 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
4197 if (uap->uid != KAUTH_UID_NONE) {
4198 VATTR_SET(&va, va_uid, uap->uid);
4199 }
4200 if (uap->gid != KAUTH_GID_NONE) {
4201 VATTR_SET(&va, va_gid, uap->gid);
4202 }
4203 if (xsecdst != NULL) {
4204 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4205 }
4206
4207 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
4208 uap->path, vfs_context_current());
4209
4210 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
4211 fileproc_alloc_init, NULL, retval);
4212 if (xsecdst != NULL) {
4213 kauth_filesec_free(xsecdst);
4214 }
4215
4216 return ciferror;
4217 }
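/*
 * Illustrative only (not part of the build): a minimal user-space sketch of
 * the creation-mode masking used above, i.e.
 * cmode = ((mode & ~cmask) & ALLPERMS) & ~S_ISTXT.  The umask value 022 and
 * the requested mode 01666 are arbitrary example inputs.
 *
 *     #include <stdio.h>
 *     #include <sys/stat.h>
 *
 *     int
 *     main(void)
 *     {
 *             mode_t requested = 01666;      // sticky bit + rw for all
 *             mode_t cmask = 022;            // typical umask
 *             mode_t cmode = ((requested & ~cmask) & ALLPERMS) & ~S_ISTXT;
 *
 *             printf("0%o\n", cmode);        // prints 0644: umask applied,
 *                                            // sticky bit stripped
 *             return 0;
 *     }
 */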
4218
4219 /*
4220 * Go through the data-protected, atomically controlled open(2)
4221 *
4222 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
4223 */
4224 int
4225 open_dprotected_np(__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval)
4226 {
4227 int flags = uap->flags;
4228 int class = uap->class;
4229 int dpflags = uap->dpflags;
4230
4231 /*
4232 * Follow the same path as normal open(2)
4233 * Look up the item if it exists, and acquire the vnode.
4234 */
4235 struct filedesc *fdp = p->p_fd;
4236 struct vnode_attr va;
4237 struct nameidata nd;
4238 int cmode;
4239 int error;
4240
4241 VATTR_INIT(&va);
4242 /* Mask off all but regular access permissions */
4243 cmode = ((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
4244 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
4245
4246 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
4247 uap->path, vfs_context_current());
4248
4249 /*
4250 * Initialize the extra fields in vnode_attr to pass down our
4251 * extra fields.
4252 * 1. target cprotect class.
4253 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
4254 */
4255 if (flags & O_CREAT) {
4256 /* lower level kernel code validates that the class is valid before applying it. */
4257 if (class != PROTECTION_CLASS_DEFAULT) {
4258 /*
4259 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
4260 * file behave the same as open(2)
4261 */
4262 VATTR_SET(&va, va_dataprotect_class, class);
4263 }
4264 }
4265
4266 if (dpflags & (O_DP_GETRAWENCRYPTED | O_DP_GETRAWUNENCRYPTED)) {
4267 if (flags & (O_RDWR | O_WRONLY)) {
4268 /* Not allowed to write raw encrypted bytes */
4269 return EINVAL;
4270 }
4271 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
4272 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
4273 }
4274 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
4275 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
4276 }
4277 }
4278
4279 error = open1(vfs_context_current(), &nd, uap->flags, &va,
4280 fileproc_alloc_init, NULL, retval);
4281
4282 return error;
4283 }
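/*
 * Illustrative only: a hedged user-space sketch of the call documented
 * above, assuming a libc wrapper with the same argument order as the
 * prototype in the comment (path, flags, class, dpflags, mode) and that
 * O_DP_GETRAWENCRYPTED is visible to the caller's SDK.  Note the code above
 * rejects raw-encrypted access combined with write flags, so the example
 * opens read-only.
 *
 *     #include <fcntl.h>
 *     #include <unistd.h>
 *
 *     int
 *     read_raw_encrypted(const char *path)
 *     {
 *             // the class argument is only applied on O_CREAT, not used here
 *             int fd = open_dprotected_np(path, O_RDONLY, 0,
 *                 O_DP_GETRAWENCRYPTED, 0);
 *             if (fd < 0)
 *                     return -1;
 *             // reads on fd now return the raw, still-encrypted bytes
 *             close(fd);
 *             return 0;
 *     }
 */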
4284
4285 static int
4286 openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
4287 int fd, enum uio_seg segflg, int *retval)
4288 {
4289 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
4290 struct vnode_attr va;
4291 struct nameidata nd;
4292 int cmode;
4293
4294 VATTR_INIT(&va);
4295 /* Mask off all but regular access permissions */
4296 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
4297 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
4298
4299 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
4300 segflg, path, ctx);
4301
4302 return open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
4303 retval, fd);
4304 }
4305
4306 int
4307 open(proc_t p, struct open_args *uap, int32_t *retval)
4308 {
4309 __pthread_testcancel(1);
4310 return open_nocancel(p, (struct open_nocancel_args *)uap, retval);
4311 }
4312
4313 int
4314 open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
4315 int32_t *retval)
4316 {
4317 return openat_internal(vfs_context_current(), uap->path, uap->flags,
4318 uap->mode, AT_FDCWD, UIO_USERSPACE, retval);
4319 }
4320
4321 int
4322 openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
4323 int32_t *retval)
4324 {
4325 return openat_internal(vfs_context_current(), uap->path, uap->flags,
4326 uap->mode, uap->fd, UIO_USERSPACE, retval);
4327 }
4328
4329 int
4330 openat(proc_t p, struct openat_args *uap, int32_t *retval)
4331 {
4332 __pthread_testcancel(1);
4333 return openat_nocancel(p, (struct openat_nocancel_args *)uap, retval);
4334 }
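/*
 * Illustrative only: a minimal user-space sketch of the openat() dirfd
 * semantics implemented by open1at() above -- a relative path is resolved
 * against dirfd, an absolute path ignores it, and AT_FDCWD falls back to
 * the current working directory.  "/tmp" and "notes.txt" are arbitrary
 * example names.
 *
 *     #include <fcntl.h>
 *     #include <unistd.h>
 *
 *     int
 *     open_in_dir(void)
 *     {
 *             int dirfd = open("/tmp", O_RDONLY | O_DIRECTORY);
 *             if (dirfd < 0)
 *                     return -1;
 *
 *             // resolved as /tmp/notes.txt
 *             int fd = openat(dirfd, "notes.txt", O_RDWR | O_CREAT, 0644);
 *
 *             // absolute path: dirfd is ignored, same as open("/etc/hosts")
 *             int fd2 = openat(dirfd, "/etc/hosts", O_RDONLY);
 *
 *             int ok = (fd >= 0 && fd2 >= 0);
 *             if (fd >= 0)
 *                     close(fd);
 *             if (fd2 >= 0)
 *                     close(fd2);
 *             close(dirfd);
 *             return ok ? 0 : -1;
 *     }
 */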
4335
4336 /*
4337 * openbyid_np: open a file given a file system id and a file system object id
4338 * the HFS file system object id is an fsobj_id_t {uint32, uint32};
4339 * for file systems that don't support object ids it is a node id (uint64_t).
4340 *
4341 * Parameters: p Process requesting the open
4342 * uap User argument descriptor (see below)
4343 * retval Pointer to an area to receive the
4344 * return value from the system call
4345 *
4346 * Indirect: uap->fsid id of target file system
4347 * uap->objid id of target file system object
4348 * uap->oflags Flags to open (same as 'open')
4351 *
4352 * Returns: 0 Success
4353 * !0 errno value
4354 *
4355 *
4356 * XXX: We should enumerate the possible errno values here, and where
4357 * in the code they originated.
4358 */
4359 int
4360 openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
4361 {
4362 fsid_t fsid;
4363 uint64_t objid;
4364 int error;
4365 char *buf = NULL;
4366 int buflen = MAXPATHLEN;
4367 int pathlen = 0;
4368 vfs_context_t ctx = vfs_context_current();
4369
4370 if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
4371 return error;
4372 }
4373
4374 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
4375 return error;
4376 }
4377
4378 /* uap->objid is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
4379 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
4380 return error;
4381 }
4382
4383 AUDIT_ARG(value32, fsid.val[0]);
4384 AUDIT_ARG(value64, objid);
4385
4386 /* resolve path from fsid, objid */
4387 do {
4388 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
4389 if (buf == NULL) {
4390 return ENOMEM;
4391 }
4392
4393 error = fsgetpath_internal( ctx, fsid.val[0], objid, buflen,
4394 buf, FSOPT_ISREALFSID, &pathlen);
4395
4396 if (error) {
4397 FREE(buf, M_TEMP);
4398 buf = NULL;
4399 }
4400 } while (error == ENOSPC && (buflen += MAXPATHLEN));
4401
4402 if (error) {
4403 return error;
4404 }
4405
4406 buf[pathlen] = 0;
4407
4408 error = openat_internal(
4409 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
4410
4411 FREE(buf, M_TEMP);
4412
4413 return error;
4414 }
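/*
 * Illustrative only: a hedged user-space sketch, assuming a wrapper of the
 * form openbyid_np(fsid_t *, fsobj_id_t *, int) matching the uap fields
 * above (fsid, objid, oflags).  Here the fsid is taken from statfs() and
 * the object id from stat()'s st_ino; both are example sources, and a
 * caller may obtain them elsewhere (e.g. getattrlist()).
 *
 *     #include <sys/param.h>
 *     #include <sys/mount.h>
 *     #include <sys/stat.h>
 *     #include <sys/attr.h>
 *     #include <fcntl.h>
 *
 *     int
 *     reopen_by_id(const char *path)
 *     {
 *             struct statfs sfs;
 *             struct stat sb;
 *
 *             if (statfs(path, &sfs) != 0 || stat(path, &sb) != 0)
 *                     return -1;
 *
 *             fsobj_id_t obj = {
 *                     .fid_objno = (u_int32_t)sb.st_ino,
 *                     .fid_generation = 0,
 *             };
 *             return openbyid_np(&sfs.f_fsid, &obj, O_RDONLY);
 *     }
 */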
4415
4416
4417 /*
4418 * Create a special file.
4419 */
4420 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
4421
4422 int
4423 mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
4424 {
4425 struct vnode_attr va;
4426 vfs_context_t ctx = vfs_context_current();
4427 int error;
4428 struct nameidata nd;
4429 vnode_t vp, dvp;
4430
4431 VATTR_INIT(&va);
4432 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4433 VATTR_SET(&va, va_rdev, uap->dev);
4434
4435 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
4436 if ((uap->mode & S_IFMT) == S_IFIFO) {
4437 return mkfifo1(ctx, uap->path, &va);
4438 }
4439
4440 AUDIT_ARG(mode, uap->mode);
4441 AUDIT_ARG(value32, uap->dev);
4442
4443 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
4444 return error;
4445 }
4446 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
4447 UIO_USERSPACE, uap->path, ctx);
4448 error = namei(&nd);
4449 if (error) {
4450 return error;
4451 }
4452 dvp = nd.ni_dvp;
4453 vp = nd.ni_vp;
4454
4455 if (vp != NULL) {
4456 error = EEXIST;
4457 goto out;
4458 }
4459
4460 switch (uap->mode & S_IFMT) {
4461 case S_IFCHR:
4462 VATTR_SET(&va, va_type, VCHR);
4463 break;
4464 case S_IFBLK:
4465 VATTR_SET(&va, va_type, VBLK);
4466 break;
4467 default:
4468 error = EINVAL;
4469 goto out;
4470 }
4471
4472 #if CONFIG_MACF
4473 error = mac_vnode_check_create(ctx,
4474 nd.ni_dvp, &nd.ni_cnd, &va);
4475 if (error) {
4476 goto out;
4477 }
4478 #endif
4479
4480 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) {
4481 goto out;
4482 }
4483
4484 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0) {
4485 goto out;
4486 }
4487
4488 if (vp) {
4489 int update_flags = 0;
4490
4491 // Make sure the name & parent pointers are hooked up
4492 if (vp->v_name == NULL) {
4493 update_flags |= VNODE_UPDATE_NAME;
4494 }
4495 if (vp->v_parent == NULLVP) {
4496 update_flags |= VNODE_UPDATE_PARENT;
4497 }
4498
4499 if (update_flags) {
4500 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
4501 }
4502
4503 #if CONFIG_FSE
4504 add_fsevent(FSE_CREATE_FILE, ctx,
4505 FSE_ARG_VNODE, vp,
4506 FSE_ARG_DONE);
4507 #endif
4508 }
4509
4510 out:
4511 /*
4512 * nameidone has to happen before we vnode_put(dvp)
4513 * since it may need to release the fs_nodelock on the dvp
4514 */
4515 nameidone(&nd);
4516
4517 if (vp) {
4518 vnode_put(vp);
4519 }
4520 vnode_put(dvp);
4521
4522 return error;
4523 }
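/*
 * Illustrative only: a minimal user-space sketch of mknod().  As in the
 * handler above, an S_IFIFO mode behaves like mkfifo(), while creating the
 * character device below requires superuser privileges; the paths and the
 * major/minor numbers are arbitrary examples.
 *
 *     #include <sys/types.h>
 *     #include <sys/stat.h>
 *
 *     int
 *     make_nodes(void)
 *     {
 *             // equivalent to mkfifo("/tmp/example_fifo", 0600)
 *             if (mknod("/tmp/example_fifo", S_IFIFO | 0600, 0) != 0)
 *                     return -1;
 *
 *             // character device node; fails with EPERM unless root
 *             return mknod("/tmp/example_chr", S_IFCHR | 0666,
 *                 makedev(3, 2));
 *     }
 */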
4524
4525 /*
4526 * Create a named pipe.
4527 *
4528 * Returns: 0 Success
4529 * EEXIST
4530 * namei:???
4531 * vnode_authorize:???
4532 * vn_create:???
4533 */
4534 static int
4535 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
4536 {
4537 vnode_t vp, dvp;
4538 int error;
4539 struct nameidata nd;
4540
4541 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
4542 UIO_USERSPACE, upath, ctx);
4543 error = namei(&nd);
4544 if (error) {
4545 return error;
4546 }
4547 dvp = nd.ni_dvp;
4548 vp = nd.ni_vp;
4549
4550 /* check that this is a new file and authorize addition */
4551 if (vp != NULL) {
4552 error = EEXIST;
4553 goto out;
4554 }
4555 VATTR_SET(vap, va_type, VFIFO);
4556
4557 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
4558 goto out;
4559 }
4560
4561 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
4562 out:
4563 /*
4564 * nameidone has to happen before we vnode_put(dvp)
4565 * since it may need to release the fs_nodelock on the dvp
4566 */
4567 nameidone(&nd);
4568
4569 if (vp) {
4570 vnode_put(vp);
4571 }
4572 vnode_put(dvp);
4573
4574 return error;
4575 }
4576
4577
4578 /*
4579 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
4580 *
4581 * Parameters: p Process requesting the open
4582 * uap User argument descriptor (see below)
4583 * retval (Ignored)
4584 *
4585 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4586 * uap->uid UID to set
4587 * uap->gid GID to set
4588 * uap->mode File mode to set (same as 'mkfifo')
4589 * uap->xsecurity ACL to set, if creating
4590 *
4591 * Returns: 0 Success
4592 * !0 errno value
4593 *
4594 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4595 *
4596 * XXX: We should enumerate the possible errno values here, and where
4597 * in the code they originated.
4598 */
4599 int
4600 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
4601 {
4602 int ciferror;
4603 kauth_filesec_t xsecdst;
4604 struct vnode_attr va;
4605
4606 AUDIT_ARG(owner, uap->uid, uap->gid);
4607
4608 xsecdst = KAUTH_FILESEC_NONE;
4609 if (uap->xsecurity != USER_ADDR_NULL) {
4610 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
4611 return ciferror;
4612 }
4613 }
4614
4615 VATTR_INIT(&va);
4616 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4617 if (uap->uid != KAUTH_UID_NONE) {
4618 VATTR_SET(&va, va_uid, uap->uid);
4619 }
4620 if (uap->gid != KAUTH_GID_NONE) {
4621 VATTR_SET(&va, va_gid, uap->gid);
4622 }
4623 if (xsecdst != KAUTH_FILESEC_NONE) {
4624 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4625 }
4626
4627 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
4628
4629 if (xsecdst != KAUTH_FILESEC_NONE) {
4630 kauth_filesec_free(xsecdst);
4631 }
4632 return ciferror;
4633 }
4634
4635 /* ARGSUSED */
4636 int
4637 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
4638 {
4639 struct vnode_attr va;
4640
4641 VATTR_INIT(&va);
4642 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4643
4644 return mkfifo1(vfs_context_current(), uap->path, &va);
4645 }
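/*
 * Illustrative only: a short user-space sketch creating and using a named
 * pipe via mkfifo(); the path and message are arbitrary examples.  Opening
 * the read end with O_NONBLOCK avoids blocking until a writer appears.
 *
 *     #include <sys/stat.h>
 *     #include <fcntl.h>
 *     #include <unistd.h>
 *
 *     int
 *     fifo_roundtrip(void)
 *     {
 *             const char *path = "/tmp/example_fifo2";
 *             char buf[6] = { 0 };
 *
 *             if (mkfifo(path, 0600) != 0)
 *                     return -1;
 *
 *             int rfd = open(path, O_RDONLY | O_NONBLOCK);
 *             int wfd = open(path, O_WRONLY);
 *             (void)write(wfd, "hello", 5);
 *             (void)read(rfd, buf, 5);
 *
 *             close(wfd);
 *             close(rfd);
 *             return unlink(path);
 *     }
 */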
4646
4647
4648 static char *
4649 my_strrchr(char *p, int ch)
4650 {
4651 char *save;
4652
4653 for (save = NULL;; ++p) {
4654 if (*p == ch) {
4655 save = p;
4656 }
4657 if (!*p) {
4658 return save;
4659 }
4660 }
4661 /* NOTREACHED */
4662 }
4663
4664 extern int safe_getpath_new(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path, int firmlink);
4665 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4666 extern int safe_getpath_no_firmlink(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4667
4668 int
4669 safe_getpath_new(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path, int firmlink)
4670 {
4671 int ret, len = _len;
4672
4673 *truncated_path = 0;
4674
4675 if (firmlink) {
4676 ret = vn_getpath(dvp, path, &len);
4677 } else {
4678 ret = vn_getpath_no_firmlink(dvp, path, &len);
4679 }
4680 if (ret == 0 && len < (MAXPATHLEN - 1)) {
4681 if (leafname) {
4682 path[len - 1] = '/';
4683 len += strlcpy(&path[len], leafname, MAXPATHLEN - len) + 1;
4684 if (len > MAXPATHLEN) {
4685 char *ptr;
4686
4687 // the string got truncated!
4688 *truncated_path = 1;
4689 ptr = my_strrchr(path, '/');
4690 if (ptr) {
4691 *ptr = '\0'; // chop off the string at the last directory component
4692 }
4693 len = strlen(path) + 1;
4694 }
4695 }
4696 } else if (ret == 0) {
4697 *truncated_path = 1;
4698 } else if (ret != 0) {
4699 struct vnode *mydvp = dvp;
4700
4701 if (ret != ENOSPC) {
4702 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4703 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
4704 }
4705 *truncated_path = 1;
4706
4707 do {
4708 if (mydvp->v_parent != NULL) {
4709 mydvp = mydvp->v_parent;
4710 } else if (mydvp->v_mount) {
4711 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4712 break;
4713 } else {
4714 // no parent and no mount point? only thing is to punt and say "/" changed
4715 strlcpy(path, "/", _len);
4716 len = 2;
4717 mydvp = NULL;
4718 }
4719
4720 if (mydvp == NULL) {
4721 break;
4722 }
4723
4724 len = _len;
4725 if (firmlink) {
4726 ret = vn_getpath(mydvp, path, &len);
4727 } else {
4728 ret = vn_getpath_no_firmlink(mydvp, path, &len);
4729 }
4730 } while (ret == ENOSPC);
4731 }
4732
4733 return len;
4734 }
4735
4736 int
4737 safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4738 {
4739 return safe_getpath_new(dvp, leafname, path, _len, truncated_path, 1);
4740 }
4741
4742 int
4743 safe_getpath_no_firmlink(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4744 {
4745 return safe_getpath_new(dvp, leafname, path, _len, truncated_path, 0);
4746 }
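/*
 * Illustrative only: a condensed sketch of how callers in this file use
 * safe_getpath() (see linkat_internal() and unlinkat_internal() below) --
 * grab a scratch path buffer, build "<dvp path>/<leafname>", and note
 * whether truncation occurred so the fsevent can be flagged.
 *
 *     char *path = NULL;
 *     int len, truncated = 0;
 *
 *     GET_PATH(path);
 *     if (path == NULL)
 *             return ENOMEM;
 *     len = safe_getpath(dvp, cnp->cn_nameptr, path, MAXPATHLEN, &truncated);
 *     // ... use (path, len); set FSE_TRUNCATED_PATH if truncated ...
 *     RELEASE_PATH(path);
 */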
4747
4748 /*
4749 * Make a hard file link.
4750 *
4751 * Returns: 0 Success
4752 * EPERM
4753 * EEXIST
4754 * EXDEV
4755 * namei:???
4756 * vnode_authorize:???
4757 * VNOP_LINK:???
4758 */
4759 /* ARGSUSED */
4760 static int
4761 linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
4762 user_addr_t link, int flag, enum uio_seg segflg)
4763 {
4764 vnode_t vp, pvp, dvp, lvp;
4765 struct nameidata nd;
4766 int follow;
4767 int error;
4768 #if CONFIG_FSE
4769 fse_info finfo;
4770 #endif
4771 int need_event, has_listeners, need_kpath2;
4772 char *target_path = NULL;
4773 int truncated = 0;
4774
4775 vp = dvp = lvp = NULLVP;
4776
4777 /* look up the object we are linking to */
4778 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
4779 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
4780 segflg, path, ctx);
4781
4782 error = nameiat(&nd, fd1);
4783 if (error) {
4784 return error;
4785 }
4786 vp = nd.ni_vp;
4787
4788 nameidone(&nd);
4789
4790 /*
4791 * Normally, linking to directories is not supported.
4792 * However, some file systems may have limited support.
4793 */
4794 if (vp->v_type == VDIR) {
4795 if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
4796 error = EPERM; /* POSIX */
4797 goto out;
4798 }
4799
4800 /* Linking to a directory requires ownership. */
4801 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
4802 struct vnode_attr dva;
4803
4804 VATTR_INIT(&dva);
4805 VATTR_WANTED(&dva, va_uid);
4806 if (vnode_getattr(vp, &dva, ctx) != 0 ||
4807 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
4808 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
4809 error = EACCES;
4810 goto out;
4811 }
4812 }
4813 }
4814
4815 /* lookup the target node */
4816 #if CONFIG_TRIGGERS
4817 nd.ni_op = OP_LINK;
4818 #endif
4819 nd.ni_cnd.cn_nameiop = CREATE;
4820 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
4821 nd.ni_dirp = link;
4822 error = nameiat(&nd, fd2);
4823 if (error != 0) {
4824 goto out;
4825 }
4826 dvp = nd.ni_dvp;
4827 lvp = nd.ni_vp;
4828
4829 #if CONFIG_MACF
4830 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0) {
4831 goto out2;
4832 }
4833 #endif
4834
4835 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4836 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0) {
4837 goto out2;
4838 }
4839
4840 /* target node must not exist */
4841 if (lvp != NULLVP) {
4842 error = EEXIST;
4843 goto out2;
4844 }
4845 /* cannot link across mountpoints */
4846 if (vnode_mount(vp) != vnode_mount(dvp)) {
4847 error = EXDEV;
4848 goto out2;
4849 }
4850
4851 /* authorize creation of the target node */
4852 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) {
4853 goto out2;
4854 }
4855
4856 /* and finally make the link */
4857 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
4858 if (error) {
4859 goto out2;
4860 }
4861
4862 #if CONFIG_MACF
4863 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
4864 #endif
4865
4866 #if CONFIG_FSE
4867 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
4868 #else
4869 need_event = 0;
4870 #endif
4871 has_listeners = kauth_authorize_fileop_has_listeners();
4872
4873 need_kpath2 = 0;
4874 #if CONFIG_AUDIT
4875 if (AUDIT_RECORD_EXISTS()) {
4876 need_kpath2 = 1;
4877 }
4878 #endif
4879
4880 if (need_event || has_listeners || need_kpath2) {
4881 char *link_to_path = NULL;
4882 int len, link_name_len;
4883
4884 /* build the path to the new link file */
4885 GET_PATH(target_path);
4886 if (target_path == NULL) {
4887 error = ENOMEM;
4888 goto out2;
4889 }
4890
4891 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
4892
4893 AUDIT_ARG(kpath, target_path, ARG_KPATH2);
4894
4895 if (has_listeners) {
4896 /* build the path to file we are linking to */
4897 GET_PATH(link_to_path);
4898 if (link_to_path == NULL) {
4899 error = ENOMEM;
4900 goto out2;
4901 }
4902
4903 link_name_len = MAXPATHLEN;
4904 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
4905 /*
4906 * Call out to allow 3rd party notification of the link.
4907 * Ignore result of kauth_authorize_fileop call.
4908 */
4909 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
4910 (uintptr_t)link_to_path,
4911 (uintptr_t)target_path);
4912 }
4913 if (link_to_path != NULL) {
4914 RELEASE_PATH(link_to_path);
4915 }
4916 }
4917 #if CONFIG_FSE
4918 if (need_event) {
4919 /* construct fsevent */
4920 if (get_fse_info(vp, &finfo, ctx) == 0) {
4921 if (truncated) {
4922 finfo.mode |= FSE_TRUNCATED_PATH;
4923 }
4924
4925 // build the path to the destination of the link
4926 add_fsevent(FSE_CREATE_FILE, ctx,
4927 FSE_ARG_STRING, len, target_path,
4928 FSE_ARG_FINFO, &finfo,
4929 FSE_ARG_DONE);
4930 }
4931
4932 pvp = vp->v_parent;
4933 // need an iocount on pvp in this case
4934 if (pvp && pvp != dvp) {
4935 error = vnode_get(pvp);
4936 if (error) {
4937 pvp = NULLVP;
4938 error = 0;
4939 }
4940 }
4941 if (pvp) {
4942 add_fsevent(FSE_STAT_CHANGED, ctx,
4943 FSE_ARG_VNODE, pvp, FSE_ARG_DONE);
4944 }
4945 if (pvp && pvp != dvp) {
4946 vnode_put(pvp);
4947 }
4948 }
4949 #endif
4950 }
4951 out2:
4952 /*
4953 * nameidone has to happen before we vnode_put(dvp)
4954 * since it may need to release the fs_nodelock on the dvp
4955 */
4956 nameidone(&nd);
4957 if (target_path != NULL) {
4958 RELEASE_PATH(target_path);
4959 }
4960 out:
4961 if (lvp) {
4962 vnode_put(lvp);
4963 }
4964 if (dvp) {
4965 vnode_put(dvp);
4966 }
4967 vnode_put(vp);
4968 return error;
4969 }
4970
4971 int
4972 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4973 {
4974 return linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4975 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE);
4976 }
4977
4978 int
4979 linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4980 {
4981 if (uap->flag & ~AT_SYMLINK_FOLLOW) {
4982 return EINVAL;
4983 }
4984
4985 return linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4986 uap->fd2, uap->link, uap->flag, UIO_USERSPACE);
4987 }
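/*
 * Illustrative only: a minimal user-space sketch of link()/linkat().  As
 * enforced above, AT_SYMLINK_FOLLOW is the only flag linkat() accepts, and
 * passing it makes a symlink source resolve to its target before the hard
 * link is created.  Paths are arbitrary examples.
 *
 *     #include <fcntl.h>
 *     #include <unistd.h>
 *
 *     int
 *     make_links(void)
 *     {
 *             // plain hard link: /tmp/a and /tmp/b name the same file
 *             if (link("/tmp/a", "/tmp/b") != 0)
 *                     return -1;
 *
 *             // if /tmp/a is a symlink, hard-link its target as /tmp/c
 *             return linkat(AT_FDCWD, "/tmp/a", AT_FDCWD, "/tmp/c",
 *                 AT_SYMLINK_FOLLOW);
 *     }
 */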
4988
4989 /*
4990 * Make a symbolic link.
4991 *
4992 * We could add support for ACLs here too...
4993 */
4994 /* ARGSUSED */
4995 static int
4996 symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
4997 user_addr_t link, enum uio_seg segflg)
4998 {
4999 struct vnode_attr va;
5000 char *path;
5001 int error;
5002 struct nameidata nd;
5003 vnode_t vp, dvp;
5004 size_t dummy = 0;
5005 proc_t p;
5006
5007 error = 0;
5008 if (UIO_SEG_IS_USER_SPACE(segflg)) {
5009 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
5010 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
5011 } else {
5012 path = (char *)path_data;
5013 }
5014 if (error) {
5015 goto out;
5016 }
5017 AUDIT_ARG(text, path); /* This is the link string */
5018
5019 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
5020 segflg, link, ctx);
5021
5022 error = nameiat(&nd, fd);
5023 if (error) {
5024 goto out;
5025 }
5026 dvp = nd.ni_dvp;
5027 vp = nd.ni_vp;
5028
5029 p = vfs_context_proc(ctx);
5030 VATTR_INIT(&va);
5031 VATTR_SET(&va, va_type, VLNK);
5032 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
5033
5034 #if CONFIG_MACF
5035 error = mac_vnode_check_create(ctx,
5036 dvp, &nd.ni_cnd, &va);
5037 #endif
5038 if (error != 0) {
5039 goto skipit;
5040 }
5041
5042 if (vp != NULL) {
5043 error = EEXIST;
5044 goto skipit;
5045 }
5046
5047 /* authorize */
5048 if (error == 0) {
5049 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
5050 }
5051 /* get default ownership, etc. */
5052 if (error == 0) {
5053 error = vnode_authattr_new(dvp, &va, 0, ctx);
5054 }
5055 if (error == 0) {
5056 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
5057 }
5058
5059 #if CONFIG_MACF
5060 if (error == 0 && vp) {
5061 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
5062 }
5063 #endif
5064
5065 /* do fallback attribute handling */
5066 if (error == 0 && vp) {
5067 error = vnode_setattr_fallback(vp, &va, ctx);
5068 }
5069
5070 if (error == 0) {
5071 int update_flags = 0;
5072
5073 /*check if a new vnode was created, else try to get one*/
5074 if (vp == NULL) {
5075 nd.ni_cnd.cn_nameiop = LOOKUP;
5076 #if CONFIG_TRIGGERS
5077 nd.ni_op = OP_LOOKUP;
5078 #endif
5079 nd.ni_cnd.cn_flags = 0;
5080 error = nameiat(&nd, fd);
5081 vp = nd.ni_vp;
5082
5083 if (vp == NULL) {
5084 goto skipit;
5085 }
5086 }
5087
5088 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
5089 /* call out to allow 3rd party notification of rename.
5090 * Ignore result of kauth_authorize_fileop call.
5091 */
5092 if (kauth_authorize_fileop_has_listeners() &&
5093 namei(&nd) == 0) {
5094 char *new_link_path = NULL;
5095 int len;
5096
5097 /* build the path to the new link file */
5098 new_link_path = get_pathbuff();
5099 len = MAXPATHLEN;
5100 vn_getpath(dvp, new_link_path, &len);
5101 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
5102 new_link_path[len - 1] = '/';
5103 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN - len);
5104 }
5105
5106 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
5107 (uintptr_t)path, (uintptr_t)new_link_path);
5108 if (new_link_path != NULL) {
5109 release_pathbuff(new_link_path);
5110 }
5111 }
5112 #endif
5113 // Make sure the name & parent pointers are hooked up
5114 if (vp->v_name == NULL) {
5115 update_flags |= VNODE_UPDATE_NAME;
5116 }
5117 if (vp->v_parent == NULLVP) {
5118 update_flags |= VNODE_UPDATE_PARENT;
5119 }
5120
5121 if (update_flags) {
5122 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
5123 }
5124
5125 #if CONFIG_FSE
5126 add_fsevent(FSE_CREATE_FILE, ctx,
5127 FSE_ARG_VNODE, vp,
5128 FSE_ARG_DONE);
5129 #endif
5130 }
5131
5132 skipit:
5133 /*
5134 * nameidone has to happen before we vnode_put(dvp)
5135 * since it may need to release the fs_nodelock on the dvp
5136 */
5137 nameidone(&nd);
5138
5139 if (vp) {
5140 vnode_put(vp);
5141 }
5142 vnode_put(dvp);
5143 out:
5144 if (path && (path != (char *)path_data)) {
5145 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
5146 }
5147
5148 return error;
5149 }
5150
5151 int
5152 symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
5153 {
5154 return symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
5155 uap->link, UIO_USERSPACE);
5156 }
5157
5158 int
5159 symlinkat(__unused proc_t p, struct symlinkat_args *uap,
5160 __unused int32_t *retval)
5161 {
5162 return symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
5163 uap->path2, UIO_USERSPACE);
5164 }
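/*
 * Illustrative only: a minimal user-space sketch of symlink()/symlinkat().
 * The link contents are stored verbatim and, as in symlinkat_internal()
 * above, the target need not exist when the link is created.  Paths are
 * arbitrary examples.
 *
 *     #include <unistd.h>
 *
 *     int
 *     make_symlinks(int dirfd)
 *     {
 *             // link created in the cwd, pointing at an absolute path
 *             if (symlink("/tmp/target.txt", "alias-abs") != 0)
 *                     return -1;
 *
 *             // relative link created inside the directory open at dirfd
 *             return symlinkat("target.txt", dirfd, "alias-rel");
 *     }
 */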
5165
5166 /*
5167 * Delete a whiteout from the filesystem.
5168 * No longer supported.
5169 */
5170 int
5171 undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
5172 {
5173 return ENOTSUP;
5174 }
5175
5176 /*
5177 * Delete a name from the filesystem.
5178 */
5179 /* ARGSUSED */
5180 static int
5181 unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
5182 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
5183 {
5184 struct nameidata nd;
5185 vnode_t vp, dvp;
5186 int error;
5187 struct componentname *cnp;
5188 char *path = NULL;
5189 char *no_firmlink_path = NULL;
5190 int len_path = 0;
5191 int len_no_firmlink_path = 0;
5192 #if CONFIG_FSE
5193 fse_info finfo;
5194 struct vnode_attr va;
5195 #endif
5196 int flags;
5197 int need_event;
5198 int has_listeners;
5199 int truncated_path;
5200 int truncated_no_firmlink_path;
5201 int batched;
5202 struct vnode_attr *vap;
5203 int do_retry;
5204 int retry_count = 0;
5205 int cn_flags;
5206
5207 cn_flags = LOCKPARENT;
5208 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH)) {
5209 cn_flags |= AUDITVNPATH1;
5210 }
5211 /* If a starting dvp is passed, it trumps any fd passed. */
5212 if (start_dvp) {
5213 cn_flags |= USEDVP;
5214 }
5215
5216 #if NAMEDRSRCFORK
5217 /* unlink or delete is allowed on rsrc forks and named streams */
5218 cn_flags |= CN_ALLOWRSRCFORK;
5219 #endif
5220
5221 retry:
5222 do_retry = 0;
5223 flags = 0;
5224 need_event = 0;
5225 has_listeners = 0;
5226 truncated_path = 0;
5227 truncated_no_firmlink_path = 0;
5228 vap = NULL;
5229
5230 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
5231
5232 nd.ni_dvp = start_dvp;
5233 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
5234 cnp = &nd.ni_cnd;
5235
5236 continue_lookup:
5237 error = nameiat(&nd, fd);
5238 if (error) {
5239 return error;
5240 }
5241
5242 dvp = nd.ni_dvp;
5243 vp = nd.ni_vp;
5244
5245
5246 /* With Carbon delete semantics, busy files cannot be deleted */
5247 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
5248 flags |= VNODE_REMOVE_NODELETEBUSY;
5249 }
5250
5251 /* Skip any potential upcalls if told to. */
5252 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
5253 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
5254 }
5255
5256 if (vp) {
5257 batched = vnode_compound_remove_available(vp);
5258 /*
5259 * The root of a mounted filesystem cannot be deleted.
5260 */
5261 if ((vp->v_flag & VROOT) || (dvp->v_mount != vp->v_mount)) {
5262 error = EBUSY;
5263 goto out;
5264 }
5265
5266 #if DEVELOPMENT || DEBUG
5267 /*
5268 * XXX VSWAP: Check for entitlements or special flag here
5269 * so we can restrict access appropriately.
5270 */
5271 #else /* DEVELOPMENT || DEBUG */
5272
5273 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
5274 error = EPERM;
5275 goto out;
5276 }
5277 #endif /* DEVELOPMENT || DEBUG */
5278
5279 if (!batched) {
5280 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
5281 if (error) {
5282 if (error == ENOENT) {
5283 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
5284 do_retry = 1;
5285 retry_count++;
5286 }
5287 }
5288 goto out;
5289 }
5290 }
5291 } else {
5292 batched = 1;
5293
5294 if (!vnode_compound_remove_available(dvp)) {
5295 panic("No vp, but no compound remove?");
5296 }
5297 }
5298
5299 #if CONFIG_FSE
5300 need_event = need_fsevent(FSE_DELETE, dvp);
5301 if (need_event) {
5302 if (!batched) {
5303 if ((vp->v_flag & VISHARDLINK) == 0) {
5304 /* XXX need to get these data in batched VNOP */
5305 get_fse_info(vp, &finfo, ctx);
5306 }
5307 } else {
5308 error = vfs_get_notify_attributes(&va);
5309 if (error) {
5310 goto out;
5311 }
5312
5313 vap = &va;
5314 }
5315 }
5316 #endif
5317 has_listeners = kauth_authorize_fileop_has_listeners();
5318 if (need_event || has_listeners) {
5319 if (path == NULL) {
5320 GET_PATH(path);
5321 if (path == NULL) {
5322 error = ENOMEM;
5323 goto out;
5324 }
5325 }
5326 len_path = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
5327 if (no_firmlink_path == NULL) {
5328 GET_PATH(no_firmlink_path);
5329 if (no_firmlink_path == NULL) {
5330 error = ENOMEM;
5331 goto out;
5332 }
5333 }
5334 len_no_firmlink_path = safe_getpath_no_firmlink(dvp, nd.ni_cnd.cn_nameptr, no_firmlink_path, MAXPATHLEN, &truncated_no_firmlink_path);
5335 }
5336
5337 #if NAMEDRSRCFORK
5338 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK) {
5339 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
5340 } else
5341 #endif
5342 {
5343 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
5344 vp = nd.ni_vp;
5345 if (error == EKEEPLOOKING) {
5346 if (!batched) {
5347 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
5348 }
5349
5350 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
5351 panic("EKEEPLOOKING, but continue flag not set?");
5352 }
5353
5354 if (vnode_isdir(vp)) {
5355 error = EISDIR;
5356 goto out;
5357 }
5358 goto continue_lookup;
5359 } else if (error == ENOENT && batched) {
5360 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
5361 /*
5362 * For compound VNOPs, the authorization callback may
5363 * return ENOENT in case of racing hardlink lookups
5364 * hitting the name cache, redrive the lookup.
5365 */
5366 do_retry = 1;
5367 retry_count += 1;
5368 goto out;
5369 }
5370 }
5371 }
5372
5373 /*
5374 * Call out to allow 3rd party notification of delete.
5375 * Ignore result of kauth_authorize_fileop call.
5376 */
5377 if (!error) {
5378 if (has_listeners) {
5379 kauth_authorize_fileop(vfs_context_ucred(ctx),
5380 KAUTH_FILEOP_DELETE,
5381 (uintptr_t)vp,
5382 (uintptr_t)path);
5383 }
5384
5385 if (vp->v_flag & VISHARDLINK) {
5386 //
5387 // if a hardlink gets deleted we want to blow away the
5388 // v_parent link because the path that got us to this
5389 // instance of the link is no longer valid. this will
5390 // force the next call to get the path to ask the file
5391 // system instead of just following the v_parent link.
5392 //
5393 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
5394 }
5395
5396 #if CONFIG_FSE
5397 if (need_event) {
5398 if (vp->v_flag & VISHARDLINK) {
5399 get_fse_info(vp, &finfo, ctx);
5400 } else if (vap) {
5401 vnode_get_fse_info_from_vap(vp, &finfo, vap);
5402 }
5403 if (truncated_path) {
5404 finfo.mode |= FSE_TRUNCATED_PATH;
5405 }
5406 add_fsevent(FSE_DELETE, ctx,
5407 FSE_ARG_STRING, len_no_firmlink_path, no_firmlink_path,
5408 FSE_ARG_FINFO, &finfo,
5409 FSE_ARG_DONE);
5410 }
5411 #endif
5412 }
5413
5414 out:
5415 if (path != NULL) {
5416 RELEASE_PATH(path);
5417 path = NULL;
5418 }
5419
5420 if (no_firmlink_path != NULL) {
5421 RELEASE_PATH(no_firmlink_path);
5422 no_firmlink_path = NULL;
5423 }
5424 #if NAMEDRSRCFORK
5425 /* recycle the deleted rsrc fork vnode to force a reclaim, which
5426 * will cause its shadow file to go away if necessary.
5427 */
5428 if (vp && (vnode_isnamedstream(vp)) &&
5429 (vp->v_parent != NULLVP) &&
5430 vnode_isshadow(vp)) {
5431 vnode_recycle(vp);
5432 }
5433 #endif
5434 /*
5435 * nameidone has to happen before we vnode_put(dvp)
5436 * since it may need to release the fs_nodelock on the dvp
5437 */
5438 nameidone(&nd);
5439 vnode_put(dvp);
5440 if (vp) {
5441 vnode_put(vp);
5442 }
5443
5444 if (do_retry) {
5445 goto retry;
5446 }
5447
5448 return error;
5449 }
5450
5451 int
5452 unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
5453 enum uio_seg segflg, int unlink_flags)
5454 {
5455 return unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
5456 unlink_flags);
5457 }
5458
5459 /*
5460 * Delete a name from the filesystem using Carbon semantics.
5461 */
5462 int
5463 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
5464 {
5465 return unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
5466 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY);
5467 }
5468
5469 /*
5470 * Delete a name from the filesystem using POSIX semantics.
5471 */
5472 int
5473 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
5474 {
5475 return unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
5476 uap->path, UIO_USERSPACE, 0);
5477 }
5478
5479 int
5480 unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
5481 {
5482 if (uap->flag & ~(AT_REMOVEDIR | AT_REMOVEDIR_DATALESS)) {
5483 return EINVAL;
5484 }
5485
5486 if (uap->flag & (AT_REMOVEDIR | AT_REMOVEDIR_DATALESS)) {
5487 int unlink_flags = 0;
5488
5489 if (uap->flag & AT_REMOVEDIR_DATALESS) {
5490 unlink_flags |= VNODE_REMOVE_DATALESS_DIR;
5491 }
5492 return rmdirat_internal(vfs_context_current(), uap->fd,
5493 uap->path, UIO_USERSPACE, unlink_flags);
5494 } else {
5495 return unlinkat_internal(vfs_context_current(), uap->fd,
5496 NULLVP, uap->path, UIO_USERSPACE, 0);
5497 }
5498 }
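/*
 * Illustrative only: a minimal user-space sketch of unlinkat().  As in the
 * handler above, AT_REMOVEDIR routes the call to the rmdir path, while no
 * flags gives plain unlink semantics.  Names are arbitrary examples.
 *
 *     #include <fcntl.h>
 *     #include <unistd.h>
 *
 *     int
 *     remove_both(int dirfd)
 *     {
 *             // remove a regular file relative to dirfd
 *             if (unlinkat(dirfd, "old.log", 0) != 0)
 *                     return -1;
 *
 *             // remove an (empty) directory relative to dirfd
 *             return unlinkat(dirfd, "old-dir", AT_REMOVEDIR);
 *     }
 */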
5499
5500 /*
5501 * Reposition read/write file offset.
5502 */
5503 int
5504 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
5505 {
5506 struct fileproc *fp;
5507 vnode_t vp;
5508 struct vfs_context *ctx;
5509 off_t offset = uap->offset, file_size;
5510 int error;
5511
5512 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
5513 if (error == ENOTSUP) {
5514 return ESPIPE;
5515 }
5516 return error;
5517 }
5518 if (vnode_isfifo(vp)) {
5519 file_drop(uap->fd);
5520 return ESPIPE;
5521 }
5522
5523
5524 ctx = vfs_context_current();
5525 #if CONFIG_MACF
5526 if (uap->whence == L_INCR && uap->offset == 0) {
5527 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
5528 fp->f_fglob);
5529 } else {
5530 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
5531 fp->f_fglob);
5532 }
5533 if (error) {
5534 file_drop(uap->fd);
5535 return error;
5536 }
5537 #endif
5538 if ((error = vnode_getwithref(vp))) {
5539 file_drop(uap->fd);
5540 return error;
5541 }
5542
5543 switch (uap->whence) {
5544 case L_INCR:
5545 offset += fp->f_fglob->fg_offset;
5546 break;
5547 case L_XTND:
5548 if ((error = vnode_size(vp, &file_size, ctx)) != 0) {
5549 break;
5550 }
5551 offset += file_size;
5552 break;
5553 case L_SET:
5554 break;
5555 case SEEK_HOLE:
5556 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKHOLE, (caddr_t)&offset, 0, ctx);
5557 break;
5558 case SEEK_DATA:
5559 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKDATA, (caddr_t)&offset, 0, ctx);
5560 break;
5561 default:
5562 error = EINVAL;
5563 }
5564 if (error == 0) {
5565 if (uap->offset > 0 && offset < 0) {
5566 /* Incremented/relative move past max size */
5567 error = EOVERFLOW;
5568 } else {
5569 /*
5570 * Allow negative offsets on character devices, per
5571 * POSIX 1003.1-2001. Most likely for writing disk
5572 * labels.
5573 */
5574 if (offset < 0 && vp->v_type != VCHR) {
5575 /* Decremented/relative move before start */
5576 error = EINVAL;
5577 } else {
5578 /* Success */
5579 fp->f_fglob->fg_offset = offset;
5580 *retval = fp->f_fglob->fg_offset;
5581 }
5582 }
5583 }
5584
5585 /*
5586 * An lseek can affect whether data is "available to read." Use
5587 * hint of NOTE_NONE so no EVFILT_VNODE events fire.
5588 */
5589 post_event_if_success(vp, error, NOTE_NONE);
5590 (void)vnode_put(vp);
5591 file_drop(uap->fd);
5592 return error;
5593 }
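/*
 * Illustrative only: a user-space sketch of the whence values handled
 * above, assuming SEEK_HOLE and SEEK_DATA are exposed by the SDK headers.
 * It finds the first data region of a (possibly sparse) file open at fd.
 *
 *     #include <sys/types.h>
 *     #include <unistd.h>
 *
 *     int
 *     first_data_region(int fd, off_t *start, off_t *end)
 *     {
 *             off_t data = lseek(fd, 0, SEEK_DATA);    // first byte of data
 *             if (data < 0)
 *                     return -1;                       // e.g. entirely a hole
 *             off_t hole = lseek(fd, data, SEEK_HOLE); // hole after that data
 *             if (hole < 0)
 *                     return -1;
 *             *start = data;
 *             *end = hole;
 *             return 0;
 *     }
 */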
5594
5595
5596 /*
5597 * Check access permissions.
5598 *
5599 * Returns: 0 Success
5600 * vnode_authorize:???
5601 */
5602 static int
5603 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
5604 {
5605 kauth_action_t action;
5606 int error;
5607
5608 /*
5609 * If just the regular access bits, convert them to something
5610 * that vnode_authorize will understand.
5611 */
5612 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
5613 action = 0;
5614 if (uflags & R_OK) {
5615 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5616 }
5617 if (uflags & W_OK) {
5618 if (vnode_isdir(vp)) {
5619 action |= KAUTH_VNODE_ADD_FILE |
5620 KAUTH_VNODE_ADD_SUBDIRECTORY;
5621 /* might want delete rights here too */
5622 } else {
5623 action |= KAUTH_VNODE_WRITE_DATA;
5624 }
5625 }
5626 if (uflags & X_OK) {
5627 if (vnode_isdir(vp)) {
5628 action |= KAUTH_VNODE_SEARCH;
5629 } else {
5630 action |= KAUTH_VNODE_EXECUTE;
5631 }
5632 }
5633 } else {
5634 /* take advantage of definition of uflags */
5635 action = uflags >> 8;
5636 }
5637
5638 #if CONFIG_MACF
5639 error = mac_vnode_check_access(ctx, vp, uflags);
5640 if (error) {
5641 return error;
5642 }
5643 #endif /* MAC */
5644
5645 /* action == 0 means only check for existence */
5646 if (action != 0) {
5647 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
5648 } else {
5649 error = 0;
5650 }
5651
5652 return error;
5653 }
5654
5655
5656
5657 /*
5658 * access_extended: Check access permissions in bulk.
5659 *
5660 * Description: uap->entries Pointer to an array of accessx
5661 * descriptor structs, plus one or
5662 * more NULL terminated strings (see
5663 * "Notes" section below).
5664 * uap->size Size of the area pointed to by
5665 * uap->entries.
5666 * uap->results Pointer to the results array.
5667 *
5668 * Returns: 0 Success
5669 * ENOMEM Insufficient memory
5670 * EINVAL Invalid arguments
5671 * namei:EFAULT Bad address
5672 * namei:ENAMETOOLONG Filename too long
5673 * namei:ENOENT No such file or directory
5674 * namei:ELOOP Too many levels of symbolic links
5675 * namei:EBADF Bad file descriptor
5676 * namei:ENOTDIR Not a directory
5677 * namei:???
5678 * access1:
5679 *
5680 * Implicit returns:
5681 * uap->results Array contents modified
5682 *
5683 * Notes: The uap->entries are structured as an arbitrary length array
5684 * of accessx descriptors, followed by one or more NULL terminated
5685 * strings
5686 *
5687 * struct accessx_descriptor[0]
5688 * ...
5689 * struct accessx_descriptor[n]
5690 * char name_data[0];
5691 *
5692 * We determine the entry count by walking the buffer containing
5693 * the uap->entries argument descriptor. For each descriptor we
5694 * see, the valid values for the offset ad_name_offset will be
5695 * in the byte range:
5696 *
5697 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5698 * to
5699 * [ uap->entries + uap->size - 2 ]
5700 *
5701 * since we must have at least one string, and the string must
5702 * be at least one character plus the NULL terminator in length.
5703 *
5704 * XXX: Need to support the check-as uid argument
5705 */
5706 int
5707 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
5708 {
5709 struct accessx_descriptor *input = NULL;
5710 errno_t *result = NULL;
5711 errno_t error = 0;
5712 int wantdelete = 0;
5713 unsigned int desc_max, desc_actual, i, j;
5714 struct vfs_context context;
5715 struct nameidata nd;
5716 int niopts;
5717 vnode_t vp = NULL;
5718 vnode_t dvp = NULL;
5719 #define ACCESSX_MAX_DESCR_ON_STACK 10
5720 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
5721
5722 context.vc_ucred = NULL;
5723
5724 /*
5725 * Validate parameters; if valid, copy the descriptor array and string
5726 * arguments into local memory. Before proceeding, the following
5727 * conditions must have been met:
5728 *
5729 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5730 * o There must be sufficient room in the request for at least one
5731 * descriptor and a one byte NUL terminated string.
5732 * o The allocation of local storage must not fail.
5733 */
5734 if (uap->size > ACCESSX_MAX_TABLESIZE) {
5735 return ENOMEM;
5736 }
5737 if (uap->size < (sizeof(struct accessx_descriptor) + 2)) {
5738 return EINVAL;
5739 }
5740 if (uap->size <= sizeof(stack_input)) {
5741 input = stack_input;
5742 } else {
5743 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
5744 if (input == NULL) {
5745 error = ENOMEM;
5746 goto out;
5747 }
5748 }
5749 error = copyin(uap->entries, input, uap->size);
5750 if (error) {
5751 goto out;
5752 }
5753
5754 AUDIT_ARG(opaque, input, uap->size);
5755
5756 /*
5757 * Force NUL termination of the copyin buffer to avoid namei() running
5758 * off the end. If the caller passes us bogus data, they may get a
5759 * bogus result.
5760 */
5761 ((char *)input)[uap->size - 1] = 0;
5762
5763 /*
5764 * Access is defined as checking against the process' real identity,
5765 * even if operations are checking the effective identity. This
5766 * requires that we use a local vfs context.
5767 */
5768 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5769 context.vc_thread = current_thread();
5770
5771 /*
5772 * Find out how many entries we have, so we can allocate the result
5773 * array by walking the list and adjusting the count downward by the
5774 * earliest string offset we see.
5775 */
5776 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
5777 desc_actual = desc_max;
5778 for (i = 0; i < desc_actual; i++) {
5779 /*
5780 * Take the offset to the name string for this entry and
5781 * convert to an input array index, which would be one off
5782 * the end of the array if this entry was the lowest-addressed
5783 * name string.
5784 */
5785 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
5786
5787 /*
5788 * An offset greater than the max allowable offset is an error.
5789 * It is also an error for any valid entry to point
5790 * to a location prior to the end of the current entry, if
5791 * it's not a reference to the string of the previous entry.
5792 */
5793 if (j > desc_max || (j != 0 && j <= i)) {
5794 error = EINVAL;
5795 goto out;
5796 }
5797
5798 /* Also do not let ad_name_offset point to something beyond the size of the input */
5799 if (input[i].ad_name_offset >= uap->size) {
5800 error = EINVAL;
5801 goto out;
5802 }
5803
5804 /*
5805 * An offset of 0 means use the previous descriptor's offset;
5806 * this is used to chain multiple requests for the same file
5807 * to avoid multiple lookups.
5808 */
5809 if (j == 0) {
5810 /* This is not valid for the first entry */
5811 if (i == 0) {
5812 error = EINVAL;
5813 goto out;
5814 }
5815 continue;
5816 }
5817
5818 /*
5819 * If the offset of the string for this descriptor is before
5820 * what we believe is the current actual last descriptor,
5821 * then we need to adjust our estimate downward; this permits
5822 * the string table following the last descriptor to be out
5823 * of order relative to the descriptor list.
5824 */
5825 if (j < desc_actual) {
5826 desc_actual = j;
5827 }
5828 }
5829
5830 /*
5831 * We limit the actual number of descriptors we are willing to process
5832 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5833 * requested exceeds this limit, we fail the request with ENOMEM.
5834 */
5835 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
5836 error = ENOMEM;
5837 goto out;
5838 }
5839 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK | M_ZERO);
5840 if (result == NULL) {
5841 error = ENOMEM;
5842 goto out;
5843 }
5844
5845 /*
5846 * Do the work by iterating over the descriptor entries we know to
5847 * at least appear to contain valid data.
5848 */
5849 error = 0;
5850 for (i = 0; i < desc_actual; i++) {
5851 /*
5852 * If the ad_name_offset is 0, then we use the previous
5853 * results to make the check; otherwise, we are looking up
5854 * a new file name.
5855 */
5856 if (input[i].ad_name_offset != 0) {
5857 /* discard old vnodes */
5858 if (vp) {
5859 vnode_put(vp);
5860 vp = NULL;
5861 }
5862 if (dvp) {
5863 vnode_put(dvp);
5864 dvp = NULL;
5865 }
5866
5867 /*
5868 * Scan forward in the descriptor list to see if we
5869 * need the parent vnode. We will need it if we are
5870 * deleting, since we must have rights to remove
5871 * entries in the parent directory, as well as the
5872 * rights to delete the object itself.
5873 */
5874 wantdelete = input[i].ad_flags & _DELETE_OK;
5875 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++) {
5876 if (input[j].ad_flags & _DELETE_OK) {
5877 wantdelete = 1;
5878 }
5879 }
5880
5881 niopts = FOLLOW | AUDITVNPATH1;
5882
5883 /* need parent for vnode_authorize for deletion test */
5884 if (wantdelete) {
5885 niopts |= WANTPARENT;
5886 }
5887
5888 /* do the lookup */
5889 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
5890 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
5891 &context);
5892 error = namei(&nd);
5893 if (!error) {
5894 vp = nd.ni_vp;
5895 if (wantdelete) {
5896 dvp = nd.ni_dvp;
5897 }
5898 }
5899 nameidone(&nd);
5900 }
5901
5902 /*
5903 * Handle lookup errors.
5904 */
5905 switch (error) {
5906 case ENOENT:
5907 case EACCES:
5908 case EPERM:
5909 case ENOTDIR:
5910 result[i] = error;
5911 break;
5912 case 0:
5913 /* run this access check */
5914 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
5915 break;
5916 default:
5917 /* fatal lookup error */
5918
5919 goto out;
5920 }
5921 }
5922
5923 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
5924
5925 /* copy out results */
5926 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
5927
5928 out:
5929 if (input && input != stack_input) {
5930 FREE(input, M_TEMP);
5931 }
5932 if (result) {
5933 FREE(result, M_TEMP);
5934 }
5935 if (vp) {
5936 vnode_put(vp);
5937 }
5938 if (dvp) {
5939 vnode_put(dvp);
5940 }
5941 if (IS_VALID_CRED(context.vc_ucred)) {
5942 kauth_cred_unref(&context.vc_ucred);
5943 }
5944 return error;
5945 }
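/*
 * Illustrative only: a hedged user-space sketch of the buffer layout
 * described in the Notes above, assuming the struct accessx_descriptor
 * definition in <sys/unistd.h> and an accessx_np() wrapper taking
 * (entries, size, results, uid).  The second descriptor uses an
 * ad_name_offset of 0 to reuse the first entry's name, the chaining
 * behaviour handled in the loop above.
 *
 *     #include <unistd.h>
 *     #include <sys/unistd.h>
 *     #include <string.h>
 *
 *     int
 *     check_read_and_write(const char *path)   // e.g. "/etc/hosts"
 *     {
 *             char buf[2 * sizeof(struct accessx_descriptor) + 256];
 *             struct accessx_descriptor *d = (struct accessx_descriptor *)buf;
 *             unsigned int names = 2 * sizeof(struct accessx_descriptor);
 *             int results[2];
 *
 *             memset(buf, 0, sizeof(buf));
 *             d[0].ad_name_offset = names;     // offset of the name string
 *             d[0].ad_flags = R_OK;
 *             d[1].ad_name_offset = 0;         // 0 => reuse previous name
 *             d[1].ad_flags = W_OK;
 *             strlcpy(buf + names, path, sizeof(buf) - names);
 *
 *             return accessx_np(d, names + strlen(buf + names) + 1,
 *                 results, getuid());
 *     }
 */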
5946
5947
5948 /*
5949 * Returns: 0 Success
5950 * namei:EFAULT Bad address
5951 * namei:ENAMETOOLONG Filename too long
5952 * namei:ENOENT No such file or directory
5953 * namei:ELOOP Too many levels of symbolic links
5954 * namei:EBADF Bad file descriptor
5955 * namei:ENOTDIR Not a directory
5956 * namei:???
5957 * access1:
5958 */
5959 static int
5960 faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5961 int flag, enum uio_seg segflg)
5962 {
5963 int error;
5964 struct nameidata nd;
5965 int niopts;
5966 struct vfs_context context;
5967 #if NAMEDRSRCFORK
5968 int is_namedstream = 0;
5969 #endif
5970
5971 /*
5972 * Unless the AT_EACCESS option is used, Access is defined as checking
5973 * against the process' real identity, even if operations are checking
5974 * the effective identity. So we need to tweak the credential
5975 * in the context for that case.
5976 */
5977 if (!(flag & AT_EACCESS)) {
5978 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5979 } else {
5980 context.vc_ucred = ctx->vc_ucred;
5981 }
5982 context.vc_thread = ctx->vc_thread;
5983
5984
5985 niopts = (flag & AT_SYMLINK_NOFOLLOW ? NOFOLLOW : FOLLOW) | AUDITVNPATH1;
5986 /* need parent for vnode_authorize for deletion test */
5987 if (amode & _DELETE_OK) {
5988 niopts |= WANTPARENT;
5989 }
5990 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5991 path, &context);
5992
5993 #if NAMEDRSRCFORK
5994 /* access(F_OK) calls are allowed for resource forks. */
5995 if (amode == F_OK) {
5996 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5997 }
5998 #endif
5999 error = nameiat(&nd, fd);
6000 if (error) {
6001 goto out;
6002 }
6003
6004 #if NAMEDRSRCFORK
6005 /* Grab reference on the shadow stream file vnode to
6006 * force an inactive on release which will mark it
6007 * for recycle.
6008 */
6009 if (vnode_isnamedstream(nd.ni_vp) &&
6010 (nd.ni_vp->v_parent != NULLVP) &&
6011 vnode_isshadow(nd.ni_vp)) {
6012 is_namedstream = 1;
6013 vnode_ref(nd.ni_vp);
6014 }
6015 #endif
6016
6017 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
6018
6019 #if NAMEDRSRCFORK
6020 if (is_namedstream) {
6021 vnode_rele(nd.ni_vp);
6022 }
6023 #endif
6024
6025 vnode_put(nd.ni_vp);
6026 if (amode & _DELETE_OK) {
6027 vnode_put(nd.ni_dvp);
6028 }
6029 nameidone(&nd);
6030
6031 out:
6032 if (!(flag & AT_EACCESS)) {
6033 kauth_cred_unref(&context.vc_ucred);
6034 }
6035 return error;
6036 }
6037
6038 int
6039 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
6040 {
6041 return faccessat_internal(vfs_context_current(), AT_FDCWD,
6042 uap->path, uap->flags, 0, UIO_USERSPACE);
6043 }
6044
6045 int
6046 faccessat(__unused proc_t p, struct faccessat_args *uap,
6047 __unused int32_t *retval)
6048 {
6049 if (uap->flag & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW)) {
6050 return EINVAL;
6051 }
6052
6053 return faccessat_internal(vfs_context_current(), uap->fd,
6054 uap->path, uap->amode, uap->flag, UIO_USERSPACE);
6055 }
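/*
 * Illustrative only: a minimal user-space sketch of access()/faccessat().
 * As in the handlers above, the default check uses the caller's real
 * uid/gid; AT_EACCESS switches to the effective identity, and
 * AT_SYMLINK_NOFOLLOW checks the link itself.  The path is an arbitrary
 * example.
 *
 *     #include <unistd.h>
 *     #include <fcntl.h>
 *
 *     int
 *     can_write_config(void)
 *     {
 *             // real-uid check, following symlinks
 *             if (access("/etc/example.conf", W_OK) == 0)
 *                     return 1;
 *
 *             // effective-uid check, not following a trailing symlink
 *             return faccessat(AT_FDCWD, "/etc/example.conf", W_OK,
 *                 AT_EACCESS | AT_SYMLINK_NOFOLLOW) == 0;
 *     }
 */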
6056
6057 /*
6058 * Returns: 0 Success
6059 * EFAULT
6060 * copyout:EFAULT
6061 * namei:???
6062 * vn_stat:???
6063 */
6064 static int
6065 fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
6066 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
6067 enum uio_seg segflg, int fd, int flag)
6068 {
6069 struct nameidata nd;
6070 int follow;
6071 union {
6072 struct stat sb;
6073 struct stat64 sb64;
6074 } source = {};
6075 union {
6076 struct user64_stat user64_sb;
6077 struct user32_stat user32_sb;
6078 struct user64_stat64 user64_sb64;
6079 struct user32_stat64 user32_sb64;
6080 } dest = {};
6081 caddr_t sbp;
6082 int error, my_size;
6083 kauth_filesec_t fsec;
6084 size_t xsecurity_bufsize;
6085 void * statptr;
6086 struct fileproc *fp = NULL;
6087 int needsrealdev = 0;
6088
6089 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6090 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
6091 segflg, path, ctx);
6092
6093 #if NAMEDRSRCFORK
6094 int is_namedstream = 0;
6095 /* stat calls are allowed for resource forks. */
6096 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
6097 #endif
6098
6099 if (flag & AT_FDONLY) {
6100 vnode_t fvp;
6101
6102 error = fp_getfvp(vfs_context_proc(ctx), fd, &fp, &fvp);
6103 if (error) {
6104 return error;
6105 }
6106 if ((error = vnode_getwithref(fvp))) {
6107 file_drop(fd);
6108 return error;
6109 }
6110 nd.ni_vp = fvp;
6111 } else {
6112 error = nameiat(&nd, fd);
6113 if (error) {
6114 return error;
6115 }
6116 }
6117 fsec = KAUTH_FILESEC_NONE;
6118
6119 statptr = (void *)&source;
6120
6121 #if NAMEDRSRCFORK
6122 /* Grab reference on the shadow stream file vnode to
6123 * force an inactive on release which will mark it
6124 * for recycle.
6125 */
6126 if (vnode_isnamedstream(nd.ni_vp) &&
6127 (nd.ni_vp->v_parent != NULLVP) &&
6128 vnode_isshadow(nd.ni_vp)) {
6129 is_namedstream = 1;
6130 vnode_ref(nd.ni_vp);
6131 }
6132 #endif
6133
6134 needsrealdev = flag & AT_REALDEV ? 1 : 0;
6135 if (fp && (xsecurity == USER_ADDR_NULL)) {
6136 /*
6137 * If the caller has the file open, and is not
6138 * requesting extended security information, we are
6139 * going to let them get the basic stat information.
6140 */
6141 error = vn_stat_noauth(nd.ni_vp, statptr, NULL, isstat64, needsrealdev, ctx,
6142 fp->f_fglob->fg_cred);
6143 } else {
6144 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL),
6145 isstat64, needsrealdev, ctx);
6146 }
6147
6148 #if NAMEDRSRCFORK
6149 if (is_namedstream) {
6150 vnode_rele(nd.ni_vp);
6151 }
6152 #endif
6153 vnode_put(nd.ni_vp);
6154 nameidone(&nd);
6155 if (fp) {
6156 file_drop(fd);
6157 fp = NULL;
6158 }
6159
6160 if (error) {
6161 return error;
6162 }
6163 /* Zap spare fields */
6164 if (isstat64 != 0) {
6165 source.sb64.st_lspare = 0;
6166 source.sb64.st_qspare[0] = 0LL;
6167 source.sb64.st_qspare[1] = 0LL;
6168 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
6169 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
6170 my_size = sizeof(dest.user64_sb64);
6171 sbp = (caddr_t)&dest.user64_sb64;
6172 } else {
6173 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
6174 my_size = sizeof(dest.user32_sb64);
6175 sbp = (caddr_t)&dest.user32_sb64;
6176 }
6177 /*
6178 * Check if we raced (post lookup) against the last unlink of a file.
6179 */
6180 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
6181 source.sb64.st_nlink = 1;
6182 }
6183 } else {
6184 source.sb.st_lspare = 0;
6185 source.sb.st_qspare[0] = 0LL;
6186 source.sb.st_qspare[1] = 0LL;
6187 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
6188 munge_user64_stat(&source.sb, &dest.user64_sb);
6189 my_size = sizeof(dest.user64_sb);
6190 sbp = (caddr_t)&dest.user64_sb;
6191 } else {
6192 munge_user32_stat(&source.sb, &dest.user32_sb);
6193 my_size = sizeof(dest.user32_sb);
6194 sbp = (caddr_t)&dest.user32_sb;
6195 }
6196
6197 /*
6198 * Check if we raced (post lookup) against the last unlink of a file.
6199 */
6200 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
6201 source.sb.st_nlink = 1;
6202 }
6203 }
6204 if ((error = copyout(sbp, ub, my_size)) != 0) {
6205 goto out;
6206 }
6207
6208 /* caller wants extended security information? */
6209 if (xsecurity != USER_ADDR_NULL) {
6210 /* did we get any? */
6211 if (fsec == KAUTH_FILESEC_NONE) {
6212 if (susize(xsecurity_size, 0) != 0) {
6213 error = EFAULT;
6214 goto out;
6215 }
6216 } else {
6217 /* find the user buffer size */
6218 xsecurity_bufsize = fusize(xsecurity_size);
6219
6220 /* copy out the actual data size */
6221 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
6222 error = EFAULT;
6223 goto out;
6224 }
6225
6226 /* if the caller supplied enough room, copy out to it */
6227 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
6228 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
6229 }
6230 }
6231 }
6232 out:
6233 if (fsec != KAUTH_FILESEC_NONE) {
6234 kauth_filesec_free(fsec);
6235 }
6236 return error;
6237 }
6238
6239 /*
6240 * stat_extended: Get file status; with extended security (ACL).
6241 *
6242 * Parameters: p (ignored)
6243 * uap User argument descriptor (see below)
6244 * retval (ignored)
6245 *
6246 * Indirect: uap->path Path of file to get status from
6247 * uap->ub User buffer (holds file status info)
6248 * uap->xsecurity ACL to get (extended security)
6249 * uap->xsecurity_size Size of ACL
6250 *
6251 * Returns: 0 Success
6252 * !0 errno value
6253 *
6254 */
6255 int
6256 stat_extended(__unused proc_t p, struct stat_extended_args *uap,
6257 __unused int32_t *retval)
6258 {
6259 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6260 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
6261 0);
6262 }
6263
6264 /*
6265 * Returns: 0 Success
6266 * fstatat_internal:??? [see fstatat_internal() in this file]
6267 */
6268 int
6269 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
6270 {
6271 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6272 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0);
6273 }
6274
6275 int
6276 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
6277 {
6278 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6279 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0);
6280 }
6281
6282 /*
6283 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
6284 *
6285 * Parameters: p (ignored)
6286 * uap User argument descriptor (see below)
6287 * retval (ignored)
6288 *
6289 * Indirect: uap->path Path of file to get status from
6290 * uap->ub User buffer (holds file status info)
6291 * uap->xsecurity ACL to get (extended security)
6292 * uap->xsecurity_size Size of ACL
6293 *
6294 * Returns: 0 Success
6295 * !0 errno value
6296 *
6297 */
6298 int
6299 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
6300 {
6301 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6302 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
6303 0);
6304 }
6305
6306 /*
6307 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
6308 *
6309 * Parameters: p (ignored)
6310 * uap User argument descriptor (see below)
6311 * retval (ignored)
6312 *
6313 * Indirect: uap->path Path of file to get status from
6314 * uap->ub User buffer (holds file status info)
6315 * uap->xsecurity ACL to get (extended security)
6316 * uap->xsecurity_size Size of ACL
6317 *
6318 * Returns: 0 Success
6319 * !0 errno value
6320 *
6321 */
6322 int
6323 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
6324 {
6325 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6326 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
6327 AT_SYMLINK_NOFOLLOW);
6328 }
6329
6330 /*
6331 * Get file status; this version does not follow links.
6332 */
6333 int
6334 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
6335 {
6336 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6337 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW);
6338 }
6339
6340 int
6341 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
6342 {
6343 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6344 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW);
6345 }
6346
6347 /*
6348 * lstat64_extended: Get file status; can handle large inode numbers; does not
6349 * follow links; with extended security (ACL).
6350 *
6351 * Parameters: p (ignored)
6352 * uap User argument descriptor (see below)
6353 * retval (ignored)
6354 *
6355 * Indirect: uap->path Path of file to get status from
6356 * uap->ub User buffer (holds file status info)
6357 * uap->xsecurity ACL to get (extended security)
6358 * uap->xsecurity_size Size of ACL
6359 *
6360 * Returns: 0 Success
6361 * !0 errno value
6362 *
6363 */
6364 int
6365 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
6366 {
6367 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6368 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
6369 AT_SYMLINK_NOFOLLOW);
6370 }
6371
6372 int
6373 fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
6374 {
6375 if (uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_REALDEV | AT_FDONLY)) {
6376 return EINVAL;
6377 }
6378
6379 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6380 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag);
6381 }
6382
6383 int
6384 fstatat64(__unused proc_t p, struct fstatat64_args *uap,
6385 __unused int32_t *retval)
6386 {
6387 if (uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_REALDEV | AT_FDONLY)) {
6388 return EINVAL;
6389 }
6390
6391 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6392 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag);
6393 }
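/*
 * Usage sketch (userspace, illustrative; the path below is hypothetical):
 * fstatat()/fstatat64() accept AT_SYMLINK_NOFOLLOW, AT_REALDEV and
 * AT_FDONLY, which map onto the flag argument of fstatat_internal() above.
 * A minimal caller that stats a symlink itself rather than its target:
 *
 *	#include <fcntl.h>
 *	#include <sys/stat.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct stat sb;
 *
 *		if (fstatat(AT_FDCWD, "/tmp/somelink", &sb,
 *		    AT_SYMLINK_NOFOLLOW) == -1) {
 *			perror("fstatat");
 *			return 1;
 *		}
 *		printf("mode 0%o size %lld\n", sb.st_mode & 07777,
 *		    (long long)sb.st_size);
 *		return 0;
 *	}
 */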
6394
6395 /*
6396 * Get configurable pathname variables.
6397 *
6398 * Returns: 0 Success
6399 * namei:???
6400 * vn_pathconf:???
6401 *
6402 * Notes: Global implementation constants are intended to be
6403 * implemented in this function directly; all other constants
6404 * are per-FS implementation, and therefore must be handled in
6405 * each respective FS, instead.
6406 *
6407 * XXX We implement some things globally right now that should actually be
6408 * XXX per-FS; we will need to deal with this at some point.
6409 */
6410 /* ARGSUSED */
6411 int
6412 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
6413 {
6414 int error;
6415 struct nameidata nd;
6416 vfs_context_t ctx = vfs_context_current();
6417
6418 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
6419 UIO_USERSPACE, uap->path, ctx);
6420 error = namei(&nd);
6421 if (error) {
6422 return error;
6423 }
6424
6425 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
6426
6427 vnode_put(nd.ni_vp);
6428 nameidone(&nd);
6429 return error;
6430 }
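/*
 * Usage sketch (userspace, illustrative; the path is hypothetical): the
 * pathconf() wrapper above resolves the path and defers per-filesystem
 * selectors to vn_pathconf().  For limit-style selectors, -1 with errno
 * left unchanged means "no limit":
 *
 *	#include <unistd.h>
 *	#include <errno.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		long name_max;
 *
 *		errno = 0;
 *		name_max = pathconf("/tmp", _PC_NAME_MAX);
 *		if (name_max == -1 && errno != 0) {
 *			perror("pathconf");
 *			return 1;
 *		}
 *		printf("_PC_NAME_MAX = %ld\n", name_max);
 *		return 0;
 *	}
 */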
6431
6432 /*
6433 * Return target name of a symbolic link.
6434 */
6435 /* ARGSUSED */
6436 static int
6437 readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
6438 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
6439 int *retval)
6440 {
6441 vnode_t vp;
6442 uio_t auio;
6443 int error;
6444 struct nameidata nd;
6445 char uio_buf[UIO_SIZEOF(1)];
6446
6447 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
6448 seg, path, ctx);
6449
6450 error = nameiat(&nd, fd);
6451 if (error) {
6452 return error;
6453 }
6454 vp = nd.ni_vp;
6455
6456 nameidone(&nd);
6457
6458 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
6459 &uio_buf[0], sizeof(uio_buf));
6460 uio_addiov(auio, buf, bufsize);
6461 if (vp->v_type != VLNK) {
6462 error = EINVAL;
6463 } else {
6464 #if CONFIG_MACF
6465 error = mac_vnode_check_readlink(ctx, vp);
6466 #endif
6467 if (error == 0) {
6468 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
6469 ctx);
6470 }
6471 if (error == 0) {
6472 error = VNOP_READLINK(vp, auio, ctx);
6473 }
6474 }
6475 vnode_put(vp);
6476
6477 *retval = bufsize - (int)uio_resid(auio);
6478 return error;
6479 }
6480
6481 int
6482 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
6483 {
6484 enum uio_seg procseg;
6485
6486 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6487 return readlinkat_internal(vfs_context_current(), AT_FDCWD,
6488 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
6489 uap->count, procseg, retval);
6490 }
6491
6492 int
6493 readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
6494 {
6495 enum uio_seg procseg;
6496
6497 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6498 return readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
6499 procseg, uap->buf, uap->bufsize, procseg, retval);
6500 }
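/*
 * Usage sketch (userspace, illustrative; the link path is hypothetical):
 * readlink()/readlinkat() return the number of bytes placed in the buffer
 * (bufsize minus the residual, as computed above) and do not NUL-terminate
 * the result, so the caller terminates it:
 *
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		char buf[1024];
 *		ssize_t n;
 *
 *		n = readlink("/tmp/somelink", buf, sizeof(buf) - 1);
 *		if (n == -1) {
 *			perror("readlink");
 *			return 1;
 *		}
 *		buf[n] = '\0';
 *		printf("-> %s\n", buf);
 *		return 0;
 *	}
 */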
6501
6502 /*
6503 * Change file flags, the deep inner layer.
6504 */
6505 static int
6506 chflags0(vnode_t vp, struct vnode_attr *va,
6507 int (*setattr)(vnode_t, void *, vfs_context_t),
6508 void *arg, vfs_context_t ctx)
6509 {
6510 kauth_action_t action = 0;
6511 int error;
6512
6513 #if CONFIG_MACF
6514 error = mac_vnode_check_setflags(ctx, vp, va->va_flags);
6515 if (error) {
6516 goto out;
6517 }
6518 #endif
6519
6520 /* request authorisation, disregard immutability */
6521 if ((error = vnode_authattr(vp, va, &action, ctx)) != 0) {
6522 goto out;
6523 }
6524 /*
6525 * Request that the auth layer disregard those file flags it's allowed to when
6526 * authorizing this operation; we need to do this in order to be able to
6527 * clear immutable flags.
6528 */
6529 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0)) {
6530 goto out;
6531 }
6532 error = (*setattr)(vp, arg, ctx);
6533
6534 #if CONFIG_MACF
6535 if (error == 0) {
6536 mac_vnode_notify_setflags(ctx, vp, va->va_flags);
6537 }
6538 #endif
6539
6540 out:
6541 return error;
6542 }
6543
6544 /*
6545 * Change file flags.
6546 *
6547 * NOTE: this will vnode_put() `vp'
6548 */
6549 static int
6550 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
6551 {
6552 struct vnode_attr va;
6553 int error;
6554
6555 VATTR_INIT(&va);
6556 VATTR_SET(&va, va_flags, flags);
6557
6558 error = chflags0(vp, &va, (void *)vnode_setattr, &va, ctx);
6559 vnode_put(vp);
6560
6561 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
6562 error = ENOTSUP;
6563 }
6564
6565 return error;
6566 }
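/*
 * Usage sketch (userspace, illustrative; the path is hypothetical):
 * chflags0() above asks the auth layer to disregard immutability
 * (KAUTH_VNODE_NOIMMUTABLE) precisely so that a flag set like the one
 * below can later be cleared by the owner:
 *
 *	#include <sys/stat.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		if (chflags("/tmp/lockme", UF_IMMUTABLE) == -1) {
 *			perror("chflags set");
 *			return 1;
 *		}
 *		if (chflags("/tmp/lockme", 0) == -1) {
 *			perror("chflags clear");
 *			return 1;
 *		}
 *		return 0;
 *	}
 */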
6567
6568 /*
6569 * Change flags of a file given a path name.
6570 */
6571 /* ARGSUSED */
6572 int
6573 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
6574 {
6575 vnode_t vp;
6576 vfs_context_t ctx = vfs_context_current();
6577 int error;
6578 struct nameidata nd;
6579
6580 AUDIT_ARG(fflags, uap->flags);
6581 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
6582 UIO_USERSPACE, uap->path, ctx);
6583 error = namei(&nd);
6584 if (error) {
6585 return error;
6586 }
6587 vp = nd.ni_vp;
6588 nameidone(&nd);
6589
6590 /* we don't vnode_put() here because chflags1 does so internally */
6591 error = chflags1(vp, uap->flags, ctx);
6592
6593 return error;
6594 }
6595
6596 /*
6597 * Change flags of a file given a file descriptor.
6598 */
6599 /* ARGSUSED */
6600 int
6601 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
6602 {
6603 vnode_t vp;
6604 int error;
6605
6606 AUDIT_ARG(fd, uap->fd);
6607 AUDIT_ARG(fflags, uap->flags);
6608 if ((error = file_vnode(uap->fd, &vp))) {
6609 return error;
6610 }
6611
6612 if ((error = vnode_getwithref(vp))) {
6613 file_drop(uap->fd);
6614 return error;
6615 }
6616
6617 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6618
6619 /* we don't vnode_put() here because chflags1 does so internally */
6620 error = chflags1(vp, uap->flags, vfs_context_current());
6621
6622 file_drop(uap->fd);
6623 return error;
6624 }
6625
6626 /*
6627 * Change security information on a filesystem object.
6628 *
6629 * Returns: 0 Success
6630 * EPERM Operation not permitted
6631 * vnode_authattr:??? [anything vnode_authattr can return]
6632 * vnode_authorize:??? [anything vnode_authorize can return]
6633 * vnode_setattr:??? [anything vnode_setattr can return]
6634 *
6635 * Notes: If vnode_authattr or vnode_authorize returns EACCES, it will be
6636 * translated to EPERM before being returned.
6637 */
6638 static int
6639 chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
6640 {
6641 kauth_action_t action;
6642 int error;
6643
6644 AUDIT_ARG(mode, vap->va_mode);
6645 /* XXX audit new args */
6646
6647 #if NAMEDSTREAMS
6648 /* chmod calls are not allowed for resource forks. */
6649 if (vp->v_flag & VISNAMEDSTREAM) {
6650 return EPERM;
6651 }
6652 #endif
6653
6654 #if CONFIG_MACF
6655 if (VATTR_IS_ACTIVE(vap, va_mode) &&
6656 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0) {
6657 return error;
6658 }
6659
6660 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
6661 if ((error = mac_vnode_check_setowner(ctx, vp,
6662 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
6663 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1))) {
6664 return error;
6665 }
6666 }
6667
6668 if (VATTR_IS_ACTIVE(vap, va_acl) &&
6669 (error = mac_vnode_check_setacl(ctx, vp, vap->va_acl))) {
6670 return error;
6671 }
6672 #endif
6673
6674 /* make sure that the caller is allowed to set this security information */
6675 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
6676 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6677 if (error == EACCES) {
6678 error = EPERM;
6679 }
6680 return error;
6681 }
6682
6683 if ((error = vnode_setattr(vp, vap, ctx)) != 0) {
6684 return error;
6685 }
6686
6687 #if CONFIG_MACF
6688 if (VATTR_IS_ACTIVE(vap, va_mode)) {
6689 mac_vnode_notify_setmode(ctx, vp, (mode_t)vap->va_mode);
6690 }
6691
6692 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
6693 mac_vnode_notify_setowner(ctx, vp,
6694 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
6695 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1);
6696 }
6697
6698 if (VATTR_IS_ACTIVE(vap, va_acl)) {
6699 mac_vnode_notify_setacl(ctx, vp, vap->va_acl);
6700 }
6701 #endif
6702
6703 return error;
6704 }
6705
6706
6707 /*
6708 * Change mode of a file given a path name.
6709 *
6710 * Returns: 0 Success
6711 * namei:??? [anything namei can return]
6712 * chmod_vnode:??? [anything chmod_vnode can return]
6713 */
6714 static int
6715 chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
6716 int fd, int flag, enum uio_seg segflg)
6717 {
6718 struct nameidata nd;
6719 int follow, error;
6720
6721 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6722 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
6723 segflg, path, ctx);
6724 if ((error = nameiat(&nd, fd))) {
6725 return error;
6726 }
6727 error = chmod_vnode(ctx, nd.ni_vp, vap);
6728 vnode_put(nd.ni_vp);
6729 nameidone(&nd);
6730 return error;
6731 }
6732
6733 /*
6734 * chmod_extended: Change the mode of a file given a path name; with extended
6735 * argument list (including extended security (ACL)).
6736 *
6737 * Parameters: p Process requesting the open
6738 * uap User argument descriptor (see below)
6739 * retval (ignored)
6740 *
6741 * Indirect: uap->path Path to object (same as 'chmod')
6742 * uap->uid UID to set
6743 * uap->gid GID to set
6744 * uap->mode File mode to set (same as 'chmod')
6745 * uap->xsecurity ACL to set (or delete)
6746 *
6747 * Returns: 0 Success
6748 * !0 errno value
6749 *
6750 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6751 *
6752 * XXX: We should enummerate the possible errno values here, and where
6753 * in the code they originated.
6754 */
6755 int
6756 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
6757 {
6758 int error;
6759 struct vnode_attr va;
6760 kauth_filesec_t xsecdst;
6761
6762 AUDIT_ARG(owner, uap->uid, uap->gid);
6763
6764 VATTR_INIT(&va);
6765 if (uap->mode != -1) {
6766 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6767 }
6768 if (uap->uid != KAUTH_UID_NONE) {
6769 VATTR_SET(&va, va_uid, uap->uid);
6770 }
6771 if (uap->gid != KAUTH_GID_NONE) {
6772 VATTR_SET(&va, va_gid, uap->gid);
6773 }
6774
6775 xsecdst = NULL;
6776 switch (uap->xsecurity) {
6777 /* explicit remove request */
6778 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6779 VATTR_SET(&va, va_acl, NULL);
6780 break;
6781 /* not being set */
6782 case USER_ADDR_NULL:
6783 break;
6784 default:
6785 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
6786 return error;
6787 }
6788 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6789 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
6790 }
6791
6792 error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
6793 UIO_USERSPACE);
6794
6795 if (xsecdst != NULL) {
6796 kauth_filesec_free(xsecdst);
6797 }
6798 return error;
6799 }
6800
6801 /*
6802 * Returns: 0 Success
6803 * chmodat:??? [anything chmodat can return]
6804 */
6805 static int
6806 fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
6807 int flag, enum uio_seg segflg)
6808 {
6809 struct vnode_attr va;
6810
6811 VATTR_INIT(&va);
6812 VATTR_SET(&va, va_mode, mode & ALLPERMS);
6813
6814 return chmodat(ctx, path, &va, fd, flag, segflg);
6815 }
6816
6817 int
6818 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
6819 {
6820 return fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6821 AT_FDCWD, 0, UIO_USERSPACE);
6822 }
6823
6824 int
6825 fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
6826 {
6827 if (uap->flag & ~AT_SYMLINK_NOFOLLOW) {
6828 return EINVAL;
6829 }
6830
6831 return fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6832 uap->fd, uap->flag, UIO_USERSPACE);
6833 }
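/*
 * Usage sketch (userspace, illustrative; the path and mode are hypothetical):
 * fchmodat() accepts only AT_SYMLINK_NOFOLLOW; any other flag bit fails the
 * check above with EINVAL, and the mode is masked with ALLPERMS by
 * fchmodat_internal():
 *
 *	#include <fcntl.h>
 *	#include <sys/stat.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		if (fchmodat(AT_FDCWD, "/tmp/afile", 0640, 0) == -1) {
 *			perror("fchmodat");
 *			return 1;
 *		}
 *		return 0;
 *	}
 */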
6834
6835 /*
6836 * Change mode of a file given a file descriptor.
6837 */
6838 static int
6839 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
6840 {
6841 vnode_t vp;
6842 int error;
6843
6844 AUDIT_ARG(fd, fd);
6845
6846 if ((error = file_vnode(fd, &vp)) != 0) {
6847 return error;
6848 }
6849 if ((error = vnode_getwithref(vp)) != 0) {
6850 file_drop(fd);
6851 return error;
6852 }
6853 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6854
6855 error = chmod_vnode(vfs_context_current(), vp, vap);
6856 (void)vnode_put(vp);
6857 file_drop(fd);
6858
6859 return error;
6860 }
6861
6862 /*
6863 * fchmod_extended: Change mode of a file given a file descriptor; with
6864 * extended argument list (including extended security (ACL)).
6865 *
6866 * Parameters: p Process requesting to change file mode
6867 * uap User argument descriptor (see below)
6868 * retval (ignored)
6869 *
6870 * Indirect: uap->mode File mode to set (same as 'chmod')
6871 * uap->uid UID to set
6872 * uap->gid GID to set
6873 * uap->xsecurity ACL to set (or delete)
6874 * uap->fd File descriptor of file to change mode
6875 *
6876 * Returns: 0 Success
6877 * !0 errno value
6878 *
6879 */
6880 int
6881 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
6882 {
6883 int error;
6884 struct vnode_attr va;
6885 kauth_filesec_t xsecdst;
6886
6887 AUDIT_ARG(owner, uap->uid, uap->gid);
6888
6889 VATTR_INIT(&va);
6890 if (uap->mode != -1) {
6891 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6892 }
6893 if (uap->uid != KAUTH_UID_NONE) {
6894 VATTR_SET(&va, va_uid, uap->uid);
6895 }
6896 if (uap->gid != KAUTH_GID_NONE) {
6897 VATTR_SET(&va, va_gid, uap->gid);
6898 }
6899
6900 xsecdst = NULL;
6901 switch (uap->xsecurity) {
6902 case USER_ADDR_NULL:
6903 VATTR_SET(&va, va_acl, NULL);
6904 break;
6905 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6906 VATTR_SET(&va, va_acl, NULL);
6907 break;
6908 /* not being set */
6909 case CAST_USER_ADDR_T(-1):
6910 break;
6911 default:
6912 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
6913 return error;
6914 }
6915 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6916 }
6917
6918 error = fchmod1(p, uap->fd, &va);
6919
6920
6921 switch (uap->xsecurity) {
6922 case USER_ADDR_NULL:
6923 case CAST_USER_ADDR_T(-1):
6924 break;
6925 default:
6926 if (xsecdst != NULL) {
6927 kauth_filesec_free(xsecdst);
6928 }
6929 }
6930 return error;
6931 }
6932
6933 int
6934 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
6935 {
6936 struct vnode_attr va;
6937
6938 VATTR_INIT(&va);
6939 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6940
6941 return fchmod1(p, uap->fd, &va);
6942 }
6943
6944
6945 /*
6946 * Set ownership given a path name.
6947 */
6948 /* ARGSUSED */
6949 static int
6950 fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
6951 gid_t gid, int flag, enum uio_seg segflg)
6952 {
6953 vnode_t vp;
6954 struct vnode_attr va;
6955 int error;
6956 struct nameidata nd;
6957 int follow;
6958 kauth_action_t action;
6959
6960 AUDIT_ARG(owner, uid, gid);
6961
6962 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6963 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
6964 path, ctx);
6965 error = nameiat(&nd, fd);
6966 if (error) {
6967 return error;
6968 }
6969 vp = nd.ni_vp;
6970
6971 nameidone(&nd);
6972
6973 VATTR_INIT(&va);
6974 if (uid != (uid_t)VNOVAL) {
6975 VATTR_SET(&va, va_uid, uid);
6976 }
6977 if (gid != (gid_t)VNOVAL) {
6978 VATTR_SET(&va, va_gid, gid);
6979 }
6980
6981 #if CONFIG_MACF
6982 error = mac_vnode_check_setowner(ctx, vp, uid, gid);
6983 if (error) {
6984 goto out;
6985 }
6986 #endif
6987
6988 /* preflight and authorize attribute changes */
6989 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
6990 goto out;
6991 }
6992 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6993 goto out;
6994 }
6995 error = vnode_setattr(vp, &va, ctx);
6996
6997 #if CONFIG_MACF
6998 if (error == 0) {
6999 mac_vnode_notify_setowner(ctx, vp, uid, gid);
7000 }
7001 #endif
7002
7003 out:
7004 /*
7005 * EACCES is only allowed from namei(); permissions failure should
7006 * return EPERM, so we need to translate the error code.
7007 */
7008 if (error == EACCES) {
7009 error = EPERM;
7010 }
7011
7012 vnode_put(vp);
7013 return error;
7014 }
7015
7016 int
7017 chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
7018 {
7019 return fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
7020 uap->uid, uap->gid, 0, UIO_USERSPACE);
7021 }
7022
7023 int
7024 lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
7025 {
7026 return fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
7027 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE);
7028 }
7029
7030 int
7031 fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
7032 {
7033 if (uap->flag & ~AT_SYMLINK_NOFOLLOW) {
7034 return EINVAL;
7035 }
7036
7037 return fchownat_internal(vfs_context_current(), uap->fd, uap->path,
7038 uap->uid, uap->gid, uap->flag, UIO_USERSPACE);
7039 }
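/*
 * Usage sketch (userspace, illustrative; the path and gid are hypothetical):
 * passing (uid_t)-1 or (gid_t)-1 (VNOVAL) leaves that id unchanged, as in
 * fchownat_internal() above; AT_SYMLINK_NOFOLLOW operates on the link
 * itself:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		if (fchownat(AT_FDCWD, "/tmp/afile", (uid_t)-1, (gid_t)20,
 *		    AT_SYMLINK_NOFOLLOW) == -1) {
 *			perror("fchownat");
 *			return 1;
 *		}
 *		return 0;
 *	}
 */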
7040
7041 /*
7042 * Set ownership given a file descriptor.
7043 */
7044 /* ARGSUSED */
7045 int
7046 fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
7047 {
7048 struct vnode_attr va;
7049 vfs_context_t ctx = vfs_context_current();
7050 vnode_t vp;
7051 int error;
7052 kauth_action_t action;
7053
7054 AUDIT_ARG(owner, uap->uid, uap->gid);
7055 AUDIT_ARG(fd, uap->fd);
7056
7057 if ((error = file_vnode(uap->fd, &vp))) {
7058 return error;
7059 }
7060
7061 if ((error = vnode_getwithref(vp))) {
7062 file_drop(uap->fd);
7063 return error;
7064 }
7065 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7066
7067 VATTR_INIT(&va);
7068 if (uap->uid != VNOVAL) {
7069 VATTR_SET(&va, va_uid, uap->uid);
7070 }
7071 if (uap->gid != VNOVAL) {
7072 VATTR_SET(&va, va_gid, uap->gid);
7073 }
7074
7075 #if NAMEDSTREAMS
7076 /* chown calls are not allowed for resource forks. */
7077 if (vp->v_flag & VISNAMEDSTREAM) {
7078 error = EPERM;
7079 goto out;
7080 }
7081 #endif
7082
7083 #if CONFIG_MACF
7084 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
7085 if (error) {
7086 goto out;
7087 }
7088 #endif
7089
7090 /* preflight and authorize attribute changes */
7091 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
7092 goto out;
7093 }
7094 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
7095 if (error == EACCES) {
7096 error = EPERM;
7097 }
7098 goto out;
7099 }
7100 error = vnode_setattr(vp, &va, ctx);
7101
7102 #if CONFIG_MACF
7103 if (error == 0) {
7104 mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid);
7105 }
7106 #endif
7107
7108 out:
7109 (void)vnode_put(vp);
7110 file_drop(uap->fd);
7111 return error;
7112 }
7113
7114 static int
7115 getutimes(user_addr_t usrtvp, struct timespec *tsp)
7116 {
7117 int error;
7118
7119 if (usrtvp == USER_ADDR_NULL) {
7120 struct timeval old_tv;
7121 /* XXX Y2038 bug because of microtime argument */
7122 microtime(&old_tv);
7123 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
7124 tsp[1] = tsp[0];
7125 } else {
7126 if (IS_64BIT_PROCESS(current_proc())) {
7127 struct user64_timeval tv[2];
7128 error = copyin(usrtvp, (void *)tv, sizeof(tv));
7129 if (error) {
7130 return error;
7131 }
7132 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
7133 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
7134 } else {
7135 struct user32_timeval tv[2];
7136 error = copyin(usrtvp, (void *)tv, sizeof(tv));
7137 if (error) {
7138 return error;
7139 }
7140 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
7141 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
7142 }
7143 }
7144 return 0;
7145 }
7146
7147 static int
7148 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
7149 int nullflag)
7150 {
7151 int error;
7152 struct vnode_attr va;
7153 kauth_action_t action;
7154
7155 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7156
7157 VATTR_INIT(&va);
7158 VATTR_SET(&va, va_access_time, ts[0]);
7159 VATTR_SET(&va, va_modify_time, ts[1]);
7160 if (nullflag) {
7161 va.va_vaflags |= VA_UTIMES_NULL;
7162 }
7163
7164 #if NAMEDSTREAMS
7165 /* utimes calls are not allowed for resource forks. */
7166 if (vp->v_flag & VISNAMEDSTREAM) {
7167 error = EPERM;
7168 goto out;
7169 }
7170 #endif
7171
7172 #if CONFIG_MACF
7173 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
7174 if (error) {
7175 goto out;
7176 }
7177 #endif
7178 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
7179 if (!nullflag && error == EACCES) {
7180 error = EPERM;
7181 }
7182 goto out;
7183 }
7184
7185 /* since we may not need to auth anything, check here */
7186 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
7187 if (!nullflag && error == EACCES) {
7188 error = EPERM;
7189 }
7190 goto out;
7191 }
7192 error = vnode_setattr(vp, &va, ctx);
7193
7194 #if CONFIG_MACF
7195 if (error == 0) {
7196 mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]);
7197 }
7198 #endif
7199
7200 out:
7201 return error;
7202 }
7203
7204 /*
7205 * Set the access and modification times of a file.
7206 */
7207 /* ARGSUSED */
7208 int
7209 utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
7210 {
7211 struct timespec ts[2];
7212 user_addr_t usrtvp;
7213 int error;
7214 struct nameidata nd;
7215 vfs_context_t ctx = vfs_context_current();
7216
7217 /*
7218 * AUDIT: Needed to change the order of operations to do the
7219 * name lookup first because auditing wants the path.
7220 */
7221 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
7222 UIO_USERSPACE, uap->path, ctx);
7223 error = namei(&nd);
7224 if (error) {
7225 return error;
7226 }
7227 nameidone(&nd);
7228
7229 /*
7230 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
7231 * the current time instead.
7232 */
7233 usrtvp = uap->tptr;
7234 if ((error = getutimes(usrtvp, ts)) != 0) {
7235 goto out;
7236 }
7237
7238 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
7239
7240 out:
7241 vnode_put(nd.ni_vp);
7242 return error;
7243 }
7244
7245 /*
7246 * Set the access and modification times of a file.
7247 */
7248 /* ARGSUSED */
7249 int
7250 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
7251 {
7252 struct timespec ts[2];
7253 vnode_t vp;
7254 user_addr_t usrtvp;
7255 int error;
7256
7257 AUDIT_ARG(fd, uap->fd);
7258 usrtvp = uap->tptr;
7259 if ((error = getutimes(usrtvp, ts)) != 0) {
7260 return error;
7261 }
7262 if ((error = file_vnode(uap->fd, &vp)) != 0) {
7263 return error;
7264 }
7265 if ((error = vnode_getwithref(vp))) {
7266 file_drop(uap->fd);
7267 return error;
7268 }
7269
7270 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
7271 vnode_put(vp);
7272 file_drop(uap->fd);
7273 return error;
7274 }
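/*
 * Usage sketch (userspace, illustrative; the path is hypothetical): a NULL
 * times pointer corresponds to the usrtvp == USER_ADDR_NULL case above, so
 * both timestamps are set to the current time under VA_UTIMES_NULL
 * semantics; an explicit array sets them individually:
 *
 *	#include <sys/time.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct timeval tv[2] = {
 *			{ .tv_sec = 0, .tv_usec = 0 },		// access time
 *			{ .tv_sec = 1000000000, .tv_usec = 0 },	// modify time
 *		};
 *
 *		if (utimes("/tmp/afile", NULL) == -1) {	// "set to now"
 *			perror("utimes");
 *			return 1;
 *		}
 *		if (utimes("/tmp/afile", tv) == -1) {
 *			perror("utimes");
 *			return 1;
 *		}
 *		return 0;
 *	}
 */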
7275
7276 /*
7277 * Truncate a file given its path name.
7278 */
7279 /* ARGSUSED */
7280 int
7281 truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
7282 {
7283 vnode_t vp;
7284 struct vnode_attr va;
7285 vfs_context_t ctx = vfs_context_current();
7286 int error;
7287 struct nameidata nd;
7288 kauth_action_t action;
7289
7290 if (uap->length < 0) {
7291 return EINVAL;
7292 }
7293 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
7294 UIO_USERSPACE, uap->path, ctx);
7295 if ((error = namei(&nd))) {
7296 return error;
7297 }
7298 vp = nd.ni_vp;
7299
7300 nameidone(&nd);
7301
7302 VATTR_INIT(&va);
7303 VATTR_SET(&va, va_data_size, uap->length);
7304
7305 #if CONFIG_MACF
7306 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
7307 if (error) {
7308 goto out;
7309 }
7310 #endif
7311
7312 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
7313 goto out;
7314 }
7315 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
7316 goto out;
7317 }
7318 error = vnode_setattr(vp, &va, ctx);
7319
7320 #if CONFIG_MACF
7321 if (error == 0) {
7322 mac_vnode_notify_truncate(ctx, NOCRED, vp);
7323 }
7324 #endif
7325
7326 out:
7327 vnode_put(vp);
7328 return error;
7329 }
7330
7331 /*
7332 * Truncate a file given a file descriptor.
7333 */
7334 /* ARGSUSED */
7335 int
7336 ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
7337 {
7338 vfs_context_t ctx = vfs_context_current();
7339 struct vnode_attr va;
7340 vnode_t vp;
7341 struct fileproc *fp;
7342 int error;
7343 int fd = uap->fd;
7344
7345 AUDIT_ARG(fd, uap->fd);
7346 if (uap->length < 0) {
7347 return EINVAL;
7348 }
7349
7350 if ((error = fp_lookup(p, fd, &fp, 0))) {
7351 return error;
7352 }
7353
7354 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
7355 case DTYPE_PSXSHM:
7356 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
7357 goto out;
7358 case DTYPE_VNODE:
7359 break;
7360 default:
7361 error = EINVAL;
7362 goto out;
7363 }
7364
7365 vp = (vnode_t)fp->f_fglob->fg_data;
7366
7367 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
7368 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7369 error = EINVAL;
7370 goto out;
7371 }
7372
7373 if ((error = vnode_getwithref(vp)) != 0) {
7374 goto out;
7375 }
7376
7377 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7378
7379 #if CONFIG_MACF
7380 error = mac_vnode_check_truncate(ctx,
7381 fp->f_fglob->fg_cred, vp);
7382 if (error) {
7383 (void)vnode_put(vp);
7384 goto out;
7385 }
7386 #endif
7387 VATTR_INIT(&va);
7388 VATTR_SET(&va, va_data_size, uap->length);
7389 error = vnode_setattr(vp, &va, ctx);
7390
7391 #if CONFIG_MACF
7392 if (error == 0) {
7393 mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp);
7394 }
7395 #endif
7396
7397 (void)vnode_put(vp);
7398 out:
7399 file_drop(fd);
7400 return error;
7401 }
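/*
 * Usage sketch (userspace, illustrative; the path is hypothetical): the
 * descriptor must be open for writing or the FWRITE check above fails with
 * EINVAL, and negative lengths are rejected before the descriptor is even
 * looked up:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int fd = open("/tmp/afile", O_WRONLY | O_CREAT, 0644);
 *
 *		if (fd == -1) {
 *			perror("open");
 *			return 1;
 *		}
 *		if (ftruncate(fd, 4096) == -1) {
 *			perror("ftruncate");
 *			close(fd);
 *			return 1;
 *		}
 *		close(fd);
 *		return 0;
 *	}
 */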
7402
7403
7404 /*
7405 * Sync an open file with synchronized I/O _file_ integrity completion
7406 */
7407 /* ARGSUSED */
7408 int
7409 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
7410 {
7411 __pthread_testcancel(1);
7412 return fsync_common(p, uap, MNT_WAIT);
7413 }
7414
7415
7416 /*
7417 * Sync an open file with synchronized I/O _file_ integrity completion
7418 *
7419 * Notes: This is a legacy support function that does not test for
7420 * thread cancellation points.
7421 */
7422 /* ARGSUSED */
7423 int
7424 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
7425 {
7426 return fsync_common(p, (struct fsync_args *)uap, MNT_WAIT);
7427 }
7428
7429
7430 /*
7431 * Sync an open file with synchronized I/O _data_ integrity completion
7432 */
7433 /* ARGSUSED */
7434 int
7435 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
7436 {
7437 __pthread_testcancel(1);
7438 return fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT);
7439 }
7440
7441
7442 /*
7443 * fsync_common
7444 *
7445 * Common fsync code to support both synchronized I/O file integrity completion
7446 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
7447 *
7448 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
7449 * will only guarantee that the file data contents are retrievable. If
7450 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which
7451 * additionally requires that metadata not needed for retrieving the file
7452 * data contents, such as atime, mtime, ctime, etc., also be committed
7453 * to stable storage.
7454 *
7455 * Parameters: p The process
7456 * uap->fd The descriptor to synchronize
7457 * flags The data integrity flags
7458 *
7459 * Returns: 0 Success
7460 * fp_getfvp:EBADF Bad file descriptor
7461 * fp_getfvp:ENOTSUP fd does not refer to a vnode
7462 * VNOP_FSYNC:??? unspecified
7463 *
7464 * Notes: We use struct fsync_args because it is a short name, and all
7465 * caller argument structures are otherwise identical.
7466 */
7467 static int
7468 fsync_common(proc_t p, struct fsync_args *uap, int flags)
7469 {
7470 vnode_t vp;
7471 struct fileproc *fp;
7472 vfs_context_t ctx = vfs_context_current();
7473 int error;
7474
7475 AUDIT_ARG(fd, uap->fd);
7476
7477 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
7478 return error;
7479 }
7480 if ((error = vnode_getwithref(vp))) {
7481 file_drop(uap->fd);
7482 return error;
7483 }
7484
7485 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7486
7487 error = VNOP_FSYNC(vp, flags, ctx);
7488
7489 #if NAMEDRSRCFORK
7490 /* Sync resource fork shadow file if necessary. */
7491 if ((error == 0) &&
7492 (vp->v_flag & VISNAMEDSTREAM) &&
7493 (vp->v_parent != NULLVP) &&
7494 vnode_isshadow(vp) &&
7495 (fp->f_flags & FP_WRITTEN)) {
7496 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
7497 }
7498 #endif
7499
7500 (void)vnode_put(vp);
7501 file_drop(uap->fd);
7502 return error;
7503 }
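/*
 * Usage sketch (userspace, illustrative; the path is hypothetical, and the
 * fdatasync(2) wrapper is assumed to be available in libc): fdatasync()
 * reaches fsync_common() with MNT_DWAIT (data integrity only), while
 * fsync() uses MNT_WAIT (file integrity, including metadata):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int fd = open("/tmp/afile", O_WRONLY | O_CREAT | O_APPEND, 0644);
 *
 *		if (fd == -1) {
 *			perror("open");
 *			return 1;
 *		}
 *		if (write(fd, "hello\n", 6) != 6) {
 *			perror("write");
 *		} else if (fdatasync(fd) == -1) {	// data integrity
 *			perror("fdatasync");
 *		} else if (fsync(fd) == -1) {		// file integrity
 *			perror("fsync");
 *		}
 *		close(fd);
 *		return 0;
 *	}
 */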
7504
7505 /*
7506 * Duplicate files. Source must be a file, target must be a file or
7507 * must not exist.
7508 *
7509 * XXX Copyfile authorisation checking is woefully inadequate, and will not
7510 * perform inheritance correctly.
7511 */
7512 /* ARGSUSED */
7513 int
7514 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
7515 {
7516 vnode_t tvp, fvp, tdvp, sdvp;
7517 struct nameidata fromnd, tond;
7518 int error;
7519 vfs_context_t ctx = vfs_context_current();
7520 #if CONFIG_MACF
7521 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
7522 struct vnode_attr va;
7523 #endif
7524
7525 /* Check that the flags are valid. */
7526
7527 if (uap->flags & ~CPF_MASK) {
7528 return EINVAL;
7529 }
7530
7531 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
7532 UIO_USERSPACE, uap->from, ctx);
7533 if ((error = namei(&fromnd))) {
7534 return error;
7535 }
7536 fvp = fromnd.ni_vp;
7537
7538 NDINIT(&tond, CREATE, OP_LINK,
7539 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
7540 UIO_USERSPACE, uap->to, ctx);
7541 if ((error = namei(&tond))) {
7542 goto out1;
7543 }
7544 tdvp = tond.ni_dvp;
7545 tvp = tond.ni_vp;
7546
7547 if (tvp != NULL) {
7548 if (!(uap->flags & CPF_OVERWRITE)) {
7549 error = EEXIST;
7550 goto out;
7551 }
7552 }
7553
7554 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
7555 error = EISDIR;
7556 goto out;
7557 }
7558
7559 /* This calls existing MAC hooks for open */
7560 if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx,
7561 NULL))) {
7562 goto out;
7563 }
7564
7565 if (tvp) {
7566 /*
7567 * See unlinkat_internal for an explanation of the potential
7568 * ENOENT from the MAC hook but the gist is that the MAC hook
7569 * can fail because vn_getpath isn't able to return the full
7570 * path. We choose to ignore this failure.
7571 */
7572 error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL);
7573 if (error && error != ENOENT) {
7574 goto out;
7575 }
7576 error = 0;
7577 }
7578
7579 #if CONFIG_MACF
7580 VATTR_INIT(&va);
7581 VATTR_SET(&va, va_type, fvp->v_type);
7582 /* Mask off all but regular access permissions */
7583 VATTR_SET(&va, va_mode,
7584 ((((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
7585 error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
7586 if (error) {
7587 goto out;
7588 }
7589 #endif /* CONFIG_MACF */
7590
7591 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) {
7592 goto out;
7593 }
7594
7595 if (fvp == tdvp) {
7596 error = EINVAL;
7597 }
7598 /*
7599 * If source is the same as the destination (that is the
7600 * same inode number) then there is nothing to do.
7601 * (fixed to have POSIX semantics - CSM 3/2/98)
7602 */
7603 if (fvp == tvp) {
7604 error = -1;
7605 }
7606 if (!error) {
7607 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
7608 }
7609 out:
7610 sdvp = tond.ni_startdir;
7611 /*
7612 * nameidone has to happen before we vnode_put(tdvp)
7613 * since it may need to release the fs_nodelock on the tdvp
7614 */
7615 nameidone(&tond);
7616
7617 if (tvp) {
7618 vnode_put(tvp);
7619 }
7620 vnode_put(tdvp);
7621 vnode_put(sdvp);
7622 out1:
7623 vnode_put(fvp);
7624
7625 nameidone(&fromnd);
7626
7627 if (error == -1) {
7628 return 0;
7629 }
7630 return error;
7631 }
7632
7633 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
7634
7635 /*
7636 * Helper function for doing clones. The caller is expected to provide an
7637 * iocounted source vnode and release it.
7638 */
7639 static int
7640 clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
7641 user_addr_t dst, uint32_t flags, vfs_context_t ctx)
7642 {
7643 vnode_t tvp, tdvp;
7644 struct nameidata tond;
7645 int error;
7646 int follow;
7647 boolean_t free_src_acl;
7648 boolean_t attr_cleanup;
7649 enum vtype v_type;
7650 kauth_action_t action;
7651 struct componentname *cnp;
7652 uint32_t defaulted;
7653 struct vnode_attr va;
7654 struct vnode_attr nva;
7655 uint32_t vnop_flags;
7656
7657 v_type = vnode_vtype(fvp);
7658 switch (v_type) {
7659 case VLNK:
7660 /* FALLTHRU */
7661 case VREG:
7662 action = KAUTH_VNODE_ADD_FILE;
7663 break;
7664 case VDIR:
7665 if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
7666 fvp->v_mountedhere) {
7667 return EINVAL;
7668 }
7669 action = KAUTH_VNODE_ADD_SUBDIRECTORY;
7670 break;
7671 default:
7672 return EINVAL;
7673 }
7674
7675 AUDIT_ARG(fd2, dst_dirfd);
7676 AUDIT_ARG(value32, flags);
7677
7678 follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7679 NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
7680 UIO_USERSPACE, dst, ctx);
7681 if ((error = nameiat(&tond, dst_dirfd))) {
7682 return error;
7683 }
7684 cnp = &tond.ni_cnd;
7685 tdvp = tond.ni_dvp;
7686 tvp = tond.ni_vp;
7687
7688 free_src_acl = FALSE;
7689 attr_cleanup = FALSE;
7690
7691 if (tvp != NULL) {
7692 error = EEXIST;
7693 goto out;
7694 }
7695
7696 if (vnode_mount(tdvp) != vnode_mount(fvp)) {
7697 error = EXDEV;
7698 goto out;
7699 }
7700
7701 #if CONFIG_MACF
7702 if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp))) {
7703 goto out;
7704 }
7705 #endif
7706 if ((error = vnode_authorize(tdvp, NULL, action, ctx))) {
7707 goto out;
7708 }
7709
7710 action = KAUTH_VNODE_GENERIC_READ_BITS;
7711 if (data_read_authorised) {
7712 action &= ~KAUTH_VNODE_READ_DATA;
7713 }
7714 if ((error = vnode_authorize(fvp, NULL, action, ctx))) {
7715 goto out;
7716 }
7717
7718 /*
7719 * Certain attributes may need to be changed from the source; we ask for
7720 * those here.
7721 */
7722 VATTR_INIT(&va);
7723 VATTR_WANTED(&va, va_uid);
7724 VATTR_WANTED(&va, va_gid);
7725 VATTR_WANTED(&va, va_mode);
7726 VATTR_WANTED(&va, va_flags);
7727 VATTR_WANTED(&va, va_acl);
7728
7729 if ((error = vnode_getattr(fvp, &va, ctx)) != 0) {
7730 goto out;
7731 }
7732
7733 VATTR_INIT(&nva);
7734 VATTR_SET(&nva, va_type, v_type);
7735 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) {
7736 VATTR_SET(&nva, va_acl, va.va_acl);
7737 free_src_acl = TRUE;
7738 }
7739
7740 /* Handle ACL inheritance, initialize vap. */
7741 if (v_type == VLNK) {
7742 error = vnode_authattr_new(tdvp, &nva, 0, ctx);
7743 } else {
7744 error = vn_attribute_prepare(tdvp, &nva, &defaulted, ctx);
7745 if (error) {
7746 goto out;
7747 }
7748 attr_cleanup = TRUE;
7749 }
7750
7751 vnop_flags = VNODE_CLONEFILE_DEFAULT;
7752 /*
7753 * We've got initial values for all security parameters.
7754 * If we are superuser, then we can change owners to be the
7755 * same as the source. Both superuser and the owner have default
7756 * WRITE_SECURITY privileges so all other fields can be taken
7757 * from source as well.
7758 */
7759 if (!(flags & CLONE_NOOWNERCOPY) && vfs_context_issuser(ctx)) {
7760 if (VATTR_IS_SUPPORTED(&va, va_uid)) {
7761 VATTR_SET(&nva, va_uid, va.va_uid);
7762 }
7763 if (VATTR_IS_SUPPORTED(&va, va_gid)) {
7764 VATTR_SET(&nva, va_gid, va.va_gid);
7765 }
7766 } else {
7767 vnop_flags |= VNODE_CLONEFILE_NOOWNERCOPY;
7768 }
7769
7770 if (VATTR_IS_SUPPORTED(&va, va_mode)) {
7771 VATTR_SET(&nva, va_mode, va.va_mode);
7772 }
7773 if (VATTR_IS_SUPPORTED(&va, va_flags)) {
7774 VATTR_SET(&nva, va_flags,
7775 ((va.va_flags & ~(UF_DATAVAULT | SF_RESTRICTED)) | /* Turn off from source */
7776 (nva.va_flags & (UF_DATAVAULT | SF_RESTRICTED))));
7777 }
7778
7779 error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &nva, vnop_flags, ctx);
7780
7781 if (!error && tvp) {
7782 int update_flags = 0;
7783 #if CONFIG_FSE
7784 int fsevent;
7785 #endif /* CONFIG_FSE */
7786
7787 #if CONFIG_MACF
7788 (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
7789 VNODE_LABEL_CREATE, ctx);
7790 #endif
7791 /*
7792 * If some of the requested attributes weren't handled by the
7793 * VNOP, use our fallback code.
7794 */
7795 if (!VATTR_ALL_SUPPORTED(&va)) {
7796 (void)vnode_setattr_fallback(tvp, &nva, ctx);
7797 }
7798
7799 // Make sure the name & parent pointers are hooked up
7800 if (tvp->v_name == NULL) {
7801 update_flags |= VNODE_UPDATE_NAME;
7802 }
7803 if (tvp->v_parent == NULLVP) {
7804 update_flags |= VNODE_UPDATE_PARENT;
7805 }
7806
7807 if (update_flags) {
7808 (void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
7809 cnp->cn_namelen, cnp->cn_hash, update_flags);
7810 }
7811
7812 #if CONFIG_FSE
7813 switch (vnode_vtype(tvp)) {
7814 case VLNK:
7815 /* FALLTHRU */
7816 case VREG:
7817 fsevent = FSE_CREATE_FILE;
7818 break;
7819 case VDIR:
7820 fsevent = FSE_CREATE_DIR;
7821 break;
7822 default:
7823 goto out;
7824 }
7825
7826 if (need_fsevent(fsevent, tvp)) {
7827 /*
7828 * The following is a sequence of three explicit events.
7829 * A pair of FSE_CLONE events representing the source and destination
7830 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
7831 * fseventsd may coalesce the destination clone and create events
7832 * into a single event resulting in the following sequence for a client
7833 * FSE_CLONE (src)
7834 * FSE_CLONE | FSE_CREATE (dst)
7835 */
7836 add_fsevent(FSE_CLONE, ctx, FSE_ARG_VNODE, fvp, FSE_ARG_VNODE, tvp,
7837 FSE_ARG_DONE);
7838 add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
7839 FSE_ARG_DONE);
7840 }
7841 #endif /* CONFIG_FSE */
7842 }
7843
7844 out:
7845 if (attr_cleanup) {
7846 vn_attribute_cleanup(&nva, defaulted);
7847 }
7848 if (free_src_acl && va.va_acl) {
7849 kauth_acl_free(va.va_acl);
7850 }
7851 nameidone(&tond);
7852 if (tvp) {
7853 vnode_put(tvp);
7854 }
7855 vnode_put(tdvp);
7856 return error;
7857 }
7858
7859 /*
7860 * clone files or directories, target must not exist.
7861 */
7862 /* ARGSUSED */
7863 int
7864 clonefileat(__unused proc_t p, struct clonefileat_args *uap,
7865 __unused int32_t *retval)
7866 {
7867 vnode_t fvp;
7868 struct nameidata fromnd;
7869 int follow;
7870 int error;
7871 vfs_context_t ctx = vfs_context_current();
7872
7873 /* Check that the flags are valid. */
7874 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY)) {
7875 return EINVAL;
7876 }
7877
7878 AUDIT_ARG(fd, uap->src_dirfd);
7879
7880 follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7881 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
7882 UIO_USERSPACE, uap->src, ctx);
7883 if ((error = nameiat(&fromnd, uap->src_dirfd))) {
7884 return error;
7885 }
7886
7887 fvp = fromnd.ni_vp;
7888 nameidone(&fromnd);
7889
7890 error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
7891 uap->flags, ctx);
7892
7893 vnode_put(fvp);
7894 return error;
7895 }
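/*
 * Usage sketch (userspace, illustrative; the paths are hypothetical, and the
 * wrapper is assumed to be declared in <sys/clonefile.h>): both paths must
 * be on the same mount (EXDEV otherwise) and the destination must not
 * already exist (EEXIST), per clonefile_internal() above:
 *
 *	#include <sys/clonefile.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		if (clonefileat(AT_FDCWD, "/tmp/src.dat",
 *		    AT_FDCWD, "/tmp/dst.dat", CLONE_NOFOLLOW) == -1) {
 *			perror("clonefileat");
 *			return 1;
 *		}
 *		return 0;
 *	}
 */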
7896
7897 int
7898 fclonefileat(__unused proc_t p, struct fclonefileat_args *uap,
7899 __unused int32_t *retval)
7900 {
7901 vnode_t fvp;
7902 struct fileproc *fp;
7903 int error;
7904 vfs_context_t ctx = vfs_context_current();
7905
7906 /* Check that the flags are valid. */
7907 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY)) {
7908 return EINVAL;
7909 }
7910
7911 AUDIT_ARG(fd, uap->src_fd);
7912 error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
7913 if (error) {
7914 return error;
7915 }
7916
7917 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7918 AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
7919 error = EBADF;
7920 goto out;
7921 }
7922
7923 if ((error = vnode_getwithref(fvp))) {
7924 goto out;
7925 }
7926
7927 AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
7928
7929 error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
7930 uap->flags, ctx);
7931
7932 vnode_put(fvp);
7933 out:
7934 file_drop(uap->src_fd);
7935 return error;
7936 }
7937
7938 static int
7939 rename_submounts_callback(mount_t mp, void *arg)
7940 {
7941 int error = 0;
7942 mount_t pmp = (mount_t)arg;
7943 int prefix_len = strlen(pmp->mnt_vfsstat.f_mntonname);
7944
7945 if (strncmp(mp->mnt_vfsstat.f_mntonname, pmp->mnt_vfsstat.f_mntonname, prefix_len) != 0) {
7946 return 0;
7947 }
7948
7949 if (mp->mnt_vfsstat.f_mntonname[prefix_len] != '/') {
7950 return 0;
7951 }
7952
7953 if ((error = vfs_busy(mp, LK_NOWAIT))) {
7954 printf("vfs_busy failed with %d for %s\n", error, mp->mnt_vfsstat.f_mntonname);
7955 return -1;
7956 }
7957
7958 int pathlen = MAXPATHLEN;
7959 if ((error = vn_getpath_ext(mp->mnt_vnodecovered, NULL, mp->mnt_vfsstat.f_mntonname, &pathlen, VN_GETPATH_FSENTER))) {
7960 printf("vn_getpath_ext failed with %d for mnt_vnodecovered of %s\n", error, mp->mnt_vfsstat.f_mntonname);
7961 }
7962
7963 vfs_unbusy(mp);
7964
7965 return error;
7966 }
7967
7968 /*
7969 * Rename files. Source and destination must either both be directories,
7970 * or both not be directories. If target is a directory, it must be empty.
7971 */
7972 /* ARGSUSED */
7973 static int
7974 renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
7975 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
7976 {
7977 if (flags & ~VFS_RENAME_FLAGS_MASK) {
7978 return EINVAL;
7979 }
7980
7981 if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL)) {
7982 return EINVAL;
7983 }
7984
7985 vnode_t tvp, tdvp;
7986 vnode_t fvp, fdvp;
7987 struct nameidata *fromnd, *tond;
7988 int error;
7989 int do_retry;
7990 int retry_count;
7991 int mntrename;
7992 int need_event;
7993 int need_kpath2;
7994 int has_listeners;
7995 const char *oname = NULL;
7996 char *from_name = NULL, *to_name = NULL;
7997 char *from_name_no_firmlink = NULL, *to_name_no_firmlink = NULL;
7998 int from_len = 0, to_len = 0;
7999 int from_len_no_firmlink = 0, to_len_no_firmlink = 0;
8000 int holding_mntlock;
8001 mount_t locked_mp = NULL;
8002 vnode_t oparent = NULLVP;
8003 #if CONFIG_FSE
8004 fse_info from_finfo, to_finfo;
8005 #endif
8006 int from_truncated = 0, to_truncated = 0;
8007 int from_truncated_no_firmlink = 0, to_truncated_no_firmlink = 0;
8008 int batched = 0;
8009 struct vnode_attr *fvap, *tvap;
8010 int continuing = 0;
8011 /* carving out a chunk for structs that are too big to be on stack. */
8012 struct {
8013 struct nameidata from_node, to_node;
8014 struct vnode_attr fv_attr, tv_attr;
8015 } * __rename_data;
8016 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
8017 fromnd = &__rename_data->from_node;
8018 tond = &__rename_data->to_node;
8019
8020 holding_mntlock = 0;
8021 do_retry = 0;
8022 retry_count = 0;
8023 retry:
8024 fvp = tvp = NULL;
8025 fdvp = tdvp = NULL;
8026 fvap = tvap = NULL;
8027 mntrename = FALSE;
8028
8029 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
8030 segflg, from, ctx);
8031 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
8032
8033 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
8034 segflg, to, ctx);
8035 tond->ni_flag = NAMEI_COMPOUNDRENAME;
8036
8037 continue_lookup:
8038 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
8039 if ((error = nameiat(fromnd, fromfd))) {
8040 goto out1;
8041 }
8042 fdvp = fromnd->ni_dvp;
8043 fvp = fromnd->ni_vp;
8044
8045 if (fvp && fvp->v_type == VDIR) {
8046 tond->ni_cnd.cn_flags |= WILLBEDIR;
8047 }
8048 }
8049
8050 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
8051 if ((error = nameiat(tond, tofd))) {
8052 /*
8053 * Translate error code for rename("dir1", "dir2/.").
8054 */
8055 if (error == EISDIR && fvp->v_type == VDIR) {
8056 error = EINVAL;
8057 }
8058 goto out1;
8059 }
8060 tdvp = tond->ni_dvp;
8061 tvp = tond->ni_vp;
8062 }
8063
8064 #if DEVELOPMENT || DEBUG
8065 /*
8066 * XXX VSWAP: Check for entitlements or special flag here
8067 * so we can restrict access appropriately.
8068 */
8069 #else /* DEVELOPMENT || DEBUG */
8070
8071 if (fromnd->ni_vp && vnode_isswap(fromnd->ni_vp) && (ctx != vfs_context_kernel())) {
8072 error = EPERM;
8073 goto out1;
8074 }
8075
8076 if (tond->ni_vp && vnode_isswap(tond->ni_vp) && (ctx != vfs_context_kernel())) {
8077 error = EPERM;
8078 goto out1;
8079 }
8080 #endif /* DEVELOPMENT || DEBUG */
8081
8082 if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
8083 error = ENOENT;
8084 goto out1;
8085 }
8086
8087 if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
8088 error = EEXIST;
8089 goto out1;
8090 }
8091
8092 batched = vnode_compound_rename_available(fdvp);
8093
8094 #if CONFIG_FSE
8095 need_event = need_fsevent(FSE_RENAME, fdvp);
8096 if (need_event) {
8097 if (fvp) {
8098 get_fse_info(fvp, &from_finfo, ctx);
8099 } else {
8100 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
8101 if (error) {
8102 goto out1;
8103 }
8104
8105 fvap = &__rename_data->fv_attr;
8106 }
8107
8108 if (tvp) {
8109 get_fse_info(tvp, &to_finfo, ctx);
8110 } else if (batched) {
8111 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
8112 if (error) {
8113 goto out1;
8114 }
8115
8116 tvap = &__rename_data->tv_attr;
8117 }
8118 }
8119 #else
8120 need_event = 0;
8121 #endif /* CONFIG_FSE */
8122
8123 has_listeners = kauth_authorize_fileop_has_listeners();
8124
8125 need_kpath2 = 0;
8126 #if CONFIG_AUDIT
8127 if (AUDIT_RECORD_EXISTS()) {
8128 need_kpath2 = 1;
8129 }
8130 #endif
8131
8132 if (need_event || has_listeners) {
8133 if (from_name == NULL) {
8134 GET_PATH(from_name);
8135 if (from_name == NULL) {
8136 error = ENOMEM;
8137 goto out1;
8138 }
8139 }
8140
8141 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
8142
8143 if (from_name_no_firmlink == NULL) {
8144 GET_PATH(from_name_no_firmlink);
8145 if (from_name_no_firmlink == NULL) {
8146 error = ENOMEM;
8147 goto out1;
8148 }
8149 }
8150
8151 from_len_no_firmlink = safe_getpath_no_firmlink(fdvp, fromnd->ni_cnd.cn_nameptr, from_name_no_firmlink, MAXPATHLEN, &from_truncated_no_firmlink);
8152 }
8153
8154 if (need_event || need_kpath2 || has_listeners) {
8155 if (to_name == NULL) {
8156 GET_PATH(to_name);
8157 if (to_name == NULL) {
8158 error = ENOMEM;
8159 goto out1;
8160 }
8161 }
8162
8163 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
8164
8165 if (to_name_no_firmlink == NULL) {
8166 GET_PATH(to_name_no_firmlink);
8167 if (to_name_no_firmlink == NULL) {
8168 error = ENOMEM;
8169 goto out1;
8170 }
8171 }
8172
8173 to_len_no_firmlink = safe_getpath_no_firmlink(tdvp, tond->ni_cnd.cn_nameptr, to_name_no_firmlink, MAXPATHLEN, &to_truncated_no_firmlink);
8174 if (to_name && need_kpath2) {
8175 AUDIT_ARG(kpath, to_name, ARG_KPATH2);
8176 }
8177 }
8178 if (!fvp) {
8179 /*
8180 * Claim: this check will never reject a valid rename.
8181 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
8182 * Suppose fdvp and tdvp are not on the same mount.
8183 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
8184 * then you can't move it to within another dir on the same mountpoint.
8185 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
8186 *
8187 * If this check passes, then we are safe to pass these vnodes to the same FS.
8188 */
8189 if (fdvp->v_mount != tdvp->v_mount) {
8190 error = EXDEV;
8191 goto out1;
8192 }
8193 goto skipped_lookup;
8194 }
8195
8196 if (!batched) {
8197 error = vn_authorize_renamex_with_paths(fdvp, fvp, &fromnd->ni_cnd, from_name, tdvp, tvp, &tond->ni_cnd, to_name, ctx, flags, NULL);
8198 if (error) {
8199 if (error == ENOENT) {
8200 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8201 /*
8202 * We encountered a race where after doing the namei, tvp stops
8203 * being valid. If so, simply re-drive the rename call from the
8204 * top.
8205 */
8206 do_retry = 1;
8207 retry_count += 1;
8208 }
8209 }
8210 goto out1;
8211 }
8212 }
8213
8214 /*
8215 * If the source and destination are the same (i.e. they're
8216 * links to the same vnode) and the target file system is
8217 * case sensitive, then there is nothing to do.
8218 *
8219 * XXX Come back to this.
8220 */
8221 if (fvp == tvp) {
8222 int pathconf_val;
8223
8224 /*
8225 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
8226 * then assume that this file system is case sensitive.
8227 */
8228 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
8229 pathconf_val != 0) {
8230 goto out1;
8231 }
8232 }
8233
8234 /*
8235 * Allow the renaming of mount points.
8236 * - target must not exist
8237 * - target must reside in the same directory as source
8238 * - union mounts cannot be renamed
8239 * - "/" cannot be renamed
8240 *
8241 * XXX Handle this in VFS after a continued lookup (if we missed
8242 * in the cache to start off)
8243 *
8244 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
8245 * we'll skip past here. The file system is responsible for
8246 * checking that @tvp is not a descendent of @fvp and vice versa
8247 * so it should always return EINVAL if either @tvp or @fvp is the
8248 * root of a volume.
8249 */
8250 if ((fvp->v_flag & VROOT) &&
8251 (fvp->v_type == VDIR) &&
8252 (tvp == NULL) &&
8253 (fvp->v_mountedhere == NULL) &&
8254 (fdvp == tdvp) &&
8255 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
8256 ((fvp->v_mount->mnt_kern_flag & MNTK_SYSTEM) == 0) &&
8257 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
8258 vnode_t coveredvp;
8259
8260 /* switch fvp to the covered vnode */
8261 coveredvp = fvp->v_mount->mnt_vnodecovered;
8262 if ((vnode_getwithref(coveredvp))) {
8263 error = ENOENT;
8264 goto out1;
8265 }
8266 vnode_put(fvp);
8267
8268 fvp = coveredvp;
8269 mntrename = TRUE;
8270 }
8271 /*
8272 * Check for cross-device rename.
8273 */
8274 if ((fvp->v_mount != tdvp->v_mount) ||
8275 (tvp && (fvp->v_mount != tvp->v_mount))) {
8276 error = EXDEV;
8277 goto out1;
8278 }
8279
8280 /*
8281 * If source is the same as the destination (that is the
8282 * same inode number) then there is nothing to do...
8283 * EXCEPT if the underlying file system supports case
8284 * insensitivity and is case preserving. In this case
8285 * the file system needs to handle the special case of
8286 * getting the same vnode as target (fvp) and source (tvp).
8287 *
8288 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
8289 * and _PC_CASE_PRESERVING can have this exception, and they need to
8290 * handle the special case of getting the same vnode as target and
8291 * source. NOTE: Then the target is unlocked going into vnop_rename,
8292 * so as not to cause locking problems. There is a single reference on tvp.
8293 *
8294 * NOTE - that fvp == tvp also occurs if they are hard linked and
8295 * that correct behaviour then is just to return success without doing
8296 * anything.
8297 *
8298 * XXX filesystem should take care of this itself, perhaps...
8299 */
8300 if (fvp == tvp && fdvp == tdvp) {
8301 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
8302 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
8303 fromnd->ni_cnd.cn_namelen)) {
8304 goto out1;
8305 }
8306 }
8307
8308 if (holding_mntlock && fvp->v_mount != locked_mp) {
8309 /*
8310 * we're holding a reference and lock
8311 * on locked_mp, but it no longer matches
8312 * what we want to do... so drop our hold
8313 */
8314 mount_unlock_renames(locked_mp);
8315 mount_drop(locked_mp, 0);
8316 holding_mntlock = 0;
8317 }
8318 if (tdvp != fdvp && fvp->v_type == VDIR) {
8319 /*
8320 * serialize renames that re-shape
8321 * the tree... if holding_mntlock is
8322 * set, then we're ready to go...
8323 * otherwise we
8324 * first need to drop the iocounts
8325 * we picked up, second take the
8326 * lock to serialize the access,
8327 * then finally start the lookup
8328 * process over with the lock held
8329 */
8330 if (!holding_mntlock) {
8331 /*
8332 * need to grab a reference on
8333 * the mount point before we
8334 * drop all the iocounts... once
8335 * the iocounts are gone, the mount
8336 * could follow
8337 */
8338 locked_mp = fvp->v_mount;
8339 mount_ref(locked_mp, 0);
8340
8341 /*
8342 * nameidone has to happen before we vnode_put(tvp)
8343 * since it may need to release the fs_nodelock on the tvp
8344 */
8345 nameidone(tond);
8346
8347 if (tvp) {
8348 vnode_put(tvp);
8349 }
8350 vnode_put(tdvp);
8351
8352 /*
8353 * nameidone has to happen before we vnode_put(fdvp)
8354 * since it may need to release the fs_nodelock on the fvp
8355 */
8356 nameidone(fromnd);
8357
8358 vnode_put(fvp);
8359 vnode_put(fdvp);
8360
8361 mount_lock_renames(locked_mp);
8362 holding_mntlock = 1;
8363
8364 goto retry;
8365 }
8366 } else {
8367 /*
8368 * when we dropped the iocounts to take
8369 * the lock, we allowed the identity of
8370 * the various vnodes to change... if they did,
8371 * we may no longer be dealing with a rename
8372 * that reshapes the tree... once we're holding
8373 * the iocounts, the vnodes can't change type
8374 * so we're free to drop the lock at this point
8375 * and continue on
8376 */
8377 if (holding_mntlock) {
8378 mount_unlock_renames(locked_mp);
8379 mount_drop(locked_mp, 0);
8380 holding_mntlock = 0;
8381 }
8382 }
8383
8384 // save these off so we can later verify that fvp is the same
8385 oname = fvp->v_name;
8386 oparent = fvp->v_parent;
8387
8388 skipped_lookup:
8389 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
8390 tdvp, &tvp, &tond->ni_cnd, tvap,
8391 flags, ctx);
8392
8393 if (holding_mntlock) {
8394 /*
8395 * we can drop our serialization
8396 * lock now
8397 */
8398 mount_unlock_renames(locked_mp);
8399 mount_drop(locked_mp, 0);
8400 holding_mntlock = 0;
8401 }
8402 if (error) {
8403 if (error == EDATALESS) {
8404 /*
8405 * If we've been here before, something has gone
8406 * horribly wrong and we should just get out lest
8407 * we spiral around the drain forever.
8408 */
8409 if (flags & VFS_RENAME_DATALESS) {
8410 error = EIO;
8411 goto out1;
8412 }
8413
8414 /*
8415 * The object we're renaming is dataless (or has a
8416 * dataless descendant) and requires materialization
8417 * before the rename occurs. But we're holding the
8418 * mount point's rename lock, so it's not safe to
8419 * make the upcall.
8420 *
8421 * In this case, we release the lock, perform the
8422 * materialization, and start the whole thing over.
8423 */
8424 error = vnode_materialize_dataless_file(fvp,
8425 NAMESPACE_HANDLER_RENAME_OP);
8426
8427 if (error == 0) {
8428 /*
8429 * The next time around we need to tell the
8430 * file system that the materialization has
8431 * been performed.
8432 */
8433 flags |= VFS_RENAME_DATALESS;
8434 do_retry = 1;
8435 }
8436 goto out1;
8437 }
8438 if (error == EKEEPLOOKING) {
8439 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
8440 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
8441 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
8442 }
8443 }
8444
8445 fromnd->ni_vp = fvp;
8446 tond->ni_vp = tvp;
8447
8448 goto continue_lookup;
8449 }
8450
8451 /*
8452 * We may encounter a race in the VNOP where the destination didn't
8453 * exist when we did the namei, but it does by the time we go and
8454 * try to create the entry. In this case, we should re-drive this rename
8455 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
8456 * but other filesystems susceptible to this race could return it, too.
8457 */
8458 if (error == ERECYCLE) {
8459 do_retry = 1;
8460 }
8461
8462 /*
8463 * For compound VNOPs, the authorization callback may return
8464 * ENOENT in case of racing hardlink lookups hitting the name
8465 * cache; redrive the lookup.
8466 */
8467 if (batched && error == ENOENT) {
8468 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8469 do_retry = 1;
8470 retry_count += 1;
8471 }
8472 }
8473
8474 goto out1;
8475 }
8476
8477 /* call out to allow 3rd party notification of rename.
8478 * Ignore result of kauth_authorize_fileop call.
8479 */
8480 kauth_authorize_fileop(vfs_context_ucred(ctx),
8481 KAUTH_FILEOP_RENAME,
8482 (uintptr_t)from_name, (uintptr_t)to_name);
8483 if (flags & VFS_RENAME_SWAP) {
8484 kauth_authorize_fileop(vfs_context_ucred(ctx),
8485 KAUTH_FILEOP_RENAME,
8486 (uintptr_t)to_name, (uintptr_t)from_name);
8487 }
8488
8489 #if CONFIG_FSE
8490 if (from_name != NULL && to_name != NULL) {
8491 if (from_truncated || to_truncated) {
8492 // set it here since only the from_finfo gets reported up to user space
8493 from_finfo.mode |= FSE_TRUNCATED_PATH;
8494 }
8495
8496 if (tvap && tvp) {
8497 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
8498 }
8499 if (fvap) {
8500 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
8501 }
8502
8503 if (tvp) {
8504 add_fsevent(FSE_RENAME, ctx,
8505 FSE_ARG_STRING, from_len_no_firmlink, from_name_no_firmlink,
8506 FSE_ARG_FINFO, &from_finfo,
8507 FSE_ARG_STRING, to_len_no_firmlink, to_name_no_firmlink,
8508 FSE_ARG_FINFO, &to_finfo,
8509 FSE_ARG_DONE);
8510 if (flags & VFS_RENAME_SWAP) {
8511 /*
8512 * Strictly speaking, swap is the equivalent of
8513 * *three* renames. FSEvents clients should only take
8514 * the events as a hint, so we only bother reporting
8515 * two.
8516 */
8517 add_fsevent(FSE_RENAME, ctx,
8518 FSE_ARG_STRING, to_len_no_firmlink, to_name_no_firmlink,
8519 FSE_ARG_FINFO, &to_finfo,
8520 FSE_ARG_STRING, from_len_no_firmlink, from_name_no_firmlink,
8521 FSE_ARG_FINFO, &from_finfo,
8522 FSE_ARG_DONE);
8523 }
8524 } else {
8525 add_fsevent(FSE_RENAME, ctx,
8526 FSE_ARG_STRING, from_len_no_firmlink, from_name_no_firmlink,
8527 FSE_ARG_FINFO, &from_finfo,
8528 FSE_ARG_STRING, to_len_no_firmlink, to_name_no_firmlink,
8529 FSE_ARG_DONE);
8530 }
8531 }
8532 #endif /* CONFIG_FSE */
8533
8534 /*
8535 * update filesystem's mount point data
8536 */
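/*
 * Illustrative example (not from the original source): if the covered
 * mount's f_mntonname was recorded as "/Volumes/Old" and the rename
 * target path is "/Volumes/New", the code below replaces the last
 * component of f_mntonname with the last component of the target,
 * yielding "/Volumes/New".  Sub-mount paths are adjusted first via
 * rename_submounts_callback().
 */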
8537 if (mntrename) {
8538 char *cp, *pathend, *mpname;
8539 char * tobuf;
8540 struct mount *mp;
8541 int maxlen;
8542 size_t len = 0;
8543
8544 mp = fvp->v_mountedhere;
8545
8546 if (vfs_busy(mp, LK_NOWAIT)) {
8547 error = EBUSY;
8548 goto out1;
8549 }
8550 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
8551
8552 if (UIO_SEG_IS_USER_SPACE(segflg)) {
8553 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
8554 } else {
8555 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
8556 }
8557 if (!error) {
8558 /* find current mount point prefix */
8559 pathend = &mp->mnt_vfsstat.f_mntonname[0];
8560 for (cp = pathend; *cp != '\0'; ++cp) {
8561 if (*cp == '/') {
8562 pathend = cp + 1;
8563 }
8564 }
8565 /* find last component of target name */
8566 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
8567 if (*cp == '/') {
8568 mpname = cp + 1;
8569 }
8570 }
8571
8572 /* Update f_mntonname of sub mounts */
8573 vfs_iterate(0, rename_submounts_callback, (void *)mp);
8574
8575 /* append name to prefix */
8576 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
8577 bzero(pathend, maxlen);
8578
8579 strlcpy(pathend, mpname, maxlen);
8580 }
8581 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
8582
8583 vfs_unbusy(mp);
8584
8585 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
8586 }
8587 /*
8588 * fix up name & parent pointers. note that we first
8589 * check that fvp has the same name/parent pointers it
8590 * had before the rename call... this is a 'weak' check
8591 * at best...
8592 *
8593 * XXX oparent and oname may not be set in the compound vnop case
8594 */
8595 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
8596 int update_flags;
8597
8598 update_flags = VNODE_UPDATE_NAME;
8599
8600 if (fdvp != tdvp) {
8601 update_flags |= VNODE_UPDATE_PARENT;
8602 }
8603
8604 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
8605 }
8606 out1:
8607 if (to_name != NULL) {
8608 RELEASE_PATH(to_name);
8609 to_name = NULL;
8610 }
8611 if (to_name_no_firmlink != NULL) {
8612 RELEASE_PATH(to_name_no_firmlink);
8613 to_name_no_firmlink = NULL;
8614 }
8615 if (from_name != NULL) {
8616 RELEASE_PATH(from_name);
8617 from_name = NULL;
8618 }
8619 if (from_name_no_firmlink != NULL) {
8620 RELEASE_PATH(from_name_no_firmlink);
8621 from_name_no_firmlink = NULL;
8622 }
8623 if (holding_mntlock) {
8624 mount_unlock_renames(locked_mp);
8625 mount_drop(locked_mp, 0);
8626 holding_mntlock = 0;
8627 }
8628 if (tdvp) {
8629 /*
8630 * nameidone has to happen before we vnode_put(tdvp)
8631 * since it may need to release the fs_nodelock on the tdvp
8632 */
8633 nameidone(tond);
8634
8635 if (tvp) {
8636 vnode_put(tvp);
8637 }
8638 vnode_put(tdvp);
8639 }
8640 if (fdvp) {
8641 /*
8642 * nameidone has to happen before we vnode_put(fdvp)
8643 * since it may need to release the fs_nodelock on the fdvp
8644 */
8645 nameidone(fromnd);
8646
8647 if (fvp) {
8648 vnode_put(fvp);
8649 }
8650 vnode_put(fdvp);
8651 }
8652
8653 /*
8654 * If things changed after we did the namei, then we will re-drive
8655 * this rename call from the top.
8656 */
8657 if (do_retry) {
8658 do_retry = 0;
8659 goto retry;
8660 }
8661
8662 FREE(__rename_data, M_TEMP);
8663 return error;
8664 }
8665
8666 int
8667 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
8668 {
8669 return renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
8670 AT_FDCWD, uap->to, UIO_USERSPACE, 0);
8671 }
8672
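/*
 * renameatx_np() is the extended variant that forwards per-call flags to
 * renameat_internal() (e.g. the swap behaviour surfaced above as
 * VFS_RENAME_SWAP).  A minimal userspace sketch, assuming the RENAME_SWAP
 * flag and declarations documented for renamex_np(2) (illustrative only,
 * not part of this file):
 *
 *     #include <stdio.h>      // renameatx_np(), RENAME_SWAP
 *     #include <fcntl.h>      // AT_FDCWD
 *
 *     // Atomically exchange two directory entries.
 *     if (renameatx_np(AT_FDCWD, "a.txt", AT_FDCWD, "b.txt", RENAME_SWAP) != 0) {
 *         perror("renameatx_np");
 *     }
 */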
8673 int
8674 renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
8675 {
8676 return renameat_internal(
8677 vfs_context_current(),
8678 uap->fromfd, uap->from,
8679 uap->tofd, uap->to,
8680 UIO_USERSPACE, uap->flags);
8681 }
8682
8683 int
8684 renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
8685 {
8686 return renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
8687 uap->tofd, uap->to, UIO_USERSPACE, 0);
8688 }
8689
8690 /*
8691 * Make a directory file.
8692 *
8693 * Returns: 0 Success
8694 * EEXIST
8695 * namei:???
8696 * vnode_authorize:???
8697 * vn_create:???
8698 */
8699 /* ARGSUSED */
8700 static int
8701 mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
8702 enum uio_seg segflg)
8703 {
8704 vnode_t vp, dvp;
8705 int error;
8706 int update_flags = 0;
8707 int batched;
8708 struct nameidata nd;
8709
8710 AUDIT_ARG(mode, vap->va_mode);
8711 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
8712 path, ctx);
8713 nd.ni_cnd.cn_flags |= WILLBEDIR;
8714 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
8715
8716 continue_lookup:
8717 error = nameiat(&nd, fd);
8718 if (error) {
8719 return error;
8720 }
8721 dvp = nd.ni_dvp;
8722 vp = nd.ni_vp;
8723
8724 if (vp != NULL) {
8725 error = EEXIST;
8726 goto out;
8727 }
8728
8729 batched = vnode_compound_mkdir_available(dvp);
8730
8731 VATTR_SET(vap, va_type, VDIR);
8732
8733 /*
8734 * XXX
8735 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
8736 * only get EEXIST or EISDIR for existing path components, and not that it could see
8737 * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
8738 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
8739 */
8740 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
8741 if (error == EACCES || error == EPERM) {
8742 int error2;
8743
8744 nameidone(&nd);
8745 vnode_put(dvp);
8746 dvp = NULLVP;
8747
8748 /*
8749 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
8750 * rather than EACCES if the target exists.
8751 */
8752 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
8753 path, ctx);
8754 error2 = nameiat(&nd, fd);
8755 if (error2) {
8756 goto out;
8757 } else {
8758 vp = nd.ni_vp;
8759 error = EEXIST;
8760 goto out;
8761 }
8762 }
8763
8764 goto out;
8765 }
8766
8767 /*
8768 * make the directory
8769 */
8770 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
8771 if (error == EKEEPLOOKING) {
8772 nd.ni_vp = vp;
8773 goto continue_lookup;
8774 }
8775
8776 goto out;
8777 }
8778
8779 // Make sure the name & parent pointers are hooked up
8780 if (vp->v_name == NULL) {
8781 update_flags |= VNODE_UPDATE_NAME;
8782 }
8783 if (vp->v_parent == NULLVP) {
8784 update_flags |= VNODE_UPDATE_PARENT;
8785 }
8786
8787 if (update_flags) {
8788 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
8789 }
8790
8791 #if CONFIG_FSE
8792 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
8793 #endif
8794
8795 out:
8796 /*
8797 * nameidone has to happen before we vnode_put(dvp)
8798 * since it may need to release the fs_nodelock on the dvp
8799 */
8800 nameidone(&nd);
8801
8802 if (vp) {
8803 vnode_put(vp);
8804 }
8805 if (dvp) {
8806 vnode_put(dvp);
8807 }
8808
8809 return error;
8810 }
8811
8812 /*
8813 * mkdir_extended: Create a directory; with extended security (ACL).
8814 *
8815 * Parameters: p Process requesting to create the directory
8816 * uap User argument descriptor (see below)
8817 * retval (ignored)
8818 *
8819 * Indirect: uap->path Path of directory to create
8820 * uap->mode Access permissions to set
8821 * uap->xsecurity ACL to set
8822 *
8823 * Returns: 0 Success
8824 * !0 Not success
8825 *
8826 */
8827 int
8828 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
8829 {
8830 int ciferror;
8831 kauth_filesec_t xsecdst;
8832 struct vnode_attr va;
8833
8834 AUDIT_ARG(owner, uap->uid, uap->gid);
8835
8836 xsecdst = NULL;
8837 if ((uap->xsecurity != USER_ADDR_NULL) &&
8838 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)) {
8839 return ciferror;
8840 }
8841
8842 VATTR_INIT(&va);
8843 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8844 if (xsecdst != NULL) {
8845 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
8846 }
8847
8848 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
8849 UIO_USERSPACE);
8850 if (xsecdst != NULL) {
8851 kauth_filesec_free(xsecdst);
8852 }
8853 return ciferror;
8854 }
8855
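/*
 * The mkdir() and mkdirat() wrappers below (and mkdir_extended() above)
 * compute the effective directory mode as (mode & ACCESSPERMS) & ~fd_cmask.
 * Worked example (illustrative): a request of 0777 in a process whose
 * umask (fd_cmask) is 022 yields 0777 & ~022 = 0755, which is what gets
 * recorded in va_mode.
 */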
8856 int
8857 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
8858 {
8859 struct vnode_attr va;
8860
8861 VATTR_INIT(&va);
8862 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8863
8864 return mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
8865 UIO_USERSPACE);
8866 }
8867
8868 int
8869 mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
8870 {
8871 struct vnode_attr va;
8872
8873 VATTR_INIT(&va);
8874 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8875
8876 return mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
8877 UIO_USERSPACE);
8878 }
8879
8880 static int
8881 rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
8882 enum uio_seg segflg, int unlink_flags)
8883 {
8884 vnode_t vp, dvp;
8885 int error;
8886 struct nameidata nd;
8887 char *path = NULL;
8888 char *no_firmlink_path = NULL;
8889 int len_path = 0;
8890 int len_no_firmlink_path = 0;
8891 int has_listeners = 0;
8892 int need_event = 0;
8893 int truncated_path = 0;
8894 int truncated_no_firmlink_path = 0;
8895 #if CONFIG_FSE
8896 struct vnode_attr va;
8897 #endif /* CONFIG_FSE */
8898 struct vnode_attr *vap = NULL;
8899 int restart_count = 0;
8900 int batched;
8901
8902 int restart_flag;
8903
8904 /*
8905 * This loop exists to restart rmdir in the unlikely case that two
8906 * processes are simultaneously trying to remove the same directory
8907 * containing orphaned AppleDouble files.
8908 */
8909 do {
8910 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
8911 segflg, dirpath, ctx);
8912 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
8913 continue_lookup:
8914 restart_flag = 0;
8915 vap = NULL;
8916
8917 error = nameiat(&nd, fd);
8918 if (error) {
8919 return error;
8920 }
8921
8922 dvp = nd.ni_dvp;
8923 vp = nd.ni_vp;
8924
8925 if (vp) {
8926 batched = vnode_compound_rmdir_available(vp);
8927
8928 if (vp->v_flag & VROOT) {
8929 /*
8930 * The root of a mounted filesystem cannot be deleted.
8931 */
8932 error = EBUSY;
8933 goto out;
8934 }
8935
8936 #if DEVELOPMENT || DEBUG
8937 /*
8938 * XXX VSWAP: Check for entitlements or special flag here
8939 * so we can restrict access appropriately.
8940 */
8941 #else /* DEVELOPMENT || DEBUG */
8942
8943 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
8944 error = EPERM;
8945 goto out;
8946 }
8947 #endif /* DEVELOPMENT || DEBUG */
8948
8949 /*
8950 * Removed a check here; we used to abort if vp's vid
8951 * was not the same as what we'd seen the last time around.
8952 * I do not think that check was valid, because if we retry
8953 * and all dirents are gone, the directory could legitimately
8954 * be recycled but still be present in a situation where we would
8955 * have had permission to delete. Therefore, we won't make
8956 * an effort to preserve that check now that we may not have a
8957 * vp here.
8958 */
8959
8960 if (!batched) {
8961 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
8962 if (error) {
8963 if (error == ENOENT) {
8964 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8965 restart_flag = 1;
8966 restart_count += 1;
8967 }
8968 }
8969 goto out;
8970 }
8971 }
8972 } else {
8973 batched = 1;
8974
8975 if (!vnode_compound_rmdir_available(dvp)) {
8976 panic("No error, but no compound rmdir?");
8977 }
8978 }
8979
8980 #if CONFIG_FSE
8981 fse_info finfo;
8982
8983 need_event = need_fsevent(FSE_DELETE, dvp);
8984 if (need_event) {
8985 if (!batched) {
8986 get_fse_info(vp, &finfo, ctx);
8987 } else {
8988 error = vfs_get_notify_attributes(&va);
8989 if (error) {
8990 goto out;
8991 }
8992
8993 vap = &va;
8994 }
8995 }
8996 #endif
8997 has_listeners = kauth_authorize_fileop_has_listeners();
8998 if (need_event || has_listeners) {
8999 if (path == NULL) {
9000 GET_PATH(path);
9001 if (path == NULL) {
9002 error = ENOMEM;
9003 goto out;
9004 }
9005 }
9006
9007 len_path = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
9008
9009 if (no_firmlink_path == NULL) {
9010 GET_PATH(no_firmlink_path);
9011 if (no_firmlink_path == NULL) {
9012 error = ENOMEM;
9013 goto out;
9014 }
9015 }
9016
9017 len_no_firmlink_path = safe_getpath_no_firmlink(dvp, nd.ni_cnd.cn_nameptr, no_firmlink_path, MAXPATHLEN, &truncated_no_firmlink_path);
9018 #if CONFIG_FSE
9019 if (truncated_no_firmlink_path) {
9020 finfo.mode |= FSE_TRUNCATED_PATH;
9021 }
9022 #endif
9023 }
9024
9025 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
9026 nd.ni_vp = vp;
9027 if (vp == NULLVP) {
9028 /* Couldn't find a vnode */
9029 goto out;
9030 }
9031
9032 if (error == EKEEPLOOKING) {
9033 goto continue_lookup;
9034 } else if (batched && error == ENOENT) {
9035 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
9036 /*
9037 * For compound VNOPs, the authorization callback
9038 * may return ENOENT in case of racing hard link lookups;
9039 * redrive the lookup.
9040 */
9041 restart_flag = 1;
9042 restart_count += 1;
9043 goto out;
9044 }
9045 }
9046
9047 /*
9048 * XXX There's no provision for passing flags
9049 * to VNOP_RMDIR(). So, if vn_rmdir() fails
9050 * because it's not empty, then we try again
9051 * with VNOP_REMOVE(), passing in a special
9052 * flag that clever file systems will know
9053 * how to handle.
9054 */
9055 if (error == ENOTEMPTY &&
9056 (unlink_flags & VNODE_REMOVE_DATALESS_DIR) != 0) {
9057 /*
9058 * If this fails, we want to keep the original
9059 * error.
9060 */
9061 if (vn_remove(dvp, &vp, &nd,
9062 VNODE_REMOVE_DATALESS_DIR, vap, ctx) == 0) {
9063 error = 0;
9064 }
9065 }
9066
9067 #if CONFIG_APPLEDOUBLE
9068 /*
9069 * Special case to remove orphaned AppleDouble
9070 * files. I don't like putting this in the kernel,
9071 * but carbon does not like putting this in carbon either,
9072 * so here we are.
9073 */
9074 if (error == ENOTEMPTY) {
9075 int ad_error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
9076 if (ad_error == EBUSY) {
9077 error = ad_error;
9078 goto out;
9079 }
9080
9081
9082 /*
9083 * Assuming everything went well, we will try the RMDIR again
9084 */
9085 if (!ad_error) {
9086 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
9087 }
9088 }
9089 #endif /* CONFIG_APPLEDOUBLE */
9090 /*
9091 * Call out to allow 3rd party notification of delete.
9092 * Ignore result of kauth_authorize_fileop call.
9093 */
9094 if (!error) {
9095 if (has_listeners) {
9096 kauth_authorize_fileop(vfs_context_ucred(ctx),
9097 KAUTH_FILEOP_DELETE,
9098 (uintptr_t)vp,
9099 (uintptr_t)path);
9100 }
9101
9102 if (vp->v_flag & VISHARDLINK) {
9103 // see the comment in unlink1() about why we update
9104 // the parent of a hard link when it is removed
9105 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
9106 }
9107
9108 #if CONFIG_FSE
9109 if (need_event) {
9110 if (vap) {
9111 vnode_get_fse_info_from_vap(vp, &finfo, vap);
9112 }
9113 add_fsevent(FSE_DELETE, ctx,
9114 FSE_ARG_STRING, len_no_firmlink_path, no_firmlink_path,
9115 FSE_ARG_FINFO, &finfo,
9116 FSE_ARG_DONE);
9117 }
9118 #endif
9119 }
9120
9121 out:
9122 if (path != NULL) {
9123 RELEASE_PATH(path);
9124 path = NULL;
9125 }
9126
9127 if (no_firmlink_path != NULL) {
9128 RELEASE_PATH(no_firmlink_path);
9129 no_firmlink_path = NULL;
9130 }
9131
9132 /*
9133 * nameidone has to happen before we vnode_put(dvp)
9134 * since it may need to release the fs_nodelock on the dvp
9135 */
9136 nameidone(&nd);
9137 vnode_put(dvp);
9138
9139 if (vp) {
9140 vnode_put(vp);
9141 }
9142
9143 if (restart_flag == 0) {
9144 wakeup_one((caddr_t)vp);
9145 return error;
9146 }
9147 tsleep(vp, PVFS, "rm AD", 1);
9148 } while (restart_flag != 0);
9149
9150 return error;
9151 }
9152
9153 /*
9154 * Remove a directory file.
9155 */
9156 /* ARGSUSED */
9157 int
9158 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
9159 {
9160 return rmdirat_internal(vfs_context_current(), AT_FDCWD,
9161 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE, 0);
9162 }
9163
9164 /* Get direntry length padded to 8 byte alignment */
9165 #define DIRENT64_LEN(namlen) \
9166 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
9167
9168 /* Get dirent length padded to 4 byte alignment */
9169 #define DIRENT_LEN(namelen) \
9170 ((sizeof(struct dirent) + (namelen + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)
9171
9172 /* Get the end of this dirent */
9173 #define DIRENT_END(dep) \
9174 (((char *)(dep)) + (dep)->d_reclen - 1)
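/*
 * Worked example (illustrative): with the Darwin struct layouts these
 * macros reduce to "fixed header + namelen + 1 (for the NUL), rounded up
 * to the record alignment".  For a 3-character name, DIRENT64_LEN(3)
 * works out to 32 bytes and DIRENT_LEN(3) to 12 bytes -- the same
 * worst-case expansion figures used to size the conversion buffer in
 * vnode_readdir64() below.
 */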
9175
9176 errno_t
9177 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
9178 int *numdirent, vfs_context_t ctxp)
9179 {
9180 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
9181 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
9182 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
9183 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
9184 } else {
9185 size_t bufsize;
9186 void * bufptr;
9187 uio_t auio;
9188 struct direntry *entry64;
9189 struct dirent *dep;
9190 int bytesread;
9191 int error;
9192
9193 /*
9194 * We're here because the underlying file system does not
9195 * support direntries, or we mounted denying support, so we must
9196 * fall back to dirents and convert them to direntries.
9197 *
9198 * Our kernel buffer needs to be smaller since re-packing will
9199 * expand each dirent. The worst case (when the name length
9200 * is 3 or less) corresponds to a struct direntry size of 32
9201 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
9202 * (4-byte aligned). So having a buffer that is 3/8 the size
9203 * will prevent us from reading more than we can pack.
9204 *
9205 * Since this buffer is wired memory, we will limit the
9206 * buffer size to a maximum of 32K. We would really like to
9207 * use 32K in the MIN(), but we use magic number 87371 to
9208 * prevent uio_resid() * 3 / 8 from overflowing.
9209 */
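/*
 * Illustrative arithmetic: 12/32 reduces to 3/8, so a buffer sized at
 * 3/8 of the user's residual can always hold the repacked direntries.
 * The 87371 cap keeps the wired buffer just under 32K:
 * 87371 * 3 / 8 = 32764 bytes.
 */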
9210 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
9211 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
9212 if (bufptr == NULL) {
9213 return ENOMEM;
9214 }
9215
9216 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
9217 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
9218 auio->uio_offset = uio->uio_offset;
9219
9220 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
9221
9222 dep = (struct dirent *)bufptr;
9223 bytesread = bufsize - uio_resid(auio);
9224
9225 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
9226 M_TEMP, M_WAITOK);
9227 /*
9228 * Convert all the entries and copy them out to user's buffer.
9229 */
9230 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
9231 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
9232
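/*
 * Sanity-check each record before trusting d_reclen: reject an entry
 * whose declared end runs past the bytes actually read, or whose
 * d_reclen is too small to hold the name it claims to carry.
 */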
9233 if (DIRENT_END(dep) > ((char *)bufptr + bytesread) ||
9234 DIRENT_LEN(dep->d_namlen) > dep->d_reclen) {
9235 printf("%s: %s: Bad dirent received from directory %s\n", __func__,
9236 vp->v_mount->mnt_vfsstat.f_mntonname,
9237 vp->v_name ? vp->v_name : "<unknown>");
9238 error = EIO;
9239 break;
9240 }
9241
9242 bzero(entry64, enbufsize);
9243 /* Convert a dirent to a dirent64. */
9244 entry64->d_ino = dep->d_ino;
9245 entry64->d_seekoff = 0;
9246 entry64->d_reclen = enbufsize;
9247 entry64->d_namlen = dep->d_namlen;
9248 entry64->d_type = dep->d_type;
9249 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
9250
9251 /* Move to next entry. */
9252 dep = (struct dirent *)((char *)dep + dep->d_reclen);
9253
9254 /* Copy entry64 to user's buffer. */
9255 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
9256 }
9257
9258 /* Update the real offset using the offset we got from VNOP_READDIR. */
9259 if (error == 0) {
9260 uio->uio_offset = auio->uio_offset;
9261 }
9262 uio_free(auio);
9263 FREE(bufptr, M_TEMP);
9264 FREE(entry64, M_TEMP);
9265 return error;
9266 }
9267 }
9268
9269 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
9270
9271 /*
9272 * Read a block of directory entries in a file system independent format.
9273 */
9274 static int
9275 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
9276 off_t *offset, int *eofflag, int flags)
9277 {
9278 vnode_t vp;
9279 struct vfs_context context = *vfs_context_current(); /* local copy */
9280 struct fileproc *fp;
9281 uio_t auio;
9282 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9283 off_t loff;
9284 int error, numdirent;
9285 char uio_buf[UIO_SIZEOF(1)];
9286
9287 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
9288 if (error) {
9289 return error;
9290 }
9291 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
9292 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
9293 error = EBADF;
9294 goto out;
9295 }
9296
9297 if (bufsize > GETDIRENTRIES_MAXBUFSIZE) {
9298 bufsize = GETDIRENTRIES_MAXBUFSIZE;
9299 }
9300
9301 #if CONFIG_MACF
9302 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
9303 if (error) {
9304 goto out;
9305 }
9306 #endif
9307 if ((error = vnode_getwithref(vp))) {
9308 goto out;
9309 }
9310 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
9311
9312 unionread:
9313 if (vp->v_type != VDIR) {
9314 (void)vnode_put(vp);
9315 error = EINVAL;
9316 goto out;
9317 }
9318
9319 #if CONFIG_MACF
9320 error = mac_vnode_check_readdir(&context, vp);
9321 if (error != 0) {
9322 (void)vnode_put(vp);
9323 goto out;
9324 }
9325 #endif /* MAC */
9326
9327 loff = fp->f_fglob->fg_offset;
9328 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
9329 uio_addiov(auio, bufp, bufsize);
9330
9331 if (flags & VNODE_READDIR_EXTENDED) {
9332 error = vnode_readdir64(vp, auio, flags, eofflag, &numdirent, &context);
9333 fp->f_fglob->fg_offset = uio_offset(auio);
9334 } else {
9335 error = VNOP_READDIR(vp, auio, 0, eofflag, &numdirent, &context);
9336 fp->f_fglob->fg_offset = uio_offset(auio);
9337 }
9338 if (error) {
9339 (void)vnode_put(vp);
9340 goto out;
9341 }
9342
9343 if ((user_ssize_t)bufsize == uio_resid(auio)) {
9344 if (union_dircheckp) {
9345 error = union_dircheckp(&vp, fp, &context);
9346 if (error == -1) {
9347 goto unionread;
9348 }
9349 if (error) {
9350 (void)vnode_put(vp);
9351 goto out;
9352 }
9353 }
9354
9355 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
9356 struct vnode *tvp = vp;
9357 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
9358 vnode_ref(vp);
9359 fp->f_fglob->fg_data = (caddr_t) vp;
9360 fp->f_fglob->fg_offset = 0;
9361 vnode_rele(tvp);
9362 vnode_put(tvp);
9363 goto unionread;
9364 }
9365 vp = tvp;
9366 }
9367 }
9368
9369 vnode_put(vp);
9370 if (offset) {
9371 *offset = loff;
9372 }
9373
9374 *bytesread = bufsize - uio_resid(auio);
9375 out:
9376 file_drop(fd);
9377 return error;
9378 }
9379
9380
9381 int
9382 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
9383 {
9384 off_t offset;
9385 ssize_t bytesread;
9386 int error, eofflag;
9387
9388 AUDIT_ARG(fd, uap->fd);
9389 error = getdirentries_common(uap->fd, uap->buf, uap->count,
9390 &bytesread, &offset, &eofflag, 0);
9391
9392 if (error == 0) {
9393 if (proc_is64bit(p)) {
9394 user64_long_t base = (user64_long_t)offset;
9395 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
9396 } else {
9397 user32_long_t base = (user32_long_t)offset;
9398 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
9399 }
9400 *retval = bytesread;
9401 }
9402 return error;
9403 }
9404
9405 int
9406 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
9407 {
9408 off_t offset;
9409 ssize_t bytesread;
9410 int error, eofflag;
9411 user_size_t bufsize;
9412
9413 AUDIT_ARG(fd, uap->fd);
9414
9415 /*
9416 * If the buffer is at least GETDIRENTRIES64_EXTENDED_BUFSIZE large,
9417 * then the kernel carves out the last 4 bytes to return extended
9418 * information to userspace (namely whether we reached EOF with this call).
9419 */
9420 if (uap->bufsize >= GETDIRENTRIES64_EXTENDED_BUFSIZE) {
9421 bufsize = uap->bufsize - sizeof(getdirentries64_flags_t);
9422 } else {
9423 bufsize = uap->bufsize;
9424 }
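/*
 * Illustrative buffer layout when the caller opts in by passing at least
 * GETDIRENTRIES64_EXTENDED_BUFSIZE bytes (sketch, not from the original
 * source):
 *
 *     |<----------------- uap->bufsize ----------------->|
 *     | packed struct direntry records ...       | flags |
 *                                                  ^ final 4 bytes; receive
 *                                                    GETDIRENTRIES64_EOF when
 *                                                    the directory is exhausted
 */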
9425
9426 error = getdirentries_common(uap->fd, uap->buf, bufsize,
9427 &bytesread, &offset, &eofflag, VNODE_READDIR_EXTENDED);
9428
9429 if (error == 0) {
9430 *retval = bytesread;
9431 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
9432
9433 if (error == 0 && uap->bufsize >= GETDIRENTRIES64_EXTENDED_BUFSIZE) {
9434 getdirentries64_flags_t flags = 0;
9435 if (eofflag) {
9436 flags |= GETDIRENTRIES64_EOF;
9437 }
9438 error = copyout(&flags, (user_addr_t)uap->buf + bufsize,
9439 sizeof(flags));
9440 }
9441 }
9442 return error;
9443 }
9444
9445
9446 /*
9447 * Set the mode mask for creation of filesystem nodes.
9448 * XXX implement xsecurity
9449 */
9450 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
9451 static int
9452 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
9453 {
9454 struct filedesc *fdp;
9455
9456 AUDIT_ARG(mask, newmask);
9457 proc_fdlock(p);
9458 fdp = p->p_fd;
9459 *retval = fdp->fd_cmask;
9460 fdp->fd_cmask = newmask & ALLPERMS;
9461 proc_fdunlock(p);
9462 return 0;
9463 }
9464
9465 /*
9466 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
9467 *
9468 * Parameters: p Process requesting to set the umask
9469 * uap User argument descriptor (see below)
9470 * retval umask of the process (parameter p)
9471 *
9472 * Indirect: uap->newmask umask to set
9473 * uap->xsecurity ACL to set
9474 *
9475 * Returns: 0 Success
9476 * !0 Not success
9477 *
9478 */
9479 int
9480 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
9481 {
9482 int ciferror;
9483 kauth_filesec_t xsecdst;
9484
9485 xsecdst = KAUTH_FILESEC_NONE;
9486 if (uap->xsecurity != USER_ADDR_NULL) {
9487 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
9488 return ciferror;
9489 }
9490 } else {
9491 xsecdst = KAUTH_FILESEC_NONE;
9492 }
9493
9494 ciferror = umask1(p, uap->newmask, xsecdst, retval);
9495
9496 if (xsecdst != KAUTH_FILESEC_NONE) {
9497 kauth_filesec_free(xsecdst);
9498 }
9499 return ciferror;
9500 }
9501
9502 int
9503 umask(proc_t p, struct umask_args *uap, int32_t *retval)
9504 {
9505 return umask1(p, uap->newmask, UMASK_NOXSECURITY, retval);
9506 }
9507
9508 /*
9509 * Void all references to file by ripping underlying filesystem
9510 * away from vnode.
9511 */
9512 /* ARGSUSED */
9513 int
9514 revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
9515 {
9516 vnode_t vp;
9517 struct vnode_attr va;
9518 vfs_context_t ctx = vfs_context_current();
9519 int error;
9520 struct nameidata nd;
9521
9522 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
9523 uap->path, ctx);
9524 error = namei(&nd);
9525 if (error) {
9526 return error;
9527 }
9528 vp = nd.ni_vp;
9529
9530 nameidone(&nd);
9531
9532 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
9533 error = ENOTSUP;
9534 goto out;
9535 }
9536
9537 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
9538 error = EBUSY;
9539 goto out;
9540 }
9541
9542 #if CONFIG_MACF
9543 error = mac_vnode_check_revoke(ctx, vp);
9544 if (error) {
9545 goto out;
9546 }
9547 #endif
9548
9549 VATTR_INIT(&va);
9550 VATTR_WANTED(&va, va_uid);
9551 if ((error = vnode_getattr(vp, &va, ctx))) {
9552 goto out;
9553 }
9554 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
9555 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
9556 goto out;
9557 }
9558 if (vp->v_usecount > 0 || (vnode_isaliased(vp))) {
9559 VNOP_REVOKE(vp, REVOKEALL, ctx);
9560 }
9561 out:
9562 vnode_put(vp);
9563 return error;
9564 }
9565
9566
9567 /*
9568 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
9569 * The following system calls are designed to support features
9570 * which are specific to the HFS & HFS Plus volume formats
9571 */
9572
9573
9574 /*
9575 * Obtain attribute information on objects in a directory while enumerating
9576 * the directory.
9577 */
9578 /* ARGSUSED */
9579 int
9580 getdirentriesattr(proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
9581 {
9582 vnode_t vp;
9583 struct fileproc *fp;
9584 uio_t auio = NULL;
9585 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9586 uint32_t count = 0, savecount = 0;
9587 uint32_t newstate = 0;
9588 int error, eofflag;
9589 uint32_t loff = 0;
9590 struct attrlist attributelist;
9591 vfs_context_t ctx = vfs_context_current();
9592 int fd = uap->fd;
9593 char uio_buf[UIO_SIZEOF(1)];
9594 kauth_action_t action;
9595
9596 AUDIT_ARG(fd, fd);
9597
9598 /* Get the attributes into kernel space */
9599 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
9600 return error;
9601 }
9602 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
9603 return error;
9604 }
9605 savecount = count;
9606 if ((error = fp_getfvp(p, fd, &fp, &vp))) {
9607 return error;
9608 }
9609 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
9610 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
9611 error = EBADF;
9612 goto out;
9613 }
9614
9615
9616 #if CONFIG_MACF
9617 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
9618 fp->f_fglob);
9619 if (error) {
9620 goto out;
9621 }
9622 #endif
9623
9624
9625 if ((error = vnode_getwithref(vp))) {
9626 goto out;
9627 }
9628
9629 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
9630
9631 unionread:
9632 if (vp->v_type != VDIR) {
9633 (void)vnode_put(vp);
9634 error = EINVAL;
9635 goto out;
9636 }
9637
9638 #if CONFIG_MACF
9639 error = mac_vnode_check_readdir(ctx, vp);
9640 if (error != 0) {
9641 (void)vnode_put(vp);
9642 goto out;
9643 }
9644 #endif /* MAC */
9645
9646 /* set up the uio structure which will contain the users return buffer */
9647 loff = fp->f_fglob->fg_offset;
9648 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
9649 uio_addiov(auio, uap->buffer, uap->buffersize);
9650
9651 /*
9652 * If the only item requested is file names, we can let that pass with
9653 * just LIST_DIRECTORY. If they want any other attributes, that means
9654 * they need SEARCH as well.
9655 */
9656 action = KAUTH_VNODE_LIST_DIRECTORY;
9657 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
9658 attributelist.fileattr || attributelist.dirattr) {
9659 action |= KAUTH_VNODE_SEARCH;
9660 }
9661
9662 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
9663 /* Believe it or not, uap->options only has 32-bits of valid
9664 * info, so truncate before extending again */
9665
9666 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
9667 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
9668 }
9669
9670 if (error) {
9671 (void) vnode_put(vp);
9672 goto out;
9673 }
9674
9675 /*
9676 * If we've got the last entry of a directory in a union mount
9677 * then reset the eofflag and pretend there's still more to come.
9678 * The next call will again set eofflag and the buffer will be empty,
9679 * so traverse to the underlying directory and do the directory
9680 * read there.
9681 */
9682 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
9683 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
9684 eofflag = 0;
9685 } else { // Empty buffer
9686 struct vnode *tvp = vp;
9687 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
9688 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
9689 fp->f_fglob->fg_data = (caddr_t) vp;
9690 fp->f_fglob->fg_offset = 0; // reset index for new dir
9691 count = savecount;
9692 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
9693 vnode_put(tvp);
9694 goto unionread;
9695 }
9696 vp = tvp;
9697 }
9698 }
9699
9700 (void)vnode_put(vp);
9701
9702 if (error) {
9703 goto out;
9704 }
9705 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
9706
9707 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count)))) {
9708 goto out;
9709 }
9710 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate)))) {
9711 goto out;
9712 }
9713 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff)))) {
9714 goto out;
9715 }
9716
9717 *retval = eofflag; /* similar to getdirentries */
9718 error = 0;
9719 out:
9720 file_drop(fd);
9721 return error; /* error was set earlier; retval is 0 or 1 now */
9722 } /* end of getdirentriesattr system call */
9723
9724 /*
9725 * Exchange data between two files
9726 */
9727
9728 /* ARGSUSED */
9729 int
9730 exchangedata(__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
9731 {
9732 struct nameidata fnd, snd;
9733 vfs_context_t ctx = vfs_context_current();
9734 vnode_t fvp;
9735 vnode_t svp;
9736 int error;
9737 u_int32_t nameiflags;
9738 char *fpath = NULL;
9739 char *spath = NULL;
9740 int flen = 0, slen = 0;
9741 int from_truncated = 0, to_truncated = 0;
9742 #if CONFIG_FSE
9743 fse_info f_finfo, s_finfo;
9744 #endif
9745
9746 nameiflags = 0;
9747 if ((uap->options & FSOPT_NOFOLLOW) == 0) {
9748 nameiflags |= FOLLOW;
9749 }
9750
9751 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
9752 UIO_USERSPACE, uap->path1, ctx);
9753
9754 error = namei(&fnd);
9755 if (error) {
9756 goto out2;
9757 }
9758
9759 nameidone(&fnd);
9760 fvp = fnd.ni_vp;
9761
9762 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
9763 UIO_USERSPACE, uap->path2, ctx);
9764
9765 error = namei(&snd);
9766 if (error) {
9767 vnode_put(fvp);
9768 goto out2;
9769 }
9770 nameidone(&snd);
9771 svp = snd.ni_vp;
9772
9773 /*
9774 * if the files are the same, return EINVAL
9775 */
9776 if (svp == fvp) {
9777 error = EINVAL;
9778 goto out;
9779 }
9780
9781 /*
9782 * if the files are on different volumes, return an error
9783 */
9784 if (svp->v_mount != fvp->v_mount) {
9785 error = EXDEV;
9786 goto out;
9787 }
9788
9789 /* If they're not files, return an error */
9790 if ((vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
9791 error = EINVAL;
9792 goto out;
9793 }
9794
9795 #if CONFIG_MACF
9796 error = mac_vnode_check_exchangedata(ctx,
9797 fvp, svp);
9798 if (error) {
9799 goto out;
9800 }
9801 #endif
9802 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
9803 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0)) {
9804 goto out;
9805 }
9806
9807 if (
9808 #if CONFIG_FSE
9809 need_fsevent(FSE_EXCHANGE, fvp) ||
9810 #endif
9811 kauth_authorize_fileop_has_listeners()) {
9812 GET_PATH(fpath);
9813 GET_PATH(spath);
9814 if (fpath == NULL || spath == NULL) {
9815 error = ENOMEM;
9816 goto out;
9817 }
9818
9819 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
9820 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
9821
9822 #if CONFIG_FSE
9823 get_fse_info(fvp, &f_finfo, ctx);
9824 get_fse_info(svp, &s_finfo, ctx);
9825 if (from_truncated || to_truncated) {
9826 // set it here since only the f_finfo gets reported up to user space
9827 f_finfo.mode |= FSE_TRUNCATED_PATH;
9828 }
9829 #endif
9830 }
9831 /* Ok, make the call */
9832 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
9833
9834 if (error == 0) {
9835 const char *tmpname;
9836
9837 if (fpath != NULL && spath != NULL) {
9838 /* call out to allow 3rd party notification of exchangedata.
9839 * Ignore result of kauth_authorize_fileop call.
9840 */
9841 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
9842 (uintptr_t)fpath, (uintptr_t)spath);
9843 }
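/*
 * The file contents have been exchanged on disk, so swap the cached
 * identities (v_name and, if they differ, v_parent) under the name
 * cache lock to keep the namecache consistent with the vnodes' new
 * contents.
 */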
9844 name_cache_lock();
9845
9846 tmpname = fvp->v_name;
9847 fvp->v_name = svp->v_name;
9848 svp->v_name = tmpname;
9849
9850 if (fvp->v_parent != svp->v_parent) {
9851 vnode_t tmp;
9852
9853 tmp = fvp->v_parent;
9854 fvp->v_parent = svp->v_parent;
9855 svp->v_parent = tmp;
9856 }
9857 name_cache_unlock();
9858
9859 #if CONFIG_FSE
9860 if (fpath != NULL && spath != NULL) {
9861 add_fsevent(FSE_EXCHANGE, ctx,
9862 FSE_ARG_STRING, flen, fpath,
9863 FSE_ARG_FINFO, &f_finfo,
9864 FSE_ARG_STRING, slen, spath,
9865 FSE_ARG_FINFO, &s_finfo,
9866 FSE_ARG_DONE);
9867 }
9868 #endif
9869 }
9870
9871 out:
9872 if (fpath != NULL) {
9873 RELEASE_PATH(fpath);
9874 }
9875 if (spath != NULL) {
9876 RELEASE_PATH(spath);
9877 }
9878 vnode_put(svp);
9879 vnode_put(fvp);
9880 out2:
9881 return error;
9882 }
9883
9884 /*
9885 * Return (in MB) the amount of freespace on the given vnode's volume.
9886 */
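/*
 * Worked example (illustrative): with f_bavail = 1,000,000 free blocks and
 * f_bsize = 4096 bytes, the product is 4,096,000,000 bytes; the right shift
 * by 20 divides by 1 MiB, yielding 3906 MB.
 */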
9887 uint32_t freespace_mb(vnode_t vp);
9888
9889 uint32_t
9890 freespace_mb(vnode_t vp)
9891 {
9892 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
9893 return ((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
9894 vp->v_mount->mnt_vfsstat.f_bsize) >> 20;
9895 }
9896
9897 #if CONFIG_SEARCHFS
9898
9899 /* ARGSUSED */
9900
9901 int
9902 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
9903 {
9904 vnode_t vp, tvp;
9905 int i, error = 0;
9906 int fserror = 0;
9907 struct nameidata nd;
9908 struct user64_fssearchblock searchblock;
9909 struct searchstate *state;
9910 struct attrlist *returnattrs;
9911 struct timeval timelimit;
9912 void *searchparams1, *searchparams2;
9913 uio_t auio = NULL;
9914 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9915 uint32_t nummatches;
9916 int mallocsize;
9917 uint32_t nameiflags;
9918 vfs_context_t ctx = vfs_context_current();
9919 char uio_buf[UIO_SIZEOF(1)];
9920
9921 /* Start by copying in fsearchblock parameter list */
9922 if (IS_64BIT_PROCESS(p)) {
9923 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
9924 timelimit.tv_sec = searchblock.timelimit.tv_sec;
9925 timelimit.tv_usec = searchblock.timelimit.tv_usec;
9926 } else {
9927 struct user32_fssearchblock tmp_searchblock;
9928
9929 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
9930 // munge into 64-bit version
9931 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
9932 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
9933 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
9934 searchblock.maxmatches = tmp_searchblock.maxmatches;
9935 /*
9936 * These casts are safe. We will promote tv_sec from a 32 bit long to a 64 bit
9937 * long if necessary, and tv_usec is already a signed 32 bit int.
9938 */
9939 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
9940 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
9941 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
9942 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
9943 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
9944 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
9945 searchblock.searchattrs = tmp_searchblock.searchattrs;
9946 }
9947 if (error) {
9948 return error;
9949 }
9950
9951 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
9952 */
9953 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
9954 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS) {
9955 return EINVAL;
9956 }
9957
9958 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
9959 /* It all has to go into local memory and it's not that big, so we might as well put it all together. */
9960 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
9961 /* block. */
9962 /* */
9963 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
9964 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
9965 /* assumes the size is still 556 bytes it will continue to work */
9966
9967 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
9968 sizeof(struct attrlist) + sizeof(struct searchstate) + (2 * sizeof(uint32_t));
9969
9970 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
9971
9972 /* Now set up the various pointers to the correct place in our newly allocated memory */
9973
9974 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
9975 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
9976 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof(struct attrlist));
9977
9978 /* Now copy in the stuff given our local variables. */
9979
9980 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1))) {
9981 goto freeandexit;
9982 }
9983
9984 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2))) {
9985 goto freeandexit;
9986 }
9987
9988 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist)))) {
9989 goto freeandexit;
9990 }
9991
9992 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate)))) {
9993 goto freeandexit;
9994 }
9995
9996 /*
9997 * When searching a union mount, need to set the
9998 * start flag at the first call on each layer to
9999 * reset state for the new volume.
10000 */
10001 if (uap->options & SRCHFS_START) {
10002 state->ss_union_layer = 0;
10003 } else {
10004 uap->options |= state->ss_union_flags;
10005 }
10006 state->ss_union_flags = 0;
10007
10008 /*
10009 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
10010 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
10011 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
10012 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
10013 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
10014 */
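/*
 * Expected layout of searchparams1 in the ATTR_CMN_NAME case (sketch,
 * inferred from the validation below):
 *
 *     [u_int32_t buffer length]
 *     [attrreference_t { attr_dataoffset, attr_length }]
 *     [... name bytes located attr_dataoffset bytes from the attrreference_t ...]
 *
 * The checks reject negative offsets, names longer than MAXPATHLEN, and
 * offset/length pairs that land outside sizeofsearchparams1.
 */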
10015
10016 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
10017 attrreference_t* string_ref;
10018 u_int32_t* start_length;
10019 user64_size_t param_length;
10020
10021 /* validate searchparams1 */
10022 param_length = searchblock.sizeofsearchparams1;
10023 /* skip the word that specifies length of the buffer */
10024 start_length = (u_int32_t*) searchparams1;
10025 start_length = start_length + 1;
10026 string_ref = (attrreference_t*) start_length;
10027
10028 /* ensure no negative offsets or too big offsets */
10029 if (string_ref->attr_dataoffset < 0) {
10030 error = EINVAL;
10031 goto freeandexit;
10032 }
10033 if (string_ref->attr_length > MAXPATHLEN) {
10034 error = EINVAL;
10035 goto freeandexit;
10036 }
10037
10038 /* Check for pointer overflow in the string ref */
10039 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
10040 error = EINVAL;
10041 goto freeandexit;
10042 }
10043
10044 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
10045 error = EINVAL;
10046 goto freeandexit;
10047 }
10048 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
10049 error = EINVAL;
10050 goto freeandexit;
10051 }
10052 }
10053
10054 /* set up the uio structure which will contain the users return buffer */
10055 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
10056 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
10057
10058 nameiflags = 0;
10059 if ((uap->options & FSOPT_NOFOLLOW) == 0) {
10060 nameiflags |= FOLLOW;
10061 }
10062 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
10063 UIO_USERSPACE, uap->path, ctx);
10064
10065 error = namei(&nd);
10066 if (error) {
10067 goto freeandexit;
10068 }
10069 vp = nd.ni_vp;
10070 nameidone(&nd);
10071
10072 /*
10073 * Switch to the root vnode for the volume
10074 */
10075 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
10076 vnode_put(vp);
10077 if (error) {
10078 goto freeandexit;
10079 }
10080 vp = tvp;
10081
10082 /*
10083 * If it's a union mount, the path lookup takes
10084 * us to the top layer. But we may need to descend
10085 * to a lower layer. For non-union mounts the layer
10086 * is always zero.
10087 */
10088 for (i = 0; i < (int) state->ss_union_layer; i++) {
10089 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0) {
10090 break;
10091 }
10092 tvp = vp;
10093 vp = vp->v_mount->mnt_vnodecovered;
10094 if (vp == NULL) {
10095 vnode_put(tvp);
10096 error = ENOENT;
10097 goto freeandexit;
10098 }
10099 error = vnode_getwithref(vp);
10100 vnode_put(tvp);
10101 if (error) {
10102 goto freeandexit;
10103 }
10104 }
10105
10106 #if CONFIG_MACF
10107 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
10108 if (error) {
10109 vnode_put(vp);
10110 goto freeandexit;
10111 }
10112 #endif
10113
10114
10115 /*
10116 * If searchblock.maxmatches == 0, then skip the search. This has happened
10117 * before and sometimes the underlying code doesn't deal with it well.
10118 */
10119 if (searchblock.maxmatches == 0) {
10120 nummatches = 0;
10121 goto saveandexit;
10122 }
10123
10124 /*
10125 * All right, we have everything we need, so let's make that call.
10126 *
10127 * We keep special track of the return value from the file system:
10128 * EAGAIN is an acceptable error condition that shouldn't keep us
10129 * from copying out any results...
10130 */
10131
10132 fserror = VNOP_SEARCHFS(vp,
10133 searchparams1,
10134 searchparams2,
10135 &searchblock.searchattrs,
10136 (u_long)searchblock.maxmatches,
10137 &timelimit,
10138 returnattrs,
10139 &nummatches,
10140 (u_long)uap->scriptcode,
10141 (u_long)uap->options,
10142 auio,
10143 (struct searchstate *) &state->ss_fsstate,
10144 ctx);
10145
10146 /*
10147 * If it's a union mount we need to be called again
10148 * to search the mounted-on filesystem.
10149 */
10150 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
10151 state->ss_union_flags = SRCHFS_START;
10152 state->ss_union_layer++; // search next layer down
10153 fserror = EAGAIN;
10154 }
10155
10156 saveandexit:
10157
10158 vnode_put(vp);
10159
10160 /* Now copy out the stuff that needs copying out. That means the number of matches and the
10161 * search state. Everything else was already put into the return buffer by the VOP call. */
10162
10163 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0) {
10164 goto freeandexit;
10165 }
10166
10167 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0) {
10168 goto freeandexit;
10169 }
10170
10171 error = fserror;
10172
10173 freeandexit:
10174
10175 FREE(searchparams1, M_TEMP);
10176
10177 return error;
10178 } /* end of searchfs system call */
10179
10180 #else /* CONFIG_SEARCHFS */
10181
10182 int
10183 searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
10184 {
10185 return ENOTSUP;
10186 }
10187
10188 #endif /* CONFIG_SEARCHFS */
10189
10190
10191 #if CONFIG_DATALESS_FILES
10192
10193 /*
10194 * === Namespace Resolver Up-call Mechanism ===
10195 *
10196 * When I/O is performed to a dataless file or directory (read, write,
10197 * lookup-in, etc.), the file system performs an upcall to the namespace
10198 * resolver (filecoordinationd) to materialize the object.
10199 *
10200 * We need multiple up-calls to be in flight at once, and we need these
10201 * up-calls to be interruptible, thus the following implementation:
10202 *
10203 * => The nspace_resolver_request represents the in-kernel request state.
10204 * It contains a request ID, storage space for the errno code returned
10205 * by filecoordinationd, and flags.
10206 *
10207 * => The request ID is simply a global monotonically incrementing 32-bit
10208 * number. Outstanding requests are stored in a hash table, and the
10209 * hash function is extremely simple.
10210 *
10211 * => When an upcall is to be made to filecoordinationd, a request structure
10212 * is allocated on the stack (it is small, and needs to live only during
10213 * the duration of the call to resolve_nspace_item_ext()). It is
10214 * initialized and inserted into the table. Some backpressure from
10215 * filecoordinationd is applied by limiting the number of entries that
10216 * can be inserted into the table (and thus limiting the number of
10217 * outstanding requests issued to filecoordinationd); waiting for an
10218 * available slot is interruptible.
10219 *
10220 * => Once the request has been inserted into the table, the up-call is made
10221 * to filecoordinationd via a MiG-generated stub. The up-call returns
10222 * immediately and filecoordinationd processes the request asynchronously.
10223 *
10224 * => The caller now waits for the request to complete. This is achieved by
10225 * sleeping on the address of the request structure and waiting for
10226 * filecoordinationd to mark the request structure as complete. This
10227 * is an interruptible sleep call; if interrupted, the request structure
10228 * is removed from the table and EINTR is returned to the caller. If
10229 * this occurs, an advisory up-call is made to filecoordinationd with
10230 * the request ID to indicate that the request can be aborted or
10231 * de-prioritized at the discretion of filecoordinationd.
10232 *
10233 * => When filecoordinationd has completed the request, it signals completion
10234 * by writing to the vfs.nspace.complete sysctl node. Only a process
10235 * decorated as a namespace resolver can write to this sysctl node. The
10236 * value is a request ID / errno tuple passed as an array of 2 uint32_t's.
10237 * The request ID is looked up in the table, and if the request is found,
10238 * the error code is stored in the request structure and a wakeup()
10239 * issued on the address of the request structure. If the request is not
10240 * found, we simply drop the completion notification, assuming that the
10241 * caller was interrupted.
10242 *
10243 * => When the waiting thread wakes up, it extracts the error code from the
10244 * request structure, removes the request from the table, and returns the
10245 * error code to the calling function. Fini!
10246 */
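/*
 * A minimal sketch of the completion write described above, as the resolver
 * side might issue it (illustrative only; the exact call filecoordinationd
 * uses is outside this file):
 *
 *     uint32_t tuple[2] = { req_id, resolver_errno };
 *     sysctlbyname("vfs.nspace.complete", NULL, NULL, tuple, sizeof(tuple));
 */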
10247
10248 struct nspace_resolver_request {
10249 LIST_ENTRY(nspace_resolver_request) r_hashlink;
10250 uint32_t r_req_id;
10251 int r_resolver_error;
10252 int r_flags;
10253 };
10254
10255 #define RRF_COMPLETE 0x0001
10256
10257 static uint32_t
10258 next_nspace_req_id(void)
10259 {
10260 static uint32_t next_req_id;
10261
10262 return OSAddAtomic(1, &next_req_id);
10263 }
10264
10265 #define NSPACE_RESOLVER_REQ_HASHSIZE 32 /* XXX tune */
10266 #define NSPACE_RESOLVER_MAX_OUTSTANDING 256 /* XXX tune */
10267
10268 static LIST_HEAD(nspace_resolver_requesthead,
10269 nspace_resolver_request) * nspace_resolver_request_hashtbl;
10270 static u_long nspace_resolver_request_hashmask;
10271 static u_int nspace_resolver_request_count;
10272 static bool nspace_resolver_request_wait_slot;
10273 static lck_grp_t *nspace_resolver_request_lck_grp;
10274 static lck_mtx_t nspace_resolver_request_hash_mutex;
10275
10276 #define NSPACE_REQ_LOCK() \
10277 lck_mtx_lock(&nspace_resolver_request_hash_mutex)
10278 #define NSPACE_REQ_UNLOCK() \
10279 lck_mtx_unlock(&nspace_resolver_request_hash_mutex)
10280
10281 #define NSPACE_RESOLVER_HASH(req_id) \
10282 (&nspace_resolver_request_hashtbl[(req_id) & \
10283 nspace_resolver_request_hashmask])
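/*
 * With a power-of-two table (NSPACE_RESOLVER_REQ_HASHSIZE is 32, so the
 * mask is presumably 31), this is simply req_id % 32; consecutive request
 * IDs therefore spread evenly across the buckets.
 */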
10284
10285 static struct nspace_resolver_request *
10286 nspace_resolver_req_lookup(uint32_t req_id)
10287 {
10288 struct nspace_resolver_requesthead *bucket;
10289 struct nspace_resolver_request *req;
10290
10291 bucket = NSPACE_RESOLVER_HASH(req_id);
10292 LIST_FOREACH(req, bucket, r_hashlink) {
10293 if (req->r_req_id == req_id) {
10294 return req;
10295 }
10296 }
10297
10298 return NULL;
10299 }
10300
10301 static int
10302 nspace_resolver_req_add(struct nspace_resolver_request *req)
10303 {
10304 struct nspace_resolver_requesthead *bucket;
10305 int error;
10306
10307 while (nspace_resolver_request_count >=
10308 NSPACE_RESOLVER_MAX_OUTSTANDING) {
10309 nspace_resolver_request_wait_slot = true;
10310 error = msleep(&nspace_resolver_request_count,
10311 &nspace_resolver_request_hash_mutex,
10312 PVFS | PCATCH, "nspacerq", NULL);
10313 if (error) {
10314 return error;
10315 }
10316 }
10317
10318 bucket = NSPACE_RESOLVER_HASH(req->r_req_id);
10319 #if DIAGNOSTIC
10320 assert(nspace_resolver_req_lookup(req->r_req_id) == NULL);
10321 #endif /* DIAGNOSTIC */
10322 LIST_INSERT_HEAD(bucket, req, r_hashlink);
10323 nspace_resolver_request_count++;
10324
10325 return 0;
10326 }
10327
10328 static void
10329 nspace_resolver_req_remove(struct nspace_resolver_request *req)
10330 {
10331 struct nspace_resolver_requesthead *bucket;
10332
10333 bucket = NSPACE_RESOLVER_HASH(req->r_req_id);
10334 #if DIAGNOSTIC
10335 assert(nspace_resolver_req_lookup(req->r_req_id) != NULL);
10336 #endif /* DIAGNOSTIC */
10337 LIST_REMOVE(req, r_hashlink);
10338 nspace_resolver_request_count--;
10339
10340 if (nspace_resolver_request_wait_slot) {
10341 nspace_resolver_request_wait_slot = false;
10342 wakeup(&nspace_resolver_request_count);
10343 }
10344 }
10345
10346 static void
10347 nspace_resolver_req_cancel(uint32_t req_id)
10348 {
10349 kern_return_t kr;
10350 mach_port_t mp;
10351
10352 // Failures here aren't fatal -- the cancellation message
10353 // sent to the resolver is merely advisory.
10354
10355 kr = host_get_filecoordinationd_port(host_priv_self(), &mp);
10356 if (kr != KERN_SUCCESS || !IPC_PORT_VALID(mp)) {
10357 return;
10358 }
10359
10360 kr = send_nspace_resolve_cancel(mp, req_id);
10361 if (kr != KERN_SUCCESS) {
10362 os_log_error(OS_LOG_DEFAULT,
10363 "NSPACE send_nspace_resolve_cancel failure: %d", kr);
10364 }
10365
10366 ipc_port_release_send(mp);
10367 }
10368
10369 static int
10370 nspace_resolver_req_wait(struct nspace_resolver_request *req)
10371 {
10372 bool send_cancel_message = false;
10373 int error;
10374
10375 NSPACE_REQ_LOCK();
10376
10377 while ((req->r_flags & RRF_COMPLETE) == 0) {
10378 error = msleep(req, &nspace_resolver_request_hash_mutex,
10379 PVFS | PCATCH, "nspace", NULL);
10380 if (error && error != ERESTART) {
10381 req->r_resolver_error = (error == EINTR) ? EINTR :
10382 ETIMEDOUT;
10383 send_cancel_message = true;
10384 break;
10385 }
10386 }
10387
10388 nspace_resolver_req_remove(req);
10389
10390 NSPACE_REQ_UNLOCK();
10391
10392 if (send_cancel_message) {
10393 nspace_resolver_req_cancel(req->r_req_id);
10394 }
10395
10396 return req->r_resolver_error;
10397 }
10398
10399 static void
10400 nspace_resolver_req_mark_complete(
10401 struct nspace_resolver_request *req,
10402 int resolver_error)
10403 {
10404 req->r_resolver_error = resolver_error;
10405 req->r_flags |= RRF_COMPLETE;
10406 wakeup(req);
10407 }
10408
10409 static void
10410 nspace_resolver_req_completed(uint32_t req_id, int resolver_error)
10411 {
10412 struct nspace_resolver_request *req;
10413
10414 NSPACE_REQ_LOCK();
10415
10416 // If we don't find the request corresponding to our req_id,
10417 // just drop the completion signal on the floor; it's likely
10418 // that the requester was interrupted by a signal.
10419
10420 req = nspace_resolver_req_lookup(req_id);
10421 if (req) {
10422 nspace_resolver_req_mark_complete(req, resolver_error);
10423 }
10424
10425 NSPACE_REQ_UNLOCK();
10426 }
10427
10428 static struct proc *nspace_resolver_proc;
10429
10430 static int
10431 nspace_resolver_get_proc_state(struct proc *p, int *is_resolver)
10432 {
10433 *is_resolver = ((p->p_lflag & P_LNSPACE_RESOLVER) &&
10434 p == nspace_resolver_proc) ? 1 : 0;
10435 return 0;
10436 }
10437
10438 static int
10439 nspace_resolver_set_proc_state(struct proc *p, int is_resolver)
10440 {
10441 vfs_context_t ctx = vfs_context_current();
10442 int error = 0;
10443
10444 //
10445 // The system filecoordinationd runs as uid == 0. This also
10446 // has the nice side-effect of filtering out filecoordinationd
10447 // running in the simulator.
10448 //
10449 if (!vfs_context_issuser(ctx)) {
10450 return EPERM;
10451 }
10452
10453 error = priv_check_cred(vfs_context_ucred(ctx),
10454 PRIV_VFS_DATALESS_RESOLVER, 0);
10455 if (error) {
10456 return error;
10457 }
10458
10459 if (is_resolver) {
10460 NSPACE_REQ_LOCK();
10461
10462 if (nspace_resolver_proc == NULL) {
10463 proc_lock(p);
10464 p->p_lflag |= P_LNSPACE_RESOLVER;
10465 proc_unlock(p);
10466 nspace_resolver_proc = p;
10467 } else {
10468 error = EBUSY;
10469 }
10470
10471 NSPACE_REQ_UNLOCK();
10472 } else {
10473 // This is basically just like the exit case.
10474 // nspace_resolver_exited() will verify that the
10475 // process is the resolver, and will clear the
10476 // global.
10477 nspace_resolver_exited(p);
10478 }
10479
10480 return error;
10481 }
10482
10483 static int
10484 nspace_materialization_get_proc_state(struct proc *p, int *is_prevented)
10485 {
10486 if ((p->p_lflag & P_LNSPACE_RESOLVER) != 0 ||
10487 (p->p_vfs_iopolicy &
10488 P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES) == 0) {
10489 *is_prevented = 1;
10490 } else {
10491 *is_prevented = 0;
10492 }
10493 return 0;
10494 }
10495
10496 static int
10497 nspace_materialization_set_proc_state(struct proc *p, int is_prevented)
10498 {
10499 if (p->p_lflag & P_LNSPACE_RESOLVER) {
10500 return is_prevented ? 0 : EBUSY;
10501 }
10502
10503 if (is_prevented) {
10504 OSBitAndAtomic16(~((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES), &p->p_vfs_iopolicy);
10505 } else {
10506 OSBitOrAtomic16((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES, &p->p_vfs_iopolicy);
10507 }
10508 return 0;
10509 }
10510
10511 static int
10512 nspace_materialization_get_thread_state(int *is_prevented)
10513 {
10514 uthread_t ut = get_bsdthread_info(current_thread());
10515
10516 *is_prevented = (ut->uu_flag & UT_NSPACE_NODATALESSFAULTS) ? 1 : 0;
10517 return 0;
10518 }
10519
10520 static int
10521 nspace_materialization_set_thread_state(int is_prevented)
10522 {
10523 uthread_t ut = get_bsdthread_info(current_thread());
10524
10525 if (is_prevented) {
10526 ut->uu_flag |= UT_NSPACE_NODATALESSFAULTS;
10527 } else {
10528 ut->uu_flag &= ~UT_NSPACE_NODATALESSFAULTS;
10529 }
10530 return 0;
10531 }
10532
10533 static int
10534 nspace_materialization_is_prevented(void)
10535 {
10536 proc_t p = current_proc();
10537 uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
10538 vfs_context_t ctx = vfs_context_current();
10539
10540 /*
10541 * Kernel context ==> return EDEADLK, as we would with any random
10542 * process decorated as no-materialize.
10543 */
10544 if (ctx == vfs_context_kernel()) {
10545 return EDEADLK;
10546 }
10547
10548 /*
10549 * If the process has the dataless-manipulation entitlement,
10550 * materialization is prevented, and depending on the kind
10551 * of file system operation, things get to proceed as if the
10552 * object is not dataless.
10553 */
10554 if (vfs_context_is_dataless_manipulator(ctx)) {
10555 return EJUSTRETURN;
10556 }
10557
10558 /*
10559 * Per-thread decorations override any process-wide decorations.
10560 * (Foundation uses this, and this overrides even the dataless-
10561 * manipulation entitlement so as to make API contracts consistent.)
10562 */
10563 if (ut != NULL) {
10564 if (ut->uu_flag & UT_NSPACE_NODATALESSFAULTS) {
10565 return EDEADLK;
10566 }
10567 if (ut->uu_flag & UT_NSPACE_FORCEDATALESSFAULTS) {
10568 return 0;
10569 }
10570 }
10571
10572 /*
10573 * If the process's iopolicy specifies that dataless files
10574 * can be materialized, then we let it go ahead.
10575 */
10576 if (p->p_vfs_iopolicy & P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES) {
10577 return 0;
10578 }
10579
10580 /*
10581 * The default behavior is to not materialize dataless files;
10582 * return to the caller that deadlock was detected.
10583 */
10584 return EDEADLK;
10585 }
10586
10587 /* the vfs.nspace branch */
10588 SYSCTL_NODE(_vfs, OID_AUTO, nspace, CTLFLAG_RW | CTLFLAG_LOCKED, NULL, "vfs nspace hinge");
10589
10590 static int
10591 sysctl_nspace_resolver(__unused struct sysctl_oid *oidp,
10592 __unused void *arg1, __unused int arg2, struct sysctl_req *req)
10593 {
10594 struct proc *p = req->p;
10595 int new_value, old_value, changed = 0;
10596 int error;
10597
10598 error = nspace_resolver_get_proc_state(p, &old_value);
10599 if (error) {
10600 return error;
10601 }
10602
10603 error = sysctl_io_number(req, old_value, sizeof(int), &new_value,
10604 &changed);
10605 if (error == 0 && changed) {
10606 error = nspace_resolver_set_proc_state(p, new_value);
10607 }
10608 return error;
10609 }
10610
10611 /* decorate this process as the dataless file resolver */
10612 SYSCTL_PROC(_vfs_nspace, OID_AUTO, resolver,
10613 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
10614 0, 0, sysctl_nspace_resolver, "I", "");
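/*
 * Illustrative user-space sketch (assuming sysctlbyname(3)) of how a
 * privileged, entitled daemon might decorate itself as the resolver;
 * error handling omitted:
 *
 *	int one = 1;
 *	(void)sysctlbyname("vfs.nspace.resolver", NULL, NULL, &one, sizeof(one));
 */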
10615
10616 static int
10617 sysctl_nspace_prevent_materialization(__unused struct sysctl_oid *oidp,
10618 __unused void *arg1, __unused int arg2, struct sysctl_req *req)
10619 {
10620 struct proc *p = req->p;
10621 int new_value, old_value, changed = 0;
10622 int error;
10623
10624 error = nspace_materialization_get_proc_state(p, &old_value);
10625 if (error) {
10626 return error;
10627 }
10628
10629 error = sysctl_io_number(req, old_value, sizeof(int), &new_value,
10630 &changed);
10631 if (error == 0 && changed) {
10632 error = nspace_materialization_set_proc_state(p, new_value);
10633 }
10634 return error;
10635 }
10636
10637 /* decorate this process as not wanting to materialize dataless files */
10638 SYSCTL_PROC(_vfs_nspace, OID_AUTO, prevent_materialization,
10639 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
10640 0, 0, sysctl_nspace_prevent_materialization, "I", "");
10641
10642 static int
10643 sysctl_nspace_thread_prevent_materialization(__unused struct sysctl_oid *oidp,
10644 __unused void *arg1, __unused int arg2, struct sysctl_req *req)
10645 {
10646 int new_value, old_value, changed = 0;
10647 int error;
10648
10649 error = nspace_materialization_get_thread_state(&old_value);
10650 if (error) {
10651 return error;
10652 }
10653
10654 error = sysctl_io_number(req, old_value, sizeof(int), &new_value,
10655 &changed);
10656 if (error == 0 && changed) {
10657 error = nspace_materialization_set_thread_state(new_value);
10658 }
10659 return error;
10660 }
10661
10662 /* decorate this thread as not wanting to materialize dataless files */
10663 SYSCTL_PROC(_vfs_nspace, OID_AUTO, thread_prevent_materialization,
10664 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
10665 0, 0, sysctl_nspace_thread_prevent_materialization, "I", "");
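/*
 * Sketch of opting out of dataless materialization from user space
 * (assuming sysctlbyname(3)); the choice of sysctl sets the scope:
 *
 *	int prevent = 1;
 *	// whole process
 *	(void)sysctlbyname("vfs.nspace.prevent_materialization",
 *	    NULL, NULL, &prevent, sizeof(prevent));
 *	// calling thread only
 *	(void)sysctlbyname("vfs.nspace.thread_prevent_materialization",
 *	    NULL, NULL, &prevent, sizeof(prevent));
 */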
10666
10667 static int
10668 sysctl_nspace_complete(__unused struct sysctl_oid *oidp, __unused void *arg1,
10669 __unused int arg2, struct sysctl_req *req)
10670 {
10671 struct proc *p = req->p;
10672 uint32_t req_status[2] = { 0, 0 };
10673 int error, is_resolver, changed = 0;
10674
10675 error = nspace_resolver_get_proc_state(p, &is_resolver);
10676 if (error) {
10677 return error;
10678 }
10679
10680 if (!is_resolver) {
10681 return EPERM;
10682 }
10683
10684 error = sysctl_io_opaque(req, req_status, sizeof(req_status),
10685 &changed);
10686 if (error) {
10687 return error;
10688 }
10689
10690 /*
10691 * req_status[0] is the req_id
10692 *
10693 * req_status[1] is the errno
10694 */
10695 if (error == 0 && changed) {
10696 nspace_resolver_req_completed(req_status[0],
10697 (int)req_status[1]);
10698 }
10699 return error;
10700 }
10701
10702 /* Resolver reports completed reqs here. */
10703 SYSCTL_PROC(_vfs_nspace, OID_AUTO, complete,
10704 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
10705 0, 0, sysctl_nspace_complete, "-", "");
10706
10707 #endif /* CONFIG_DATALESS_FILES */
10708
10709 #if CONFIG_DATALESS_FILES
10710 #define __no_dataless_unused /* nothing */
10711 #else
10712 #define __no_dataless_unused __unused
10713 #endif
10714
10715 void
10716 nspace_resolver_init(void)
10717 {
10718 #if CONFIG_DATALESS_FILES
10719 nspace_resolver_request_lck_grp =
10720 lck_grp_alloc_init("file namespace resolver", NULL);
10721
10722 lck_mtx_init(&nspace_resolver_request_hash_mutex,
10723 nspace_resolver_request_lck_grp, NULL);
10724
10725 nspace_resolver_request_hashtbl =
10726 hashinit(NSPACE_RESOLVER_REQ_HASHSIZE,
10727 M_VNODE /* XXX */, &nspace_resolver_request_hashmask);
10728 #endif /* CONFIG_DATALESS_FILES */
10729 }
10730
10731 void
10732 nspace_resolver_exited(struct proc *p __no_dataless_unused)
10733 {
10734 #if CONFIG_DATALESS_FILES
10735 struct nspace_resolver_requesthead *bucket;
10736 struct nspace_resolver_request *req;
10737 u_long idx;
10738
10739 NSPACE_REQ_LOCK();
10740
10741 if ((p->p_lflag & P_LNSPACE_RESOLVER) &&
10742 p == nspace_resolver_proc) {
10743 for (idx = 0; idx <= nspace_resolver_request_hashmask; idx++) {
10744 bucket = &nspace_resolver_request_hashtbl[idx];
10745 LIST_FOREACH(req, bucket, r_hashlink) {
10746 nspace_resolver_req_mark_complete(req,
10747 ETIMEDOUT);
10748 }
10749 }
10750 nspace_resolver_proc = NULL;
10751 }
10752
10753 NSPACE_REQ_UNLOCK();
10754 #endif /* CONFIG_DATALESS_FILES */
10755 }
10756
10757 int
10758 resolve_nspace_item(struct vnode *vp, uint64_t op)
10759 {
10760 return resolve_nspace_item_ext(vp, op, NULL);
10761 }
10762
10763 #define DATALESS_RESOLVER_ENTITLEMENT \
10764 "com.apple.private.vfs.dataless-resolver"
10765 #define DATALESS_MANIPULATION_ENTITLEMENT \
10766 "com.apple.private.vfs.dataless-manipulation"
10767
10768 /*
10769 * Return TRUE if the vfs context is associated with a process entitled
10770 * for dataless manipulation.
10771 *
10772 * XXX Arguably belongs in vfs_subr.c, but is here because of the
10773 * complication around CONFIG_DATALESS_FILES.
10774 */
10775 boolean_t
10776 vfs_context_is_dataless_manipulator(vfs_context_t ctx __unused)
10777 {
10778 #if CONFIG_DATALESS_FILES
10779 assert(ctx->vc_thread == current_thread());
10780 task_t const task = current_task();
10781 return IOTaskHasEntitlement(task, DATALESS_MANIPULATION_ENTITLEMENT) ||
10782 IOTaskHasEntitlement(task, DATALESS_RESOLVER_ENTITLEMENT);
10783 #else
10784 return false;
10785 #endif /* CONFIG_DATALESS_FILES */
10786 }
10787
10788 int
10789 resolve_nspace_item_ext(
10790 struct vnode *vp __no_dataless_unused,
10791 uint64_t op __no_dataless_unused,
10792 void *arg __unused)
10793 {
10794 #if CONFIG_DATALESS_FILES
10795 int error;
10796 mach_port_t mp;
10797 char *path = NULL;
10798 int path_len;
10799 kern_return_t kr;
10800 struct nspace_resolver_request req;
10801
10802 // only allow namespace events on regular files, directories and symlinks.
10803 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
10804 return EFTYPE;
10805 }
10806
10807 //
10808 // if this is a snapshot event and the vnode is on a
10809 // disk image just pretend nothing happened since any
10810 // change to the disk image will cause the disk image
10811 // itself to get backed up and this avoids multi-way
10812 // deadlocks between the snapshot handler and the ever
10813 // popular diskimages-helper process. the variable
10814 // nspace_allow_virtual_devs allows this behavior to
10815 // be overridden (for use by the Mobile TimeMachine
10816 // testing infrastructure which uses disk images)
10817 //
10818 if (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT) {
10819 os_log_debug(OS_LOG_DEFAULT, "NSPACE SNAPSHOT not handled");
10820 return ENOTSUP;
10821 }
10822
10823 error = nspace_materialization_is_prevented();
10824 if (error) {
10825 os_log_debug(OS_LOG_DEFAULT,
10826 "NSPACE process/thread is decorated as no-materialization");
10827 return error;
10828 }
10829
10830 kr = host_get_filecoordinationd_port(host_priv_self(), &mp);
10831 if (kr != KERN_SUCCESS || !IPC_PORT_VALID(mp)) {
10832 os_log_error(OS_LOG_DEFAULT, "NSPACE no port");
10833 // Treat this like being unable to access the backing
10834 // store server.
10835 return ETIMEDOUT;
10836 }
10837
10838 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
10839 if (path == NULL) {
10840 error = ENOMEM;
10841 goto out_release_port;
10842 }
10843 path_len = MAXPATHLEN;
10844
10845 error = vn_getpath(vp, path, &path_len);
10846 if (error == 0) {
10847 int xxx_rdar44371223; /* XXX Mig bug */
10848 req.r_req_id = next_nspace_req_id();
10849 req.r_resolver_error = 0;
10850 req.r_flags = 0;
10851
10852 NSPACE_REQ_LOCK();
10853 error = nspace_resolver_req_add(&req);
10854 NSPACE_REQ_UNLOCK();
10855 if (error) {
10856 goto out_release_port;
10857 }
10858
10859 os_log_debug(OS_LOG_DEFAULT, "NSPACE resolve_path call");
10860 kr = send_nspace_resolve_path(mp, req.r_req_id,
10861 current_proc()->p_pid, (uint32_t)(op & 0xffffffff),
10862 path, &xxx_rdar44371223);
10863 if (kr != KERN_SUCCESS) {
10864 // Also treat this like being unable to access
10865 // the backing store server.
10866 os_log_error(OS_LOG_DEFAULT,
10867 "NSPACE resolve_path failure: %d", kr);
10868 error = ETIMEDOUT;
10869
10870 NSPACE_REQ_LOCK();
10871 nspace_resolver_req_remove(&req);
10872 NSPACE_REQ_UNLOCK();
10873 goto out_release_port;
10874 }
10875
10876 // Give back the memory we allocated earlier while
10877 // we wait; we no longer need it.
10878 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
10879 path = NULL;
10880
10881 // Request has been submitted to the resolver.
10882 // Now (interruptibly) wait for completion.
10883 // Upon return, the request will have been removed
10884 // from the lookup table.
10885 error = nspace_resolver_req_wait(&req);
10886 }
10887
10888 out_release_port:
10889 if (path != NULL) {
10890 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
10891 }
10892 ipc_port_release_send(mp);
10893
10894 return error;
10895 #else
10896 return ENOTSUP;
10897 #endif /* CONFIG_DATALESS_FILES */
10898 }
10899
10900 int
10901 nspace_snapshot_event(__unused vnode_t vp, __unused time_t ctime,
10902 __unused uint64_t op_type, __unused void *arg)
10903 {
10904 return 0;
10905 }
10906
10907 #if 0
10908 static int
10909 build_volfs_path(struct vnode *vp, char *path, int *len)
10910 {
10911 struct vnode_attr va;
10912 int ret;
10913
10914 VATTR_INIT(&va);
10915 VATTR_WANTED(&va, va_fsid);
10916 VATTR_WANTED(&va, va_fileid);
10917
10918 if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
10919 *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
10920 ret = -1;
10921 } else {
10922 *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
10923 ret = 0;
10924 }
10925
10926 return ret;
10927 }
10928 #endif
10929
10930 static unsigned long
10931 fsctl_bogus_command_compat(unsigned long cmd)
10932 {
10933 switch (cmd) {
10934 case IOCBASECMD(FSIOC_SYNC_VOLUME):
10935 return FSIOC_SYNC_VOLUME;
10936 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID):
10937 return FSIOC_ROUTEFS_SETROUTEID;
10938 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS):
10939 return FSIOC_SET_PACKAGE_EXTS;
10940 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE):
10941 return FSIOC_SET_FSTYPENAME_OVERRIDE;
10942 case IOCBASECMD(DISK_CONDITIONER_IOC_GET):
10943 return DISK_CONDITIONER_IOC_GET;
10944 case IOCBASECMD(DISK_CONDITIONER_IOC_SET):
10945 return DISK_CONDITIONER_IOC_SET;
10946 case IOCBASECMD(FSIOC_FIOSEEKHOLE):
10947 return FSIOC_FIOSEEKHOLE;
10948 case IOCBASECMD(FSIOC_FIOSEEKDATA):
10949 return FSIOC_FIOSEEKDATA;
10950 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME):
10951 return SPOTLIGHT_IOC_GET_MOUNT_TIME;
10952 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME):
10953 return SPOTLIGHT_IOC_GET_LAST_MTIME;
10954 }
10955
10956 return cmd;
10957 }
10958
10959 static int
10960 cas_bsdflags_setattr(vnode_t vp, void *arg, vfs_context_t ctx)
10961 {
10962 return VNOP_IOCTL(vp, FSIOC_CAS_BSDFLAGS, arg, FWRITE, ctx);
10963 }
10964
10965 /*
10966 * Make a filesystem-specific control call:
10967 */
10968 /* ARGSUSED */
10969 static int
10970 fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
10971 {
10972 int error = 0;
10973 boolean_t is64bit;
10974 u_int size;
10975 #define STK_PARAMS 128
10976 char stkbuf[STK_PARAMS] = {0};
10977 caddr_t data, memp;
10978 vnode_t vp = *arg_vp;
10979
10980 if (vp->v_type == VCHR || vp->v_type == VBLK) {
10981 return ENOTTY;
10982 }
10983
10984 cmd = fsctl_bogus_command_compat(cmd);
10985
10986 size = IOCPARM_LEN(cmd);
10987 if (size > IOCPARM_MAX) {
10988 return EINVAL;
10989 }
10990
10991 is64bit = proc_is64bit(p);
10992
10993 memp = NULL;
10994
10995 if (size > sizeof(stkbuf)) {
10996 if ((memp = (caddr_t)kalloc(size)) == 0) {
10997 return ENOMEM;
10998 }
10999 data = memp;
11000 } else {
11001 data = &stkbuf[0];
11002 };
11003
11004 if (cmd & IOC_IN) {
11005 if (size) {
11006 error = copyin(udata, data, size);
11007 if (error) {
11008 if (memp) {
11009 kfree(memp, size);
11010 }
11011 return error;
11012 }
11013 } else {
11014 if (is64bit) {
11015 *(user_addr_t *)data = udata;
11016 } else {
11017 *(uint32_t *)data = (uint32_t)udata;
11018 }
11019 };
11020 } else if ((cmd & IOC_OUT) && size) {
11021 /*
11022 * Zero the buffer so the user always
11023 * gets back something deterministic.
11024 */
11025 bzero(data, size);
11026 } else if (cmd & IOC_VOID) {
11027 if (is64bit) {
11028 *(user_addr_t *)data = udata;
11029 } else {
11030 *(uint32_t *)data = (uint32_t)udata;
11031 }
11032 }
11033
11034 /* Check to see if it's a generic command */
11035 switch (cmd) {
11036 case FSIOC_SYNC_VOLUME: {
11037 struct vfs_attr vfa;
11038 mount_t mp = vp->v_mount;
11039 unsigned arg;
11040
11041
11042 /* record vid of vp so we can drop it below. */
11043 uint32_t vvid = vp->v_id;
11044
11045 /*
11046 * Then grab mount_iterref so that we can release the vnode.
11047 * Without this, a thread may call vnode_iterate_prepare then
11048 * get into a deadlock because we've never released the root vp
11049 */
11050 error = mount_iterref(mp, 0);
11051 if (error) {
11052 break;
11053 }
11054 vnode_put(vp);
11055
11056 arg = MNT_NOWAIT;
11057 if (*(uint32_t*)data & FSCTL_SYNC_WAIT) {
11058 arg = MNT_WAIT;
11059 }
11060
11061 /*
11062 * If the filesystem supports multiple filesystems in a
11063 * partition (e.g. APFS volumes in a container), it knows
11064 * that the waitfor argument to VFS_SYNC is a set of flags.
11065 */
11066 VFSATTR_INIT(&vfa);
11067 VFSATTR_WANTED(&vfa, f_capabilities);
11068 if ((vfs_getattr(mp, &vfa, vfs_context_current()) == 0) &&
11069 VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) &&
11070 ((vfa.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_SHARED_SPACE)) &&
11071 ((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_SHARED_SPACE))) {
11072 arg |= MNT_VOLUME;
11073 }
11074
11075 /* issue the sync for this volume */
11076 (void)sync_callback(mp, &arg);
11077
11078 /*
11079 * Then release the mount_iterref once we're done syncing; it's not
11080 * needed for the VNOP_IOCTL below
11081 */
11082 mount_iterdrop(mp);
11083
11084 if (arg & FSCTL_SYNC_FULLSYNC) {
11085 /* re-obtain vnode iocount on the root vp, if possible */
11086 error = vnode_getwithvid(vp, vvid);
11087 if (error == 0) {
11088 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
11089 vnode_put(vp);
11090 }
11091 }
11092 /* mark the argument VP as having been released */
11093 *arg_vp = NULL;
11094 }
11095 break;
11096
11097 case FSIOC_ROUTEFS_SETROUTEID: {
11098 #if ROUTEFS
11099 char routepath[MAXPATHLEN];
11100 size_t len = 0;
11101
11102 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
11103 break;
11104 }
11105 bzero(routepath, MAXPATHLEN);
11106 error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
11107 if (error) {
11108 break;
11109 }
11110 error = routefs_kernel_mount(routepath);
11111 if (error) {
11112 break;
11113 }
11114 #endif
11115 }
11116 break;
11117
11118 case FSIOC_SET_PACKAGE_EXTS: {
11119 user_addr_t ext_strings;
11120 uint32_t num_entries;
11121 uint32_t max_width;
11122
11123 if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0))) {
11124 break;
11125 }
11126
11127 if ((is64bit && size != sizeof(user64_package_ext_info))
11128 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
11129 // either you're 64-bit and passed a 64-bit struct or
11130 // you're 32-bit and passed a 32-bit struct. otherwise
11131 // it's not ok.
11132 error = EINVAL;
11133 break;
11134 }
11135
11136 if (is64bit) {
11137 ext_strings = ((user64_package_ext_info *)data)->strings;
11138 num_entries = ((user64_package_ext_info *)data)->num_entries;
11139 max_width = ((user64_package_ext_info *)data)->max_width;
11140 } else {
11141 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
11142 num_entries = ((user32_package_ext_info *)data)->num_entries;
11143 max_width = ((user32_package_ext_info *)data)->max_width;
11144 }
11145 error = set_package_extensions_table(ext_strings, num_entries, max_width);
11146 }
11147 break;
11148
11149 case FSIOC_SET_FSTYPENAME_OVERRIDE:
11150 {
11151 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
11152 break;
11153 }
11154 if (vp->v_mount) {
11155 mount_lock(vp->v_mount);
11156 if (data[0] != 0) {
11157 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
11158 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
11159 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
11160 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
11161 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
11162 }
11163 } else {
11164 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
11165 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
11166 }
11167 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
11168 vp->v_mount->fstypename_override[0] = '\0';
11169 }
11170 mount_unlock(vp->v_mount);
11171 }
11172 }
11173 break;
11174
11175 case DISK_CONDITIONER_IOC_GET: {
11176 error = disk_conditioner_get_info(vp->v_mount, (disk_conditioner_info *)data);
11177 }
11178 break;
11179
11180 case DISK_CONDITIONER_IOC_SET: {
11181 error = disk_conditioner_set_info(vp->v_mount, (disk_conditioner_info *)data);
11182 }
11183 break;
11184
11185 case FSIOC_CAS_BSDFLAGS: {
11186 struct fsioc_cas_bsdflags *cas = (struct fsioc_cas_bsdflags *)data;
11187 struct vnode_attr va;
11188
11189 VATTR_INIT(&va);
11190 VATTR_SET(&va, va_flags, cas->new_flags);
11191
11192 error = chflags0(vp, &va, cas_bsdflags_setattr, cas, ctx);
11193 }
11194 break;
11195
11196 case FSIOC_FD_ONLY_OPEN_ONCE: {
11197 if (vnode_usecount(vp) > 1) {
11198 error = EBUSY;
11199 } else {
11200 error = 0;
11201 }
11202 }
11203 break;
11204
11205 default: {
11206 /* other, known commands shouldn't be passed down here */
11207 switch (cmd) {
11208 case F_PUNCHHOLE:
11209 case F_TRIM_ACTIVE_FILE:
11210 case F_RDADVISE:
11211 case F_TRANSCODEKEY:
11212 case F_GETPROTECTIONLEVEL:
11213 case F_GETDEFAULTPROTLEVEL:
11214 case F_MAKECOMPRESSED:
11215 case F_SET_GREEDY_MODE:
11216 case F_SETSTATICCONTENT:
11217 case F_SETIOTYPE:
11218 case F_SETBACKINGSTORE:
11219 case F_GETPATH_MTMINFO:
11220 case APFSIOC_REVERT_TO_SNAPSHOT:
11221 case FSIOC_FIOSEEKHOLE:
11222 case FSIOC_FIOSEEKDATA:
11223 case HFS_GET_BOOT_INFO:
11224 case HFS_SET_BOOT_INFO:
11225 case FIOPINSWAP:
11226 case F_CHKCLEAN:
11227 case F_FULLFSYNC:
11228 case F_BARRIERFSYNC:
11229 case F_FREEZE_FS:
11230 case F_THAW_FS:
11231 error = EINVAL;
11232 goto outdrop;
11233 }
11234 /* Invoke the filesystem-specific code */
11235 error = VNOP_IOCTL(vp, cmd, data, options, ctx);
11236 }
11237 } /* end switch stmt */
11238
11239 /*
11240 * if no errors, copy any data to user. Size was
11241 * already set and checked above.
11242 */
11243 if (error == 0 && (cmd & IOC_OUT) && size) {
11244 error = copyout(data, udata, size);
11245 }
11246
11247 outdrop:
11248 if (memp) {
11249 kfree(memp, size);
11250 }
11251
11252 return error;
11253 }
11254
11255 /* ARGSUSED */
11256 int
11257 fsctl(proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
11258 {
11259 int error;
11260 struct nameidata nd;
11261 u_long nameiflags;
11262 vnode_t vp = NULL;
11263 vfs_context_t ctx = vfs_context_current();
11264
11265 AUDIT_ARG(cmd, uap->cmd);
11266 AUDIT_ARG(value32, uap->options);
11267 /* Get the vnode for the file we are getting info on: */
11268 nameiflags = 0;
11269 //
11270 // if we come through fsctl() then the file is by definition not open.
11271 // therefore for the FSIOC_FD_ONLY_OPEN_ONCE selector we return an error
11272 // lest the caller mistakenly think the only open is their own (but in
11273 // reality it's someone else's).
11274 //
11275 if (uap->cmd == FSIOC_FD_ONLY_OPEN_ONCE) {
11276 return EINVAL;
11277 }
11278 if ((uap->options & FSOPT_NOFOLLOW) == 0) {
11279 nameiflags |= FOLLOW;
11280 }
11281 if (uap->cmd == FSIOC_FIRMLINK_CTL) {
11282 nameiflags |= (CN_FIRMLINK_NOFOLLOW | NOCACHE);
11283 }
11284 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
11285 UIO_USERSPACE, uap->path, ctx);
11286 if ((error = namei(&nd))) {
11287 goto done;
11288 }
11289 vp = nd.ni_vp;
11290 nameidone(&nd);
11291
11292 #if CONFIG_MACF
11293 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
11294 if (error) {
11295 goto done;
11296 }
11297 #endif
11298
11299 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
11300
11301 done:
11302 if (vp) {
11303 vnode_put(vp);
11304 }
11305 return error;
11306 }
11307 /* ARGSUSED */
11308 int
11309 ffsctl(proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
11310 {
11311 int error;
11312 vnode_t vp = NULL;
11313 vfs_context_t ctx = vfs_context_current();
11314 int fd = -1;
11315
11316 AUDIT_ARG(fd, uap->fd);
11317 AUDIT_ARG(cmd, uap->cmd);
11318 AUDIT_ARG(value32, uap->options);
11319
11320 /* Get the vnode for the file we are getting info on: */
11321 if ((error = file_vnode(uap->fd, &vp))) {
11322 return error;
11323 }
11324 fd = uap->fd;
11325 if ((error = vnode_getwithref(vp))) {
11326 file_drop(fd);
11327 return error;
11328 }
11329
11330 #if CONFIG_MACF
11331 if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
11332 file_drop(fd);
11333 vnode_put(vp);
11334 return error;
11335 }
11336 #endif
11337
11338 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
11339
11340 file_drop(fd);
11341
11342 /* validate vp; fsctl_internal() can drop iocount and reset vp to NULL */
11343 if (vp) {
11344 vnode_put(vp);
11345 }
11346
11347 return error;
11348 }
11349 /* end of fsctl system call */
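/*
 * Illustrative user-space sketch of issuing one of the generic selectors
 * handled above, assuming the fsctl(2) wrapper declared in <sys/fsctl.h>;
 * the path is a placeholder:
 *
 *	uint32_t sync_flags = FSCTL_SYNC_WAIT;
 *	(void)fsctl("/Volumes/Data", FSIOC_SYNC_VOLUME, &sync_flags, 0);
 */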
11350
11351 /*
11352 * Retrieve the data of an extended attribute.
11353 */
11354 int
11355 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
11356 {
11357 vnode_t vp;
11358 struct nameidata nd;
11359 char attrname[XATTR_MAXNAMELEN + 1];
11360 vfs_context_t ctx = vfs_context_current();
11361 uio_t auio = NULL;
11362 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11363 size_t attrsize = 0;
11364 size_t namelen;
11365 u_int32_t nameiflags;
11366 int error;
11367 char uio_buf[UIO_SIZEOF(1)];
11368
11369 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11370 return EINVAL;
11371 }
11372
11373 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
11374 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
11375 if ((error = namei(&nd))) {
11376 return error;
11377 }
11378 vp = nd.ni_vp;
11379 nameidone(&nd);
11380
11381 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11382 if (error != 0) {
11383 goto out;
11384 }
11385 if (xattr_protected(attrname)) {
11386 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
11387 error = EPERM;
11388 goto out;
11389 }
11390 }
11391 /*
11392 * the specific check for 0xffffffff is a hack to preserve
11393 * binary compatibility in K64 with applications that discovered
11394 * that passing in a buf pointer and a size of -1 resulted in
11395 * just the size of the indicated extended attribute being returned.
11396 * this isn't part of the documented behavior, but because of the
11397 * original implementation's check for "uap->size > 0", this behavior
11398 * was allowed. In K32 that check turned into a signed comparison
11399 * even though uap->size is unsigned... in K64, we blow by that
11400 * check because uap->size is unsigned and doesn't get sign smeared
11401 * in the munger for a 32 bit user app. we also need to add a
11402 * check to limit the maximum size of the buffer being passed in...
11403 * unfortunately, the underlying filesystems seem to just malloc
11404 * the requested size even if the actual extended attribute is tiny.
11405 * because that malloc is for kernel wired memory, we have to put a
11406 * sane limit on it.
11407 *
11408 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
11409 * U64 running on K64 will yield -1 (64 bits wide)
11410 * U32/U64 running on K32 will yield -1 (32 bits wide)
11411 */
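/*
 * For comparison, the documented user-space idiom for sizing a buffer is
 * to pass a NULL value pointer (sketch, error handling omitted):
 *
 *	ssize_t len = getxattr(path, name, NULL, 0, 0, XATTR_NOFOLLOW);
 *
 * The size == -1 case below only preserves the older, accidental behavior
 * described above.
 */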
11412 if (uap->size == 0xffffffff || uap->size == (size_t)-1) {
11413 goto no_uio;
11414 }
11415
11416 if (uap->value) {
11417 if (uap->size > (size_t)XATTR_MAXSIZE) {
11418 uap->size = XATTR_MAXSIZE;
11419 }
11420
11421 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
11422 &uio_buf[0], sizeof(uio_buf));
11423 uio_addiov(auio, uap->value, uap->size);
11424 }
11425 no_uio:
11426 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
11427 out:
11428 vnode_put(vp);
11429
11430 if (auio) {
11431 *retval = uap->size - uio_resid(auio);
11432 } else {
11433 *retval = (user_ssize_t)attrsize;
11434 }
11435
11436 return error;
11437 }
11438
11439 /*
11440 * Retrieve the data of an extended attribute.
11441 */
11442 int
11443 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
11444 {
11445 vnode_t vp;
11446 char attrname[XATTR_MAXNAMELEN + 1];
11447 uio_t auio = NULL;
11448 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11449 size_t attrsize = 0;
11450 size_t namelen;
11451 int error;
11452 char uio_buf[UIO_SIZEOF(1)];
11453
11454 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11455 return EINVAL;
11456 }
11457
11458 if ((error = file_vnode(uap->fd, &vp))) {
11459 return error;
11460 }
11461 if ((error = vnode_getwithref(vp))) {
11462 file_drop(uap->fd);
11463 return error;
11464 }
11465 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11466 if (error != 0) {
11467 goto out;
11468 }
11469 if (xattr_protected(attrname)) {
11470 error = EPERM;
11471 goto out;
11472 }
11473 if (uap->value && uap->size > 0) {
11474 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
11475 &uio_buf[0], sizeof(uio_buf));
11476 uio_addiov(auio, uap->value, uap->size);
11477 }
11478
11479 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
11480 out:
11481 (void)vnode_put(vp);
11482 file_drop(uap->fd);
11483
11484 if (auio) {
11485 *retval = uap->size - uio_resid(auio);
11486 } else {
11487 *retval = (user_ssize_t)attrsize;
11488 }
11489 return error;
11490 }
11491
11492 /*
11493 * Set the data of an extended attribute.
11494 */
11495 int
11496 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
11497 {
11498 vnode_t vp;
11499 struct nameidata nd;
11500 char attrname[XATTR_MAXNAMELEN + 1];
11501 vfs_context_t ctx = vfs_context_current();
11502 uio_t auio = NULL;
11503 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11504 size_t namelen;
11505 u_int32_t nameiflags;
11506 int error;
11507 char uio_buf[UIO_SIZEOF(1)];
11508
11509 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11510 return EINVAL;
11511 }
11512
11513 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11514 if (error != 0) {
11515 if (error == EPERM) {
11516 /* if the string won't fit in attrname, copyinstr emits EPERM */
11517 return ENAMETOOLONG;
11518 }
11519 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11520 return error;
11521 }
11522 if (xattr_protected(attrname)) {
11523 return EPERM;
11524 }
11525 if (uap->size != 0 && uap->value == 0) {
11526 return EINVAL;
11527 }
11528
11529 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
11530 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
11531 if ((error = namei(&nd))) {
11532 return error;
11533 }
11534 vp = nd.ni_vp;
11535 nameidone(&nd);
11536
11537 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
11538 &uio_buf[0], sizeof(uio_buf));
11539 uio_addiov(auio, uap->value, uap->size);
11540
11541 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
11542 #if CONFIG_FSE
11543 if (error == 0) {
11544 add_fsevent(FSE_XATTR_MODIFIED, ctx,
11545 FSE_ARG_VNODE, vp,
11546 FSE_ARG_DONE);
11547 }
11548 #endif
11549 vnode_put(vp);
11550 *retval = 0;
11551 return error;
11552 }
11553
11554 /*
11555 * Set the data of an extended attribute.
11556 */
11557 int
11558 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
11559 {
11560 vnode_t vp;
11561 char attrname[XATTR_MAXNAMELEN + 1];
11562 uio_t auio = NULL;
11563 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11564 size_t namelen;
11565 int error;
11566 char uio_buf[UIO_SIZEOF(1)];
11567 #if CONFIG_FSE
11568 vfs_context_t ctx = vfs_context_current();
11569 #endif
11570
11571 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11572 return EINVAL;
11573 }
11574
11575 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11576 if (error != 0) {
11577 if (error == EPERM) {
11578 /* if the string won't fit in attrname, copyinstr emits EPERM */
11579 return ENAMETOOLONG;
11580 }
11581 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11582 return error;
11583 }
11584 if (xattr_protected(attrname)) {
11585 return EPERM;
11586 }
11587 if (uap->size != 0 && uap->value == 0) {
11588 return EINVAL;
11589 }
11590 if ((error = file_vnode(uap->fd, &vp))) {
11591 return error;
11592 }
11593 if ((error = vnode_getwithref(vp))) {
11594 file_drop(uap->fd);
11595 return error;
11596 }
11597 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
11598 &uio_buf[0], sizeof(uio_buf));
11599 uio_addiov(auio, uap->value, uap->size);
11600
11601 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
11602 #if CONFIG_FSE
11603 if (error == 0) {
11604 add_fsevent(FSE_XATTR_MODIFIED, ctx,
11605 FSE_ARG_VNODE, vp,
11606 FSE_ARG_DONE);
11607 }
11608 #endif
11609 vnode_put(vp);
11610 file_drop(uap->fd);
11611 *retval = 0;
11612 return error;
11613 }
11614
11615 /*
11616 * Remove an extended attribute.
11617 * XXX Code duplication here.
11618 */
11619 int
11620 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
11621 {
11622 vnode_t vp;
11623 struct nameidata nd;
11624 char attrname[XATTR_MAXNAMELEN + 1];
11625 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11626 vfs_context_t ctx = vfs_context_current();
11627 size_t namelen;
11628 u_int32_t nameiflags;
11629 int error;
11630
11631 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11632 return EINVAL;
11633 }
11634
11635 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11636 if (error != 0) {
11637 return error;
11638 }
11639 if (xattr_protected(attrname)) {
11640 return EPERM;
11641 }
11642 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
11643 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
11644 if ((error = namei(&nd))) {
11645 return error;
11646 }
11647 vp = nd.ni_vp;
11648 nameidone(&nd);
11649
11650 error = vn_removexattr(vp, attrname, uap->options, ctx);
11651 #if CONFIG_FSE
11652 if (error == 0) {
11653 add_fsevent(FSE_XATTR_REMOVED, ctx,
11654 FSE_ARG_VNODE, vp,
11655 FSE_ARG_DONE);
11656 }
11657 #endif
11658 vnode_put(vp);
11659 *retval = 0;
11660 return error;
11661 }
11662
11663 /*
11664 * Remove an extended attribute.
11665 * XXX Code duplication here.
11666 */
11667 int
11668 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
11669 {
11670 vnode_t vp;
11671 char attrname[XATTR_MAXNAMELEN + 1];
11672 size_t namelen;
11673 int error;
11674 #if CONFIG_FSE
11675 vfs_context_t ctx = vfs_context_current();
11676 #endif
11677
11678 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11679 return EINVAL;
11680 }
11681
11682 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11683 if (error != 0) {
11684 return error;
11685 }
11686 if (xattr_protected(attrname)) {
11687 return EPERM;
11688 }
11689 if ((error = file_vnode(uap->fd, &vp))) {
11690 return error;
11691 }
11692 if ((error = vnode_getwithref(vp))) {
11693 file_drop(uap->fd);
11694 return error;
11695 }
11696
11697 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
11698 #if CONFIG_FSE
11699 if (error == 0) {
11700 add_fsevent(FSE_XATTR_REMOVED, ctx,
11701 FSE_ARG_VNODE, vp,
11702 FSE_ARG_DONE);
11703 }
11704 #endif
11705 vnode_put(vp);
11706 file_drop(uap->fd);
11707 *retval = 0;
11708 return error;
11709 }
11710
11711 /*
11712 * Retrieve the list of extended attribute names.
11713 * XXX Code duplication here.
11714 */
11715 int
11716 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
11717 {
11718 vnode_t vp;
11719 struct nameidata nd;
11720 vfs_context_t ctx = vfs_context_current();
11721 uio_t auio = NULL;
11722 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11723 size_t attrsize = 0;
11724 u_int32_t nameiflags;
11725 int error;
11726 char uio_buf[UIO_SIZEOF(1)];
11727
11728 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11729 return EINVAL;
11730 }
11731
11732 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
11733 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
11734 if ((error = namei(&nd))) {
11735 return error;
11736 }
11737 vp = nd.ni_vp;
11738 nameidone(&nd);
11739 if (uap->namebuf != 0 && uap->bufsize > 0) {
11740 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
11741 &uio_buf[0], sizeof(uio_buf));
11742 uio_addiov(auio, uap->namebuf, uap->bufsize);
11743 }
11744
11745 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
11746
11747 vnode_put(vp);
11748 if (auio) {
11749 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
11750 } else {
11751 *retval = (user_ssize_t)attrsize;
11752 }
11753 return error;
11754 }
11755
11756 /*
11757 * Retrieve the list of extended attribute names.
11758 * XXX Code duplication here.
11759 */
11760 int
11761 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
11762 {
11763 vnode_t vp;
11764 uio_t auio = NULL;
11765 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11766 size_t attrsize = 0;
11767 int error;
11768 char uio_buf[UIO_SIZEOF(1)];
11769
11770 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11771 return EINVAL;
11772 }
11773
11774 if ((error = file_vnode(uap->fd, &vp))) {
11775 return error;
11776 }
11777 if ((error = vnode_getwithref(vp))) {
11778 file_drop(uap->fd);
11779 return error;
11780 }
11781 if (uap->namebuf != 0 && uap->bufsize > 0) {
11782 auio = uio_createwithbuffer(1, 0, spacetype,
11783 UIO_READ, &uio_buf[0], sizeof(uio_buf));
11784 uio_addiov(auio, uap->namebuf, uap->bufsize);
11785 }
11786
11787 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
11788
11789 vnode_put(vp);
11790 file_drop(uap->fd);
11791 if (auio) {
11792 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
11793 } else {
11794 *retval = (user_ssize_t)attrsize;
11795 }
11796 return error;
11797 }
11798
11799 static int
11800 fsgetpath_internal(vfs_context_t ctx, int volfs_id, uint64_t objid,
11801 vm_size_t bufsize, caddr_t buf, uint32_t options, int *pathlen)
11802 {
11803 int error;
11804 struct mount *mp = NULL;
11805 vnode_t vp;
11806 int length;
11807 int bpflags;
11808 /* maximum number of times to retry build_path */
11809 unsigned int retries = 0x10;
11810
11811 if (bufsize > PAGE_SIZE) {
11812 return EINVAL;
11813 }
11814
11815 if (buf == NULL) {
11816 return ENOMEM;
11817 }
11818
11819 retry:
11820 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
11821 error = ENOTSUP; /* unexpected failure */
11822 return ENOTSUP;
11823 }
11824
11825 unionget:
11826 if (objid == 2) {
11827 struct vfs_attr vfsattr;
11828 int use_vfs_root = TRUE;
11829
11830 VFSATTR_INIT(&vfsattr);
11831 VFSATTR_WANTED(&vfsattr, f_capabilities);
11832 if (!(options & FSOPT_ISREALFSID) &&
11833 vfs_getattr(mp, &vfsattr, vfs_context_kernel()) == 0 &&
11834 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
11835 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS) &&
11836 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS)) {
11837 use_vfs_root = FALSE;
11838 }
11839 }
11840
11841 if (use_vfs_root) {
11842 error = VFS_ROOT(mp, &vp, ctx);
11843 } else {
11844 error = VFS_VGET(mp, objid, &vp, ctx);
11845 }
11846 } else {
11847 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
11848 }
11849
11850 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
11851 /*
11852 * If the fileid isn't found and we're in a union
11853 * mount volume, then see if the fileid is in the
11854 * mounted-on volume.
11855 */
11856 struct mount *tmp = mp;
11857 mp = vnode_mount(tmp->mnt_vnodecovered);
11858 vfs_unbusy(tmp);
11859 if (vfs_busy(mp, LK_NOWAIT) == 0) {
11860 goto unionget;
11861 }
11862 } else {
11863 vfs_unbusy(mp);
11864 }
11865
11866 if (error) {
11867 return error;
11868 }
11869
11870 #if CONFIG_MACF
11871 error = mac_vnode_check_fsgetpath(ctx, vp);
11872 if (error) {
11873 vnode_put(vp);
11874 return error;
11875 }
11876 #endif
11877
11878 /* Obtain the absolute path to this vnode. */
11879 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
11880 if (options & FSOPT_NOFIRMLINKPATH) {
11881 bpflags |= BUILDPATH_NO_FIRMLINK;
11882 }
11883 bpflags |= BUILDPATH_CHECK_MOVED;
11884 error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
11885 vnode_put(vp);
11886
11887 if (error) {
11888 /* there was a race building the path, try a few more times */
11889 if (error == EAGAIN) {
11890 --retries;
11891 if (retries > 0) {
11892 goto retry;
11893 }
11894
11895 error = ENOENT;
11896 }
11897 goto out;
11898 }
11899
11900 AUDIT_ARG(text, buf);
11901
11902 if (kdebug_enable) {
11903 long dbg_parms[NUMPARMS];
11904 int dbg_namelen;
11905
11906 dbg_namelen = (int)sizeof(dbg_parms);
11907
11908 if (length < dbg_namelen) {
11909 memcpy((char *)dbg_parms, buf, length);
11910 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
11911
11912 dbg_namelen = length;
11913 } else {
11914 memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
11915 }
11916
11917 kdebug_vfs_lookup(dbg_parms, dbg_namelen, (void *)vp,
11918 KDBG_VFS_LOOKUP_FLAG_LOOKUP);
11919 }
11920
11921 *pathlen = (user_ssize_t)length; /* may be superseded by error */
11922
11923 out:
11924 return error;
11925 }
11926
11927 /*
11928 * Obtain the full pathname of a file system object by id.
11929 */
11930 static int
11931 fsgetpath_extended(user_addr_t buf, int bufsize, user_addr_t user_fsid, uint64_t objid,
11932 uint32_t options, user_ssize_t *retval)
11933 {
11934 vfs_context_t ctx = vfs_context_current();
11935 fsid_t fsid;
11936 char *realpath;
11937 int length;
11938 int error;
11939
11940 if (options & ~(FSOPT_NOFIRMLINKPATH | FSOPT_ISREALFSID)) {
11941 return EINVAL;
11942 }
11943
11944 if ((error = copyin(user_fsid, (caddr_t)&fsid, sizeof(fsid)))) {
11945 return error;
11946 }
11947 AUDIT_ARG(value32, fsid.val[0]);
11948 AUDIT_ARG(value64, objid);
11949 /* Restrict output buffer size for now. */
11950
11951 if (bufsize > PAGE_SIZE || bufsize <= 0) {
11952 return EINVAL;
11953 }
11954 MALLOC(realpath, char *, bufsize, M_TEMP, M_WAITOK | M_ZERO);
11955 if (realpath == NULL) {
11956 return ENOMEM;
11957 }
11958
11959 error = fsgetpath_internal(ctx, fsid.val[0], objid, bufsize, realpath,
11960 options, &length);
11961
11962 if (error) {
11963 goto out;
11964 }
11965
11966 error = copyout((caddr_t)realpath, buf, length);
11967
11968 *retval = (user_ssize_t)length; /* may be superseded by error */
11969 out:
11970 if (realpath) {
11971 FREE(realpath, M_TEMP);
11972 }
11973 return error;
11974 }
11975
11976 int
11977 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
11978 {
11979 return fsgetpath_extended(uap->buf, uap->bufsize, uap->fsid, uap->objid,
11980 0, retval);
11981 }
11982
11983 int
11984 fsgetpath_ext(__unused proc_t p, struct fsgetpath_ext_args *uap, user_ssize_t *retval)
11985 {
11986 return fsgetpath_extended(uap->buf, uap->bufsize, uap->fsid, uap->objid,
11987 uap->options, retval);
11988 }
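/*
 * Illustrative user-space usage sketch (the path below is a placeholder);
 * the fsid typically comes from statfs(2) and the object id from stat(2):
 *
 *	struct statfs sfs;
 *	struct stat st;
 *	char path[MAXPATHLEN];
 *
 *	if (statfs("/tmp/example", &sfs) == 0 &&
 *	    stat("/tmp/example", &st) == 0) {
 *		ssize_t len = fsgetpath(path, sizeof(path), &sfs.f_fsid,
 *		    (uint64_t)st.st_ino);
 *	}
 */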
11989
11990 /*
11991 * Common routine to handle various flavors of statfs data heading out
11992 * to user space.
11993 *
11994 * Returns: 0 Success
11995 * EFAULT
11996 */
11997 static int
11998 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
11999 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
12000 boolean_t partial_copy)
12001 {
12002 int error;
12003 int my_size, copy_size;
12004
12005 if (is_64_bit) {
12006 struct user64_statfs sfs;
12007 my_size = copy_size = sizeof(sfs);
12008 bzero(&sfs, my_size);
12009 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
12010 sfs.f_type = mp->mnt_vtable->vfc_typenum;
12011 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
12012 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
12013 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
12014 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
12015 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
12016 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
12017 sfs.f_files = (user64_long_t)sfsp->f_files;
12018 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
12019 sfs.f_fsid = sfsp->f_fsid;
12020 sfs.f_owner = sfsp->f_owner;
12021 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
12022 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
12023 } else {
12024 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
12025 }
12026 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
12027 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
12028
12029 if (partial_copy) {
12030 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
12031 }
12032 error = copyout((caddr_t)&sfs, bufp, copy_size);
12033 } else {
12034 struct user32_statfs sfs;
12035
12036 my_size = copy_size = sizeof(sfs);
12037 bzero(&sfs, my_size);
12038
12039 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
12040 sfs.f_type = mp->mnt_vtable->vfc_typenum;
12041 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
12042
12043 /*
12044 * It's possible for there to be more than 2^31 blocks in the filesystem, so we
12045 * have to fudge the numbers here in that case. We inflate the blocksize in order
12046 * to reflect the filesystem size as best we can.
12047 */
12048 if ((sfsp->f_blocks > INT_MAX)
12049 /* Hack for 4061702. I think the real fix is for Carbon to
12050 * look for some volume capability and not depend on hidden
12051 * semantics agreed between a FS and carbon.
12052 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
12053 * for Carbon to set bNoVolumeSizes volume attribute.
12054 * Without this the webdavfs files cannot be copied onto
12055 * disk as they look huge. This change should not affect
12056 * XSAN as it should not be setting these to -1.
12057 */
12058 && (sfsp->f_blocks != 0xffffffffffffffffULL)
12059 && (sfsp->f_bfree != 0xffffffffffffffffULL)
12060 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
12061 int shift;
12062
12063 /*
12064 * Work out how far we have to shift the block count down to make it fit.
12065 * Note that it's possible to have to shift so far that the resulting
12066 * blocksize would be unreportably large. At that point, we will clip
12067 * any values that don't fit.
12068 *
12069 * For safety's sake, we also ensure that f_iosize is never reported as
12070 * being smaller than f_bsize.
12071 */
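/*
 * Worked example (illustrative numbers): a 16 TiB volume with 512-byte
 * blocks has f_blocks = 2^35, so the loop settles on shift = 5; the
 * 32-bit caller then sees f_bsize = 16384 and f_blocks = 2^30, which
 * still multiply out to the true 16 TiB size.
 */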
12072 for (shift = 0; shift < 32; shift++) {
12073 if ((sfsp->f_blocks >> shift) <= INT_MAX) {
12074 break;
12075 }
12076 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX) {
12077 break;
12078 }
12079 }
12080 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
12081 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
12082 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
12083 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
12084 #undef __SHIFT_OR_CLIP
12085 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
12086 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
12087 } else {
12088 /* filesystem is small enough to be reported honestly */
12089 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
12090 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
12091 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
12092 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
12093 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
12094 }
12095 sfs.f_files = (user32_long_t)sfsp->f_files;
12096 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
12097 sfs.f_fsid = sfsp->f_fsid;
12098 sfs.f_owner = sfsp->f_owner;
12099 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
12100 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
12101 } else {
12102 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
12103 }
12104 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
12105 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
12106
12107 if (partial_copy) {
12108 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
12109 }
12110 error = copyout((caddr_t)&sfs, bufp, copy_size);
12111 }
12112
12113 if (sizep != NULL) {
12114 *sizep = my_size;
12115 }
12116 return error;
12117 }
12118
12119 /*
12120 * copy stat structure into user_stat structure.
12121 */
12122 void
12123 munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
12124 {
12125 bzero(usbp, sizeof(*usbp));
12126
12127 usbp->st_dev = sbp->st_dev;
12128 usbp->st_ino = sbp->st_ino;
12129 usbp->st_mode = sbp->st_mode;
12130 usbp->st_nlink = sbp->st_nlink;
12131 usbp->st_uid = sbp->st_uid;
12132 usbp->st_gid = sbp->st_gid;
12133 usbp->st_rdev = sbp->st_rdev;
12134 #ifndef _POSIX_C_SOURCE
12135 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
12136 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
12137 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
12138 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
12139 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
12140 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
12141 #else
12142 usbp->st_atime = sbp->st_atime;
12143 usbp->st_atimensec = sbp->st_atimensec;
12144 usbp->st_mtime = sbp->st_mtime;
12145 usbp->st_mtimensec = sbp->st_mtimensec;
12146 usbp->st_ctime = sbp->st_ctime;
12147 usbp->st_ctimensec = sbp->st_ctimensec;
12148 #endif
12149 usbp->st_size = sbp->st_size;
12150 usbp->st_blocks = sbp->st_blocks;
12151 usbp->st_blksize = sbp->st_blksize;
12152 usbp->st_flags = sbp->st_flags;
12153 usbp->st_gen = sbp->st_gen;
12154 usbp->st_lspare = sbp->st_lspare;
12155 usbp->st_qspare[0] = sbp->st_qspare[0];
12156 usbp->st_qspare[1] = sbp->st_qspare[1];
12157 }
12158
12159 void
12160 munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
12161 {
12162 bzero(usbp, sizeof(*usbp));
12163
12164 usbp->st_dev = sbp->st_dev;
12165 usbp->st_ino = sbp->st_ino;
12166 usbp->st_mode = sbp->st_mode;
12167 usbp->st_nlink = sbp->st_nlink;
12168 usbp->st_uid = sbp->st_uid;
12169 usbp->st_gid = sbp->st_gid;
12170 usbp->st_rdev = sbp->st_rdev;
12171 #ifndef _POSIX_C_SOURCE
12172 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
12173 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
12174 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
12175 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
12176 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
12177 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
12178 #else
12179 usbp->st_atime = sbp->st_atime;
12180 usbp->st_atimensec = sbp->st_atimensec;
12181 usbp->st_mtime = sbp->st_mtime;
12182 usbp->st_mtimensec = sbp->st_mtimensec;
12183 usbp->st_ctime = sbp->st_ctime;
12184 usbp->st_ctimensec = sbp->st_ctimensec;
12185 #endif
12186 usbp->st_size = sbp->st_size;
12187 usbp->st_blocks = sbp->st_blocks;
12188 usbp->st_blksize = sbp->st_blksize;
12189 usbp->st_flags = sbp->st_flags;
12190 usbp->st_gen = sbp->st_gen;
12191 usbp->st_lspare = sbp->st_lspare;
12192 usbp->st_qspare[0] = sbp->st_qspare[0];
12193 usbp->st_qspare[1] = sbp->st_qspare[1];
12194 }
12195
12196 /*
12197 * copy stat64 structure into user_stat64 structure.
12198 */
12199 void
12200 munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
12201 {
12202 bzero(usbp, sizeof(*usbp));
12203
12204 usbp->st_dev = sbp->st_dev;
12205 usbp->st_ino = sbp->st_ino;
12206 usbp->st_mode = sbp->st_mode;
12207 usbp->st_nlink = sbp->st_nlink;
12208 usbp->st_uid = sbp->st_uid;
12209 usbp->st_gid = sbp->st_gid;
12210 usbp->st_rdev = sbp->st_rdev;
12211 #ifndef _POSIX_C_SOURCE
12212 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
12213 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
12214 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
12215 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
12216 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
12217 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
12218 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
12219 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
12220 #else
12221 usbp->st_atime = sbp->st_atime;
12222 usbp->st_atimensec = sbp->st_atimensec;
12223 usbp->st_mtime = sbp->st_mtime;
12224 usbp->st_mtimensec = sbp->st_mtimensec;
12225 usbp->st_ctime = sbp->st_ctime;
12226 usbp->st_ctimensec = sbp->st_ctimensec;
12227 usbp->st_birthtime = sbp->st_birthtime;
12228 usbp->st_birthtimensec = sbp->st_birthtimensec;
12229 #endif
12230 usbp->st_size = sbp->st_size;
12231 usbp->st_blocks = sbp->st_blocks;
12232 usbp->st_blksize = sbp->st_blksize;
12233 usbp->st_flags = sbp->st_flags;
12234 usbp->st_gen = sbp->st_gen;
12235 usbp->st_lspare = sbp->st_lspare;
12236 usbp->st_qspare[0] = sbp->st_qspare[0];
12237 usbp->st_qspare[1] = sbp->st_qspare[1];
12238 }
12239
12240 void
12241 munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
12242 {
12243 bzero(usbp, sizeof(*usbp));
12244
12245 usbp->st_dev = sbp->st_dev;
12246 usbp->st_ino = sbp->st_ino;
12247 usbp->st_mode = sbp->st_mode;
12248 usbp->st_nlink = sbp->st_nlink;
12249 usbp->st_uid = sbp->st_uid;
12250 usbp->st_gid = sbp->st_gid;
12251 usbp->st_rdev = sbp->st_rdev;
12252 #ifndef _POSIX_C_SOURCE
12253 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
12254 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
12255 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
12256 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
12257 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
12258 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
12259 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
12260 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
12261 #else
12262 usbp->st_atime = sbp->st_atime;
12263 usbp->st_atimensec = sbp->st_atimensec;
12264 usbp->st_mtime = sbp->st_mtime;
12265 usbp->st_mtimensec = sbp->st_mtimensec;
12266 usbp->st_ctime = sbp->st_ctime;
12267 usbp->st_ctimensec = sbp->st_ctimensec;
12268 usbp->st_birthtime = sbp->st_birthtime;
12269 usbp->st_birthtimensec = sbp->st_birthtimensec;
12270 #endif
12271 usbp->st_size = sbp->st_size;
12272 usbp->st_blocks = sbp->st_blocks;
12273 usbp->st_blksize = sbp->st_blksize;
12274 usbp->st_flags = sbp->st_flags;
12275 usbp->st_gen = sbp->st_gen;
12276 usbp->st_lspare = sbp->st_lspare;
12277 usbp->st_qspare[0] = sbp->st_qspare[0];
12278 usbp->st_qspare[1] = sbp->st_qspare[1];
12279 }
12280
12281 /*
12282 * Purge the buffer cache to simulate a cold start.
12283 */
12284 static int
12285 vnode_purge_callback(struct vnode *vp, __unused void *cargs)
12286 {
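/*
 * Push any cached pages for this vnode back to the backing store
 * (UBC_PUSHALL) and then invalidate them (UBC_INVALIDATE), so later
 * accesses must re-read from disk, as they would on a cold start.
 */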
12287 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
12288
12289 return VNODE_RETURNED;
12290 }
12291
12292 static int
12293 vfs_purge_callback(mount_t mp, __unused void * arg)
12294 {
12295 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
12296
12297 return VFS_RETURNED;
12298 }
12299
12300 int
12301 vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
12302 {
12303 if (!kauth_cred_issuser(kauth_cred_get())) {
12304 return EPERM;
12305 }
12306
12307 vfs_iterate(0 /* flags */, vfs_purge_callback, NULL);
12308
12309 return 0;
12310 }
12311
12312 /*
12313 * Gets the vnode associated with the (unnamed) snapshot directory
12314 * of a filesystem. The snapshot directory vnode is returned with
12315 * an iocount held on it.
12316 */
12317 int
12318 vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx)
12319 {
12320 return VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx);
12321 }
12322
12323 /*
12324 * Get the snapshot vnode.
12325 *
12326 * If successful, the call returns with an iocount held on both *rvpp and
12327 * *sdvpp, and the caller must call nameidone() on ndp.
12328 *
12329 * If the snapshot vnode already exists, it is returned in ndp->ni_vp.
12330 *
12331 * If the call returns an error, *rvpp and *sdvpp are NULL and nameidone()
12332 * is not needed.
12333 */
12334 static int
12335 vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp,
12336 user_addr_t name, struct nameidata *ndp, int32_t op,
12337 #if !CONFIG_TRIGGERS
12338 __unused
12339 #endif
12340 enum path_operation pathop,
12341 vfs_context_t ctx)
12342 {
12343 int error, i;
12344 caddr_t name_buf;
12345 size_t name_len;
12346 struct vfs_attr vfa;
12347
12348 *sdvpp = NULLVP;
12349 *rvpp = NULLVP;
12350
12351 error = vnode_getfromfd(ctx, dirfd, rvpp);
12352 if (error) {
12353 return error;
12354 }
12355
12356 if (!vnode_isvroot(*rvpp)) {
12357 error = EINVAL;
12358 goto out;
12359 }
12360
12361 /* Make sure the filesystem supports snapshots */
12362 VFSATTR_INIT(&vfa);
12363 VFSATTR_WANTED(&vfa, f_capabilities);
12364 if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) ||
12365 !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) ||
12366 !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] &
12367 VOL_CAP_INT_SNAPSHOT)) ||
12368 !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &
12369 VOL_CAP_INT_SNAPSHOT))) {
12370 error = ENOTSUP;
12371 goto out;
12372 }
12373
12374 error = vnode_get_snapdir(*rvpp, sdvpp, ctx);
12375 if (error) {
12376 goto out;
12377 }
12378
12379 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12380 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
12381 if (error) {
12382 goto out1;
12383 }
12384
12385 /*
12386 * Some sanity checks: the name can't be empty, ".", or "..", and can't contain slashes.
12387 * (The length returned by copyinstr includes the terminating NUL.)
12388 */
12389 if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
12390 (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
12391 error = EINVAL;
12392 goto out1;
12393 }
12394 for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++) {
12395 ;
12396 }
12397 if (i < (int)name_len) {
12398 error = EINVAL;
12399 goto out1;
12400 }
12401
12402 #if CONFIG_MACF
12403 if (op == CREATE) {
12404 error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp),
12405 name_buf);
12406 } else if (op == DELETE) {
12407 error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp),
12408 name_buf);
12409 }
12410 if (error) {
12411 goto out1;
12412 }
12413 #endif
12414
12415 /* Check if the snapshot already exists ... */
12416 NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1,
12417 UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx);
12418 ndp->ni_dvp = *sdvpp;
12419
12420 error = namei(ndp);
12421 out1:
12422 FREE(name_buf, M_TEMP);
12423 out:
12424 if (error) {
12425 if (*sdvpp) {
12426 vnode_put(*sdvpp);
12427 *sdvpp = NULLVP;
12428 }
12429 if (*rvpp) {
12430 vnode_put(*rvpp);
12431 *rvpp = NULLVP;
12432 }
12433 }
12434 return error;
12435 }
12436
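/*
 * Typical caller pattern for vnode_get_snapshot() (illustrative sketch only;
 * the real callers are the snapshot_* helpers below). On success the caller
 * owns iocounts on rvp and snapdvp, plus one on nd.ni_vp if the lookup found
 * the snapshot, and must call nameidone():
 *
 *	error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &nd, LOOKUP,
 *	    OP_LOOKUP, ctx);
 *	if (error == 0) {
 *		if (nd.ni_vp) {
 *			// ... operate on the snapshot vnode ...
 *			vnode_put(nd.ni_vp);
 *		}
 *		nameidone(&nd);
 *		vnode_put(snapdvp);
 *		vnode_put(rvp);
 *	}
 */
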
12437 /*
12438 * Create a filesystem snapshot (for filesystems that support snapshots).
12439 *
12440 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL):
12441 * we get to the (unnamed) snapshot directory vnode and create the vnode
12442 * for the snapshot in it.
12443 *
12444 * Restrictions:
12445 *
12446 * a) The snapshot name passed in cannot contain slashes.
12447 * b) The name can't be "." or "..".
12448 *
12449 * Since this requires superuser privileges, vnode_authorize calls are not
12450 * made.
12451 */
12452 static int
12453 snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
12454 vfs_context_t ctx)
12455 {
12456 vnode_t rvp, snapdvp;
12457 int error;
12458 struct nameidata namend;
12459
12460 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE,
12461 OP_LINK, ctx);
12462 if (error) {
12463 return error;
12464 }
12465
12466 if (namend.ni_vp) {
12467 vnode_put(namend.ni_vp);
12468 error = EEXIST;
12469 } else {
12470 struct vnode_attr va;
12471 vnode_t vp = NULLVP;
12472
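/*
 * The snapshot object is created as a plain file with mode 0;
 * VN_CREATE_NOAUTH skips vnode_authorize() since the caller's privilege
 * was already checked in fs_snapshot(), and VN_CREATE_NOINHERIT skips
 * inheriting security attributes from the parent directory.
 */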
12473 VATTR_INIT(&va);
12474 VATTR_SET(&va, va_type, VREG);
12475 VATTR_SET(&va, va_mode, 0);
12476
12477 error = vn_create(snapdvp, &vp, &namend, &va,
12478 VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
12479 if (!error && vp) {
12480 vnode_put(vp);
12481 }
12482 }
12483
12484 nameidone(&namend);
12485 vnode_put(snapdvp);
12486 vnode_put(rvp);
12487 return error;
12488 }
12489
12490 /*
12491 * Delete a filesystem snapshot.
12492 *
12493 * Get the vnodes for the unnamed snapshot directory and the snapshot, then
12494 * delete the snapshot.
12495 */
12496 static int
12497 snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
12498 vfs_context_t ctx)
12499 {
12500 vnode_t rvp, snapdvp;
12501 int error;
12502 struct nameidata namend;
12503
12504 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE,
12505 OP_UNLINK, ctx);
12506 if (error) {
12507 goto out;
12508 }
12509
12510 error = VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd,
12511 VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
12512
12513 vnode_put(namend.ni_vp);
12514 nameidone(&namend);
12515 vnode_put(snapdvp);
12516 vnode_put(rvp);
12517 out:
12518 return error;
12519 }
12520
12521 /*
12522 * Revert a filesystem to a snapshot
12523 *
12524 * Marks the filesystem to revert to the given snapshot on next mount.
12525 */
12526 static int
12527 snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
12528 vfs_context_t ctx)
12529 {
12530 int error;
12531 vnode_t rvp;
12532 mount_t mp;
12533 struct fs_snapshot_revert_args revert_data;
12534 struct componentname cnp;
12535 caddr_t name_buf;
12536 size_t name_len;
12537
12538 error = vnode_getfromfd(ctx, dirfd, &rvp);
12539 if (error) {
12540 return error;
12541 }
12542 mp = vnode_mount(rvp);
12543
12544 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12545 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
12546 if (error) {
12547 FREE(name_buf, M_TEMP);
12548 vnode_put(rvp);
12549 return error;
12550 }
12551
12552 #if CONFIG_MACF
12553 error = mac_mount_check_snapshot_revert(ctx, mp, name_buf);
12554 if (error) {
12555 FREE(name_buf, M_TEMP);
12556 vnode_put(rvp);
12557 return error;
12558 }
12559 #endif
12560
12561 /*
12562 * Grab mount_iterref so that we can release the vnode,
12563 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
12564 */
12565 error = mount_iterref(mp, 0);
12566 vnode_put(rvp);
12567 if (error) {
12568 FREE(name_buf, M_TEMP);
12569 return error;
12570 }
12571
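/*
 * Hand the snapshot name to the filesystem as a componentname. HASBUF
 * marks cn_pnbuf as a valid, heap-allocated pathname buffer and ISLASTCN
 * marks this as the final (and only) component of the lookup.
 */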
12572 memset(&cnp, 0, sizeof(cnp));
12573 cnp.cn_pnbuf = (char *)name_buf;
12574 cnp.cn_nameiop = LOOKUP;
12575 cnp.cn_flags = ISLASTCN | HASBUF;
12576 cnp.cn_pnlen = MAXPATHLEN;
12577 cnp.cn_nameptr = cnp.cn_pnbuf;
12578 cnp.cn_namelen = (int)name_len;
12579 revert_data.sr_cnp = &cnp;
12580
12581 error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
12582 mount_iterdrop(mp);
12583 FREE(name_buf, M_TEMP);
12584
12585 if (error) {
12586 /* If there was any error, try again using VNOP_IOCTL */
12587
12588 vnode_t snapdvp;
12589 struct nameidata namend;
12590
12591 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP,
12592 OP_LOOKUP, ctx);
12593 if (error) {
12594 return error;
12595 }
12596
12598 error = VNOP_IOCTL(namend.ni_vp, APFSIOC_REVERT_TO_SNAPSHOT, (caddr_t) NULL,
12599 0, ctx);
12600
12601 vnode_put(namend.ni_vp);
12602 nameidone(&namend);
12603 vnode_put(snapdvp);
12604 vnode_put(rvp);
12605 }
12606
12607 return error;
12608 }
12609
12610 /*
12611 * Rename a filesystem snapshot.
12612 *
12613 * Get the vnodes for the unnamed snapshot directory and the snapshot, then
12614 * rename the snapshot. This is a very specialized (and simple) case of
12615 * rename(2), which has to deal with far more complications. It differs
12616 * from rename(2) in that EEXIST is returned if the new name already exists.
12617 */
12618 static int
12619 snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
12620 __unused uint32_t flags, vfs_context_t ctx)
12621 {
12622 vnode_t rvp, snapdvp;
12623 int error, i;
12624 caddr_t newname_buf;
12625 size_t name_len;
12626 vnode_t fvp;
12627 struct nameidata *fromnd, *tond;
12628 /* Carve out a heap chunk for structs that are too big to be on the stack. */
12629 struct {
12630 struct nameidata from_node;
12631 struct nameidata to_node;
12632 } * __rename_data;
12633
12634 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
12635 fromnd = &__rename_data->from_node;
12636 tond = &__rename_data->to_node;
12637
12638 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE,
12639 OP_UNLINK, ctx);
12640 if (error) {
12641 goto out;
12642 }
12643 fvp = fromnd->ni_vp;
12644
12645 MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12646 error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
12647 if (error) {
12648 goto out1;
12649 }
12650
12651 /*
12652 * Some sanity checks: the new name can't be empty, ".", or "..", and
12653 * can't contain slashes.
12654 * (The length returned by copyinstr includes the terminating NUL.)
12655 *
12656 * The FS rename VNOP is supposed to handle this, but we reject it
12657 * here as well.
12658 */
12659 if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') ||
12660 (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) {
12661 error = EINVAL;
12662 goto out1;
12663 }
12664 for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++) {
12665 ;
12666 }
12667 if (i < (int)name_len) {
12668 error = EINVAL;
12669 goto out1;
12670 }
12671
12672 #if CONFIG_MACF
12673 error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp),
12674 newname_buf);
12675 if (error) {
12676 goto out1;
12677 }
12678 #endif
12679
12680 NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
12681 UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx);
12682 tond->ni_dvp = snapdvp;
12683
12684 error = namei(tond);
12685 if (error) {
12686 goto out2;
12687 } else if (tond->ni_vp) {
12688 /*
12689 * snapshot rename behaves differently than rename(2) - if the
12690 * new name exists, EEXIST is returned.
12691 */
12692 vnode_put(tond->ni_vp);
12693 error = EEXIST;
12694 goto out2;
12695 }
12696
12697 error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
12698 &tond->ni_cnd, ctx);
12699
12700 out2:
12701 nameidone(tond);
12702 out1:
12703 FREE(newname_buf, M_TEMP);
12704 vnode_put(fvp);
12705 vnode_put(snapdvp);
12706 vnode_put(rvp);
12707 nameidone(fromnd);
12708 out:
12709 FREE(__rename_data, M_TEMP);
12710 return error;
12711 }
12712
12713 /*
12714 * Mount a filesystem snapshot.
12715 *
12716 * Get the vnodes for the unnamed snapshot directory and the snapshot, then
12717 * mount the snapshot.
12718 */
12719 static int
12720 snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
12721 __unused user_addr_t mnt_data, uint32_t flags, vfs_context_t ctx)
12722 {
12723 vnode_t rvp, snapdvp, snapvp, vp, pvp;
12724 int error;
12725 struct nameidata *snapndp, *dirndp;
12726 /* Carve out a heap chunk for structs that are too big to be on the stack. */
12727 struct {
12728 struct nameidata snapnd;
12729 struct nameidata dirnd;
12730 } * __snapshot_mount_data;
12731
12732 MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data),
12733 M_TEMP, M_WAITOK);
12734 snapndp = &__snapshot_mount_data->snapnd;
12735 dirndp = &__snapshot_mount_data->dirnd;
12736
12737 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP,
12738 OP_LOOKUP, ctx);
12739 if (error) {
12740 goto out;
12741 }
12742
12743 snapvp = snapndp->ni_vp;
12744 if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) {
12745 error = EIO;
12746 goto out1;
12747 }
12748
12749 /* Get the vnode to be covered */
12750 NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
12751 UIO_USERSPACE, directory, ctx);
12752 error = namei(dirndp);
12753 if (error) {
12754 goto out1;
12755 }
12756
12757 vp = dirndp->ni_vp;
12758 pvp = dirndp->ni_dvp;
12759
12760 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
12761 error = EINVAL;
12762 } else {
12763 mount_t mp = vnode_mount(rvp);
12764 struct fs_snapshot_mount_args smnt_data;
12765
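/*
 * Pass the snapshot's originating mount and componentname down through
 * mount_common(); KERNEL_MOUNT_SNAPSHOT tells the filesystem to mount
 * the named snapshot over the covered directory rather than the live
 * volume, and MNT_DONTBROWSE is honored if the caller requested it.
 */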
12766 smnt_data.sm_mp = mp;
12767 smnt_data.sm_cnp = &snapndp->ni_cnd;
12768 error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
12769 &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), flags & MNT_DONTBROWSE,
12770 KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
12771 }
12772
12773 vnode_put(vp);
12774 vnode_put(pvp);
12775 nameidone(dirndp);
12776 out1:
12777 vnode_put(snapvp);
12778 vnode_put(snapdvp);
12779 vnode_put(rvp);
12780 nameidone(snapndp);
12781 out:
12782 FREE(__snapshot_mount_data, M_TEMP);
12783 return error;
12784 }
12785
12786 /*
12787 * Root from a snapshot of the filesystem
12788 *
12789 * Marks the filesystem to root from the given snapshot on next boot.
12790 */
12791 static int
12792 snapshot_root(int dirfd, user_addr_t name, __unused uint32_t flags,
12793 vfs_context_t ctx)
12794 {
12795 int error;
12796 vnode_t rvp;
12797 mount_t mp;
12798 struct fs_snapshot_root_args root_data;
12799 struct componentname cnp;
12800 caddr_t name_buf;
12801 size_t name_len;
12802
12803 error = vnode_getfromfd(ctx, dirfd, &rvp);
12804 if (error) {
12805 return error;
12806 }
12807 mp = vnode_mount(rvp);
12808
12809 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12810 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
12811 if (error) {
12812 FREE(name_buf, M_TEMP);
12813 vnode_put(rvp);
12814 return error;
12815 }
12816
12817 // XXX MAC checks ?
12818
12819 /*
12820 * Grab mount_iterref so that we can release the vnode,
12821 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
12822 */
12823 error = mount_iterref(mp, 0);
12824 vnode_put(rvp);
12825 if (error) {
12826 FREE(name_buf, M_TEMP);
12827 return error;
12828 }
12829
12830 memset(&cnp, 0, sizeof(cnp));
12831 cnp.cn_pnbuf = (char *)name_buf;
12832 cnp.cn_nameiop = LOOKUP;
12833 cnp.cn_flags = ISLASTCN | HASBUF;
12834 cnp.cn_pnlen = MAXPATHLEN;
12835 cnp.cn_nameptr = cnp.cn_pnbuf;
12836 cnp.cn_namelen = (int)name_len;
12837 root_data.sr_cnp = &cnp;
12838
12839 error = VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, (caddr_t)&root_data, 0, ctx);
12840
12841 mount_iterdrop(mp);
12842 FREE(name_buf, M_TEMP);
12843
12844 return error;
12845 }
12846
12847 /*
12848 * FS snapshot operations dispatcher
12849 */
12850 int
12851 fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap,
12852 __unused int32_t *retval)
12853 {
12854 int error;
12855 vfs_context_t ctx = vfs_context_current();
12856
12857 AUDIT_ARG(fd, uap->dirfd);
12858 AUDIT_ARG(value32, uap->op);
12859
12860 error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0);
12861 if (error) {
12862 return error;
12863 }
12864
12865 /*
12866 * Enforce user authorization for snapshot modification operations: a non-root caller must have write access to the filesystem's backing device vnode.
12867 */
12868 if ((uap->op != SNAPSHOT_OP_MOUNT) &&
12869 (uap->op != SNAPSHOT_OP_ROOT)) {
12870 vnode_t dvp = NULLVP;
12871 vnode_t devvp = NULLVP;
12872 mount_t mp;
12873
12874 error = vnode_getfromfd(ctx, uap->dirfd, &dvp);
12875 if (error) {
12876 return error;
12877 }
12878 mp = vnode_mount(dvp);
12879 devvp = mp->mnt_devvp;
12880
12881 /* get an iocount on devvp */
12882 if (devvp == NULLVP) {
12883 error = vnode_lookup(mp->mnt_vfsstat.f_mntfromname, 0, &devvp, ctx);
12884 /* for mounts which aren't backed by block devices */
12885 if (error == ENOENT) {
12886 error = ENXIO;
12887 }
12888 } else {
12889 error = vnode_getwithref(devvp);
12890 }
12891
12892 if (error) {
12893 vnode_put(dvp);
12894 return error;
12895 }
12896
12897 if ((vfs_context_issuser(ctx) == 0) &&
12898 (vnode_authorize(devvp, NULL, KAUTH_VNODE_WRITE_DATA, ctx) != 0)) {
12899 error = EPERM;
12900 }
12901 vnode_put(dvp);
12902 vnode_put(devvp);
12903
12904 if (error) {
12905 return error;
12906 }
12907 }
12908
12909 switch (uap->op) {
12910 case SNAPSHOT_OP_CREATE:
12911 error = snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx);
12912 break;
12913 case SNAPSHOT_OP_DELETE:
12914 error = snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx);
12915 break;
12916 case SNAPSHOT_OP_RENAME:
12917 error = snapshot_rename(uap->dirfd, uap->name1, uap->name2,
12918 uap->flags, ctx);
12919 break;
12920 case SNAPSHOT_OP_MOUNT:
12921 error = snapshot_mount(uap->dirfd, uap->name1, uap->name2,
12922 uap->data, uap->flags, ctx);
12923 break;
12924 case SNAPSHOT_OP_REVERT:
12925 error = snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx);
12926 break;
12927 #if CONFIG_MNT_ROOTSNAP
12928 case SNAPSHOT_OP_ROOT:
12929 error = snapshot_root(uap->dirfd, uap->name1, uap->flags, ctx);
12930 break;
12931 #endif /* CONFIG_MNT_ROOTSNAP */
12932 default:
12933 error = ENOSYS;
12934 }
12935
12936 return error;
12937 }
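
/*
 * User-space view (illustrative sketch, not part of this file): recent macOS
 * SDKs expose these operations through wrappers declared in <sys/snapshot.h>
 * (fs_snapshot_create(), fs_snapshot_delete(), fs_snapshot_rename(), ...),
 * which funnel into this dispatcher. The wrapper names, and the requirement
 * for a snapshot entitlement alongside the privilege checks above, are
 * assumptions about userland, not something this file guarantees.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *	#include <sys/snapshot.h>
 *
 *	int dirfd = open("/Volumes/Data", O_RDONLY);    // root of the volume
 *	if (dirfd >= 0) {
 *		if (fs_snapshot_create(dirfd, "my-backup", 0) != 0)
 *			perror("fs_snapshot_create");
 *		close(dirfd);
 *	}
 */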