apple/xnu.git: bsd/vfs/vfs_syscalls.c (blob 838ad8c12170b4c506e444675b85292670a831a5)
1 /*
2 * Copyright (c) 1995-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
80 #include <sys/stat.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
87 #include <sys/mman.h>
88 #include <sys/dirent.h>
89 #include <sys/attr.h>
90 #include <sys/sysctl.h>
91 #include <sys/ubc.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/sysctl.h>
98 #include <sys/xattr.h>
99 #include <sys/fcntl.h>
100 #include <sys/fsctl.h>
101 #include <sys/ubc_internal.h>
102 #include <sys/disk.h>
103 #include <sys/content_protection.h>
104 #include <sys/clonefile.h>
105 #include <sys/snapshot.h>
106 #include <sys/priv.h>
107 #include <sys/fsgetpath.h>
108 #include <machine/cons.h>
109 #include <machine/limits.h>
110 #include <miscfs/specfs/specdev.h>
111
112 #include <vfs/vfs_disk_conditioner.h>
113
114 #include <security/audit/audit.h>
115 #include <bsm/audit_kevents.h>
116
117 #include <mach/mach_types.h>
118 #include <kern/kern_types.h>
119 #include <kern/kalloc.h>
120 #include <kern/task.h>
121
122 #include <vm/vm_pageout.h>
123 #include <vm/vm_protos.h>
124
125 #include <libkern/OSAtomic.h>
126 #include <pexpert/pexpert.h>
127 #include <IOKit/IOBSD.h>
128
129 // deps for MIG call
130 #include <kern/host.h>
131 #include <kern/ipc_misc.h>
132 #include <mach/host_priv.h>
133 #include <mach/vfs_nspace.h>
134 #include <os/log.h>
135
136 #if ROUTEFS
137 #include <miscfs/routefs/routefs.h>
138 #endif /* ROUTEFS */
139
140 #if CONFIG_MACF
141 #include <security/mac.h>
142 #include <security/mac_framework.h>
143 #endif
144
145 #if CONFIG_FSE
146 #define GET_PATH(x) \
147 (x) = get_pathbuff();
148 #define RELEASE_PATH(x) \
149 release_pathbuff(x);
150 #else
151 #define GET_PATH(x) \
152 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
153 #define RELEASE_PATH(x) \
154 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
155 #endif /* CONFIG_FSE */
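/*
 * A minimal sketch of how these macros are meant to be paired (schematic,
 * assuming a caller inside this file):
 *
 *	char *target_path = NULL;
 *
 *	GET_PATH(target_path);
 *	... use target_path as a MAXPATHLEN-sized scratch buffer ...
 *	RELEASE_PATH(target_path);
 */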
156
157 #ifndef HFS_GET_BOOT_INFO
158 #define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
159 #endif
160
161 #ifndef HFS_SET_BOOT_INFO
162 #define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
163 #endif
164
165 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
166 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
167 #endif
168
169 extern void disk_conditioner_unmount(mount_t mp);
170
171 /* struct for checkdirs iteration */
172 struct cdirargs {
173 vnode_t olddp;
174 vnode_t newdp;
175 };
176 /* callback for checkdirs iteration */
177 static int checkdirs_callback(proc_t p, void * arg);
178
179 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
180 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
181 void enablequotas(struct mount *mp, vfs_context_t ctx);
182 static int getfsstat_callback(mount_t mp, void * arg);
183 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
184 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
185 static int sync_callback(mount_t, void *);
186 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
187 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
188 boolean_t partial_copy);
189 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
190 static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
191 struct componentname *cnp, user_addr_t fsmountargs,
192 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
193 vfs_context_t ctx);
194 void vfs_notify_mount(vnode_t pdvp);
195
196 int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
197
198 struct fd_vn_data * fg_vn_data_alloc(void);
199
200 /*
201 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}.
202 * Concurrent lookups (or lookups by id) on hard links can cause vn_getpath
203 * (which, unlike vn_getpath_fsenter, does not re-enter the filesystem) to
204 * return ENOENT, since the path cannot be reconstructed from the name cache
205 * alone. We have no option but to retry and hope to complete one
206 * namei->reverse-path generation without an intervening lookup or lookup-by-id
207 * on the hard-linked item. This is only an issue for MAC hooks which cannot
208 * re-enter the filesystem, currently the MAC hooks for rename, unlink and rmdir.
209 */
210 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
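/*
 * A rough sketch of the retry pattern described above (schematic; the real
 * callers live further down in this file, and vn_authorize_unlink() is used
 * here only as a representative non-re-entering MAC authorization hook):
 *
 *	int retry_count = 0;
 * retry:
 *	error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
 *	if (error == ENOENT && retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
 *		retry_count++;
 *		goto retry;
 *	}
 */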
211
212 static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg,
213 int unlink_flags);
214
215 static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, uint32_t options, int *);
216
217 #ifdef CONFIG_IMGSRC_ACCESS
218 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
219 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
220 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
221 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
222 static void mount_end_update(mount_t mp);
223 static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
224 #endif /* CONFIG_IMGSRC_ACCESS */
225
226 #if CONFIG_LOCKERBOOT
227 int mount_locker_protoboot(const char *fsname, const char *mntpoint,
228 const char *pbdevpath);
229 #endif
230
231 //snapshot functions
232 #if CONFIG_MNT_ROOTSNAP
233 static int snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx);
234 #else
235 static int snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx) __attribute__((unused));
236 #endif
237
238 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
239
240 __private_extern__
241 int sync_internal(void);
242
243 __private_extern__
244 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
245
246 extern lck_grp_t *fd_vn_lck_grp;
247 extern lck_grp_attr_t *fd_vn_lck_grp_attr;
248 extern lck_attr_t *fd_vn_lck_attr;
249
250 /*
251 * Incremented each time a mount or unmount operation occurs;
252 * used to invalidate the cached value of the rootvp in the
253 * mount structure utilized by cache_lookup_path.
254 */
255 uint32_t mount_generation = 0;
256
257 /* counts number of mount and unmount operations */
258 unsigned int vfs_nummntops = 0;
259
260 extern const struct fileops vnops;
261 #if CONFIG_APPLEDOUBLE
262 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
263 #endif /* CONFIG_APPLEDOUBLE */
264
265 /*
266 * Virtual File System System Calls
267 */
268
269 #if NFSCLIENT || DEVFS || ROUTEFS
270 /*
271 * Private in-kernel mounting SPI (kernel-internal callers only, not exported)
272 */
273 __private_extern__
274 boolean_t
275 vfs_iskernelmount(mount_t mp)
276 {
277 return (mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE;
278 }
279
280 __private_extern__
281 int
282 kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
283 void *data, __unused size_t datalen, int syscall_flags, uint32_t kern_flags, vfs_context_t ctx)
284 {
285 struct nameidata nd;
286 boolean_t did_namei;
287 int error;
288
289 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
290 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
291
292 /*
293 * Get the vnode to be covered if it's not supplied
294 */
295 if (vp == NULLVP) {
296 error = namei(&nd);
297 if (error) {
298 if (kern_flags & (KERNEL_MOUNT_SNAPSHOT | KERNEL_MOUNT_VMVOL | KERNEL_MOUNT_DATAVOL)) {
299 printf("failed to locate mount-on path: %s ", path);
300 }
301 return error;
302 }
303 vp = nd.ni_vp;
304 pvp = nd.ni_dvp;
305 did_namei = TRUE;
306 } else {
307 char *pnbuf = CAST_DOWN(char *, path);
308
309 nd.ni_cnd.cn_pnbuf = pnbuf;
310 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
311 did_namei = FALSE;
312 }
313
314 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
315 syscall_flags, kern_flags, NULL, TRUE, ctx);
316
317 if (did_namei) {
318 vnode_put(vp);
319 vnode_put(pvp);
320 nameidone(&nd);
321 }
322
323 return error;
324 }
325 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
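/*
 * A minimal sketch of an in-kernel caller of this SPI (schematic; it assumes
 * a filesystem that accepts NULL mount arguments, whereas real callers pass
 * their own filesystem-specific data):
 *
 *	error = kernel_mount("devfs", NULLVP, NULLVP, "/dev", NULL, 0,
 *	    MNT_DONTBROWSE, KERNEL_MOUNT_NOAUTH, vfs_context_kernel());
 */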
326
327 /*
328 * Mount a file system.
329 */
330 /* ARGSUSED */
331 int
332 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
333 {
334 struct __mac_mount_args muap;
335
336 muap.type = uap->type;
337 muap.path = uap->path;
338 muap.flags = uap->flags;
339 muap.data = uap->data;
340 muap.mac_p = USER_ADDR_NULL;
341 return __mac_mount(p, &muap, retval);
342 }
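/*
 * From user space this is reached via mount(2); a minimal sketch, assuming a
 * filesystem type that accepts NULL mount data:
 *
 *	#include <sys/mount.h>
 *
 *	if (mount("devfs", "/dev", MNT_DONTBROWSE, NULL) == -1)
 *		perror("mount");
 */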
343
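/*
 * fmount() is the descriptor-based variant of mount(2): instead of resolving
 * a path, it takes an iocount on the vnode behind the file descriptor,
 * obtains that vnode's parent and path (to build the component name), and
 * then funnels into mount_common() just as mount()/__mac_mount() do.
 */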
344 int
345 fmount(__unused proc_t p, struct fmount_args *uap, __unused int32_t *retval)
346 {
347 struct componentname cn;
348 vfs_context_t ctx = vfs_context_current();
349 size_t dummy = 0;
350 int error;
351 int flags = uap->flags;
352 char fstypename[MFSNAMELEN];
353 char *labelstr = NULL; /* regular mount call always sets it to NULL for __mac_mount() */
354 vnode_t pvp;
355 vnode_t vp;
356
357 AUDIT_ARG(fd, uap->fd);
358 AUDIT_ARG(fflags, flags);
359 /* fstypename will get audited by mount_common */
360
361 /* Sanity check the flags */
362 if (flags & (MNT_IMGSRC_BY_INDEX | MNT_ROOTFS)) {
363 return ENOTSUP;
364 }
365
366 if (flags & MNT_UNION) {
367 return EPERM;
368 }
369
370 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
371 if (error) {
372 return error;
373 }
374
375 if ((error = file_vnode(uap->fd, &vp)) != 0) {
376 return error;
377 }
378
379 if ((error = vnode_getwithref(vp)) != 0) {
380 file_drop(uap->fd);
381 return error;
382 }
383
384 pvp = vnode_getparent(vp);
385 if (pvp == NULL) {
386 vnode_put(vp);
387 file_drop(uap->fd);
388 return EINVAL;
389 }
390
391 memset(&cn, 0, sizeof(struct componentname));
392 MALLOC(cn.cn_pnbuf, char *, MAXPATHLEN, M_TEMP, M_WAITOK);
393 cn.cn_pnlen = MAXPATHLEN;
394
395 if ((error = vn_getpath(vp, cn.cn_pnbuf, &cn.cn_pnlen)) != 0) {
396 FREE(cn.cn_pnbuf, M_TEMP);
397 vnode_put(pvp);
398 vnode_put(vp);
399 file_drop(uap->fd);
400 return error;
401 }
402
403 error = mount_common(fstypename, pvp, vp, &cn, uap->data, flags, 0, labelstr, FALSE, ctx);
404
405 FREE(cn.cn_pnbuf, M_TEMP);
406 vnode_put(pvp);
407 vnode_put(vp);
408 file_drop(uap->fd);
409
410 return error;
411 }
412
413 void
414 vfs_notify_mount(vnode_t pdvp)
415 {
416 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
417 lock_vnode_and_post(pdvp, NOTE_WRITE);
418 }
419
420 /*
421 * __mac_mount:
422 * Mount a file system taking into account MAC label behavior.
423 * See mount(2) man page for more information
424 *
425 * Parameters: p Process requesting the mount
426 * uap User argument descriptor (see below)
427 * retval (ignored)
428 *
429 * Indirect: uap->type Filesystem type
430 * uap->path Path to mount
431 * uap->data Mount arguments
432 * uap->mac_p MAC info
433 * uap->flags Mount flags
434 *
435 *
436 * Returns: 0 Success
437 * !0 Not success
438 */
439 boolean_t root_fs_upgrade_try = FALSE;
440
441 int
442 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
443 {
444 vnode_t pvp = NULL;
445 vnode_t vp = NULL;
446 int need_nameidone = 0;
447 vfs_context_t ctx = vfs_context_current();
448 char fstypename[MFSNAMELEN];
449 struct nameidata nd;
450 size_t dummy = 0;
451 char *labelstr = NULL;
452 int flags = uap->flags;
453 int error;
454 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
455 boolean_t is_64bit = IS_64BIT_PROCESS(p);
456 #else
457 #pragma unused(p)
458 #endif
459 /*
460 * Get the fs type name from user space
461 */
462 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
463 if (error) {
464 return error;
465 }
466
467 /*
468 * Get the vnode to be covered
469 */
470 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
471 UIO_USERSPACE, uap->path, ctx);
472 error = namei(&nd);
473 if (error) {
474 goto out;
475 }
476 need_nameidone = 1;
477 vp = nd.ni_vp;
478 pvp = nd.ni_dvp;
479
480 #ifdef CONFIG_IMGSRC_ACCESS
481 /* Mounting image source cannot be batched with other operations */
482 if (flags == MNT_IMGSRC_BY_INDEX) {
483 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
484 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
485 goto out;
486 }
487 #endif /* CONFIG_IMGSRC_ACCESS */
488
489 #if CONFIG_MACF
490 /*
491 * Get the label string (if any) from user space
492 */
493 if (uap->mac_p != USER_ADDR_NULL) {
494 struct user_mac mac;
495 size_t ulen = 0;
496
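/*
 * The label argument is a (buffer length, string pointer) pair whose
 * layout differs between 32-bit and 64-bit callers, so copy in the
 * matching userNN_mac shape and normalize it into 'mac' below.
 */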
497 if (is_64bit) {
498 struct user64_mac mac64;
499 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
500 mac.m_buflen = mac64.m_buflen;
501 mac.m_string = mac64.m_string;
502 } else {
503 struct user32_mac mac32;
504 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
505 mac.m_buflen = mac32.m_buflen;
506 mac.m_string = mac32.m_string;
507 }
508 if (error) {
509 goto out;
510 }
511 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
512 (mac.m_buflen < 2)) {
513 error = EINVAL;
514 goto out;
515 }
516 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
517 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
518 if (error) {
519 goto out;
520 }
521 AUDIT_ARG(mac_string, labelstr);
522 }
523 #endif /* CONFIG_MACF */
524
525 AUDIT_ARG(fflags, flags);
526
527 #if SECURE_KERNEL
528 if (flags & MNT_UNION) {
529 /* No union mounts on release kernels */
530 error = EPERM;
531 goto out;
532 }
533 #endif
534
535 if ((vp->v_flag & VROOT) &&
536 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
537 if (!(flags & MNT_UNION)) {
538 flags |= MNT_UPDATE;
539 } else {
540 /*
541 * For a union mount on '/', treat it as a fresh
542 * mount instead of an update.
543 * Otherwise, a union mount on '/' used to panic the
544 * system, since mnt_vnodecovered was found to be
545 * NULL for '/', which unionlookup requires after it
546 * gets ENOENT on the union mount.
547 */
548 flags = (flags & ~(MNT_UPDATE));
549 }
550
551 #if SECURE_KERNEL
552 if ((flags & MNT_RDONLY) == 0) {
553 /* Release kernels are not allowed to mount "/" as rw */
554 error = EPERM;
555 goto out;
556 }
557 #endif
558 /*
559 * See 7392553 for more details on why this check exists.
560 * Suffice it to say: if this check is ON and something tries
561 * to mount the root FS read/write, we'll turn off the codesign
562 * bitmap optimization.
563 */
564 #if CHECK_CS_VALIDATION_BITMAP
565 if ((flags & MNT_RDONLY) == 0) {
566 root_fs_upgrade_try = TRUE;
567 }
568 #endif
569 }
570
571 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
572 labelstr, FALSE, ctx);
573
574 out:
575
576 #if CONFIG_MACF
577 if (labelstr) {
578 FREE(labelstr, M_MACTEMP);
579 }
580 #endif /* CONFIG_MACF */
581
582 if (vp) {
583 vnode_put(vp);
584 }
585 if (pvp) {
586 vnode_put(pvp);
587 }
588 if (need_nameidone) {
589 nameidone(&nd);
590 }
591
592 return error;
593 }
594
595 /*
596 * common mount implementation (final stage of mounting)
597 *
598 * Arguments:
599 * fstypename file system type (i.e., its VFS name)
600 * pvp parent of covered vnode
601 * vp covered vnode
602 * cnp component name (i.e., path) of covered vnode
603 * flags generic mount flags
604 * fsmountargs file system specific data
605 * labelstr optional MAC label
606 * kernelmount TRUE for mounts initiated from inside the kernel
607 * ctx caller's context
608 */
609 static int
610 mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
611 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
612 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
613 {
614 #if !CONFIG_MACF
615 #pragma unused(labelstr)
616 #endif
617 struct vnode *devvp = NULLVP;
618 struct vnode *device_vnode = NULLVP;
619 #if CONFIG_MACF
620 struct vnode *rvp;
621 #endif
622 struct mount *mp;
623 struct vfstable *vfsp = (struct vfstable *)0;
624 struct proc *p = vfs_context_proc(ctx);
625 int error, flag = 0;
626 user_addr_t devpath = USER_ADDR_NULL;
627 int ronly = 0;
628 int mntalloc = 0;
629 boolean_t vfsp_ref = FALSE;
630 boolean_t is_rwlock_locked = FALSE;
631 boolean_t did_rele = FALSE;
632 boolean_t have_usecount = FALSE;
633
634 #if CONFIG_ROSV_STARTUP || CONFIG_MOUNT_VM
635 /* Check for mutually-exclusive flag bits */
636 uint32_t checkflags = (internal_flags & (KERNEL_MOUNT_DATAVOL | KERNEL_MOUNT_VMVOL));
637 int bitcount = 0;
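/* Kernighan's trick: 'checkflags &= (checkflags - 1)' clears the lowest set bit, so the loop below counts the bits set in checkflags. */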
638 while (checkflags != 0) {
639 checkflags &= (checkflags - 1);
640 bitcount++;
641 }
642
643 if (bitcount > 1) {
644 //not allowed to request multiple mount-by-role flags
645 error = EINVAL;
646 goto out1;
647 }
648 #endif
649
650 /*
651 * Process an update for an existing mount
652 */
653 if (flags & MNT_UPDATE) {
654 if ((vp->v_flag & VROOT) == 0) {
655 error = EINVAL;
656 goto out1;
657 }
658 mp = vp->v_mount;
659
660 /* if an unmount is in progress, return an error */
661 mount_lock_spin(mp);
662 if (mp->mnt_lflag & MNT_LUNMOUNT) {
663 mount_unlock(mp);
664 error = EBUSY;
665 goto out1;
666 }
667 mount_unlock(mp);
668 lck_rw_lock_exclusive(&mp->mnt_rwlock);
669 is_rwlock_locked = TRUE;
670 /*
671 * We only allow the filesystem to be reloaded if it
672 * is currently mounted read-only.
673 */
674 if ((flags & MNT_RELOAD) &&
675 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
676 error = ENOTSUP;
677 goto out1;
678 }
679
680 /*
681 * If content protection is enabled, update mounts are not
682 * allowed to turn it off.
683 */
684 if ((mp->mnt_flag & MNT_CPROTECT) &&
685 ((flags & MNT_CPROTECT) == 0)) {
686 error = EINVAL;
687 goto out1;
688 }
689
690 /*
691 * MNT_REMOVABLE can't be turned off either, but returning an error
692 * for that would be an unexpected failure, so we just silently
693 * add it back if it is not passed in.
694 */
695 if ((mp->mnt_flag & MNT_REMOVABLE) &&
696 ((flags & MNT_REMOVABLE) == 0)) {
697 flags |= MNT_REMOVABLE;
698 }
699
700 #ifdef CONFIG_IMGSRC_ACCESS
701 /* Can't downgrade the backer of the root FS */
702 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
703 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
704 error = ENOTSUP;
705 goto out1;
706 }
707 #endif /* CONFIG_IMGSRC_ACCESS */
708
709 /*
710 * Only root, or the user that did the original mount is
711 * permitted to update it.
712 */
713 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
714 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
715 goto out1;
716 }
717 #if CONFIG_MACF
718 error = mac_mount_check_remount(ctx, mp);
719 if (error != 0) {
720 goto out1;
721 }
722 #endif
723 /*
724 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
725 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
726 */
727 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
728 flags |= MNT_NOSUID | MNT_NODEV;
729 if (mp->mnt_flag & MNT_NOEXEC) {
730 flags |= MNT_NOEXEC;
731 }
732 }
733 flag = mp->mnt_flag;
734
735
736
737 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
738
739 vfsp = mp->mnt_vtable;
740 goto update;
741 } // MNT_UPDATE
742
743 /*
744 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
745 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
746 */
747 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
748 flags |= MNT_NOSUID | MNT_NODEV;
749 if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
750 flags |= MNT_NOEXEC;
751 }
752 }
753
754 /* XXXAUDIT: Should we capture the type on the error path as well? */
755 AUDIT_ARG(text, fstypename);
756 mount_list_lock();
757 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
758 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
759 vfsp->vfc_refcount++;
760 vfsp_ref = TRUE;
761 break;
762 }
763 }
764 mount_list_unlock();
765 if (vfsp == NULL) {
766 error = ENODEV;
767 goto out1;
768 }
769
770 /*
771 * VFC_VFSLOCALARGS is not currently supported for kernel mounts,
772 * except in ROSV configs.
773 */
774 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) &&
775 ((internal_flags & (KERNEL_MOUNT_DATAVOL | KERNEL_MOUNT_VMVOL)) == 0)) {
776 error = EINVAL; /* unsupported request */
777 goto out1;
778 }
779
780 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
781 if (error != 0) {
782 goto out1;
783 }
784
785 /*
786 * Allocate and initialize the filesystem (mount_t)
787 */
788 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
789 M_MOUNT, M_WAITOK);
790 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
791 mntalloc = 1;
792
793 /* Initialize the default IO constraints */
794 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
795 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
796 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
797 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
798 mp->mnt_devblocksize = DEV_BSIZE;
799 mp->mnt_alignmentmask = PAGE_MASK;
800 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
801 mp->mnt_ioscale = 1;
802 mp->mnt_ioflags = 0;
803 mp->mnt_realrootvp = NULLVP;
804 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
805
806 TAILQ_INIT(&mp->mnt_vnodelist);
807 TAILQ_INIT(&mp->mnt_workerqueue);
808 TAILQ_INIT(&mp->mnt_newvnodes);
809 mount_lock_init(mp);
810 lck_rw_lock_exclusive(&mp->mnt_rwlock);
811 is_rwlock_locked = TRUE;
812 mp->mnt_op = vfsp->vfc_vfsops;
813 mp->mnt_vtable = vfsp;
814 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
815 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
816 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
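/* Record the mount-on path; if it cannot be obtained from the covered vnode, fall back to the caller-supplied pathname buffer. */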
817 do {
818 int pathlen = MAXPATHLEN;
819
820 if (vn_getpath_ext(vp, pvp, mp->mnt_vfsstat.f_mntonname, &pathlen, VN_GETPATH_FSENTER)) {
821 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
822 }
823 } while (0);
824 mp->mnt_vnodecovered = vp;
825 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
826 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
827 mp->mnt_devbsdunit = 0;
828
829 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
830 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
831
832 #if NFSCLIENT || DEVFS || ROUTEFS
833 if (kernelmount) {
834 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
835 }
836 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0) {
837 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
838 }
839 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
840
841 update:
842
843 /*
844 * Set the mount level flags.
845 */
846 if (flags & MNT_RDONLY) {
847 mp->mnt_flag |= MNT_RDONLY;
848 } else if (mp->mnt_flag & MNT_RDONLY) {
849 // disallow read/write upgrades of file systems that
850 // had the TYPENAME_OVERRIDE feature set.
851 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
852 error = EPERM;
853 goto out1;
854 }
855 mp->mnt_kern_flag |= MNTK_WANTRDWR;
856 }
857 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
858 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
859 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
860 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME | MNT_STRICTATIME |
861 MNT_QUARANTINE | MNT_CPROTECT);
862
863 #if SECURE_KERNEL
864 #if !CONFIG_MNT_SUID
865 /*
866 * On release builds of iOS-based platforms, always enforce NOSUID on
867 * all mounts. We do this here because we can catch update mounts as well as
868 * non-update mounts in this case.
869 */
870 mp->mnt_flag |= (MNT_NOSUID);
871 #endif
872 #endif
873
874 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
875 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
876 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
877 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME | MNT_STRICTATIME |
878 MNT_QUARANTINE | MNT_CPROTECT);
879
880 #if CONFIG_MACF
881 if (flags & MNT_MULTILABEL) {
882 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
883 error = EINVAL;
884 goto out1;
885 }
886 mp->mnt_flag |= MNT_MULTILABEL;
887 }
888 #endif
889 /*
890 * Process device path for local file systems if requested
891 */
892 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS &&
893 !(internal_flags & (KERNEL_MOUNT_SNAPSHOT | KERNEL_MOUNT_DATAVOL | KERNEL_MOUNT_VMVOL))) {
894 //snapshot, vm, datavolume mounts are special
895 if (vfs_context_is64bit(ctx)) {
896 if ((error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath)))) {
897 goto out1;
898 }
899 fsmountargs += sizeof(devpath);
900 } else {
901 user32_addr_t tmp;
902 if ((error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp)))) {
903 goto out1;
904 }
905 /* munge into LP64 addr */
906 devpath = CAST_USER_ADDR_T(tmp);
907 fsmountargs += sizeof(tmp);
908 }
909
910 /* Lookup device and authorize access to it */
911 if ((devpath)) {
912 struct nameidata nd;
913
914 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
915 if ((error = namei(&nd))) {
916 goto out1;
917 }
918
919 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
920 devvp = nd.ni_vp;
921
922 nameidone(&nd);
923
924 if (devvp->v_type != VBLK) {
925 error = ENOTBLK;
926 goto out2;
927 }
928 if (major(devvp->v_rdev) >= nblkdev) {
929 error = ENXIO;
930 goto out2;
931 }
932 /*
933 * If mounting by non-root, then verify that the user has the necessary
934 * permissions on the device.
935 */
936 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
937 mode_t accessmode = KAUTH_VNODE_READ_DATA;
938
939 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
940 accessmode |= KAUTH_VNODE_WRITE_DATA;
941 }
942 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0) {
943 goto out2;
944 }
945 }
946 }
947 /* On first mount, preflight and open device */
948 if (devpath && ((flags & MNT_UPDATE) == 0)) {
949 if ((error = vnode_ref(devvp))) {
950 goto out2;
951 }
952 /*
953 * Disallow multiple mounts of the same device.
954 * Disallow mounting of a device that is currently in use
955 * (except for root, which might share swap device for miniroot).
956 * Flush out any old buffers remaining from a previous use.
957 */
958 if ((error = vfs_mountedon(devvp))) {
959 goto out3;
960 }
961
962 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
963 error = EBUSY;
964 goto out3;
965 }
966 if ((error = VNOP_FSYNC(devvp, MNT_WAIT, ctx))) {
967 error = ENOTBLK;
968 goto out3;
969 }
970 if ((error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0))) {
971 goto out3;
972 }
973
974 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
975 #if CONFIG_MACF
976 error = mac_vnode_check_open(ctx,
977 devvp,
978 ronly ? FREAD : FREAD | FWRITE);
979 if (error) {
980 goto out3;
981 }
982 #endif /* MAC */
983 if ((error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD | FWRITE, ctx))) {
984 goto out3;
985 }
986
987 mp->mnt_devvp = devvp;
988 device_vnode = devvp;
989 } else if ((mp->mnt_flag & MNT_RDONLY) &&
990 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
991 (device_vnode = mp->mnt_devvp)) {
992 dev_t dev;
993 int maj;
994 /*
995 * If upgrading to read-write by non-root, then verify
996 * that the user has the necessary permissions on the device.
997 */
998 vnode_getalways(device_vnode);
999
1000 if (suser(vfs_context_ucred(ctx), NULL) &&
1001 (error = vnode_authorize(device_vnode, NULL,
1002 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
1003 ctx)) != 0) {
1004 vnode_put(device_vnode);
1005 goto out2;
1006 }
1007
1008 /* Tell the device that we're upgrading */
1009 dev = (dev_t)device_vnode->v_rdev;
1010 maj = major(dev);
1011
1012 if ((u_int)maj >= (u_int)nblkdev) {
1013 panic("Volume mounted on a device with invalid major number.");
1014 }
1015
1016 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
1017 vnode_put(device_vnode);
1018 device_vnode = NULLVP;
1019 if (error != 0) {
1020 goto out2;
1021 }
1022 }
1023 } // localargs && !(snapshot | data | vm)
1024
1025 #if CONFIG_MACF
1026 if ((flags & MNT_UPDATE) == 0) {
1027 mac_mount_label_init(mp);
1028 mac_mount_label_associate(ctx, mp);
1029 }
1030 if (labelstr) {
1031 if ((flags & MNT_UPDATE) != 0) {
1032 error = mac_mount_check_label_update(ctx, mp);
1033 if (error != 0) {
1034 goto out3;
1035 }
1036 }
1037 }
1038 #endif
1039 /*
1040 * Mount the filesystem. We already asserted that internal_flags
1041 * cannot have more than one mount-by-role bit set.
1042 */
1043 if (internal_flags & KERNEL_MOUNT_SNAPSHOT) {
1044 error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT,
1045 (caddr_t)fsmountargs, 0, ctx);
1046 } else if (internal_flags & KERNEL_MOUNT_DATAVOL) {
1047 #if CONFIG_ROSV_STARTUP
1048 struct mount *origin_mp = (struct mount*)fsmountargs;
1049 fs_role_mount_args_t frma = {origin_mp, VFS_DATA_ROLE};
1050 error = VFS_IOCTL(mp, VFSIOC_MOUNT_BYROLE, (caddr_t)&frma, 0, ctx);
1051 if (error) {
1052 printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_DATA_ROLE, error);
1053 } else {
1054 /* Mark volume associated with system volume */
1055 mp->mnt_kern_flag |= MNTK_SYSTEM;
1056
1057 /* Attempt to acquire the mnt_devvp and set it up */
1058 struct vnode *mp_devvp = NULL;
1059 if (mp->mnt_vfsstat.f_mntfromname[0] != 0) {
1060 errno_t lerr = vnode_lookup(mp->mnt_vfsstat.f_mntfromname,
1061 0, &mp_devvp, vfs_context_kernel());
1062 if (!lerr) {
1063 mp->mnt_devvp = mp_devvp;
1064 //vnode_lookup took an iocount, need to drop it.
1065 vnode_put(mp_devvp);
1066 // now set `device_vnode` to the devvp that was acquired.
1067 // this is needed in order to ensure vfs_init_io_attributes is invoked.
1068 // note that though the iocount above was dropped, the mount acquires
1069 // an implicit reference against the device.
1070 device_vnode = mp_devvp;
1071 }
1072 }
1073 }
1074 #else
1075 error = EINVAL;
1076 #endif
1077 } else if (internal_flags & KERNEL_MOUNT_VMVOL) {
1078 #if CONFIG_MOUNT_VM
1079 struct mount *origin_mp = (struct mount*)fsmountargs;
1080 fs_role_mount_args_t frma = {origin_mp, VFS_VM_ROLE};
1081 error = VFS_IOCTL(mp, VFSIOC_MOUNT_BYROLE, (caddr_t)&frma, 0, ctx);
1082 if (error) {
1083 printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_VM_ROLE, error);
1084 } else {
1085 /* Mark volume associated with system volume and a swap mount */
1086 mp->mnt_kern_flag |= (MNTK_SYSTEM | MNTK_SWAP_MOUNT);
1087 /* Attempt to acquire the mnt_devvp and set it up */
1088 struct vnode *mp_devvp = NULL;
1089 if (mp->mnt_vfsstat.f_mntfromname[0] != 0) {
1090 errno_t lerr = vnode_lookup(mp->mnt_vfsstat.f_mntfromname,
1091 0, &mp_devvp, vfs_context_kernel());
1092 if (!lerr) {
1093 mp->mnt_devvp = mp_devvp;
1094 //vnode_lookup took an iocount, need to drop it.
1095 vnode_put(mp_devvp);
1096
1097 // now set `device_vnode` to the devvp that was acquired.
1098 // note that though the iocount above was dropped, the mount acquires
1099 // an implicit reference against the device.
1100 device_vnode = mp_devvp;
1101 }
1102 }
1103 }
1104 #else
1105 error = EINVAL;
1106 #endif
1107 } else {
1108 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
1109 }
1110
1111 if (flags & MNT_UPDATE) {
1112 if (mp->mnt_kern_flag & MNTK_WANTRDWR) {
1113 mp->mnt_flag &= ~MNT_RDONLY;
1114 }
1115 mp->mnt_flag &= ~
1116 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
1117 mp->mnt_kern_flag &= ~MNTK_WANTRDWR;
1118 if (error) {
1119 mp->mnt_flag = flag; /* restore flag value */
1120 }
1121 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
1122 lck_rw_done(&mp->mnt_rwlock);
1123 is_rwlock_locked = FALSE;
1124 if (!error) {
1125 enablequotas(mp, ctx);
1126 }
1127 goto exit;
1128 }
1129
1130 /*
1131 * Put the new filesystem on the mount list after root.
1132 */
1133 if (error == 0) {
1134 struct vfs_attr vfsattr;
1135 #if CONFIG_MACF
1136 error = mac_mount_check_mount_late(ctx, mp);
1137 if (error != 0) {
1138 goto out3;
1139 }
1140
1141 if (vfs_flags(mp) & MNT_MULTILABEL) {
1142 error = VFS_ROOT(mp, &rvp, ctx);
1143 if (error) {
1144 printf("%s() VFS_ROOT returned %d\n", __func__, error);
1145 goto out3;
1146 }
1147 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
1148 /*
1149 * drop reference provided by VFS_ROOT
1150 */
1151 vnode_put(rvp);
1152
1153 if (error) {
1154 goto out3;
1155 }
1156 }
1157 #endif /* MAC */
1158
1159 vnode_lock_spin(vp);
1160 CLR(vp->v_flag, VMOUNT);
1161 vp->v_mountedhere = mp;
1162 vnode_unlock(vp);
1163
1164 /*
1165 * Taking the name_cache_lock exclusively will ensure that everyone
1166 * is out of the fast path who might be trying to use a now-stale
1167 * copy of vp->v_mountedhere->mnt_realrootvp.
1168 *
1169 * Bumping mount_generation causes the cached values to be
1170 * invalidated.
1171 */
1172 name_cache_lock();
1173 mount_generation++;
1174 name_cache_unlock();
1175
1176 error = vnode_ref(vp);
1177 if (error != 0) {
1178 goto out4;
1179 }
1180
1181 have_usecount = TRUE;
1182
1183 error = checkdirs(vp, ctx);
1184 if (error != 0) {
1185 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1186 goto out4;
1187 }
1188 /*
1189 * There is no cleanup code here, so the return value is cast to
1190 * void; we need to revisit this.
1191 */
1192 (void)VFS_START(mp, 0, ctx);
1193
1194 if (mount_list_add(mp) != 0) {
1195 /*
1196 * The system is shutting down trying to umount
1197 * everything, so fail with a plausible errno.
1198 */
1199 error = EBUSY;
1200 goto out4;
1201 }
1202 lck_rw_done(&mp->mnt_rwlock);
1203 is_rwlock_locked = FALSE;
1204
1205 /* Check if this mounted file system supports EAs or named streams. */
1206 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1207 VFSATTR_INIT(&vfsattr);
1208 VFSATTR_WANTED(&vfsattr, f_capabilities);
1209 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
1210 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
1211 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
1212 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
1213 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
1214 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1215 }
1216 #if NAMEDSTREAMS
1217 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
1218 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
1219 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1220 }
1221 #endif
1222 /* Check if this file system supports path from id lookups. */
1223 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
1224 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
1225 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1226 } else if (mp->mnt_flag & MNT_DOVOLFS) {
1227 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1228 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1229 }
1230
1231 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) &&
1232 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) {
1233 mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS;
1234 }
1235 }
1236 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
1237 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1238 }
1239 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
1240 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
1241 }
1242 /* increment the operations count */
1243 OSAddAtomic(1, &vfs_nummntops);
1244 enablequotas(mp, ctx);
1245
1246 if (device_vnode) {
1247 device_vnode->v_specflags |= SI_MOUNTEDON;
1248
1249 /*
1250 * cache the IO attributes for the underlying physical media...
1251 * an error return indicates the underlying driver doesn't
1252 * support all the queries necessary... however, reasonable
1253 * defaults will have been set, so no reason to bail or care
1254 */
1255 vfs_init_io_attributes(device_vnode, mp);
1256 }
1257
1258 /* Now that mount is setup, notify the listeners */
1259 vfs_notify_mount(pvp);
1260 IOBSDMountChange(mp, kIOMountChangeMount);
1261 } else {
1262 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1263 if (mp->mnt_vnodelist.tqh_first != NULL) {
1264 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1265 mp->mnt_vtable->vfc_name, error);
1266 }
1267
1268 vnode_lock_spin(vp);
1269 CLR(vp->v_flag, VMOUNT);
1270 vnode_unlock(vp);
1271 mount_list_lock();
1272 mp->mnt_vtable->vfc_refcount--;
1273 mount_list_unlock();
1274
1275 if (device_vnode) {
1276 vnode_rele(device_vnode);
1277 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD | FWRITE, ctx);
1278 }
1279 lck_rw_done(&mp->mnt_rwlock);
1280 is_rwlock_locked = FALSE;
1281
1282 /*
1283 * If we get here, we have a mount structure that needs to be freed.
1284 * Since the coveredvp hasn't yet been updated to point at it, there
1285 * is no need to worry about other threads holding a crossref on this
1286 * mp, so it's OK to just free it.
1287 */
1288 mount_lock_destroy(mp);
1289 #if CONFIG_MACF
1290 mac_mount_label_destroy(mp);
1291 #endif
1292 FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
1293 }
1294 exit:
1295 /*
1296 * drop I/O count on the device vp if there was one
1297 */
1298 if (devpath && devvp) {
1299 vnode_put(devvp);
1300 }
1301
1302 return error;
1303
1304 /* Error condition exits */
1305 out4:
1306 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
1307
1308 /*
1309 * If the mount has been placed on the covered vp,
1310 * it may have been discovered by now, so we have
1311 * to treat this just like an unmount
1312 */
1313 mount_lock_spin(mp);
1314 mp->mnt_lflag |= MNT_LDEAD;
1315 mount_unlock(mp);
1316
1317 if (device_vnode != NULLVP) {
1318 vnode_rele(device_vnode);
1319 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD | FWRITE,
1320 ctx);
1321 did_rele = TRUE;
1322 }
1323
1324 vnode_lock_spin(vp);
1325
1326 mp->mnt_crossref++;
1327 vp->v_mountedhere = (mount_t) 0;
1328
1329 vnode_unlock(vp);
1330
1331 if (have_usecount) {
1332 vnode_rele(vp);
1333 }
1334 out3:
1335 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele)) {
1336 vnode_rele(devvp);
1337 }
1338 out2:
1339 if (devpath && devvp) {
1340 vnode_put(devvp);
1341 }
1342 out1:
1343 /* Release mnt_rwlock only when it was taken */
1344 if (is_rwlock_locked == TRUE) {
1345 lck_rw_done(&mp->mnt_rwlock);
1346 }
1347
1348 if (mntalloc) {
1349 if (mp->mnt_crossref) {
1350 mount_dropcrossref(mp, vp, 0);
1351 } else {
1352 mount_lock_destroy(mp);
1353 #if CONFIG_MACF
1354 mac_mount_label_destroy(mp);
1355 #endif
1356 FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
1357 }
1358 }
1359 if (vfsp_ref) {
1360 mount_list_lock();
1361 vfsp->vfc_refcount--;
1362 mount_list_unlock();
1363 }
1364
1365 return error;
1366 }
1367
1368 /*
1369 * Flush in-core data, check for competing mount attempts,
1370 * and set VMOUNT
1371 */
1372 int
1373 prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
1374 {
1375 #if !CONFIG_MACF
1376 #pragma unused(cnp,fsname)
1377 #endif
1378 struct vnode_attr va;
1379 int error;
1380
1381 if (!skip_auth) {
1382 /*
1383 * If the user is not root, ensure that they own the directory
1384 * onto which we are attempting to mount.
1385 */
1386 VATTR_INIT(&va);
1387 VATTR_WANTED(&va, va_uid);
1388 if ((error = vnode_getattr(vp, &va, ctx)) ||
1389 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1390 (!vfs_context_issuser(ctx)))) {
1391 error = EPERM;
1392 goto out;
1393 }
1394 }
1395
1396 if ((error = VNOP_FSYNC(vp, MNT_WAIT, ctx))) {
1397 goto out;
1398 }
1399
1400 if ((error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0))) {
1401 goto out;
1402 }
1403
1404 if (vp->v_type != VDIR) {
1405 error = ENOTDIR;
1406 goto out;
1407 }
1408
1409 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1410 error = EBUSY;
1411 goto out;
1412 }
1413
1414 #if CONFIG_MACF
1415 error = mac_mount_check_mount(ctx, vp,
1416 cnp, fsname);
1417 if (error != 0) {
1418 goto out;
1419 }
1420 #endif
1421
1422 vnode_lock_spin(vp);
1423 SET(vp->v_flag, VMOUNT);
1424 vnode_unlock(vp);
1425
1426 out:
1427 return error;
1428 }
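/*
 * Note for callers: on success the covered vnode is left with VMOUNT set,
 * so every error path taken after a successful prepare_coveredvp() must
 * clear it again (as mount_common() and relocate_imageboot_source() do
 * with CLR(vp->v_flag, VMOUNT)).
 */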
1429
1430 #if CONFIG_IMGSRC_ACCESS
1431
1432 #define DEBUG_IMGSRC 0
1433
1434 #if DEBUG_IMGSRC
1435 #define IMGSRC_DEBUG(args...) printf("imgsrc: " args)
1436 #else
1437 #define IMGSRC_DEBUG(args...) do { } while(0)
1438 #endif
1439
1440 static int
1441 authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1442 {
1443 struct nameidata nd;
1444 vnode_t vp, realdevvp;
1445 mode_t accessmode;
1446 int error;
1447 enum uio_seg uio = UIO_USERSPACE;
1448
1449 if (ctx == vfs_context_kernel()) {
1450 uio = UIO_SYSSPACE;
1451 }
1452
1453 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, uio, devpath, ctx);
1454 if ((error = namei(&nd))) {
1455 IMGSRC_DEBUG("namei() failed with %d\n", error);
1456 return error;
1457 }
1458
1459 vp = nd.ni_vp;
1460
1461 if (!vnode_isblk(vp)) {
1462 IMGSRC_DEBUG("Not block device.\n");
1463 error = ENOTBLK;
1464 goto out;
1465 }
1466
1467 realdevvp = mp->mnt_devvp;
1468 if (realdevvp == NULLVP) {
1469 IMGSRC_DEBUG("No device backs the mount.\n");
1470 error = ENXIO;
1471 goto out;
1472 }
1473
1474 error = vnode_getwithref(realdevvp);
1475 if (error != 0) {
1476 IMGSRC_DEBUG("Couldn't get iocount on device.\n");
1477 goto out;
1478 }
1479
1480 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1481 IMGSRC_DEBUG("Wrong dev_t.\n");
1482 error = ENXIO;
1483 goto out1;
1484 }
1485
1486 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1487
1488 /*
1489 * If mounting by non-root, then verify that the user has the necessary
1490 * permissions on the device.
1491 */
1492 if (!vfs_context_issuser(ctx)) {
1493 accessmode = KAUTH_VNODE_READ_DATA;
1494 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1495 accessmode |= KAUTH_VNODE_WRITE_DATA;
1496 }
1497 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1498 IMGSRC_DEBUG("Access denied.\n");
1499 goto out1;
1500 }
1501 }
1502
1503 *devvpp = vp;
1504
1505 out1:
1506 vnode_put(realdevvp);
1507
1508 out:
1509 nameidone(&nd);
1510
1511 if (error) {
1512 vnode_put(vp);
1513 }
1514
1515 return error;
1516 }
1517
1518 /*
1519 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1520 * and call checkdirs()
1521 */
1522 static int
1523 place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1524 {
1525 int error;
1526
1527 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1528
1529 IMGSRC_DEBUG("placing: fsname = %s, vp = %s\n",
1530 mp->mnt_vtable->vfc_name, vnode_getname(vp));
1531
1532 vnode_lock_spin(vp);
1533 CLR(vp->v_flag, VMOUNT);
1534 vp->v_mountedhere = mp;
1535 vnode_unlock(vp);
1536
1537 /*
1538 * Taking the name_cache_lock exclusively will ensure that everyone
1539 * is out of the fast path who might be trying to use a now-stale
1540 * copy of vp->v_mountedhere->mnt_realrootvp.
1541 *
1542 * Bumping mount_generation causes the cached values to be
1543 * invalidated.
1544 */
1545 name_cache_lock();
1546 mount_generation++;
1547 name_cache_unlock();
1548
1549 error = vnode_ref(vp);
1550 if (error != 0) {
1551 goto out;
1552 }
1553
1554 error = checkdirs(vp, ctx);
1555 if (error != 0) {
1556 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1557 vnode_rele(vp);
1558 goto out;
1559 }
1560
1561 out:
1562 if (error != 0) {
1563 mp->mnt_vnodecovered = NULLVP;
1564 }
1565 return error;
1566 }
1567
1568 static void
1569 undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1570 {
1571 vnode_rele(vp);
1572 vnode_lock_spin(vp);
1573 vp->v_mountedhere = (mount_t)NULL;
1574 vnode_unlock(vp);
1575
1576 mp->mnt_vnodecovered = NULLVP;
1577 }
1578
1579 static int
1580 mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1581 {
1582 int error;
1583
1584 /* if an unmount is in progress, return an error */
1585 mount_lock_spin(mp);
1586 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1587 mount_unlock(mp);
1588 return EBUSY;
1589 }
1590 mount_unlock(mp);
1591 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1592
1593 /*
1594 * We only allow the filesystem to be reloaded if it
1595 * is currently mounted read-only.
1596 */
1597 if ((flags & MNT_RELOAD) &&
1598 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1599 error = ENOTSUP;
1600 goto out;
1601 }
1602
1603 /*
1604 * Only root, or the user that did the original mount is
1605 * permitted to update it.
1606 */
1607 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1608 (!vfs_context_issuser(ctx))) {
1609 error = EPERM;
1610 goto out;
1611 }
1612 #if CONFIG_MACF
1613 error = mac_mount_check_remount(ctx, mp);
1614 if (error != 0) {
1615 goto out;
1616 }
1617 #endif
1618
1619 out:
1620 if (error) {
1621 lck_rw_done(&mp->mnt_rwlock);
1622 }
1623
1624 return error;
1625 }
1626
1627 static void
1628 mount_end_update(mount_t mp)
1629 {
1630 lck_rw_done(&mp->mnt_rwlock);
1631 }
1632
1633 static int
1634 get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1635 {
1636 vnode_t vp;
1637
1638 if (height >= MAX_IMAGEBOOT_NESTING) {
1639 return EINVAL;
1640 }
1641
1642 vp = imgsrc_rootvnodes[height];
1643 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1644 *rvpp = vp;
1645 return 0;
1646 } else {
1647 return ENOENT;
1648 }
1649 }
1650
1651 static int
1652 relocate_imageboot_source(vnode_t pvp, vnode_t vp,
1653 struct componentname *cnp, const char *fsname, vfs_context_t ctx,
1654 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
1655 {
1656 int error;
1657 mount_t mp;
1658 boolean_t placed = FALSE;
1659 struct vfstable *vfsp;
1660 user_addr_t devpath;
1661 char *old_mntonname;
1662 vnode_t rvp;
1663 vnode_t devvp;
1664 uint32_t height;
1665 uint32_t flags;
1666
1667 /* If we didn't imageboot, nothing to move */
1668 if (imgsrc_rootvnodes[0] == NULLVP) {
1669 return EINVAL;
1670 }
1671
1672 /* Only root can do this */
1673 if (!vfs_context_issuser(ctx)) {
1674 return EPERM;
1675 }
1676
1677 IMGSRC_DEBUG("looking for root vnode.\n");
1678
1679 /*
1680 * Get root vnode of filesystem we're moving.
1681 */
1682 if (by_index) {
1683 if (is64bit) {
1684 struct user64_mnt_imgsrc_args mia64;
1685 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1686 if (error != 0) {
1687 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1688 return error;
1689 }
1690
1691 height = mia64.mi_height;
1692 flags = mia64.mi_flags;
1693 devpath = mia64.mi_devpath;
1694 } else {
1695 struct user32_mnt_imgsrc_args mia32;
1696 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1697 if (error != 0) {
1698 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1699 return error;
1700 }
1701
1702 height = mia32.mi_height;
1703 flags = mia32.mi_flags;
1704 devpath = mia32.mi_devpath;
1705 }
1706 } else {
1707 /*
1708 * For binary compatibility--assumes one level of nesting.
1709 */
1710 if (is64bit) {
1711 if ((error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath)))) {
1712 return error;
1713 }
1714 } else {
1715 user32_addr_t tmp;
1716 if ((error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp)))) {
1717 return error;
1718 }
1719
1720 /* munge into LP64 addr */
1721 devpath = CAST_USER_ADDR_T(tmp);
1722 }
1723
1724 height = 0;
1725 flags = 0;
1726 }
1727
1728 if (flags != 0) {
1729 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1730 return EINVAL;
1731 }
1732
1733 error = get_imgsrc_rootvnode(height, &rvp);
1734 if (error != 0) {
1735 IMGSRC_DEBUG("getting old root vnode failed with %d\n", error);
1736 return error;
1737 }
1738
1739 IMGSRC_DEBUG("got old root vnode\n");
1740
1741 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1742
1743 /* Can only move once */
1744 mp = vnode_mount(rvp);
1745 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1746 IMGSRC_DEBUG("Already moved.\n");
1747 error = EBUSY;
1748 goto out0;
1749 }
1750
1751 IMGSRC_DEBUG("moving rvp: fsname = %s\n", mp->mnt_vtable->vfc_name);
1752 IMGSRC_DEBUG("Starting update.\n");
1753
1754 /* Get exclusive rwlock on mount, authorize update on mp */
1755 error = mount_begin_update(mp, ctx, 0);
1756 if (error != 0) {
1757 IMGSRC_DEBUG("Starting update failed with %d\n", error);
1758 goto out0;
1759 }
1760
1761 /*
1762 * It can only be moved once. Flag is set under the rwlock,
1763 * so we're now safe to proceed.
1764 */
1765 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1766 IMGSRC_DEBUG("Already moved [2]\n");
1767 goto out1;
1768 }
1769
1770 IMGSRC_DEBUG("Preparing coveredvp.\n");
1771
1772 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1773 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
1774 if (error != 0) {
1775 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
1776 goto out1;
1777 }
1778
1779 IMGSRC_DEBUG("Covered vp OK.\n");
1780
1781 /* Sanity-check the name the caller has provided */
1782 vfsp = mp->mnt_vtable;
1783 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
1784 IMGSRC_DEBUG("Wrong fs name: actual = %s, expected = %s\n",
1785 vfsp->vfc_name, fsname);
1786 error = EINVAL;
1787 goto out2;
1788 }
1789
1790 /* Check the device vnode and update mount-from name, for local filesystems */
1791 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
1792 IMGSRC_DEBUG("Local, doing device validation.\n");
1793
1794 if (devpath != USER_ADDR_NULL) {
1795 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1796 if (error) {
1797 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1798 goto out2;
1799 }
1800
1801 vnode_put(devvp);
1802 }
1803 }
1804
1805 /*
1806 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1807 * and increment the name cache's mount generation
1808 */
1809
1810 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1811 error = place_mount_and_checkdirs(mp, vp, ctx);
1812 if (error != 0) {
1813 goto out2;
1814 }
1815
1816 placed = TRUE;
1817
1818 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1819 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1820
1821 /* Forbid future moves */
1822 mount_lock(mp);
1823 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1824 mount_unlock(mp);
1825
1826 /* Finally, add to mount list, completely ready to go */
1827 if (mount_list_add(mp) != 0) {
1828 /*
1829 * The system is shutting down trying to umount
1830 * everything, so fail with a plausible errno.
1831 */
1832 error = EBUSY;
1833 goto out3;
1834 }
1835
1836 mount_end_update(mp);
1837 vnode_put(rvp);
1838 FREE(old_mntonname, M_TEMP);
1839
1840 vfs_notify_mount(pvp);
1841
1842 return 0;
1843 out3:
1844 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1845
1846 mount_lock(mp);
1847 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1848 mount_unlock(mp);
1849
1850 out2:
1851 /*
1852 * Placing the mp on the vnode clears VMOUNT,
1853 * so cleanup is different after that point
1854 */
1855 if (placed) {
1856 /* Rele the vp, clear VMOUNT and v_mountedhere */
1857 undo_place_on_covered_vp(mp, vp);
1858 } else {
1859 vnode_lock_spin(vp);
1860 CLR(vp->v_flag, VMOUNT);
1861 vnode_unlock(vp);
1862 }
1863 out1:
1864 mount_end_update(mp);
1865
1866 out0:
1867 vnode_put(rvp);
1868 FREE(old_mntonname, M_TEMP);
1869 return error;
1870 }
1871
1872 #if CONFIG_LOCKERBOOT
1873 __private_extern__
1874 int
1875 mount_locker_protoboot(const char *fsname, const char *mntpoint,
1876 const char *pbdevpath)
1877 {
1878 int error = -1;
1879 struct nameidata nd;
1880 boolean_t cleanup_nd = FALSE;
1881 vfs_context_t ctx = vfs_context_kernel();
1882 boolean_t is64 = TRUE;
1883 boolean_t by_index = TRUE;
1884 struct user64_mnt_imgsrc_args mia64 = {
1885 .mi_height = 0,
1886 .mi_flags = 0,
1887 .mi_devpath = CAST_USER_ADDR_T(pbdevpath),
1888 };
1889 user_addr_t mia64addr = CAST_USER_ADDR_T(&mia64);
1890
1891 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
1892 UIO_SYSSPACE, CAST_USER_ADDR_T(mntpoint), ctx);
1893 error = namei(&nd);
1894 if (error) {
1895 IMGSRC_DEBUG("namei: %d\n", error);
1896 goto out;
1897 }
1898
1899 cleanup_nd = TRUE;
1900 error = relocate_imageboot_source(nd.ni_dvp, nd.ni_vp,
1901 &nd.ni_cnd, fsname, ctx, is64, mia64addr, by_index);
1902
1903 out:
1904 if (cleanup_nd) {
1905 int stashed = error;
1906
1907 error = vnode_put(nd.ni_vp);
1908 if (error) {
1909 panic("vnode_put() returned non-zero: %d", error);
1910 }
1911
1912 if (nd.ni_dvp) {
1913 error = vnode_put(nd.ni_dvp);
1914 if (error) {
1915 panic("vnode_put() returned non-zero: %d", error);
1916 }
1917 }
1918 nameidone(&nd);
1919
1920 error = stashed;
1921 }
1922 return error;
1923 }
1924 #endif /* CONFIG_LOCKERBOOT */
1925 #endif /* CONFIG_IMGSRC_ACCESS */
1926
1927 void
1928 enablequotas(struct mount *mp, vfs_context_t ctx)
1929 {
1930 struct nameidata qnd;
1931 int type;
1932 char qfpath[MAXPATHLEN];
1933 const char *qfname = QUOTAFILENAME;
1934 const char *qfopsname = QUOTAOPSNAME;
1935 const char *qfextension[] = INITQFNAMES;
1936
1937 /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1938 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0) {
1939 return;
1940 }
1941 /*
1942 * Enable filesystem disk quotas if necessary.
1943 * We ignore errors, as this should not interfere with the final mount.
1944 */
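/*
 * For example, for the "user" quota type this looks for a trigger file
 * "<mountpoint>/.quota.ops.user" and, if it exists, turns quotas on
 * against "<mountpoint>/.quota.user" (assuming the usual QUOTAOPSNAME,
 * QUOTAFILENAME and INITQFNAMES definitions).
 */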
1945 for (type = 0; type < MAXQUOTAS; type++) {
1946 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
1947 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1948 CAST_USER_ADDR_T(qfpath), ctx);
1949 if (namei(&qnd) != 0) {
1950 continue; /* option file to trigger quotas is not present */
1951 }
1952 vnode_put(qnd.ni_vp);
1953 nameidone(&qnd);
1954 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
1955
1956 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
1957 }
1958 return;
1959 }
1960
1961
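/*
 * Per-process callback for checkdirs(): if the process's current or root
 * directory is the old covered vnode (olddp), swap in the root of the
 * newly mounted filesystem (newdp), taking a new reference on it and
 * dropping the old one.
 */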
1962 static int
1963 checkdirs_callback(proc_t p, void * arg)
1964 {
1965 struct cdirargs * cdrp = (struct cdirargs *)arg;
1966 vnode_t olddp = cdrp->olddp;
1967 vnode_t newdp = cdrp->newdp;
1968 struct filedesc *fdp;
1969 vnode_t tvp;
1970 vnode_t fdp_cvp;
1971 vnode_t fdp_rvp;
1972 int cdir_changed = 0;
1973 int rdir_changed = 0;
1974
1975 /*
1976 * XXX Also needs to iterate each thread in the process to see if it
1977 * XXX is using a per-thread current working directory, and, if so,
1978 * XXX update that as well.
1979 */
1980
1981 proc_fdlock(p);
1982 fdp = p->p_fd;
1983 if (fdp == (struct filedesc *)0) {
1984 proc_fdunlock(p);
1985 return PROC_RETURNED;
1986 }
1987 fdp_cvp = fdp->fd_cdir;
1988 fdp_rvp = fdp->fd_rdir;
1989 proc_fdunlock(p);
1990
1991 if (fdp_cvp == olddp) {
1992 vnode_ref(newdp);
1993 tvp = fdp->fd_cdir;
1994 fdp_cvp = newdp;
1995 cdir_changed = 1;
1996 vnode_rele(tvp);
1997 }
1998 if (fdp_rvp == olddp) {
1999 vnode_ref(newdp);
2000 tvp = fdp->fd_rdir;
2001 fdp_rvp = newdp;
2002 rdir_changed = 1;
2003 vnode_rele(tvp);
2004 }
2005 if (cdir_changed || rdir_changed) {
2006 proc_fdlock(p);
2007 fdp->fd_cdir = fdp_cvp;
2008 fdp->fd_rdir = fdp_rvp;
2009 proc_fdunlock(p);
2010 }
2011 return PROC_RETURNED;
2012 }
2013
2014
2015
2016 /*
2017 * Scan all active processes to see if any of them have a current
2018 * or root directory onto which the new filesystem has just been
2019 * mounted. If so, replace them with the new mount point.
2020 */
2021 static int
2022 checkdirs(vnode_t olddp, vfs_context_t ctx)
2023 {
2024 vnode_t newdp;
2025 vnode_t tvp;
2026 int err;
2027 struct cdirargs cdr;
2028
2029 if (olddp->v_usecount == 1) {
2030 return 0;
2031 }
2032 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
2033
2034 if (err != 0) {
2035 #if DIAGNOSTIC
2036 panic("mount: lost mount: error %d", err);
2037 #endif
2038 return err;
2039 }
2040
2041 cdr.olddp = olddp;
2042 cdr.newdp = newdp;
2043 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
2044 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
2045
2046 if (rootvnode == olddp) {
2047 vnode_ref(newdp);
2048 tvp = rootvnode;
2049 rootvnode = newdp;
2050 vnode_rele(tvp);
2051 }
2052
2053 vnode_put(newdp);
2054 return 0;
2055 }
2056
2057 /*
2058 * Unmount a file system.
2059 *
2060 * Note: unmount takes a path to the vnode mounted on as argument,
2061 * not the special file (as before).
2062 */
2063 /* ARGSUSED */
2064 int
2065 unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
2066 {
2067 vnode_t vp;
2068 struct mount *mp;
2069 int error;
2070 struct nameidata nd;
2071 vfs_context_t ctx = vfs_context_current();
2072
2073 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
2074 UIO_USERSPACE, uap->path, ctx);
2075 error = namei(&nd);
2076 if (error) {
2077 return error;
2078 }
2079 vp = nd.ni_vp;
2080 mp = vp->v_mount;
2081 nameidone(&nd);
2082
2083 #if CONFIG_MACF
2084 error = mac_mount_check_umount(ctx, mp);
2085 if (error != 0) {
2086 vnode_put(vp);
2087 return error;
2088 }
2089 #endif
2090 /*
2091 * Must be the root of the filesystem
2092 */
2093 if ((vp->v_flag & VROOT) == 0) {
2094 vnode_put(vp);
2095 return EINVAL;
2096 }
2097 mount_ref(mp, 0);
2098 vnode_put(vp);
2099 /* safedounmount consumes the mount ref */
2100 return safedounmount(mp, uap->flags, ctx);
2101 }
2102
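/*
 * Unmount the filesystem identified by fsid.  The mount ref taken here is
 * consumed by safedounmount().
 */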
2103 int
2104 vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx)
2105 {
2106 mount_t mp;
2107
2108 mp = mount_list_lookupby_fsid(fsid, 0, 1);
2109 if (mp == (mount_t)0) {
2110 return ENOENT;
2111 }
2112 mount_ref(mp, 0);
2113 mount_iterdrop(mp);
2114 /* safedounmount consumes the mount ref */
2115 return safedounmount(mp, flags, ctx);
2116 }
2117
2118
2119 /*
2120 * The mount struct comes with a mount ref which will be consumed.
2121 * Do the actual file system unmount, preventing some common foot-shooting.
2122 */
2123 int
2124 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
2125 {
2126 int error;
2127 proc_t p = vfs_context_proc(ctx);
2128
2129 /*
2130 * If the file system is not responding and MNT_NOBLOCK
2131 * is set and not a forced unmount then return EBUSY.
2132 */
2133 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
2134 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
2135 error = EBUSY;
2136 goto out;
2137 }
2138
2139 /*
2140 * Skip authorization if the mount is tagged as permissive and
2141 * this is not a forced-unmount attempt.
2142 */
2143 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
2144 /*
2145 * Only root, or the user that did the original mount is
2146 * permitted to unmount this filesystem.
2147 */
2148 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
2149 (error = suser(kauth_cred_get(), &p->p_acflag))) {
2150 goto out;
2151 }
2152 }
2153 /*
2154 * Don't allow unmounting the root file system (or the associated VM or DATA mounts).
2155 */
2156 if ((mp->mnt_flag & MNT_ROOTFS) || (mp->mnt_kern_flag & MNTK_SYSTEM)) {
2157 error = EBUSY; /* the root (or associated volumes) is always busy */
2158 goto out;
2159 }
2160
2161 #ifdef CONFIG_IMGSRC_ACCESS
2162 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
2163 error = EBUSY;
2164 goto out;
2165 }
2166 #endif /* CONFIG_IMGSRC_ACCESS */
2167
2168 return dounmount(mp, flags, 1, ctx);
2169
2170 out:
2171 mount_drop(mp, 0);
2172 return error;
2173 }
2174
2175 /*
2176 * Do the actual file system unmount.
2177 */
2178 int
2179 dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
2180 {
2181 vnode_t coveredvp = (vnode_t)0;
2182 int error;
2183 int needwakeup = 0;
2184 int forcedunmount = 0;
2185 int lflags = 0;
2186 struct vnode *devvp = NULLVP;
2187 #if CONFIG_TRIGGERS
2188 proc_t p = vfs_context_proc(ctx);
2189 int did_vflush = 0;
2190 int pflags_save = 0;
2191 #endif /* CONFIG_TRIGGERS */
2192
2193 #if CONFIG_FSE
2194 if (!(flags & MNT_FORCE)) {
2195 fsevent_unmount(mp, ctx); /* has to come first! */
2196 }
2197 #endif
2198
2199 mount_lock(mp);
2200
2201 /*
2202 * If an unmount is already in progress, just return EBUSY.
2203 * Even a forced unmount cannot override it.
2204 */
2205 if (mp->mnt_lflag & MNT_LUNMOUNT) {
2206 if (withref != 0) {
2207 mount_drop(mp, 1);
2208 }
2209 mount_unlock(mp);
2210 return EBUSY;
2211 }
2212
2213 if (flags & MNT_FORCE) {
2214 forcedunmount = 1;
2215 mp->mnt_lflag |= MNT_LFORCE;
2216 }
2217
2218 #if CONFIG_TRIGGERS
2219 if (flags & MNT_NOBLOCK && p != kernproc) {
2220 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
2221 }
2222 #endif
2223
2224 mp->mnt_kern_flag |= MNTK_UNMOUNT;
2225 mp->mnt_lflag |= MNT_LUNMOUNT;
2226 mp->mnt_flag &= ~MNT_ASYNC;
2227 /*
2228 * Anyone currently in the fast path who
2229 * trips over the cached rootvp will be
2230 * dumped out and forced into the slow path
2231 * to regenerate a new cached value.
2232 */
2233 mp->mnt_realrootvp = NULLVP;
2234 mount_unlock(mp);
2235
2236 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
2237 /*
2238 * Force unmount any mounts in this filesystem.
2239 * If any unmounts fail - just leave them dangling.
2240 * Avoids recursion.
2241 */
2242 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
2243 }
2244
2245 /*
2246 * Taking the name_cache_lock exclusively will
2247 * ensure that everyone who might be trying to use a
2248 * now-stale copy of vp->v_mountedhere->mnt_realrootvp
2249 * is out of the fast path.  Bumping mount_generation
2250 * causes the cached values to be invalidated.
2252 */
2253 name_cache_lock();
2254 mount_generation++;
2255 name_cache_unlock();
2256
2257
2258 lck_rw_lock_exclusive(&mp->mnt_rwlock);
2259 if (withref != 0) {
2260 mount_drop(mp, 0);
2261 }
2262 error = 0;
2263 if (forcedunmount == 0) {
2264 ubc_umount(mp); /* release cached vnodes */
2265 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2266 error = VFS_SYNC(mp, MNT_WAIT, ctx);
2267 if (error) {
2268 mount_lock(mp);
2269 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2270 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2271 mp->mnt_lflag &= ~MNT_LFORCE;
2272 goto out;
2273 }
2274 }
2275 }
2276
2277 IOBSDMountChange(mp, kIOMountChangeUnmount);
2278
2279 #if CONFIG_TRIGGERS
2280 vfs_nested_trigger_unmounts(mp, flags, ctx);
2281 did_vflush = 1;
2282 #endif
2283 if (forcedunmount) {
2284 lflags |= FORCECLOSE;
2285 }
2286 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
2287 if ((forcedunmount == 0) && error) {
2288 mount_lock(mp);
2289 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2290 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2291 mp->mnt_lflag &= ~MNT_LFORCE;
2292 goto out;
2293 }
2294
2295 /* make sure no one is in the mount iterations or lookup */
2296 mount_iterdrain(mp);
2297
2298 error = VFS_UNMOUNT(mp, flags, ctx);
2299 if (error) {
2300 mount_iterreset(mp);
2301 mount_lock(mp);
2302 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2303 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2304 mp->mnt_lflag &= ~MNT_LFORCE;
2305 goto out;
2306 }
2307
2308 /* increment the operations count */
2309 if (!error) {
2310 OSAddAtomic(1, &vfs_nummntops);
2311 }
2312
2313 if (mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
2314 /* hold an io reference and drop the usecount before close */
2315 devvp = mp->mnt_devvp;
2316 vnode_getalways(devvp);
2317 vnode_rele(devvp);
2318 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD | FWRITE,
2319 ctx);
2320 vnode_clearmountedon(devvp);
2321 vnode_put(devvp);
2322 }
2323 lck_rw_done(&mp->mnt_rwlock);
2324 mount_list_remove(mp);
2325 lck_rw_lock_exclusive(&mp->mnt_rwlock);
2326
2327 /* mark the mount point hook in the vp but do not drop the ref yet */
2328 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
2329 /*
2330 * The covered vnode needs special handling. Trying to get an
2331 * iocount must not block here as this may lead to deadlocks
2332 * if the Filesystem to which the covered vnode belongs is
2333 * undergoing forced unmounts. Since we hold a usecount, the
2334 * vnode cannot be reused (it can, however, still be terminated)
2335 */
2336 vnode_getalways(coveredvp);
2337 vnode_lock_spin(coveredvp);
2338
2339 mp->mnt_crossref++;
2340 coveredvp->v_mountedhere = (struct mount *)0;
2341 CLR(coveredvp->v_flag, VMOUNT);
2342
2343 vnode_unlock(coveredvp);
2344 vnode_put(coveredvp);
2345 }
2346
2347 mount_list_lock();
2348 mp->mnt_vtable->vfc_refcount--;
2349 mount_list_unlock();
2350
2351 cache_purgevfs(mp); /* remove cache entries for this file sys */
2352 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
2353 mount_lock(mp);
2354 mp->mnt_lflag |= MNT_LDEAD;
2355
2356 if (mp->mnt_lflag & MNT_LWAIT) {
2357 /*
2358 * do the wakeup here
2359 * in case we block in mount_refdrain
2360 * which will drop the mount lock
2361 * and allow anyone blocked in vfs_busy
2362 * to wake up and see the LDEAD state
2363 */
2364 mp->mnt_lflag &= ~MNT_LWAIT;
2365 wakeup((caddr_t)mp);
2366 }
2367 mount_refdrain(mp);
2368
2369 /* free disk_conditioner_info structure for this mount */
2370 disk_conditioner_unmount(mp);
2371
2372 out:
2373 if (mp->mnt_lflag & MNT_LWAIT) {
2374 mp->mnt_lflag &= ~MNT_LWAIT;
2375 needwakeup = 1;
2376 }
2377
2378 #if CONFIG_TRIGGERS
2379 if (flags & MNT_NOBLOCK && p != kernproc) {
2380 // Restore P_NOREMOTEHANG bit to its previous value
2381 if ((pflags_save & P_NOREMOTEHANG) == 0) {
2382 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2383 }
2384 }
2385
2386 /*
2387 * Callback and context are set together under the mount lock, and
2388 * never cleared, so we're safe to examine them here, drop the lock,
2389 * and call out.
2390 */
2391 if (mp->mnt_triggercallback != NULL) {
2392 mount_unlock(mp);
2393 if (error == 0) {
2394 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2395 } else if (did_vflush) {
2396 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2397 }
2398 } else {
2399 mount_unlock(mp);
2400 }
2401 #else
2402 mount_unlock(mp);
2403 #endif /* CONFIG_TRIGGERS */
2404
2405 lck_rw_done(&mp->mnt_rwlock);
2406
2407 if (needwakeup) {
2408 wakeup((caddr_t)mp);
2409 }
2410
2411 if (!error) {
2412 if ((coveredvp != NULLVP)) {
2413 vnode_t pvp = NULLVP;
2414
2415 /*
2416 * The covered vnode needs special handling. Trying to
2417 * get an iocount must not block here as this may lead
2418 * to deadlocks if the Filesystem to which the covered
2419 * vnode belongs is undergoing forced unmounts. Since we
2420 * hold a usecount, the vnode cannot be reused
2421 * (it can, however, still be terminated).
2422 */
2423 vnode_getalways(coveredvp);
2424
2425 mount_dropcrossref(mp, coveredvp, 0);
2426 /*
2427 * We'll _try_ to detect if this really needs to be
2428 * done. The coveredvp can only be in termination (or
2429 * terminated) if the coveredvp's mount point is in a
2430 * forced unmount (or has been) since we still hold the
2431 * ref.
2432 */
2433 if (!vnode_isrecycled(coveredvp)) {
2434 pvp = vnode_getparent(coveredvp);
2435 #if CONFIG_TRIGGERS
2436 if (coveredvp->v_resolve) {
2437 vnode_trigger_rearm(coveredvp, ctx);
2438 }
2439 #endif
2440 }
2441
2442 vnode_rele(coveredvp);
2443 vnode_put(coveredvp);
2444 coveredvp = NULLVP;
2445
2446 if (pvp) {
2447 lock_vnode_and_post(pvp, NOTE_WRITE);
2448 vnode_put(pvp);
2449 }
2450 } else if (mp->mnt_flag & MNT_ROOTFS) {
2451 mount_lock_destroy(mp);
2452 #if CONFIG_MACF
2453 mac_mount_label_destroy(mp);
2454 #endif
2455 FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
2456 } else {
2457 panic("dounmount: no coveredvp");
2458 }
2459 }
2460 return error;
2461 }
2462
2463 /*
2464 * Unmount any mounts in this filesystem.
2465 */
2466 void
2467 dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2468 {
2469 mount_t smp;
2470 fsid_t *fsids, fsid;
2471 int fsids_sz;
2472 int count = 0, i, m = 0;
2473 vnode_t vp;
2474
2475 mount_list_lock();
2476
2477 // Get an array to hold the submounts' fsids.
2478 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2479 count++;
2480 fsids_sz = count * sizeof(fsid_t);
2481 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2482 if (fsids == NULL) {
2483 mount_list_unlock();
2484 goto out;
2485 }
2486 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2487
2488 /*
2489 * Fill the array with submount fsids.
2490 * Since mounts are always added to the tail of the mount list, the
2491 * list is always in mount order.
2492 * For each mount check if the mounted-on vnode belongs to a
2493 * mount that's already added to our array of mounts to be unmounted.
2494 */
2495 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2496 vp = smp->mnt_vnodecovered;
2497 if (vp == NULL) {
2498 continue;
2499 }
2500 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2501 for (i = 0; i <= m; i++) {
2502 if (fsids[i].val[0] == fsid.val[0] &&
2503 fsids[i].val[1] == fsid.val[1]) {
2504 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2505 break;
2506 }
2507 }
2508 }
2509 mount_list_unlock();
2510
2511 // Unmount the submounts in reverse order. Ignore errors.
2512 for (i = m; i > 0; i--) {
2513 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2514 if (smp) {
2515 mount_ref(smp, 0);
2516 mount_iterdrop(smp);
2517 (void) dounmount(smp, flags, 1, ctx);
2518 }
2519 }
2520 out:
2521 if (fsids) {
2522 FREE(fsids, M_TEMP);
2523 }
2524 }
2525
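/*
 * Drop one cross reference held on mp by its covered vnode dp.  If this was
 * the last cross reference and dp no longer has mp mounted on it, the mount
 * structure itself is destroyed and freed.
 */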
2526 void
2527 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2528 {
2529 vnode_lock(dp);
2530 mp->mnt_crossref--;
2531
2532 if (mp->mnt_crossref < 0) {
2533 panic("mount cross refs -ve");
2534 }
2535
2536 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
2537 if (need_put) {
2538 vnode_put_locked(dp);
2539 }
2540 vnode_unlock(dp);
2541
2542 mount_lock_destroy(mp);
2543 #if CONFIG_MACF
2544 mac_mount_label_destroy(mp);
2545 #endif
2546 FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
2547 return;
2548 }
2549 if (need_put) {
2550 vnode_put_locked(dp);
2551 }
2552 vnode_unlock(dp);
2553 }
2554
2555
2556 /*
2557 * Sync each mounted filesystem.
2558 */
2559 #if DIAGNOSTIC
2560 int syncprt = 0;
2561 #endif
2562
2563 int print_vmpage_stat = 0;
2564
2565 /*
2566 * sync_callback: simple wrapper that calls VFS_SYNC() on volumes
2567 * mounted read-write with the passed waitfor value.
2568 *
2569 * Parameters: mp mount-point descriptor per mounted file-system instance.
2570 * arg user argument (please see below)
2571 *
2572 * The user argument is a pointer to a 32-bit unsigned integer which describes the
2573 * type of waitfor value to set for calling VFS_SYNC(). If the user argument is
2574 * passed as NULL, VFS_SYNC() is called with MNT_NOWAIT set as the default
2575 * waitfor value.
2576 *
2577 * Returns: VFS_RETURNED
2578 */
2579 static int
2580 sync_callback(mount_t mp, void *arg)
2581 {
2582 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2583 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2584 unsigned waitfor = MNT_NOWAIT;
2585
2586 if (arg) {
2587 waitfor = *(uint32_t*)arg;
2588 }
2589
2590 /* Sanity check for flags - these are the only valid combinations for the flag bits */
2591 if (waitfor != MNT_WAIT &&
2592 waitfor != (MNT_WAIT | MNT_VOLUME) &&
2593 waitfor != MNT_NOWAIT &&
2594 waitfor != (MNT_NOWAIT | MNT_VOLUME) &&
2595 waitfor != MNT_DWAIT &&
2596 waitfor != (MNT_DWAIT | MNT_VOLUME)) {
2597 panic("Passed inappropriate waitfor %u to "
2598 "sync_callback()", waitfor);
2599 }
2600
2601 mp->mnt_flag &= ~MNT_ASYNC;
2602 (void)VFS_SYNC(mp, waitfor, vfs_context_kernel());
2603 if (asyncflag) {
2604 mp->mnt_flag |= MNT_ASYNC;
2605 }
2606 }
2607
2608 return VFS_RETURNED;
2609 }
2610
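/*
 * sync() system call: kick off a MNT_NOWAIT sync of every mounted,
 * writable filesystem.
 */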
2611 /* ARGSUSED */
2612 int
2613 sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
2614 {
2615 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2616
2617 if (print_vmpage_stat) {
2618 vm_countdirtypages();
2619 }
2620
2621 #if DIAGNOSTIC
2622 if (syncprt) {
2623 vfs_bufstats();
2624 }
2625 #endif /* DIAGNOSTIC */
2626 return 0;
2627 }
2628
2629 typedef enum {
2630 SYNC_ALL = 0,
2631 SYNC_ONLY_RELIABLE_MEDIA = 1,
2632 SYNC_ONLY_UNRELIABLE_MEDIA = 2
2633 } sync_type_t;
2634
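/*
 * vfs_iterate() callback used by sync_thread(): optionally filter mounts by
 * media reliability (local, non-virtual devices count as "reliable") before
 * handing off to sync_callback() with its default MNT_NOWAIT behaviour.
 */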
2635 static int
2636 sync_internal_callback(mount_t mp, void *arg)
2637 {
2638 if (arg) {
2639 int is_reliable = !(mp->mnt_kern_flag & MNTK_VIRTUALDEV) &&
2640 (mp->mnt_flag & MNT_LOCAL);
2641 sync_type_t sync_type = *((sync_type_t *)arg);
2642
2643 if ((sync_type == SYNC_ONLY_RELIABLE_MEDIA) && !is_reliable) {
2644 return VFS_RETURNED;
2645 } else if ((sync_type == SYNC_ONLY_UNRELIABLE_MEDIA) && is_reliable) {
2646 return VFS_RETURNED;
2647 }
2648 }
2649
2650 (void)sync_callback(mp, NULL);
2651
2652 return VFS_RETURNED;
2653 }
2654
2655 int sync_thread_state = 0;
2656 int sync_timeout_seconds = 5;
2657
2658 #define SYNC_THREAD_RUN 0x0001
2659 #define SYNC_THREAD_RUNNING 0x0002
2660
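/*
 * Worker thread started by sync_internal(): while new run requests keep
 * arriving, sync all writable filesystems -- reliable (local, non-virtual)
 * media first, unreliable media second -- then wake any waiters and exit.
 */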
2661 static void
2662 sync_thread(__unused void *arg, __unused wait_result_t wr)
2663 {
2664 sync_type_t sync_type;
2665
2666 lck_mtx_lock(sync_mtx_lck);
2667 while (sync_thread_state & SYNC_THREAD_RUN) {
2668 sync_thread_state &= ~SYNC_THREAD_RUN;
2669 lck_mtx_unlock(sync_mtx_lck);
2670
2671 sync_type = SYNC_ONLY_RELIABLE_MEDIA;
2672 vfs_iterate(LK_NOWAIT, sync_internal_callback, &sync_type);
2673 sync_type = SYNC_ONLY_UNRELIABLE_MEDIA;
2674 vfs_iterate(LK_NOWAIT, sync_internal_callback, &sync_type);
2675
2676 lck_mtx_lock(sync_mtx_lck);
2677 }
2678 /*
2679 * This wakeup _has_ to be issued before the lock is released, otherwise
2680 * we may end up waking up a thread in sync_internal which is
2681 * expecting a wakeup from a thread it just created and not from this
2682 * thread, which is about to exit.
2683 */
2684 wakeup(&sync_thread_state);
2685 sync_thread_state &= ~SYNC_THREAD_RUNNING;
2686 lck_mtx_unlock(sync_mtx_lck);
2687
2688 if (print_vmpage_stat) {
2689 vm_countdirtypages();
2690 }
2691
2692 #if DIAGNOSTIC
2693 if (syncprt) {
2694 vfs_bufstats();
2695 }
2696 #endif /* DIAGNOSTIC */
2697 }
2698
2699 struct timeval sync_timeout_last_print = {.tv_sec = 0, .tv_usec = 0};
2700
2701 /*
2702 * An in-kernel sync for power management to call.
2703 * This function always returns within sync_timeout seconds.
2704 */
2705 __private_extern__ int
2706 sync_internal(void)
2707 {
2708 thread_t thd;
2709 int error;
2710 int thread_created = FALSE;
2711 struct timespec ts = {.tv_sec = sync_timeout_seconds, .tv_nsec = 0};
2712
2713 lck_mtx_lock(sync_mtx_lck);
2714 sync_thread_state |= SYNC_THREAD_RUN;
2715 if (!(sync_thread_state & SYNC_THREAD_RUNNING)) {
2716 int kr;
2717
2718 sync_thread_state |= SYNC_THREAD_RUNNING;
2719 kr = kernel_thread_start(sync_thread, NULL, &thd);
2720 if (kr != KERN_SUCCESS) {
2721 sync_thread_state &= ~SYNC_THREAD_RUNNING;
2722 lck_mtx_unlock(sync_mtx_lck);
2723 printf("sync_thread failed\n");
2724 return 0;
2725 }
2726 thread_created = TRUE;
2727 }
2728
2729 error = msleep((caddr_t)&sync_thread_state, sync_mtx_lck,
2730 (PVFS | PDROP | PCATCH), "sync_thread", &ts);
2731 if (error) {
2732 struct timeval now;
2733
2734 microtime(&now);
2735 if (now.tv_sec - sync_timeout_last_print.tv_sec > 120) {
2736 printf("sync timed out: %d sec\n", sync_timeout_seconds);
2737 sync_timeout_last_print.tv_sec = now.tv_sec;
2738 }
2739 }
2740
2741 if (thread_created) {
2742 thread_deallocate(thd);
2743 }
2744
2745 return 0;
2746 } /* end of sync_internal call */
2747
2748 /*
2749 * Change filesystem quotas.
2750 */
2751 #if QUOTA
2752 int
2753 quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
2754 {
2755 struct mount *mp;
2756 int error, quota_cmd, quota_status = 0;
2757 caddr_t datap;
2758 size_t fnamelen;
2759 struct nameidata nd;
2760 vfs_context_t ctx = vfs_context_current();
2761 struct dqblk my_dqblk = {};
2762
2763 AUDIT_ARG(uid, uap->uid);
2764 AUDIT_ARG(cmd, uap->cmd);
2765 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2766 uap->path, ctx);
2767 error = namei(&nd);
2768 if (error) {
2769 return error;
2770 }
2771 mp = nd.ni_vp->v_mount;
2772 vnode_put(nd.ni_vp);
2773 nameidone(&nd);
2774
2775 /* copyin any data we will need for downstream code */
2776 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2777
2778 switch (quota_cmd) {
2779 case Q_QUOTAON:
2780 /* uap->arg specifies a file from which to take the quotas */
2781 fnamelen = MAXPATHLEN;
2782 datap = kalloc(MAXPATHLEN);
2783 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2784 break;
2785 case Q_GETQUOTA:
2786 /* uap->arg is a pointer to a dqblk structure. */
2787 datap = (caddr_t) &my_dqblk;
2788 break;
2789 case Q_SETQUOTA:
2790 case Q_SETUSE:
2791 /* uap->arg is a pointer to a dqblk structure. */
2792 datap = (caddr_t) &my_dqblk;
2793 if (proc_is64bit(p)) {
2794 struct user_dqblk my_dqblk64;
2795 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof(my_dqblk64));
2796 if (error == 0) {
2797 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2798 }
2799 } else {
2800 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof(my_dqblk));
2801 }
2802 break;
2803 case Q_QUOTASTAT:
2804 /* uap->arg is a pointer to an integer */
2805 datap = (caddr_t) &quota_status;
2806 break;
2807 default:
2808 datap = NULL;
2809 break;
2810 } /* switch */
2811
2812 if (error == 0) {
2813 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
2814 }
2815
2816 switch (quota_cmd) {
2817 case Q_QUOTAON:
2818 if (datap != NULL) {
2819 kfree(datap, MAXPATHLEN);
2820 }
2821 break;
2822 case Q_GETQUOTA:
2823 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2824 if (error == 0) {
2825 if (proc_is64bit(p)) {
2826 struct user_dqblk my_dqblk64;
2827
2828 memset(&my_dqblk64, 0, sizeof(my_dqblk64));
2829 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2830 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof(my_dqblk64));
2831 } else {
2832 error = copyout(datap, uap->arg, sizeof(struct dqblk));
2833 }
2834 }
2835 break;
2836 case Q_QUOTASTAT:
2837 /* uap->arg is a pointer to an integer */
2838 if (error == 0) {
2839 error = copyout(datap, uap->arg, sizeof(quota_status));
2840 }
2841 break;
2842 default:
2843 break;
2844 } /* switch */
2845
2846 return error;
2847 }
2848 #else
2849 int
2850 quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2851 {
2852 return EOPNOTSUPP;
2853 }
2854 #endif /* QUOTA */
2855
2856 /*
2857 * Get filesystem statistics.
2858 *
2859 * Returns: 0 Success
2860 * namei:???
2861 * vfs_update_vfsstat:???
2862 * munge_statfs:EFAULT
2863 */
2864 /* ARGSUSED */
2865 int
2866 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
2867 {
2868 struct mount *mp;
2869 struct vfsstatfs *sp;
2870 int error;
2871 struct nameidata nd;
2872 vfs_context_t ctx = vfs_context_current();
2873 vnode_t vp;
2874
2875 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2876 UIO_USERSPACE, uap->path, ctx);
2877 error = namei(&nd);
2878 if (error != 0) {
2879 return error;
2880 }
2881 vp = nd.ni_vp;
2882 mp = vp->v_mount;
2883 sp = &mp->mnt_vfsstat;
2884 nameidone(&nd);
2885
2886 #if CONFIG_MACF
2887 error = mac_mount_check_stat(ctx, mp);
2888 if (error != 0) {
2889 vnode_put(vp);
2890 return error;
2891 }
2892 #endif
2893
2894 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
2895 if (error != 0) {
2896 vnode_put(vp);
2897 return error;
2898 }
2899
2900 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2901 vnode_put(vp);
2902 return error;
2903 }
2904
2905 /*
2906 * Get filesystem statistics.
2907 */
2908 /* ARGSUSED */
2909 int
2910 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
2911 {
2912 vnode_t vp;
2913 struct mount *mp;
2914 struct vfsstatfs *sp;
2915 int error;
2916
2917 AUDIT_ARG(fd, uap->fd);
2918
2919 if ((error = file_vnode(uap->fd, &vp))) {
2920 return error;
2921 }
2922
2923 error = vnode_getwithref(vp);
2924 if (error) {
2925 file_drop(uap->fd);
2926 return error;
2927 }
2928
2929 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2930
2931 mp = vp->v_mount;
2932 if (!mp) {
2933 error = EBADF;
2934 goto out;
2935 }
2936
2937 #if CONFIG_MACF
2938 error = mac_mount_check_stat(vfs_context_current(), mp);
2939 if (error != 0) {
2940 goto out;
2941 }
2942 #endif
2943
2944 sp = &mp->mnt_vfsstat;
2945 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2946 goto out;
2947 }
2948
2949 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2950
2951 out:
2952 file_drop(uap->fd);
2953 vnode_put(vp);
2954
2955 return error;
2956 }
2957
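/*
 * Fill in a struct statfs64 from the mount's cached vfsstat, honoring any
 * filesystem type name override and flagging the root data volume in
 * f_flags_ext where applicable.
 */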
2958 void
2959 vfs_get_statfs64(struct mount *mp, struct statfs64 *sfs)
2960 {
2961 struct vfsstatfs *vsfs = &mp->mnt_vfsstat;
2962
2963 bzero(sfs, sizeof(*sfs));
2964
2965 sfs->f_bsize = vsfs->f_bsize;
2966 sfs->f_iosize = (int32_t)vsfs->f_iosize;
2967 sfs->f_blocks = vsfs->f_blocks;
2968 sfs->f_bfree = vsfs->f_bfree;
2969 sfs->f_bavail = vsfs->f_bavail;
2970 sfs->f_files = vsfs->f_files;
2971 sfs->f_ffree = vsfs->f_ffree;
2972 sfs->f_fsid = vsfs->f_fsid;
2973 sfs->f_owner = vsfs->f_owner;
2974 sfs->f_type = mp->mnt_vtable->vfc_typenum;
2975 sfs->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2976 sfs->f_fssubtype = vsfs->f_fssubtype;
2977 sfs->f_flags_ext = ((mp->mnt_kern_flag & MNTK_SYSTEM) && !(mp->mnt_kern_flag & MNTK_SWAP_MOUNT) && !(mp->mnt_flag & MNT_ROOTFS)) ? MNT_EXT_ROOT_DATA_VOL : 0;
2978 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2979 strlcpy(&sfs->f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2980 } else {
2981 strlcpy(&sfs->f_fstypename[0], &vsfs->f_fstypename[0], MFSTYPENAMELEN);
2982 }
2983 strlcpy(&sfs->f_mntonname[0], &vsfs->f_mntonname[0], MAXPATHLEN);
2984 strlcpy(&sfs->f_mntfromname[0], &vsfs->f_mntfromname[0], MAXPATHLEN);
2985 }
2986
2987 /*
2988 * Get file system statistics in 64-bit mode
2989 */
2990 int
2991 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2992 {
2993 struct mount *mp;
2994 int error;
2995 struct nameidata nd;
2996 struct statfs64 sfs;
2997 vfs_context_t ctxp = vfs_context_current();
2998 vnode_t vp;
2999
3000 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
3001 UIO_USERSPACE, uap->path, ctxp);
3002 error = namei(&nd);
3003 if (error != 0) {
3004 return error;
3005 }
3006 vp = nd.ni_vp;
3007 mp = vp->v_mount;
3008 nameidone(&nd);
3009
3010 #if CONFIG_MACF
3011 error = mac_mount_check_stat(ctxp, mp);
3012 if (error != 0) {
3013 vnode_put(vp);
3014 return error;
3015 }
3016 #endif
3017
3018 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
3019 if (error != 0) {
3020 vnode_put(vp);
3021 return error;
3022 }
3023
3024 vfs_get_statfs64(mp, &sfs);
3025 if ((mp->mnt_kern_flag & MNTK_SYSTEM) && !(mp->mnt_kern_flag & MNTK_SWAP_MOUNT) && !(mp->mnt_flag & MNT_ROOTFS) &&
3026 (p->p_vfs_iopolicy & P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME)) {
3027 /* This process does not want to see a separate data volume mountpoint */
3028 strlcpy(&sfs.f_mntonname[0], "/", sizeof("/"));
3029 }
3030 error = copyout(&sfs, uap->buf, sizeof(sfs));
3031 vnode_put(vp);
3032
3033 return error;
3034 }
3035
3036 /*
3037 * Get file system statistics in 64-bit mode
3038 */
3039 int
3040 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
3041 {
3042 struct vnode *vp;
3043 struct mount *mp;
3044 struct statfs64 sfs;
3045 int error;
3046
3047 AUDIT_ARG(fd, uap->fd);
3048
3049 if ((error = file_vnode(uap->fd, &vp))) {
3050 return error;
3051 }
3052
3053 error = vnode_getwithref(vp);
3054 if (error) {
3055 file_drop(uap->fd);
3056 return error;
3057 }
3058
3059 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
3060
3061 mp = vp->v_mount;
3062 if (!mp) {
3063 error = EBADF;
3064 goto out;
3065 }
3066
3067 #if CONFIG_MACF
3068 error = mac_mount_check_stat(vfs_context_current(), mp);
3069 if (error != 0) {
3070 goto out;
3071 }
3072 #endif
3073
3074 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
3075 goto out;
3076 }
3077
3078 vfs_get_statfs64(mp, &sfs);
3079 if ((mp->mnt_kern_flag & MNTK_SYSTEM) && !(mp->mnt_kern_flag & MNTK_SWAP_MOUNT) && !(mp->mnt_flag & MNT_ROOTFS) &&
3080 (p->p_vfs_iopolicy & P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME)) {
3081 /* This process does not want to see a separate data volume mountpoint */
3082 strlcpy(&sfs.f_mntonname[0], "/", sizeof("/"));
3083 }
3084 error = copyout(&sfs, uap->buf, sizeof(sfs));
3085
3086 out:
3087 file_drop(uap->fd);
3088 vnode_put(vp);
3089
3090 return error;
3091 }
3092
3093 struct getfsstat_struct {
3094 user_addr_t sfsp;
3095 user_addr_t *mp;
3096 int count;
3097 int maxcount;
3098 int flags;
3099 int error;
3100 };
3101
3102
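/*
 * vfs_iterate() callback for __mac_getfsstat(): for each mount that fits in
 * the user buffer, optionally refresh the cached vfsstat and copy out one
 * statfs record in the caller's 32- or 64-bit layout (via munge_statfs());
 * the total mount count is accumulated regardless.
 */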
3103 static int
3104 getfsstat_callback(mount_t mp, void * arg)
3105 {
3106 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
3107 struct vfsstatfs *sp;
3108 int error, my_size;
3109 vfs_context_t ctx = vfs_context_current();
3110
3111 if (fstp->sfsp && fstp->count < fstp->maxcount) {
3112 #if CONFIG_MACF
3113 error = mac_mount_check_stat(ctx, mp);
3114 if (error != 0) {
3115 fstp->error = error;
3116 return VFS_RETURNED_DONE;
3117 }
3118 #endif
3119 sp = &mp->mnt_vfsstat;
3120 /*
3121 * If MNT_NOWAIT is specified, do not refresh the
3122 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
3123 */
3124 if ((mp->mnt_lflag & MNT_LDEAD) ||
3125 (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
3126 (!(mp->mnt_lflag & MNT_LUNMOUNT)) &&
3127 (error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT)))) {
3128 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
3129 return VFS_RETURNED;
3130 }
3131
3132 /*
3133 * Need to handle LP64 version of struct statfs
3134 */
3135 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
3136 if (error) {
3137 fstp->error = error;
3138 return VFS_RETURNED_DONE;
3139 }
3140 fstp->sfsp += my_size;
3141
3142 if (fstp->mp) {
3143 #if CONFIG_MACF
3144 error = mac_mount_label_get(mp, *fstp->mp);
3145 if (error) {
3146 fstp->error = error;
3147 return VFS_RETURNED_DONE;
3148 }
3149 #endif
3150 fstp->mp++;
3151 }
3152 }
3153 fstp->count++;
3154 return VFS_RETURNED;
3155 }
3156
3157 /*
3158 * Get statistics on all filesystems.
3159 */
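/*
 * Illustrative userspace usage (not part of this file): callers typically
 * probe the required buffer size first, for example
 *
 *     int n = getfsstat(NULL, 0, MNT_NOWAIT);           // number of mounts
 *     struct statfs *buf = malloc(n * sizeof(*buf));
 *     n = getfsstat(buf, n * sizeof(*buf), MNT_NOWAIT); // fill the buffer
 *
 * Both calls end up in __mac_getfsstat() below.
 */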
3160 int
3161 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
3162 {
3163 struct __mac_getfsstat_args muap;
3164
3165 muap.buf = uap->buf;
3166 muap.bufsize = uap->bufsize;
3167 muap.mac = USER_ADDR_NULL;
3168 muap.macsize = 0;
3169 muap.flags = uap->flags;
3170
3171 return __mac_getfsstat(p, &muap, retval);
3172 }
3173
3174 /*
3175 * __mac_getfsstat: Get MAC-related file system statistics
3176 *
3177 * Parameters: p (ignored)
3178 * uap User argument descriptor (see below)
3179 * retval Count of file system statistics (N stats)
3180 *
3181 * Indirect: uap->bufsize Buffer size
3182 * uap->macsize MAC info size
3183 * uap->buf Buffer where information will be returned
3184 * uap->mac MAC info
3185 * uap->flags File system flags
3186 *
3187 *
3188 * Returns: 0 Success
3189 * !0 Not success
3190 *
3191 */
3192 int
3193 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
3194 {
3195 user_addr_t sfsp;
3196 user_addr_t *mp;
3197 size_t count, maxcount, bufsize, macsize;
3198 struct getfsstat_struct fst;
3199
3200 if ((unsigned)uap->bufsize > INT_MAX || (unsigned)uap->macsize > INT_MAX) {
3201 return EINVAL;
3202 }
3203
3204 bufsize = (size_t) uap->bufsize;
3205 macsize = (size_t) uap->macsize;
3206
3207 if (IS_64BIT_PROCESS(p)) {
3208 maxcount = bufsize / sizeof(struct user64_statfs);
3209 } else {
3210 maxcount = bufsize / sizeof(struct user32_statfs);
3211 }
3212 sfsp = uap->buf;
3213 count = 0;
3214
3215 mp = NULL;
3216
3217 #if CONFIG_MACF
3218 if (uap->mac != USER_ADDR_NULL) {
3219 u_int32_t *mp0;
3220 int error;
3221 unsigned int i;
3222
3223 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
3224 if (count != maxcount) {
3225 return EINVAL;
3226 }
3227
3228 /* Copy in the array */
3229 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
3230 if (mp0 == NULL) {
3231 return ENOMEM;
3232 }
3233
3234 error = copyin(uap->mac, mp0, macsize);
3235 if (error) {
3236 FREE(mp0, M_MACTEMP);
3237 return error;
3238 }
3239
3240 /* Normalize to an array of user_addr_t */
3241 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
3242 if (mp == NULL) {
3243 FREE(mp0, M_MACTEMP);
3244 return ENOMEM;
3245 }
3246
3247 for (i = 0; i < count; i++) {
3248 if (IS_64BIT_PROCESS(p)) {
3249 mp[i] = ((user_addr_t *)mp0)[i];
3250 } else {
3251 mp[i] = (user_addr_t)mp0[i];
3252 }
3253 }
3254 FREE(mp0, M_MACTEMP);
3255 }
3256 #endif
3257
3258
3259 fst.sfsp = sfsp;
3260 fst.mp = mp;
3261 fst.flags = uap->flags;
3262 fst.count = 0;
3263 fst.error = 0;
3264 fst.maxcount = maxcount;
3265
3266
3267 vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT, getfsstat_callback, &fst);
3268
3269 if (mp) {
3270 FREE(mp, M_MACTEMP);
3271 }
3272
3273 if (fst.error) {
3274 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
3275 return fst.error;
3276 }
3277
3278 if (fst.sfsp && fst.count > fst.maxcount) {
3279 *retval = fst.maxcount;
3280 } else {
3281 *retval = fst.count;
3282 }
3283 return 0;
3284 }
3285
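/*
 * vfs_iterate() callback for getfsstat64(): same idea as getfsstat_callback(),
 * but copies out the fixed-size struct statfs64 built by vfs_get_statfs64().
 */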
3286 static int
3287 getfsstat64_callback(mount_t mp, void * arg)
3288 {
3289 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
3290 struct vfsstatfs *sp;
3291 struct statfs64 sfs;
3292 int error;
3293
3294 if (fstp->sfsp && fstp->count < fstp->maxcount) {
3295 #if CONFIG_MACF
3296 error = mac_mount_check_stat(vfs_context_current(), mp);
3297 if (error != 0) {
3298 fstp->error = error;
3299 return VFS_RETURNED_DONE;
3300 }
3301 #endif
3302 sp = &mp->mnt_vfsstat;
3303 /*
3304 * If MNT_NOWAIT is specified, do not refresh the fsstat
3305 * cache. MNT_WAIT overrides MNT_NOWAIT.
3306 *
3307 * We treat MNT_DWAIT as MNT_WAIT for all instances of
3308 * getfsstat, since the constants are out of the same
3309 * namespace.
3310 */
3311 if ((mp->mnt_lflag & MNT_LDEAD) ||
3312 ((((fstp->flags & MNT_NOWAIT) == 0) || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
3313 (!(mp->mnt_lflag & MNT_LUNMOUNT)) &&
3314 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)))) {
3315 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
3316 return VFS_RETURNED;
3317 }
3318
3319 vfs_get_statfs64(mp, &sfs);
3320 error = copyout(&sfs, fstp->sfsp, sizeof(sfs));
3321 if (error) {
3322 fstp->error = error;
3323 return VFS_RETURNED_DONE;
3324 }
3325 fstp->sfsp += sizeof(sfs);
3326 }
3327 fstp->count++;
3328 return VFS_RETURNED;
3329 }
3330
3331 /*
3332 * Get statistics on all file systems in 64 bit mode.
3333 */
3334 int
3335 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
3336 {
3337 user_addr_t sfsp;
3338 int count, maxcount;
3339 struct getfsstat_struct fst;
3340
3341 maxcount = uap->bufsize / sizeof(struct statfs64);
3342
3343 sfsp = uap->buf;
3344 count = 0;
3345
3346 fst.sfsp = sfsp;
3347 fst.flags = uap->flags;
3348 fst.count = 0;
3349 fst.error = 0;
3350 fst.maxcount = maxcount;
3351
3352 vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT, getfsstat64_callback, &fst);
3353
3354 if (fst.error) {
3355 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
3356 return fst.error;
3357 }
3358
3359 if (fst.sfsp && fst.count > fst.maxcount) {
3360 *retval = fst.maxcount;
3361 } else {
3362 *retval = fst.count;
3363 }
3364
3365 return 0;
3366 }
3367
3368 /*
3369 * Gets the vnode associated with the file descriptor passed
3370 * as input.
3371 *
3372 * INPUT
3373 * ctx - vfs context of caller
3374 * fd - file descriptor for which vnode is required.
3375 * vpp - Pointer to pointer to vnode to be returned.
3376 *
3377 * The vnode is returned with an iocount so any vnode obtained
3378 * by this call needs a vnode_put
3379 *
3380 */
3381 int
3382 vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
3383 {
3384 int error;
3385 vnode_t vp;
3386 struct fileproc *fp;
3387 proc_t p = vfs_context_proc(ctx);
3388
3389 *vpp = NULLVP;
3390
3391 error = fp_getfvp(p, fd, &fp, &vp);
3392 if (error) {
3393 return error;
3394 }
3395
3396 error = vnode_getwithref(vp);
3397 if (error) {
3398 (void)fp_drop(p, fd, fp, 0);
3399 return error;
3400 }
3401
3402 (void)fp_drop(p, fd, fp, 0);
3403 *vpp = vp;
3404 return error;
3405 }
3406
3407 /*
3408 * Wrapper function around namei to start lookup from a directory
3409 * specified by a file descriptor (dirfd).
3410 *
3411 * In addition to all the errors returned by namei, this call can
3412 * return ENOTDIR if the file descriptor does not refer to a directory,
3413 * and EBADF if the file descriptor is not valid.
3414 */
3415 int
3416 nameiat(struct nameidata *ndp, int dirfd)
3417 {
3418 if ((dirfd != AT_FDCWD) &&
3419 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
3420 !(ndp->ni_cnd.cn_flags & USEDVP)) {
3421 int error = 0;
3422 char c;
3423
3424 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3425 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3426 if (error) {
3427 return error;
3428 }
3429 } else {
3430 c = *((char *)(ndp->ni_dirp));
3431 }
3432
3433 if (c != '/') {
3434 vnode_t dvp_at;
3435
3436 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3437 &dvp_at);
3438 if (error) {
3439 return error;
3440 }
3441
3442 if (vnode_vtype(dvp_at) != VDIR) {
3443 vnode_put(dvp_at);
3444 return ENOTDIR;
3445 }
3446
3447 ndp->ni_dvp = dvp_at;
3448 ndp->ni_cnd.cn_flags |= USEDVP;
3449 error = namei(ndp);
3450 ndp->ni_cnd.cn_flags &= ~USEDVP;
3451 vnode_put(dvp_at);
3452 return error;
3453 }
3454 }
3455
3456 return namei(ndp);
3457 }
3458
3459 /*
3460 * Change current working directory to a given file descriptor.
3461 */
3462 /* ARGSUSED */
3463 static int
3464 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
3465 {
3466 struct filedesc *fdp = p->p_fd;
3467 vnode_t vp;
3468 vnode_t tdp;
3469 vnode_t tvp;
3470 struct mount *mp;
3471 int error;
3472 vfs_context_t ctx = vfs_context_current();
3473
3474 AUDIT_ARG(fd, uap->fd);
3475 if (per_thread && uap->fd == -1) {
3476 /*
3477 * Switching back from per-thread to per-process CWD; verify we
3478 * do in fact have one before proceeding. The only success case
3479 * for this code path is to return 0 preemptively after zapping
3480 * the thread structure contents.
3481 */
3482 thread_t th = vfs_context_thread(ctx);
3483 if (th) {
3484 uthread_t uth = get_bsdthread_info(th);
3485 tvp = uth->uu_cdir;
3486 uth->uu_cdir = NULLVP;
3487 if (tvp != NULLVP) {
3488 vnode_rele(tvp);
3489 return 0;
3490 }
3491 }
3492 return EBADF;
3493 }
3494
3495 if ((error = file_vnode(uap->fd, &vp))) {
3496 return error;
3497 }
3498 if ((error = vnode_getwithref(vp))) {
3499 file_drop(uap->fd);
3500 return error;
3501 }
3502
3503 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3504
3505 if (vp->v_type != VDIR) {
3506 error = ENOTDIR;
3507 goto out;
3508 }
3509
3510 #if CONFIG_MACF
3511 error = mac_vnode_check_chdir(ctx, vp);
3512 if (error) {
3513 goto out;
3514 }
3515 #endif
3516 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3517 if (error) {
3518 goto out;
3519 }
3520
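/*
 * If the target directory is itself covered by a mount, descend through the
 * chain of covering filesystems to the topmost root before making it the cwd.
 */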
3521 while (!error && (mp = vp->v_mountedhere) != NULL) {
3522 if (vfs_busy(mp, LK_NOWAIT)) {
3523 error = EACCES;
3524 goto out;
3525 }
3526 error = VFS_ROOT(mp, &tdp, ctx);
3527 vfs_unbusy(mp);
3528 if (error) {
3529 break;
3530 }
3531 vnode_put(vp);
3532 vp = tdp;
3533 }
3534 if (error) {
3535 goto out;
3536 }
3537 if ((error = vnode_ref(vp))) {
3538 goto out;
3539 }
3540 vnode_put(vp);
3541
3542 if (per_thread) {
3543 thread_t th = vfs_context_thread(ctx);
3544 if (th) {
3545 uthread_t uth = get_bsdthread_info(th);
3546 tvp = uth->uu_cdir;
3547 uth->uu_cdir = vp;
3548 OSBitOrAtomic(P_THCWD, &p->p_flag);
3549 } else {
3550 vnode_rele(vp);
3551 return ENOENT;
3552 }
3553 } else {
3554 proc_fdlock(p);
3555 tvp = fdp->fd_cdir;
3556 fdp->fd_cdir = vp;
3557 proc_fdunlock(p);
3558 }
3559
3560 if (tvp) {
3561 vnode_rele(tvp);
3562 }
3563 file_drop(uap->fd);
3564
3565 return 0;
3566 out:
3567 vnode_put(vp);
3568 file_drop(uap->fd);
3569
3570 return error;
3571 }
3572
3573 int
3574 fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
3575 {
3576 return common_fchdir(p, uap, 0);
3577 }
3578
3579 int
3580 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
3581 {
3582 return common_fchdir(p, (void *)uap, 1);
3583 }
3584
3585
3586 /*
3587 * Change current working directory (".").
3588 *
3589 * Returns: 0 Success
3590 * change_dir:ENOTDIR
3591 * change_dir:???
3592 * vnode_ref:ENOENT No such file or directory
3593 */
3594 /* ARGSUSED */
3595 int
3596 chdir_internal(proc_t p, vfs_context_t ctx, struct nameidata *ndp, int per_thread)
3597 {
3598 struct filedesc *fdp = p->p_fd;
3599 int error;
3600 vnode_t tvp;
3601
3602 error = change_dir(ndp, ctx);
3603 if (error) {
3604 return error;
3605 }
3606 if ((error = vnode_ref(ndp->ni_vp))) {
3607 vnode_put(ndp->ni_vp);
3608 return error;
3609 }
3610 /*
3611 * drop the iocount we picked up in change_dir
3612 */
3613 vnode_put(ndp->ni_vp);
3614
3615 if (per_thread) {
3616 thread_t th = vfs_context_thread(ctx);
3617 if (th) {
3618 uthread_t uth = get_bsdthread_info(th);
3619 tvp = uth->uu_cdir;
3620 uth->uu_cdir = ndp->ni_vp;
3621 OSBitOrAtomic(P_THCWD, &p->p_flag);
3622 } else {
3623 vnode_rele(ndp->ni_vp);
3624 return ENOENT;
3625 }
3626 } else {
3627 proc_fdlock(p);
3628 tvp = fdp->fd_cdir;
3629 fdp->fd_cdir = ndp->ni_vp;
3630 proc_fdunlock(p);
3631 }
3632
3633 if (tvp) {
3634 vnode_rele(tvp);
3635 }
3636
3637 return 0;
3638 }
3639
3640
3641 /*
3642 * Change current working directory (".").
3643 *
3644 * Returns: 0 Success
3645 * chdir_internal:ENOTDIR
3646 * chdir_internal:ENOENT No such file or directory
3647 * chdir_internal:???
3648 */
3649 /* ARGSUSED */
3650 static int
3651 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
3652 {
3653 struct nameidata nd;
3654 vfs_context_t ctx = vfs_context_current();
3655
3656 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
3657 UIO_USERSPACE, uap->path, ctx);
3658
3659 return chdir_internal(p, ctx, &nd, per_thread);
3660 }
3661
3662
3663 /*
3664 * chdir
3665 *
3666 * Change current working directory (".") for the entire process
3667 *
3668 * Parameters: p Process requesting the call
3669 * uap User argument descriptor (see below)
3670 * retval (ignored)
3671 *
3672 * Indirect parameters: uap->path Directory path
3673 *
3674 * Returns: 0 Success
3675 * common_chdir: ENOTDIR
3676 * common_chdir: ENOENT No such file or directory
3677 * common_chdir: ???
3678 *
3679 */
3680 int
3681 chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
3682 {
3683 return common_chdir(p, (void *)uap, 0);
3684 }
3685
3686 /*
3687 * __pthread_chdir
3688 *
3689 * Change current working directory (".") for a single thread
3690 *
3691 * Parameters: p Process requesting the call
3692 * uap User argument descriptor (see below)
3693 * retval (ignored)
3694 *
3695 * Indirect parameters: uap->path Directory path
3696 *
3697 * Returns: 0 Success
3698 * common_chdir: ENOTDIR
3699 * common_chdir: ENOENT No such file or directory
3700 * common_chdir: ???
3701 *
3702 */
3703 int
3704 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
3705 {
3706 return common_chdir(p, (void *)uap, 1);
3707 }
3708
3709
3710 /*
3711 * Change notion of root (``/'') directory.
3712 */
3713 /* ARGSUSED */
3714 int
3715 chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
3716 {
3717 struct filedesc *fdp = p->p_fd;
3718 int error;
3719 struct nameidata nd;
3720 vnode_t tvp;
3721 vfs_context_t ctx = vfs_context_current();
3722
3723 if ((error = suser(kauth_cred_get(), &p->p_acflag))) {
3724 return error;
3725 }
3726
3727 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
3728 UIO_USERSPACE, uap->path, ctx);
3729 error = change_dir(&nd, ctx);
3730 if (error) {
3731 return error;
3732 }
3733
3734 #if CONFIG_MACF
3735 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3736 &nd.ni_cnd);
3737 if (error) {
3738 vnode_put(nd.ni_vp);
3739 return error;
3740 }
3741 #endif
3742
3743 if ((error = vnode_ref(nd.ni_vp))) {
3744 vnode_put(nd.ni_vp);
3745 return error;
3746 }
3747 vnode_put(nd.ni_vp);
3748
3749 proc_fdlock(p);
3750 tvp = fdp->fd_rdir;
3751 fdp->fd_rdir = nd.ni_vp;
3752 fdp->fd_flags |= FD_CHROOT;
3753 proc_fdunlock(p);
3754
3755 if (tvp != NULL) {
3756 vnode_rele(tvp);
3757 }
3758
3759 return 0;
3760 }
3761
3762 /*
3763 * Common routine for chroot and chdir.
3764 *
3765 * Returns: 0 Success
3766 * ENOTDIR Not a directory
3767 * namei:??? [anything namei can return]
3768 * vnode_authorize:??? [anything vnode_authorize can return]
3769 */
3770 static int
3771 change_dir(struct nameidata *ndp, vfs_context_t ctx)
3772 {
3773 vnode_t vp;
3774 int error;
3775
3776 if ((error = namei(ndp))) {
3777 return error;
3778 }
3779 nameidone(ndp);
3780 vp = ndp->ni_vp;
3781
3782 if (vp->v_type != VDIR) {
3783 vnode_put(vp);
3784 return ENOTDIR;
3785 }
3786
3787 #if CONFIG_MACF
3788 error = mac_vnode_check_chdir(ctx, vp);
3789 if (error) {
3790 vnode_put(vp);
3791 return error;
3792 }
3793 #endif
3794
3795 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3796 if (error) {
3797 vnode_put(vp);
3798 return error;
3799 }
3800
3801 return error;
3802 }
3803
3804 /*
3805 * Allocate the per-fd vnode data (for directories) associated with the file glob.
3806 */
3807 struct fd_vn_data *
3808 fg_vn_data_alloc(void)
3809 {
3810 struct fd_vn_data *fvdata;
3811
3812 /* Allocate per fd vnode data */
3813 MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
3814 M_FD_VN_DATA, M_WAITOK | M_ZERO);
3815 lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
3816 return fvdata;
3817 }
3818
3819 /*
3820 * Free the vnode data (for directories) associated with the file glob.
3821 */
3822 void
3823 fg_vn_data_free(void *fgvndata)
3824 {
3825 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3826
3827 if (fvdata->fv_buf) {
3828 FREE(fvdata->fv_buf, M_FD_DIRBUF);
3829 }
3830 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3831 FREE(fvdata, M_FD_VN_DATA);
3832 }
3833
3834 /*
3835 * Check permissions, allocate an open file structure,
3836 * and call the device open routine if any.
3837 *
3838 * Returns: 0 Success
3839 * EINVAL
3840 * EINTR
3841 * falloc:ENFILE
3842 * falloc:EMFILE
3843 * falloc:ENOMEM
3844 * vn_open_auth:???
3845 * dupfdopen:???
3846 * VNOP_ADVLOCK:???
3847 * vnode_setsize:???
3848 *
3849 * XXX Need to implement uid, gid
3850 */
3851 int
3852 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3853 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3854 int32_t *retval)
3855 {
3856 proc_t p = vfs_context_proc(ctx);
3857 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
3858 struct fileproc *fp;
3859 vnode_t vp;
3860 int flags, oflags;
3861 int type, indx, error;
3862 struct flock lf;
3863 struct vfs_context context;
3864
3865 oflags = uflags;
3866
3867 if ((oflags & O_ACCMODE) == O_ACCMODE) {
3868 return EINVAL;
3869 }
3870
3871 flags = FFLAGS(uflags);
3872 CLR(flags, FENCRYPTED);
3873 CLR(flags, FUNENCRYPTED);
3874
3875 AUDIT_ARG(fflags, oflags);
3876 AUDIT_ARG(mode, vap->va_mode);
3877
3878 if ((error = falloc_withalloc(p,
3879 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
3880 return error;
3881 }
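/*
 * Prime uu_dupfd with a negative sentinel; a device open routine (fdopen)
 * may replace it with a descriptor to duplicate, which the ENODEV/ENXIO
 * error path below hands to dupfdopen().
 */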
3882 uu->uu_dupfd = -indx - 1;
3883
3884 if ((error = vn_open_auth(ndp, &flags, vap))) {
3885 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)) { /* XXX from fdopen */
3886 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
3887 fp_drop(p, indx, NULL, 0);
3888 *retval = indx;
3889 return 0;
3890 }
3891 }
3892 if (error == ERESTART) {
3893 error = EINTR;
3894 }
3895 fp_free(p, indx, fp);
3896 return error;
3897 }
3898 uu->uu_dupfd = 0;
3899 vp = ndp->ni_vp;
3900
3901 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
3902 fp->f_fglob->fg_ops = &vnops;
3903 fp->f_fglob->fg_data = (caddr_t)vp;
3904
3905 if (flags & (O_EXLOCK | O_SHLOCK)) {
3906 lf.l_whence = SEEK_SET;
3907 lf.l_start = 0;
3908 lf.l_len = 0;
3909 if (flags & O_EXLOCK) {
3910 lf.l_type = F_WRLCK;
3911 } else {
3912 lf.l_type = F_RDLCK;
3913 }
3914 type = F_FLOCK;
3915 if ((flags & FNONBLOCK) == 0) {
3916 type |= F_WAIT;
3917 }
3918 #if CONFIG_MACF
3919 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3920 F_SETLK, &lf);
3921 if (error) {
3922 goto bad;
3923 }
3924 #endif
3925 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) {
3926 goto bad;
3927 }
3928 fp->f_fglob->fg_flag |= FHASLOCK;
3929 }
3930
3931 /* try to truncate by setting the size attribute */
3932 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0)) {
3933 goto bad;
3934 }
3935
3936 /*
3937 * For directories we hold some additional information in the fd.
3938 */
3939 if (vnode_vtype(vp) == VDIR) {
3940 fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
3941 } else {
3942 fp->f_fglob->fg_vn_data = NULL;
3943 }
3944
3945 vnode_put(vp);
3946
3947 /*
3948 * The first terminal open (without O_NOCTTY) by a session leader
3949 * results in it being set as the controlling terminal.
3950 */
3951 if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
3952 !(flags & O_NOCTTY)) {
3953 int tmp = 0;
3954
3955 (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
3956 (caddr_t)&tmp, ctx);
3957 }
3958
3959 proc_fdlock(p);
3960 if (flags & O_CLOEXEC) {
3961 *fdflags(p, indx) |= UF_EXCLOSE;
3962 }
3963 if (flags & O_CLOFORK) {
3964 *fdflags(p, indx) |= UF_FORKCLOSE;
3965 }
3966 procfdtbl_releasefd(p, indx, NULL);
3967
3968 #if CONFIG_SECLUDED_MEMORY
3969 if (secluded_for_filecache &&
3970 FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE &&
3971 vnode_vtype(vp) == VREG) {
3972 memory_object_control_t moc;
3973
3974 moc = ubc_getobject(vp, UBC_FLAGS_NONE);
3975
3976 if (moc == MEMORY_OBJECT_CONTROL_NULL) {
3977 /* nothing to do... */
3978 } else if (fp->f_fglob->fg_flag & FWRITE) {
3979 /* writable -> no longer eligible for secluded pages */
3980 memory_object_mark_eligible_for_secluded(moc,
3981 FALSE);
3982 } else if (secluded_for_filecache == 1) {
3983 char pathname[32] = { 0, };
3984 size_t copied;
3985 /* XXX FBDP: better way to detect /Applications/ ? */
3986 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3987 (void)copyinstr(ndp->ni_dirp,
3988 pathname,
3989 sizeof(pathname),
3990 &copied);
3991 } else {
3992 copystr(CAST_DOWN(void *, ndp->ni_dirp),
3993 pathname,
3994 sizeof(pathname),
3995 &copied);
3996 }
3997 pathname[sizeof(pathname) - 1] = '\0';
3998 if (strncmp(pathname,
3999 "/Applications/",
4000 strlen("/Applications/")) == 0 &&
4001 strncmp(pathname,
4002 "/Applications/Camera.app/",
4003 strlen("/Applications/Camera.app/")) != 0) {
4004 /*
4005 * not writable
4006 * AND from "/Applications/"
4007 * AND not from "/Applications/Camera.app/"
4008 * ==> eligible for secluded
4009 */
4010 memory_object_mark_eligible_for_secluded(moc,
4011 TRUE);
4012 }
4013 } else if (secluded_for_filecache == 2) {
4014 #if __arm64__
4015 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
4016 #elif __arm__
4017 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
4018 #else
4019 /* not implemented... */
4020 #endif
4021 size_t len = strlen(vp->v_name);
4022 if (!strncmp(vp->v_name, DYLD_SHARED_CACHE_NAME, len) ||
4023 !strncmp(vp->v_name, "dyld", len) ||
4024 !strncmp(vp->v_name, "launchd", len) ||
4025 !strncmp(vp->v_name, "Camera", len) ||
4026 !strncmp(vp->v_name, "mediaserverd", len) ||
4027 !strncmp(vp->v_name, "SpringBoard", len) ||
4028 !strncmp(vp->v_name, "backboardd", len)) {
4029 /*
4030 * This file matters when launching Camera:
4031 * do not store its contents in the secluded
4032 * pool that will be drained on Camera launch.
4033 */
4034 memory_object_mark_eligible_for_secluded(moc,
4035 FALSE);
4036 }
4037 }
4038 }
4039 #endif /* CONFIG_SECLUDED_MEMORY */
4040
4041 fp_drop(p, indx, fp, 1);
4042 proc_fdunlock(p);
4043
4044 *retval = indx;
4045
4046 return 0;
4047 bad:
4048 context = *vfs_context_current();
4049 context.vc_ucred = fp->f_fglob->fg_cred;
4050
4051 if ((fp->f_fglob->fg_flag & FHASLOCK) &&
4052 (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
4053 lf.l_whence = SEEK_SET;
4054 lf.l_start = 0;
4055 lf.l_len = 0;
4056 lf.l_type = F_UNLCK;
4057
4058 (void)VNOP_ADVLOCK(
4059 vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
4060 }
4061
4062 vn_close(vp, fp->f_fglob->fg_flag, &context);
4063 vnode_put(vp);
4064 fp_free(p, indx, fp);
4065
4066 return error;
4067 }
4068
4069 /*
4070 * While most of the *at syscall handlers can call nameiat() which
4071 * is a wrapper around namei, the use of namei and initialisation
4072 * of nameidata are far removed and in different functions - namei
4073 * gets called in vn_open_auth for open1. So we'll just do here what
4074 * nameiat() does.
4075 */
4076 static int
4077 open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
4078 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
4079 int dirfd)
4080 {
4081 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
4082 int error;
4083 char c;
4084
4085 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
4086 error = copyin(ndp->ni_dirp, &c, sizeof(char));
4087 if (error) {
4088 return error;
4089 }
4090 } else {
4091 c = *((char *)(ndp->ni_dirp));
4092 }
4093
4094 if (c != '/') {
4095 vnode_t dvp_at;
4096
4097 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
4098 &dvp_at);
4099 if (error) {
4100 return error;
4101 }
4102
4103 if (vnode_vtype(dvp_at) != VDIR) {
4104 vnode_put(dvp_at);
4105 return ENOTDIR;
4106 }
4107
4108 ndp->ni_dvp = dvp_at;
4109 ndp->ni_cnd.cn_flags |= USEDVP;
4110 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
4111 retval);
4112 vnode_put(dvp_at);
4113 return error;
4114 }
4115 }
4116
4117 return open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval);
4118 }
4119
4120 /*
4121 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
4122 *
4123 * Parameters: p Process requesting the open
4124 * uap User argument descriptor (see below)
4125 * retval Pointer to an area to receive the
4126 * return value from the system call
4127 *
4128 * Indirect: uap->path Path to open (same as 'open')
4129 * uap->flags Flags to open (same as 'open')
4130 * uap->uid UID to set, if creating
4131 * uap->gid GID to set, if creating
4132 * uap->mode File mode, if creating (same as 'open')
4133 * uap->xsecurity ACL to set, if creating
4134 *
4135 * Returns: 0 Success
4136 * !0 errno value
4137 *
4138 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4139 *
4140 * XXX: We should enumerate the possible errno values here, and where
4141 * in the code they originated.
4142 */
4143 int
4144 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
4145 {
4146 struct filedesc *fdp = p->p_fd;
4147 int ciferror;
4148 kauth_filesec_t xsecdst;
4149 struct vnode_attr va;
4150 struct nameidata nd;
4151 int cmode;
4152
4153 AUDIT_ARG(owner, uap->uid, uap->gid);
4154
4155 xsecdst = NULL;
4156 if ((uap->xsecurity != USER_ADDR_NULL) &&
4157 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)) {
4158 return ciferror;
4159 }
4160
4161 VATTR_INIT(&va);
4162 cmode = ((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
4163 VATTR_SET(&va, va_mode, cmode);
4164 if (uap->uid != KAUTH_UID_NONE) {
4165 VATTR_SET(&va, va_uid, uap->uid);
4166 }
4167 if (uap->gid != KAUTH_GID_NONE) {
4168 VATTR_SET(&va, va_gid, uap->gid);
4169 }
4170 if (xsecdst != NULL) {
4171 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4172 }
4173
4174 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
4175 uap->path, vfs_context_current());
4176
4177 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
4178 fileproc_alloc_init, NULL, retval);
4179 if (xsecdst != NULL) {
4180 kauth_filesec_free(xsecdst);
4181 }
4182
4183 return ciferror;
4184 }
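
/*
 * Illustrative sketch (editorial, not part of this file): the create-mode
 * computation used by open_extended() and the other open paths above.
 * 'requested' and 'cmask' are hypothetical values standing in for
 * uap->mode and fdp->fd_cmask.
 */
#if 0
	mode_t requested = 0666;        /* mode asked for by the caller */
	mode_t cmask = 022;             /* the process file-creation mask */
	mode_t cmode;

	/* clear masked bits, restrict to permission bits, strip the sticky bit */
	cmode = ((requested & ~cmask) & ALLPERMS) & ~S_ISTXT;
	/* cmode is now 0644 */
#endif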
4185
4186 /*
4187 * Perform the data-protected, atomically controlled open(2).
4188 *
4189 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
4190 */
4191 int
4192 open_dprotected_np(__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval)
4193 {
4194 int flags = uap->flags;
4195 int class = uap->class;
4196 int dpflags = uap->dpflags;
4197
4198 /*
4199 * Follow the same path as normal open(2)
4200 * Look up the item if it exists, and acquire the vnode.
4201 */
4202 struct filedesc *fdp = p->p_fd;
4203 struct vnode_attr va;
4204 struct nameidata nd;
4205 int cmode;
4206 int error;
4207
4208 VATTR_INIT(&va);
4209 /* Mask off all but regular access permissions */
4210 cmode = ((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
4211 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
4212
4213 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
4214 uap->path, vfs_context_current());
4215
4216 /*
4217 * Initialize the extra fields in vnode_attr to pass down our
4218 * extra request details:
4219 * 1. target cprotect class.
4220 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
4221 */
4222 if (flags & O_CREAT) {
4223 /* lower level kernel code validates that the class is valid before applying it. */
4224 if (class != PROTECTION_CLASS_DEFAULT) {
4225 /*
4226 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
4227 * file behave the same as open (2)
4228 */
4229 VATTR_SET(&va, va_dataprotect_class, class);
4230 }
4231 }
4232
4233 if (dpflags & (O_DP_GETRAWENCRYPTED | O_DP_GETRAWUNENCRYPTED)) {
4234 if (flags & (O_RDWR | O_WRONLY)) {
4235 /* Not allowed to write raw encrypted bytes */
4236 return EINVAL;
4237 }
4238 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
4239 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
4240 }
4241 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
4242 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
4243 }
4244 }
4245
4246 error = open1(vfs_context_current(), &nd, uap->flags, &va,
4247 fileproc_alloc_init, NULL, retval);
4248
4249 return error;
4250 }
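
/*
 * Illustrative sketch (editorial, not part of this file): the flag
 * combinations accepted by the raw-encrypted path above.  Requesting
 * O_DP_GETRAWENCRYPTED (or O_DP_GETRAWUNENCRYPTED) together with write
 * access is rejected; a read-only open is accepted.
 */
#if 0
	int flags, dpflags;

	/* rejected: raw-encrypted access may not be combined with write access */
	flags = O_RDWR;
	dpflags = O_DP_GETRAWENCRYPTED;          /* open_dprotected_np -> EINVAL */

	/* accepted: a read-only open of the raw encrypted contents */
	flags = O_RDONLY;
	dpflags = O_DP_GETRAWENCRYPTED;          /* sets VA_DP_RAWENCRYPTED */
#endif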
4251
4252 static int
4253 openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
4254 int fd, enum uio_seg segflg, int *retval)
4255 {
4256 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
4257 struct vnode_attr va;
4258 struct nameidata nd;
4259 int cmode;
4260
4261 VATTR_INIT(&va);
4262 /* Mask off all but regular access permissions */
4263 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
4264 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
4265
4266 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
4267 segflg, path, ctx);
4268
4269 return open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
4270 retval, fd);
4271 }
4272
4273 int
4274 open(proc_t p, struct open_args *uap, int32_t *retval)
4275 {
4276 __pthread_testcancel(1);
4277 return open_nocancel(p, (struct open_nocancel_args *)uap, retval);
4278 }
4279
4280 int
4281 open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
4282 int32_t *retval)
4283 {
4284 return openat_internal(vfs_context_current(), uap->path, uap->flags,
4285 uap->mode, AT_FDCWD, UIO_USERSPACE, retval);
4286 }
4287
4288 int
4289 openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
4290 int32_t *retval)
4291 {
4292 return openat_internal(vfs_context_current(), uap->path, uap->flags,
4293 uap->mode, uap->fd, UIO_USERSPACE, retval);
4294 }
4295
4296 int
4297 openat(proc_t p, struct openat_args *uap, int32_t *retval)
4298 {
4299 __pthread_testcancel(1);
4300 return openat_nocancel(p, (struct openat_nocancel_args *)uap, retval);
4301 }
4302
4303 /*
4304 * openbyid_np: open a file given a file system id and a file system object id
4305 * The HFS file system object id is an fsobj_id_t {uint32, uint32};
4306 * for file systems that don't support object ids, it is a node id (uint64_t).
4307 *
4308 * Parameters: p Process requesting the open
4309 * uap User argument descriptor (see below)
4310 * retval Pointer to an area to receive the
4311 * return value from the system call
4312 *
4313 * Indirect: uap->fsid id of target file system
4314 * uap->objid id of target file system object
4315 * uap->oflags Flags to open (same as 'open')
4316 *
4317 * (There is no path argument; the target is identified by fsid and objid.)
4318 *
4319 * Returns: 0 Success
4320 * !0 errno value
4321 *
4322 *
4323 * XXX: We should enumerate the possible errno values here, and where
4324 * in the code they originated.
4325 */
4326 int
4327 openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
4328 {
4329 fsid_t fsid;
4330 uint64_t objid;
4331 int error;
4332 char *buf = NULL;
4333 int buflen = MAXPATHLEN;
4334 int pathlen = 0;
4335 vfs_context_t ctx = vfs_context_current();
4336
4337 if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
4338 return error;
4339 }
4340
4341 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
4342 return error;
4343 }
4344
4345 /* uap->objid is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
4346 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
4347 return error;
4348 }
4349
4350 AUDIT_ARG(value32, fsid.val[0]);
4351 AUDIT_ARG(value64, objid);
4352
4353 /* resolve path from fsid, objid */
4354 do {
4355 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
4356 if (buf == NULL) {
4357 return ENOMEM;
4358 }
4359
4360 error = fsgetpath_internal( ctx, fsid.val[0], objid, buflen,
4361 buf, FSOPT_ISREALFSID, &pathlen);
4362
4363 if (error) {
4364 FREE(buf, M_TEMP);
4365 buf = NULL;
4366 }
4367 } while (error == ENOSPC && (buflen += MAXPATHLEN));
4368
4369 if (error) {
4370 return error;
4371 }
4372
4373 buf[pathlen] = 0;
4374
4375 error = openat_internal(
4376 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
4377
4378 FREE(buf, M_TEMP);
4379
4380 return error;
4381 }
4382
4383
4384 /*
4385 * Create a special file.
4386 */
4387 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
4388
4389 int
4390 mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
4391 {
4392 struct vnode_attr va;
4393 vfs_context_t ctx = vfs_context_current();
4394 int error;
4395 struct nameidata nd;
4396 vnode_t vp, dvp;
4397
4398 VATTR_INIT(&va);
4399 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4400 VATTR_SET(&va, va_rdev, uap->dev);
4401
4402 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
4403 if ((uap->mode & S_IFMT) == S_IFIFO) {
4404 return mkfifo1(ctx, uap->path, &va);
4405 }
4406
4407 AUDIT_ARG(mode, uap->mode);
4408 AUDIT_ARG(value32, uap->dev);
4409
4410 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
4411 return error;
4412 }
4413 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
4414 UIO_USERSPACE, uap->path, ctx);
4415 error = namei(&nd);
4416 if (error) {
4417 return error;
4418 }
4419 dvp = nd.ni_dvp;
4420 vp = nd.ni_vp;
4421
4422 if (vp != NULL) {
4423 error = EEXIST;
4424 goto out;
4425 }
4426
4427 switch (uap->mode & S_IFMT) {
4428 case S_IFCHR:
4429 VATTR_SET(&va, va_type, VCHR);
4430 break;
4431 case S_IFBLK:
4432 VATTR_SET(&va, va_type, VBLK);
4433 break;
4434 default:
4435 error = EINVAL;
4436 goto out;
4437 }
4438
4439 #if CONFIG_MACF
4440 error = mac_vnode_check_create(ctx,
4441 nd.ni_dvp, &nd.ni_cnd, &va);
4442 if (error) {
4443 goto out;
4444 }
4445 #endif
4446
4447 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) {
4448 goto out;
4449 }
4450
4451 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0) {
4452 goto out;
4453 }
4454
4455 if (vp) {
4456 int update_flags = 0;
4457
4458 // Make sure the name & parent pointers are hooked up
4459 if (vp->v_name == NULL) {
4460 update_flags |= VNODE_UPDATE_NAME;
4461 }
4462 if (vp->v_parent == NULLVP) {
4463 update_flags |= VNODE_UPDATE_PARENT;
4464 }
4465
4466 if (update_flags) {
4467 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
4468 }
4469
4470 #if CONFIG_FSE
4471 add_fsevent(FSE_CREATE_FILE, ctx,
4472 FSE_ARG_VNODE, vp,
4473 FSE_ARG_DONE);
4474 #endif
4475 }
4476
4477 out:
4478 /*
4479 * nameidone has to happen before we vnode_put(dvp)
4480 * since it may need to release the fs_nodelock on the dvp
4481 */
4482 nameidone(&nd);
4483
4484 if (vp) {
4485 vnode_put(vp);
4486 }
4487 vnode_put(dvp);
4488
4489 return error;
4490 }
4491
4492 /*
4493 * Create a named pipe.
4494 *
4495 * Returns: 0 Success
4496 * EEXIST
4497 * namei:???
4498 * vnode_authorize:???
4499 * vn_create:???
4500 */
4501 static int
4502 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
4503 {
4504 vnode_t vp, dvp;
4505 int error;
4506 struct nameidata nd;
4507
4508 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
4509 UIO_USERSPACE, upath, ctx);
4510 error = namei(&nd);
4511 if (error) {
4512 return error;
4513 }
4514 dvp = nd.ni_dvp;
4515 vp = nd.ni_vp;
4516
4517 /* check that this is a new file and authorize addition */
4518 if (vp != NULL) {
4519 error = EEXIST;
4520 goto out;
4521 }
4522 VATTR_SET(vap, va_type, VFIFO);
4523
4524 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
4525 goto out;
4526 }
4527
4528 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
4529 out:
4530 /*
4531 * nameidone has to happen before we vnode_put(dvp)
4532 * since it may need to release the fs_nodelock on the dvp
4533 */
4534 nameidone(&nd);
4535
4536 if (vp) {
4537 vnode_put(vp);
4538 }
4539 vnode_put(dvp);
4540
4541 return error;
4542 }
4543
4544
4545 /*
4546 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
4547 *
4548 * Parameters: p Process requesting the open
4549 * uap User argument descriptor (see below)
4550 * retval (Ignored)
4551 *
4552 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4553 * uap->uid UID to set
4554 * uap->gid GID to set
4555 * uap->mode File mode to set (same as 'mkfifo')
4556 * uap->xsecurity ACL to set, if creating
4557 *
4558 * Returns: 0 Success
4559 * !0 errno value
4560 *
4561 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4562 *
4563 * XXX: We should enumerate the possible errno values here, and where
4564 * in the code they originated.
4565 */
4566 int
4567 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
4568 {
4569 int ciferror;
4570 kauth_filesec_t xsecdst;
4571 struct vnode_attr va;
4572
4573 AUDIT_ARG(owner, uap->uid, uap->gid);
4574
4575 xsecdst = KAUTH_FILESEC_NONE;
4576 if (uap->xsecurity != USER_ADDR_NULL) {
4577 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
4578 return ciferror;
4579 }
4580 }
4581
4582 VATTR_INIT(&va);
4583 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4584 if (uap->uid != KAUTH_UID_NONE) {
4585 VATTR_SET(&va, va_uid, uap->uid);
4586 }
4587 if (uap->gid != KAUTH_GID_NONE) {
4588 VATTR_SET(&va, va_gid, uap->gid);
4589 }
4590 if (xsecdst != KAUTH_FILESEC_NONE) {
4591 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4592 }
4593
4594 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
4595
4596 if (xsecdst != KAUTH_FILESEC_NONE) {
4597 kauth_filesec_free(xsecdst);
4598 }
4599 return ciferror;
4600 }
4601
4602 /* ARGSUSED */
4603 int
4604 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
4605 {
4606 struct vnode_attr va;
4607
4608 VATTR_INIT(&va);
4609 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4610
4611 return mkfifo1(vfs_context_current(), uap->path, &va);
4612 }
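
/*
 * Illustrative sketch (editorial, not part of this file): from user space,
 * a mknod(2) with S_IFIFO is routed through mkfifo1() above, so these two
 * calls are equivalent ways to create a named pipe.  "/tmp/example.fifo"
 * is a hypothetical path.
 */
#if 0
	/* assumed user-space headers: <sys/types.h>, <sys/stat.h> */
	mkfifo("/tmp/example.fifo", 0644);
	mknod("/tmp/example.fifo", S_IFIFO | 0644, 0);   /* same effect */
#endif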
4613
4614
4615 static char *
4616 my_strrchr(char *p, int ch)
4617 {
4618 char *save;
4619
4620 for (save = NULL;; ++p) {
4621 if (*p == ch) {
4622 save = p;
4623 }
4624 if (!*p) {
4625 return save;
4626 }
4627 }
4628 /* NOTREACHED */
4629 }
4630
4631 extern int safe_getpath_new(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path, int firmlink);
4632 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4633 extern int safe_getpath_no_firmlink(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4634
4635 int
4636 safe_getpath_new(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path, int firmlink)
4637 {
4638 int ret, len = _len;
4639
4640 *truncated_path = 0;
4641
4642 if (firmlink) {
4643 ret = vn_getpath(dvp, path, &len);
4644 } else {
4645 ret = vn_getpath_no_firmlink(dvp, path, &len);
4646 }
4647 if (ret == 0 && len < (MAXPATHLEN - 1)) {
4648 if (leafname) {
4649 path[len - 1] = '/';
4650 len += strlcpy(&path[len], leafname, MAXPATHLEN - len) + 1;
4651 if (len > MAXPATHLEN) {
4652 char *ptr;
4653
4654 // the string got truncated!
4655 *truncated_path = 1;
4656 ptr = my_strrchr(path, '/');
4657 if (ptr) {
4658 *ptr = '\0'; // chop off the string at the last directory component
4659 }
4660 len = strlen(path) + 1;
4661 }
4662 }
4663 } else if (ret == 0) {
4664 *truncated_path = 1;
4665 } else if (ret != 0) {
4666 struct vnode *mydvp = dvp;
4667
4668 if (ret != ENOSPC) {
4669 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4670 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
4671 }
4672 *truncated_path = 1;
4673
4674 do {
4675 if (mydvp->v_parent != NULL) {
4676 mydvp = mydvp->v_parent;
4677 } else if (mydvp->v_mount) {
4678 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4679 break;
4680 } else {
4681 // no parent and no mount point? only thing is to punt and say "/" changed
4682 strlcpy(path, "/", _len);
4683 len = 2;
4684 mydvp = NULL;
4685 }
4686
4687 if (mydvp == NULL) {
4688 break;
4689 }
4690
4691 len = _len;
4692 if (firmlink) {
4693 ret = vn_getpath(mydvp, path, &len);
4694 } else {
4695 ret = vn_getpath_no_firmlink(mydvp, path, &len);
4696 }
4697 } while (ret == ENOSPC);
4698 }
4699
4700 return len;
4701 }
4702
4703 int
4704 safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4705 {
4706 return safe_getpath_new(dvp, leafname, path, _len, truncated_path, 1);
4707 }
4708
4709 int
4710 safe_getpath_no_firmlink(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4711 {
4712 return safe_getpath_new(dvp, leafname, path, _len, truncated_path, 0);
4713 }
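
/*
 * Illustrative sketch (editorial, not part of this file): the typical
 * calling pattern for safe_getpath() used throughout this file -- grab a
 * path buffer, build the path for dvp + leafname, then check the
 * truncation flag.  'some_dvp' and 'some_name' are hypothetical
 * stand-ins.
 */
#if 0
	char *pathbuf = NULL;
	int pathlen, truncated = 0;

	GET_PATH(pathbuf);
	if (pathbuf != NULL) {
		pathlen = safe_getpath(some_dvp, some_name, pathbuf,
		    MAXPATHLEN, &truncated);
		if (truncated) {
			/* the path did not fit and was chopped at the last '/' */
		}
		RELEASE_PATH(pathbuf);
	}
#endif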
4714
4715 /*
4716 * Make a hard file link.
4717 *
4718 * Returns: 0 Success
4719 * EPERM
4720 * EEXIST
4721 * EXDEV
4722 * namei:???
4723 * vnode_authorize:???
4724 * VNOP_LINK:???
4725 */
4726 /* ARGSUSED */
4727 static int
4728 linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
4729 user_addr_t link, int flag, enum uio_seg segflg)
4730 {
4731 vnode_t vp, pvp, dvp, lvp;
4732 struct nameidata nd;
4733 int follow;
4734 int error;
4735 #if CONFIG_FSE
4736 fse_info finfo;
4737 #endif
4738 int need_event, has_listeners, need_kpath2;
4739 char *target_path = NULL;
4740 int truncated = 0;
4741
4742 vp = dvp = lvp = NULLVP;
4743
4744 /* look up the object we are linking to */
4745 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
4746 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
4747 segflg, path, ctx);
4748
4749 error = nameiat(&nd, fd1);
4750 if (error) {
4751 return error;
4752 }
4753 vp = nd.ni_vp;
4754
4755 nameidone(&nd);
4756
4757 /*
4758 * Normally, linking to directories is not supported.
4759 * However, some file systems may have limited support.
4760 */
4761 if (vp->v_type == VDIR) {
4762 if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
4763 error = EPERM; /* POSIX */
4764 goto out;
4765 }
4766
4767 /* Linking to a directory requires ownership. */
4768 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
4769 struct vnode_attr dva;
4770
4771 VATTR_INIT(&dva);
4772 VATTR_WANTED(&dva, va_uid);
4773 if (vnode_getattr(vp, &dva, ctx) != 0 ||
4774 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
4775 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
4776 error = EACCES;
4777 goto out;
4778 }
4779 }
4780 }
4781
4782 /* lookup the target node */
4783 #if CONFIG_TRIGGERS
4784 nd.ni_op = OP_LINK;
4785 #endif
4786 nd.ni_cnd.cn_nameiop = CREATE;
4787 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
4788 nd.ni_dirp = link;
4789 error = nameiat(&nd, fd2);
4790 if (error != 0) {
4791 goto out;
4792 }
4793 dvp = nd.ni_dvp;
4794 lvp = nd.ni_vp;
4795
4796 #if CONFIG_MACF
4797 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0) {
4798 goto out2;
4799 }
4800 #endif
4801
4802 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4803 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0) {
4804 goto out2;
4805 }
4806
4807 /* target node must not exist */
4808 if (lvp != NULLVP) {
4809 error = EEXIST;
4810 goto out2;
4811 }
4812 /* cannot link across mountpoints */
4813 if (vnode_mount(vp) != vnode_mount(dvp)) {
4814 error = EXDEV;
4815 goto out2;
4816 }
4817
4818 /* authorize creation of the target node */
4819 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) {
4820 goto out2;
4821 }
4822
4823 /* and finally make the link */
4824 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
4825 if (error) {
4826 goto out2;
4827 }
4828
4829 #if CONFIG_MACF
4830 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
4831 #endif
4832
4833 #if CONFIG_FSE
4834 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
4835 #else
4836 need_event = 0;
4837 #endif
4838 has_listeners = kauth_authorize_fileop_has_listeners();
4839
4840 need_kpath2 = 0;
4841 #if CONFIG_AUDIT
4842 if (AUDIT_RECORD_EXISTS()) {
4843 need_kpath2 = 1;
4844 }
4845 #endif
4846
4847 if (need_event || has_listeners || need_kpath2) {
4848 char *link_to_path = NULL;
4849 int len, link_name_len;
4850
4851 /* build the path to the new link file */
4852 GET_PATH(target_path);
4853 if (target_path == NULL) {
4854 error = ENOMEM;
4855 goto out2;
4856 }
4857
4858 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
4859
4860 AUDIT_ARG(kpath, target_path, ARG_KPATH2);
4861
4862 if (has_listeners) {
4863 /* build the path to file we are linking to */
4864 GET_PATH(link_to_path);
4865 if (link_to_path == NULL) {
4866 error = ENOMEM;
4867 goto out2;
4868 }
4869
4870 link_name_len = MAXPATHLEN;
4871 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
4872 /*
4873 * Call out to allow 3rd party notification of the link.
4874 * Ignore result of kauth_authorize_fileop call.
4875 */
4876 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
4877 (uintptr_t)link_to_path,
4878 (uintptr_t)target_path);
4879 }
4880 if (link_to_path != NULL) {
4881 RELEASE_PATH(link_to_path);
4882 }
4883 }
4884 #if CONFIG_FSE
4885 if (need_event) {
4886 /* construct fsevent */
4887 if (get_fse_info(vp, &finfo, ctx) == 0) {
4888 if (truncated) {
4889 finfo.mode |= FSE_TRUNCATED_PATH;
4890 }
4891
4892 // build the path to the destination of the link
4893 add_fsevent(FSE_CREATE_FILE, ctx,
4894 FSE_ARG_STRING, len, target_path,
4895 FSE_ARG_FINFO, &finfo,
4896 FSE_ARG_DONE);
4897 }
4898
4899 pvp = vp->v_parent;
4900 // need an iocount on pvp in this case
4901 if (pvp && pvp != dvp) {
4902 error = vnode_get(pvp);
4903 if (error) {
4904 pvp = NULLVP;
4905 error = 0;
4906 }
4907 }
4908 if (pvp) {
4909 add_fsevent(FSE_STAT_CHANGED, ctx,
4910 FSE_ARG_VNODE, pvp, FSE_ARG_DONE);
4911 }
4912 if (pvp && pvp != dvp) {
4913 vnode_put(pvp);
4914 }
4915 }
4916 #endif
4917 }
4918 out2:
4919 /*
4920 * nameidone has to happen before we vnode_put(dvp)
4921 * since it may need to release the fs_nodelock on the dvp
4922 */
4923 nameidone(&nd);
4924 if (target_path != NULL) {
4925 RELEASE_PATH(target_path);
4926 }
4927 out:
4928 if (lvp) {
4929 vnode_put(lvp);
4930 }
4931 if (dvp) {
4932 vnode_put(dvp);
4933 }
4934 vnode_put(vp);
4935 return error;
4936 }
4937
4938 int
4939 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4940 {
4941 return linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4942 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE);
4943 }
4944
4945 int
4946 linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4947 {
4948 if (uap->flag & ~AT_SYMLINK_FOLLOW) {
4949 return EINVAL;
4950 }
4951
4952 return linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4953 uap->fd2, uap->link, uap->flag, UIO_USERSPACE);
4954 }
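
/*
 * Illustrative sketch (editorial, not part of this file): from user space,
 * linkat(2) accepts only the AT_SYMLINK_FOLLOW flag (anything else fails
 * with EINVAL, per linkat() above).  "src" and "dst" are hypothetical
 * names resolved relative to the hypothetical descriptors sfd and dfd.
 */
#if 0
	/* assumed user-space headers: <unistd.h>, <fcntl.h> (for AT_*) */
	linkat(sfd, "src", dfd, "dst", 0);                  /* do not dereference "src" if it is a symlink */
	linkat(sfd, "src", dfd, "dst", AT_SYMLINK_FOLLOW);  /* link to what "src" points at */
#endif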
4955
4956 /*
4957 * Make a symbolic link.
4958 *
4959 * We could add support for ACLs here too...
4960 */
4961 /* ARGSUSED */
4962 static int
4963 symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
4964 user_addr_t link, enum uio_seg segflg)
4965 {
4966 struct vnode_attr va;
4967 char *path;
4968 int error;
4969 struct nameidata nd;
4970 vnode_t vp, dvp;
4971 size_t dummy = 0;
4972 proc_t p;
4973
4974 error = 0;
4975 if (UIO_SEG_IS_USER_SPACE(segflg)) {
4976 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
4977 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
4978 } else {
4979 path = (char *)path_data;
4980 }
4981 if (error) {
4982 goto out;
4983 }
4984 AUDIT_ARG(text, path); /* This is the link string */
4985
4986 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
4987 segflg, link, ctx);
4988
4989 error = nameiat(&nd, fd);
4990 if (error) {
4991 goto out;
4992 }
4993 dvp = nd.ni_dvp;
4994 vp = nd.ni_vp;
4995
4996 p = vfs_context_proc(ctx);
4997 VATTR_INIT(&va);
4998 VATTR_SET(&va, va_type, VLNK);
4999 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
5000
5001 #if CONFIG_MACF
5002 error = mac_vnode_check_create(ctx,
5003 dvp, &nd.ni_cnd, &va);
5004 #endif
5005 if (error != 0) {
5006 goto skipit;
5007 }
5008
5009 if (vp != NULL) {
5010 error = EEXIST;
5011 goto skipit;
5012 }
5013
5014 /* authorize */
5015 if (error == 0) {
5016 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
5017 }
5018 /* get default ownership, etc. */
5019 if (error == 0) {
5020 error = vnode_authattr_new(dvp, &va, 0, ctx);
5021 }
5022 if (error == 0) {
5023 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
5024 }
5025
5026 #if CONFIG_MACF
5027 if (error == 0 && vp) {
5028 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
5029 }
5030 #endif
5031
5032 /* do fallback attribute handling */
5033 if (error == 0 && vp) {
5034 error = vnode_setattr_fallback(vp, &va, ctx);
5035 }
5036
5037 if (error == 0) {
5038 int update_flags = 0;
5039
5040 /* check if a new vnode was created; if not, try to look it up */
5041 if (vp == NULL) {
5042 nd.ni_cnd.cn_nameiop = LOOKUP;
5043 #if CONFIG_TRIGGERS
5044 nd.ni_op = OP_LOOKUP;
5045 #endif
5046 nd.ni_cnd.cn_flags = 0;
5047 error = nameiat(&nd, fd);
5048 vp = nd.ni_vp;
5049
5050 if (vp == NULL) {
5051 goto skipit;
5052 }
5053 }
5054
5055 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
5056 /* call out to allow 3rd party notification of rename.
5057 * Ignore result of kauth_authorize_fileop call.
5058 */
5059 if (kauth_authorize_fileop_has_listeners() &&
5060 namei(&nd) == 0) {
5061 char *new_link_path = NULL;
5062 int len;
5063
5064 /* build the path to the new link file */
5065 new_link_path = get_pathbuff();
5066 len = MAXPATHLEN;
5067 vn_getpath(dvp, new_link_path, &len);
5068 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
5069 new_link_path[len - 1] = '/';
5070 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN - len);
5071 }
5072
5073 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
5074 (uintptr_t)path, (uintptr_t)new_link_path);
5075 if (new_link_path != NULL) {
5076 release_pathbuff(new_link_path);
5077 }
5078 }
5079 #endif
5080 // Make sure the name & parent pointers are hooked up
5081 if (vp->v_name == NULL) {
5082 update_flags |= VNODE_UPDATE_NAME;
5083 }
5084 if (vp->v_parent == NULLVP) {
5085 update_flags |= VNODE_UPDATE_PARENT;
5086 }
5087
5088 if (update_flags) {
5089 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
5090 }
5091
5092 #if CONFIG_FSE
5093 add_fsevent(FSE_CREATE_FILE, ctx,
5094 FSE_ARG_VNODE, vp,
5095 FSE_ARG_DONE);
5096 #endif
5097 }
5098
5099 skipit:
5100 /*
5101 * nameidone has to happen before we vnode_put(dvp)
5102 * since it may need to release the fs_nodelock on the dvp
5103 */
5104 nameidone(&nd);
5105
5106 if (vp) {
5107 vnode_put(vp);
5108 }
5109 vnode_put(dvp);
5110 out:
5111 if (path && (path != (char *)path_data)) {
5112 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
5113 }
5114
5115 return error;
5116 }
5117
5118 int
5119 symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
5120 {
5121 return symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
5122 uap->link, UIO_USERSPACE);
5123 }
5124
5125 int
5126 symlinkat(__unused proc_t p, struct symlinkat_args *uap,
5127 __unused int32_t *retval)
5128 {
5129 return symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
5130 uap->path2, UIO_USERSPACE);
5131 }
5132
5133 /*
5134 * Delete a whiteout from the filesystem.
5135 * No longer supported.
5136 */
5137 int
5138 undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
5139 {
5140 return ENOTSUP;
5141 }
5142
5143 /*
5144 * Delete a name from the filesystem.
5145 */
5146 /* ARGSUSED */
5147 static int
5148 unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
5149 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
5150 {
5151 struct nameidata nd;
5152 vnode_t vp, dvp;
5153 int error;
5154 struct componentname *cnp;
5155 char *path = NULL;
5156 char *no_firmlink_path = NULL;
5157 int len_path = 0;
5158 int len_no_firmlink_path = 0;
5159 #if CONFIG_FSE
5160 fse_info finfo;
5161 struct vnode_attr va;
5162 #endif
5163 int flags;
5164 int need_event;
5165 int has_listeners;
5166 int truncated_path;
5167 int truncated_no_firmlink_path;
5168 int batched;
5169 struct vnode_attr *vap;
5170 int do_retry;
5171 int retry_count = 0;
5172 int cn_flags;
5173
5174 cn_flags = LOCKPARENT;
5175 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH)) {
5176 cn_flags |= AUDITVNPATH1;
5177 }
5178 /* If a starting dvp is passed, it trumps any fd passed. */
5179 if (start_dvp) {
5180 cn_flags |= USEDVP;
5181 }
5182
5183 #if NAMEDRSRCFORK
5184 /* unlink or delete is allowed on rsrc forks and named streams */
5185 cn_flags |= CN_ALLOWRSRCFORK;
5186 #endif
5187
5188 retry:
5189 do_retry = 0;
5190 flags = 0;
5191 need_event = 0;
5192 has_listeners = 0;
5193 truncated_path = 0;
5194 truncated_no_firmlink_path = 0;
5195 vap = NULL;
5196
5197 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
5198
5199 nd.ni_dvp = start_dvp;
5200 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
5201 cnp = &nd.ni_cnd;
5202
5203 continue_lookup:
5204 error = nameiat(&nd, fd);
5205 if (error) {
5206 return error;
5207 }
5208
5209 dvp = nd.ni_dvp;
5210 vp = nd.ni_vp;
5211
5212
5213 /* With Carbon delete semantics, busy files cannot be deleted */
5214 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
5215 flags |= VNODE_REMOVE_NODELETEBUSY;
5216 }
5217
5218 /* Skip any potential upcalls if told to. */
5219 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
5220 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
5221 }
5222
5223 if (vp) {
5224 batched = vnode_compound_remove_available(vp);
5225 /*
5226 * The root of a mounted filesystem cannot be deleted.
5227 */
5228 if ((vp->v_flag & VROOT) || (dvp->v_mount != vp->v_mount)) {
5229 error = EBUSY;
5230 goto out;
5231 }
5232
5233 #if DEVELOPMENT || DEBUG
5234 /*
5235 * XXX VSWAP: Check for entitlements or special flag here
5236 * so we can restrict access appropriately.
5237 */
5238 #else /* DEVELOPMENT || DEBUG */
5239
5240 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
5241 error = EPERM;
5242 goto out;
5243 }
5244 #endif /* DEVELOPMENT || DEBUG */
5245
5246 if (!batched) {
5247 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
5248 if (error) {
5249 if (error == ENOENT) {
5250 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
5251 do_retry = 1;
5252 retry_count++;
5253 }
5254 }
5255 goto out;
5256 }
5257 }
5258 } else {
5259 batched = 1;
5260
5261 if (!vnode_compound_remove_available(dvp)) {
5262 panic("No vp, but no compound remove?");
5263 }
5264 }
5265
5266 #if CONFIG_FSE
5267 need_event = need_fsevent(FSE_DELETE, dvp);
5268 if (need_event) {
5269 if (!batched) {
5270 if ((vp->v_flag & VISHARDLINK) == 0) {
5271 /* XXX need to get these data in batched VNOP */
5272 get_fse_info(vp, &finfo, ctx);
5273 }
5274 } else {
5275 error = vfs_get_notify_attributes(&va);
5276 if (error) {
5277 goto out;
5278 }
5279
5280 vap = &va;
5281 }
5282 }
5283 #endif
5284 has_listeners = kauth_authorize_fileop_has_listeners();
5285 if (need_event || has_listeners) {
5286 if (path == NULL) {
5287 GET_PATH(path);
5288 if (path == NULL) {
5289 error = ENOMEM;
5290 goto out;
5291 }
5292 }
5293 len_path = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
5294 if (no_firmlink_path == NULL) {
5295 GET_PATH(no_firmlink_path);
5296 if (no_firmlink_path == NULL) {
5297 error = ENOMEM;
5298 goto out;
5299 }
5300 }
5301 len_no_firmlink_path = safe_getpath_no_firmlink(dvp, nd.ni_cnd.cn_nameptr, no_firmlink_path, MAXPATHLEN, &truncated_no_firmlink_path);
5302 }
5303
5304 #if NAMEDRSRCFORK
5305 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK) {
5306 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
5307 } else
5308 #endif
5309 {
5310 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
5311 vp = nd.ni_vp;
5312 if (error == EKEEPLOOKING) {
5313 if (!batched) {
5314 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
5315 }
5316
5317 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
5318 panic("EKEEPLOOKING, but continue flag not set?");
5319 }
5320
5321 if (vnode_isdir(vp)) {
5322 error = EISDIR;
5323 goto out;
5324 }
5325 goto continue_lookup;
5326 } else if (error == ENOENT && batched) {
5327 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
5328 /*
5329 * For compound VNOPs, the authorization callback may
5330 * return ENOENT in case of racing hardlink lookups
5331 * hitting the name cache; redrive the lookup.
5332 */
5333 do_retry = 1;
5334 retry_count += 1;
5335 goto out;
5336 }
5337 }
5338 }
5339
5340 /*
5341 * Call out to allow 3rd party notification of delete.
5342 * Ignore result of kauth_authorize_fileop call.
5343 */
5344 if (!error) {
5345 if (has_listeners) {
5346 kauth_authorize_fileop(vfs_context_ucred(ctx),
5347 KAUTH_FILEOP_DELETE,
5348 (uintptr_t)vp,
5349 (uintptr_t)path);
5350 }
5351
5352 if (vp->v_flag & VISHARDLINK) {
5353 //
5354 // if a hardlink gets deleted we want to blow away the
5355 // v_parent link because the path that got us to this
5356 // instance of the link is no longer valid. this will
5357 // force the next call to get the path to ask the file
5358 // system instead of just following the v_parent link.
5359 //
5360 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
5361 }
5362
5363 #if CONFIG_FSE
5364 if (need_event) {
5365 if (vp->v_flag & VISHARDLINK) {
5366 get_fse_info(vp, &finfo, ctx);
5367 } else if (vap) {
5368 vnode_get_fse_info_from_vap(vp, &finfo, vap);
5369 }
5370 if (truncated_path) {
5371 finfo.mode |= FSE_TRUNCATED_PATH;
5372 }
5373 add_fsevent(FSE_DELETE, ctx,
5374 FSE_ARG_STRING, len_no_firmlink_path, no_firmlink_path,
5375 FSE_ARG_FINFO, &finfo,
5376 FSE_ARG_DONE);
5377 }
5378 #endif
5379 }
5380
5381 out:
5382 if (path != NULL) {
5383 RELEASE_PATH(path);
5384 path = NULL;
5385 }
5386
5387 if (no_firmlink_path != NULL) {
5388 RELEASE_PATH(no_firmlink_path);
5389 no_firmlink_path = NULL;
5390 }
5391 #if NAMEDRSRCFORK
5392 /* recycle the deleted rsrc fork vnode to force a reclaim, which
5393 * will cause its shadow file to go away if necessary.
5394 */
5395 if (vp && (vnode_isnamedstream(vp)) &&
5396 (vp->v_parent != NULLVP) &&
5397 vnode_isshadow(vp)) {
5398 vnode_recycle(vp);
5399 }
5400 #endif
5401 /*
5402 * nameidone has to happen before we vnode_put(dvp)
5403 * since it may need to release the fs_nodelock on the dvp
5404 */
5405 nameidone(&nd);
5406 vnode_put(dvp);
5407 if (vp) {
5408 vnode_put(vp);
5409 }
5410
5411 if (do_retry) {
5412 goto retry;
5413 }
5414
5415 return error;
5416 }
5417
5418 int
5419 unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
5420 enum uio_seg segflg, int unlink_flags)
5421 {
5422 return unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
5423 unlink_flags);
5424 }
5425
5426 /*
5427 * Delete a name from the filesystem using Carbon semantics.
5428 */
5429 int
5430 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
5431 {
5432 return unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
5433 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY);
5434 }
5435
5436 /*
5437 * Delete a name from the filesystem using POSIX semantics.
5438 */
5439 int
5440 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
5441 {
5442 return unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
5443 uap->path, UIO_USERSPACE, 0);
5444 }
5445
5446 int
5447 unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
5448 {
5449 if (uap->flag & ~(AT_REMOVEDIR | AT_REMOVEDIR_DATALESS)) {
5450 return EINVAL;
5451 }
5452
5453 if (uap->flag & (AT_REMOVEDIR | AT_REMOVEDIR_DATALESS)) {
5454 int unlink_flags = 0;
5455
5456 if (uap->flag & AT_REMOVEDIR_DATALESS) {
5457 unlink_flags |= VNODE_REMOVE_DATALESS_DIR;
5458 }
5459 return rmdirat_internal(vfs_context_current(), uap->fd,
5460 uap->path, UIO_USERSPACE, unlink_flags);
5461 } else {
5462 return unlinkat_internal(vfs_context_current(), uap->fd,
5463 NULLVP, uap->path, UIO_USERSPACE, 0);
5464 }
5465 }
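
/*
 * Illustrative sketch (editorial, not part of this file): from user space,
 * unlinkat(2) dispatches on AT_REMOVEDIR as shown in unlinkat() above --
 * without it the call behaves like unlink(2), with it like rmdir(2).
 * 'dirfd' and the names are hypothetical.
 */
#if 0
	/* assumed user-space headers: <unistd.h>, <fcntl.h> (for AT_*) */
	unlinkat(dirfd, "file.txt", 0);             /* remove a file */
	unlinkat(dirfd, "subdir", AT_REMOVEDIR);    /* remove an empty directory */
#endif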
5466
5467 /*
5468 * Reposition read/write file offset.
5469 */
5470 int
5471 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
5472 {
5473 struct fileproc *fp;
5474 vnode_t vp;
5475 struct vfs_context *ctx;
5476 off_t offset = uap->offset, file_size;
5477 int error;
5478
5479 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
5480 if (error == ENOTSUP) {
5481 return ESPIPE;
5482 }
5483 return error;
5484 }
5485 if (vnode_isfifo(vp)) {
5486 file_drop(uap->fd);
5487 return ESPIPE;
5488 }
5489
5490
5491 ctx = vfs_context_current();
5492 #if CONFIG_MACF
5493 if (uap->whence == L_INCR && uap->offset == 0) {
5494 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
5495 fp->f_fglob);
5496 } else {
5497 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
5498 fp->f_fglob);
5499 }
5500 if (error) {
5501 file_drop(uap->fd);
5502 return error;
5503 }
5504 #endif
5505 if ((error = vnode_getwithref(vp))) {
5506 file_drop(uap->fd);
5507 return error;
5508 }
5509
5510 switch (uap->whence) {
5511 case L_INCR:
5512 offset += fp->f_fglob->fg_offset;
5513 break;
5514 case L_XTND:
5515 if ((error = vnode_size(vp, &file_size, ctx)) != 0) {
5516 break;
5517 }
5518 offset += file_size;
5519 break;
5520 case L_SET:
5521 break;
5522 case SEEK_HOLE:
5523 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKHOLE, (caddr_t)&offset, 0, ctx);
5524 break;
5525 case SEEK_DATA:
5526 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKDATA, (caddr_t)&offset, 0, ctx);
5527 break;
5528 default:
5529 error = EINVAL;
5530 }
5531 if (error == 0) {
5532 if (uap->offset > 0 && offset < 0) {
5533 /* Incremented/relative move past max size */
5534 error = EOVERFLOW;
5535 } else {
5536 /*
5537 * Allow negative offsets on character devices, per
5538 * POSIX 1003.1-2001. Most likely for writing disk
5539 * labels.
5540 */
5541 if (offset < 0 && vp->v_type != VCHR) {
5542 /* Decremented/relative move before start */
5543 error = EINVAL;
5544 } else {
5545 /* Success */
5546 fp->f_fglob->fg_offset = offset;
5547 *retval = fp->f_fglob->fg_offset;
5548 }
5549 }
5550 }
5551
5552 /*
5553 * An lseek can affect whether data is "available to read."  Use a
5554 * hint of NOTE_NONE so that no EVFILT_VNODE events fire.
5555 */
5556 post_event_if_success(vp, error, NOTE_NONE);
5557 (void)vnode_put(vp);
5558 file_drop(uap->fd);
5559 return error;
5560 }
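
/*
 * Illustrative sketch (editorial, not part of this file): the whence values
 * handled above map to the familiar user-space names (L_SET == SEEK_SET,
 * L_INCR == SEEK_CUR, L_XTND == SEEK_END).  A relative seek whose positive
 * delta wraps the signed off_t negative fails with EOVERFLOW; seeking
 * before the start of anything but a character device fails with EINVAL.
 */
#if 0
	/* assumed user-space headers: <unistd.h>, <fcntl.h> */
	off_t cur = lseek(fd, 0, SEEK_CUR);     /* query the current offset */
	off_t end = lseek(fd, 0, SEEK_END);     /* query the file size */
	(void)lseek(fd, -1, SEEK_SET);          /* -> EINVAL on a regular file */
#endif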
5561
5562
5563 /*
5564 * Check access permissions.
5565 *
5566 * Returns: 0 Success
5567 * vnode_authorize:???
5568 */
5569 static int
5570 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
5571 {
5572 kauth_action_t action;
5573 int error;
5574
5575 /*
5576 * If just the regular access bits, convert them to something
5577 * that vnode_authorize will understand.
5578 */
5579 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
5580 action = 0;
5581 if (uflags & R_OK) {
5582 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5583 }
5584 if (uflags & W_OK) {
5585 if (vnode_isdir(vp)) {
5586 action |= KAUTH_VNODE_ADD_FILE |
5587 KAUTH_VNODE_ADD_SUBDIRECTORY;
5588 /* might want delete rights here too */
5589 } else {
5590 action |= KAUTH_VNODE_WRITE_DATA;
5591 }
5592 }
5593 if (uflags & X_OK) {
5594 if (vnode_isdir(vp)) {
5595 action |= KAUTH_VNODE_SEARCH;
5596 } else {
5597 action |= KAUTH_VNODE_EXECUTE;
5598 }
5599 }
5600 } else {
5601 /* take advantage of definition of uflags */
5602 action = uflags >> 8;
5603 }
5604
5605 #if CONFIG_MACF
5606 error = mac_vnode_check_access(ctx, vp, uflags);
5607 if (error) {
5608 return error;
5609 }
5610 #endif /* MAC */
5611
5612 /* action == 0 means only check for existence */
5613 if (action != 0) {
5614 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
5615 } else {
5616 error = 0;
5617 }
5618
5619 return error;
5620 }
5621
5622
5623
5624 /*
5625 * access_extended: Check access permissions in bulk.
5626 *
5627 * Description: uap->entries Pointer to an array of accessx
5628 * descriptor structs, plus one or
5629 * more NULL terminated strings (see
5630 * "Notes" section below).
5631 * uap->size Size of the area pointed to by
5632 * uap->entries.
5633 * uap->results Pointer to the results array.
5634 *
5635 * Returns: 0 Success
5636 * ENOMEM Insufficient memory
5637 * EINVAL Invalid arguments
5638 * namei:EFAULT Bad address
5639 * namei:ENAMETOOLONG Filename too long
5640 * namei:ENOENT No such file or directory
5641 * namei:ELOOP Too many levels of symbolic links
5642 * namei:EBADF Bad file descriptor
5643 * namei:ENOTDIR Not a directory
5644 * namei:???
5645 * access1:
5646 *
5647 * Implicit returns:
5648 * uap->results Array contents modified
5649 *
5650 * Notes: The uap->entries are structured as an arbitrary length array
5651 * of accessx descriptors, followed by one or more NULL terminated
5652 * strings
5653 *
5654 * struct accessx_descriptor[0]
5655 * ...
5656 * struct accessx_descriptor[n]
5657 * char name_data[0];
5658 *
5659 * We determine the entry count by walking the buffer containing
5660 * the uap->entries argument descriptor. For each descriptor we
5661 * see, the valid values for the offset ad_name_offset will be
5662 * in the byte range:
5663 *
5664 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5665 * to
5666 * [ uap->entries + uap->size - 2 ]
5667 *
5668 * since we must have at least one string, and the string must
5669 * be at least one character plus the NULL terminator in length.
5670 *
5671 * XXX: Need to support the check-as uid argument
5672 */
5673 int
5674 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
5675 {
5676 struct accessx_descriptor *input = NULL;
5677 errno_t *result = NULL;
5678 errno_t error = 0;
5679 int wantdelete = 0;
5680 unsigned int desc_max, desc_actual, i, j;
5681 struct vfs_context context;
5682 struct nameidata nd;
5683 int niopts;
5684 vnode_t vp = NULL;
5685 vnode_t dvp = NULL;
5686 #define ACCESSX_MAX_DESCR_ON_STACK 10
5687 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
5688
5689 context.vc_ucred = NULL;
5690
5691 /*
5692 * Validate parameters; if valid, copy the descriptor array and string
5693 * arguments into local memory. Before proceeding, the following
5694 * conditions must have been met:
5695 *
5696 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5697 * o There must be sufficient room in the request for at least one
5698 * descriptor and a one byte NUL-terminated string.
5699 * o The allocation of local storage must not fail.
5700 */
5701 if (uap->size > ACCESSX_MAX_TABLESIZE) {
5702 return ENOMEM;
5703 }
5704 if (uap->size < (sizeof(struct accessx_descriptor) + 2)) {
5705 return EINVAL;
5706 }
5707 if (uap->size <= sizeof(stack_input)) {
5708 input = stack_input;
5709 } else {
5710 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
5711 if (input == NULL) {
5712 error = ENOMEM;
5713 goto out;
5714 }
5715 }
5716 error = copyin(uap->entries, input, uap->size);
5717 if (error) {
5718 goto out;
5719 }
5720
5721 AUDIT_ARG(opaque, input, uap->size);
5722
5723 /*
5724 * Force NUL termination of the copyin buffer to avoid namei() running
5725 * off the end. If the caller passes us bogus data, they may get a
5726 * bogus result.
5727 */
5728 ((char *)input)[uap->size - 1] = 0;
5729
5730 /*
5731 * Access is defined as checking against the process' real identity,
5732 * even if operations are checking the effective identity. This
5733 * requires that we use a local vfs context.
5734 */
5735 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5736 context.vc_thread = current_thread();
5737
5738 /*
5739 * Find out how many entries we have, so we can allocate the result
5740 * array by walking the list and adjusting the count downward by the
5741 * earliest string offset we see.
5742 */
5743 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
5744 desc_actual = desc_max;
5745 for (i = 0; i < desc_actual; i++) {
5746 /*
5747 * Take the offset to the name string for this entry and
5748 * convert to an input array index, which would be one off
5749 * the end of the array if this entry was the lowest-addressed
5750 * name string.
5751 */
5752 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
5753
5754 /*
5755 * An offset greater than the max allowable offset is an error.
5756 * It is also an error for any valid entry to point
5757 * to a location prior to the end of the current entry, if
5758 * it's not a reference to the string of the previous entry.
5759 */
5760 if (j > desc_max || (j != 0 && j <= i)) {
5761 error = EINVAL;
5762 goto out;
5763 }
5764
5765 /* Also do not let ad_name_offset point to something beyond the size of the input */
5766 if (input[i].ad_name_offset >= uap->size) {
5767 error = EINVAL;
5768 goto out;
5769 }
5770
5771 /*
5772 * An offset of 0 means use the previous descriptor's offset;
5773 * this is used to chain multiple requests for the same file
5774 * to avoid multiple lookups.
5775 */
5776 if (j == 0) {
5777 /* This is not valid for the first entry */
5778 if (i == 0) {
5779 error = EINVAL;
5780 goto out;
5781 }
5782 continue;
5783 }
5784
5785 /*
5786 * If the offset of the string for this descriptor is before
5787 * what we believe is the current actual last descriptor,
5788 * then we need to adjust our estimate downward; this permits
5789 * the string table following the last descriptor to be out
5790 * of order relative to the descriptor list.
5791 */
5792 if (j < desc_actual) {
5793 desc_actual = j;
5794 }
5795 }
5796
5797 /*
5798 * We limit the actual number of descriptors we are willing to process
5799 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5800 * requested exceeds this limit, the request fails with ENOMEM.
5801 */
5802 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
5803 error = ENOMEM;
5804 goto out;
5805 }
5806 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK | M_ZERO);
5807 if (result == NULL) {
5808 error = ENOMEM;
5809 goto out;
5810 }
5811
5812 /*
5813 * Do the work by iterating over the descriptor entries we know to
5814 * at least appear to contain valid data.
5815 */
5816 error = 0;
5817 for (i = 0; i < desc_actual; i++) {
5818 /*
5819 * If the ad_name_offset is 0, then we use the previous
5820 * results to make the check; otherwise, we are looking up
5821 * a new file name.
5822 */
5823 if (input[i].ad_name_offset != 0) {
5824 /* discard old vnodes */
5825 if (vp) {
5826 vnode_put(vp);
5827 vp = NULL;
5828 }
5829 if (dvp) {
5830 vnode_put(dvp);
5831 dvp = NULL;
5832 }
5833
5834 /*
5835 * Scan forward in the descriptor list to see if we
5836 * need the parent vnode. We will need it if we are
5837 * deleting, since we must have rights to remove
5838 * entries in the parent directory, as well as the
5839 * rights to delete the object itself.
5840 */
5841 wantdelete = input[i].ad_flags & _DELETE_OK;
5842 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++) {
5843 if (input[j].ad_flags & _DELETE_OK) {
5844 wantdelete = 1;
5845 }
5846 }
5847
5848 niopts = FOLLOW | AUDITVNPATH1;
5849
5850 /* need parent for vnode_authorize for deletion test */
5851 if (wantdelete) {
5852 niopts |= WANTPARENT;
5853 }
5854
5855 /* do the lookup */
5856 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
5857 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
5858 &context);
5859 error = namei(&nd);
5860 if (!error) {
5861 vp = nd.ni_vp;
5862 if (wantdelete) {
5863 dvp = nd.ni_dvp;
5864 }
5865 }
5866 nameidone(&nd);
5867 }
5868
5869 /*
5870 * Handle lookup errors.
5871 */
5872 switch (error) {
5873 case ENOENT:
5874 case EACCES:
5875 case EPERM:
5876 case ENOTDIR:
5877 result[i] = error;
5878 break;
5879 case 0:
5880 /* run this access check */
5881 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
5882 break;
5883 default:
5884 /* fatal lookup error */
5885
5886 goto out;
5887 }
5888 }
5889
5890 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
5891
5892 /* copy out results */
5893 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
5894
5895 out:
5896 if (input && input != stack_input) {
5897 FREE(input, M_TEMP);
5898 }
5899 if (result) {
5900 FREE(result, M_TEMP);
5901 }
5902 if (vp) {
5903 vnode_put(vp);
5904 }
5905 if (dvp) {
5906 vnode_put(dvp);
5907 }
5908 if (IS_VALID_CRED(context.vc_ucred)) {
5909 kauth_cred_unref(&context.vc_ucred);
5910 }
5911 return error;
5912 }
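
/*
 * Illustrative sketch (editorial, not part of this file): one way a caller
 * could pack the uap->entries buffer described above -- a single
 * accessx_descriptor followed immediately by its NUL-terminated name
 * string.  Only the ad_name_offset and ad_flags fields used above are
 * shown; any other descriptor fields are left zeroed.  "/tmp/example" is
 * a hypothetical path.
 */
#if 0
	char buf[sizeof(struct accessx_descriptor) + sizeof("/tmp/example")];
	struct accessx_descriptor *ad = (struct accessx_descriptor *)buf;

	bzero(buf, sizeof(buf));
	ad->ad_name_offset = sizeof(struct accessx_descriptor);  /* string follows the descriptor */
	ad->ad_flags = R_OK | W_OK;                               /* access rights to check */
	strlcpy(buf + ad->ad_name_offset, "/tmp/example",
	    sizeof(buf) - ad->ad_name_offset);
	/* buf and sizeof(buf) become uap->entries and uap->size; the per-entry
	 * errno results are written to uap->results. */
#endif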
5913
5914
5915 /*
5916 * Returns: 0 Success
5917 * namei:EFAULT Bad address
5918 * namei:ENAMETOOLONG Filename too long
5919 * namei:ENOENT No such file or directory
5920 * namei:ELOOP Too many levels of symbolic links
5921 * namei:EBADF Bad file descriptor
5922 * namei:ENOTDIR Not a directory
5923 * namei:???
5924 * access1:
5925 */
5926 static int
5927 faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5928 int flag, enum uio_seg segflg)
5929 {
5930 int error;
5931 struct nameidata nd;
5932 int niopts;
5933 struct vfs_context context;
5934 #if NAMEDRSRCFORK
5935 int is_namedstream = 0;
5936 #endif
5937
5938 /*
5939 * Unless the AT_EACCESS option is used, Access is defined as checking
5940 * against the process' real identity, even if operations are checking
5941 * the effective identity. So we need to tweak the credential
5942 * in the context for that case.
5943 */
5944 if (!(flag & AT_EACCESS)) {
5945 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5946 } else {
5947 context.vc_ucred = ctx->vc_ucred;
5948 }
5949 context.vc_thread = ctx->vc_thread;
5950
5951
5952 niopts = (flag & AT_SYMLINK_NOFOLLOW ? NOFOLLOW : FOLLOW) | AUDITVNPATH1;
5953 /* need parent for vnode_authorize for deletion test */
5954 if (amode & _DELETE_OK) {
5955 niopts |= WANTPARENT;
5956 }
5957 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5958 path, &context);
5959
5960 #if NAMEDRSRCFORK
5961 /* access(F_OK) calls are allowed for resource forks. */
5962 if (amode == F_OK) {
5963 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5964 }
5965 #endif
5966 error = nameiat(&nd, fd);
5967 if (error) {
5968 goto out;
5969 }
5970
5971 #if NAMEDRSRCFORK
5972 /* Grab reference on the shadow stream file vnode to
5973 * force an inactive on release which will mark it
5974 * for recycle.
5975 */
5976 if (vnode_isnamedstream(nd.ni_vp) &&
5977 (nd.ni_vp->v_parent != NULLVP) &&
5978 vnode_isshadow(nd.ni_vp)) {
5979 is_namedstream = 1;
5980 vnode_ref(nd.ni_vp);
5981 }
5982 #endif
5983
5984 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
5985
5986 #if NAMEDRSRCFORK
5987 if (is_namedstream) {
5988 vnode_rele(nd.ni_vp);
5989 }
5990 #endif
5991
5992 vnode_put(nd.ni_vp);
5993 if (amode & _DELETE_OK) {
5994 vnode_put(nd.ni_dvp);
5995 }
5996 nameidone(&nd);
5997
5998 out:
5999 if (!(flag & AT_EACCESS)) {
6000 kauth_cred_unref(&context.vc_ucred);
6001 }
6002 return error;
6003 }
6004
6005 int
6006 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
6007 {
6008 return faccessat_internal(vfs_context_current(), AT_FDCWD,
6009 uap->path, uap->flags, 0, UIO_USERSPACE);
6010 }
6011
6012 int
6013 faccessat(__unused proc_t p, struct faccessat_args *uap,
6014 __unused int32_t *retval)
6015 {
6016 if (uap->flag & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW)) {
6017 return EINVAL;
6018 }
6019
6020 return faccessat_internal(vfs_context_current(), uap->fd,
6021 uap->path, uap->amode, uap->flag, UIO_USERSPACE);
6022 }
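
/*
 * Illustrative sketch (editorial, not part of this file): from user space,
 * faccessat(2) checks against the real uid/gid by default; AT_EACCESS
 * switches the check to the effective identity, as handled in
 * faccessat_internal() above.  'dirfd' and the name are hypothetical.
 */
#if 0
	/* assumed user-space headers: <unistd.h>, <fcntl.h> */
	faccessat(dirfd, "data.db", R_OK | W_OK, 0);            /* real ids */
	faccessat(dirfd, "data.db", R_OK | W_OK, AT_EACCESS);   /* effective ids */
#endif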
6023
6024 /*
6025 * Returns: 0 Success
6026 * EFAULT
6027 * copyout:EFAULT
6028 * namei:???
6029 * vn_stat:???
6030 */
6031 static int
6032 fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
6033 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
6034 enum uio_seg segflg, int fd, int flag)
6035 {
6036 struct nameidata nd;
6037 int follow;
6038 union {
6039 struct stat sb;
6040 struct stat64 sb64;
6041 } source = {};
6042 union {
6043 struct user64_stat user64_sb;
6044 struct user32_stat user32_sb;
6045 struct user64_stat64 user64_sb64;
6046 struct user32_stat64 user32_sb64;
6047 } dest = {};
6048 caddr_t sbp;
6049 int error, my_size;
6050 kauth_filesec_t fsec;
6051 size_t xsecurity_bufsize;
6052 void * statptr;
6053 struct fileproc *fp = NULL;
6054 int needsrealdev = 0;
6055
6056 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6057 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
6058 segflg, path, ctx);
6059
6060 #if NAMEDRSRCFORK
6061 int is_namedstream = 0;
6062 /* stat calls are allowed for resource forks. */
6063 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
6064 #endif
6065
6066 if (flag & AT_FDONLY) {
6067 vnode_t fvp;
6068
6069 error = fp_getfvp(vfs_context_proc(ctx), fd, &fp, &fvp);
6070 if (error) {
6071 return error;
6072 }
6073 if ((error = vnode_getwithref(fvp))) {
6074 file_drop(fd);
6075 return error;
6076 }
6077 nd.ni_vp = fvp;
6078 } else {
6079 error = nameiat(&nd, fd);
6080 if (error) {
6081 return error;
6082 }
6083 }
6084 fsec = KAUTH_FILESEC_NONE;
6085
6086 statptr = (void *)&source;
6087
6088 #if NAMEDRSRCFORK
6089 /* Grab reference on the shadow stream file vnode to
6090 * force an inactive on release which will mark it
6091 * for recycle.
6092 */
6093 if (vnode_isnamedstream(nd.ni_vp) &&
6094 (nd.ni_vp->v_parent != NULLVP) &&
6095 vnode_isshadow(nd.ni_vp)) {
6096 is_namedstream = 1;
6097 vnode_ref(nd.ni_vp);
6098 }
6099 #endif
6100
6101 needsrealdev = flag & AT_REALDEV ? 1 : 0;
6102 if (fp && (xsecurity == USER_ADDR_NULL)) {
6103 /*
6104 * If the caller has the file open, and is not
6105 * requesting extended security information, we are
6106 * going to let them get the basic stat information.
6107 */
6108 error = vn_stat_noauth(nd.ni_vp, statptr, NULL, isstat64, needsrealdev, ctx,
6109 fp->f_fglob->fg_cred);
6110 } else {
6111 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL),
6112 isstat64, needsrealdev, ctx);
6113 }
6114
6115 #if NAMEDRSRCFORK
6116 if (is_namedstream) {
6117 vnode_rele(nd.ni_vp);
6118 }
6119 #endif
6120 vnode_put(nd.ni_vp);
6121 nameidone(&nd);
6122 if (fp) {
6123 file_drop(fd);
6124 fp = NULL;
6125 }
6126
6127 if (error) {
6128 return error;
6129 }
6130 /* Zap spare fields */
6131 if (isstat64 != 0) {
6132 source.sb64.st_lspare = 0;
6133 source.sb64.st_qspare[0] = 0LL;
6134 source.sb64.st_qspare[1] = 0LL;
6135 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
6136 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
6137 my_size = sizeof(dest.user64_sb64);
6138 sbp = (caddr_t)&dest.user64_sb64;
6139 } else {
6140 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
6141 my_size = sizeof(dest.user32_sb64);
6142 sbp = (caddr_t)&dest.user32_sb64;
6143 }
6144 /*
6145 * Check if we raced (post lookup) against the last unlink of a file.
6146 */
6147 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
6148 source.sb64.st_nlink = 1;
6149 }
6150 } else {
6151 source.sb.st_lspare = 0;
6152 source.sb.st_qspare[0] = 0LL;
6153 source.sb.st_qspare[1] = 0LL;
6154 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
6155 munge_user64_stat(&source.sb, &dest.user64_sb);
6156 my_size = sizeof(dest.user64_sb);
6157 sbp = (caddr_t)&dest.user64_sb;
6158 } else {
6159 munge_user32_stat(&source.sb, &dest.user32_sb);
6160 my_size = sizeof(dest.user32_sb);
6161 sbp = (caddr_t)&dest.user32_sb;
6162 }
6163
6164 /*
6165 * Check if we raced (post lookup) against the last unlink of a file.
6166 */
6167 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
6168 source.sb.st_nlink = 1;
6169 }
6170 }
6171 if ((error = copyout(sbp, ub, my_size)) != 0) {
6172 goto out;
6173 }
6174
6175 /* caller wants extended security information? */
6176 if (xsecurity != USER_ADDR_NULL) {
6177 /* did we get any? */
6178 if (fsec == KAUTH_FILESEC_NONE) {
6179 if (susize(xsecurity_size, 0) != 0) {
6180 error = EFAULT;
6181 goto out;
6182 }
6183 } else {
6184 /* find the user buffer size */
6185 xsecurity_bufsize = fusize(xsecurity_size);
6186
6187 /* copy out the actual data size */
6188 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
6189 error = EFAULT;
6190 goto out;
6191 }
6192
6193 /* if the caller supplied enough room, copy out to it */
6194 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
6195 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
6196 }
6197 }
6198 }
6199 out:
6200 if (fsec != KAUTH_FILESEC_NONE) {
6201 kauth_filesec_free(fsec);
6202 }
6203 return error;
6204 }
6205
6206 /*
6207 * stat_extended: Get file status; with extended security (ACL).
6208 *
6209 * Parameters: p (ignored)
6210 * uap User argument descriptor (see below)
6211 * retval (ignored)
6212 *
6213 * Indirect: uap->path Path of file to get status from
6214 * uap->ub User buffer (holds file status info)
6215 * uap->xsecurity ACL to get (extended security)
6216 * uap->xsecurity_size Size of ACL
6217 *
6218 * Returns: 0 Success
6219 * !0 errno value
6220 *
6221 */
6222 int
6223 stat_extended(__unused proc_t p, struct stat_extended_args *uap,
6224 __unused int32_t *retval)
6225 {
6226 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6227 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
6228 0);
6229 }
6230
6231 /*
6232 * Returns: 0 Success
6233 * fstatat_internal:??? [see fstatat_internal() in this file]
6234 */
6235 int
6236 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
6237 {
6238 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6239 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0);
6240 }
6241
6242 int
6243 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
6244 {
6245 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6246 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0);
6247 }
6248
6249 /*
6250 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
6251 *
6252 * Parameters: p (ignored)
6253 * uap User argument descriptor (see below)
6254 * retval (ignored)
6255 *
6256 * Indirect: uap->path Path of file to get status from
6257 * uap->ub User buffer (holds file status info)
6258 * uap->xsecurity ACL to get (extended security)
6259 * uap->xsecurity_size Size of ACL
6260 *
6261 * Returns: 0 Success
6262 * !0 errno value
6263 *
6264 */
6265 int
6266 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
6267 {
6268 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6269 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
6270 0);
6271 }
6272
6273 /*
6274 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
6275 *
6276 * Parameters: p (ignored)
6277 * uap User argument descriptor (see below)
6278 * retval (ignored)
6279 *
6280 * Indirect: uap->path Path of file to get status from
6281 * uap->ub User buffer (holds file status info)
6282 * uap->xsecurity ACL to get (extended security)
6283 * uap->xsecurity_size Size of ACL
6284 *
6285 * Returns: 0 Success
6286 * !0 errno value
6287 *
6288 */
6289 int
6290 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
6291 {
6292 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6293 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
6294 AT_SYMLINK_NOFOLLOW);
6295 }
6296
6297 /*
6298 * Get file status; this version does not follow links.
6299 */
6300 int
6301 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
6302 {
6303 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6304 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW);
6305 }
6306
6307 int
6308 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
6309 {
6310 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6311 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW);
6312 }
6313
6314 /*
6315 * lstat64_extended: Get file status; can handle large inode numbers; does not
6316 * follow links; with extended security (ACL).
6317 *
6318 * Parameters: p (ignored)
6319 * uap User argument descriptor (see below)
6320 * retval (ignored)
6321 *
6322 * Indirect: uap->path Path of file to get status from
6323 * uap->ub User buffer (holds file status info)
6324 * uap->xsecurity ACL to get (extended security)
6325 * uap->xsecurity_size Size of ACL
6326 *
6327 * Returns: 0 Success
6328 * !0 errno value
6329 *
6330 */
6331 int
6332 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
6333 {
6334 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6335 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
6336 AT_SYMLINK_NOFOLLOW);
6337 }
6338
6339 int
6340 fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
6341 {
6342 if (uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_REALDEV | AT_FDONLY)) {
6343 return EINVAL;
6344 }
6345
6346 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6347 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag);
6348 }
6349
6350 int
6351 fstatat64(__unused proc_t p, struct fstatat64_args *uap,
6352 __unused int32_t *retval)
6353 {
6354 if (uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_REALDEV | AT_FDONLY)) {
6355 return EINVAL;
6356 }
6357
6358 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6359 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag);
6360 }
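/*
 * Illustrative userspace sketch (assuming a POSIX userland; not a kernel
 * path): the stat()/lstat()/fstatat() wrappers above all funnel into
 * fstatat_internal() with AT_FDCWD or a caller-supplied directory fd.
 * A typical call that refuses to follow a trailing symlink:
 *
 *	#include <fcntl.h>
 *	#include <sys/stat.h>
 *
 *	// dirfd is a previously opened directory descriptor
 *	struct stat sb;
 *	if (fstatat(dirfd, "entry", &sb, AT_SYMLINK_NOFOLLOW) == -1) {
 *		// errno carries the value returned by fstatat_internal()
 *	}
 */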
6361
6362 /*
6363 * Get configurable pathname variables.
6364 *
6365 * Returns: 0 Success
6366 * namei:???
6367 * vn_pathconf:???
6368 *
6369 * Notes: Global implementation constants are intended to be
6370 * implemented in this function directly; all other constants
6371 * are per-FS implementation, and therefore must be handled in
6372 * each respective FS, instead.
6373 *
6374 * XXX We implement some things globally right now that should actually be
6375 * XXX per-FS; we will need to deal with this at some point.
6376 */
6377 /* ARGSUSED */
6378 int
6379 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
6380 {
6381 int error;
6382 struct nameidata nd;
6383 vfs_context_t ctx = vfs_context_current();
6384
6385 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
6386 UIO_USERSPACE, uap->path, ctx);
6387 error = namei(&nd);
6388 if (error) {
6389 return error;
6390 }
6391
6392 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
6393
6394 vnode_put(nd.ni_vp);
6395 nameidone(&nd);
6396 return error;
6397 }
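/*
 * Illustrative userspace sketch (assuming the usual libc wrappers): the
 * pathconf() entry point above is reached through pathconf(3)/fpathconf(3).
 * A -1 return with errno left unchanged means "no limit", so errno must be
 * cleared first:
 *
 *	#include <unistd.h>
 *	#include <errno.h>
 *
 *	errno = 0;
 *	long name_max = pathconf("/tmp", _PC_NAME_MAX);
 *	if (name_max == -1 && errno != 0) {
 *		// genuine failure propagated from vn_pathconf()
 *	}
 */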
6398
6399 /*
6400 * Return target name of a symbolic link.
6401 */
6402 /* ARGSUSED */
6403 static int
6404 readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
6405 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
6406 int *retval)
6407 {
6408 vnode_t vp;
6409 uio_t auio;
6410 int error;
6411 struct nameidata nd;
6412 char uio_buf[UIO_SIZEOF(1)];
6413
6414 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
6415 seg, path, ctx);
6416
6417 error = nameiat(&nd, fd);
6418 if (error) {
6419 return error;
6420 }
6421 vp = nd.ni_vp;
6422
6423 nameidone(&nd);
6424
6425 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
6426 &uio_buf[0], sizeof(uio_buf));
6427 uio_addiov(auio, buf, bufsize);
6428 if (vp->v_type != VLNK) {
6429 error = EINVAL;
6430 } else {
6431 #if CONFIG_MACF
6432 error = mac_vnode_check_readlink(ctx, vp);
6433 #endif
6434 if (error == 0) {
6435 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
6436 ctx);
6437 }
6438 if (error == 0) {
6439 error = VNOP_READLINK(vp, auio, ctx);
6440 }
6441 }
6442 vnode_put(vp);
6443
6444 *retval = bufsize - (int)uio_resid(auio);
6445 return error;
6446 }
6447
6448 int
6449 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
6450 {
6451 enum uio_seg procseg;
6452
6453 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6454 return readlinkat_internal(vfs_context_current(), AT_FDCWD,
6455 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
6456 uap->count, procseg, retval);
6457 }
6458
6459 int
6460 readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
6461 {
6462 enum uio_seg procseg;
6463
6464 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6465 return readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
6466 procseg, uap->buf, uap->bufsize, procseg, retval);
6467 }
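/*
 * Illustrative userspace sketch (assuming a POSIX userland): as on the
 * kernel side above, readlink(2) reports the number of bytes copied and
 * does not NUL-terminate the buffer, so callers terminate it themselves:
 *
 *	#include <sys/param.h>
 *	#include <unistd.h>
 *
 *	char target[MAXPATHLEN];
 *	ssize_t n = readlink("/var", target, sizeof(target) - 1);
 *	if (n >= 0) {
 *		target[n] = '\0';
 *	}
 */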
6468
6469 /*
6470 * Change file flags, the deep inner layer.
6471 */
6472 static int
6473 chflags0(vnode_t vp, struct vnode_attr *va,
6474 int (*setattr)(vnode_t, void *, vfs_context_t),
6475 void *arg, vfs_context_t ctx)
6476 {
6477 kauth_action_t action = 0;
6478 int error;
6479
6480 #if CONFIG_MACF
6481 error = mac_vnode_check_setflags(ctx, vp, va->va_flags);
6482 if (error) {
6483 goto out;
6484 }
6485 #endif
6486
6487 /* request authorisation, disregard immutability */
6488 if ((error = vnode_authattr(vp, va, &action, ctx)) != 0) {
6489 goto out;
6490 }
6491 /*
6492 * Request that the auth layer disregard those file flags it's allowed to when
6493 * authorizing this operation; we need to do this in order to be able to
6494 * clear immutable flags.
6495 */
6496 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0)) {
6497 goto out;
6498 }
6499 error = (*setattr)(vp, arg, ctx);
6500
6501 #if CONFIG_MACF
6502 if (error == 0) {
6503 mac_vnode_notify_setflags(ctx, vp, va->va_flags);
6504 }
6505 #endif
6506
6507 out:
6508 return error;
6509 }
6510
6511 /*
6512 * Change file flags.
6513 *
6514 * NOTE: this will vnode_put() `vp'
6515 */
6516 static int
6517 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
6518 {
6519 struct vnode_attr va;
6520 int error;
6521
6522 VATTR_INIT(&va);
6523 VATTR_SET(&va, va_flags, flags);
6524
6525 error = chflags0(vp, &va, (void *)vnode_setattr, &va, ctx);
6526 vnode_put(vp);
6527
6528 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
6529 error = ENOTSUP;
6530 }
6531
6532 return error;
6533 }
6534
6535 /*
6536 * Change flags of a file given a path name.
6537 */
6538 /* ARGSUSED */
6539 int
6540 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
6541 {
6542 vnode_t vp;
6543 vfs_context_t ctx = vfs_context_current();
6544 int error;
6545 struct nameidata nd;
6546
6547 AUDIT_ARG(fflags, uap->flags);
6548 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
6549 UIO_USERSPACE, uap->path, ctx);
6550 error = namei(&nd);
6551 if (error) {
6552 return error;
6553 }
6554 vp = nd.ni_vp;
6555 nameidone(&nd);
6556
6557 /* we don't vnode_put() here because chflags1 does so internally */
6558 error = chflags1(vp, uap->flags, ctx);
6559
6560 return error;
6561 }
6562
6563 /*
6564 * Change flags of a file given a file descriptor.
6565 */
6566 /* ARGSUSED */
6567 int
6568 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
6569 {
6570 vnode_t vp;
6571 int error;
6572
6573 AUDIT_ARG(fd, uap->fd);
6574 AUDIT_ARG(fflags, uap->flags);
6575 if ((error = file_vnode(uap->fd, &vp))) {
6576 return error;
6577 }
6578
6579 if ((error = vnode_getwithref(vp))) {
6580 file_drop(uap->fd);
6581 return error;
6582 }
6583
6584 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6585
6586 /* we don't vnode_put() here because chflags1 does so internally */
6587 error = chflags1(vp, uap->flags, vfs_context_current());
6588
6589 file_drop(uap->fd);
6590 return error;
6591 }
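/*
 * Illustrative userspace sketch (assuming the BSD chflags(2) interface in
 * <sys/stat.h>): the entry points above let a sufficiently privileged
 * caller both set and clear immutability, because chflags0() authorizes
 * with KAUTH_VNODE_NOIMMUTABLE:
 *
 *	#include <sys/stat.h>
 *	#include <unistd.h>
 *
 *	// mark a file user-immutable, then clear the flag again
 *	if (chflags("/tmp/pinned", UF_IMMUTABLE) == 0) {
 *		(void)chflags("/tmp/pinned", 0);
 *	}
 */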
6592
6593 /*
6594 * Change security information on a filesystem object.
6595 *
6596 * Returns: 0 Success
6597 * EPERM Operation not permitted
6598 * vnode_authattr:??? [anything vnode_authattr can return]
6599 * vnode_authorize:??? [anything vnode_authorize can return]
6600 * vnode_setattr:??? [anything vnode_setattr can return]
6601 *
6602 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
6603 * translated to EPERM before being returned.
6604 */
6605 static int
6606 chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
6607 {
6608 kauth_action_t action;
6609 int error;
6610
6611 AUDIT_ARG(mode, vap->va_mode);
6612 /* XXX audit new args */
6613
6614 #if NAMEDSTREAMS
6615 /* chmod calls are not allowed for resource forks. */
6616 if (vp->v_flag & VISNAMEDSTREAM) {
6617 return EPERM;
6618 }
6619 #endif
6620
6621 #if CONFIG_MACF
6622 if (VATTR_IS_ACTIVE(vap, va_mode) &&
6623 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0) {
6624 return error;
6625 }
6626
6627 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
6628 if ((error = mac_vnode_check_setowner(ctx, vp,
6629 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
6630 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1))) {
6631 return error;
6632 }
6633 }
6634
6635 if (VATTR_IS_ACTIVE(vap, va_acl) &&
6636 (error = mac_vnode_check_setacl(ctx, vp, vap->va_acl))) {
6637 return error;
6638 }
6639 #endif
6640
6641 /* make sure that the caller is allowed to set this security information */
6642 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
6643 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6644 if (error == EACCES) {
6645 error = EPERM;
6646 }
6647 return error;
6648 }
6649
6650 if ((error = vnode_setattr(vp, vap, ctx)) != 0) {
6651 return error;
6652 }
6653
6654 #if CONFIG_MACF
6655 if (VATTR_IS_ACTIVE(vap, va_mode)) {
6656 mac_vnode_notify_setmode(ctx, vp, (mode_t)vap->va_mode);
6657 }
6658
6659 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
6660 mac_vnode_notify_setowner(ctx, vp,
6661 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
6662 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1);
6663 }
6664
6665 if (VATTR_IS_ACTIVE(vap, va_acl)) {
6666 mac_vnode_notify_setacl(ctx, vp, vap->va_acl);
6667 }
6668 #endif
6669
6670 return error;
6671 }
6672
6673
6674 /*
6675 * Change mode of a file given a path name.
6676 *
6677 * Returns: 0 Success
6678 * namei:??? [anything namei can return]
6679 * chmod_vnode:??? [anything chmod_vnode can return]
6680 */
6681 static int
6682 chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
6683 int fd, int flag, enum uio_seg segflg)
6684 {
6685 struct nameidata nd;
6686 int follow, error;
6687
6688 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6689 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
6690 segflg, path, ctx);
6691 if ((error = nameiat(&nd, fd))) {
6692 return error;
6693 }
6694 error = chmod_vnode(ctx, nd.ni_vp, vap);
6695 vnode_put(nd.ni_vp);
6696 nameidone(&nd);
6697 return error;
6698 }
6699
6700 /*
6701 * chmod_extended: Change the mode of a file given a path name; with extended
6702 * argument list (including extended security (ACL)).
6703 *
6704 * Parameters: p Process requesting the open
6705 * uap User argument descriptor (see below)
6706 * retval (ignored)
6707 *
6708 * Indirect: uap->path Path to object (same as 'chmod')
6709 * uap->uid UID to set
6710 * uap->gid GID to set
6711 * uap->mode File mode to set (same as 'chmod')
6712 * uap->xsecurity ACL to set (or delete)
6713 *
6714 * Returns: 0 Success
6715 * !0 errno value
6716 *
6717 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6718 *
6719 * XXX: We should enumerate the possible errno values here, and where
6720 * in the code they originated.
6721 */
6722 int
6723 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
6724 {
6725 int error;
6726 struct vnode_attr va;
6727 kauth_filesec_t xsecdst;
6728
6729 AUDIT_ARG(owner, uap->uid, uap->gid);
6730
6731 VATTR_INIT(&va);
6732 if (uap->mode != -1) {
6733 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6734 }
6735 if (uap->uid != KAUTH_UID_NONE) {
6736 VATTR_SET(&va, va_uid, uap->uid);
6737 }
6738 if (uap->gid != KAUTH_GID_NONE) {
6739 VATTR_SET(&va, va_gid, uap->gid);
6740 }
6741
6742 xsecdst = NULL;
6743 switch (uap->xsecurity) {
6744 /* explicit remove request */
6745 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6746 VATTR_SET(&va, va_acl, NULL);
6747 break;
6748 /* not being set */
6749 case USER_ADDR_NULL:
6750 break;
6751 default:
6752 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
6753 return error;
6754 }
6755 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6756 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
6757 }
6758
6759 error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
6760 UIO_USERSPACE);
6761
6762 if (xsecdst != NULL) {
6763 kauth_filesec_free(xsecdst);
6764 }
6765 return error;
6766 }
6767
6768 /*
6769 * Returns: 0 Success
6770 * chmodat:??? [anything chmodat can return]
6771 */
6772 static int
6773 fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
6774 int flag, enum uio_seg segflg)
6775 {
6776 struct vnode_attr va;
6777
6778 VATTR_INIT(&va);
6779 VATTR_SET(&va, va_mode, mode & ALLPERMS);
6780
6781 return chmodat(ctx, path, &va, fd, flag, segflg);
6782 }
6783
6784 int
6785 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
6786 {
6787 return fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6788 AT_FDCWD, 0, UIO_USERSPACE);
6789 }
6790
6791 int
6792 fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
6793 {
6794 if (uap->flag & ~AT_SYMLINK_NOFOLLOW) {
6795 return EINVAL;
6796 }
6797
6798 return fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6799 uap->fd, uap->flag, UIO_USERSPACE);
6800 }
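/*
 * Illustrative userspace sketch (assuming a POSIX userland): fchmodat()
 * above accepts only AT_SYMLINK_NOFOLLOW in 'flag'; anything else fails
 * with EINVAL before the lookup.  Note that chmod_vnode() reports
 * authorization failures as EPERM rather than EACCES:
 *
 *	#include <fcntl.h>
 *	#include <sys/stat.h>
 *
 *	// dirfd is a previously opened directory descriptor
 *	if (fchmodat(dirfd, "conf", 0640, AT_SYMLINK_NOFOLLOW) == -1) {
 *		// EPERM here may be a translated EACCES (see chmod_vnode)
 *	}
 */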
6801
6802 /*
6803 * Change mode of a file given a file descriptor.
6804 */
6805 static int
6806 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
6807 {
6808 vnode_t vp;
6809 int error;
6810
6811 AUDIT_ARG(fd, fd);
6812
6813 if ((error = file_vnode(fd, &vp)) != 0) {
6814 return error;
6815 }
6816 if ((error = vnode_getwithref(vp)) != 0) {
6817 file_drop(fd);
6818 return error;
6819 }
6820 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6821
6822 error = chmod_vnode(vfs_context_current(), vp, vap);
6823 (void)vnode_put(vp);
6824 file_drop(fd);
6825
6826 return error;
6827 }
6828
6829 /*
6830 * fchmod_extended: Change mode of a file given a file descriptor; with
6831 * extended argument list (including extended security (ACL)).
6832 *
6833 * Parameters: p Process requesting to change file mode
6834 * uap User argument descriptor (see below)
6835 * retval (ignored)
6836 *
6837 * Indirect: uap->mode File mode to set (same as 'chmod')
6838 * uap->uid UID to set
6839 * uap->gid GID to set
6840 * uap->xsecurity ACL to set (or delete)
6841 * uap->fd File descriptor of file to change mode
6842 *
6843 * Returns: 0 Success
6844 * !0 errno value
6845 *
6846 */
6847 int
6848 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
6849 {
6850 int error;
6851 struct vnode_attr va;
6852 kauth_filesec_t xsecdst;
6853
6854 AUDIT_ARG(owner, uap->uid, uap->gid);
6855
6856 VATTR_INIT(&va);
6857 if (uap->mode != -1) {
6858 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6859 }
6860 if (uap->uid != KAUTH_UID_NONE) {
6861 VATTR_SET(&va, va_uid, uap->uid);
6862 }
6863 if (uap->gid != KAUTH_GID_NONE) {
6864 VATTR_SET(&va, va_gid, uap->gid);
6865 }
6866
6867 xsecdst = NULL;
6868 switch (uap->xsecurity) {
6869 case USER_ADDR_NULL:
6870 VATTR_SET(&va, va_acl, NULL);
6871 break;
6872 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6873 VATTR_SET(&va, va_acl, NULL);
6874 break;
6875 /* not being set */
6876 case CAST_USER_ADDR_T(-1):
6877 break;
6878 default:
6879 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
6880 return error;
6881 }
6882 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6883 }
6884
6885 error = fchmod1(p, uap->fd, &va);
6886
6887
6888 switch (uap->xsecurity) {
6889 case USER_ADDR_NULL:
6890 case CAST_USER_ADDR_T(-1):
6891 break;
6892 default:
6893 if (xsecdst != NULL) {
6894 kauth_filesec_free(xsecdst);
6895 }
6896 }
6897 return error;
6898 }
6899
6900 int
6901 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
6902 {
6903 struct vnode_attr va;
6904
6905 VATTR_INIT(&va);
6906 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6907
6908 return fchmod1(p, uap->fd, &va);
6909 }
6910
6911
6912 /*
6913 * Set ownership given a path name.
6914 */
6915 /* ARGSUSED */
6916 static int
6917 fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
6918 gid_t gid, int flag, enum uio_seg segflg)
6919 {
6920 vnode_t vp;
6921 struct vnode_attr va;
6922 int error;
6923 struct nameidata nd;
6924 int follow;
6925 kauth_action_t action;
6926
6927 AUDIT_ARG(owner, uid, gid);
6928
6929 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6930 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
6931 path, ctx);
6932 error = nameiat(&nd, fd);
6933 if (error) {
6934 return error;
6935 }
6936 vp = nd.ni_vp;
6937
6938 nameidone(&nd);
6939
6940 VATTR_INIT(&va);
6941 if (uid != (uid_t)VNOVAL) {
6942 VATTR_SET(&va, va_uid, uid);
6943 }
6944 if (gid != (gid_t)VNOVAL) {
6945 VATTR_SET(&va, va_gid, gid);
6946 }
6947
6948 #if CONFIG_MACF
6949 error = mac_vnode_check_setowner(ctx, vp, uid, gid);
6950 if (error) {
6951 goto out;
6952 }
6953 #endif
6954
6955 /* preflight and authorize attribute changes */
6956 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
6957 goto out;
6958 }
6959 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6960 goto out;
6961 }
6962 error = vnode_setattr(vp, &va, ctx);
6963
6964 #if CONFIG_MACF
6965 if (error == 0) {
6966 mac_vnode_notify_setowner(ctx, vp, uid, gid);
6967 }
6968 #endif
6969
6970 out:
6971 /*
6972 * EACCES is only allowed from namei(); permissions failure should
6973 * return EPERM, so we need to translate the error code.
6974 */
6975 if (error == EACCES) {
6976 error = EPERM;
6977 }
6978
6979 vnode_put(vp);
6980 return error;
6981 }
6982
6983 int
6984 chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
6985 {
6986 return fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6987 uap->uid, uap->gid, 0, UIO_USERSPACE);
6988 }
6989
6990 int
6991 lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
6992 {
6993 return fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6994 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE);
6995 }
6996
6997 int
6998 fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6999 {
7000 if (uap->flag & ~AT_SYMLINK_NOFOLLOW) {
7001 return EINVAL;
7002 }
7003
7004 return fchownat_internal(vfs_context_current(), uap->fd, uap->path,
7005 uap->uid, uap->gid, uap->flag, UIO_USERSPACE);
7006 }
7007
7008 /*
7009 * Set ownership given a file descriptor.
7010 */
7011 /* ARGSUSED */
7012 int
7013 fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
7014 {
7015 struct vnode_attr va;
7016 vfs_context_t ctx = vfs_context_current();
7017 vnode_t vp;
7018 int error;
7019 kauth_action_t action;
7020
7021 AUDIT_ARG(owner, uap->uid, uap->gid);
7022 AUDIT_ARG(fd, uap->fd);
7023
7024 if ((error = file_vnode(uap->fd, &vp))) {
7025 return error;
7026 }
7027
7028 if ((error = vnode_getwithref(vp))) {
7029 file_drop(uap->fd);
7030 return error;
7031 }
7032 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7033
7034 VATTR_INIT(&va);
7035 if (uap->uid != VNOVAL) {
7036 VATTR_SET(&va, va_uid, uap->uid);
7037 }
7038 if (uap->gid != VNOVAL) {
7039 VATTR_SET(&va, va_gid, uap->gid);
7040 }
7041
7042 #if NAMEDSTREAMS
7043 /* chown calls are not allowed for resource forks. */
7044 if (vp->v_flag & VISNAMEDSTREAM) {
7045 error = EPERM;
7046 goto out;
7047 }
7048 #endif
7049
7050 #if CONFIG_MACF
7051 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
7052 if (error) {
7053 goto out;
7054 }
7055 #endif
7056
7057 /* preflight and authorize attribute changes */
7058 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
7059 goto out;
7060 }
7061 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
7062 if (error == EACCES) {
7063 error = EPERM;
7064 }
7065 goto out;
7066 }
7067 error = vnode_setattr(vp, &va, ctx);
7068
7069 #if CONFIG_MACF
7070 if (error == 0) {
7071 mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid);
7072 }
7073 #endif
7074
7075 out:
7076 (void)vnode_put(vp);
7077 file_drop(uap->fd);
7078 return error;
7079 }
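/*
 * Illustrative userspace sketch (assuming a POSIX userland): chown(),
 * lchown() and fchownat() all reach fchownat_internal() above; lchown()
 * simply passes AT_SYMLINK_NOFOLLOW so the link itself is modified.
 * Passing -1 for either id leaves that id unchanged (VNOVAL in the kernel):
 *
 *	#include <unistd.h>
 *
 *	// staff_gid is a placeholder; change only the group of the symlink
 *	if (lchown("/tmp/link", (uid_t)-1, staff_gid) == -1) {
 *		// EACCES from the auth layer is reported as EPERM (see above)
 *	}
 */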
7080
7081 static int
7082 getutimes(user_addr_t usrtvp, struct timespec *tsp)
7083 {
7084 int error;
7085
7086 if (usrtvp == USER_ADDR_NULL) {
7087 struct timeval old_tv;
7088 /* XXX Y2038 bug because of microtime argument */
7089 microtime(&old_tv);
7090 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
7091 tsp[1] = tsp[0];
7092 } else {
7093 if (IS_64BIT_PROCESS(current_proc())) {
7094 struct user64_timeval tv[2];
7095 error = copyin(usrtvp, (void *)tv, sizeof(tv));
7096 if (error) {
7097 return error;
7098 }
7099 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
7100 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
7101 } else {
7102 struct user32_timeval tv[2];
7103 error = copyin(usrtvp, (void *)tv, sizeof(tv));
7104 if (error) {
7105 return error;
7106 }
7107 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
7108 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
7109 }
7110 }
7111 return 0;
7112 }
7113
7114 static int
7115 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
7116 int nullflag)
7117 {
7118 int error;
7119 struct vnode_attr va;
7120 kauth_action_t action;
7121
7122 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7123
7124 VATTR_INIT(&va);
7125 VATTR_SET(&va, va_access_time, ts[0]);
7126 VATTR_SET(&va, va_modify_time, ts[1]);
7127 if (nullflag) {
7128 va.va_vaflags |= VA_UTIMES_NULL;
7129 }
7130
7131 #if NAMEDSTREAMS
7132 /* utimes calls are not allowed for resource forks. */
7133 if (vp->v_flag & VISNAMEDSTREAM) {
7134 error = EPERM;
7135 goto out;
7136 }
7137 #endif
7138
7139 #if CONFIG_MACF
7140 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
7141 if (error) {
7142 goto out;
7143 }
7144 #endif
7145 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
7146 if (!nullflag && error == EACCES) {
7147 error = EPERM;
7148 }
7149 goto out;
7150 }
7151
7152 /* since we may not need to auth anything, check here */
7153 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
7154 if (!nullflag && error == EACCES) {
7155 error = EPERM;
7156 }
7157 goto out;
7158 }
7159 error = vnode_setattr(vp, &va, ctx);
7160
7161 #if CONFIG_MACF
7162 if (error == 0) {
7163 mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]);
7164 }
7165 #endif
7166
7167 out:
7168 return error;
7169 }
7170
7171 /*
7172 * Set the access and modification times of a file.
7173 */
7174 /* ARGSUSED */
7175 int
7176 utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
7177 {
7178 struct timespec ts[2];
7179 user_addr_t usrtvp;
7180 int error;
7181 struct nameidata nd;
7182 vfs_context_t ctx = vfs_context_current();
7183
7184 /*
7185 * AUDIT: Needed to change the order of operations to do the
7186 * name lookup first because auditing wants the path.
7187 */
7188 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
7189 UIO_USERSPACE, uap->path, ctx);
7190 error = namei(&nd);
7191 if (error) {
7192 return error;
7193 }
7194 nameidone(&nd);
7195
7196 /*
7197 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
7198 * the current time instead.
7199 */
7200 usrtvp = uap->tptr;
7201 if ((error = getutimes(usrtvp, ts)) != 0) {
7202 goto out;
7203 }
7204
7205 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
7206
7207 out:
7208 vnode_put(nd.ni_vp);
7209 return error;
7210 }
7211
7212 /*
7213 * Set the access and modification times of a file.
7214 */
7215 /* ARGSUSED */
7216 int
7217 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
7218 {
7219 struct timespec ts[2];
7220 vnode_t vp;
7221 user_addr_t usrtvp;
7222 int error;
7223
7224 AUDIT_ARG(fd, uap->fd);
7225 usrtvp = uap->tptr;
7226 if ((error = getutimes(usrtvp, ts)) != 0) {
7227 return error;
7228 }
7229 if ((error = file_vnode(uap->fd, &vp)) != 0) {
7230 return error;
7231 }
7232 if ((error = vnode_getwithref(vp))) {
7233 file_drop(uap->fd);
7234 return error;
7235 }
7236
7237 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
7238 vnode_put(vp);
7239 file_drop(uap->fd);
7240 return error;
7241 }
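/*
 * Illustrative userspace sketch (assuming a POSIX userland): getutimes()
 * above substitutes the current time when the user pointer is NULL, and
 * setutimes() then sets VA_UTIMES_NULL so write permission is sufficient;
 * with explicit times the caller generally must own the file:
 *
 *	#include <stddef.h>
 *	#include <sys/time.h>
 *
 *	// "touch" semantics: both timestamps set to now, write access suffices
 *	(void)utimes("/tmp/stamp", NULL);
 *
 *	// explicit times: EACCES is promoted to EPERM for non-owners
 *	struct timeval tv[2] = {{ 0, 0 }, { 0, 0 }};
 *	(void)utimes("/tmp/stamp", tv);
 */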
7242
7243 /*
7244 * Truncate a file given its path name.
7245 */
7246 /* ARGSUSED */
7247 int
7248 truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
7249 {
7250 vnode_t vp;
7251 struct vnode_attr va;
7252 vfs_context_t ctx = vfs_context_current();
7253 int error;
7254 struct nameidata nd;
7255 kauth_action_t action;
7256
7257 if (uap->length < 0) {
7258 return EINVAL;
7259 }
7260 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
7261 UIO_USERSPACE, uap->path, ctx);
7262 if ((error = namei(&nd))) {
7263 return error;
7264 }
7265 vp = nd.ni_vp;
7266
7267 nameidone(&nd);
7268
7269 VATTR_INIT(&va);
7270 VATTR_SET(&va, va_data_size, uap->length);
7271
7272 #if CONFIG_MACF
7273 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
7274 if (error) {
7275 goto out;
7276 }
7277 #endif
7278
7279 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
7280 goto out;
7281 }
7282 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
7283 goto out;
7284 }
7285 error = vnode_setattr(vp, &va, ctx);
7286
7287 #if CONFIG_MACF
7288 if (error == 0) {
7289 mac_vnode_notify_truncate(ctx, NOCRED, vp);
7290 }
7291 #endif
7292
7293 out:
7294 vnode_put(vp);
7295 return error;
7296 }
7297
7298 /*
7299 * Truncate a file given a file descriptor.
7300 */
7301 /* ARGSUSED */
7302 int
7303 ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
7304 {
7305 vfs_context_t ctx = vfs_context_current();
7306 struct vnode_attr va;
7307 vnode_t vp;
7308 struct fileproc *fp;
7309 int error;
7310 int fd = uap->fd;
7311
7312 AUDIT_ARG(fd, uap->fd);
7313 if (uap->length < 0) {
7314 return EINVAL;
7315 }
7316
7317 if ((error = fp_lookup(p, fd, &fp, 0))) {
7318 return error;
7319 }
7320
7321 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
7322 case DTYPE_PSXSHM:
7323 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
7324 goto out;
7325 case DTYPE_VNODE:
7326 break;
7327 default:
7328 error = EINVAL;
7329 goto out;
7330 }
7331
7332 vp = (vnode_t)fp->f_fglob->fg_data;
7333
7334 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
7335 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7336 error = EINVAL;
7337 goto out;
7338 }
7339
7340 if ((error = vnode_getwithref(vp)) != 0) {
7341 goto out;
7342 }
7343
7344 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7345
7346 #if CONFIG_MACF
7347 error = mac_vnode_check_truncate(ctx,
7348 fp->f_fglob->fg_cred, vp);
7349 if (error) {
7350 (void)vnode_put(vp);
7351 goto out;
7352 }
7353 #endif
7354 VATTR_INIT(&va);
7355 VATTR_SET(&va, va_data_size, uap->length);
7356 error = vnode_setattr(vp, &va, ctx);
7357
7358 #if CONFIG_MACF
7359 if (error == 0) {
7360 mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp);
7361 }
7362 #endif
7363
7364 (void)vnode_put(vp);
7365 out:
7366 file_drop(fd);
7367 return error;
7368 }
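/*
 * Illustrative userspace sketch (assuming a POSIX userland): ftruncate()
 * above requires a descriptor opened for writing (FWRITE) and accepts only
 * vnodes and POSIX shared memory objects; negative lengths fail early:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/tmp/data", O_RDWR | O_CREAT, 0644);
 *	if (fd != -1 && ftruncate(fd, 4096) == -1) {
 *		// EINVAL: not opened for writing, or unsupported descriptor type
 *	}
 */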
7369
7370
7371 /*
7372 * Sync an open file with synchronized I/O _file_ integrity completion
7373 */
7374 /* ARGSUSED */
7375 int
7376 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
7377 {
7378 __pthread_testcancel(1);
7379 return fsync_common(p, uap, MNT_WAIT);
7380 }
7381
7382
7383 /*
7384 * Sync an open file with synchronized I/O _file_ integrity completion
7385 *
7386 * Notes: This is a legacy support function that does not test for
7387 * thread cancellation points.
7388 */
7389 /* ARGSUSED */
7390 int
7391 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
7392 {
7393 return fsync_common(p, (struct fsync_args *)uap, MNT_WAIT);
7394 }
7395
7396
7397 /*
7398 * Sync an open file with synchronized I/O _data_ integrity completion
7399 */
7400 /* ARGSUSED */
7401 int
7402 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
7403 {
7404 __pthread_testcancel(1);
7405 return fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT);
7406 }
7407
7408
7409 /*
7410 * fsync_common
7411 *
7412 * Common fsync code to support both synchronized I/O file integrity completion
7413 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
7414 *
7415 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
7416 * will only guarantee that the file data contents are retrievable. If
7417 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which
7418 * additionally requires that metadata unnecessary for retrieving the file
7419 * data contents, such as atime, mtime, ctime, etc., also be committed to
7420 * stable storage.
7421 *
7422 * Parameters: p The process
7423 * uap->fd The descriptor to synchronize
7424 * flags The data integrity flags
7425 *
7426 * Returns: int Success
7427 * fp_getfvp:EBADF Bad file descriptor
7428 * fp_getfvp:ENOTSUP fd does not refer to a vnode
7429 * VNOP_FSYNC:??? unspecified
7430 *
7431 * Notes: We use struct fsync_args because it is a short name, and all
7432 * caller argument structures are otherwise identical.
7433 */
7434 static int
7435 fsync_common(proc_t p, struct fsync_args *uap, int flags)
7436 {
7437 vnode_t vp;
7438 struct fileproc *fp;
7439 vfs_context_t ctx = vfs_context_current();
7440 int error;
7441
7442 AUDIT_ARG(fd, uap->fd);
7443
7444 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
7445 return error;
7446 }
7447 if ((error = vnode_getwithref(vp))) {
7448 file_drop(uap->fd);
7449 return error;
7450 }
7451
7452 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7453
7454 error = VNOP_FSYNC(vp, flags, ctx);
7455
7456 #if NAMEDRSRCFORK
7457 /* Sync resource fork shadow file if necessary. */
7458 if ((error == 0) &&
7459 (vp->v_flag & VISNAMEDSTREAM) &&
7460 (vp->v_parent != NULLVP) &&
7461 vnode_isshadow(vp) &&
7462 (fp->f_flags & FP_WRITTEN)) {
7463 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
7464 }
7465 #endif
7466
7467 (void)vnode_put(vp);
7468 file_drop(uap->fd);
7469 return error;
7470 }
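/*
 * Illustrative userspace sketch (assuming a POSIX userland): the MNT_WAIT
 * versus MNT_DWAIT distinction described above surfaces to applications as
 * the choice between fsync(2) and fdatasync(2):
 *
 *	#include <unistd.h>
 *
 *	// full file integrity: data plus metadata (atime, mtime, ctime, ...)
 *	if (fsync(fd) == -1) {
 *		// handle error
 *	}
 *
 *	// data integrity only: enough to read the file contents back later
 *	if (fdatasync(fd) == -1) {
 *		// handle error
 *	}
 */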
7471
7472 /*
7473 * Duplicate files. Source must be a file, target must be a file or
7474 * must not exist.
7475 *
7476 * XXX Copyfile authorisation checking is woefully inadequate, and will not
7477 * perform inheritance correctly.
7478 */
7479 /* ARGSUSED */
7480 int
7481 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
7482 {
7483 vnode_t tvp, fvp, tdvp, sdvp;
7484 struct nameidata fromnd, tond;
7485 int error;
7486 vfs_context_t ctx = vfs_context_current();
7487 #if CONFIG_MACF
7488 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
7489 struct vnode_attr va;
7490 #endif
7491
7492 /* Check that the flags are valid. */
7493
7494 if (uap->flags & ~CPF_MASK) {
7495 return EINVAL;
7496 }
7497
7498 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
7499 UIO_USERSPACE, uap->from, ctx);
7500 if ((error = namei(&fromnd))) {
7501 return error;
7502 }
7503 fvp = fromnd.ni_vp;
7504
7505 NDINIT(&tond, CREATE, OP_LINK,
7506 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
7507 UIO_USERSPACE, uap->to, ctx);
7508 if ((error = namei(&tond))) {
7509 goto out1;
7510 }
7511 tdvp = tond.ni_dvp;
7512 tvp = tond.ni_vp;
7513
7514 if (tvp != NULL) {
7515 if (!(uap->flags & CPF_OVERWRITE)) {
7516 error = EEXIST;
7517 goto out;
7518 }
7519 }
7520
7521 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
7522 error = EISDIR;
7523 goto out;
7524 }
7525
7526 /* This calls existing MAC hooks for open */
7527 if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx,
7528 NULL))) {
7529 goto out;
7530 }
7531
7532 if (tvp) {
7533 /*
7534 * See unlinkat_internal for an explanation of the potential
7535 * ENOENT from the MAC hook; the gist is that the MAC hook
7536 * can fail because vn_getpath isn't able to return the full
7537 * path. We choose to ignore this failure.
7538 */
7539 error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL);
7540 if (error && error != ENOENT) {
7541 goto out;
7542 }
7543 error = 0;
7544 }
7545
7546 #if CONFIG_MACF
7547 VATTR_INIT(&va);
7548 VATTR_SET(&va, va_type, fvp->v_type);
7549 /* Mask off all but regular access permissions */
7550 VATTR_SET(&va, va_mode,
7551 ((((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
7552 error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
7553 if (error) {
7554 goto out;
7555 }
7556 #endif /* CONFIG_MACF */
7557
7558 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) {
7559 goto out;
7560 }
7561
7562 if (fvp == tdvp) {
7563 error = EINVAL;
7564 }
7565 /*
7566 * If source is the same as the destination (that is the
7567 * same inode number) then there is nothing to do.
7568 * (fixed to have POSIX semantics - CSM 3/2/98)
7569 */
7570 if (fvp == tvp) {
7571 error = -1;
7572 }
7573 if (!error) {
7574 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
7575 }
7576 out:
7577 sdvp = tond.ni_startdir;
7578 /*
7579 * nameidone has to happen before we vnode_put(tdvp)
7580 * since it may need to release the fs_nodelock on the tdvp
7581 */
7582 nameidone(&tond);
7583
7584 if (tvp) {
7585 vnode_put(tvp);
7586 }
7587 vnode_put(tdvp);
7588 vnode_put(sdvp);
7589 out1:
7590 vnode_put(fvp);
7591
7592 nameidone(&fromnd);
7593
7594 if (error == -1) {
7595 return 0;
7596 }
7597 return error;
7598 }
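/*
 * Hedged usage note: the copyfile() system call above is distinct from the
 * copyfile(3) library routine in <copyfile.h>, which is what applications
 * normally use to duplicate a file along with its metadata.  A sketch,
 * assuming the library API:
 *
 *	#include <copyfile.h>
 *
 *	// copy data, metadata, ACLs and extended attributes
 *	if (copyfile("/tmp/src", "/tmp/dst", NULL, COPYFILE_ALL) < 0) {
 *		// handle error
 *	}
 */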
7599
7600 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
7601
7602 /*
7603 * Helper function for doing clones. The caller is expected to provide an
7604 * iocounted source vnode and release it.
7605 */
7606 static int
7607 clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
7608 user_addr_t dst, uint32_t flags, vfs_context_t ctx)
7609 {
7610 vnode_t tvp, tdvp;
7611 struct nameidata tond;
7612 int error;
7613 int follow;
7614 boolean_t free_src_acl;
7615 boolean_t attr_cleanup;
7616 enum vtype v_type;
7617 kauth_action_t action;
7618 struct componentname *cnp;
7619 uint32_t defaulted;
7620 struct vnode_attr va;
7621 struct vnode_attr nva;
7622 uint32_t vnop_flags;
7623
7624 v_type = vnode_vtype(fvp);
7625 switch (v_type) {
7626 case VLNK:
7627 /* FALLTHRU */
7628 case VREG:
7629 action = KAUTH_VNODE_ADD_FILE;
7630 break;
7631 case VDIR:
7632 if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
7633 fvp->v_mountedhere) {
7634 return EINVAL;
7635 }
7636 action = KAUTH_VNODE_ADD_SUBDIRECTORY;
7637 break;
7638 default:
7639 return EINVAL;
7640 }
7641
7642 AUDIT_ARG(fd2, dst_dirfd);
7643 AUDIT_ARG(value32, flags);
7644
7645 follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7646 NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
7647 UIO_USERSPACE, dst, ctx);
7648 if ((error = nameiat(&tond, dst_dirfd))) {
7649 return error;
7650 }
7651 cnp = &tond.ni_cnd;
7652 tdvp = tond.ni_dvp;
7653 tvp = tond.ni_vp;
7654
7655 free_src_acl = FALSE;
7656 attr_cleanup = FALSE;
7657
7658 if (tvp != NULL) {
7659 error = EEXIST;
7660 goto out;
7661 }
7662
7663 if (vnode_mount(tdvp) != vnode_mount(fvp)) {
7664 error = EXDEV;
7665 goto out;
7666 }
7667
7668 #if CONFIG_MACF
7669 if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp))) {
7670 goto out;
7671 }
7672 #endif
7673 if ((error = vnode_authorize(tdvp, NULL, action, ctx))) {
7674 goto out;
7675 }
7676
7677 action = KAUTH_VNODE_GENERIC_READ_BITS;
7678 if (data_read_authorised) {
7679 action &= ~KAUTH_VNODE_READ_DATA;
7680 }
7681 if ((error = vnode_authorize(fvp, NULL, action, ctx))) {
7682 goto out;
7683 }
7684
7685 /*
7686 * Certain attributes may need to be changed from the source; we ask for
7687 * those here.
7688 */
7689 VATTR_INIT(&va);
7690 VATTR_WANTED(&va, va_uid);
7691 VATTR_WANTED(&va, va_gid);
7692 VATTR_WANTED(&va, va_mode);
7693 VATTR_WANTED(&va, va_flags);
7694 VATTR_WANTED(&va, va_acl);
7695
7696 if ((error = vnode_getattr(fvp, &va, ctx)) != 0) {
7697 goto out;
7698 }
7699
7700 VATTR_INIT(&nva);
7701 VATTR_SET(&nva, va_type, v_type);
7702 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) {
7703 VATTR_SET(&nva, va_acl, va.va_acl);
7704 free_src_acl = TRUE;
7705 }
7706
7707 /* Handle ACL inheritance, initialize vap. */
7708 if (v_type == VLNK) {
7709 error = vnode_authattr_new(tdvp, &nva, 0, ctx);
7710 } else {
7711 error = vn_attribute_prepare(tdvp, &nva, &defaulted, ctx);
7712 if (error) {
7713 goto out;
7714 }
7715 attr_cleanup = TRUE;
7716 }
7717
7718 vnop_flags = VNODE_CLONEFILE_DEFAULT;
7719 /*
7720 * We've got initial values for all security parameters.
7721 * If we are superuser, then we can change owners to be the
7722 * same as the source. Both superuser and the owner have default
7723 * WRITE_SECURITY privileges so all other fields can be taken
7724 * from source as well.
7725 */
7726 if (!(flags & CLONE_NOOWNERCOPY) && vfs_context_issuser(ctx)) {
7727 if (VATTR_IS_SUPPORTED(&va, va_uid)) {
7728 VATTR_SET(&nva, va_uid, va.va_uid);
7729 }
7730 if (VATTR_IS_SUPPORTED(&va, va_gid)) {
7731 VATTR_SET(&nva, va_gid, va.va_gid);
7732 }
7733 } else {
7734 vnop_flags |= VNODE_CLONEFILE_NOOWNERCOPY;
7735 }
7736
7737 if (VATTR_IS_SUPPORTED(&va, va_mode)) {
7738 VATTR_SET(&nva, va_mode, va.va_mode);
7739 }
7740 if (VATTR_IS_SUPPORTED(&va, va_flags)) {
7741 VATTR_SET(&nva, va_flags,
7742 ((va.va_flags & ~(UF_DATAVAULT | SF_RESTRICTED)) | /* Turn off from source */
7743 (nva.va_flags & (UF_DATAVAULT | SF_RESTRICTED))));
7744 }
7745
7746 error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &nva, vnop_flags, ctx);
7747
7748 if (!error && tvp) {
7749 int update_flags = 0;
7750 #if CONFIG_FSE
7751 int fsevent;
7752 #endif /* CONFIG_FSE */
7753
7754 #if CONFIG_MACF
7755 (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
7756 VNODE_LABEL_CREATE, ctx);
7757 #endif
7758 /*
7759 * If some of the requested attributes weren't handled by the
7760 * VNOP, use our fallback code.
7761 */
7762 if (!VATTR_ALL_SUPPORTED(&va)) {
7763 (void)vnode_setattr_fallback(tvp, &nva, ctx);
7764 }
7765
7766 // Make sure the name & parent pointers are hooked up
7767 if (tvp->v_name == NULL) {
7768 update_flags |= VNODE_UPDATE_NAME;
7769 }
7770 if (tvp->v_parent == NULLVP) {
7771 update_flags |= VNODE_UPDATE_PARENT;
7772 }
7773
7774 if (update_flags) {
7775 (void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
7776 cnp->cn_namelen, cnp->cn_hash, update_flags);
7777 }
7778
7779 #if CONFIG_FSE
7780 switch (vnode_vtype(tvp)) {
7781 case VLNK:
7782 /* FALLTHRU */
7783 case VREG:
7784 fsevent = FSE_CREATE_FILE;
7785 break;
7786 case VDIR:
7787 fsevent = FSE_CREATE_DIR;
7788 break;
7789 default:
7790 goto out;
7791 }
7792
7793 if (need_fsevent(fsevent, tvp)) {
7794 /*
7795 * The following is a sequence of three explicit events.
7796 * A pair of FSE_CLONE events representing the source and destination
7797 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
7798 * fseventsd may coalesce the destination clone and create events
7799 * into a single event resulting in the following sequence for a client
7800 * FSE_CLONE (src)
7801 * FSE_CLONE | FSE_CREATE (dst)
7802 */
7803 add_fsevent(FSE_CLONE, ctx, FSE_ARG_VNODE, fvp, FSE_ARG_VNODE, tvp,
7804 FSE_ARG_DONE);
7805 add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
7806 FSE_ARG_DONE);
7807 }
7808 #endif /* CONFIG_FSE */
7809 }
7810
7811 out:
7812 if (attr_cleanup) {
7813 vn_attribute_cleanup(&nva, defaulted);
7814 }
7815 if (free_src_acl && va.va_acl) {
7816 kauth_acl_free(va.va_acl);
7817 }
7818 nameidone(&tond);
7819 if (tvp) {
7820 vnode_put(tvp);
7821 }
7822 vnode_put(tdvp);
7823 return error;
7824 }
7825
7826 /*
7827 * clone files or directories, target must not exist.
7828 */
7829 /* ARGSUSED */
7830 int
7831 clonefileat(__unused proc_t p, struct clonefileat_args *uap,
7832 __unused int32_t *retval)
7833 {
7834 vnode_t fvp;
7835 struct nameidata fromnd;
7836 int follow;
7837 int error;
7838 vfs_context_t ctx = vfs_context_current();
7839
7840 /* Check that the flags are valid. */
7841 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY)) {
7842 return EINVAL;
7843 }
7844
7845 AUDIT_ARG(fd, uap->src_dirfd);
7846
7847 follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7848 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
7849 UIO_USERSPACE, uap->src, ctx);
7850 if ((error = nameiat(&fromnd, uap->src_dirfd))) {
7851 return error;
7852 }
7853
7854 fvp = fromnd.ni_vp;
7855 nameidone(&fromnd);
7856
7857 error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
7858 uap->flags, ctx);
7859
7860 vnode_put(fvp);
7861 return error;
7862 }
7863
7864 int
7865 fclonefileat(__unused proc_t p, struct fclonefileat_args *uap,
7866 __unused int32_t *retval)
7867 {
7868 vnode_t fvp;
7869 struct fileproc *fp;
7870 int error;
7871 vfs_context_t ctx = vfs_context_current();
7872
7873 /* Check that the flags are valid. */
7874 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY)) {
7875 return EINVAL;
7876 }
7877
7878 AUDIT_ARG(fd, uap->src_fd);
7879 error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
7880 if (error) {
7881 return error;
7882 }
7883
7884 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7885 AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
7886 error = EBADF;
7887 goto out;
7888 }
7889
7890 if ((error = vnode_getwithref(fvp))) {
7891 goto out;
7892 }
7893
7894 AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
7895
7896 error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
7897 uap->flags, ctx);
7898
7899 vnode_put(fvp);
7900 out:
7901 file_drop(uap->src_fd);
7902 return error;
7903 }
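/*
 * Illustrative userspace sketch (assuming the <sys/clonefile.h> wrappers):
 * clonefileat()/fclonefileat() above require that the destination not exist
 * and that both paths live on the same clone-capable mount, otherwise
 * EEXIST or EXDEV is returned:
 *
 *	#include <fcntl.h>
 *	#include <sys/clonefile.h>
 *
 *	// clone src to dst without following a trailing symlink on src
 *	if (clonefileat(AT_FDCWD, "src", AT_FDCWD, "dst", CLONE_NOFOLLOW) == -1) {
 *		// EEXIST: dst exists; EXDEV: src and dst on different mounts
 *	}
 */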
7904
7905 static int
7906 rename_submounts_callback(mount_t mp, void *arg)
7907 {
7908 int error = 0;
7909 mount_t pmp = (mount_t)arg;
7910 int prefix_len = strlen(pmp->mnt_vfsstat.f_mntonname);
7911
7912 if (strncmp(mp->mnt_vfsstat.f_mntonname, pmp->mnt_vfsstat.f_mntonname, prefix_len) != 0) {
7913 return 0;
7914 }
7915
7916 if (mp->mnt_vfsstat.f_mntonname[prefix_len] != '/') {
7917 return 0;
7918 }
7919
7920 if ((error = vfs_busy(mp, LK_NOWAIT))) {
7921 printf("vfs_busy failed with %d for %s\n", error, mp->mnt_vfsstat.f_mntonname);
7922 return -1;
7923 }
7924
7925 int pathlen = MAXPATHLEN;
7926 if ((error = vn_getpath_ext(mp->mnt_vnodecovered, NULL, mp->mnt_vfsstat.f_mntonname, &pathlen, VN_GETPATH_FSENTER))) {
7927 printf("vn_getpath_ext failed with %d for mnt_vnodecovered of %s\n", error, mp->mnt_vfsstat.f_mntonname);
7928 }
7929
7930 vfs_unbusy(mp);
7931
7932 return error;
7933 }
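/*
 * Illustrative userspace sketch (assuming Apple's renamex_np()/renameatx_np()
 * extensions declared in <stdio.h>): renameat_internal() below implements
 * VFS_RENAME_SWAP and VFS_RENAME_EXCL, which those wrappers expose as
 * RENAME_SWAP and RENAME_EXCL (the two flags are mutually exclusive):
 *
 *	#include <stdio.h>
 *
 *	// atomically exchange two existing names
 *	if (renamex_np("a", "b", RENAME_SWAP) == -1) {
 *		// ENOENT if "b" is missing; EEXIST if RENAME_EXCL and "b" exists
 *	}
 */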
7934
7935 /*
7936 * Rename files. Source and destination must either both be directories,
7937 * or both not be directories. If target is a directory, it must be empty.
7938 */
7939 /* ARGSUSED */
7940 static int
7941 renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
7942 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
7943 {
7944 if (flags & ~VFS_RENAME_FLAGS_MASK) {
7945 return EINVAL;
7946 }
7947
7948 if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL)) {
7949 return EINVAL;
7950 }
7951
7952 vnode_t tvp, tdvp;
7953 vnode_t fvp, fdvp;
7954 struct nameidata *fromnd, *tond;
7955 int error;
7956 int do_retry;
7957 int retry_count;
7958 int mntrename;
7959 int need_event;
7960 int need_kpath2;
7961 int has_listeners;
7962 const char *oname = NULL;
7963 char *from_name = NULL, *to_name = NULL;
7964 char *from_name_no_firmlink = NULL, *to_name_no_firmlink = NULL;
7965 int from_len = 0, to_len = 0;
7966 int from_len_no_firmlink = 0, to_len_no_firmlink = 0;
7967 int holding_mntlock;
7968 mount_t locked_mp = NULL;
7969 vnode_t oparent = NULLVP;
7970 #if CONFIG_FSE
7971 fse_info from_finfo, to_finfo;
7972 #endif
7973 int from_truncated = 0, to_truncated = 0;
7974 int from_truncated_no_firmlink = 0, to_truncated_no_firmlink = 0;
7975 int batched = 0;
7976 struct vnode_attr *fvap, *tvap;
7977 int continuing = 0;
7978 /* carving out a chunk for structs that are too big to be on stack. */
7979 struct {
7980 struct nameidata from_node, to_node;
7981 struct vnode_attr fv_attr, tv_attr;
7982 } * __rename_data;
7983 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
7984 fromnd = &__rename_data->from_node;
7985 tond = &__rename_data->to_node;
7986
7987 holding_mntlock = 0;
7988 do_retry = 0;
7989 retry_count = 0;
7990 retry:
7991 fvp = tvp = NULL;
7992 fdvp = tdvp = NULL;
7993 fvap = tvap = NULL;
7994 mntrename = FALSE;
7995
7996 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
7997 segflg, from, ctx);
7998 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
7999
8000 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
8001 segflg, to, ctx);
8002 tond->ni_flag = NAMEI_COMPOUNDRENAME;
8003
8004 continue_lookup:
8005 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
8006 if ((error = nameiat(fromnd, fromfd))) {
8007 goto out1;
8008 }
8009 fdvp = fromnd->ni_dvp;
8010 fvp = fromnd->ni_vp;
8011
8012 if (fvp && fvp->v_type == VDIR) {
8013 tond->ni_cnd.cn_flags |= WILLBEDIR;
8014 }
8015 }
8016
8017 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
8018 if ((error = nameiat(tond, tofd))) {
8019 /*
8020 * Translate error code for rename("dir1", "dir2/.").
8021 */
8022 if (error == EISDIR && fvp->v_type == VDIR) {
8023 error = EINVAL;
8024 }
8025 goto out1;
8026 }
8027 tdvp = tond->ni_dvp;
8028 tvp = tond->ni_vp;
8029 }
8030
8031 #if DEVELOPMENT || DEBUG
8032 /*
8033 * XXX VSWAP: Check for entitlements or special flag here
8034 * so we can restrict access appropriately.
8035 */
8036 #else /* DEVELOPMENT || DEBUG */
8037
8038 if (fromnd->ni_vp && vnode_isswap(fromnd->ni_vp) && (ctx != vfs_context_kernel())) {
8039 error = EPERM;
8040 goto out1;
8041 }
8042
8043 if (tond->ni_vp && vnode_isswap(tond->ni_vp) && (ctx != vfs_context_kernel())) {
8044 error = EPERM;
8045 goto out1;
8046 }
8047 #endif /* DEVELOPMENT || DEBUG */
8048
8049 if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
8050 error = ENOENT;
8051 goto out1;
8052 }
8053
8054 if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
8055 error = EEXIST;
8056 goto out1;
8057 }
8058
8059 batched = vnode_compound_rename_available(fdvp);
8060
8061 #if CONFIG_FSE
8062 need_event = need_fsevent(FSE_RENAME, fdvp);
8063 if (need_event) {
8064 if (fvp) {
8065 get_fse_info(fvp, &from_finfo, ctx);
8066 } else {
8067 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
8068 if (error) {
8069 goto out1;
8070 }
8071
8072 fvap = &__rename_data->fv_attr;
8073 }
8074
8075 if (tvp) {
8076 get_fse_info(tvp, &to_finfo, ctx);
8077 } else if (batched) {
8078 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
8079 if (error) {
8080 goto out1;
8081 }
8082
8083 tvap = &__rename_data->tv_attr;
8084 }
8085 }
8086 #else
8087 need_event = 0;
8088 #endif /* CONFIG_FSE */
8089
8090 has_listeners = kauth_authorize_fileop_has_listeners();
8091
8092 need_kpath2 = 0;
8093 #if CONFIG_AUDIT
8094 if (AUDIT_RECORD_EXISTS()) {
8095 need_kpath2 = 1;
8096 }
8097 #endif
8098
8099 if (need_event || has_listeners) {
8100 if (from_name == NULL) {
8101 GET_PATH(from_name);
8102 if (from_name == NULL) {
8103 error = ENOMEM;
8104 goto out1;
8105 }
8106 }
8107
8108 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
8109
8110 if (from_name_no_firmlink == NULL) {
8111 GET_PATH(from_name_no_firmlink);
8112 if (from_name_no_firmlink == NULL) {
8113 error = ENOMEM;
8114 goto out1;
8115 }
8116 }
8117
8118 from_len_no_firmlink = safe_getpath_no_firmlink(fdvp, fromnd->ni_cnd.cn_nameptr, from_name_no_firmlink, MAXPATHLEN, &from_truncated_no_firmlink);
8119 }
8120
8121 if (need_event || need_kpath2 || has_listeners) {
8122 if (to_name == NULL) {
8123 GET_PATH(to_name);
8124 if (to_name == NULL) {
8125 error = ENOMEM;
8126 goto out1;
8127 }
8128 }
8129
8130 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
8131
8132 if (to_name_no_firmlink == NULL) {
8133 GET_PATH(to_name_no_firmlink);
8134 if (to_name_no_firmlink == NULL) {
8135 error = ENOMEM;
8136 goto out1;
8137 }
8138 }
8139
8140 to_len_no_firmlink = safe_getpath_no_firmlink(tdvp, tond->ni_cnd.cn_nameptr, to_name_no_firmlink, MAXPATHLEN, &to_truncated_no_firmlink);
8141 if (to_name && need_kpath2) {
8142 AUDIT_ARG(kpath, to_name, ARG_KPATH2);
8143 }
8144 }
8145 if (!fvp) {
8146 /*
8147 * Claim: this check will never reject a valid rename.
8148 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
8149 * Suppose fdvp and tdvp are not on the same mount.
8150 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
8151 * then you can't move it to within another dir on the same mountpoint.
8152 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
8153 *
8154 * If this check passes, then we are safe to pass these vnodes to the same FS.
8155 */
8156 if (fdvp->v_mount != tdvp->v_mount) {
8157 error = EXDEV;
8158 goto out1;
8159 }
8160 goto skipped_lookup;
8161 }
8162
8163 if (!batched) {
8164 error = vn_authorize_renamex_with_paths(fdvp, fvp, &fromnd->ni_cnd, from_name, tdvp, tvp, &tond->ni_cnd, to_name, ctx, flags, NULL);
8165 if (error) {
8166 if (error == ENOENT) {
8167 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8168 /*
8169 * We encountered a race where after doing the namei, tvp stops
8170 * being valid. If so, simply re-drive the rename call from the
8171 * top.
8172 */
8173 do_retry = 1;
8174 retry_count += 1;
8175 }
8176 }
8177 goto out1;
8178 }
8179 }
8180
8181 /*
8182 * If the source and destination are the same (i.e. they're
8183 * links to the same vnode) and the target file system is
8184 * case sensitive, then there is nothing to do.
8185 *
8186 * XXX Come back to this.
8187 */
8188 if (fvp == tvp) {
8189 int pathconf_val;
8190
8191 /*
8192 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
8193 * then assume that this file system is case sensitive.
8194 */
8195 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
8196 pathconf_val != 0) {
8197 goto out1;
8198 }
8199 }
8200
8201 /*
8202 * Allow the renaming of mount points.
8203 * - target must not exist
8204 * - target must reside in the same directory as source
8205 * - union mounts cannot be renamed
8206 * - "/" cannot be renamed
8207 *
8208 * XXX Handle this in VFS after a continued lookup (if we missed
8209 * in the cache to start off)
8210 *
8211 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
8212 * we'll skip past here. The file system is responsible for
8213 * checking that @tvp is not a descendent of @fvp and vice versa
8214 * so it should always return EINVAL if either @tvp or @fvp is the
8215 * root of a volume.
8216 */
8217 if ((fvp->v_flag & VROOT) &&
8218 (fvp->v_type == VDIR) &&
8219 (tvp == NULL) &&
8220 (fvp->v_mountedhere == NULL) &&
8221 (fdvp == tdvp) &&
8222 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
8223 ((fvp->v_mount->mnt_kern_flag & MNTK_SYSTEM) == 0) &&
8224 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
8225 vnode_t coveredvp;
8226
8227 /* switch fvp to the covered vnode */
8228 coveredvp = fvp->v_mount->mnt_vnodecovered;
8229 if ((vnode_getwithref(coveredvp))) {
8230 error = ENOENT;
8231 goto out1;
8232 }
8233 vnode_put(fvp);
8234
8235 fvp = coveredvp;
8236 mntrename = TRUE;
8237 }
8238 /*
8239 * Check for cross-device rename.
8240 */
8241 if ((fvp->v_mount != tdvp->v_mount) ||
8242 (tvp && (fvp->v_mount != tvp->v_mount))) {
8243 error = EXDEV;
8244 goto out1;
8245 }
8246
8247 /*
8248 * If source is the same as the destination (that is the
8249 * same inode number) then there is nothing to do...
8250 * EXCEPT if the underlying file system supports case
8251 * insensitivity and is case preserving. In this case
8252 * the file system needs to handle the special case of
8253 * getting the same vnode as target (fvp) and source (tvp).
8254 *
8255 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
8256 * and _PC_CASE_PRESERVING can have this exception, and they need to
8257 * handle the special case of getting the same vnode as target and
8258 * source. NOTE: Then the target is unlocked going into vnop_rename,
8259 * so not to cause locking problems. There is a single reference on tvp.
8260 *
8261 * NOTE - that fvp == tvp also occurs if they are hard linked and
8262 * that correct behaviour then is just to return success without doing
8263 * anything.
8264 *
8265 * XXX filesystem should take care of this itself, perhaps...
8266 */
8267 if (fvp == tvp && fdvp == tdvp) {
8268 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
8269 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
8270 fromnd->ni_cnd.cn_namelen)) {
8271 goto out1;
8272 }
8273 }
8274
8275 if (holding_mntlock && fvp->v_mount != locked_mp) {
8276 /*
8277 * we're holding a reference and lock
8278 * on locked_mp, but it no longer matches
8279 * what we want to do... so drop our hold
8280 */
8281 mount_unlock_renames(locked_mp);
8282 mount_drop(locked_mp, 0);
8283 holding_mntlock = 0;
8284 }
8285 if (tdvp != fdvp && fvp->v_type == VDIR) {
8286 /*
8287 * serialize renames that re-shape
8288 * the tree... if holding_mntlock is
8289 * set, then we're ready to go...
8290 * otherwise we
8291 * first need to drop the iocounts
8292 * we picked up, second take the
8293 * lock to serialize the access,
8294 * then finally start the lookup
8295 * process over with the lock held
8296 */
8297 if (!holding_mntlock) {
8298 /*
8299 * need to grab a reference on
8300 * the mount point before we
8301 * drop all the iocounts... once
8302 * the iocounts are gone, the mount
8303 * could follow
8304 */
8305 locked_mp = fvp->v_mount;
8306 mount_ref(locked_mp, 0);
8307
8308 /*
8309 * nameidone has to happen before we vnode_put(tvp)
8310 * since it may need to release the fs_nodelock on the tvp
8311 */
8312 nameidone(tond);
8313
8314 if (tvp) {
8315 vnode_put(tvp);
8316 }
8317 vnode_put(tdvp);
8318
8319 /*
8320 * nameidone has to happen before we vnode_put(fdvp)
8321 * since it may need to release the fs_nodelock on the fvp
8322 */
8323 nameidone(fromnd);
8324
8325 vnode_put(fvp);
8326 vnode_put(fdvp);
8327
8328 mount_lock_renames(locked_mp);
8329 holding_mntlock = 1;
8330
8331 goto retry;
8332 }
8333 } else {
8334 /*
8335 * when we dropped the iocounts to take
8336 * the lock, we allowed the identity of
8337 * the various vnodes to change... if they did,
8338 * we may no longer be dealing with a rename
8339 * that reshapes the tree... once we're holding
8340 * the iocounts, the vnodes can't change type
8341 * so we're free to drop the lock at this point
8342 * and continue on
8343 */
8344 if (holding_mntlock) {
8345 mount_unlock_renames(locked_mp);
8346 mount_drop(locked_mp, 0);
8347 holding_mntlock = 0;
8348 }
8349 }
8350
8351 // save these off so we can later verify that fvp is the same
8352 oname = fvp->v_name;
8353 oparent = fvp->v_parent;
8354
8355 skipped_lookup:
8356 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
8357 tdvp, &tvp, &tond->ni_cnd, tvap,
8358 flags, ctx);
8359
8360 if (holding_mntlock) {
8361 /*
8362 * we can drop our serialization
8363 * lock now
8364 */
8365 mount_unlock_renames(locked_mp);
8366 mount_drop(locked_mp, 0);
8367 holding_mntlock = 0;
8368 }
8369 if (error) {
8370 if (error == EDATALESS) {
8371 /*
8372 * If we've been here before, something has gone
8373 * horribly wrong and we should just get out lest
8374 * we spiral around the drain forever.
8375 */
8376 if (flags & VFS_RENAME_DATALESS) {
8377 error = EIO;
8378 goto out1;
8379 }
8380
8381 /*
8382 * The object we're renaming is dataless (or has a
8383 * dataless descendent) and requires materialization
8384 * before the rename occurs. But we're holding the
8385 * mount point's rename lock, so it's not safe to
8386 * make the upcall.
8387 *
8388 * In this case, we release the lock, perform the
8389 * materialization, and start the whole thing over.
8390 */
8391 error = vnode_materialize_dataless_file(fvp,
8392 NAMESPACE_HANDLER_RENAME_OP);
8393
8394 if (error == 0) {
8395 /*
8396 * The next time around we need to tell the
8397 * file system that the materialization has
8398 * been performed.
8399 */
8400 flags |= VFS_RENAME_DATALESS;
8401 do_retry = 1;
8402 }
8403 goto out1;
8404 }
8405 if (error == EKEEPLOOKING) {
8406 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
8407 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
8408 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
8409 }
8410 }
8411
8412 fromnd->ni_vp = fvp;
8413 tond->ni_vp = tvp;
8414
8415 goto continue_lookup;
8416 }
8417
8418 /*
8419 * We may encounter a race in the VNOP where the destination didn't
8420 * exist when we did the namei, but it does by the time we go and
8421 * try to create the entry. In this case, we should re-drive this rename
8422 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
8423 * but other filesystems susceptible to this race could return it, too.
8424 */
8425 if (error == ERECYCLE) {
8426 do_retry = 1;
8427 }
8428
8429 /*
8430 * For compound VNOPs, the authorization callback may return
8431 * ENOENT in case of racing hardlink lookups hitting the name
8432 * cache; redrive the lookup in that case.
8433 */
8434 if (batched && error == ENOENT) {
8435 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8436 do_retry = 1;
8437 retry_count += 1;
8438 }
8439 }
8440
8441 goto out1;
8442 }
8443
8444 /* call out to allow 3rd party notification of rename.
8445 * Ignore result of kauth_authorize_fileop call.
8446 */
8447 kauth_authorize_fileop(vfs_context_ucred(ctx),
8448 KAUTH_FILEOP_RENAME,
8449 (uintptr_t)from_name, (uintptr_t)to_name);
8450 if (flags & VFS_RENAME_SWAP) {
8451 kauth_authorize_fileop(vfs_context_ucred(ctx),
8452 KAUTH_FILEOP_RENAME,
8453 (uintptr_t)to_name, (uintptr_t)from_name);
8454 }
8455
8456 #if CONFIG_FSE
8457 if (from_name != NULL && to_name != NULL) {
8458 if (from_truncated || to_truncated) {
8459 // set it here since only the from_finfo gets reported up to user space
8460 from_finfo.mode |= FSE_TRUNCATED_PATH;
8461 }
8462
8463 if (tvap && tvp) {
8464 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
8465 }
8466 if (fvap) {
8467 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
8468 }
8469
8470 if (tvp) {
8471 add_fsevent(FSE_RENAME, ctx,
8472 FSE_ARG_STRING, from_len_no_firmlink, from_name_no_firmlink,
8473 FSE_ARG_FINFO, &from_finfo,
8474 FSE_ARG_STRING, to_len_no_firmlink, to_name_no_firmlink,
8475 FSE_ARG_FINFO, &to_finfo,
8476 FSE_ARG_DONE);
8477 if (flags & VFS_RENAME_SWAP) {
8478 /*
8479 * Strictly speaking, swap is the equivalent of
8480 * *three* renames. FSEvents clients should only take
8481 * the events as a hint, so we only bother reporting
8482 * two.
8483 */
8484 add_fsevent(FSE_RENAME, ctx,
8485 FSE_ARG_STRING, to_len_no_firmlink, to_name_no_firmlink,
8486 FSE_ARG_FINFO, &to_finfo,
8487 FSE_ARG_STRING, from_len_no_firmlink, from_name_no_firmlink,
8488 FSE_ARG_FINFO, &from_finfo,
8489 FSE_ARG_DONE);
8490 }
8491 } else {
8492 add_fsevent(FSE_RENAME, ctx,
8493 FSE_ARG_STRING, from_len_no_firmlink, from_name_no_firmlink,
8494 FSE_ARG_FINFO, &from_finfo,
8495 FSE_ARG_STRING, to_len_no_firmlink, to_name_no_firmlink,
8496 FSE_ARG_DONE);
8497 }
8498 }
8499 #endif /* CONFIG_FSE */
8500
8501 /*
8502 * update filesystem's mount point data
8503 */
8504 if (mntrename) {
8505 char *cp, *pathend, *mpname;
8506 char * tobuf;
8507 struct mount *mp;
8508 int maxlen;
8509 size_t len = 0;
8510
8511 mp = fvp->v_mountedhere;
8512
8513 if (vfs_busy(mp, LK_NOWAIT)) {
8514 error = EBUSY;
8515 goto out1;
8516 }
8517 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
8518
8519 if (UIO_SEG_IS_USER_SPACE(segflg)) {
8520 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
8521 } else {
8522 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
8523 }
8524 if (!error) {
8525 /* find current mount point prefix */
8526 pathend = &mp->mnt_vfsstat.f_mntonname[0];
8527 for (cp = pathend; *cp != '\0'; ++cp) {
8528 if (*cp == '/') {
8529 pathend = cp + 1;
8530 }
8531 }
8532 /* find last component of target name */
8533 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
8534 if (*cp == '/') {
8535 mpname = cp + 1;
8536 }
8537 }
8538
8539 /* Update f_mntonname of sub mounts */
8540 vfs_iterate(0, rename_submounts_callback, (void *)mp);
8541
8542 /* append name to prefix */
8543 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
8544 bzero(pathend, maxlen);
8545
8546 strlcpy(pathend, mpname, maxlen);
8547 }
8548 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
8549
8550 vfs_unbusy(mp);
8551
8552 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
8553 }
8554 /*
8555 * fix up name & parent pointers. note that we first
8556 * check that fvp has the same name/parent pointers it
8557 * had before the rename call... this is a 'weak' check
8558 * at best...
8559 *
8560 * XXX oparent and oname may not be set in the compound vnop case
8561 */
8562 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
8563 int update_flags;
8564
8565 update_flags = VNODE_UPDATE_NAME;
8566
8567 if (fdvp != tdvp) {
8568 update_flags |= VNODE_UPDATE_PARENT;
8569 }
8570
8571 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
8572 }
8573 out1:
8574 if (to_name != NULL) {
8575 RELEASE_PATH(to_name);
8576 to_name = NULL;
8577 }
8578 if (to_name_no_firmlink != NULL) {
8579 RELEASE_PATH(to_name_no_firmlink);
8580 to_name_no_firmlink = NULL;
8581 }
8582 if (from_name != NULL) {
8583 RELEASE_PATH(from_name);
8584 from_name = NULL;
8585 }
8586 if (from_name_no_firmlink != NULL) {
8587 RELEASE_PATH(from_name_no_firmlink);
8588 from_name_no_firmlink = NULL;
8589 }
8590 if (holding_mntlock) {
8591 mount_unlock_renames(locked_mp);
8592 mount_drop(locked_mp, 0);
8593 holding_mntlock = 0;
8594 }
8595 if (tdvp) {
8596 /*
8597 * nameidone has to happen before we vnode_put(tdvp)
8598 * since it may need to release the fs_nodelock on the tdvp
8599 */
8600 nameidone(tond);
8601
8602 if (tvp) {
8603 vnode_put(tvp);
8604 }
8605 vnode_put(tdvp);
8606 }
8607 if (fdvp) {
8608 /*
8609 * nameidone has to happen before we vnode_put(fdvp)
8610 * since it may need to release the fs_nodelock on the fdvp
8611 */
8612 nameidone(fromnd);
8613
8614 if (fvp) {
8615 vnode_put(fvp);
8616 }
8617 vnode_put(fdvp);
8618 }
8619
8620 /*
8621 * If things changed after we did the namei, then we will re-drive
8622 * this rename call from the top.
8623 */
8624 if (do_retry) {
8625 do_retry = 0;
8626 goto retry;
8627 }
8628
8629 FREE(__rename_data, M_TEMP);
8630 return error;
8631 }
8632
8633 int
8634 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
8635 {
8636 return renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
8637 AT_FDCWD, uap->to, UIO_USERSPACE, 0);
8638 }
8639
8640 int
8641 renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
8642 {
8643 return renameat_internal(
8644 vfs_context_current(),
8645 uap->fromfd, uap->from,
8646 uap->tofd, uap->to,
8647 UIO_USERSPACE, uap->flags);
8648 }
8649
8650 int
8651 renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
8652 {
8653 return renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
8654 uap->tofd, uap->to, UIO_USERSPACE, 0);
8655 }
8656
8657 /*
8658 * Make a directory file.
8659 *
8660 * Returns: 0 Success
8661 * EEXIST
8662 * namei:???
8663 * vnode_authorize:???
8664 * vn_create:???
8665 */
8666 /* ARGSUSED */
8667 static int
8668 mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
8669 enum uio_seg segflg)
8670 {
8671 vnode_t vp, dvp;
8672 int error;
8673 int update_flags = 0;
8674 int batched;
8675 struct nameidata nd;
8676
8677 AUDIT_ARG(mode, vap->va_mode);
8678 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
8679 path, ctx);
8680 nd.ni_cnd.cn_flags |= WILLBEDIR;
8681 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
8682
8683 continue_lookup:
8684 error = nameiat(&nd, fd);
8685 if (error) {
8686 return error;
8687 }
8688 dvp = nd.ni_dvp;
8689 vp = nd.ni_vp;
8690
8691 if (vp != NULL) {
8692 error = EEXIST;
8693 goto out;
8694 }
8695
8696 batched = vnode_compound_mkdir_available(dvp);
8697
8698 VATTR_SET(vap, va_type, VDIR);
8699
8700 /*
8701 * XXX
8702 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
8703 * only get EEXIST or EISDIR for existing path components, and not that it could see
8704 * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
8705 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
8706 */
8707 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
8708 if (error == EACCES || error == EPERM) {
8709 int error2;
8710
8711 nameidone(&nd);
8712 vnode_put(dvp);
8713 dvp = NULLVP;
8714
8715 /*
8716 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
8717 * rather than EACCES if the target exists.
8718 */
8719 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
8720 path, ctx);
8721 error2 = nameiat(&nd, fd);
8722 if (error2) {
8723 goto out;
8724 } else {
8725 vp = nd.ni_vp;
8726 error = EEXIST;
8727 goto out;
8728 }
8729 }
8730
8731 goto out;
8732 }
8733
8734 /*
8735 * make the directory
8736 */
8737 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
8738 if (error == EKEEPLOOKING) {
8739 nd.ni_vp = vp;
8740 goto continue_lookup;
8741 }
8742
8743 goto out;
8744 }
8745
8746 // Make sure the name & parent pointers are hooked up
8747 if (vp->v_name == NULL) {
8748 update_flags |= VNODE_UPDATE_NAME;
8749 }
8750 if (vp->v_parent == NULLVP) {
8751 update_flags |= VNODE_UPDATE_PARENT;
8752 }
8753
8754 if (update_flags) {
8755 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
8756 }
8757
8758 #if CONFIG_FSE
8759 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
8760 #endif
8761
8762 out:
8763 /*
8764 * nameidone has to happen before we vnode_put(dvp)
8765 * since it may need to release the fs_nodelock on the dvp
8766 */
8767 nameidone(&nd);
8768
8769 if (vp) {
8770 vnode_put(vp);
8771 }
8772 if (dvp) {
8773 vnode_put(dvp);
8774 }
8775
8776 return error;
8777 }
8778
8779 /*
8780 * mkdir_extended: Create a directory; with extended security (ACL).
8781 *
8782 * Parameters: p Process requesting to create the directory
8783 * uap User argument descriptor (see below)
8784 * retval (ignored)
8785 *
8786 * Indirect: uap->path Path of directory to create
8787 * uap->mode Access permissions to set
8788 * uap->xsecurity ACL to set
8789 *
8790 * Returns: 0 Success
8791 * !0 Not success
8792 *
8793 */
8794 int
8795 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
8796 {
8797 int ciferror;
8798 kauth_filesec_t xsecdst;
8799 struct vnode_attr va;
8800
8801 AUDIT_ARG(owner, uap->uid, uap->gid);
8802
8803 xsecdst = NULL;
8804 if ((uap->xsecurity != USER_ADDR_NULL) &&
8805 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)) {
8806 return ciferror;
8807 }
8808
8809 VATTR_INIT(&va);
8810 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8811 if (xsecdst != NULL) {
8812 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
8813 }
8814
8815 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
8816 UIO_USERSPACE);
8817 if (xsecdst != NULL) {
8818 kauth_filesec_free(xsecdst);
8819 }
8820 return ciferror;
8821 }
8822
8823 int
8824 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
8825 {
8826 struct vnode_attr va;
8827
8828 VATTR_INIT(&va);
8829 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8830
8831 return mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
8832 UIO_USERSPACE);
8833 }
8834
8835 int
8836 mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
8837 {
8838 struct vnode_attr va;
8839
8840 VATTR_INIT(&va);
8841 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8842
8843 return mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
8844 UIO_USERSPACE);
8845 }
8846
8847 static int
8848 rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
8849 enum uio_seg segflg, int unlink_flags)
8850 {
8851 vnode_t vp, dvp;
8852 int error;
8853 struct nameidata nd;
8854 char *path = NULL;
8855 char *no_firmlink_path = NULL;
8856 int len_path = 0;
8857 int len_no_firmlink_path = 0;
8858 int has_listeners = 0;
8859 int need_event = 0;
8860 int truncated_path = 0;
8861 int truncated_no_firmlink_path = 0;
8862 #if CONFIG_FSE
8863 struct vnode_attr va;
8864 #endif /* CONFIG_FSE */
8865 struct vnode_attr *vap = NULL;
8866 int restart_count = 0;
8867 int batched;
8868
8869 int restart_flag;
8870
8871 /*
8872 * This loop exists to restart rmdir in the unlikely case that two
8873 * processes are simultaneously trying to remove the same directory
8874 * containing orphaned appleDouble files.
8875 */
8876 do {
8877 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
8878 segflg, dirpath, ctx);
8879 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
8880 continue_lookup:
8881 restart_flag = 0;
8882 vap = NULL;
8883
8884 error = nameiat(&nd, fd);
8885 if (error) {
8886 return error;
8887 }
8888
8889 dvp = nd.ni_dvp;
8890 vp = nd.ni_vp;
8891
8892 if (vp) {
8893 batched = vnode_compound_rmdir_available(vp);
8894
8895 if (vp->v_flag & VROOT) {
8896 /*
8897 * The root of a mounted filesystem cannot be deleted.
8898 */
8899 error = EBUSY;
8900 goto out;
8901 }
8902
8903 #if DEVELOPMENT || DEBUG
8904 /*
8905 * XXX VSWAP: Check for entitlements or special flag here
8906 * so we can restrict access appropriately.
8907 */
8908 #else /* DEVELOPMENT || DEBUG */
8909
8910 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
8911 error = EPERM;
8912 goto out;
8913 }
8914 #endif /* DEVELOPMENT || DEBUG */
8915
8916 /*
8917 * Removed a check here; we used to abort if vp's vid
8918 * was not the same as what we'd seen the last time around.
8919 * I do not think that check was valid, because if we retry
8920 * and all dirents are gone, the directory could legitimately
8921 * be recycled but still be present in a situation where we would
8922 * have had permission to delete. Therefore, we won't make
8923 * an effort to preserve that check now that we may not have a
8924 * vp here.
8925 */
8926
8927 if (!batched) {
8928 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
8929 if (error) {
8930 if (error == ENOENT) {
8931 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8932 restart_flag = 1;
8933 restart_count += 1;
8934 }
8935 }
8936 goto out;
8937 }
8938 }
8939 } else {
8940 batched = 1;
8941
8942 if (!vnode_compound_rmdir_available(dvp)) {
8943 panic("No error, but no compound rmdir?");
8944 }
8945 }
8946
8947 #if CONFIG_FSE
8948 fse_info finfo;
8949
8950 need_event = need_fsevent(FSE_DELETE, dvp);
8951 if (need_event) {
8952 if (!batched) {
8953 get_fse_info(vp, &finfo, ctx);
8954 } else {
8955 error = vfs_get_notify_attributes(&va);
8956 if (error) {
8957 goto out;
8958 }
8959
8960 vap = &va;
8961 }
8962 }
8963 #endif
8964 has_listeners = kauth_authorize_fileop_has_listeners();
8965 if (need_event || has_listeners) {
8966 if (path == NULL) {
8967 GET_PATH(path);
8968 if (path == NULL) {
8969 error = ENOMEM;
8970 goto out;
8971 }
8972 }
8973
8974 len_path = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
8975
8976 if (no_firmlink_path == NULL) {
8977 GET_PATH(no_firmlink_path);
8978 if (no_firmlink_path == NULL) {
8979 error = ENOMEM;
8980 goto out;
8981 }
8982 }
8983
8984 len_no_firmlink_path = safe_getpath_no_firmlink(dvp, nd.ni_cnd.cn_nameptr, no_firmlink_path, MAXPATHLEN, &truncated_no_firmlink_path);
8985 #if CONFIG_FSE
8986 if (truncated_no_firmlink_path) {
8987 finfo.mode |= FSE_TRUNCATED_PATH;
8988 }
8989 #endif
8990 }
8991
8992 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
8993 nd.ni_vp = vp;
8994 if (vp == NULLVP) {
8995 /* Couldn't find a vnode */
8996 goto out;
8997 }
8998
8999 if (error == EKEEPLOOKING) {
9000 goto continue_lookup;
9001 } else if (batched && error == ENOENT) {
9002 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
9003 /*
9004 * For compound VNOPs, the authorization callback
9005 * may return ENOENT in case of racing hard link lookups;
9006 * redrive the lookup in that case.
9007 */
9008 restart_flag = 1;
9009 restart_count += 1;
9010 goto out;
9011 }
9012 }
9013
9014 /*
9015 * XXX There's no provision for passing flags
9016 * to VNOP_RMDIR(). So, if vn_rmdir() fails
9017 * because it's not empty, then we try again
9018 * with VNOP_REMOVE(), passing in a special
9019 * flag that clever file systems will know
9020 * how to handle.
9021 */
9022 if (error == ENOTEMPTY &&
9023 (unlink_flags & VNODE_REMOVE_DATALESS_DIR) != 0) {
9024 /*
9025 * If this fails, we want to keep the original
9026 * error.
9027 */
9028 if (vn_remove(dvp, &vp, &nd,
9029 VNODE_REMOVE_DATALESS_DIR, vap, ctx) == 0) {
9030 error = 0;
9031 }
9032 }
9033
9034 #if CONFIG_APPLEDOUBLE
9035 /*
9036 * Special case to remove orphaned AppleDouble
9037 * files. I don't like putting this in the kernel,
9038 * but carbon does not like putting this in carbon either,
9039 * so here we are.
9040 */
9041 if (error == ENOTEMPTY) {
9042 int ad_error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
9043 if (ad_error == EBUSY) {
9044 error = ad_error;
9045 goto out;
9046 }
9047
9048
9049 /*
9050 * Assuming everything went well, we will try the RMDIR again
9051 */
9052 if (!ad_error) {
9053 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
9054 }
9055 }
9056 #endif /* CONFIG_APPLEDOUBLE */
9057 /*
9058 * Call out to allow 3rd party notification of delete.
9059 * Ignore result of kauth_authorize_fileop call.
9060 */
9061 if (!error) {
9062 if (has_listeners) {
9063 kauth_authorize_fileop(vfs_context_ucred(ctx),
9064 KAUTH_FILEOP_DELETE,
9065 (uintptr_t)vp,
9066 (uintptr_t)path);
9067 }
9068
9069 if (vp->v_flag & VISHARDLINK) {
9070 // see the comment in unlink1() about why we update
9071 // the parent of a hard link when it is removed
9072 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
9073 }
9074
9075 #if CONFIG_FSE
9076 if (need_event) {
9077 if (vap) {
9078 vnode_get_fse_info_from_vap(vp, &finfo, vap);
9079 }
9080 add_fsevent(FSE_DELETE, ctx,
9081 FSE_ARG_STRING, len_no_firmlink_path, no_firmlink_path,
9082 FSE_ARG_FINFO, &finfo,
9083 FSE_ARG_DONE);
9084 }
9085 #endif
9086 }
9087
9088 out:
9089 if (path != NULL) {
9090 RELEASE_PATH(path);
9091 path = NULL;
9092 }
9093
9094 if (no_firmlink_path != NULL) {
9095 RELEASE_PATH(no_firmlink_path);
9096 no_firmlink_path = NULL;
9097 }
9098
9099 /*
9100 * nameidone has to happen before we vnode_put(dvp)
9101 * since it may need to release the fs_nodelock on the dvp
9102 */
9103 nameidone(&nd);
9104 vnode_put(dvp);
9105
9106 if (vp) {
9107 vnode_put(vp);
9108 }
9109
9110 if (restart_flag == 0) {
9111 wakeup_one((caddr_t)vp);
9112 return error;
9113 }
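/*
 * A restart was requested (e.g. orphaned AppleDouble cleanup raced with
 * another rmdir of the same directory); sleep briefly before retrying.
 */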
9114 tsleep(vp, PVFS, "rm AD", 1);
9115 } while (restart_flag != 0);
9116
9117 return error;
9118 }
9119
9120 /*
9121 * Remove a directory file.
9122 */
9123 /* ARGSUSED */
9124 int
9125 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
9126 {
9127 return rmdirat_internal(vfs_context_current(), AT_FDCWD,
9128 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE, 0);
9129 }
9130
9131 /* Get direntry length padded to 8 byte alignment */
9132 #define DIRENT64_LEN(namlen) \
9133 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
9134
9135 /* Get dirent length padded to 4 byte alignment */
9136 #define DIRENT_LEN(namelen) \
9137 ((sizeof(struct dirent) + (namelen + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)
9138
9139 /* Get the end of this dirent */
9140 #define DIRENT_END(dep) \
9141 (((char *)(dep)) + (dep)->d_reclen - 1)
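/*
 * Note on the three macros above (assumes the struct layouts in <sys/dirent.h>):
 * struct direntry and struct dirent declare d_name at its maximum size
 * (MAXPATHLEN and __DARWIN_MAXNAMLEN + 1 bytes respectively), so the *_LEN
 * macros subtract the unused tail of that array, keep room for the actual
 * name plus its NUL terminator, and round up to 8- or 4-byte alignment.
 */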
9142
9143 errno_t
9144 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
9145 int *numdirent, vfs_context_t ctxp)
9146 {
9147 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
9148 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
9149 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
9150 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
9151 } else {
9152 size_t bufsize;
9153 void * bufptr;
9154 uio_t auio;
9155 struct direntry *entry64;
9156 struct dirent *dep;
9157 int bytesread;
9158 int error;
9159
9160 /*
9161 * We're here because the underlying file system does not
9162 * support direntries (or the mount denies extended readdir), so we
9163 * must fall back to dirents and convert them to direntries.
9164 *
9165 * Our kernel buffer needs to be smaller since re-packing will
9166 * expand each dirent. The worst case (when the name length
9167 * is 3 or less) corresponds to a struct direntry size of 32
9168 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
9169 * (4-byte aligned). So having a buffer that is 3/8 the size
9170 * will prevent us from reading more than we can pack.
9171 *
9172 * Since this buffer is wired memory, we will limit the
9173 * buffer size to a maximum of 32K. We would really like to
9174 * use 32K in the MIN(), but we use magic number 87371 to
9175 * prevent uio_resid() * 3 / 8 from overflowing.
9176 */
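/* For reference: 87371 * 3 / 8 == 32764, i.e. just under the 32K limit mentioned above. */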
9177 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
9178 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
9179 if (bufptr == NULL) {
9180 return ENOMEM;
9181 }
9182
9183 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
9184 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
9185 auio->uio_offset = uio->uio_offset;
9186
9187 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
9188
9189 dep = (struct dirent *)bufptr;
9190 bytesread = bufsize - uio_resid(auio);
9191
9192 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
9193 M_TEMP, M_WAITOK);
9194 /*
9195 * Convert all the entries and copy them out to user's buffer.
9196 */
9197 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
9198 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
9199
9200 if (DIRENT_END(dep) > ((char *)bufptr + bytesread) ||
9201 DIRENT_LEN(dep->d_namlen) > dep->d_reclen) {
9202 printf("%s: %s: Bad dirent received from directory %s\n", __func__,
9203 vp->v_mount->mnt_vfsstat.f_mntonname,
9204 vp->v_name ? vp->v_name : "<unknown>");
9205 error = EIO;
9206 break;
9207 }
9208
9209 bzero(entry64, enbufsize);
9210 /* Convert a dirent to a dirent64. */
9211 entry64->d_ino = dep->d_ino;
9212 entry64->d_seekoff = 0;
9213 entry64->d_reclen = enbufsize;
9214 entry64->d_namlen = dep->d_namlen;
9215 entry64->d_type = dep->d_type;
9216 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
9217
9218 /* Move to next entry. */
9219 dep = (struct dirent *)((char *)dep + dep->d_reclen);
9220
9221 /* Copy entry64 to user's buffer. */
9222 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
9223 }
9224
9225 /* Update the real offset using the offset we got from VNOP_READDIR. */
9226 if (error == 0) {
9227 uio->uio_offset = auio->uio_offset;
9228 }
9229 uio_free(auio);
9230 FREE(bufptr, M_TEMP);
9231 FREE(entry64, M_TEMP);
9232 return error;
9233 }
9234 }
9235
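/* Upper bound (128 MiB) on the caller-supplied buffer size accepted by getdirentries_common() below. */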
9236 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
9237
9238 /*
9239 * Read a block of directory entries in a file system independent format.
9240 */
9241 static int
9242 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
9243 off_t *offset, int *eofflag, int flags)
9244 {
9245 vnode_t vp;
9246 struct vfs_context context = *vfs_context_current(); /* local copy */
9247 struct fileproc *fp;
9248 uio_t auio;
9249 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9250 off_t loff;
9251 int error, numdirent;
9252 char uio_buf[UIO_SIZEOF(1)];
9253
9254 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
9255 if (error) {
9256 return error;
9257 }
9258 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
9259 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
9260 error = EBADF;
9261 goto out;
9262 }
9263
9264 if (bufsize > GETDIRENTRIES_MAXBUFSIZE) {
9265 bufsize = GETDIRENTRIES_MAXBUFSIZE;
9266 }
9267
9268 #if CONFIG_MACF
9269 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
9270 if (error) {
9271 goto out;
9272 }
9273 #endif
9274 if ((error = vnode_getwithref(vp))) {
9275 goto out;
9276 }
9277 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
9278
9279 unionread:
9280 if (vp->v_type != VDIR) {
9281 (void)vnode_put(vp);
9282 error = EINVAL;
9283 goto out;
9284 }
9285
9286 #if CONFIG_MACF
9287 error = mac_vnode_check_readdir(&context, vp);
9288 if (error != 0) {
9289 (void)vnode_put(vp);
9290 goto out;
9291 }
9292 #endif /* MAC */
9293
9294 loff = fp->f_fglob->fg_offset;
9295 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
9296 uio_addiov(auio, bufp, bufsize);
9297
9298 if (flags & VNODE_READDIR_EXTENDED) {
9299 error = vnode_readdir64(vp, auio, flags, eofflag, &numdirent, &context);
9300 fp->f_fglob->fg_offset = uio_offset(auio);
9301 } else {
9302 error = VNOP_READDIR(vp, auio, 0, eofflag, &numdirent, &context);
9303 fp->f_fglob->fg_offset = uio_offset(auio);
9304 }
9305 if (error) {
9306 (void)vnode_put(vp);
9307 goto out;
9308 }
9309
9310 if ((user_ssize_t)bufsize == uio_resid(auio)) {
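/*
 * Nothing was transferred into the caller's buffer; if this is a union
 * mount, traverse to the underlying directory and retry the read there.
 */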
9311 if (union_dircheckp) {
9312 error = union_dircheckp(&vp, fp, &context);
9313 if (error == -1) {
9314 goto unionread;
9315 }
9316 if (error) {
9317 (void)vnode_put(vp);
9318 goto out;
9319 }
9320 }
9321
9322 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
9323 struct vnode *tvp = vp;
9324 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
9325 vnode_ref(vp);
9326 fp->f_fglob->fg_data = (caddr_t) vp;
9327 fp->f_fglob->fg_offset = 0;
9328 vnode_rele(tvp);
9329 vnode_put(tvp);
9330 goto unionread;
9331 }
9332 vp = tvp;
9333 }
9334 }
9335
9336 vnode_put(vp);
9337 if (offset) {
9338 *offset = loff;
9339 }
9340
9341 *bytesread = bufsize - uio_resid(auio);
9342 out:
9343 file_drop(fd);
9344 return error;
9345 }
9346
9347
9348 int
9349 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
9350 {
9351 off_t offset;
9352 ssize_t bytesread;
9353 int error, eofflag;
9354
9355 AUDIT_ARG(fd, uap->fd);
9356 error = getdirentries_common(uap->fd, uap->buf, uap->count,
9357 &bytesread, &offset, &eofflag, 0);
9358
9359 if (error == 0) {
9360 if (proc_is64bit(p)) {
9361 user64_long_t base = (user64_long_t)offset;
9362 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
9363 } else {
9364 user32_long_t base = (user32_long_t)offset;
9365 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
9366 }
9367 *retval = bytesread;
9368 }
9369 return error;
9370 }
9371
9372 int
9373 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
9374 {
9375 off_t offset;
9376 ssize_t bytesread;
9377 int error, eofflag;
9378 user_size_t bufsize;
9379
9380 AUDIT_ARG(fd, uap->fd);
9381
9382 /*
9383 * If the buffer is at least GETDIRENTRIES64_EXTENDED_BUFSIZE large,
9384 * then the kernel carves out the last 4 bytes to return extended
9385 * information to userspace (namely whether we reached EOF with this call).
9386 */
9387 if (uap->bufsize >= GETDIRENTRIES64_EXTENDED_BUFSIZE) {
9388 bufsize = uap->bufsize - sizeof(getdirentries64_flags_t);
9389 } else {
9390 bufsize = uap->bufsize;
9391 }
9392
9393 error = getdirentries_common(uap->fd, uap->buf, bufsize,
9394 &bytesread, &offset, &eofflag, VNODE_READDIR_EXTENDED);
9395
9396 if (error == 0) {
9397 *retval = bytesread;
9398 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
9399
9400 if (error == 0 && uap->bufsize >= GETDIRENTRIES64_EXTENDED_BUFSIZE) {
9401 getdirentries64_flags_t flags = 0;
9402 if (eofflag) {
9403 flags |= GETDIRENTRIES64_EOF;
9404 }
9405 error = copyout(&flags, (user_addr_t)uap->buf + bufsize,
9406 sizeof(flags));
9407 }
9408 }
9409 return error;
9410 }
9411
9412
9413 /*
9414 * Set the mode mask for creation of filesystem nodes.
9415 * XXX implement xsecurity
9416 */
9417 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
9418 static int
9419 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
9420 {
9421 struct filedesc *fdp;
9422
9423 AUDIT_ARG(mask, newmask);
9424 proc_fdlock(p);
9425 fdp = p->p_fd;
9426 *retval = fdp->fd_cmask;
9427 fdp->fd_cmask = newmask & ALLPERMS;
9428 proc_fdunlock(p);
9429 return 0;
9430 }
9431
9432 /*
9433 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
9434 *
9435 * Parameters: p Process requesting to set the umask
9436 * uap User argument descriptor (see below)
9437 * retval umask of the process (parameter p)
9438 *
9439 * Indirect: uap->newmask umask to set
9440 * uap->xsecurity ACL to set
9441 *
9442 * Returns: 0 Success
9443 * !0 Not success
9444 *
9445 */
9446 int
9447 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
9448 {
9449 int ciferror;
9450 kauth_filesec_t xsecdst;
9451
9452 xsecdst = KAUTH_FILESEC_NONE;
9453 if (uap->xsecurity != USER_ADDR_NULL) {
9454 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
9455 return ciferror;
9456 }
9457 } else {
9458 xsecdst = KAUTH_FILESEC_NONE;
9459 }
9460
9461 ciferror = umask1(p, uap->newmask, xsecdst, retval);
9462
9463 if (xsecdst != KAUTH_FILESEC_NONE) {
9464 kauth_filesec_free(xsecdst);
9465 }
9466 return ciferror;
9467 }
9468
9469 int
9470 umask(proc_t p, struct umask_args *uap, int32_t *retval)
9471 {
9472 return umask1(p, uap->newmask, UMASK_NOXSECURITY, retval);
9473 }
9474
9475 /*
9476 * Void all references to file by ripping underlying filesystem
9477 * away from vnode.
9478 */
9479 /* ARGSUSED */
9480 int
9481 revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
9482 {
9483 vnode_t vp;
9484 struct vnode_attr va;
9485 vfs_context_t ctx = vfs_context_current();
9486 int error;
9487 struct nameidata nd;
9488
9489 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
9490 uap->path, ctx);
9491 error = namei(&nd);
9492 if (error) {
9493 return error;
9494 }
9495 vp = nd.ni_vp;
9496
9497 nameidone(&nd);
9498
9499 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
9500 error = ENOTSUP;
9501 goto out;
9502 }
9503
9504 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
9505 error = EBUSY;
9506 goto out;
9507 }
9508
9509 #if CONFIG_MACF
9510 error = mac_vnode_check_revoke(ctx, vp);
9511 if (error) {
9512 goto out;
9513 }
9514 #endif
9515
9516 VATTR_INIT(&va);
9517 VATTR_WANTED(&va, va_uid);
9518 if ((error = vnode_getattr(vp, &va, ctx))) {
9519 goto out;
9520 }
9521 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
9522 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
9523 goto out;
9524 }
9525 if (vp->v_usecount > 0 || (vnode_isaliased(vp))) {
9526 VNOP_REVOKE(vp, REVOKEALL, ctx);
9527 }
9528 out:
9529 vnode_put(vp);
9530 return error;
9531 }
9532
9533
9534 /*
9535 * HFS/HFS Plus SPECIFIC SYSTEM CALLS
9536 * The following system calls are designed to support features
9537 * which are specific to the HFS & HFS Plus volume formats
9538 */
9539
9540
9541 /*
9542 * Obtain attribute information on objects in a directory while enumerating
9543 * the directory.
9544 */
9545 /* ARGSUSED */
9546 int
9547 getdirentriesattr(proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
9548 {
9549 vnode_t vp;
9550 struct fileproc *fp;
9551 uio_t auio = NULL;
9552 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9553 uint32_t count = 0, savecount = 0;
9554 uint32_t newstate = 0;
9555 int error, eofflag;
9556 uint32_t loff = 0;
9557 struct attrlist attributelist;
9558 vfs_context_t ctx = vfs_context_current();
9559 int fd = uap->fd;
9560 char uio_buf[UIO_SIZEOF(1)];
9561 kauth_action_t action;
9562
9563 AUDIT_ARG(fd, fd);
9564
9565 /* Get the attributes into kernel space */
9566 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
9567 return error;
9568 }
9569 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
9570 return error;
9571 }
9572 savecount = count;
9573 if ((error = fp_getfvp(p, fd, &fp, &vp))) {
9574 return error;
9575 }
9576 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
9577 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
9578 error = EBADF;
9579 goto out;
9580 }
9581
9582
9583 #if CONFIG_MACF
9584 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
9585 fp->f_fglob);
9586 if (error) {
9587 goto out;
9588 }
9589 #endif
9590
9591
9592 if ((error = vnode_getwithref(vp))) {
9593 goto out;
9594 }
9595
9596 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
9597
9598 unionread:
9599 if (vp->v_type != VDIR) {
9600 (void)vnode_put(vp);
9601 error = EINVAL;
9602 goto out;
9603 }
9604
9605 #if CONFIG_MACF
9606 error = mac_vnode_check_readdir(ctx, vp);
9607 if (error != 0) {
9608 (void)vnode_put(vp);
9609 goto out;
9610 }
9611 #endif /* MAC */
9612
9613 /* set up the uio structure which will contain the user's return buffer */
9614 loff = fp->f_fglob->fg_offset;
9615 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
9616 uio_addiov(auio, uap->buffer, uap->buffersize);
9617
9618 /*
9619 * If the only item requested is file names, we can let that past with
9620 * just LIST_DIRECTORY. If they want any other attributes, that means
9621 * they need SEARCH as well.
9622 */
9623 action = KAUTH_VNODE_LIST_DIRECTORY;
9624 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
9625 attributelist.fileattr || attributelist.dirattr) {
9626 action |= KAUTH_VNODE_SEARCH;
9627 }
9628
9629 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
9630 /* Believe it or not, uap->options only has 32-bits of valid
9631 * info, so truncate before extending again */
9632
9633 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
9634 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
9635 }
9636
9637 if (error) {
9638 (void) vnode_put(vp);
9639 goto out;
9640 }
9641
9642 /*
9643 * If we've got the last entry of a directory in a union mount
9644 * then reset the eofflag and pretend there's still more to come.
9645 * The next call will again set eofflag and the buffer will be empty,
9646 * so traverse to the underlying directory and do the directory
9647 * read there.
9648 */
9649 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
9650 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
9651 eofflag = 0;
9652 } else { // Empty buffer
9653 struct vnode *tvp = vp;
9654 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
9655 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
9656 fp->f_fglob->fg_data = (caddr_t) vp;
9657 fp->f_fglob->fg_offset = 0; // reset index for new dir
9658 count = savecount;
9659 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
9660 vnode_put(tvp);
9661 goto unionread;
9662 }
9663 vp = tvp;
9664 }
9665 }
9666
9667 (void)vnode_put(vp);
9668
9669 if (error) {
9670 goto out;
9671 }
9672 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
9673
9674 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count)))) {
9675 goto out;
9676 }
9677 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate)))) {
9678 goto out;
9679 }
9680 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff)))) {
9681 goto out;
9682 }
9683
9684 *retval = eofflag; /* similar to getdirentries */
9685 error = 0;
9686 out:
9687 file_drop(fd);
9688 return error; /* return error earlier, a retval of 0 or 1 now */
9689 } /* end of getdirentriesattr system call */
9690
9691 /*
9692 * Exchange data between two files
9693 */
9694
9695 /* ARGSUSED */
9696 int
9697 exchangedata(__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
9698 {
9699 struct nameidata fnd, snd;
9700 vfs_context_t ctx = vfs_context_current();
9701 vnode_t fvp;
9702 vnode_t svp;
9703 int error;
9704 u_int32_t nameiflags;
9705 char *fpath = NULL;
9706 char *spath = NULL;
9707 int flen = 0, slen = 0;
9708 int from_truncated = 0, to_truncated = 0;
9709 #if CONFIG_FSE
9710 fse_info f_finfo, s_finfo;
9711 #endif
9712
9713 nameiflags = 0;
9714 if ((uap->options & FSOPT_NOFOLLOW) == 0) {
9715 nameiflags |= FOLLOW;
9716 }
9717
9718 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
9719 UIO_USERSPACE, uap->path1, ctx);
9720
9721 error = namei(&fnd);
9722 if (error) {
9723 goto out2;
9724 }
9725
9726 nameidone(&fnd);
9727 fvp = fnd.ni_vp;
9728
9729 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
9730 UIO_USERSPACE, uap->path2, ctx);
9731
9732 error = namei(&snd);
9733 if (error) {
9734 vnode_put(fvp);
9735 goto out2;
9736 }
9737 nameidone(&snd);
9738 svp = snd.ni_vp;
9739
9740 /*
9741 * if the files are the same, return EINVAL
9742 */
9743 if (svp == fvp) {
9744 error = EINVAL;
9745 goto out;
9746 }
9747
9748 /*
9749 * if the files are on different volumes, return an error
9750 */
9751 if (svp->v_mount != fvp->v_mount) {
9752 error = EXDEV;
9753 goto out;
9754 }
9755
9756 /* If they're not files, return an error */
9757 if ((vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
9758 error = EINVAL;
9759 goto out;
9760 }
9761
9762 #if CONFIG_MACF
9763 error = mac_vnode_check_exchangedata(ctx,
9764 fvp, svp);
9765 if (error) {
9766 goto out;
9767 }
9768 #endif
9769 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
9770 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0)) {
9771 goto out;
9772 }
9773
9774 if (
9775 #if CONFIG_FSE
9776 need_fsevent(FSE_EXCHANGE, fvp) ||
9777 #endif
9778 kauth_authorize_fileop_has_listeners()) {
9779 GET_PATH(fpath);
9780 GET_PATH(spath);
9781 if (fpath == NULL || spath == NULL) {
9782 error = ENOMEM;
9783 goto out;
9784 }
9785
9786 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
9787 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
9788
9789 #if CONFIG_FSE
9790 get_fse_info(fvp, &f_finfo, ctx);
9791 get_fse_info(svp, &s_finfo, ctx);
9792 if (from_truncated || to_truncated) {
9793 // set it here since only the f_finfo gets reported up to user space
9794 f_finfo.mode |= FSE_TRUNCATED_PATH;
9795 }
9796 #endif
9797 }
9798 /* Ok, make the call */
9799 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
9800
9801 if (error == 0) {
9802 const char *tmpname;
9803
9804 if (fpath != NULL && spath != NULL) {
9805 /* call out to allow 3rd party notification of exchangedata.
9806 * Ignore result of kauth_authorize_fileop call.
9807 */
9808 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
9809 (uintptr_t)fpath, (uintptr_t)spath);
9810 }
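/*
 * The data has been exchanged, so swap the cached identities (name and
 * parent) of the two vnodes under the name cache lock to keep the
 * namecache consistent.
 */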
9811 name_cache_lock();
9812
9813 tmpname = fvp->v_name;
9814 fvp->v_name = svp->v_name;
9815 svp->v_name = tmpname;
9816
9817 if (fvp->v_parent != svp->v_parent) {
9818 vnode_t tmp;
9819
9820 tmp = fvp->v_parent;
9821 fvp->v_parent = svp->v_parent;
9822 svp->v_parent = tmp;
9823 }
9824 name_cache_unlock();
9825
9826 #if CONFIG_FSE
9827 if (fpath != NULL && spath != NULL) {
9828 add_fsevent(FSE_EXCHANGE, ctx,
9829 FSE_ARG_STRING, flen, fpath,
9830 FSE_ARG_FINFO, &f_finfo,
9831 FSE_ARG_STRING, slen, spath,
9832 FSE_ARG_FINFO, &s_finfo,
9833 FSE_ARG_DONE);
9834 }
9835 #endif
9836 }
9837
9838 out:
9839 if (fpath != NULL) {
9840 RELEASE_PATH(fpath);
9841 }
9842 if (spath != NULL) {
9843 RELEASE_PATH(spath);
9844 }
9845 vnode_put(svp);
9846 vnode_put(fvp);
9847 out2:
9848 return error;
9849 }
9850
9851 /*
9852 * Return (in MB) the amount of freespace on the given vnode's volume.
9853 */
9854 uint32_t freespace_mb(vnode_t vp);
9855
9856 uint32_t
9857 freespace_mb(vnode_t vp)
9858 {
9859 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
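/* f_bavail (free blocks) * f_bsize (block size) gives free bytes; >> 20 converts bytes to MiB. */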
9860 return ((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
9861 vp->v_mount->mnt_vfsstat.f_bsize) >> 20;
9862 }
9863
9864 #if CONFIG_SEARCHFS
9865
9866 /* ARGSUSED */
9867
9868 int
9869 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
9870 {
9871 vnode_t vp, tvp;
9872 int i, error = 0;
9873 int fserror = 0;
9874 struct nameidata nd;
9875 struct user64_fssearchblock searchblock;
9876 struct searchstate *state;
9877 struct attrlist *returnattrs;
9878 struct timeval timelimit;
9879 void *searchparams1, *searchparams2;
9880 uio_t auio = NULL;
9881 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9882 uint32_t nummatches;
9883 int mallocsize;
9884 uint32_t nameiflags;
9885 vfs_context_t ctx = vfs_context_current();
9886 char uio_buf[UIO_SIZEOF(1)];
9887
9888 /* Start by copying in fsearchblock parameter list */
9889 if (IS_64BIT_PROCESS(p)) {
9890 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
9891 timelimit.tv_sec = searchblock.timelimit.tv_sec;
9892 timelimit.tv_usec = searchblock.timelimit.tv_usec;
9893 } else {
9894 struct user32_fssearchblock tmp_searchblock;
9895
9896 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
9897 // munge into 64-bit version
9898 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
9899 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
9900 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
9901 searchblock.maxmatches = tmp_searchblock.maxmatches;
9902 /*
9903 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
9904 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
9905 */
9906 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
9907 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
9908 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
9909 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
9910 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
9911 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
9912 searchblock.searchattrs = tmp_searchblock.searchattrs;
9913 }
9914 if (error) {
9915 return error;
9916 }
9917
9918 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
9919 */
9920 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
9921 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS) {
9922 return EINVAL;
9923 }
9924
9925 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
9926 /* It all has to go into local memory and it's not that big, so we might as well put it all together. */
9927 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated */
9928 /* block. */
9929 /* */
9930 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
9931 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
9932 /* assumes the size is still 556 bytes it will continue to work */
9933
9934 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
9935 sizeof(struct attrlist) + sizeof(struct searchstate) + (2 * sizeof(uint32_t));
9936
9937 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
9938
9939 /* Now set up the various pointers to the correct place in our newly allocated memory */
9940
9941 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
9942 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
9943 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof(struct attrlist));
9944
9945 /* Now copy in the stuff given our local variables. */
9946
9947 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1))) {
9948 goto freeandexit;
9949 }
9950
9951 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2))) {
9952 goto freeandexit;
9953 }
9954
9955 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist)))) {
9956 goto freeandexit;
9957 }
9958
9959 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate)))) {
9960 goto freeandexit;
9961 }
9962
9963 /*
9964 * When searching a union mount, need to set the
9965 * start flag at the first call on each layer to
9966 * reset state for the new volume.
9967 */
9968 if (uap->options & SRCHFS_START) {
9969 state->ss_union_layer = 0;
9970 } else {
9971 uap->options |= state->ss_union_flags;
9972 }
9973 state->ss_union_flags = 0;
9974
9975 /*
9976 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
9977 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
9978 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
9979 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
9980 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
9981 */
9982
9983 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
9984 attrreference_t* string_ref;
9985 u_int32_t* start_length;
9986 user64_size_t param_length;
9987
9988 /* validate searchparams1 */
9989 param_length = searchblock.sizeofsearchparams1;
9990 /* skip the word that specifies length of the buffer */
9991 start_length = (u_int32_t*) searchparams1;
9992 start_length = start_length + 1;
9993 string_ref = (attrreference_t*) start_length;
9994
9995 /* ensure no negative offsets or too big offsets */
9996 if (string_ref->attr_dataoffset < 0) {
9997 error = EINVAL;
9998 goto freeandexit;
9999 }
10000 if (string_ref->attr_length > MAXPATHLEN) {
10001 error = EINVAL;
10002 goto freeandexit;
10003 }
10004
10005 /* Check for pointer overflow in the string ref */
10006 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
10007 error = EINVAL;
10008 goto freeandexit;
10009 }
10010
10011 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
10012 error = EINVAL;
10013 goto freeandexit;
10014 }
10015 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
10016 error = EINVAL;
10017 goto freeandexit;
10018 }
10019 }
10020
10021 /* set up the uio structure which will contain the user's return buffer */
10022 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
10023 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
10024
10025 nameiflags = 0;
10026 if ((uap->options & FSOPT_NOFOLLOW) == 0) {
10027 nameiflags |= FOLLOW;
10028 }
10029 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
10030 UIO_USERSPACE, uap->path, ctx);
10031
10032 error = namei(&nd);
10033 if (error) {
10034 goto freeandexit;
10035 }
10036 vp = nd.ni_vp;
10037 nameidone(&nd);
10038
10039 /*
10040 * Switch to the root vnode for the volume
10041 */
10042 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
10043 vnode_put(vp);
10044 if (error) {
10045 goto freeandexit;
10046 }
10047 vp = tvp;
10048
10049 /*
10050 * If it's a union mount, the path lookup takes
10051 * us to the top layer. But we may need to descend
10052 * to a lower layer. For non-union mounts the layer
10053 * is always zero.
10054 */
10055 for (i = 0; i < (int) state->ss_union_layer; i++) {
10056 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0) {
10057 break;
10058 }
10059 tvp = vp;
10060 vp = vp->v_mount->mnt_vnodecovered;
10061 if (vp == NULL) {
10062 vnode_put(tvp);
10063 error = ENOENT;
10064 goto freeandexit;
10065 }
10066 error = vnode_getwithref(vp);
10067 vnode_put(tvp);
10068 if (error) {
10069 goto freeandexit;
10070 }
10071 }
10072
10073 #if CONFIG_MACF
10074 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
10075 if (error) {
10076 vnode_put(vp);
10077 goto freeandexit;
10078 }
10079 #endif
10080
10081
10082 /*
10083 * If searchblock.maxmatches == 0, then skip the search. This has happened
10084 * before and sometimes the underlying code doesn't deal with it well.
10085 */
10086 if (searchblock.maxmatches == 0) {
10087 nummatches = 0;
10088 goto saveandexit;
10089 }
10090
10091 /*
10092 * All right, we have everything we need, so let's make the call.
10093 *
10094 * We keep special track of the return value from the file system:
10095 * EAGAIN is an acceptable error condition that shouldn't keep us
10096 * from copying out any results...
10097 */
10098
10099 fserror = VNOP_SEARCHFS(vp,
10100 searchparams1,
10101 searchparams2,
10102 &searchblock.searchattrs,
10103 (u_long)searchblock.maxmatches,
10104 &timelimit,
10105 returnattrs,
10106 &nummatches,
10107 (u_long)uap->scriptcode,
10108 (u_long)uap->options,
10109 auio,
10110 (struct searchstate *) &state->ss_fsstate,
10111 ctx);
10112
10113 /*
10114 * If it's a union mount we need to be called again
10115 * to search the mounted-on filesystem.
10116 */
10117 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
10118 state->ss_union_flags = SRCHFS_START;
10119 state->ss_union_layer++; // search next layer down
10120 fserror = EAGAIN;
10121 }
10122
10123 saveandexit:
10124
10125 vnode_put(vp);
10126
10127 /* Now copy out the stuff that needs copying out. That means the number of matches and the
10128 * search state. Everything else was already put into the return buffer by the VNOP call. */
10129
10130 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0) {
10131 goto freeandexit;
10132 }
10133
10134 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0) {
10135 goto freeandexit;
10136 }
10137
10138 error = fserror;
10139
10140 freeandexit:
10141
10142 FREE(searchparams1, M_TEMP);
10143
10144 return error;
10145 } /* end of searchfs system call */
10146
10147 #else /* CONFIG_SEARCHFS */
10148
10149 int
10150 searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
10151 {
10152 return ENOTSUP;
10153 }
10154
10155 #endif /* CONFIG_SEARCHFS */
10156
10157
10158 #if CONFIG_DATALESS_FILES
10159
10160 /*
10161 * === Namespace Resolver Up-call Mechanism ===
10162 *
10163 * When I/O is performed to a dataless file or directory (read, write,
10164 * lookup-in, etc.), the file system performs an upcall to the namespace
10165 * resolver (filecoordinationd) to materialize the object.
10166 *
10167 * We need multiple up-calls to be in flight at once, and we need these
10168 * up-calls to be interruptible, thus the following implementation:
10169 *
10170 * => The nspace_resolver_request represents the in-kernel request state.
10171 * It contains a request ID, storage space for the errno code returned
10172 * by filecoordinationd, and flags.
10173 *
10174 * => The request ID is simply a global monotonically incrementing 32-bit
10175 * number. Outstanding requests are stored in a hash table, and the
10176 * hash function is extremely simple.
10177 *
10178 * => When an upcall is to be made to filecoordinationd, a request structure
10179 * is allocated on the stack (it is small, and needs to live only during
10180 * the duration of the call to resolve_nspace_item_ext()). It is
10181 * initialized and inserted into the table. Some backpressure from
10182 * filecoordinationd is applied by limiting the number of entries that
10183 * can be inserted into the table (and thus limiting the number of
10184 * outstanding requests issued to filecoordinationd); waiting for an
10185 * available slot is interruptible.
10186 *
10187 * => Once the request has been inserted into the table, the up-call is made
10188 * to filecoordinationd via a MiG-generated stub. The up-call returns
10189 * immediately and filecoordinationd processes the request asynchronously.
10190 *
10191 * => The caller now waits for the request to complete. This is achieved by
10192 * sleeping on the address of the request structure and waiting for
10193 * filecoordinationd to mark the request structure as complete. This
10194 * is an interruptible sleep call; if interrupted, the request structure
10195 * is removed from the table and EINTR is returned to the caller. If
10196 * this occurs, an advisory up-call is made to filecoordinationd with
10197 * the request ID to indicate that the request can be aborted or
10198 * de-prioritized at the discretion of filecoordinationd.
10199 *
10200 * => When filecoordinationd has completed the request, it signals completion
10201 * by writing to the vfs.nspace.complete sysctl node. Only a process
10202 * decorated as a namespace resolver can write to this sysctl node. The
10203 * value is a request ID / errno tuple passed as an array of 2 uint32_t's.
10204 * The request ID is looked up in the table, and if the request is found,
10205 * the error code is stored in the request structure and a wakeup()
10206 * issued on the address of the request structure. If the request is not
10207 * found, we simply drop the completion notification, assuming that the
10208 * caller was interrupted.
10209 *
10210 * => When the waiting thread wakes up, it extracts the error code from the
10211 * request structure, removes the request from the table, and returns the
10212 * error code to the calling function. Fini!
10213 */
10214
10215 struct nspace_resolver_request {
10216 LIST_ENTRY(nspace_resolver_request) r_hashlink;
10217 uint32_t r_req_id;
10218 int r_resolver_error;
10219 int r_flags;
10220 };
10221
10222 #define RRF_COMPLETE 0x0001
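/* Set in r_flags by nspace_resolver_req_mark_complete() once filecoordinationd reports a result. */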
10223
10224 static uint32_t
10225 next_nspace_req_id(void)
10226 {
10227 static uint32_t next_req_id;
10228
10229 return OSAddAtomic(1, &next_req_id);
10230 }
10231
10232 #define NSPACE_RESOLVER_REQ_HASHSIZE 32 /* XXX tune */
10233 #define NSPACE_RESOLVER_MAX_OUTSTANDING 256 /* XXX tune */
10234
10235 static LIST_HEAD(nspace_resolver_requesthead,
10236 nspace_resolver_request) * nspace_resolver_request_hashtbl;
10237 static u_long nspace_resolver_request_hashmask;
10238 static u_int nspace_resolver_request_count;
10239 static bool nspace_resolver_request_wait_slot;
10240 static lck_grp_t *nspace_resolver_request_lck_grp;
10241 static lck_mtx_t nspace_resolver_request_hash_mutex;
10242
10243 #define NSPACE_REQ_LOCK() \
10244 lck_mtx_lock(&nspace_resolver_request_hash_mutex)
10245 #define NSPACE_REQ_UNLOCK() \
10246 lck_mtx_unlock(&nspace_resolver_request_hash_mutex)
10247
10248 #define NSPACE_RESOLVER_HASH(req_id) \
10249 (&nspace_resolver_request_hashtbl[(req_id) & \
10250 nspace_resolver_request_hashmask])
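/* As noted above, the hash is trivial: the low bits of the request ID select the bucket. */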
10251
10252 static struct nspace_resolver_request *
10253 nspace_resolver_req_lookup(uint32_t req_id)
10254 {
10255 struct nspace_resolver_requesthead *bucket;
10256 struct nspace_resolver_request *req;
10257
10258 bucket = NSPACE_RESOLVER_HASH(req_id);
10259 LIST_FOREACH(req, bucket, r_hashlink) {
10260 if (req->r_req_id == req_id) {
10261 return req;
10262 }
10263 }
10264
10265 return NULL;
10266 }
10267
10268 static int
10269 nspace_resolver_req_add(struct nspace_resolver_request *req)
10270 {
10271 struct nspace_resolver_requesthead *bucket;
10272 int error;
10273
10274 while (nspace_resolver_request_count >=
10275 NSPACE_RESOLVER_MAX_OUTSTANDING) {
10276 nspace_resolver_request_wait_slot = true;
10277 error = msleep(&nspace_resolver_request_count,
10278 &nspace_resolver_request_hash_mutex,
10279 PVFS | PCATCH, "nspacerq", NULL);
10280 if (error) {
10281 return error;
10282 }
10283 }
10284
10285 bucket = NSPACE_RESOLVER_HASH(req->r_req_id);
10286 #if DIAGNOSTIC
10287 assert(nspace_resolver_req_lookup(req->r_req_id) == NULL);
10288 #endif /* DIAGNOSTIC */
10289 LIST_INSERT_HEAD(bucket, req, r_hashlink);
10290 nspace_resolver_request_count++;
10291
10292 return 0;
10293 }
10294
10295 static void
10296 nspace_resolver_req_remove(struct nspace_resolver_request *req)
10297 {
10298 struct nspace_resolver_requesthead *bucket;
10299
10300 bucket = NSPACE_RESOLVER_HASH(req->r_req_id);
10301 #if DIAGNOSTIC
10302 assert(nspace_resolver_req_lookup(req->r_req_id) != NULL);
10303 #endif /* DIAGNOSTIC */
10304 LIST_REMOVE(req, r_hashlink);
10305 nspace_resolver_request_count--;
10306
10307 if (nspace_resolver_request_wait_slot) {
10308 nspace_resolver_request_wait_slot = false;
10309 wakeup(&nspace_resolver_request_count);
10310 }
10311 }
10312
10313 static void
10314 nspace_resolver_req_cancel(uint32_t req_id)
10315 {
10316 kern_return_t kr;
10317 mach_port_t mp;
10318
10319 // Failures here aren't fatal -- the cancellation message
10320 // sent to the resolver is merely advisory.
10321
10322 kr = host_get_filecoordinationd_port(host_priv_self(), &mp);
10323 if (kr != KERN_SUCCESS || !IPC_PORT_VALID(mp)) {
10324 return;
10325 }
10326
10327 kr = send_nspace_resolve_cancel(mp, req_id);
10328 if (kr != KERN_SUCCESS) {
10329 os_log_error(OS_LOG_DEFAULT,
10330 "NSPACE send_nspace_resolve_cancel failure: %d", kr);
10331 }
10332
10333 ipc_port_release_send(mp);
10334 }
10335
10336 static int
10337 nspace_resolver_req_wait(struct nspace_resolver_request *req)
10338 {
10339 bool send_cancel_message = false;
10340 int error;
10341
10342 NSPACE_REQ_LOCK();
10343
10344 while ((req->r_flags & RRF_COMPLETE) == 0) {
10345 error = msleep(req, &nspace_resolver_request_hash_mutex,
10346 PVFS | PCATCH, "nspace", NULL);
10347 if (error && error != ERESTART) {
10348 req->r_resolver_error = (error == EINTR) ? EINTR :
10349 ETIMEDOUT;
10350 send_cancel_message = true;
10351 break;
10352 }
10353 }
10354
10355 nspace_resolver_req_remove(req);
10356
10357 NSPACE_REQ_UNLOCK();
10358
10359 if (send_cancel_message) {
10360 nspace_resolver_req_cancel(req->r_req_id);
10361 }
10362
10363 return req->r_resolver_error;
10364 }
10365
10366 static void
10367 nspace_resolver_req_mark_complete(
10368 struct nspace_resolver_request *req,
10369 int resolver_error)
10370 {
10371 req->r_resolver_error = resolver_error;
10372 req->r_flags |= RRF_COMPLETE;
10373 wakeup(req);
10374 }
10375
10376 static void
10377 nspace_resolver_req_completed(uint32_t req_id, int resolver_error)
10378 {
10379 struct nspace_resolver_request *req;
10380
10381 NSPACE_REQ_LOCK();
10382
10383 // If we don't find the request corresponding to our req_id,
10384 // just drop the completion signal on the floor; it's likely
10385 // that the requester was interrupted by a signal.
10386
10387 req = nspace_resolver_req_lookup(req_id);
10388 if (req) {
10389 nspace_resolver_req_mark_complete(req, resolver_error);
10390 }
10391
10392 NSPACE_REQ_UNLOCK();
10393 }
10394
10395 static struct proc *nspace_resolver_proc;
10396
10397 static int
10398 nspace_resolver_get_proc_state(struct proc *p, int *is_resolver)
10399 {
10400 *is_resolver = ((p->p_lflag & P_LNSPACE_RESOLVER) &&
10401 p == nspace_resolver_proc) ? 1 : 0;
10402 return 0;
10403 }
10404
10405 static int
10406 nspace_resolver_set_proc_state(struct proc *p, int is_resolver)
10407 {
10408 vfs_context_t ctx = vfs_context_current();
10409 int error = 0;
10410
10411 //
10412 // The system filecoordinationd runs as uid == 0. This also
10413 // has the nice side-effect of filtering out filecoordinationd
10414 // running in the simulator.
10415 //
10416 if (!vfs_context_issuser(ctx)) {
10417 return EPERM;
10418 }
10419
10420 error = priv_check_cred(vfs_context_ucred(ctx),
10421 PRIV_VFS_DATALESS_RESOLVER, 0);
10422 if (error) {
10423 return error;
10424 }
10425
10426 if (is_resolver) {
10427 NSPACE_REQ_LOCK();
10428
10429 if (nspace_resolver_proc == NULL) {
10430 proc_lock(p);
10431 p->p_lflag |= P_LNSPACE_RESOLVER;
10432 proc_unlock(p);
10433 nspace_resolver_proc = p;
10434 } else {
10435 error = EBUSY;
10436 }
10437
10438 NSPACE_REQ_UNLOCK();
10439 } else {
10440 // This is basically just like the exit case.
10441 // nspace_resolver_exited() will verify that the
10442 // process is the resolver, and will clear the
10443 // global.
10444 nspace_resolver_exited(p);
10445 }
10446
10447 return error;
10448 }
10449
10450 static int
10451 nspace_materialization_get_proc_state(struct proc *p, int *is_prevented)
10452 {
10453 if ((p->p_lflag & P_LNSPACE_RESOLVER) != 0 ||
10454 (p->p_vfs_iopolicy &
10455 P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES) == 0) {
10456 *is_prevented = 1;
10457 } else {
10458 *is_prevented = 0;
10459 }
10460 return 0;
10461 }
10462
10463 static int
10464 nspace_materialization_set_proc_state(struct proc *p, int is_prevented)
10465 {
10466 if (p->p_lflag & P_LNSPACE_RESOLVER) {
10467 return is_prevented ? 0 : EBUSY;
10468 }
10469
10470 if (is_prevented) {
10471 OSBitAndAtomic16(~((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES), &p->p_vfs_iopolicy);
10472 } else {
10473 OSBitOrAtomic16((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES, &p->p_vfs_iopolicy);
10474 }
10475 return 0;
10476 }
10477
10478 static int
10479 nspace_materialization_get_thread_state(int *is_prevented)
10480 {
10481 uthread_t ut = get_bsdthread_info(current_thread());
10482
10483 *is_prevented = (ut->uu_flag & UT_NSPACE_NODATALESSFAULTS) ? 1 : 0;
10484 return 0;
10485 }
10486
10487 static int
10488 nspace_materialization_set_thread_state(int is_prevented)
10489 {
10490 uthread_t ut = get_bsdthread_info(current_thread());
10491
10492 if (is_prevented) {
10493 ut->uu_flag |= UT_NSPACE_NODATALESSFAULTS;
10494 } else {
10495 ut->uu_flag &= ~UT_NSPACE_NODATALESSFAULTS;
10496 }
10497 return 0;
10498 }
10499
10500 static int
10501 nspace_materialization_is_prevented(void)
10502 {
10503 proc_t p = current_proc();
10504 uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
10505 vfs_context_t ctx = vfs_context_current();
10506
10507 /*
10508 * Kernel context ==> return EDEADLK, as we would with any random
10509 * process decorated as no-materialize.
10510 */
10511 if (ctx == vfs_context_kernel()) {
10512 return EDEADLK;
10513 }
10514
10515 /*
10516 * If the process has the dataless-manipulation entitlement,
10517 * materialization is prevented, and depending on the kind
10518 * of file system operation, things get to proceed as if the
10519 * object is not dataless.
10520 */
10521 if (vfs_context_is_dataless_manipulator(ctx)) {
10522 return EJUSTRETURN;
10523 }
10524
10525 /*
10526 * Per-thread decorations override any process-wide decorations.
10527 * (Foundation uses this, and this overrides even the dataless-
10528 * manipulation entitlement so as to make API contracts consistent.)
10529 */
10530 if (ut != NULL) {
10531 if (ut->uu_flag & UT_NSPACE_NODATALESSFAULTS) {
10532 return EDEADLK;
10533 }
10534 if (ut->uu_flag & UT_NSPACE_FORCEDATALESSFAULTS) {
10535 return 0;
10536 }
10537 }
10538
10539 /*
10540 * If the process's iopolicy specifies that dataless files
10541 * can be materialized, then we let it go ahead.
10542 */
10543 if (p->p_vfs_iopolicy & P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES) {
10544 return 0;
10545 }
10546
10547 /*
10548 * The default behavior is to not materialize dataless files;
10549 * return to the caller that deadlock was detected.
10550 */
10551 return EDEADLK;
10552 }
10553
10554 /* the vfs.nspace branch */
10555 SYSCTL_NODE(_vfs, OID_AUTO, nspace, CTLFLAG_RW | CTLFLAG_LOCKED, NULL, "vfs nspace hinge");
10556
10557 static int
10558 sysctl_nspace_resolver(__unused struct sysctl_oid *oidp,
10559 __unused void *arg1, __unused int arg2, struct sysctl_req *req)
10560 {
10561 struct proc *p = req->p;
10562 int new_value, old_value, changed = 0;
10563 int error;
10564
10565 error = nspace_resolver_get_proc_state(p, &old_value);
10566 if (error) {
10567 return error;
10568 }
10569
10570 error = sysctl_io_number(req, old_value, sizeof(int), &new_value,
10571 &changed);
10572 if (error == 0 && changed) {
10573 error = nspace_resolver_set_proc_state(p, new_value);
10574 }
10575 return error;
10576 }
10577
10578 /* decorate this process as the dataless file resolver */
10579 SYSCTL_PROC(_vfs_nspace, OID_AUTO, resolver,
10580 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
10581 0, 0, sysctl_nspace_resolver, "I", "");
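/*
 * Illustrative sketch (not part of this file): the privileged resolver
 * daemon decorates itself by writing 1 to the node above (and can clear
 * the decoration again by writing 0).  Assumes sysctlbyname(3); error
 * handling is elided.
 *
 *   #include <sys/sysctl.h>
 *
 *   static int
 *   become_dataless_resolver(int enable)
 *   {
 *       return sysctlbyname("vfs.nspace.resolver",
 *           NULL, NULL, &enable, sizeof(enable));
 *   }
 */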
10582
10583 static int
10584 sysctl_nspace_prevent_materialization(__unused struct sysctl_oid *oidp,
10585 __unused void *arg1, __unused int arg2, struct sysctl_req *req)
10586 {
10587 struct proc *p = req->p;
10588 int new_value, old_value, changed = 0;
10589 int error;
10590
10591 error = nspace_materialization_get_proc_state(p, &old_value);
10592 if (error) {
10593 return error;
10594 }
10595
10596 error = sysctl_io_number(req, old_value, sizeof(int), &new_value,
10597 &changed);
10598 if (error == 0 && changed) {
10599 error = nspace_materialization_set_proc_state(p, new_value);
10600 }
10601 return error;
10602 }
10603
10604 /* decorate this process as not wanting to materialize dataless files */
10605 SYSCTL_PROC(_vfs_nspace, OID_AUTO, prevent_materialization,
10606 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
10607 0, 0, sysctl_nspace_prevent_materialization, "I", "");
10608
10609 static int
10610 sysctl_nspace_thread_prevent_materialization(__unused struct sysctl_oid *oidp,
10611 __unused void *arg1, __unused int arg2, struct sysctl_req *req)
10612 {
10613 int new_value, old_value, changed = 0;
10614 int error;
10615
10616 error = nspace_materialization_get_thread_state(&old_value);
10617 if (error) {
10618 return error;
10619 }
10620
10621 error = sysctl_io_number(req, old_value, sizeof(int), &new_value,
10622 &changed);
10623 if (error == 0 && changed) {
10624 error = nspace_materialization_set_thread_state(new_value);
10625 }
10626 return error;
10627 }
10628
10629 /* decorate this thread as not wanting to materialize dataless files */
10630 SYSCTL_PROC(_vfs_nspace, OID_AUTO, thread_prevent_materialization,
10631 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
10632 0, 0, sysctl_nspace_thread_prevent_materialization, "I", "");
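/*
 * Illustrative sketch (not part of this file): a userspace process could
 * toggle the process-wide policy above by writing an int to
 * vfs.nspace.prevent_materialization (the thread-scoped node works the
 * same way).  Assumes sysctlbyname(3); error handling is elided.
 *
 *   #include <sys/sysctl.h>
 *
 *   static int
 *   set_prevent_materialization(int prevent)   // 1 = don't fault in dataless files
 *   {
 *       return sysctlbyname("vfs.nspace.prevent_materialization",
 *           NULL, NULL, &prevent, sizeof(prevent));
 *   }
 */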
10633
10634 static int
10635 sysctl_nspace_complete(__unused struct sysctl_oid *oidp, __unused void *arg1,
10636 __unused int arg2, struct sysctl_req *req)
10637 {
10638 struct proc *p = req->p;
10639 uint32_t req_status[2] = { 0, 0 };
10640 int error, is_resolver, changed = 0;
10641
10642 error = nspace_resolver_get_proc_state(p, &is_resolver);
10643 if (error) {
10644 return error;
10645 }
10646
10647 if (!is_resolver) {
10648 return EPERM;
10649 }
10650
10651 error = sysctl_io_opaque(req, req_status, sizeof(req_status),
10652 &changed);
10653 if (error) {
10654 return error;
10655 }
10656
10657 /*
10658 * req_status[0] is the req_id
10659 *
10660 * req_status[1] is the errno
10661 */
10662 if (error == 0 && changed) {
10663 nspace_resolver_req_completed(req_status[0],
10664 (int)req_status[1]);
10665 }
10666 return error;
10667 }
10668
10669 /* Resolver reports completed reqs here. */
10670 SYSCTL_PROC(_vfs_nspace, OID_AUTO, complete,
10671 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
10672 0, 0, sysctl_nspace_complete, "-", "");
10673
10674 #endif /* CONFIG_DATALESS_FILES */
10675
10676 #if CONFIG_DATALESS_FILES
10677 #define __no_dataless_unused /* nothing */
10678 #else
10679 #define __no_dataless_unused __unused
10680 #endif
10681
10682 void
10683 nspace_resolver_init(void)
10684 {
10685 #if CONFIG_DATALESS_FILES
10686 nspace_resolver_request_lck_grp =
10687 lck_grp_alloc_init("file namespace resolver", NULL);
10688
10689 lck_mtx_init(&nspace_resolver_request_hash_mutex,
10690 nspace_resolver_request_lck_grp, NULL);
10691
10692 nspace_resolver_request_hashtbl =
10693 hashinit(NSPACE_RESOLVER_REQ_HASHSIZE,
10694 M_VNODE /* XXX */, &nspace_resolver_request_hashmask);
10695 #endif /* CONFIG_DATALESS_FILES */
10696 }
10697
10698 void
10699 nspace_resolver_exited(struct proc *p __no_dataless_unused)
10700 {
10701 #if CONFIG_DATALESS_FILES
10702 struct nspace_resolver_requesthead *bucket;
10703 struct nspace_resolver_request *req;
10704 u_long idx;
10705
10706 NSPACE_REQ_LOCK();
10707
10708 if ((p->p_lflag & P_LNSPACE_RESOLVER) &&
10709 p == nspace_resolver_proc) {
10710 for (idx = 0; idx <= nspace_resolver_request_hashmask; idx++) {
10711 bucket = &nspace_resolver_request_hashtbl[idx];
10712 LIST_FOREACH(req, bucket, r_hashlink) {
10713 nspace_resolver_req_mark_complete(req,
10714 ETIMEDOUT);
10715 }
10716 }
10717 nspace_resolver_proc = NULL;
10718 }
10719
10720 NSPACE_REQ_UNLOCK();
10721 #endif /* CONFIG_DATALESS_FILES */
10722 }
10723
10724 int
10725 resolve_nspace_item(struct vnode *vp, uint64_t op)
10726 {
10727 return resolve_nspace_item_ext(vp, op, NULL);
10728 }
10729
10730 #define DATALESS_RESOLVER_ENTITLEMENT \
10731 "com.apple.private.vfs.dataless-resolver"
10732 #define DATALESS_MANIPULATION_ENTITLEMENT \
10733 "com.apple.private.vfs.dataless-manipulation"
10734
10735 /*
10736 * Return TRUE if the vfs context is associated with a process entitled
10737 * for dataless manipulation.
10738 *
10739 * XXX Arguably belongs in vfs_subr.c, but is here because of the
10740 * complication around CONFIG_DATALESS_FILES.
10741 */
10742 boolean_t
10743 vfs_context_is_dataless_manipulator(vfs_context_t ctx __unused)
10744 {
10745 #if CONFIG_DATALESS_FILES
10746 assert(ctx->vc_thread == current_thread());
10747 task_t const task = current_task();
10748 return IOTaskHasEntitlement(task, DATALESS_MANIPULATION_ENTITLEMENT) ||
10749 IOTaskHasEntitlement(task, DATALESS_RESOLVER_ENTITLEMENT);
10750 #else
10751 return false;
10752 #endif /* CONFIG_DATALESS_FILES */
10753 }
10754
10755 int
10756 resolve_nspace_item_ext(
10757 struct vnode *vp __no_dataless_unused,
10758 uint64_t op __no_dataless_unused,
10759 void *arg __unused)
10760 {
10761 #if CONFIG_DATALESS_FILES
10762 int error;
10763 mach_port_t mp;
10764 char *path = NULL;
10765 int path_len;
10766 kern_return_t kr;
10767 struct nspace_resolver_request req;
10768
10769 // only allow namespace events on regular files, directories and symlinks.
10770 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
10771 return EFTYPE;
10772 }
10773
10774 //
10775 // if this is a snapshot event and the vnode is on a
10776 // disk image just pretend nothing happened since any
10777 // change to the disk image will cause the disk image
10778 // itself to get backed up and this avoids multi-way
10779 // deadlocks between the snapshot handler and the ever
10780 // popular diskimages-helper process. the variable
10781 // nspace_allow_virtual_devs allows this behavior to
10782 // be overridden (for use by the Mobile TimeMachine
10783 // testing infrastructure which uses disk images)
10784 //
10785 if (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT) {
10786 os_log_debug(OS_LOG_DEFAULT, "NSPACE SNAPSHOT not handled");
10787 return ENOTSUP;
10788 }
10789
10790 error = nspace_materialization_is_prevented();
10791 if (error) {
10792 os_log_debug(OS_LOG_DEFAULT,
10793 "NSPACE process/thread is decorated as no-materialization");
10794 return error;
10795 }
10796
10797 kr = host_get_filecoordinationd_port(host_priv_self(), &mp);
10798 if (kr != KERN_SUCCESS || !IPC_PORT_VALID(mp)) {
10799 os_log_error(OS_LOG_DEFAULT, "NSPACE no port");
10800 // Treat this like being unable to access the backing
10801 // store server.
10802 return ETIMEDOUT;
10803 }
10804
10805 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
10806 if (path == NULL) {
10807 error = ENOMEM;
10808 goto out_release_port;
10809 }
10810 path_len = MAXPATHLEN;
10811
10812 error = vn_getpath(vp, path, &path_len);
10813 if (error == 0) {
10814 int xxx_rdar44371223; /* XXX Mig bug */
10815 req.r_req_id = next_nspace_req_id();
10816 req.r_resolver_error = 0;
10817 req.r_flags = 0;
10818
10819 NSPACE_REQ_LOCK();
10820 error = nspace_resolver_req_add(&req);
10821 NSPACE_REQ_UNLOCK();
10822 if (error) {
10823 goto out_release_port;
10824 }
10825
10826 os_log_debug(OS_LOG_DEFAULT, "NSPACE resolve_path call");
10827 kr = send_nspace_resolve_path(mp, req.r_req_id,
10828 current_proc()->p_pid, (uint32_t)(op & 0xffffffff),
10829 path, &xxx_rdar44371223);
10830 if (kr != KERN_SUCCESS) {
10831 // Also treat this like being unable to access
10832 // the backing store server.
10833 os_log_error(OS_LOG_DEFAULT,
10834 "NSPACE resolve_path failure: %d", kr);
10835 error = ETIMEDOUT;
10836
10837 NSPACE_REQ_LOCK();
10838 nspace_resolver_req_remove(&req);
10839 NSPACE_REQ_UNLOCK();
10840 goto out_release_port;
10841 }
10842
10843 // Give back the memory we allocated earlier while
10844 // we wait; we no longer need it.
10845 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
10846 path = NULL;
10847
10848 // Request has been submitted to the resolver.
10849 // Now (interruptibly) wait for completion.
10850 // Upon return, the request will have been removed
10851 // from the lookup table.
10852 error = nspace_resolver_req_wait(&req);
10853 }
10854
10855 out_release_port:
10856 if (path != NULL) {
10857 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
10858 }
10859 ipc_port_release_send(mp);
10860
10861 return error;
10862 #else
10863 return ENOTSUP;
10864 #endif /* CONFIG_DATALESS_FILES */
10865 }
10866
10867 int
10868 nspace_snapshot_event(__unused vnode_t vp, __unused time_t ctime,
10869 __unused uint64_t op_type, __unused void *arg)
10870 {
10871 return 0;
10872 }
10873
10874 #if 0
10875 static int
10876 build_volfs_path(struct vnode *vp, char *path, int *len)
10877 {
10878 struct vnode_attr va;
10879 int ret;
10880
10881 VATTR_INIT(&va);
10882 VATTR_WANTED(&va, va_fsid);
10883 VATTR_WANTED(&va, va_fileid);
10884
10885 if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
10886 *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
10887 ret = -1;
10888 } else {
10889 *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
10890 ret = 0;
10891 }
10892
10893 return ret;
10894 }
10895 #endif
10896
10897 static unsigned long
10898 fsctl_bogus_command_compat(unsigned long cmd)
10899 {
10900 switch (cmd) {
10901 case IOCBASECMD(FSIOC_SYNC_VOLUME):
10902 return FSIOC_SYNC_VOLUME;
10903 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID):
10904 return FSIOC_ROUTEFS_SETROUTEID;
10905 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS):
10906 return FSIOC_SET_PACKAGE_EXTS;
10907 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE):
10908 return FSIOC_SET_FSTYPENAME_OVERRIDE;
10909 case IOCBASECMD(DISK_CONDITIONER_IOC_GET):
10910 return DISK_CONDITIONER_IOC_GET;
10911 case IOCBASECMD(DISK_CONDITIONER_IOC_SET):
10912 return DISK_CONDITIONER_IOC_SET;
10913 case IOCBASECMD(FSIOC_FIOSEEKHOLE):
10914 return FSIOC_FIOSEEKHOLE;
10915 case IOCBASECMD(FSIOC_FIOSEEKDATA):
10916 return FSIOC_FIOSEEKDATA;
10917 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME):
10918 return SPOTLIGHT_IOC_GET_MOUNT_TIME;
10919 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME):
10920 return SPOTLIGHT_IOC_GET_LAST_MTIME;
10921 }
10922
10923 return cmd;
10924 }
10925
10926 static int
10927 cas_bsdflags_setattr(vnode_t vp, void *arg, vfs_context_t ctx)
10928 {
10929 return VNOP_IOCTL(vp, FSIOC_CAS_BSDFLAGS, arg, FWRITE, ctx);
10930 }
10931
10932 /*
10933 * Make a filesystem-specific control call:
10934 */
10935 /* ARGSUSED */
10936 static int
10937 fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
10938 {
10939 int error = 0;
10940 boolean_t is64bit;
10941 u_int size;
10942 #define STK_PARAMS 128
10943 char stkbuf[STK_PARAMS] = {0};
10944 caddr_t data, memp;
10945 vnode_t vp = *arg_vp;
10946
10947 if (vp->v_type == VCHR || vp->v_type == VBLK) {
10948 return ENOTTY;
10949 }
10950
10951 cmd = fsctl_bogus_command_compat(cmd);
10952
10953 size = IOCPARM_LEN(cmd);
10954 if (size > IOCPARM_MAX) {
10955 return EINVAL;
10956 }
10957
10958 is64bit = proc_is64bit(p);
10959
10960 memp = NULL;
10961
10962 if (size > sizeof(stkbuf)) {
10963 if ((memp = (caddr_t)kalloc(size)) == 0) {
10964 return ENOMEM;
10965 }
10966 data = memp;
10967 } else {
10968 data = &stkbuf[0];
10969 }
10970
10971 if (cmd & IOC_IN) {
10972 if (size) {
10973 error = copyin(udata, data, size);
10974 if (error) {
10975 if (memp) {
10976 kfree(memp, size);
10977 }
10978 return error;
10979 }
10980 } else {
10981 if (is64bit) {
10982 *(user_addr_t *)data = udata;
10983 } else {
10984 *(uint32_t *)data = (uint32_t)udata;
10985 }
10986 }
10987 } else if ((cmd & IOC_OUT) && size) {
10988 /*
10989 * Zero the buffer so the user always
10990 * gets back something deterministic.
10991 */
10992 bzero(data, size);
10993 } else if (cmd & IOC_VOID) {
10994 if (is64bit) {
10995 *(user_addr_t *)data = udata;
10996 } else {
10997 *(uint32_t *)data = (uint32_t)udata;
10998 }
10999 }
11000
11001 /* Check to see if it's a generic command */
11002 switch (cmd) {
11003 case FSIOC_SYNC_VOLUME: {
11004 struct vfs_attr vfa;
11005 mount_t mp = vp->v_mount;
11006 unsigned arg;
11007
11008
11009 /* record vid of vp so we can drop it below. */
11010 uint32_t vvid = vp->v_id;
11011
11012 /*
11013 * Then grab mount_iterref so that we can release the vnode.
11014 * Without this, a thread may call vnode_iterate_prepare then
11015 * get into a deadlock because we've never released the root vp
11016 */
11017 error = mount_iterref(mp, 0);
11018 if (error) {
11019 break;
11020 }
11021 vnode_put(vp);
11022
11023 arg = MNT_NOWAIT;
11024 if (*(uint32_t*)data & FSCTL_SYNC_WAIT) {
11025 arg = MNT_WAIT;
11026 }
11027
11028 /*
11029 * If the filesystem supports multiple file systems in a
11030 * partition (e.g. APFS volumes in a container), it knows
11031 * that the waitfor argument to VFS_SYNC is a set of flags.
11032 */
11033 VFSATTR_INIT(&vfa);
11034 VFSATTR_WANTED(&vfa, f_capabilities);
11035 if ((vfs_getattr(mp, &vfa, vfs_context_current()) == 0) &&
11036 VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) &&
11037 ((vfa.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_SHARED_SPACE)) &&
11038 ((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_SHARED_SPACE))) {
11039 arg |= MNT_VOLUME;
11040 }
11041
11042 /* issue the sync for this volume */
11043 (void)sync_callback(mp, &arg);
11044
11045 /*
11046 * Then release the mount_iterref once we're done syncing; it's not
11047 * needed for the VNOP_IOCTL below
11048 */
11049 mount_iterdrop(mp);
11050
11051 if (arg & FSCTL_SYNC_FULLSYNC) {
11052 /* re-obtain vnode iocount on the root vp, if possible */
11053 error = vnode_getwithvid(vp, vvid);
11054 if (error == 0) {
11055 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
11056 vnode_put(vp);
11057 }
11058 }
11059 /* mark the argument VP as having been released */
11060 *arg_vp = NULL;
11061 }
11062 break;
11063
11064 case FSIOC_ROUTEFS_SETROUTEID: {
11065 #if ROUTEFS
11066 char routepath[MAXPATHLEN];
11067 size_t len = 0;
11068
11069 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
11070 break;
11071 }
11072 bzero(routepath, MAXPATHLEN);
11073 error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
11074 if (error) {
11075 break;
11076 }
11077 error = routefs_kernel_mount(routepath);
11078 if (error) {
11079 break;
11080 }
11081 #endif
11082 }
11083 break;
11084
11085 case FSIOC_SET_PACKAGE_EXTS: {
11086 user_addr_t ext_strings;
11087 uint32_t num_entries;
11088 uint32_t max_width;
11089
11090 if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0))) {
11091 break;
11092 }
11093
11094 if ((is64bit && size != sizeof(user64_package_ext_info))
11095 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
11096 // either you're 64-bit and passed a 64-bit struct or
11097 // you're 32-bit and passed a 32-bit struct. otherwise
11098 // it's not ok.
11099 error = EINVAL;
11100 break;
11101 }
11102
11103 if (is64bit) {
11104 ext_strings = ((user64_package_ext_info *)data)->strings;
11105 num_entries = ((user64_package_ext_info *)data)->num_entries;
11106 max_width = ((user64_package_ext_info *)data)->max_width;
11107 } else {
11108 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
11109 num_entries = ((user32_package_ext_info *)data)->num_entries;
11110 max_width = ((user32_package_ext_info *)data)->max_width;
11111 }
11112 error = set_package_extensions_table(ext_strings, num_entries, max_width);
11113 }
11114 break;
11115
11116 case FSIOC_SET_FSTYPENAME_OVERRIDE:
11117 {
11118 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
11119 break;
11120 }
11121 if (vp->v_mount) {
11122 mount_lock(vp->v_mount);
11123 if (data[0] != 0) {
11124 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
11125 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
11126 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
11127 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
11128 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
11129 }
11130 } else {
11131 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
11132 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
11133 }
11134 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
11135 vp->v_mount->fstypename_override[0] = '\0';
11136 }
11137 mount_unlock(vp->v_mount);
11138 }
11139 }
11140 break;
11141
11142 case DISK_CONDITIONER_IOC_GET: {
11143 error = disk_conditioner_get_info(vp->v_mount, (disk_conditioner_info *)data);
11144 }
11145 break;
11146
11147 case DISK_CONDITIONER_IOC_SET: {
11148 error = disk_conditioner_set_info(vp->v_mount, (disk_conditioner_info *)data);
11149 }
11150 break;
11151
11152 case FSIOC_CAS_BSDFLAGS: {
11153 struct fsioc_cas_bsdflags *cas = (struct fsioc_cas_bsdflags *)data;
11154 struct vnode_attr va;
11155
11156 VATTR_INIT(&va);
11157 VATTR_SET(&va, va_flags, cas->new_flags);
11158
11159 error = chflags0(vp, &va, cas_bsdflags_setattr, cas, ctx);
11160 }
11161 break;
11162
11163 case FSIOC_FD_ONLY_OPEN_ONCE: {
11164 if (vnode_usecount(vp) > 1) {
11165 error = EBUSY;
11166 } else {
11167 error = 0;
11168 }
11169 }
11170 break;
11171
11172 default: {
11173 /* other, known commands shouldn't be passed down here */
11174 switch (cmd) {
11175 case F_PUNCHHOLE:
11176 case F_TRIM_ACTIVE_FILE:
11177 case F_RDADVISE:
11178 case F_TRANSCODEKEY:
11179 case F_GETPROTECTIONLEVEL:
11180 case F_GETDEFAULTPROTLEVEL:
11181 case F_MAKECOMPRESSED:
11182 case F_SET_GREEDY_MODE:
11183 case F_SETSTATICCONTENT:
11184 case F_SETIOTYPE:
11185 case F_SETBACKINGSTORE:
11186 case F_GETPATH_MTMINFO:
11187 case APFSIOC_REVERT_TO_SNAPSHOT:
11188 case FSIOC_FIOSEEKHOLE:
11189 case FSIOC_FIOSEEKDATA:
11190 case HFS_GET_BOOT_INFO:
11191 case HFS_SET_BOOT_INFO:
11192 case FIOPINSWAP:
11193 case F_CHKCLEAN:
11194 case F_FULLFSYNC:
11195 case F_BARRIERFSYNC:
11196 case F_FREEZE_FS:
11197 case F_THAW_FS:
11198 error = EINVAL;
11199 goto outdrop;
11200 }
11201 /* Invoke the filesystem-specific code */
11202 error = VNOP_IOCTL(vp, cmd, data, options, ctx);
11203 }
11204 } /* end switch stmt */
11205
11206 /*
11207 * if no errors, copy any data to user. Size was
11208 * already set and checked above.
11209 */
11210 if (error == 0 && (cmd & IOC_OUT) && size) {
11211 error = copyout(data, udata, size);
11212 }
11213
11214 outdrop:
11215 if (memp) {
11216 kfree(memp, size);
11217 }
11218
11219 return error;
11220 }
11221
11222 /* ARGSUSED */
11223 int
11224 fsctl(proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
11225 {
11226 int error;
11227 struct nameidata nd;
11228 u_long nameiflags;
11229 vnode_t vp = NULL;
11230 vfs_context_t ctx = vfs_context_current();
11231
11232 AUDIT_ARG(cmd, uap->cmd);
11233 AUDIT_ARG(value32, uap->options);
11234 /* Get the vnode for the file we are getting info on: */
11235 nameiflags = 0;
11236 //
11237 // if we come through fsctl() then the file is by definition not open.
11238 // therefore for the FSIOC_FD_ONLY_OPEN_ONCE selector we return an error
11239 // lest the caller mistakenly think the only open is their own (when in
11240 // reality it's someone else's).
11241 //
11242 if (uap->cmd == FSIOC_FD_ONLY_OPEN_ONCE) {
11243 return EINVAL;
11244 }
11245 if ((uap->options & FSOPT_NOFOLLOW) == 0) {
11246 nameiflags |= FOLLOW;
11247 }
11248 if (uap->cmd == FSIOC_FIRMLINK_CTL) {
11249 nameiflags |= (CN_FIRMLINK_NOFOLLOW | NOCACHE);
11250 }
11251 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
11252 UIO_USERSPACE, uap->path, ctx);
11253 if ((error = namei(&nd))) {
11254 goto done;
11255 }
11256 vp = nd.ni_vp;
11257 nameidone(&nd);
11258
11259 #if CONFIG_MACF
11260 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
11261 if (error) {
11262 goto done;
11263 }
11264 #endif
11265
11266 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
11267
11268 done:
11269 if (vp) {
11270 vnode_put(vp);
11271 }
11272 return error;
11273 }
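/*
 * Illustrative sketch (not part of this file): issuing FSIOC_SYNC_VOLUME
 * from userspace.  Assumes the userspace prototype
 * int fsctl(const char *, unsigned long, void *, unsigned int) from
 * <sys/fsctl.h>; error handling is elided.
 *
 *   #include <sys/fsctl.h>
 *   #include <stdint.h>
 *
 *   static int
 *   sync_volume_blocking(const char *any_path_on_volume)
 *   {
 *       uint32_t flags = FSCTL_SYNC_WAIT;   // wait for the sync to finish
 *       return fsctl(any_path_on_volume, FSIOC_SYNC_VOLUME, &flags, 0);
 *   }
 */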
11274 /* ARGSUSED */
11275 int
11276 ffsctl(proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
11277 {
11278 int error;
11279 vnode_t vp = NULL;
11280 vfs_context_t ctx = vfs_context_current();
11281 int fd = -1;
11282
11283 AUDIT_ARG(fd, uap->fd);
11284 AUDIT_ARG(cmd, uap->cmd);
11285 AUDIT_ARG(value32, uap->options);
11286
11287 /* Get the vnode for the file we are getting info on: */
11288 if ((error = file_vnode(uap->fd, &vp))) {
11289 return error;
11290 }
11291 fd = uap->fd;
11292 if ((error = vnode_getwithref(vp))) {
11293 file_drop(fd);
11294 return error;
11295 }
11296
11297 #if CONFIG_MACF
11298 if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
11299 file_drop(fd);
11300 vnode_put(vp);
11301 return error;
11302 }
11303 #endif
11304
11305 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
11306
11307 file_drop(fd);
11308
11309 /* validate vp; fsctl_internal() can drop iocount and reset vp to NULL */
11310 if (vp) {
11311 vnode_put(vp);
11312 }
11313
11314 return error;
11315 }
11316 /* end of fsctl system call */
11317
11318 /*
11319 * Retrieve the data of an extended attribute.
11320 */
11321 int
11322 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
11323 {
11324 vnode_t vp;
11325 struct nameidata nd;
11326 char attrname[XATTR_MAXNAMELEN + 1];
11327 vfs_context_t ctx = vfs_context_current();
11328 uio_t auio = NULL;
11329 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11330 size_t attrsize = 0;
11331 size_t namelen;
11332 u_int32_t nameiflags;
11333 int error;
11334 char uio_buf[UIO_SIZEOF(1)];
11335
11336 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11337 return EINVAL;
11338 }
11339
11340 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
11341 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
11342 if ((error = namei(&nd))) {
11343 return error;
11344 }
11345 vp = nd.ni_vp;
11346 nameidone(&nd);
11347
11348 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11349 if (error != 0) {
11350 goto out;
11351 }
11352 if (xattr_protected(attrname)) {
11353 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
11354 error = EPERM;
11355 goto out;
11356 }
11357 }
11358 /*
11359 * the specific check for 0xffffffff is a hack to preserve
11360 * binary compatibility in K64 with applications that discovered
11361 * that passing in a buf pointer and a size of -1 resulted in
11362 * just the size of the indicated extended attribute being returned.
11363 * this isn't part of the documented behavior, but because of the
11364 * original implementation's check for "uap->size > 0", this behavior
11365 * was allowed. In K32 that check turned into a signed comparison
11366 * even though uap->size is unsigned... in K64, we blow by that
11367 * check because uap->size is unsigned and doesn't get sign smeared
11368 * in the munger for a 32 bit user app. we also need to add a
11369 * check to limit the maximum size of the buffer being passed in...
11370 * unfortunately, the underlying filesystems seem to just malloc
11371 * the requested size even if the actual extended attribute is tiny.
11372 * because that malloc is for kernel wired memory, we have to put a
11373 * sane limit on it.
11374 *
11375 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
11376 * U64 running on K64 will yield -1 (64 bits wide)
11377 * U32/U64 running on K32 will yield -1 (32 bits wide)
11378 */
11379 if (uap->size == 0xffffffff || uap->size == (size_t)-1) {
11380 goto no_uio;
11381 }
11382
11383 if (uap->value) {
11384 if (uap->size > (size_t)XATTR_MAXSIZE) {
11385 uap->size = XATTR_MAXSIZE;
11386 }
11387
11388 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
11389 &uio_buf[0], sizeof(uio_buf));
11390 uio_addiov(auio, uap->value, uap->size);
11391 }
11392 no_uio:
11393 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
11394 out:
11395 vnode_put(vp);
11396
11397 if (auio) {
11398 *retval = uap->size - uio_resid(auio);
11399 } else {
11400 *retval = (user_ssize_t)attrsize;
11401 }
11402
11403 return error;
11404 }
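/*
 * Illustrative sketch (not part of this file): the usual two-call pattern
 * from userspace that the size-probing behavior above supports -- ask for
 * the attribute size first, then fetch the value.  Assumes the public
 * getxattr(2) interface; error handling is abbreviated.
 *
 *   #include <sys/xattr.h>
 *   #include <stdlib.h>
 *
 *   static void *
 *   copy_xattr(const char *path, const char *name, ssize_t *lenp)
 *   {
 *       ssize_t len = getxattr(path, name, NULL, 0, 0, 0);
 *       if (len < 0) {
 *           return NULL;
 *       }
 *       void *buf = malloc(len);
 *       if (buf == NULL) {
 *           return NULL;
 *       }
 *       *lenp = getxattr(path, name, buf, len, 0, 0);
 *       return buf;
 *   }
 */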
11405
11406 /*
11407 * Retrieve the data of an extended attribute.
11408 */
11409 int
11410 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
11411 {
11412 vnode_t vp;
11413 char attrname[XATTR_MAXNAMELEN + 1];
11414 uio_t auio = NULL;
11415 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11416 size_t attrsize = 0;
11417 size_t namelen;
11418 int error;
11419 char uio_buf[UIO_SIZEOF(1)];
11420
11421 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11422 return EINVAL;
11423 }
11424
11425 if ((error = file_vnode(uap->fd, &vp))) {
11426 return error;
11427 }
11428 if ((error = vnode_getwithref(vp))) {
11429 file_drop(uap->fd);
11430 return error;
11431 }
11432 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11433 if (error != 0) {
11434 goto out;
11435 }
11436 if (xattr_protected(attrname)) {
11437 error = EPERM;
11438 goto out;
11439 }
11440 if (uap->value && uap->size > 0) {
11441 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
11442 &uio_buf[0], sizeof(uio_buf));
11443 uio_addiov(auio, uap->value, uap->size);
11444 }
11445
11446 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
11447 out:
11448 (void)vnode_put(vp);
11449 file_drop(uap->fd);
11450
11451 if (auio) {
11452 *retval = uap->size - uio_resid(auio);
11453 } else {
11454 *retval = (user_ssize_t)attrsize;
11455 }
11456 return error;
11457 }
11458
11459 /*
11460 * Set the data of an extended attribute.
11461 */
11462 int
11463 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
11464 {
11465 vnode_t vp;
11466 struct nameidata nd;
11467 char attrname[XATTR_MAXNAMELEN + 1];
11468 vfs_context_t ctx = vfs_context_current();
11469 uio_t auio = NULL;
11470 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11471 size_t namelen;
11472 u_int32_t nameiflags;
11473 int error;
11474 char uio_buf[UIO_SIZEOF(1)];
11475
11476 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11477 return EINVAL;
11478 }
11479
11480 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11481 if (error != 0) {
11482 if (error == EPERM) {
11483 /* if the string won't fit in attrname, copyinstr emits EPERM */
11484 return ENAMETOOLONG;
11485 }
11486 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11487 return error;
11488 }
11489 if (xattr_protected(attrname)) {
11490 return EPERM;
11491 }
11492 if (uap->size != 0 && uap->value == 0) {
11493 return EINVAL;
11494 }
11495
11496 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
11497 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
11498 if ((error = namei(&nd))) {
11499 return error;
11500 }
11501 vp = nd.ni_vp;
11502 nameidone(&nd);
11503
11504 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
11505 &uio_buf[0], sizeof(uio_buf));
11506 uio_addiov(auio, uap->value, uap->size);
11507
11508 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
11509 #if CONFIG_FSE
11510 if (error == 0) {
11511 add_fsevent(FSE_XATTR_MODIFIED, ctx,
11512 FSE_ARG_VNODE, vp,
11513 FSE_ARG_DONE);
11514 }
11515 #endif
11516 vnode_put(vp);
11517 *retval = 0;
11518 return error;
11519 }
11520
11521 /*
11522 * Set the data of an extended attribute.
11523 */
11524 int
11525 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
11526 {
11527 vnode_t vp;
11528 char attrname[XATTR_MAXNAMELEN + 1];
11529 uio_t auio = NULL;
11530 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11531 size_t namelen;
11532 int error;
11533 char uio_buf[UIO_SIZEOF(1)];
11534 #if CONFIG_FSE
11535 vfs_context_t ctx = vfs_context_current();
11536 #endif
11537
11538 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11539 return EINVAL;
11540 }
11541
11542 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11543 if (error != 0) {
11544 if (error == EPERM) {
11545 /* if the string won't fit in attrname, copyinstr emits EPERM */
11546 return ENAMETOOLONG;
11547 }
11548 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11549 return error;
11550 }
11551 if (xattr_protected(attrname)) {
11552 return EPERM;
11553 }
11554 if (uap->size != 0 && uap->value == 0) {
11555 return EINVAL;
11556 }
11557 if ((error = file_vnode(uap->fd, &vp))) {
11558 return error;
11559 }
11560 if ((error = vnode_getwithref(vp))) {
11561 file_drop(uap->fd);
11562 return error;
11563 }
11564 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
11565 &uio_buf[0], sizeof(uio_buf));
11566 uio_addiov(auio, uap->value, uap->size);
11567
11568 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
11569 #if CONFIG_FSE
11570 if (error == 0) {
11571 add_fsevent(FSE_XATTR_MODIFIED, ctx,
11572 FSE_ARG_VNODE, vp,
11573 FSE_ARG_DONE);
11574 }
11575 #endif
11576 vnode_put(vp);
11577 file_drop(uap->fd);
11578 *retval = 0;
11579 return error;
11580 }
11581
11582 /*
11583 * Remove an extended attribute.
11584 * XXX Code duplication here.
11585 */
11586 int
11587 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
11588 {
11589 vnode_t vp;
11590 struct nameidata nd;
11591 char attrname[XATTR_MAXNAMELEN + 1];
11592 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11593 vfs_context_t ctx = vfs_context_current();
11594 size_t namelen;
11595 u_int32_t nameiflags;
11596 int error;
11597
11598 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11599 return EINVAL;
11600 }
11601
11602 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11603 if (error != 0) {
11604 return error;
11605 }
11606 if (xattr_protected(attrname)) {
11607 return EPERM;
11608 }
11609 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
11610 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
11611 if ((error = namei(&nd))) {
11612 return error;
11613 }
11614 vp = nd.ni_vp;
11615 nameidone(&nd);
11616
11617 error = vn_removexattr(vp, attrname, uap->options, ctx);
11618 #if CONFIG_FSE
11619 if (error == 0) {
11620 add_fsevent(FSE_XATTR_REMOVED, ctx,
11621 FSE_ARG_VNODE, vp,
11622 FSE_ARG_DONE);
11623 }
11624 #endif
11625 vnode_put(vp);
11626 *retval = 0;
11627 return error;
11628 }
11629
11630 /*
11631 * Remove an extended attribute.
11632 * XXX Code duplication here.
11633 */
11634 int
11635 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
11636 {
11637 vnode_t vp;
11638 char attrname[XATTR_MAXNAMELEN + 1];
11639 size_t namelen;
11640 int error;
11641 #if CONFIG_FSE
11642 vfs_context_t ctx = vfs_context_current();
11643 #endif
11644
11645 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11646 return EINVAL;
11647 }
11648
11649 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11650 if (error != 0) {
11651 return error;
11652 }
11653 if (xattr_protected(attrname)) {
11654 return EPERM;
11655 }
11656 if ((error = file_vnode(uap->fd, &vp))) {
11657 return error;
11658 }
11659 if ((error = vnode_getwithref(vp))) {
11660 file_drop(uap->fd);
11661 return error;
11662 }
11663
11664 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
11665 #if CONFIG_FSE
11666 if (error == 0) {
11667 add_fsevent(FSE_XATTR_REMOVED, ctx,
11668 FSE_ARG_VNODE, vp,
11669 FSE_ARG_DONE);
11670 }
11671 #endif
11672 vnode_put(vp);
11673 file_drop(uap->fd);
11674 *retval = 0;
11675 return error;
11676 }
11677
11678 /*
11679 * Retrieve the list of extended attribute names.
11680 * XXX Code duplication here.
11681 */
11682 int
11683 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
11684 {
11685 vnode_t vp;
11686 struct nameidata nd;
11687 vfs_context_t ctx = vfs_context_current();
11688 uio_t auio = NULL;
11689 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11690 size_t attrsize = 0;
11691 u_int32_t nameiflags;
11692 int error;
11693 char uio_buf[UIO_SIZEOF(1)];
11694
11695 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11696 return EINVAL;
11697 }
11698
11699 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
11700 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
11701 if ((error = namei(&nd))) {
11702 return error;
11703 }
11704 vp = nd.ni_vp;
11705 nameidone(&nd);
11706 if (uap->namebuf != 0 && uap->bufsize > 0) {
11707 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
11708 &uio_buf[0], sizeof(uio_buf));
11709 uio_addiov(auio, uap->namebuf, uap->bufsize);
11710 }
11711
11712 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
11713
11714 vnode_put(vp);
11715 if (auio) {
11716 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
11717 } else {
11718 *retval = (user_ssize_t)attrsize;
11719 }
11720 return error;
11721 }
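/*
 * Illustrative sketch (not part of this file): from userspace the name
 * list comes back as a sequence of NUL-terminated strings; a caller
 * typically sizes the buffer with a first call and then walks it.
 * Assumes the public listxattr(2) interface; error handling is elided.
 *
 *   #include <sys/xattr.h>
 *   #include <stdio.h>
 *   #include <stdlib.h>
 *   #include <string.h>
 *
 *   static void
 *   print_xattr_names(const char *path)
 *   {
 *       ssize_t len = listxattr(path, NULL, 0, 0);
 *       if (len <= 0) {
 *           return;
 *       }
 *       char *buf = malloc(len);
 *       if (buf == NULL || (len = listxattr(path, buf, len, 0)) <= 0) {
 *           free(buf);
 *           return;
 *       }
 *       for (char *name = buf; name < buf + len; name += strlen(name) + 1) {
 *           printf("%s\n", name);
 *       }
 *       free(buf);
 *   }
 */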
11722
11723 /*
11724 * Retrieve the list of extended attribute names.
11725 * XXX Code duplication here.
11726 */
11727 int
11728 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
11729 {
11730 vnode_t vp;
11731 uio_t auio = NULL;
11732 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11733 size_t attrsize = 0;
11734 int error;
11735 char uio_buf[UIO_SIZEOF(1)];
11736
11737 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11738 return EINVAL;
11739 }
11740
11741 if ((error = file_vnode(uap->fd, &vp))) {
11742 return error;
11743 }
11744 if ((error = vnode_getwithref(vp))) {
11745 file_drop(uap->fd);
11746 return error;
11747 }
11748 if (uap->namebuf != 0 && uap->bufsize > 0) {
11749 auio = uio_createwithbuffer(1, 0, spacetype,
11750 UIO_READ, &uio_buf[0], sizeof(uio_buf));
11751 uio_addiov(auio, uap->namebuf, uap->bufsize);
11752 }
11753
11754 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
11755
11756 vnode_put(vp);
11757 file_drop(uap->fd);
11758 if (auio) {
11759 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
11760 } else {
11761 *retval = (user_ssize_t)attrsize;
11762 }
11763 return error;
11764 }
11765
11766 static int
11767 fsgetpath_internal(vfs_context_t ctx, int volfs_id, uint64_t objid,
11768 vm_size_t bufsize, caddr_t buf, uint32_t options, int *pathlen)
11769 {
11770 int error;
11771 struct mount *mp = NULL;
11772 vnode_t vp;
11773 int length;
11774 int bpflags;
11775 /* maximum number of times to retry build_path */
11776 unsigned int retries = 0x10;
11777
11778 if (bufsize > PAGE_SIZE) {
11779 return EINVAL;
11780 }
11781
11782 if (buf == NULL) {
11783 return ENOMEM;
11784 }
11785
11786 retry:
11787 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
11788 error = ENOTSUP; /* unexpected failure */
11789 return ENOTSUP;
11790 }
11791
11792 unionget:
11793 if (objid == 2) {
11794 struct vfs_attr vfsattr;
11795 int use_vfs_root = TRUE;
11796
11797 VFSATTR_INIT(&vfsattr);
11798 VFSATTR_WANTED(&vfsattr, f_capabilities);
11799 if (!(options & FSOPT_ISREALFSID) &&
11800 vfs_getattr(mp, &vfsattr, vfs_context_kernel()) == 0 &&
11801 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
11802 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS) &&
11803 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS)) {
11804 use_vfs_root = FALSE;
11805 }
11806 }
11807
11808 if (use_vfs_root) {
11809 error = VFS_ROOT(mp, &vp, ctx);
11810 } else {
11811 error = VFS_VGET(mp, objid, &vp, ctx);
11812 }
11813 } else {
11814 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
11815 }
11816
11817 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
11818 /*
11819 * If the fileid isn't found and we're in a union
11820 * mount volume, then see if the fileid is in the
11821 * mounted-on volume.
11822 */
11823 struct mount *tmp = mp;
11824 mp = vnode_mount(tmp->mnt_vnodecovered);
11825 vfs_unbusy(tmp);
11826 if (vfs_busy(mp, LK_NOWAIT) == 0) {
11827 goto unionget;
11828 }
11829 } else {
11830 vfs_unbusy(mp);
11831 }
11832
11833 if (error) {
11834 return error;
11835 }
11836
11837 #if CONFIG_MACF
11838 error = mac_vnode_check_fsgetpath(ctx, vp);
11839 if (error) {
11840 vnode_put(vp);
11841 return error;
11842 }
11843 #endif
11844
11845 /* Obtain the absolute path to this vnode. */
11846 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
11847 if (options & FSOPT_NOFIRMLINKPATH) {
11848 bpflags |= BUILDPATH_NO_FIRMLINK;
11849 }
11850 bpflags |= BUILDPATH_CHECK_MOVED;
11851 error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
11852 vnode_put(vp);
11853
11854 if (error) {
11855 /* there was a race building the path, try a few more times */
11856 if (error == EAGAIN) {
11857 --retries;
11858 if (retries > 0) {
11859 goto retry;
11860 }
11861
11862 error = ENOENT;
11863 }
11864 goto out;
11865 }
11866
11867 AUDIT_ARG(text, buf);
11868
11869 if (kdebug_enable) {
11870 long dbg_parms[NUMPARMS];
11871 int dbg_namelen;
11872
11873 dbg_namelen = (int)sizeof(dbg_parms);
11874
11875 if (length < dbg_namelen) {
11876 memcpy((char *)dbg_parms, buf, length);
11877 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
11878
11879 dbg_namelen = length;
11880 } else {
11881 memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
11882 }
11883
11884 kdebug_vfs_lookup(dbg_parms, dbg_namelen, (void *)vp,
11885 KDBG_VFS_LOOKUP_FLAG_LOOKUP);
11886 }
11887
11888 *pathlen = (user_ssize_t)length; /* may be superseded by error */
11889
11890 out:
11891 return error;
11892 }
11893
11894 /*
11895 * Obtain the full pathname of a file system object by id.
11896 */
11897 static int
11898 fsgetpath_extended(user_addr_t buf, int bufsize, user_addr_t user_fsid, uint64_t objid,
11899 uint32_t options, user_ssize_t *retval)
11900 {
11901 vfs_context_t ctx = vfs_context_current();
11902 fsid_t fsid;
11903 char *realpath;
11904 int length;
11905 int error;
11906
11907 if (options & ~(FSOPT_NOFIRMLINKPATH | FSOPT_ISREALFSID)) {
11908 return EINVAL;
11909 }
11910
11911 if ((error = copyin(user_fsid, (caddr_t)&fsid, sizeof(fsid)))) {
11912 return error;
11913 }
11914 AUDIT_ARG(value32, fsid.val[0]);
11915 AUDIT_ARG(value64, objid);
11916 /* Restrict output buffer size for now. */
11917
11918 if (bufsize > PAGE_SIZE || bufsize <= 0) {
11919 return EINVAL;
11920 }
11921 MALLOC(realpath, char *, bufsize, M_TEMP, M_WAITOK | M_ZERO);
11922 if (realpath == NULL) {
11923 return ENOMEM;
11924 }
11925
11926 error = fsgetpath_internal(ctx, fsid.val[0], objid, bufsize, realpath,
11927 options, &length);
11928
11929 if (error) {
11930 goto out;
11931 }
11932
11933 error = copyout((caddr_t)realpath, buf, length);
11934
11935 *retval = (user_ssize_t)length; /* may be superseded by error */
11936 out:
11937 if (realpath) {
11938 FREE(realpath, M_TEMP);
11939 }
11940 return error;
11941 }
11942
11943 int
11944 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
11945 {
11946 return fsgetpath_extended(uap->buf, uap->bufsize, uap->fsid, uap->objid,
11947 0, retval);
11948 }
11949
11950 int
11951 fsgetpath_ext(__unused proc_t p, struct fsgetpath_ext_args *uap, user_ssize_t *retval)
11952 {
11953 return fsgetpath_extended(uap->buf, uap->bufsize, uap->fsid, uap->objid,
11954 uap->options, retval);
11955 }
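/*
 * Illustrative sketch (not part of this file): resolving an object back to
 * a path from userspace with the public fsgetpath(2) wrapper.  The fsid is
 * typically taken from statfs(2)'s f_fsid and the object id from a file
 * id such as stat(2)'s st_ino.  The header location is an assumption;
 * error handling is elided.
 *
 *   #include <sys/fsgetpath.h>
 *   #include <sys/mount.h>
 *
 *   static ssize_t
 *   path_for_object(fsid_t fsid, uint64_t obj_id, char *buf, size_t buflen)
 *   {
 *       // Returns the path length on success, or -1 with errno set.
 *       return fsgetpath(buf, buflen, &fsid, obj_id);
 *   }
 */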
11956
11957 /*
11958 * Common routine to handle various flavors of statfs data heading out
11959 * to user space.
11960 *
11961 * Returns: 0 Success
11962 * EFAULT
11963 */
11964 static int
11965 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
11966 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
11967 boolean_t partial_copy)
11968 {
11969 int error;
11970 int my_size, copy_size;
11971
11972 if (is_64_bit) {
11973 struct user64_statfs sfs;
11974 my_size = copy_size = sizeof(sfs);
11975 bzero(&sfs, my_size);
11976 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
11977 sfs.f_type = mp->mnt_vtable->vfc_typenum;
11978 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
11979 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
11980 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
11981 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
11982 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
11983 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
11984 sfs.f_files = (user64_long_t)sfsp->f_files;
11985 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
11986 sfs.f_fsid = sfsp->f_fsid;
11987 sfs.f_owner = sfsp->f_owner;
11988 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
11989 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
11990 } else {
11991 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
11992 }
11993 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
11994 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
11995
11996 if (partial_copy) {
11997 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
11998 }
11999 error = copyout((caddr_t)&sfs, bufp, copy_size);
12000 } else {
12001 struct user32_statfs sfs;
12002
12003 my_size = copy_size = sizeof(sfs);
12004 bzero(&sfs, my_size);
12005
12006 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
12007 sfs.f_type = mp->mnt_vtable->vfc_typenum;
12008 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
12009
12010 /*
12011 * It's possible for there to be more than 2^31 blocks in the filesystem, so we
12012 * have to fudge the numbers here in that case. We inflate the blocksize in order
12013 * to reflect the filesystem size as best we can.
12014 */
12015 if ((sfsp->f_blocks > INT_MAX)
12016 /* Hack for 4061702. I think the real fix is for Carbon to
12017 * look for some volume capability and not depend on hidden
12018 * semantics agreed between a FS and carbon.
12019 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
12020 * for Carbon to set bNoVolumeSizes volume attribute.
12021 * Without this the webdavfs files cannot be copied onto
12022 * disk as they look huge. This change should not affect
12023 * XSAN, as they should not be setting these to -1.
12024 */
12025 && (sfsp->f_blocks != 0xffffffffffffffffULL)
12026 && (sfsp->f_bfree != 0xffffffffffffffffULL)
12027 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
12028 int shift;
12029
12030 /*
12031 * Work out how far we have to shift the block count down to make it fit.
12032 * Note that it's possible to have to shift so far that the resulting
12033 * blocksize would be unreportably large. At that point, we will clip
12034 * any values that don't fit.
12035 *
12036 * For safety's sake, we also ensure that f_iosize is never reported as
12037 * being smaller than f_bsize.
12038 */
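/*
 * Worked example (illustrative): a volume with 2^35 512-byte blocks
 * (16 TiB) has f_blocks > INT_MAX.  The loop below settles on shift = 5,
 * so the 32-bit caller sees f_blocks = 2^30 and f_bsize = 512 << 5 = 16384,
 * which together still describe the same 16 TiB of space.
 */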
12039 for (shift = 0; shift < 32; shift++) {
12040 if ((sfsp->f_blocks >> shift) <= INT_MAX) {
12041 break;
12042 }
12043 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX) {
12044 break;
12045 }
12046 }
12047 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
12048 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
12049 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
12050 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
12051 #undef __SHIFT_OR_CLIP
12052 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
12053 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
12054 } else {
12055 /* filesystem is small enough to be reported honestly */
12056 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
12057 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
12058 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
12059 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
12060 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
12061 }
12062 sfs.f_files = (user32_long_t)sfsp->f_files;
12063 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
12064 sfs.f_fsid = sfsp->f_fsid;
12065 sfs.f_owner = sfsp->f_owner;
12066 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
12067 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
12068 } else {
12069 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
12070 }
12071 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
12072 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
12073
12074 if (partial_copy) {
12075 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
12076 }
12077 error = copyout((caddr_t)&sfs, bufp, copy_size);
12078 }
12079
12080 if (sizep != NULL) {
12081 *sizep = my_size;
12082 }
12083 return error;
12084 }
12085
12086 /*
12087 * copy stat structure into user_stat structure.
12088 */
12089 void
12090 munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
12091 {
12092 bzero(usbp, sizeof(*usbp));
12093
12094 usbp->st_dev = sbp->st_dev;
12095 usbp->st_ino = sbp->st_ino;
12096 usbp->st_mode = sbp->st_mode;
12097 usbp->st_nlink = sbp->st_nlink;
12098 usbp->st_uid = sbp->st_uid;
12099 usbp->st_gid = sbp->st_gid;
12100 usbp->st_rdev = sbp->st_rdev;
12101 #ifndef _POSIX_C_SOURCE
12102 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
12103 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
12104 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
12105 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
12106 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
12107 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
12108 #else
12109 usbp->st_atime = sbp->st_atime;
12110 usbp->st_atimensec = sbp->st_atimensec;
12111 usbp->st_mtime = sbp->st_mtime;
12112 usbp->st_mtimensec = sbp->st_mtimensec;
12113 usbp->st_ctime = sbp->st_ctime;
12114 usbp->st_ctimensec = sbp->st_ctimensec;
12115 #endif
12116 usbp->st_size = sbp->st_size;
12117 usbp->st_blocks = sbp->st_blocks;
12118 usbp->st_blksize = sbp->st_blksize;
12119 usbp->st_flags = sbp->st_flags;
12120 usbp->st_gen = sbp->st_gen;
12121 usbp->st_lspare = sbp->st_lspare;
12122 usbp->st_qspare[0] = sbp->st_qspare[0];
12123 usbp->st_qspare[1] = sbp->st_qspare[1];
12124 }
12125
12126 void
12127 munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
12128 {
12129 bzero(usbp, sizeof(*usbp));
12130
12131 usbp->st_dev = sbp->st_dev;
12132 usbp->st_ino = sbp->st_ino;
12133 usbp->st_mode = sbp->st_mode;
12134 usbp->st_nlink = sbp->st_nlink;
12135 usbp->st_uid = sbp->st_uid;
12136 usbp->st_gid = sbp->st_gid;
12137 usbp->st_rdev = sbp->st_rdev;
12138 #ifndef _POSIX_C_SOURCE
12139 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
12140 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
12141 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
12142 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
12143 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
12144 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
12145 #else
12146 usbp->st_atime = sbp->st_atime;
12147 usbp->st_atimensec = sbp->st_atimensec;
12148 usbp->st_mtime = sbp->st_mtime;
12149 usbp->st_mtimensec = sbp->st_mtimensec;
12150 usbp->st_ctime = sbp->st_ctime;
12151 usbp->st_ctimensec = sbp->st_ctimensec;
12152 #endif
12153 usbp->st_size = sbp->st_size;
12154 usbp->st_blocks = sbp->st_blocks;
12155 usbp->st_blksize = sbp->st_blksize;
12156 usbp->st_flags = sbp->st_flags;
12157 usbp->st_gen = sbp->st_gen;
12158 usbp->st_lspare = sbp->st_lspare;
12159 usbp->st_qspare[0] = sbp->st_qspare[0];
12160 usbp->st_qspare[1] = sbp->st_qspare[1];
12161 }
12162
12163 /*
12164 * copy stat64 structure into user_stat64 structure.
12165 */
12166 void
12167 munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
12168 {
12169 bzero(usbp, sizeof(*usbp));
12170
12171 usbp->st_dev = sbp->st_dev;
12172 usbp->st_ino = sbp->st_ino;
12173 usbp->st_mode = sbp->st_mode;
12174 usbp->st_nlink = sbp->st_nlink;
12175 usbp->st_uid = sbp->st_uid;
12176 usbp->st_gid = sbp->st_gid;
12177 usbp->st_rdev = sbp->st_rdev;
12178 #ifndef _POSIX_C_SOURCE
12179 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
12180 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
12181 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
12182 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
12183 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
12184 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
12185 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
12186 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
12187 #else
12188 usbp->st_atime = sbp->st_atime;
12189 usbp->st_atimensec = sbp->st_atimensec;
12190 usbp->st_mtime = sbp->st_mtime;
12191 usbp->st_mtimensec = sbp->st_mtimensec;
12192 usbp->st_ctime = sbp->st_ctime;
12193 usbp->st_ctimensec = sbp->st_ctimensec;
12194 usbp->st_birthtime = sbp->st_birthtime;
12195 usbp->st_birthtimensec = sbp->st_birthtimensec;
12196 #endif
12197 usbp->st_size = sbp->st_size;
12198 usbp->st_blocks = sbp->st_blocks;
12199 usbp->st_blksize = sbp->st_blksize;
12200 usbp->st_flags = sbp->st_flags;
12201 usbp->st_gen = sbp->st_gen;
12202 usbp->st_lspare = sbp->st_lspare;
12203 usbp->st_qspare[0] = sbp->st_qspare[0];
12204 usbp->st_qspare[1] = sbp->st_qspare[1];
12205 }
12206
12207 void
12208 munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
12209 {
12210 bzero(usbp, sizeof(*usbp));
12211
12212 usbp->st_dev = sbp->st_dev;
12213 usbp->st_ino = sbp->st_ino;
12214 usbp->st_mode = sbp->st_mode;
12215 usbp->st_nlink = sbp->st_nlink;
12216 usbp->st_uid = sbp->st_uid;
12217 usbp->st_gid = sbp->st_gid;
12218 usbp->st_rdev = sbp->st_rdev;
12219 #ifndef _POSIX_C_SOURCE
12220 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
12221 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
12222 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
12223 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
12224 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
12225 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
12226 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
12227 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
12228 #else
12229 usbp->st_atime = sbp->st_atime;
12230 usbp->st_atimensec = sbp->st_atimensec;
12231 usbp->st_mtime = sbp->st_mtime;
12232 usbp->st_mtimensec = sbp->st_mtimensec;
12233 usbp->st_ctime = sbp->st_ctime;
12234 usbp->st_ctimensec = sbp->st_ctimensec;
12235 usbp->st_birthtime = sbp->st_birthtime;
12236 usbp->st_birthtimensec = sbp->st_birthtimensec;
12237 #endif
12238 usbp->st_size = sbp->st_size;
12239 usbp->st_blocks = sbp->st_blocks;
12240 usbp->st_blksize = sbp->st_blksize;
12241 usbp->st_flags = sbp->st_flags;
12242 usbp->st_gen = sbp->st_gen;
12243 usbp->st_lspare = sbp->st_lspare;
12244 usbp->st_qspare[0] = sbp->st_qspare[0];
12245 usbp->st_qspare[1] = sbp->st_qspare[1];
12246 }
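/*
 * Illustrative sketch (not part of the original file): how the munge helpers
 * above are typically paired with copyout() in a stat-family handler.  The
 * helper name copyout_stat64_for_process() is hypothetical; the real call
 * sites appear earlier in this file.
 */
static int
copyout_stat64_for_process(struct stat64 *sbp, user_addr_t ubuf, proc_t p)
{
	if (proc_is64bit(p)) {
		struct user64_stat64 user64_sb;

		munge_user64_stat64(sbp, &user64_sb);
		return copyout(&user64_sb, ubuf, sizeof(user64_sb));
	} else {
		struct user32_stat64 user32_sb;

		munge_user32_stat64(sbp, &user32_sb);
		return copyout(&user32_sb, ubuf, sizeof(user32_sb));
	}
}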
12247
12248 /*
12249 * Purge the buffer cache to simulate cold starts
12250 */
12251 static int
12252 vnode_purge_callback(struct vnode *vp, __unused void *cargs)
12253 {
12254 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
12255
12256 return VNODE_RETURNED;
12257 }
12258
12259 static int
12260 vfs_purge_callback(mount_t mp, __unused void * arg)
12261 {
12262 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
12263
12264 return VFS_RETURNED;
12265 }
12266
12267 int
12268 vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
12269 {
12270 if (!kauth_cred_issuser(kauth_cred_get())) {
12271 return EPERM;
12272 }
12273
12274 vfs_iterate(0 /* flags */, vfs_purge_callback, NULL);
12275
12276 return 0;
12277 }
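/*
 * Illustrative sketch (hypothetical, not part of the original file): the same
 * vfs_iterate()/vnode_iterate() callback pattern used by vfs_purge() above,
 * here simply counting vnodes across all mounts.
 */
static int
vnode_count_callback(__unused struct vnode *vp, void *cargs)
{
	(*(uint64_t *)cargs)++;

	return VNODE_RETURNED;
}

static int
vfs_count_callback(mount_t mp, void *arg)
{
	vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_count_callback, arg);

	return VFS_RETURNED;
}

/* e.g.: uint64_t nvnodes = 0; vfs_iterate(0, vfs_count_callback, &nvnodes); */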
12278
12279 /*
12280 * Gets the vnode associated with the (unnamed) snapshot directory
12281 * for a filesystem. The snapshot directory vnode is returned with
12282 * an iocount on it.
12283 */
12284 int
12285 vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx)
12286 {
12287 return VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx);
12288 }
12289
12290 /*
12291 * Get the snapshot vnode.
12292 *
12293 * If successful, the call returns with an iocount on *rvpp and *sdvpp, and
12294 * the caller needs to call nameidone() on ndp.
12295 *
12296 * If the snapshot vnode exists, it is returned in ndp->ni_vp.
12297 *
12298 * If it returns with an error, *rvpp and *sdvpp are NULL and nameidone() is
12299 * not needed.
12300 */
12301 static int
12302 vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp,
12303 user_addr_t name, struct nameidata *ndp, int32_t op,
12304 #if !CONFIG_TRIGGERS
12305 __unused
12306 #endif
12307 enum path_operation pathop,
12308 vfs_context_t ctx)
12309 {
12310 int error, i;
12311 caddr_t name_buf;
12312 size_t name_len;
12313 struct vfs_attr vfa;
12314
12315 *sdvpp = NULLVP;
12316 *rvpp = NULLVP;
12317
12318 error = vnode_getfromfd(ctx, dirfd, rvpp);
12319 if (error) {
12320 return error;
12321 }
12322
12323 if (!vnode_isvroot(*rvpp)) {
12324 error = EINVAL;
12325 goto out;
12326 }
12327
12328 /* Make sure the filesystem supports snapshots */
12329 VFSATTR_INIT(&vfa);
12330 VFSATTR_WANTED(&vfa, f_capabilities);
12331 if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) ||
12332 !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) ||
12333 !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] &
12334 VOL_CAP_INT_SNAPSHOT)) ||
12335 !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &
12336 VOL_CAP_INT_SNAPSHOT))) {
12337 error = ENOTSUP;
12338 goto out;
12339 }
12340
12341 error = vnode_get_snapdir(*rvpp, sdvpp, ctx);
12342 if (error) {
12343 goto out;
12344 }
12345
12346 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12347 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
12348 if (error) {
12349 goto out1;
12350 }
12351
12352 /*
12353 * Some sanity checks: the name can't be empty, ".", "..", or contain slashes.
12354 * (the length returned by copyinstr includes the terminating NUL)
12355 */
12356 if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
12357 (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
12358 error = EINVAL;
12359 goto out1;
12360 }
12361 for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++) {
12362 ;
12363 }
12364 if (i < (int)name_len) {
12365 error = EINVAL;
12366 goto out1;
12367 }
12368
12369 #if CONFIG_MACF
12370 if (op == CREATE) {
12371 error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp),
12372 name_buf);
12373 } else if (op == DELETE) {
12374 error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp),
12375 name_buf);
12376 }
12377 if (error) {
12378 goto out1;
12379 }
12380 #endif
12381
12382 /* Check if the snapshot already exists ... */
12383 NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1,
12384 UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx);
12385 ndp->ni_dvp = *sdvpp;
12386
12387 error = namei(ndp);
12388 out1:
12389 FREE(name_buf, M_TEMP);
12390 out:
12391 if (error) {
12392 if (*sdvpp) {
12393 vnode_put(*sdvpp);
12394 *sdvpp = NULLVP;
12395 }
12396 if (*rvpp) {
12397 vnode_put(*rvpp);
12398 *rvpp = NULLVP;
12399 }
12400 }
12401 return error;
12402 }
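/*
 * Hypothetical helper (illustration only, not part of the original file): the
 * snapshot-name sanity check performed inline above, factored out.  name_len
 * is the length returned by copyinstr() and includes the terminating NUL.
 */
static int
snapshot_name_is_valid(const char *name_buf, size_t name_len)
{
	size_t i;

	/* reject "", "." and ".." */
	if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
	    (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
		return 0;
	}
	/* reject names containing '/' */
	for (i = 0; i < name_len; i++) {
		if (name_buf[i] == '/') {
			return 0;
		}
	}
	return 1;
}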
12403
12404 /*
12405 * Create a filesystem snapshot (on filesystems that support it)
12406 *
12407 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL):
12408 * we get to the (unnamed) snapshot directory vnode and create the vnode
12409 * for the snapshot in it.
12410 *
12411 * Restrictions:
12412 *
12413 * a) The passed-in snapshot name cannot contain slashes.
12414 * b) The name can't be "." or "..".
12415 *
12416 * Since this requires superuser privileges, vnode_authorize calls are not
12417 * made.
12418 */
12419 static int
12420 snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
12421 vfs_context_t ctx)
12422 {
12423 vnode_t rvp, snapdvp;
12424 int error;
12425 struct nameidata namend;
12426
12427 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE,
12428 OP_LINK, ctx);
12429 if (error) {
12430 return error;
12431 }
12432
12433 if (namend.ni_vp) {
12434 vnode_put(namend.ni_vp);
12435 error = EEXIST;
12436 } else {
12437 struct vnode_attr va;
12438 vnode_t vp = NULLVP;
12439
12440 VATTR_INIT(&va);
12441 VATTR_SET(&va, va_type, VREG);
12442 VATTR_SET(&va, va_mode, 0);
12443
12444 error = vn_create(snapdvp, &vp, &namend, &va,
12445 VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
12446 if (!error && vp) {
12447 vnode_put(vp);
12448 }
12449 }
12450
12451 nameidone(&namend);
12452 vnode_put(snapdvp);
12453 vnode_put(rvp);
12454 return error;
12455 }
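/*
 * Userspace usage sketch (not part of this kernel file): creating a snapshot
 * of the volume containing "/", assuming the fs_snapshot_create() wrapper
 * declared in <sys/snapshot.h>.  The caller must hold the snapshot privilege
 * checked in fs_snapshot() below.
 */
#if 0	/* illustrative userspace code */
#include <fcntl.h>
#include <stdio.h>
#include <sys/snapshot.h>
#include <unistd.h>

int
main(void)
{
	int dirfd = open("/", O_RDONLY);

	if (dirfd < 0 || fs_snapshot_create(dirfd, "com.example.mysnap", 0) != 0) {
		perror("fs_snapshot_create");
		return 1;
	}
	close(dirfd);
	return 0;
}
#endif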
12456
12457 /*
12458 * Delete a filesystem snapshot
12459 *
12460 * Get the vnodes for the unnamed snapshot directory and the snapshot, then
12461 * delete the snapshot.
12462 */
12463 static int
12464 snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
12465 vfs_context_t ctx)
12466 {
12467 vnode_t rvp, snapdvp;
12468 int error;
12469 struct nameidata namend;
12470
12471 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE,
12472 OP_UNLINK, ctx);
12473 if (error) {
12474 goto out;
12475 }
12476
12477 error = VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd,
12478 VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
12479
12480 vnode_put(namend.ni_vp);
12481 nameidone(&namend);
12482 vnode_put(snapdvp);
12483 vnode_put(rvp);
12484 out:
12485 return error;
12486 }
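/*
 * Userspace usage sketch (not part of this kernel file): deleting a snapshot
 * by name, assuming the fs_snapshot_delete() wrapper declared in
 * <sys/snapshot.h>.
 */
#if 0	/* illustrative userspace code */
#include <fcntl.h>
#include <sys/snapshot.h>
#include <unistd.h>

static int
delete_snapshot(const char *volume, const char *snapname)
{
	int dirfd = open(volume, O_RDONLY);
	int error;

	if (dirfd < 0) {
		return -1;
	}
	error = fs_snapshot_delete(dirfd, snapname, 0);
	close(dirfd);
	return error;
}
#endif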
12487
12488 /*
12489 * Revert a filesystem to a snapshot
12490 *
12491 * Marks the filesystem to revert to the given snapshot on next mount.
12492 */
12493 static int
12494 snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
12495 vfs_context_t ctx)
12496 {
12497 int error;
12498 vnode_t rvp;
12499 mount_t mp;
12500 struct fs_snapshot_revert_args revert_data;
12501 struct componentname cnp;
12502 caddr_t name_buf;
12503 size_t name_len;
12504
12505 error = vnode_getfromfd(ctx, dirfd, &rvp);
12506 if (error) {
12507 return error;
12508 }
12509 mp = vnode_mount(rvp);
12510
12511 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12512 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
12513 if (error) {
12514 FREE(name_buf, M_TEMP);
12515 vnode_put(rvp);
12516 return error;
12517 }
12518
12519 #if CONFIG_MACF
12520 error = mac_mount_check_snapshot_revert(ctx, mp, name_buf);
12521 if (error) {
12522 FREE(name_buf, M_TEMP);
12523 vnode_put(rvp);
12524 return error;
12525 }
12526 #endif
12527
12528 /*
12529 * Grab mount_iterref so that we can release the vnode,
12530 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
12531 */
12532 error = mount_iterref(mp, 0);
12533 vnode_put(rvp);
12534 if (error) {
12535 FREE(name_buf, M_TEMP);
12536 return error;
12537 }
12538
12539 memset(&cnp, 0, sizeof(cnp));
12540 cnp.cn_pnbuf = (char *)name_buf;
12541 cnp.cn_nameiop = LOOKUP;
12542 cnp.cn_flags = ISLASTCN | HASBUF;
12543 cnp.cn_pnlen = MAXPATHLEN;
12544 cnp.cn_nameptr = cnp.cn_pnbuf;
12545 cnp.cn_namelen = (int)name_len;
12546 revert_data.sr_cnp = &cnp;
12547
12548 error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
12549 mount_iterdrop(mp);
12550 FREE(name_buf, M_TEMP);
12551
12552 if (error) {
12553 /* If there was any error, try again using VNOP_IOCTL */
12554
12555 vnode_t snapdvp;
12556 struct nameidata namend;
12557
12558 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP,
12559 OP_LOOKUP, ctx);
12560 if (error) {
12561 return error;
12562 }
12563
12564
12565 error = VNOP_IOCTL(namend.ni_vp, APFSIOC_REVERT_TO_SNAPSHOT, (caddr_t) NULL,
12566 0, ctx);
12567
12568 vnode_put(namend.ni_vp);
12569 nameidone(&namend);
12570 vnode_put(snapdvp);
12571 vnode_put(rvp);
12572 }
12573
12574 return error;
12575 }
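/*
 * Filesystem-side sketch (hypothetical, not part of the original file): how a
 * filesystem's vfs_ioctl entry point might consume the fs_snapshot_revert_args
 * built above.  example_fs_vfs_ioctl() and the "record revert" step are
 * illustrative only.
 */
static int
example_fs_vfs_ioctl(mount_t mp, u_long command, caddr_t data,
    __unused int flags, __unused vfs_context_t ctx)
{
	if (command == VFSIOC_REVERT_SNAPSHOT) {
		struct fs_snapshot_revert_args *args =
		    (struct fs_snapshot_revert_args *)data;
		struct componentname *cnp = args->sr_cnp;

		/*
		 * cnp->cn_nameptr / cnp->cn_namelen identify the snapshot;
		 * a real filesystem would record it so the next mount of mp
		 * reverts to that snapshot.
		 */
		(void)mp;
		(void)cnp;
		return 0;
	}
	return ENOTSUP;
}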
12576
12577 /*
12578 * Rename a filesystem snapshot
12579 *
12580 * Get the vnodes for the unnamed snapshot directory and the snapshot, then
12581 * rename the snapshot. This is a very specialized (and simple) case of
12582 * rename(2) (which has to deal with a lot more complications). It differs
12583 * slightly from rename(2) in that EEXIST is returned if the new name exists.
12584 */
12585 static int
12586 snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
12587 __unused uint32_t flags, vfs_context_t ctx)
12588 {
12589 vnode_t rvp, snapdvp;
12590 int error, i;
12591 caddr_t newname_buf;
12592 size_t name_len;
12593 vnode_t fvp;
12594 struct nameidata *fromnd, *tond;
12595 /* carving out a chunk for structs that are too big to be on the stack. */
12596 struct {
12597 struct nameidata from_node;
12598 struct nameidata to_node;
12599 } * __rename_data;
12600
12601 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
12602 fromnd = &__rename_data->from_node;
12603 tond = &__rename_data->to_node;
12604
12605 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE,
12606 OP_UNLINK, ctx);
12607 if (error) {
12608 goto out;
12609 }
12610 fvp = fromnd->ni_vp;
12611
12612 MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12613 error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
12614 if (error) {
12615 goto out1;
12616 }
12617
12618 /*
12619 * Some sanity checks: the new name can't be empty, ".", "..", or contain
12620 * slashes.
12621 * (the length returned by copyinstr includes the terminating NUL)
12622 *
12623 * The FS rename VNOP is supposed to handle this, but we catch it
12624 * here ourselves as well.
12625 */
12626 if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') ||
12627 (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) {
12628 error = EINVAL;
12629 goto out1;
12630 }
12631 for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++) {
12632 ;
12633 }
12634 if (i < (int)name_len) {
12635 error = EINVAL;
12636 goto out1;
12637 }
12638
12639 #if CONFIG_MACF
12640 error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp),
12641 newname_buf);
12642 if (error) {
12643 goto out1;
12644 }
12645 #endif
12646
12647 NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
12648 UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx);
12649 tond->ni_dvp = snapdvp;
12650
12651 error = namei(tond);
12652 if (error) {
12653 goto out2;
12654 } else if (tond->ni_vp) {
12655 /*
12656 * snapshot rename behaves differently from rename(2): if the
12657 * new name exists, EEXIST is returned.
12658 */
12659 vnode_put(tond->ni_vp);
12660 error = EEXIST;
12661 goto out2;
12662 }
12663
12664 error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
12665 &tond->ni_cnd, ctx);
12666
12667 out2:
12668 nameidone(tond);
12669 out1:
12670 FREE(newname_buf, M_TEMP);
12671 vnode_put(fvp);
12672 vnode_put(snapdvp);
12673 vnode_put(rvp);
12674 nameidone(fromnd);
12675 out:
12676 FREE(__rename_data, M_TEMP);
12677 return error;
12678 }
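/*
 * Userspace usage sketch (not part of this kernel file): renaming a snapshot,
 * assuming the fs_snapshot_rename() wrapper declared in <sys/snapshot.h>.  As
 * noted above, EEXIST is returned if the new name already exists.
 */
#if 0	/* illustrative userspace code */
#include <fcntl.h>
#include <sys/snapshot.h>
#include <unistd.h>

static int
rename_snapshot(const char *volume, const char *oldname, const char *newname)
{
	int dirfd = open(volume, O_RDONLY);
	int error;

	if (dirfd < 0) {
		return -1;
	}
	error = fs_snapshot_rename(dirfd, oldname, newname, 0);
	close(dirfd);
	return error;
}
#endif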
12679
12680 /*
12681 * Mount a filesystem snapshot
12682 *
12683 * Get the vnodes for the unnamed snapshot directory and the snapshot, then
12684 * mount the snapshot.
12685 */
12686 static int
12687 snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
12688 __unused user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
12689 {
12690 vnode_t rvp, snapdvp, snapvp, vp, pvp;
12691 int error;
12692 struct nameidata *snapndp, *dirndp;
12693 /* carving out a chunk for structs that are too big to be on the stack. */
12694 struct {
12695 struct nameidata snapnd;
12696 struct nameidata dirnd;
12697 } * __snapshot_mount_data;
12698
12699 MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data),
12700 M_TEMP, M_WAITOK);
12701 snapndp = &__snapshot_mount_data->snapnd;
12702 dirndp = &__snapshot_mount_data->dirnd;
12703
12704 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP,
12705 OP_LOOKUP, ctx);
12706 if (error) {
12707 goto out;
12708 }
12709
12710 snapvp = snapndp->ni_vp;
12711 if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) {
12712 error = EIO;
12713 goto out1;
12714 }
12715
12716 /* Get the vnode to be covered */
12717 NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
12718 UIO_USERSPACE, directory, ctx);
12719 error = namei(dirndp);
12720 if (error) {
12721 goto out1;
12722 }
12723
12724 vp = dirndp->ni_vp;
12725 pvp = dirndp->ni_dvp;
12726
12727 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
12728 error = EINVAL;
12729 } else {
12730 mount_t mp = vnode_mount(rvp);
12731 struct fs_snapshot_mount_args smnt_data;
12732
12733 smnt_data.sm_mp = mp;
12734 smnt_data.sm_cnp = &snapndp->ni_cnd;
12735 error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
12736 &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), flags & MNT_DONTBROWSE,
12737 KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
12738 }
12739
12740 vnode_put(vp);
12741 vnode_put(pvp);
12742 nameidone(dirndp);
12743 out1:
12744 vnode_put(snapvp);
12745 vnode_put(snapdvp);
12746 vnode_put(rvp);
12747 nameidone(snapndp);
12748 out:
12749 FREE(__snapshot_mount_data, M_TEMP);
12750 return error;
12751 }
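/*
 * Userspace usage sketch (not part of this kernel file): mounting a snapshot
 * at a directory, assuming the fs_snapshot_mount() wrapper declared in
 * <sys/snapshot.h>.  The argument order shown (mount-point directory before
 * snapshot name) is an assumption; verify it against the header before
 * relying on it.
 */
#if 0	/* illustrative userspace code */
#include <fcntl.h>
#include <sys/snapshot.h>
#include <unistd.h>

static int
mount_snapshot(const char *volume, const char *snapname, const char *mntpoint)
{
	int dirfd = open(volume, O_RDONLY);
	int error;

	if (dirfd < 0) {
		return -1;
	}
	error = fs_snapshot_mount(dirfd, mntpoint, snapname, 0);
	close(dirfd);
	return error;
}
#endif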
12752
12753 /*
12754 * Root from a snapshot of the filesystem
12755 *
12756 * Marks the filesystem to root from the given snapshot on next boot.
12757 */
12758 static int
12759 snapshot_root(int dirfd, user_addr_t name, __unused uint32_t flags,
12760 vfs_context_t ctx)
12761 {
12762 int error;
12763 vnode_t rvp;
12764 mount_t mp;
12765 struct fs_snapshot_root_args root_data;
12766 struct componentname cnp;
12767 caddr_t name_buf;
12768 size_t name_len;
12769
12770 error = vnode_getfromfd(ctx, dirfd, &rvp);
12771 if (error) {
12772 return error;
12773 }
12774 mp = vnode_mount(rvp);
12775
12776 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12777 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
12778 if (error) {
12779 FREE(name_buf, M_TEMP);
12780 vnode_put(rvp);
12781 return error;
12782 }
12783
12784 // XXX MAC checks ?
12785
12786 /*
12787 * Grab mount_iterref so that we can release the vnode,
12788 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
12789 */
12790 error = mount_iterref(mp, 0);
12791 vnode_put(rvp);
12792 if (error) {
12793 FREE(name_buf, M_TEMP);
12794 return error;
12795 }
12796
12797 memset(&cnp, 0, sizeof(cnp));
12798 cnp.cn_pnbuf = (char *)name_buf;
12799 cnp.cn_nameiop = LOOKUP;
12800 cnp.cn_flags = ISLASTCN | HASBUF;
12801 cnp.cn_pnlen = MAXPATHLEN;
12802 cnp.cn_nameptr = cnp.cn_pnbuf;
12803 cnp.cn_namelen = (int)name_len;
12804 root_data.sr_cnp = &cnp;
12805
12806 error = VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, (caddr_t)&root_data, 0, ctx);
12807
12808 mount_iterdrop(mp);
12809 FREE(name_buf, M_TEMP);
12810
12811 return error;
12812 }
12813
12814 /*
12815 * FS snapshot operations dispatcher
12816 */
12817 int
12818 fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap,
12819 __unused int32_t *retval)
12820 {
12821 int error;
12822 vfs_context_t ctx = vfs_context_current();
12823
12824 AUDIT_ARG(fd, uap->dirfd);
12825 AUDIT_ARG(value32, uap->op);
12826
12827 error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0);
12828 if (error) {
12829 return error;
12830 }
12831
12832 /*
12833 * Enforce user authorization for snapshot modification operations
12834 */
12835 if ((uap->op != SNAPSHOT_OP_MOUNT) &&
12836 (uap->op != SNAPSHOT_OP_ROOT)) {
12837 vnode_t dvp = NULLVP;
12838 vnode_t devvp = NULLVP;
12839 mount_t mp;
12840
12841 error = vnode_getfromfd(ctx, uap->dirfd, &dvp);
12842 if (error) {
12843 return error;
12844 }
12845 mp = vnode_mount(dvp);
12846 devvp = mp->mnt_devvp;
12847
12848 /* get an iocount on devvp */
12849 if (devvp == NULLVP) {
12850 error = vnode_lookup(mp->mnt_vfsstat.f_mntfromname, 0, &devvp, ctx);
12851 /* for mounts which aren't backed by block devices */
12852 if (error == ENOENT) {
12853 error = ENXIO;
12854 }
12855 } else {
12856 error = vnode_getwithref(devvp);
12857 }
12858
12859 if (error) {
12860 vnode_put(dvp);
12861 return error;
12862 }
12863
12864 if ((vfs_context_issuser(ctx) == 0) &&
12865 (vnode_authorize(devvp, NULL, KAUTH_VNODE_WRITE_DATA, ctx) != 0)) {
12866 error = EPERM;
12867 }
12868 vnode_put(dvp);
12869 vnode_put(devvp);
12870
12871 if (error) {
12872 return error;
12873 }
12874 }
12875
12876 switch (uap->op) {
12877 case SNAPSHOT_OP_CREATE:
12878 error = snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx);
12879 break;
12880 case SNAPSHOT_OP_DELETE:
12881 error = snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx);
12882 break;
12883 case SNAPSHOT_OP_RENAME:
12884 error = snapshot_rename(uap->dirfd, uap->name1, uap->name2,
12885 uap->flags, ctx);
12886 break;
12887 case SNAPSHOT_OP_MOUNT:
12888 error = snapshot_mount(uap->dirfd, uap->name1, uap->name2,
12889 uap->data, uap->flags, ctx);
12890 break;
12891 case SNAPSHOT_OP_REVERT:
12892 error = snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx);
12893 break;
12894 #if CONFIG_MNT_ROOTSNAP
12895 case SNAPSHOT_OP_ROOT:
12896 error = snapshot_root(uap->dirfd, uap->name1, uap->flags, ctx);
12897 break;
12898 #endif /* CONFIG_MNT_ROOTSNAP */
12899 default:
12900 error = ENOSYS;
12901 }
12902
12903 return error;
12904 }