apple/xnu: bsd/vfs/vfs_syscalls.c (commit e1497887b2e011700690c12c14e2ca85fdb80001)
1 /*
2 * Copyright (c) 1995-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
80 #include <sys/stat.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
87 #include <sys/mman.h>
88 #include <sys/dirent.h>
89 #include <sys/attr.h>
90 #include <sys/sysctl.h>
91 #include <sys/ubc.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/sysctl.h>
98 #include <sys/xattr.h>
99 #include <sys/fcntl.h>
100 #include <sys/fsctl.h>
101 #include <sys/ubc_internal.h>
102 #include <sys/disk.h>
103 #include <sys/content_protection.h>
104 #include <sys/clonefile.h>
105 #include <sys/snapshot.h>
106 #include <sys/priv.h>
107 #include <sys/fsgetpath.h>
108 #include <machine/cons.h>
109 #include <machine/limits.h>
110 #include <miscfs/specfs/specdev.h>
111
112 #include <vfs/vfs_disk_conditioner.h>
113
114 #include <security/audit/audit.h>
115 #include <bsm/audit_kevents.h>
116
117 #include <mach/mach_types.h>
118 #include <kern/kern_types.h>
119 #include <kern/kalloc.h>
120 #include <kern/task.h>
121
122 #include <vm/vm_pageout.h>
123 #include <vm/vm_protos.h>
124
125 #include <libkern/OSAtomic.h>
126 #include <pexpert/pexpert.h>
127 #include <IOKit/IOBSD.h>
128
129 // deps for MIG call
130 #include <kern/host.h>
131 #include <kern/ipc_misc.h>
132 #include <mach/host_priv.h>
133 #include <mach/vfs_nspace.h>
134 #include <os/log.h>
135
136 #if ROUTEFS
137 #include <miscfs/routefs/routefs.h>
138 #endif /* ROUTEFS */
139
140 #if CONFIG_MACF
141 #include <security/mac.h>
142 #include <security/mac_framework.h>
143 #endif
144
145 #if CONFIG_FSE
146 #define GET_PATH(x) \
147 (x) = get_pathbuff();
148 #define RELEASE_PATH(x) \
149 release_pathbuff(x);
150 #else
151 #define GET_PATH(x) \
152 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
153 #define RELEASE_PATH(x) \
154 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
155 #endif /* CONFIG_FSE */
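/*
 * GET_PATH/RELEASE_PATH are always used as a pair: both variants hand back
 * a MAXPATHLEN-sized scratch buffer (via get_pathbuff()/release_pathbuff()
 * when CONFIG_FSE is set, from the M_NAMEI zone otherwise), and the buffer
 * contents are not zero-filled.
 */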
156
157 #ifndef HFS_GET_BOOT_INFO
158 #define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
159 #endif
160
161 #ifndef HFS_SET_BOOT_INFO
162 #define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
163 #endif
164
165 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
166 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
167 #endif
168
169 extern void disk_conditioner_unmount(mount_t mp);
170
171 /* struct for checkdirs iteration */
172 struct cdirargs {
173 vnode_t olddp;
174 vnode_t newdp;
175 };
176 /* callback for checkdirs iteration */
177 static int checkdirs_callback(proc_t p, void * arg);
178
179 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
180 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
181 void enablequotas(struct mount *mp, vfs_context_t ctx);
182 static int getfsstat_callback(mount_t mp, void * arg);
183 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
184 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
185 static int sync_callback(mount_t, void *);
186 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
187 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
188 boolean_t partial_copy);
189 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
190 static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
191 struct componentname *cnp, user_addr_t fsmountargs,
192 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
193 vfs_context_t ctx);
194 void vfs_notify_mount(vnode_t pdvp);
195
196 int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
197
198 struct fd_vn_data * fg_vn_data_alloc(void);
199
200 /*
201 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
202 * Concurrent lookups (or lookups by ids) on hard links can cause the
203 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
204 * does) to return ENOENT as the path cannot be returned from the name cache
205  * alone. We have no option but to retry and hope to get one namei->reverse-path
206  * generation done without an intervening lookup or lookup-by-id on the hard-link
207  * item. This is only an issue for MAC hooks that cannot re-enter the filesystem,
208  * which currently are the MAC hooks for rename, unlink and rmdir.
209 */
210 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
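/*
 * Illustrative sketch (not from this file) of how a caller bounds its
 * retries around such an authorization hook; the helper name
 * authorize_once() is hypothetical:
 *
 *	int retries = 0;
 *	do {
 *		error = authorize_once(vp, cnp, ctx);
 *	} while (error == ENOENT &&
 *	    ++retries < MAX_AUTHORIZE_ENOENT_RETRIES);
 */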
211
212 static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg,
213 int unlink_flags);
214
215 static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, uint32_t options, int *);
216
217 #ifdef CONFIG_IMGSRC_ACCESS
218 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
219 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
220 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
221 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
222 static void mount_end_update(mount_t mp);
223 static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
224 #endif /* CONFIG_IMGSRC_ACCESS */
225
226 #if CONFIG_LOCKERBOOT
227 int mount_locker_protoboot(const char *fsname, const char *mntpoint,
228 const char *pbdevpath);
229 #endif
230
231 //snapshot functions
232 #if CONFIG_MNT_ROOTSNAP
233 static int snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx);
234 #else
235 static int snapshot_root(int dirfd, user_addr_t name, uint32_t flags, vfs_context_t ctx) __attribute__((unused));
236 #endif
237
238 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
239
240 __private_extern__
241 int sync_internal(void);
242
243 __private_extern__
244 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
245
246 extern lck_grp_t *fd_vn_lck_grp;
247 extern lck_grp_attr_t *fd_vn_lck_grp_attr;
248 extern lck_attr_t *fd_vn_lck_attr;
249
250 /*
251 * incremented each time a mount or unmount operation occurs
252 * used to invalidate the cached value of the rootvp in the
253 * mount structure utilized by cache_lookup_path
254 */
255 uint32_t mount_generation = 0;
256
257 /* counts number of mount and unmount operations */
258 unsigned int vfs_nummntops = 0;
259
260 extern const struct fileops vnops;
261 #if CONFIG_APPLEDOUBLE
262 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
263 #endif /* CONFIG_APPLEDOUBLE */
264
265 /*
266 * Virtual File System System Calls
267 */
268
269 #if NFSCLIENT || DEVFS || ROUTEFS
270 /*
271  * Private in-kernel mounting SPI (NFS only, not exported)
272 */
273 __private_extern__
274 boolean_t
275 vfs_iskernelmount(mount_t mp)
276 {
277 return (mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE;
278 }
279
280 __private_extern__
281 int
282 kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
283 void *data, __unused size_t datalen, int syscall_flags, uint32_t kern_flags, vfs_context_t ctx)
284 {
285 struct nameidata nd;
286 boolean_t did_namei;
287 int error;
288
289 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
290 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
291
292 /*
293 * Get the vnode to be covered if it's not supplied
294 */
295 if (vp == NULLVP) {
296 error = namei(&nd);
297 if (error) {
298 if (kern_flags & (KERNEL_MOUNT_SNAPSHOT | KERNEL_MOUNT_VMVOL | KERNEL_MOUNT_DATAVOL)) {
299 printf("failed to locate mount-on path: %s ", path);
300 }
301 return error;
302 }
303 vp = nd.ni_vp;
304 pvp = nd.ni_dvp;
305 did_namei = TRUE;
306 } else {
307 char *pnbuf = CAST_DOWN(char *, path);
308
309 nd.ni_cnd.cn_pnbuf = pnbuf;
310 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
311 did_namei = FALSE;
312 }
313
314 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
315 syscall_flags, kern_flags, NULL, TRUE, ctx);
316
317 if (did_namei) {
318 vnode_put(vp);
319 vnode_put(pvp);
320 nameidone(&nd);
321 }
322
323 return error;
324 }
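/*
 * Illustrative only: an in-kernel client would call this SPI roughly as
 *
 *	error = kernel_mount("devfs", NULLVP, NULLVP, "/dev", NULL, 0,
 *	    MNT_DONTBROWSE, KERNEL_MOUNT_NOAUTH, vfs_context_kernel());
 *
 * The flag combination shown is an assumption, not taken from a real
 * call site.
 */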
325 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
326
327 /*
328 * Mount a file system.
329 */
330 /* ARGSUSED */
331 int
332 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
333 {
334 struct __mac_mount_args muap;
335
336 muap.type = uap->type;
337 muap.path = uap->path;
338 muap.flags = uap->flags;
339 muap.data = uap->data;
340 muap.mac_p = USER_ADDR_NULL;
341 return __mac_mount(p, &muap, retval);
342 }
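/*
 * Illustrative only: user space reaches this entry point through the
 * mount(2) wrapper, e.g.
 *
 *	if (mount("hfs", "/Volumes/Example", MNT_RDONLY, &args) == -1)
 *		perror("mount");
 *
 * where `args` is the filesystem-specific mount arguments structure; its
 * layout is an assumption here and differs per filesystem.
 */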
343
344 int
345 fmount(__unused proc_t p, struct fmount_args *uap, __unused int32_t *retval)
346 {
347 struct componentname cn;
348 vfs_context_t ctx = vfs_context_current();
349 size_t dummy = 0;
350 int error;
351 int flags = uap->flags;
352 char fstypename[MFSNAMELEN];
353 char *labelstr = NULL; /* regular mount call always sets it to NULL for __mac_mount() */
354 vnode_t pvp;
355 vnode_t vp;
356
357 AUDIT_ARG(fd, uap->fd);
358 AUDIT_ARG(fflags, flags);
359 /* fstypename will get audited by mount_common */
360
361 /* Sanity check the flags */
362 if (flags & (MNT_IMGSRC_BY_INDEX | MNT_ROOTFS)) {
363 return ENOTSUP;
364 }
365
366 if (flags & MNT_UNION) {
367 return EPERM;
368 }
369
370 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
371 if (error) {
372 return error;
373 }
374
375 if ((error = file_vnode(uap->fd, &vp)) != 0) {
376 return error;
377 }
378
379 if ((error = vnode_getwithref(vp)) != 0) {
380 file_drop(uap->fd);
381 return error;
382 }
383
384 pvp = vnode_getparent(vp);
385 if (pvp == NULL) {
386 vnode_put(vp);
387 file_drop(uap->fd);
388 return EINVAL;
389 }
390
391 memset(&cn, 0, sizeof(struct componentname));
392 MALLOC(cn.cn_pnbuf, char *, MAXPATHLEN, M_TEMP, M_WAITOK);
393 cn.cn_pnlen = MAXPATHLEN;
394
395 if ((error = vn_getpath(vp, cn.cn_pnbuf, &cn.cn_pnlen)) != 0) {
396 FREE(cn.cn_pnbuf, M_TEMP);
397 vnode_put(pvp);
398 vnode_put(vp);
399 file_drop(uap->fd);
400 return error;
401 }
402
403 error = mount_common(fstypename, pvp, vp, &cn, uap->data, flags, 0, labelstr, FALSE, ctx);
404
405 FREE(cn.cn_pnbuf, M_TEMP);
406 vnode_put(pvp);
407 vnode_put(vp);
408 file_drop(uap->fd);
409
410 return error;
411 }
412
413 void
414 vfs_notify_mount(vnode_t pdvp)
415 {
416 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
417 lock_vnode_and_post(pdvp, NOTE_WRITE);
418 }
419
420 /*
421 * __mac_mount:
422 * Mount a file system taking into account MAC label behavior.
423 * See mount(2) man page for more information
424 *
425 * Parameters: p Process requesting the mount
426 * uap User argument descriptor (see below)
427 * retval (ignored)
428 *
429 * Indirect: uap->type Filesystem type
430 * uap->path Path to mount
431 * uap->data Mount arguments
432 * uap->mac_p MAC info
433 * uap->flags Mount flags
434 *
435 *
436 * Returns: 0 Success
437 * !0 Not success
438 */
439 boolean_t root_fs_upgrade_try = FALSE;
440
441 int
442 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
443 {
444 vnode_t pvp = NULL;
445 vnode_t vp = NULL;
446 int need_nameidone = 0;
447 vfs_context_t ctx = vfs_context_current();
448 char fstypename[MFSNAMELEN];
449 struct nameidata nd;
450 size_t dummy = 0;
451 char *labelstr = NULL;
452 int flags = uap->flags;
453 int error;
454 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
455 boolean_t is_64bit = IS_64BIT_PROCESS(p);
456 #else
457 #pragma unused(p)
458 #endif
459 /*
460 * Get the fs type name from user space
461 */
462 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
463 if (error) {
464 return error;
465 }
466
467 /*
468 * Get the vnode to be covered
469 */
470 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
471 UIO_USERSPACE, uap->path, ctx);
472 error = namei(&nd);
473 if (error) {
474 goto out;
475 }
476 need_nameidone = 1;
477 vp = nd.ni_vp;
478 pvp = nd.ni_dvp;
479
480 #ifdef CONFIG_IMGSRC_ACCESS
481 /* Mounting image source cannot be batched with other operations */
482 if (flags == MNT_IMGSRC_BY_INDEX) {
483 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
484 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
485 goto out;
486 }
487 #endif /* CONFIG_IMGSRC_ACCESS */
488
489 #if CONFIG_MACF
490 /*
491 * Get the label string (if any) from user space
492 */
493 if (uap->mac_p != USER_ADDR_NULL) {
494 struct user_mac mac;
495 size_t ulen = 0;
496
497 if (is_64bit) {
498 struct user64_mac mac64;
499 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
500 mac.m_buflen = mac64.m_buflen;
501 mac.m_string = mac64.m_string;
502 } else {
503 struct user32_mac mac32;
504 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
505 mac.m_buflen = mac32.m_buflen;
506 mac.m_string = mac32.m_string;
507 }
508 if (error) {
509 goto out;
510 }
511 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
512 (mac.m_buflen < 2)) {
513 error = EINVAL;
514 goto out;
515 }
516 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
517 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
518 if (error) {
519 goto out;
520 }
521 AUDIT_ARG(mac_string, labelstr);
522 }
523 #endif /* CONFIG_MACF */
524
525 AUDIT_ARG(fflags, flags);
526
527 #if SECURE_KERNEL
528 if (flags & MNT_UNION) {
529 /* No union mounts on release kernels */
530 error = EPERM;
531 goto out;
532 }
533 #endif
534
535 if ((vp->v_flag & VROOT) &&
536 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
537 if (!(flags & MNT_UNION)) {
538 flags |= MNT_UPDATE;
539 } else {
540 /*
541  * For a union mount on '/', treat it as a fresh
542  * mount instead of an update.
543  * Otherwise, union mounting on '/' used to panic the
544  * system, since mnt_vnodecovered was found to
545  * be NULL for '/', which is required for unionlookup
546  * after it gets ENOENT on a union mount.
547 */
548 flags = (flags & ~(MNT_UPDATE));
549 }
550
551 #if SECURE_KERNEL
552 if ((flags & MNT_RDONLY) == 0) {
553 /* Release kernels are not allowed to mount "/" as rw */
554 error = EPERM;
555 goto out;
556 }
557 #endif
558 /*
559 * See 7392553 for more details on why this check exists.
560 * Suffice to say: If this check is ON and something tries
561 * to mount the rootFS RW, we'll turn off the codesign
562 * bitmap optimization.
563 */
564 #if CHECK_CS_VALIDATION_BITMAP
565 if ((flags & MNT_RDONLY) == 0) {
566 root_fs_upgrade_try = TRUE;
567 }
568 #endif
569 }
570
571 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
572 labelstr, FALSE, ctx);
573
574 out:
575
576 #if CONFIG_MACF
577 if (labelstr) {
578 FREE(labelstr, M_MACTEMP);
579 }
580 #endif /* CONFIG_MACF */
581
582 if (vp) {
583 vnode_put(vp);
584 }
585 if (pvp) {
586 vnode_put(pvp);
587 }
588 if (need_nameidone) {
589 nameidone(&nd);
590 }
591
592 return error;
593 }
594
595 /*
596 * common mount implementation (final stage of mounting)
597 *
598 * Arguments:
599  * fstypename	file system type (i.e. its vfs name)
600  * pvp		parent of covered vnode
601  * vp			covered vnode
602  * cnp			component name (i.e. path) of covered vnode
603 * flags generic mount flags
604 * fsmountargs file system specific data
605 * labelstr optional MAC label
606 * kernelmount TRUE for mounts initiated from inside the kernel
607 * ctx caller's context
608 */
609 static int
610 mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
611 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
612 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
613 {
614 #if !CONFIG_MACF
615 #pragma unused(labelstr)
616 #endif
617 struct vnode *devvp = NULLVP;
618 struct vnode *device_vnode = NULLVP;
619 #if CONFIG_MACF
620 struct vnode *rvp;
621 #endif
622 struct mount *mp;
623 struct vfstable *vfsp = (struct vfstable *)0;
624 struct proc *p = vfs_context_proc(ctx);
625 int error, flag = 0;
626 user_addr_t devpath = USER_ADDR_NULL;
627 int ronly = 0;
628 int mntalloc = 0;
629 boolean_t vfsp_ref = FALSE;
630 boolean_t is_rwlock_locked = FALSE;
631 boolean_t did_rele = FALSE;
632 boolean_t have_usecount = FALSE;
633
634 #if CONFIG_ROSV_STARTUP || CONFIG_MOUNT_VM
635 /* Check for mutually-exclusive flag bits */
636 uint32_t checkflags = (internal_flags & (KERNEL_MOUNT_DATAVOL | KERNEL_MOUNT_VMVOL));
637 int bitcount = 0;
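/*
 * Count the set mount-by-role bits with the x &= (x - 1) trick, which
 * clears the lowest set bit on each pass; a count greater than one means
 * the caller asked for more than one role at once.
 */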
638 while (checkflags != 0) {
639 checkflags &= (checkflags - 1);
640 bitcount++;
641 }
642
643 if (bitcount > 1) {
644 //not allowed to request multiple mount-by-role flags
645 error = EINVAL;
646 goto out1;
647 }
648 #endif
649
650 /*
651 * Process an update for an existing mount
652 */
653 if (flags & MNT_UPDATE) {
654 if ((vp->v_flag & VROOT) == 0) {
655 error = EINVAL;
656 goto out1;
657 }
658 mp = vp->v_mount;
659
660 /* unmount in progress; return error */
661 mount_lock_spin(mp);
662 if (mp->mnt_lflag & MNT_LUNMOUNT) {
663 mount_unlock(mp);
664 error = EBUSY;
665 goto out1;
666 }
667 mount_unlock(mp);
668 lck_rw_lock_exclusive(&mp->mnt_rwlock);
669 is_rwlock_locked = TRUE;
670 /*
671 * We only allow the filesystem to be reloaded if it
672 * is currently mounted read-only.
673 */
674 if ((flags & MNT_RELOAD) &&
675 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
676 error = ENOTSUP;
677 goto out1;
678 }
679
680 /*
681 * If content protection is enabled, update mounts are not
682 * allowed to turn it off.
683 */
684 if ((mp->mnt_flag & MNT_CPROTECT) &&
685 ((flags & MNT_CPROTECT) == 0)) {
686 error = EINVAL;
687 goto out1;
688 }
689
690 /*
691  * MNT_REMOVABLE can't be turned off either, but returning an error
692  * for that would be an unexpected failure, so we just silently
693  * add it if it is not passed in.
694 */
695 if ((mp->mnt_flag & MNT_REMOVABLE) &&
696 ((flags & MNT_REMOVABLE) == 0)) {
697 flags |= MNT_REMOVABLE;
698 }
699
700 #ifdef CONFIG_IMGSRC_ACCESS
701 /* Can't downgrade the backer of the root FS */
702 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
703 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
704 error = ENOTSUP;
705 goto out1;
706 }
707 #endif /* CONFIG_IMGSRC_ACCESS */
708
709 /*
710 * Only root, or the user that did the original mount is
711 * permitted to update it.
712 */
713 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
714 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
715 goto out1;
716 }
717 #if CONFIG_MACF
718 error = mac_mount_check_remount(ctx, mp);
719 if (error != 0) {
720 goto out1;
721 }
722 #endif
723 /*
724 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
725 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
726 */
727 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
728 flags |= MNT_NOSUID | MNT_NODEV;
729 if (mp->mnt_flag & MNT_NOEXEC) {
730 flags |= MNT_NOEXEC;
731 }
732 }
733 flag = mp->mnt_flag;
734
735
736
737 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
738
739 vfsp = mp->mnt_vtable;
740 goto update;
741 } // MNT_UPDATE
742
743 /*
744 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
745 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
746 */
747 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
748 flags |= MNT_NOSUID | MNT_NODEV;
749 if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
750 flags |= MNT_NOEXEC;
751 }
752 }
753
754 /* XXXAUDIT: Should we capture the type on the error path as well? */
755 AUDIT_ARG(text, fstypename);
756 mount_list_lock();
757 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
758 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
759 vfsp->vfc_refcount++;
760 vfsp_ref = TRUE;
761 break;
762 }
763 }
764 mount_list_unlock();
765 if (vfsp == NULL) {
766 error = ENODEV;
767 goto out1;
768 }
769
770 /*
771 * VFC_VFSLOCALARGS is not currently supported for kernel mounts,
772 * except in ROSV configs.
773 */
774 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) &&
775 ((internal_flags & (KERNEL_MOUNT_DATAVOL | KERNEL_MOUNT_VMVOL)) == 0)) {
776 error = EINVAL; /* unsupported request */
777 goto out1;
778 }
779
780 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
781 if (error != 0) {
782 goto out1;
783 }
784
785 /*
786 * Allocate and initialize the filesystem (mount_t)
787 */
788 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
789 M_MOUNT, M_WAITOK);
790 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
791 mntalloc = 1;
792
793 /* Initialize the default IO constraints */
794 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
795 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
796 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
797 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
798 mp->mnt_devblocksize = DEV_BSIZE;
799 mp->mnt_alignmentmask = PAGE_MASK;
800 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
801 mp->mnt_ioscale = 1;
802 mp->mnt_ioflags = 0;
803 mp->mnt_realrootvp = NULLVP;
804 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
805
806 TAILQ_INIT(&mp->mnt_vnodelist);
807 TAILQ_INIT(&mp->mnt_workerqueue);
808 TAILQ_INIT(&mp->mnt_newvnodes);
809 mount_lock_init(mp);
810 lck_rw_lock_exclusive(&mp->mnt_rwlock);
811 is_rwlock_locked = TRUE;
812 mp->mnt_op = vfsp->vfc_vfsops;
813 mp->mnt_vtable = vfsp;
814 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
815 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
816 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
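/*
 * The do { } while (0) below is only a scope for pathlen: prefer the
 * kernel-resolved path for f_mntonname, and fall back to the
 * caller-supplied componentname buffer if vn_getpath_ext() fails.
 */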
817 do {
818 int pathlen = MAXPATHLEN;
819
820 if (vn_getpath_ext(vp, pvp, mp->mnt_vfsstat.f_mntonname, &pathlen, VN_GETPATH_FSENTER)) {
821 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
822 }
823 } while (0);
824 mp->mnt_vnodecovered = vp;
825 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
826 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
827 mp->mnt_devbsdunit = 0;
828
829 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
830 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
831
832 #if NFSCLIENT || DEVFS || ROUTEFS
833 if (kernelmount) {
834 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
835 }
836 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0) {
837 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
838 }
839 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
840
841 update:
842
843 /*
844 * Set the mount level flags.
845 */
846 if (flags & MNT_RDONLY) {
847 mp->mnt_flag |= MNT_RDONLY;
848 } else if (mp->mnt_flag & MNT_RDONLY) {
849 // disallow read/write upgrades of file systems that
850 // had the TYPENAME_OVERRIDE feature set.
851 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
852 error = EPERM;
853 goto out1;
854 }
855 mp->mnt_kern_flag |= MNTK_WANTRDWR;
856 }
857 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
858 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
859 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
860 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME | MNT_STRICTATIME |
861 MNT_QUARANTINE | MNT_CPROTECT);
862
863 #if SECURE_KERNEL
864 #if !CONFIG_MNT_SUID
865 /*
866 * On release builds of iOS based platforms, always enforce NOSUID on
867 * all mounts. We do this here because we can catch update mounts as well as
868 * non-update mounts in this case.
869 */
870 mp->mnt_flag |= (MNT_NOSUID);
871 #endif
872 #endif
873
874 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
875 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
876 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
877 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME | MNT_STRICTATIME |
878 MNT_QUARANTINE | MNT_CPROTECT);
879
880 #if CONFIG_MACF
881 if (flags & MNT_MULTILABEL) {
882 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
883 error = EINVAL;
884 goto out1;
885 }
886 mp->mnt_flag |= MNT_MULTILABEL;
887 }
888 #endif
889 /*
890 * Process device path for local file systems if requested
891 */
892 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS &&
893 !(internal_flags & (KERNEL_MOUNT_SNAPSHOT | KERNEL_MOUNT_DATAVOL | KERNEL_MOUNT_VMVOL))) {
894 //snapshot, vm, datavolume mounts are special
895 if (vfs_context_is64bit(ctx)) {
896 if ((error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath)))) {
897 goto out1;
898 }
899 fsmountargs += sizeof(devpath);
900 } else {
901 user32_addr_t tmp;
902 if ((error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp)))) {
903 goto out1;
904 }
905 /* munge into LP64 addr */
906 devpath = CAST_USER_ADDR_T(tmp);
907 fsmountargs += sizeof(tmp);
908 }
909
910 /* Lookup device and authorize access to it */
911 if ((devpath)) {
912 struct nameidata nd;
913
914 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
915 if ((error = namei(&nd))) {
916 goto out1;
917 }
918
919 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
920 devvp = nd.ni_vp;
921
922 nameidone(&nd);
923
924 if (devvp->v_type != VBLK) {
925 error = ENOTBLK;
926 goto out2;
927 }
928 if (major(devvp->v_rdev) >= nblkdev) {
929 error = ENXIO;
930 goto out2;
931 }
932 /*
933 * If mount by non-root, then verify that user has necessary
934 * permissions on the device.
935 */
936 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
937 mode_t accessmode = KAUTH_VNODE_READ_DATA;
938
939 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
940 accessmode |= KAUTH_VNODE_WRITE_DATA;
941 }
942 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0) {
943 goto out2;
944 }
945 }
946 }
947 /* On first mount, preflight and open device */
948 if (devpath && ((flags & MNT_UPDATE) == 0)) {
949 if ((error = vnode_ref(devvp))) {
950 goto out2;
951 }
952 /*
953 * Disallow multiple mounts of the same device.
954 * Disallow mounting of a device that is currently in use
955 * (except for root, which might share swap device for miniroot).
956 * Flush out any old buffers remaining from a previous use.
957 */
958 if ((error = vfs_mountedon(devvp))) {
959 goto out3;
960 }
961
962 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
963 error = EBUSY;
964 goto out3;
965 }
966 if ((error = VNOP_FSYNC(devvp, MNT_WAIT, ctx))) {
967 error = ENOTBLK;
968 goto out3;
969 }
970 if ((error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0))) {
971 goto out3;
972 }
973
974 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
975 #if CONFIG_MACF
976 error = mac_vnode_check_open(ctx,
977 devvp,
978 ronly ? FREAD : FREAD | FWRITE);
979 if (error) {
980 goto out3;
981 }
982 #endif /* MAC */
983 if ((error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD | FWRITE, ctx))) {
984 goto out3;
985 }
986
987 mp->mnt_devvp = devvp;
988 device_vnode = devvp;
989 } else if ((mp->mnt_flag & MNT_RDONLY) &&
990 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
991 (device_vnode = mp->mnt_devvp)) {
992 dev_t dev;
993 int maj;
994 /*
995 * If upgrade to read-write by non-root, then verify
996 * that user has necessary permissions on the device.
997 */
998 vnode_getalways(device_vnode);
999
1000 if (suser(vfs_context_ucred(ctx), NULL) &&
1001 (error = vnode_authorize(device_vnode, NULL,
1002 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
1003 ctx)) != 0) {
1004 vnode_put(device_vnode);
1005 goto out2;
1006 }
1007
1008 /* Tell the device that we're upgrading */
1009 dev = (dev_t)device_vnode->v_rdev;
1010 maj = major(dev);
1011
1012 if ((u_int)maj >= (u_int)nblkdev) {
1013 panic("Volume mounted on a device with invalid major number.");
1014 }
1015
1016 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
1017 vnode_put(device_vnode);
1018 device_vnode = NULLVP;
1019 if (error != 0) {
1020 goto out2;
1021 }
1022 }
1023 } // localargs && !(snapshot | data | vm)
1024
1025 #if CONFIG_MACF
1026 if ((flags & MNT_UPDATE) == 0) {
1027 mac_mount_label_init(mp);
1028 mac_mount_label_associate(ctx, mp);
1029 }
1030 if (labelstr) {
1031 if ((flags & MNT_UPDATE) != 0) {
1032 error = mac_mount_check_label_update(ctx, mp);
1033 if (error != 0) {
1034 goto out3;
1035 }
1036 }
1037 }
1038 #endif
1039 /*
1040 * Mount the filesystem. We already asserted that internal_flags
1041 * cannot have more than one mount-by-role bit set.
1042 */
1043 if (internal_flags & KERNEL_MOUNT_SNAPSHOT) {
1044 error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT,
1045 (caddr_t)fsmountargs, 0, ctx);
1046 } else if (internal_flags & KERNEL_MOUNT_DATAVOL) {
1047 #if CONFIG_ROSV_STARTUP
1048 struct mount *origin_mp = (struct mount*)fsmountargs;
1049 fs_role_mount_args_t frma = {origin_mp, VFS_DATA_ROLE};
1050 error = VFS_IOCTL(mp, VFSIOC_MOUNT_BYROLE, (caddr_t)&frma, 0, ctx);
1051 if (error) {
1052 printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_DATA_ROLE, error);
1053 } else {
1054 /* Mark volume associated with system volume */
1055 mp->mnt_kern_flag |= MNTK_SYSTEM;
1056
1057 /* Attempt to acquire the mnt_devvp and set it up */
1058 struct vnode *mp_devvp = NULL;
1059 if (mp->mnt_vfsstat.f_mntfromname[0] != 0) {
1060 errno_t lerr = vnode_lookup(mp->mnt_vfsstat.f_mntfromname,
1061 0, &mp_devvp, vfs_context_kernel());
1062 if (!lerr) {
1063 mp->mnt_devvp = mp_devvp;
1064 //vnode_lookup took an iocount, need to drop it.
1065 vnode_put(mp_devvp);
1066 // now set `device_vnode` to the devvp that was acquired.
1067 // this is needed in order to ensure vfs_init_io_attributes is invoked.
1068 // note that though the iocount above was dropped, the mount acquires
1069 // an implicit reference against the device.
1070 device_vnode = mp_devvp;
1071 }
1072 }
1073 }
1074 #else
1075 error = EINVAL;
1076 #endif
1077 } else if (internal_flags & KERNEL_MOUNT_VMVOL) {
1078 #if CONFIG_MOUNT_VM
1079 struct mount *origin_mp = (struct mount*)fsmountargs;
1080 fs_role_mount_args_t frma = {origin_mp, VFS_VM_ROLE};
1081 error = VFS_IOCTL(mp, VFSIOC_MOUNT_BYROLE, (caddr_t)&frma, 0, ctx);
1082 if (error) {
1083 printf("MOUNT-BY-ROLE (%d) failed! (%d)", VFS_VM_ROLE, error);
1084 } else {
1085 /* Mark volume associated with system volume and a swap mount */
1086 mp->mnt_kern_flag |= (MNTK_SYSTEM | MNTK_SWAP_MOUNT);
1087 /* Attempt to acquire the mnt_devvp and set it up */
1088 struct vnode *mp_devvp = NULL;
1089 if (mp->mnt_vfsstat.f_mntfromname[0] != 0) {
1090 errno_t lerr = vnode_lookup(mp->mnt_vfsstat.f_mntfromname,
1091 0, &mp_devvp, vfs_context_kernel());
1092 if (!lerr) {
1093 mp->mnt_devvp = mp_devvp;
1094 //vnode_lookup took an iocount, need to drop it.
1095 vnode_put(mp_devvp);
1096
1097 // now set `device_vnode` to the devvp that was acquired.
1098 // note that though the iocount above was dropped, the mount acquires
1099 // an implicit reference against the device.
1100 device_vnode = mp_devvp;
1101 }
1102 }
1103 }
1104 #else
1105 error = EINVAL;
1106 #endif
1107 } else {
1108 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
1109 }
1110
1111 if (flags & MNT_UPDATE) {
1112 if (mp->mnt_kern_flag & MNTK_WANTRDWR) {
1113 mp->mnt_flag &= ~MNT_RDONLY;
1114 }
1115 mp->mnt_flag &= ~
1116 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
1117 mp->mnt_kern_flag &= ~MNTK_WANTRDWR;
1118 if (error) {
1119 mp->mnt_flag = flag; /* restore flag value */
1120 }
1121 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
1122 lck_rw_done(&mp->mnt_rwlock);
1123 is_rwlock_locked = FALSE;
1124 if (!error) {
1125 enablequotas(mp, ctx);
1126 }
1127 goto exit;
1128 }
1129
1130 /*
1131 * Put the new filesystem on the mount list after root.
1132 */
1133 if (error == 0) {
1134 struct vfs_attr vfsattr;
1135 #if CONFIG_MACF
1136 error = mac_mount_check_mount_late(ctx, mp);
1137 if (error != 0) {
1138 goto out3;
1139 }
1140
1141 if (vfs_flags(mp) & MNT_MULTILABEL) {
1142 error = VFS_ROOT(mp, &rvp, ctx);
1143 if (error) {
1144 printf("%s() VFS_ROOT returned %d\n", __func__, error);
1145 goto out3;
1146 }
1147 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
1148 /*
1149 * drop reference provided by VFS_ROOT
1150 */
1151 vnode_put(rvp);
1152
1153 if (error) {
1154 goto out3;
1155 }
1156 }
1157 #endif /* MAC */
1158
1159 vnode_lock_spin(vp);
1160 CLR(vp->v_flag, VMOUNT);
1161 vp->v_mountedhere = mp;
1162 vnode_unlock(vp);
1163
1164 /*
1165 * taking the name_cache_lock exclusively will
1166	 * ensure that everyone is out of the fast path who
1167 * might be trying to use a now stale copy of
1168 * vp->v_mountedhere->mnt_realrootvp
1169 * bumping mount_generation causes the cached values
1170 * to be invalidated
1171 */
1172 name_cache_lock();
1173 mount_generation++;
1174 name_cache_unlock();
1175
1176 error = vnode_ref(vp);
1177 if (error != 0) {
1178 goto out4;
1179 }
1180
1181 have_usecount = TRUE;
1182
1183 error = checkdirs(vp, ctx);
1184 if (error != 0) {
1185 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1186 goto out4;
1187 }
1188 /*
1189		 * there is no cleanup code here, so the return value is cast to void;
1190		 * we need to revisit this
1191 */
1192 (void)VFS_START(mp, 0, ctx);
1193
1194 if (mount_list_add(mp) != 0) {
1195 /*
1196 * The system is shutting down trying to umount
1197 * everything, so fail with a plausible errno.
1198 */
1199 error = EBUSY;
1200 goto out4;
1201 }
1202 lck_rw_done(&mp->mnt_rwlock);
1203 is_rwlock_locked = FALSE;
1204
1205 /* Check if this mounted file system supports EAs or named streams. */
1206 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1207 VFSATTR_INIT(&vfsattr);
1208 VFSATTR_WANTED(&vfsattr, f_capabilities);
1209 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
1210 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
1211 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
1212 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
1213 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
1214 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1215 }
1216 #if NAMEDSTREAMS
1217 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
1218 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
1219 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1220 }
1221 #endif
1222 /* Check if this file system supports path from id lookups. */
1223 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
1224 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
1225 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1226 } else if (mp->mnt_flag & MNT_DOVOLFS) {
1227 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1228 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1229 }
1230
1231 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) &&
1232 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) {
1233 mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS;
1234 }
1235 }
1236 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
1237 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1238 }
1239 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
1240 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
1241 }
1242 /* increment the operations count */
1243 OSAddAtomic(1, &vfs_nummntops);
1244 enablequotas(mp, ctx);
1245
1246 if (device_vnode) {
1247 device_vnode->v_specflags |= SI_MOUNTEDON;
1248
1249 /*
1250 * cache the IO attributes for the underlying physical media...
1251 * an error return indicates the underlying driver doesn't
1252 * support all the queries necessary... however, reasonable
1253 * defaults will have been set, so no reason to bail or care
1254 */
1255 vfs_init_io_attributes(device_vnode, mp);
1256 }
1257
1258 /* Now that mount is setup, notify the listeners */
1259 vfs_notify_mount(pvp);
1260 IOBSDMountChange(mp, kIOMountChangeMount);
1261 } else {
1262 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1263 if (mp->mnt_vnodelist.tqh_first != NULL) {
1264 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1265 mp->mnt_vtable->vfc_name, error);
1266 }
1267
1268 vnode_lock_spin(vp);
1269 CLR(vp->v_flag, VMOUNT);
1270 vnode_unlock(vp);
1271 mount_list_lock();
1272 mp->mnt_vtable->vfc_refcount--;
1273 mount_list_unlock();
1274
1275 if (device_vnode) {
1276 vnode_rele(device_vnode);
1277 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD | FWRITE, ctx);
1278 }
1279 lck_rw_done(&mp->mnt_rwlock);
1280 is_rwlock_locked = FALSE;
1281
1282 /*
1283 * if we get here, we have a mount structure that needs to be freed,
1284 * but since the coveredvp hasn't yet been updated to point at it,
1285 * no need to worry about other threads holding a crossref on this mp
1286 * so it's ok to just free it
1287 */
1288 mount_lock_destroy(mp);
1289 #if CONFIG_MACF
1290 mac_mount_label_destroy(mp);
1291 #endif
1292 FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
1293 }
1294 exit:
1295 /*
1296 * drop I/O count on the device vp if there was one
1297 */
1298 if (devpath && devvp) {
1299 vnode_put(devvp);
1300 }
1301
1302 return error;
1303
1304 /* Error condition exits */
1305 out4:
1306 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
1307
1308 /*
1309 * If the mount has been placed on the covered vp,
1310 * it may have been discovered by now, so we have
1311 * to treat this just like an unmount
1312 */
1313 mount_lock_spin(mp);
1314 mp->mnt_lflag |= MNT_LDEAD;
1315 mount_unlock(mp);
1316
1317 if (device_vnode != NULLVP) {
1318 vnode_rele(device_vnode);
1319 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD | FWRITE,
1320 ctx);
1321 did_rele = TRUE;
1322 }
1323
1324 vnode_lock_spin(vp);
1325
1326 mp->mnt_crossref++;
1327 vp->v_mountedhere = (mount_t) 0;
1328
1329 vnode_unlock(vp);
1330
1331 if (have_usecount) {
1332 vnode_rele(vp);
1333 }
1334 out3:
1335 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele)) {
1336 vnode_rele(devvp);
1337 }
1338 out2:
1339 if (devpath && devvp) {
1340 vnode_put(devvp);
1341 }
1342 out1:
1343 /* Release mnt_rwlock only when it was taken */
1344 if (is_rwlock_locked == TRUE) {
1345 lck_rw_done(&mp->mnt_rwlock);
1346 }
1347
1348 if (mntalloc) {
1349 if (mp->mnt_crossref) {
1350 mount_dropcrossref(mp, vp, 0);
1351 } else {
1352 mount_lock_destroy(mp);
1353 #if CONFIG_MACF
1354 mac_mount_label_destroy(mp);
1355 #endif
1356 FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
1357 }
1358 }
1359 if (vfsp_ref) {
1360 mount_list_lock();
1361 vfsp->vfc_refcount--;
1362 mount_list_unlock();
1363 }
1364
1365 return error;
1366 }
1367
1368 /*
1369 * Flush in-core data, check for competing mount attempts,
1370 * and set VMOUNT
1371 */
1372 int
1373 prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
1374 {
1375 #if !CONFIG_MACF
1376 #pragma unused(cnp,fsname)
1377 #endif
1378 struct vnode_attr va;
1379 int error;
1380
1381 if (!skip_auth) {
1382 /*
1383 * If the user is not root, ensure that they own the directory
1384 * onto which we are attempting to mount.
1385 */
1386 VATTR_INIT(&va);
1387 VATTR_WANTED(&va, va_uid);
1388 if ((error = vnode_getattr(vp, &va, ctx)) ||
1389 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1390 (!vfs_context_issuser(ctx)))) {
1391 error = EPERM;
1392 goto out;
1393 }
1394 }
1395
1396 if ((error = VNOP_FSYNC(vp, MNT_WAIT, ctx))) {
1397 goto out;
1398 }
1399
1400 if ((error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0))) {
1401 goto out;
1402 }
1403
1404 if (vp->v_type != VDIR) {
1405 error = ENOTDIR;
1406 goto out;
1407 }
1408
1409 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1410 error = EBUSY;
1411 goto out;
1412 }
1413
1414 #if CONFIG_MACF
1415 error = mac_mount_check_mount(ctx, vp,
1416 cnp, fsname);
1417 if (error != 0) {
1418 goto out;
1419 }
1420 #endif
1421
1422 vnode_lock_spin(vp);
1423 SET(vp->v_flag, VMOUNT);
1424 vnode_unlock(vp);
1425
1426 out:
1427 return error;
1428 }
1429
1430 #if CONFIG_IMGSRC_ACCESS
1431
1432 #define DEBUG_IMGSRC 0
1433
1434 #if DEBUG_IMGSRC
1435 #define IMGSRC_DEBUG(args...) printf("imgsrc: " args)
1436 #else
1437 #define IMGSRC_DEBUG(args...) do { } while(0)
1438 #endif
1439
1440 static int
1441 authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1442 {
1443 struct nameidata nd;
1444 vnode_t vp, realdevvp;
1445 mode_t accessmode;
1446 int error;
1447 enum uio_seg uio = UIO_USERSPACE;
1448
1449 if (ctx == vfs_context_kernel()) {
1450 uio = UIO_SYSSPACE;
1451 }
1452
1453 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, uio, devpath, ctx);
1454 if ((error = namei(&nd))) {
1455 IMGSRC_DEBUG("namei() failed with %d\n", error);
1456 return error;
1457 }
1458
1459 vp = nd.ni_vp;
1460
1461 if (!vnode_isblk(vp)) {
1462 IMGSRC_DEBUG("Not block device.\n");
1463 error = ENOTBLK;
1464 goto out;
1465 }
1466
1467 realdevvp = mp->mnt_devvp;
1468 if (realdevvp == NULLVP) {
1469 IMGSRC_DEBUG("No device backs the mount.\n");
1470 error = ENXIO;
1471 goto out;
1472 }
1473
1474 error = vnode_getwithref(realdevvp);
1475 if (error != 0) {
1476		IMGSRC_DEBUG("Couldn't get iocount on device.\n");
1477 goto out;
1478 }
1479
1480 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1481 IMGSRC_DEBUG("Wrong dev_t.\n");
1482 error = ENXIO;
1483 goto out1;
1484 }
1485
1486 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1487
1488 /*
1489 * If mount by non-root, then verify that user has necessary
1490 * permissions on the device.
1491 */
1492 if (!vfs_context_issuser(ctx)) {
1493 accessmode = KAUTH_VNODE_READ_DATA;
1494 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1495 accessmode |= KAUTH_VNODE_WRITE_DATA;
1496 }
1497 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1498 IMGSRC_DEBUG("Access denied.\n");
1499 goto out1;
1500 }
1501 }
1502
1503 *devvpp = vp;
1504
1505 out1:
1506 vnode_put(realdevvp);
1507
1508 out:
1509 nameidone(&nd);
1510
1511 if (error) {
1512 vnode_put(vp);
1513 }
1514
1515 return error;
1516 }
1517
1518 /*
1519 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1520 * and call checkdirs()
1521 */
1522 static int
1523 place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1524 {
1525 int error;
1526
1527 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1528
1529 IMGSRC_DEBUG("placing: fsname = %s, vp = %s\n",
1530 mp->mnt_vtable->vfc_name, vnode_getname(vp));
1531
1532 vnode_lock_spin(vp);
1533 CLR(vp->v_flag, VMOUNT);
1534 vp->v_mountedhere = mp;
1535 vnode_unlock(vp);
1536
1537 /*
1538 * taking the name_cache_lock exclusively will
1539	 * ensure that everyone is out of the fast path who
1540 * might be trying to use a now stale copy of
1541 * vp->v_mountedhere->mnt_realrootvp
1542 * bumping mount_generation causes the cached values
1543 * to be invalidated
1544 */
1545 name_cache_lock();
1546 mount_generation++;
1547 name_cache_unlock();
1548
1549 error = vnode_ref(vp);
1550 if (error != 0) {
1551 goto out;
1552 }
1553
1554 error = checkdirs(vp, ctx);
1555 if (error != 0) {
1556 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1557 vnode_rele(vp);
1558 goto out;
1559 }
1560
1561 out:
1562 if (error != 0) {
1563 mp->mnt_vnodecovered = NULLVP;
1564 }
1565 return error;
1566 }
1567
1568 static void
1569 undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1570 {
1571 vnode_rele(vp);
1572 vnode_lock_spin(vp);
1573 vp->v_mountedhere = (mount_t)NULL;
1574 vnode_unlock(vp);
1575
1576 mp->mnt_vnodecovered = NULLVP;
1577 }
1578
1579 static int
1580 mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1581 {
1582 int error;
1583
1584	/* unmount in progress; return error */
1585 mount_lock_spin(mp);
1586 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1587 mount_unlock(mp);
1588 return EBUSY;
1589 }
1590 mount_unlock(mp);
1591 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1592
1593 /*
1594 * We only allow the filesystem to be reloaded if it
1595 * is currently mounted read-only.
1596 */
1597 if ((flags & MNT_RELOAD) &&
1598 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1599 error = ENOTSUP;
1600 goto out;
1601 }
1602
1603 /*
1604 * Only root, or the user that did the original mount is
1605 * permitted to update it.
1606 */
1607 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1608 (!vfs_context_issuser(ctx))) {
1609 error = EPERM;
1610 goto out;
1611 }
1612 #if CONFIG_MACF
1613 error = mac_mount_check_remount(ctx, mp);
1614 if (error != 0) {
1615 goto out;
1616 }
1617 #endif
1618
1619 out:
1620 if (error) {
1621 lck_rw_done(&mp->mnt_rwlock);
1622 }
1623
1624 return error;
1625 }
1626
1627 static void
1628 mount_end_update(mount_t mp)
1629 {
1630 lck_rw_done(&mp->mnt_rwlock);
1631 }
1632
1633 static int
1634 get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1635 {
1636 vnode_t vp;
1637
1638 if (height >= MAX_IMAGEBOOT_NESTING) {
1639 return EINVAL;
1640 }
1641
1642 vp = imgsrc_rootvnodes[height];
1643 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1644 *rvpp = vp;
1645 return 0;
1646 } else {
1647 return ENOENT;
1648 }
1649 }
1650
1651 static int
1652 relocate_imageboot_source(vnode_t pvp, vnode_t vp,
1653 struct componentname *cnp, const char *fsname, vfs_context_t ctx,
1654 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
1655 {
1656 int error;
1657 mount_t mp;
1658 boolean_t placed = FALSE;
1659 struct vfstable *vfsp;
1660 user_addr_t devpath;
1661 char *old_mntonname;
1662 vnode_t rvp;
1663 vnode_t devvp;
1664 uint32_t height;
1665 uint32_t flags;
1666
1667 /* If we didn't imageboot, nothing to move */
1668 if (imgsrc_rootvnodes[0] == NULLVP) {
1669 return EINVAL;
1670 }
1671
1672 /* Only root can do this */
1673 if (!vfs_context_issuser(ctx)) {
1674 return EPERM;
1675 }
1676
1677 IMGSRC_DEBUG("looking for root vnode.\n");
1678
1679 /*
1680 * Get root vnode of filesystem we're moving.
1681 */
1682 if (by_index) {
1683 if (is64bit) {
1684 struct user64_mnt_imgsrc_args mia64;
1685 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1686 if (error != 0) {
1687 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1688 return error;
1689 }
1690
1691 height = mia64.mi_height;
1692 flags = mia64.mi_flags;
1693 devpath = mia64.mi_devpath;
1694 } else {
1695 struct user32_mnt_imgsrc_args mia32;
1696 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1697 if (error != 0) {
1698 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1699 return error;
1700 }
1701
1702 height = mia32.mi_height;
1703 flags = mia32.mi_flags;
1704 devpath = mia32.mi_devpath;
1705 }
1706 } else {
1707 /*
1708 * For binary compatibility--assumes one level of nesting.
1709 */
1710 if (is64bit) {
1711 if ((error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath)))) {
1712 return error;
1713 }
1714 } else {
1715 user32_addr_t tmp;
1716 if ((error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp)))) {
1717 return error;
1718 }
1719
1720 /* munge into LP64 addr */
1721 devpath = CAST_USER_ADDR_T(tmp);
1722 }
1723
1724 height = 0;
1725 flags = 0;
1726 }
1727
1728 if (flags != 0) {
1729 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1730 return EINVAL;
1731 }
1732
1733 error = get_imgsrc_rootvnode(height, &rvp);
1734 if (error != 0) {
1735 IMGSRC_DEBUG("getting old root vnode failed with %d\n", error);
1736 return error;
1737 }
1738
1739 IMGSRC_DEBUG("got old root vnode\n");
1740
1741 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1742
1743 /* Can only move once */
1744 mp = vnode_mount(rvp);
1745 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1746 IMGSRC_DEBUG("Already moved.\n");
1747 error = EBUSY;
1748 goto out0;
1749 }
1750
1751 IMGSRC_DEBUG("moving rvp: fsname = %s\n", mp->mnt_vtable->vfc_name);
1752	IMGSRC_DEBUG("Starting update.\n");
1753
1754 /* Get exclusive rwlock on mount, authorize update on mp */
1755 error = mount_begin_update(mp, ctx, 0);
1756 if (error != 0) {
1757		IMGSRC_DEBUG("Starting update failed with %d\n", error);
1758 goto out0;
1759 }
1760
1761 /*
1762 * It can only be moved once. Flag is set under the rwlock,
1763 * so we're now safe to proceed.
1764 */
1765 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1766 IMGSRC_DEBUG("Already moved [2]\n");
1767 goto out1;
1768 }
1769
1770 IMGSRC_DEBUG("Preparing coveredvp.\n");
1771
1772 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1773 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
1774 if (error != 0) {
1775 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
1776 goto out1;
1777 }
1778
1779 IMGSRC_DEBUG("Covered vp OK.\n");
1780
1781	/* Sanity check the name the caller has provided */
1782 vfsp = mp->mnt_vtable;
1783 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
1784 IMGSRC_DEBUG("Wrong fs name: actual = %s, expected = %s\n",
1785 vfsp->vfc_name, fsname);
1786 error = EINVAL;
1787 goto out2;
1788 }
1789
1790 /* Check the device vnode and update mount-from name, for local filesystems */
1791 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
1792 IMGSRC_DEBUG("Local, doing device validation.\n");
1793
1794 if (devpath != USER_ADDR_NULL) {
1795 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1796 if (error) {
1797 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1798 goto out2;
1799 }
1800
1801 vnode_put(devvp);
1802 }
1803 }
1804
1805 /*
1806 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1807 * and increment the name cache's mount generation
1808 */
1809
1810 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1811 error = place_mount_and_checkdirs(mp, vp, ctx);
1812 if (error != 0) {
1813 goto out2;
1814 }
1815
1816 placed = TRUE;
1817
1818 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1819 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1820
1821 /* Forbid future moves */
1822 mount_lock(mp);
1823 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1824 mount_unlock(mp);
1825
1826 /* Finally, add to mount list, completely ready to go */
1827 if (mount_list_add(mp) != 0) {
1828 /*
1829 * The system is shutting down trying to umount
1830 * everything, so fail with a plausible errno.
1831 */
1832 error = EBUSY;
1833 goto out3;
1834 }
1835
1836 mount_end_update(mp);
1837 vnode_put(rvp);
1838 FREE(old_mntonname, M_TEMP);
1839
1840 vfs_notify_mount(pvp);
1841
1842 return 0;
1843 out3:
1844 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1845
1846 mount_lock(mp);
1847 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1848 mount_unlock(mp);
1849
1850 out2:
1851 /*
1852 * Placing the mp on the vnode clears VMOUNT,
1853 * so cleanup is different after that point
1854 */
1855 if (placed) {
1856 /* Rele the vp, clear VMOUNT and v_mountedhere */
1857 undo_place_on_covered_vp(mp, vp);
1858 } else {
1859 vnode_lock_spin(vp);
1860 CLR(vp->v_flag, VMOUNT);
1861 vnode_unlock(vp);
1862 }
1863 out1:
1864 mount_end_update(mp);
1865
1866 out0:
1867 vnode_put(rvp);
1868 FREE(old_mntonname, M_TEMP);
1869 return error;
1870 }
1871
1872 #if CONFIG_LOCKERBOOT
1873 __private_extern__
1874 int
1875 mount_locker_protoboot(const char *fsname, const char *mntpoint,
1876 const char *pbdevpath)
1877 {
1878 int error = -1;
1879 struct nameidata nd;
1880 boolean_t cleanup_nd = FALSE;
1881 vfs_context_t ctx = vfs_context_kernel();
1882 boolean_t is64 = TRUE;
1883 boolean_t by_index = TRUE;
1884 struct user64_mnt_imgsrc_args mia64 = {
1885 .mi_height = 0,
1886 .mi_flags = 0,
1887 .mi_devpath = CAST_USER_ADDR_T(pbdevpath),
1888 };
1889 user_addr_t mia64addr = CAST_USER_ADDR_T(&mia64);
1890
1891 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
1892 UIO_SYSSPACE, CAST_USER_ADDR_T(mntpoint), ctx);
1893 error = namei(&nd);
1894 if (error) {
1895 IMGSRC_DEBUG("namei: %d\n", error);
1896 goto out;
1897 }
1898
1899 cleanup_nd = TRUE;
1900 error = relocate_imageboot_source(nd.ni_dvp, nd.ni_vp,
1901 &nd.ni_cnd, fsname, ctx, is64, mia64addr, by_index);
1902
1903 out:
1904 if (cleanup_nd) {
1905 int stashed = error;
1906
1907 error = vnode_put(nd.ni_vp);
1908 if (error) {
1909 panic("vnode_put() returned non-zero: %d", error);
1910 }
1911
1912 if (nd.ni_dvp) {
1913 error = vnode_put(nd.ni_dvp);
1914 if (error) {
1915 panic("vnode_put() returned non-zero: %d", error);
1916 }
1917 }
1918 nameidone(&nd);
1919
1920 error = stashed;
1921 }
1922 return error;
1923 }
1924 #endif /* CONFIG_LOCKERBOOT */
1925 #endif /* CONFIG_IMGSRC_ACCESS */
1926
1927 void
1928 enablequotas(struct mount *mp, vfs_context_t ctx)
1929 {
1930 struct nameidata qnd;
1931 int type;
1932 char qfpath[MAXPATHLEN];
1933 const char *qfname = QUOTAFILENAME;
1934 const char *qfopsname = QUOTAOPSNAME;
1935 const char *qfextension[] = INITQFNAMES;
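/*
 * With the standard <sys/quota.h> definitions the paths built below look
 * like "<mntonname>/.quota.ops.user" (the trigger file probed first) and
 * "<mntonname>/.quota.user" (the file handed to VFS_QUOTACTL); the exact
 * spellings come from QUOTAOPSNAME, QUOTAFILENAME and INITQFNAMES.
 */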
1936
1937	/* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1938 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0) {
1939 return;
1940 }
1941 /*
1942 * Enable filesystem disk quotas if necessary.
1943 * We ignore errors as this should not interfere with final mount
1944 */
1945 for (type = 0; type < MAXQUOTAS; type++) {
1946 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
1947 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1948 CAST_USER_ADDR_T(qfpath), ctx);
1949 if (namei(&qnd) != 0) {
1950 continue; /* option file to trigger quotas is not present */
1951 }
1952 vnode_put(qnd.ni_vp);
1953 nameidone(&qnd);
1954 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
1955
1956 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
1957 }
1958 return;
1959 }
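/*
 * Illustrative sketch (not compiled): what the probe above expands to for
 * the user quota type, assuming the usual <sys/quota.h> definitions of
 * QUOTAOPSNAME, QUOTAFILENAME and INITQFNAMES; the exact strings come from
 * that header, not from this file.
 *
 *   trigger file checked with namei(); quotas stay off if it is absent:
 *       <f_mntonname>/.quota.ops.user
 *   quota data file handed down to the filesystem:
 *       (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0,
 *           "<f_mntonname>/.quota.user", ctx);
 */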
1960
1961
1962 static int
1963 checkdirs_callback(proc_t p, void * arg)
1964 {
1965 struct cdirargs * cdrp = (struct cdirargs *)arg;
1966 vnode_t olddp = cdrp->olddp;
1967 vnode_t newdp = cdrp->newdp;
1968 struct filedesc *fdp;
1969 vnode_t new_cvp = newdp;
1970 vnode_t new_rvp = newdp;
1971 vnode_t old_cvp = NULL;
1972 vnode_t old_rvp = NULL;
1973
1974 /*
1975 * XXX Also needs to iterate each thread in the process to see if it
1976 * XXX is using a per-thread current working directory, and, if so,
1977 * XXX update that as well.
1978 */
1979
1980 /*
1981 * First, with the proc_fdlock held, check to see if we will need
1982 * to do any work. If not, we will get out fast.
1983 */
1984 proc_fdlock(p);
1985 fdp = p->p_fd;
1986 if (fdp == NULL ||
1987 (fdp->fd_cdir != olddp && fdp->fd_rdir != olddp)) {
1988 proc_fdunlock(p);
1989 return PROC_RETURNED;
1990 }
1991 proc_fdunlock(p);
1992
1993 /*
1994 * Ok, we will have to do some work. Always take two refs
1995 * because we might need that many. We'll dispose of whatever
1996 * we ended up not using.
1997 */
1998 if (vnode_ref(newdp) != 0) {
1999 return PROC_RETURNED;
2000 }
2001 if (vnode_ref(newdp) != 0) {
2002 vnode_rele(newdp);
2003 return PROC_RETURNED;
2004 }
2005
2006 /*
2007 * Now do the work. Note: we dropped the proc_fdlock, so we
2008 * have to do all of the checks again.
2009 */
2010 proc_fdlock(p);
2011 fdp = p->p_fd;
2012 if (fdp != NULL) {
2013 if (fdp->fd_cdir == olddp) {
2014 old_cvp = olddp;
2015 fdp->fd_cdir = newdp;
2016 new_cvp = NULL;
2017 }
2018 if (fdp->fd_rdir == olddp) {
2019 old_rvp = olddp;
2020 fdp->fd_rdir = newdp;
2021 new_rvp = NULL;
2022 }
2023 }
2024 proc_fdunlock(p);
2025
2026 /*
2027 * Dispose of any references that are no longer needed.
2028 */
2029 if (old_cvp != NULL) {
2030 vnode_rele(old_cvp);
2031 }
2032 if (old_rvp != NULL) {
2033 vnode_rele(old_rvp);
2034 }
2035 if (new_cvp != NULL) {
2036 vnode_rele(new_cvp);
2037 }
2038 if (new_rvp != NULL) {
2039 vnode_rele(new_rvp);
2040 }
2041
2042 return PROC_RETURNED;
2043 }
2044
2045
2046
2047 /*
2048 * Scan all active processes to see if any of them have a current
2049 * or root directory onto which the new filesystem has just been
2050 * mounted. If so, replace them with the new mount point.
2051 */
2052 static int
2053 checkdirs(vnode_t olddp, vfs_context_t ctx)
2054 {
2055 vnode_t newdp;
2056 vnode_t tvp;
2057 int err;
2058 struct cdirargs cdr;
2059
2060 if (olddp->v_usecount == 1) {
2061 return 0;
2062 }
2063 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
2064
2065 if (err != 0) {
2066 #if DIAGNOSTIC
2067 panic("mount: lost mount: error %d", err);
2068 #endif
2069 return err;
2070 }
2071
2072 cdr.olddp = olddp;
2073 cdr.newdp = newdp;
2074 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
2075 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
2076
2077 if (rootvnode == olddp) {
2078 vnode_ref(newdp);
2079 tvp = rootvnode;
2080 rootvnode = newdp;
2081 vnode_rele(tvp);
2082 }
2083
2084 vnode_put(newdp);
2085 return 0;
2086 }
2087
2088 /*
2089 * Unmount a file system.
2090 *
2091 * Note: unmount takes a path to the vnode mounted on as argument,
2092 * not the special file (as before).
2093 */
2094 /* ARGSUSED */
2095 int
2096 unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
2097 {
2098 vnode_t vp;
2099 struct mount *mp;
2100 int error;
2101 struct nameidata nd;
2102 vfs_context_t ctx = vfs_context_current();
2103
2104 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
2105 UIO_USERSPACE, uap->path, ctx);
2106 error = namei(&nd);
2107 if (error) {
2108 return error;
2109 }
2110 vp = nd.ni_vp;
2111 mp = vp->v_mount;
2112 nameidone(&nd);
2113
2114 #if CONFIG_MACF
2115 error = mac_mount_check_umount(ctx, mp);
2116 if (error != 0) {
2117 vnode_put(vp);
2118 return error;
2119 }
2120 #endif
2121 /*
2122 * Must be the root of the filesystem
2123 */
2124 if ((vp->v_flag & VROOT) == 0) {
2125 vnode_put(vp);
2126 return EINVAL;
2127 }
2128 mount_ref(mp, 0);
2129 vnode_put(vp);
2130 /* safedounmount consumes the mount ref */
2131 return safedounmount(mp, uap->flags, ctx);
2132 }
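/*
 * Userspace usage sketch (hypothetical mount point): per the note above,
 * the argument is the directory the filesystem is mounted on, not the
 * device special file.
 *
 *   #include <sys/mount.h>
 *
 *   if (unmount("/Volumes/External", 0) == -1) {
 *       // EBUSY is the common failure; MNT_FORCE requests a forced unmount
 *       (void) unmount("/Volumes/External", MNT_FORCE);
 *   }
 */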
2133
2134 int
2135 vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx)
2136 {
2137 mount_t mp;
2138
2139 mp = mount_list_lookupby_fsid(fsid, 0, 1);
2140 if (mp == (mount_t)0) {
2141 return ENOENT;
2142 }
2143 mount_ref(mp, 0);
2144 mount_iterdrop(mp);
2145 /* safedounmount consumes the mount ref */
2146 return safedounmount(mp, flags, ctx);
2147 }
2148
2149
2150 /*
2151 * The mount struct comes with a mount ref which will be consumed.
2152 * Do the actual file system unmount, preventing some common foot-shooting.
2153 */
2154 int
2155 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
2156 {
2157 int error;
2158 proc_t p = vfs_context_proc(ctx);
2159
2160 /*
2161 * If the file system is not responding, MNT_NOBLOCK is set,
2162 * and this is not a forced unmount, then return EBUSY.
2163 */
2164 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
2165 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
2166 error = EBUSY;
2167 goto out;
2168 }
2169
2170 /*
2171 * Skip authorization if the mount is tagged as permissive and
2172 * this is not a forced-unmount attempt.
2173 */
2174 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
2175 /*
2176 * Only root, or the user that did the original mount is
2177 * permitted to unmount this filesystem.
2178 */
2179 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
2180 (error = suser(kauth_cred_get(), &p->p_acflag))) {
2181 goto out;
2182 }
2183 }
2184 /*
2185 * Don't allow unmounting the root file system (or the associated VM or DATA mounts).
2186 */
2187 if ((mp->mnt_flag & MNT_ROOTFS) || (mp->mnt_kern_flag & MNTK_SYSTEM)) {
2188 error = EBUSY; /* the root (or associated volumes) is always busy */
2189 goto out;
2190 }
2191
2192 #ifdef CONFIG_IMGSRC_ACCESS
2193 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
2194 error = EBUSY;
2195 goto out;
2196 }
2197 #endif /* CONFIG_IMGSRC_ACCESS */
2198
2199 return dounmount(mp, flags, 1, ctx);
2200
2201 out:
2202 mount_drop(mp, 0);
2203 return error;
2204 }
2205
2206 /*
2207 * Do the actual file system unmount.
2208 */
2209 int
2210 dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
2211 {
2212 vnode_t coveredvp = (vnode_t)0;
2213 int error;
2214 int needwakeup = 0;
2215 int forcedunmount = 0;
2216 int lflags = 0;
2217 struct vnode *devvp = NULLVP;
2218 #if CONFIG_TRIGGERS
2219 proc_t p = vfs_context_proc(ctx);
2220 int did_vflush = 0;
2221 int pflags_save = 0;
2222 #endif /* CONFIG_TRIGGERS */
2223
2224 #if CONFIG_FSE
2225 if (!(flags & MNT_FORCE)) {
2226 fsevent_unmount(mp, ctx); /* has to come first! */
2227 }
2228 #endif
2229
2230 mount_lock(mp);
2231
2232 /*
2233 * If an unmount is already in progress, just return EBUSY.
2234 * Even a forced unmount cannot override.
2235 */
2236 if (mp->mnt_lflag & MNT_LUNMOUNT) {
2237 if (withref != 0) {
2238 mount_drop(mp, 1);
2239 }
2240 mount_unlock(mp);
2241 return EBUSY;
2242 }
2243
2244 if (flags & MNT_FORCE) {
2245 forcedunmount = 1;
2246 mp->mnt_lflag |= MNT_LFORCE;
2247 }
2248
2249 #if CONFIG_TRIGGERS
2250 if (flags & MNT_NOBLOCK && p != kernproc) {
2251 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
2252 }
2253 #endif
2254
2255 mp->mnt_kern_flag |= MNTK_UNMOUNT;
2256 mp->mnt_lflag |= MNT_LUNMOUNT;
2257 mp->mnt_flag &= ~MNT_ASYNC;
2258 /*
2259 * Anyone currently in the fast path who
2260 * trips over the cached rootvp will be
2261 * dumped out and forced into the slow path
2262 * to regenerate a new cached value.
2263 */
2264 mp->mnt_realrootvp = NULLVP;
2265 mount_unlock(mp);
2266
2267 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
2268 /*
2269 * Force unmount any mounts in this filesystem.
2270 * If any unmounts fail - just leave them dangling.
2271 * Avoids recursion.
2272 */
2273 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
2274 }
2275
2276 /*
2277 * Taking the name_cache_lock exclusively will
2278 * ensure that everyone who might be trying to use
2279 * a now-stale copy of
2280 * vp->v_mountedhere->mnt_realrootvp
2281 * is out of the fast path; bumping mount_generation
2282 * causes the cached values to be invalidated.
2283 */
2284 name_cache_lock();
2285 mount_generation++;
2286 name_cache_unlock();
2287
2288
2289 lck_rw_lock_exclusive(&mp->mnt_rwlock);
2290 if (withref != 0) {
2291 mount_drop(mp, 0);
2292 }
2293 error = 0;
2294 if (forcedunmount == 0) {
2295 ubc_umount(mp); /* release cached vnodes */
2296 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2297 error = VFS_SYNC(mp, MNT_WAIT, ctx);
2298 if (error) {
2299 mount_lock(mp);
2300 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2301 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2302 mp->mnt_lflag &= ~MNT_LFORCE;
2303 goto out;
2304 }
2305 }
2306 }
2307
2308 IOBSDMountChange(mp, kIOMountChangeUnmount);
2309
2310 #if CONFIG_TRIGGERS
2311 vfs_nested_trigger_unmounts(mp, flags, ctx);
2312 did_vflush = 1;
2313 #endif
2314 if (forcedunmount) {
2315 lflags |= FORCECLOSE;
2316 }
2317 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
2318 if ((forcedunmount == 0) && error) {
2319 mount_lock(mp);
2320 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2321 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2322 mp->mnt_lflag &= ~MNT_LFORCE;
2323 goto out;
2324 }
2325
2326 /* make sure no one is in the mount iterations or lookups */
2327 mount_iterdrain(mp);
2328
2329 error = VFS_UNMOUNT(mp, flags, ctx);
2330 if (error) {
2331 mount_iterreset(mp);
2332 mount_lock(mp);
2333 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2334 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2335 mp->mnt_lflag &= ~MNT_LFORCE;
2336 goto out;
2337 }
2338
2339 /* increment the operations count */
2340 if (!error) {
2341 OSAddAtomic(1, &vfs_nummntops);
2342 }
2343
2344 if (mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
2345 /* hold an io reference and drop the usecount before close */
2346 devvp = mp->mnt_devvp;
2347 vnode_getalways(devvp);
2348 vnode_rele(devvp);
2349 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD | FWRITE,
2350 ctx);
2351 vnode_clearmountedon(devvp);
2352 vnode_put(devvp);
2353 }
2354 lck_rw_done(&mp->mnt_rwlock);
2355 mount_list_remove(mp);
2356 lck_rw_lock_exclusive(&mp->mnt_rwlock);
2357
2358 /* mark the mount point hook in the vp but do not drop the ref yet */
2359 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
2360 /*
2361 * The covered vnode needs special handling. Trying to get an
2362 * iocount must not block here as this may lead to deadlocks
2363 * if the Filesystem to which the covered vnode belongs is
2364 * undergoing forced unmounts. Since we hold a usecount, the
2365 * vnode cannot be reused (it can, however, still be terminated)
2366 */
2367 vnode_getalways(coveredvp);
2368 vnode_lock_spin(coveredvp);
2369
2370 mp->mnt_crossref++;
2371 coveredvp->v_mountedhere = (struct mount *)0;
2372 CLR(coveredvp->v_flag, VMOUNT);
2373
2374 vnode_unlock(coveredvp);
2375 vnode_put(coveredvp);
2376 }
2377
2378 mount_list_lock();
2379 mp->mnt_vtable->vfc_refcount--;
2380 mount_list_unlock();
2381
2382 cache_purgevfs(mp); /* remove cache entries for this file sys */
2383 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
2384 mount_lock(mp);
2385 mp->mnt_lflag |= MNT_LDEAD;
2386
2387 if (mp->mnt_lflag & MNT_LWAIT) {
2388 /*
2389 * Do the wakeup here
2390 * in case we block in mount_refdrain,
2391 * which will drop the mount lock
2392 * and allow anyone blocked in vfs_busy
2393 * to wake up and see the LDEAD state.
2394 */
2395 mp->mnt_lflag &= ~MNT_LWAIT;
2396 wakeup((caddr_t)mp);
2397 }
2398 mount_refdrain(mp);
2399
2400 /* free disk_conditioner_info structure for this mount */
2401 disk_conditioner_unmount(mp);
2402
2403 out:
2404 if (mp->mnt_lflag & MNT_LWAIT) {
2405 mp->mnt_lflag &= ~MNT_LWAIT;
2406 needwakeup = 1;
2407 }
2408
2409 #if CONFIG_TRIGGERS
2410 if (flags & MNT_NOBLOCK && p != kernproc) {
2411 // Restore P_NOREMOTEHANG bit to its previous value
2412 if ((pflags_save & P_NOREMOTEHANG) == 0) {
2413 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2414 }
2415 }
2416
2417 /*
2418 * Callback and context are set together under the mount lock, and
2419 * never cleared, so we're safe to examine them here, drop the lock,
2420 * and call out.
2421 */
2422 if (mp->mnt_triggercallback != NULL) {
2423 mount_unlock(mp);
2424 if (error == 0) {
2425 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2426 } else if (did_vflush) {
2427 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2428 }
2429 } else {
2430 mount_unlock(mp);
2431 }
2432 #else
2433 mount_unlock(mp);
2434 #endif /* CONFIG_TRIGGERS */
2435
2436 lck_rw_done(&mp->mnt_rwlock);
2437
2438 if (needwakeup) {
2439 wakeup((caddr_t)mp);
2440 }
2441
2442 if (!error) {
2443 if ((coveredvp != NULLVP)) {
2444 vnode_t pvp = NULLVP;
2445
2446 /*
2447 * The covered vnode needs special handling. Trying to
2448 * get an iocount must not block here as this may lead
2449 * to deadlocks if the Filesystem to which the covered
2450 * vnode belongs is undergoing forced unmounts. Since we
2451 * hold a usecount, the vnode cannot be reused
2452 * (it can, however, still be terminated).
2453 */
2454 vnode_getalways(coveredvp);
2455
2456 mount_dropcrossref(mp, coveredvp, 0);
2457 /*
2458 * We'll _try_ to detect if this really needs to be
2459 * done. The coveredvp can only be in termination (or
2460 * terminated) if the coveredvp's mount point is in a
2461 * forced unmount (or has been) since we still hold the
2462 * ref.
2463 */
2464 if (!vnode_isrecycled(coveredvp)) {
2465 pvp = vnode_getparent(coveredvp);
2466 #if CONFIG_TRIGGERS
2467 if (coveredvp->v_resolve) {
2468 vnode_trigger_rearm(coveredvp, ctx);
2469 }
2470 #endif
2471 }
2472
2473 vnode_rele(coveredvp);
2474 vnode_put(coveredvp);
2475 coveredvp = NULLVP;
2476
2477 if (pvp) {
2478 lock_vnode_and_post(pvp, NOTE_WRITE);
2479 vnode_put(pvp);
2480 }
2481 } else if (mp->mnt_flag & MNT_ROOTFS) {
2482 mount_lock_destroy(mp);
2483 #if CONFIG_MACF
2484 mac_mount_label_destroy(mp);
2485 #endif
2486 FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
2487 } else {
2488 panic("dounmount: no coveredvp");
2489 }
2490 }
2491 return error;
2492 }
2493
2494 /*
2495 * Unmount any mounts in this filesystem.
2496 */
2497 void
2498 dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2499 {
2500 mount_t smp;
2501 fsid_t *fsids, fsid;
2502 int fsids_sz;
2503 int count = 0, i, m = 0;
2504 vnode_t vp;
2505
2506 mount_list_lock();
2507
2508 // Get an array to hold the submounts' fsids.
2509 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2510 count++;
2511 fsids_sz = count * sizeof(fsid_t);
2512 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2513 if (fsids == NULL) {
2514 mount_list_unlock();
2515 goto out;
2516 }
2517 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2518
2519 /*
2520 * Fill the array with submount fsids.
2521 * Since mounts are always added to the tail of the mount list, the
2522 * list is always in mount order.
2523 * For each mount check if the mounted-on vnode belongs to a
2524 * mount that's already added to our array of mounts to be unmounted.
2525 */
2526 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2527 vp = smp->mnt_vnodecovered;
2528 if (vp == NULL) {
2529 continue;
2530 }
2531 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2532 for (i = 0; i <= m; i++) {
2533 if (fsids[i].val[0] == fsid.val[0] &&
2534 fsids[i].val[1] == fsid.val[1]) {
2535 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2536 break;
2537 }
2538 }
2539 }
2540 mount_list_unlock();
2541
2542 // Unmount the submounts in reverse order. Ignore errors.
2543 for (i = m; i > 0; i--) {
2544 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2545 if (smp) {
2546 mount_ref(smp, 0);
2547 mount_iterdrop(smp);
2548 (void) dounmount(smp, flags, 1, ctx);
2549 }
2550 }
2551 out:
2552 if (fsids) {
2553 FREE(fsids, M_TEMP);
2554 }
2555 }
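/*
 * Worked example (hypothetical mount points): with /A mounted, then /A/B,
 * then /A/B/C, the scan above collects fsids in mount order
 * { /A, /A/B, /A/B/C }, because each covered vnode sits on a mount that is
 * already in the array. The second loop then unmounts /A/B/C and /A/B in
 * reverse order, leaving /A itself for the caller (dounmount).
 */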
2556
2557 void
2558 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2559 {
2560 vnode_lock(dp);
2561 mp->mnt_crossref--;
2562
2563 if (mp->mnt_crossref < 0) {
2564 panic("mount cross refs -ve");
2565 }
2566
2567 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
2568 if (need_put) {
2569 vnode_put_locked(dp);
2570 }
2571 vnode_unlock(dp);
2572
2573 mount_lock_destroy(mp);
2574 #if CONFIG_MACF
2575 mac_mount_label_destroy(mp);
2576 #endif
2577 FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
2578 return;
2579 }
2580 if (need_put) {
2581 vnode_put_locked(dp);
2582 }
2583 vnode_unlock(dp);
2584 }
2585
2586
2587 /*
2588 * Sync each mounted filesystem.
2589 */
2590 #if DIAGNOSTIC
2591 int syncprt = 0;
2592 #endif
2593
2594 int print_vmpage_stat = 0;
2595
2596 /*
2597 * sync_callback: simple wrapper that calls VFS_SYNC() on volumes
2598 * mounted read-write with the passed waitfor value.
2599 *
2600 * Parameters: mp mount-point descriptor per mounted file-system instance.
2601 * arg user argument (please see below)
2602 *
2603 * The user argument is a pointer to a 32-bit unsigned integer which specifies
2604 * the waitfor value to pass to VFS_SYNC(). If the user argument is
2605 * passed as NULL, VFS_SYNC() is called with MNT_NOWAIT set as the default
2606 * waitfor value.
2607 *
2608 * Returns: VFS_RETURNED
2609 */
2610 static int
2611 sync_callback(mount_t mp, void *arg)
2612 {
2613 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2614 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2615 unsigned waitfor = MNT_NOWAIT;
2616
2617 if (arg) {
2618 waitfor = *(uint32_t*)arg;
2619 }
2620
2621 /* Sanity check for flags - these are the only valid combinations for the flag bits */
2622 if (waitfor != MNT_WAIT &&
2623 waitfor != (MNT_WAIT | MNT_VOLUME) &&
2624 waitfor != MNT_NOWAIT &&
2625 waitfor != (MNT_NOWAIT | MNT_VOLUME) &&
2626 waitfor != MNT_DWAIT &&
2627 waitfor != (MNT_DWAIT | MNT_VOLUME)) {
2628 panic("Passed inappropriate waitfor %u to "
2629 "sync_callback()", waitfor);
2630 }
2631
2632 mp->mnt_flag &= ~MNT_ASYNC;
2633 (void)VFS_SYNC(mp, waitfor, vfs_context_kernel());
2634 if (asyncflag) {
2635 mp->mnt_flag |= MNT_ASYNC;
2636 }
2637 }
2638
2639 return VFS_RETURNED;
2640 }
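/*
 * Caller-side sketch: how the waitfor contract documented above is
 * exercised. Passing NULL (as sync(2) does below) defaults to MNT_NOWAIT;
 * a synchronous sweep would pass one of the combinations accepted by the
 * sanity check.
 *
 *   uint32_t waitfor = MNT_WAIT;
 *
 *   vfs_iterate(LK_NOWAIT, sync_callback, &waitfor);
 */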
2641
2642 /* ARGSUSED */
2643 int
2644 sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
2645 {
2646 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2647
2648 if (print_vmpage_stat) {
2649 vm_countdirtypages();
2650 }
2651
2652 #if DIAGNOSTIC
2653 if (syncprt) {
2654 vfs_bufstats();
2655 }
2656 #endif /* DIAGNOSTIC */
2657 return 0;
2658 }
2659
2660 typedef enum {
2661 SYNC_ALL = 0,
2662 SYNC_ONLY_RELIABLE_MEDIA = 1,
2663 SYNC_ONLY_UNRELIABLE_MEDIA = 2
2664 } sync_type_t;
2665
2666 static int
2667 sync_internal_callback(mount_t mp, void *arg)
2668 {
2669 if (arg) {
2670 int is_reliable = !(mp->mnt_kern_flag & MNTK_VIRTUALDEV) &&
2671 (mp->mnt_flag & MNT_LOCAL);
2672 sync_type_t sync_type = *((sync_type_t *)arg);
2673
2674 if ((sync_type == SYNC_ONLY_RELIABLE_MEDIA) && !is_reliable) {
2675 return VFS_RETURNED;
2676 } else if ((sync_type == SYNC_ONLY_UNRELIABLE_MEDIA) && is_reliable) {
2677 return VFS_RETURNED;
2678 }
2679 }
2680
2681 (void)sync_callback(mp, NULL);
2682
2683 return VFS_RETURNED;
2684 }
2685
2686 int sync_thread_state = 0;
2687 int sync_timeout_seconds = 5;
2688
2689 #define SYNC_THREAD_RUN 0x0001
2690 #define SYNC_THREAD_RUNNING 0x0002
2691
2692 static void
2693 sync_thread(__unused void *arg, __unused wait_result_t wr)
2694 {
2695 sync_type_t sync_type;
2696
2697 lck_mtx_lock(sync_mtx_lck);
2698 while (sync_thread_state & SYNC_THREAD_RUN) {
2699 sync_thread_state &= ~SYNC_THREAD_RUN;
2700 lck_mtx_unlock(sync_mtx_lck);
2701
2702 sync_type = SYNC_ONLY_RELIABLE_MEDIA;
2703 vfs_iterate(LK_NOWAIT, sync_internal_callback, &sync_type);
2704 sync_type = SYNC_ONLY_UNRELIABLE_MEDIA;
2705 vfs_iterate(LK_NOWAIT, sync_internal_callback, &sync_type);
2706
2707 lck_mtx_lock(sync_mtx_lck);
2708 }
2709 /*
2710 * This wakeup _has_ to be issued before the lock is released otherwise
2711 * we may end up waking up a thread in sync_internal which is
2712 * expecting a wakeup from a thread it just created and not from this
2713 * thread which is about to exit.
2714 */
2715 wakeup(&sync_thread_state);
2716 sync_thread_state &= ~SYNC_THREAD_RUNNING;
2717 lck_mtx_unlock(sync_mtx_lck);
2718
2719 if (print_vmpage_stat) {
2720 vm_countdirtypages();
2721 }
2722
2723 #if DIAGNOSTIC
2724 if (syncprt) {
2725 vfs_bufstats();
2726 }
2727 #endif /* DIAGNOSTIC */
2728 }
2729
2730 struct timeval sync_timeout_last_print = {.tv_sec = 0, .tv_usec = 0};
2731
2732 /*
2733 * An in-kernel sync for power management to call.
2734 * This function always returns within sync_timeout_seconds.
2735 */
2736 __private_extern__ int
2737 sync_internal(void)
2738 {
2739 thread_t thd;
2740 int error;
2741 int thread_created = FALSE;
2742 struct timespec ts = {.tv_sec = sync_timeout_seconds, .tv_nsec = 0};
2743
2744 lck_mtx_lock(sync_mtx_lck);
2745 sync_thread_state |= SYNC_THREAD_RUN;
2746 if (!(sync_thread_state & SYNC_THREAD_RUNNING)) {
2747 int kr;
2748
2749 sync_thread_state |= SYNC_THREAD_RUNNING;
2750 kr = kernel_thread_start(sync_thread, NULL, &thd);
2751 if (kr != KERN_SUCCESS) {
2752 sync_thread_state &= ~SYNC_THREAD_RUNNING;
2753 lck_mtx_unlock(sync_mtx_lck);
2754 printf("sync_thread failed\n");
2755 return 0;
2756 }
2757 thread_created = TRUE;
2758 }
2759
2760 error = msleep((caddr_t)&sync_thread_state, sync_mtx_lck,
2761 (PVFS | PDROP | PCATCH), "sync_thread", &ts);
2762 if (error) {
2763 struct timeval now;
2764
2765 microtime(&now);
2766 if (now.tv_sec - sync_timeout_last_print.tv_sec > 120) {
2767 printf("sync timed out: %d sec\n", sync_timeout_seconds);
2768 sync_timeout_last_print.tv_sec = now.tv_sec;
2769 }
2770 }
2771
2772 if (thread_created) {
2773 thread_deallocate(thd);
2774 }
2775
2776 return 0;
2777 } /* end of sync_internal call */
2778
2779 /*
2780 * Change filesystem quotas.
2781 */
2782 #if QUOTA
2783 int
2784 quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
2785 {
2786 struct mount *mp;
2787 int error, quota_cmd, quota_status = 0;
2788 caddr_t datap;
2789 size_t fnamelen;
2790 struct nameidata nd;
2791 vfs_context_t ctx = vfs_context_current();
2792 struct dqblk my_dqblk = {};
2793
2794 AUDIT_ARG(uid, uap->uid);
2795 AUDIT_ARG(cmd, uap->cmd);
2796 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2797 uap->path, ctx);
2798 error = namei(&nd);
2799 if (error) {
2800 return error;
2801 }
2802 mp = nd.ni_vp->v_mount;
2803 vnode_put(nd.ni_vp);
2804 nameidone(&nd);
2805
2806 /* copyin any data we will need for downstream code */
2807 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2808
2809 switch (quota_cmd) {
2810 case Q_QUOTAON:
2811 /* uap->arg specifies a file from which to take the quotas */
2812 fnamelen = MAXPATHLEN;
2813 datap = kalloc(MAXPATHLEN);
2814 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2815 break;
2816 case Q_GETQUOTA:
2817 /* uap->arg is a pointer to a dqblk structure. */
2818 datap = (caddr_t) &my_dqblk;
2819 break;
2820 case Q_SETQUOTA:
2821 case Q_SETUSE:
2822 /* uap->arg is a pointer to a dqblk structure. */
2823 datap = (caddr_t) &my_dqblk;
2824 if (proc_is64bit(p)) {
2825 struct user_dqblk my_dqblk64;
2826 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof(my_dqblk64));
2827 if (error == 0) {
2828 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2829 }
2830 } else {
2831 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof(my_dqblk));
2832 }
2833 break;
2834 case Q_QUOTASTAT:
2835 /* uap->arg is a pointer to an integer */
2836 datap = (caddr_t) &quota_status;
2837 break;
2838 default:
2839 datap = NULL;
2840 break;
2841 } /* switch */
2842
2843 if (error == 0) {
2844 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
2845 }
2846
2847 switch (quota_cmd) {
2848 case Q_QUOTAON:
2849 if (datap != NULL) {
2850 kfree(datap, MAXPATHLEN);
2851 }
2852 break;
2853 case Q_GETQUOTA:
2854 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2855 if (error == 0) {
2856 if (proc_is64bit(p)) {
2857 struct user_dqblk my_dqblk64;
2858
2859 memset(&my_dqblk64, 0, sizeof(my_dqblk64));
2860 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2861 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof(my_dqblk64));
2862 } else {
2863 error = copyout(datap, uap->arg, sizeof(struct dqblk));
2864 }
2865 }
2866 break;
2867 case Q_QUOTASTAT:
2868 /* uap->arg is a pointer to an integer */
2869 if (error == 0) {
2870 error = copyout(datap, uap->arg, sizeof(quota_status));
2871 }
2872 break;
2873 default:
2874 break;
2875 } /* switch */
2876
2877 return error;
2878 }
2879 #else
2880 int
2881 quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2882 {
2883 return EOPNOTSUPP;
2884 }
2885 #endif /* QUOTA */
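/*
 * Userspace usage sketch (hypothetical path and id), mirroring the
 * per-command argument handling above: Q_QUOTAON takes a path to the quota
 * file, while Q_GETQUOTA takes a pointer to a struct dqblk that is copied
 * back out on success.
 *
 *   #include <sys/quota.h>
 *   #include <unistd.h>
 *
 *   struct dqblk dq;
 *
 *   if (quotactl("/Volumes/Data", QCMD(Q_GETQUOTA, USRQUOTA),
 *       getuid(), (caddr_t)&dq) == 0) {
 *       // dq now holds the current usage and limits for that uid
 *   }
 */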
2886
2887 /*
2888 * Get filesystem statistics.
2889 *
2890 * Returns: 0 Success
2891 * namei:???
2892 * vfs_update_vfsstat:???
2893 * munge_statfs:EFAULT
2894 */
2895 /* ARGSUSED */
2896 int
2897 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
2898 {
2899 struct mount *mp;
2900 struct vfsstatfs *sp;
2901 int error;
2902 struct nameidata nd;
2903 vfs_context_t ctx = vfs_context_current();
2904 vnode_t vp;
2905
2906 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2907 UIO_USERSPACE, uap->path, ctx);
2908 error = namei(&nd);
2909 if (error != 0) {
2910 return error;
2911 }
2912 vp = nd.ni_vp;
2913 mp = vp->v_mount;
2914 sp = &mp->mnt_vfsstat;
2915 nameidone(&nd);
2916
2917 #if CONFIG_MACF
2918 error = mac_mount_check_stat(ctx, mp);
2919 if (error != 0) {
2920 vnode_put(vp);
2921 return error;
2922 }
2923 #endif
2924
2925 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
2926 if (error != 0) {
2927 vnode_put(vp);
2928 return error;
2929 }
2930
2931 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2932 vnode_put(vp);
2933 return error;
2934 }
2935
2936 /*
2937 * Get filesystem statistics.
2938 */
2939 /* ARGSUSED */
2940 int
2941 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
2942 {
2943 vnode_t vp;
2944 struct mount *mp;
2945 struct vfsstatfs *sp;
2946 int error;
2947
2948 AUDIT_ARG(fd, uap->fd);
2949
2950 if ((error = file_vnode(uap->fd, &vp))) {
2951 return error;
2952 }
2953
2954 error = vnode_getwithref(vp);
2955 if (error) {
2956 file_drop(uap->fd);
2957 return error;
2958 }
2959
2960 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2961
2962 mp = vp->v_mount;
2963 if (!mp) {
2964 error = EBADF;
2965 goto out;
2966 }
2967
2968 #if CONFIG_MACF
2969 error = mac_mount_check_stat(vfs_context_current(), mp);
2970 if (error != 0) {
2971 goto out;
2972 }
2973 #endif
2974
2975 sp = &mp->mnt_vfsstat;
2976 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2977 goto out;
2978 }
2979
2980 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2981
2982 out:
2983 file_drop(uap->fd);
2984 vnode_put(vp);
2985
2986 return error;
2987 }
2988
2989 void
2990 vfs_get_statfs64(struct mount *mp, struct statfs64 *sfs)
2991 {
2992 struct vfsstatfs *vsfs = &mp->mnt_vfsstat;
2993
2994 bzero(sfs, sizeof(*sfs));
2995
2996 sfs->f_bsize = vsfs->f_bsize;
2997 sfs->f_iosize = (int32_t)vsfs->f_iosize;
2998 sfs->f_blocks = vsfs->f_blocks;
2999 sfs->f_bfree = vsfs->f_bfree;
3000 sfs->f_bavail = vsfs->f_bavail;
3001 sfs->f_files = vsfs->f_files;
3002 sfs->f_ffree = vsfs->f_ffree;
3003 sfs->f_fsid = vsfs->f_fsid;
3004 sfs->f_owner = vsfs->f_owner;
3005 sfs->f_type = mp->mnt_vtable->vfc_typenum;
3006 sfs->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
3007 sfs->f_fssubtype = vsfs->f_fssubtype;
3008 sfs->f_flags_ext = ((mp->mnt_kern_flag & MNTK_SYSTEM) && !(mp->mnt_kern_flag & MNTK_SWAP_MOUNT) && !(mp->mnt_flag & MNT_ROOTFS)) ? MNT_EXT_ROOT_DATA_VOL : 0;
3009 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
3010 strlcpy(&sfs->f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
3011 } else {
3012 strlcpy(&sfs->f_fstypename[0], &vsfs->f_fstypename[0], MFSTYPENAMELEN);
3013 }
3014 strlcpy(&sfs->f_mntonname[0], &vsfs->f_mntonname[0], MAXPATHLEN);
3015 strlcpy(&sfs->f_mntfromname[0], &vsfs->f_mntfromname[0], MAXPATHLEN);
3016 }
3017
3018 /*
3019 * Get file system statistics in 64-bit mode
3020 */
3021 int
3022 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
3023 {
3024 struct mount *mp;
3025 int error;
3026 struct nameidata nd;
3027 struct statfs64 sfs;
3028 vfs_context_t ctxp = vfs_context_current();
3029 vnode_t vp;
3030
3031 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
3032 UIO_USERSPACE, uap->path, ctxp);
3033 error = namei(&nd);
3034 if (error != 0) {
3035 return error;
3036 }
3037 vp = nd.ni_vp;
3038 mp = vp->v_mount;
3039 nameidone(&nd);
3040
3041 #if CONFIG_MACF
3042 error = mac_mount_check_stat(ctxp, mp);
3043 if (error != 0) {
3044 vnode_put(vp);
3045 return error;
3046 }
3047 #endif
3048
3049 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
3050 if (error != 0) {
3051 vnode_put(vp);
3052 return error;
3053 }
3054
3055 vfs_get_statfs64(mp, &sfs);
3056 if ((mp->mnt_kern_flag & MNTK_SYSTEM) && !(mp->mnt_kern_flag & MNTK_SWAP_MOUNT) && !(mp->mnt_flag & MNT_ROOTFS) &&
3057 (p->p_vfs_iopolicy & P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME)) {
3058 /* This process does not want to see a separate data volume mountpoint */
3059 strlcpy(&sfs.f_mntonname[0], "/", sizeof("/"));
3060 }
3061 error = copyout(&sfs, uap->buf, sizeof(sfs));
3062 vnode_put(vp);
3063
3064 return error;
3065 }
3066
3067 /*
3068 * Get file system statistics in 64-bit mode
3069 */
3070 int
3071 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
3072 {
3073 struct vnode *vp;
3074 struct mount *mp;
3075 struct statfs64 sfs;
3076 int error;
3077
3078 AUDIT_ARG(fd, uap->fd);
3079
3080 if ((error = file_vnode(uap->fd, &vp))) {
3081 return error;
3082 }
3083
3084 error = vnode_getwithref(vp);
3085 if (error) {
3086 file_drop(uap->fd);
3087 return error;
3088 }
3089
3090 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
3091
3092 mp = vp->v_mount;
3093 if (!mp) {
3094 error = EBADF;
3095 goto out;
3096 }
3097
3098 #if CONFIG_MACF
3099 error = mac_mount_check_stat(vfs_context_current(), mp);
3100 if (error != 0) {
3101 goto out;
3102 }
3103 #endif
3104
3105 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
3106 goto out;
3107 }
3108
3109 vfs_get_statfs64(mp, &sfs);
3110 if ((mp->mnt_kern_flag & MNTK_SYSTEM) && !(mp->mnt_kern_flag & MNTK_SWAP_MOUNT) && !(mp->mnt_flag & MNT_ROOTFS) &&
3111 (p->p_vfs_iopolicy & P_VFS_IOPOLICY_STATFS_NO_DATA_VOLUME)) {
3112 /* This process does not want to see a separate data volume mountpoint */
3113 strlcpy(&sfs.f_mntonname[0], "/", sizeof("/"));
3114 }
3115 error = copyout(&sfs, uap->buf, sizeof(sfs));
3116
3117 out:
3118 file_drop(uap->fd);
3119 vnode_put(vp);
3120
3121 return error;
3122 }
3123
3124 struct getfsstat_struct {
3125 user_addr_t sfsp;
3126 user_addr_t *mp;
3127 int count;
3128 int maxcount;
3129 int flags;
3130 int error;
3131 };
3132
3133
3134 static int
3135 getfsstat_callback(mount_t mp, void * arg)
3136 {
3137 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
3138 struct vfsstatfs *sp;
3139 int error, my_size;
3140 vfs_context_t ctx = vfs_context_current();
3141
3142 if (fstp->sfsp && fstp->count < fstp->maxcount) {
3143 #if CONFIG_MACF
3144 error = mac_mount_check_stat(ctx, mp);
3145 if (error != 0) {
3146 fstp->error = error;
3147 return VFS_RETURNED_DONE;
3148 }
3149 #endif
3150 sp = &mp->mnt_vfsstat;
3151 /*
3152 * If MNT_NOWAIT is specified, do not refresh the
3153 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
3154 */
3155 if ((mp->mnt_lflag & MNT_LDEAD) ||
3156 (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
3157 (!(mp->mnt_lflag & MNT_LUNMOUNT)) &&
3158 (error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT)))) {
3159 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
3160 return VFS_RETURNED;
3161 }
3162
3163 /*
3164 * Need to handle LP64 version of struct statfs
3165 */
3166 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
3167 if (error) {
3168 fstp->error = error;
3169 return VFS_RETURNED_DONE;
3170 }
3171 fstp->sfsp += my_size;
3172
3173 if (fstp->mp) {
3174 #if CONFIG_MACF
3175 error = mac_mount_label_get(mp, *fstp->mp);
3176 if (error) {
3177 fstp->error = error;
3178 return VFS_RETURNED_DONE;
3179 }
3180 #endif
3181 fstp->mp++;
3182 }
3183 }
3184 fstp->count++;
3185 return VFS_RETURNED;
3186 }
3187
3188 /*
3189 * Get statistics on all filesystems.
3190 */
3191 int
3192 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
3193 {
3194 struct __mac_getfsstat_args muap;
3195
3196 muap.buf = uap->buf;
3197 muap.bufsize = uap->bufsize;
3198 muap.mac = USER_ADDR_NULL;
3199 muap.macsize = 0;
3200 muap.flags = uap->flags;
3201
3202 return __mac_getfsstat(p, &muap, retval);
3203 }
3204
3205 /*
3206 * __mac_getfsstat: Get MAC-related file system statistics
3207 *
3208 * Parameters: p (ignored)
3209 * uap User argument descriptor (see below)
3210 * retval Count of file system statistics (N stats)
3211 *
3212 * Indirect: uap->bufsize Buffer size
3213 * uap->macsize MAC info size
3214 * uap->buf Buffer where information will be returned
3215 * uap->mac MAC info
3216 * uap->flags File system flags
3217 *
3218 *
3219 * Returns: 0 Success
3220 * !0 Not success
3221 *
3222 */
3223 int
3224 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
3225 {
3226 user_addr_t sfsp;
3227 user_addr_t *mp;
3228 size_t count, maxcount, bufsize, macsize;
3229 struct getfsstat_struct fst;
3230
3231 if ((unsigned)uap->bufsize > INT_MAX || (unsigned)uap->macsize > INT_MAX) {
3232 return EINVAL;
3233 }
3234
3235 bufsize = (size_t) uap->bufsize;
3236 macsize = (size_t) uap->macsize;
3237
3238 if (IS_64BIT_PROCESS(p)) {
3239 maxcount = bufsize / sizeof(struct user64_statfs);
3240 } else {
3241 maxcount = bufsize / sizeof(struct user32_statfs);
3242 }
3243 sfsp = uap->buf;
3244 count = 0;
3245
3246 mp = NULL;
3247
3248 #if CONFIG_MACF
3249 if (uap->mac != USER_ADDR_NULL) {
3250 u_int32_t *mp0;
3251 int error;
3252 unsigned int i;
3253
3254 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
3255 if (count != maxcount) {
3256 return EINVAL;
3257 }
3258
3259 /* Copy in the array */
3260 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
3261 if (mp0 == NULL) {
3262 return ENOMEM;
3263 }
3264
3265 error = copyin(uap->mac, mp0, macsize);
3266 if (error) {
3267 FREE(mp0, M_MACTEMP);
3268 return error;
3269 }
3270
3271 /* Normalize to an array of user_addr_t */
3272 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
3273 if (mp == NULL) {
3274 FREE(mp0, M_MACTEMP);
3275 return ENOMEM;
3276 }
3277
3278 for (i = 0; i < count; i++) {
3279 if (IS_64BIT_PROCESS(p)) {
3280 mp[i] = ((user_addr_t *)mp0)[i];
3281 } else {
3282 mp[i] = (user_addr_t)mp0[i];
3283 }
3284 }
3285 FREE(mp0, M_MACTEMP);
3286 }
3287 #endif
3288
3289
3290 fst.sfsp = sfsp;
3291 fst.mp = mp;
3292 fst.flags = uap->flags;
3293 fst.count = 0;
3294 fst.error = 0;
3295 fst.maxcount = maxcount;
3296
3297
3298 vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT, getfsstat_callback, &fst);
3299
3300 if (mp) {
3301 FREE(mp, M_MACTEMP);
3302 }
3303
3304 if (fst.error) {
3305 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
3306 return fst.error;
3307 }
3308
3309 if (fst.sfsp && fst.count > fst.maxcount) {
3310 *retval = fst.maxcount;
3311 } else {
3312 *retval = fst.count;
3313 }
3314 return 0;
3315 }
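/*
 * Userspace usage sketch: because the callback above only copies data out
 * when a buffer is supplied but always bumps the count, getfsstat(2) with a
 * NULL buffer returns the number of mounted filesystems, which callers
 * commonly use to size the real buffer.
 *
 *   #include <sys/mount.h>
 *   #include <stdlib.h>
 *
 *   int n = getfsstat(NULL, 0, MNT_NOWAIT);
 *   struct statfs *buf = (n > 0) ? calloc(n, sizeof(*buf)) : NULL;
 *
 *   if (buf != NULL) {
 *       n = getfsstat(buf, n * (int)sizeof(*buf), MNT_NOWAIT);
 *   }
 */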
3316
3317 static int
3318 getfsstat64_callback(mount_t mp, void * arg)
3319 {
3320 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
3321 struct vfsstatfs *sp;
3322 struct statfs64 sfs;
3323 int error;
3324
3325 if (fstp->sfsp && fstp->count < fstp->maxcount) {
3326 #if CONFIG_MACF
3327 error = mac_mount_check_stat(vfs_context_current(), mp);
3328 if (error != 0) {
3329 fstp->error = error;
3330 return VFS_RETURNED_DONE;
3331 }
3332 #endif
3333 sp = &mp->mnt_vfsstat;
3334 /*
3335 * If MNT_NOWAIT is specified, do not refresh the fsstat
3336 * cache. MNT_WAIT overrides MNT_NOWAIT.
3337 *
3338 * We treat MNT_DWAIT as MNT_WAIT for all instances of
3339 * getfsstat, since the constants are out of the same
3340 * namespace.
3341 */
3342 if ((mp->mnt_lflag & MNT_LDEAD) ||
3343 ((((fstp->flags & MNT_NOWAIT) == 0) || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
3344 (!(mp->mnt_lflag & MNT_LUNMOUNT)) &&
3345 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)))) {
3346 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
3347 return VFS_RETURNED;
3348 }
3349
3350 vfs_get_statfs64(mp, &sfs);
3351 error = copyout(&sfs, fstp->sfsp, sizeof(sfs));
3352 if (error) {
3353 fstp->error = error;
3354 return VFS_RETURNED_DONE;
3355 }
3356 fstp->sfsp += sizeof(sfs);
3357 }
3358 fstp->count++;
3359 return VFS_RETURNED;
3360 }
3361
3362 /*
3363 * Get statistics on all file systems in 64 bit mode.
3364 */
3365 int
3366 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
3367 {
3368 user_addr_t sfsp;
3369 int count, maxcount;
3370 struct getfsstat_struct fst;
3371
3372 maxcount = uap->bufsize / sizeof(struct statfs64);
3373
3374 sfsp = uap->buf;
3375 count = 0;
3376
3377 fst.sfsp = sfsp;
3378 fst.flags = uap->flags;
3379 fst.count = 0;
3380 fst.error = 0;
3381 fst.maxcount = maxcount;
3382
3383 vfs_iterate(VFS_ITERATE_NOSKIP_UNMOUNT, getfsstat64_callback, &fst);
3384
3385 if (fst.error) {
3386 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
3387 return fst.error;
3388 }
3389
3390 if (fst.sfsp && fst.count > fst.maxcount) {
3391 *retval = fst.maxcount;
3392 } else {
3393 *retval = fst.count;
3394 }
3395
3396 return 0;
3397 }
3398
3399 /*
3400 * Gets the vnode associated with the file descriptor passed
3401 * as input.
3402 *
3403 * INPUT
3404 * ctx - vfs context of caller
3405 * fd - file descriptor for which vnode is required.
3406 * vpp - Pointer to pointer to vnode to be returned.
3407 *
3408 * The vnode is returned with an iocount so any vnode obtained
3409 * by this call needs a vnode_put().
3410 *
3411 */
3412 int
3413 vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
3414 {
3415 int error;
3416 vnode_t vp;
3417 struct fileproc *fp;
3418 proc_t p = vfs_context_proc(ctx);
3419
3420 *vpp = NULLVP;
3421
3422 error = fp_getfvp(p, fd, &fp, &vp);
3423 if (error) {
3424 return error;
3425 }
3426
3427 error = vnode_getwithref(vp);
3428 if (error) {
3429 (void)fp_drop(p, fd, fp, 0);
3430 return error;
3431 }
3432
3433 (void)fp_drop(p, fd, fp, 0);
3434 *vpp = vp;
3435 return error;
3436 }
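/*
 * In-kernel caller sketch: the iocount taken here must always be paired
 * with a vnode_put(), per the comment above.
 *
 *   vnode_t dvp;
 *   int err = vnode_getfromfd(ctx, fd, &dvp);
 *
 *   if (err == 0) {
 *       // ... use dvp while the iocount is held ...
 *       vnode_put(dvp);
 *   }
 */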
3437
3438 /*
3439 * Wrapper function around namei to start lookup from a directory
3440 * specified by the file descriptor dirfd.
3441 *
3442 * In addition to all the errors returned by namei, this call can
3443 * return ENOTDIR if the file descriptor does not refer to a directory,
3444 * and EBADF if the file descriptor is not valid.
3445 */
3446 int
3447 nameiat(struct nameidata *ndp, int dirfd)
3448 {
3449 if ((dirfd != AT_FDCWD) &&
3450 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
3451 !(ndp->ni_cnd.cn_flags & USEDVP)) {
3452 int error = 0;
3453 char c;
3454
3455 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3456 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3457 if (error) {
3458 return error;
3459 }
3460 } else {
3461 c = *((char *)(ndp->ni_dirp));
3462 }
3463
3464 if (c != '/') {
3465 vnode_t dvp_at;
3466
3467 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3468 &dvp_at);
3469 if (error) {
3470 return error;
3471 }
3472
3473 if (vnode_vtype(dvp_at) != VDIR) {
3474 vnode_put(dvp_at);
3475 return ENOTDIR;
3476 }
3477
3478 ndp->ni_dvp = dvp_at;
3479 ndp->ni_cnd.cn_flags |= USEDVP;
3480 error = namei(ndp);
3481 ndp->ni_cnd.cn_flags &= ~USEDVP;
3482 vnode_put(dvp_at);
3483 return error;
3484 }
3485 }
3486
3487 return namei(ndp);
3488 }
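/*
 * Caller sketch for an *at handler (uap field names hypothetical): relative
 * paths are resolved against dirfd, absolute paths fall through to plain
 * namei().
 *
 *   struct nameidata nd;
 *   vfs_context_t ctx = vfs_context_current();
 *
 *   NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1,
 *       UIO_USERSPACE, uap->path, ctx);
 *   error = nameiat(&nd, uap->fd);
 *   if (error == 0) {
 *       // ... use nd.ni_vp ...
 *       vnode_put(nd.ni_vp);
 *       nameidone(&nd);
 *   }
 */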
3489
3490 /*
3491 * Change current working directory to a given file descriptor.
3492 */
3493 /* ARGSUSED */
3494 static int
3495 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
3496 {
3497 struct filedesc *fdp = p->p_fd;
3498 vnode_t vp;
3499 vnode_t tdp;
3500 vnode_t tvp;
3501 struct mount *mp;
3502 int error;
3503 vfs_context_t ctx = vfs_context_current();
3504
3505 AUDIT_ARG(fd, uap->fd);
3506 if (per_thread && uap->fd == -1) {
3507 /*
3508 * Switching back from per-thread to per-process CWD; verify that we
3509 * in fact have one before proceeding. The only success case
3510 * for this code path is to return 0 preemptively after zapping
3511 * the thread structure contents.
3512 */
3513 thread_t th = vfs_context_thread(ctx);
3514 if (th) {
3515 uthread_t uth = get_bsdthread_info(th);
3516 tvp = uth->uu_cdir;
3517 uth->uu_cdir = NULLVP;
3518 if (tvp != NULLVP) {
3519 vnode_rele(tvp);
3520 return 0;
3521 }
3522 }
3523 return EBADF;
3524 }
3525
3526 if ((error = file_vnode(uap->fd, &vp))) {
3527 return error;
3528 }
3529 if ((error = vnode_getwithref(vp))) {
3530 file_drop(uap->fd);
3531 return error;
3532 }
3533
3534 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3535
3536 if (vp->v_type != VDIR) {
3537 error = ENOTDIR;
3538 goto out;
3539 }
3540
3541 #if CONFIG_MACF
3542 error = mac_vnode_check_chdir(ctx, vp);
3543 if (error) {
3544 goto out;
3545 }
3546 #endif
3547 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3548 if (error) {
3549 goto out;
3550 }
3551
3552 while (!error && (mp = vp->v_mountedhere) != NULL) {
3553 if (vfs_busy(mp, LK_NOWAIT)) {
3554 error = EACCES;
3555 goto out;
3556 }
3557 error = VFS_ROOT(mp, &tdp, ctx);
3558 vfs_unbusy(mp);
3559 if (error) {
3560 break;
3561 }
3562 vnode_put(vp);
3563 vp = tdp;
3564 }
3565 if (error) {
3566 goto out;
3567 }
3568 if ((error = vnode_ref(vp))) {
3569 goto out;
3570 }
3571 vnode_put(vp);
3572
3573 if (per_thread) {
3574 thread_t th = vfs_context_thread(ctx);
3575 if (th) {
3576 uthread_t uth = get_bsdthread_info(th);
3577 tvp = uth->uu_cdir;
3578 uth->uu_cdir = vp;
3579 OSBitOrAtomic(P_THCWD, &p->p_flag);
3580 } else {
3581 vnode_rele(vp);
3582 return ENOENT;
3583 }
3584 } else {
3585 proc_fdlock(p);
3586 tvp = fdp->fd_cdir;
3587 fdp->fd_cdir = vp;
3588 proc_fdunlock(p);
3589 }
3590
3591 if (tvp) {
3592 vnode_rele(tvp);
3593 }
3594 file_drop(uap->fd);
3595
3596 return 0;
3597 out:
3598 vnode_put(vp);
3599 file_drop(uap->fd);
3600
3601 return error;
3602 }
3603
3604 int
3605 fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
3606 {
3607 return common_fchdir(p, uap, 0);
3608 }
3609
3610 int
3611 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
3612 {
3613 return common_fchdir(p, (void *)uap, 1);
3614 }
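/*
 * Usage sketch (private SPI; behavior per the fd == -1 special case in
 * common_fchdir above): a per-thread working directory is installed with a
 * real directory fd and dropped again by passing -1, which reverts the
 * thread to the per-process CWD.
 *
 *   __pthread_fchdir(dirfd);   // this thread now resolves relative paths
 *                              // against dirfd
 *   __pthread_fchdir(-1);      // back to the per-process CWD
 */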
3615
3616
3617 /*
3618 * Change current working directory (".").
3619 *
3620 * Returns: 0 Success
3621 * change_dir:ENOTDIR
3622 * change_dir:???
3623 * vnode_ref:ENOENT No such file or directory
3624 */
3625 /* ARGSUSED */
3626 int
3627 chdir_internal(proc_t p, vfs_context_t ctx, struct nameidata *ndp, int per_thread)
3628 {
3629 struct filedesc *fdp = p->p_fd;
3630 int error;
3631 vnode_t tvp;
3632
3633 error = change_dir(ndp, ctx);
3634 if (error) {
3635 return error;
3636 }
3637 if ((error = vnode_ref(ndp->ni_vp))) {
3638 vnode_put(ndp->ni_vp);
3639 return error;
3640 }
3641 /*
3642 * drop the iocount we picked up in change_dir
3643 */
3644 vnode_put(ndp->ni_vp);
3645
3646 if (per_thread) {
3647 thread_t th = vfs_context_thread(ctx);
3648 if (th) {
3649 uthread_t uth = get_bsdthread_info(th);
3650 tvp = uth->uu_cdir;
3651 uth->uu_cdir = ndp->ni_vp;
3652 OSBitOrAtomic(P_THCWD, &p->p_flag);
3653 } else {
3654 vnode_rele(ndp->ni_vp);
3655 return ENOENT;
3656 }
3657 } else {
3658 proc_fdlock(p);
3659 tvp = fdp->fd_cdir;
3660 fdp->fd_cdir = ndp->ni_vp;
3661 proc_fdunlock(p);
3662 }
3663
3664 if (tvp) {
3665 vnode_rele(tvp);
3666 }
3667
3668 return 0;
3669 }
3670
3671
3672 /*
3673 * Change current working directory (".").
3674 *
3675 * Returns: 0 Success
3676 * chdir_internal:ENOTDIR
3677 * chdir_internal:ENOENT No such file or directory
3678 * chdir_internal:???
3679 */
3680 /* ARGSUSED */
3681 static int
3682 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
3683 {
3684 struct nameidata nd;
3685 vfs_context_t ctx = vfs_context_current();
3686
3687 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
3688 UIO_USERSPACE, uap->path, ctx);
3689
3690 return chdir_internal(p, ctx, &nd, per_thread);
3691 }
3692
3693
3694 /*
3695 * chdir
3696 *
3697 * Change current working directory (".") for the entire process
3698 *
3699 * Parameters: p Process requesting the call
3700 * uap User argument descriptor (see below)
3701 * retval (ignored)
3702 *
3703 * Indirect parameters: uap->path Directory path
3704 *
3705 * Returns: 0 Success
3706 * common_chdir: ENOTDIR
3707 * common_chdir: ENOENT No such file or directory
3708 * common_chdir: ???
3709 *
3710 */
3711 int
3712 chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
3713 {
3714 return common_chdir(p, (void *)uap, 0);
3715 }
3716
3717 /*
3718 * __pthread_chdir
3719 *
3720 * Change current working directory (".") for a single thread
3721 *
3722 * Parameters: p Process requesting the call
3723 * uap User argument descriptor (see below)
3724 * retval (ignored)
3725 *
3726 * Indirect parameters: uap->path Directory path
3727 *
3728 * Returns: 0 Success
3729 * common_chdir: ENOTDIR
3730 * common_chdir: ENOENT No such file or directory
3731 * common_chdir: ???
3732 *
3733 */
3734 int
3735 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
3736 {
3737 return common_chdir(p, (void *)uap, 1);
3738 }
3739
3740
3741 /*
3742 * Change notion of root (``/'') directory.
3743 */
3744 /* ARGSUSED */
3745 int
3746 chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
3747 {
3748 struct filedesc *fdp = p->p_fd;
3749 int error;
3750 struct nameidata nd;
3751 vnode_t tvp;
3752 vfs_context_t ctx = vfs_context_current();
3753
3754 if ((error = suser(kauth_cred_get(), &p->p_acflag))) {
3755 return error;
3756 }
3757
3758 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
3759 UIO_USERSPACE, uap->path, ctx);
3760 error = change_dir(&nd, ctx);
3761 if (error) {
3762 return error;
3763 }
3764
3765 #if CONFIG_MACF
3766 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3767 &nd.ni_cnd);
3768 if (error) {
3769 vnode_put(nd.ni_vp);
3770 return error;
3771 }
3772 #endif
3773
3774 if ((error = vnode_ref(nd.ni_vp))) {
3775 vnode_put(nd.ni_vp);
3776 return error;
3777 }
3778 vnode_put(nd.ni_vp);
3779
3780 proc_fdlock(p);
3781 tvp = fdp->fd_rdir;
3782 fdp->fd_rdir = nd.ni_vp;
3783 fdp->fd_flags |= FD_CHROOT;
3784 proc_fdunlock(p);
3785
3786 if (tvp != NULL) {
3787 vnode_rele(tvp);
3788 }
3789
3790 return 0;
3791 }
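/*
 * Userspace usage sketch (hypothetical jail path): only root passes the
 * suser() check above, and the conventional pattern is to chdir() into the
 * new root so that "." cannot refer to a directory outside it.
 *
 *   #include <unistd.h>
 *
 *   if (chroot("/private/var/jail") == 0) {
 *       (void) chdir("/");
 *   }
 */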
3792
3793 /*
3794 * Common routine for chroot and chdir.
3795 *
3796 * Returns: 0 Success
3797 * ENOTDIR Not a directory
3798 * namei:??? [anything namei can return]
3799 * vnode_authorize:??? [anything vnode_authorize can return]
3800 */
3801 static int
3802 change_dir(struct nameidata *ndp, vfs_context_t ctx)
3803 {
3804 vnode_t vp;
3805 int error;
3806
3807 if ((error = namei(ndp))) {
3808 return error;
3809 }
3810 nameidone(ndp);
3811 vp = ndp->ni_vp;
3812
3813 if (vp->v_type != VDIR) {
3814 vnode_put(vp);
3815 return ENOTDIR;
3816 }
3817
3818 #if CONFIG_MACF
3819 error = mac_vnode_check_chdir(ctx, vp);
3820 if (error) {
3821 vnode_put(vp);
3822 return error;
3823 }
3824 #endif
3825
3826 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3827 if (error) {
3828 vnode_put(vp);
3829 return error;
3830 }
3831
3832 return error;
3833 }
3834
3835 /*
3836 * Allocate the per-fd vnode data (for directories) associated with the file glob.
3837 */
3838 struct fd_vn_data *
3839 fg_vn_data_alloc(void)
3840 {
3841 struct fd_vn_data *fvdata;
3842
3843 /* Allocate per fd vnode data */
3844 MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
3845 M_FD_VN_DATA, M_WAITOK | M_ZERO);
3846 lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
3847 return fvdata;
3848 }
3849
3850 /*
3851 * Free the vnode data (for directories) associated with the file glob.
3852 */
3853 void
3854 fg_vn_data_free(void *fgvndata)
3855 {
3856 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3857
3858 if (fvdata->fv_buf) {
3859 FREE(fvdata->fv_buf, M_FD_DIRBUF);
3860 }
3861 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3862 FREE(fvdata, M_FD_VN_DATA);
3863 }
3864
3865 /*
3866 * Check permissions, allocate an open file structure,
3867 * and call the device open routine if any.
3868 *
3869 * Returns: 0 Success
3870 * EINVAL
3871 * EINTR
3872 * falloc:ENFILE
3873 * falloc:EMFILE
3874 * falloc:ENOMEM
3875 * vn_open_auth:???
3876 * dupfdopen:???
3877 * VNOP_ADVLOCK:???
3878 * vnode_setsize:???
3879 *
3880 * XXX Need to implement uid, gid
3881 */
3882 int
3883 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3884 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3885 int32_t *retval)
3886 {
3887 proc_t p = vfs_context_proc(ctx);
3888 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
3889 struct fileproc *fp;
3890 vnode_t vp;
3891 int flags, oflags;
3892 int type, indx, error;
3893 struct flock lf;
3894 struct vfs_context context;
3895
3896 oflags = uflags;
3897
3898 if ((oflags & O_ACCMODE) == O_ACCMODE) {
3899 return EINVAL;
3900 }
3901
3902 flags = FFLAGS(uflags);
3903 CLR(flags, FENCRYPTED);
3904 CLR(flags, FUNENCRYPTED);
3905
3906 AUDIT_ARG(fflags, oflags);
3907 AUDIT_ARG(mode, vap->va_mode);
3908
3909 if ((error = falloc_withalloc(p,
3910 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
3911 return error;
3912 }
3913 uu->uu_dupfd = -indx - 1;
3914
3915 if ((error = vn_open_auth(ndp, &flags, vap))) {
3916 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)) { /* XXX from fdopen */
3917 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
3918 fp_drop(p, indx, NULL, 0);
3919 *retval = indx;
3920 return 0;
3921 }
3922 }
3923 if (error == ERESTART) {
3924 error = EINTR;
3925 }
3926 fp_free(p, indx, fp);
3927 return error;
3928 }
3929 uu->uu_dupfd = 0;
3930 vp = ndp->ni_vp;
3931
3932 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
3933 fp->f_fglob->fg_ops = &vnops;
3934 fp->f_fglob->fg_data = (caddr_t)vp;
3935
3936 if (flags & (O_EXLOCK | O_SHLOCK)) {
3937 lf.l_whence = SEEK_SET;
3938 lf.l_start = 0;
3939 lf.l_len = 0;
3940 if (flags & O_EXLOCK) {
3941 lf.l_type = F_WRLCK;
3942 } else {
3943 lf.l_type = F_RDLCK;
3944 }
3945 type = F_FLOCK;
3946 if ((flags & FNONBLOCK) == 0) {
3947 type |= F_WAIT;
3948 }
3949 #if CONFIG_MACF
3950 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3951 F_SETLK, &lf);
3952 if (error) {
3953 goto bad;
3954 }
3955 #endif
3956 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) {
3957 goto bad;
3958 }
3959 fp->f_fglob->fg_flag |= FHASLOCK;
3960 }
3961
3962 /* try to truncate by setting the size attribute */
3963 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0)) {
3964 goto bad;
3965 }
3966
3967 /*
3968 * For directories we hold some additional information in the fd.
3969 */
3970 if (vnode_vtype(vp) == VDIR) {
3971 fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
3972 } else {
3973 fp->f_fglob->fg_vn_data = NULL;
3974 }
3975
3976 vnode_put(vp);
3977
3978 /*
3979 * The first terminal open (without O_NOCTTY) by a session leader
3980 * results in it being set as the controlling terminal.
3981 */
3982 if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
3983 !(flags & O_NOCTTY)) {
3984 int tmp = 0;
3985
3986 (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
3987 (caddr_t)&tmp, ctx);
3988 }
3989
3990 proc_fdlock(p);
3991 if (flags & O_CLOEXEC) {
3992 *fdflags(p, indx) |= UF_EXCLOSE;
3993 }
3994 if (flags & O_CLOFORK) {
3995 *fdflags(p, indx) |= UF_FORKCLOSE;
3996 }
3997 procfdtbl_releasefd(p, indx, NULL);
3998
3999 #if CONFIG_SECLUDED_MEMORY
4000 if (secluded_for_filecache &&
4001 FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE &&
4002 vnode_vtype(vp) == VREG) {
4003 memory_object_control_t moc;
4004
4005 moc = ubc_getobject(vp, UBC_FLAGS_NONE);
4006
4007 if (moc == MEMORY_OBJECT_CONTROL_NULL) {
4008 /* nothing to do... */
4009 } else if (fp->f_fglob->fg_flag & FWRITE) {
4010 /* writable -> no longer eligible for secluded pages */
4011 memory_object_mark_eligible_for_secluded(moc,
4012 FALSE);
4013 } else if (secluded_for_filecache == 1) {
4014 char pathname[32] = { 0, };
4015 size_t copied;
4016 /* XXX FBDP: better way to detect /Applications/ ? */
4017 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
4018 (void)copyinstr(ndp->ni_dirp,
4019 pathname,
4020 sizeof(pathname),
4021 &copied);
4022 } else {
4023 copystr(CAST_DOWN(void *, ndp->ni_dirp),
4024 pathname,
4025 sizeof(pathname),
4026 &copied);
4027 }
4028 pathname[sizeof(pathname) - 1] = '\0';
4029 if (strncmp(pathname,
4030 "/Applications/",
4031 strlen("/Applications/")) == 0 &&
4032 strncmp(pathname,
4033 "/Applications/Camera.app/",
4034 strlen("/Applications/Camera.app/")) != 0) {
4035 /*
4036 * not writable
4037 * AND from "/Applications/"
4038 * AND not from "/Applications/Camera.app/"
4039 * ==> eligible for secluded
4040 */
4041 memory_object_mark_eligible_for_secluded(moc,
4042 TRUE);
4043 }
4044 } else if (secluded_for_filecache == 2) {
4045 #if __arm64__
4046 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
4047 #elif __arm__
4048 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
4049 #else
4050 /* not implemented... */
4051 #endif
4052 size_t len = strlen(vp->v_name);
4053 if (!strncmp(vp->v_name, DYLD_SHARED_CACHE_NAME, len) ||
4054 !strncmp(vp->v_name, "dyld", len) ||
4055 !strncmp(vp->v_name, "launchd", len) ||
4056 !strncmp(vp->v_name, "Camera", len) ||
4057 !strncmp(vp->v_name, "mediaserverd", len) ||
4058 !strncmp(vp->v_name, "SpringBoard", len) ||
4059 !strncmp(vp->v_name, "backboardd", len)) {
4060 /*
4061 * This file matters when launching Camera:
4062 * do not store its contents in the secluded
4063 * pool that will be drained on Camera launch.
4064 */
4065 memory_object_mark_eligible_for_secluded(moc,
4066 FALSE);
4067 }
4068 }
4069 }
4070 #endif /* CONFIG_SECLUDED_MEMORY */
4071
4072 fp_drop(p, indx, fp, 1);
4073 proc_fdunlock(p);
4074
4075 *retval = indx;
4076
4077 return 0;
4078 bad:
4079 context = *vfs_context_current();
4080 context.vc_ucred = fp->f_fglob->fg_cred;
4081
4082 if ((fp->f_fglob->fg_flag & FHASLOCK) &&
4083 (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
4084 lf.l_whence = SEEK_SET;
4085 lf.l_start = 0;
4086 lf.l_len = 0;
4087 lf.l_type = F_UNLCK;
4088
4089 (void)VNOP_ADVLOCK(
4090 vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
4091 }
4092
4093 vn_close(vp, fp->f_fglob->fg_flag, &context);
4094 vnode_put(vp);
4095 fp_free(p, indx, fp);
4096
4097 return error;
4098 }
4099
4100 /*
4101 * While most of the *at syscall handlers can call nameiat(), which
4102 * is a wrapper around namei, here the use of namei and the initialization
4103 * of nameidata are far removed and in different functions - namei
4104 * gets called in vn_open_auth for open1. So we just do here what
4105 * nameiat() does.
4106 */
4107 static int
4108 open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
4109 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
4110 int dirfd)
4111 {
4112 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
4113 int error;
4114 char c;
4115
4116 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
4117 error = copyin(ndp->ni_dirp, &c, sizeof(char));
4118 if (error) {
4119 return error;
4120 }
4121 } else {
4122 c = *((char *)(ndp->ni_dirp));
4123 }
4124
4125 if (c != '/') {
4126 vnode_t dvp_at;
4127
4128 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
4129 &dvp_at);
4130 if (error) {
4131 return error;
4132 }
4133
4134 if (vnode_vtype(dvp_at) != VDIR) {
4135 vnode_put(dvp_at);
4136 return ENOTDIR;
4137 }
4138
4139 ndp->ni_dvp = dvp_at;
4140 ndp->ni_cnd.cn_flags |= USEDVP;
4141 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
4142 retval);
4143 vnode_put(dvp_at);
4144 return error;
4145 }
4146 }
4147
4148 return open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval);
4149 }
4150
4151 /*
4152 * open_extended: open a file given a path name, with an extended argument list (including extended security (ACL)).
4153 *
4154 * Parameters: p Process requesting the open
4155 * uap User argument descriptor (see below)
4156 * retval Pointer to an area to receive the
4157 * return value from the system call
4158 *
4159 * Indirect: uap->path Path to open (same as 'open')
4160 * uap->flags Flags to open (same as 'open')
4161 * uap->uid UID to set, if creating
4162 * uap->gid GID to set, if creating
4163 * uap->mode File mode, if creating (same as 'open')
4164 * uap->xsecurity ACL to set, if creating
4165 *
4166 * Returns: 0 Success
4167 * !0 errno value
4168 *
4169 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4170 *
4171 * XXX: We should enumerate the possible errno values here, and where
4172 * in the code they originated.
4173 */
4174 int
4175 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
4176 {
4177 struct filedesc *fdp = p->p_fd;
4178 int ciferror;
4179 kauth_filesec_t xsecdst;
4180 struct vnode_attr va;
4181 struct nameidata nd;
4182 int cmode;
4183
4184 AUDIT_ARG(owner, uap->uid, uap->gid);
4185
4186 xsecdst = NULL;
4187 if ((uap->xsecurity != USER_ADDR_NULL) &&
4188 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)) {
4189 return ciferror;
4190 }
4191
4192 VATTR_INIT(&va);
4193 cmode = ((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
4194 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
4195 if (uap->uid != KAUTH_UID_NONE) {
4196 VATTR_SET(&va, va_uid, uap->uid);
4197 }
4198 if (uap->gid != KAUTH_GID_NONE) {
4199 VATTR_SET(&va, va_gid, uap->gid);
4200 }
4201 if (xsecdst != NULL) {
4202 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4203 }
4204
4205 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
4206 uap->path, vfs_context_current());
4207
4208 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
4209 fileproc_alloc_init, NULL, retval);
4210 if (xsecdst != NULL) {
4211 kauth_filesec_free(xsecdst);
4212 }
4213
4214 return ciferror;
4215 }
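
/*
 * Worked example of the create-mode computation above (the same pattern
 * appears in open_dprotected_np() and openat_internal() below), a minimal
 * sketch assuming a process umask (fd_cmask) of 022 and a requested mode
 * of 0666:
 *
 *      cmode = ((0666 & ~022) & ALLPERMS) & ~S_ISTXT
 *            = (0644 & 07777) & ~01000
 *            = 0644
 *
 * The umask is applied, only permission/setid bits survive, and any
 * requested sticky bit is stripped; VATTR_SET(&va, va_mode,
 * cmode & ACCESSPERMS) then drops the setid bits as well.
 */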
4216
4217 /*
4218 * Data-protected open: atomically apply a protection class as part of open(2).
4219 *
4220 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
4221 */
4222 int
4223 open_dprotected_np(__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval)
4224 {
4225 int flags = uap->flags;
4226 int class = uap->class;
4227 int dpflags = uap->dpflags;
4228
4229 /*
4230 * Follow the same path as normal open(2)
4231 * Look up the item if it exists, and acquire the vnode.
4232 */
4233 struct filedesc *fdp = p->p_fd;
4234 struct vnode_attr va;
4235 struct nameidata nd;
4236 int cmode;
4237 int error;
4238
4239 VATTR_INIT(&va);
4240 /* Mask off all but regular access permissions */
4241 cmode = ((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
4242 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
4243
4244 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
4245 uap->path, vfs_context_current());
4246
4247 /*
4248 * Initialize the extra fields in vnode_attr to pass down our
4249 * extra fields.
4250 * 1. target cprotect class.
4251 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
4252 */
4253 if (flags & O_CREAT) {
4254 /* lower level kernel code validates that the class is valid before applying it. */
4255 if (class != PROTECTION_CLASS_DEFAULT) {
4256 /*
4257 * PROTECTION_CLASS_DEFAULT means the file keeps the class a plain
4258 * open(2) would give it, so only set the attribute when it differs.
4259 */
4260 VATTR_SET(&va, va_dataprotect_class, class);
4261 }
4262 }
4263
4264 if (dpflags & (O_DP_GETRAWENCRYPTED | O_DP_GETRAWUNENCRYPTED)) {
4265 if (flags & (O_RDWR | O_WRONLY)) {
4266 /* Not allowed to write raw encrypted bytes */
4267 return EINVAL;
4268 }
4269 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
4270 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
4271 }
4272 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
4273 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
4274 }
4275 }
4276
4277 error = open1(vfs_context_current(), &nd, uap->flags, &va,
4278 fileproc_alloc_init, NULL, retval);
4279
4280 return error;
4281 }
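
/*
 * Minimal userspace sketch of the call this handler services; the
 * open_dprotected_np() prototype is SPI and is assumed here from Apple's
 * <sys/fcntl.h>, and the path, class, and mode values are hypothetical.
 *
 *      #include <sys/fcntl.h>
 *
 *      int fd = open_dprotected_np("/private/var/example.dat",
 *          O_CREAT | O_RDWR, 3, 0, 0600);      // 3: hypothetical class
 *
 * As checked above, O_DP_GETRAWENCRYPTED / O_DP_GETRAWUNENCRYPTED in
 * dpflags are only valid for read-only opens.
 */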
4282
4283 static int
4284 openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
4285 int fd, enum uio_seg segflg, int *retval)
4286 {
4287 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
4288 struct vnode_attr va;
4289 struct nameidata nd;
4290 int cmode;
4291
4292 VATTR_INIT(&va);
4293 /* Mask off all but regular access permissions */
4294 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
4295 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
4296
4297 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
4298 segflg, path, ctx);
4299
4300 return open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
4301 retval, fd);
4302 }
4303
4304 int
4305 open(proc_t p, struct open_args *uap, int32_t *retval)
4306 {
4307 __pthread_testcancel(1);
4308 return open_nocancel(p, (struct open_nocancel_args *)uap, retval);
4309 }
4310
4311 int
4312 open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
4313 int32_t *retval)
4314 {
4315 return openat_internal(vfs_context_current(), uap->path, uap->flags,
4316 uap->mode, AT_FDCWD, UIO_USERSPACE, retval);
4317 }
4318
4319 int
4320 openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
4321 int32_t *retval)
4322 {
4323 return openat_internal(vfs_context_current(), uap->path, uap->flags,
4324 uap->mode, uap->fd, UIO_USERSPACE, retval);
4325 }
4326
4327 int
4328 openat(proc_t p, struct openat_args *uap, int32_t *retval)
4329 {
4330 __pthread_testcancel(1);
4331 return openat_nocancel(p, (struct openat_nocancel_args *)uap, retval);
4332 }
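
/*
 * Minimal userspace sketch of the open(2)/openat(2) entry points handled
 * above; the directory and file names are hypothetical and error handling
 * is omitted.
 *
 *      #include <fcntl.h>
 *
 *      int dfd = open("/tmp", O_RDONLY | O_DIRECTORY);
 *      int fd1 = openat(dfd, "notes.txt", O_CREAT | O_WRONLY, 0644);
 *      int fd2 = openat(dfd, "/etc/hosts", O_RDONLY);  // absolute: dfd unused
 *
 * A relative path is resolved against dfd (see open1at() above); an
 * absolute path or AT_FDCWD makes openat(2) behave like open(2).
 */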
4333
4334 /*
4335 * openbyid_np: open a file given a file system id and a file system object id
4336 * the hfs file system object id is an fsobj_id_t {uint32, uint32};
4337 * for file systems that don't support object ids, it is a node id (uint64_t).
4338 *
4339 * Parameters: p Process requesting the open
4340 * uap User argument descriptor (see below)
4341 * retval Pointer to an area to receive the
4342 * return value from the system call
4343 *
4344 * Indirect: uap->path Path to open (same as 'open')
4345 *
4346 * uap->fsid id of target file system
4347 * uap->objid id of target file system object
4348 * uap->flags Flags to open (same as 'open')
4349 *
4350 * Returns: 0 Success
4351 * !0 errno value
4352 *
4353 *
4354 * XXX: We should enumerate the possible errno values here, and where
4355 * in the code they originated.
4356 */
4357 int
4358 openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
4359 {
4360 fsid_t fsid;
4361 uint64_t objid;
4362 int error;
4363 char *buf = NULL;
4364 int buflen = MAXPATHLEN;
4365 int pathlen = 0;
4366 vfs_context_t ctx = vfs_context_current();
4367
4368 if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
4369 return error;
4370 }
4371
4372 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
4373 return error;
4374 }
4375
4376 /* uap->objid is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
4377 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
4378 return error;
4379 }
4380
4381 AUDIT_ARG(value32, fsid.val[0]);
4382 AUDIT_ARG(value64, objid);
4383
4384 /* resolve path from fsid, objid */
4385 do {
4386 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
4387 if (buf == NULL) {
4388 return ENOMEM;
4389 }
4390
4391 error = fsgetpath_internal( ctx, fsid.val[0], objid, buflen,
4392 buf, FSOPT_ISREALFSID, &pathlen);
4393
4394 if (error) {
4395 FREE(buf, M_TEMP);
4396 buf = NULL;
4397 }
4398 } while (error == ENOSPC && (buflen += MAXPATHLEN));
4399
4400 if (error) {
4401 return error;
4402 }
4403
4404 buf[pathlen] = 0;
4405
4406 error = openat_internal(
4407 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
4408
4409 FREE(buf, M_TEMP);
4410
4411 return error;
4412 }
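
/*
 * Sketch of the same resolve-then-open pattern from userspace using the
 * public fsgetpath(2) wrapper; the header, availability, and the source
 * of fsid/obj_id (e.g. statfs(2) f_fsid plus a stored inode number) are
 * assumptions, and error handling is omitted.
 *
 *      #include <sys/fsgetpath.h>
 *      #include <fcntl.h>
 *
 *      char path[1024];
 *      ssize_t len = fsgetpath(path, sizeof(path), &fsid, obj_id);
 *      int fd = (len > 0) ? open(path, O_RDONLY) : -1;
 *
 * openbyid_np() above does the equivalent in one call, growing its buffer
 * by MAXPATHLEN and retrying while fsgetpath_internal() returns ENOSPC.
 */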
4413
4414
4415 /*
4416 * Create a special file.
4417 */
4418 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
4419
4420 int
4421 mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
4422 {
4423 struct vnode_attr va;
4424 vfs_context_t ctx = vfs_context_current();
4425 int error;
4426 struct nameidata nd;
4427 vnode_t vp, dvp;
4428
4429 VATTR_INIT(&va);
4430 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4431 VATTR_SET(&va, va_rdev, uap->dev);
4432
4433 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
4434 if ((uap->mode & S_IFMT) == S_IFIFO) {
4435 return mkfifo1(ctx, uap->path, &va);
4436 }
4437
4438 AUDIT_ARG(mode, uap->mode);
4439 AUDIT_ARG(value32, uap->dev);
4440
4441 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
4442 return error;
4443 }
4444 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
4445 UIO_USERSPACE, uap->path, ctx);
4446 error = namei(&nd);
4447 if (error) {
4448 return error;
4449 }
4450 dvp = nd.ni_dvp;
4451 vp = nd.ni_vp;
4452
4453 if (vp != NULL) {
4454 error = EEXIST;
4455 goto out;
4456 }
4457
4458 switch (uap->mode & S_IFMT) {
4459 case S_IFCHR:
4460 VATTR_SET(&va, va_type, VCHR);
4461 break;
4462 case S_IFBLK:
4463 VATTR_SET(&va, va_type, VBLK);
4464 break;
4465 default:
4466 error = EINVAL;
4467 goto out;
4468 }
4469
4470 #if CONFIG_MACF
4471 error = mac_vnode_check_create(ctx,
4472 nd.ni_dvp, &nd.ni_cnd, &va);
4473 if (error) {
4474 goto out;
4475 }
4476 #endif
4477
4478 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) {
4479 goto out;
4480 }
4481
4482 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0) {
4483 goto out;
4484 }
4485
4486 if (vp) {
4487 int update_flags = 0;
4488
4489 // Make sure the name & parent pointers are hooked up
4490 if (vp->v_name == NULL) {
4491 update_flags |= VNODE_UPDATE_NAME;
4492 }
4493 if (vp->v_parent == NULLVP) {
4494 update_flags |= VNODE_UPDATE_PARENT;
4495 }
4496
4497 if (update_flags) {
4498 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
4499 }
4500
4501 #if CONFIG_FSE
4502 add_fsevent(FSE_CREATE_FILE, ctx,
4503 FSE_ARG_VNODE, vp,
4504 FSE_ARG_DONE);
4505 #endif
4506 }
4507
4508 out:
4509 /*
4510 * nameidone has to happen before we vnode_put(dvp)
4511 * since it may need to release the fs_nodelock on the dvp
4512 */
4513 nameidone(&nd);
4514
4515 if (vp) {
4516 vnode_put(vp);
4517 }
4518 vnode_put(dvp);
4519
4520 return error;
4521 }
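
/*
 * Minimal userspace sketch of the mknod(2) paths above: an S_IFIFO mode is
 * diverted to mkfifo1(), while S_IFCHR/S_IFBLK require superuser. The
 * paths and device numbers are hypothetical, and makedev() is assumed to
 * come from <sys/types.h>.
 *
 *      #include <sys/types.h>
 *      #include <sys/stat.h>
 *
 *      // same effect as mkfifo("/tmp/example.fifo", 0600)
 *      mknod("/tmp/example.fifo", S_IFIFO | 0600, 0);
 *
 *      // character special file; fails with EPERM for non-root callers
 *      mknod("/tmp/example.chr", S_IFCHR | 0600, makedev(1, 3));
 */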
4522
4523 /*
4524 * Create a named pipe.
4525 *
4526 * Returns: 0 Success
4527 * EEXIST
4528 * namei:???
4529 * vnode_authorize:???
4530 * vn_create:???
4531 */
4532 static int
4533 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
4534 {
4535 vnode_t vp, dvp;
4536 int error;
4537 struct nameidata nd;
4538
4539 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
4540 UIO_USERSPACE, upath, ctx);
4541 error = namei(&nd);
4542 if (error) {
4543 return error;
4544 }
4545 dvp = nd.ni_dvp;
4546 vp = nd.ni_vp;
4547
4548 /* check that this is a new file and authorize addition */
4549 if (vp != NULL) {
4550 error = EEXIST;
4551 goto out;
4552 }
4553 VATTR_SET(vap, va_type, VFIFO);
4554
4555 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
4556 goto out;
4557 }
4558
4559 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
4560 out:
4561 /*
4562 * nameidone has to happen before we vnode_put(dvp)
4563 * since it may need to release the fs_nodelock on the dvp
4564 */
4565 nameidone(&nd);
4566
4567 if (vp) {
4568 vnode_put(vp);
4569 }
4570 vnode_put(dvp);
4571
4572 return error;
4573 }
4574
4575
4576 /*
4577 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
4578 *
4579 * Parameters: p Process requesting the open
4580 * uap User argument descriptor (see below)
4581 * retval (Ignored)
4582 *
4583 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4584 * uap->uid UID to set
4585 * uap->gid GID to set
4586 * uap->mode File mode to set (same as 'mkfifo')
4587 * uap->xsecurity ACL to set, if creating
4588 *
4589 * Returns: 0 Success
4590 * !0 errno value
4591 *
4592 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4593 *
4594 * XXX: We should enumerate the possible errno values here, and where
4595 * in the code they originated.
4596 */
4597 int
4598 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
4599 {
4600 int ciferror;
4601 kauth_filesec_t xsecdst;
4602 struct vnode_attr va;
4603
4604 AUDIT_ARG(owner, uap->uid, uap->gid);
4605
4606 xsecdst = KAUTH_FILESEC_NONE;
4607 if (uap->xsecurity != USER_ADDR_NULL) {
4608 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
4609 return ciferror;
4610 }
4611 }
4612
4613 VATTR_INIT(&va);
4614 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4615 if (uap->uid != KAUTH_UID_NONE) {
4616 VATTR_SET(&va, va_uid, uap->uid);
4617 }
4618 if (uap->gid != KAUTH_GID_NONE) {
4619 VATTR_SET(&va, va_gid, uap->gid);
4620 }
4621 if (xsecdst != KAUTH_FILESEC_NONE) {
4622 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4623 }
4624
4625 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
4626
4627 if (xsecdst != KAUTH_FILESEC_NONE) {
4628 kauth_filesec_free(xsecdst);
4629 }
4630 return ciferror;
4631 }
4632
4633 /* ARGSUSED */
4634 int
4635 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
4636 {
4637 struct vnode_attr va;
4638
4639 VATTR_INIT(&va);
4640 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4641
4642 return mkfifo1(vfs_context_current(), uap->path, &va);
4643 }
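
/*
 * Minimal userspace sketch of mkfifo(2) followed by a non-blocking open of
 * the resulting FIFO; the path is hypothetical and error handling is
 * omitted.
 *
 *      #include <sys/stat.h>
 *      #include <fcntl.h>
 *
 *      mkfifo("/tmp/example.fifo", 0600);
 *      int rfd = open("/tmp/example.fifo", O_RDONLY | O_NONBLOCK);
 *
 * As with mknod() above, the requested mode is masked by the caller's
 * umask before mkfifo1() creates the VFIFO node.
 */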
4644
4645
4646 static char *
4647 my_strrchr(char *p, int ch)
4648 {
4649 char *save;
4650
4651 for (save = NULL;; ++p) {
4652 if (*p == ch) {
4653 save = p;
4654 }
4655 if (!*p) {
4656 return save;
4657 }
4658 }
4659 /* NOTREACHED */
4660 }
4661
4662 extern int safe_getpath_new(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path, int firmlink);
4663 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4664 extern int safe_getpath_no_firmlink(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4665
4666 int
4667 safe_getpath_new(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path, int firmlink)
4668 {
4669 int ret, len = _len;
4670
4671 *truncated_path = 0;
4672
4673 if (firmlink) {
4674 ret = vn_getpath(dvp, path, &len);
4675 } else {
4676 ret = vn_getpath_no_firmlink(dvp, path, &len);
4677 }
4678 if (ret == 0 && len < (MAXPATHLEN - 1)) {
4679 if (leafname) {
4680 path[len - 1] = '/';
4681 len += strlcpy(&path[len], leafname, MAXPATHLEN - len) + 1;
4682 if (len > MAXPATHLEN) {
4683 char *ptr;
4684
4685 // the string got truncated!
4686 *truncated_path = 1;
4687 ptr = my_strrchr(path, '/');
4688 if (ptr) {
4689 *ptr = '\0'; // chop off the string at the last directory component
4690 }
4691 len = strlen(path) + 1;
4692 }
4693 }
4694 } else if (ret == 0) {
4695 *truncated_path = 1;
4696 } else if (ret != 0) {
4697 struct vnode *mydvp = dvp;
4698
4699 if (ret != ENOSPC) {
4700 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4701 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
4702 }
4703 *truncated_path = 1;
4704
4705 do {
4706 if (mydvp->v_parent != NULL) {
4707 mydvp = mydvp->v_parent;
4708 } else if (mydvp->v_mount) {
4709 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4710 break;
4711 } else {
4712 // no parent and no mount point? only thing is to punt and say "/" changed
4713 strlcpy(path, "/", _len);
4714 len = 2;
4715 mydvp = NULL;
4716 }
4717
4718 if (mydvp == NULL) {
4719 break;
4720 }
4721
4722 len = _len;
4723 if (firmlink) {
4724 ret = vn_getpath(mydvp, path, &len);
4725 } else {
4726 ret = vn_getpath_no_firmlink(mydvp, path, &len);
4727 }
4728 } while (ret == ENOSPC);
4729 }
4730
4731 return len;
4732 }
4733
4734 int
4735 safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4736 {
4737 return safe_getpath_new(dvp, leafname, path, _len, truncated_path, 1);
4738 }
4739
4740 int
4741 safe_getpath_no_firmlink(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4742 {
4743 return safe_getpath_new(dvp, leafname, path, _len, truncated_path, 0);
4744 }
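
/*
 * Sketch of how callers later in this file use these helpers (see
 * linkat_internal() and unlinkat_internal() below); the local names are
 * illustrative only.
 *
 *      char *pathbuf;
 *      int truncated = 0, len;
 *
 *      GET_PATH(pathbuf);
 *      len = safe_getpath(dvp, cnp->cn_nameptr, pathbuf, MAXPATHLEN,
 *          &truncated);
 *      // len counts the terminating NUL; truncated is set when the
 *      // result was clipped back to a parent directory because the
 *      // full path did not fit.
 *      RELEASE_PATH(pathbuf);
 */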
4745
4746 /*
4747 * Make a hard file link.
4748 *
4749 * Returns: 0 Success
4750 * EPERM
4751 * EEXIST
4752 * EXDEV
4753 * namei:???
4754 * vnode_authorize:???
4755 * VNOP_LINK:???
4756 */
4757 /* ARGSUSED */
4758 static int
4759 linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
4760 user_addr_t link, int flag, enum uio_seg segflg)
4761 {
4762 vnode_t vp, pvp, dvp, lvp;
4763 struct nameidata nd;
4764 int follow;
4765 int error;
4766 #if CONFIG_FSE
4767 fse_info finfo;
4768 #endif
4769 int need_event, has_listeners, need_kpath2;
4770 char *target_path = NULL;
4771 int truncated = 0;
4772
4773 vp = dvp = lvp = NULLVP;
4774
4775 /* look up the object we are linking to */
4776 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
4777 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
4778 segflg, path, ctx);
4779
4780 error = nameiat(&nd, fd1);
4781 if (error) {
4782 return error;
4783 }
4784 vp = nd.ni_vp;
4785
4786 nameidone(&nd);
4787
4788 /*
4789 * Normally, linking to directories is not supported.
4790 * However, some file systems may have limited support.
4791 */
4792 if (vp->v_type == VDIR) {
4793 if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
4794 error = EPERM; /* POSIX */
4795 goto out;
4796 }
4797
4798 /* Linking to a directory requires ownership. */
4799 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
4800 struct vnode_attr dva;
4801
4802 VATTR_INIT(&dva);
4803 VATTR_WANTED(&dva, va_uid);
4804 if (vnode_getattr(vp, &dva, ctx) != 0 ||
4805 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
4806 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
4807 error = EACCES;
4808 goto out;
4809 }
4810 }
4811 }
4812
4813 /* lookup the target node */
4814 #if CONFIG_TRIGGERS
4815 nd.ni_op = OP_LINK;
4816 #endif
4817 nd.ni_cnd.cn_nameiop = CREATE;
4818 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
4819 nd.ni_dirp = link;
4820 error = nameiat(&nd, fd2);
4821 if (error != 0) {
4822 goto out;
4823 }
4824 dvp = nd.ni_dvp;
4825 lvp = nd.ni_vp;
4826
4827 #if CONFIG_MACF
4828 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0) {
4829 goto out2;
4830 }
4831 #endif
4832
4833 /* don't allow linking to anything that kauth doesn't want us to (eg. immutable items) */
4834 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0) {
4835 goto out2;
4836 }
4837
4838 /* target node must not exist */
4839 if (lvp != NULLVP) {
4840 error = EEXIST;
4841 goto out2;
4842 }
4843 /* cannot link across mountpoints */
4844 if (vnode_mount(vp) != vnode_mount(dvp)) {
4845 error = EXDEV;
4846 goto out2;
4847 }
4848
4849 /* authorize creation of the target node */
4850 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) {
4851 goto out2;
4852 }
4853
4854 /* and finally make the link */
4855 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
4856 if (error) {
4857 goto out2;
4858 }
4859
4860 #if CONFIG_MACF
4861 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
4862 #endif
4863
4864 #if CONFIG_FSE
4865 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
4866 #else
4867 need_event = 0;
4868 #endif
4869 has_listeners = kauth_authorize_fileop_has_listeners();
4870
4871 need_kpath2 = 0;
4872 #if CONFIG_AUDIT
4873 if (AUDIT_RECORD_EXISTS()) {
4874 need_kpath2 = 1;
4875 }
4876 #endif
4877
4878 if (need_event || has_listeners || need_kpath2) {
4879 char *link_to_path = NULL;
4880 int len, link_name_len;
4881
4882 /* build the path to the new link file */
4883 GET_PATH(target_path);
4884 if (target_path == NULL) {
4885 error = ENOMEM;
4886 goto out2;
4887 }
4888
4889 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
4890
4891 AUDIT_ARG(kpath, target_path, ARG_KPATH2);
4892
4893 if (has_listeners) {
4894 /* build the path to file we are linking to */
4895 GET_PATH(link_to_path);
4896 if (link_to_path == NULL) {
4897 error = ENOMEM;
4898 goto out2;
4899 }
4900
4901 link_name_len = MAXPATHLEN;
4902 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
4903 /*
4904 * Call out to allow 3rd party notification of rename.
4905 * Ignore result of kauth_authorize_fileop call.
4906 */
4907 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
4908 (uintptr_t)link_to_path,
4909 (uintptr_t)target_path);
4910 }
4911 if (link_to_path != NULL) {
4912 RELEASE_PATH(link_to_path);
4913 }
4914 }
4915 #if CONFIG_FSE
4916 if (need_event) {
4917 /* construct fsevent */
4918 if (get_fse_info(vp, &finfo, ctx) == 0) {
4919 if (truncated) {
4920 finfo.mode |= FSE_TRUNCATED_PATH;
4921 }
4922
4923 // build the path to the destination of the link
4924 add_fsevent(FSE_CREATE_FILE, ctx,
4925 FSE_ARG_STRING, len, target_path,
4926 FSE_ARG_FINFO, &finfo,
4927 FSE_ARG_DONE);
4928 }
4929
4930 pvp = vp->v_parent;
4931 // need an iocount on pvp in this case
4932 if (pvp && pvp != dvp) {
4933 error = vnode_get(pvp);
4934 if (error) {
4935 pvp = NULLVP;
4936 error = 0;
4937 }
4938 }
4939 if (pvp) {
4940 add_fsevent(FSE_STAT_CHANGED, ctx,
4941 FSE_ARG_VNODE, pvp, FSE_ARG_DONE);
4942 }
4943 if (pvp && pvp != dvp) {
4944 vnode_put(pvp);
4945 }
4946 }
4947 #endif
4948 }
4949 out2:
4950 /*
4951 * nameidone has to happen before we vnode_put(dvp)
4952 * since it may need to release the fs_nodelock on the dvp
4953 */
4954 nameidone(&nd);
4955 if (target_path != NULL) {
4956 RELEASE_PATH(target_path);
4957 }
4958 out:
4959 if (lvp) {
4960 vnode_put(lvp);
4961 }
4962 if (dvp) {
4963 vnode_put(dvp);
4964 }
4965 vnode_put(vp);
4966 return error;
4967 }
4968
4969 int
4970 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4971 {
4972 return linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4973 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE);
4974 }
4975
4976 int
4977 linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4978 {
4979 if (uap->flag & ~AT_SYMLINK_FOLLOW) {
4980 return EINVAL;
4981 }
4982
4983 return linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4984 uap->fd2, uap->link, uap->flag, UIO_USERSPACE);
4985 }
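
/*
 * Minimal userspace sketch of link(2)/linkat(2) as serviced above; the
 * paths are hypothetical and error handling is omitted.
 *
 *      #include <unistd.h>
 *      #include <fcntl.h>
 *
 *      link("/tmp/data.txt", "/tmp/data.hardlink");
 *
 *      // follow a symlink at the source before linking
 *      linkat(AT_FDCWD, "/tmp/data.symlink", AT_FDCWD,
 *          "/tmp/data.hardlink2", AT_SYMLINK_FOLLOW);
 *
 * Both calls fail with EXDEV when source and target are on different
 * mounts and with EEXIST when the target name already exists, matching
 * the checks in linkat_internal().
 */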
4986
4987 /*
4988 * Make a symbolic link.
4989 *
4990 * We could add support for ACLs here too...
4991 */
4992 /* ARGSUSED */
4993 static int
4994 symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
4995 user_addr_t link, enum uio_seg segflg)
4996 {
4997 struct vnode_attr va;
4998 char *path;
4999 int error;
5000 struct nameidata nd;
5001 vnode_t vp, dvp;
5002 size_t dummy = 0;
5003 proc_t p;
5004
5005 error = 0;
5006 if (UIO_SEG_IS_USER_SPACE(segflg)) {
5007 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
5008 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
5009 } else {
5010 path = (char *)path_data;
5011 }
5012 if (error) {
5013 goto out;
5014 }
5015 AUDIT_ARG(text, path); /* This is the link string */
5016
5017 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
5018 segflg, link, ctx);
5019
5020 error = nameiat(&nd, fd);
5021 if (error) {
5022 goto out;
5023 }
5024 dvp = nd.ni_dvp;
5025 vp = nd.ni_vp;
5026
5027 p = vfs_context_proc(ctx);
5028 VATTR_INIT(&va);
5029 VATTR_SET(&va, va_type, VLNK);
5030 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
5031
5032 #if CONFIG_MACF
5033 error = mac_vnode_check_create(ctx,
5034 dvp, &nd.ni_cnd, &va);
5035 #endif
5036 if (error != 0) {
5037 goto skipit;
5038 }
5039
5040 if (vp != NULL) {
5041 error = EEXIST;
5042 goto skipit;
5043 }
5044
5045 /* authorize */
5046 if (error == 0) {
5047 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
5048 }
5049 /* get default ownership, etc. */
5050 if (error == 0) {
5051 error = vnode_authattr_new(dvp, &va, 0, ctx);
5052 }
5053 if (error == 0) {
5054 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
5055 }
5056
5057 #if CONFIG_MACF
5058 if (error == 0 && vp) {
5059 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
5060 }
5061 #endif
5062
5063 /* do fallback attribute handling */
5064 if (error == 0 && vp) {
5065 error = vnode_setattr_fallback(vp, &va, ctx);
5066 }
5067
5068 if (error == 0) {
5069 int update_flags = 0;
5070
5071 /* check if a new vnode was created, else try to get one */
5072 if (vp == NULL) {
5073 nd.ni_cnd.cn_nameiop = LOOKUP;
5074 #if CONFIG_TRIGGERS
5075 nd.ni_op = OP_LOOKUP;
5076 #endif
5077 nd.ni_cnd.cn_flags = 0;
5078 error = nameiat(&nd, fd);
5079 vp = nd.ni_vp;
5080
5081 if (vp == NULL) {
5082 goto skipit;
5083 }
5084 }
5085
5086 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
5087 /* call out to allow 3rd party notification of rename.
5088 * Ignore result of kauth_authorize_fileop call.
5089 */
5090 if (kauth_authorize_fileop_has_listeners() &&
5091 namei(&nd) == 0) {
5092 char *new_link_path = NULL;
5093 int len;
5094
5095 /* build the path to the new link file */
5096 new_link_path = get_pathbuff();
5097 len = MAXPATHLEN;
5098 vn_getpath(dvp, new_link_path, &len);
5099 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
5100 new_link_path[len - 1] = '/';
5101 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN - len);
5102 }
5103
5104 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
5105 (uintptr_t)path, (uintptr_t)new_link_path);
5106 if (new_link_path != NULL) {
5107 release_pathbuff(new_link_path);
5108 }
5109 }
5110 #endif
5111 // Make sure the name & parent pointers are hooked up
5112 if (vp->v_name == NULL) {
5113 update_flags |= VNODE_UPDATE_NAME;
5114 }
5115 if (vp->v_parent == NULLVP) {
5116 update_flags |= VNODE_UPDATE_PARENT;
5117 }
5118
5119 if (update_flags) {
5120 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
5121 }
5122
5123 #if CONFIG_FSE
5124 add_fsevent(FSE_CREATE_FILE, ctx,
5125 FSE_ARG_VNODE, vp,
5126 FSE_ARG_DONE);
5127 #endif
5128 }
5129
5130 skipit:
5131 /*
5132 * nameidone has to happen before we vnode_put(dvp)
5133 * since it may need to release the fs_nodelock on the dvp
5134 */
5135 nameidone(&nd);
5136
5137 if (vp) {
5138 vnode_put(vp);
5139 }
5140 vnode_put(dvp);
5141 out:
5142 if (path && (path != (char *)path_data)) {
5143 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
5144 }
5145
5146 return error;
5147 }
5148
5149 int
5150 symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
5151 {
5152 return symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
5153 uap->link, UIO_USERSPACE);
5154 }
5155
5156 int
5157 symlinkat(__unused proc_t p, struct symlinkat_args *uap,
5158 __unused int32_t *retval)
5159 {
5160 return symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
5161 uap->path2, UIO_USERSPACE);
5162 }
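
/*
 * Minimal userspace sketch of symlink(2)/symlinkat(2); the names and the
 * directory fd are hypothetical.
 *
 *      #include <unistd.h>
 *
 *      symlink("data.txt", "/tmp/data.symlink");
 *      symlinkat("data.txt", dfd, "data.symlink");
 *
 * Note the argument order: the link contents come first and the new link
 * name second, which is why symlinkat_internal() copies in path_data (the
 * target string) before doing the CREATE lookup on link.
 */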
5163
5164 /*
5165 * Delete a whiteout from the filesystem.
5166 * No longer supported.
5167 */
5168 int
5169 undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
5170 {
5171 return ENOTSUP;
5172 }
5173
5174 /*
5175 * Delete a name from the filesystem.
5176 */
5177 /* ARGSUSED */
5178 static int
5179 unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
5180 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
5181 {
5182 struct nameidata nd;
5183 vnode_t vp, dvp;
5184 int error;
5185 struct componentname *cnp;
5186 char *path = NULL;
5187 char *no_firmlink_path = NULL;
5188 int len_path = 0;
5189 int len_no_firmlink_path = 0;
5190 #if CONFIG_FSE
5191 fse_info finfo;
5192 struct vnode_attr va;
5193 #endif
5194 int flags;
5195 int need_event;
5196 int has_listeners;
5197 int truncated_path;
5198 int truncated_no_firmlink_path;
5199 int batched;
5200 struct vnode_attr *vap;
5201 int do_retry;
5202 int retry_count = 0;
5203 int cn_flags;
5204
5205 cn_flags = LOCKPARENT;
5206 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH)) {
5207 cn_flags |= AUDITVNPATH1;
5208 }
5209 /* If a starting dvp is passed, it trumps any fd passed. */
5210 if (start_dvp) {
5211 cn_flags |= USEDVP;
5212 }
5213
5214 #if NAMEDRSRCFORK
5215 /* unlink or delete is allowed on rsrc forks and named streams */
5216 cn_flags |= CN_ALLOWRSRCFORK;
5217 #endif
5218
5219 retry:
5220 do_retry = 0;
5221 flags = 0;
5222 need_event = 0;
5223 has_listeners = 0;
5224 truncated_path = 0;
5225 truncated_no_firmlink_path = 0;
5226 vap = NULL;
5227
5228 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
5229
5230 nd.ni_dvp = start_dvp;
5231 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
5232 cnp = &nd.ni_cnd;
5233
5234 continue_lookup:
5235 error = nameiat(&nd, fd);
5236 if (error) {
5237 return error;
5238 }
5239
5240 dvp = nd.ni_dvp;
5241 vp = nd.ni_vp;
5242
5243
5244 /* With Carbon delete semantics, busy files cannot be deleted */
5245 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
5246 flags |= VNODE_REMOVE_NODELETEBUSY;
5247 }
5248
5249 /* Skip any potential upcalls if told to. */
5250 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
5251 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
5252 }
5253
5254 if (vp) {
5255 batched = vnode_compound_remove_available(vp);
5256 /*
5257 * The root of a mounted filesystem cannot be deleted.
5258 */
5259 if ((vp->v_flag & VROOT) || (dvp->v_mount != vp->v_mount)) {
5260 error = EBUSY;
5261 goto out;
5262 }
5263
5264 #if DEVELOPMENT || DEBUG
5265 /*
5266 * XXX VSWAP: Check for entitlements or special flag here
5267 * so we can restrict access appropriately.
5268 */
5269 #else /* DEVELOPMENT || DEBUG */
5270
5271 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
5272 error = EPERM;
5273 goto out;
5274 }
5275 #endif /* DEVELOPMENT || DEBUG */
5276
5277 if (!batched) {
5278 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
5279 if (error) {
5280 if (error == ENOENT) {
5281 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
5282 do_retry = 1;
5283 retry_count++;
5284 }
5285 }
5286 goto out;
5287 }
5288 }
5289 } else {
5290 batched = 1;
5291
5292 if (!vnode_compound_remove_available(dvp)) {
5293 panic("No vp, but no compound remove?");
5294 }
5295 }
5296
5297 #if CONFIG_FSE
5298 need_event = need_fsevent(FSE_DELETE, dvp);
5299 if (need_event) {
5300 if (!batched) {
5301 if ((vp->v_flag & VISHARDLINK) == 0) {
5302 /* XXX need to get these data in batched VNOP */
5303 get_fse_info(vp, &finfo, ctx);
5304 }
5305 } else {
5306 error = vfs_get_notify_attributes(&va);
5307 if (error) {
5308 goto out;
5309 }
5310
5311 vap = &va;
5312 }
5313 }
5314 #endif
5315 has_listeners = kauth_authorize_fileop_has_listeners();
5316 if (need_event || has_listeners) {
5317 if (path == NULL) {
5318 GET_PATH(path);
5319 if (path == NULL) {
5320 error = ENOMEM;
5321 goto out;
5322 }
5323 }
5324 len_path = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
5325 if (no_firmlink_path == NULL) {
5326 GET_PATH(no_firmlink_path);
5327 if (no_firmlink_path == NULL) {
5328 error = ENOMEM;
5329 goto out;
5330 }
5331 }
5332 len_no_firmlink_path = safe_getpath_no_firmlink(dvp, nd.ni_cnd.cn_nameptr, no_firmlink_path, MAXPATHLEN, &truncated_no_firmlink_path);
5333 }
5334
5335 #if NAMEDRSRCFORK
5336 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK) {
5337 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
5338 } else
5339 #endif
5340 {
5341 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
5342 vp = nd.ni_vp;
5343 if (error == EKEEPLOOKING) {
5344 if (!batched) {
5345 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
5346 }
5347
5348 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
5349 panic("EKEEPLOOKING, but continue flag not set?");
5350 }
5351
5352 if (vnode_isdir(vp)) {
5353 error = EISDIR;
5354 goto out;
5355 }
5356 goto continue_lookup;
5357 } else if (error == ENOENT && batched) {
5358 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
5359 /*
5360 * For compound VNOPs, the authorization callback may
5361 * return ENOENT in case of racing hardlink lookups
5362 * hitting the name cache, redrive the lookup.
5363 */
5364 do_retry = 1;
5365 retry_count += 1;
5366 goto out;
5367 }
5368 }
5369 }
5370
5371 /*
5372 * Call out to allow 3rd party notification of delete.
5373 * Ignore result of kauth_authorize_fileop call.
5374 */
5375 if (!error) {
5376 if (has_listeners) {
5377 kauth_authorize_fileop(vfs_context_ucred(ctx),
5378 KAUTH_FILEOP_DELETE,
5379 (uintptr_t)vp,
5380 (uintptr_t)path);
5381 }
5382
5383 if (vp->v_flag & VISHARDLINK) {
5384 //
5385 // if a hardlink gets deleted we want to blow away the
5386 // v_parent link because the path that got us to this
5387 // instance of the link is no longer valid. this will
5388 // force the next call to get the path to ask the file
5389 // system instead of just following the v_parent link.
5390 //
5391 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
5392 }
5393
5394 #if CONFIG_FSE
5395 if (need_event) {
5396 if (vp->v_flag & VISHARDLINK) {
5397 get_fse_info(vp, &finfo, ctx);
5398 } else if (vap) {
5399 vnode_get_fse_info_from_vap(vp, &finfo, vap);
5400 }
5401 if (truncated_path) {
5402 finfo.mode |= FSE_TRUNCATED_PATH;
5403 }
5404 add_fsevent(FSE_DELETE, ctx,
5405 FSE_ARG_STRING, len_no_firmlink_path, no_firmlink_path,
5406 FSE_ARG_FINFO, &finfo,
5407 FSE_ARG_DONE);
5408 }
5409 #endif
5410 }
5411
5412 out:
5413 if (path != NULL) {
5414 RELEASE_PATH(path);
5415 path = NULL;
5416 }
5417
5418 if (no_firmlink_path != NULL) {
5419 RELEASE_PATH(no_firmlink_path);
5420 no_firmlink_path = NULL;
5421 }
5422 #if NAMEDRSRCFORK
5423 /* recycle the deleted rsrc fork vnode to force a reclaim, which
5424 * will cause its shadow file to go away if necessary.
5425 */
5426 if (vp && (vnode_isnamedstream(vp)) &&
5427 (vp->v_parent != NULLVP) &&
5428 vnode_isshadow(vp)) {
5429 vnode_recycle(vp);
5430 }
5431 #endif
5432 /*
5433 * nameidone has to happen before we vnode_put(dvp)
5434 * since it may need to release the fs_nodelock on the dvp
5435 */
5436 nameidone(&nd);
5437 vnode_put(dvp);
5438 if (vp) {
5439 vnode_put(vp);
5440 }
5441
5442 if (do_retry) {
5443 goto retry;
5444 }
5445
5446 return error;
5447 }
5448
5449 int
5450 unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
5451 enum uio_seg segflg, int unlink_flags)
5452 {
5453 return unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
5454 unlink_flags);
5455 }
5456
5457 /*
5458 * Delete a name from the filesystem using Carbon semantics.
5459 */
5460 int
5461 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
5462 {
5463 return unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
5464 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY);
5465 }
5466
5467 /*
5468 * Delete a name from the filesystem using POSIX semantics.
5469 */
5470 int
5471 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
5472 {
5473 return unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
5474 uap->path, UIO_USERSPACE, 0);
5475 }
5476
5477 int
5478 unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
5479 {
5480 if (uap->flag & ~(AT_REMOVEDIR | AT_REMOVEDIR_DATALESS)) {
5481 return EINVAL;
5482 }
5483
5484 if (uap->flag & (AT_REMOVEDIR | AT_REMOVEDIR_DATALESS)) {
5485 int unlink_flags = 0;
5486
5487 if (uap->flag & AT_REMOVEDIR_DATALESS) {
5488 unlink_flags |= VNODE_REMOVE_DATALESS_DIR;
5489 }
5490 return rmdirat_internal(vfs_context_current(), uap->fd,
5491 uap->path, UIO_USERSPACE, unlink_flags);
5492 } else {
5493 return unlinkat_internal(vfs_context_current(), uap->fd,
5494 NULLVP, uap->path, UIO_USERSPACE, 0);
5495 }
5496 }
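
/*
 * Minimal userspace sketch of the unlink(2)/unlinkat(2) paths above; the
 * names and directory fd are hypothetical and error handling is omitted.
 *
 *      #include <unistd.h>
 *      #include <fcntl.h>
 *
 *      unlink("/tmp/data.txt");
 *      unlinkat(dfd, "data.txt", 0);            // file, relative to dfd
 *      unlinkat(dfd, "scratch", AT_REMOVEDIR);  // directory: rmdir path
 *
 * AT_REMOVEDIR diverts to rmdirat_internal() rather than the
 * unlinkat_internal() file path.
 */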
5497
5498 /*
5499 * Reposition read/write file offset.
5500 */
5501 int
5502 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
5503 {
5504 struct fileproc *fp;
5505 vnode_t vp;
5506 struct vfs_context *ctx;
5507 off_t offset = uap->offset, file_size;
5508 int error;
5509
5510 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
5511 if (error == ENOTSUP) {
5512 return ESPIPE;
5513 }
5514 return error;
5515 }
5516 if (vnode_isfifo(vp)) {
5517 file_drop(uap->fd);
5518 return ESPIPE;
5519 }
5520
5521
5522 ctx = vfs_context_current();
5523 #if CONFIG_MACF
5524 if (uap->whence == L_INCR && uap->offset == 0) {
5525 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
5526 fp->f_fglob);
5527 } else {
5528 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
5529 fp->f_fglob);
5530 }
5531 if (error) {
5532 file_drop(uap->fd);
5533 return error;
5534 }
5535 #endif
5536 if ((error = vnode_getwithref(vp))) {
5537 file_drop(uap->fd);
5538 return error;
5539 }
5540
5541 switch (uap->whence) {
5542 case L_INCR:
5543 offset += fp->f_fglob->fg_offset;
5544 break;
5545 case L_XTND:
5546 if ((error = vnode_size(vp, &file_size, ctx)) != 0) {
5547 break;
5548 }
5549 offset += file_size;
5550 break;
5551 case L_SET:
5552 break;
5553 case SEEK_HOLE:
5554 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKHOLE, (caddr_t)&offset, 0, ctx);
5555 break;
5556 case SEEK_DATA:
5557 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKDATA, (caddr_t)&offset, 0, ctx);
5558 break;
5559 default:
5560 error = EINVAL;
5561 }
5562 if (error == 0) {
5563 if (uap->offset > 0 && offset < 0) {
5564 /* Incremented/relative move past max size */
5565 error = EOVERFLOW;
5566 } else {
5567 /*
5568 * Allow negative offsets on character devices, per
5569 * POSIX 1003.1-2001. Most likely for writing disk
5570 * labels.
5571 */
5572 if (offset < 0 && vp->v_type != VCHR) {
5573 /* Decremented/relative move before start */
5574 error = EINVAL;
5575 } else {
5576 /* Success */
5577 fp->f_fglob->fg_offset = offset;
5578 *retval = fp->f_fglob->fg_offset;
5579 }
5580 }
5581 }
5582
5583 /*
5584 * An lseek can affect whether data is "available to read." Use
5585 * hint of NOTE_NONE so no EVFILT_VNODE events fire
5586 */
5587 post_event_if_success(vp, error, NOTE_NONE);
5588 (void)vnode_put(vp);
5589 file_drop(uap->fd);
5590 return error;
5591 }
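
/*
 * Minimal userspace sketch of the lseek(2) whence values handled above,
 * including the SEEK_HOLE/SEEK_DATA cases forwarded to the filesystem via
 * VNOP_IOCTL; fd is hypothetical and SEEK_HOLE/SEEK_DATA availability
 * depends on the SDK.
 *
 *      #include <unistd.h>
 *
 *      off_t end  = lseek(fd, 0, SEEK_END);      // L_XTND path
 *      off_t cur  = lseek(fd, 0, SEEK_CUR);      // L_INCR path
 *      off_t hole = lseek(fd, 0, SEEK_HOLE);     // first hole at/after 0
 *      off_t data = lseek(fd, hole, SEEK_DATA);  // next data after hole
 *
 * A negative resulting offset is rejected with EINVAL except on character
 * devices, and a relative move past the maximum offset returns EOVERFLOW,
 * as in the checks above.
 */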
5592
5593
5594 /*
5595 * Check access permissions.
5596 *
5597 * Returns: 0 Success
5598 * vnode_authorize:???
5599 */
5600 static int
5601 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
5602 {
5603 kauth_action_t action;
5604 int error;
5605
5606 /*
5607 * If just the regular access bits, convert them to something
5608 * that vnode_authorize will understand.
5609 */
5610 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
5611 action = 0;
5612 if (uflags & R_OK) {
5613 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5614 }
5615 if (uflags & W_OK) {
5616 if (vnode_isdir(vp)) {
5617 action |= KAUTH_VNODE_ADD_FILE |
5618 KAUTH_VNODE_ADD_SUBDIRECTORY;
5619 /* might want delete rights here too */
5620 } else {
5621 action |= KAUTH_VNODE_WRITE_DATA;
5622 }
5623 }
5624 if (uflags & X_OK) {
5625 if (vnode_isdir(vp)) {
5626 action |= KAUTH_VNODE_SEARCH;
5627 } else {
5628 action |= KAUTH_VNODE_EXECUTE;
5629 }
5630 }
5631 } else {
5632 /* take advantage of definition of uflags */
5633 action = uflags >> 8;
5634 }
5635
5636 #if CONFIG_MACF
5637 error = mac_vnode_check_access(ctx, vp, uflags);
5638 if (error) {
5639 return error;
5640 }
5641 #endif /* MAC */
5642
5643 /* action == 0 means only check for existence */
5644 if (action != 0) {
5645 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
5646 } else {
5647 error = 0;
5648 }
5649
5650 return error;
5651 }
5652
5653
5654
5655 /*
5656 * access_extended: Check access permissions in bulk.
5657 *
5658 * Description: uap->entries Pointer to an array of accessx
5659 * descriptor structs, plus one or
5660 * more NULL terminated strings (see
5661 * "Notes" section below).
5662 * uap->size Size of the area pointed to by
5663 * uap->entries.
5664 * uap->results Pointer to the results array.
5665 *
5666 * Returns: 0 Success
5667 * ENOMEM Insufficient memory
5668 * EINVAL Invalid arguments
5669 * namei:EFAULT Bad address
5670 * namei:ENAMETOOLONG Filename too long
5671 * namei:ENOENT No such file or directory
5672 * namei:ELOOP Too many levels of symbolic links
5673 * namei:EBADF Bad file descriptor
5674 * namei:ENOTDIR Not a directory
5675 * namei:???
5676 * access1:
5677 *
5678 * Implicit returns:
5679 * uap->results Array contents modified
5680 *
5681 * Notes: The uap->entries are structured as an arbitrary length array
5682 * of accessx descriptors, followed by one or more NULL terminated
5683 * strings
5684 *
5685 * struct accessx_descriptor[0]
5686 * ...
5687 * struct accessx_descriptor[n]
5688 * char name_data[0];
5689 *
5690 * We determine the entry count by walking the buffer containing
5691 * the uap->entries argument descriptor. For each descriptor we
5692 * see, the valid values for the offset ad_name_offset will be
5693 * in the byte range:
5694 *
5695 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5696 * to
5697 * [ uap->entries + uap->size - 2 ]
5698 *
5699 * since we must have at least one string, and the string must
5700 * be at least one character plus the NULL terminator in length.
5701 *
5702 * XXX: Need to support the check-as uid argument
5703 */
5704 int
5705 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
5706 {
5707 struct accessx_descriptor *input = NULL;
5708 errno_t *result = NULL;
5709 errno_t error = 0;
5710 int wantdelete = 0;
5711 unsigned int desc_max, desc_actual, i, j;
5712 struct vfs_context context;
5713 struct nameidata nd;
5714 int niopts;
5715 vnode_t vp = NULL;
5716 vnode_t dvp = NULL;
5717 #define ACCESSX_MAX_DESCR_ON_STACK 10
5718 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
5719
5720 context.vc_ucred = NULL;
5721
5722 /*
5723 * Validate parameters; if valid, copy the descriptor array and string
5724 * arguments into local memory. Before proceeding, the following
5725 * conditions must have been met:
5726 *
5727 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5728 * o There must be sufficient room in the request for at least one
5729 * descriptor and a one byte NUL terminated string.
5730 * o The allocation of local storage must not fail.
5731 */
5732 if (uap->size > ACCESSX_MAX_TABLESIZE) {
5733 return ENOMEM;
5734 }
5735 if (uap->size < (sizeof(struct accessx_descriptor) + 2)) {
5736 return EINVAL;
5737 }
5738 if (uap->size <= sizeof(stack_input)) {
5739 input = stack_input;
5740 } else {
5741 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
5742 if (input == NULL) {
5743 error = ENOMEM;
5744 goto out;
5745 }
5746 }
5747 error = copyin(uap->entries, input, uap->size);
5748 if (error) {
5749 goto out;
5750 }
5751
5752 AUDIT_ARG(opaque, input, uap->size);
5753
5754 /*
5755 * Force NUL termination of the copyin buffer to avoid namei() running
5756 * off the end. If the caller passes us bogus data, they may get a
5757 * bogus result.
5758 */
5759 ((char *)input)[uap->size - 1] = 0;
5760
5761 /*
5762 * Access is defined as checking against the process' real identity,
5763 * even if operations are checking the effective identity. This
5764 * requires that we use a local vfs context.
5765 */
5766 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5767 context.vc_thread = current_thread();
5768
5769 /*
5770 * Find out how many entries we have, so we can allocate the result
5771 * array by walking the list and adjusting the count downward by the
5772 * earliest string offset we see.
5773 */
5774 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
5775 desc_actual = desc_max;
5776 for (i = 0; i < desc_actual; i++) {
5777 /*
5778 * Take the offset to the name string for this entry and
5779 * convert to an input array index, which would be one off
5780 * the end of the array if this entry was the lowest-addressed
5781 * name string.
5782 */
5783 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
5784
5785 /*
5786 * An offset greater than the max allowable offset is an error.
5787 * It is also an error for any valid entry to point
5788 * to a location prior to the end of the current entry, if
5789 * it's not a reference to the string of the previous entry.
5790 */
5791 if (j > desc_max || (j != 0 && j <= i)) {
5792 error = EINVAL;
5793 goto out;
5794 }
5795
5796 /* Also do not let ad_name_offset point to something beyond the size of the input */
5797 if (input[i].ad_name_offset >= uap->size) {
5798 error = EINVAL;
5799 goto out;
5800 }
5801
5802 /*
5803 * An offset of 0 means use the previous descriptor's offset;
5804 * this is used to chain multiple requests for the same file
5805 * to avoid multiple lookups.
5806 */
5807 if (j == 0) {
5808 /* This is not valid for the first entry */
5809 if (i == 0) {
5810 error = EINVAL;
5811 goto out;
5812 }
5813 continue;
5814 }
5815
5816 /*
5817 * If the offset of the string for this descriptor is before
5818 * what we believe is the current actual last descriptor,
5819 * then we need to adjust our estimate downward; this permits
5820 * the string table following the last descriptor to be out
5821 * of order relative to the descriptor list.
5822 */
5823 if (j < desc_actual) {
5824 desc_actual = j;
5825 }
5826 }
5827
5828 /*
5829 * We limit the actual number of descriptors we are willing to process
5830 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5831 * requested exceeds this limit, we fail the request with ENOMEM.
5832 */
5833 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
5834 error = ENOMEM;
5835 goto out;
5836 }
5837 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK | M_ZERO);
5838 if (result == NULL) {
5839 error = ENOMEM;
5840 goto out;
5841 }
5842
5843 /*
5844 * Do the work by iterating over the descriptor entries we know to
5845 * at least appear to contain valid data.
5846 */
5847 error = 0;
5848 for (i = 0; i < desc_actual; i++) {
5849 /*
5850 * If the ad_name_offset is 0, then we use the previous
5851 * results to make the check; otherwise, we are looking up
5852 * a new file name.
5853 */
5854 if (input[i].ad_name_offset != 0) {
5855 /* discard old vnodes */
5856 if (vp) {
5857 vnode_put(vp);
5858 vp = NULL;
5859 }
5860 if (dvp) {
5861 vnode_put(dvp);
5862 dvp = NULL;
5863 }
5864
5865 /*
5866 * Scan forward in the descriptor list to see if we
5867 * need the parent vnode. We will need it if we are
5868 * deleting, since we must have rights to remove
5869 * entries in the parent directory, as well as the
5870 * rights to delete the object itself.
5871 */
5872 wantdelete = input[i].ad_flags & _DELETE_OK;
5873 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++) {
5874 if (input[j].ad_flags & _DELETE_OK) {
5875 wantdelete = 1;
5876 }
5877 }
5878
5879 niopts = FOLLOW | AUDITVNPATH1;
5880
5881 /* need parent for vnode_authorize for deletion test */
5882 if (wantdelete) {
5883 niopts |= WANTPARENT;
5884 }
5885
5886 /* do the lookup */
5887 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
5888 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
5889 &context);
5890 error = namei(&nd);
5891 if (!error) {
5892 vp = nd.ni_vp;
5893 if (wantdelete) {
5894 dvp = nd.ni_dvp;
5895 }
5896 }
5897 nameidone(&nd);
5898 }
5899
5900 /*
5901 * Handle lookup errors.
5902 */
5903 switch (error) {
5904 case ENOENT:
5905 case EACCES:
5906 case EPERM:
5907 case ENOTDIR:
5908 result[i] = error;
5909 break;
5910 case 0:
5911 /* run this access check */
5912 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
5913 break;
5914 default:
5915 /* fatal lookup error */
5916
5917 goto out;
5918 }
5919 }
5920
5921 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
5922
5923 /* copy out results */
5924 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
5925
5926 out:
5927 if (input && input != stack_input) {
5928 FREE(input, M_TEMP);
5929 }
5930 if (result) {
5931 FREE(result, M_TEMP);
5932 }
5933 if (vp) {
5934 vnode_put(vp);
5935 }
5936 if (dvp) {
5937 vnode_put(dvp);
5938 }
5939 if (IS_VALID_CRED(context.vc_ucred)) {
5940 kauth_cred_unref(&context.vc_ucred);
5941 }
5942 return error;
5943 }
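
/*
 * Sketch of the buffer layout access_extended() expects, as a userspace
 * caller of accessx_np() might build it; the accessx_np() prototype and
 * struct accessx_descriptor field names are assumptions taken from
 * <unistd.h>/<sys/unistd.h>, and the path is hypothetical. Two checks
 * against one path, using the ad_name_offset == 0 chaining described
 * above:
 *
 *      struct accessx_descriptor *d;
 *      char buf[2 * sizeof(*d) + 32];
 *      int results[2];
 *
 *      memset(buf, 0, sizeof(buf));
 *      d = (struct accessx_descriptor *)buf;
 *      d[0].ad_name_offset = 2 * sizeof(*d);   // string follows the array
 *      d[0].ad_flags = R_OK;
 *      d[1].ad_name_offset = 0;                // reuse the previous name
 *      d[1].ad_flags = W_OK;
 *      strlcpy(buf + 2 * sizeof(*d), "/tmp/data.txt", 32);
 *
 *      accessx_np(d, sizeof(buf), results, -1);
 *
 * The final uid argument is the "check-as uid" noted as unsupported in
 * the XXX above.
 */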
5944
5945
5946 /*
5947 * Returns: 0 Success
5948 * namei:EFAULT Bad address
5949 * namei:ENAMETOOLONG Filename too long
5950 * namei:ENOENT No such file or directory
5951 * namei:ELOOP Too many levels of symbolic links
5952 * namei:EBADF Bad file descriptor
5953 * namei:ENOTDIR Not a directory
5954 * namei:???
5955 * access1:
5956 */
5957 static int
5958 faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5959 int flag, enum uio_seg segflg)
5960 {
5961 int error;
5962 struct nameidata nd;
5963 int niopts;
5964 struct vfs_context context;
5965 #if NAMEDRSRCFORK
5966 int is_namedstream = 0;
5967 #endif
5968
5969 /*
5970 * Unless the AT_EACCESS option is used, Access is defined as checking
5971 * against the process' real identity, even if operations are checking
5972 * the effective identity. So we need to tweak the credential
5973 * in the context for that case.
5974 */
5975 if (!(flag & AT_EACCESS)) {
5976 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5977 } else {
5978 context.vc_ucred = ctx->vc_ucred;
5979 }
5980 context.vc_thread = ctx->vc_thread;
5981
5982
5983 niopts = (flag & AT_SYMLINK_NOFOLLOW ? NOFOLLOW : FOLLOW) | AUDITVNPATH1;
5984 /* need parent for vnode_authorize for deletion test */
5985 if (amode & _DELETE_OK) {
5986 niopts |= WANTPARENT;
5987 }
5988 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5989 path, &context);
5990
5991 #if NAMEDRSRCFORK
5992 /* access(F_OK) calls are allowed for resource forks. */
5993 if (amode == F_OK) {
5994 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5995 }
5996 #endif
5997 error = nameiat(&nd, fd);
5998 if (error) {
5999 goto out;
6000 }
6001
6002 #if NAMEDRSRCFORK
6003 /* Grab reference on the shadow stream file vnode to
6004 * force an inactive on release which will mark it
6005 * for recycle.
6006 */
6007 if (vnode_isnamedstream(nd.ni_vp) &&
6008 (nd.ni_vp->v_parent != NULLVP) &&
6009 vnode_isshadow(nd.ni_vp)) {
6010 is_namedstream = 1;
6011 vnode_ref(nd.ni_vp);
6012 }
6013 #endif
6014
6015 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
6016
6017 #if NAMEDRSRCFORK
6018 if (is_namedstream) {
6019 vnode_rele(nd.ni_vp);
6020 }
6021 #endif
6022
6023 vnode_put(nd.ni_vp);
6024 if (amode & _DELETE_OK) {
6025 vnode_put(nd.ni_dvp);
6026 }
6027 nameidone(&nd);
6028
6029 out:
6030 if (!(flag & AT_EACCESS)) {
6031 kauth_cred_unref(&context.vc_ucred);
6032 }
6033 return error;
6034 }
6035
6036 int
6037 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
6038 {
6039 return faccessat_internal(vfs_context_current(), AT_FDCWD,
6040 uap->path, uap->flags, 0, UIO_USERSPACE);
6041 }
6042
6043 int
6044 faccessat(__unused proc_t p, struct faccessat_args *uap,
6045 __unused int32_t *retval)
6046 {
6047 if (uap->flag & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW)) {
6048 return EINVAL;
6049 }
6050
6051 return faccessat_internal(vfs_context_current(), uap->fd,
6052 uap->path, uap->amode, uap->flag, UIO_USERSPACE);
6053 }
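
/*
 * Minimal userspace sketch of access(2)/faccessat(2); the names and
 * directory fd are hypothetical. AT_EACCESS switches the check from the
 * real to the effective identity, mirroring the credential handling in
 * faccessat_internal() above.
 *
 *      #include <unistd.h>
 *      #include <fcntl.h>
 *
 *      if (access("/tmp/data.txt", R_OK | W_OK) == 0) {
 *              // readable and writable by the real uid/gid
 *      }
 *      faccessat(dfd, "data.txt", X_OK, AT_EACCESS);
 *      faccessat(AT_FDCWD, "link", F_OK, AT_SYMLINK_NOFOLLOW);
 */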
6054
6055 /*
6056 * Returns: 0 Success
6057 * EFAULT
6058 * copyout:EFAULT
6059 * namei:???
6060 * vn_stat:???
6061 */
6062 static int
6063 fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
6064 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
6065 enum uio_seg segflg, int fd, int flag)
6066 {
6067 struct nameidata nd;
6068 int follow;
6069 union {
6070 struct stat sb;
6071 struct stat64 sb64;
6072 } source = {};
6073 union {
6074 struct user64_stat user64_sb;
6075 struct user32_stat user32_sb;
6076 struct user64_stat64 user64_sb64;
6077 struct user32_stat64 user32_sb64;
6078 } dest = {};
6079 caddr_t sbp;
6080 int error, my_size;
6081 kauth_filesec_t fsec;
6082 size_t xsecurity_bufsize;
6083 void * statptr;
6084 struct fileproc *fp = NULL;
6085 int needsrealdev = 0;
6086
6087 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6088 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
6089 segflg, path, ctx);
6090
6091 #if NAMEDRSRCFORK
6092 int is_namedstream = 0;
6093 /* stat calls are allowed for resource forks. */
6094 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
6095 #endif
6096
6097 if (flag & AT_FDONLY) {
6098 vnode_t fvp;
6099
6100 error = fp_getfvp(vfs_context_proc(ctx), fd, &fp, &fvp);
6101 if (error) {
6102 return error;
6103 }
6104 if ((error = vnode_getwithref(fvp))) {
6105 file_drop(fd);
6106 return error;
6107 }
6108 nd.ni_vp = fvp;
6109 } else {
6110 error = nameiat(&nd, fd);
6111 if (error) {
6112 return error;
6113 }
6114 }
6115 fsec = KAUTH_FILESEC_NONE;
6116
6117 statptr = (void *)&source;
6118
6119 #if NAMEDRSRCFORK
6120 /* Grab reference on the shadow stream file vnode to
6121 * force an inactive on release which will mark it
6122 * for recycle.
6123 */
6124 if (vnode_isnamedstream(nd.ni_vp) &&
6125 (nd.ni_vp->v_parent != NULLVP) &&
6126 vnode_isshadow(nd.ni_vp)) {
6127 is_namedstream = 1;
6128 vnode_ref(nd.ni_vp);
6129 }
6130 #endif
6131
6132 needsrealdev = flag & AT_REALDEV ? 1 : 0;
6133 if (fp && (xsecurity == USER_ADDR_NULL)) {
6134 /*
6135 * If the caller has the file open, and is not
6136 * requesting extended security information, we are
6137 * going to let them get the basic stat information.
6138 */
6139 error = vn_stat_noauth(nd.ni_vp, statptr, NULL, isstat64, needsrealdev, ctx,
6140 fp->f_fglob->fg_cred);
6141 } else {
6142 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL),
6143 isstat64, needsrealdev, ctx);
6144 }
6145
6146 #if NAMEDRSRCFORK
6147 if (is_namedstream) {
6148 vnode_rele(nd.ni_vp);
6149 }
6150 #endif
6151 vnode_put(nd.ni_vp);
6152 nameidone(&nd);
6153 if (fp) {
6154 file_drop(fd);
6155 fp = NULL;
6156 }
6157
6158 if (error) {
6159 return error;
6160 }
6161 /* Zap spare fields */
6162 if (isstat64 != 0) {
6163 source.sb64.st_lspare = 0;
6164 source.sb64.st_qspare[0] = 0LL;
6165 source.sb64.st_qspare[1] = 0LL;
6166 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
6167 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
6168 my_size = sizeof(dest.user64_sb64);
6169 sbp = (caddr_t)&dest.user64_sb64;
6170 } else {
6171 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
6172 my_size = sizeof(dest.user32_sb64);
6173 sbp = (caddr_t)&dest.user32_sb64;
6174 }
6175 /*
6176 * Check if we raced (post lookup) against the last unlink of a file.
6177 */
6178 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
6179 source.sb64.st_nlink = 1;
6180 }
6181 } else {
6182 source.sb.st_lspare = 0;
6183 source.sb.st_qspare[0] = 0LL;
6184 source.sb.st_qspare[1] = 0LL;
6185 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
6186 munge_user64_stat(&source.sb, &dest.user64_sb);
6187 my_size = sizeof(dest.user64_sb);
6188 sbp = (caddr_t)&dest.user64_sb;
6189 } else {
6190 munge_user32_stat(&source.sb, &dest.user32_sb);
6191 my_size = sizeof(dest.user32_sb);
6192 sbp = (caddr_t)&dest.user32_sb;
6193 }
6194
6195 /*
6196 * Check if we raced (post lookup) against the last unlink of a file.
6197 */
6198 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
6199 source.sb.st_nlink = 1;
6200 }
6201 }
6202 if ((error = copyout(sbp, ub, my_size)) != 0) {
6203 goto out;
6204 }
6205
6206 /* caller wants extended security information? */
6207 if (xsecurity != USER_ADDR_NULL) {
6208 /* did we get any? */
6209 if (fsec == KAUTH_FILESEC_NONE) {
6210 if (susize(xsecurity_size, 0) != 0) {
6211 error = EFAULT;
6212 goto out;
6213 }
6214 } else {
6215 /* find the user buffer size */
6216 xsecurity_bufsize = fusize(xsecurity_size);
6217
6218 /* copy out the actual data size */
6219 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
6220 error = EFAULT;
6221 goto out;
6222 }
6223
6224 /* if the caller supplied enough room, copy out to it */
6225 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) {
6226 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
6227 }
6228 }
6229 }
6230 out:
6231 if (fsec != KAUTH_FILESEC_NONE) {
6232 kauth_filesec_free(fsec);
6233 }
6234 return error;
6235 }
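
/*
 * Minimal userspace sketch of the stat family that funnels into
 * fstatat_internal(); the path and directory fd are hypothetical and
 * error handling is omitted.
 *
 *      #include <sys/stat.h>
 *      #include <fcntl.h>
 *
 *      struct stat sb;
 *
 *      stat("/tmp/data.txt", &sb);                          // follows symlinks
 *      lstat("/tmp/data.txt", &sb);                         // does not follow
 *      fstatat(dfd, "data.txt", &sb, AT_SYMLINK_NOFOLLOW);  // relative to dfd
 */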
6236
6237 /*
6238 * stat_extended: Get file status; with extended security (ACL).
6239 *
6240 * Parameters: p (ignored)
6241 * uap User argument descriptor (see below)
6242 * retval (ignored)
6243 *
6244 * Indirect: uap->path Path of file to get status from
6245 * uap->ub User buffer (holds file status info)
6246 * uap->xsecurity ACL to get (extended security)
6247 * uap->xsecurity_size Size of ACL
6248 *
6249 * Returns: 0 Success
6250 * !0 errno value
6251 *
6252 */
6253 int
6254 stat_extended(__unused proc_t p, struct stat_extended_args *uap,
6255 __unused int32_t *retval)
6256 {
6257 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6258 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
6259 0);
6260 }
6261
6262 /*
6263 * Returns: 0 Success
6264 * fstatat_internal:??? [see fstatat_internal() in this file]
6265 */
6266 int
6267 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
6268 {
6269 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6270 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0);
6271 }
6272
6273 int
6274 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
6275 {
6276 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6277 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0);
6278 }
6279
6280 /*
6281 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
6282 *
6283 * Parameters: p (ignored)
6284 * uap User argument descriptor (see below)
6285 * retval (ignored)
6286 *
6287 * Indirect: uap->path Path of file to get status from
6288 * uap->ub User buffer (holds file status info)
6289 * uap->xsecurity ACL to get (extended security)
6290 * uap->xsecurity_size Size of ACL
6291 *
6292 * Returns: 0 Success
6293 * !0 errno value
6294 *
6295 */
6296 int
6297 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
6298 {
6299 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6300 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
6301 0);
6302 }
6303
6304 /*
6305 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
6306 *
6307 * Parameters: p (ignored)
6308 * uap User argument descriptor (see below)
6309 * retval (ignored)
6310 *
6311 * Indirect: uap->path Path of file to get status from
6312 * uap->ub User buffer (holds file status info)
6313 * uap->xsecurity ACL to get (extended security)
6314 * uap->xsecurity_size Size of ACL
6315 *
6316 * Returns: 0 Success
6317 * !0 errno value
6318 *
6319 */
6320 int
6321 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
6322 {
6323 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6324 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
6325 AT_SYMLINK_NOFOLLOW);
6326 }
6327
6328 /*
6329 * Get file status; this version does not follow links.
6330 */
6331 int
6332 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
6333 {
6334 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6335 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW);
6336 }
6337
6338 int
6339 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
6340 {
6341 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6342 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW);
6343 }
6344
6345 /*
6346 * lstat64_extended: Get file status; can handle large inode numbers; does not
6347 * follow links; with extended security (ACL).
6348 *
6349 * Parameters: p (ignored)
6350 * uap User argument descriptor (see below)
6351 * retval (ignored)
6352 *
6353 * Indirect: uap->path Path of file to get status from
6354 * uap->ub User buffer (holds file status info)
6355 * uap->xsecurity ACL to get (extended security)
6356 * uap->xsecurity_size Size of ACL
6357 *
6358 * Returns: 0 Success
6359 * !0 errno value
6360 *
6361 */
6362 int
6363 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
6364 {
6365 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6366 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
6367 AT_SYMLINK_NOFOLLOW);
6368 }
6369
6370 int
6371 fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
6372 {
6373 if (uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_REALDEV | AT_FDONLY)) {
6374 return EINVAL;
6375 }
6376
6377 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6378 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag);
6379 }
6380
6381 int
6382 fstatat64(__unused proc_t p, struct fstatat64_args *uap,
6383 __unused int32_t *retval)
6384 {
6385 if (uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_REALDEV | AT_FDONLY)) {
6386 return EINVAL;
6387 }
6388
6389 return fstatat_internal(vfs_context_current(), uap->path, uap->ub,
6390 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag);
6391 }
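/*
 * Illustrative userspace sketch (not compiled into the kernel): how a caller
 * typically reaches fstatat()/fstatat64() above.  AT_SYMLINK_NOFOLLOW selects
 * the lstat-style behaviour; any flag outside the mask checked above results
 * in EINVAL.  The file name "example.txt" is a placeholder.
 */
#if 0 /* userspace example only */
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>

static void
show_size(int dirfd, const char *name)
{
	struct stat sb;

	/* Do not follow a trailing symlink, mirroring lstat() semantics. */
	if (fstatat(dirfd, name, &sb, AT_SYMLINK_NOFOLLOW) == -1) {
		perror("fstatat");
		return;
	}
	printf("%s: %lld bytes, %u links\n", name,
	    (long long)sb.st_size, (unsigned)sb.st_nlink);
}

/* Usage: show_size(AT_FDCWD, "example.txt"); */
#endif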
6392
6393 /*
6394 * Get configurable pathname variables.
6395 *
6396 * Returns: 0 Success
6397 * namei:???
6398 * vn_pathconf:???
6399 *
6400 * Notes: Global implementation constants are intended to be
6401 * implemented in this function directly; all other constants
6402 * are per-FS implementation, and therefore must be handled in
6403 * each respective FS, instead.
6404 *
6405 * XXX We implement some things globally right now that should actually be
6406 * XXX per-FS; we will need to deal with this at some point.
6407 */
6408 /* ARGSUSED */
6409 int
6410 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
6411 {
6412 int error;
6413 struct nameidata nd;
6414 vfs_context_t ctx = vfs_context_current();
6415
6416 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
6417 UIO_USERSPACE, uap->path, ctx);
6418 error = namei(&nd);
6419 if (error) {
6420 return error;
6421 }
6422
6423 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
6424
6425 vnode_put(nd.ni_vp);
6426 nameidone(&nd);
6427 return error;
6428 }
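/*
 * Illustrative userspace sketch (not compiled into the kernel): querying a
 * per-filesystem limit through pathconf(2), which lands in the handler above
 * and is mostly forwarded to the filesystem via vn_pathconf().  A -1 return
 * with errno left at 0 conventionally means "no limit".  "/tmp" is just a
 * placeholder path.
 */
#if 0 /* userspace example only */
#include <unistd.h>
#include <errno.h>
#include <stdio.h>

static void
print_name_max(const char *path)
{
	long name_max;

	errno = 0;
	name_max = pathconf(path, _PC_NAME_MAX);
	if (name_max == -1 && errno != 0) {
		perror("pathconf");
	} else if (name_max == -1) {
		printf("%s: no NAME_MAX limit\n", path);
	} else {
		printf("%s: NAME_MAX = %ld\n", path, name_max);
	}
}
#endif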
6429
6430 /*
6431 * Return target name of a symbolic link.
6432 */
6433 /* ARGSUSED */
6434 static int
6435 readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
6436 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
6437 int *retval)
6438 {
6439 vnode_t vp;
6440 uio_t auio;
6441 int error;
6442 struct nameidata nd;
6443 char uio_buf[UIO_SIZEOF(1)];
6444
6445 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
6446 seg, path, ctx);
6447
6448 error = nameiat(&nd, fd);
6449 if (error) {
6450 return error;
6451 }
6452 vp = nd.ni_vp;
6453
6454 nameidone(&nd);
6455
6456 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
6457 &uio_buf[0], sizeof(uio_buf));
6458 uio_addiov(auio, buf, bufsize);
6459 if (vp->v_type != VLNK) {
6460 error = EINVAL;
6461 } else {
6462 #if CONFIG_MACF
6463 error = mac_vnode_check_readlink(ctx, vp);
6464 #endif
6465 if (error == 0) {
6466 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
6467 ctx);
6468 }
6469 if (error == 0) {
6470 error = VNOP_READLINK(vp, auio, ctx);
6471 }
6472 }
6473 vnode_put(vp);
6474
6475 *retval = bufsize - (int)uio_resid(auio);
6476 return error;
6477 }
6478
6479 int
6480 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
6481 {
6482 enum uio_seg procseg;
6483
6484 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6485 return readlinkat_internal(vfs_context_current(), AT_FDCWD,
6486 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
6487 uap->count, procseg, retval);
6488 }
6489
6490 int
6491 readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
6492 {
6493 enum uio_seg procseg;
6494
6495 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6496 return readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
6497 procseg, uap->buf, uap->bufsize, procseg, retval);
6498 }
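/*
 * Illustrative userspace sketch (not compiled into the kernel): readlink(2)
 * as serviced by readlinkat_internal() above.  The return value is the number
 * of bytes copied and the buffer is NOT NUL-terminated, so the caller must
 * terminate it explicitly.  "/tmp/link" is a placeholder path.
 */
#if 0 /* userspace example only */
#include <unistd.h>
#include <limits.h>
#include <stdio.h>

static void
print_link_target(const char *linkpath)
{
	char target[PATH_MAX];
	ssize_t n = readlink(linkpath, target, sizeof(target) - 1);

	if (n == -1) {
		perror("readlink");
		return;
	}
	target[n] = '\0';	/* kernel does not terminate the string */
	printf("%s -> %s\n", linkpath, target);
}
#endif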
6499
6500 /*
6501 * Change file flags, the deep inner layer.
6502 */
6503 static int
6504 chflags0(vnode_t vp, struct vnode_attr *va,
6505 int (*setattr)(vnode_t, void *, vfs_context_t),
6506 void *arg, vfs_context_t ctx)
6507 {
6508 kauth_action_t action = 0;
6509 int error;
6510
6511 #if CONFIG_MACF
6512 error = mac_vnode_check_setflags(ctx, vp, va->va_flags);
6513 if (error) {
6514 goto out;
6515 }
6516 #endif
6517
6518 /* request authorisation, disregard immutability */
6519 if ((error = vnode_authattr(vp, va, &action, ctx)) != 0) {
6520 goto out;
6521 }
6522 /*
6523 * Request that the auth layer disregard those file flags it's allowed to when
6524 * authorizing this operation; we need to do this in order to be able to
6525 * clear immutable flags.
6526 */
6527 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0)) {
6528 goto out;
6529 }
6530 error = (*setattr)(vp, arg, ctx);
6531
6532 #if CONFIG_MACF
6533 if (error == 0) {
6534 mac_vnode_notify_setflags(ctx, vp, va->va_flags);
6535 }
6536 #endif
6537
6538 out:
6539 return error;
6540 }
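/*
 * Illustrative userspace sketch (not compiled into the kernel): clearing the
 * user-immutable flag with chflags(2).  This is the case that motivates the
 * KAUTH_VNODE_NOIMMUTABLE request above: without it the authorization layer
 * would refuse the very operation needed to clear the immutable bit.
 * "locked.txt" is a placeholder name.
 */
#if 0 /* userspace example only */
#include <sys/stat.h>
#include <unistd.h>
#include <stdio.h>

static int
unlock_file(const char *path)
{
	struct stat sb;

	if (stat(path, &sb) == -1) {
		perror("stat");
		return -1;
	}
	/* Drop UF_IMMUTABLE while preserving any other flags. */
	if (chflags(path, sb.st_flags & ~UF_IMMUTABLE) == -1) {
		perror("chflags");
		return -1;
	}
	return 0;
}
#endif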
6541
6542 /*
6543 * Change file flags.
6544 *
6545 * NOTE: this will vnode_put() `vp'
6546 */
6547 static int
6548 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
6549 {
6550 struct vnode_attr va;
6551 int error;
6552
6553 VATTR_INIT(&va);
6554 VATTR_SET(&va, va_flags, flags);
6555
6556 error = chflags0(vp, &va, (void *)vnode_setattr, &va, ctx);
6557 vnode_put(vp);
6558
6559 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
6560 error = ENOTSUP;
6561 }
6562
6563 return error;
6564 }
6565
6566 /*
6567 * Change flags of a file given a path name.
6568 */
6569 /* ARGSUSED */
6570 int
6571 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
6572 {
6573 vnode_t vp;
6574 vfs_context_t ctx = vfs_context_current();
6575 int error;
6576 struct nameidata nd;
6577
6578 AUDIT_ARG(fflags, uap->flags);
6579 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
6580 UIO_USERSPACE, uap->path, ctx);
6581 error = namei(&nd);
6582 if (error) {
6583 return error;
6584 }
6585 vp = nd.ni_vp;
6586 nameidone(&nd);
6587
6588 /* we don't vnode_put() here because chflags1 does it internally */
6589 error = chflags1(vp, uap->flags, ctx);
6590
6591 return error;
6592 }
6593
6594 /*
6595 * Change flags of a file given a file descriptor.
6596 */
6597 /* ARGSUSED */
6598 int
6599 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
6600 {
6601 vnode_t vp;
6602 int error;
6603
6604 AUDIT_ARG(fd, uap->fd);
6605 AUDIT_ARG(fflags, uap->flags);
6606 if ((error = file_vnode(uap->fd, &vp))) {
6607 return error;
6608 }
6609
6610 if ((error = vnode_getwithref(vp))) {
6611 file_drop(uap->fd);
6612 return error;
6613 }
6614
6615 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6616
6617 /* we don't vnode_put() here because chflags1 does it internally */
6618 error = chflags1(vp, uap->flags, vfs_context_current());
6619
6620 file_drop(uap->fd);
6621 return error;
6622 }
6623
6624 /*
6625 * Change security information on a filesystem object.
6626 *
6627 * Returns: 0 Success
6628 * EPERM Operation not permitted
6629 * vnode_authattr:??? [anything vnode_authattr can return]
6630 * vnode_authorize:??? [anything vnode_authorize can return]
6631 * vnode_setattr:??? [anything vnode_setattr can return]
6632 *
6633 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
6634 * translated to EPERM before being returned.
6635 */
6636 static int
6637 chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
6638 {
6639 kauth_action_t action;
6640 int error;
6641
6642 AUDIT_ARG(mode, vap->va_mode);
6643 /* XXX audit new args */
6644
6645 #if NAMEDSTREAMS
6646 /* chmod calls are not allowed for resource forks. */
6647 if (vp->v_flag & VISNAMEDSTREAM) {
6648 return EPERM;
6649 }
6650 #endif
6651
6652 #if CONFIG_MACF
6653 if (VATTR_IS_ACTIVE(vap, va_mode) &&
6654 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0) {
6655 return error;
6656 }
6657
6658 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
6659 if ((error = mac_vnode_check_setowner(ctx, vp,
6660 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
6661 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1))) {
6662 return error;
6663 }
6664 }
6665
6666 if (VATTR_IS_ACTIVE(vap, va_acl) &&
6667 (error = mac_vnode_check_setacl(ctx, vp, vap->va_acl))) {
6668 return error;
6669 }
6670 #endif
6671
6672 /* make sure that the caller is allowed to set this security information */
6673 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
6674 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6675 if (error == EACCES) {
6676 error = EPERM;
6677 }
6678 return error;
6679 }
6680
6681 if ((error = vnode_setattr(vp, vap, ctx)) != 0) {
6682 return error;
6683 }
6684
6685 #if CONFIG_MACF
6686 if (VATTR_IS_ACTIVE(vap, va_mode)) {
6687 mac_vnode_notify_setmode(ctx, vp, (mode_t)vap->va_mode);
6688 }
6689
6690 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
6691 mac_vnode_notify_setowner(ctx, vp,
6692 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
6693 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1);
6694 }
6695
6696 if (VATTR_IS_ACTIVE(vap, va_acl)) {
6697 mac_vnode_notify_setacl(ctx, vp, vap->va_acl);
6698 }
6699 #endif
6700
6701 return error;
6702 }
6703
6704
6705 /*
6706 * Change mode of a file given a path name.
6707 *
6708 * Returns: 0 Success
6709 * namei:??? [anything namei can return]
6710 * chmod_vnode:??? [anything chmod_vnode can return]
6711 */
6712 static int
6713 chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
6714 int fd, int flag, enum uio_seg segflg)
6715 {
6716 struct nameidata nd;
6717 int follow, error;
6718
6719 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6720 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
6721 segflg, path, ctx);
6722 if ((error = nameiat(&nd, fd))) {
6723 return error;
6724 }
6725 error = chmod_vnode(ctx, nd.ni_vp, vap);
6726 vnode_put(nd.ni_vp);
6727 nameidone(&nd);
6728 return error;
6729 }
6730
6731 /*
6732 * chmod_extended: Change the mode of a file given a path name; with extended
6733 * argument list (including extended security (ACL)).
6734 *
6735 * Parameters: p Process requesting the open
6736 * uap User argument descriptor (see below)
6737 * retval (ignored)
6738 *
6739 * Indirect: uap->path Path to object (same as 'chmod')
6740 * uap->uid UID to set
6741 * uap->gid GID to set
6742 * uap->mode File mode to set (same as 'chmod')
6743 * uap->xsecurity ACL to set (or delete)
6744 *
6745 * Returns: 0 Success
6746 * !0 errno value
6747 *
6748 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6749 *
6750 * XXX: We should enumerate the possible errno values here, and where
6751 * in the code they originated.
6752 */
6753 int
6754 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
6755 {
6756 int error;
6757 struct vnode_attr va;
6758 kauth_filesec_t xsecdst;
6759
6760 AUDIT_ARG(owner, uap->uid, uap->gid);
6761
6762 VATTR_INIT(&va);
6763 if (uap->mode != -1) {
6764 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6765 }
6766 if (uap->uid != KAUTH_UID_NONE) {
6767 VATTR_SET(&va, va_uid, uap->uid);
6768 }
6769 if (uap->gid != KAUTH_GID_NONE) {
6770 VATTR_SET(&va, va_gid, uap->gid);
6771 }
6772
6773 xsecdst = NULL;
6774 switch (uap->xsecurity) {
6775 /* explicit remove request */
6776 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6777 VATTR_SET(&va, va_acl, NULL);
6778 break;
6779 /* not being set */
6780 case USER_ADDR_NULL:
6781 break;
6782 default:
6783 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
6784 return error;
6785 }
6786 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6787 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
6788 }
6789
6790 error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
6791 UIO_USERSPACE);
6792
6793 if (xsecdst != NULL) {
6794 kauth_filesec_free(xsecdst);
6795 }
6796 return error;
6797 }
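/*
 * Illustrative userspace sketch (not compiled into the kernel): the usual
 * route into chmod_extended() is the Libc filesec(3) wrapper chmodx_np(3),
 * which packages mode/owner/ACL into a filesec_t for the kernel.  The wrapper
 * names, properties and header locations below follow the macOS man pages and
 * are assumptions of this sketch, not something defined in this file.
 */
#if 0 /* userspace example only */
#include <sys/types.h>
#include <sys/stat.h>	/* chmodx_np() and filesec_t per chmodx_np(3); assumed */
#include <stdio.h>

static int
set_mode_extended(const char *path, mode_t mode)
{
	filesec_t fsec = filesec_init();
	int ret = -1;

	if (fsec == NULL) {
		return -1;
	}
	if (filesec_set_property(fsec, FILESEC_MODE, &mode) == 0 &&
	    chmodx_np(path, fsec) == 0) {
		ret = 0;
	} else {
		perror("chmodx_np");
	}
	filesec_free(fsec);
	return ret;
}
#endif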
6798
6799 /*
6800 * Returns: 0 Success
6801 * chmodat:??? [anything chmodat can return]
6802 */
6803 static int
6804 fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
6805 int flag, enum uio_seg segflg)
6806 {
6807 struct vnode_attr va;
6808
6809 VATTR_INIT(&va);
6810 VATTR_SET(&va, va_mode, mode & ALLPERMS);
6811
6812 return chmodat(ctx, path, &va, fd, flag, segflg);
6813 }
6814
6815 int
6816 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
6817 {
6818 return fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6819 AT_FDCWD, 0, UIO_USERSPACE);
6820 }
6821
6822 int
6823 fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
6824 {
6825 if (uap->flag & ~AT_SYMLINK_NOFOLLOW) {
6826 return EINVAL;
6827 }
6828
6829 return fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6830 uap->fd, uap->flag, UIO_USERSPACE);
6831 }
6832
6833 /*
6834 * Change mode of a file given a file descriptor.
6835 */
6836 static int
6837 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
6838 {
6839 vnode_t vp;
6840 int error;
6841
6842 AUDIT_ARG(fd, fd);
6843
6844 if ((error = file_vnode(fd, &vp)) != 0) {
6845 return error;
6846 }
6847 if ((error = vnode_getwithref(vp)) != 0) {
6848 file_drop(fd);
6849 return error;
6850 }
6851 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6852
6853 error = chmod_vnode(vfs_context_current(), vp, vap);
6854 (void)vnode_put(vp);
6855 file_drop(fd);
6856
6857 return error;
6858 }
6859
6860 /*
6861 * fchmod_extended: Change mode of a file given a file descriptor; with
6862 * extended argument list (including extended security (ACL)).
6863 *
6864 * Parameters: p Process requesting to change file mode
6865 * uap User argument descriptor (see below)
6866 * retval (ignored)
6867 *
6868 * Indirect: uap->mode File mode to set (same as 'chmod')
6869 * uap->uid UID to set
6870 * uap->gid GID to set
6871 * uap->xsecurity ACL to set (or delete)
6872 * uap->fd File descriptor of file to change mode
6873 *
6874 * Returns: 0 Success
6875 * !0 errno value
6876 *
6877 */
6878 int
6879 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
6880 {
6881 int error;
6882 struct vnode_attr va;
6883 kauth_filesec_t xsecdst;
6884
6885 AUDIT_ARG(owner, uap->uid, uap->gid);
6886
6887 VATTR_INIT(&va);
6888 if (uap->mode != -1) {
6889 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6890 }
6891 if (uap->uid != KAUTH_UID_NONE) {
6892 VATTR_SET(&va, va_uid, uap->uid);
6893 }
6894 if (uap->gid != KAUTH_GID_NONE) {
6895 VATTR_SET(&va, va_gid, uap->gid);
6896 }
6897
6898 xsecdst = NULL;
6899 switch (uap->xsecurity) {
6900 case USER_ADDR_NULL:
6901 VATTR_SET(&va, va_acl, NULL);
6902 break;
6903 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6904 VATTR_SET(&va, va_acl, NULL);
6905 break;
6906 /* not being set */
6907 case CAST_USER_ADDR_T(-1):
6908 break;
6909 default:
6910 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
6911 return error;
6912 }
6913 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6914 }
6915
6916 error = fchmod1(p, uap->fd, &va);
6917
6918
6919 switch (uap->xsecurity) {
6920 case USER_ADDR_NULL:
6921 case CAST_USER_ADDR_T(-1):
6922 break;
6923 default:
6924 if (xsecdst != NULL) {
6925 kauth_filesec_free(xsecdst);
6926 }
6927 }
6928 return error;
6929 }
6930
6931 int
6932 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
6933 {
6934 struct vnode_attr va;
6935
6936 VATTR_INIT(&va);
6937 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6938
6939 return fchmod1(p, uap->fd, &va);
6940 }
6941
6942
6943 /*
6944 * Set ownership given a path name.
6945 */
6946 /* ARGSUSED */
6947 static int
6948 fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
6949 gid_t gid, int flag, enum uio_seg segflg)
6950 {
6951 vnode_t vp;
6952 struct vnode_attr va;
6953 int error;
6954 struct nameidata nd;
6955 int follow;
6956 kauth_action_t action;
6957
6958 AUDIT_ARG(owner, uid, gid);
6959
6960 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6961 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
6962 path, ctx);
6963 error = nameiat(&nd, fd);
6964 if (error) {
6965 return error;
6966 }
6967 vp = nd.ni_vp;
6968
6969 nameidone(&nd);
6970
6971 VATTR_INIT(&va);
6972 if (uid != (uid_t)VNOVAL) {
6973 VATTR_SET(&va, va_uid, uid);
6974 }
6975 if (gid != (gid_t)VNOVAL) {
6976 VATTR_SET(&va, va_gid, gid);
6977 }
6978
6979 #if CONFIG_MACF
6980 error = mac_vnode_check_setowner(ctx, vp, uid, gid);
6981 if (error) {
6982 goto out;
6983 }
6984 #endif
6985
6986 /* preflight and authorize attribute changes */
6987 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
6988 goto out;
6989 }
6990 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6991 goto out;
6992 }
6993 error = vnode_setattr(vp, &va, ctx);
6994
6995 #if CONFIG_MACF
6996 if (error == 0) {
6997 mac_vnode_notify_setowner(ctx, vp, uid, gid);
6998 }
6999 #endif
7000
7001 out:
7002 /*
7003 * EACCES is only allowed from namei(); permissions failure should
7004 * return EPERM, so we need to translate the error code.
7005 */
7006 if (error == EACCES) {
7007 error = EPERM;
7008 }
7009
7010 vnode_put(vp);
7011 return error;
7012 }
7013
7014 int
7015 chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
7016 {
7017 return fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
7018 uap->uid, uap->gid, 0, UIO_USERSPACE);
7019 }
7020
7021 int
7022 lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
7023 {
7024 return fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
7025 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE);
7026 }
7027
7028 int
7029 fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
7030 {
7031 if (uap->flag & ~AT_SYMLINK_NOFOLLOW) {
7032 return EINVAL;
7033 }
7034
7035 return fchownat_internal(vfs_context_current(), uap->fd, uap->path,
7036 uap->uid, uap->gid, uap->flag, UIO_USERSPACE);
7037 }
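/*
 * Illustrative userspace sketch (not compiled into the kernel): changing only
 * the group of a symlink itself via fchownat(2).  Passing (uid_t)-1 leaves
 * the owner untouched, which corresponds to the VNOVAL checks above.
 * "link" and the gid value are placeholders supplied by the caller.
 */
#if 0 /* userspace example only */
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>

static int
set_group_nofollow(int dirfd, const char *name, gid_t gid)
{
	/* (uid_t)-1 == "do not change the owner". */
	if (fchownat(dirfd, name, (uid_t)-1, gid, AT_SYMLINK_NOFOLLOW) == -1) {
		perror("fchownat");
		return -1;
	}
	return 0;
}
#endif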
7038
7039 /*
7040 * Set ownership given a file descriptor.
7041 */
7042 /* ARGSUSED */
7043 int
7044 fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
7045 {
7046 struct vnode_attr va;
7047 vfs_context_t ctx = vfs_context_current();
7048 vnode_t vp;
7049 int error;
7050 kauth_action_t action;
7051
7052 AUDIT_ARG(owner, uap->uid, uap->gid);
7053 AUDIT_ARG(fd, uap->fd);
7054
7055 if ((error = file_vnode(uap->fd, &vp))) {
7056 return error;
7057 }
7058
7059 if ((error = vnode_getwithref(vp))) {
7060 file_drop(uap->fd);
7061 return error;
7062 }
7063 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7064
7065 VATTR_INIT(&va);
7066 if (uap->uid != VNOVAL) {
7067 VATTR_SET(&va, va_uid, uap->uid);
7068 }
7069 if (uap->gid != VNOVAL) {
7070 VATTR_SET(&va, va_gid, uap->gid);
7071 }
7072
7073 #if NAMEDSTREAMS
7074 /* chown calls are not allowed for resource forks. */
7075 if (vp->v_flag & VISNAMEDSTREAM) {
7076 error = EPERM;
7077 goto out;
7078 }
7079 #endif
7080
7081 #if CONFIG_MACF
7082 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
7083 if (error) {
7084 goto out;
7085 }
7086 #endif
7087
7088 /* preflight and authorize attribute changes */
7089 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
7090 goto out;
7091 }
7092 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
7093 if (error == EACCES) {
7094 error = EPERM;
7095 }
7096 goto out;
7097 }
7098 error = vnode_setattr(vp, &va, ctx);
7099
7100 #if CONFIG_MACF
7101 if (error == 0) {
7102 mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid);
7103 }
7104 #endif
7105
7106 out:
7107 (void)vnode_put(vp);
7108 file_drop(uap->fd);
7109 return error;
7110 }
7111
7112 static int
7113 getutimes(user_addr_t usrtvp, struct timespec *tsp)
7114 {
7115 int error;
7116
7117 if (usrtvp == USER_ADDR_NULL) {
7118 struct timeval old_tv;
7119 /* XXX Y2038 bug because of microtime argument */
7120 microtime(&old_tv);
7121 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
7122 tsp[1] = tsp[0];
7123 } else {
7124 if (IS_64BIT_PROCESS(current_proc())) {
7125 struct user64_timeval tv[2];
7126 error = copyin(usrtvp, (void *)tv, sizeof(tv));
7127 if (error) {
7128 return error;
7129 }
7130 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
7131 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
7132 } else {
7133 struct user32_timeval tv[2];
7134 error = copyin(usrtvp, (void *)tv, sizeof(tv));
7135 if (error) {
7136 return error;
7137 }
7138 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
7139 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
7140 }
7141 }
7142 return 0;
7143 }
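/*
 * Illustrative userspace sketch (not compiled into the kernel): the two ways
 * a caller drives getutimes() above -- a NULL pointer, which asks the kernel
 * to stamp the current time (the VA_UTIMES_NULL path), or an explicit
 * { atime, mtime } pair.  "file.txt" is a placeholder name.
 */
#if 0 /* userspace example only */
#include <sys/time.h>
#include <stdio.h>

static int
touch_now(const char *path)
{
	/* NULL => kernel fills in the current time. */
	if (utimes(path, NULL) == -1) {
		perror("utimes");
		return -1;
	}
	return 0;
}

static int
set_times(const char *path, time_t atime, time_t mtime)
{
	struct timeval tv[2] = {
		{ .tv_sec = atime, .tv_usec = 0 },	/* access time */
		{ .tv_sec = mtime, .tv_usec = 0 },	/* modification time */
	};

	if (utimes(path, tv) == -1) {
		perror("utimes");
		return -1;
	}
	return 0;
}
#endif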
7144
7145 static int
7146 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
7147 int nullflag)
7148 {
7149 int error;
7150 struct vnode_attr va;
7151 kauth_action_t action;
7152
7153 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7154
7155 VATTR_INIT(&va);
7156 VATTR_SET(&va, va_access_time, ts[0]);
7157 VATTR_SET(&va, va_modify_time, ts[1]);
7158 if (nullflag) {
7159 va.va_vaflags |= VA_UTIMES_NULL;
7160 }
7161
7162 #if NAMEDSTREAMS
7163 /* utimes calls are not allowed for resource forks. */
7164 if (vp->v_flag & VISNAMEDSTREAM) {
7165 error = EPERM;
7166 goto out;
7167 }
7168 #endif
7169
7170 #if CONFIG_MACF
7171 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
7172 if (error) {
7173 goto out;
7174 }
7175 #endif
7176 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
7177 if (!nullflag && error == EACCES) {
7178 error = EPERM;
7179 }
7180 goto out;
7181 }
7182
7183 /* since we may not need to auth anything, check here */
7184 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
7185 if (!nullflag && error == EACCES) {
7186 error = EPERM;
7187 }
7188 goto out;
7189 }
7190 error = vnode_setattr(vp, &va, ctx);
7191
7192 #if CONFIG_MACF
7193 if (error == 0) {
7194 mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]);
7195 }
7196 #endif
7197
7198 out:
7199 return error;
7200 }
7201
7202 /*
7203 * Set the access and modification times of a file.
7204 */
7205 /* ARGSUSED */
7206 int
7207 utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
7208 {
7209 struct timespec ts[2];
7210 user_addr_t usrtvp;
7211 int error;
7212 struct nameidata nd;
7213 vfs_context_t ctx = vfs_context_current();
7214
7215 /*
7216 * AUDIT: Needed to change the order of operations to do the
7217 * name lookup first because auditing wants the path.
7218 */
7219 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
7220 UIO_USERSPACE, uap->path, ctx);
7221 error = namei(&nd);
7222 if (error) {
7223 return error;
7224 }
7225 nameidone(&nd);
7226
7227 /*
7228 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
7229 * the current time instead.
7230 */
7231 usrtvp = uap->tptr;
7232 if ((error = getutimes(usrtvp, ts)) != 0) {
7233 goto out;
7234 }
7235
7236 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
7237
7238 out:
7239 vnode_put(nd.ni_vp);
7240 return error;
7241 }
7242
7243 /*
7244 * Set the access and modification times of a file.
7245 */
7246 /* ARGSUSED */
7247 int
7248 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
7249 {
7250 struct timespec ts[2];
7251 vnode_t vp;
7252 user_addr_t usrtvp;
7253 int error;
7254
7255 AUDIT_ARG(fd, uap->fd);
7256 usrtvp = uap->tptr;
7257 if ((error = getutimes(usrtvp, ts)) != 0) {
7258 return error;
7259 }
7260 if ((error = file_vnode(uap->fd, &vp)) != 0) {
7261 return error;
7262 }
7263 if ((error = vnode_getwithref(vp))) {
7264 file_drop(uap->fd);
7265 return error;
7266 }
7267
7268 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
7269 vnode_put(vp);
7270 file_drop(uap->fd);
7271 return error;
7272 }
7273
7274 /*
7275 * Truncate a file given its path name.
7276 */
7277 /* ARGSUSED */
7278 int
7279 truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
7280 {
7281 vnode_t vp;
7282 struct vnode_attr va;
7283 vfs_context_t ctx = vfs_context_current();
7284 int error;
7285 struct nameidata nd;
7286 kauth_action_t action;
7287
7288 if (uap->length < 0) {
7289 return EINVAL;
7290 }
7291 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
7292 UIO_USERSPACE, uap->path, ctx);
7293 if ((error = namei(&nd))) {
7294 return error;
7295 }
7296 vp = nd.ni_vp;
7297
7298 nameidone(&nd);
7299
7300 VATTR_INIT(&va);
7301 VATTR_SET(&va, va_data_size, uap->length);
7302
7303 #if CONFIG_MACF
7304 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
7305 if (error) {
7306 goto out;
7307 }
7308 #endif
7309
7310 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
7311 goto out;
7312 }
7313 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
7314 goto out;
7315 }
7316 error = vnode_setattr(vp, &va, ctx);
7317
7318 #if CONFIG_MACF
7319 if (error == 0) {
7320 mac_vnode_notify_truncate(ctx, NOCRED, vp);
7321 }
7322 #endif
7323
7324 out:
7325 vnode_put(vp);
7326 return error;
7327 }
7328
7329 /*
7330 * Truncate a file given a file descriptor.
7331 */
7332 /* ARGSUSED */
7333 int
7334 ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
7335 {
7336 vfs_context_t ctx = vfs_context_current();
7337 struct vnode_attr va;
7338 vnode_t vp;
7339 struct fileproc *fp;
7340 int error;
7341 int fd = uap->fd;
7342
7343 AUDIT_ARG(fd, uap->fd);
7344 if (uap->length < 0) {
7345 return EINVAL;
7346 }
7347
7348 if ((error = fp_lookup(p, fd, &fp, 0))) {
7349 return error;
7350 }
7351
7352 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
7353 case DTYPE_PSXSHM:
7354 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
7355 goto out;
7356 case DTYPE_VNODE:
7357 break;
7358 default:
7359 error = EINVAL;
7360 goto out;
7361 }
7362
7363 vp = (vnode_t)fp->f_fglob->fg_data;
7364
7365 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
7366 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7367 error = EINVAL;
7368 goto out;
7369 }
7370
7371 if ((error = vnode_getwithref(vp)) != 0) {
7372 goto out;
7373 }
7374
7375 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7376
7377 #if CONFIG_MACF
7378 error = mac_vnode_check_truncate(ctx,
7379 fp->f_fglob->fg_cred, vp);
7380 if (error) {
7381 (void)vnode_put(vp);
7382 goto out;
7383 }
7384 #endif
7385 VATTR_INIT(&va);
7386 VATTR_SET(&va, va_data_size, uap->length);
7387 error = vnode_setattr(vp, &va, ctx);
7388
7389 #if CONFIG_MACF
7390 if (error == 0) {
7391 mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp);
7392 }
7393 #endif
7394
7395 (void)vnode_put(vp);
7396 out:
7397 file_drop(fd);
7398 return error;
7399 }
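/*
 * Illustrative userspace sketch (not compiled into the kernel): ftruncate(2)
 * requires a descriptor that was opened for writing; the FWRITE check above
 * is why a read-only descriptor gets EINVAL.  "data.bin" and the length are
 * placeholders supplied by the caller.
 */
#if 0 /* userspace example only */
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

static int
shrink_file(const char *path, off_t length)
{
	int fd = open(path, O_WRONLY);	/* must be writable for ftruncate */

	if (fd == -1) {
		perror("open");
		return -1;
	}
	if (ftruncate(fd, length) == -1) {
		perror("ftruncate");
		close(fd);
		return -1;
	}
	return close(fd);
}
#endif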
7400
7401
7402 /*
7403 * Sync an open file with synchronized I/O _file_ integrity completion
7404 */
7405 /* ARGSUSED */
7406 int
7407 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
7408 {
7409 __pthread_testcancel(1);
7410 return fsync_common(p, uap, MNT_WAIT);
7411 }
7412
7413
7414 /*
7415 * Sync an open file with synchronized I/O _file_ integrity completion
7416 *
7417 * Notes: This is a legacy support function that does not test for
7418 * thread cancellation points.
7419 */
7420 /* ARGSUSED */
7421 int
7422 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
7423 {
7424 return fsync_common(p, (struct fsync_args *)uap, MNT_WAIT);
7425 }
7426
7427
7428 /*
7429 * Sync an open file with synchronized I/O _data_ integrity completion
7430 */
7431 /* ARGSUSED */
7432 int
7433 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
7434 {
7435 __pthread_testcancel(1);
7436 return fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT);
7437 }
7438
7439
7440 /*
7441 * fsync_common
7442 *
7443 * Common fsync code to support both synchronized I/O file integrity completion
7444 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
7445 *
7446 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
7447 * will only guarantee that the file data contents are retrievable. If
7448 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which
7449 * additionally requires that metadata not strictly needed for retrieving
7450 * the file data contents, such as atime, mtime, ctime, etc., also be
7451 * committed to stable storage.
7452 *
7453 * Parameters: p The process
7454 * uap->fd The descriptor to synchronize
7455 * flags The data integrity flags
7456 *
7457 * Returns: int Success
7458 * fp_getfvp:EBADF Bad file descriptor
7459 * fp_getfvp:ENOTSUP fd does not refer to a vnode
7460 * VNOP_FSYNC:??? unspecified
7461 *
7462 * Notes: We use struct fsync_args because it is a short name, and all
7463 * caller argument structures are otherwise identical.
7464 */
7465 static int
7466 fsync_common(proc_t p, struct fsync_args *uap, int flags)
7467 {
7468 vnode_t vp;
7469 struct fileproc *fp;
7470 vfs_context_t ctx = vfs_context_current();
7471 int error;
7472
7473 AUDIT_ARG(fd, uap->fd);
7474
7475 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) {
7476 return error;
7477 }
7478 if ((error = vnode_getwithref(vp))) {
7479 file_drop(uap->fd);
7480 return error;
7481 }
7482
7483 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7484
7485 error = VNOP_FSYNC(vp, flags, ctx);
7486
7487 #if NAMEDRSRCFORK
7488 /* Sync resource fork shadow file if necessary. */
7489 if ((error == 0) &&
7490 (vp->v_flag & VISNAMEDSTREAM) &&
7491 (vp->v_parent != NULLVP) &&
7492 vnode_isshadow(vp) &&
7493 (fp->f_flags & FP_WRITTEN)) {
7494 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
7495 }
7496 #endif
7497
7498 (void)vnode_put(vp);
7499 file_drop(uap->fd);
7500 return error;
7501 }
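/*
 * Illustrative userspace sketch (not compiled into the kernel): fdatasync(2)
 * maps to the MNT_DWAIT case above (data integrity only) while fsync(2) maps
 * to MNT_WAIT (data plus metadata).  On macOS, fcntl(fd, F_FULLFSYNC) can be
 * layered on top to also ask the storage device to flush its write cache;
 * whether that extra step is needed is the caller's decision.
 */
#if 0 /* userspace example only */
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>

static int
commit_data(int fd, int want_metadata_too)
{
	int error = want_metadata_too ? fsync(fd) : fdatasync(fd);

	if (error == -1) {
		perror(want_metadata_too ? "fsync" : "fdatasync");
		return -1;
	}
	/* Optional: push the request through the drive's write cache. */
	if (fcntl(fd, F_FULLFSYNC) == -1) {
		perror("fcntl(F_FULLFSYNC)");
		return -1;
	}
	return 0;
}
#endif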
7502
7503 /*
7504 * Duplicate files. Source must be a file, target must be a file or
7505 * must not exist.
7506 *
7507 * XXX Copyfile authorisation checking is woefully inadequate, and will not
7508 * perform inheritance correctly.
7509 */
7510 /* ARGSUSED */
7511 int
7512 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
7513 {
7514 vnode_t tvp, fvp, tdvp, sdvp;
7515 struct nameidata fromnd, tond;
7516 int error;
7517 vfs_context_t ctx = vfs_context_current();
7518 #if CONFIG_MACF
7519 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
7520 struct vnode_attr va;
7521 #endif
7522
7523 /* Check that the flags are valid. */
7524
7525 if (uap->flags & ~CPF_MASK) {
7526 return EINVAL;
7527 }
7528
7529 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
7530 UIO_USERSPACE, uap->from, ctx);
7531 if ((error = namei(&fromnd))) {
7532 return error;
7533 }
7534 fvp = fromnd.ni_vp;
7535
7536 NDINIT(&tond, CREATE, OP_LINK,
7537 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
7538 UIO_USERSPACE, uap->to, ctx);
7539 if ((error = namei(&tond))) {
7540 goto out1;
7541 }
7542 tdvp = tond.ni_dvp;
7543 tvp = tond.ni_vp;
7544
7545 if (tvp != NULL) {
7546 if (!(uap->flags & CPF_OVERWRITE)) {
7547 error = EEXIST;
7548 goto out;
7549 }
7550 }
7551
7552 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
7553 error = EISDIR;
7554 goto out;
7555 }
7556
7557 /* This calls existing MAC hooks for open */
7558 if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx,
7559 NULL))) {
7560 goto out;
7561 }
7562
7563 if (tvp) {
7564 /*
7565 * See unlinkat_internal for an explanation of the potential
7566 * ENOENT from the MAC hook but the gist is that the MAC hook
7567 * can fail because vn_getpath isn't able to return the full
7568 * path. We choose to ignore this failure.
7569 */
7570 error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL);
7571 if (error && error != ENOENT) {
7572 goto out;
7573 }
7574 error = 0;
7575 }
7576
7577 #if CONFIG_MACF
7578 VATTR_INIT(&va);
7579 VATTR_SET(&va, va_type, fvp->v_type);
7580 /* Mask off all but regular access permissions */
7581 VATTR_SET(&va, va_mode,
7582 ((((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
7583 error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
7584 if (error) {
7585 goto out;
7586 }
7587 #endif /* CONFIG_MACF */
7588
7589 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) {
7590 goto out;
7591 }
7592
7593 if (fvp == tdvp) {
7594 error = EINVAL;
7595 }
7596 /*
7597 * If source is the same as the destination (that is the
7598 * same inode number) then there is nothing to do.
7599 * (fixed to have POSIX semantics - CSM 3/2/98)
7600 */
7601 if (fvp == tvp) {
7602 error = -1;
7603 }
7604 if (!error) {
7605 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
7606 }
7607 out:
7608 sdvp = tond.ni_startdir;
7609 /*
7610 * nameidone has to happen before we vnode_put(tdvp)
7611 * since it may need to release the fs_nodelock on the tdvp
7612 */
7613 nameidone(&tond);
7614
7615 if (tvp) {
7616 vnode_put(tvp);
7617 }
7618 vnode_put(tdvp);
7619 vnode_put(sdvp);
7620 out1:
7621 vnode_put(fvp);
7622
7623 nameidone(&fromnd);
7624
7625 if (error == -1) {
7626 return 0;
7627 }
7628 return error;
7629 }
7630
7631 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
7632
7633 /*
7634 * Helper function for doing clones. The caller is expected to provide an
7635 * iocounted source vnode and release it.
7636 */
7637 static int
7638 clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
7639 user_addr_t dst, uint32_t flags, vfs_context_t ctx)
7640 {
7641 vnode_t tvp, tdvp;
7642 struct nameidata tond;
7643 int error;
7644 int follow;
7645 boolean_t free_src_acl;
7646 boolean_t attr_cleanup;
7647 enum vtype v_type;
7648 kauth_action_t action;
7649 struct componentname *cnp;
7650 uint32_t defaulted;
7651 struct vnode_attr va;
7652 struct vnode_attr nva;
7653 uint32_t vnop_flags;
7654
7655 v_type = vnode_vtype(fvp);
7656 switch (v_type) {
7657 case VLNK:
7658 /* FALLTHRU */
7659 case VREG:
7660 action = KAUTH_VNODE_ADD_FILE;
7661 break;
7662 case VDIR:
7663 if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
7664 fvp->v_mountedhere) {
7665 return EINVAL;
7666 }
7667 action = KAUTH_VNODE_ADD_SUBDIRECTORY;
7668 break;
7669 default:
7670 return EINVAL;
7671 }
7672
7673 AUDIT_ARG(fd2, dst_dirfd);
7674 AUDIT_ARG(value32, flags);
7675
7676 follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7677 NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
7678 UIO_USERSPACE, dst, ctx);
7679 if ((error = nameiat(&tond, dst_dirfd))) {
7680 return error;
7681 }
7682 cnp = &tond.ni_cnd;
7683 tdvp = tond.ni_dvp;
7684 tvp = tond.ni_vp;
7685
7686 free_src_acl = FALSE;
7687 attr_cleanup = FALSE;
7688
7689 if (tvp != NULL) {
7690 error = EEXIST;
7691 goto out;
7692 }
7693
7694 if (vnode_mount(tdvp) != vnode_mount(fvp)) {
7695 error = EXDEV;
7696 goto out;
7697 }
7698
7699 #if CONFIG_MACF
7700 if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp))) {
7701 goto out;
7702 }
7703 #endif
7704 if ((error = vnode_authorize(tdvp, NULL, action, ctx))) {
7705 goto out;
7706 }
7707
7708 action = KAUTH_VNODE_GENERIC_READ_BITS;
7709 if (data_read_authorised) {
7710 action &= ~KAUTH_VNODE_READ_DATA;
7711 }
7712 if ((error = vnode_authorize(fvp, NULL, action, ctx))) {
7713 goto out;
7714 }
7715
7716 /*
7717 * certain attributes may need to be changed from the source; we ask for
7718 * those here.
7719 */
7720 VATTR_INIT(&va);
7721 VATTR_WANTED(&va, va_uid);
7722 VATTR_WANTED(&va, va_gid);
7723 VATTR_WANTED(&va, va_mode);
7724 VATTR_WANTED(&va, va_flags);
7725 VATTR_WANTED(&va, va_acl);
7726
7727 if ((error = vnode_getattr(fvp, &va, ctx)) != 0) {
7728 goto out;
7729 }
7730
7731 VATTR_INIT(&nva);
7732 VATTR_SET(&nva, va_type, v_type);
7733 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) {
7734 VATTR_SET(&nva, va_acl, va.va_acl);
7735 free_src_acl = TRUE;
7736 }
7737
7738 /* Handle ACL inheritance, initialize vap. */
7739 if (v_type == VLNK) {
7740 error = vnode_authattr_new(tdvp, &nva, 0, ctx);
7741 } else {
7742 error = vn_attribute_prepare(tdvp, &nva, &defaulted, ctx);
7743 if (error) {
7744 goto out;
7745 }
7746 attr_cleanup = TRUE;
7747 }
7748
7749 vnop_flags = VNODE_CLONEFILE_DEFAULT;
7750 /*
7751 * We've got initial values for all security parameters.
7752 * If we are the superuser, then we can change the owners to be the
7753 * same as the source. Both the superuser and the owner have default
7754 * WRITE_SECURITY privileges, so all other fields can be taken
7755 * from the source as well.
7756 */
7757 if (!(flags & CLONE_NOOWNERCOPY) && vfs_context_issuser(ctx)) {
7758 if (VATTR_IS_SUPPORTED(&va, va_uid)) {
7759 VATTR_SET(&nva, va_uid, va.va_uid);
7760 }
7761 if (VATTR_IS_SUPPORTED(&va, va_gid)) {
7762 VATTR_SET(&nva, va_gid, va.va_gid);
7763 }
7764 } else {
7765 vnop_flags |= VNODE_CLONEFILE_NOOWNERCOPY;
7766 }
7767
7768 if (VATTR_IS_SUPPORTED(&va, va_mode)) {
7769 VATTR_SET(&nva, va_mode, va.va_mode);
7770 }
7771 if (VATTR_IS_SUPPORTED(&va, va_flags)) {
7772 VATTR_SET(&nva, va_flags,
7773 ((va.va_flags & ~(UF_DATAVAULT | SF_RESTRICTED)) | /* Turn off from source */
7774 (nva.va_flags & (UF_DATAVAULT | SF_RESTRICTED))));
7775 }
7776
7777 error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &nva, vnop_flags, ctx);
7778
7779 if (!error && tvp) {
7780 int update_flags = 0;
7781 #if CONFIG_FSE
7782 int fsevent;
7783 #endif /* CONFIG_FSE */
7784
7785 #if CONFIG_MACF
7786 (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
7787 VNODE_LABEL_CREATE, ctx);
7788 #endif
7789 /*
7790 * If some of the requested attributes weren't handled by the
7791 * VNOP, use our fallback code.
7792 */
7793 if (!VATTR_ALL_SUPPORTED(&va)) {
7794 (void)vnode_setattr_fallback(tvp, &nva, ctx);
7795 }
7796
7797 // Make sure the name & parent pointers are hooked up
7798 if (tvp->v_name == NULL) {
7799 update_flags |= VNODE_UPDATE_NAME;
7800 }
7801 if (tvp->v_parent == NULLVP) {
7802 update_flags |= VNODE_UPDATE_PARENT;
7803 }
7804
7805 if (update_flags) {
7806 (void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
7807 cnp->cn_namelen, cnp->cn_hash, update_flags);
7808 }
7809
7810 #if CONFIG_FSE
7811 switch (vnode_vtype(tvp)) {
7812 case VLNK:
7813 /* FALLTHRU */
7814 case VREG:
7815 fsevent = FSE_CREATE_FILE;
7816 break;
7817 case VDIR:
7818 fsevent = FSE_CREATE_DIR;
7819 break;
7820 default:
7821 goto out;
7822 }
7823
7824 if (need_fsevent(fsevent, tvp)) {
7825 /*
7826 * The following is a sequence of three explicit events.
7827 * A pair of FSE_CLONE events representing the source and destination
7828 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
7829 * fseventsd may coalesce the destination clone and create events
7830 * into a single event resulting in the following sequence for a client
7831 * FSE_CLONE (src)
7832 * FSE_CLONE | FSE_CREATE (dst)
7833 */
7834 add_fsevent(FSE_CLONE, ctx, FSE_ARG_VNODE, fvp, FSE_ARG_VNODE, tvp,
7835 FSE_ARG_DONE);
7836 add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
7837 FSE_ARG_DONE);
7838 }
7839 #endif /* CONFIG_FSE */
7840 }
7841
7842 out:
7843 if (attr_cleanup) {
7844 vn_attribute_cleanup(&nva, defaulted);
7845 }
7846 if (free_src_acl && va.va_acl) {
7847 kauth_acl_free(va.va_acl);
7848 }
7849 nameidone(&tond);
7850 if (tvp) {
7851 vnode_put(tvp);
7852 }
7853 vnode_put(tdvp);
7854 return error;
7855 }
7856
7857 /*
7858 * clone files or directories, target must not exist.
7859 */
7860 /* ARGSUSED */
7861 int
7862 clonefileat(__unused proc_t p, struct clonefileat_args *uap,
7863 __unused int32_t *retval)
7864 {
7865 vnode_t fvp;
7866 struct nameidata fromnd;
7867 int follow;
7868 int error;
7869 vfs_context_t ctx = vfs_context_current();
7870
7871 /* Check that the flags are valid. */
7872 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY)) {
7873 return EINVAL;
7874 }
7875
7876 AUDIT_ARG(fd, uap->src_dirfd);
7877
7878 follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7879 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
7880 UIO_USERSPACE, uap->src, ctx);
7881 if ((error = nameiat(&fromnd, uap->src_dirfd))) {
7882 return error;
7883 }
7884
7885 fvp = fromnd.ni_vp;
7886 nameidone(&fromnd);
7887
7888 error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
7889 uap->flags, ctx);
7890
7891 vnode_put(fvp);
7892 return error;
7893 }
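/*
 * Illustrative userspace sketch (not compiled into the kernel): clonefileat(2)
 * as handled above.  The destination must not already exist (EEXIST) and both
 * paths must live on the same volume (EXDEV).  The paths used here are
 * placeholders.
 */
#if 0 /* userspace example only */
#include <sys/clonefile.h>
#include <fcntl.h>
#include <stdio.h>

static int
clone_within_volume(const char *src, const char *dst)
{
	/* CLONE_NOFOLLOW clones a symlink itself rather than its target. */
	if (clonefileat(AT_FDCWD, src, AT_FDCWD, dst, CLONE_NOFOLLOW) == -1) {
		perror("clonefileat");
		return -1;
	}
	return 0;
}
#endif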
7894
7895 int
7896 fclonefileat(__unused proc_t p, struct fclonefileat_args *uap,
7897 __unused int32_t *retval)
7898 {
7899 vnode_t fvp;
7900 struct fileproc *fp;
7901 int error;
7902 vfs_context_t ctx = vfs_context_current();
7903
7904 /* Check that the flags are valid. */
7905 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY)) {
7906 return EINVAL;
7907 }
7908
7909 AUDIT_ARG(fd, uap->src_fd);
7910 error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
7911 if (error) {
7912 return error;
7913 }
7914
7915 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7916 AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
7917 error = EBADF;
7918 goto out;
7919 }
7920
7921 if ((error = vnode_getwithref(fvp))) {
7922 goto out;
7923 }
7924
7925 AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
7926
7927 error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
7928 uap->flags, ctx);
7929
7930 vnode_put(fvp);
7931 out:
7932 file_drop(uap->src_fd);
7933 return error;
7934 }
7935
7936 static int
7937 rename_submounts_callback(mount_t mp, void *arg)
7938 {
7939 int error = 0;
7940 mount_t pmp = (mount_t)arg;
7941 int prefix_len = strlen(pmp->mnt_vfsstat.f_mntonname);
7942
7943 if (strncmp(mp->mnt_vfsstat.f_mntonname, pmp->mnt_vfsstat.f_mntonname, prefix_len) != 0) {
7944 return 0;
7945 }
7946
7947 if (mp->mnt_vfsstat.f_mntonname[prefix_len] != '/') {
7948 return 0;
7949 }
7950
7951 if ((error = vfs_busy(mp, LK_NOWAIT))) {
7952 printf("vfs_busy failed with %d for %s\n", error, mp->mnt_vfsstat.f_mntonname);
7953 return -1;
7954 }
7955
7956 int pathlen = MAXPATHLEN;
7957 if ((error = vn_getpath_ext(mp->mnt_vnodecovered, NULL, mp->mnt_vfsstat.f_mntonname, &pathlen, VN_GETPATH_FSENTER))) {
7958 printf("vn_getpath_ext failed with %d for mnt_vnodecovered of %s\n", error, mp->mnt_vfsstat.f_mntonname);
7959 }
7960
7961 vfs_unbusy(mp);
7962
7963 return error;
7964 }
7965
7966 /*
7967 * Rename files. Source and destination must either both be directories,
7968 * or both not be directories. If target is a directory, it must be empty.
7969 */
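/*
 * Illustrative userspace sketch (not compiled into the kernel): the extended
 * rename flags validated below are reachable through renamex_np(2) and
 * renameatx_np(2) on macOS.  RENAME_SWAP and RENAME_EXCL are mutually
 * exclusive, which is exactly the EINVAL check at the top of
 * renameat_internal().  The paths are placeholders.
 */
#if 0 /* userspace example only */
#include <stdio.h>

static int
atomic_swap(const char *a, const char *b)
{
	/* Atomically exchange two existing entries on the same volume. */
	if (renamex_np(a, b, RENAME_SWAP) == -1) {
		perror("renamex_np(RENAME_SWAP)");
		return -1;
	}
	return 0;
}

static int
rename_if_absent(const char *from, const char *to)
{
	/* Fail with EEXIST rather than replace an existing destination. */
	if (renamex_np(from, to, RENAME_EXCL) == -1) {
		perror("renamex_np(RENAME_EXCL)");
		return -1;
	}
	return 0;
}
#endif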
7970 /* ARGSUSED */
7971 static int
7972 renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
7973 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
7974 {
7975 if (flags & ~VFS_RENAME_FLAGS_MASK) {
7976 return EINVAL;
7977 }
7978
7979 if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL)) {
7980 return EINVAL;
7981 }
7982
7983 vnode_t tvp, tdvp;
7984 vnode_t fvp, fdvp;
7985 struct nameidata *fromnd, *tond;
7986 int error;
7987 int do_retry;
7988 int retry_count;
7989 int mntrename;
7990 int need_event;
7991 int need_kpath2;
7992 int has_listeners;
7993 const char *oname = NULL;
7994 char *from_name = NULL, *to_name = NULL;
7995 char *from_name_no_firmlink = NULL, *to_name_no_firmlink = NULL;
7996 int from_len = 0, to_len = 0;
7997 int from_len_no_firmlink = 0, to_len_no_firmlink = 0;
7998 int holding_mntlock;
7999 mount_t locked_mp = NULL;
8000 vnode_t oparent = NULLVP;
8001 #if CONFIG_FSE
8002 fse_info from_finfo, to_finfo;
8003 #endif
8004 int from_truncated = 0, to_truncated = 0;
8005 int from_truncated_no_firmlink = 0, to_truncated_no_firmlink = 0;
8006 int batched = 0;
8007 struct vnode_attr *fvap, *tvap;
8008 int continuing = 0;
8009 /* carving out a chunk for structs that are too big to be on stack. */
8010 struct {
8011 struct nameidata from_node, to_node;
8012 struct vnode_attr fv_attr, tv_attr;
8013 } * __rename_data;
8014 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
8015 fromnd = &__rename_data->from_node;
8016 tond = &__rename_data->to_node;
8017
8018 holding_mntlock = 0;
8019 do_retry = 0;
8020 retry_count = 0;
8021 retry:
8022 fvp = tvp = NULL;
8023 fdvp = tdvp = NULL;
8024 fvap = tvap = NULL;
8025 mntrename = FALSE;
8026
8027 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
8028 segflg, from, ctx);
8029 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
8030
8031 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
8032 segflg, to, ctx);
8033 tond->ni_flag = NAMEI_COMPOUNDRENAME;
8034
8035 continue_lookup:
8036 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
8037 if ((error = nameiat(fromnd, fromfd))) {
8038 goto out1;
8039 }
8040 fdvp = fromnd->ni_dvp;
8041 fvp = fromnd->ni_vp;
8042
8043 if (fvp && fvp->v_type == VDIR) {
8044 tond->ni_cnd.cn_flags |= WILLBEDIR;
8045 }
8046 }
8047
8048 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
8049 if ((error = nameiat(tond, tofd))) {
8050 /*
8051 * Translate error code for rename("dir1", "dir2/.").
8052 */
8053 if (error == EISDIR && fvp->v_type == VDIR) {
8054 error = EINVAL;
8055 }
8056 goto out1;
8057 }
8058 tdvp = tond->ni_dvp;
8059 tvp = tond->ni_vp;
8060 }
8061
8062 #if DEVELOPMENT || DEBUG
8063 /*
8064 * XXX VSWAP: Check for entitlements or special flag here
8065 * so we can restrict access appropriately.
8066 */
8067 #else /* DEVELOPMENT || DEBUG */
8068
8069 if (fromnd->ni_vp && vnode_isswap(fromnd->ni_vp) && (ctx != vfs_context_kernel())) {
8070 error = EPERM;
8071 goto out1;
8072 }
8073
8074 if (tond->ni_vp && vnode_isswap(tond->ni_vp) && (ctx != vfs_context_kernel())) {
8075 error = EPERM;
8076 goto out1;
8077 }
8078 #endif /* DEVELOPMENT || DEBUG */
8079
8080 if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
8081 error = ENOENT;
8082 goto out1;
8083 }
8084
8085 if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
8086 error = EEXIST;
8087 goto out1;
8088 }
8089
8090 batched = vnode_compound_rename_available(fdvp);
8091
8092 #if CONFIG_FSE
8093 need_event = need_fsevent(FSE_RENAME, fdvp);
8094 if (need_event) {
8095 if (fvp) {
8096 get_fse_info(fvp, &from_finfo, ctx);
8097 } else {
8098 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
8099 if (error) {
8100 goto out1;
8101 }
8102
8103 fvap = &__rename_data->fv_attr;
8104 }
8105
8106 if (tvp) {
8107 get_fse_info(tvp, &to_finfo, ctx);
8108 } else if (batched) {
8109 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
8110 if (error) {
8111 goto out1;
8112 }
8113
8114 tvap = &__rename_data->tv_attr;
8115 }
8116 }
8117 #else
8118 need_event = 0;
8119 #endif /* CONFIG_FSE */
8120
8121 has_listeners = kauth_authorize_fileop_has_listeners();
8122
8123 need_kpath2 = 0;
8124 #if CONFIG_AUDIT
8125 if (AUDIT_RECORD_EXISTS()) {
8126 need_kpath2 = 1;
8127 }
8128 #endif
8129
8130 if (need_event || has_listeners) {
8131 if (from_name == NULL) {
8132 GET_PATH(from_name);
8133 if (from_name == NULL) {
8134 error = ENOMEM;
8135 goto out1;
8136 }
8137 }
8138
8139 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
8140
8141 if (from_name_no_firmlink == NULL) {
8142 GET_PATH(from_name_no_firmlink);
8143 if (from_name_no_firmlink == NULL) {
8144 error = ENOMEM;
8145 goto out1;
8146 }
8147 }
8148
8149 from_len_no_firmlink = safe_getpath_no_firmlink(fdvp, fromnd->ni_cnd.cn_nameptr, from_name_no_firmlink, MAXPATHLEN, &from_truncated_no_firmlink);
8150 }
8151
8152 if (need_event || need_kpath2 || has_listeners) {
8153 if (to_name == NULL) {
8154 GET_PATH(to_name);
8155 if (to_name == NULL) {
8156 error = ENOMEM;
8157 goto out1;
8158 }
8159 }
8160
8161 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
8162
8163 if (to_name_no_firmlink == NULL) {
8164 GET_PATH(to_name_no_firmlink);
8165 if (to_name_no_firmlink == NULL) {
8166 error = ENOMEM;
8167 goto out1;
8168 }
8169 }
8170
8171 to_len_no_firmlink = safe_getpath_no_firmlink(tdvp, tond->ni_cnd.cn_nameptr, to_name_no_firmlink, MAXPATHLEN, &to_truncated_no_firmlink);
8172 if (to_name && need_kpath2) {
8173 AUDIT_ARG(kpath, to_name, ARG_KPATH2);
8174 }
8175 }
8176 if (!fvp) {
8177 /*
8178 * Claim: this check will never reject a valid rename.
8179 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
8180 * Suppose fdvp and tdvp are not on the same mount.
8181 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
8182 * then you can't move it to within another dir on the same mountpoint.
8183 * If fvp sits atop a vnode on the same mount as tdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
8184 *
8185 * If this check passes, then we are safe to pass these vnodes to the same FS.
8186 */
8187 if (fdvp->v_mount != tdvp->v_mount) {
8188 error = EXDEV;
8189 goto out1;
8190 }
8191 goto skipped_lookup;
8192 }
8193
8194 if (!batched) {
8195 error = vn_authorize_renamex_with_paths(fdvp, fvp, &fromnd->ni_cnd, from_name, tdvp, tvp, &tond->ni_cnd, to_name, ctx, flags, NULL);
8196 if (error) {
8197 if (error == ENOENT) {
8198 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8199 /*
8200 * We encountered a race where after doing the namei, tvp stops
8201 * being valid. If so, simply re-drive the rename call from the
8202 * top.
8203 */
8204 do_retry = 1;
8205 retry_count += 1;
8206 }
8207 }
8208 goto out1;
8209 }
8210 }
8211
8212 /*
8213 * If the source and destination are the same (i.e. they're
8214 * links to the same vnode) and the target file system is
8215 * case sensitive, then there is nothing to do.
8216 *
8217 * XXX Come back to this.
8218 */
8219 if (fvp == tvp) {
8220 int pathconf_val;
8221
8222 /*
8223 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
8224 * then assume that this file system is case sensitive.
8225 */
8226 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
8227 pathconf_val != 0) {
8228 goto out1;
8229 }
8230 }
8231
8232 /*
8233 * Allow the renaming of mount points.
8234 * - target must not exist
8235 * - target must reside in the same directory as source
8236 * - union mounts cannot be renamed
8237 * - "/" cannot be renamed
8238 *
8239 * XXX Handle this in VFS after a continued lookup (if we missed
8240 * in the cache to start off)
8241 *
8242 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
8243 * we'll skip past here. The file system is responsible for
8244 * checking that @tvp is not a descendent of @fvp and vice versa
8245 * so it should always return EINVAL if either @tvp or @fvp is the
8246 * root of a volume.
8247 */
8248 if ((fvp->v_flag & VROOT) &&
8249 (fvp->v_type == VDIR) &&
8250 (tvp == NULL) &&
8251 (fvp->v_mountedhere == NULL) &&
8252 (fdvp == tdvp) &&
8253 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
8254 ((fvp->v_mount->mnt_kern_flag & MNTK_SYSTEM) == 0) &&
8255 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
8256 vnode_t coveredvp;
8257
8258 /* switch fvp to the covered vnode */
8259 coveredvp = fvp->v_mount->mnt_vnodecovered;
8260 if ((vnode_getwithref(coveredvp))) {
8261 error = ENOENT;
8262 goto out1;
8263 }
8264 vnode_put(fvp);
8265
8266 fvp = coveredvp;
8267 mntrename = TRUE;
8268 }
8269 /*
8270 * Check for cross-device rename.
8271 */
8272 if ((fvp->v_mount != tdvp->v_mount) ||
8273 (tvp && (fvp->v_mount != tvp->v_mount))) {
8274 error = EXDEV;
8275 goto out1;
8276 }
8277
8278 /*
8279 * If source is the same as the destination (that is the
8280 * same inode number) then there is nothing to do...
8281 * EXCEPT if the underlying file system supports case
8282 * insensitivity and is case preserving. In this case
8283 * the file system needs to handle the special case of
8284 * getting the same vnode as target (fvp) and source (tvp).
8285 *
8286 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
8287 * and _PC_CASE_PRESERVING can have this exception, and they need to
8288 * handle the special case of getting the same vnode as target and
8289 * source. NOTE: Then the target is unlocked going into vnop_rename,
8290 * so not to cause locking problems. There is a single reference on tvp.
8291 *
8292 * NOTE - that fvp == tvp also occurs if they are hard linked and
8293 * that correct behaviour then is just to return success without doing
8294 * anything.
8295 *
8296 * XXX filesystem should take care of this itself, perhaps...
8297 */
8298 if (fvp == tvp && fdvp == tdvp) {
8299 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
8300 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
8301 fromnd->ni_cnd.cn_namelen)) {
8302 goto out1;
8303 }
8304 }
8305
8306 if (holding_mntlock && fvp->v_mount != locked_mp) {
8307 /*
8308 * we're holding a reference and lock
8309 * on locked_mp, but it no longer matches
8310 * what we want to do... so drop our hold
8311 */
8312 mount_unlock_renames(locked_mp);
8313 mount_drop(locked_mp, 0);
8314 holding_mntlock = 0;
8315 }
8316 if (tdvp != fdvp && fvp->v_type == VDIR) {
8317 /*
8318 * serialize renames that re-shape
8319 * the tree... if holding_mntlock is
8320 * set, then we're ready to go...
8321 * otherwise we
8322 * first need to drop the iocounts
8323 * we picked up, second take the
8324 * lock to serialize the access,
8325 * then finally start the lookup
8326 * process over with the lock held
8327 */
8328 if (!holding_mntlock) {
8329 /*
8330 * need to grab a reference on
8331 * the mount point before we
8332 * drop all the iocounts... once
8333 * the iocounts are gone, the mount
8334 * could follow
8335 */
8336 locked_mp = fvp->v_mount;
8337 mount_ref(locked_mp, 0);
8338
8339 /*
8340 * nameidone has to happen before we vnode_put(tvp)
8341 * since it may need to release the fs_nodelock on the tvp
8342 */
8343 nameidone(tond);
8344
8345 if (tvp) {
8346 vnode_put(tvp);
8347 }
8348 vnode_put(tdvp);
8349
8350 /*
8351 * nameidone has to happen before we vnode_put(fdvp)
8352 * since it may need to release the fs_nodelock on the fvp
8353 */
8354 nameidone(fromnd);
8355
8356 vnode_put(fvp);
8357 vnode_put(fdvp);
8358
8359 mount_lock_renames(locked_mp);
8360 holding_mntlock = 1;
8361
8362 goto retry;
8363 }
8364 } else {
8365 /*
8366 * when we dropped the iocounts to take
8367 * the lock, we allowed the identity of
8368 * the various vnodes to change... if they did,
8369 * we may no longer be dealing with a rename
8370 * that reshapes the tree... once we're holding
8371 * the iocounts, the vnodes can't change type
8372 * so we're free to drop the lock at this point
8373 * and continue on
8374 */
8375 if (holding_mntlock) {
8376 mount_unlock_renames(locked_mp);
8377 mount_drop(locked_mp, 0);
8378 holding_mntlock = 0;
8379 }
8380 }
8381
8382 // save these off so we can later verify that fvp is the same
8383 oname = fvp->v_name;
8384 oparent = fvp->v_parent;
8385
8386 skipped_lookup:
8387 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
8388 tdvp, &tvp, &tond->ni_cnd, tvap,
8389 flags, ctx);
8390
8391 if (holding_mntlock) {
8392 /*
8393 * we can drop our serialization
8394 * lock now
8395 */
8396 mount_unlock_renames(locked_mp);
8397 mount_drop(locked_mp, 0);
8398 holding_mntlock = 0;
8399 }
8400 if (error) {
8401 if (error == EDATALESS) {
8402 /*
8403 * If we've been here before, something has gone
8404 * horribly wrong and we should just get out lest
8405 * we spiral around the drain forever.
8406 */
8407 if (flags & VFS_RENAME_DATALESS) {
8408 error = EIO;
8409 goto out1;
8410 }
8411
8412 /*
8413 * The object we're renaming is dataless (or has a
8414 * dataless descendent) and requires materialization
8415 * before the rename occurs. But we're holding the
8416 * mount point's rename lock, so it's not safe to
8417 * make the upcall.
8418 *
8419 * In this case, we release the lock, perform the
8420 * materialization, and start the whole thing over.
8421 */
8422 error = vnode_materialize_dataless_file(fvp,
8423 NAMESPACE_HANDLER_RENAME_OP);
8424
8425 if (error == 0) {
8426 /*
8427 * The next time around we need to tell the
8428 * file system that the materialization has
8429 * been performed.
8430 */
8431 flags |= VFS_RENAME_DATALESS;
8432 do_retry = 1;
8433 }
8434 goto out1;
8435 }
8436 if (error == EKEEPLOOKING) {
8437 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
8438 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
8439 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
8440 }
8441 }
8442
8443 fromnd->ni_vp = fvp;
8444 tond->ni_vp = tvp;
8445
8446 goto continue_lookup;
8447 }
8448
8449 /*
8450 * We may encounter a race in the VNOP where the destination didn't
8451 * exist when we did the namei, but it does by the time we go and
8452 * try to create the entry. In this case, we should re-drive this rename
8453 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
8454 * but other filesystems susceptible to this race could return it, too.
8455 */
8456 if (error == ERECYCLE) {
8457 do_retry = 1;
8458 }
8459
8460 /*
8461 * For compound VNOPs, the authorization callback may return
8462 * ENOENT in case of racing hardlink lookups hitting the name
8463 * cache, redrive the lookup.
8464 */
8465 if (batched && error == ENOENT) {
8466 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8467 do_retry = 1;
8468 retry_count += 1;
8469 }
8470 }
8471
8472 goto out1;
8473 }
8474
8475 /* call out to allow 3rd party notification of rename.
8476 * Ignore result of kauth_authorize_fileop call.
8477 */
8478 kauth_authorize_fileop(vfs_context_ucred(ctx),
8479 KAUTH_FILEOP_RENAME,
8480 (uintptr_t)from_name, (uintptr_t)to_name);
8481 if (flags & VFS_RENAME_SWAP) {
8482 kauth_authorize_fileop(vfs_context_ucred(ctx),
8483 KAUTH_FILEOP_RENAME,
8484 (uintptr_t)to_name, (uintptr_t)from_name);
8485 }
8486
8487 #if CONFIG_FSE
8488 if (from_name != NULL && to_name != NULL) {
8489 if (from_truncated || to_truncated) {
8490 // set it here since only the from_finfo gets reported up to user space
8491 from_finfo.mode |= FSE_TRUNCATED_PATH;
8492 }
8493
8494 if (tvap && tvp) {
8495 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
8496 }
8497 if (fvap) {
8498 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
8499 }
8500
8501 if (tvp) {
8502 add_fsevent(FSE_RENAME, ctx,
8503 FSE_ARG_STRING, from_len_no_firmlink, from_name_no_firmlink,
8504 FSE_ARG_FINFO, &from_finfo,
8505 FSE_ARG_STRING, to_len_no_firmlink, to_name_no_firmlink,
8506 FSE_ARG_FINFO, &to_finfo,
8507 FSE_ARG_DONE);
8508 if (flags & VFS_RENAME_SWAP) {
8509 /*
8510 * Strictly speaking, swap is the equivalent of
8511 * *three* renames. FSEvents clients should only take
8512 * the events as a hint, so we only bother reporting
8513 * two.
8514 */
8515 add_fsevent(FSE_RENAME, ctx,
8516 FSE_ARG_STRING, to_len_no_firmlink, to_name_no_firmlink,
8517 FSE_ARG_FINFO, &to_finfo,
8518 FSE_ARG_STRING, from_len_no_firmlink, from_name_no_firmlink,
8519 FSE_ARG_FINFO, &from_finfo,
8520 FSE_ARG_DONE);
8521 }
8522 } else {
8523 add_fsevent(FSE_RENAME, ctx,
8524 FSE_ARG_STRING, from_len_no_firmlink, from_name_no_firmlink,
8525 FSE_ARG_FINFO, &from_finfo,
8526 FSE_ARG_STRING, to_len_no_firmlink, to_name_no_firmlink,
8527 FSE_ARG_DONE);
8528 }
8529 }
8530 #endif /* CONFIG_FSE */
8531
8532 /*
8533 * update filesystem's mount point data
8534 */
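/*
 * For example, if the directory a file system is mounted on was just
 * renamed, the code below rewrites the final component of that file
 * system's f_mntonname (and, via rename_submounts_callback, the
 * f_mntonname of any submounts) to the last component of the rename
 * target path.
 */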
8535 if (mntrename) {
8536 char *cp, *pathend, *mpname;
8537 char * tobuf;
8538 struct mount *mp;
8539 int maxlen;
8540 size_t len = 0;
8541
8542 mp = fvp->v_mountedhere;
8543
8544 if (vfs_busy(mp, LK_NOWAIT)) {
8545 error = EBUSY;
8546 goto out1;
8547 }
8548 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
8549
8550 if (UIO_SEG_IS_USER_SPACE(segflg)) {
8551 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
8552 } else {
8553 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
8554 }
8555 if (!error) {
8556 /* find current mount point prefix */
8557 pathend = &mp->mnt_vfsstat.f_mntonname[0];
8558 for (cp = pathend; *cp != '\0'; ++cp) {
8559 if (*cp == '/') {
8560 pathend = cp + 1;
8561 }
8562 }
8563 /* find last component of target name */
8564 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
8565 if (*cp == '/') {
8566 mpname = cp + 1;
8567 }
8568 }
8569
8570 /* Update f_mntonname of sub mounts */
8571 vfs_iterate(0, rename_submounts_callback, (void *)mp);
8572
8573 /* append name to prefix */
8574 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
8575 bzero(pathend, maxlen);
8576
8577 strlcpy(pathend, mpname, maxlen);
8578 }
8579 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
8580
8581 vfs_unbusy(mp);
8582
8583 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
8584 }
8585 /*
8586 * fix up name & parent pointers. note that we first
8587 * check that fvp has the same name/parent pointers it
8588 * had before the rename call... this is a 'weak' check
8589 * at best...
8590 *
8591 * XXX oparent and oname may not be set in the compound vnop case
8592 */
8593 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
8594 int update_flags;
8595
8596 update_flags = VNODE_UPDATE_NAME;
8597
8598 if (fdvp != tdvp) {
8599 update_flags |= VNODE_UPDATE_PARENT;
8600 }
8601
8602 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
8603 }
8604 out1:
8605 if (to_name != NULL) {
8606 RELEASE_PATH(to_name);
8607 to_name = NULL;
8608 }
8609 if (to_name_no_firmlink != NULL) {
8610 RELEASE_PATH(to_name_no_firmlink);
8611 to_name_no_firmlink = NULL;
8612 }
8613 if (from_name != NULL) {
8614 RELEASE_PATH(from_name);
8615 from_name = NULL;
8616 }
8617 if (from_name_no_firmlink != NULL) {
8618 RELEASE_PATH(from_name_no_firmlink);
8619 from_name_no_firmlink = NULL;
8620 }
8621 if (holding_mntlock) {
8622 mount_unlock_renames(locked_mp);
8623 mount_drop(locked_mp, 0);
8624 holding_mntlock = 0;
8625 }
8626 if (tdvp) {
8627 /*
8628 * nameidone has to happen before we vnode_put(tdvp)
8629 * since it may need to release the fs_nodelock on the tdvp
8630 */
8631 nameidone(tond);
8632
8633 if (tvp) {
8634 vnode_put(tvp);
8635 }
8636 vnode_put(tdvp);
8637 }
8638 if (fdvp) {
8639 /*
8640 * nameidone has to happen before we vnode_put(fdvp)
8641 * since it may need to release the fs_nodelock on the fdvp
8642 */
8643 nameidone(fromnd);
8644
8645 if (fvp) {
8646 vnode_put(fvp);
8647 }
8648 vnode_put(fdvp);
8649 }
8650
8651 /*
8652 * If things changed after we did the namei, then we will re-drive
8653 * this rename call from the top.
8654 */
8655 if (do_retry) {
8656 do_retry = 0;
8657 goto retry;
8658 }
8659
8660 FREE(__rename_data, M_TEMP);
8661 return error;
8662 }
8663
8664 int
8665 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
8666 {
8667 return renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
8668 AT_FDCWD, uap->to, UIO_USERSPACE, 0);
8669 }
8670
8671 int
8672 renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
8673 {
8674 return renameat_internal(
8675 vfs_context_current(),
8676 uap->fromfd, uap->from,
8677 uap->tofd, uap->to,
8678 UIO_USERSPACE, uap->flags);
8679 }
8680
8681 int
8682 renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
8683 {
8684 return renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
8685 uap->tofd, uap->to, UIO_USERSPACE, 0);
8686 }
8687
8688 /*
8689 * Make a directory file.
8690 *
8691 * Returns: 0 Success
8692 * EEXIST
8693 * namei:???
8694 * vnode_authorize:???
8695 * vn_create:???
8696 */
8697 /* ARGSUSED */
8698 static int
8699 mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
8700 enum uio_seg segflg)
8701 {
8702 vnode_t vp, dvp;
8703 int error;
8704 int update_flags = 0;
8705 int batched;
8706 struct nameidata nd;
8707
8708 AUDIT_ARG(mode, vap->va_mode);
8709 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
8710 path, ctx);
8711 nd.ni_cnd.cn_flags |= WILLBEDIR;
8712 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
8713
8714 continue_lookup:
8715 error = nameiat(&nd, fd);
8716 if (error) {
8717 return error;
8718 }
8719 dvp = nd.ni_dvp;
8720 vp = nd.ni_vp;
8721
8722 if (vp != NULL) {
8723 error = EEXIST;
8724 goto out;
8725 }
8726
8727 batched = vnode_compound_mkdir_available(dvp);
8728
8729 VATTR_SET(vap, va_type, VDIR);
8730
8731 /*
8732 * XXX
8733 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
8734 * only get EEXIST or EISDIR for existing path components, and not that it could see
8735 * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
8736 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
8737 */
8738 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
8739 if (error == EACCES || error == EPERM) {
8740 int error2;
8741
8742 nameidone(&nd);
8743 vnode_put(dvp);
8744 dvp = NULLVP;
8745
8746 /*
8747 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
8748 * rather than EACCES if the target exists.
8749 */
8750 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
8751 path, ctx);
8752 error2 = nameiat(&nd, fd);
8753 if (error2) {
8754 goto out;
8755 } else {
8756 vp = nd.ni_vp;
8757 error = EEXIST;
8758 goto out;
8759 }
8760 }
8761
8762 goto out;
8763 }
8764
8765 /*
8766 * make the directory
8767 */
8768 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
8769 if (error == EKEEPLOOKING) {
8770 nd.ni_vp = vp;
8771 goto continue_lookup;
8772 }
8773
8774 goto out;
8775 }
8776
8777 // Make sure the name & parent pointers are hooked up
8778 if (vp->v_name == NULL) {
8779 update_flags |= VNODE_UPDATE_NAME;
8780 }
8781 if (vp->v_parent == NULLVP) {
8782 update_flags |= VNODE_UPDATE_PARENT;
8783 }
8784
8785 if (update_flags) {
8786 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
8787 }
8788
8789 #if CONFIG_FSE
8790 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
8791 #endif
8792
8793 out:
8794 /*
8795 * nameidone has to happen before we vnode_put(dvp)
8796 * since it may need to release the fs_nodelock on the dvp
8797 */
8798 nameidone(&nd);
8799
8800 if (vp) {
8801 vnode_put(vp);
8802 }
8803 if (dvp) {
8804 vnode_put(dvp);
8805 }
8806
8807 return error;
8808 }
8809
8810 /*
8811 * mkdir_extended: Create a directory; with extended security (ACL).
8812 *
8813 * Parameters: p Process requesting to create the directory
8814 * uap User argument descriptor (see below)
8815 * retval (ignored)
8816 *
8817 * Indirect: uap->path Path of directory to create
8818 * uap->mode Access permissions to set
8819 * uap->xsecurity ACL to set
8820 *
8821 * Returns: 0 Success
8822 * !0 Not success
8823 *
8824 */
8825 int
8826 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
8827 {
8828 int ciferror;
8829 kauth_filesec_t xsecdst;
8830 struct vnode_attr va;
8831
8832 AUDIT_ARG(owner, uap->uid, uap->gid);
8833
8834 xsecdst = NULL;
8835 if ((uap->xsecurity != USER_ADDR_NULL) &&
8836 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)) {
8837 return ciferror;
8838 }
8839
8840 VATTR_INIT(&va);
8841 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8842 if (xsecdst != NULL) {
8843 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
8844 }
8845
8846 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
8847 UIO_USERSPACE);
8848 if (xsecdst != NULL) {
8849 kauth_filesec_free(xsecdst);
8850 }
8851 return ciferror;
8852 }
8853
8854 int
8855 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
8856 {
8857 struct vnode_attr va;
8858
8859 VATTR_INIT(&va);
8860 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8861
8862 return mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
8863 UIO_USERSPACE);
8864 }
8865
8866 int
8867 mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
8868 {
8869 struct vnode_attr va;
8870
8871 VATTR_INIT(&va);
8872 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8873
8874 return mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
8875 UIO_USERSPACE);
8876 }
8877
8878 static int
8879 rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
8880 enum uio_seg segflg, int unlink_flags)
8881 {
8882 vnode_t vp, dvp;
8883 int error;
8884 struct nameidata nd;
8885 char *path = NULL;
8886 char *no_firmlink_path = NULL;
8887 int len_path = 0;
8888 int len_no_firmlink_path = 0;
8889 int has_listeners = 0;
8890 int need_event = 0;
8891 int truncated_path = 0;
8892 int truncated_no_firmlink_path = 0;
8893 #if CONFIG_FSE
8894 struct vnode_attr va;
8895 #endif /* CONFIG_FSE */
8896 struct vnode_attr *vap = NULL;
8897 int restart_count = 0;
8898 int batched;
8899
8900 int restart_flag;
8901
8902 /*
8903 * This loop exists to restart rmdir in the unlikely case that two
8904 * processes are simultaneously trying to remove the same directory
8905 * containing orphaned appleDouble files.
8906 */
8907 do {
8908 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
8909 segflg, dirpath, ctx);
8910 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
8911 continue_lookup:
8912 restart_flag = 0;
8913 vap = NULL;
8914
8915 error = nameiat(&nd, fd);
8916 if (error) {
8917 return error;
8918 }
8919
8920 dvp = nd.ni_dvp;
8921 vp = nd.ni_vp;
8922
8923 if (vp) {
8924 batched = vnode_compound_rmdir_available(vp);
8925
8926 if (vp->v_flag & VROOT) {
8927 /*
8928 * The root of a mounted filesystem cannot be deleted.
8929 */
8930 error = EBUSY;
8931 goto out;
8932 }
8933
8934 #if DEVELOPMENT || DEBUG
8935 /*
8936 * XXX VSWAP: Check for entitlements or special flag here
8937 * so we can restrict access appropriately.
8938 */
8939 #else /* DEVELOPMENT || DEBUG */
8940
8941 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
8942 error = EPERM;
8943 goto out;
8944 }
8945 #endif /* DEVELOPMENT || DEBUG */
8946
8947 /*
8948 * Removed a check here; we used to abort if vp's vid
8949 * was not the same as what we'd seen the last time around.
8950 * I do not think that check was valid, because if we retry
8951 * and all dirents are gone, the directory could legitimately
8952 * be recycled but still be present in a situation where we would
8953 * have had permission to delete. Therefore, we won't make
8954 * an effort to preserve that check now that we may not have a
8955 * vp here.
8956 */
8957
8958 if (!batched) {
8959 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
8960 if (error) {
8961 if (error == ENOENT) {
8962 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8963 restart_flag = 1;
8964 restart_count += 1;
8965 }
8966 }
8967 goto out;
8968 }
8969 }
8970 } else {
8971 batched = 1;
8972
8973 if (!vnode_compound_rmdir_available(dvp)) {
8974 panic("No error, but no compound rmdir?");
8975 }
8976 }
8977
8978 #if CONFIG_FSE
8979 fse_info finfo;
8980
8981 need_event = need_fsevent(FSE_DELETE, dvp);
8982 if (need_event) {
8983 if (!batched) {
8984 get_fse_info(vp, &finfo, ctx);
8985 } else {
8986 error = vfs_get_notify_attributes(&va);
8987 if (error) {
8988 goto out;
8989 }
8990
8991 vap = &va;
8992 }
8993 }
8994 #endif
8995 has_listeners = kauth_authorize_fileop_has_listeners();
8996 if (need_event || has_listeners) {
8997 if (path == NULL) {
8998 GET_PATH(path);
8999 if (path == NULL) {
9000 error = ENOMEM;
9001 goto out;
9002 }
9003 }
9004
9005 len_path = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
9006
9007 if (no_firmlink_path == NULL) {
9008 GET_PATH(no_firmlink_path);
9009 if (no_firmlink_path == NULL) {
9010 error = ENOMEM;
9011 goto out;
9012 }
9013 }
9014
9015 len_no_firmlink_path = safe_getpath_no_firmlink(dvp, nd.ni_cnd.cn_nameptr, no_firmlink_path, MAXPATHLEN, &truncated_no_firmlink_path);
9016 #if CONFIG_FSE
9017 if (truncated_no_firmlink_path) {
9018 finfo.mode |= FSE_TRUNCATED_PATH;
9019 }
9020 #endif
9021 }
9022
9023 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
9024 nd.ni_vp = vp;
9025 if (vp == NULLVP) {
9026 /* Couldn't find a vnode */
9027 goto out;
9028 }
9029
9030 if (error == EKEEPLOOKING) {
9031 goto continue_lookup;
9032 } else if (batched && error == ENOENT) {
9033 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
9034 /*
9035 * For compound VNOPs, the authorization callback
9036 * may return ENOENT in case of racing hard link lookups;
9037 * redrive the lookup.
9038 */
9039 restart_flag = 1;
9040 restart_count += 1;
9041 goto out;
9042 }
9043 }
9044
9045 /*
9046 * XXX There's no provision for passing flags
9047 * to VNOP_RMDIR(). So, if vn_rmdir() fails
9048 * because it's not empty, then we try again
9049 * with VNOP_REMOVE(), passing in a special
9050 * flag that clever file systems will know
9051 * how to handle.
9052 */
9053 if (error == ENOTEMPTY &&
9054 (unlink_flags & VNODE_REMOVE_DATALESS_DIR) != 0) {
9055 /*
9056 * If this fails, we want to keep the original
9057 * error.
9058 */
9059 if (vn_remove(dvp, &vp, &nd,
9060 VNODE_REMOVE_DATALESS_DIR, vap, ctx) == 0) {
9061 error = 0;
9062 }
9063 }
9064
9065 #if CONFIG_APPLEDOUBLE
9066 /*
9067 * Special case to remove orphaned AppleDouble
9068 * files. I don't like putting this in the kernel,
9069 * but carbon does not like putting this in carbon either,
9070 * so here we are.
9071 */
9072 if (error == ENOTEMPTY) {
9073 int ad_error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
9074 if (ad_error == EBUSY) {
9075 error = ad_error;
9076 goto out;
9077 }
9078
9079
9080 /*
9081 * Assuming everything went well, we will try the RMDIR again
9082 */
9083 if (!ad_error) {
9084 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
9085 }
9086 }
9087 #endif /* CONFIG_APPLEDOUBLE */
9088 /*
9089 * Call out to allow 3rd party notification of delete.
9090 * Ignore result of kauth_authorize_fileop call.
9091 */
9092 if (!error) {
9093 if (has_listeners) {
9094 kauth_authorize_fileop(vfs_context_ucred(ctx),
9095 KAUTH_FILEOP_DELETE,
9096 (uintptr_t)vp,
9097 (uintptr_t)path);
9098 }
9099
9100 if (vp->v_flag & VISHARDLINK) {
9101 // see the comment in unlink1() about why we update
9102 // the parent of a hard link when it is removed
9103 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
9104 }
9105
9106 #if CONFIG_FSE
9107 if (need_event) {
9108 if (vap) {
9109 vnode_get_fse_info_from_vap(vp, &finfo, vap);
9110 }
9111 add_fsevent(FSE_DELETE, ctx,
9112 FSE_ARG_STRING, len_no_firmlink_path, no_firmlink_path,
9113 FSE_ARG_FINFO, &finfo,
9114 FSE_ARG_DONE);
9115 }
9116 #endif
9117 }
9118
9119 out:
9120 if (path != NULL) {
9121 RELEASE_PATH(path);
9122 path = NULL;
9123 }
9124
9125 if (no_firmlink_path != NULL) {
9126 RELEASE_PATH(no_firmlink_path);
9127 no_firmlink_path = NULL;
9128 }
9129
9130 /*
9131 * nameidone has to happen before we vnode_put(dvp)
9132 * since it may need to release the fs_nodelock on the dvp
9133 */
9134 nameidone(&nd);
9135 vnode_put(dvp);
9136
9137 if (vp) {
9138 vnode_put(vp);
9139 }
9140
9141 if (restart_flag == 0) {
9142 wakeup_one((caddr_t)vp);
9143 return error;
9144 }
9145 tsleep(vp, PVFS, "rm AD", 1);
9146 } while (restart_flag != 0);
9147
9148 return error;
9149 }
9150
9151 /*
9152 * Remove a directory file.
9153 */
9154 /* ARGSUSED */
9155 int
9156 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
9157 {
9158 return rmdirat_internal(vfs_context_current(), AT_FDCWD,
9159 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE, 0);
9160 }
9161
9162 /* Get direntry length padded to 8 byte alignment */
9163 #define DIRENT64_LEN(namlen) \
9164 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
9165
9166 /* Get dirent length padded to 4 byte alignment */
9167 #define DIRENT_LEN(namelen) \
9168 ((sizeof(struct dirent) + (namelen + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)
9169
9170 /* Get the end of this dirent */
9171 #define DIRENT_END(dep) \
9172 (((char *)(dep)) + (dep)->d_reclen - 1)
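/*
 * For example, a three-character name gives DIRENT64_LEN(3) == 32 and
 * DIRENT_LEN(3) == 12, the worst-case record sizes that the 3/8 buffer
 * sizing in vnode_readdir64() below is based on.
 */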
9173
9174 errno_t
9175 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
9176 int *numdirent, vfs_context_t ctxp)
9177 {
9178 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
9179 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
9180 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
9181 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
9182 } else {
9183 size_t bufsize;
9184 void * bufptr;
9185 uio_t auio;
9186 struct direntry *entry64;
9187 struct dirent *dep;
9188 int bytesread;
9189 int error;
9190
9191 /*
9192 * We're here because the underlying file system does not
9193 * support direntries, or we mounted denying support, so we must
9194 * fall back to dirents and convert them to direntries.
9195 *
9196 * Our kernel buffer needs to be smaller since re-packing will
9197 * expand each dirent. The worst case (when the name length
9198 * is 3 or less) corresponds to a struct direntry size of 32
9199 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
9200 * (4-byte aligned). So having a buffer that is 3/8 the size
9201 * will prevent us from reading more than we can pack.
9202 *
9203 * Since this buffer is wired memory, we will limit the
9204 * buffer size to a maximum of 32K. We would really like to
9205 * use 32K in the MIN(), but we use magic number 87371 to
9206 * prevent uio_resid() * 3 / 8 from overflowing.
9207 */
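/*
 * Worked example of the 3/8 ratio: in the worst case a 12-byte dirent
 * (name length <= 3) repacks into a 32-byte direntry, so output grows
 * by at most a factor of 32/12 = 8/3; reading into a kernel buffer
 * that is 3/8 of the user's residual therefore can never produce more
 * repacked output than the user buffer holds. With the 87371 cap,
 * bufsize tops out at 3 * 87371 / 8 = 32764 bytes, just under 32K.
 */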
9208 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
9209 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
9210 if (bufptr == NULL) {
9211 return ENOMEM;
9212 }
9213
9214 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
9215 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
9216 auio->uio_offset = uio->uio_offset;
9217
9218 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
9219
9220 dep = (struct dirent *)bufptr;
9221 bytesread = bufsize - uio_resid(auio);
9222
9223 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
9224 M_TEMP, M_WAITOK);
9225 /*
9226 * Convert all the entries and copy them out to user's buffer.
9227 */
9228 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
9229 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
9230
9231 if (DIRENT_END(dep) > ((char *)bufptr + bytesread) ||
9232 DIRENT_LEN(dep->d_namlen) > dep->d_reclen) {
9233 printf("%s: %s: Bad dirent received from directory %s\n", __func__,
9234 vp->v_mount->mnt_vfsstat.f_mntonname,
9235 vp->v_name ? vp->v_name : "<unknown>");
9236 error = EIO;
9237 break;
9238 }
9239
9240 bzero(entry64, enbufsize);
9241 /* Convert a dirent to a dirent64. */
9242 entry64->d_ino = dep->d_ino;
9243 entry64->d_seekoff = 0;
9244 entry64->d_reclen = enbufsize;
9245 entry64->d_namlen = dep->d_namlen;
9246 entry64->d_type = dep->d_type;
9247 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
9248
9249 /* Move to next entry. */
9250 dep = (struct dirent *)((char *)dep + dep->d_reclen);
9251
9252 /* Copy entry64 to user's buffer. */
9253 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
9254 }
9255
9256 /* Update the real offset using the offset we got from VNOP_READDIR. */
9257 if (error == 0) {
9258 uio->uio_offset = auio->uio_offset;
9259 }
9260 uio_free(auio);
9261 FREE(bufptr, M_TEMP);
9262 FREE(entry64, M_TEMP);
9263 return error;
9264 }
9265 }
9266
9267 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
9268
9269 /*
9270 * Read a block of directory entries in a file system independent format.
9271 */
9272 static int
9273 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
9274 off_t *offset, int *eofflag, int flags)
9275 {
9276 vnode_t vp;
9277 struct vfs_context context = *vfs_context_current(); /* local copy */
9278 struct fileproc *fp;
9279 uio_t auio;
9280 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9281 off_t loff;
9282 int error, numdirent;
9283 char uio_buf[UIO_SIZEOF(1)];
9284
9285 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
9286 if (error) {
9287 return error;
9288 }
9289 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
9290 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
9291 error = EBADF;
9292 goto out;
9293 }
9294
9295 if (bufsize > GETDIRENTRIES_MAXBUFSIZE) {
9296 bufsize = GETDIRENTRIES_MAXBUFSIZE;
9297 }
9298
9299 #if CONFIG_MACF
9300 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
9301 if (error) {
9302 goto out;
9303 }
9304 #endif
9305 if ((error = vnode_getwithref(vp))) {
9306 goto out;
9307 }
9308 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
9309
9310 unionread:
9311 if (vp->v_type != VDIR) {
9312 (void)vnode_put(vp);
9313 error = EINVAL;
9314 goto out;
9315 }
9316
9317 #if CONFIG_MACF
9318 error = mac_vnode_check_readdir(&context, vp);
9319 if (error != 0) {
9320 (void)vnode_put(vp);
9321 goto out;
9322 }
9323 #endif /* MAC */
9324
9325 loff = fp->f_fglob->fg_offset;
9326 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
9327 uio_addiov(auio, bufp, bufsize);
9328
9329 if (flags & VNODE_READDIR_EXTENDED) {
9330 error = vnode_readdir64(vp, auio, flags, eofflag, &numdirent, &context);
9331 fp->f_fglob->fg_offset = uio_offset(auio);
9332 } else {
9333 error = VNOP_READDIR(vp, auio, 0, eofflag, &numdirent, &context);
9334 fp->f_fglob->fg_offset = uio_offset(auio);
9335 }
9336 if (error) {
9337 (void)vnode_put(vp);
9338 goto out;
9339 }
9340
9341 if ((user_ssize_t)bufsize == uio_resid(auio)) {
9342 if (union_dircheckp) {
9343 error = union_dircheckp(&vp, fp, &context);
9344 if (error == -1) {
9345 goto unionread;
9346 }
9347 if (error) {
9348 (void)vnode_put(vp);
9349 goto out;
9350 }
9351 }
9352
9353 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
9354 struct vnode *tvp = vp;
9355 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
9356 vnode_ref(vp);
9357 fp->f_fglob->fg_data = (caddr_t) vp;
9358 fp->f_fglob->fg_offset = 0;
9359 vnode_rele(tvp);
9360 vnode_put(tvp);
9361 goto unionread;
9362 }
9363 vp = tvp;
9364 }
9365 }
9366
9367 vnode_put(vp);
9368 if (offset) {
9369 *offset = loff;
9370 }
9371
9372 *bytesread = bufsize - uio_resid(auio);
9373 out:
9374 file_drop(fd);
9375 return error;
9376 }
9377
9378
9379 int
9380 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
9381 {
9382 off_t offset;
9383 ssize_t bytesread;
9384 int error, eofflag;
9385
9386 AUDIT_ARG(fd, uap->fd);
9387 error = getdirentries_common(uap->fd, uap->buf, uap->count,
9388 &bytesread, &offset, &eofflag, 0);
9389
9390 if (error == 0) {
9391 if (proc_is64bit(p)) {
9392 user64_long_t base = (user64_long_t)offset;
9393 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
9394 } else {
9395 user32_long_t base = (user32_long_t)offset;
9396 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
9397 }
9398 *retval = bytesread;
9399 }
9400 return error;
9401 }
9402
9403 int
9404 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
9405 {
9406 off_t offset;
9407 ssize_t bytesread;
9408 int error, eofflag;
9409 user_size_t bufsize;
9410
9411 AUDIT_ARG(fd, uap->fd);
9412
9413 /*
9414 * If the buffer is at least GETDIRENTRIES64_EXTENDED_BUFSIZE large,
9415 * then the kernel carves out the last 4 bytes to return extended
9416 * information to userspace (namely whether we reached EOF with this call).
9417 */
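/*
 * In the extended case the user buffer is laid out as
 * [uap->bufsize - 4 bytes of packed struct direntry records]
 * [4-byte getdirentries64_flags_t], with GETDIRENTRIES64_EOF set in
 * the trailing word when the whole directory has been read.
 */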
9418 if (uap->bufsize >= GETDIRENTRIES64_EXTENDED_BUFSIZE) {
9419 bufsize = uap->bufsize - sizeof(getdirentries64_flags_t);
9420 } else {
9421 bufsize = uap->bufsize;
9422 }
9423
9424 error = getdirentries_common(uap->fd, uap->buf, bufsize,
9425 &bytesread, &offset, &eofflag, VNODE_READDIR_EXTENDED);
9426
9427 if (error == 0) {
9428 *retval = bytesread;
9429 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
9430
9431 if (error == 0 && uap->bufsize >= GETDIRENTRIES64_EXTENDED_BUFSIZE) {
9432 getdirentries64_flags_t flags = 0;
9433 if (eofflag) {
9434 flags |= GETDIRENTRIES64_EOF;
9435 }
9436 error = copyout(&flags, (user_addr_t)uap->buf + bufsize,
9437 sizeof(flags));
9438 }
9439 }
9440 return error;
9441 }
9442
9443
9444 /*
9445 * Set the mode mask for creation of filesystem nodes.
9446 * XXX implement xsecurity
9447 */
9448 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
9449 static int
9450 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
9451 {
9452 struct filedesc *fdp;
9453
9454 AUDIT_ARG(mask, newmask);
9455 proc_fdlock(p);
9456 fdp = p->p_fd;
9457 *retval = fdp->fd_cmask;
9458 fdp->fd_cmask = newmask & ALLPERMS;
9459 proc_fdunlock(p);
9460 return 0;
9461 }
9462
9463 /*
9464 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
9465 *
9466 * Parameters: p Process requesting to set the umask
9467 * uap User argument descriptor (see below)
9468 * retval umask of the process (parameter p)
9469 *
9470 * Indirect: uap->newmask umask to set
9471 * uap->xsecurity ACL to set
9472 *
9473 * Returns: 0 Success
9474 * !0 Not success
9475 *
9476 */
9477 int
9478 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
9479 {
9480 int ciferror;
9481 kauth_filesec_t xsecdst;
9482
9483 xsecdst = KAUTH_FILESEC_NONE;
9484 if (uap->xsecurity != USER_ADDR_NULL) {
9485 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) {
9486 return ciferror;
9487 }
9488 } else {
9489 xsecdst = KAUTH_FILESEC_NONE;
9490 }
9491
9492 ciferror = umask1(p, uap->newmask, xsecdst, retval);
9493
9494 if (xsecdst != KAUTH_FILESEC_NONE) {
9495 kauth_filesec_free(xsecdst);
9496 }
9497 return ciferror;
9498 }
9499
9500 int
9501 umask(proc_t p, struct umask_args *uap, int32_t *retval)
9502 {
9503 return umask1(p, uap->newmask, UMASK_NOXSECURITY, retval);
9504 }
9505
9506 /*
9507 * Void all references to file by ripping underlying filesystem
9508 * away from vnode.
9509 */
9510 /* ARGSUSED */
9511 int
9512 revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
9513 {
9514 vnode_t vp;
9515 struct vnode_attr va;
9516 vfs_context_t ctx = vfs_context_current();
9517 int error;
9518 struct nameidata nd;
9519
9520 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
9521 uap->path, ctx);
9522 error = namei(&nd);
9523 if (error) {
9524 return error;
9525 }
9526 vp = nd.ni_vp;
9527
9528 nameidone(&nd);
9529
9530 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
9531 error = ENOTSUP;
9532 goto out;
9533 }
9534
9535 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
9536 error = EBUSY;
9537 goto out;
9538 }
9539
9540 #if CONFIG_MACF
9541 error = mac_vnode_check_revoke(ctx, vp);
9542 if (error) {
9543 goto out;
9544 }
9545 #endif
9546
9547 VATTR_INIT(&va);
9548 VATTR_WANTED(&va, va_uid);
9549 if ((error = vnode_getattr(vp, &va, ctx))) {
9550 goto out;
9551 }
9552 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
9553 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
9554 goto out;
9555 }
9556 if (vp->v_usecount > 0 || (vnode_isaliased(vp))) {
9557 VNOP_REVOKE(vp, REVOKEALL, ctx);
9558 }
9559 out:
9560 vnode_put(vp);
9561 return error;
9562 }
9563
9564
9565 /*
9566 * HFS/HFS Plus SPECIFIC SYSTEM CALLS
9567 * The following system calls are designed to support features
9568 * which are specific to the HFS & HFS Plus volume formats
9569 */
9570
9571
9572 /*
9573 * Obtain attribute information on objects in a directory while enumerating
9574 * the directory.
9575 */
9576 /* ARGSUSED */
9577 int
9578 getdirentriesattr(proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
9579 {
9580 vnode_t vp;
9581 struct fileproc *fp;
9582 uio_t auio = NULL;
9583 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9584 uint32_t count = 0, savecount = 0;
9585 uint32_t newstate = 0;
9586 int error, eofflag;
9587 uint32_t loff = 0;
9588 struct attrlist attributelist;
9589 vfs_context_t ctx = vfs_context_current();
9590 int fd = uap->fd;
9591 char uio_buf[UIO_SIZEOF(1)];
9592 kauth_action_t action;
9593
9594 AUDIT_ARG(fd, fd);
9595
9596 /* Get the attributes into kernel space */
9597 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
9598 return error;
9599 }
9600 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
9601 return error;
9602 }
9603 savecount = count;
9604 if ((error = fp_getfvp(p, fd, &fp, &vp))) {
9605 return error;
9606 }
9607 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
9608 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
9609 error = EBADF;
9610 goto out;
9611 }
9612
9613
9614 #if CONFIG_MACF
9615 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
9616 fp->f_fglob);
9617 if (error) {
9618 goto out;
9619 }
9620 #endif
9621
9622
9623 if ((error = vnode_getwithref(vp))) {
9624 goto out;
9625 }
9626
9627 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
9628
9629 unionread:
9630 if (vp->v_type != VDIR) {
9631 (void)vnode_put(vp);
9632 error = EINVAL;
9633 goto out;
9634 }
9635
9636 #if CONFIG_MACF
9637 error = mac_vnode_check_readdir(ctx, vp);
9638 if (error != 0) {
9639 (void)vnode_put(vp);
9640 goto out;
9641 }
9642 #endif /* MAC */
9643
9644 /* set up the uio structure which will contain the users return buffer */
9645 loff = fp->f_fglob->fg_offset;
9646 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
9647 uio_addiov(auio, uap->buffer, uap->buffersize);
9648
9649 /*
9650 * If the only item requested is file names, we can let that pass with
9651 * just LIST_DIRECTORY. If they want any other attributes, that means
9652 * they need SEARCH as well.
9653 */
9654 action = KAUTH_VNODE_LIST_DIRECTORY;
9655 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
9656 attributelist.fileattr || attributelist.dirattr) {
9657 action |= KAUTH_VNODE_SEARCH;
9658 }
9659
9660 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
9661 /* Believe it or not, uap->options only has 32-bits of valid
9662 * info, so truncate before extending again */
9663
9664 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
9665 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
9666 }
9667
9668 if (error) {
9669 (void) vnode_put(vp);
9670 goto out;
9671 }
9672
9673 /*
9674 * If we've got the last entry of a directory in a union mount
9675 * then reset the eofflag and pretend there's still more to come.
9676 * The next call will again set eofflag and the buffer will be empty,
9677 * so traverse to the underlying directory and do the directory
9678 * read there.
9679 */
9680 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
9681 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
9682 eofflag = 0;
9683 } else { // Empty buffer
9684 struct vnode *tvp = vp;
9685 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
9686 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
9687 fp->f_fglob->fg_data = (caddr_t) vp;
9688 fp->f_fglob->fg_offset = 0; // reset index for new dir
9689 count = savecount;
9690 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
9691 vnode_put(tvp);
9692 goto unionread;
9693 }
9694 vp = tvp;
9695 }
9696 }
9697
9698 (void)vnode_put(vp);
9699
9700 if (error) {
9701 goto out;
9702 }
9703 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
9704
9705 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count)))) {
9706 goto out;
9707 }
9708 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate)))) {
9709 goto out;
9710 }
9711 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff)))) {
9712 goto out;
9713 }
9714
9715 *retval = eofflag; /* similar to getdirentries */
9716 error = 0;
9717 out:
9718 file_drop(fd);
9719 return error; /* error was returned earlier; retval is 0 or 1 now */
9720 } /* end of getdirentriesattr system call */
9721
9722 /*
9723 * Exchange data between two files
9724 */
9725
9726 /* ARGSUSED */
9727 int
9728 exchangedata(__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
9729 {
9730 struct nameidata fnd, snd;
9731 vfs_context_t ctx = vfs_context_current();
9732 vnode_t fvp;
9733 vnode_t svp;
9734 int error;
9735 u_int32_t nameiflags;
9736 char *fpath = NULL;
9737 char *spath = NULL;
9738 int flen = 0, slen = 0;
9739 int from_truncated = 0, to_truncated = 0;
9740 #if CONFIG_FSE
9741 fse_info f_finfo, s_finfo;
9742 #endif
9743
9744 nameiflags = 0;
9745 if ((uap->options & FSOPT_NOFOLLOW) == 0) {
9746 nameiflags |= FOLLOW;
9747 }
9748
9749 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
9750 UIO_USERSPACE, uap->path1, ctx);
9751
9752 error = namei(&fnd);
9753 if (error) {
9754 goto out2;
9755 }
9756
9757 nameidone(&fnd);
9758 fvp = fnd.ni_vp;
9759
9760 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
9761 UIO_USERSPACE, uap->path2, ctx);
9762
9763 error = namei(&snd);
9764 if (error) {
9765 vnode_put(fvp);
9766 goto out2;
9767 }
9768 nameidone(&snd);
9769 svp = snd.ni_vp;
9770
9771 /*
9772 * if the files are the same, return an inval error
9773 */
9774 if (svp == fvp) {
9775 error = EINVAL;
9776 goto out;
9777 }
9778
9779 /*
9780 * if the files are on different volumes, return an error
9781 */
9782 if (svp->v_mount != fvp->v_mount) {
9783 error = EXDEV;
9784 goto out;
9785 }
9786
9787 /* If they're not files, return an error */
9788 if ((vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
9789 error = EINVAL;
9790 goto out;
9791 }
9792
9793 #if CONFIG_MACF
9794 error = mac_vnode_check_exchangedata(ctx,
9795 fvp, svp);
9796 if (error) {
9797 goto out;
9798 }
9799 #endif
9800 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
9801 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0)) {
9802 goto out;
9803 }
9804
9805 if (
9806 #if CONFIG_FSE
9807 need_fsevent(FSE_EXCHANGE, fvp) ||
9808 #endif
9809 kauth_authorize_fileop_has_listeners()) {
9810 GET_PATH(fpath);
9811 GET_PATH(spath);
9812 if (fpath == NULL || spath == NULL) {
9813 error = ENOMEM;
9814 goto out;
9815 }
9816
9817 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
9818 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
9819
9820 #if CONFIG_FSE
9821 get_fse_info(fvp, &f_finfo, ctx);
9822 get_fse_info(svp, &s_finfo, ctx);
9823 if (from_truncated || to_truncated) {
9824 // set it here since only the f_finfo gets reported up to user space
9825 f_finfo.mode |= FSE_TRUNCATED_PATH;
9826 }
9827 #endif
9828 }
9829 /* Ok, make the call */
9830 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
9831
9832 if (error == 0) {
9833 const char *tmpname;
9834
9835 if (fpath != NULL && spath != NULL) {
9836 /* call out to allow 3rd party notification of exchangedata.
9837 * Ignore result of kauth_authorize_fileop call.
9838 */
9839 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
9840 (uintptr_t)fpath, (uintptr_t)spath);
9841 }
9842 name_cache_lock();
9843
9844 tmpname = fvp->v_name;
9845 fvp->v_name = svp->v_name;
9846 svp->v_name = tmpname;
9847
9848 if (fvp->v_parent != svp->v_parent) {
9849 vnode_t tmp;
9850
9851 tmp = fvp->v_parent;
9852 fvp->v_parent = svp->v_parent;
9853 svp->v_parent = tmp;
9854 }
9855 name_cache_unlock();
9856
9857 #if CONFIG_FSE
9858 if (fpath != NULL && spath != NULL) {
9859 add_fsevent(FSE_EXCHANGE, ctx,
9860 FSE_ARG_STRING, flen, fpath,
9861 FSE_ARG_FINFO, &f_finfo,
9862 FSE_ARG_STRING, slen, spath,
9863 FSE_ARG_FINFO, &s_finfo,
9864 FSE_ARG_DONE);
9865 }
9866 #endif
9867 }
9868
9869 out:
9870 if (fpath != NULL) {
9871 RELEASE_PATH(fpath);
9872 }
9873 if (spath != NULL) {
9874 RELEASE_PATH(spath);
9875 }
9876 vnode_put(svp);
9877 vnode_put(fvp);
9878 out2:
9879 return error;
9880 }
9881
9882 /*
9883 * Return (in MB) the amount of freespace on the given vnode's volume.
9884 */
9885 uint32_t freespace_mb(vnode_t vp);
9886
9887 uint32_t
9888 freespace_mb(vnode_t vp)
9889 {
9890 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
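/*
 * f_bavail is a count of f_bsize-sized blocks, so the product is
 * bytes; shifting right by 20 converts bytes to megabytes.
 */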
9891 return ((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
9892 vp->v_mount->mnt_vfsstat.f_bsize) >> 20;
9893 }
9894
9895 #if CONFIG_SEARCHFS
9896
9897 /* ARGSUSED */
9898
9899 int
9900 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
9901 {
9902 vnode_t vp, tvp;
9903 int i, error = 0;
9904 int fserror = 0;
9905 struct nameidata nd;
9906 struct user64_fssearchblock searchblock;
9907 struct searchstate *state;
9908 struct attrlist *returnattrs;
9909 struct timeval timelimit;
9910 void *searchparams1, *searchparams2;
9911 uio_t auio = NULL;
9912 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9913 uint32_t nummatches;
9914 int mallocsize;
9915 uint32_t nameiflags;
9916 vfs_context_t ctx = vfs_context_current();
9917 char uio_buf[UIO_SIZEOF(1)];
9918
9919 /* Start by copying in fsearchblock parameter list */
9920 if (IS_64BIT_PROCESS(p)) {
9921 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
9922 timelimit.tv_sec = searchblock.timelimit.tv_sec;
9923 timelimit.tv_usec = searchblock.timelimit.tv_usec;
9924 } else {
9925 struct user32_fssearchblock tmp_searchblock;
9926
9927 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
9928 // munge into 64-bit version
9929 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
9930 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
9931 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
9932 searchblock.maxmatches = tmp_searchblock.maxmatches;
9933 /*
9934 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
9935 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
9936 */
9937 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
9938 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
9939 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
9940 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
9941 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
9942 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
9943 searchblock.searchattrs = tmp_searchblock.searchattrs;
9944 }
9945 if (error) {
9946 return error;
9947 }
9948
9949 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
9950 */
9951 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
9952 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS) {
9953 return EINVAL;
9954 }
9955
9956 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
9957 /* It all has to go into local memory and it's not that big so we might as well put it all together. */
9958 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated */
9959 /* block. */
9960 /* */
9961 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
9962 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
9963 /* assumes the size is still 556 bytes it will continue to work */
9964
9965 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
9966 sizeof(struct attrlist) + sizeof(struct searchstate) + (2 * sizeof(uint32_t));
9967
9968 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
9969
9970 /* Now set up the various pointers to the correct place in our newly allocated memory */
9971
9972 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
9973 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
9974 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof(struct attrlist));
9975
9976 /* Now copy in the stuff given our local variables. */
9977
9978 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1))) {
9979 goto freeandexit;
9980 }
9981
9982 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2))) {
9983 goto freeandexit;
9984 }
9985
9986 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist)))) {
9987 goto freeandexit;
9988 }
9989
9990 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate)))) {
9991 goto freeandexit;
9992 }
9993
9994 /*
9995 * When searching a union mount, need to set the
9996 * start flag at the first call on each layer to
9997 * reset state for the new volume.
9998 */
9999 if (uap->options & SRCHFS_START) {
10000 state->ss_union_layer = 0;
10001 } else {
10002 uap->options |= state->ss_union_flags;
10003 }
10004 state->ss_union_flags = 0;
10005
10006 /*
10007 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
10008 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
10009 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
10010 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
10011 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
10012 */
10013
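/*
 * The searchparams buffer begins with a u_int32_t word giving the
 * buffer length, followed by an attrreference_t whose attr_dataoffset
 * locates the name bytes later in the same buffer; the checks below
 * ensure that offset and attr_length stay within the caller-supplied
 * sizeofsearchparams bounds.
 */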
10014 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
10015 attrreference_t* string_ref;
10016 u_int32_t* start_length;
10017 user64_size_t param_length;
10018
10019 /* validate searchparams1 */
10020 param_length = searchblock.sizeofsearchparams1;
10021 /* skip the word that specifies length of the buffer */
10022 start_length = (u_int32_t*) searchparams1;
10023 start_length = start_length + 1;
10024 string_ref = (attrreference_t*) start_length;
10025
10026 /* ensure no negative offsets or too big offsets */
10027 if (string_ref->attr_dataoffset < 0) {
10028 error = EINVAL;
10029 goto freeandexit;
10030 }
10031 if (string_ref->attr_length > MAXPATHLEN) {
10032 error = EINVAL;
10033 goto freeandexit;
10034 }
10035
10036 /* Check for pointer overflow in the string ref */
10037 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
10038 error = EINVAL;
10039 goto freeandexit;
10040 }
10041
10042 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
10043 error = EINVAL;
10044 goto freeandexit;
10045 }
10046 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
10047 error = EINVAL;
10048 goto freeandexit;
10049 }
10050 }
10051
10052 /* set up the uio structure which will contain the users return buffer */
10053 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
10054 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
10055
10056 nameiflags = 0;
10057 if ((uap->options & FSOPT_NOFOLLOW) == 0) {
10058 nameiflags |= FOLLOW;
10059 }
10060 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
10061 UIO_USERSPACE, uap->path, ctx);
10062
10063 error = namei(&nd);
10064 if (error) {
10065 goto freeandexit;
10066 }
10067 vp = nd.ni_vp;
10068 nameidone(&nd);
10069
10070 /*
10071 * Switch to the root vnode for the volume
10072 */
10073 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
10074 vnode_put(vp);
10075 if (error) {
10076 goto freeandexit;
10077 }
10078 vp = tvp;
10079
10080 /*
10081 * If it's a union mount, the path lookup takes
10082 * us to the top layer. But we may need to descend
10083 * to a lower layer. For non-union mounts the layer
10084 * is always zero.
10085 */
10086 for (i = 0; i < (int) state->ss_union_layer; i++) {
10087 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0) {
10088 break;
10089 }
10090 tvp = vp;
10091 vp = vp->v_mount->mnt_vnodecovered;
10092 if (vp == NULL) {
10093 vnode_put(tvp);
10094 error = ENOENT;
10095 goto freeandexit;
10096 }
10097 error = vnode_getwithref(vp);
10098 vnode_put(tvp);
10099 if (error) {
10100 goto freeandexit;
10101 }
10102 }
10103
10104 #if CONFIG_MACF
10105 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
10106 if (error) {
10107 vnode_put(vp);
10108 goto freeandexit;
10109 }
10110 #endif
10111
10112
10113 /*
10114 * If searchblock.maxmatches == 0, then skip the search. This has happened
10115 * before and sometimes the underlying code doesn't deal with it well.
10116 */
10117 if (searchblock.maxmatches == 0) {
10118 nummatches = 0;
10119 goto saveandexit;
10120 }
10121
10122 /*
10123 * All right, we have everything we need, so let's make that call.
10124 *
10125 * We keep special track of the return value from the file system:
10126 * EAGAIN is an acceptable error condition that shouldn't keep us
10127 * from copying out any results...
10128 */
10129
10130 fserror = VNOP_SEARCHFS(vp,
10131 searchparams1,
10132 searchparams2,
10133 &searchblock.searchattrs,
10134 (u_long)searchblock.maxmatches,
10135 &timelimit,
10136 returnattrs,
10137 &nummatches,
10138 (u_long)uap->scriptcode,
10139 (u_long)uap->options,
10140 auio,
10141 (struct searchstate *) &state->ss_fsstate,
10142 ctx);
10143
10144 /*
10145 * If it's a union mount we need to be called again
10146 * to search the mounted-on filesystem.
10147 */
10148 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
10149 state->ss_union_flags = SRCHFS_START;
10150 state->ss_union_layer++; // search next layer down
10151 fserror = EAGAIN;
10152 }
10153
10154 saveandexit:
10155
10156 vnode_put(vp);
10157
10158 /* Now copy out the stuff that needs copying out. That means the number of matches and the
10159 * search state. Everything was already put into the return buffer by the VOP call. */
10160
10161 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0) {
10162 goto freeandexit;
10163 }
10164
10165 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0) {
10166 goto freeandexit;
10167 }
10168
10169 error = fserror;
10170
10171 freeandexit:
10172
10173 FREE(searchparams1, M_TEMP);
10174
10175 return error;
10176 } /* end of searchfs system call */
10177
10178 #else /* CONFIG_SEARCHFS */
10179
10180 int
10181 searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
10182 {
10183 return ENOTSUP;
10184 }
10185
10186 #endif /* CONFIG_SEARCHFS */
10187
10188
10189 #if CONFIG_DATALESS_FILES
10190
10191 /*
10192 * === Namespace Resolver Up-call Mechanism ===
10193 *
10194 * When I/O is performed to a dataless file or directory (read, write,
10195 * lookup-in, etc.), the file system performs an upcall to the namespace
10196 * resolver (filecoordinationd) to materialize the object.
10197 *
10198 * We need multiple up-calls to be in flight at once, and we need these
10199 * up-calls to be interruptible, thus the following implementation:
10200 *
10201 * => The nspace_resolver_request represents the in-kernel request state.
10202 * It contains a request ID, storage space for the errno code returned
10203 * by filecoordinationd, and flags.
10204 *
10205 * => The request ID is simply a global monotonically incrementing 32-bit
10206 * number. Outstanding requests are stored in a hash table, and the
10207 * hash function is extremely simple.
10208 *
10209 * => When an upcall is to be made to filecoordinationd, a request structure
10210 * is allocated on the stack (it is small, and needs to live only during
10211 * the duration of the call to resolve_nspace_item_ext()). It is
10212 * initialized and inserted into the table. Some backpressure from
10213 * filecoordinationd is applied by limiting the number of entries that
10214 * can be inserted into the table (and thus limiting the number of
10215 * outstanding requests issued to filecoordinationd); waiting for an
10216 * available slot is interruptible.
10217 *
10218 * => Once the request has been inserted into the table, the up-call is made
10219 * to filecoordinationd via a MiG-generated stub. The up-call returns
10220 * immediately and filecoordinationd processes the request asynchronously.
10221 *
10222 * => The caller now waits for the request to complete. This is achieved by
10223 * sleeping on the address of the request structure and waiting for
10224 * filecoordinationd to mark the request structure as complete. This
10225 * is an interruptible sleep call; if interrupted, the request structure
10226 * is removed from the table and EINTR is returned to the caller. If
10227 * this occurs, an advisory up-call is made to filecoordinationd with
10228 * the request ID to indicate that the request can be aborted or
10229 * de-prioritized at the discretion of filecoordinationd.
10230 *
10231 * => When filecoordinationd has completed the request, it signals completion
10232 * by writing to the vfs.nspace.complete sysctl node. Only a process
10233 * decorated as a namespace resolver can write to this sysctl node. The
10234 * value is a request ID / errno tuple passed as an array of 2 uint32_t's.
10235 * The request ID is looked up in the table, and if the request is found,
10236 * the error code is stored in the request structure and a wakeup()
10237 * issued on the address of the request structure. If the request is not
10238 * found, we simply drop the completion notification, assuming that the
10239 * caller was interrupted.
10240 *
10241 * => When the waiting thread wakes up, it extracts the error code from the
10242 * request structure, removes the request from the table, and returns the
10243 * error code to the calling function. Fini!
10244 */
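/*
 * A minimal userspace sketch of the completion write described above,
 * assuming the resolver process has already been decorated as the
 * namespace resolver; the helper and variable names are illustrative
 * only, but the tuple format and sysctl node match the description
 * above.
 */
#if 0
#include <stdint.h>
#include <sys/types.h>
#include <sys/sysctl.h>

static int
report_nspace_completion(uint32_t req_id, int resolver_errno)
{
	/* vfs.nspace.complete takes a { request ID, errno } pair of uint32_t's */
	uint32_t tuple[2] = { req_id, (uint32_t)resolver_errno };

	/* write-only update: no old value is requested */
	return sysctlbyname("vfs.nspace.complete", NULL, NULL,
	    tuple, sizeof(tuple));
}
#endif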
10245
10246 struct nspace_resolver_request {
10247 LIST_ENTRY(nspace_resolver_request) r_hashlink;
10248 uint32_t r_req_id;
10249 int r_resolver_error;
10250 int r_flags;
10251 };
10252
10253 #define RRF_COMPLETE 0x0001
10254
10255 static uint32_t
10256 next_nspace_req_id(void)
10257 {
10258 static uint32_t next_req_id;
10259
10260 return OSAddAtomic(1, &next_req_id);
10261 }
10262
10263 #define NSPACE_RESOLVER_REQ_HASHSIZE 32 /* XXX tune */
10264 #define NSPACE_RESOLVER_MAX_OUTSTANDING 256 /* XXX tune */
10265
10266 static LIST_HEAD(nspace_resolver_requesthead,
10267 nspace_resolver_request) * nspace_resolver_request_hashtbl;
10268 static u_long nspace_resolver_request_hashmask;
10269 static u_int nspace_resolver_request_count;
10270 static bool nspace_resolver_request_wait_slot;
10271 static lck_grp_t *nspace_resolver_request_lck_grp;
10272 static lck_mtx_t nspace_resolver_request_hash_mutex;
10273
10274 #define NSPACE_REQ_LOCK() \
10275 lck_mtx_lock(&nspace_resolver_request_hash_mutex)
10276 #define NSPACE_REQ_UNLOCK() \
10277 lck_mtx_unlock(&nspace_resolver_request_hash_mutex)
10278
10279 #define NSPACE_RESOLVER_HASH(req_id) \
10280 (&nspace_resolver_request_hashtbl[(req_id) & \
10281 nspace_resolver_request_hashmask])
10282
10283 static struct nspace_resolver_request *
10284 nspace_resolver_req_lookup(uint32_t req_id)
10285 {
10286 struct nspace_resolver_requesthead *bucket;
10287 struct nspace_resolver_request *req;
10288
10289 bucket = NSPACE_RESOLVER_HASH(req_id);
10290 LIST_FOREACH(req, bucket, r_hashlink) {
10291 if (req->r_req_id == req_id) {
10292 return req;
10293 }
10294 }
10295
10296 return NULL;
10297 }
10298
10299 static int
10300 nspace_resolver_req_add(struct nspace_resolver_request *req)
10301 {
10302 struct nspace_resolver_requesthead *bucket;
10303 int error;
10304
10305 while (nspace_resolver_request_count >=
10306 NSPACE_RESOLVER_MAX_OUTSTANDING) {
10307 nspace_resolver_request_wait_slot = true;
10308 error = msleep(&nspace_resolver_request_count,
10309 &nspace_resolver_request_hash_mutex,
10310 PVFS | PCATCH, "nspacerq", NULL);
10311 if (error) {
10312 return error;
10313 }
10314 }
10315
10316 bucket = NSPACE_RESOLVER_HASH(req->r_req_id);
10317 #if DIAGNOSTIC
10318 assert(nspace_resolver_req_lookup(req->r_req_id) == NULL);
10319 #endif /* DIAGNOSTIC */
10320 LIST_INSERT_HEAD(bucket, req, r_hashlink);
10321 nspace_resolver_request_count++;
10322
10323 return 0;
10324 }
10325
10326 static void
10327 nspace_resolver_req_remove(struct nspace_resolver_request *req)
10328 {
10329 struct nspace_resolver_requesthead *bucket;
10330
10331 bucket = NSPACE_RESOLVER_HASH(req->r_req_id);
10332 #if DIAGNOSTIC
10333 assert(nspace_resolver_req_lookup(req->r_req_id) != NULL);
10334 #endif /* DIAGNOSTIC */
10335 LIST_REMOVE(req, r_hashlink);
10336 nspace_resolver_request_count--;
10337
10338 if (nspace_resolver_request_wait_slot) {
10339 nspace_resolver_request_wait_slot = false;
10340 wakeup(&nspace_resolver_request_count);
10341 }
10342 }
10343
10344 static void
10345 nspace_resolver_req_cancel(uint32_t req_id)
10346 {
10347 kern_return_t kr;
10348 mach_port_t mp;
10349
10350 // Failures here aren't fatal -- the cancellation message
10351 // sent to the resolver is merely advisory.
10352
10353 kr = host_get_filecoordinationd_port(host_priv_self(), &mp);
10354 if (kr != KERN_SUCCESS || !IPC_PORT_VALID(mp)) {
10355 return;
10356 }
10357
10358 kr = send_nspace_resolve_cancel(mp, req_id);
10359 if (kr != KERN_SUCCESS) {
10360 os_log_error(OS_LOG_DEFAULT,
10361 "NSPACE send_nspace_resolve_cancel failure: %d", kr);
10362 }
10363
10364 ipc_port_release_send(mp);
10365 }
10366
10367 static int
10368 nspace_resolver_req_wait(struct nspace_resolver_request *req)
10369 {
10370 bool send_cancel_message = false;
10371 int error;
10372
10373 NSPACE_REQ_LOCK();
10374
10375 while ((req->r_flags & RRF_COMPLETE) == 0) {
10376 error = msleep(req, &nspace_resolver_request_hash_mutex,
10377 PVFS | PCATCH, "nspace", NULL);
10378 if (error && error != ERESTART) {
10379 req->r_resolver_error = (error == EINTR) ? EINTR :
10380 ETIMEDOUT;
10381 send_cancel_message = true;
10382 break;
10383 }
10384 }
10385
10386 nspace_resolver_req_remove(req);
10387
10388 NSPACE_REQ_UNLOCK();
10389
10390 if (send_cancel_message) {
10391 nspace_resolver_req_cancel(req->r_req_id);
10392 }
10393
10394 return req->r_resolver_error;
10395 }
10396
10397 static void
10398 nspace_resolver_req_mark_complete(
10399 struct nspace_resolver_request *req,
10400 int resolver_error)
10401 {
10402 req->r_resolver_error = resolver_error;
10403 req->r_flags |= RRF_COMPLETE;
10404 wakeup(req);
10405 }
10406
10407 static void
10408 nspace_resolver_req_completed(uint32_t req_id, int resolver_error)
10409 {
10410 struct nspace_resolver_request *req;
10411
10412 NSPACE_REQ_LOCK();
10413
10414 // If we don't find the request corresponding to our req_id,
10415 // just drop the completion signal on the floor; it's likely
10416 // that the requester interrupted with a signal.
10417
10418 req = nspace_resolver_req_lookup(req_id);
10419 if (req) {
10420 nspace_resolver_req_mark_complete(req, resolver_error);
10421 }
10422
10423 NSPACE_REQ_UNLOCK();
10424 }
10425
10426 static struct proc *nspace_resolver_proc;
10427
10428 static int
10429 nspace_resolver_get_proc_state(struct proc *p, int *is_resolver)
10430 {
10431 *is_resolver = ((p->p_lflag & P_LNSPACE_RESOLVER) &&
10432 p == nspace_resolver_proc) ? 1 : 0;
10433 return 0;
10434 }
10435
10436 static int
10437 nspace_resolver_set_proc_state(struct proc *p, int is_resolver)
10438 {
10439 vfs_context_t ctx = vfs_context_current();
10440 int error = 0;
10441
10442 //
10443 // The system filecoordinationd runs as uid == 0. This also
10444 // has the nice side-effect of filtering out filecoordinationd
10445 // running in the simulator.
10446 //
10447 if (!vfs_context_issuser(ctx)) {
10448 return EPERM;
10449 }
10450
10451 error = priv_check_cred(vfs_context_ucred(ctx),
10452 PRIV_VFS_DATALESS_RESOLVER, 0);
10453 if (error) {
10454 return error;
10455 }
10456
10457 if (is_resolver) {
10458 NSPACE_REQ_LOCK();
10459
10460 if (nspace_resolver_proc == NULL) {
10461 proc_lock(p);
10462 p->p_lflag |= P_LNSPACE_RESOLVER;
10463 proc_unlock(p);
10464 nspace_resolver_proc = p;
10465 } else {
10466 error = EBUSY;
10467 }
10468
10469 NSPACE_REQ_UNLOCK();
10470 } else {
10471 // This is basically just like the exit case.
10472 // nspace_resolver_exited() will verify that the
10473 // process is the resolver, and will clear the
10474 // global.
10475 nspace_resolver_exited(p);
10476 }
10477
10478 return error;
10479 }
10480
10481 static int
10482 nspace_materialization_get_proc_state(struct proc *p, int *is_prevented)
10483 {
10484 if ((p->p_lflag & P_LNSPACE_RESOLVER) != 0 ||
10485 (p->p_vfs_iopolicy &
10486 P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES) == 0) {
10487 *is_prevented = 1;
10488 } else {
10489 *is_prevented = 0;
10490 }
10491 return 0;
10492 }
10493
10494 static int
10495 nspace_materialization_set_proc_state(struct proc *p, int is_prevented)
10496 {
10497 if (p->p_lflag & P_LNSPACE_RESOLVER) {
10498 return is_prevented ? 0 : EBUSY;
10499 }
10500
10501 if (is_prevented) {
10502 OSBitAndAtomic16(~((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES), &p->p_vfs_iopolicy);
10503 } else {
10504 OSBitOrAtomic16((uint16_t)P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES, &p->p_vfs_iopolicy);
10505 }
10506 return 0;
10507 }
10508
10509 static int
10510 nspace_materialization_get_thread_state(int *is_prevented)
10511 {
10512 uthread_t ut = get_bsdthread_info(current_thread());
10513
10514 *is_prevented = (ut->uu_flag & UT_NSPACE_NODATALESSFAULTS) ? 1 : 0;
10515 return 0;
10516 }
10517
10518 static int
10519 nspace_materialization_set_thread_state(int is_prevented)
10520 {
10521 uthread_t ut = get_bsdthread_info(current_thread());
10522
10523 if (is_prevented) {
10524 ut->uu_flag |= UT_NSPACE_NODATALESSFAULTS;
10525 } else {
10526 ut->uu_flag &= ~UT_NSPACE_NODATALESSFAULTS;
10527 }
10528 return 0;
10529 }
10530
10531 static int
10532 nspace_materialization_is_prevented(void)
10533 {
10534 proc_t p = current_proc();
10535 uthread_t ut = (uthread_t)get_bsdthread_info(current_thread());
10536 vfs_context_t ctx = vfs_context_current();
10537
10538 /*
10539 * Kernel context ==> return EDEADLK, as we would with any random
10540 * process decorated as no-materialize.
10541 */
10542 if (ctx == vfs_context_kernel()) {
10543 return EDEADLK;
10544 }
10545
10546 /*
10547 * If the process has the dataless-manipulation entitlement,
10548 * materialization is prevented, and depending on the kind
10549 * of file system operation, things get to proceed as if the
10550 * object is not dataless.
10551 */
10552 if (vfs_context_is_dataless_manipulator(ctx)) {
10553 return EJUSTRETURN;
10554 }
10555
10556 /*
10557 * Per-thread decorations override any process-wide decorations.
10558 * (Foundation uses this, and this overrides even the dataless-
10559 * manipulation entitlement so as to make API contracts consistent.)
10560 */
10561 if (ut != NULL) {
10562 if (ut->uu_flag & UT_NSPACE_NODATALESSFAULTS) {
10563 return EDEADLK;
10564 }
10565 if (ut->uu_flag & UT_NSPACE_FORCEDATALESSFAULTS) {
10566 return 0;
10567 }
10568 }
10569
10570 /*
10571 * If the process's iopolicy specifies that dataless files
10572 * can be materialized, then we let it go ahead.
10573 */
10574 if (p->p_vfs_iopolicy & P_VFS_IOPOLICY_MATERIALIZE_DATALESS_FILES) {
10575 return 0;
10576 }
10577
10578 /*
10579 * The default behavior is to not materialize dataless files;
10580 * return to the caller that deadlock was detected.
10581 */
10582 return EDEADLK;
10583 }
10584
10585 /* the vfs.nspace branch */
10586 SYSCTL_NODE(_vfs, OID_AUTO, nspace, CTLFLAG_RW | CTLFLAG_LOCKED, NULL, "vfs nspace hinge");
10587
10588 static int
10589 sysctl_nspace_resolver(__unused struct sysctl_oid *oidp,
10590 __unused void *arg1, __unused int arg2, struct sysctl_req *req)
10591 {
10592 struct proc *p = req->p;
10593 int new_value, old_value, changed = 0;
10594 int error;
10595
10596 error = nspace_resolver_get_proc_state(p, &old_value);
10597 if (error) {
10598 return error;
10599 }
10600
10601 error = sysctl_io_number(req, old_value, sizeof(int), &new_value,
10602 &changed);
10603 if (error == 0 && changed) {
10604 error = nspace_resolver_set_proc_state(p, new_value);
10605 }
10606 return error;
10607 }
10608
10609 /* decorate this process as the dataless file resolver */
10610 SYSCTL_PROC(_vfs_nspace, OID_AUTO, resolver,
10611 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
10612 0, 0, sysctl_nspace_resolver, "I", "");
10613
10614 static int
10615 sysctl_nspace_prevent_materialization(__unused struct sysctl_oid *oidp,
10616 __unused void *arg1, __unused int arg2, struct sysctl_req *req)
10617 {
10618 struct proc *p = req->p;
10619 int new_value, old_value, changed = 0;
10620 int error;
10621
10622 error = nspace_materialization_get_proc_state(p, &old_value);
10623 if (error) {
10624 return error;
10625 }
10626
10627 error = sysctl_io_number(req, old_value, sizeof(int), &new_value,
10628 &changed);
10629 if (error == 0 && changed) {
10630 error = nspace_materialization_set_proc_state(p, new_value);
10631 }
10632 return error;
10633 }
10634
10635 /* decorate this process as not wanting to materialize dataless files */
10636 SYSCTL_PROC(_vfs_nspace, OID_AUTO, prevent_materialization,
10637 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
10638 0, 0, sysctl_nspace_prevent_materialization, "I", "");
10639
10640 static int
10641 sysctl_nspace_thread_prevent_materialization(__unused struct sysctl_oid *oidp,
10642 __unused void *arg1, __unused int arg2, struct sysctl_req *req)
10643 {
10644 int new_value, old_value, changed = 0;
10645 int error;
10646
10647 error = nspace_materialization_get_thread_state(&old_value);
10648 if (error) {
10649 return error;
10650 }
10651
10652 error = sysctl_io_number(req, old_value, sizeof(int), &new_value,
10653 &changed);
10654 if (error == 0 && changed) {
10655 error = nspace_materialization_set_thread_state(new_value);
10656 }
10657 return error;
10658 }
10659
10660 /* decorate this thread as not wanting to materialize dataless files */
10661 SYSCTL_PROC(_vfs_nspace, OID_AUTO, thread_prevent_materialization,
10662 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
10663 0, 0, sysctl_nspace_thread_prevent_materialization, "I", "");
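/*
 * Illustrative sketch (userspace, not part of this file's build): a
 * process that must never trigger materialization of dataless files can
 * set the per-process decoration exposed above, assuming the standard
 * sysctlbyname(3) interface; the thread-level node is used the same way.
 *
 *	#include <sys/sysctl.h>
 *
 *	static int
 *	prevent_dataless_materialization(void)
 *	{
 *		int one = 1;	// 1 = prevented, 0 = allowed
 *		return sysctlbyname("vfs.nspace.prevent_materialization",
 *		    NULL, NULL, &one, sizeof(one));
 *	}
 */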
10664
10665 static int
10666 sysctl_nspace_complete(__unused struct sysctl_oid *oidp, __unused void *arg1,
10667 __unused int arg2, struct sysctl_req *req)
10668 {
10669 struct proc *p = req->p;
10670 uint32_t req_status[2] = { 0, 0 };
10671 int error, is_resolver, changed = 0;
10672
10673 error = nspace_resolver_get_proc_state(p, &is_resolver);
10674 if (error) {
10675 return error;
10676 }
10677
10678 if (!is_resolver) {
10679 return EPERM;
10680 }
10681
10682 error = sysctl_io_opaque(req, req_status, sizeof(req_status),
10683 &changed);
10684 if (error) {
10685 return error;
10686 }
10687
10688 /*
10689 * req_status[0] is the req_id
10690 *
10691 * req_status[1] is the errno
10692 */
10693 if (error == 0 && changed) {
10694 nspace_resolver_req_completed(req_status[0],
10695 (int)req_status[1]);
10696 }
10697 return error;
10698 }
10699
10700 /* Resolver reports completed reqs here. */
10701 SYSCTL_PROC(_vfs_nspace, OID_AUTO, complete,
10702 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
10703 0, 0, sysctl_nspace_complete, "-", "");
10704
10705 #endif /* CONFIG_DATALESS_FILES */
10706
10707 #if CONFIG_DATALESS_FILES
10708 #define __no_dataless_unused /* nothing */
10709 #else
10710 #define __no_dataless_unused __unused
10711 #endif
10712
10713 void
10714 nspace_resolver_init(void)
10715 {
10716 #if CONFIG_DATALESS_FILES
10717 nspace_resolver_request_lck_grp =
10718 lck_grp_alloc_init("file namespace resolver", NULL);
10719
10720 lck_mtx_init(&nspace_resolver_request_hash_mutex,
10721 nspace_resolver_request_lck_grp, NULL);
10722
10723 nspace_resolver_request_hashtbl =
10724 hashinit(NSPACE_RESOLVER_REQ_HASHSIZE,
10725 M_VNODE /* XXX */, &nspace_resolver_request_hashmask);
10726 #endif /* CONFIG_DATALESS_FILES */
10727 }
10728
10729 void
10730 nspace_resolver_exited(struct proc *p __no_dataless_unused)
10731 {
10732 #if CONFIG_DATALESS_FILES
10733 struct nspace_resolver_requesthead *bucket;
10734 struct nspace_resolver_request *req;
10735 u_long idx;
10736
10737 NSPACE_REQ_LOCK();
10738
10739 if ((p->p_lflag & P_LNSPACE_RESOLVER) &&
10740 p == nspace_resolver_proc) {
10741 for (idx = 0; idx <= nspace_resolver_request_hashmask; idx++) {
10742 bucket = &nspace_resolver_request_hashtbl[idx];
10743 LIST_FOREACH(req, bucket, r_hashlink) {
10744 nspace_resolver_req_mark_complete(req,
10745 ETIMEDOUT);
10746 }
10747 }
10748 nspace_resolver_proc = NULL;
10749 }
10750
10751 NSPACE_REQ_UNLOCK();
10752 #endif /* CONFIG_DATALESS_FILES */
10753 }
10754
10755 int
10756 resolve_nspace_item(struct vnode *vp, uint64_t op)
10757 {
10758 return resolve_nspace_item_ext(vp, op, NULL);
10759 }
10760
10761 #define DATALESS_RESOLVER_ENTITLEMENT \
10762 "com.apple.private.vfs.dataless-resolver"
10763 #define DATALESS_MANIPULATION_ENTITLEMENT \
10764 "com.apple.private.vfs.dataless-manipulation"
10765
10766 /*
10767 * Return TRUE if the vfs context is associated with a process entitled
10768 * for dataless manipulation.
10769 *
10770 * XXX Arguably belongs in vfs_subr.c, but is here because of the
10771 * complication around CONFIG_DATALESS_FILES.
10772 */
10773 boolean_t
10774 vfs_context_is_dataless_manipulator(vfs_context_t ctx __unused)
10775 {
10776 #if CONFIG_DATALESS_FILES
10777 assert(ctx->vc_thread == current_thread());
10778 task_t const task = current_task();
10779 return IOTaskHasEntitlement(task, DATALESS_MANIPULATION_ENTITLEMENT) ||
10780 IOTaskHasEntitlement(task, DATALESS_RESOLVER_ENTITLEMENT);
10781 #else
10782 return false;
10783 #endif /* CONFIG_DATALESS_FILES */
10784 }
10785
10786 int
10787 resolve_nspace_item_ext(
10788 struct vnode *vp __no_dataless_unused,
10789 uint64_t op __no_dataless_unused,
10790 void *arg __unused)
10791 {
10792 #if CONFIG_DATALESS_FILES
10793 int error;
10794 mach_port_t mp;
10795 char *path = NULL;
10796 int path_len;
10797 kern_return_t kr;
10798 struct nspace_resolver_request req;
10799
10800 // only allow namespace events on regular files, directories and symlinks.
10801 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
10802 return EFTYPE;
10803 }
10804
10805 //
10806 // if this is a snapshot event and the vnode is on a
10807 // disk image just pretend nothing happened since any
10808 // change to the disk image will cause the disk image
10809 // itself to get backed up and this avoids multi-way
10810 // deadlocks between the snapshot handler and the ever
10811 // popular diskimages-helper process. the variable
10812 // nspace_allow_virtual_devs allows this behavior to
10813 // be overridden (for use by the Mobile TimeMachine
10814 // testing infrastructure which uses disk images)
10815 //
10816 if (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT) {
10817 os_log_debug(OS_LOG_DEFAULT, "NSPACE SNAPSHOT not handled");
10818 return ENOTSUP;
10819 }
10820
10821 error = nspace_materialization_is_prevented();
10822 if (error) {
10823 os_log_debug(OS_LOG_DEFAULT,
10824 "NSPACE process/thread is decorated as no-materialization");
10825 return error;
10826 }
10827
10828 kr = host_get_filecoordinationd_port(host_priv_self(), &mp);
10829 if (kr != KERN_SUCCESS || !IPC_PORT_VALID(mp)) {
10830 os_log_error(OS_LOG_DEFAULT, "NSPACE no port");
10831 // Treat this like being unable to access the backing
10832 // store server.
10833 return ETIMEDOUT;
10834 }
10835
10836 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
10837 if (path == NULL) {
10838 error = ENOMEM;
10839 goto out_release_port;
10840 }
10841 path_len = MAXPATHLEN;
10842
10843 error = vn_getpath(vp, path, &path_len);
10844 if (error == 0) {
10845 int xxx_rdar44371223; /* XXX Mig bug */
10846 req.r_req_id = next_nspace_req_id();
10847 req.r_resolver_error = 0;
10848 req.r_flags = 0;
10849
10850 NSPACE_REQ_LOCK();
10851 error = nspace_resolver_req_add(&req);
10852 NSPACE_REQ_UNLOCK();
10853 if (error) {
10854 goto out_release_port;
10855 }
10856
10857 os_log_debug(OS_LOG_DEFAULT, "NSPACE resolve_path call");
10858 kr = send_nspace_resolve_path(mp, req.r_req_id,
10859 current_proc()->p_pid, (uint32_t)(op & 0xffffffff),
10860 path, &xxx_rdar44371223);
10861 if (kr != KERN_SUCCESS) {
10862 // Also treat this like being unable to access
10863 // the backing store server.
10864 os_log_error(OS_LOG_DEFAULT,
10865 "NSPACE resolve_path failure: %d", kr);
10866 error = ETIMEDOUT;
10867
10868 NSPACE_REQ_LOCK();
10869 nspace_resolver_req_remove(&req);
10870 NSPACE_REQ_UNLOCK();
10871 goto out_release_port;
10872 }
10873
10874 // Give back the memory we allocated earlier while
10875 // we wait; we no longer need it.
10876 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
10877 path = NULL;
10878
10879 // Request has been submitted to the resolver.
10880 // Now (interruptibly) wait for completion.
10881 // Upon return, the request will have been removed
10882 // from the lookup table.
10883 error = nspace_resolver_req_wait(&req);
10884 }
10885
10886 out_release_port:
10887 if (path != NULL) {
10888 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
10889 }
10890 ipc_port_release_send(mp);
10891
10892 return error;
10893 #else
10894 return ENOTSUP;
10895 #endif /* CONFIG_DATALESS_FILES */
10896 }
10897
10898 int
10899 nspace_snapshot_event(__unused vnode_t vp, __unused time_t ctime,
10900 __unused uint64_t op_type, __unused void *arg)
10901 {
10902 return 0;
10903 }
10904
10905 #if 0
10906 static int
10907 build_volfs_path(struct vnode *vp, char *path, int *len)
10908 {
10909 struct vnode_attr va;
10910 int ret;
10911
10912 VATTR_INIT(&va);
10913 VATTR_WANTED(&va, va_fsid);
10914 VATTR_WANTED(&va, va_fileid);
10915
10916 if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
10917 *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
10918 ret = -1;
10919 } else {
10920 *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
10921 ret = 0;
10922 }
10923
10924 return ret;
10925 }
10926 #endif
10927
10928 static unsigned long
10929 fsctl_bogus_command_compat(unsigned long cmd)
10930 {
10931 switch (cmd) {
10932 case IOCBASECMD(FSIOC_SYNC_VOLUME):
10933 return FSIOC_SYNC_VOLUME;
10934 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID):
10935 return FSIOC_ROUTEFS_SETROUTEID;
10936 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS):
10937 return FSIOC_SET_PACKAGE_EXTS;
10938 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE):
10939 return FSIOC_SET_FSTYPENAME_OVERRIDE;
10940 case IOCBASECMD(DISK_CONDITIONER_IOC_GET):
10941 return DISK_CONDITIONER_IOC_GET;
10942 case IOCBASECMD(DISK_CONDITIONER_IOC_SET):
10943 return DISK_CONDITIONER_IOC_SET;
10944 case IOCBASECMD(FSIOC_FIOSEEKHOLE):
10945 return FSIOC_FIOSEEKHOLE;
10946 case IOCBASECMD(FSIOC_FIOSEEKDATA):
10947 return FSIOC_FIOSEEKDATA;
10948 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME):
10949 return SPOTLIGHT_IOC_GET_MOUNT_TIME;
10950 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME):
10951 return SPOTLIGHT_IOC_GET_LAST_MTIME;
10952 }
10953
10954 return cmd;
10955 }
10956
10957 static int
10958 cas_bsdflags_setattr(vnode_t vp, void *arg, vfs_context_t ctx)
10959 {
10960 return VNOP_IOCTL(vp, FSIOC_CAS_BSDFLAGS, arg, FWRITE, ctx);
10961 }
10962
10963 /*
10964 * Make a filesystem-specific control call:
10965 */
10966 /* ARGSUSED */
10967 static int
10968 fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
10969 {
10970 int error = 0;
10971 boolean_t is64bit;
10972 u_int size;
10973 #define STK_PARAMS 128
10974 char stkbuf[STK_PARAMS] = {0};
10975 caddr_t data, memp;
10976 vnode_t vp = *arg_vp;
10977
10978 if (vp->v_type == VCHR || vp->v_type == VBLK) {
10979 return ENOTTY;
10980 }
10981
10982 cmd = fsctl_bogus_command_compat(cmd);
10983
10984 size = IOCPARM_LEN(cmd);
10985 if (size > IOCPARM_MAX) {
10986 return EINVAL;
10987 }
10988
10989 is64bit = proc_is64bit(p);
10990
10991 memp = NULL;
10992
10993 if (size > sizeof(stkbuf)) {
10994 if ((memp = (caddr_t)kalloc(size)) == 0) {
10995 return ENOMEM;
10996 }
10997 data = memp;
10998 } else {
10999 data = &stkbuf[0];
11000 };
11001
11002 if (cmd & IOC_IN) {
11003 if (size) {
11004 error = copyin(udata, data, size);
11005 if (error) {
11006 if (memp) {
11007 kfree(memp, size);
11008 }
11009 return error;
11010 }
11011 } else {
11012 if (is64bit) {
11013 *(user_addr_t *)data = udata;
11014 } else {
11015 *(uint32_t *)data = (uint32_t)udata;
11016 }
11017 };
11018 } else if ((cmd & IOC_OUT) && size) {
11019 /*
11020 * Zero the buffer so the user always
11021 * gets back something deterministic.
11022 */
11023 bzero(data, size);
11024 } else if (cmd & IOC_VOID) {
11025 if (is64bit) {
11026 *(user_addr_t *)data = udata;
11027 } else {
11028 *(uint32_t *)data = (uint32_t)udata;
11029 }
11030 }
11031
11032 /* Check to see if it's a generic command */
11033 switch (cmd) {
11034 case FSIOC_SYNC_VOLUME: {
11035 struct vfs_attr vfa;
11036 mount_t mp = vp->v_mount;
11037 unsigned arg;
11038
11039
11040 /* record vid of vp so we can drop it below. */
11041 uint32_t vvid = vp->v_id;
11042
11043 /*
11044 * Then grab mount_iterref so that we can release the vnode.
11045 * Without this, a thread may call vnode_iterate_prepare then
11046 * get into a deadlock because we've never released the root vp
11047 */
11048 error = mount_iterref(mp, 0);
11049 if (error) {
11050 break;
11051 }
11052 vnode_put(vp);
11053
11054 arg = MNT_NOWAIT;
11055 if (*(uint32_t*)data & FSCTL_SYNC_WAIT) {
11056 arg = MNT_WAIT;
11057 }
11058
11059 /*
11060 * If the filesystem supports multiple file systems in a
11061 * partition (e.g. APFS volumes in a container), it knows
11062 * that the waitfor argument to VFS_SYNC is a set of flags.
11063 */
11064 VFSATTR_INIT(&vfa);
11065 VFSATTR_WANTED(&vfa, f_capabilities);
11066 if ((vfs_getattr(mp, &vfa, vfs_context_current()) == 0) &&
11067 VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) &&
11068 ((vfa.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_SHARED_SPACE)) &&
11069 ((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_SHARED_SPACE))) {
11070 arg |= MNT_VOLUME;
11071 }
11072
11073 /* issue the sync for this volume */
11074 (void)sync_callback(mp, &arg);
11075
11076 /*
11077 * Then release the mount_iterref once we're done syncing; it's not
11078 * needed for the VNOP_IOCTL below
11079 */
11080 mount_iterdrop(mp);
11081
11082 if (arg & FSCTL_SYNC_FULLSYNC) {
11083 /* re-obtain vnode iocount on the root vp, if possible */
11084 error = vnode_getwithvid(vp, vvid);
11085 if (error == 0) {
11086 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
11087 vnode_put(vp);
11088 }
11089 }
11090 /* mark the argument VP as having been released */
11091 *arg_vp = NULL;
11092 }
11093 break;
11094
11095 case FSIOC_ROUTEFS_SETROUTEID: {
11096 #if ROUTEFS
11097 char routepath[MAXPATHLEN];
11098 size_t len = 0;
11099
11100 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
11101 break;
11102 }
11103 bzero(routepath, MAXPATHLEN);
11104 error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
11105 if (error) {
11106 break;
11107 }
11108 error = routefs_kernel_mount(routepath);
11109 if (error) {
11110 break;
11111 }
11112 #endif
11113 }
11114 break;
11115
11116 case FSIOC_SET_PACKAGE_EXTS: {
11117 user_addr_t ext_strings;
11118 uint32_t num_entries;
11119 uint32_t max_width;
11120
11121 if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0))) {
11122 break;
11123 }
11124
11125 if ((is64bit && size != sizeof(user64_package_ext_info))
11126 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
11127 // either you're 64-bit and passed a 64-bit struct or
11128 // you're 32-bit and passed a 32-bit struct. otherwise
11129 // it's not ok.
11130 error = EINVAL;
11131 break;
11132 }
11133
11134 if (is64bit) {
11135 ext_strings = ((user64_package_ext_info *)data)->strings;
11136 num_entries = ((user64_package_ext_info *)data)->num_entries;
11137 max_width = ((user64_package_ext_info *)data)->max_width;
11138 } else {
11139 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
11140 num_entries = ((user32_package_ext_info *)data)->num_entries;
11141 max_width = ((user32_package_ext_info *)data)->max_width;
11142 }
11143 error = set_package_extensions_table(ext_strings, num_entries, max_width);
11144 }
11145 break;
11146
11147 case FSIOC_SET_FSTYPENAME_OVERRIDE:
11148 {
11149 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
11150 break;
11151 }
11152 if (vp->v_mount) {
11153 mount_lock(vp->v_mount);
11154 if (data[0] != 0) {
11155 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
11156 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
11157 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
11158 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
11159 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
11160 }
11161 } else {
11162 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
11163 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
11164 }
11165 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
11166 vp->v_mount->fstypename_override[0] = '\0';
11167 }
11168 mount_unlock(vp->v_mount);
11169 }
11170 }
11171 break;
11172
11173 case DISK_CONDITIONER_IOC_GET: {
11174 error = disk_conditioner_get_info(vp->v_mount, (disk_conditioner_info *)data);
11175 }
11176 break;
11177
11178 case DISK_CONDITIONER_IOC_SET: {
11179 error = disk_conditioner_set_info(vp->v_mount, (disk_conditioner_info *)data);
11180 }
11181 break;
11182
11183 case FSIOC_CAS_BSDFLAGS: {
11184 struct fsioc_cas_bsdflags *cas = (struct fsioc_cas_bsdflags *)data;
11185 struct vnode_attr va;
11186
11187 VATTR_INIT(&va);
11188 VATTR_SET(&va, va_flags, cas->new_flags);
11189
11190 error = chflags0(vp, &va, cas_bsdflags_setattr, cas, ctx);
11191 }
11192 break;
11193
11194 case FSIOC_FD_ONLY_OPEN_ONCE: {
11195 if (vnode_usecount(vp) > 1) {
11196 error = EBUSY;
11197 } else {
11198 error = 0;
11199 }
11200 }
11201 break;
11202
11203 default: {
11204 /* other, known commands shouldn't be passed down here */
11205 switch (cmd) {
11206 case F_PUNCHHOLE:
11207 case F_TRIM_ACTIVE_FILE:
11208 case F_RDADVISE:
11209 case F_TRANSCODEKEY:
11210 case F_GETPROTECTIONLEVEL:
11211 case F_GETDEFAULTPROTLEVEL:
11212 case F_MAKECOMPRESSED:
11213 case F_SET_GREEDY_MODE:
11214 case F_SETSTATICCONTENT:
11215 case F_SETIOTYPE:
11216 case F_SETBACKINGSTORE:
11217 case F_GETPATH_MTMINFO:
11218 case APFSIOC_REVERT_TO_SNAPSHOT:
11219 case FSIOC_FIOSEEKHOLE:
11220 case FSIOC_FIOSEEKDATA:
11221 case HFS_GET_BOOT_INFO:
11222 case HFS_SET_BOOT_INFO:
11223 case FIOPINSWAP:
11224 case F_CHKCLEAN:
11225 case F_FULLFSYNC:
11226 case F_BARRIERFSYNC:
11227 case F_FREEZE_FS:
11228 case F_THAW_FS:
11229 error = EINVAL;
11230 goto outdrop;
11231 }
11232 /* Invoke the filesystem-specific code */
11233 error = VNOP_IOCTL(vp, cmd, data, options, ctx);
11234 }
11235 } /* end switch stmt */
11236
11237 /*
11238 * if no errors, copy any data to user. Size was
11239 * already set and checked above.
11240 */
11241 if (error == 0 && (cmd & IOC_OUT) && size) {
11242 error = copyout(data, udata, size);
11243 }
11244
11245 outdrop:
11246 if (memp) {
11247 kfree(memp, size);
11248 }
11249
11250 return error;
11251 }
11252
11253 /* ARGSUSED */
11254 int
11255 fsctl(proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
11256 {
11257 int error;
11258 struct nameidata nd;
11259 u_long nameiflags;
11260 vnode_t vp = NULL;
11261 vfs_context_t ctx = vfs_context_current();
11262
11263 AUDIT_ARG(cmd, uap->cmd);
11264 AUDIT_ARG(value32, uap->options);
11265 /* Get the vnode for the file we are getting info on: */
11266 nameiflags = 0;
11267 //
11268 // if we come through fsctl() then the file is by definition not open.
11269 // therefore for the FSIOC_FD_ONLY_OPEN_ONCE selector we return an error
11270 // lest the caller mistakenly think the only open is their own (but in
11271 // reality it's someone else's).
11272 //
11273 if (uap->cmd == FSIOC_FD_ONLY_OPEN_ONCE) {
11274 return EINVAL;
11275 }
11276 if ((uap->options & FSOPT_NOFOLLOW) == 0) {
11277 nameiflags |= FOLLOW;
11278 }
11279 if (uap->cmd == FSIOC_FIRMLINK_CTL) {
11280 nameiflags |= (CN_FIRMLINK_NOFOLLOW | NOCACHE);
11281 }
11282 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
11283 UIO_USERSPACE, uap->path, ctx);
11284 if ((error = namei(&nd))) {
11285 goto done;
11286 }
11287 vp = nd.ni_vp;
11288 nameidone(&nd);
11289
11290 #if CONFIG_MACF
11291 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
11292 if (error) {
11293 goto done;
11294 }
11295 #endif
11296
11297 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
11298
11299 done:
11300 if (vp) {
11301 vnode_put(vp);
11302 }
11303 return error;
11304 }
11305 /* ARGSUSED */
11306 int
11307 ffsctl(proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
11308 {
11309 int error;
11310 vnode_t vp = NULL;
11311 vfs_context_t ctx = vfs_context_current();
11312 int fd = -1;
11313
11314 AUDIT_ARG(fd, uap->fd);
11315 AUDIT_ARG(cmd, uap->cmd);
11316 AUDIT_ARG(value32, uap->options);
11317
11318 /* Get the vnode for the file we are getting info on: */
11319 if ((error = file_vnode(uap->fd, &vp))) {
11320 return error;
11321 }
11322 fd = uap->fd;
11323 if ((error = vnode_getwithref(vp))) {
11324 file_drop(fd);
11325 return error;
11326 }
11327
11328 #if CONFIG_MACF
11329 if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
11330 file_drop(fd);
11331 vnode_put(vp);
11332 return error;
11333 }
11334 #endif
11335
11336 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
11337
11338 file_drop(fd);
11339
11340 /* validate vp; fsctl_internal() can drop iocount and reset vp to NULL */
11341 if (vp) {
11342 vnode_put(vp);
11343 }
11344
11345 return error;
11346 }
11347 /* end of fsctl system call */
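/*
 * Illustrative sketch (userspace, not part of this file's build): issuing
 * a volume sync through the fsctl(2) path handled above.  This assumes
 * the fsctl() wrapper and the FSIOC_SYNC_VOLUME / FSCTL_SYNC_WAIT
 * definitions exported by <sys/fsctl.h>.
 *
 *	#include <sys/fsctl.h>
 *
 *	static int
 *	sync_volume(const char *any_path_on_volume)
 *	{
 *		uint32_t flags = FSCTL_SYNC_WAIT;	// wait for completion
 *		return fsctl(any_path_on_volume, FSIOC_SYNC_VOLUME,
 *		    &flags, 0);
 *	}
 */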
11348
11349 /*
11350 * Retrieve the data of an extended attribute.
11351 */
11352 int
11353 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
11354 {
11355 vnode_t vp;
11356 struct nameidata nd;
11357 char attrname[XATTR_MAXNAMELEN + 1];
11358 vfs_context_t ctx = vfs_context_current();
11359 uio_t auio = NULL;
11360 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11361 size_t attrsize = 0;
11362 size_t namelen;
11363 u_int32_t nameiflags;
11364 int error;
11365 char uio_buf[UIO_SIZEOF(1)];
11366
11367 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11368 return EINVAL;
11369 }
11370
11371 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
11372 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
11373 if ((error = namei(&nd))) {
11374 return error;
11375 }
11376 vp = nd.ni_vp;
11377 nameidone(&nd);
11378
11379 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11380 if (error != 0) {
11381 goto out;
11382 }
11383 if (xattr_protected(attrname)) {
11384 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
11385 error = EPERM;
11386 goto out;
11387 }
11388 }
11389 /*
11390 * the specific check for 0xffffffff is a hack to preserve
11391 * binary compatibility in K64 with applications that discovered
11392 * that passing in a buf pointer and a size of -1 resulted in
11393 * just the size of the indicated extended attribute being returned.
11394 * this isn't part of the documented behavior, but because of the
11395 * original implementation's check for "uap->size > 0", this behavior
11396 * was allowed. In K32 that check turned into a signed comparison
11397 * even though uap->size is unsigned... in K64, we blow by that
11398 * check because uap->size is unsigned and doesn't get sign smeared
11399 * in the munger for a 32 bit user app. we also need to add a
11400 * check to limit the maximum size of the buffer being passed in...
11401 * unfortunately, the underlying filesystems seem to just malloc
11402 * the requested size even if the actual extended attribute is tiny.
11403 * because that malloc is for kernel wired memory, we have to put a
11404 * sane limit on it.
11405 *
11406 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
11407 * U64 running on K64 will yield -1 (64 bits wide)
11408 * U32/U64 running on K32 will yield -1 (32 bits wide)
11409 */
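/*
 * Illustrative sketch (userspace, not part of this file's build): the
 * size-only query described above, assuming the getxattr(2) wrapper.
 * Passing a NULL value buffer with size 0 is the supported way to ask
 * for just the attribute size; the 0xffffffff / (size_t)-1 path below
 * exists only for the legacy callers noted in the comment.
 *
 *	// attribute name is hypothetical
 *	ssize_t attr_size = getxattr(path, "com.example.attr",
 *	    NULL, 0, 0, 0);
 */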
11410 if (uap->size == 0xffffffff || uap->size == (size_t)-1) {
11411 goto no_uio;
11412 }
11413
11414 if (uap->value) {
11415 if (uap->size > (size_t)XATTR_MAXSIZE) {
11416 uap->size = XATTR_MAXSIZE;
11417 }
11418
11419 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
11420 &uio_buf[0], sizeof(uio_buf));
11421 uio_addiov(auio, uap->value, uap->size);
11422 }
11423 no_uio:
11424 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
11425 out:
11426 vnode_put(vp);
11427
11428 if (auio) {
11429 *retval = uap->size - uio_resid(auio);
11430 } else {
11431 *retval = (user_ssize_t)attrsize;
11432 }
11433
11434 return error;
11435 }
11436
11437 /*
11438 * Retrieve the data of an extended attribute.
11439 */
11440 int
11441 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
11442 {
11443 vnode_t vp;
11444 char attrname[XATTR_MAXNAMELEN + 1];
11445 uio_t auio = NULL;
11446 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11447 size_t attrsize = 0;
11448 size_t namelen;
11449 int error;
11450 char uio_buf[UIO_SIZEOF(1)];
11451
11452 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11453 return EINVAL;
11454 }
11455
11456 if ((error = file_vnode(uap->fd, &vp))) {
11457 return error;
11458 }
11459 if ((error = vnode_getwithref(vp))) {
11460 file_drop(uap->fd);
11461 return error;
11462 }
11463 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11464 if (error != 0) {
11465 goto out;
11466 }
11467 if (xattr_protected(attrname)) {
11468 error = EPERM;
11469 goto out;
11470 }
11471 if (uap->value && uap->size > 0) {
11472 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
11473 &uio_buf[0], sizeof(uio_buf));
11474 uio_addiov(auio, uap->value, uap->size);
11475 }
11476
11477 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
11478 out:
11479 (void)vnode_put(vp);
11480 file_drop(uap->fd);
11481
11482 if (auio) {
11483 *retval = uap->size - uio_resid(auio);
11484 } else {
11485 *retval = (user_ssize_t)attrsize;
11486 }
11487 return error;
11488 }
11489
11490 /*
11491 * Set the data of an extended attribute.
11492 */
11493 int
11494 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
11495 {
11496 vnode_t vp;
11497 struct nameidata nd;
11498 char attrname[XATTR_MAXNAMELEN + 1];
11499 vfs_context_t ctx = vfs_context_current();
11500 uio_t auio = NULL;
11501 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11502 size_t namelen;
11503 u_int32_t nameiflags;
11504 int error;
11505 char uio_buf[UIO_SIZEOF(1)];
11506
11507 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11508 return EINVAL;
11509 }
11510
11511 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11512 if (error != 0) {
11513 if (error == EPERM) {
11514 /* if the string won't fit in attrname, copyinstr emits EPERM */
11515 return ENAMETOOLONG;
11516 }
11517 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11518 return error;
11519 }
11520 if (xattr_protected(attrname)) {
11521 return EPERM;
11522 }
11523 if (uap->size != 0 && uap->value == 0) {
11524 return EINVAL;
11525 }
11526
11527 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
11528 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
11529 if ((error = namei(&nd))) {
11530 return error;
11531 }
11532 vp = nd.ni_vp;
11533 nameidone(&nd);
11534
11535 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
11536 &uio_buf[0], sizeof(uio_buf));
11537 uio_addiov(auio, uap->value, uap->size);
11538
11539 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
11540 #if CONFIG_FSE
11541 if (error == 0) {
11542 add_fsevent(FSE_XATTR_MODIFIED, ctx,
11543 FSE_ARG_VNODE, vp,
11544 FSE_ARG_DONE);
11545 }
11546 #endif
11547 vnode_put(vp);
11548 *retval = 0;
11549 return error;
11550 }
11551
11552 /*
11553 * Set the data of an extended attribute.
11554 */
11555 int
11556 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
11557 {
11558 vnode_t vp;
11559 char attrname[XATTR_MAXNAMELEN + 1];
11560 uio_t auio = NULL;
11561 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11562 size_t namelen;
11563 int error;
11564 char uio_buf[UIO_SIZEOF(1)];
11565 #if CONFIG_FSE
11566 vfs_context_t ctx = vfs_context_current();
11567 #endif
11568
11569 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11570 return EINVAL;
11571 }
11572
11573 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11574 if (error != 0) {
11575 if (error == EPERM) {
11576 /* if the string won't fit in attrname, copyinstr emits EPERM */
11577 return ENAMETOOLONG;
11578 }
11579 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
11580 return error;
11581 }
11582 if (xattr_protected(attrname)) {
11583 return EPERM;
11584 }
11585 if (uap->size != 0 && uap->value == 0) {
11586 return EINVAL;
11587 }
11588 if ((error = file_vnode(uap->fd, &vp))) {
11589 return error;
11590 }
11591 if ((error = vnode_getwithref(vp))) {
11592 file_drop(uap->fd);
11593 return error;
11594 }
11595 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
11596 &uio_buf[0], sizeof(uio_buf));
11597 uio_addiov(auio, uap->value, uap->size);
11598
11599 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
11600 #if CONFIG_FSE
11601 if (error == 0) {
11602 add_fsevent(FSE_XATTR_MODIFIED, ctx,
11603 FSE_ARG_VNODE, vp,
11604 FSE_ARG_DONE);
11605 }
11606 #endif
11607 vnode_put(vp);
11608 file_drop(uap->fd);
11609 *retval = 0;
11610 return error;
11611 }
11612
11613 /*
11614 * Remove an extended attribute.
11615 * XXX Code duplication here.
11616 */
11617 int
11618 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
11619 {
11620 vnode_t vp;
11621 struct nameidata nd;
11622 char attrname[XATTR_MAXNAMELEN + 1];
11623 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11624 vfs_context_t ctx = vfs_context_current();
11625 size_t namelen;
11626 u_int32_t nameiflags;
11627 int error;
11628
11629 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11630 return EINVAL;
11631 }
11632
11633 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11634 if (error != 0) {
11635 return error;
11636 }
11637 if (xattr_protected(attrname)) {
11638 return EPERM;
11639 }
11640 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
11641 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
11642 if ((error = namei(&nd))) {
11643 return error;
11644 }
11645 vp = nd.ni_vp;
11646 nameidone(&nd);
11647
11648 error = vn_removexattr(vp, attrname, uap->options, ctx);
11649 #if CONFIG_FSE
11650 if (error == 0) {
11651 add_fsevent(FSE_XATTR_REMOVED, ctx,
11652 FSE_ARG_VNODE, vp,
11653 FSE_ARG_DONE);
11654 }
11655 #endif
11656 vnode_put(vp);
11657 *retval = 0;
11658 return error;
11659 }
11660
11661 /*
11662 * Remove an extended attribute.
11663 * XXX Code duplication here.
11664 */
11665 int
11666 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
11667 {
11668 vnode_t vp;
11669 char attrname[XATTR_MAXNAMELEN + 1];
11670 size_t namelen;
11671 int error;
11672 #if CONFIG_FSE
11673 vfs_context_t ctx = vfs_context_current();
11674 #endif
11675
11676 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11677 return EINVAL;
11678 }
11679
11680 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
11681 if (error != 0) {
11682 return error;
11683 }
11684 if (xattr_protected(attrname)) {
11685 return EPERM;
11686 }
11687 if ((error = file_vnode(uap->fd, &vp))) {
11688 return error;
11689 }
11690 if ((error = vnode_getwithref(vp))) {
11691 file_drop(uap->fd);
11692 return error;
11693 }
11694
11695 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
11696 #if CONFIG_FSE
11697 if (error == 0) {
11698 add_fsevent(FSE_XATTR_REMOVED, ctx,
11699 FSE_ARG_VNODE, vp,
11700 FSE_ARG_DONE);
11701 }
11702 #endif
11703 vnode_put(vp);
11704 file_drop(uap->fd);
11705 *retval = 0;
11706 return error;
11707 }
11708
11709 /*
11710 * Retrieve the list of extended attribute names.
11711 * XXX Code duplication here.
11712 */
11713 int
11714 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
11715 {
11716 vnode_t vp;
11717 struct nameidata nd;
11718 vfs_context_t ctx = vfs_context_current();
11719 uio_t auio = NULL;
11720 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11721 size_t attrsize = 0;
11722 u_int32_t nameiflags;
11723 int error;
11724 char uio_buf[UIO_SIZEOF(1)];
11725
11726 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11727 return EINVAL;
11728 }
11729
11730 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
11731 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
11732 if ((error = namei(&nd))) {
11733 return error;
11734 }
11735 vp = nd.ni_vp;
11736 nameidone(&nd);
11737 if (uap->namebuf != 0 && uap->bufsize > 0) {
11738 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
11739 &uio_buf[0], sizeof(uio_buf));
11740 uio_addiov(auio, uap->namebuf, uap->bufsize);
11741 }
11742
11743 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
11744
11745 vnode_put(vp);
11746 if (auio) {
11747 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
11748 } else {
11749 *retval = (user_ssize_t)attrsize;
11750 }
11751 return error;
11752 }
11753
11754 /*
11755 * Retrieve the list of extended attribute names.
11756 * XXX Code duplication here.
11757 */
11758 int
11759 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
11760 {
11761 vnode_t vp;
11762 uio_t auio = NULL;
11763 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
11764 size_t attrsize = 0;
11765 int error;
11766 char uio_buf[UIO_SIZEOF(1)];
11767
11768 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) {
11769 return EINVAL;
11770 }
11771
11772 if ((error = file_vnode(uap->fd, &vp))) {
11773 return error;
11774 }
11775 if ((error = vnode_getwithref(vp))) {
11776 file_drop(uap->fd);
11777 return error;
11778 }
11779 if (uap->namebuf != 0 && uap->bufsize > 0) {
11780 auio = uio_createwithbuffer(1, 0, spacetype,
11781 UIO_READ, &uio_buf[0], sizeof(uio_buf));
11782 uio_addiov(auio, uap->namebuf, uap->bufsize);
11783 }
11784
11785 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
11786
11787 vnode_put(vp);
11788 file_drop(uap->fd);
11789 if (auio) {
11790 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
11791 } else {
11792 *retval = (user_ssize_t)attrsize;
11793 }
11794 return error;
11795 }
11796
11797 static int
11798 fsgetpath_internal(vfs_context_t ctx, int volfs_id, uint64_t objid,
11799 vm_size_t bufsize, caddr_t buf, uint32_t options, int *pathlen)
11800 {
11801 int error;
11802 struct mount *mp = NULL;
11803 vnode_t vp;
11804 int length;
11805 int bpflags;
11806 /* maximum number of times to retry build_path */
11807 unsigned int retries = 0x10;
11808
11809 if (bufsize > PAGE_SIZE) {
11810 return EINVAL;
11811 }
11812
11813 if (buf == NULL) {
11814 return ENOMEM;
11815 }
11816
11817 retry:
11818 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
11819 error = ENOTSUP; /* unexpected failure */
11820 return ENOTSUP;
11821 }
11822
11823 unionget:
11824 if (objid == 2) {
11825 struct vfs_attr vfsattr;
11826 int use_vfs_root = TRUE;
11827
11828 VFSATTR_INIT(&vfsattr);
11829 VFSATTR_WANTED(&vfsattr, f_capabilities);
11830 if (!(options & FSOPT_ISREALFSID) &&
11831 vfs_getattr(mp, &vfsattr, vfs_context_kernel()) == 0 &&
11832 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
11833 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS) &&
11834 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_VOL_GROUPS)) {
11835 use_vfs_root = FALSE;
11836 }
11837 }
11838
11839 if (use_vfs_root) {
11840 error = VFS_ROOT(mp, &vp, ctx);
11841 } else {
11842 error = VFS_VGET(mp, objid, &vp, ctx);
11843 }
11844 } else {
11845 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
11846 }
11847
11848 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
11849 /*
11850 * If the fileid isn't found and we're in a union
11851 * mount volume, then see if the fileid is in the
11852 * mounted-on volume.
11853 */
11854 struct mount *tmp = mp;
11855 mp = vnode_mount(tmp->mnt_vnodecovered);
11856 vfs_unbusy(tmp);
11857 if (vfs_busy(mp, LK_NOWAIT) == 0) {
11858 goto unionget;
11859 }
11860 } else {
11861 vfs_unbusy(mp);
11862 }
11863
11864 if (error) {
11865 return error;
11866 }
11867
11868 #if CONFIG_MACF
11869 error = mac_vnode_check_fsgetpath(ctx, vp);
11870 if (error) {
11871 vnode_put(vp);
11872 return error;
11873 }
11874 #endif
11875
11876 /* Obtain the absolute path to this vnode. */
11877 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
11878 if (options & FSOPT_NOFIRMLINKPATH) {
11879 bpflags |= BUILDPATH_NO_FIRMLINK;
11880 }
11881 bpflags |= BUILDPATH_CHECK_MOVED;
11882 error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
11883 vnode_put(vp);
11884
11885 if (error) {
11886 /* there was a race building the path, try a few more times */
11887 if (error == EAGAIN) {
11888 --retries;
11889 if (retries > 0) {
11890 goto retry;
11891 }
11892
11893 error = ENOENT;
11894 }
11895 goto out;
11896 }
11897
11898 AUDIT_ARG(text, buf);
11899
11900 if (kdebug_enable) {
11901 long dbg_parms[NUMPARMS];
11902 int dbg_namelen;
11903
11904 dbg_namelen = (int)sizeof(dbg_parms);
11905
11906 if (length < dbg_namelen) {
11907 memcpy((char *)dbg_parms, buf, length);
11908 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
11909
11910 dbg_namelen = length;
11911 } else {
11912 memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
11913 }
11914
11915 kdebug_vfs_lookup(dbg_parms, dbg_namelen, (void *)vp,
11916 KDBG_VFS_LOOKUP_FLAG_LOOKUP);
11917 }
11918
11919 *pathlen = (user_ssize_t)length; /* may be superseded by error */
11920
11921 out:
11922 return error;
11923 }
11924
11925 /*
11926 * Obtain the full pathname of a file system object by id.
11927 */
11928 static int
11929 fsgetpath_extended(user_addr_t buf, int bufsize, user_addr_t user_fsid, uint64_t objid,
11930 uint32_t options, user_ssize_t *retval)
11931 {
11932 vfs_context_t ctx = vfs_context_current();
11933 fsid_t fsid;
11934 char *realpath;
11935 int length;
11936 int error;
11937
11938 if (options & ~(FSOPT_NOFIRMLINKPATH | FSOPT_ISREALFSID)) {
11939 return EINVAL;
11940 }
11941
11942 if ((error = copyin(user_fsid, (caddr_t)&fsid, sizeof(fsid)))) {
11943 return error;
11944 }
11945 AUDIT_ARG(value32, fsid.val[0]);
11946 AUDIT_ARG(value64, objid);
11947 /* Restrict output buffer size for now. */
11948
11949 if (bufsize > PAGE_SIZE || bufsize <= 0) {
11950 return EINVAL;
11951 }
11952 MALLOC(realpath, char *, bufsize, M_TEMP, M_WAITOK | M_ZERO);
11953 if (realpath == NULL) {
11954 return ENOMEM;
11955 }
11956
11957 error = fsgetpath_internal(ctx, fsid.val[0], objid, bufsize, realpath,
11958 options, &length);
11959
11960 if (error) {
11961 goto out;
11962 }
11963
11964 error = copyout((caddr_t)realpath, buf, length);
11965
11966 *retval = (user_ssize_t)length; /* may be superseded by error */
11967 out:
11968 if (realpath) {
11969 FREE(realpath, M_TEMP);
11970 }
11971 return error;
11972 }
11973
11974 int
11975 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
11976 {
11977 return fsgetpath_extended(uap->buf, uap->bufsize, uap->fsid, uap->objid,
11978 0, retval);
11979 }
11980
11981 int
11982 fsgetpath_ext(__unused proc_t p, struct fsgetpath_ext_args *uap, user_ssize_t *retval)
11983 {
11984 return fsgetpath_extended(uap->buf, uap->bufsize, uap->fsid, uap->objid,
11985 uap->options, retval);
11986 }
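/*
 * Illustrative sketch (userspace, not part of this file's build): turning
 * an fsid / object-id pair back into a path with the fsgetpath(2)
 * wrapper, which is typically declared in <sys/fsgetpath.h>.  Deriving
 * the fsid from statfs(2) and the object id from stat(2) is one common
 * pattern and is shown here only as an assumption.
 *
 *	struct stat sb;
 *	struct statfs sfs;
 *	char path[PATH_MAX];
 *
 *	if (stat(orig_path, &sb) == 0 && statfs(orig_path, &sfs) == 0) {
 *		ssize_t len = fsgetpath(path, sizeof(path),
 *		    &sfs.f_fsid, (uint64_t)sb.st_ino);
 *		// len < 0 => errno set (e.g. ENOENT, ENOTSUP, EPERM)
 *	}
 */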
11987
11988 /*
11989 * Common routine to handle various flavors of statfs data heading out
11990 * to user space.
11991 *
11992 * Returns: 0 Success
11993 * EFAULT
11994 */
11995 static int
11996 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
11997 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
11998 boolean_t partial_copy)
11999 {
12000 int error;
12001 int my_size, copy_size;
12002
12003 if (is_64_bit) {
12004 struct user64_statfs sfs;
12005 my_size = copy_size = sizeof(sfs);
12006 bzero(&sfs, my_size);
12007 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
12008 sfs.f_type = mp->mnt_vtable->vfc_typenum;
12009 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
12010 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
12011 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
12012 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
12013 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
12014 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
12015 sfs.f_files = (user64_long_t)sfsp->f_files;
12016 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
12017 sfs.f_fsid = sfsp->f_fsid;
12018 sfs.f_owner = sfsp->f_owner;
12019 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
12020 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
12021 } else {
12022 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
12023 }
12024 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
12025 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
12026
12027 if (partial_copy) {
12028 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
12029 }
12030 error = copyout((caddr_t)&sfs, bufp, copy_size);
12031 } else {
12032 struct user32_statfs sfs;
12033
12034 my_size = copy_size = sizeof(sfs);
12035 bzero(&sfs, my_size);
12036
12037 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
12038 sfs.f_type = mp->mnt_vtable->vfc_typenum;
12039 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
12040
12041 /*
12042 * It's possible for there to be more than 2^31 blocks in the filesystem, so we
12043 * have to fudge the numbers here in that case. We inflate the blocksize in order
12044 * to reflect the filesystem size as best we can.
12045 */
12046 if ((sfsp->f_blocks > INT_MAX)
12047 /* Hack for 4061702. I think the real fix is for Carbon to
12048 * look for some volume capability and not depend on hidden
12049 * semantics agreed between a FS and carbon.
12050 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
12051 * for Carbon to set bNoVolumeSizes volume attribute.
12052 * Without this the webdavfs files cannot be copied onto
12053 * disk as they look huge. This change should not affect
12054 * XSAN as it should not be setting these to -1.
12055 */
12056 && (sfsp->f_blocks != 0xffffffffffffffffULL)
12057 && (sfsp->f_bfree != 0xffffffffffffffffULL)
12058 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
12059 int shift;
12060
12061 /*
12062 * Work out how far we have to shift the block count down to make it fit.
12063 * Note that it's possible to have to shift so far that the resulting
12064 * blocksize would be unreportably large. At that point, we will clip
12065 * any values that don't fit.
12066 *
12067 * For safety's sake, we also ensure that f_iosize is never reported as
12068 * being smaller than f_bsize.
12069 */
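/*
 * Worked example (illustrative): with f_bsize = 4096 and f_blocks = 2^33,
 * shifts 0 through 2 still leave (f_blocks >> shift) above INT_MAX, so
 * the loop below stops at shift = 3.  We then report
 * f_bsize = 4096 << 3 = 32768 and f_blocks = 2^30, preserving the total
 * size (2^30 * 32768 == 2^33 * 4096) at the cost of coarser granularity.
 */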
12070 for (shift = 0; shift < 32; shift++) {
12071 if ((sfsp->f_blocks >> shift) <= INT_MAX) {
12072 break;
12073 }
12074 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX) {
12075 break;
12076 }
12077 }
12078 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
12079 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
12080 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
12081 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
12082 #undef __SHIFT_OR_CLIP
12083 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
12084 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
12085 } else {
12086 /* filesystem is small enough to be reported honestly */
12087 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
12088 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
12089 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
12090 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
12091 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
12092 }
12093 sfs.f_files = (user32_long_t)sfsp->f_files;
12094 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
12095 sfs.f_fsid = sfsp->f_fsid;
12096 sfs.f_owner = sfsp->f_owner;
12097 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
12098 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
12099 } else {
12100 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
12101 }
12102 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
12103 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
12104
12105 if (partial_copy) {
12106 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
12107 }
12108 error = copyout((caddr_t)&sfs, bufp, copy_size);
12109 }
12110
12111 if (sizep != NULL) {
12112 *sizep = my_size;
12113 }
12114 return error;
12115 }
12116
12117 /*
12118 * copy stat structure into user_stat structure.
12119 */
12120 void
12121 munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
12122 {
12123 bzero(usbp, sizeof(*usbp));
12124
12125 usbp->st_dev = sbp->st_dev;
12126 usbp->st_ino = sbp->st_ino;
12127 usbp->st_mode = sbp->st_mode;
12128 usbp->st_nlink = sbp->st_nlink;
12129 usbp->st_uid = sbp->st_uid;
12130 usbp->st_gid = sbp->st_gid;
12131 usbp->st_rdev = sbp->st_rdev;
12132 #ifndef _POSIX_C_SOURCE
12133 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
12134 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
12135 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
12136 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
12137 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
12138 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
12139 #else
12140 usbp->st_atime = sbp->st_atime;
12141 usbp->st_atimensec = sbp->st_atimensec;
12142 usbp->st_mtime = sbp->st_mtime;
12143 usbp->st_mtimensec = sbp->st_mtimensec;
12144 usbp->st_ctime = sbp->st_ctime;
12145 usbp->st_ctimensec = sbp->st_ctimensec;
12146 #endif
12147 usbp->st_size = sbp->st_size;
12148 usbp->st_blocks = sbp->st_blocks;
12149 usbp->st_blksize = sbp->st_blksize;
12150 usbp->st_flags = sbp->st_flags;
12151 usbp->st_gen = sbp->st_gen;
12152 usbp->st_lspare = sbp->st_lspare;
12153 usbp->st_qspare[0] = sbp->st_qspare[0];
12154 usbp->st_qspare[1] = sbp->st_qspare[1];
12155 }
12156
12157 void
12158 munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
12159 {
12160 bzero(usbp, sizeof(*usbp));
12161
12162 usbp->st_dev = sbp->st_dev;
12163 usbp->st_ino = sbp->st_ino;
12164 usbp->st_mode = sbp->st_mode;
12165 usbp->st_nlink = sbp->st_nlink;
12166 usbp->st_uid = sbp->st_uid;
12167 usbp->st_gid = sbp->st_gid;
12168 usbp->st_rdev = sbp->st_rdev;
12169 #ifndef _POSIX_C_SOURCE
12170 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
12171 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
12172 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
12173 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
12174 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
12175 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
12176 #else
12177 usbp->st_atime = sbp->st_atime;
12178 usbp->st_atimensec = sbp->st_atimensec;
12179 usbp->st_mtime = sbp->st_mtime;
12180 usbp->st_mtimensec = sbp->st_mtimensec;
12181 usbp->st_ctime = sbp->st_ctime;
12182 usbp->st_ctimensec = sbp->st_ctimensec;
12183 #endif
12184 usbp->st_size = sbp->st_size;
12185 usbp->st_blocks = sbp->st_blocks;
12186 usbp->st_blksize = sbp->st_blksize;
12187 usbp->st_flags = sbp->st_flags;
12188 usbp->st_gen = sbp->st_gen;
12189 usbp->st_lspare = sbp->st_lspare;
12190 usbp->st_qspare[0] = sbp->st_qspare[0];
12191 usbp->st_qspare[1] = sbp->st_qspare[1];
12192 }
12193
12194 /*
12195 * copy stat64 structure into user_stat64 structure.
12196 */
12197 void
12198 munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
12199 {
12200 bzero(usbp, sizeof(*usbp));
12201
12202 usbp->st_dev = sbp->st_dev;
12203 usbp->st_ino = sbp->st_ino;
12204 usbp->st_mode = sbp->st_mode;
12205 usbp->st_nlink = sbp->st_nlink;
12206 usbp->st_uid = sbp->st_uid;
12207 usbp->st_gid = sbp->st_gid;
12208 usbp->st_rdev = sbp->st_rdev;
12209 #ifndef _POSIX_C_SOURCE
12210 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
12211 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
12212 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
12213 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
12214 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
12215 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
12216 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
12217 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
12218 #else
12219 usbp->st_atime = sbp->st_atime;
12220 usbp->st_atimensec = sbp->st_atimensec;
12221 usbp->st_mtime = sbp->st_mtime;
12222 usbp->st_mtimensec = sbp->st_mtimensec;
12223 usbp->st_ctime = sbp->st_ctime;
12224 usbp->st_ctimensec = sbp->st_ctimensec;
12225 usbp->st_birthtime = sbp->st_birthtime;
12226 usbp->st_birthtimensec = sbp->st_birthtimensec;
12227 #endif
12228 usbp->st_size = sbp->st_size;
12229 usbp->st_blocks = sbp->st_blocks;
12230 usbp->st_blksize = sbp->st_blksize;
12231 usbp->st_flags = sbp->st_flags;
12232 usbp->st_gen = sbp->st_gen;
12233 usbp->st_lspare = sbp->st_lspare;
12234 usbp->st_qspare[0] = sbp->st_qspare[0];
12235 usbp->st_qspare[1] = sbp->st_qspare[1];
12236 }
12237
12238 void
12239 munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
12240 {
12241 bzero(usbp, sizeof(*usbp));
12242
12243 usbp->st_dev = sbp->st_dev;
12244 usbp->st_ino = sbp->st_ino;
12245 usbp->st_mode = sbp->st_mode;
12246 usbp->st_nlink = sbp->st_nlink;
12247 usbp->st_uid = sbp->st_uid;
12248 usbp->st_gid = sbp->st_gid;
12249 usbp->st_rdev = sbp->st_rdev;
12250 #ifndef _POSIX_C_SOURCE
12251 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
12252 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
12253 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
12254 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
12255 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
12256 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
12257 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
12258 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
12259 #else
12260 usbp->st_atime = sbp->st_atime;
12261 usbp->st_atimensec = sbp->st_atimensec;
12262 usbp->st_mtime = sbp->st_mtime;
12263 usbp->st_mtimensec = sbp->st_mtimensec;
12264 usbp->st_ctime = sbp->st_ctime;
12265 usbp->st_ctimensec = sbp->st_ctimensec;
12266 usbp->st_birthtime = sbp->st_birthtime;
12267 usbp->st_birthtimensec = sbp->st_birthtimensec;
12268 #endif
12269 usbp->st_size = sbp->st_size;
12270 usbp->st_blocks = sbp->st_blocks;
12271 usbp->st_blksize = sbp->st_blksize;
12272 usbp->st_flags = sbp->st_flags;
12273 usbp->st_gen = sbp->st_gen;
12274 usbp->st_lspare = sbp->st_lspare;
12275 usbp->st_qspare[0] = sbp->st_qspare[0];
12276 usbp->st_qspare[1] = sbp->st_qspare[1];
12277 }
12278
12279 /*
12280 * Purge the buffer cache to simulate cold starts
12281 */
12282 static int
12283 vnode_purge_callback(struct vnode *vp, __unused void *cargs)
12284 {
12285 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
12286
12287 return VNODE_RETURNED;
12288 }
12289
12290 static int
12291 vfs_purge_callback(mount_t mp, __unused void * arg)
12292 {
12293 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
12294
12295 return VFS_RETURNED;
12296 }
12297
12298 int
12299 vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
12300 {
12301 if (!kauth_cred_issuser(kauth_cred_get())) {
12302 return EPERM;
12303 }
12304
12305 vfs_iterate(0 /* flags */, vfs_purge_callback, NULL);
12306
12307 return 0;
12308 }
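/*
 * Illustrative sketch only: the vfs_iterate()/vnode_iterate() callback
 * pattern used by vfs_purge() above can be applied to other whole-system
 * vnode walks.  The counting callbacks below are hypothetical and not
 * part of this file.
 */
#if 0
static int
example_vnode_count_callback(__unused struct vnode *vp, void *cargs)
{
	(*(uint64_t *)cargs)++;
	return VNODE_RETURNED;
}

static int
example_vfs_count_callback(mount_t mp, void *arg)
{
	vnode_iterate(mp, VNODE_ITERATE_ALL, example_vnode_count_callback, arg);
	return VFS_RETURNED;
}

static uint64_t
example_count_all_vnodes(void)
{
	uint64_t count = 0;

	vfs_iterate(0 /* flags */, example_vfs_count_callback, &count);
	return count;
}
#endif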
12309
12310 /*
12311 * Gets the vnode associated with the (unnamed) snapshot directory
12312 * for a filesystem. The snapshot directory vnode is returned with
12313 * an iocount on it.
12314 */
12315 int
12316 vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx)
12317 {
12318 return VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx);
12319 }
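/*
 * Minimal usage sketch (hypothetical caller, illustrative only): the
 * snapshot directory vnode returned by vnode_get_snapdir() carries an
 * iocount that the caller must drop with vnode_put() when done.
 */
#if 0
static int
example_with_snapdir(vnode_t rvp, vfs_context_t ctx)
{
	vnode_t sdvp;
	int error;

	error = vnode_get_snapdir(rvp, &sdvp, ctx);
	if (error) {
		return error;
	}
	/* ... operate on sdvp (e.g. look up or enumerate snapshots) ... */
	vnode_put(sdvp);
	return 0;
}
#endif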
12320
12321 /*
12322 * Get the snapshot vnode.
12323 *
12324 * If successful, the call returns with an iocount on *rvpp and *sdvpp, and
12325 * the caller needs to call nameidone() on ndp.
12326 *
12327 * If the snapshot vnode exists, it is returned in ndp->ni_vp.
12328 *
12329 * If it returns with an error, *rvpp and *sdvpp are NULL and nameidone() is
12330 * not needed.
12331 */
12332 static int
12333 vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp,
12334 user_addr_t name, struct nameidata *ndp, int32_t op,
12335 #if !CONFIG_TRIGGERS
12336 __unused
12337 #endif
12338 enum path_operation pathop,
12339 vfs_context_t ctx)
12340 {
12341 int error, i;
12342 caddr_t name_buf;
12343 size_t name_len;
12344 struct vfs_attr vfa;
12345
12346 *sdvpp = NULLVP;
12347 *rvpp = NULLVP;
12348
12349 error = vnode_getfromfd(ctx, dirfd, rvpp);
12350 if (error) {
12351 return error;
12352 }
12353
12354 if (!vnode_isvroot(*rvpp)) {
12355 error = EINVAL;
12356 goto out;
12357 }
12358
12359 /* Make sure the filesystem supports snapshots */
12360 VFSATTR_INIT(&vfa);
12361 VFSATTR_WANTED(&vfa, f_capabilities);
12362 if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) ||
12363 !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) ||
12364 !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] &
12365 VOL_CAP_INT_SNAPSHOT)) ||
12366 !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &
12367 VOL_CAP_INT_SNAPSHOT))) {
12368 error = ENOTSUP;
12369 goto out;
12370 }
12371
12372 error = vnode_get_snapdir(*rvpp, sdvpp, ctx);
12373 if (error) {
12374 goto out;
12375 }
12376
12377 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12378 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
12379 if (error) {
12380 goto out1;
12381 }
12382
12383 /*
12384 * Some sanity checks: the name can't be empty, ".", "..", or contain slashes.
12385 * (the length returned by copyinstr includes the terminating NUL)
12386 */
12387 if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
12388 (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
12389 error = EINVAL;
12390 goto out1;
12391 }
12392 for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++) {
12393 ;
12394 }
12395 if (i < (int)name_len) {
12396 error = EINVAL;
12397 goto out1;
12398 }
12399
12400 #if CONFIG_MACF
12401 if (op == CREATE) {
12402 error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp),
12403 name_buf);
12404 } else if (op == DELETE) {
12405 error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp),
12406 name_buf);
12407 }
12408 if (error) {
12409 goto out1;
12410 }
12411 #endif
12412
12413 /* Check if the snapshot already exists ... */
12414 NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1,
12415 UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx);
12416 ndp->ni_dvp = *sdvpp;
12417
12418 error = namei(ndp);
12419 out1:
12420 FREE(name_buf, M_TEMP);
12421 out:
12422 if (error) {
12423 if (*sdvpp) {
12424 vnode_put(*sdvpp);
12425 *sdvpp = NULLVP;
12426 }
12427 if (*rvpp) {
12428 vnode_put(*rvpp);
12429 *rvpp = NULLVP;
12430 }
12431 }
12432 return error;
12433 }
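/*
 * Illustrative sketch only: the snapshot-name sanity check performed in
 * vnode_get_snapshot() above (and repeated for the new name in
 * snapshot_rename() below) could be expressed as a small helper.  The
 * helper is hypothetical and not used by this file; name_len is the
 * length returned by copyinstr(), which includes the terminating NUL.
 */
#if 0
static int
example_snapshot_name_ok(const char *name_buf, size_t name_len)
{
	size_t i;

	if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
	    (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
		return 0;
	}
	for (i = 0; i < name_len; i++) {
		if (name_buf[i] == '/') {
			return 0;
		}
	}
	return 1;
}
#endif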
12434
12435 /*
12436 * Create a filesystem snapshot (for filesystems that support snapshots).
12437 *
12438 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL):
12439 * we get to the (unnamed) snapshot directory vnode and create the vnode
12440 * for the snapshot in it.
12441 *
12442 * Restrictions:
12443 *
12444 * a) The passed-in snapshot name cannot contain slashes.
12445 * b) The name can't be "." or "..".
12446 *
12447 * Since this requires superuser privileges, vnode_authorize calls are not
12448 * made.
12449 */
12450 static int
12451 snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
12452 vfs_context_t ctx)
12453 {
12454 vnode_t rvp, snapdvp;
12455 int error;
12456 struct nameidata namend;
12457
12458 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE,
12459 OP_LINK, ctx);
12460 if (error) {
12461 return error;
12462 }
12463
12464 if (namend.ni_vp) {
12465 vnode_put(namend.ni_vp);
12466 error = EEXIST;
12467 } else {
12468 struct vnode_attr va;
12469 vnode_t vp = NULLVP;
12470
12471 VATTR_INIT(&va);
12472 VATTR_SET(&va, va_type, VREG);
12473 VATTR_SET(&va, va_mode, 0);
12474
12475 error = vn_create(snapdvp, &vp, &namend, &va,
12476 VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
12477 if (!error && vp) {
12478 vnode_put(vp);
12479 }
12480 }
12481
12482 nameidone(&namend);
12483 vnode_put(snapdvp);
12484 vnode_put(rvp);
12485 return error;
12486 }
12487
12488 /*
12489 * Delete a filesystem snapshot
12490 *
12491 * Get the vnodes for the unnamed snapshot directory and the snapshot, and
12492 * delete the snapshot.
12493 */
12494 static int
12495 snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
12496 vfs_context_t ctx)
12497 {
12498 vnode_t rvp, snapdvp;
12499 int error;
12500 struct nameidata namend;
12501
12502 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE,
12503 OP_UNLINK, ctx);
12504 if (error) {
12505 goto out;
12506 }
12507
12508 error = VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd,
12509 VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
12510
12511 vnode_put(namend.ni_vp);
12512 nameidone(&namend);
12513 vnode_put(snapdvp);
12514 vnode_put(rvp);
12515 out:
12516 return error;
12517 }
12518
12519 /*
12520 * Revert a filesystem to a snapshot
12521 *
12522 * Marks the filesystem to revert to the given snapshot on next mount.
12523 */
12524 static int
12525 snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
12526 vfs_context_t ctx)
12527 {
12528 int error;
12529 vnode_t rvp;
12530 mount_t mp;
12531 struct fs_snapshot_revert_args revert_data;
12532 struct componentname cnp;
12533 caddr_t name_buf;
12534 size_t name_len;
12535
12536 error = vnode_getfromfd(ctx, dirfd, &rvp);
12537 if (error) {
12538 return error;
12539 }
12540 mp = vnode_mount(rvp);
12541
12542 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12543 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
12544 if (error) {
12545 FREE(name_buf, M_TEMP);
12546 vnode_put(rvp);
12547 return error;
12548 }
12549
12550 #if CONFIG_MACF
12551 error = mac_mount_check_snapshot_revert(ctx, mp, name_buf);
12552 if (error) {
12553 FREE(name_buf, M_TEMP);
12554 vnode_put(rvp);
12555 return error;
12556 }
12557 #endif
12558
12559 /*
12560 * Grab mount_iterref so that we can release the vnode,
12561 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
12562 */
12563 error = mount_iterref(mp, 0);
12564 vnode_put(rvp);
12565 if (error) {
12566 FREE(name_buf, M_TEMP);
12567 return error;
12568 }
12569
12570 memset(&cnp, 0, sizeof(cnp));
12571 cnp.cn_pnbuf = (char *)name_buf;
12572 cnp.cn_nameiop = LOOKUP;
12573 cnp.cn_flags = ISLASTCN | HASBUF;
12574 cnp.cn_pnlen = MAXPATHLEN;
12575 cnp.cn_nameptr = cnp.cn_pnbuf;
12576 cnp.cn_namelen = (int)name_len;
12577 revert_data.sr_cnp = &cnp;
12578
12579 error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
12580 mount_iterdrop(mp);
12581 FREE(name_buf, M_TEMP);
12582
12583 if (error) {
12584 /* If there was any error, try again using VNOP_IOCTL */
12585
12586 vnode_t snapdvp;
12587 struct nameidata namend;
12588
12589 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP,
12590 OP_LOOKUP, ctx);
12591 if (error) {
12592 return error;
12593 }
12594
12595
12596 error = VNOP_IOCTL(namend.ni_vp, APFSIOC_REVERT_TO_SNAPSHOT, (caddr_t) NULL,
12597 0, ctx);
12598
12599 vnode_put(namend.ni_vp);
12600 nameidone(&namend);
12601 vnode_put(snapdvp);
12602 vnode_put(rvp);
12603 }
12604
12605 return error;
12606 }
12607
12608 /*
12609 * Rename a filesystem snapshot
12610 *
12611 * Get the vnodes for the unnamed snapshot directory and the snapshot, and
12612 * rename the snapshot. This is a very specialised (and simple) case of
12613 * rename(2) (which has to deal with a lot more complications). It differs
12614 * slightly from rename(2) in that EEXIST is returned if the new name exists.
12615 */
12616 static int
12617 snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
12618 __unused uint32_t flags, vfs_context_t ctx)
12619 {
12620 vnode_t rvp, snapdvp;
12621 int error, i;
12622 caddr_t newname_buf;
12623 size_t name_len;
12624 vnode_t fvp;
12625 struct nameidata *fromnd, *tond;
12626 /* carving out a chunk for structs that are too big to be on the stack. */
12627 struct {
12628 struct nameidata from_node;
12629 struct nameidata to_node;
12630 } * __rename_data;
12631
12632 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
12633 fromnd = &__rename_data->from_node;
12634 tond = &__rename_data->to_node;
12635
12636 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE,
12637 OP_UNLINK, ctx);
12638 if (error) {
12639 goto out;
12640 }
12641 fvp = fromnd->ni_vp;
12642
12643 MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12644 error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
12645 if (error) {
12646 goto out1;
12647 }
12648
12649 /*
12650 * Some sanity checks: the new name can't be empty, ".", "..", or
12651 * contain slashes.
12652 * (the length returned by copyinstr includes the terminating NUL)
12653 *
12654 * The FS rename VNOP is supposed to handle this, but we catch it
12655 * here ourselves as well.
12656 */
12657 if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') ||
12658 (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) {
12659 error = EINVAL;
12660 goto out1;
12661 }
12662 for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++) {
12663 ;
12664 }
12665 if (i < (int)name_len) {
12666 error = EINVAL;
12667 goto out1;
12668 }
12669
12670 #if CONFIG_MACF
12671 error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp),
12672 newname_buf);
12673 if (error) {
12674 goto out1;
12675 }
12676 #endif
12677
12678 NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
12679 UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx);
12680 tond->ni_dvp = snapdvp;
12681
12682 error = namei(tond);
12683 if (error) {
12684 goto out2;
12685 } else if (tond->ni_vp) {
12686 /*
12687 * snapshot rename behaves differently than rename(2) - if the
12688 * new name exists, EEXIST is returned.
12689 */
12690 vnode_put(tond->ni_vp);
12691 error = EEXIST;
12692 goto out2;
12693 }
12694
12695 error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
12696 &tond->ni_cnd, ctx);
12697
12698 out2:
12699 nameidone(tond);
12700 out1:
12701 FREE(newname_buf, M_TEMP);
12702 vnode_put(fvp);
12703 vnode_put(snapdvp);
12704 vnode_put(rvp);
12705 nameidone(fromnd);
12706 out:
12707 FREE(__rename_data, M_TEMP);
12708 return error;
12709 }
12710
12711 /*
12712 * Mount a filesystem snapshot
12713 *
12714 * Get the vnodes for the unnamed snapshot directory and the snapshot, and
12715 * mount the snapshot.
12716 */
12717 static int
12718 snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
12719 __unused user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
12720 {
12721 vnode_t rvp, snapdvp, snapvp, vp, pvp;
12722 int error;
12723 struct nameidata *snapndp, *dirndp;
12724 /* carving out a chunk for structs that are too big to be on the stack. */
12725 struct {
12726 struct nameidata snapnd;
12727 struct nameidata dirnd;
12728 } * __snapshot_mount_data;
12729
12730 MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data),
12731 M_TEMP, M_WAITOK);
12732 snapndp = &__snapshot_mount_data->snapnd;
12733 dirndp = &__snapshot_mount_data->dirnd;
12734
12735 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP,
12736 OP_LOOKUP, ctx);
12737 if (error) {
12738 goto out;
12739 }
12740
12741 snapvp = snapndp->ni_vp;
12742 if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) {
12743 error = EIO;
12744 goto out1;
12745 }
12746
12747 /* Get the vnode to be covered */
12748 NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
12749 UIO_USERSPACE, directory, ctx);
12750 error = namei(dirndp);
12751 if (error) {
12752 goto out1;
12753 }
12754
12755 vp = dirndp->ni_vp;
12756 pvp = dirndp->ni_dvp;
12757
12758 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
12759 error = EINVAL;
12760 } else {
12761 mount_t mp = vnode_mount(rvp);
12762 struct fs_snapshot_mount_args smnt_data;
12763
12764 smnt_data.sm_mp = mp;
12765 smnt_data.sm_cnp = &snapndp->ni_cnd;
12766 error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
12767 &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), flags & MNT_DONTBROWSE,
12768 KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
12769 }
12770
12771 vnode_put(vp);
12772 vnode_put(pvp);
12773 nameidone(dirndp);
12774 out1:
12775 vnode_put(snapvp);
12776 vnode_put(snapdvp);
12777 vnode_put(rvp);
12778 nameidone(snapndp);
12779 out:
12780 FREE(__snapshot_mount_data, M_TEMP);
12781 return error;
12782 }
12783
12784 /*
12785 * Root from a snapshot of the filesystem
12786 *
12787 * Marks the filesystem to root from the given snapshot on next boot.
12788 */
12789 static int
12790 snapshot_root(int dirfd, user_addr_t name, __unused uint32_t flags,
12791 vfs_context_t ctx)
12792 {
12793 int error;
12794 vnode_t rvp;
12795 mount_t mp;
12796 struct fs_snapshot_root_args root_data;
12797 struct componentname cnp;
12798 caddr_t name_buf;
12799 size_t name_len;
12800
12801 error = vnode_getfromfd(ctx, dirfd, &rvp);
12802 if (error) {
12803 return error;
12804 }
12805 mp = vnode_mount(rvp);
12806
12807 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
12808 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
12809 if (error) {
12810 FREE(name_buf, M_TEMP);
12811 vnode_put(rvp);
12812 return error;
12813 }
12814
12815 // XXX MAC checks ?
12816
12817 /*
12818 * Grab mount_iterref so that we can release the vnode,
12819 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
12820 */
12821 error = mount_iterref(mp, 0);
12822 vnode_put(rvp);
12823 if (error) {
12824 FREE(name_buf, M_TEMP);
12825 return error;
12826 }
12827
12828 memset(&cnp, 0, sizeof(cnp));
12829 cnp.cn_pnbuf = (char *)name_buf;
12830 cnp.cn_nameiop = LOOKUP;
12831 cnp.cn_flags = ISLASTCN | HASBUF;
12832 cnp.cn_pnlen = MAXPATHLEN;
12833 cnp.cn_nameptr = cnp.cn_pnbuf;
12834 cnp.cn_namelen = (int)name_len;
12835 root_data.sr_cnp = &cnp;
12836
12837 error = VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, (caddr_t)&root_data, 0, ctx);
12838
12839 mount_iterdrop(mp);
12840 FREE(name_buf, M_TEMP);
12841
12842 return error;
12843 }
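/*
 * Illustrative sketch only: snapshot_revert() and snapshot_root() above
 * share the same setup -- copy in the snapshot name, take a mount
 * iteration reference so the directory vnode can be released early, wrap
 * the name in a LOOKUP componentname, and hand the op-specific args
 * struct (whose sr_cnp points at that componentname) to VFS_IOCTL().
 * The helper below, which builds the componentname, is hypothetical and
 * not used by this file; name_len is the length returned by copyinstr().
 */
#if 0
static void
example_snapshot_cnp_init(struct componentname *cnp, caddr_t name_buf,
    size_t name_len)
{
	memset(cnp, 0, sizeof(*cnp));
	cnp->cn_pnbuf = (char *)name_buf;
	cnp->cn_nameiop = LOOKUP;
	cnp->cn_flags = ISLASTCN | HASBUF;
	cnp->cn_pnlen = MAXPATHLEN;
	cnp->cn_nameptr = cnp->cn_pnbuf;
	cnp->cn_namelen = (int)name_len;
}
#endif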
12844
12845 /*
12846 * FS snapshot operations dispatcher
12847 */
12848 int
12849 fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap,
12850 __unused int32_t *retval)
12851 {
12852 int error;
12853 vfs_context_t ctx = vfs_context_current();
12854
12855 AUDIT_ARG(fd, uap->dirfd);
12856 AUDIT_ARG(value32, uap->op);
12857
12858 error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0);
12859 if (error) {
12860 return error;
12861 }
12862
12863 /*
12864 * Enforce user authorization for snapshot modification operations
12865 */
12866 if ((uap->op != SNAPSHOT_OP_MOUNT) &&
12867 (uap->op != SNAPSHOT_OP_ROOT)) {
12868 vnode_t dvp = NULLVP;
12869 vnode_t devvp = NULLVP;
12870 mount_t mp;
12871
12872 error = vnode_getfromfd(ctx, uap->dirfd, &dvp);
12873 if (error) {
12874 return error;
12875 }
12876 mp = vnode_mount(dvp);
12877 devvp = mp->mnt_devvp;
12878
12879 /* get an iocount on devvp */
12880 if (devvp == NULLVP) {
12881 error = vnode_lookup(mp->mnt_vfsstat.f_mntfromname, 0, &devvp, ctx);
12882 /* for mounts which aren't block devices */
12883 if (error == ENOENT) {
12884 error = ENXIO;
12885 }
12886 } else {
12887 error = vnode_getwithref(devvp);
12888 }
12889
12890 if (error) {
12891 vnode_put(dvp);
12892 return error;
12893 }
12894
12895 if ((vfs_context_issuser(ctx) == 0) &&
12896 (vnode_authorize(devvp, NULL, KAUTH_VNODE_WRITE_DATA, ctx) != 0)) {
12897 error = EPERM;
12898 }
12899 vnode_put(dvp);
12900 vnode_put(devvp);
12901
12902 if (error) {
12903 return error;
12904 }
12905 }
12906
12907 switch (uap->op) {
12908 case SNAPSHOT_OP_CREATE:
12909 error = snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx);
12910 break;
12911 case SNAPSHOT_OP_DELETE:
12912 error = snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx);
12913 break;
12914 case SNAPSHOT_OP_RENAME:
12915 error = snapshot_rename(uap->dirfd, uap->name1, uap->name2,
12916 uap->flags, ctx);
12917 break;
12918 case SNAPSHOT_OP_MOUNT:
12919 error = snapshot_mount(uap->dirfd, uap->name1, uap->name2,
12920 uap->data, uap->flags, ctx);
12921 break;
12922 case SNAPSHOT_OP_REVERT:
12923 error = snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx);
12924 break;
12925 #if CONFIG_MNT_ROOTSNAP
12926 case SNAPSHOT_OP_ROOT:
12927 error = snapshot_root(uap->dirfd, uap->name1, uap->flags, ctx);
12928 break;
12929 #endif /* CONFIG_MNT_ROOTSNAP */
12930 default:
12931 error = ENOSYS;
12932 }
12933
12934 return error;
12935 }
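/*
 * Userspace view (illustrative only, not compiled here): the operations
 * dispatched above are normally reached through the wrappers declared in
 * <sys/snapshot.h> -- fs_snapshot_create(), fs_snapshot_delete(),
 * fs_snapshot_rename(), fs_snapshot_revert() and fs_snapshot_mount() --
 * assuming those wrappers are available on the target SDK and the caller
 * holds the required snapshot privilege.  The volume path and snapshot
 * name below are made up.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <sys/snapshot.h>
#include <unistd.h>

int
main(void)
{
	int dirfd = open("/Volumes/Data", O_RDONLY);

	if (dirfd < 0) {
		perror("open");
		return 1;
	}
	if (fs_snapshot_create(dirfd, "example.snapshot", 0) != 0) {
		perror("fs_snapshot_create");
	}
	if (fs_snapshot_delete(dirfd, "example.snapshot", 0) != 0) {
		perror("fs_snapshot_delete");
	}
	close(dirfd);
	return 0;
}
#endif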