apple/xnu.git: bsd/vfs/vfs_syscalls.c (blob dccc77bd61fddc787e9cffa80600b6c40e86cf55)
1 /*
2 * Copyright (c) 1995-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
80 #include <sys/stat.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
87 #include <sys/mman.h>
88 #include <sys/dirent.h>
89 #include <sys/attr.h>
90 #include <sys/sysctl.h>
91 #include <sys/ubc.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <sys/content_protection.h>
103 #include <sys/clonefile.h>
104 #include <sys/snapshot.h>
105 #include <sys/priv.h>
106 #include <machine/cons.h>
107 #include <machine/limits.h>
108 #include <miscfs/specfs/specdev.h>
109
110 #include <vfs/vfs_disk_conditioner.h>
111
112 #include <security/audit/audit.h>
113 #include <bsm/audit_kevents.h>
114
115 #include <mach/mach_types.h>
116 #include <kern/kern_types.h>
117 #include <kern/kalloc.h>
118 #include <kern/task.h>
119
120 #include <vm/vm_pageout.h>
121 #include <vm/vm_protos.h>
122
123 #include <libkern/OSAtomic.h>
124 #include <pexpert/pexpert.h>
125 #include <IOKit/IOBSD.h>
126
127 #if ROUTEFS
128 #include <miscfs/routefs/routefs.h>
129 #endif /* ROUTEFS */
130
131 #if CONFIG_MACF
132 #include <security/mac.h>
133 #include <security/mac_framework.h>
134 #endif
135
136 #if CONFIG_FSE
137 #define GET_PATH(x) \
138 (x) = get_pathbuff();
139 #define RELEASE_PATH(x) \
140 release_pathbuff(x);
141 #else
142 #define GET_PATH(x) \
143 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
144 #define RELEASE_PATH(x) \
145 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
146 #endif /* CONFIG_FSE */
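/*
 * Illustrative sketch, not part of the original source: GET_PATH() and
 * RELEASE_PATH() above bracket a temporary MAXPATHLEN buffer (from the
 * fsevents path-buffer pool when CONFIG_FSE is set, otherwise from the
 * M_NAMEI zone).  A minimal usage pattern, with a hypothetical helper
 * name, might look like the following.
 */
#if 0	/* example only, never compiled */
static void
example_log_vnode_path(vnode_t vp)
{
	char *path;
	int len = MAXPATHLEN;

	GET_PATH(path);
	/* vn_getpath() fills the caller-supplied buffer with the vnode's path */
	if (vn_getpath(vp, path, &len) == 0) {
		printf("example: vnode path is %s\n", path);
	}
	RELEASE_PATH(path);
}
#endif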
147
148 #ifndef HFS_GET_BOOT_INFO
149 #define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004)
150 #endif
151
152 #ifndef HFS_SET_BOOT_INFO
153 #define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005)
154 #endif
155
156 #ifndef APFSIOC_REVERT_TO_SNAPSHOT
157 #define APFSIOC_REVERT_TO_SNAPSHOT _IOW('J', 1, u_int64_t)
158 #endif
159
160 extern void disk_conditioner_unmount(mount_t mp);
161
162 /* struct for checkdirs iteration */
163 struct cdirargs {
164 vnode_t olddp;
165 vnode_t newdp;
166 };
167 /* callback for checkdirs iteration */
168 static int checkdirs_callback(proc_t p, void * arg);
169
170 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
171 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
172 void enablequotas(struct mount *mp, vfs_context_t ctx);
173 static int getfsstat_callback(mount_t mp, void * arg);
174 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
175 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
176 static int sync_callback(mount_t, void *);
177 static void hibernate_sync_thread(void *, __unused wait_result_t);
178 static int hibernate_sync_async(int);
179 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
180 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
181 boolean_t partial_copy);
182 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
183 user_addr_t bufp);
184 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
185 static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
186 struct componentname *cnp, user_addr_t fsmountargs,
187 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
188 vfs_context_t ctx);
189 void vfs_notify_mount(vnode_t pdvp);
190
191 int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
192
193 struct fd_vn_data * fg_vn_data_alloc(void);
194
195 /*
 196  * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}.
 197  * Concurrent lookups (or lookups by id) on hard links can cause
 198  * vn_getpath (which does not re-enter the filesystem, as vn_getpath_fsenter
 199  * does) to return ENOENT, because the path cannot be produced from the name
 200  * cache alone.  We have no option but to retry and hope that one namei ->
 201  * reverse-path generation completes without an intervening lookup or
 202  * lookup-by-id on the hard-linked item.  This is only an issue for the MAC
 203  * hooks that cannot re-enter the filesystem: currently rename, unlink and rmdir.
204 */
205 #define MAX_AUTHORIZE_ENOENT_RETRIES 1024
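/*
 * Hedged sketch, not taken from this file's actual control flow: callers of
 * the non-reentrant MAC authorization hooks typically retry when ENOENT is
 * returned because the path could not be produced from the name cache alone.
 * The surrounding variables and the specific hook shown here are illustrative.
 */
#if 0	/* example only, never compiled */
	int retry_count = 0;
	int error;

retry:
	error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
	if (error == ENOENT && retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
		/* hope for a namei -> reverse-path generation without an intervening lookup */
		retry_count++;
		goto retry;
	}
#endif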
206
207 static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
208
209 static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
210
211 #ifdef CONFIG_IMGSRC_ACCESS
212 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
213 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
214 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
215 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
216 static void mount_end_update(mount_t mp);
217 static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
218 #endif /* CONFIG_IMGSRC_ACCESS */
219
220 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
221
222 __private_extern__
223 int sync_internal(void);
224
225 __private_extern__
226 int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);
227
228 extern lck_grp_t *fd_vn_lck_grp;
229 extern lck_grp_attr_t *fd_vn_lck_grp_attr;
230 extern lck_attr_t *fd_vn_lck_attr;
231
232 /*
 233  * incremented each time a mount or unmount operation occurs;
234 * used to invalidate the cached value of the rootvp in the
235 * mount structure utilized by cache_lookup_path
236 */
237 uint32_t mount_generation = 0;
238
239 /* counts number of mount and unmount operations */
240 unsigned int vfs_nummntops=0;
241
242 extern const struct fileops vnops;
243 #if CONFIG_APPLEDOUBLE
244 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
245 #endif /* CONFIG_APPLEDOUBLE */
246
247 /*
248 * Virtual File System System Calls
249 */
250
251 #if NFSCLIENT || DEVFS || ROUTEFS
252 /*
 253  * Private in-kernel mounting SPI (not exported to userspace)
254 */
255 __private_extern__
256 boolean_t
257 vfs_iskernelmount(mount_t mp)
258 {
259 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
260 }
261
262 __private_extern__
263 int
264 kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
265 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
266 {
267 struct nameidata nd;
268 boolean_t did_namei;
269 int error;
270
271 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
272 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
273
274 /*
275 * Get the vnode to be covered if it's not supplied
276 */
277 if (vp == NULLVP) {
278 error = namei(&nd);
279 if (error)
280 return (error);
281 vp = nd.ni_vp;
282 pvp = nd.ni_dvp;
283 did_namei = TRUE;
284 } else {
285 char *pnbuf = CAST_DOWN(char *, path);
286
287 nd.ni_cnd.cn_pnbuf = pnbuf;
288 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
289 did_namei = FALSE;
290 }
291
292 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
293 syscall_flags, kern_flags, NULL, TRUE, ctx);
294
295 if (did_namei) {
296 vnode_put(vp);
297 vnode_put(pvp);
298 nameidone(&nd);
299 }
300
301 return (error);
302 }
 303 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
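/*
 * Hedged sketch, not part of the original source: an in-kernel subsystem
 * could use kernel_mount() above roughly as follows.  The filesystem name,
 * target path, flags and kernel flags here are illustrative only, and real
 * callers pass filesystem-specific mount arguments through 'data'.
 */
#if 0	/* example only, never compiled */
	int err;

	err = kernel_mount("devfs", NULLVP, NULLVP, "/dev", NULL, 0,
	    MNT_DONTBROWSE, KERNEL_MOUNT_NOAUTH, vfs_context_kernel());
	if (err != 0) {
		printf("example: kernel_mount failed with %d\n", err);
	}
#endif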
304
305 /*
306 * Mount a file system.
307 */
308 /* ARGSUSED */
309 int
310 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
311 {
312 struct __mac_mount_args muap;
313
314 muap.type = uap->type;
315 muap.path = uap->path;
316 muap.flags = uap->flags;
317 muap.data = uap->data;
318 muap.mac_p = USER_ADDR_NULL;
319 return (__mac_mount(p, &muap, retval));
320 }
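/*
 * For reference, a hedged userspace sketch of reaching this syscall through
 * the mount(2) wrapper declared in <sys/mount.h>.  The volume path is made
 * up, and the 'data' argument is filesystem specific (many filesystems
 * require a non-NULL argument structure; NULL is used here only to keep the
 * example short).
 */
#if 0	/* example only, never compiled */
#include <sys/mount.h>
#include <stdio.h>

int
main(void)
{
	/* Switch an already-mounted volume to read-only via an update mount. */
	if (mount("apfs", "/Volumes/Example", MNT_UPDATE | MNT_RDONLY, NULL) != 0) {
		perror("mount");
		return 1;
	}
	return 0;
}
#endif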
321
322 int
323 fmount(__unused proc_t p, struct fmount_args *uap, __unused int32_t *retval)
324 {
325 struct componentname cn;
326 vfs_context_t ctx = vfs_context_current();
327 size_t dummy = 0;
328 int error;
329 int flags = uap->flags;
330 char fstypename[MFSNAMELEN];
331 char *labelstr = NULL; /* regular mount call always sets it to NULL for __mac_mount() */
332 vnode_t pvp;
333 vnode_t vp;
334
335 AUDIT_ARG(fd, uap->fd);
336 AUDIT_ARG(fflags, flags);
337 /* fstypename will get audited by mount_common */
338
339 /* Sanity check the flags */
340 if (flags & (MNT_IMGSRC_BY_INDEX|MNT_ROOTFS)) {
341 return (ENOTSUP);
342 }
343
344 if (flags & MNT_UNION) {
345 return (EPERM);
346 }
347
348 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
349 if (error) {
350 return (error);
351 }
352
353 if ((error = file_vnode(uap->fd, &vp)) != 0) {
354 return (error);
355 }
356
357 if ((error = vnode_getwithref(vp)) != 0) {
358 file_drop(uap->fd);
359 return (error);
360 }
361
362 pvp = vnode_getparent(vp);
363 if (pvp == NULL) {
364 vnode_put(vp);
365 file_drop(uap->fd);
366 return (EINVAL);
367 }
368
369 memset(&cn, 0, sizeof(struct componentname));
370 MALLOC(cn.cn_pnbuf, char *, MAXPATHLEN, M_TEMP, M_WAITOK);
371 cn.cn_pnlen = MAXPATHLEN;
372
373 if((error = vn_getpath(vp, cn.cn_pnbuf, &cn.cn_pnlen)) != 0) {
374 FREE(cn.cn_pnbuf, M_TEMP);
375 vnode_put(pvp);
376 vnode_put(vp);
377 file_drop(uap->fd);
378 return (error);
379 }
380
381 error = mount_common(fstypename, pvp, vp, &cn, uap->data, flags, 0, labelstr, FALSE, ctx);
382
383 FREE(cn.cn_pnbuf, M_TEMP);
384 vnode_put(pvp);
385 vnode_put(vp);
386 file_drop(uap->fd);
387
388 return (error);
389 }
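/*
 * Hedged userspace sketch for the fd-based variant above.  The wrapper
 * prototype shown in the comment below is an assumption (it is not confirmed
 * by this file), and the volume path, filesystem name and flags are
 * illustrative only.
 */
#if 0	/* example only, never compiled */
#include <sys/mount.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

int
main(void)
{
	/* Assumed prototype: int fmount(const char *fstype, int fd, int flags, void *data); */
	int fd = open("/Volumes/Example", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (fmount("apfs", fd, MNT_RDONLY, NULL) != 0) {
		perror("fmount");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}
#endif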
390
391 void
392 vfs_notify_mount(vnode_t pdvp)
393 {
394 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
395 lock_vnode_and_post(pdvp, NOTE_WRITE);
396 }
397
398 /*
399 * __mac_mount:
400 * Mount a file system taking into account MAC label behavior.
401 * See mount(2) man page for more information
402 *
403 * Parameters: p Process requesting the mount
404 * uap User argument descriptor (see below)
405 * retval (ignored)
406 *
407 * Indirect: uap->type Filesystem type
408 * uap->path Path to mount
409 * uap->data Mount arguments
410 * uap->mac_p MAC info
411 * uap->flags Mount flags
412 *
413 *
414 * Returns: 0 Success
415 * !0 Not success
416 */
417 boolean_t root_fs_upgrade_try = FALSE;
418
419 int
420 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
421 {
422 vnode_t pvp = NULL;
423 vnode_t vp = NULL;
424 int need_nameidone = 0;
425 vfs_context_t ctx = vfs_context_current();
426 char fstypename[MFSNAMELEN];
427 struct nameidata nd;
428 size_t dummy=0;
429 char *labelstr = NULL;
430 int flags = uap->flags;
431 int error;
432 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
433 boolean_t is_64bit = IS_64BIT_PROCESS(p);
434 #else
435 #pragma unused(p)
436 #endif
437 /*
438 * Get the fs type name from user space
439 */
440 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
441 if (error)
442 return (error);
443
444 /*
445 * Get the vnode to be covered
446 */
447 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
448 UIO_USERSPACE, uap->path, ctx);
449 error = namei(&nd);
450 if (error) {
451 goto out;
452 }
453 need_nameidone = 1;
454 vp = nd.ni_vp;
455 pvp = nd.ni_dvp;
456
457 #ifdef CONFIG_IMGSRC_ACCESS
458 /* Mounting image source cannot be batched with other operations */
459 if (flags == MNT_IMGSRC_BY_INDEX) {
460 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
461 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
462 goto out;
463 }
464 #endif /* CONFIG_IMGSRC_ACCESS */
465
466 #if CONFIG_MACF
467 /*
468 * Get the label string (if any) from user space
469 */
470 if (uap->mac_p != USER_ADDR_NULL) {
471 struct user_mac mac;
472 size_t ulen = 0;
473
474 if (is_64bit) {
475 struct user64_mac mac64;
476 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
477 mac.m_buflen = mac64.m_buflen;
478 mac.m_string = mac64.m_string;
479 } else {
480 struct user32_mac mac32;
481 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
482 mac.m_buflen = mac32.m_buflen;
483 mac.m_string = mac32.m_string;
484 }
485 if (error)
486 goto out;
487 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
488 (mac.m_buflen < 2)) {
489 error = EINVAL;
490 goto out;
491 }
492 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
493 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
494 if (error) {
495 goto out;
496 }
497 AUDIT_ARG(mac_string, labelstr);
498 }
499 #endif /* CONFIG_MACF */
500
501 AUDIT_ARG(fflags, flags);
502
503 #if SECURE_KERNEL
504 if (flags & MNT_UNION) {
505 /* No union mounts on release kernels */
506 error = EPERM;
507 goto out;
508 }
509 #endif
510
511 if ((vp->v_flag & VROOT) &&
512 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
513 if (!(flags & MNT_UNION)) {
514 flags |= MNT_UPDATE;
515 }
516 else {
517 /*
 518 			 * For a union mount on '/', treat it as a fresh
 519 			 * mount instead of an update.
 520 			 * Otherwise, union mounting on '/' used to panic the
 521 			 * system, since mnt_vnodecovered was found to
 522 			 * be NULL for '/', and unionlookup requires it
 523 			 * after getting ENOENT on a union mount.
524 */
525 flags = (flags & ~(MNT_UPDATE));
526 }
527
528 #if SECURE_KERNEL
529 if ((flags & MNT_RDONLY) == 0) {
530 /* Release kernels are not allowed to mount "/" as rw */
531 error = EPERM;
532 goto out;
533 }
534 #endif
535 /*
536 * See 7392553 for more details on why this check exists.
537 * Suffice to say: If this check is ON and something tries
538 * to mount the rootFS RW, we'll turn off the codesign
539 * bitmap optimization.
540 */
541 #if CHECK_CS_VALIDATION_BITMAP
542 if ((flags & MNT_RDONLY) == 0 ) {
543 root_fs_upgrade_try = TRUE;
544 }
545 #endif
546 }
547
548 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
549 labelstr, FALSE, ctx);
550
551 out:
552
553 #if CONFIG_MACF
554 if (labelstr)
555 FREE(labelstr, M_MACTEMP);
556 #endif /* CONFIG_MACF */
557
558 if (vp) {
559 vnode_put(vp);
560 }
561 if (pvp) {
562 vnode_put(pvp);
563 }
564 if (need_nameidone) {
565 nameidone(&nd);
566 }
567
568 return (error);
569 }
570
571 /*
572 * common mount implementation (final stage of mounting)
573
574 * Arguments:
 575  * fstypename	file system type (i.e. its VFS name)
576 * pvp parent of covered vnode
577 * vp covered vnode
 578  * cnp		component name (i.e. path) of covered vnode
579 * flags generic mount flags
580 * fsmountargs file system specific data
581 * labelstr optional MAC label
582 * kernelmount TRUE for mounts initiated from inside the kernel
583 * ctx caller's context
584 */
585 static int
586 mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
587 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
588 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
589 {
590 #if !CONFIG_MACF
591 #pragma unused(labelstr)
592 #endif
593 struct vnode *devvp = NULLVP;
594 struct vnode *device_vnode = NULLVP;
595 #if CONFIG_MACF
596 struct vnode *rvp;
597 #endif
598 struct mount *mp;
599 struct vfstable *vfsp = (struct vfstable *)0;
600 struct proc *p = vfs_context_proc(ctx);
601 int error, flag = 0;
602 user_addr_t devpath = USER_ADDR_NULL;
603 int ronly = 0;
604 int mntalloc = 0;
605 boolean_t vfsp_ref = FALSE;
606 boolean_t is_rwlock_locked = FALSE;
607 boolean_t did_rele = FALSE;
608 boolean_t have_usecount = FALSE;
609
610 /*
611 * Process an update for an existing mount
612 */
613 if (flags & MNT_UPDATE) {
614 if ((vp->v_flag & VROOT) == 0) {
615 error = EINVAL;
616 goto out1;
617 }
618 mp = vp->v_mount;
619
620 /* unmount in progress return error */
621 mount_lock_spin(mp);
622 if (mp->mnt_lflag & MNT_LUNMOUNT) {
623 mount_unlock(mp);
624 error = EBUSY;
625 goto out1;
626 }
627 mount_unlock(mp);
628 lck_rw_lock_exclusive(&mp->mnt_rwlock);
629 is_rwlock_locked = TRUE;
630 /*
631 * We only allow the filesystem to be reloaded if it
632 * is currently mounted read-only.
633 */
634 if ((flags & MNT_RELOAD) &&
635 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
636 error = ENOTSUP;
637 goto out1;
638 }
639
640 /*
641 * If content protection is enabled, update mounts are not
642 * allowed to turn it off.
643 */
644 if ((mp->mnt_flag & MNT_CPROTECT) &&
645 ((flags & MNT_CPROTECT) == 0)) {
646 error = EINVAL;
647 goto out1;
648 }
649
650 #ifdef CONFIG_IMGSRC_ACCESS
651 /* Can't downgrade the backer of the root FS */
652 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
653 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
654 error = ENOTSUP;
655 goto out1;
656 }
657 #endif /* CONFIG_IMGSRC_ACCESS */
658
659 /*
660 * Only root, or the user that did the original mount is
661 * permitted to update it.
662 */
663 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
664 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
665 goto out1;
666 }
667 #if CONFIG_MACF
668 error = mac_mount_check_remount(ctx, mp);
669 if (error != 0) {
670 goto out1;
671 }
672 #endif
673 /*
674 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
675 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
676 */
677 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
678 flags |= MNT_NOSUID | MNT_NODEV;
679 if (mp->mnt_flag & MNT_NOEXEC)
680 flags |= MNT_NOEXEC;
681 }
682 flag = mp->mnt_flag;
683
684
685
686 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
687
688 vfsp = mp->mnt_vtable;
689 goto update;
690 }
691
692 /*
693 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
694 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
695 */
696 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
697 flags |= MNT_NOSUID | MNT_NODEV;
698 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
699 flags |= MNT_NOEXEC;
700 }
701
702 /* XXXAUDIT: Should we capture the type on the error path as well? */
703 AUDIT_ARG(text, fstypename);
704 mount_list_lock();
705 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
706 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
707 vfsp->vfc_refcount++;
708 vfsp_ref = TRUE;
709 break;
710 }
711 mount_list_unlock();
712 if (vfsp == NULL) {
713 error = ENODEV;
714 goto out1;
715 }
716
717 /*
718 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
719 */
720 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
721 error = EINVAL; /* unsupported request */
722 goto out1;
723 }
724
725 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
726 if (error != 0) {
727 goto out1;
728 }
729
730 /*
731 * Allocate and initialize the filesystem (mount_t)
732 */
733 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
734 M_MOUNT, M_WAITOK);
735 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
736 mntalloc = 1;
737
738 /* Initialize the default IO constraints */
739 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
740 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
741 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
742 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
743 mp->mnt_devblocksize = DEV_BSIZE;
744 mp->mnt_alignmentmask = PAGE_MASK;
745 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
746 mp->mnt_ioscale = 1;
747 mp->mnt_ioflags = 0;
748 mp->mnt_realrootvp = NULLVP;
749 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
750
751 TAILQ_INIT(&mp->mnt_vnodelist);
752 TAILQ_INIT(&mp->mnt_workerqueue);
753 TAILQ_INIT(&mp->mnt_newvnodes);
754 mount_lock_init(mp);
755 lck_rw_lock_exclusive(&mp->mnt_rwlock);
756 is_rwlock_locked = TRUE;
757 mp->mnt_op = vfsp->vfc_vfsops;
758 mp->mnt_vtable = vfsp;
759 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
760 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
761 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
762 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
763 mp->mnt_vnodecovered = vp;
764 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
765 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
766 mp->mnt_devbsdunit = 0;
767
768 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
769 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
770
771 #if NFSCLIENT || DEVFS || ROUTEFS
772 if (kernelmount)
773 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
774 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
775 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
 776 #endif /* NFSCLIENT || DEVFS || ROUTEFS */
777
778 update:
779
780 /*
781 * Set the mount level flags.
782 */
783 if (flags & MNT_RDONLY)
784 mp->mnt_flag |= MNT_RDONLY;
785 else if (mp->mnt_flag & MNT_RDONLY) {
786 // disallow read/write upgrades of file systems that
787 // had the TYPENAME_OVERRIDE feature set.
788 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
789 error = EPERM;
790 goto out1;
791 }
792 mp->mnt_kern_flag |= MNTK_WANTRDWR;
793 }
794 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
795 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
796 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
797 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
798 MNT_QUARANTINE | MNT_CPROTECT);
799
800 #if SECURE_KERNEL
801 #if !CONFIG_MNT_SUID
802 /*
 803 	 * On release builds of iOS-based platforms, always enforce NOSUID on
804 * all mounts. We do this here because we can catch update mounts as well as
805 * non-update mounts in this case.
806 */
807 mp->mnt_flag |= (MNT_NOSUID);
808 #endif
809 #endif
810
811 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
812 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
813 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
814 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
815 MNT_QUARANTINE | MNT_CPROTECT);
816
817 #if CONFIG_MACF
818 if (flags & MNT_MULTILABEL) {
819 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
820 error = EINVAL;
821 goto out1;
822 }
823 mp->mnt_flag |= MNT_MULTILABEL;
824 }
825 #endif
826 /*
827 * Process device path for local file systems if requested
828 */
829 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS &&
830 !(internal_flags & KERNEL_MOUNT_SNAPSHOT)) {
831 if (vfs_context_is64bit(ctx)) {
832 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
833 goto out1;
834 fsmountargs += sizeof(devpath);
835 } else {
836 user32_addr_t tmp;
837 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
838 goto out1;
839 /* munge into LP64 addr */
840 devpath = CAST_USER_ADDR_T(tmp);
841 fsmountargs += sizeof(tmp);
842 }
843
844 /* Lookup device and authorize access to it */
845 if ((devpath)) {
846 struct nameidata nd;
847
848 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
849 if ( (error = namei(&nd)) )
850 goto out1;
851
852 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
853 devvp = nd.ni_vp;
854
855 nameidone(&nd);
856
857 if (devvp->v_type != VBLK) {
858 error = ENOTBLK;
859 goto out2;
860 }
861 if (major(devvp->v_rdev) >= nblkdev) {
862 error = ENXIO;
863 goto out2;
864 }
865 /*
866 * If mount by non-root, then verify that user has necessary
867 * permissions on the device.
868 */
869 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
870 mode_t accessmode = KAUTH_VNODE_READ_DATA;
871
872 if ((mp->mnt_flag & MNT_RDONLY) == 0)
873 accessmode |= KAUTH_VNODE_WRITE_DATA;
874 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
875 goto out2;
876 }
877 }
878 /* On first mount, preflight and open device */
879 if (devpath && ((flags & MNT_UPDATE) == 0)) {
880 if ( (error = vnode_ref(devvp)) )
881 goto out2;
882 /*
883 * Disallow multiple mounts of the same device.
884 * Disallow mounting of a device that is currently in use
885 * (except for root, which might share swap device for miniroot).
886 * Flush out any old buffers remaining from a previous use.
887 */
888 if ( (error = vfs_mountedon(devvp)) )
889 goto out3;
890
891 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
892 error = EBUSY;
893 goto out3;
894 }
895 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
896 error = ENOTBLK;
897 goto out3;
898 }
899 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
900 goto out3;
901
902 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
903 #if CONFIG_MACF
904 error = mac_vnode_check_open(ctx,
905 devvp,
906 ronly ? FREAD : FREAD|FWRITE);
907 if (error)
908 goto out3;
909 #endif /* MAC */
910 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
911 goto out3;
912
913 mp->mnt_devvp = devvp;
914 device_vnode = devvp;
915
916 } else if ((mp->mnt_flag & MNT_RDONLY) &&
917 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
918 (device_vnode = mp->mnt_devvp)) {
919 dev_t dev;
920 int maj;
921 /*
922 * If upgrade to read-write by non-root, then verify
923 * that user has necessary permissions on the device.
924 */
925 vnode_getalways(device_vnode);
926
927 if (suser(vfs_context_ucred(ctx), NULL) &&
928 (error = vnode_authorize(device_vnode, NULL,
929 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
930 ctx)) != 0) {
931 vnode_put(device_vnode);
932 goto out2;
933 }
934
935 /* Tell the device that we're upgrading */
936 dev = (dev_t)device_vnode->v_rdev;
937 maj = major(dev);
938
939 if ((u_int)maj >= (u_int)nblkdev)
940 panic("Volume mounted on a device with invalid major number.");
941
942 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
943 vnode_put(device_vnode);
944 device_vnode = NULLVP;
945 if (error != 0) {
946 goto out2;
947 }
948 }
949 }
950 #if CONFIG_MACF
951 if ((flags & MNT_UPDATE) == 0) {
952 mac_mount_label_init(mp);
953 mac_mount_label_associate(ctx, mp);
954 }
955 if (labelstr) {
956 if ((flags & MNT_UPDATE) != 0) {
957 error = mac_mount_check_label_update(ctx, mp);
958 if (error != 0)
959 goto out3;
960 }
961 }
962 #endif
963 /*
964 * Mount the filesystem.
965 */
966 if (internal_flags & KERNEL_MOUNT_SNAPSHOT) {
967 error = VFS_IOCTL(mp, VFSIOC_MOUNT_SNAPSHOT,
968 (caddr_t)fsmountargs, 0, ctx);
969 } else {
970 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
971 }
972
973 if (flags & MNT_UPDATE) {
974 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
975 mp->mnt_flag &= ~MNT_RDONLY;
976 mp->mnt_flag &=~
977 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
978 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
979 if (error)
980 mp->mnt_flag = flag; /* restore flag value */
981 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
982 lck_rw_done(&mp->mnt_rwlock);
983 is_rwlock_locked = FALSE;
984 if (!error)
985 enablequotas(mp, ctx);
986 goto exit;
987 }
988
989 /*
990 * Put the new filesystem on the mount list after root.
991 */
992 if (error == 0) {
993 struct vfs_attr vfsattr;
994 #if CONFIG_MACF
995 if (vfs_flags(mp) & MNT_MULTILABEL) {
996 error = VFS_ROOT(mp, &rvp, ctx);
997 if (error) {
998 printf("%s() VFS_ROOT returned %d\n", __func__, error);
999 goto out3;
1000 }
1001 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
1002 /*
1003 * drop reference provided by VFS_ROOT
1004 */
1005 vnode_put(rvp);
1006
1007 if (error)
1008 goto out3;
1009 }
1010 #endif /* MAC */
1011
1012 vnode_lock_spin(vp);
1013 CLR(vp->v_flag, VMOUNT);
1014 vp->v_mountedhere = mp;
1015 vnode_unlock(vp);
1016
1017 /*
1018 * taking the name_cache_lock exclusively will
 1019 		 * ensure that everyone is out of the fast path who
 1020 		 * might be trying to use a now stale copy of
 1021 		 * vp->v_mountedhere->mnt_realrootvp;
1022 * bumping mount_generation causes the cached values
1023 * to be invalidated
1024 */
1025 name_cache_lock();
1026 mount_generation++;
1027 name_cache_unlock();
1028
1029 error = vnode_ref(vp);
1030 if (error != 0) {
1031 goto out4;
1032 }
1033
1034 have_usecount = TRUE;
1035
1036 error = checkdirs(vp, ctx);
1037 if (error != 0) {
1038 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1039 goto out4;
1040 }
1041 /*
 1042 		 * there is no cleanup code here, so the return value is cast to void;
 1043 		 * we need to revisit this
1044 */
1045 (void)VFS_START(mp, 0, ctx);
1046
1047 if (mount_list_add(mp) != 0) {
1048 /*
1049 * The system is shutting down trying to umount
1050 * everything, so fail with a plausible errno.
1051 */
1052 error = EBUSY;
1053 goto out4;
1054 }
1055 lck_rw_done(&mp->mnt_rwlock);
1056 is_rwlock_locked = FALSE;
1057
1058 /* Check if this mounted file system supports EAs or named streams. */
1059 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
1060 VFSATTR_INIT(&vfsattr);
1061 VFSATTR_WANTED(&vfsattr, f_capabilities);
1062 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
1063 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
1064 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
1065 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
1066 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
1067 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1068 }
1069 #if NAMEDSTREAMS
1070 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
1071 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
1072 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1073 }
1074 #endif
1075 /* Check if this file system supports path from id lookups. */
1076 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
1077 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
1078 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1079 } else if (mp->mnt_flag & MNT_DOVOLFS) {
1080 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
1081 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
1082 }
1083
1084 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS) &&
1085 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_DIR_HARDLINKS)) {
1086 mp->mnt_kern_flag |= MNTK_DIR_HARDLINKS;
1087 }
1088 }
1089 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
1090 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
1091 }
1092 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
1093 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
1094 }
1095 /* increment the operations count */
1096 OSAddAtomic(1, &vfs_nummntops);
1097 enablequotas(mp, ctx);
1098
1099 if (device_vnode) {
1100 device_vnode->v_specflags |= SI_MOUNTEDON;
1101
1102 /*
1103 * cache the IO attributes for the underlying physical media...
1104 * an error return indicates the underlying driver doesn't
1105 * support all the queries necessary... however, reasonable
1106 * defaults will have been set, so no reason to bail or care
1107 */
1108 vfs_init_io_attributes(device_vnode, mp);
1109 }
1110
1111 /* Now that mount is setup, notify the listeners */
1112 vfs_notify_mount(pvp);
1113 IOBSDMountChange(mp, kIOMountChangeMount);
1114
1115 } else {
1116 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1117 if (mp->mnt_vnodelist.tqh_first != NULL) {
1118 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1119 mp->mnt_vtable->vfc_name, error);
1120 }
1121
1122 vnode_lock_spin(vp);
1123 CLR(vp->v_flag, VMOUNT);
1124 vnode_unlock(vp);
1125 mount_list_lock();
1126 mp->mnt_vtable->vfc_refcount--;
1127 mount_list_unlock();
1128
1129 if (device_vnode ) {
1130 vnode_rele(device_vnode);
1131 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
1132 }
1133 lck_rw_done(&mp->mnt_rwlock);
1134 is_rwlock_locked = FALSE;
1135
1136 /*
1137 * if we get here, we have a mount structure that needs to be freed,
1138 * but since the coveredvp hasn't yet been updated to point at it,
1139 * no need to worry about other threads holding a crossref on this mp
1140 * so it's ok to just free it
1141 */
1142 mount_lock_destroy(mp);
1143 #if CONFIG_MACF
1144 mac_mount_label_destroy(mp);
1145 #endif
1146 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1147 }
1148 exit:
1149 /*
1150 * drop I/O count on the device vp if there was one
1151 */
1152 if (devpath && devvp)
1153 vnode_put(devvp);
1154
1155 return(error);
1156
1157 /* Error condition exits */
1158 out4:
1159 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
1160
1161 /*
1162 * If the mount has been placed on the covered vp,
1163 * it may have been discovered by now, so we have
1164 * to treat this just like an unmount
1165 */
1166 mount_lock_spin(mp);
1167 mp->mnt_lflag |= MNT_LDEAD;
1168 mount_unlock(mp);
1169
1170 if (device_vnode != NULLVP) {
1171 vnode_rele(device_vnode);
1172 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1173 ctx);
1174 did_rele = TRUE;
1175 }
1176
1177 vnode_lock_spin(vp);
1178
1179 mp->mnt_crossref++;
1180 vp->v_mountedhere = (mount_t) 0;
1181
1182 vnode_unlock(vp);
1183
1184 if (have_usecount) {
1185 vnode_rele(vp);
1186 }
1187 out3:
1188 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
1189 vnode_rele(devvp);
1190 out2:
1191 if (devpath && devvp)
1192 vnode_put(devvp);
1193 out1:
1194 /* Release mnt_rwlock only when it was taken */
1195 if (is_rwlock_locked == TRUE) {
1196 lck_rw_done(&mp->mnt_rwlock);
1197 }
1198
1199 if (mntalloc) {
1200 if (mp->mnt_crossref)
1201 mount_dropcrossref(mp, vp, 0);
1202 else {
1203 mount_lock_destroy(mp);
1204 #if CONFIG_MACF
1205 mac_mount_label_destroy(mp);
1206 #endif
1207 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1208 }
1209 }
1210 if (vfsp_ref) {
1211 mount_list_lock();
1212 vfsp->vfc_refcount--;
1213 mount_list_unlock();
1214 }
1215
1216 return(error);
1217 }
1218
1219 /*
1220 * Flush in-core data, check for competing mount attempts,
1221 * and set VMOUNT
1222 */
1223 int
1224 prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
1225 {
1226 #if !CONFIG_MACF
1227 #pragma unused(cnp,fsname)
1228 #endif
1229 struct vnode_attr va;
1230 int error;
1231
1232 if (!skip_auth) {
1233 /*
1234 * If the user is not root, ensure that they own the directory
1235 * onto which we are attempting to mount.
1236 */
1237 VATTR_INIT(&va);
1238 VATTR_WANTED(&va, va_uid);
1239 if ((error = vnode_getattr(vp, &va, ctx)) ||
1240 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1241 (!vfs_context_issuser(ctx)))) {
1242 error = EPERM;
1243 goto out;
1244 }
1245 }
1246
1247 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1248 goto out;
1249
1250 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1251 goto out;
1252
1253 if (vp->v_type != VDIR) {
1254 error = ENOTDIR;
1255 goto out;
1256 }
1257
1258 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1259 error = EBUSY;
1260 goto out;
1261 }
1262
1263 #if CONFIG_MACF
1264 error = mac_mount_check_mount(ctx, vp,
1265 cnp, fsname);
1266 if (error != 0)
1267 goto out;
1268 #endif
1269
1270 vnode_lock_spin(vp);
1271 SET(vp->v_flag, VMOUNT);
1272 vnode_unlock(vp);
1273
1274 out:
1275 return error;
1276 }
1277
1278 #if CONFIG_IMGSRC_ACCESS
1279
1280 #if DEBUG
1281 #define IMGSRC_DEBUG(args...) printf(args)
1282 #else
1283 #define IMGSRC_DEBUG(args...) do { } while(0)
1284 #endif
1285
1286 static int
1287 authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1288 {
1289 struct nameidata nd;
1290 vnode_t vp, realdevvp;
1291 mode_t accessmode;
1292 int error;
1293
1294 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1295 if ( (error = namei(&nd)) ) {
1296 IMGSRC_DEBUG("namei() failed with %d\n", error);
1297 return error;
1298 }
1299
1300 vp = nd.ni_vp;
1301
1302 if (!vnode_isblk(vp)) {
1303 IMGSRC_DEBUG("Not block device.\n");
1304 error = ENOTBLK;
1305 goto out;
1306 }
1307
1308 realdevvp = mp->mnt_devvp;
1309 if (realdevvp == NULLVP) {
1310 IMGSRC_DEBUG("No device backs the mount.\n");
1311 error = ENXIO;
1312 goto out;
1313 }
1314
1315 error = vnode_getwithref(realdevvp);
1316 if (error != 0) {
 1317 		IMGSRC_DEBUG("Couldn't get iocount on device.\n");
1318 goto out;
1319 }
1320
1321 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1322 IMGSRC_DEBUG("Wrong dev_t.\n");
1323 error = ENXIO;
1324 goto out1;
1325 }
1326
1327 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1328
1329 /*
1330 * If mount by non-root, then verify that user has necessary
1331 * permissions on the device.
1332 */
1333 if (!vfs_context_issuser(ctx)) {
1334 accessmode = KAUTH_VNODE_READ_DATA;
1335 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1336 accessmode |= KAUTH_VNODE_WRITE_DATA;
1337 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1338 IMGSRC_DEBUG("Access denied.\n");
1339 goto out1;
1340 }
1341 }
1342
1343 *devvpp = vp;
1344
1345 out1:
1346 vnode_put(realdevvp);
1347 out:
1348 nameidone(&nd);
1349 if (error) {
1350 vnode_put(vp);
1351 }
1352
1353 return error;
1354 }
1355
1356 /*
1357 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1358 * and call checkdirs()
1359 */
1360 static int
1361 place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1362 {
1363 int error;
1364
1365 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1366
1367 vnode_lock_spin(vp);
1368 CLR(vp->v_flag, VMOUNT);
1369 vp->v_mountedhere = mp;
1370 vnode_unlock(vp);
1371
1372 /*
1373 * taking the name_cache_lock exclusively will
 1374 	 * ensure that everyone is out of the fast path who
 1375 	 * might be trying to use a now stale copy of
 1376 	 * vp->v_mountedhere->mnt_realrootvp;
1377 * bumping mount_generation causes the cached values
1378 * to be invalidated
1379 */
1380 name_cache_lock();
1381 mount_generation++;
1382 name_cache_unlock();
1383
1384 error = vnode_ref(vp);
1385 if (error != 0) {
1386 goto out;
1387 }
1388
1389 error = checkdirs(vp, ctx);
1390 if (error != 0) {
1391 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1392 vnode_rele(vp);
1393 goto out;
1394 }
1395
1396 out:
1397 if (error != 0) {
1398 mp->mnt_vnodecovered = NULLVP;
1399 }
1400 return error;
1401 }
1402
1403 static void
1404 undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1405 {
1406 vnode_rele(vp);
1407 vnode_lock_spin(vp);
1408 vp->v_mountedhere = (mount_t)NULL;
1409 vnode_unlock(vp);
1410
1411 mp->mnt_vnodecovered = NULLVP;
1412 }
1413
1414 static int
1415 mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1416 {
1417 int error;
1418
1419 /* unmount in progress return error */
1420 mount_lock_spin(mp);
1421 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1422 mount_unlock(mp);
1423 return EBUSY;
1424 }
1425 mount_unlock(mp);
1426 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1427
1428 /*
1429 * We only allow the filesystem to be reloaded if it
1430 * is currently mounted read-only.
1431 */
1432 if ((flags & MNT_RELOAD) &&
1433 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1434 error = ENOTSUP;
1435 goto out;
1436 }
1437
1438 /*
1439 * Only root, or the user that did the original mount is
1440 * permitted to update it.
1441 */
1442 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1443 (!vfs_context_issuser(ctx))) {
1444 error = EPERM;
1445 goto out;
1446 }
1447 #if CONFIG_MACF
1448 error = mac_mount_check_remount(ctx, mp);
1449 if (error != 0) {
1450 goto out;
1451 }
1452 #endif
1453
1454 out:
1455 if (error) {
1456 lck_rw_done(&mp->mnt_rwlock);
1457 }
1458
1459 return error;
1460 }
1461
1462 static void
1463 mount_end_update(mount_t mp)
1464 {
1465 lck_rw_done(&mp->mnt_rwlock);
1466 }
1467
1468 static int
1469 get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1470 {
1471 vnode_t vp;
1472
1473 if (height >= MAX_IMAGEBOOT_NESTING) {
1474 return EINVAL;
1475 }
1476
1477 vp = imgsrc_rootvnodes[height];
1478 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1479 *rvpp = vp;
1480 return 0;
1481 } else {
1482 return ENOENT;
1483 }
1484 }
1485
1486 static int
1487 relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
1488 const char *fsname, vfs_context_t ctx,
1489 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
1490 {
1491 int error;
1492 mount_t mp;
1493 boolean_t placed = FALSE;
1494 vnode_t devvp = NULLVP;
1495 struct vfstable *vfsp;
1496 user_addr_t devpath;
1497 char *old_mntonname;
1498 vnode_t rvp;
1499 uint32_t height;
1500 uint32_t flags;
1501
1502 /* If we didn't imageboot, nothing to move */
1503 if (imgsrc_rootvnodes[0] == NULLVP) {
1504 return EINVAL;
1505 }
1506
1507 /* Only root can do this */
1508 if (!vfs_context_issuser(ctx)) {
1509 return EPERM;
1510 }
1511
1512 IMGSRC_DEBUG("looking for root vnode.\n");
1513
1514 /*
1515 * Get root vnode of filesystem we're moving.
1516 */
1517 if (by_index) {
1518 if (is64bit) {
1519 struct user64_mnt_imgsrc_args mia64;
1520 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1521 if (error != 0) {
1522 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1523 return error;
1524 }
1525
1526 height = mia64.mi_height;
1527 flags = mia64.mi_flags;
1528 devpath = mia64.mi_devpath;
1529 } else {
1530 struct user32_mnt_imgsrc_args mia32;
1531 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1532 if (error != 0) {
1533 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1534 return error;
1535 }
1536
1537 height = mia32.mi_height;
1538 flags = mia32.mi_flags;
1539 devpath = mia32.mi_devpath;
1540 }
1541 } else {
1542 /*
1543 * For binary compatibility--assumes one level of nesting.
1544 */
1545 if (is64bit) {
1546 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1547 return error;
1548 } else {
1549 user32_addr_t tmp;
1550 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1551 return error;
1552
1553 /* munge into LP64 addr */
1554 devpath = CAST_USER_ADDR_T(tmp);
1555 }
1556
1557 height = 0;
1558 flags = 0;
1559 }
1560
1561 if (flags != 0) {
1562 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1563 return EINVAL;
1564 }
1565
1566 error = get_imgsrc_rootvnode(height, &rvp);
1567 if (error != 0) {
1568 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
1569 return error;
1570 }
1571
1572 IMGSRC_DEBUG("got root vnode.\n");
1573
1574 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1575
1576 /* Can only move once */
1577 mp = vnode_mount(rvp);
1578 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1579 IMGSRC_DEBUG("Already moved.\n");
1580 error = EBUSY;
1581 goto out0;
1582 }
1583
 1584 	IMGSRC_DEBUG("Starting update.\n");
1585
1586 /* Get exclusive rwlock on mount, authorize update on mp */
1587 error = mount_begin_update(mp , ctx, 0);
1588 if (error != 0) {
 1589 		IMGSRC_DEBUG("Starting update failed with %d\n", error);
1590 goto out0;
1591 }
1592
1593 /*
1594 * It can only be moved once. Flag is set under the rwlock,
1595 * so we're now safe to proceed.
1596 */
1597 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1598 IMGSRC_DEBUG("Already moved [2]\n");
1599 goto out1;
1600 }
1601
1602
1603 IMGSRC_DEBUG("Preparing coveredvp.\n");
1604
1605 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1606 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
1607 if (error != 0) {
1608 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
1609 goto out1;
1610 }
1611
1612 IMGSRC_DEBUG("Covered vp OK.\n");
1613
 1614 	/* Sanity check the name the caller has provided */
1615 vfsp = mp->mnt_vtable;
1616 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
1617 IMGSRC_DEBUG("Wrong fs name.\n");
1618 error = EINVAL;
1619 goto out2;
1620 }
1621
1622 /* Check the device vnode and update mount-from name, for local filesystems */
1623 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
1624 IMGSRC_DEBUG("Local, doing device validation.\n");
1625
1626 if (devpath != USER_ADDR_NULL) {
1627 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1628 if (error) {
1629 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1630 goto out2;
1631 }
1632
1633 vnode_put(devvp);
1634 }
1635 }
1636
1637 /*
1638 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1639 * and increment the name cache's mount generation
1640 */
1641
1642 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1643 error = place_mount_and_checkdirs(mp, vp, ctx);
1644 if (error != 0) {
1645 goto out2;
1646 }
1647
1648 placed = TRUE;
1649
1650 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1651 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1652
1653 /* Forbid future moves */
1654 mount_lock(mp);
1655 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1656 mount_unlock(mp);
1657
1658 /* Finally, add to mount list, completely ready to go */
1659 if (mount_list_add(mp) != 0) {
1660 /*
1661 * The system is shutting down trying to umount
1662 * everything, so fail with a plausible errno.
1663 */
1664 error = EBUSY;
1665 goto out3;
1666 }
1667
1668 mount_end_update(mp);
1669 vnode_put(rvp);
1670 FREE(old_mntonname, M_TEMP);
1671
1672 vfs_notify_mount(pvp);
1673
1674 return 0;
1675 out3:
1676 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1677
1678 mount_lock(mp);
1679 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1680 mount_unlock(mp);
1681
1682 out2:
1683 /*
1684 * Placing the mp on the vnode clears VMOUNT,
1685 * so cleanup is different after that point
1686 */
1687 if (placed) {
1688 /* Rele the vp, clear VMOUNT and v_mountedhere */
1689 undo_place_on_covered_vp(mp, vp);
1690 } else {
1691 vnode_lock_spin(vp);
1692 CLR(vp->v_flag, VMOUNT);
1693 vnode_unlock(vp);
1694 }
1695 out1:
1696 mount_end_update(mp);
1697
1698 out0:
1699 vnode_put(rvp);
1700 FREE(old_mntonname, M_TEMP);
1701 return error;
1702 }
1703
1704 #endif /* CONFIG_IMGSRC_ACCESS */
1705
1706 void
1707 enablequotas(struct mount *mp, vfs_context_t ctx)
1708 {
1709 struct nameidata qnd;
1710 int type;
1711 char qfpath[MAXPATHLEN];
1712 const char *qfname = QUOTAFILENAME;
1713 const char *qfopsname = QUOTAOPSNAME;
1714 const char *qfextension[] = INITQFNAMES;
1715
 1716 	/* XXX Should be an MNTK_ flag, instead of strncmp()'s */
1717 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1718 return;
1719 }
1720 /*
1721 * Enable filesystem disk quotas if necessary.
 1722 	 * We ignore errors, as this should not interfere with the final mount
1723 */
1724 for (type=0; type < MAXQUOTAS; type++) {
1725 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
1726 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1727 CAST_USER_ADDR_T(qfpath), ctx);
1728 if (namei(&qnd) != 0)
1729 continue; /* option file to trigger quotas is not present */
1730 vnode_put(qnd.ni_vp);
1731 nameidone(&qnd);
1732 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
1733
1734 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
1735 }
1736 return;
1737 }
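/*
 * Illustrative note, not part of the original source: with the conventional
 * <sys/quota.h> definitions (QUOTAOPSNAME ".quota.ops", QUOTAFILENAME
 * ".quota", INITQFNAMES naming the "user" and "group" quota types; assumed
 * here rather than verified against this tree), the loop above probes a
 * volume mounted at /Volumes/Example for:
 *
 *   trigger files:  /Volumes/Example/.quota.ops.user   and  .quota.ops.group
 *   quota files:    /Volumes/Example/.quota.user       and  .quota.group
 *
 * and calls VFS_QUOTACTL(Q_QUOTAON) only for the types whose trigger file exists.
 */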
1738
1739
1740 static int
1741 checkdirs_callback(proc_t p, void * arg)
1742 {
1743 struct cdirargs * cdrp = (struct cdirargs * )arg;
1744 vnode_t olddp = cdrp->olddp;
1745 vnode_t newdp = cdrp->newdp;
1746 struct filedesc *fdp;
1747 vnode_t tvp;
1748 vnode_t fdp_cvp;
1749 vnode_t fdp_rvp;
1750 int cdir_changed = 0;
1751 int rdir_changed = 0;
1752
1753 /*
1754 * XXX Also needs to iterate each thread in the process to see if it
1755 * XXX is using a per-thread current working directory, and, if so,
1756 * XXX update that as well.
1757 */
1758
1759 proc_fdlock(p);
1760 fdp = p->p_fd;
1761 if (fdp == (struct filedesc *)0) {
1762 proc_fdunlock(p);
1763 return(PROC_RETURNED);
1764 }
1765 fdp_cvp = fdp->fd_cdir;
1766 fdp_rvp = fdp->fd_rdir;
1767 proc_fdunlock(p);
1768
1769 if (fdp_cvp == olddp) {
1770 vnode_ref(newdp);
1771 tvp = fdp->fd_cdir;
1772 fdp_cvp = newdp;
1773 cdir_changed = 1;
1774 vnode_rele(tvp);
1775 }
1776 if (fdp_rvp == olddp) {
1777 vnode_ref(newdp);
1778 tvp = fdp->fd_rdir;
1779 fdp_rvp = newdp;
1780 rdir_changed = 1;
1781 vnode_rele(tvp);
1782 }
1783 if (cdir_changed || rdir_changed) {
1784 proc_fdlock(p);
1785 fdp->fd_cdir = fdp_cvp;
1786 fdp->fd_rdir = fdp_rvp;
1787 proc_fdunlock(p);
1788 }
1789 return(PROC_RETURNED);
1790 }
1791
1792
1793
1794 /*
1795 * Scan all active processes to see if any of them have a current
1796 * or root directory onto which the new filesystem has just been
1797 * mounted. If so, replace them with the new mount point.
1798 */
1799 static int
1800 checkdirs(vnode_t olddp, vfs_context_t ctx)
1801 {
1802 vnode_t newdp;
1803 vnode_t tvp;
1804 int err;
1805 struct cdirargs cdr;
1806
1807 if (olddp->v_usecount == 1)
1808 return(0);
1809 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
1810
1811 if (err != 0) {
1812 #if DIAGNOSTIC
1813 panic("mount: lost mount: error %d", err);
1814 #endif
1815 return(err);
1816 }
1817
1818 cdr.olddp = olddp;
1819 cdr.newdp = newdp;
1820 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1821 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
1822
1823 if (rootvnode == olddp) {
1824 vnode_ref(newdp);
1825 tvp = rootvnode;
1826 rootvnode = newdp;
1827 vnode_rele(tvp);
1828 }
1829
1830 vnode_put(newdp);
1831 return(0);
1832 }
1833
1834 /*
1835 * Unmount a file system.
1836 *
1837 * Note: unmount takes a path to the vnode mounted on as argument,
 1838  * not the special file (as before).
1839 */
1840 /* ARGSUSED */
1841 int
1842 unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1843 {
1844 vnode_t vp;
1845 struct mount *mp;
1846 int error;
1847 struct nameidata nd;
1848 vfs_context_t ctx = vfs_context_current();
1849
1850 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
1851 UIO_USERSPACE, uap->path, ctx);
1852 error = namei(&nd);
1853 if (error)
1854 return (error);
1855 vp = nd.ni_vp;
1856 mp = vp->v_mount;
1857 nameidone(&nd);
1858
1859 #if CONFIG_MACF
1860 error = mac_mount_check_umount(ctx, mp);
1861 if (error != 0) {
1862 vnode_put(vp);
1863 return (error);
1864 }
1865 #endif
1866 /*
1867 * Must be the root of the filesystem
1868 */
1869 if ((vp->v_flag & VROOT) == 0) {
1870 vnode_put(vp);
1871 return (EINVAL);
1872 }
1873 mount_ref(mp, 0);
1874 vnode_put(vp);
1875 /* safedounmount consumes the mount ref */
1876 return (safedounmount(mp, uap->flags, ctx));
1877 }
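/*
 * Hedged userspace sketch of reaching this syscall through the unmount(2)
 * wrapper declared in <sys/mount.h>.  The mount point is made up; note that
 * the argument is the directory the filesystem is mounted on, not the device
 * special file.
 */
#if 0	/* example only, never compiled */
#include <sys/mount.h>
#include <stdio.h>

int
main(void)
{
	if (unmount("/Volumes/Example", MNT_FORCE) != 0) {
		perror("unmount");
		return 1;
	}
	return 0;
}
#endif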
1878
1879 int
1880 vfs_unmountbyfsid(fsid_t *fsid, int flags, vfs_context_t ctx)
1881 {
1882 mount_t mp;
1883
1884 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1885 if (mp == (mount_t)0) {
1886 return(ENOENT);
1887 }
1888 mount_ref(mp, 0);
1889 mount_iterdrop(mp);
1890 /* safedounmount consumes the mount ref */
1891 return(safedounmount(mp, flags, ctx));
1892 }
1893
1894
1895 /*
1896 * The mount struct comes with a mount ref which will be consumed.
 1897  * Do the actual file system unmount and prevent some common foot-shooting.
1898 */
1899 int
1900 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1901 {
1902 int error;
1903 proc_t p = vfs_context_proc(ctx);
1904
1905 /*
 1906 	 * If the file system is not responding, MNT_NOBLOCK
 1907 	 * is set, and this is not a forced unmount, then return EBUSY.
1908 */
1909 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1910 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1911 error = EBUSY;
1912 goto out;
1913 }
1914
1915 /*
1916 * Skip authorization if the mount is tagged as permissive and
1917 * this is not a forced-unmount attempt.
1918 */
1919 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1920 /*
1921 * Only root, or the user that did the original mount is
1922 * permitted to unmount this filesystem.
1923 */
1924 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1925 (error = suser(kauth_cred_get(), &p->p_acflag)))
1926 goto out;
1927 }
1928 /*
1929 * Don't allow unmounting the root file system.
1930 */
1931 if (mp->mnt_flag & MNT_ROOTFS) {
1932 error = EBUSY; /* the root is always busy */
1933 goto out;
1934 }
1935
1936 #ifdef CONFIG_IMGSRC_ACCESS
1937 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1938 error = EBUSY;
1939 goto out;
1940 }
1941 #endif /* CONFIG_IMGSRC_ACCESS */
1942
1943 return (dounmount(mp, flags, 1, ctx));
1944
1945 out:
1946 mount_drop(mp, 0);
1947 return(error);
1948 }
1949
1950 /*
1951 * Do the actual file system unmount.
1952 */
1953 int
1954 dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1955 {
1956 vnode_t coveredvp = (vnode_t)0;
1957 int error;
1958 int needwakeup = 0;
1959 int forcedunmount = 0;
1960 int lflags = 0;
1961 struct vnode *devvp = NULLVP;
1962 #if CONFIG_TRIGGERS
1963 proc_t p = vfs_context_proc(ctx);
1964 int did_vflush = 0;
1965 int pflags_save = 0;
1966 #endif /* CONFIG_TRIGGERS */
1967
1968 #if CONFIG_FSE
1969 if (!(flags & MNT_FORCE)) {
1970 fsevent_unmount(mp, ctx); /* has to come first! */
1971 }
1972 #endif
1973
1974 mount_lock(mp);
1975
1976 /*
1977 * If already an unmount in progress just return EBUSY.
1978 * Even a forced unmount cannot override.
1979 */
1980 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1981 if (withref != 0)
1982 mount_drop(mp, 1);
1983 mount_unlock(mp);
1984 return (EBUSY);
1985 }
1986
1987 if (flags & MNT_FORCE) {
1988 forcedunmount = 1;
1989 mp->mnt_lflag |= MNT_LFORCE;
1990 }
1991
1992 #if CONFIG_TRIGGERS
1993 if (flags & MNT_NOBLOCK && p != kernproc)
1994 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
1995 #endif
1996
1997 mp->mnt_kern_flag |= MNTK_UNMOUNT;
1998 mp->mnt_lflag |= MNT_LUNMOUNT;
1999 mp->mnt_flag &=~ MNT_ASYNC;
2000 /*
2001 * anyone currently in the fast path that
2002 * trips over the cached rootvp will be
2003 * dumped out and forced into the slow path
2004 * to regenerate a new cached value
2005 */
2006 mp->mnt_realrootvp = NULLVP;
2007 mount_unlock(mp);
2008
2009 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
2010 /*
2011 * Force unmount any mounts in this filesystem.
2012 * If any unmounts fail, just leave them dangling.
2013 * Avoids recursion.
2014 */
2015 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
2016 }
2017
2018 /*
2019 * taking the name_cache_lock exclusively will
2020 * ensure that everyone is out of the fast path who
2021 * might be trying to use a now-stale copy of
2022 * vp->v_mountedhere->mnt_realrootvp;
2023 * bumping mount_generation causes the cached values
2024 * to be invalidated
2025 */
2026 name_cache_lock();
2027 mount_generation++;
2028 name_cache_unlock();
2029
2030
2031 lck_rw_lock_exclusive(&mp->mnt_rwlock);
2032 if (withref != 0)
2033 mount_drop(mp, 0);
2034 error = 0;
2035 if (forcedunmount == 0) {
2036 ubc_umount(mp); /* release cached vnodes */
2037 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2038 error = VFS_SYNC(mp, MNT_WAIT, ctx);
2039 if (error) {
2040 mount_lock(mp);
2041 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2042 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2043 mp->mnt_lflag &= ~MNT_LFORCE;
2044 goto out;
2045 }
2046 }
2047 }
2048
2049 /* free disk_conditioner_info structure for this mount */
2050 disk_conditioner_unmount(mp);
2051
2052 IOBSDMountChange(mp, kIOMountChangeUnmount);
2053
2054 #if CONFIG_TRIGGERS
2055 vfs_nested_trigger_unmounts(mp, flags, ctx);
2056 did_vflush = 1;
2057 #endif
2058 if (forcedunmount)
2059 lflags |= FORCECLOSE;
2060 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
2061 if ((forcedunmount == 0) && error) {
2062 mount_lock(mp);
2063 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2064 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2065 mp->mnt_lflag &= ~MNT_LFORCE;
2066 goto out;
2067 }
2068
2069 /* make sure no one is in the mount iterations or lookups */
2070 mount_iterdrain(mp);
2071
2072 error = VFS_UNMOUNT(mp, flags, ctx);
2073 if (error) {
2074 mount_iterreset(mp);
2075 mount_lock(mp);
2076 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
2077 mp->mnt_lflag &= ~MNT_LUNMOUNT;
2078 mp->mnt_lflag &= ~MNT_LFORCE;
2079 goto out;
2080 }
2081
2082 /* increment the operations count */
2083 if (!error)
2084 OSAddAtomic(1, &vfs_nummntops);
2085
2086 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
2087 /* hold an io reference and drop the usecount before close */
2088 devvp = mp->mnt_devvp;
2089 vnode_getalways(devvp);
2090 vnode_rele(devvp);
2091 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
2092 ctx);
2093 vnode_clearmountedon(devvp);
2094 vnode_put(devvp);
2095 }
2096 lck_rw_done(&mp->mnt_rwlock);
2097 mount_list_remove(mp);
2098 lck_rw_lock_exclusive(&mp->mnt_rwlock);
2099
2100 /* clear the mount point hook in the vp, but do not drop the ref yet */
2101 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
2102 /*
2103 * The covered vnode needs special handling. Trying to get an
2104 * iocount must not block here as this may lead to deadlocks
2105 * if the Filesystem to which the covered vnode belongs is
2106 * undergoing forced unmounts. Since we hold a usecount, the
2107 * vnode cannot be reused (it can, however, still be terminated)
2108 */
2109 vnode_getalways(coveredvp);
2110 vnode_lock_spin(coveredvp);
2111
2112 mp->mnt_crossref++;
2113 coveredvp->v_mountedhere = (struct mount *)0;
2114 CLR(coveredvp->v_flag, VMOUNT);
2115
2116 vnode_unlock(coveredvp);
2117 vnode_put(coveredvp);
2118 }
2119
2120 mount_list_lock();
2121 mp->mnt_vtable->vfc_refcount--;
2122 mount_list_unlock();
2123
2124 cache_purgevfs(mp); /* remove cache entries for this file sys */
2125 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
2126 mount_lock(mp);
2127 mp->mnt_lflag |= MNT_LDEAD;
2128
2129 if (mp->mnt_lflag & MNT_LWAIT) {
2130 /*
2131 * do the wakeup here
2132 * in case we block in mount_refdrain
2133 * which will drop the mount lock
2134 * and allow anyone blocked in vfs_busy
2135 * to wakeup and see the LDEAD state
2136 */
2137 mp->mnt_lflag &= ~MNT_LWAIT;
2138 wakeup((caddr_t)mp);
2139 }
2140 mount_refdrain(mp);
2141 out:
2142 if (mp->mnt_lflag & MNT_LWAIT) {
2143 mp->mnt_lflag &= ~MNT_LWAIT;
2144 needwakeup = 1;
2145 }
2146
2147 #if CONFIG_TRIGGERS
2148 if (flags & MNT_NOBLOCK && p != kernproc) {
2149 // Restore P_NOREMOTEHANG bit to its previous value
2150 if ((pflags_save & P_NOREMOTEHANG) == 0)
2151 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2152 }
2153
2154 /*
2155 * Callback and context are set together under the mount lock, and
2156 * never cleared, so we're safe to examine them here, drop the lock,
2157 * and call out.
2158 */
2159 if (mp->mnt_triggercallback != NULL) {
2160 mount_unlock(mp);
2161 if (error == 0) {
2162 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2163 } else if (did_vflush) {
2164 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2165 }
2166 } else {
2167 mount_unlock(mp);
2168 }
2169 #else
2170 mount_unlock(mp);
2171 #endif /* CONFIG_TRIGGERS */
2172
2173 lck_rw_done(&mp->mnt_rwlock);
2174
2175 if (needwakeup)
2176 wakeup((caddr_t)mp);
2177
2178 if (!error) {
2179 if ((coveredvp != NULLVP)) {
2180 vnode_t pvp = NULLVP;
2181
2182 /*
2183 * The covered vnode needs special handling. Trying to
2184 * get an iocount must not block here as this may lead
2185 * to deadlocks if the Filesystem to which the covered
2186 * vnode belongs is undergoing forced unmounts. Since we
2187 * hold a usecount, the vnode cannot be reused
2188 * (it can, however, still be terminated).
2189 */
2190 vnode_getalways(coveredvp);
2191
2192 mount_dropcrossref(mp, coveredvp, 0);
2193 /*
2194 * We'll _try_ to detect if this really needs to be
2195 * done. The coveredvp can only be in termination (or
2196 * terminated) if the coveredvp's mount point is in a
2197 * forced unmount (or has been) since we still hold the
2198 * ref.
2199 */
2200 if (!vnode_isrecycled(coveredvp)) {
2201 pvp = vnode_getparent(coveredvp);
2202 #if CONFIG_TRIGGERS
2203 if (coveredvp->v_resolve) {
2204 vnode_trigger_rearm(coveredvp, ctx);
2205 }
2206 #endif
2207 }
2208
2209 vnode_rele(coveredvp);
2210 vnode_put(coveredvp);
2211 coveredvp = NULLVP;
2212
2213 if (pvp) {
2214 lock_vnode_and_post(pvp, NOTE_WRITE);
2215 vnode_put(pvp);
2216 }
2217 } else if (mp->mnt_flag & MNT_ROOTFS) {
2218 mount_lock_destroy(mp);
2219 #if CONFIG_MACF
2220 mac_mount_label_destroy(mp);
2221 #endif
2222 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2223 } else
2224 panic("dounmount: no coveredvp");
2225 }
2226 return (error);
2227 }
2228
2229 /*
2230 * Unmount any mounts in this filesystem.
2231 */
2232 void
2233 dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2234 {
2235 mount_t smp;
2236 fsid_t *fsids, fsid;
2237 int fsids_sz;
2238 int count = 0, i, m = 0;
2239 vnode_t vp;
2240
2241 mount_list_lock();
2242
2243 // Get an array to hold the submounts' fsids.
2244 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2245 count++;
2246 fsids_sz = count * sizeof(fsid_t);
2247 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2248 if (fsids == NULL) {
2249 mount_list_unlock();
2250 goto out;
2251 }
2252 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2253
2254 /*
2255 * Fill the array with submount fsids.
2256 * Since mounts are always added to the tail of the mount list, the
2257 * list is always in mount order.
2258 * For each mount check if the mounted-on vnode belongs to a
2259 * mount that's already added to our array of mounts to be unmounted.
2260 */
2261 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2262 vp = smp->mnt_vnodecovered;
2263 if (vp == NULL)
2264 continue;
2265 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2266 for (i = 0; i <= m; i++) {
2267 if (fsids[i].val[0] == fsid.val[0] &&
2268 fsids[i].val[1] == fsid.val[1]) {
2269 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2270 break;
2271 }
2272 }
2273 }
2274 mount_list_unlock();
2275
2276 // Unmount the submounts in reverse order. Ignore errors.
2277 for (i = m; i > 0; i--) {
2278 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2279 if (smp) {
2280 mount_ref(smp, 0);
2281 mount_iterdrop(smp);
2282 (void) dounmount(smp, flags, 1, ctx);
2283 }
2284 }
2285 out:
2286 if (fsids)
2287 FREE(fsids, M_TEMP);
2288 }
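/*
 * Worked example (hypothetical mount layout): with /Volumes/A mounted on the
 * target filesystem, /Volumes/A/x mounted on A, and /Volumes/A/x/y mounted on
 * x, the fsids array built above ends up ordered [A, x, y].  The reverse loop
 * above then unmounts y first, then x, and leaves A (index 0) for the
 * caller's own dounmount().
 */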
2289
2290 void
2291 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2292 {
2293 vnode_lock(dp);
2294 mp->mnt_crossref--;
2295
2296 if (mp->mnt_crossref < 0)
2297 panic("mount cross refs -ve");
2298
2299 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
2300
2301 if (need_put)
2302 vnode_put_locked(dp);
2303 vnode_unlock(dp);
2304
2305 mount_lock_destroy(mp);
2306 #if CONFIG_MACF
2307 mac_mount_label_destroy(mp);
2308 #endif
2309 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2310 return;
2311 }
2312 if (need_put)
2313 vnode_put_locked(dp);
2314 vnode_unlock(dp);
2315 }
2316
2317
2318 /*
2319 * Sync each mounted filesystem.
2320 */
2321 #if DIAGNOSTIC
2322 int syncprt = 0;
2323 #endif
2324
2325 int print_vmpage_stat=0;
2326 int sync_timeout = 60; // Sync time limit (sec)
2327
2328
2329 static int
2330 sync_callback(mount_t mp, __unused void *arg)
2331 {
2332 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2333 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2334
2335 mp->mnt_flag &= ~MNT_ASYNC;
2336 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
2337 if (asyncflag)
2338 mp->mnt_flag |= MNT_ASYNC;
2339 }
2340
2341 return (VFS_RETURNED);
2342 }
2343
2344 /* ARGSUSED */
2345 int
2346 sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
2347 {
2348 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2349
2350 if (print_vmpage_stat) {
2351 vm_countdirtypages();
2352 }
2353
2354 #if DIAGNOSTIC
2355 if (syncprt)
2356 vfs_bufstats();
2357 #endif /* DIAGNOSTIC */
2358 return 0;
2359 }
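/*
 * Illustrative user-space sketch: sync(2) schedules writes for every mounted
 * filesystem but, because the iteration above passes MNT_NOWAIT, it does not
 * wait for them to complete before returning.
 *
 *	#include <unistd.h>
 *
 *	void
 *	flush_everything(void)
 *	{
 *		sync();		// queue dirty data on all mounts; returns without waiting
 *	}
 */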
2360
2361 static void
2362 hibernate_sync_thread(void *arg, __unused wait_result_t wr)
2363 {
2364 int *timeout = (int *) arg;
2365
2366 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2367
2368 if (timeout)
2369 wakeup((caddr_t) timeout);
2370 if (print_vmpage_stat) {
2371 vm_countdirtypages();
2372 }
2373
2374 #if DIAGNOSTIC
2375 if (syncprt)
2376 vfs_bufstats();
2377 #endif /* DIAGNOSTIC */
2378 }
2379
2380 /*
2381 * Sync in a separate thread so we can time out if it blocks.
2382 */
2383 static int
2384 hibernate_sync_async(int timeout)
2385 {
2386 thread_t thd;
2387 int error;
2388 struct timespec ts = {timeout, 0};
2389
2390 lck_mtx_lock(sync_mtx_lck);
2391 if (kernel_thread_start(hibernate_sync_thread, &timeout, &thd) != KERN_SUCCESS) {
2392 printf("hibernate_sync_thread failed\n");
2393 lck_mtx_unlock(sync_mtx_lck);
2394 return (0);
2395 }
2396
2397 error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "hibernate_sync_thread", &ts);
2398 if (error) {
2399 printf("sync timed out: %d sec\n", timeout);
2400 }
2401 thread_deallocate(thd);
2402
2403 return (0);
2404 }
2405
2406 /*
2407 * An in-kernel sync for power management to call.
2408 */
2409 __private_extern__ int
2410 sync_internal(void)
2411 {
2412 (void) hibernate_sync_async(sync_timeout);
2413
2414 return 0;
2415 } /* end of sync_internal call */
2416
2417 /*
2418 * Change filesystem quotas.
2419 */
2420 #if QUOTA
2421 int
2422 quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
2423 {
2424 struct mount *mp;
2425 int error, quota_cmd, quota_status;
2426 caddr_t datap;
2427 size_t fnamelen;
2428 struct nameidata nd;
2429 vfs_context_t ctx = vfs_context_current();
2430 struct dqblk my_dqblk;
2431
2432 AUDIT_ARG(uid, uap->uid);
2433 AUDIT_ARG(cmd, uap->cmd);
2434 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2435 uap->path, ctx);
2436 error = namei(&nd);
2437 if (error)
2438 return (error);
2439 mp = nd.ni_vp->v_mount;
2440 vnode_put(nd.ni_vp);
2441 nameidone(&nd);
2442
2443 /* copyin any data we will need for downstream code */
2444 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2445
2446 switch (quota_cmd) {
2447 case Q_QUOTAON:
2448 /* uap->arg specifies a file from which to take the quotas */
2449 fnamelen = MAXPATHLEN;
2450 datap = kalloc(MAXPATHLEN);
2451 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2452 break;
2453 case Q_GETQUOTA:
2454 /* uap->arg is a pointer to a dqblk structure. */
2455 datap = (caddr_t) &my_dqblk;
2456 break;
2457 case Q_SETQUOTA:
2458 case Q_SETUSE:
2459 /* uap->arg is a pointer to a dqblk structure. */
2460 datap = (caddr_t) &my_dqblk;
2461 if (proc_is64bit(p)) {
2462 struct user_dqblk my_dqblk64;
2463 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
2464 if (error == 0) {
2465 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2466 }
2467 }
2468 else {
2469 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
2470 }
2471 break;
2472 case Q_QUOTASTAT:
2473 /* uap->arg is a pointer to an integer */
2474 datap = (caddr_t) &quota_status;
2475 break;
2476 default:
2477 datap = NULL;
2478 break;
2479 } /* switch */
2480
2481 if (error == 0) {
2482 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
2483 }
2484
2485 switch (quota_cmd) {
2486 case Q_QUOTAON:
2487 if (datap != NULL)
2488 kfree(datap, MAXPATHLEN);
2489 break;
2490 case Q_GETQUOTA:
2491 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2492 if (error == 0) {
2493 if (proc_is64bit(p)) {
2494 struct user_dqblk my_dqblk64;
2495
2496 memset(&my_dqblk64, 0, sizeof(my_dqblk64));
2497 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2498 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
2499 }
2500 else {
2501 error = copyout(datap, uap->arg, sizeof (struct dqblk));
2502 }
2503 }
2504 break;
2505 case Q_QUOTASTAT:
2506 /* uap->arg is a pointer to an integer */
2507 if (error == 0) {
2508 error = copyout(datap, uap->arg, sizeof(quota_status));
2509 }
2510 break;
2511 default:
2512 break;
2513 } /* switch */
2514
2515 return (error);
2516 }
2517 #else
2518 int
2519 quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2520 {
2521 return (EOPNOTSUPP);
2522 }
2523 #endif /* QUOTA */
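/*
 * Illustrative user-space sketch (assumes quotas are enabled on the volume;
 * the mount point and uid are hypothetical): fetch the user quota record for
 * a mount, which reaches the Q_GETQUOTA path above.
 *
 *	#include <sys/types.h>
 *	#include <sys/quota.h>
 *	#include <stdio.h>
 *
 *	int
 *	show_quota(const char *mountpoint, uid_t uid)
 *	{
 *		struct dqblk dqb;
 *
 *		if (quotactl(mountpoint, QCMD(Q_GETQUOTA, USRQUOTA), uid,
 *		    (caddr_t)&dqb) == -1) {
 *			perror("quotactl");
 *			return -1;
 *		}
 *		return 0;
 *	}
 */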
2524
2525 /*
2526 * Get filesystem statistics.
2527 *
2528 * Returns: 0 Success
2529 * namei:???
2530 * vfs_update_vfsstat:???
2531 * munge_statfs:EFAULT
2532 */
2533 /* ARGSUSED */
2534 int
2535 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
2536 {
2537 struct mount *mp;
2538 struct vfsstatfs *sp;
2539 int error;
2540 struct nameidata nd;
2541 vfs_context_t ctx = vfs_context_current();
2542 vnode_t vp;
2543
2544 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2545 UIO_USERSPACE, uap->path, ctx);
2546 error = namei(&nd);
2547 if (error != 0)
2548 return (error);
2549 vp = nd.ni_vp;
2550 mp = vp->v_mount;
2551 sp = &mp->mnt_vfsstat;
2552 nameidone(&nd);
2553
2554 #if CONFIG_MACF
2555 error = mac_mount_check_stat(ctx, mp);
2556 if (error != 0)
2557 return (error);
2558 #endif
2559
2560 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
2561 if (error != 0) {
2562 vnode_put(vp);
2563 return (error);
2564 }
2565
2566 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2567 vnode_put(vp);
2568 return (error);
2569 }
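/*
 * Illustrative user-space sketch (the path is hypothetical): statfs(2) fills
 * in a struct statfs for the filesystem containing the given path.
 *
 *	#include <sys/param.h>
 *	#include <sys/mount.h>
 *	#include <stdio.h>
 *
 *	void
 *	report_fs(const char *path)
 *	{
 *		struct statfs sfs;
 *
 *		if (statfs(path, &sfs) == 0)
 *			printf("%s: %s mounted on %s, %u-byte blocks\n",
 *			    path, sfs.f_fstypename, sfs.f_mntonname,
 *			    (unsigned int)sfs.f_bsize);
 *	}
 */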
2570
2571 /*
2572 * Get filesystem statistics.
2573 */
2574 /* ARGSUSED */
2575 int
2576 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
2577 {
2578 vnode_t vp;
2579 struct mount *mp;
2580 struct vfsstatfs *sp;
2581 int error;
2582
2583 AUDIT_ARG(fd, uap->fd);
2584
2585 if ( (error = file_vnode(uap->fd, &vp)) )
2586 return (error);
2587
2588 error = vnode_getwithref(vp);
2589 if (error) {
2590 file_drop(uap->fd);
2591 return (error);
2592 }
2593
2594 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2595
2596 mp = vp->v_mount;
2597 if (!mp) {
2598 error = EBADF;
2599 goto out;
2600 }
2601
2602 #if CONFIG_MACF
2603 error = mac_mount_check_stat(vfs_context_current(), mp);
2604 if (error != 0)
2605 goto out;
2606 #endif
2607
2608 sp = &mp->mnt_vfsstat;
2609 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2610 goto out;
2611 }
2612
2613 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2614
2615 out:
2616 file_drop(uap->fd);
2617 vnode_put(vp);
2618
2619 return (error);
2620 }
2621
2622 /*
2623 * Common routine to handle copying of statfs64 data to user space
2624 */
2625 static int
2626 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2627 {
2628 int error;
2629 struct statfs64 sfs;
2630
2631 bzero(&sfs, sizeof(sfs));
2632
2633 sfs.f_bsize = sfsp->f_bsize;
2634 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2635 sfs.f_blocks = sfsp->f_blocks;
2636 sfs.f_bfree = sfsp->f_bfree;
2637 sfs.f_bavail = sfsp->f_bavail;
2638 sfs.f_files = sfsp->f_files;
2639 sfs.f_ffree = sfsp->f_ffree;
2640 sfs.f_fsid = sfsp->f_fsid;
2641 sfs.f_owner = sfsp->f_owner;
2642 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2643 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2644 sfs.f_fssubtype = sfsp->f_fssubtype;
2645 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2646 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2647 } else {
2648 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2649 }
2650 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2651 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2652
2653 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2654
2655 return(error);
2656 }
2657
2658 /*
2659 * Get file system statistics in 64-bit mode
2660 */
2661 int
2662 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2663 {
2664 struct mount *mp;
2665 struct vfsstatfs *sp;
2666 int error;
2667 struct nameidata nd;
2668 vfs_context_t ctxp = vfs_context_current();
2669 vnode_t vp;
2670
2671 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2672 UIO_USERSPACE, uap->path, ctxp);
2673 error = namei(&nd);
2674 if (error != 0)
2675 return (error);
2676 vp = nd.ni_vp;
2677 mp = vp->v_mount;
2678 sp = &mp->mnt_vfsstat;
2679 nameidone(&nd);
2680
2681 #if CONFIG_MACF
2682 error = mac_mount_check_stat(ctxp, mp);
2683 if (error != 0)
2684 return (error);
2685 #endif
2686
2687 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
2688 if (error != 0) {
2689 vnode_put(vp);
2690 return (error);
2691 }
2692
2693 error = statfs64_common(mp, sp, uap->buf);
2694 vnode_put(vp);
2695
2696 return (error);
2697 }
2698
2699 /*
2700 * Get file system statistics in 64-bit mode
2701 */
2702 int
2703 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2704 {
2705 struct vnode *vp;
2706 struct mount *mp;
2707 struct vfsstatfs *sp;
2708 int error;
2709
2710 AUDIT_ARG(fd, uap->fd);
2711
2712 if ( (error = file_vnode(uap->fd, &vp)) )
2713 return (error);
2714
2715 error = vnode_getwithref(vp);
2716 if (error) {
2717 file_drop(uap->fd);
2718 return (error);
2719 }
2720
2721 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2722
2723 mp = vp->v_mount;
2724 if (!mp) {
2725 error = EBADF;
2726 goto out;
2727 }
2728
2729 #if CONFIG_MACF
2730 error = mac_mount_check_stat(vfs_context_current(), mp);
2731 if (error != 0)
2732 goto out;
2733 #endif
2734
2735 sp = &mp->mnt_vfsstat;
2736 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2737 goto out;
2738 }
2739
2740 error = statfs64_common(mp, sp, uap->buf);
2741
2742 out:
2743 file_drop(uap->fd);
2744 vnode_put(vp);
2745
2746 return (error);
2747 }
2748
2749 struct getfsstat_struct {
2750 user_addr_t sfsp;
2751 user_addr_t *mp;
2752 int count;
2753 int maxcount;
2754 int flags;
2755 int error;
2756 };
2757
2758
2759 static int
2760 getfsstat_callback(mount_t mp, void * arg)
2761 {
2762
2763 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2764 struct vfsstatfs *sp;
2765 int error, my_size;
2766 vfs_context_t ctx = vfs_context_current();
2767
2768 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2769 #if CONFIG_MACF
2770 error = mac_mount_check_stat(ctx, mp);
2771 if (error != 0) {
2772 fstp->error = error;
2773 return(VFS_RETURNED_DONE);
2774 }
2775 #endif
2776 sp = &mp->mnt_vfsstat;
2777 /*
2778 * If MNT_NOWAIT is specified, do not refresh the
2779 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2780 */
2781 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2782 (error = vfs_update_vfsstat(mp, ctx,
2783 VFS_USER_EVENT))) {
2784 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2785 return(VFS_RETURNED);
2786 }
2787
2788 /*
2789 * Need to handle LP64 version of struct statfs
2790 */
2791 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
2792 if (error) {
2793 fstp->error = error;
2794 return(VFS_RETURNED_DONE);
2795 }
2796 fstp->sfsp += my_size;
2797
2798 if (fstp->mp) {
2799 #if CONFIG_MACF
2800 error = mac_mount_label_get(mp, *fstp->mp);
2801 if (error) {
2802 fstp->error = error;
2803 return(VFS_RETURNED_DONE);
2804 }
2805 #endif
2806 fstp->mp++;
2807 }
2808 }
2809 fstp->count++;
2810 return(VFS_RETURNED);
2811 }
2812
2813 /*
2814 * Get statistics on all filesystems.
2815 */
2816 int
2817 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2818 {
2819 struct __mac_getfsstat_args muap;
2820
2821 muap.buf = uap->buf;
2822 muap.bufsize = uap->bufsize;
2823 muap.mac = USER_ADDR_NULL;
2824 muap.macsize = 0;
2825 muap.flags = uap->flags;
2826
2827 return (__mac_getfsstat(p, &muap, retval));
2828 }
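/*
 * Illustrative user-space sketch: list all mounted filesystems.  MNT_NOWAIT
 * returns the cached statistics without asking each filesystem to refresh
 * them, matching the fast path in getfsstat_callback() above.  A NULL buffer
 * asks only for the count of mounts.
 *
 *	#include <sys/param.h>
 *	#include <sys/ucred.h>
 *	#include <sys/mount.h>
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *
 *	void
 *	list_mounts(void)
 *	{
 *		int n = getfsstat(NULL, 0, MNT_NOWAIT);
 *		if (n <= 0)
 *			return;
 *		struct statfs *buf = calloc(n, sizeof(*buf));
 *		if (buf == NULL)
 *			return;
 *		n = getfsstat(buf, n * (int)sizeof(*buf), MNT_NOWAIT);
 *		for (int i = 0; i < n; i++)
 *			printf("%s on %s (%s)\n", buf[i].f_mntfromname,
 *			    buf[i].f_mntonname, buf[i].f_fstypename);
 *		free(buf);
 *	}
 */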
2829
2830 /*
2831 * __mac_getfsstat: Get MAC-related file system statistics
2832 *
2833 * Parameters: p (ignored)
2834 * uap User argument descriptor (see below)
2835 * retval Count of file system statistics (N stats)
2836 *
2837 * Indirect: uap->bufsize Buffer size
2838 * uap->macsize MAC info size
2839 * uap->buf Buffer where information will be returned
2840 * uap->mac MAC info
2841 * uap->flags File system flags
2842 *
2843 *
2844 * Returns: 0 Success
2845 * !0 Not success
2846 *
2847 */
2848 int
2849 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
2850 {
2851 user_addr_t sfsp;
2852 user_addr_t *mp;
2853 size_t count, maxcount, bufsize, macsize;
2854 struct getfsstat_struct fst;
2855
2856 bufsize = (size_t) uap->bufsize;
2857 macsize = (size_t) uap->macsize;
2858
2859 if (IS_64BIT_PROCESS(p)) {
2860 maxcount = bufsize / sizeof(struct user64_statfs);
2861 }
2862 else {
2863 maxcount = bufsize / sizeof(struct user32_statfs);
2864 }
2865 sfsp = uap->buf;
2866 count = 0;
2867
2868 mp = NULL;
2869
2870 #if CONFIG_MACF
2871 if (uap->mac != USER_ADDR_NULL) {
2872 u_int32_t *mp0;
2873 int error;
2874 unsigned int i;
2875
2876 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2877 if (count != maxcount)
2878 return (EINVAL);
2879
2880 /* Copy in the array */
2881 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2882 if (mp0 == NULL) {
2883 return (ENOMEM);
2884 }
2885
2886 error = copyin(uap->mac, mp0, macsize);
2887 if (error) {
2888 FREE(mp0, M_MACTEMP);
2889 return (error);
2890 }
2891
2892 /* Normalize to an array of user_addr_t */
2893 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
2894 if (mp == NULL) {
2895 FREE(mp0, M_MACTEMP);
2896 return (ENOMEM);
2897 }
2898
2899 for (i = 0; i < count; i++) {
2900 if (IS_64BIT_PROCESS(p))
2901 mp[i] = ((user_addr_t *)mp0)[i];
2902 else
2903 mp[i] = (user_addr_t)mp0[i];
2904 }
2905 FREE(mp0, M_MACTEMP);
2906 }
2907 #endif
2908
2909
2910 fst.sfsp = sfsp;
2911 fst.mp = mp;
2912 fst.flags = uap->flags;
2913 fst.count = 0;
2914 fst.error = 0;
2915 fst.maxcount = maxcount;
2916
2917
2918 vfs_iterate(0, getfsstat_callback, &fst);
2919
2920 if (mp)
2921 FREE(mp, M_MACTEMP);
2922
2923 if (fst.error ) {
2924 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2925 return(fst.error);
2926 }
2927
2928 if (fst.sfsp && fst.count > fst.maxcount)
2929 *retval = fst.maxcount;
2930 else
2931 *retval = fst.count;
2932 return (0);
2933 }
2934
2935 static int
2936 getfsstat64_callback(mount_t mp, void * arg)
2937 {
2938 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2939 struct vfsstatfs *sp;
2940 int error;
2941
2942 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2943 #if CONFIG_MACF
2944 error = mac_mount_check_stat(vfs_context_current(), mp);
2945 if (error != 0) {
2946 fstp->error = error;
2947 return(VFS_RETURNED_DONE);
2948 }
2949 #endif
2950 sp = &mp->mnt_vfsstat;
2951 /*
2952 * If MNT_NOWAIT is specified, do not refresh the fsstat
2953 * cache. MNT_WAIT overrides MNT_NOWAIT.
2954 *
2955 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2956 * getfsstat, since the constants are out of the same
2957 * namespace.
2958 */
2959 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2960 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2961 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2962 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2963 return(VFS_RETURNED);
2964 }
2965
2966 error = statfs64_common(mp, sp, fstp->sfsp);
2967 if (error) {
2968 fstp->error = error;
2969 return(VFS_RETURNED_DONE);
2970 }
2971 fstp->sfsp += sizeof(struct statfs64);
2972 }
2973 fstp->count++;
2974 return(VFS_RETURNED);
2975 }
2976
2977 /*
2978 * Get statistics on all file systems in 64 bit mode.
2979 */
2980 int
2981 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2982 {
2983 user_addr_t sfsp;
2984 int count, maxcount;
2985 struct getfsstat_struct fst;
2986
2987 maxcount = uap->bufsize / sizeof(struct statfs64);
2988
2989 sfsp = uap->buf;
2990 count = 0;
2991
2992 fst.sfsp = sfsp;
2993 fst.flags = uap->flags;
2994 fst.count = 0;
2995 fst.error = 0;
2996 fst.maxcount = maxcount;
2997
2998 vfs_iterate(0, getfsstat64_callback, &fst);
2999
3000 if (fst.error ) {
3001 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
3002 return(fst.error);
3003 }
3004
3005 if (fst.sfsp && fst.count > fst.maxcount)
3006 *retval = fst.maxcount;
3007 else
3008 *retval = fst.count;
3009
3010 return (0);
3011 }
3012
3013 /*
3014 * Gets the vnode associated with the file descriptor passed
3015 * as input.
3016 *
3017 * INPUT
3018 * ctx - vfs context of caller
3019 * fd - file descriptor for which vnode is required.
3020 * vpp - Pointer to pointer to vnode to be returned.
3021 *
3022 * The vnode is returned with an iocount so any vnode obtained
3023 * by this call needs a vnode_put
3024 *
3025 */
3026 int
3027 vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
3028 {
3029 int error;
3030 vnode_t vp;
3031 struct fileproc *fp;
3032 proc_t p = vfs_context_proc(ctx);
3033
3034 *vpp = NULLVP;
3035
3036 error = fp_getfvp(p, fd, &fp, &vp);
3037 if (error)
3038 return (error);
3039
3040 error = vnode_getwithref(vp);
3041 if (error) {
3042 (void)fp_drop(p, fd, fp, 0);
3043 return (error);
3044 }
3045
3046 (void)fp_drop(p, fd, fp, 0);
3047 *vpp = vp;
3048 return (error);
3049 }
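/*
 * Hypothetical in-kernel usage sketch (ctx and fd are assumed to be a valid
 * vfs_context_t and file descriptor): any vnode obtained through
 * vnode_getfromfd() carries an iocount and must be released with vnode_put().
 *
 *	vnode_t vp;
 *	int err = vnode_getfromfd(ctx, fd, &vp);
 *	if (err == 0) {
 *		// ... operate on vp while holding the iocount ...
 *		vnode_put(vp);
 *	}
 */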
3050
3051 /*
3052 * Wrapper function around namei to start lookup from a directory
3053 * specified by a file descriptor (dirfd).
3054 *
3055 * In addition to all the errors returned by namei, this call can
3056 * return ENOTDIR if the file descriptor does not refer to a directory,
3057 * and EBADF if the file descriptor is not valid.
3058 */
3059 int
3060 nameiat(struct nameidata *ndp, int dirfd)
3061 {
3062 if ((dirfd != AT_FDCWD) &&
3063 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
3064 !(ndp->ni_cnd.cn_flags & USEDVP)) {
3065 int error = 0;
3066 char c;
3067
3068 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3069 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3070 if (error)
3071 return (error);
3072 } else {
3073 c = *((char *)(ndp->ni_dirp));
3074 }
3075
3076 if (c != '/') {
3077 vnode_t dvp_at;
3078
3079 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3080 &dvp_at);
3081 if (error)
3082 return (error);
3083
3084 if (vnode_vtype(dvp_at) != VDIR) {
3085 vnode_put(dvp_at);
3086 return (ENOTDIR);
3087 }
3088
3089 ndp->ni_dvp = dvp_at;
3090 ndp->ni_cnd.cn_flags |= USEDVP;
3091 error = namei(ndp);
3092 ndp->ni_cnd.cn_flags &= ~USEDVP;
3093 vnode_put(dvp_at);
3094 return (error);
3095 }
3096 }
3097
3098 return (namei(ndp));
3099 }
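/*
 * Hypothetical sketch of how an *at handler drives nameiat() (path, ctx and
 * dirfd are assumed locals): set up the nameidata exactly as for namei(),
 * then pass the directory fd alongside it.  AT_FDCWD behaves like plain
 * namei().
 *
 *	struct nameidata nd;
 *	vnode_t vp;
 *
 *	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1,
 *	    UIO_USERSPACE, path, ctx);
 *	error = nameiat(&nd, dirfd);
 *	if (error == 0) {
 *		vp = nd.ni_vp;
 *		nameidone(&nd);
 *		// ... use vp ...
 *		vnode_put(vp);
 *	}
 */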
3100
3101 /*
3102 * Change current working directory to a given file descriptor.
3103 */
3104 /* ARGSUSED */
3105 static int
3106 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
3107 {
3108 struct filedesc *fdp = p->p_fd;
3109 vnode_t vp;
3110 vnode_t tdp;
3111 vnode_t tvp;
3112 struct mount *mp;
3113 int error;
3114 vfs_context_t ctx = vfs_context_current();
3115
3116 AUDIT_ARG(fd, uap->fd);
3117 if (per_thread && uap->fd == -1) {
3118 /*
3119 * Switching back from per-thread to per process CWD; verify we
3120 * in fact have one before proceeding. The only success case
3121 * for this code path is to return 0 preemptively after zapping
3122 * the thread structure contents.
3123 */
3124 thread_t th = vfs_context_thread(ctx);
3125 if (th) {
3126 uthread_t uth = get_bsdthread_info(th);
3127 tvp = uth->uu_cdir;
3128 uth->uu_cdir = NULLVP;
3129 if (tvp != NULLVP) {
3130 vnode_rele(tvp);
3131 return (0);
3132 }
3133 }
3134 return (EBADF);
3135 }
3136
3137 if ( (error = file_vnode(uap->fd, &vp)) )
3138 return(error);
3139 if ( (error = vnode_getwithref(vp)) ) {
3140 file_drop(uap->fd);
3141 return(error);
3142 }
3143
3144 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
3145
3146 if (vp->v_type != VDIR) {
3147 error = ENOTDIR;
3148 goto out;
3149 }
3150
3151 #if CONFIG_MACF
3152 error = mac_vnode_check_chdir(ctx, vp);
3153 if (error)
3154 goto out;
3155 #endif
3156 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3157 if (error)
3158 goto out;
3159
3160 while (!error && (mp = vp->v_mountedhere) != NULL) {
3161 if (vfs_busy(mp, LK_NOWAIT)) {
3162 error = EACCES;
3163 goto out;
3164 }
3165 error = VFS_ROOT(mp, &tdp, ctx);
3166 vfs_unbusy(mp);
3167 if (error)
3168 break;
3169 vnode_put(vp);
3170 vp = tdp;
3171 }
3172 if (error)
3173 goto out;
3174 if ( (error = vnode_ref(vp)) )
3175 goto out;
3176 vnode_put(vp);
3177
3178 if (per_thread) {
3179 thread_t th = vfs_context_thread(ctx);
3180 if (th) {
3181 uthread_t uth = get_bsdthread_info(th);
3182 tvp = uth->uu_cdir;
3183 uth->uu_cdir = vp;
3184 OSBitOrAtomic(P_THCWD, &p->p_flag);
3185 } else {
3186 vnode_rele(vp);
3187 return (ENOENT);
3188 }
3189 } else {
3190 proc_fdlock(p);
3191 tvp = fdp->fd_cdir;
3192 fdp->fd_cdir = vp;
3193 proc_fdunlock(p);
3194 }
3195
3196 if (tvp)
3197 vnode_rele(tvp);
3198 file_drop(uap->fd);
3199
3200 return (0);
3201 out:
3202 vnode_put(vp);
3203 file_drop(uap->fd);
3204
3205 return(error);
3206 }
3207
3208 int
3209 fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
3210 {
3211 return common_fchdir(p, uap, 0);
3212 }
3213
3214 int
3215 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
3216 {
3217 return common_fchdir(p, (void *)uap, 1);
3218 }
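/*
 * Illustrative user-space sketch (the target directory is hypothetical):
 * save and restore the working directory around some operation by holding a
 * directory fd and using fchdir(2) to switch back.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int
 *	with_tmp_cwd(void)
 *	{
 *		int saved = open(".", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
 *		if (saved == -1)
 *			return -1;
 *		if (chdir("/tmp") == -1) {
 *			close(saved);
 *			return -1;
 *		}
 *		// ... work relative to /tmp ...
 *		(void)fchdir(saved);	// restore the previous cwd
 *		close(saved);
 *		return 0;
 *	}
 */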
3219
3220 /*
3221 * Change current working directory (".").
3222 *
3223 * Returns: 0 Success
3224 * change_dir:ENOTDIR
3225 * change_dir:???
3226 * vnode_ref:ENOENT No such file or directory
3227 */
3228 /* ARGSUSED */
3229 static int
3230 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
3231 {
3232 struct filedesc *fdp = p->p_fd;
3233 int error;
3234 struct nameidata nd;
3235 vnode_t tvp;
3236 vfs_context_t ctx = vfs_context_current();
3237
3238 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
3239 UIO_USERSPACE, uap->path, ctx);
3240 error = change_dir(&nd, ctx);
3241 if (error)
3242 return (error);
3243 if ( (error = vnode_ref(nd.ni_vp)) ) {
3244 vnode_put(nd.ni_vp);
3245 return (error);
3246 }
3247 /*
3248 * drop the iocount we picked up in change_dir
3249 */
3250 vnode_put(nd.ni_vp);
3251
3252 if (per_thread) {
3253 thread_t th = vfs_context_thread(ctx);
3254 if (th) {
3255 uthread_t uth = get_bsdthread_info(th);
3256 tvp = uth->uu_cdir;
3257 uth->uu_cdir = nd.ni_vp;
3258 OSBitOrAtomic(P_THCWD, &p->p_flag);
3259 } else {
3260 vnode_rele(nd.ni_vp);
3261 return (ENOENT);
3262 }
3263 } else {
3264 proc_fdlock(p);
3265 tvp = fdp->fd_cdir;
3266 fdp->fd_cdir = nd.ni_vp;
3267 proc_fdunlock(p);
3268 }
3269
3270 if (tvp)
3271 vnode_rele(tvp);
3272
3273 return (0);
3274 }
3275
3276
3277 /*
3278 * chdir
3279 *
3280 * Change current working directory (".") for the entire process
3281 *
3282 * Parameters: p Process requesting the call
3283 * uap User argument descriptor (see below)
3284 * retval (ignored)
3285 *
3286 * Indirect parameters: uap->path Directory path
3287 *
3288 * Returns: 0 Success
3289 * common_chdir: ENOTDIR
3290 * common_chdir: ENOENT No such file or directory
3291 * common_chdir: ???
3292 *
3293 */
3294 int
3295 chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
3296 {
3297 return common_chdir(p, (void *)uap, 0);
3298 }
3299
3300 /*
3301 * __pthread_chdir
3302 *
3303 * Change current working directory (".") for a single thread
3304 *
3305 * Parameters: p Process requesting the call
3306 * uap User argument descriptor (see below)
3307 * retval (ignored)
3308 *
3309 * Indirect parameters: uap->path Directory path
3310 *
3311 * Returns: 0 Success
3312 * common_chdir: ENOTDIR
3313 * common_chdir: ENOENT No such file or directory
3314 * common_chdir: ???
3315 *
3316 */
3317 int
3318 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
3319 {
3320 return common_chdir(p, (void *)uap, 1);
3321 }
3322
3323
3324 /*
3325 * Change notion of root (``/'') directory.
3326 */
3327 /* ARGSUSED */
3328 int
3329 chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
3330 {
3331 struct filedesc *fdp = p->p_fd;
3332 int error;
3333 struct nameidata nd;
3334 vnode_t tvp;
3335 vfs_context_t ctx = vfs_context_current();
3336
3337 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
3338 return (error);
3339
3340 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
3341 UIO_USERSPACE, uap->path, ctx);
3342 error = change_dir(&nd, ctx);
3343 if (error)
3344 return (error);
3345
3346 #if CONFIG_MACF
3347 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3348 &nd.ni_cnd);
3349 if (error) {
3350 vnode_put(nd.ni_vp);
3351 return (error);
3352 }
3353 #endif
3354
3355 if ( (error = vnode_ref(nd.ni_vp)) ) {
3356 vnode_put(nd.ni_vp);
3357 return (error);
3358 }
3359 vnode_put(nd.ni_vp);
3360
3361 proc_fdlock(p);
3362 tvp = fdp->fd_rdir;
3363 fdp->fd_rdir = nd.ni_vp;
3364 fdp->fd_flags |= FD_CHROOT;
3365 proc_fdunlock(p);
3366
3367 if (tvp != NULL)
3368 vnode_rele(tvp);
3369
3370 return (0);
3371 }
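/*
 * Illustrative user-space sketch (must run as root; the new root path is
 * hypothetical): change the root directory and then the working directory so
 * that "/" resolves inside the new root.
 *
 *	#include <unistd.h>
 *
 *	int
 *	enter_jail(const char *newroot)
 *	{
 *		if (chroot(newroot) == -1)
 *			return -1;
 *		if (chdir("/") == -1)
 *			return -1;
 *		return 0;
 *	}
 */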
3372
3373 /*
3374 * Common routine for chroot and chdir.
3375 *
3376 * Returns: 0 Success
3377 * ENOTDIR Not a directory
3378 * namei:??? [anything namei can return]
3379 * vnode_authorize:??? [anything vnode_authorize can return]
3380 */
3381 static int
3382 change_dir(struct nameidata *ndp, vfs_context_t ctx)
3383 {
3384 vnode_t vp;
3385 int error;
3386
3387 if ((error = namei(ndp)))
3388 return (error);
3389 nameidone(ndp);
3390 vp = ndp->ni_vp;
3391
3392 if (vp->v_type != VDIR) {
3393 vnode_put(vp);
3394 return (ENOTDIR);
3395 }
3396
3397 #if CONFIG_MACF
3398 error = mac_vnode_check_chdir(ctx, vp);
3399 if (error) {
3400 vnode_put(vp);
3401 return (error);
3402 }
3403 #endif
3404
3405 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3406 if (error) {
3407 vnode_put(vp);
3408 return (error);
3409 }
3410
3411 return (error);
3412 }
3413
3414 /*
3415 * Allocate the vnode data (for directories) to be associated with the file glob.
3416 */
3417 struct fd_vn_data *
3418 fg_vn_data_alloc(void)
3419 {
3420 struct fd_vn_data *fvdata;
3421
3422 /* Allocate per fd vnode data */
3423 MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
3424 M_FD_VN_DATA, M_WAITOK | M_ZERO);
3425 lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
3426 return fvdata;
3427 }
3428
3429 /*
3430 * Free the vnode data (for directories) associated with the file glob.
3431 */
3432 void
3433 fg_vn_data_free(void *fgvndata)
3434 {
3435 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3436
3437 if (fvdata->fv_buf)
3438 FREE(fvdata->fv_buf, M_FD_DIRBUF);
3439 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3440 FREE(fvdata, M_FD_VN_DATA);
3441 }
3442
3443 /*
3444 * Check permissions, allocate an open file structure,
3445 * and call the device open routine if any.
3446 *
3447 * Returns: 0 Success
3448 * EINVAL
3449 * EINTR
3450 * falloc:ENFILE
3451 * falloc:EMFILE
3452 * falloc:ENOMEM
3453 * vn_open_auth:???
3454 * dupfdopen:???
3455 * VNOP_ADVLOCK:???
3456 * vnode_setsize:???
3457 *
3458 * XXX Need to implement uid, gid
3459 */
3460 int
3461 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3462 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3463 int32_t *retval)
3464 {
3465 proc_t p = vfs_context_proc(ctx);
3466 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
3467 struct fileproc *fp;
3468 vnode_t vp;
3469 int flags, oflags;
3470 int type, indx, error;
3471 struct flock lf;
3472 struct vfs_context context;
3473
3474 oflags = uflags;
3475
3476 if ((oflags & O_ACCMODE) == O_ACCMODE)
3477 return(EINVAL);
3478
3479 flags = FFLAGS(uflags);
3480 CLR(flags, FENCRYPTED);
3481 CLR(flags, FUNENCRYPTED);
3482
3483 AUDIT_ARG(fflags, oflags);
3484 AUDIT_ARG(mode, vap->va_mode);
3485
3486 if ((error = falloc_withalloc(p,
3487 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
3488 return (error);
3489 }
3490 uu->uu_dupfd = -indx - 1;
3491
3492 if ((error = vn_open_auth(ndp, &flags, vap))) {
3493 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
3494 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
3495 fp_drop(p, indx, NULL, 0);
3496 *retval = indx;
3497 return (0);
3498 }
3499 }
3500 if (error == ERESTART)
3501 error = EINTR;
3502 fp_free(p, indx, fp);
3503 return (error);
3504 }
3505 uu->uu_dupfd = 0;
3506 vp = ndp->ni_vp;
3507
3508 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
3509 fp->f_fglob->fg_ops = &vnops;
3510 fp->f_fglob->fg_data = (caddr_t)vp;
3511
3512 if (flags & (O_EXLOCK | O_SHLOCK)) {
3513 lf.l_whence = SEEK_SET;
3514 lf.l_start = 0;
3515 lf.l_len = 0;
3516 if (flags & O_EXLOCK)
3517 lf.l_type = F_WRLCK;
3518 else
3519 lf.l_type = F_RDLCK;
3520 type = F_FLOCK;
3521 if ((flags & FNONBLOCK) == 0)
3522 type |= F_WAIT;
3523 #if CONFIG_MACF
3524 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3525 F_SETLK, &lf);
3526 if (error)
3527 goto bad;
3528 #endif
3529 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
3530 goto bad;
3531 fp->f_fglob->fg_flag |= FHASLOCK;
3532 }
3533
3534 #if DEVELOPMENT || DEBUG
3535 /*
3536 * XXX VSWAP: Check for entitlements or special flag here
3537 * so we can restrict access appropriately.
3538 */
3539 #else /* DEVELOPMENT || DEBUG */
3540
3541 if (vnode_isswap(vp) && (flags & (FWRITE | O_TRUNC)) && (ctx != vfs_context_kernel())) {
3542 /* block attempt to write/truncate swapfile */
3543 error = EPERM;
3544 goto bad;
3545 }
3546 #endif /* DEVELOPMENT || DEBUG */
3547
3548 /* try to truncate by setting the size attribute */
3549 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3550 goto bad;
3551
3552 /*
3553 * For directories we hold some additional information in the fd.
3554 */
3555 if (vnode_vtype(vp) == VDIR) {
3556 fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
3557 } else {
3558 fp->f_fglob->fg_vn_data = NULL;
3559 }
3560
3561 vnode_put(vp);
3562
3563 /*
3564 * The first terminal open (without O_NOCTTY) by a session leader
3565 * results in it being set as the controlling terminal.
3566 */
3567 if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
3568 !(flags & O_NOCTTY)) {
3569 int tmp = 0;
3570
3571 (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
3572 (caddr_t)&tmp, ctx);
3573 }
3574
3575 proc_fdlock(p);
3576 if (flags & O_CLOEXEC)
3577 *fdflags(p, indx) |= UF_EXCLOSE;
3578 if (flags & O_CLOFORK)
3579 *fdflags(p, indx) |= UF_FORKCLOSE;
3580 procfdtbl_releasefd(p, indx, NULL);
3581
3582 #if CONFIG_SECLUDED_MEMORY
3583 if (secluded_for_filecache &&
3584 FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE &&
3585 vnode_vtype(vp) == VREG) {
3586 memory_object_control_t moc;
3587
3588 moc = ubc_getobject(vp, UBC_FLAGS_NONE);
3589
3590 if (moc == MEMORY_OBJECT_CONTROL_NULL) {
3591 /* nothing to do... */
3592 } else if (fp->f_fglob->fg_flag & FWRITE) {
3593 /* writable -> no longer eligible for secluded pages */
3594 memory_object_mark_eligible_for_secluded(moc,
3595 FALSE);
3596 } else if (secluded_for_filecache == 1) {
3597 char pathname[32] = { 0, };
3598 size_t copied;
3599 /* XXX FBDP: better way to detect /Applications/ ? */
3600 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3601 copyinstr(ndp->ni_dirp,
3602 pathname,
3603 sizeof (pathname),
3604 &copied);
3605 } else {
3606 copystr(CAST_DOWN(void *, ndp->ni_dirp),
3607 pathname,
3608 sizeof (pathname),
3609 &copied);
3610 }
3611 pathname[sizeof (pathname) - 1] = '\0';
3612 if (strncmp(pathname,
3613 "/Applications/",
3614 strlen("/Applications/")) == 0 &&
3615 strncmp(pathname,
3616 "/Applications/Camera.app/",
3617 strlen("/Applications/Camera.app/")) != 0) {
3618 /*
3619 * not writable
3620 * AND from "/Applications/"
3621 * AND not from "/Applications/Camera.app/"
3622 * ==> eligible for secluded
3623 */
3624 memory_object_mark_eligible_for_secluded(moc,
3625 TRUE);
3626 }
3627 } else if (secluded_for_filecache == 2) {
3628 #if __arm64__
3629 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_arm64"
3630 #elif __arm__
3631 #define DYLD_SHARED_CACHE_NAME "dyld_shared_cache_armv7"
3632 #else
3633 /* not implemented... */
3634 #endif
3635 if (!strncmp(vp->v_name,
3636 DYLD_SHARED_CACHE_NAME,
3637 strlen(DYLD_SHARED_CACHE_NAME)) ||
3638 !strncmp(vp->v_name,
3639 "dyld",
3640 strlen(vp->v_name)) ||
3641 !strncmp(vp->v_name,
3642 "launchd",
3643 strlen(vp->v_name)) ||
3644 !strncmp(vp->v_name,
3645 "Camera",
3646 strlen(vp->v_name)) ||
3647 !strncmp(vp->v_name,
3648 "mediaserverd",
3649 strlen(vp->v_name))) {
3650 /*
3651 * This file matters when launching Camera:
3652 * do not store its contents in the secluded
3653 * pool that will be drained on Camera launch.
3654 */
3655 memory_object_mark_eligible_for_secluded(moc,
3656 FALSE);
3657 }
3658 }
3659 }
3660 #endif /* CONFIG_SECLUDED_MEMORY */
3661
3662 fp_drop(p, indx, fp, 1);
3663 proc_fdunlock(p);
3664
3665 *retval = indx;
3666
3667 return (0);
3668 bad:
3669 context = *vfs_context_current();
3670 context.vc_ucred = fp->f_fglob->fg_cred;
3671
3672 if ((fp->f_fglob->fg_flag & FHASLOCK) &&
3673 (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
3674 lf.l_whence = SEEK_SET;
3675 lf.l_start = 0;
3676 lf.l_len = 0;
3677 lf.l_type = F_UNLCK;
3678
3679 (void)VNOP_ADVLOCK(
3680 vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
3681 }
3682
3683 vn_close(vp, fp->f_fglob->fg_flag, &context);
3684 vnode_put(vp);
3685 fp_free(p, indx, fp);
3686
3687 return (error);
3688 }
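/*
 * Illustrative user-space sketch of the flag handling above (path and mode
 * are hypothetical): O_EXLOCK asks open1() to take an exclusive flock-style
 * advisory lock before the descriptor is returned, and O_CLOEXEC marks the
 * new descriptor close-on-exec.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int
 *	open_locked(const char *path)
 *	{
 *		int fd = open(path, O_RDWR | O_CREAT | O_EXLOCK | O_CLOEXEC, 0644);
 *		// With O_NONBLOCK the lock attempt fails immediately instead of
 *		// waiting (see the F_WAIT logic above).
 *		return fd;
 *	}
 */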
3689
3690 /*
3691 * While most of the *at syscall handlers can call nameiat() which
3692 * is a wrapper around namei, the use of namei and initialisation
3693 * of nameidata are far removed and in different functions - namei
3694 * gets called in vn_open_auth for open1. So we'll just do here what
3695 * nameiat() does.
3696 */
3697 static int
3698 open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3699 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
3700 int dirfd)
3701 {
3702 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
3703 int error;
3704 char c;
3705
3706 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3707 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3708 if (error)
3709 return (error);
3710 } else {
3711 c = *((char *)(ndp->ni_dirp));
3712 }
3713
3714 if (c != '/') {
3715 vnode_t dvp_at;
3716
3717 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3718 &dvp_at);
3719 if (error)
3720 return (error);
3721
3722 if (vnode_vtype(dvp_at) != VDIR) {
3723 vnode_put(dvp_at);
3724 return (ENOTDIR);
3725 }
3726
3727 ndp->ni_dvp = dvp_at;
3728 ndp->ni_cnd.cn_flags |= USEDVP;
3729 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
3730 retval);
3731 vnode_put(dvp_at);
3732 return (error);
3733 }
3734 }
3735
3736 return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
3737 }
3738
3739 /*
3740 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3741 *
3742 * Parameters: p Process requesting the open
3743 * uap User argument descriptor (see below)
3744 * retval Pointer to an area to receive the
3745 * return value from the system call
3746 *
3747 * Indirect: uap->path Path to open (same as 'open')
3748 * uap->flags Flags to open (same as 'open'
3749 * uap->uid UID to set, if creating
3750 * uap->gid GID to set, if creating
3751 * uap->mode File mode, if creating (same as 'open')
3752 * uap->xsecurity ACL to set, if creating
3753 *
3754 * Returns: 0 Success
3755 * !0 errno value
3756 *
3757 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3758 *
3759 * XXX: We should enumerate the possible errno values here, and where
3760 * in the code they originated.
3761 */
3762 int
3763 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
3764 {
3765 struct filedesc *fdp = p->p_fd;
3766 int ciferror;
3767 kauth_filesec_t xsecdst;
3768 struct vnode_attr va;
3769 struct nameidata nd;
3770 int cmode;
3771
3772 AUDIT_ARG(owner, uap->uid, uap->gid);
3773
3774 xsecdst = NULL;
3775 if ((uap->xsecurity != USER_ADDR_NULL) &&
3776 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3777 return ciferror;
3778
3779 VATTR_INIT(&va);
3780 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3781 VATTR_SET(&va, va_mode, cmode);
3782 if (uap->uid != KAUTH_UID_NONE)
3783 VATTR_SET(&va, va_uid, uap->uid);
3784 if (uap->gid != KAUTH_GID_NONE)
3785 VATTR_SET(&va, va_gid, uap->gid);
3786 if (xsecdst != NULL)
3787 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3788
3789 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3790 uap->path, vfs_context_current());
3791
3792 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3793 fileproc_alloc_init, NULL, retval);
3794 if (xsecdst != NULL)
3795 kauth_filesec_free(xsecdst);
3796
3797 return ciferror;
3798 }
3799
3800 /*
3801 * Go through the data-protected atomically controlled open (2)
3802 *
3803 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3804 */
3805 int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3806 int flags = uap->flags;
3807 int class = uap->class;
3808 int dpflags = uap->dpflags;
3809
3810 /*
3811 * Follow the same path as normal open(2)
3812 * Look up the item if it exists, and acquire the vnode.
3813 */
3814 struct filedesc *fdp = p->p_fd;
3815 struct vnode_attr va;
3816 struct nameidata nd;
3817 int cmode;
3818 int error;
3819
3820 VATTR_INIT(&va);
3821 /* Mask off all but regular access permissions */
3822 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3823 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3824
3825 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3826 uap->path, vfs_context_current());
3827
3828 /*
3829 * Initialize the extra fields in vnode_attr to pass down our
3830 * extra fields.
3831 * 1. target cprotect class.
3832 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3833 */
3834 if (flags & O_CREAT) {
3835 /* lower level kernel code validates that the class is valid before applying it. */
3836 if (class != PROTECTION_CLASS_DEFAULT) {
3837 /*
3838 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3839 * file behave the same as open (2)
3840 */
3841 VATTR_SET(&va, va_dataprotect_class, class);
3842 }
3843 }
3844
3845 if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
3846 if ( flags & (O_RDWR | O_WRONLY)) {
3847 /* Not allowed to write raw encrypted bytes */
3848 return EINVAL;
3849 }
3850 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
3851 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3852 }
3853 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
3854 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
3855 }
3856 }
3857
3858 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3859 fileproc_alloc_init, NULL, retval);
3860
3861 return error;
3862 }
3863
3864 static int
3865 openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
3866 int fd, enum uio_seg segflg, int *retval)
3867 {
3868 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
3869 struct vnode_attr va;
3870 struct nameidata nd;
3871 int cmode;
3872
3873 VATTR_INIT(&va);
3874 /* Mask off all but regular access permissions */
3875 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3876 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3877
3878 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
3879 segflg, path, ctx);
3880
3881 return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
3882 retval, fd));
3883 }
3884
3885 int
3886 open(proc_t p, struct open_args *uap, int32_t *retval)
3887 {
3888 __pthread_testcancel(1);
3889 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3890 }
3891
3892 int
3893 open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3894 int32_t *retval)
3895 {
3896 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3897 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3898 }
3899
3900 int
3901 openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3902 int32_t *retval)
3903 {
3904 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3905 uap->mode, uap->fd, UIO_USERSPACE, retval));
3906 }
3907
3908 int
3909 openat(proc_t p, struct openat_args *uap, int32_t *retval)
3910 {
3911 __pthread_testcancel(1);
3912 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3913 }
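/*
 * Illustrative user-space sketch (the directory and file names are
 * hypothetical): openat(2) resolves a relative path against a directory fd
 * rather than the process working directory; an absolute path ignores the fd,
 * as in open1at() above.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int
 *	open_in_dir(const char *dir, const char *name)
 *	{
 *		int dfd = open(dir, O_RDONLY | O_DIRECTORY);
 *		if (dfd == -1)
 *			return -1;
 *		int fd = openat(dfd, name, O_RDONLY);
 *		close(dfd);
 *		return fd;
 *	}
 */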
3914
3915 /*
3916 * openbyid_np: open a file given a file system id and a file system object id
3917 * The HFS file system object id is an fsobj_id_t {uint32, uint32};
3918 * for file systems that don't support object ids it is a node id (uint64_t).
3919 *
3920 * Parameters: p Process requesting the open
3921 * uap User argument descriptor (see below)
3922 * retval Pointer to an area to receive the
3923 * return value from the system call
3924 *
3925 * Indirect: uap->path Path to open (same as 'open')
3926 *
3927 * uap->fsid id of target file system
3928 * uap->objid id of target file system object
3929 * uap->flags Flags to open (same as 'open')
3930 *
3931 * Returns: 0 Success
3932 * !0 errno value
3933 *
3934 *
3935 * XXX: We should enumerate the possible errno values here, and where
3936 * in the code they originated.
3937 */
3938 int
3939 openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
3940 {
3941 fsid_t fsid;
3942 uint64_t objid;
3943 int error;
3944 char *buf = NULL;
3945 int buflen = MAXPATHLEN;
3946 int pathlen = 0;
3947 vfs_context_t ctx = vfs_context_current();
3948
3949 if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
3950 return (error);
3951 }
3952
3953 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
3954 return (error);
3955 }
3956
3957 /* uap->objid is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3958 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
3959 return (error);
3960 }
3961
3962 AUDIT_ARG(value32, fsid.val[0]);
3963 AUDIT_ARG(value64, objid);
3964
3965 /* resolve path from fsid, objid */
3966 do {
3967 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
3968 if (buf == NULL) {
3969 return (ENOMEM);
3970 }
3971
3972 error = fsgetpath_internal(
3973 ctx, fsid.val[0], objid,
3974 buflen, buf, &pathlen);
3975
3976 if (error) {
3977 FREE(buf, M_TEMP);
3978 buf = NULL;
3979 }
3980 } while (error == ENOSPC && (buflen += MAXPATHLEN));
3981
3982 if (error) {
3983 return error;
3984 }
3985
3986 buf[pathlen] = 0;
3987
3988 error = openat_internal(
3989 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
3990
3991 FREE(buf, M_TEMP);
3992
3993 return error;
3994 }
3995
3996
3997 /*
3998 * Create a special file.
3999 */
4000 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
4001
4002 int
4003 mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
4004 {
4005 struct vnode_attr va;
4006 vfs_context_t ctx = vfs_context_current();
4007 int error;
4008 struct nameidata nd;
4009 vnode_t vp, dvp;
4010
4011 VATTR_INIT(&va);
4012 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4013 VATTR_SET(&va, va_rdev, uap->dev);
4014
4015 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
4016 if ((uap->mode & S_IFMT) == S_IFIFO)
4017 return(mkfifo1(ctx, uap->path, &va));
4018
4019 AUDIT_ARG(mode, uap->mode);
4020 AUDIT_ARG(value32, uap->dev);
4021
4022 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
4023 return (error);
4024 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
4025 UIO_USERSPACE, uap->path, ctx);
4026 error = namei(&nd);
4027 if (error)
4028 return (error);
4029 dvp = nd.ni_dvp;
4030 vp = nd.ni_vp;
4031
4032 if (vp != NULL) {
4033 error = EEXIST;
4034 goto out;
4035 }
4036
4037 switch (uap->mode & S_IFMT) {
4038 case S_IFCHR:
4039 VATTR_SET(&va, va_type, VCHR);
4040 break;
4041 case S_IFBLK:
4042 VATTR_SET(&va, va_type, VBLK);
4043 break;
4044 default:
4045 error = EINVAL;
4046 goto out;
4047 }
4048
4049 #if CONFIG_MACF
4050 error = mac_vnode_check_create(ctx,
4051 nd.ni_dvp, &nd.ni_cnd, &va);
4052 if (error)
4053 goto out;
4054 #endif
4055
4056 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4057 goto out;
4058
4059 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
4060 goto out;
4061
4062 if (vp) {
4063 int update_flags = 0;
4064
4065 // Make sure the name & parent pointers are hooked up
4066 if (vp->v_name == NULL)
4067 update_flags |= VNODE_UPDATE_NAME;
4068 if (vp->v_parent == NULLVP)
4069 update_flags |= VNODE_UPDATE_PARENT;
4070
4071 if (update_flags)
4072 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
4073
4074 #if CONFIG_FSE
4075 add_fsevent(FSE_CREATE_FILE, ctx,
4076 FSE_ARG_VNODE, vp,
4077 FSE_ARG_DONE);
4078 #endif
4079 }
4080
4081 out:
4082 /*
4083 * nameidone has to happen before we vnode_put(dvp)
4084 * since it may need to release the fs_nodelock on the dvp
4085 */
4086 nameidone(&nd);
4087
4088 if (vp)
4089 vnode_put(vp);
4090 vnode_put(dvp);
4091
4092 return (error);
4093 }
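/*
 * Illustrative user-space sketch (root only; the path and device numbers are
 * hypothetical): create a character device node.  A mode of S_IFIFO would be
 * routed to mkfifo1() by the handler above instead.
 *
 *	#include <sys/types.h>
 *	#include <sys/stat.h>
 *
 *	int
 *	make_char_node(void)
 *	{
 *		return mknod("/tmp/mynode", S_IFCHR | 0600, makedev(3, 2));
 *	}
 */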
4094
4095 /*
4096 * Create a named pipe.
4097 *
4098 * Returns: 0 Success
4099 * EEXIST
4100 * namei:???
4101 * vnode_authorize:???
4102 * vn_create:???
4103 */
4104 static int
4105 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
4106 {
4107 vnode_t vp, dvp;
4108 int error;
4109 struct nameidata nd;
4110
4111 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
4112 UIO_USERSPACE, upath, ctx);
4113 error = namei(&nd);
4114 if (error)
4115 return (error);
4116 dvp = nd.ni_dvp;
4117 vp = nd.ni_vp;
4118
4119 /* check that this is a new file and authorize addition */
4120 if (vp != NULL) {
4121 error = EEXIST;
4122 goto out;
4123 }
4124 VATTR_SET(vap, va_type, VFIFO);
4125
4126 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
4127 goto out;
4128
4129 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
4130 out:
4131 /*
4132 * nameidone has to happen before we vnode_put(dvp)
4133 * since it may need to release the fs_nodelock on the dvp
4134 */
4135 nameidone(&nd);
4136
4137 if (vp)
4138 vnode_put(vp);
4139 vnode_put(dvp);
4140
4141 return error;
4142 }
4143
4144
4145 /*
4146 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
4147 *
4148 * Parameters: p Process requesting the open
4149 * uap User argument descriptor (see below)
4150 * retval (Ignored)
4151 *
4152 * Indirect: uap->path Path to fifo (same as 'mkfifo')
4153 * uap->uid UID to set
4154 * uap->gid GID to set
4155 * uap->mode File mode to set (same as 'mkfifo')
4156 * uap->xsecurity ACL to set, if creating
4157 *
4158 * Returns: 0 Success
4159 * !0 errno value
4160 *
4161 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4162 *
4163 * XXX: We should enumerate the possible errno values here, and where
4164 * in the code they originated.
4165 */
4166 int
4167 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
4168 {
4169 int ciferror;
4170 kauth_filesec_t xsecdst;
4171 struct vnode_attr va;
4172
4173 AUDIT_ARG(owner, uap->uid, uap->gid);
4174
4175 xsecdst = KAUTH_FILESEC_NONE;
4176 if (uap->xsecurity != USER_ADDR_NULL) {
4177 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4178 return ciferror;
4179 }
4180
4181 VATTR_INIT(&va);
4182 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4183 if (uap->uid != KAUTH_UID_NONE)
4184 VATTR_SET(&va, va_uid, uap->uid);
4185 if (uap->gid != KAUTH_GID_NONE)
4186 VATTR_SET(&va, va_gid, uap->gid);
4187 if (xsecdst != KAUTH_FILESEC_NONE)
4188 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4189
4190 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
4191
4192 if (xsecdst != KAUTH_FILESEC_NONE)
4193 kauth_filesec_free(xsecdst);
4194 return ciferror;
4195 }
4196
4197 /* ARGSUSED */
4198 int
4199 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
4200 {
4201 struct vnode_attr va;
4202
4203 VATTR_INIT(&va);
4204 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
4205
4206 return(mkfifo1(vfs_context_current(), uap->path, &va));
4207 }
4208
4209
4210 static char *
4211 my_strrchr(char *p, int ch)
4212 {
4213 char *save;
4214
4215 for (save = NULL;; ++p) {
4216 if (*p == ch)
4217 save = p;
4218 if (!*p)
4219 return(save);
4220 }
4221 /* NOTREACHED */
4222 }
4223
4224 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
4225
4226 int
4227 safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4228 {
4229 int ret, len = _len;
4230
4231 *truncated_path = 0;
4232 ret = vn_getpath(dvp, path, &len);
4233 if (ret == 0 && len < (MAXPATHLEN - 1)) {
4234 if (leafname) {
4235 path[len-1] = '/';
4236 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
4237 if (len > MAXPATHLEN) {
4238 char *ptr;
4239
4240 // the string got truncated!
4241 *truncated_path = 1;
4242 ptr = my_strrchr(path, '/');
4243 if (ptr) {
4244 *ptr = '\0'; // chop off the string at the last directory component
4245 }
4246 len = strlen(path) + 1;
4247 }
4248 }
4249 } else if (ret == 0) {
4250 *truncated_path = 1;
4251 } else if (ret != 0) {
4252 struct vnode *mydvp=dvp;
4253
4254 if (ret != ENOSPC) {
4255 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4256 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
4257 }
4258 *truncated_path = 1;
4259
4260 do {
4261 if (mydvp->v_parent != NULL) {
4262 mydvp = mydvp->v_parent;
4263 } else if (mydvp->v_mount) {
4264 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4265 break;
4266 } else {
4267 // no parent and no mount point? only thing is to punt and say "/" changed
4268 strlcpy(path, "/", _len);
4269 len = 2;
4270 mydvp = NULL;
4271 }
4272
4273 if (mydvp == NULL) {
4274 break;
4275 }
4276
4277 len = _len;
4278 ret = vn_getpath(mydvp, path, &len);
4279 } while (ret == ENOSPC);
4280 }
4281
4282 return len;
4283 }
4284
4285
4286 /*
4287 * Make a hard file link.
4288 *
4289 * Returns: 0 Success
4290 * EPERM
4291 * EEXIST
4292 * EXDEV
4293 * namei:???
4294 * vnode_authorize:???
4295 * VNOP_LINK:???
4296 */
4297 /* ARGSUSED */
4298 static int
4299 linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
4300 user_addr_t link, int flag, enum uio_seg segflg)
4301 {
4302 vnode_t vp, dvp, lvp;
4303 struct nameidata nd;
4304 int follow;
4305 int error;
4306 #if CONFIG_FSE
4307 fse_info finfo;
4308 #endif
4309 int need_event, has_listeners;
4310 char *target_path = NULL;
4311 int truncated=0;
4312
4313 vp = dvp = lvp = NULLVP;
4314
4315 /* look up the object we are linking to */
4316 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
4317 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
4318 segflg, path, ctx);
4319
4320 error = nameiat(&nd, fd1);
4321 if (error)
4322 return (error);
4323 vp = nd.ni_vp;
4324
4325 nameidone(&nd);
4326
4327 /*
4328 * Normally, linking to directories is not supported.
4329 * However, some file systems may have limited support.
4330 */
4331 if (vp->v_type == VDIR) {
4332 if (!ISSET(vp->v_mount->mnt_kern_flag, MNTK_DIR_HARDLINKS)) {
4333 error = EPERM; /* POSIX */
4334 goto out;
4335 }
4336
4337 /* Linking to a directory requires ownership. */
4338 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
4339 struct vnode_attr dva;
4340
4341 VATTR_INIT(&dva);
4342 VATTR_WANTED(&dva, va_uid);
4343 if (vnode_getattr(vp, &dva, ctx) != 0 ||
4344 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
4345 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
4346 error = EACCES;
4347 goto out;
4348 }
4349 }
4350 }
4351
4352 /* lookup the target node */
4353 #if CONFIG_TRIGGERS
4354 nd.ni_op = OP_LINK;
4355 #endif
4356 nd.ni_cnd.cn_nameiop = CREATE;
4357 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
4358 nd.ni_dirp = link;
4359 error = nameiat(&nd, fd2);
4360 if (error != 0)
4361 goto out;
4362 dvp = nd.ni_dvp;
4363 lvp = nd.ni_vp;
4364
4365 #if CONFIG_MACF
4366 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
4367 goto out2;
4368 #endif
4369
4370 /* or to anything that kauth doesn't want us to (e.g. immutable items) */
4371 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
4372 goto out2;
4373
4374 /* target node must not exist */
4375 if (lvp != NULLVP) {
4376 error = EEXIST;
4377 goto out2;
4378 }
4379 /* cannot link across mountpoints */
4380 if (vnode_mount(vp) != vnode_mount(dvp)) {
4381 error = EXDEV;
4382 goto out2;
4383 }
4384
4385 /* authorize creation of the target node */
4386 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4387 goto out2;
4388
4389 /* and finally make the link */
4390 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
4391 if (error)
4392 goto out2;
4393
4394 #if CONFIG_MACF
4395 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
4396 #endif
4397
4398 #if CONFIG_FSE
4399 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
4400 #else
4401 need_event = 0;
4402 #endif
4403 has_listeners = kauth_authorize_fileop_has_listeners();
4404
4405 if (need_event || has_listeners) {
4406 char *link_to_path = NULL;
4407 int len, link_name_len;
4408
4409 /* build the path to the new link file */
4410 GET_PATH(target_path);
4411 if (target_path == NULL) {
4412 error = ENOMEM;
4413 goto out2;
4414 }
4415
4416 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
4417
4418 if (has_listeners) {
4419 /* build the path to file we are linking to */
4420 GET_PATH(link_to_path);
4421 if (link_to_path == NULL) {
4422 error = ENOMEM;
4423 goto out2;
4424 }
4425
4426 link_name_len = MAXPATHLEN;
4427 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
4428 /*
4429 * Call out to allow 3rd party notification of the link creation.
4430 * Ignore result of kauth_authorize_fileop call.
4431 */
4432 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
4433 (uintptr_t)link_to_path,
4434 (uintptr_t)target_path);
4435 }
4436 if (link_to_path != NULL) {
4437 RELEASE_PATH(link_to_path);
4438 }
4439 }
4440 #if CONFIG_FSE
4441 if (need_event) {
4442 /* construct fsevent */
4443 if (get_fse_info(vp, &finfo, ctx) == 0) {
4444 if (truncated) {
4445 finfo.mode |= FSE_TRUNCATED_PATH;
4446 }
4447
4448 // build the path to the destination of the link
4449 add_fsevent(FSE_CREATE_FILE, ctx,
4450 FSE_ARG_STRING, len, target_path,
4451 FSE_ARG_FINFO, &finfo,
4452 FSE_ARG_DONE);
4453 }
4454 if (vp->v_parent) {
4455 add_fsevent(FSE_STAT_CHANGED, ctx,
4456 FSE_ARG_VNODE, vp->v_parent,
4457 FSE_ARG_DONE);
4458 }
4459 }
4460 #endif
4461 }
4462 out2:
4463 /*
4464 * nameidone has to happen before we vnode_put(dvp)
4465 * since it may need to release the fs_nodelock on the dvp
4466 */
4467 nameidone(&nd);
4468 if (target_path != NULL) {
4469 RELEASE_PATH(target_path);
4470 }
4471 out:
4472 if (lvp)
4473 vnode_put(lvp);
4474 if (dvp)
4475 vnode_put(dvp);
4476 vnode_put(vp);
4477 return (error);
4478 }
4479
4480 int
4481 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4482 {
4483 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4484 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4485 }
4486
4487 int
4488 linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4489 {
4490 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4491 return (EINVAL);
4492
4493 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4494 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4495 }
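/*
 * Illustrative userspace sketch (not part of this source file), using
 * hypothetical names "old" and "new": linkat() resolves "old" relative
 * to fd1 (following a trailing symlink only when AT_SYMLINK_FOLLOW is
 * set) and creates "new" relative to fd2; EXDEV is returned when the
 * two paths land on different mounts, as enforced above.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	if (linkat(AT_FDCWD, "old", AT_FDCWD, "new", AT_SYMLINK_FOLLOW) == -1)
 *		perror("linkat");
 */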
4496
4497 /*
4498 * Make a symbolic link.
4499 *
4500 * We could add support for ACLs here too...
4501 */
4502 /* ARGSUSED */
4503 static int
4504 symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
4505 user_addr_t link, enum uio_seg segflg)
4506 {
4507 struct vnode_attr va;
4508 char *path;
4509 int error;
4510 struct nameidata nd;
4511 vnode_t vp, dvp;
4512 size_t dummy=0;
4513 proc_t p;
4514
4515 error = 0;
4516 if (UIO_SEG_IS_USER_SPACE(segflg)) {
4517 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
4518 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
4519 } else {
4520 path = (char *)path_data;
4521 }
4522 if (error)
4523 goto out;
4524 AUDIT_ARG(text, path); /* This is the link string */
4525
4526 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
4527 segflg, link, ctx);
4528
4529 error = nameiat(&nd, fd);
4530 if (error)
4531 goto out;
4532 dvp = nd.ni_dvp;
4533 vp = nd.ni_vp;
4534
4535 p = vfs_context_proc(ctx);
4536 VATTR_INIT(&va);
4537 VATTR_SET(&va, va_type, VLNK);
4538 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
4539
4540 #if CONFIG_MACF
4541 error = mac_vnode_check_create(ctx,
4542 dvp, &nd.ni_cnd, &va);
4543 #endif
4544 if (error != 0) {
4545 goto skipit;
4546 }
4547
4548 if (vp != NULL) {
4549 error = EEXIST;
4550 goto skipit;
4551 }
4552
4553 /* authorize */
4554 if (error == 0)
4555 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
4556 /* get default ownership, etc. */
4557 if (error == 0)
4558 error = vnode_authattr_new(dvp, &va, 0, ctx);
4559 if (error == 0)
4560 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
4561
4562 #if CONFIG_MACF
4563 if (error == 0 && vp)
4564 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
4565 #endif
4566
4567 /* do fallback attribute handling */
4568 if (error == 0 && vp)
4569 error = vnode_setattr_fallback(vp, &va, ctx);
4570
4571 if (error == 0) {
4572 int update_flags = 0;
4573
4574 /* check if a new vnode was created, else try to get one */
4575 if (vp == NULL) {
4576 nd.ni_cnd.cn_nameiop = LOOKUP;
4577 #if CONFIG_TRIGGERS
4578 nd.ni_op = OP_LOOKUP;
4579 #endif
4580 nd.ni_cnd.cn_flags = 0;
4581 error = nameiat(&nd, fd);
4582 vp = nd.ni_vp;
4583
4584 if (vp == NULL)
4585 goto skipit;
4586 }
4587
4588 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
4589 /* call out to allow 3rd party notification of the symlink creation.
4590 * Ignore result of kauth_authorize_fileop call.
4591 */
4592 if (kauth_authorize_fileop_has_listeners() &&
4593 namei(&nd) == 0) {
4594 char *new_link_path = NULL;
4595 int len;
4596
4597 /* build the path to the new link file */
4598 new_link_path = get_pathbuff();
4599 len = MAXPATHLEN;
4600 vn_getpath(dvp, new_link_path, &len);
4601 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
4602 new_link_path[len - 1] = '/';
4603 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
4604 }
4605
4606 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
4607 (uintptr_t)path, (uintptr_t)new_link_path);
4608 if (new_link_path != NULL)
4609 release_pathbuff(new_link_path);
4610 }
4611 #endif
4612 // Make sure the name & parent pointers are hooked up
4613 if (vp->v_name == NULL)
4614 update_flags |= VNODE_UPDATE_NAME;
4615 if (vp->v_parent == NULLVP)
4616 update_flags |= VNODE_UPDATE_PARENT;
4617
4618 if (update_flags)
4619 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
4620
4621 #if CONFIG_FSE
4622 add_fsevent(FSE_CREATE_FILE, ctx,
4623 FSE_ARG_VNODE, vp,
4624 FSE_ARG_DONE);
4625 #endif
4626 }
4627
4628 skipit:
4629 /*
4630 * nameidone has to happen before we vnode_put(dvp)
4631 * since it may need to release the fs_nodelock on the dvp
4632 */
4633 nameidone(&nd);
4634
4635 if (vp)
4636 vnode_put(vp);
4637 vnode_put(dvp);
4638 out:
4639 if (path && (path != (char *)path_data))
4640 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
4641
4642 return (error);
4643 }
4644
4645 int
4646 symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4647 {
4648 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4649 uap->link, UIO_USERSPACE));
4650 }
4651
4652 int
4653 symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4654 __unused int32_t *retval)
4655 {
4656 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4657 uap->path2, UIO_USERSPACE));
4658 }
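/*
 * Illustrative userspace sketch (not part of this source file): for
 * symlinkat() the first argument is the link contents (copied in above
 * with copyinstr()), and the second path, resolved relative to fd, is
 * where the link gets created. Names are hypothetical.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	if (symlinkat("../target/file", AT_FDCWD, "alias") == -1)
 *		perror("symlinkat");
 */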
4659
4660 /*
4661 * Delete a whiteout from the filesystem.
4662 * No longer supported.
4663 */
4664 int
4665 undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
4666 {
4667 return (ENOTSUP);
4668 }
4669
4670 /*
4671 * Delete a name from the filesystem.
4672 */
4673 /* ARGSUSED */
4674 static int
4675 unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
4676 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
4677 {
4678 struct nameidata nd;
4679 vnode_t vp, dvp;
4680 int error;
4681 struct componentname *cnp;
4682 char *path = NULL;
4683 int len=0;
4684 #if CONFIG_FSE
4685 fse_info finfo;
4686 struct vnode_attr va;
4687 #endif
4688 int flags;
4689 int need_event;
4690 int has_listeners;
4691 int truncated_path;
4692 int batched;
4693 struct vnode_attr *vap;
4694 int do_retry;
4695 int retry_count = 0;
4696 int cn_flags;
4697
4698 cn_flags = LOCKPARENT;
4699 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
4700 cn_flags |= AUDITVNPATH1;
4701 /* If a starting dvp is passed, it trumps any fd passed. */
4702 if (start_dvp)
4703 cn_flags |= USEDVP;
4704
4705 #if NAMEDRSRCFORK
4706 /* unlink or delete is allowed on rsrc forks and named streams */
4707 cn_flags |= CN_ALLOWRSRCFORK;
4708 #endif
4709
4710 retry:
4711 do_retry = 0;
4712 flags = 0;
4713 need_event = 0;
4714 has_listeners = 0;
4715 truncated_path = 0;
4716 vap = NULL;
4717
4718 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
4719
4720 nd.ni_dvp = start_dvp;
4721 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
4722 cnp = &nd.ni_cnd;
4723
4724 continue_lookup:
4725 error = nameiat(&nd, fd);
4726 if (error)
4727 return (error);
4728
4729 dvp = nd.ni_dvp;
4730 vp = nd.ni_vp;
4731
4732
4733 /* With Carbon delete semantics, busy files cannot be deleted */
4734 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
4735 flags |= VNODE_REMOVE_NODELETEBUSY;
4736 }
4737
4738 /* Skip any potential upcalls if told to. */
4739 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4740 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4741 }
4742
4743 if (vp) {
4744 batched = vnode_compound_remove_available(vp);
4745 /*
4746 * The root of a mounted filesystem cannot be deleted.
4747 */
4748 if (vp->v_flag & VROOT) {
4749 error = EBUSY;
4750 }
4751
4752 #if DEVELOPMENT || DEBUG
4753 /*
4754 * XXX VSWAP: Check for entitlements or special flag here
4755 * so we can restrict access appropriately.
4756 */
4757 #else /* DEVELOPMENT || DEBUG */
4758
4759 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
4760 error = EPERM;
4761 goto out;
4762 }
4763 #endif /* DEVELOPMENT || DEBUG */
4764
4765 if (!batched) {
4766 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4767 if (error) {
4768 if (error == ENOENT) {
4769 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4770 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4771 do_retry = 1;
4772 retry_count++;
4773 }
4774 }
4775 goto out;
4776 }
4777 }
4778 } else {
4779 batched = 1;
4780
4781 if (!vnode_compound_remove_available(dvp)) {
4782 panic("No vp, but no compound remove?");
4783 }
4784 }
4785
4786 #if CONFIG_FSE
4787 need_event = need_fsevent(FSE_DELETE, dvp);
4788 if (need_event) {
4789 if (!batched) {
4790 if ((vp->v_flag & VISHARDLINK) == 0) {
4791 /* XXX need to get these data in batched VNOP */
4792 get_fse_info(vp, &finfo, ctx);
4793 }
4794 } else {
4795 error = vfs_get_notify_attributes(&va);
4796 if (error) {
4797 goto out;
4798 }
4799
4800 vap = &va;
4801 }
4802 }
4803 #endif
4804 has_listeners = kauth_authorize_fileop_has_listeners();
4805 if (need_event || has_listeners) {
4806 if (path == NULL) {
4807 GET_PATH(path);
4808 if (path == NULL) {
4809 error = ENOMEM;
4810 goto out;
4811 }
4812 }
4813 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
4814 }
4815
4816 #if NAMEDRSRCFORK
4817 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
4818 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4819 else
4820 #endif
4821 {
4822 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
4823 vp = nd.ni_vp;
4824 if (error == EKEEPLOOKING) {
4825 if (!batched) {
4826 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4827 }
4828
4829 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
4830 panic("EKEEPLOOKING, but continue flag not set?");
4831 }
4832
4833 if (vnode_isdir(vp)) {
4834 error = EISDIR;
4835 goto out;
4836 }
4837 goto continue_lookup;
4838 } else if (error == ENOENT && batched) {
4839 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4840 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4841 /*
4842 * For compound VNOPs, the authorization callback may
4843 * return ENOENT in case of racing hardlink lookups
4844 * hitting the name cache; redrive the lookup.
4845 */
4846 do_retry = 1;
4847 retry_count += 1;
4848 goto out;
4849 }
4850 }
4851 }
4852
4853 /*
4854 * Call out to allow 3rd party notification of delete.
4855 * Ignore result of kauth_authorize_fileop call.
4856 */
4857 if (!error) {
4858 if (has_listeners) {
4859 kauth_authorize_fileop(vfs_context_ucred(ctx),
4860 KAUTH_FILEOP_DELETE,
4861 (uintptr_t)vp,
4862 (uintptr_t)path);
4863 }
4864
4865 if (vp->v_flag & VISHARDLINK) {
4866 //
4867 // if a hardlink gets deleted we want to blow away the
4868 // v_parent link because the path that got us to this
4869 // instance of the link is no longer valid. this will
4870 // force the next call to get the path to ask the file
4871 // system instead of just following the v_parent link.
4872 //
4873 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
4874 }
4875
4876 #if CONFIG_FSE
4877 if (need_event) {
4878 if (vp->v_flag & VISHARDLINK) {
4879 get_fse_info(vp, &finfo, ctx);
4880 } else if (vap) {
4881 vnode_get_fse_info_from_vap(vp, &finfo, vap);
4882 }
4883 if (truncated_path) {
4884 finfo.mode |= FSE_TRUNCATED_PATH;
4885 }
4886 add_fsevent(FSE_DELETE, ctx,
4887 FSE_ARG_STRING, len, path,
4888 FSE_ARG_FINFO, &finfo,
4889 FSE_ARG_DONE);
4890 }
4891 #endif
4892 }
4893
4894 out:
4895 if (path != NULL)
4896 RELEASE_PATH(path);
4897
4898 #if NAMEDRSRCFORK
4899 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4900 * will cause its shadow file to go away if necessary.
4901 */
4902 if (vp && (vnode_isnamedstream(vp)) &&
4903 (vp->v_parent != NULLVP) &&
4904 vnode_isshadow(vp)) {
4905 vnode_recycle(vp);
4906 }
4907 #endif
4908 /*
4909 * nameidone has to happen before we vnode_put(dvp)
4910 * since it may need to release the fs_nodelock on the dvp
4911 */
4912 nameidone(&nd);
4913 vnode_put(dvp);
4914 if (vp) {
4915 vnode_put(vp);
4916 }
4917
4918 if (do_retry) {
4919 goto retry;
4920 }
4921
4922 return (error);
4923 }
4924
4925 int
4926 unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
4927 enum uio_seg segflg, int unlink_flags)
4928 {
4929 return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
4930 unlink_flags));
4931 }
4932
4933 /*
4934 * Delete a name from the filesystem using Carbon semantics.
4935 */
4936 int
4937 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
4938 {
4939 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4940 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
4941 }
4942
4943 /*
4944 * Delete a name from the filesystem using POSIX semantics.
4945 */
4946 int
4947 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
4948 {
4949 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4950 uap->path, UIO_USERSPACE, 0));
4951 }
4952
4953 int
4954 unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
4955 {
4956 if (uap->flag & ~AT_REMOVEDIR)
4957 return (EINVAL);
4958
4959 if (uap->flag & AT_REMOVEDIR)
4960 return (rmdirat_internal(vfs_context_current(), uap->fd,
4961 uap->path, UIO_USERSPACE));
4962 else
4963 return (unlinkat_internal(vfs_context_current(), uap->fd,
4964 NULLVP, uap->path, UIO_USERSPACE, 0));
4965 }
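/*
 * Illustrative userspace sketch (not part of this source file):
 * unlinkat() with AT_REMOVEDIR takes the rmdirat_internal() path above,
 * otherwise unlinkat_internal(). Directory and names are hypothetical.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int dfd = open("/tmp/workdir", O_RDONLY | O_DIRECTORY);
 *	if (dfd != -1) {
 *		(void)unlinkat(dfd, "scratch.txt", 0);		// remove a file
 *		(void)unlinkat(dfd, "subdir", AT_REMOVEDIR);	// remove an empty directory
 *		close(dfd);
 *	}
 */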
4966
4967 /*
4968 * Reposition read/write file offset.
4969 */
4970 int
4971 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
4972 {
4973 struct fileproc *fp;
4974 vnode_t vp;
4975 struct vfs_context *ctx;
4976 off_t offset = uap->offset, file_size;
4977 int error;
4978
4979 if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
4980 if (error == ENOTSUP)
4981 return (ESPIPE);
4982 return (error);
4983 }
4984 if (vnode_isfifo(vp)) {
4985 file_drop(uap->fd);
4986 return(ESPIPE);
4987 }
4988
4989
4990 ctx = vfs_context_current();
4991 #if CONFIG_MACF
4992 if (uap->whence == L_INCR && uap->offset == 0)
4993 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
4994 fp->f_fglob);
4995 else
4996 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
4997 fp->f_fglob);
4998 if (error) {
4999 file_drop(uap->fd);
5000 return (error);
5001 }
5002 #endif
5003 if ( (error = vnode_getwithref(vp)) ) {
5004 file_drop(uap->fd);
5005 return(error);
5006 }
5007
5008 switch (uap->whence) {
5009 case L_INCR:
5010 offset += fp->f_fglob->fg_offset;
5011 break;
5012 case L_XTND:
5013 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
5014 break;
5015 offset += file_size;
5016 break;
5017 case L_SET:
5018 break;
5019 case SEEK_HOLE:
5020 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKHOLE, (caddr_t)&offset, 0, ctx);
5021 break;
5022 case SEEK_DATA:
5023 error = VNOP_IOCTL(vp, FSIOC_FIOSEEKDATA, (caddr_t)&offset, 0, ctx);
5024 break;
5025 default:
5026 error = EINVAL;
5027 }
5028 if (error == 0) {
5029 if (uap->offset > 0 && offset < 0) {
5030 /* Incremented/relative move past max size */
5031 error = EOVERFLOW;
5032 } else {
5033 /*
5034 * Allow negative offsets on character devices, per
5035 * POSIX 1003.1-2001. Most likely for writing disk
5036 * labels.
5037 */
5038 if (offset < 0 && vp->v_type != VCHR) {
5039 /* Decremented/relative move before start */
5040 error = EINVAL;
5041 } else {
5042 /* Success */
5043 fp->f_fglob->fg_offset = offset;
5044 *retval = fp->f_fglob->fg_offset;
5045 }
5046 }
5047 }
5048
5049 /*
5050 * An lseek can affect whether data is "available to read." Use
5051 * hint of NOTE_NONE so no EVFILT_VNODE events fire
5052 */
5053 post_event_if_success(vp, error, NOTE_NONE);
5054 (void)vnode_put(vp);
5055 file_drop(uap->fd);
5056 return (error);
5057 }
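/*
 * Illustrative userspace sketch (not part of this source file): besides
 * the classic whence values, the switch above forwards SEEK_HOLE and
 * SEEK_DATA to the filesystem via FSIOC_FIOSEEKHOLE/FSIOC_FIOSEEKDATA,
 * so sparse regions can be located without reading the file. The file
 * name is hypothetical and the constants are assumed to be exposed by
 * the SDK in use.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/tmp/sparse.dat", O_RDONLY);
 *	off_t data = lseek(fd, 0, SEEK_DATA);		// first data byte at/after 0
 *	off_t hole = lseek(fd, data, SEEK_HOLE);	// end of that data run
 */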
5058
5059
5060 /*
5061 * Check access permissions.
5062 *
5063 * Returns: 0 Success
5064 * vnode_authorize:???
5065 */
5066 static int
5067 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
5068 {
5069 kauth_action_t action;
5070 int error;
5071
5072 /*
5073 * If just the regular access bits, convert them to something
5074 * that vnode_authorize will understand.
5075 */
5076 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
5077 action = 0;
5078 if (uflags & R_OK)
5079 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
5080 if (uflags & W_OK) {
5081 if (vnode_isdir(vp)) {
5082 action |= KAUTH_VNODE_ADD_FILE |
5083 KAUTH_VNODE_ADD_SUBDIRECTORY;
5084 /* might want delete rights here too */
5085 } else {
5086 action |= KAUTH_VNODE_WRITE_DATA;
5087 }
5088 }
5089 if (uflags & X_OK) {
5090 if (vnode_isdir(vp)) {
5091 action |= KAUTH_VNODE_SEARCH;
5092 } else {
5093 action |= KAUTH_VNODE_EXECUTE;
5094 }
5095 }
5096 } else {
5097 /* take advantage of definition of uflags */
5098 action = uflags >> 8;
5099 }
5100
5101 #if CONFIG_MACF
5102 error = mac_vnode_check_access(ctx, vp, uflags);
5103 if (error)
5104 return (error);
5105 #endif /* MAC */
5106
5107 /* action == 0 means only check for existence */
5108 if (action != 0) {
5109 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
5110 } else {
5111 error = 0;
5112 }
5113
5114 return(error);
5115 }
5116
5117
5118
5119 /*
5120 * access_extended: Check access permissions in bulk.
5121 *
5122 * Description: uap->entries Pointer to an array of accessx
5123 * descriptor structs, plus one or
5124 * more NULL terminated strings (see
5125 * "Notes" section below).
5126 * uap->size Size of the area pointed to by
5127 * uap->entries.
5128 * uap->results Pointer to the results array.
5129 *
5130 * Returns: 0 Success
5131 * ENOMEM Insufficient memory
5132 * EINVAL Invalid arguments
5133 * namei:EFAULT Bad address
5134 * namei:ENAMETOOLONG Filename too long
5135 * namei:ENOENT No such file or directory
5136 * namei:ELOOP Too many levels of symbolic links
5137 * namei:EBADF Bad file descriptor
5138 * namei:ENOTDIR Not a directory
5139 * namei:???
5140 * access1:
5141 *
5142 * Implicit returns:
5143 * uap->results Array contents modified
5144 *
5145 * Notes: The uap->entries are structured as an arbitrary length array
5146 * of accessx descriptors, followed by one or more NULL terminated
5147 * strings
5148 *
5149 * struct accessx_descriptor[0]
5150 * ...
5151 * struct accessx_descriptor[n]
5152 * char name_data[0];
5153 *
5154 * We determine the entry count by walking the buffer containing
5155 * the uap->entries argument descriptor. For each descriptor we
5156 * see, the valid values for the offset ad_name_offset will be
5157 * in the byte range:
5158 *
5159 * [ uap->entries + sizeof(struct accessx_descriptor) ]
5160 * to
5161 * [ uap->entries + uap->size - 2 ]
5162 *
5163 * since we must have at least one string, and the string must
5164 * be at least one character plus the NULL terminator in length.
5165 *
5166 * XXX: Need to support the check-as uid argument
5167 */
5168 int
5169 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
5170 {
5171 struct accessx_descriptor *input = NULL;
5172 errno_t *result = NULL;
5173 errno_t error = 0;
5174 int wantdelete = 0;
5175 unsigned int desc_max, desc_actual, i, j;
5176 struct vfs_context context;
5177 struct nameidata nd;
5178 int niopts;
5179 vnode_t vp = NULL;
5180 vnode_t dvp = NULL;
5181 #define ACCESSX_MAX_DESCR_ON_STACK 10
5182 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
5183
5184 context.vc_ucred = NULL;
5185
5186 /*
5187 * Validate parameters; if valid, copy the descriptor array and string
5188 * arguments into local memory. Before proceeding, the following
5189 * conditions must have been met:
5190 *
5191 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
5192 * o There must be sufficient room in the request for at least one
5193 * descriptor and a one byte NUL terminated string.
5194 * o The allocation of local storage must not fail.
5195 */
5196 if (uap->size > ACCESSX_MAX_TABLESIZE)
5197 return(ENOMEM);
5198 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
5199 return(EINVAL);
5200 if (uap->size <= sizeof (stack_input)) {
5201 input = stack_input;
5202 } else {
5203 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
5204 if (input == NULL) {
5205 error = ENOMEM;
5206 goto out;
5207 }
5208 }
5209 error = copyin(uap->entries, input, uap->size);
5210 if (error)
5211 goto out;
5212
5213 AUDIT_ARG(opaque, input, uap->size);
5214
5215 /*
5216 * Force NUL termination of the copyin buffer to avoid namei() running
5217 * off the end. If the caller passes us bogus data, they may get a
5218 * bogus result.
5219 */
5220 ((char *)input)[uap->size - 1] = 0;
5221
5222 /*
5223 * Access is defined as checking against the process' real identity,
5224 * even if operations are checking the effective identity. This
5225 * requires that we use a local vfs context.
5226 */
5227 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5228 context.vc_thread = current_thread();
5229
5230 /*
5231 * Find out how many entries we have, so we can allocate the result
5232 * array by walking the list and adjusting the count downward by the
5233 * earliest string offset we see.
5234 */
5235 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
5236 desc_actual = desc_max;
5237 for (i = 0; i < desc_actual; i++) {
5238 /*
5239 * Take the offset to the name string for this entry and
5240 * convert to an input array index, which would be one off
5241 * the end of the array if this entry was the lowest-addressed
5242 * name string.
5243 */
5244 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
5245
5246 /*
5247 * An offset greater than the max allowable offset is an error.
5248 * It is also an error for any valid entry to point
5249 * to a location prior to the end of the current entry, if
5250 * it's not a reference to the string of the previous entry.
5251 */
5252 if (j > desc_max || (j != 0 && j <= i)) {
5253 error = EINVAL;
5254 goto out;
5255 }
5256
5257 /* Also do not let ad_name_offset point to something beyond the size of the input */
5258 if (input[i].ad_name_offset >= uap->size) {
5259 error = EINVAL;
5260 goto out;
5261 }
5262
5263 /*
5264 * An offset of 0 means use the previous descriptor's offset;
5265 * this is used to chain multiple requests for the same file
5266 * to avoid multiple lookups.
5267 */
5268 if (j == 0) {
5269 /* This is not valid for the first entry */
5270 if (i == 0) {
5271 error = EINVAL;
5272 goto out;
5273 }
5274 continue;
5275 }
5276
5277 /*
5278 * If the offset of the string for this descriptor is before
5279 * what we believe is the current actual last descriptor,
5280 * then we need to adjust our estimate downward; this permits
5281 * the string table following the last descriptor to be out
5282 * of order relative to the descriptor list.
5283 */
5284 if (j < desc_actual)
5285 desc_actual = j;
5286 }
5287
5288 /*
5289 * We limit the actual number of descriptors we are willing to process
5290 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5291 * requested exceeds this limit, the request fails with ENOMEM.
5292 */
5293 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
5294 error = ENOMEM;
5295 goto out;
5296 }
5297 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
5298 if (result == NULL) {
5299 error = ENOMEM;
5300 goto out;
5301 }
5302
5303 /*
5304 * Do the work by iterating over the descriptor entries we know to
5305 * at least appear to contain valid data.
5306 */
5307 error = 0;
5308 for (i = 0; i < desc_actual; i++) {
5309 /*
5310 * If the ad_name_offset is 0, then we use the previous
5311 * results to make the check; otherwise, we are looking up
5312 * a new file name.
5313 */
5314 if (input[i].ad_name_offset != 0) {
5315 /* discard old vnodes */
5316 if (vp) {
5317 vnode_put(vp);
5318 vp = NULL;
5319 }
5320 if (dvp) {
5321 vnode_put(dvp);
5322 dvp = NULL;
5323 }
5324
5325 /*
5326 * Scan forward in the descriptor list to see if we
5327 * need the parent vnode. We will need it if we are
5328 * deleting, since we must have rights to remove
5329 * entries in the parent directory, as well as the
5330 * rights to delete the object itself.
5331 */
5332 wantdelete = input[i].ad_flags & _DELETE_OK;
5333 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
5334 if (input[j].ad_flags & _DELETE_OK)
5335 wantdelete = 1;
5336
5337 niopts = FOLLOW | AUDITVNPATH1;
5338
5339 /* need parent for vnode_authorize for deletion test */
5340 if (wantdelete)
5341 niopts |= WANTPARENT;
5342
5343 /* do the lookup */
5344 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
5345 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
5346 &context);
5347 error = namei(&nd);
5348 if (!error) {
5349 vp = nd.ni_vp;
5350 if (wantdelete)
5351 dvp = nd.ni_dvp;
5352 }
5353 nameidone(&nd);
5354 }
5355
5356 /*
5357 * Handle lookup errors.
5358 */
5359 switch(error) {
5360 case ENOENT:
5361 case EACCES:
5362 case EPERM:
5363 case ENOTDIR:
5364 result[i] = error;
5365 break;
5366 case 0:
5367 /* run this access check */
5368 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
5369 break;
5370 default:
5371 /* fatal lookup error */
5372
5373 goto out;
5374 }
5375 }
5376
5377 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
5378
5379 /* copy out results */
5380 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
5381
5382 out:
5383 if (input && input != stack_input)
5384 FREE(input, M_TEMP);
5385 if (result)
5386 FREE(result, M_TEMP);
5387 if (vp)
5388 vnode_put(vp);
5389 if (dvp)
5390 vnode_put(dvp);
5391 if (IS_VALID_CRED(context.vc_ucred))
5392 kauth_cred_unref(&context.vc_ucred);
5393 return(error);
5394 }
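/*
 * Illustrative userspace sketch (not part of this source file): the
 * entries buffer is a packed array of accessx_descriptor structs
 * followed by the NUL-terminated strings they reference through
 * ad_name_offset, exactly as laid out in the block comment above. The
 * accessx_np() wrapper, the _READ_OK/_WRITE_OK flag names and the path
 * are assumptions here, not taken from this file.
 *
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	struct {
 *		struct accessx_descriptor d[2];
 *		char names[32];
 *	} req;
 *	int results[2];
 *
 *	memset(&req, 0, sizeof(req));
 *	req.d[0].ad_name_offset = sizeof(req.d);	// string follows the descriptors
 *	req.d[0].ad_flags = _READ_OK;
 *	req.d[1].ad_name_offset = 0;			// 0 = reuse the previous entry's name
 *	req.d[1].ad_flags = _WRITE_OK;
 *	strlcpy(req.names, "/tmp/example", sizeof(req.names));
 *
 *	accessx_np(&req.d[0], sizeof(req.d) + strlen(req.names) + 1,
 *	    results, -1);
 */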
5395
5396
5397 /*
5398 * Returns: 0 Success
5399 * namei:EFAULT Bad address
5400 * namei:ENAMETOOLONG Filename too long
5401 * namei:ENOENT No such file or directory
5402 * namei:ELOOP Too many levels of symbolic links
5403 * namei:EBADF Bad file descriptor
5404 * namei:ENOTDIR Not a directory
5405 * namei:???
5406 * access1:
5407 */
5408 static int
5409 faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5410 int flag, enum uio_seg segflg)
5411 {
5412 int error;
5413 struct nameidata nd;
5414 int niopts;
5415 struct vfs_context context;
5416 #if NAMEDRSRCFORK
5417 int is_namedstream = 0;
5418 #endif
5419
5420 /*
5421 * Unless the AT_EACCESS option is used, access is defined as checking
5422 * against the process' real identity, even if operations are checking
5423 * the effective identity. So we need to tweak the credential
5424 * in the context for that case.
5425 */
5426 if (!(flag & AT_EACCESS))
5427 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5428 else
5429 context.vc_ucred = ctx->vc_ucred;
5430 context.vc_thread = ctx->vc_thread;
5431
5432
5433 niopts = FOLLOW | AUDITVNPATH1;
5434 /* need parent for vnode_authorize for deletion test */
5435 if (amode & _DELETE_OK)
5436 niopts |= WANTPARENT;
5437 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5438 path, &context);
5439
5440 #if NAMEDRSRCFORK
5441 /* access(F_OK) calls are allowed for resource forks. */
5442 if (amode == F_OK)
5443 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5444 #endif
5445 error = nameiat(&nd, fd);
5446 if (error)
5447 goto out;
5448
5449 #if NAMEDRSRCFORK
5450 /* Grab reference on the shadow stream file vnode to
5451 * force an inactive on release which will mark it
5452 * for recycle.
5453 */
5454 if (vnode_isnamedstream(nd.ni_vp) &&
5455 (nd.ni_vp->v_parent != NULLVP) &&
5456 vnode_isshadow(nd.ni_vp)) {
5457 is_namedstream = 1;
5458 vnode_ref(nd.ni_vp);
5459 }
5460 #endif
5461
5462 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
5463
5464 #if NAMEDRSRCFORK
5465 if (is_namedstream) {
5466 vnode_rele(nd.ni_vp);
5467 }
5468 #endif
5469
5470 vnode_put(nd.ni_vp);
5471 if (amode & _DELETE_OK)
5472 vnode_put(nd.ni_dvp);
5473 nameidone(&nd);
5474
5475 out:
5476 if (!(flag & AT_EACCESS))
5477 kauth_cred_unref(&context.vc_ucred);
5478 return (error);
5479 }
5480
5481 int
5482 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5483 {
5484 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5485 uap->path, uap->flags, 0, UIO_USERSPACE));
5486 }
5487
5488 int
5489 faccessat(__unused proc_t p, struct faccessat_args *uap,
5490 __unused int32_t *retval)
5491 {
5492 if (uap->flag & ~AT_EACCESS)
5493 return (EINVAL);
5494
5495 return (faccessat_internal(vfs_context_current(), uap->fd,
5496 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5497 }
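/*
 * Illustrative userspace sketch (not part of this source file): without
 * AT_EACCESS the check runs against the real uid/gid (the credential
 * swap above); with it, against the effective identity. The path is
 * hypothetical.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	if (faccessat(AT_FDCWD, "/etc/example.conf", R_OK | W_OK, AT_EACCESS) == -1)
 *		perror("faccessat");
 */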
5498
5499 /*
5500 * Returns: 0 Success
5501 * EFAULT
5502 * copyout:EFAULT
5503 * namei:???
5504 * vn_stat:???
5505 */
5506 static int
5507 fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5508 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5509 enum uio_seg segflg, int fd, int flag)
5510 {
5511 struct nameidata nd;
5512 int follow;
5513 union {
5514 struct stat sb;
5515 struct stat64 sb64;
5516 } source = {};
5517 union {
5518 struct user64_stat user64_sb;
5519 struct user32_stat user32_sb;
5520 struct user64_stat64 user64_sb64;
5521 struct user32_stat64 user32_sb64;
5522 } dest = {};
5523 caddr_t sbp;
5524 int error, my_size;
5525 kauth_filesec_t fsec;
5526 size_t xsecurity_bufsize;
5527 void * statptr;
5528
5529 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5530 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5531 segflg, path, ctx);
5532
5533 #if NAMEDRSRCFORK
5534 int is_namedstream = 0;
5535 /* stat calls are allowed for resource forks. */
5536 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5537 #endif
5538 error = nameiat(&nd, fd);
5539 if (error)
5540 return (error);
5541 fsec = KAUTH_FILESEC_NONE;
5542
5543 statptr = (void *)&source;
5544
5545 #if NAMEDRSRCFORK
5546 /* Grab reference on the shadow stream file vnode to
5547 * force an inactive on release which will mark it
5548 * for recycle.
5549 */
5550 if (vnode_isnamedstream(nd.ni_vp) &&
5551 (nd.ni_vp->v_parent != NULLVP) &&
5552 vnode_isshadow(nd.ni_vp)) {
5553 is_namedstream = 1;
5554 vnode_ref(nd.ni_vp);
5555 }
5556 #endif
5557
5558 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
5559
5560 #if NAMEDRSRCFORK
5561 if (is_namedstream) {
5562 vnode_rele(nd.ni_vp);
5563 }
5564 #endif
5565 vnode_put(nd.ni_vp);
5566 nameidone(&nd);
5567
5568 if (error)
5569 return (error);
5570 /* Zap spare fields */
5571 if (isstat64 != 0) {
5572 source.sb64.st_lspare = 0;
5573 source.sb64.st_qspare[0] = 0LL;
5574 source.sb64.st_qspare[1] = 0LL;
5575 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5576 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
5577 my_size = sizeof(dest.user64_sb64);
5578 sbp = (caddr_t)&dest.user64_sb64;
5579 } else {
5580 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
5581 my_size = sizeof(dest.user32_sb64);
5582 sbp = (caddr_t)&dest.user32_sb64;
5583 }
5584 /*
5585 * Check if we raced (post lookup) against the last unlink of a file.
5586 */
5587 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5588 source.sb64.st_nlink = 1;
5589 }
5590 } else {
5591 source.sb.st_lspare = 0;
5592 source.sb.st_qspare[0] = 0LL;
5593 source.sb.st_qspare[1] = 0LL;
5594 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5595 munge_user64_stat(&source.sb, &dest.user64_sb);
5596 my_size = sizeof(dest.user64_sb);
5597 sbp = (caddr_t)&dest.user64_sb;
5598 } else {
5599 munge_user32_stat(&source.sb, &dest.user32_sb);
5600 my_size = sizeof(dest.user32_sb);
5601 sbp = (caddr_t)&dest.user32_sb;
5602 }
5603
5604 /*
5605 * Check if we raced (post lookup) against the last unlink of a file.
5606 */
5607 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5608 source.sb.st_nlink = 1;
5609 }
5610 }
5611 if ((error = copyout(sbp, ub, my_size)) != 0)
5612 goto out;
5613
5614 /* caller wants extended security information? */
5615 if (xsecurity != USER_ADDR_NULL) {
5616
5617 /* did we get any? */
5618 if (fsec == KAUTH_FILESEC_NONE) {
5619 if (susize(xsecurity_size, 0) != 0) {
5620 error = EFAULT;
5621 goto out;
5622 }
5623 } else {
5624 /* find the user buffer size */
5625 xsecurity_bufsize = fusize(xsecurity_size);
5626
5627 /* copy out the actual data size */
5628 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5629 error = EFAULT;
5630 goto out;
5631 }
5632
5633 /* if the caller supplied enough room, copy out to it */
5634 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5635 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5636 }
5637 }
5638 out:
5639 if (fsec != KAUTH_FILESEC_NONE)
5640 kauth_filesec_free(fsec);
5641 return (error);
5642 }
5643
5644 /*
5645 * stat_extended: Get file status; with extended security (ACL).
5646 *
5647 * Parameters: p (ignored)
5648 * uap User argument descriptor (see below)
5649 * retval (ignored)
5650 *
5651 * Indirect: uap->path Path of file to get status from
5652 * uap->ub User buffer (holds file status info)
5653 * uap->xsecurity ACL to get (extended security)
5654 * uap->xsecurity_size Size of ACL
5655 *
5656 * Returns: 0 Success
5657 * !0 errno value
5658 *
5659 */
5660 int
5661 stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5662 __unused int32_t *retval)
5663 {
5664 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5665 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5666 0));
5667 }
5668
5669 /*
5670 * Returns: 0 Success
5671 * fstatat_internal:??? [see fstatat_internal() in this file]
5672 */
5673 int
5674 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
5675 {
5676 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5677 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
5678 }
5679
5680 int
5681 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
5682 {
5683 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5684 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
5685 }
5686
5687 /*
5688 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5689 *
5690 * Parameters: p (ignored)
5691 * uap User argument descriptor (see below)
5692 * retval (ignored)
5693 *
5694 * Indirect: uap->path Path of file to get status from
5695 * uap->ub User buffer (holds file status info)
5696 * uap->xsecurity ACL to get (extended security)
5697 * uap->xsecurity_size Size of ACL
5698 *
5699 * Returns: 0 Success
5700 * !0 errno value
5701 *
5702 */
5703 int
5704 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
5705 {
5706 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5707 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5708 0));
5709 }
5710
5711 /*
5712 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5713 *
5714 * Parameters: p (ignored)
5715 * uap User argument descriptor (see below)
5716 * retval (ignored)
5717 *
5718 * Indirect: uap->path Path of file to get status from
5719 * uap->ub User buffer (holds file status info)
5720 * uap->xsecurity ACL to get (extended security)
5721 * uap->xsecurity_size Size of ACL
5722 *
5723 * Returns: 0 Success
5724 * !0 errno value
5725 *
5726 */
5727 int
5728 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
5729 {
5730 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5731 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5732 AT_SYMLINK_NOFOLLOW));
5733 }
5734
5735 /*
5736 * Get file status; this version does not follow links.
5737 */
5738 int
5739 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
5740 {
5741 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5742 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5743 }
5744
5745 int
5746 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
5747 {
5748 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5749 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5750 }
5751
5752 /*
5753 * lstat64_extended: Get file status; can handle large inode numbers; does not
5754 * follow links; with extended security (ACL).
5755 *
5756 * Parameters: p (ignored)
5757 * uap User argument descriptor (see below)
5758 * retval (ignored)
5759 *
5760 * Indirect: uap->path Path of file to get status from
5761 * uap->ub User buffer (holds file status info)
5762 * uap->xsecurity ACL to get (extended security)
5763 * uap->xsecurity_size Size of ACL
5764 *
5765 * Returns: 0 Success
5766 * !0 errno value
5767 *
5768 */
5769 int
5770 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
5771 {
5772 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5773 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5774 AT_SYMLINK_NOFOLLOW));
5775 }
5776
5777 int
5778 fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5779 {
5780 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5781 return (EINVAL);
5782
5783 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5784 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5785 }
5786
5787 int
5788 fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5789 __unused int32_t *retval)
5790 {
5791 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5792 return (EINVAL);
5793
5794 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5795 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
5796 }
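/*
 * Illustrative userspace sketch (not part of this source file): the
 * whole stat family above funnels into fstatat_internal(), with
 * AT_SYMLINK_NOFOLLOW selecting lstat semantics. The name is
 * hypothetical.
 *
 *	#include <fcntl.h>
 *	#include <sys/stat.h>
 *
 *	struct stat st;
 *	if (fstatat(AT_FDCWD, "alias", &st, AT_SYMLINK_NOFOLLOW) == 0 &&
 *	    S_ISLNK(st.st_mode)) {
 *		// the path itself is a symlink
 *	}
 */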
5797
5798 /*
5799 * Get configurable pathname variables.
5800 *
5801 * Returns: 0 Success
5802 * namei:???
5803 * vn_pathconf:???
5804 *
5805 * Notes: Global implementation constants are intended to be
5806 * implemented in this function directly; all other constants
5807 * are per-FS implementation, and therefore must be handled in
5808 * each respective FS, instead.
5809 *
5810 * XXX We implement some things globally right now that should actually be
5811 * XXX per-FS; we will need to deal with this at some point.
5812 */
5813 /* ARGSUSED */
5814 int
5815 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
5816 {
5817 int error;
5818 struct nameidata nd;
5819 vfs_context_t ctx = vfs_context_current();
5820
5821 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
5822 UIO_USERSPACE, uap->path, ctx);
5823 error = namei(&nd);
5824 if (error)
5825 return (error);
5826
5827 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
5828
5829 vnode_put(nd.ni_vp);
5830 nameidone(&nd);
5831 return (error);
5832 }
5833
5834 /*
5835 * Return target name of a symbolic link.
5836 */
5837 /* ARGSUSED */
5838 static int
5839 readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
5840 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
5841 int *retval)
5842 {
5843 vnode_t vp;
5844 uio_t auio;
5845 int error;
5846 struct nameidata nd;
5847 char uio_buf[ UIO_SIZEOF(1) ];
5848
5849 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5850 seg, path, ctx);
5851
5852 error = nameiat(&nd, fd);
5853 if (error)
5854 return (error);
5855 vp = nd.ni_vp;
5856
5857 nameidone(&nd);
5858
5859 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
5860 &uio_buf[0], sizeof(uio_buf));
5861 uio_addiov(auio, buf, bufsize);
5862 if (vp->v_type != VLNK) {
5863 error = EINVAL;
5864 } else {
5865 #if CONFIG_MACF
5866 error = mac_vnode_check_readlink(ctx, vp);
5867 #endif
5868 if (error == 0)
5869 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
5870 ctx);
5871 if (error == 0)
5872 error = VNOP_READLINK(vp, auio, ctx);
5873 }
5874 vnode_put(vp);
5875
5876 *retval = bufsize - (int)uio_resid(auio);
5877 return (error);
5878 }
5879
5880 int
5881 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5882 {
5883 enum uio_seg procseg;
5884
5885 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5886 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5887 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5888 uap->count, procseg, retval));
5889 }
5890
5891 int
5892 readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5893 {
5894 enum uio_seg procseg;
5895
5896 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5897 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5898 procseg, uap->buf, uap->bufsize, procseg, retval));
5899 }
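/*
 * Illustrative userspace sketch (not part of this source file): the
 * retval set above is the byte count placed in the buffer; readlink(2)
 * does not NUL-terminate, so the caller does. The name is hypothetical.
 *
 *	#include <fcntl.h>
 *	#include <sys/param.h>
 *	#include <unistd.h>
 *
 *	char buf[MAXPATHLEN];
 *	ssize_t n = readlinkat(AT_FDCWD, "alias", buf, sizeof(buf) - 1);
 *	if (n >= 0)
 *		buf[n] = '\0';
 */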
5900
5901 /*
5902 * Change file flags.
5903 *
5904 * NOTE: this will vnode_put() `vp'
5905 */
5906 static int
5907 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5908 {
5909 struct vnode_attr va;
5910 kauth_action_t action;
5911 int error;
5912
5913 VATTR_INIT(&va);
5914 VATTR_SET(&va, va_flags, flags);
5915
5916 #if CONFIG_MACF
5917 error = mac_vnode_check_setflags(ctx, vp, flags);
5918 if (error)
5919 goto out;
5920 #endif
5921
5922 /* request authorisation, disregard immutability */
5923 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5924 goto out;
5925 /*
5926 * Request that the auth layer disregard those file flags it's allowed to when
5927 * authorizing this operation; we need to do this in order to be able to
5928 * clear immutable flags.
5929 */
5930 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
5931 goto out;
5932 error = vnode_setattr(vp, &va, ctx);
5933
5934 #if CONFIG_MACF
5935 if (error == 0)
5936 mac_vnode_notify_setflags(ctx, vp, flags);
5937 #endif
5938
5939 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5940 error = ENOTSUP;
5941 }
5942 out:
5943 vnode_put(vp);
5944 return(error);
5945 }
5946
5947 /*
5948 * Change flags of a file given a path name.
5949 */
5950 /* ARGSUSED */
5951 int
5952 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
5953 {
5954 vnode_t vp;
5955 vfs_context_t ctx = vfs_context_current();
5956 int error;
5957 struct nameidata nd;
5958
5959 AUDIT_ARG(fflags, uap->flags);
5960 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5961 UIO_USERSPACE, uap->path, ctx);
5962 error = namei(&nd);
5963 if (error)
5964 return (error);
5965 vp = nd.ni_vp;
5966 nameidone(&nd);
5967
5968 /* we don't vnode_put() here because chflags1 does internally */
5969 error = chflags1(vp, uap->flags, ctx);
5970
5971 return(error);
5972 }
5973
5974 /*
5975 * Change flags of a file given a file descriptor.
5976 */
5977 /* ARGSUSED */
5978 int
5979 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
5980 {
5981 vnode_t vp;
5982 int error;
5983
5984 AUDIT_ARG(fd, uap->fd);
5985 AUDIT_ARG(fflags, uap->flags);
5986 if ( (error = file_vnode(uap->fd, &vp)) )
5987 return (error);
5988
5989 if ((error = vnode_getwithref(vp))) {
5990 file_drop(uap->fd);
5991 return(error);
5992 }
5993
5994 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5995
5996 /* we don't vnode_put() here because chflags1 does internally */
5997 error = chflags1(vp, uap->flags, vfs_context_current());
5998
5999 file_drop(uap->fd);
6000 return (error);
6001 }
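/*
 * Illustrative userspace sketch (not part of this source file):
 * chflags()/fchflags() land in chflags1() above, which asks the auth
 * layer to disregard immutability so immutable flags can be cleared as
 * well as set. The path is hypothetical.
 *
 *	#include <sys/stat.h>
 *	#include <unistd.h>
 *
 *	(void)chflags("/tmp/example", UF_IMMUTABLE);	// set the user immutable flag
 *	(void)chflags("/tmp/example", 0);		// clear it again
 */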
6002
6003 /*
6004 * Change security information on a filesystem object.
6005 *
6006 * Returns: 0 Success
6007 * EPERM Operation not permitted
6008 * vnode_authattr:??? [anything vnode_authattr can return]
6009 * vnode_authorize:??? [anything vnode_authorize can return]
6010 * vnode_setattr:??? [anything vnode_setattr can return]
6011 *
6012 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
6013 * translated to EPERM before being returned.
6014 */
6015 static int
6016 chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
6017 {
6018 kauth_action_t action;
6019 int error;
6020
6021 AUDIT_ARG(mode, vap->va_mode);
6022 /* XXX audit new args */
6023
6024 #if NAMEDSTREAMS
6025 /* chmod calls are not allowed for resource forks. */
6026 if (vp->v_flag & VISNAMEDSTREAM) {
6027 return (EPERM);
6028 }
6029 #endif
6030
6031 #if CONFIG_MACF
6032 if (VATTR_IS_ACTIVE(vap, va_mode) &&
6033 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
6034 return (error);
6035
6036 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) {
6037 if ((error = mac_vnode_check_setowner(ctx, vp,
6038 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
6039 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1)))
6040 return (error);
6041 }
6042
6043 if (VATTR_IS_ACTIVE(vap, va_acl) &&
6044 (error = mac_vnode_check_setacl(ctx, vp, vap->va_acl)))
6045 return (error);
6046 #endif
6047
6048 /* make sure that the caller is allowed to set this security information */
6049 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
6050 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6051 if (error == EACCES)
6052 error = EPERM;
6053 return(error);
6054 }
6055
6056 if ((error = vnode_setattr(vp, vap, ctx)) != 0)
6057 return (error);
6058
6059 #if CONFIG_MACF
6060 if (VATTR_IS_ACTIVE(vap, va_mode))
6061 mac_vnode_notify_setmode(ctx, vp, (mode_t)vap->va_mode);
6062
6063 if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))
6064 mac_vnode_notify_setowner(ctx, vp,
6065 VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : -1,
6066 VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : -1);
6067
6068 if (VATTR_IS_ACTIVE(vap, va_acl))
6069 mac_vnode_notify_setacl(ctx, vp, vap->va_acl);
6070 #endif
6071
6072 return (error);
6073 }
6074
6075
6076 /*
6077 * Change mode of a file given a path name.
6078 *
6079 * Returns: 0 Success
6080 * namei:??? [anything namei can return]
6081 * chmod_vnode:??? [anything chmod_vnode can return]
6082 */
6083 static int
6084 chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
6085 int fd, int flag, enum uio_seg segflg)
6086 {
6087 struct nameidata nd;
6088 int follow, error;
6089
6090 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6091 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
6092 segflg, path, ctx);
6093 if ((error = nameiat(&nd, fd)))
6094 return (error);
6095 error = chmod_vnode(ctx, nd.ni_vp, vap);
6096 vnode_put(nd.ni_vp);
6097 nameidone(&nd);
6098 return(error);
6099 }
6100
6101 /*
6102 * chmod_extended: Change the mode of a file given a path name; with extended
6103 * argument list (including extended security (ACL)).
6104 *
6105 * Parameters: p Process requesting the mode change
6106 * uap User argument descriptor (see below)
6107 * retval (ignored)
6108 *
6109 * Indirect: uap->path Path to object (same as 'chmod')
6110 * uap->uid UID to set
6111 * uap->gid GID to set
6112 * uap->mode File mode to set (same as 'chmod')
6113 * uap->xsecurity ACL to set (or delete)
6114 *
6115 * Returns: 0 Success
6116 * !0 errno value
6117 *
6118 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
6119 *
6120 * XXX: We should enumerate the possible errno values here, and where
6121 * in the code they originated.
6122 */
6123 int
6124 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
6125 {
6126 int error;
6127 struct vnode_attr va;
6128 kauth_filesec_t xsecdst;
6129
6130 AUDIT_ARG(owner, uap->uid, uap->gid);
6131
6132 VATTR_INIT(&va);
6133 if (uap->mode != -1)
6134 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6135 if (uap->uid != KAUTH_UID_NONE)
6136 VATTR_SET(&va, va_uid, uap->uid);
6137 if (uap->gid != KAUTH_GID_NONE)
6138 VATTR_SET(&va, va_gid, uap->gid);
6139
6140 xsecdst = NULL;
6141 switch(uap->xsecurity) {
6142 /* explicit remove request */
6143 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6144 VATTR_SET(&va, va_acl, NULL);
6145 break;
6146 /* not being set */
6147 case USER_ADDR_NULL:
6148 break;
6149 default:
6150 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6151 return(error);
6152 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6153 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
6154 }
6155
6156 error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
6157 UIO_USERSPACE);
6158
6159 if (xsecdst != NULL)
6160 kauth_filesec_free(xsecdst);
6161 return(error);
6162 }
6163
6164 /*
6165 * Returns: 0 Success
6166 * chmodat:??? [anything chmodat can return]
6167 */
6168 static int
6169 fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
6170 int flag, enum uio_seg segflg)
6171 {
6172 struct vnode_attr va;
6173
6174 VATTR_INIT(&va);
6175 VATTR_SET(&va, va_mode, mode & ALLPERMS);
6176
6177 return (chmodat(ctx, path, &va, fd, flag, segflg));
6178 }
6179
6180 int
6181 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
6182 {
6183 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6184 AT_FDCWD, 0, UIO_USERSPACE));
6185 }
6186
6187 int
6188 fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
6189 {
6190 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6191 return (EINVAL);
6192
6193 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
6194 uap->fd, uap->flag, UIO_USERSPACE));
6195 }
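/*
 * Illustrative userspace sketch (not part of this source file): chmod()
 * and fchmodat() both come through fchmodat_internal()/chmodat() above,
 * and the mode is masked with ALLPERMS before being applied. The path
 * is hypothetical.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <sys/stat.h>
 *
 *	if (fchmodat(AT_FDCWD, "/tmp/example", 0640, 0) == -1)
 *		perror("fchmodat");
 */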
6196
6197 /*
6198 * Change mode of a file given a file descriptor.
6199 */
6200 static int
6201 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
6202 {
6203 vnode_t vp;
6204 int error;
6205
6206 AUDIT_ARG(fd, fd);
6207
6208 if ((error = file_vnode(fd, &vp)) != 0)
6209 return (error);
6210 if ((error = vnode_getwithref(vp)) != 0) {
6211 file_drop(fd);
6212 return(error);
6213 }
6214 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6215
6216 error = chmod_vnode(vfs_context_current(), vp, vap);
6217 (void)vnode_put(vp);
6218 file_drop(fd);
6219
6220 return (error);
6221 }
6222
6223 /*
6224 * fchmod_extended: Change mode of a file given a file descriptor; with
6225 * extended argument list (including extended security (ACL)).
6226 *
6227 * Parameters: p Process requesting to change file mode
6228 * uap User argument descriptor (see below)
6229 * retval (ignored)
6230 *
6231 * Indirect: uap->mode File mode to set (same as 'chmod')
6232 * uap->uid UID to set
6233 * uap->gid GID to set
6234 * uap->xsecurity ACL to set (or delete)
6235 * uap->fd File descriptor of file to change mode
6236 *
6237 * Returns: 0 Success
6238 * !0 errno value
6239 *
6240 */
6241 int
6242 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
6243 {
6244 int error;
6245 struct vnode_attr va;
6246 kauth_filesec_t xsecdst;
6247
6248 AUDIT_ARG(owner, uap->uid, uap->gid);
6249
6250 VATTR_INIT(&va);
6251 if (uap->mode != -1)
6252 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6253 if (uap->uid != KAUTH_UID_NONE)
6254 VATTR_SET(&va, va_uid, uap->uid);
6255 if (uap->gid != KAUTH_GID_NONE)
6256 VATTR_SET(&va, va_gid, uap->gid);
6257
6258 xsecdst = NULL;
6259 switch(uap->xsecurity) {
6260 case USER_ADDR_NULL:
6261 VATTR_SET(&va, va_acl, NULL);
6262 break;
6263 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
6264 VATTR_SET(&va, va_acl, NULL);
6265 break;
6266 /* not being set */
6267 case CAST_USER_ADDR_T(-1):
6268 break;
6269 default:
6270 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6271 return(error);
6272 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
6273 }
6274
6275 error = fchmod1(p, uap->fd, &va);
6276
6277
6278 switch(uap->xsecurity) {
6279 case USER_ADDR_NULL:
6280 case CAST_USER_ADDR_T(-1):
6281 break;
6282 default:
6283 if (xsecdst != NULL)
6284 kauth_filesec_free(xsecdst);
6285 }
6286 return(error);
6287 }
6288
6289 int
6290 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
6291 {
6292 struct vnode_attr va;
6293
6294 VATTR_INIT(&va);
6295 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
6296
6297 return(fchmod1(p, uap->fd, &va));
6298 }
6299
6300
6301 /*
6302 * Set ownership given a path name.
6303 */
6304 /* ARGSUSED */
6305 static int
6306 fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
6307 gid_t gid, int flag, enum uio_seg segflg)
6308 {
6309 vnode_t vp;
6310 struct vnode_attr va;
6311 int error;
6312 struct nameidata nd;
6313 int follow;
6314 kauth_action_t action;
6315
6316 AUDIT_ARG(owner, uid, gid);
6317
6318 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6319 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
6320 path, ctx);
6321 error = nameiat(&nd, fd);
6322 if (error)
6323 return (error);
6324 vp = nd.ni_vp;
6325
6326 nameidone(&nd);
6327
6328 VATTR_INIT(&va);
6329 if (uid != (uid_t)VNOVAL)
6330 VATTR_SET(&va, va_uid, uid);
6331 if (gid != (gid_t)VNOVAL)
6332 VATTR_SET(&va, va_gid, gid);
6333
6334 #if CONFIG_MACF
6335 error = mac_vnode_check_setowner(ctx, vp, uid, gid);
6336 if (error)
6337 goto out;
6338 #endif
6339
6340 /* preflight and authorize attribute changes */
6341 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6342 goto out;
6343 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6344 goto out;
6345 error = vnode_setattr(vp, &va, ctx);
6346
6347 #if CONFIG_MACF
6348 if (error == 0)
6349 mac_vnode_notify_setowner(ctx, vp, uid, gid);
6350 #endif
6351
6352 out:
6353 /*
6354 * EACCES is only allowed from namei(); permissions failure should
6355 * return EPERM, so we need to translate the error code.
6356 */
6357 if (error == EACCES)
6358 error = EPERM;
6359
6360 vnode_put(vp);
6361 return (error);
6362 }
6363
6364 int
6365 chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
6366 {
6367 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6368 uap->uid, uap->gid, 0, UIO_USERSPACE));
6369 }
6370
6371 int
6372 lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
6373 {
6374 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6375 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6376 }
6377
6378 int
6379 fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6380 {
6381 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6382 return (EINVAL);
6383
6384 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6385 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
6386 }
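/*
 * Illustrative userspace sketch (values hypothetical): the two calls below
 * are equivalent ways of changing ownership of a symlink itself rather than
 * its target, exercising the AT_SYMLINK_NOFOLLOW path above; any other bit
 * in 'flag' is rejected with EINVAL.
 *
 *	#include <unistd.h>	// lchown(), fchownat()
 *	#include <fcntl.h>	// AT_FDCWD, AT_SYMLINK_NOFOLLOW
 *
 *	(void)lchown("/tmp/mylink", 501, 20);
 *	(void)fchownat(AT_FDCWD, "/tmp/mylink", 501, 20, AT_SYMLINK_NOFOLLOW);
 */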
6387
6388 /*
6389 * Set ownership given a file descriptor.
6390 */
6391 /* ARGSUSED */
6392 int
6393 fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
6394 {
6395 struct vnode_attr va;
6396 vfs_context_t ctx = vfs_context_current();
6397 vnode_t vp;
6398 int error;
6399 kauth_action_t action;
6400
6401 AUDIT_ARG(owner, uap->uid, uap->gid);
6402 AUDIT_ARG(fd, uap->fd);
6403
6404 if ( (error = file_vnode(uap->fd, &vp)) )
6405 return (error);
6406
6407 if ( (error = vnode_getwithref(vp)) ) {
6408 file_drop(uap->fd);
6409 return(error);
6410 }
6411 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6412
6413 VATTR_INIT(&va);
6414 if (uap->uid != VNOVAL)
6415 VATTR_SET(&va, va_uid, uap->uid);
6416 if (uap->gid != VNOVAL)
6417 VATTR_SET(&va, va_gid, uap->gid);
6418
6419 #if NAMEDSTREAMS
6420 /* chown calls are not allowed for resource forks. */
6421 if (vp->v_flag & VISNAMEDSTREAM) {
6422 error = EPERM;
6423 goto out;
6424 }
6425 #endif
6426
6427 #if CONFIG_MACF
6428 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
6429 if (error)
6430 goto out;
6431 #endif
6432
6433 /* preflight and authorize attribute changes */
6434 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6435 goto out;
6436 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6437 if (error == EACCES)
6438 error = EPERM;
6439 goto out;
6440 }
6441 error = vnode_setattr(vp, &va, ctx);
6442
6443 #if CONFIG_MACF
6444 if (error == 0)
6445 mac_vnode_notify_setowner(ctx, vp, uap->uid, uap->gid);
6446 #endif
6447
6448 out:
6449 (void)vnode_put(vp);
6450 file_drop(uap->fd);
6451 return (error);
6452 }
6453
6454 static int
6455 getutimes(user_addr_t usrtvp, struct timespec *tsp)
6456 {
6457 int error;
6458
6459 if (usrtvp == USER_ADDR_NULL) {
6460 struct timeval old_tv;
6461 /* XXX Y2038 bug because of microtime argument */
6462 microtime(&old_tv);
6463 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
6464 tsp[1] = tsp[0];
6465 } else {
6466 if (IS_64BIT_PROCESS(current_proc())) {
6467 struct user64_timeval tv[2];
6468 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6469 if (error)
6470 return (error);
6471 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6472 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6473 } else {
6474 struct user32_timeval tv[2];
6475 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6476 if (error)
6477 return (error);
6478 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6479 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6480 }
6481 }
6482 return 0;
6483 }
6484
6485 static int
6486 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
6487 int nullflag)
6488 {
6489 int error;
6490 struct vnode_attr va;
6491 kauth_action_t action;
6492
6493 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6494
6495 VATTR_INIT(&va);
6496 VATTR_SET(&va, va_access_time, ts[0]);
6497 VATTR_SET(&va, va_modify_time, ts[1]);
6498 if (nullflag)
6499 va.va_vaflags |= VA_UTIMES_NULL;
6500
6501 #if NAMEDSTREAMS
6502 /* utimes calls are not allowed for resource forks. */
6503 if (vp->v_flag & VISNAMEDSTREAM) {
6504 error = EPERM;
6505 goto out;
6506 }
6507 #endif
6508
6509 #if CONFIG_MACF
6510 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
6511 if (error)
6512 goto out;
6513 #endif
6514 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
6515 if (!nullflag && error == EACCES)
6516 error = EPERM;
6517 goto out;
6518 }
6519
6520 /* since we may not need to auth anything, check here */
6521 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6522 if (!nullflag && error == EACCES)
6523 error = EPERM;
6524 goto out;
6525 }
6526 error = vnode_setattr(vp, &va, ctx);
6527
6528 #if CONFIG_MACF
6529 if (error == 0)
6530 mac_vnode_notify_setutimes(ctx, vp, ts[0], ts[1]);
6531 #endif
6532
6533 out:
6534 return error;
6535 }
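/*
 * Illustrative userspace sketch: a NULL times pointer to utimes(2) takes
 * the nullflag/VA_UTIMES_NULL path in setutimes() above (both timestamps
 * set to "now", permitted with write access alone), while an explicit
 * array requires ownership or equivalent privilege.
 *
 *	#include <sys/time.h>	// utimes(), gettimeofday()
 *
 *	struct timeval tv[2];
 *	gettimeofday(&tv[0], NULL);		// new access time
 *	tv[1] = tv[0];				// new modification time
 *	(void)utimes("/tmp/file", tv);		// explicit times
 *	(void)utimes("/tmp/file", NULL);	// "now"; VA_UTIMES_NULL
 */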
6536
6537 /*
6538 * Set the access and modification times of a file.
6539 */
6540 /* ARGSUSED */
6541 int
6542 utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
6543 {
6544 struct timespec ts[2];
6545 user_addr_t usrtvp;
6546 int error;
6547 struct nameidata nd;
6548 vfs_context_t ctx = vfs_context_current();
6549
6550 /*
6551 * AUDIT: Needed to change the order of operations to do the
6552 * name lookup first because auditing wants the path.
6553 */
6554 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
6555 UIO_USERSPACE, uap->path, ctx);
6556 error = namei(&nd);
6557 if (error)
6558 return (error);
6559 nameidone(&nd);
6560
6561 /*
6562 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6563 * the current time instead.
6564 */
6565 usrtvp = uap->tptr;
6566 if ((error = getutimes(usrtvp, ts)) != 0)
6567 goto out;
6568
6569 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
6570
6571 out:
6572 vnode_put(nd.ni_vp);
6573 return (error);
6574 }
6575
6576 /*
6577 * Set the access and modification times of a file.
6578 */
6579 /* ARGSUSED */
6580 int
6581 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
6582 {
6583 struct timespec ts[2];
6584 vnode_t vp;
6585 user_addr_t usrtvp;
6586 int error;
6587
6588 AUDIT_ARG(fd, uap->fd);
6589 usrtvp = uap->tptr;
6590 if ((error = getutimes(usrtvp, ts)) != 0)
6591 return (error);
6592 if ((error = file_vnode(uap->fd, &vp)) != 0)
6593 return (error);
6594 if((error = vnode_getwithref(vp))) {
6595 file_drop(uap->fd);
6596 return(error);
6597 }
6598
6599 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
6600 vnode_put(vp);
6601 file_drop(uap->fd);
6602 return(error);
6603 }
6604
6605 /*
6606 * Truncate a file given its path name.
6607 */
6608 /* ARGSUSED */
6609 int
6610 truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
6611 {
6612 vnode_t vp;
6613 struct vnode_attr va;
6614 vfs_context_t ctx = vfs_context_current();
6615 int error;
6616 struct nameidata nd;
6617 kauth_action_t action;
6618
6619 if (uap->length < 0)
6620 return(EINVAL);
6621 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
6622 UIO_USERSPACE, uap->path, ctx);
6623 if ((error = namei(&nd)))
6624 return (error);
6625 vp = nd.ni_vp;
6626
6627 nameidone(&nd);
6628
6629 VATTR_INIT(&va);
6630 VATTR_SET(&va, va_data_size, uap->length);
6631
6632 #if CONFIG_MACF
6633 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
6634 if (error)
6635 goto out;
6636 #endif
6637
6638 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6639 goto out;
6640 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6641 goto out;
6642 error = vnode_setattr(vp, &va, ctx);
6643
6644 #if CONFIG_MACF
6645 if (error == 0)
6646 mac_vnode_notify_truncate(ctx, NOCRED, vp);
6647 #endif
6648
6649 out:
6650 vnode_put(vp);
6651 return (error);
6652 }
6653
6654 /*
6655 * Truncate a file given a file descriptor.
6656 */
6657 /* ARGSUSED */
6658 int
6659 ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
6660 {
6661 vfs_context_t ctx = vfs_context_current();
6662 struct vnode_attr va;
6663 vnode_t vp;
6664 struct fileproc *fp;
6665 int error ;
6666 int fd = uap->fd;
6667
6668 AUDIT_ARG(fd, uap->fd);
6669 if (uap->length < 0)
6670 return(EINVAL);
6671
6672 if ( (error = fp_lookup(p,fd,&fp,0)) ) {
6673 return(error);
6674 }
6675
6676 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
6677 case DTYPE_PSXSHM:
6678 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
6679 goto out;
6680 case DTYPE_VNODE:
6681 break;
6682 default:
6683 error = EINVAL;
6684 goto out;
6685 }
6686
6687 vp = (vnode_t)fp->f_fglob->fg_data;
6688
6689 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
6690 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6691 error = EINVAL;
6692 goto out;
6693 }
6694
6695 if ((error = vnode_getwithref(vp)) != 0) {
6696 goto out;
6697 }
6698
6699 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6700
6701 #if CONFIG_MACF
6702 error = mac_vnode_check_truncate(ctx,
6703 fp->f_fglob->fg_cred, vp);
6704 if (error) {
6705 (void)vnode_put(vp);
6706 goto out;
6707 }
6708 #endif
6709 VATTR_INIT(&va);
6710 VATTR_SET(&va, va_data_size, uap->length);
6711 error = vnode_setattr(vp, &va, ctx);
6712
6713 #if CONFIG_MACF
6714 if (error == 0)
6715 mac_vnode_notify_truncate(ctx, fp->f_fglob->fg_cred, vp);
6716 #endif
6717
6718 (void)vnode_put(vp);
6719 out:
6720 file_drop(fd);
6721 return (error);
6722 }
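/*
 * Illustrative userspace sketch: ftruncate(2) is also how a POSIX shared
 * memory object is sized, which is why the DTYPE_PSXSHM case above is
 * routed to pshm_truncate() instead of vnode_setattr(). The object name
 * below is hypothetical.
 *
 *	#include <sys/mman.h>	// shm_open()
 *	#include <fcntl.h>	// O_RDWR, O_CREAT
 *	#include <unistd.h>	// ftruncate()
 *
 *	int fd = shm_open("/myshm", O_RDWR | O_CREAT, 0600);
 *	if (fd >= 0 && ftruncate(fd, 4096) == 0) {
 *		// the object is now 4096 bytes and can be mmap()ed
 *	}
 */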
6723
6724
6725 /*
6726 * Sync an open file with synchronized I/O _file_ integrity completion
6727 */
6728 /* ARGSUSED */
6729 int
6730 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
6731 {
6732 __pthread_testcancel(1);
6733 return(fsync_common(p, uap, MNT_WAIT));
6734 }
6735
6736
6737 /*
6738 * Sync an open file with synchronized I/O _file_ integrity completion
6739 *
6740 * Notes: This is a legacy support function that does not test for
6741 * thread cancellation points.
6742 */
6743 /* ARGSUSED */
6744 int
6745 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6746 {
6747 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
6748 }
6749
6750
6751 /*
6752 * Sync an open file with synchronized I/O _data_ integrity completion
6753 */
6754 /* ARGSUSED */
6755 int
6756 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6757 {
6758 __pthread_testcancel(1);
6759 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6760 }
6761
6762
6763 /*
6764 * fsync_common
6765 *
6766 * Common fsync code to support both synchronized I/O file integrity completion
6767 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6768 *
6769 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6770 * will only guarantee that the file data contents are retrievable. If
6771 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which
6772 * additionally requires that metadata unnecessary for retrieving the file
6773 * data contents, such as atime, mtime, ctime, etc., be committed to stable
6774 * storage.
6775 *
6776 * Parameters: p The process
6777 * uap->fd The descriptor to synchronize
6778 * flags The data integrity flags
6779 *
6780 * Returns: int Success
6781 * fp_getfvp:EBADF Bad file descriptor
6782 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6783 * VNOP_FSYNC:??? unspecified
6784 *
6785 * Notes: We use struct fsync_args because it is a short name, and all
6786 * caller argument structures are otherwise identical.
6787 */
6788 static int
6789 fsync_common(proc_t p, struct fsync_args *uap, int flags)
6790 {
6791 vnode_t vp;
6792 struct fileproc *fp;
6793 vfs_context_t ctx = vfs_context_current();
6794 int error;
6795
6796 AUDIT_ARG(fd, uap->fd);
6797
6798 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
6799 return (error);
6800 if ( (error = vnode_getwithref(vp)) ) {
6801 file_drop(uap->fd);
6802 return(error);
6803 }
6804
6805 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6806
6807 error = VNOP_FSYNC(vp, flags, ctx);
6808
6809 #if NAMEDRSRCFORK
6810 /* Sync resource fork shadow file if necessary. */
6811 if ((error == 0) &&
6812 (vp->v_flag & VISNAMEDSTREAM) &&
6813 (vp->v_parent != NULLVP) &&
6814 vnode_isshadow(vp) &&
6815 (fp->f_flags & FP_WRITTEN)) {
6816 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
6817 }
6818 #endif
6819
6820 (void)vnode_put(vp);
6821 file_drop(uap->fd);
6822 return (error);
6823 }
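/*
 * Illustrative userspace sketch: both calls below land in fsync_common();
 * fsync(2) maps to MNT_WAIT (file integrity, metadata included) and
 * fdatasync(2) maps to MNT_DWAIT (data integrity only), per the comment
 * above.
 *
 *	#include <unistd.h>	// fsync(), fdatasync()
 *
 *	void
 *	flush_file(int fd, int data_only)
 *	{
 *		if (data_only)
 *			(void)fdatasync(fd);	// file data retrievable
 *		else
 *			(void)fsync(fd);	// data plus metadata (mtime, ...)
 *	}
 */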
6824
6825 /*
6826 * Duplicate files. Source must be a file, target must be a file or
6827 * must not exist.
6828 *
6829 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6830 * perform inheritance correctly.
6831 */
6832 /* ARGSUSED */
6833 int
6834 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
6835 {
6836 vnode_t tvp, fvp, tdvp, sdvp;
6837 struct nameidata fromnd, tond;
6838 int error;
6839 vfs_context_t ctx = vfs_context_current();
6840 #if CONFIG_MACF
6841 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
6842 struct vnode_attr va;
6843 #endif
6844
6845 /* Check that the flags are valid. */
6846
6847 if (uap->flags & ~CPF_MASK) {
6848 return(EINVAL);
6849 }
6850
6851 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
6852 UIO_USERSPACE, uap->from, ctx);
6853 if ((error = namei(&fromnd)))
6854 return (error);
6855 fvp = fromnd.ni_vp;
6856
6857 NDINIT(&tond, CREATE, OP_LINK,
6858 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6859 UIO_USERSPACE, uap->to, ctx);
6860 if ((error = namei(&tond))) {
6861 goto out1;
6862 }
6863 tdvp = tond.ni_dvp;
6864 tvp = tond.ni_vp;
6865
6866 if (tvp != NULL) {
6867 if (!(uap->flags & CPF_OVERWRITE)) {
6868 error = EEXIST;
6869 goto out;
6870 }
6871 }
6872
6873 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
6874 error = EISDIR;
6875 goto out;
6876 }
6877
6878 /* This calls existing MAC hooks for open */
6879 if ((error = vn_authorize_open_existing(fvp, &fromnd.ni_cnd, FREAD, ctx,
6880 NULL))) {
6881 goto out;
6882 }
6883
6884 if (tvp) {
6885 /*
6886 * See unlinkat_internal for an explanation of the potential
6887 * ENOENT from the MAC hook but the gist is that the MAC hook
6888 * can fail because vn_getpath isn't able to return the full
6889 * path. We choose to ignore this failure.
6890 */
6891 error = vn_authorize_unlink(tdvp, tvp, &tond.ni_cnd, ctx, NULL);
6892 if (error && error != ENOENT)
6893 goto out;
6894 error = 0;
6895 }
6896
6897 #if CONFIG_MACF
6898 VATTR_INIT(&va);
6899 VATTR_SET(&va, va_type, fvp->v_type);
6900 /* Mask off all but regular access permissions */
6901 VATTR_SET(&va, va_mode,
6902 ((((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT) & ACCESSPERMS));
6903 error = mac_vnode_check_create(ctx, tdvp, &tond.ni_cnd, &va);
6904 if (error)
6905 goto out;
6906 #endif /* CONFIG_MACF */
6907
6908 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
6909 goto out;
6910
6911 if (fvp == tdvp)
6912 error = EINVAL;
6913 /*
6914 * If source is the same as the destination (that is the
6915 * same inode number) then there is nothing to do.
6916 * (fixed to have POSIX semantics - CSM 3/2/98)
6917 */
6918 if (fvp == tvp)
6919 error = -1;
6920 if (!error)
6921 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
6922 out:
6923 sdvp = tond.ni_startdir;
6924 /*
6925 * nameidone has to happen before we vnode_put(tdvp)
6926 * since it may need to release the fs_nodelock on the tdvp
6927 */
6928 nameidone(&tond);
6929
6930 if (tvp)
6931 vnode_put(tvp);
6932 vnode_put(tdvp);
6933 vnode_put(sdvp);
6934 out1:
6935 vnode_put(fvp);
6936
6937 nameidone(&fromnd);
6938
6939 if (error == -1)
6940 return (0);
6941 return (error);
6942 }
6943
6944 #define CLONE_SNAPSHOT_FALLBACKS_ENABLED 1
6945
6946 /*
6947 * Helper function for doing clones. The caller is expected to provide an
6948 * iocounted source vnode and release it.
6949 */
6950 static int
6951 clonefile_internal(vnode_t fvp, boolean_t data_read_authorised, int dst_dirfd,
6952 user_addr_t dst, uint32_t flags, vfs_context_t ctx)
6953 {
6954 vnode_t tvp, tdvp;
6955 struct nameidata tond;
6956 int error;
6957 int follow;
6958 boolean_t free_src_acl;
6959 boolean_t attr_cleanup;
6960 enum vtype v_type;
6961 kauth_action_t action;
6962 struct componentname *cnp;
6963 uint32_t defaulted;
6964 struct vnode_attr va;
6965 struct vnode_attr nva;
6966 uint32_t vnop_flags;
6967
6968 v_type = vnode_vtype(fvp);
6969 switch (v_type) {
6970 case VLNK:
6971 /* FALLTHRU */
6972 case VREG:
6973 action = KAUTH_VNODE_ADD_FILE;
6974 break;
6975 case VDIR:
6976 if (vnode_isvroot(fvp) || vnode_ismount(fvp) ||
6977 fvp->v_mountedhere) {
6978 return (EINVAL);
6979 }
6980 action = KAUTH_VNODE_ADD_SUBDIRECTORY;
6981 break;
6982 default:
6983 return (EINVAL);
6984 }
6985
6986 AUDIT_ARG(fd2, dst_dirfd);
6987 AUDIT_ARG(value32, flags);
6988
6989 follow = (flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6990 NDINIT(&tond, CREATE, OP_LINK, follow | WANTPARENT | AUDITVNPATH2,
6991 UIO_USERSPACE, dst, ctx);
6992 if ((error = nameiat(&tond, dst_dirfd)))
6993 return (error);
6994 cnp = &tond.ni_cnd;
6995 tdvp = tond.ni_dvp;
6996 tvp = tond.ni_vp;
6997
6998 free_src_acl = FALSE;
6999 attr_cleanup = FALSE;
7000
7001 if (tvp != NULL) {
7002 error = EEXIST;
7003 goto out;
7004 }
7005
7006 if (vnode_mount(tdvp) != vnode_mount(fvp)) {
7007 error = EXDEV;
7008 goto out;
7009 }
7010
7011 #if CONFIG_MACF
7012 if ((error = mac_vnode_check_clone(ctx, tdvp, fvp, cnp)))
7013 goto out;
7014 #endif
7015 if ((error = vnode_authorize(tdvp, NULL, action, ctx)))
7016 goto out;
7017
7018 action = KAUTH_VNODE_GENERIC_READ_BITS;
7019 if (data_read_authorised)
7020 action &= ~KAUTH_VNODE_READ_DATA;
7021 if ((error = vnode_authorize(fvp, NULL, action, ctx)))
7022 goto out;
7023
7024 /*
7025 * Certain attributes may need to be changed from the source; we ask for
7026 * those here.
7027 */
7028 VATTR_INIT(&va);
7029 VATTR_WANTED(&va, va_uid);
7030 VATTR_WANTED(&va, va_gid);
7031 VATTR_WANTED(&va, va_mode);
7032 VATTR_WANTED(&va, va_flags);
7033 VATTR_WANTED(&va, va_acl);
7034
7035 if ((error = vnode_getattr(fvp, &va, ctx)) != 0)
7036 goto out;
7037
7038 VATTR_INIT(&nva);
7039 VATTR_SET(&nva, va_type, v_type);
7040 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) {
7041 VATTR_SET(&nva, va_acl, va.va_acl);
7042 free_src_acl = TRUE;
7043 }
7044
7045 /* Handle ACL inheritance, initialize vap. */
7046 if (v_type == VLNK) {
7047 error = vnode_authattr_new(tdvp, &nva, 0, ctx);
7048 } else {
7049 error = vn_attribute_prepare(tdvp, &nva, &defaulted, ctx);
7050 if (error)
7051 goto out;
7052 attr_cleanup = TRUE;
7053 }
7054
7055 vnop_flags = VNODE_CLONEFILE_DEFAULT;
7056 /*
7057 * We've got initial values for all security parameters.
7058 * If we are superuser, then we can change owners to be the
7059 * same as the source. Both superuser and the owner have default
7060 * WRITE_SECURITY privileges so all other fields can be taken
7061 * from source as well.
7062 */
7063 if (!(flags & CLONE_NOOWNERCOPY) && vfs_context_issuser(ctx)) {
7064 if (VATTR_IS_SUPPORTED(&va, va_uid))
7065 VATTR_SET(&nva, va_uid, va.va_uid);
7066 if (VATTR_IS_SUPPORTED(&va, va_gid))
7067 VATTR_SET(&nva, va_gid, va.va_gid);
7068 } else {
7069 vnop_flags |= VNODE_CLONEFILE_NOOWNERCOPY;
7070 }
7071
7072 if (VATTR_IS_SUPPORTED(&va, va_mode))
7073 VATTR_SET(&nva, va_mode, va.va_mode);
7074 if (VATTR_IS_SUPPORTED(&va, va_flags)) {
7075 VATTR_SET(&nva, va_flags,
7076 ((va.va_flags & ~(UF_DATAVAULT | SF_RESTRICTED)) | /* Turn off from source */
7077 (nva.va_flags & (UF_DATAVAULT | SF_RESTRICTED))));
7078 }
7079
7080 error = VNOP_CLONEFILE(fvp, tdvp, &tvp, cnp, &nva, vnop_flags, ctx);
7081
7082 if (!error && tvp) {
7083 int update_flags = 0;
7084 #if CONFIG_FSE
7085 int fsevent;
7086 #endif /* CONFIG_FSE */
7087
7088 #if CONFIG_MACF
7089 (void)vnode_label(vnode_mount(tvp), tdvp, tvp, cnp,
7090 VNODE_LABEL_CREATE, ctx);
7091 #endif
7092 /*
7093 * If some of the requested attributes weren't handled by the
7094 * VNOP, use our fallback code.
7095 */
7096 if (!VATTR_ALL_SUPPORTED(&va))
7097 (void)vnode_setattr_fallback(tvp, &nva, ctx);
7098
7099 // Make sure the name & parent pointers are hooked up
7100 if (tvp->v_name == NULL)
7101 update_flags |= VNODE_UPDATE_NAME;
7102 if (tvp->v_parent == NULLVP)
7103 update_flags |= VNODE_UPDATE_PARENT;
7104
7105 if (update_flags) {
7106 (void)vnode_update_identity(tvp, tdvp, cnp->cn_nameptr,
7107 cnp->cn_namelen, cnp->cn_hash, update_flags);
7108 }
7109
7110 #if CONFIG_FSE
7111 switch (vnode_vtype(tvp)) {
7112 case VLNK:
7113 /* FALLTHRU */
7114 case VREG:
7115 fsevent = FSE_CREATE_FILE;
7116 break;
7117 case VDIR:
7118 fsevent = FSE_CREATE_DIR;
7119 break;
7120 default:
7121 goto out;
7122 }
7123
7124 if (need_fsevent(fsevent, tvp)) {
7125 /*
7126 * The following is a sequence of three explicit events.
7127 * A pair of FSE_CLONE events representing the source and destination
7128 * followed by an FSE_CREATE_[FILE | DIR] for the destination.
7129 * fseventsd may coalesce the destination clone and create events
7130 * into a single event resulting in the following sequence for a client
7131 * FSE_CLONE (src)
7132 * FSE_CLONE | FSE_CREATE (dst)
7133 */
7134 add_fsevent(FSE_CLONE, ctx, FSE_ARG_VNODE, fvp, FSE_ARG_VNODE, tvp,
7135 FSE_ARG_DONE);
7136 add_fsevent(fsevent, ctx, FSE_ARG_VNODE, tvp,
7137 FSE_ARG_DONE);
7138 }
7139 #endif /* CONFIG_FSE */
7140 }
7141
7142 out:
7143 if (attr_cleanup)
7144 vn_attribute_cleanup(&nva, defaulted);
7145 if (free_src_acl && va.va_acl)
7146 kauth_acl_free(va.va_acl);
7147 nameidone(&tond);
7148 if (tvp)
7149 vnode_put(tvp);
7150 vnode_put(tdvp);
7151 return (error);
7152 }
7153
7154 /*
7155 * clone files or directories, target must not exist.
7156 */
7157 /* ARGSUSED */
7158 int
7159 clonefileat(__unused proc_t p, struct clonefileat_args *uap,
7160 __unused int32_t *retval)
7161 {
7162 vnode_t fvp;
7163 struct nameidata fromnd;
7164 int follow;
7165 int error;
7166 vfs_context_t ctx = vfs_context_current();
7167
7168 /* Check that the flags are valid. */
7169 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY))
7170 return (EINVAL);
7171
7172 AUDIT_ARG(fd, uap->src_dirfd);
7173
7174 follow = (uap->flags & CLONE_NOFOLLOW) ? NOFOLLOW : FOLLOW;
7175 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, follow | AUDITVNPATH1,
7176 UIO_USERSPACE, uap->src, ctx);
7177 if ((error = nameiat(&fromnd, uap->src_dirfd)))
7178 return (error);
7179
7180 fvp = fromnd.ni_vp;
7181 nameidone(&fromnd);
7182
7183 error = clonefile_internal(fvp, FALSE, uap->dst_dirfd, uap->dst,
7184 uap->flags, ctx);
7185
7186 vnode_put(fvp);
7187 return (error);
7188 }
7189
7190 int
7191 fclonefileat(__unused proc_t p, struct fclonefileat_args *uap,
7192 __unused int32_t *retval)
7193 {
7194 vnode_t fvp;
7195 struct fileproc *fp;
7196 int error;
7197 vfs_context_t ctx = vfs_context_current();
7198
7199 /* Check that the flags are valid. */
7200 if (uap->flags & ~(CLONE_NOFOLLOW | CLONE_NOOWNERCOPY))
7201 return (EINVAL);
7202
7203 AUDIT_ARG(fd, uap->src_fd);
7204 error = fp_getfvp(p, uap->src_fd, &fp, &fvp);
7205 if (error)
7206 return (error);
7207
7208 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7209 AUDIT_ARG(vnpath_withref, fvp, ARG_VNODE1);
7210 error = EBADF;
7211 goto out;
7212 }
7213
7214 if ((error = vnode_getwithref(fvp)))
7215 goto out;
7216
7217 AUDIT_ARG(vnpath, fvp, ARG_VNODE1);
7218
7219 error = clonefile_internal(fvp, TRUE, uap->dst_dirfd, uap->dst,
7220 uap->flags, ctx);
7221
7222 vnode_put(fvp);
7223 out:
7224 file_drop(uap->src_fd);
7225 return (error);
7226 }
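/*
 * Illustrative userspace sketch: clonefileat(2) (assumed to be declared in
 * <sys/clonefile.h>) drives clonefile_internal() above. The clone must stay
 * on a single volume (EXDEV otherwise) and the target must not already
 * exist (EEXIST otherwise); paths below are hypothetical.
 *
 *	#include <sys/clonefile.h>	// clonefileat(), CLONE_* flags
 *	#include <fcntl.h>		// AT_FDCWD
 *
 *	// Clone without following a symlink source and without copying the
 *	// source's ownership onto the new file.
 *	(void)clonefileat(AT_FDCWD, "src.dat", AT_FDCWD, "dst.dat",
 *	    CLONE_NOFOLLOW | CLONE_NOOWNERCOPY);
 */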
7227
7228 /*
7229 * Rename files. Source and destination must either both be directories,
7230 * or both not be directories. If target is a directory, it must be empty.
7231 */
7232 /* ARGSUSED */
7233 static int
7234 renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
7235 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
7236 {
7237 if (flags & ~VFS_RENAME_FLAGS_MASK)
7238 return EINVAL;
7239
7240 if (ISSET(flags, VFS_RENAME_SWAP) && ISSET(flags, VFS_RENAME_EXCL))
7241 return EINVAL;
7242
7243 vnode_t tvp, tdvp;
7244 vnode_t fvp, fdvp;
7245 struct nameidata *fromnd, *tond;
7246 int error;
7247 int do_retry;
7248 int retry_count;
7249 int mntrename;
7250 int need_event;
7251 const char *oname = NULL;
7252 char *from_name = NULL, *to_name = NULL;
7253 int from_len=0, to_len=0;
7254 int holding_mntlock;
7255 mount_t locked_mp = NULL;
7256 vnode_t oparent = NULLVP;
7257 #if CONFIG_FSE
7258 fse_info from_finfo, to_finfo;
7259 #endif
7260 int from_truncated=0, to_truncated;
7261 int batched = 0;
7262 struct vnode_attr *fvap, *tvap;
7263 int continuing = 0;
7264 /* carving out a chunk for structs that are too big to be on stack. */
7265 struct {
7266 struct nameidata from_node, to_node;
7267 struct vnode_attr fv_attr, tv_attr;
7268 } * __rename_data;
7269 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
7270 fromnd = &__rename_data->from_node;
7271 tond = &__rename_data->to_node;
7272
7273 holding_mntlock = 0;
7274 do_retry = 0;
7275 retry_count = 0;
7276 retry:
7277 fvp = tvp = NULL;
7278 fdvp = tdvp = NULL;
7279 fvap = tvap = NULL;
7280 mntrename = FALSE;
7281
7282 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
7283 segflg, from, ctx);
7284 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
7285
7286 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
7287 segflg, to, ctx);
7288 tond->ni_flag = NAMEI_COMPOUNDRENAME;
7289
7290 continue_lookup:
7291 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
7292 if ( (error = nameiat(fromnd, fromfd)) )
7293 goto out1;
7294 fdvp = fromnd->ni_dvp;
7295 fvp = fromnd->ni_vp;
7296
7297 if (fvp && fvp->v_type == VDIR)
7298 tond->ni_cnd.cn_flags |= WILLBEDIR;
7299 }
7300
7301 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
7302 if ( (error = nameiat(tond, tofd)) ) {
7303 /*
7304 * Translate error code for rename("dir1", "dir2/.").
7305 */
7306 if (error == EISDIR && fvp->v_type == VDIR)
7307 error = EINVAL;
7308 goto out1;
7309 }
7310 tdvp = tond->ni_dvp;
7311 tvp = tond->ni_vp;
7312 }
7313
7314 #if DEVELOPMENT || DEBUG
7315 /*
7316 * XXX VSWAP: Check for entitlements or special flag here
7317 * so we can restrict access appropriately.
7318 */
7319 #else /* DEVELOPMENT || DEBUG */
7320
7321 if (fromnd->ni_vp && vnode_isswap(fromnd->ni_vp) && (ctx != vfs_context_kernel())) {
7322 error = EPERM;
7323 goto out1;
7324 }
7325
7326 if (tond->ni_vp && vnode_isswap(tond->ni_vp) && (ctx != vfs_context_kernel())) {
7327 error = EPERM;
7328 goto out1;
7329 }
7330 #endif /* DEVELOPMENT || DEBUG */
7331
7332 if (!tvp && ISSET(flags, VFS_RENAME_SWAP)) {
7333 error = ENOENT;
7334 goto out1;
7335 }
7336
7337 if (tvp && ISSET(flags, VFS_RENAME_EXCL)) {
7338 error = EEXIST;
7339 goto out1;
7340 }
7341
7342 batched = vnode_compound_rename_available(fdvp);
7343 if (!fvp) {
7344 /*
7345 * Claim: this check will never reject a valid rename.
7346 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
7347 * Suppose fdvp and tdvp are not on the same mount.
7348 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
7349 * then you can't move it to within another dir on the same mountpoint.
7350 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
7351 *
7352 * If this check passes, then we are safe to pass these vnodes to the same FS.
7353 */
7354 if (fdvp->v_mount != tdvp->v_mount) {
7355 error = EXDEV;
7356 goto out1;
7357 }
7358 goto skipped_lookup;
7359 }
7360
7361 if (!batched) {
7362 error = vn_authorize_renamex(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, flags, NULL);
7363 if (error) {
7364 if (error == ENOENT) {
7365 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7366 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7367 /*
7368 * We encountered a race where after doing the namei, tvp stops
7369 * being valid. If so, simply re-drive the rename call from the
7370 * top.
7371 */
7372 do_retry = 1;
7373 retry_count += 1;
7374 }
7375 }
7376 goto out1;
7377 }
7378 }
7379
7380 /*
7381 * If the source and destination are the same (i.e. they're
7382 * links to the same vnode) and the target file system is
7383 * case sensitive, then there is nothing to do.
7384 *
7385 * XXX Come back to this.
7386 */
7387 if (fvp == tvp) {
7388 int pathconf_val;
7389
7390 /*
7391 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
7392 * then assume that this file system is case sensitive.
7393 */
7394 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
7395 pathconf_val != 0) {
7396 goto out1;
7397 }
7398 }
7399
7400 /*
7401 * Allow the renaming of mount points.
7402 * - target must not exist
7403 * - target must reside in the same directory as source
7404 * - union mounts cannot be renamed
7405 * - "/" cannot be renamed
7406 *
7407 * XXX Handle this in VFS after a continued lookup (if we missed
7408 * in the cache to start off)
7409 *
7410 * N.B. If RENAME_SWAP is being used, then @tvp != NULL and so
7411 * we'll skip past here. The file system is responsible for
7412 * checking that @tvp is not a descendant of @fvp and vice versa
7413 * so it should always return EINVAL if either @tvp or @fvp is the
7414 * root of a volume.
7415 */
7416 if ((fvp->v_flag & VROOT) &&
7417 (fvp->v_type == VDIR) &&
7418 (tvp == NULL) &&
7419 (fvp->v_mountedhere == NULL) &&
7420 (fdvp == tdvp) &&
7421 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
7422 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
7423 vnode_t coveredvp;
7424
7425 /* switch fvp to the covered vnode */
7426 coveredvp = fvp->v_mount->mnt_vnodecovered;
7427 if ( (vnode_getwithref(coveredvp)) ) {
7428 error = ENOENT;
7429 goto out1;
7430 }
7431 vnode_put(fvp);
7432
7433 fvp = coveredvp;
7434 mntrename = TRUE;
7435 }
7436 /*
7437 * Check for cross-device rename.
7438 */
7439 if ((fvp->v_mount != tdvp->v_mount) ||
7440 (tvp && (fvp->v_mount != tvp->v_mount))) {
7441 error = EXDEV;
7442 goto out1;
7443 }
7444
7445 /*
7446 * If source is the same as the destination (that is the
7447 * same inode number) then there is nothing to do...
7448 * EXCEPT if the underlying file system supports case
7449 * insensitivity and is case preserving. In this case
7450 * the file system needs to handle the special case of
7451 * getting the same vnode as target (tvp) and source (fvp).
7452 *
7453 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
7454 * and _PC_CASE_PRESERVING can have this exception, and they need to
7455 * handle the special case of getting the same vnode as target and
7456 * source. NOTE: Then the target is unlocked going into vnop_rename,
7457 * so not to cause locking problems. There is a single reference on tvp.
7458 *
7459 * NOTE - that fvp == tvp also occurs if they are hard linked and
7460 * that correct behaviour then is just to return success without doing
7461 * anything.
7462 *
7463 * XXX filesystem should take care of this itself, perhaps...
7464 */
7465 if (fvp == tvp && fdvp == tdvp) {
7466 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
7467 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
7468 fromnd->ni_cnd.cn_namelen)) {
7469 goto out1;
7470 }
7471 }
7472
7473 if (holding_mntlock && fvp->v_mount != locked_mp) {
7474 /*
7475 * we're holding a reference and lock
7476 * on locked_mp, but it no longer matches
7477 * what we want to do... so drop our hold
7478 */
7479 mount_unlock_renames(locked_mp);
7480 mount_drop(locked_mp, 0);
7481 holding_mntlock = 0;
7482 }
7483 if (tdvp != fdvp && fvp->v_type == VDIR) {
7484 /*
7485 * serialize renames that re-shape
7486 * the tree... if holding_mntlock is
7487 * set, then we're ready to go...
7488 * otherwise we
7489 * first need to drop the iocounts
7490 * we picked up, second take the
7491 * lock to serialize the access,
7492 * then finally start the lookup
7493 * process over with the lock held
7494 */
7495 if (!holding_mntlock) {
7496 /*
7497 * need to grab a reference on
7498 * the mount point before we
7499 * drop all the iocounts... once
7500 * the iocounts are gone, the mount
7501 * could follow
7502 */
7503 locked_mp = fvp->v_mount;
7504 mount_ref(locked_mp, 0);
7505
7506 /*
7507 * nameidone has to happen before we vnode_put(tvp)
7508 * since it may need to release the fs_nodelock on the tvp
7509 */
7510 nameidone(tond);
7511
7512 if (tvp)
7513 vnode_put(tvp);
7514 vnode_put(tdvp);
7515
7516 /*
7517 * nameidone has to happen before we vnode_put(fdvp)
7518 * since it may need to release the fs_nodelock on the fvp
7519 */
7520 nameidone(fromnd);
7521
7522 vnode_put(fvp);
7523 vnode_put(fdvp);
7524
7525 mount_lock_renames(locked_mp);
7526 holding_mntlock = 1;
7527
7528 goto retry;
7529 }
7530 } else {
7531 /*
7532 * when we dropped the iocounts to take
7533 * the lock, we allowed the identity of
7534 * the various vnodes to change... if they did,
7535 * we may no longer be dealing with a rename
7536 * that reshapes the tree... once we're holding
7537 * the iocounts, the vnodes can't change type
7538 * so we're free to drop the lock at this point
7539 * and continue on
7540 */
7541 if (holding_mntlock) {
7542 mount_unlock_renames(locked_mp);
7543 mount_drop(locked_mp, 0);
7544 holding_mntlock = 0;
7545 }
7546 }
7547
7548 // save these off so we can later verify that fvp is the same
7549 oname = fvp->v_name;
7550 oparent = fvp->v_parent;
7551
7552 skipped_lookup:
7553 #if CONFIG_FSE
7554 need_event = need_fsevent(FSE_RENAME, fdvp);
7555 if (need_event) {
7556 if (fvp) {
7557 get_fse_info(fvp, &from_finfo, ctx);
7558 } else {
7559 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
7560 if (error) {
7561 goto out1;
7562 }
7563
7564 fvap = &__rename_data->fv_attr;
7565 }
7566
7567 if (tvp) {
7568 get_fse_info(tvp, &to_finfo, ctx);
7569 } else if (batched) {
7570 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
7571 if (error) {
7572 goto out1;
7573 }
7574
7575 tvap = &__rename_data->tv_attr;
7576 }
7577 }
7578 #else
7579 need_event = 0;
7580 #endif /* CONFIG_FSE */
7581
7582 if (need_event || kauth_authorize_fileop_has_listeners()) {
7583 if (from_name == NULL) {
7584 GET_PATH(from_name);
7585 if (from_name == NULL) {
7586 error = ENOMEM;
7587 goto out1;
7588 }
7589 }
7590
7591 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
7592
7593 if (to_name == NULL) {
7594 GET_PATH(to_name);
7595 if (to_name == NULL) {
7596 error = ENOMEM;
7597 goto out1;
7598 }
7599 }
7600
7601 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
7602 }
7603 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
7604 tdvp, &tvp, &tond->ni_cnd, tvap,
7605 flags, ctx);
7606
7607 if (holding_mntlock) {
7608 /*
7609 * we can drop our serialization
7610 * lock now
7611 */
7612 mount_unlock_renames(locked_mp);
7613 mount_drop(locked_mp, 0);
7614 holding_mntlock = 0;
7615 }
7616 if (error) {
7617 if (error == EKEEPLOOKING) {
7618 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
7619 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
7620 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
7621 }
7622 }
7623
7624 fromnd->ni_vp = fvp;
7625 tond->ni_vp = tvp;
7626
7627 goto continue_lookup;
7628 }
7629
7630 /*
7631 * We may encounter a race in the VNOP where the destination didn't
7632 * exist when we did the namei, but it does by the time we go and
7633 * try to create the entry. In this case, we should re-drive this rename
7634 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
7635 * but other filesystems susceptible to this race could return it, too.
7636 */
7637 if (error == ERECYCLE) {
7638 do_retry = 1;
7639 }
7640
7641 /*
7642 * For compound VNOPs, the authorization callback may return
7643 * ENOENT in case of racing hardlink lookups hitting the name
7644 * cache; redrive the lookup.
7645 */
7646 if (batched && error == ENOENT) {
7647 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
7648 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
7649 do_retry = 1;
7650 retry_count += 1;
7651 }
7652 }
7653
7654 goto out1;
7655 }
7656
7657 /* call out to allow 3rd party notification of rename.
7658 * Ignore result of kauth_authorize_fileop call.
7659 */
7660 kauth_authorize_fileop(vfs_context_ucred(ctx),
7661 KAUTH_FILEOP_RENAME,
7662 (uintptr_t)from_name, (uintptr_t)to_name);
7663 if (flags & VFS_RENAME_SWAP) {
7664 kauth_authorize_fileop(vfs_context_ucred(ctx),
7665 KAUTH_FILEOP_RENAME,
7666 (uintptr_t)to_name, (uintptr_t)from_name);
7667 }
7668
7669 #if CONFIG_FSE
7670 if (from_name != NULL && to_name != NULL) {
7671 if (from_truncated || to_truncated) {
7672 // set it here since only the from_finfo gets reported up to user space
7673 from_finfo.mode |= FSE_TRUNCATED_PATH;
7674 }
7675
7676 if (tvap && tvp) {
7677 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
7678 }
7679 if (fvap) {
7680 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
7681 }
7682
7683 if (tvp) {
7684 add_fsevent(FSE_RENAME, ctx,
7685 FSE_ARG_STRING, from_len, from_name,
7686 FSE_ARG_FINFO, &from_finfo,
7687 FSE_ARG_STRING, to_len, to_name,
7688 FSE_ARG_FINFO, &to_finfo,
7689 FSE_ARG_DONE);
7690 if (flags & VFS_RENAME_SWAP) {
7691 /*
7692 * Strictly speaking, swap is the equivalent of
7693 * *three* renames. FSEvents clients should only take
7694 * the events as a hint, so we only bother reporting
7695 * two.
7696 */
7697 add_fsevent(FSE_RENAME, ctx,
7698 FSE_ARG_STRING, to_len, to_name,
7699 FSE_ARG_FINFO, &to_finfo,
7700 FSE_ARG_STRING, from_len, from_name,
7701 FSE_ARG_FINFO, &from_finfo,
7702 FSE_ARG_DONE);
7703 }
7704 } else {
7705 add_fsevent(FSE_RENAME, ctx,
7706 FSE_ARG_STRING, from_len, from_name,
7707 FSE_ARG_FINFO, &from_finfo,
7708 FSE_ARG_STRING, to_len, to_name,
7709 FSE_ARG_DONE);
7710 }
7711 }
7712 #endif /* CONFIG_FSE */
7713
7714 /*
7715 * update filesystem's mount point data
7716 */
7717 if (mntrename) {
7718 char *cp, *pathend, *mpname;
7719 char * tobuf;
7720 struct mount *mp;
7721 int maxlen;
7722 size_t len = 0;
7723
7724 mp = fvp->v_mountedhere;
7725
7726 if (vfs_busy(mp, LK_NOWAIT)) {
7727 error = EBUSY;
7728 goto out1;
7729 }
7730 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
7731
7732 if (UIO_SEG_IS_USER_SPACE(segflg))
7733 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
7734 else
7735 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
7736 if (!error) {
7737 /* find current mount point prefix */
7738 pathend = &mp->mnt_vfsstat.f_mntonname[0];
7739 for (cp = pathend; *cp != '\0'; ++cp) {
7740 if (*cp == '/')
7741 pathend = cp + 1;
7742 }
7743 /* find last component of target name */
7744 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
7745 if (*cp == '/')
7746 mpname = cp + 1;
7747 }
7748 /* append name to prefix */
7749 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
7750 bzero(pathend, maxlen);
7751 strlcpy(pathend, mpname, maxlen);
7752 }
7753 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
7754
7755 vfs_unbusy(mp);
7756 }
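/*
 * Worked example of the splice above (illustrative values only): with
 * f_mntonname "/Volumes/Old" and to = "/Volumes/New", pathend points just
 * past "/Volumes/" and mpname at "New", so statfs() reports "/Volumes/New"
 * afterwards:
 *
 *	char mnton[MAXPATHLEN] = "/Volumes/Old";
 *	const char *mpname = "New";	// last component of 'to'
 *	char *pathend = mnton + strlen("/Volumes/");
 *	strlcpy(pathend, mpname, sizeof(mnton) - (pathend - mnton));
 *	// mnton now reads "/Volumes/New"
 */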
7757 /*
7758 * fix up name & parent pointers. note that we first
7759 * check that fvp has the same name/parent pointers it
7760 * had before the rename call... this is a 'weak' check
7761 * at best...
7762 *
7763 * XXX oparent and oname may not be set in the compound vnop case
7764 */
7765 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
7766 int update_flags;
7767
7768 update_flags = VNODE_UPDATE_NAME;
7769
7770 if (fdvp != tdvp)
7771 update_flags |= VNODE_UPDATE_PARENT;
7772
7773 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
7774 }
7775 out1:
7776 if (to_name != NULL) {
7777 RELEASE_PATH(to_name);
7778 to_name = NULL;
7779 }
7780 if (from_name != NULL) {
7781 RELEASE_PATH(from_name);
7782 from_name = NULL;
7783 }
7784 if (holding_mntlock) {
7785 mount_unlock_renames(locked_mp);
7786 mount_drop(locked_mp, 0);
7787 holding_mntlock = 0;
7788 }
7789 if (tdvp) {
7790 /*
7791 * nameidone has to happen before we vnode_put(tdvp)
7792 * since it may need to release the fs_nodelock on the tdvp
7793 */
7794 nameidone(tond);
7795
7796 if (tvp)
7797 vnode_put(tvp);
7798 vnode_put(tdvp);
7799 }
7800 if (fdvp) {
7801 /*
7802 * nameidone has to happen before we vnode_put(fdvp)
7803 * since it may need to release the fs_nodelock on the fdvp
7804 */
7805 nameidone(fromnd);
7806
7807 if (fvp)
7808 vnode_put(fvp);
7809 vnode_put(fdvp);
7810 }
7811
7812 /*
7813 * If things changed after we did the namei, then we will re-drive
7814 * this rename call from the top.
7815 */
7816 if (do_retry) {
7817 do_retry = 0;
7818 goto retry;
7819 }
7820
7821 FREE(__rename_data, M_TEMP);
7822 return (error);
7823 }
7824
7825 int
7826 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7827 {
7828 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7829 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7830 }
7831
7832 int renameatx_np(__unused proc_t p, struct renameatx_np_args *uap, __unused int32_t *retval)
7833 {
7834 return renameat_internal(
7835 vfs_context_current(),
7836 uap->fromfd, uap->from,
7837 uap->tofd, uap->to,
7838 UIO_USERSPACE, uap->flags);
7839 }
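/*
 * Illustrative userspace sketch: renameatx_np() is exposed through Libc
 * (the RENAME_SWAP/RENAME_EXCL spellings below are the assumed userspace
 * names for VFS_RENAME_SWAP/VFS_RENAME_EXCL handled above).
 *
 *	#include <stdio.h>	// renameatx_np(), RENAME_SWAP, RENAME_EXCL
 *	#include <fcntl.h>	// AT_FDCWD
 *
 *	// Atomically exchange two existing files (missing target: ENOENT).
 *	(void)renameatx_np(AT_FDCWD, "a.cfg", AT_FDCWD, "b.cfg", RENAME_SWAP);
 *
 *	// Rename only if the destination does not yet exist (else EEXIST).
 *	(void)renameatx_np(AT_FDCWD, "tmp.cfg", AT_FDCWD, "new.cfg", RENAME_EXCL);
 */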
7840
7841 int
7842 renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7843 {
7844 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7845 uap->tofd, uap->to, UIO_USERSPACE, 0));
7846 }
7847
7848 /*
7849 * Make a directory file.
7850 *
7851 * Returns: 0 Success
7852 * EEXIST
7853 * namei:???
7854 * vnode_authorize:???
7855 * vn_create:???
7856 */
7857 /* ARGSUSED */
7858 static int
7859 mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
7860 enum uio_seg segflg)
7861 {
7862 vnode_t vp, dvp;
7863 int error;
7864 int update_flags = 0;
7865 int batched;
7866 struct nameidata nd;
7867
7868 AUDIT_ARG(mode, vap->va_mode);
7869 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
7870 path, ctx);
7871 nd.ni_cnd.cn_flags |= WILLBEDIR;
7872 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
7873
7874 continue_lookup:
7875 error = nameiat(&nd, fd);
7876 if (error)
7877 return (error);
7878 dvp = nd.ni_dvp;
7879 vp = nd.ni_vp;
7880
7881 if (vp != NULL) {
7882 error = EEXIST;
7883 goto out;
7884 }
7885
7886 batched = vnode_compound_mkdir_available(dvp);
7887
7888 VATTR_SET(vap, va_type, VDIR);
7889
7890 /*
7891 * XXX
7892 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7893 * only get EEXIST or EISDIR for existing path components, and not that it could see
7894 * EACCES/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7895 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7896 */
7897 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
7898 if (error == EACCES || error == EPERM) {
7899 int error2;
7900
7901 nameidone(&nd);
7902 vnode_put(dvp);
7903 dvp = NULLVP;
7904
7905 /*
7906 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
7907 * rather than EACCES if the target exists.
7908 */
7909 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
7910 path, ctx);
7911 error2 = nameiat(&nd, fd);
7912 if (error2) {
7913 goto out;
7914 } else {
7915 vp = nd.ni_vp;
7916 error = EEXIST;
7917 goto out;
7918 }
7919 }
7920
7921 goto out;
7922 }
7923
7924 /*
7925 * make the directory
7926 */
7927 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
7928 if (error == EKEEPLOOKING) {
7929 nd.ni_vp = vp;
7930 goto continue_lookup;
7931 }
7932
7933 goto out;
7934 }
7935
7936 // Make sure the name & parent pointers are hooked up
7937 if (vp->v_name == NULL)
7938 update_flags |= VNODE_UPDATE_NAME;
7939 if (vp->v_parent == NULLVP)
7940 update_flags |= VNODE_UPDATE_PARENT;
7941
7942 if (update_flags)
7943 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
7944
7945 #if CONFIG_FSE
7946 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
7947 #endif
7948
7949 out:
7950 /*
7951 * nameidone has to happen before we vnode_put(dvp)
7952 * since it may need to release the fs_nodelock on the dvp
7953 */
7954 nameidone(&nd);
7955
7956 if (vp)
7957 vnode_put(vp);
7958 if (dvp)
7959 vnode_put(dvp);
7960
7961 return (error);
7962 }
7963
7964 /*
7965 * mkdir_extended: Create a directory; with extended security (ACL).
7966 *
7967 * Parameters: p Process requesting to create the directory
7968 * uap User argument descriptor (see below)
7969 * retval (ignored)
7970 *
7971 * Indirect: uap->path Path of directory to create
7972 * uap->mode Access permissions to set
7973 * uap->xsecurity ACL to set
7974 *
7975 * Returns: 0 Success
7976 * !0 Not success
7977 *
7978 */
7979 int
7980 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
7981 {
7982 int ciferror;
7983 kauth_filesec_t xsecdst;
7984 struct vnode_attr va;
7985
7986 AUDIT_ARG(owner, uap->uid, uap->gid);
7987
7988 xsecdst = NULL;
7989 if ((uap->xsecurity != USER_ADDR_NULL) &&
7990 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
7991 return ciferror;
7992
7993 VATTR_INIT(&va);
7994 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7995 if (xsecdst != NULL)
7996 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
7997
7998 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7999 UIO_USERSPACE);
8000 if (xsecdst != NULL)
8001 kauth_filesec_free(xsecdst);
8002 return ciferror;
8003 }
8004
8005 int
8006 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
8007 {
8008 struct vnode_attr va;
8009
8010 VATTR_INIT(&va);
8011 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8012
8013 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
8014 UIO_USERSPACE));
8015 }
8016
8017 int
8018 mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
8019 {
8020 struct vnode_attr va;
8021
8022 VATTR_INIT(&va);
8023 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
8024
8025 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
8026 UIO_USERSPACE));
8027 }
8028
8029 static int
8030 rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
8031 enum uio_seg segflg)
8032 {
8033 vnode_t vp, dvp;
8034 int error;
8035 struct nameidata nd;
8036 char *path = NULL;
8037 int len=0;
8038 int has_listeners = 0;
8039 int need_event = 0;
8040 int truncated = 0;
8041 #if CONFIG_FSE
8042 struct vnode_attr va;
8043 #endif /* CONFIG_FSE */
8044 struct vnode_attr *vap = NULL;
8045 int restart_count = 0;
8046 int batched;
8047
8048 int restart_flag;
8049
8050 /*
8051 * This loop exists to restart rmdir in the unlikely case that two
8052 * processes are simultaneously trying to remove the same directory
8053 * containing orphaned appleDouble files.
8054 */
8055 do {
8056 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
8057 segflg, dirpath, ctx);
8058 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
8059 continue_lookup:
8060 restart_flag = 0;
8061 vap = NULL;
8062
8063 error = nameiat(&nd, fd);
8064 if (error)
8065 return (error);
8066
8067 dvp = nd.ni_dvp;
8068 vp = nd.ni_vp;
8069
8070 if (vp) {
8071 batched = vnode_compound_rmdir_available(vp);
8072
8073 if (vp->v_flag & VROOT) {
8074 /*
8075 * The root of a mounted filesystem cannot be deleted.
8076 */
8077 error = EBUSY;
8078 goto out;
8079 }
8080
8081 #if DEVELOPMENT || DEBUG
8082 /*
8083 * XXX VSWAP: Check for entitlements or special flag here
8084 * so we can restrict access appropriately.
8085 */
8086 #else /* DEVELOPMENT || DEBUG */
8087
8088 if (vnode_isswap(vp) && (ctx != vfs_context_kernel())) {
8089 error = EPERM;
8090 goto out;
8091 }
8092 #endif /* DEVELOPMENT || DEBUG */
8093
8094 /*
8095 * Removed a check here; we used to abort if vp's vid
8096 * was not the same as what we'd seen the last time around.
8097 * I do not think that check was valid, because if we retry
8098 * and all dirents are gone, the directory could legitimately
8099 * be recycled but still be present in a situation where we would
8100 * have had permission to delete. Therefore, we won't make
8101 * an effort to preserve that check now that we may not have a
8102 * vp here.
8103 */
8104
8105 if (!batched) {
8106 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
8107 if (error) {
8108 if (error == ENOENT) {
8109 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
8110 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8111 restart_flag = 1;
8112 restart_count += 1;
8113 }
8114 }
8115 goto out;
8116 }
8117 }
8118 } else {
8119 batched = 1;
8120
8121 if (!vnode_compound_rmdir_available(dvp)) {
8122 panic("No error, but no compound rmdir?");
8123 }
8124 }
8125
8126 #if CONFIG_FSE
8127 fse_info finfo;
8128
8129 need_event = need_fsevent(FSE_DELETE, dvp);
8130 if (need_event) {
8131 if (!batched) {
8132 get_fse_info(vp, &finfo, ctx);
8133 } else {
8134 error = vfs_get_notify_attributes(&va);
8135 if (error) {
8136 goto out;
8137 }
8138
8139 vap = &va;
8140 }
8141 }
8142 #endif
8143 has_listeners = kauth_authorize_fileop_has_listeners();
8144 if (need_event || has_listeners) {
8145 if (path == NULL) {
8146 GET_PATH(path);
8147 if (path == NULL) {
8148 error = ENOMEM;
8149 goto out;
8150 }
8151 }
8152
8153 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
8154 #if CONFIG_FSE
8155 if (truncated) {
8156 finfo.mode |= FSE_TRUNCATED_PATH;
8157 }
8158 #endif
8159 }
8160
8161 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
8162 nd.ni_vp = vp;
8163 if (vp == NULLVP) {
8164 /* Couldn't find a vnode */
8165 goto out;
8166 }
8167
8168 if (error == EKEEPLOOKING) {
8169 goto continue_lookup;
8170 } else if (batched && error == ENOENT) {
8171 assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
8172 if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
8173 /*
8174 * For compound VNOPs, the authorization callback
8175 * may return ENOENT in case of racing hard link lookups;
8176 * redrive the lookup.
8177 */
8178 restart_flag = 1;
8179 restart_count += 1;
8180 goto out;
8181 }
8182 }
8183 #if CONFIG_APPLEDOUBLE
8184 /*
8185 * Special case to remove orphaned AppleDouble
8186 * files. I don't like putting this in the kernel,
8187 * but carbon does not like putting this in carbon either,
8188 * so here we are.
8189 */
8190 if (error == ENOTEMPTY) {
8191 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
8192 if (error == EBUSY) {
8193 goto out;
8194 }
8195
8196
8197 /*
8198 * Assuming everything went well, we will try the RMDIR again
8199 */
8200 if (!error)
8201 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
8202 }
8203 #endif /* CONFIG_APPLEDOUBLE */
8204 /*
8205 * Call out to allow 3rd party notification of delete.
8206 * Ignore result of kauth_authorize_fileop call.
8207 */
8208 if (!error) {
8209 if (has_listeners) {
8210 kauth_authorize_fileop(vfs_context_ucred(ctx),
8211 KAUTH_FILEOP_DELETE,
8212 (uintptr_t)vp,
8213 (uintptr_t)path);
8214 }
8215
8216 if (vp->v_flag & VISHARDLINK) {
8217 // see the comment in unlink1() about why we update
8218 // the parent of a hard link when it is removed
8219 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
8220 }
8221
8222 #if CONFIG_FSE
8223 if (need_event) {
8224 if (vap) {
8225 vnode_get_fse_info_from_vap(vp, &finfo, vap);
8226 }
8227 add_fsevent(FSE_DELETE, ctx,
8228 FSE_ARG_STRING, len, path,
8229 FSE_ARG_FINFO, &finfo,
8230 FSE_ARG_DONE);
8231 }
8232 #endif
8233 }
8234
8235 out:
8236 if (path != NULL) {
8237 RELEASE_PATH(path);
8238 path = NULL;
8239 }
8240 /*
8241 * nameidone has to happen before we vnode_put(dvp)
8242 * since it may need to release the fs_nodelock on the dvp
8243 */
8244 nameidone(&nd);
8245 vnode_put(dvp);
8246
8247 if (vp)
8248 vnode_put(vp);
8249
8250 if (restart_flag == 0) {
8251 wakeup_one((caddr_t)vp);
8252 return (error);
8253 }
8254 tsleep(vp, PVFS, "rm AD", 1);
8255
8256 } while (restart_flag != 0);
8257
8258 return (error);
8259
8260 }
8261
8262 /*
8263 * Remove a directory file.
8264 */
8265 /* ARGSUSED */
8266 int
8267 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
8268 {
8269 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
8270 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
8271 }
8272
8273 /* Get direntry length padded to 8 byte alignment */
8274 #define DIRENT64_LEN(namlen) \
8275 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
8276
8277 /* Get dirent length padded to 4 byte alignment */
8278 #define DIRENT_LEN(namelen) \
8279 ((sizeof(struct dirent) + (namelen + 1) - (__DARWIN_MAXNAMLEN + 1) + 3) & ~3)
8280
8281 /* Get the end of this dirent */
8282 #define DIRENT_END(dep) \
8283 (((char *)(dep)) + (dep)->d_reclen - 1)
8284
8285 errno_t
8286 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
8287 int *numdirent, vfs_context_t ctxp)
8288 {
8289 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
8290 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
8291 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
8292 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
8293 } else {
8294 size_t bufsize;
8295 void * bufptr;
8296 uio_t auio;
8297 struct direntry *entry64;
8298 struct dirent *dep;
8299 int bytesread;
8300 int error;
8301
8302 /*
8303 * We're here because the underlying file system does not
8304 * support direntries, or the mount denies extended readdir support, so we must
8305 * fall back to dirents and convert them to direntries.
8306 *
8307 * Our kernel buffer needs to be smaller since re-packing will
8308 * expand each dirent. The worst case (when the name length
8309 * is 3 or less) corresponds to a struct direntry size of 32
8310 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
8311 * (4-byte aligned). So having a buffer that is 3/8 the size
8312 * will prevent us from reading more than we can pack.
8313 *
8314 * Since this buffer is wired memory, we will limit the
8315 * buffer size to a maximum of 32K. We would really like to
8316 * use 32K in the MIN(), but we use magic number 87371 to
8317 * prevent uio_resid() * 3 / 8 from overflowing.
8318 */
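/* For reference: 3 * 87371 / 8 == 32764, so the wired buffer always stays
 * just under the 32K cap even when uio_resid() is large. */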
8319 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
8320 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
8321 if (bufptr == NULL) {
8322 return ENOMEM;
8323 }
8324
8325 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
8326 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
8327 auio->uio_offset = uio->uio_offset;
8328
8329 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
8330
8331 dep = (struct dirent *)bufptr;
8332 bytesread = bufsize - uio_resid(auio);
8333
8334 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
8335 M_TEMP, M_WAITOK);
8336 /*
8337 * Convert all the entries and copy them out to user's buffer.
8338 */
8339 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
8340 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
8341
8342 if (DIRENT_END(dep) > ((char *)bufptr + bytesread) ||
8343 DIRENT_LEN(dep->d_namlen) > dep->d_reclen) {
8344 printf("%s: %s: Bad dirent received from directory %s\n", __func__,
8345 vp->v_mount->mnt_vfsstat.f_mntonname,
8346 vp->v_name ? vp->v_name : "<unknown>");
8347 error = EIO;
8348 break;
8349 }
8350
8351 bzero(entry64, enbufsize);
8352 /* Convert a dirent to a dirent64. */
8353 entry64->d_ino = dep->d_ino;
8354 entry64->d_seekoff = 0;
8355 entry64->d_reclen = enbufsize;
8356 entry64->d_namlen = dep->d_namlen;
8357 entry64->d_type = dep->d_type;
8358 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
8359
8360 /* Move to next entry. */
8361 dep = (struct dirent *)((char *)dep + dep->d_reclen);
8362
8363 /* Copy entry64 to user's buffer. */
8364 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
8365 }
8366
8367 /* Update the real offset using the offset we got from VNOP_READDIR. */
8368 if (error == 0) {
8369 uio->uio_offset = auio->uio_offset;
8370 }
8371 uio_free(auio);
8372 FREE(bufptr, M_TEMP);
8373 FREE(entry64, M_TEMP);
8374 return (error);
8375 }
8376 }
8377
8378 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
8379
8380 /*
8381 * Read a block of directory entries in a file system independent format.
8382 */
8383 static int
8384 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
8385 off_t *offset, int flags)
8386 {
8387 vnode_t vp;
8388 struct vfs_context context = *vfs_context_current(); /* local copy */
8389 struct fileproc *fp;
8390 uio_t auio;
8391 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8392 off_t loff;
8393 int error, eofflag, numdirent;
8394 char uio_buf[ UIO_SIZEOF(1) ];
8395
8396 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
8397 if (error) {
8398 return (error);
8399 }
8400 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8401 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8402 error = EBADF;
8403 goto out;
8404 }
8405
8406 if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
8407 bufsize = GETDIRENTRIES_MAXBUFSIZE;
8408
8409 #if CONFIG_MACF
8410 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
8411 if (error)
8412 goto out;
8413 #endif
8414 if ( (error = vnode_getwithref(vp)) ) {
8415 goto out;
8416 }
8417 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
8418
8419 unionread:
8420 if (vp->v_type != VDIR) {
8421 (void)vnode_put(vp);
8422 error = EINVAL;
8423 goto out;
8424 }
8425
8426 #if CONFIG_MACF
8427 error = mac_vnode_check_readdir(&context, vp);
8428 if (error != 0) {
8429 (void)vnode_put(vp);
8430 goto out;
8431 }
8432 #endif /* MAC */
8433
8434 loff = fp->f_fglob->fg_offset;
8435 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8436 uio_addiov(auio, bufp, bufsize);
8437
8438 if (flags & VNODE_READDIR_EXTENDED) {
8439 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
8440 fp->f_fglob->fg_offset = uio_offset(auio);
8441 } else {
8442 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
8443 fp->f_fglob->fg_offset = uio_offset(auio);
8444 }
8445 if (error) {
8446 (void)vnode_put(vp);
8447 goto out;
8448 }
8449
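/*
 * Nothing was returned: if this directory sits in a union mount, fall
 * through to the covered (lower) directory and retry the read there.
 */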
8450 if ((user_ssize_t)bufsize == uio_resid(auio)) {
8451 if (union_dircheckp) {
8452 error = union_dircheckp(&vp, fp, &context);
8453 if (error == -1)
8454 goto unionread;
8455 if (error) {
8456 (void)vnode_put(vp);
8457 goto out;
8458 }
8459 }
8460
8461 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
8462 struct vnode *tvp = vp;
8463 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
8464 vnode_ref(vp);
8465 fp->f_fglob->fg_data = (caddr_t) vp;
8466 fp->f_fglob->fg_offset = 0;
8467 vnode_rele(tvp);
8468 vnode_put(tvp);
8469 goto unionread;
8470 }
8471 vp = tvp;
8472 }
8473 }
8474
8475 vnode_put(vp);
8476 if (offset) {
8477 *offset = loff;
8478 }
8479
8480 *bytesread = bufsize - uio_resid(auio);
8481 out:
8482 file_drop(fd);
8483 return (error);
8484 }
8485
8486
8487 int
8488 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
8489 {
8490 off_t offset;
8491 ssize_t bytesread;
8492 int error;
8493
8494 AUDIT_ARG(fd, uap->fd);
8495 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
8496
8497 if (error == 0) {
8498 if (proc_is64bit(p)) {
8499 user64_long_t base = (user64_long_t)offset;
8500 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
8501 } else {
8502 user32_long_t base = (user32_long_t)offset;
8503 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
8504 }
8505 *retval = bytesread;
8506 }
8507 return (error);
8508 }
8509
8510 int
8511 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
8512 {
8513 off_t offset;
8514 ssize_t bytesread;
8515 int error;
8516
8517 AUDIT_ARG(fd, uap->fd);
8518 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
8519
8520 if (error == 0) {
8521 *retval = bytesread;
8522 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
8523 }
8524 return (error);
8525 }
8526
8527
8528 /*
8529 * Set the mode mask for creation of filesystem nodes.
8530 * XXX implement xsecurity
8531 */
8532 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
8533 static int
8534 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
8535 {
8536 struct filedesc *fdp;
8537
8538 AUDIT_ARG(mask, newmask);
8539 proc_fdlock(p);
8540 fdp = p->p_fd;
8541 *retval = fdp->fd_cmask;
8542 fdp->fd_cmask = newmask & ALLPERMS;
8543 proc_fdunlock(p);
8544 return (0);
8545 }
8546
8547 /*
8548 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
8549 *
8550 * Parameters: p Process requesting to set the umask
8551 * uap User argument descriptor (see below)
8552 * retval umask of the process (parameter p)
8553 *
8554 * Indirect: uap->newmask umask to set
8555 * uap->xsecurity ACL to set
8556 *
8557 * Returns: 0 Success
8558 * !0 Not success
8559 *
8560 */
8561 int
8562 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
8563 {
8564 int ciferror;
8565 kauth_filesec_t xsecdst;
8566
8567 xsecdst = KAUTH_FILESEC_NONE;
8568 if (uap->xsecurity != USER_ADDR_NULL) {
8569 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
8570 return ciferror;
8571 } else {
8572 xsecdst = KAUTH_FILESEC_NONE;
8573 }
8574
8575 ciferror = umask1(p, uap->newmask, xsecdst, retval);
8576
8577 if (xsecdst != KAUTH_FILESEC_NONE)
8578 kauth_filesec_free(xsecdst);
8579 return ciferror;
8580 }
8581
8582 int
8583 umask(proc_t p, struct umask_args *uap, int32_t *retval)
8584 {
8585 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
8586 }
8587
8588 /*
8589 * Void all references to file by ripping underlying filesystem
8590 * away from vnode.
8591 */
8592 /* ARGSUSED */
8593 int
8594 revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
8595 {
8596 vnode_t vp;
8597 struct vnode_attr va;
8598 vfs_context_t ctx = vfs_context_current();
8599 int error;
8600 struct nameidata nd;
8601
8602 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
8603 uap->path, ctx);
8604 error = namei(&nd);
8605 if (error)
8606 return (error);
8607 vp = nd.ni_vp;
8608
8609 nameidone(&nd);
8610
8611 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
8612 error = ENOTSUP;
8613 goto out;
8614 }
8615
8616 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
8617 error = EBUSY;
8618 goto out;
8619 }
8620
8621 #if CONFIG_MACF
8622 error = mac_vnode_check_revoke(ctx, vp);
8623 if (error)
8624 goto out;
8625 #endif
8626
8627 VATTR_INIT(&va);
8628 VATTR_WANTED(&va, va_uid);
8629 if ((error = vnode_getattr(vp, &va, ctx)))
8630 goto out;
8631 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
8632 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
8633 goto out;
8634 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
8635 VNOP_REVOKE(vp, REVOKEALL, ctx);
8636 out:
8637 vnode_put(vp);
8638 return (error);
8639 }
8640
8641
8642 /*
8643 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
8644 * The following system calls are designed to support features
8645 * which are specific to the HFS & HFS Plus volume formats
8646 */
8647
8648
8649 /*
8650 * Obtain attribute information on objects in a directory while enumerating
8651 * the directory.
8652 */
8653 /* ARGSUSED */
8654 int
8655 getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
8656 {
8657 vnode_t vp;
8658 struct fileproc *fp;
8659 uio_t auio = NULL;
8660 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8661 uint32_t count, savecount;
8662 uint32_t newstate;
8663 int error, eofflag;
8664 uint32_t loff;
8665 struct attrlist attributelist;
8666 vfs_context_t ctx = vfs_context_current();
8667 int fd = uap->fd;
8668 char uio_buf[ UIO_SIZEOF(1) ];
8669 kauth_action_t action;
8670
8671 AUDIT_ARG(fd, fd);
8672
8673 /* Get the attributes into kernel space */
8674 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
8675 return(error);
8676 }
8677 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
8678 return(error);
8679 }
8680 savecount = count;
8681 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
8682 return (error);
8683 }
8684 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
8685 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
8686 error = EBADF;
8687 goto out;
8688 }
8689
8690
8691 #if CONFIG_MACF
8692 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
8693 fp->f_fglob);
8694 if (error)
8695 goto out;
8696 #endif
8697
8698
8699 if ( (error = vnode_getwithref(vp)) )
8700 goto out;
8701
8702 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
8703
8704 unionread:
8705 if (vp->v_type != VDIR) {
8706 (void)vnode_put(vp);
8707 error = EINVAL;
8708 goto out;
8709 }
8710
8711 #if CONFIG_MACF
8712 error = mac_vnode_check_readdir(ctx, vp);
8713 if (error != 0) {
8714 (void)vnode_put(vp);
8715 goto out;
8716 }
8717 #endif /* MAC */
8718
8719 /* set up the uio structure which will contain the users return buffer */
8720 loff = fp->f_fglob->fg_offset;
8721 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8722 uio_addiov(auio, uap->buffer, uap->buffersize);
8723
8724 /*
8725 * If the only item requested is file names, we can let that past with
8726 * just LIST_DIRECTORY. If they want any other attributes, that means
8727 * they need SEARCH as well.
8728 */
8729 action = KAUTH_VNODE_LIST_DIRECTORY;
8730 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
8731 attributelist.fileattr || attributelist.dirattr)
8732 action |= KAUTH_VNODE_SEARCH;
8733
8734 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
8735
8736 /* Believe it or not, uap->options only has 32-bits of valid
8737 * info, so truncate before extending again */
8738
8739 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
8740 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
8741 }
8742
8743 if (error) {
8744 (void) vnode_put(vp);
8745 goto out;
8746 }
8747
8748 /*
8749 * If we've got the last entry of a directory in a union mount
8750 * then reset the eofflag and pretend there's still more to come.
8751 * The next call will again set eofflag and the buffer will be empty,
8752 * so traverse to the underlying directory and do the directory
8753 * read there.
8754 */
8755 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
8756 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
8757 eofflag = 0;
8758 } else { // Empty buffer
8759 struct vnode *tvp = vp;
8760 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
8761 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
8762 fp->f_fglob->fg_data = (caddr_t) vp;
8763 fp->f_fglob->fg_offset = 0; // reset index for new dir
8764 count = savecount;
8765 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
8766 vnode_put(tvp);
8767 goto unionread;
8768 }
8769 vp = tvp;
8770 }
8771 }
8772
8773 (void)vnode_put(vp);
8774
8775 if (error)
8776 goto out;
8777 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
8778
8779 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
8780 goto out;
8781 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
8782 goto out;
8783 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
8784 goto out;
8785
8786 *retval = eofflag; /* similar to getdirentries */
8787 error = 0;
8788 out:
8789 file_drop(fd);
8790 return (error); /* errors were returned earlier; retval here is 0 or 1 (eofflag) */
8791
8792 } /* end of getdirentriesattr system call */
8793
8794 /*
8795 * Exchange data between two files
8796 */
8797
8798 /* ARGSUSED */
8799 int
8800 exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
8801 {
8802
8803 struct nameidata fnd, snd;
8804 vfs_context_t ctx = vfs_context_current();
8805 vnode_t fvp;
8806 vnode_t svp;
8807 int error;
8808 u_int32_t nameiflags;
8809 char *fpath = NULL;
8810 char *spath = NULL;
8811 int flen=0, slen=0;
8812 int from_truncated=0, to_truncated=0;
8813 #if CONFIG_FSE
8814 fse_info f_finfo, s_finfo;
8815 #endif
8816
8817 nameiflags = 0;
8818 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8819
8820 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
8821 UIO_USERSPACE, uap->path1, ctx);
8822
8823 error = namei(&fnd);
8824 if (error)
8825 goto out2;
8826
8827 nameidone(&fnd);
8828 fvp = fnd.ni_vp;
8829
8830 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
8831 UIO_USERSPACE, uap->path2, ctx);
8832
8833 error = namei(&snd);
8834 if (error) {
8835 vnode_put(fvp);
8836 goto out2;
8837 }
8838 nameidone(&snd);
8839 svp = snd.ni_vp;
8840
8841 /*
8842 * if the files are the same, return an inval error
8843 */
8844 if (svp == fvp) {
8845 error = EINVAL;
8846 goto out;
8847 }
8848
8849 /*
8850 * if the files are on different volumes, return an error
8851 */
8852 if (svp->v_mount != fvp->v_mount) {
8853 error = EXDEV;
8854 goto out;
8855 }
8856
8857 /* If they're not files, return an error */
8858 if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
8859 error = EINVAL;
8860 goto out;
8861 }
8862
8863 #if CONFIG_MACF
8864 error = mac_vnode_check_exchangedata(ctx,
8865 fvp, svp);
8866 if (error)
8867 goto out;
8868 #endif
8869 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
8870 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
8871 goto out;
8872
8873 if (
8874 #if CONFIG_FSE
8875 need_fsevent(FSE_EXCHANGE, fvp) ||
8876 #endif
8877 kauth_authorize_fileop_has_listeners()) {
8878 GET_PATH(fpath);
8879 GET_PATH(spath);
8880 if (fpath == NULL || spath == NULL) {
8881 error = ENOMEM;
8882 goto out;
8883 }
8884
8885 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
8886 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
8887
8888 #if CONFIG_FSE
8889 get_fse_info(fvp, &f_finfo, ctx);
8890 get_fse_info(svp, &s_finfo, ctx);
8891 if (from_truncated || to_truncated) {
8892 // set it here since only the f_finfo gets reported up to user space
8893 f_finfo.mode |= FSE_TRUNCATED_PATH;
8894 }
8895 #endif
8896 }
8897 /* Ok, make the call */
8898 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
8899
8900 if (error == 0) {
8901 const char *tmpname;
8902
8903 if (fpath != NULL && spath != NULL) {
8904 /* call out to allow 3rd party notification of exchangedata.
8905 * Ignore result of kauth_authorize_fileop call.
8906 */
8907 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
8908 (uintptr_t)fpath, (uintptr_t)spath);
8909 }
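/*
 * The two objects have swapped contents on disk, so swap the cached
 * vnode names (and parents, when they differ) to keep the name cache
 * consistent with what is now actually stored under each vnode.
 */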
8910 name_cache_lock();
8911
8912 tmpname = fvp->v_name;
8913 fvp->v_name = svp->v_name;
8914 svp->v_name = tmpname;
8915
8916 if (fvp->v_parent != svp->v_parent) {
8917 vnode_t tmp;
8918
8919 tmp = fvp->v_parent;
8920 fvp->v_parent = svp->v_parent;
8921 svp->v_parent = tmp;
8922 }
8923 name_cache_unlock();
8924
8925 #if CONFIG_FSE
8926 if (fpath != NULL && spath != NULL) {
8927 add_fsevent(FSE_EXCHANGE, ctx,
8928 FSE_ARG_STRING, flen, fpath,
8929 FSE_ARG_FINFO, &f_finfo,
8930 FSE_ARG_STRING, slen, spath,
8931 FSE_ARG_FINFO, &s_finfo,
8932 FSE_ARG_DONE);
8933 }
8934 #endif
8935 }
8936
8937 out:
8938 if (fpath != NULL)
8939 RELEASE_PATH(fpath);
8940 if (spath != NULL)
8941 RELEASE_PATH(spath);
8942 vnode_put(svp);
8943 vnode_put(fvp);
8944 out2:
8945 return (error);
8946 }
8947
8948 /*
8949 * Return (in MB) the amount of freespace on the given vnode's volume.
8950 */
8951 uint32_t freespace_mb(vnode_t vp);
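/* f_bavail is a block count; multiplying by f_bsize gives bytes, and the
 * >> 20 below converts bytes to MiB. */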
8952
8953 uint32_t
8954 freespace_mb(vnode_t vp)
8955 {
8956 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
8957 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
8958 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
8959 }
8960
8961 #if CONFIG_SEARCHFS
8962
8963 /* ARGSUSED */
8964
8965 int
8966 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
8967 {
8968 vnode_t vp, tvp;
8969 int i, error=0;
8970 int fserror = 0;
8971 struct nameidata nd;
8972 struct user64_fssearchblock searchblock;
8973 struct searchstate *state;
8974 struct attrlist *returnattrs;
8975 struct timeval timelimit;
8976 void *searchparams1,*searchparams2;
8977 uio_t auio = NULL;
8978 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8979 uint32_t nummatches;
8980 int mallocsize;
8981 uint32_t nameiflags;
8982 vfs_context_t ctx = vfs_context_current();
8983 char uio_buf[ UIO_SIZEOF(1) ];
8984
8985 /* Start by copying in fsearchblock parameter list */
8986 if (IS_64BIT_PROCESS(p)) {
8987 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
8988 timelimit.tv_sec = searchblock.timelimit.tv_sec;
8989 timelimit.tv_usec = searchblock.timelimit.tv_usec;
8990 }
8991 else {
8992 struct user32_fssearchblock tmp_searchblock;
8993
8994 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
8995 // munge into 64-bit version
8996 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
8997 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
8998 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
8999 searchblock.maxmatches = tmp_searchblock.maxmatches;
9000 /*
9001 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
9002 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
9003 */
9004 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
9005 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
9006 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
9007 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
9008 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
9009 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
9010 searchblock.searchattrs = tmp_searchblock.searchattrs;
9011 }
9012 if (error)
9013 return(error);
9014
9015 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
9016 */
9017 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
9018 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
9019 return(EINVAL);
9020
9021 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
9022 /* It all has to go into local memory and it's not that big so we might as well put it all together. */
9023 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
9024 /* block. */
9025 /* */
9026 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
9027 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
9028 /* assumes the size is still 556 bytes it will continue to work */
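/*
 * Resulting layout of the single allocation (offsets computed below):
 *   [ searchparams1 | searchparams2 | struct attrlist | struct searchstate | 8 spare bytes ]
 */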
9029
9030 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
9031 sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
9032
9033 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
9034
9035 /* Now set up the various pointers to the correct place in our newly allocated memory */
9036
9037 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
9038 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
9039 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
9040
9041 /* Now copy in the stuff given our local variables. */
9042
9043 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
9044 goto freeandexit;
9045
9046 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
9047 goto freeandexit;
9048
9049 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
9050 goto freeandexit;
9051
9052 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
9053 goto freeandexit;
9054
9055 /*
9056 * When searching a union mount, need to set the
9057 * start flag at the first call on each layer to
9058 * reset state for the new volume.
9059 */
9060 if (uap->options & SRCHFS_START)
9061 state->ss_union_layer = 0;
9062 else
9063 uap->options |= state->ss_union_flags;
9064 state->ss_union_flags = 0;
9065
9066 /*
9067 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
9068 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
9069 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
9070 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
9071 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
9072 */
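/*
 * Expected layout of searchparams1 when ATTR_CMN_NAME is requested:
 *   [ u_int32_t buffer length ][ attrreference_t ][ name bytes at attr_dataoffset ]
 * The checks below reject negative offsets, lengths beyond MAXPATHLEN, and
 * references that point outside the user-supplied buffer.
 */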
9073
9074 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
9075 attrreference_t* string_ref;
9076 u_int32_t* start_length;
9077 user64_size_t param_length;
9078
9079 /* validate searchparams1 */
9080 param_length = searchblock.sizeofsearchparams1;
9081 /* skip the word that specifies length of the buffer */
9082 start_length = (u_int32_t *) searchparams1;
9083 start_length = start_length + 1;
9084 string_ref = (attrreference_t *) start_length;
9085
9086 /* ensure no negative offsets or too big offsets */
9087 if (string_ref->attr_dataoffset < 0 ) {
9088 error = EINVAL;
9089 goto freeandexit;
9090 }
9091 if (string_ref->attr_length > MAXPATHLEN) {
9092 error = EINVAL;
9093 goto freeandexit;
9094 }
9095
9096 /* Check for pointer overflow in the string ref */
9097 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
9098 error = EINVAL;
9099 goto freeandexit;
9100 }
9101
9102 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
9103 error = EINVAL;
9104 goto freeandexit;
9105 }
9106 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
9107 error = EINVAL;
9108 goto freeandexit;
9109 }
9110 }
9111
9112 /* set up the uio structure which will contain the users return buffer */
9113 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
9114 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
9115
9116 nameiflags = 0;
9117 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
9118 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
9119 UIO_USERSPACE, uap->path, ctx);
9120
9121 error = namei(&nd);
9122 if (error)
9123 goto freeandexit;
9124 vp = nd.ni_vp;
9125 nameidone(&nd);
9126
9127 /*
9128 * Switch to the root vnode for the volume
9129 */
9130 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
9131 vnode_put(vp);
9132 if (error)
9133 goto freeandexit;
9134 vp = tvp;
9135
9136 /*
9137 * If it's a union mount, the path lookup takes
9138 * us to the top layer. But we may need to descend
9139 * to a lower layer. For non-union mounts the layer
9140 * is always zero.
9141 */
9142 for (i = 0; i < (int) state->ss_union_layer; i++) {
9143 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
9144 break;
9145 tvp = vp;
9146 vp = vp->v_mount->mnt_vnodecovered;
9147 if (vp == NULL) {
9148 vnode_put(tvp);
9149 error = ENOENT;
9150 goto freeandexit;
9151 }
9152 error = vnode_getwithref(vp);
9153 vnode_put(tvp);
9154 if (error)
9155 goto freeandexit;
9156 }
9157
9158 #if CONFIG_MACF
9159 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
9160 if (error) {
9161 vnode_put(vp);
9162 goto freeandexit;
9163 }
9164 #endif
9165
9166
9167 /*
9168 * If searchblock.maxmatches == 0, then skip the search. This has happened
9169 * before and sometimes the underlying code doesn't deal with it well.
9170 */
9171 if (searchblock.maxmatches == 0) {
9172 nummatches = 0;
9173 goto saveandexit;
9174 }
9175
9176 /*
9177 * Alright, we have everything we need, so let's make that call.
9178 *
9179 * We keep special track of the return value from the file system:
9180 * EAGAIN is an acceptable error condition that shouldn't keep us
9181 * from copying out any results...
9182 */
9183
9184 fserror = VNOP_SEARCHFS(vp,
9185 searchparams1,
9186 searchparams2,
9187 &searchblock.searchattrs,
9188 (u_long)searchblock.maxmatches,
9189 &timelimit,
9190 returnattrs,
9191 &nummatches,
9192 (u_long)uap->scriptcode,
9193 (u_long)uap->options,
9194 auio,
9195 (struct searchstate *) &state->ss_fsstate,
9196 ctx);
9197
9198 /*
9199 * If it's a union mount we need to be called again
9200 * to search the mounted-on filesystem.
9201 */
9202 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
9203 state->ss_union_flags = SRCHFS_START;
9204 state->ss_union_layer++; // search next layer down
9205 fserror = EAGAIN;
9206 }
9207
9208 saveandexit:
9209
9210 vnode_put(vp);
9211
9212 /* Now copy out the stuff that needs copying out. That means the number of matches and the
9213 search state. Everything was already put into the return buffer by the vop call. */
9214
9215 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
9216 goto freeandexit;
9217
9218 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
9219 goto freeandexit;
9220
9221 error = fserror;
9222
9223 freeandexit:
9224
9225 FREE(searchparams1,M_TEMP);
9226
9227 return(error);
9228
9229
9230 } /* end of searchfs system call */
9231
9232 #else /* CONFIG_SEARCHFS */
9233
9234 int
9235 searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
9236 {
9237 return (ENOTSUP);
9238 }
9239
9240 #endif /* CONFIG_SEARCHFS */
9241
9242
9243 lck_grp_attr_t * nspace_group_attr;
9244 lck_attr_t * nspace_lock_attr;
9245 lck_grp_t * nspace_mutex_group;
9246
9247 lck_mtx_t nspace_handler_lock;
9248 lck_mtx_t nspace_handler_exclusion_lock;
9249
9250 time_t snapshot_timestamp=0;
9251 int nspace_allow_virtual_devs=0;
9252
9253 void nspace_handler_init(void);
9254
9255 typedef struct nspace_item_info {
9256 struct vnode *vp;
9257 void *arg;
9258 uint64_t op;
9259 uint32_t vid;
9260 uint32_t flags;
9261 uint32_t token;
9262 uint32_t refcount;
9263 } nspace_item_info;
9264
9265 #define MAX_NSPACE_ITEMS 128
9266 nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
9267 uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
9268 uint32_t nspace_token_id=0;
9269 uint32_t nspace_handler_timeout = 15; // seconds
9270
9271 #define NSPACE_ITEM_NEW 0x0001
9272 #define NSPACE_ITEM_PROCESSING 0x0002
9273 #define NSPACE_ITEM_DEAD 0x0004
9274 #define NSPACE_ITEM_CANCELLED 0x0008
9275 #define NSPACE_ITEM_DONE 0x0010
9276 #define NSPACE_ITEM_RESET_TIMER 0x0020
9277
9278 #define NSPACE_ITEM_NSPACE_EVENT 0x0040
9279 #define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
9280
9281 #define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
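/*
 * Rough slot life cycle (as used below): a free slot has flags == 0; a new
 * request marks it NSPACE_ITEM_NEW, the handler flips it to _PROCESSING,
 * and completion or cancellation sets _DONE or _CANCELLED before the waiter
 * clears the slot for reuse.
 */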
9282
9283 //#pragma optimization_level 0
9284
9285 typedef enum {
9286 NSPACE_HANDLER_NSPACE = 0,
9287 NSPACE_HANDLER_SNAPSHOT = 1,
9288
9289 NSPACE_HANDLER_COUNT,
9290 } nspace_type_t;
9291
9292 typedef struct {
9293 uint64_t handler_tid;
9294 struct proc *handler_proc;
9295 int handler_busy;
9296 } nspace_handler_t;
9297
9298 nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
9299
9300 /* namespace fsctl functions */
9301 static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
9302 static int nspace_item_flags_for_type(nspace_type_t nspace_type);
9303 static int nspace_open_flags_for_type(nspace_type_t nspace_type);
9304 static nspace_type_t nspace_type_for_op(uint64_t op);
9305 static int nspace_is_special_process(struct proc *proc);
9306 static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
9307 static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
9308 static int validate_namespace_args (int is64bit, int size);
9309 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
9310
9311
9312 static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
9313 {
9314 switch(nspace_type) {
9315 case NSPACE_HANDLER_NSPACE:
9316 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
9317 case NSPACE_HANDLER_SNAPSHOT:
9318 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
9319 default:
9320 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
9321 return 0;
9322 }
9323 }
9324
9325 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
9326 {
9327 switch(nspace_type) {
9328 case NSPACE_HANDLER_NSPACE:
9329 return NSPACE_ITEM_NSPACE_EVENT;
9330 case NSPACE_HANDLER_SNAPSHOT:
9331 return NSPACE_ITEM_SNAPSHOT_EVENT;
9332 default:
9333 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
9334 return 0;
9335 }
9336 }
9337
9338 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
9339 {
9340 switch(nspace_type) {
9341 case NSPACE_HANDLER_NSPACE:
9342 return FREAD | FWRITE | O_EVTONLY;
9343 case NSPACE_HANDLER_SNAPSHOT:
9344 return FREAD | O_EVTONLY;
9345 default:
9346 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
9347 return 0;
9348 }
9349 }
9350
9351 static inline nspace_type_t nspace_type_for_op(uint64_t op)
9352 {
9353 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
9354 case NAMESPACE_HANDLER_NSPACE_EVENT:
9355 return NSPACE_HANDLER_NSPACE;
9356 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
9357 return NSPACE_HANDLER_SNAPSHOT;
9358 default:
9359 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
9360 return NSPACE_HANDLER_NSPACE;
9361 }
9362 }
9363
9364 static inline int nspace_is_special_process(struct proc *proc)
9365 {
9366 int i;
9367 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9368 if (proc == nspace_handlers[i].handler_proc)
9369 return 1;
9370 }
9371 return 0;
9372 }
9373
9374 void
9375 nspace_handler_init(void)
9376 {
9377 nspace_lock_attr = lck_attr_alloc_init();
9378 nspace_group_attr = lck_grp_attr_alloc_init();
9379 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
9380 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
9381 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
9382 memset(&nspace_items[0], 0, sizeof(nspace_items));
9383 }
9384
9385 void
9386 nspace_proc_exit(struct proc *p)
9387 {
9388 int i, event_mask = 0;
9389
9390 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
9391 if (p == nspace_handlers[i].handler_proc) {
9392 event_mask |= nspace_item_flags_for_type(i);
9393 nspace_handlers[i].handler_tid = 0;
9394 nspace_handlers[i].handler_proc = NULL;
9395 }
9396 }
9397
9398 if (event_mask == 0) {
9399 return;
9400 }
9401
9402 lck_mtx_lock(&nspace_handler_lock);
9403 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
9404 // if this process was the snapshot handler, zero snapshot_timestamp
9405 snapshot_timestamp = 0;
9406 }
9407
9408 //
9409 // unblock anyone that's waiting for the handler that died
9410 //
9411 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9412 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
9413
9414 if ( nspace_items[i].flags & event_mask ) {
9415
9416 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9417 vnode_lock_spin(nspace_items[i].vp);
9418 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9419 vnode_unlock(nspace_items[i].vp);
9420 }
9421 nspace_items[i].vp = NULL;
9422 nspace_items[i].vid = 0;
9423 nspace_items[i].flags = NSPACE_ITEM_DONE;
9424 nspace_items[i].token = 0;
9425
9426 wakeup((caddr_t)&(nspace_items[i].vp));
9427 }
9428 }
9429 }
9430
9431 wakeup((caddr_t)&nspace_item_idx);
9432 lck_mtx_unlock(&nspace_handler_lock);
9433 }
9434
9435
9436 int
9437 resolve_nspace_item(struct vnode *vp, uint64_t op)
9438 {
9439 return resolve_nspace_item_ext(vp, op, NULL);
9440 }
9441
9442 int
9443 resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
9444 {
9445 int i, error, keep_waiting;
9446 struct timespec ts;
9447 nspace_type_t nspace_type = nspace_type_for_op(op);
9448
9449 // only allow namespace events on regular files, directories and symlinks.
9450 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
9451 return 0;
9452 }
9453
9454 //
9455 // if this is a snapshot event and the vnode is on a
9456 // disk image just pretend nothing happened since any
9457 // change to the disk image will cause the disk image
9458 // itself to get backed up and this avoids multi-way
9459 // deadlocks between the snapshot handler and the ever
9460 // popular diskimages-helper process. the variable
9461 // nspace_allow_virtual_devs allows this behavior to
9462 // be overridden (for use by the Mobile TimeMachine
9463 // testing infrastructure which uses disk images)
9464 //
9465 if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
9466 && (vp->v_mount != NULL)
9467 && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
9468 && !nspace_allow_virtual_devs) {
9469
9470 return 0;
9471 }
9472
9473 // if (thread_tid(current_thread()) == namespace_handler_tid) {
9474 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9475 return 0;
9476 }
9477
9478 if (nspace_is_special_process(current_proc())) {
9479 return EDEADLK;
9480 }
9481
9482 lck_mtx_lock(&nspace_handler_lock);
9483
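/*
 * Slot selection: first look for an existing entry for this (vp, op) pair
 * so concurrent callers share it; otherwise take a free slot (flags == 0);
 * if the table is full, sleep until a slot is released and retry.
 */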
9484 retry:
9485 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9486 if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
9487 break;
9488 }
9489 }
9490
9491 if (i >= MAX_NSPACE_ITEMS) {
9492 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9493 if (nspace_items[i].flags == 0) {
9494 break;
9495 }
9496 }
9497 } else {
9498 nspace_items[i].refcount++;
9499 }
9500
9501 if (i >= MAX_NSPACE_ITEMS) {
9502 ts.tv_sec = nspace_handler_timeout;
9503 ts.tv_nsec = 0;
9504
9505 error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
9506 if (error == 0) {
9507 // an entry got free'd up, go see if we can get a slot
9508 goto retry;
9509 } else {
9510 lck_mtx_unlock(&nspace_handler_lock);
9511 return error;
9512 }
9513 }
9514
9515 //
9516 // if it didn't already exist, add it. if it did exist
9517 // we'll get woken up when someone does a wakeup() on
9518 // the slot in the nspace_items table.
9519 //
9520 if (vp != nspace_items[i].vp) {
9521 nspace_items[i].vp = vp;
9522 nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
9523 nspace_items[i].op = op;
9524 nspace_items[i].vid = vnode_vid(vp);
9525 nspace_items[i].flags = NSPACE_ITEM_NEW;
9526 nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
9527 if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
9528 if (arg) {
9529 vnode_lock_spin(vp);
9530 vp->v_flag |= VNEEDSSNAPSHOT;
9531 vnode_unlock(vp);
9532 }
9533 }
9534
9535 nspace_items[i].token = 0;
9536 nspace_items[i].refcount = 1;
9537
9538 wakeup((caddr_t)&nspace_item_idx);
9539 }
9540
9541 //
9542 // Now go to sleep until the handler does a wakeup on this
9543 // slot in the nspace_items table (or we timeout).
9544 //
9545 keep_waiting = 1;
9546 while(keep_waiting) {
9547 ts.tv_sec = nspace_handler_timeout;
9548 ts.tv_nsec = 0;
9549 error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
9550
9551 if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
9552 error = 0;
9553 } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
9554 error = nspace_items[i].token;
9555 } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
9556 if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
9557 nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
9558 continue;
9559 } else {
9560 error = ETIMEDOUT;
9561 }
9562 } else if (error == 0) {
9563 // hmmm, why did we get woken up?
9564 printf("woken up for token %d but it's not done, cancelled or timed out and error == 0.\n",
9565 nspace_items[i].token);
9566 }
9567
9568 if (--nspace_items[i].refcount == 0) {
9569 nspace_items[i].vp = NULL; // clear this so that no one will match on it again
9570 nspace_items[i].arg = NULL;
9571 nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
9572 nspace_items[i].flags = 0; // this clears it for re-use
9573 }
9574 wakeup(&nspace_token_id);
9575 keep_waiting = 0;
9576 }
9577
9578 lck_mtx_unlock(&nspace_handler_lock);
9579
9580 return error;
9581 }
9582
9583 int nspace_snapshot_event(vnode_t vp, time_t ctime, uint64_t op_type, void *arg)
9584 {
9585 int snapshot_error = 0;
9586
9587 if (vp == NULL) {
9588 return 0;
9589 }
9590
9591 /* Swap files are special; skip them */
9592 if (vnode_isswap(vp)) {
9593 return 0;
9594 }
9595
9596 if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
9597 // the change time is within this epoch
9598 int error;
9599
9600 error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
9601 if (error == EDEADLK) {
9602 snapshot_error = 0;
9603 } else if (error) {
9604 if (error == EAGAIN) {
9605 printf("nspace_snapshot_event: timed out waiting for namespace handler...\n");
9606 } else if (error == EINTR) {
9607 // printf("nspace_snapshot_event: got a signal while waiting for namespace handler...\n");
9608 snapshot_error = EINTR;
9609 }
9610 }
9611 }
9612
9613 return snapshot_error;
9614 }
9615
9616 int
9617 get_nspace_item_status(struct vnode *vp, int32_t *status)
9618 {
9619 int i;
9620
9621 lck_mtx_lock(&nspace_handler_lock);
9622 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9623 if (nspace_items[i].vp == vp) {
9624 break;
9625 }
9626 }
9627
9628 if (i >= MAX_NSPACE_ITEMS) {
9629 lck_mtx_unlock(&nspace_handler_lock);
9630 return ENOENT;
9631 }
9632
9633 *status = nspace_items[i].flags;
9634 lck_mtx_unlock(&nspace_handler_lock);
9635 return 0;
9636 }
9637
9638
9639 #if 0
9640 static int
9641 build_volfs_path(struct vnode *vp, char *path, int *len)
9642 {
9643 struct vnode_attr va;
9644 int ret;
9645
9646 VATTR_INIT(&va);
9647 VATTR_WANTED(&va, va_fsid);
9648 VATTR_WANTED(&va, va_fileid);
9649
9650 if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
9651 *len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
9652 ret = -1;
9653 } else {
9654 *len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
9655 ret = 0;
9656 }
9657
9658 return ret;
9659 }
9660 #endif
9661
9662 //
9663 // Note: this function does NOT check permissions on all of the
9664 // parent directories leading to this vnode. It should only be
9665 // called on behalf of a root process. Otherwise a process may
9666 // get access to a file because the file itself is readable even
9667 // though its parent directories would prevent access.
9668 //
9669 static int
9670 vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
9671 {
9672 int error, action;
9673
9674 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9675 return error;
9676 }
9677
9678 #if CONFIG_MACF
9679 error = mac_vnode_check_open(ctx, vp, fmode);
9680 if (error)
9681 return error;
9682 #endif
9683
9684 /* compute action to be authorized */
9685 action = 0;
9686 if (fmode & FREAD) {
9687 action |= KAUTH_VNODE_READ_DATA;
9688 }
9689 if (fmode & (FWRITE | O_TRUNC)) {
9690 /*
9691 * If we are writing, appending, and not truncating,
9692 * indicate that we are appending so that if the
9693 * UF_APPEND or SF_APPEND bits are set, we do not deny
9694 * the open.
9695 */
9696 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
9697 action |= KAUTH_VNODE_APPEND_DATA;
9698 } else {
9699 action |= KAUTH_VNODE_WRITE_DATA;
9700 }
9701 }
9702
9703 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
9704 return error;
9705
9706
9707 //
9708 // if the vnode is tagged VOPENEVT and the current process
9709 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
9710 // flag to the open mode so that this open won't count against
9711 // the vnode when carbon delete() does a vnode_isinuse() to see
9712 // if a file is currently in use. this allows spotlight
9713 // importers to not interfere with carbon apps that depend on
9714 // the no-delete-if-busy semantics of carbon delete().
9715 //
9716 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
9717 fmode |= O_EVTONLY;
9718 }
9719
9720 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
9721 return error;
9722 }
9723 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
9724 VNOP_CLOSE(vp, fmode, ctx);
9725 return error;
9726 }
9727
9728 /* Call out to allow 3rd party notification of open.
9729 * Ignore result of kauth_authorize_fileop call.
9730 */
9731 #if CONFIG_MACF
9732 mac_vnode_notify_open(ctx, vp, fmode);
9733 #endif
9734 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
9735 (uintptr_t)vp, 0);
9736
9737
9738 return 0;
9739 }
9740
9741 static int
9742 wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
9743 {
9744 int i;
9745 int error = 0;
9746 int unblock = 0;
9747 task_t curtask;
9748
9749 lck_mtx_lock(&nspace_handler_exclusion_lock);
9750 if (nspace_handlers[nspace_type].handler_busy) {
9751 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9752 return EBUSY;
9753 }
9754
9755 nspace_handlers[nspace_type].handler_busy = 1;
9756 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9757
9758 /*
9759 * Any process that gets here will be one of the namespace handlers.
9760 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
9761 * as we can cause deadlocks to occur, because the namespace handler may prevent
9762 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
9763 * process.
9764 */
9765 curtask = current_task();
9766 bsd_set_dependency_capable (curtask);
9767
9768 lck_mtx_lock(&nspace_handler_lock);
9769 if (nspace_handlers[nspace_type].handler_proc == NULL) {
9770 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
9771 nspace_handlers[nspace_type].handler_proc = current_proc();
9772 }
9773
9774 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
9775 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9776 error = EINVAL;
9777 }
9778
9779 while (error == 0) {
9780
9781 /* Try to find matching namespace item */
9782 for (i = 0; i < MAX_NSPACE_ITEMS; i++) {
9783 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9784 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9785 break;
9786 }
9787 }
9788 }
9789
9790 if (i >= MAX_NSPACE_ITEMS) {
9791 /* Nothing is there yet. Wait for wake up and retry */
9792 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
9793 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9794 /* Prevent infinite loop if snapshot handler exited */
9795 error = EINVAL;
9796 break;
9797 }
9798 continue;
9799 }
9800
9801 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
9802 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
9803 nspace_items[i].token = ++nspace_token_id;
9804
9805 assert(nspace_items[i].vp);
9806 struct fileproc *fp;
9807 int32_t indx;
9808 int32_t fmode;
9809 struct proc *p = current_proc();
9810 vfs_context_t ctx = vfs_context_current();
9811 struct vnode_attr va;
9812 bool vn_get_successful = false;
9813 bool vn_open_successful = false;
9814 bool fp_alloc_successful = false;
9815
9816 /*
9817 * Use vnode pointer to acquire a file descriptor for
9818 * hand-off to userland
9819 */
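/*
 * Three steps, each tracked by a flag so the cleanup label can unwind
 * exactly what succeeded: take an iocount on the vnode, open it with the
 * handler's mode, then allocate a file descriptor that wraps it.
 */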
9820 fmode = nspace_open_flags_for_type(nspace_type);
9821 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
9822 if (error) goto cleanup;
9823 vn_get_successful = true;
9824
9825 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
9826 if (error) goto cleanup;
9827 vn_open_successful = true;
9828
9829 error = falloc(p, &fp, &indx, ctx);
9830 if (error) goto cleanup;
9831 fp_alloc_successful = true;
9832
9833 fp->f_fglob->fg_flag = fmode;
9834 fp->f_fglob->fg_ops = &vnops;
9835 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
9836
9837 proc_fdlock(p);
9838 procfdtbl_releasefd(p, indx, NULL);
9839 fp_drop(p, indx, fp, 1);
9840 proc_fdunlock(p);
9841
9842 /*
9843 * All variants of the namespace handler struct support these three fields:
9844 * token, flags, and the FD pointer
9845 */
9846 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
9847 if (error) goto cleanup;
9848 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
9849 if (error) goto cleanup;
9850 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
9851 if (error) goto cleanup;
9852
9853 /*
9854 * Handle optional fields:
9855 * the extended version supports an info ptr (offset, length), and the
9856 * namedata version supports a unique per-link object ID.
9857 *
9858 *
9859 */
9860 if (nhd->infoptr) {
9861 uio_t uio = (uio_t)nspace_items[i].arg;
9862 uint64_t u_offset, u_length;
9863
9864 if (uio) {
9865 u_offset = uio_offset(uio);
9866 u_length = uio_resid(uio);
9867 } else {
9868 u_offset = 0;
9869 u_length = 0;
9870 }
9871 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
9872 if (error) goto cleanup;
9873 error = copyout(&u_length, nhd->infoptr + sizeof(uint64_t), sizeof(uint64_t));
9874 if (error) goto cleanup;
9875 }
9876
9877 if (nhd->objid) {
9878 VATTR_INIT(&va);
9879 VATTR_WANTED(&va, va_linkid);
9880 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
9881 if (error) goto cleanup;
9882
9883 uint64_t linkid = 0;
9884 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
9885 linkid = (uint64_t)va.va_linkid;
9886 }
9887 error = copyout(&linkid, nhd->objid, sizeof(uint64_t));
9888 }
9889 cleanup:
9890 if (error) {
9891 if (fp_alloc_successful) fp_free(p, indx, fp);
9892 if (vn_open_successful) vn_close(nspace_items[i].vp, fmode, ctx);
9893 unblock = 1;
9894 }
9895
9896 if (vn_get_successful) vnode_put(nspace_items[i].vp);
9897
9898 break;
9899 }
9900
9901 if (unblock) {
9902 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9903 vnode_lock_spin(nspace_items[i].vp);
9904 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9905 vnode_unlock(nspace_items[i].vp);
9906 }
9907 nspace_items[i].vp = NULL;
9908 nspace_items[i].vid = 0;
9909 nspace_items[i].flags = NSPACE_ITEM_DONE;
9910 nspace_items[i].token = 0;
9911
9912 wakeup((caddr_t)&(nspace_items[i].vp));
9913 }
9914
9915 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
9916 // just go through every snapshot event and unblock it immediately.
9917 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9918 for(i = 0; i < MAX_NSPACE_ITEMS; i++) {
9919 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9920 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9921 nspace_items[i].vp = NULL;
9922 nspace_items[i].vid = 0;
9923 nspace_items[i].flags = NSPACE_ITEM_DONE;
9924 nspace_items[i].token = 0;
9925
9926 wakeup((caddr_t)&(nspace_items[i].vp));
9927 }
9928 }
9929 }
9930 }
9931 }
9932
9933 lck_mtx_unlock(&nspace_handler_lock);
9934
9935 lck_mtx_lock(&nspace_handler_exclusion_lock);
9936 nspace_handlers[nspace_type].handler_busy = 0;
9937 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9938
9939 return error;
9940 }
9941
9942 static inline int validate_namespace_args (int is64bit, int size) {
9943
9944 if (is64bit) {
9945 /* Must be one of these */
9946 if (size == sizeof(user64_namespace_handler_info)) {
9947 goto sizeok;
9948 }
9949 if (size == sizeof(user64_namespace_handler_info_ext)) {
9950 goto sizeok;
9951 }
9952 if (size == sizeof(user64_namespace_handler_data)) {
9953 goto sizeok;
9954 }
9955 return EINVAL;
9956 }
9957 else {
9958 /* 32 bit -- must be one of these */
9959 if (size == sizeof(user32_namespace_handler_info)) {
9960 goto sizeok;
9961 }
9962 if (size == sizeof(user32_namespace_handler_info_ext)) {
9963 goto sizeok;
9964 }
9965 if (size == sizeof(user32_namespace_handler_data)) {
9966 goto sizeok;
9967 }
9968 return EINVAL;
9969 }
9970
9971 sizeok:
9972
9973 return 0;
9974
9975 }
9976
9977 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
9978 {
9979 int error = 0;
9980 namespace_handler_data nhd;
9981
9982 bzero (&nhd, sizeof(namespace_handler_data));
9983
9984 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9985 return error;
9986 }
9987
9988 error = validate_namespace_args (is64bit, size);
9989 if (error) {
9990 return error;
9991 }
9992
9993 /* Copy in the userland pointers into our kernel-only struct */
9994
9995 if (is64bit) {
9996 /* 64 bit userland structures */
9997 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
9998 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
9999 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
10000
10001 /* If the size is greater than the standard info struct, add in extra fields */
10002 if (size > (sizeof(user64_namespace_handler_info))) {
10003 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
10004 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
10005 }
10006 if (size == (sizeof(user64_namespace_handler_data))) {
10007 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
10008 }
10009 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
10010 }
10011 }
10012 else {
10013 /* 32 bit userland structures */
10014 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
10015 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
10016 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
10017
10018 if (size > (sizeof(user32_namespace_handler_info))) {
10019 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
10020 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
10021 }
10022 if (size == (sizeof(user32_namespace_handler_data))) {
10023 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
10024 }
10025 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
10026 }
10027 }
10028
10029 return wait_for_namespace_event(&nhd, nspace_type);
10030 }
10031
10032 static unsigned long
10033 fsctl_bogus_command_compat(unsigned long cmd)
10034 {
10035
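/*
 * Map legacy command values (historically issued via IOCBASECMD, i.e. with
 * the IOCPARM length bits cleared) back onto the full ioctl encodings so
 * older callers keep working.
 */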
10036 switch (cmd) {
10037 case IOCBASECMD(FSIOC_SYNC_VOLUME):
10038 return (FSIOC_SYNC_VOLUME);
10039 case IOCBASECMD(FSIOC_ROUTEFS_SETROUTEID):
10040 return (FSIOC_ROUTEFS_SETROUTEID);
10041 case IOCBASECMD(FSIOC_SET_PACKAGE_EXTS):
10042 return (FSIOC_SET_PACKAGE_EXTS);
10043 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_GET):
10044 return (FSIOC_NAMESPACE_HANDLER_GET);
10045 case IOCBASECMD(FSIOC_OLD_SNAPSHOT_HANDLER_GET):
10046 return (FSIOC_OLD_SNAPSHOT_HANDLER_GET);
10047 case IOCBASECMD(FSIOC_SNAPSHOT_HANDLER_GET_EXT):
10048 return (FSIOC_SNAPSHOT_HANDLER_GET_EXT);
10049 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UPDATE):
10050 return (FSIOC_NAMESPACE_HANDLER_UPDATE);
10051 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_UNBLOCK):
10052 return (FSIOC_NAMESPACE_HANDLER_UNBLOCK);
10053 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_CANCEL):
10054 return (FSIOC_NAMESPACE_HANDLER_CANCEL);
10055 case IOCBASECMD(FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME):
10056 return (FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME);
10057 case IOCBASECMD(FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS):
10058 return (FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS);
10059 case IOCBASECMD(FSIOC_SET_FSTYPENAME_OVERRIDE):
10060 return (FSIOC_SET_FSTYPENAME_OVERRIDE);
10061 case IOCBASECMD(DISK_CONDITIONER_IOC_GET):
10062 return (DISK_CONDITIONER_IOC_GET);
10063 case IOCBASECMD(DISK_CONDITIONER_IOC_SET):
10064 return (DISK_CONDITIONER_IOC_SET);
10065 case IOCBASECMD(FSIOC_FIOSEEKHOLE):
10066 return (FSIOC_FIOSEEKHOLE);
10067 case IOCBASECMD(FSIOC_FIOSEEKDATA):
10068 return (FSIOC_FIOSEEKDATA);
10069 case IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME):
10070 return (SPOTLIGHT_IOC_GET_MOUNT_TIME);
10071 case IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME):
10072 return (SPOTLIGHT_IOC_GET_LAST_MTIME);
10073 }
10074
10075 return (cmd);
10076 }
10077
10078 /*
10079 * Make a filesystem-specific control call:
10080 */
10081 /* ARGSUSED */
10082 static int
10083 fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
10084 {
10085 int error=0;
10086 boolean_t is64bit;
10087 u_int size;
10088 #define STK_PARAMS 128
10089 char stkbuf[STK_PARAMS] = {0};
10090 caddr_t data, memp;
10091 vnode_t vp = *arg_vp;
10092
10093 cmd = fsctl_bogus_command_compat(cmd);
10094
10095 size = IOCPARM_LEN(cmd);
10096 if (size > IOCPARM_MAX) return (EINVAL);
10097
10098 is64bit = proc_is64bit(p);
10099
10100 memp = NULL;
10101
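/* Use the 128-byte on-stack buffer when the ioctl payload fits; otherwise
 * heap-allocate a temporary of exactly IOCPARM_LEN(cmd) bytes. */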
10102 if (size > sizeof (stkbuf)) {
10103 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
10104 data = memp;
10105 } else {
10106 data = &stkbuf[0];
10107 };
10108
10109 if (cmd & IOC_IN) {
10110 if (size) {
10111 error = copyin(udata, data, size);
10112 if (error) {
10113 if (memp) {
10114 kfree (memp, size);
10115 }
10116 return error;
10117 }
10118 } else {
10119 if (is64bit) {
10120 *(user_addr_t *)data = udata;
10121 }
10122 else {
10123 *(uint32_t *)data = (uint32_t)udata;
10124 }
10125 };
10126 } else if ((cmd & IOC_OUT) && size) {
10127 /*
10128 * Zero the buffer so the user always
10129 * gets back something deterministic.
10130 */
10131 bzero(data, size);
10132 } else if (cmd & IOC_VOID) {
10133 if (is64bit) {
10134 *(user_addr_t *)data = udata;
10135 }
10136 else {
10137 *(uint32_t *)data = (uint32_t)udata;
10138 }
10139 }
10140
10141 /* Check to see if it's a generic command */
10142 switch (cmd) {
10143
10144 case FSIOC_SYNC_VOLUME: {
10145 mount_t mp = vp->v_mount;
10146 int arg = *(uint32_t*)data;
10147
10148 /* record vid of vp so we can drop it below. */
10149 uint32_t vvid = vp->v_id;
10150
10151 /*
10152 * Then grab mount_iterref so that we can release the vnode.
10153 * Without this, a thread may call vnode_iterate_prepare then
10154 * get into a deadlock because we've never released the root vp
10155 */
10156 error = mount_iterref (mp, 0);
10157 if (error) {
10158 break;
10159 }
10160 vnode_put(vp);
10161
10162 /* issue the sync for this volume */
10163 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
10164
10165 /*
10166 * Then release the mount_iterref once we're done syncing; it's not
10167 * needed for the VNOP_IOCTL below
10168 */
10169 mount_iterdrop(mp);
10170
10171 if (arg & FSCTL_SYNC_FULLSYNC) {
10172 /* re-obtain vnode iocount on the root vp, if possible */
10173 error = vnode_getwithvid (vp, vvid);
10174 if (error == 0) {
10175 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
10176 vnode_put (vp);
10177 }
10178 }
10179 /* mark the argument VP as having been released */
10180 *arg_vp = NULL;
10181 }
10182 break;
10183
10184 case FSIOC_ROUTEFS_SETROUTEID: {
10185 #if ROUTEFS
10186 char routepath[MAXPATHLEN];
10187 size_t len = 0;
10188
10189 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10190 break;
10191 }
10192 bzero(routepath, MAXPATHLEN);
10193 error = copyinstr(udata, &routepath[0], MAXPATHLEN, &len);
10194 if (error) {
10195 break;
10196 }
10197 error = routefs_kernel_mount(routepath);
10198 if (error) {
10199 break;
10200 }
10201 #endif
10202 }
10203 break;
10204
10205 case FSIOC_SET_PACKAGE_EXTS: {
10206 user_addr_t ext_strings;
10207 uint32_t num_entries;
10208 uint32_t max_width;
10209
10210 if ((error = priv_check_cred(kauth_cred_get(), PRIV_PACKAGE_EXTENSIONS, 0)))
10211 break;
10212
10213 if ( (is64bit && size != sizeof(user64_package_ext_info))
10214 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
10215
10216 // Either the caller is 64-bit and passed a 64-bit struct, or
10217 // it is 32-bit and passed a 32-bit struct; any other
10218 // combination is invalid.
10219 error = EINVAL;
10220 break;
10221 }
10222
10223 if (is64bit) {
10224 ext_strings = ((user64_package_ext_info *)data)->strings;
10225 num_entries = ((user64_package_ext_info *)data)->num_entries;
10226 max_width = ((user64_package_ext_info *)data)->max_width;
10227 } else {
10228 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
10229 num_entries = ((user32_package_ext_info *)data)->num_entries;
10230 max_width = ((user32_package_ext_info *)data)->max_width;
10231 }
10232 error = set_package_extensions_table(ext_strings, num_entries, max_width);
10233 }
10234 break;
10235
10236 /* namespace handlers */
10237 case FSIOC_NAMESPACE_HANDLER_GET: {
10238 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
10239 }
10240 break;
10241
10242 /* Snapshot handlers */
10243 case FSIOC_OLD_SNAPSHOT_HANDLER_GET: {
10244 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
10245 }
10246 break;
10247
10248 case FSIOC_SNAPSHOT_HANDLER_GET_EXT: {
10249 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
10250 }
10251 break;
10252
10253 case FSIOC_NAMESPACE_HANDLER_UPDATE: {
10254 uint32_t token, val;
10255 int i;
10256
10257 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10258 break;
10259 }
10260
10261 if (!nspace_is_special_process(p)) {
10262 error = EINVAL;
10263 break;
10264 }
10265
10266 token = ((uint32_t *)data)[0];
10267 val = ((uint32_t *)data)[1];
10268
10269 lck_mtx_lock(&nspace_handler_lock);
10270
10271 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10272 if (nspace_items[i].token == token) {
10273 break; /* exit for loop, not case stmt */
10274 }
10275 }
10276
10277 if (i >= MAX_NSPACE_ITEMS) {
10278 error = ENOENT;
10279 } else {
10280 //
10281 // if this bit is set, when resolve_nspace_item() times out
10282 // it will loop and go back to sleep.
10283 //
10284 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
10285 }
10286
10287 lck_mtx_unlock(&nspace_handler_lock);
10288
10289 if (error) {
10290 printf("nspace-handler-update: did not find token %u\n", token);
10291 }
10292 }
10293 break;
10294
10295 case FSIOC_NAMESPACE_HANDLER_UNBLOCK: {
10296 uint32_t token, val;
10297 int i;
10298
10299 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10300 break;
10301 }
10302
10303 if (!nspace_is_special_process(p)) {
10304 error = EINVAL;
10305 break;
10306 }
10307
10308 token = ((uint32_t *)data)[0];
10309 val = ((uint32_t *)data)[1];
10310
10311 lck_mtx_lock(&nspace_handler_lock);
10312
10313 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10314 if (nspace_items[i].token == token) {
10315 break; /* exit for loop, not case statement */
10316 }
10317 }
10318
10319 if (i >= MAX_NSPACE_ITEMS) {
10320 printf("nspace-handler-unblock: did not find token %u\n", token);
10321 error = ENOENT;
10322 } else {
10323 if (val == 0 && nspace_items[i].vp) {
10324 vnode_lock_spin(nspace_items[i].vp);
10325 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10326 vnode_unlock(nspace_items[i].vp);
10327 }
10328
10329 nspace_items[i].vp = NULL;
10330 nspace_items[i].arg = NULL;
10331 nspace_items[i].op = 0;
10332 nspace_items[i].vid = 0;
10333 nspace_items[i].flags = NSPACE_ITEM_DONE;
10334 nspace_items[i].token = 0;
10335
10336 wakeup((caddr_t)&(nspace_items[i].vp));
10337 }
10338
10339 lck_mtx_unlock(&nspace_handler_lock);
10340 }
10341 break;
10342
10343 case FSIOC_NAMESPACE_HANDLER_CANCEL: {
10344 uint32_t token, val;
10345 int i;
10346
10347 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
10348 break;
10349 }
10350
10351 if (!nspace_is_special_process(p)) {
10352 error = EINVAL;
10353 break;
10354 }
10355
10356 token = ((uint32_t *)data)[0];
10357 val = ((uint32_t *)data)[1];
10358
10359 lck_mtx_lock(&nspace_handler_lock);
10360
10361 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
10362 if (nspace_items[i].token == token) {
10363 break; /* exit for loop, not case stmt */
10364 }
10365 }
10366
10367 if (i >= MAX_NSPACE_ITEMS) {
10368 printf("nspace-handler-cancel: did not find token %u\n", token);
10369 error = ENOENT;
10370 } else {
10371 if (nspace_items[i].vp) {
10372 vnode_lock_spin(nspace_items[i].vp);
10373 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
10374 vnode_unlock(nspace_items[i].vp);
10375 }
10376
10377 nspace_items[i].vp = NULL;
10378 nspace_items[i].arg = NULL;
10379 nspace_items[i].vid = 0;
10380 nspace_items[i].token = val;
10381 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
10382 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
10383
10384 wakeup((caddr_t)&(nspace_items[i].vp));
10385 }
10386
10387 lck_mtx_unlock(&nspace_handler_lock);
10388 }
10389 break;
10390
10391 case FSIOC_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
10392 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10393 break;
10394 }
10395
10396 // we explicitly do not do the namespace_handler_proc check here
10397
10398 lck_mtx_lock(&nspace_handler_lock);
10399 snapshot_timestamp = ((uint32_t *)data)[0];
10400 wakeup(&nspace_item_idx);
10401 lck_mtx_unlock(&nspace_handler_lock);
10402 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
10403
10404 }
10405 break;
10406
10407 case FSIOC_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
10408 {
10409 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10410 break;
10411 }
10412
10413 lck_mtx_lock(&nspace_handler_lock);
10414 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
10415 lck_mtx_unlock(&nspace_handler_lock);
10416 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
10417 nspace_allow_virtual_devs ? "" : " NOT");
10418 error = 0;
10419
10420 }
10421 break;
10422
10423 case FSIOC_SET_FSTYPENAME_OVERRIDE:
10424 {
10425 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
10426 break;
10427 }
10428 if (vp->v_mount) {
10429 mount_lock(vp->v_mount);
10430 if (data[0] != 0) {
10431 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
10432 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
10433 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10434 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
10435 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
10436 }
10437 } else {
10438 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
10439 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
10440 }
10441 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
10442 vp->v_mount->fstypename_override[0] = '\0';
10443 }
10444 mount_unlock(vp->v_mount);
10445 }
10446 }
10447 break;
10448
10449 case DISK_CONDITIONER_IOC_GET: {
10450 error = disk_conditioner_get_info(vp->v_mount, (disk_conditioner_info *)data);
10451 }
10452 break;
10453
10454 case DISK_CONDITIONER_IOC_SET: {
10455 error = disk_conditioner_set_info(vp->v_mount, (disk_conditioner_info *)data);
10456 }
10457 break;
10458
10459 default: {
10460 /* other, known commands shouldn't be passed down here */
10461 switch (cmd) {
10462 case F_PUNCHHOLE:
10463 case F_TRIM_ACTIVE_FILE:
10464 case F_RDADVISE:
10465 case F_TRANSCODEKEY:
10466 case F_GETPROTECTIONLEVEL:
10467 case F_GETDEFAULTPROTLEVEL:
10468 case F_MAKECOMPRESSED:
10469 case F_SET_GREEDY_MODE:
10470 case F_SETSTATICCONTENT:
10471 case F_SETIOTYPE:
10472 case F_SETBACKINGSTORE:
10473 case F_GETPATH_MTMINFO:
10474 case APFSIOC_REVERT_TO_SNAPSHOT:
10475 case FSIOC_FIOSEEKHOLE:
10476 case FSIOC_FIOSEEKDATA:
10477 case HFS_GET_BOOT_INFO:
10478 case HFS_SET_BOOT_INFO:
10479 case FIOPINSWAP:
10480 case F_CHKCLEAN:
10481 case F_FULLFSYNC:
10482 case F_BARRIERFSYNC:
10483 case F_FREEZE_FS:
10484 case F_THAW_FS:
10485 error = EINVAL;
10486 goto outdrop;
10487 }
10488 /* Invoke the filesystem-specific code */
10489 error = VNOP_IOCTL(vp, cmd, data, options, ctx);
10490 }
10491
10492 } /* end switch stmt */
10493
10494 /*
10495 * if no errors, copy any data to user. Size was
10496 * already set and checked above.
10497 */
10498 if (error == 0 && (cmd & IOC_OUT) && size)
10499 error = copyout(data, udata, size);
10500
10501 outdrop:
10502 if (memp) {
10503 kfree(memp, size);
10504 }
10505
10506 return error;
10507 }
10508
10509 /* ARGSUSED */
10510 int
10511 fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
10512 {
10513 int error;
10514 struct nameidata nd;
10515 u_long nameiflags;
10516 vnode_t vp = NULL;
10517 vfs_context_t ctx = vfs_context_current();
10518
10519 AUDIT_ARG(cmd, uap->cmd);
10520 AUDIT_ARG(value32, uap->options);
10521 /* Get the vnode for the file we are getting info on: */
10522 nameiflags = 0;
10523 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
10524 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
10525 UIO_USERSPACE, uap->path, ctx);
10526 if ((error = namei(&nd))) goto done;
10527 vp = nd.ni_vp;
10528 nameidone(&nd);
10529
10530 #if CONFIG_MACF
10531 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
10532 if (error) {
10533 goto done;
10534 }
10535 #endif
10536
10537 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10538
10539 done:
10540 if (vp)
10541 vnode_put(vp);
10542 return error;
10543 }
10544 /* ARGSUSED */
10545 int
10546 ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
10547 {
10548 int error;
10549 vnode_t vp = NULL;
10550 vfs_context_t ctx = vfs_context_current();
10551 int fd = -1;
10552
10553 AUDIT_ARG(fd, uap->fd);
10554 AUDIT_ARG(cmd, uap->cmd);
10555 AUDIT_ARG(value32, uap->options);
10556
10557 /* Get the vnode for the file we are getting info on: */
10558 if ((error = file_vnode(uap->fd, &vp)))
10559 return error;
10560 fd = uap->fd;
10561 if ((error = vnode_getwithref(vp))) {
10562 file_drop(fd);
10563 return error;
10564 }
10565
10566 #if CONFIG_MACF
10567 if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
10568 file_drop(fd);
10569 vnode_put(vp);
10570 return error;
10571 }
10572 #endif
10573
10574 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
10575
10576 file_drop(fd);
10577
10578 /* validate vp; fsctl_internal() can drop iocount and reset vp to NULL */
10579 if (vp) {
10580 vnode_put(vp);
10581 }
10582
10583 return error;
10584 }
10585 /* end of fsctl system call */
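/*
 * Userspace view (illustrative sketch): the fsctl() and ffsctl() wrappers
 * declared in <sys/fsctl.h> are what funnel into fsctl_internal() above.
 * The wrapper prototypes are assumed here to match their historical
 * declarations; check <sys/fsctl.h> on the target SDK.  A minimal caller
 * that asks the volume backing a path to sync, and waits for it, might look
 * like:
 *
 *	#include <sys/fsctl.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	int
 *	sync_volume_of(const char *path)
 *	{
 *		uint32_t arg = FSCTL_SYNC_WAIT;	// wait for the sync to finish
 *
 *		// FSIOC_SYNC_VOLUME is handled generically in fsctl_internal()
 *		if (fsctl(path, FSIOC_SYNC_VOLUME, &arg, 0) == -1) {
 *			perror("fsctl(FSIOC_SYNC_VOLUME)");
 *			return -1;
 *		}
 *		return 0;
 *	}
 *
 * ffsctl() takes an already-open file descriptor instead of a path and is
 * otherwise identical; both end up in fsctl_internal() with an iocount held
 * on the resolved vnode.
 */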
10586
10587 /*
10588 * Retrieve the data of an extended attribute.
10589 */
10590 int
10591 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
10592 {
10593 vnode_t vp;
10594 struct nameidata nd;
10595 char attrname[XATTR_MAXNAMELEN+1];
10596 vfs_context_t ctx = vfs_context_current();
10597 uio_t auio = NULL;
10598 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10599 size_t attrsize = 0;
10600 size_t namelen;
10601 u_int32_t nameiflags;
10602 int error;
10603 char uio_buf[ UIO_SIZEOF(1) ];
10604
10605 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10606 return (EINVAL);
10607
10608 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10609 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
10610 if ((error = namei(&nd))) {
10611 return (error);
10612 }
10613 vp = nd.ni_vp;
10614 nameidone(&nd);
10615
10616 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10617 goto out;
10618 }
10619 if (xattr_protected(attrname)) {
10620 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
10621 error = EPERM;
10622 goto out;
10623 }
10624 }
10625 /*
10626 * the specific check for 0xffffffff is a hack to preserve
10627 * binary compatibility in K64 with applications that discovered
10628 * that passing in a buf pointer and a size of -1 resulted in
10629 * just the size of the indicated extended attribute being returned.
10630 * this isn't part of the documented behavior, but because of the
10631 * original implementation's check for "uap->size > 0", this behavior
10632 * was allowed. In K32 that check turned into a signed comparison
10633 * even though uap->size is unsigned... in K64, we blow by that
10634 * check because uap->size is unsigned and doesn't get sign smeared
10635 * in the munger for a 32 bit user app. we also need to add a
10636 * check to limit the maximum size of the buffer being passed in...
10637 * unfortunately, the underlying filesystems seem to just malloc
10638 * the requested size even if the actual extended attribute is tiny.
10639 * because that malloc is for kernel wired memory, we have to put a
10640 * sane limit on it.
10641 *
10642 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
10643 * U64 running on K64 will yield -1 (64 bits wide)
10644 * U32/U64 running on K32 will yield -1 (32 bits wide)
10645 */
10646 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
10647 goto no_uio;
10648
10649 if (uap->value) {
10650 if (uap->size > (size_t)XATTR_MAXSIZE)
10651 uap->size = XATTR_MAXSIZE;
10652
10653 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10654 &uio_buf[0], sizeof(uio_buf));
10655 uio_addiov(auio, uap->value, uap->size);
10656 }
10657 no_uio:
10658 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
10659 out:
10660 vnode_put(vp);
10661
10662 if (auio) {
10663 *retval = uap->size - uio_resid(auio);
10664 } else {
10665 *retval = (user_ssize_t)attrsize;
10666 }
10667
10668 return (error);
10669 }
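/*
 * Userspace view (illustrative sketch): the syscall above backs the
 * getxattr() wrapper declared in <sys/xattr.h>, whose documented prototype
 * is assumed here:
 *
 *	ssize_t getxattr(const char *path, const char *name, void *value,
 *	    size_t size, u_int32_t position, int options);
 *
 * Passing value == NULL and size == 0 is the documented way to ask only for
 * the attribute's size (the 0xffffffff / -1 size discussed above is a
 * compatibility quirk, not an interface).  For example:
 *
 *	#include <sys/xattr.h>
 *
 *	ssize_t
 *	finder_info_size(const char *path)
 *	{
 *		// returns the attribute size, or -1 with errno set
 *		return getxattr(path, "com.apple.FinderInfo", NULL, 0, 0,
 *		    XATTR_NOFOLLOW);
 *	}
 */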
10670
10671 /*
10672 * Retrieve the data of an extended attribute.
10673 */
10674 int
10675 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
10676 {
10677 vnode_t vp;
10678 char attrname[XATTR_MAXNAMELEN+1];
10679 uio_t auio = NULL;
10680 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10681 size_t attrsize = 0;
10682 size_t namelen;
10683 int error;
10684 char uio_buf[ UIO_SIZEOF(1) ];
10685
10686 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10687 return (EINVAL);
10688
10689 if ( (error = file_vnode(uap->fd, &vp)) ) {
10690 return (error);
10691 }
10692 if ( (error = vnode_getwithref(vp)) ) {
10693 file_drop(uap->fd);
10694 return(error);
10695 }
10696 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10697 goto out;
10698 }
10699 if (xattr_protected(attrname)) {
10700 error = EPERM;
10701 goto out;
10702 }
10703 if (uap->value && uap->size > 0) {
10704 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
10705 &uio_buf[0], sizeof(uio_buf));
10706 uio_addiov(auio, uap->value, uap->size);
10707 }
10708
10709 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
10710 out:
10711 (void)vnode_put(vp);
10712 file_drop(uap->fd);
10713
10714 if (auio) {
10715 *retval = uap->size - uio_resid(auio);
10716 } else {
10717 *retval = (user_ssize_t)attrsize;
10718 }
10719 return (error);
10720 }
10721
10722 /*
10723 * Set the data of an extended attribute.
10724 */
10725 int
10726 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
10727 {
10728 vnode_t vp;
10729 struct nameidata nd;
10730 char attrname[XATTR_MAXNAMELEN+1];
10731 vfs_context_t ctx = vfs_context_current();
10732 uio_t auio = NULL;
10733 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10734 size_t namelen;
10735 u_int32_t nameiflags;
10736 int error;
10737 char uio_buf[ UIO_SIZEOF(1) ];
10738
10739 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10740 return (EINVAL);
10741
10742 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10743 if (error == EPERM) {
10744 /* a copyinstr failure reaches here as 1 (EPERM) via the != 0 above; treat it as a too-long name */
10745 return (ENAMETOOLONG);
10746 }
10747 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10748 return error;
10749 }
10750 if (xattr_protected(attrname))
10751 return(EPERM);
10752 if (uap->size != 0 && uap->value == 0) {
10753 return (EINVAL);
10754 }
10755
10756 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10757 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
10758 if ((error = namei(&nd))) {
10759 return (error);
10760 }
10761 vp = nd.ni_vp;
10762 nameidone(&nd);
10763
10764 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10765 &uio_buf[0], sizeof(uio_buf));
10766 uio_addiov(auio, uap->value, uap->size);
10767
10768 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
10769 #if CONFIG_FSE
10770 if (error == 0) {
10771 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10772 FSE_ARG_VNODE, vp,
10773 FSE_ARG_DONE);
10774 }
10775 #endif
10776 vnode_put(vp);
10777 *retval = 0;
10778 return (error);
10779 }
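/*
 * Userspace view (illustrative sketch): setxattr() and removexattr() as
 * declared in <sys/xattr.h>; the documented prototypes are assumed here:
 *
 *	int setxattr(const char *path, const char *name, const void *value,
 *	    size_t size, u_int32_t position, int options);
 *	int removexattr(const char *path, const char *name, int options);
 *
 * For example, tagging a file with a small user-defined attribute and later
 * removing it (the attribute name is made up for illustration):
 *
 *	#include <sys/xattr.h>
 *	#include <string.h>
 *
 *	int
 *	tag_file(const char *path, const char *tag)
 *	{
 *		// XATTR_CREATE makes the call fail with EEXIST if the
 *		// attribute already exists
 *		return setxattr(path, "local.example.tag", tag, strlen(tag),
 *		    0, XATTR_CREATE);
 *	}
 *
 *	int
 *	untag_file(const char *path)
 *	{
 *		return removexattr(path, "local.example.tag", 0);
 *	}
 */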
10780
10781 /*
10782 * Set the data of an extended attribute.
10783 */
10784 int
10785 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
10786 {
10787 vnode_t vp;
10788 char attrname[XATTR_MAXNAMELEN+1];
10789 uio_t auio = NULL;
10790 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10791 size_t namelen;
10792 int error;
10793 char uio_buf[ UIO_SIZEOF(1) ];
10794 #if CONFIG_FSE
10795 vfs_context_t ctx = vfs_context_current();
10796 #endif
10797
10798 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10799 return (EINVAL);
10800
10801 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
10802 if (error == EPERM) {
10803 /* a copyinstr failure reaches here as 1 (EPERM) via the != 0 above; treat it as a too-long name */
10804 return (ENAMETOOLONG);
10805 }
10806 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
10807 return error;
10808 }
10809 if (xattr_protected(attrname))
10810 return(EPERM);
10811 if (uap->size != 0 && uap->value == 0) {
10812 return (EINVAL);
10813 }
10814 if ( (error = file_vnode(uap->fd, &vp)) ) {
10815 return (error);
10816 }
10817 if ( (error = vnode_getwithref(vp)) ) {
10818 file_drop(uap->fd);
10819 return(error);
10820 }
10821 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
10822 &uio_buf[0], sizeof(uio_buf));
10823 uio_addiov(auio, uap->value, uap->size);
10824
10825 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
10826 #if CONFIG_FSE
10827 if (error == 0) {
10828 add_fsevent(FSE_XATTR_MODIFIED, ctx,
10829 FSE_ARG_VNODE, vp,
10830 FSE_ARG_DONE);
10831 }
10832 #endif
10833 vnode_put(vp);
10834 file_drop(uap->fd);
10835 *retval = 0;
10836 return (error);
10837 }
10838
10839 /*
10840 * Remove an extended attribute.
10841 * XXX Code duplication here.
10842 */
10843 int
10844 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
10845 {
10846 vnode_t vp;
10847 struct nameidata nd;
10848 char attrname[XATTR_MAXNAMELEN+1];
10849 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10850 vfs_context_t ctx = vfs_context_current();
10851 size_t namelen;
10852 u_int32_t nameiflags;
10853 int error;
10854
10855 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10856 return (EINVAL);
10857
10858 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10859 if (error != 0) {
10860 return (error);
10861 }
10862 if (xattr_protected(attrname))
10863 return(EPERM);
10864 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10865 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
10866 if ((error = namei(&nd))) {
10867 return (error);
10868 }
10869 vp = nd.ni_vp;
10870 nameidone(&nd);
10871
10872 error = vn_removexattr(vp, attrname, uap->options, ctx);
10873 #if CONFIG_FSE
10874 if (error == 0) {
10875 add_fsevent(FSE_XATTR_REMOVED, ctx,
10876 FSE_ARG_VNODE, vp,
10877 FSE_ARG_DONE);
10878 }
10879 #endif
10880 vnode_put(vp);
10881 *retval = 0;
10882 return (error);
10883 }
10884
10885 /*
10886 * Remove an extended attribute.
10887 * XXX Code duplication here.
10888 */
10889 int
10890 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
10891 {
10892 vnode_t vp;
10893 char attrname[XATTR_MAXNAMELEN+1];
10894 size_t namelen;
10895 int error;
10896 #if CONFIG_FSE
10897 vfs_context_t ctx = vfs_context_current();
10898 #endif
10899
10900 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10901 return (EINVAL);
10902
10903 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10904 if (error != 0) {
10905 return (error);
10906 }
10907 if (xattr_protected(attrname))
10908 return(EPERM);
10909 if ( (error = file_vnode(uap->fd, &vp)) ) {
10910 return (error);
10911 }
10912 if ( (error = vnode_getwithref(vp)) ) {
10913 file_drop(uap->fd);
10914 return(error);
10915 }
10916
10917 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
10918 #if CONFIG_FSE
10919 if (error == 0) {
10920 add_fsevent(FSE_XATTR_REMOVED, ctx,
10921 FSE_ARG_VNODE, vp,
10922 FSE_ARG_DONE);
10923 }
10924 #endif
10925 vnode_put(vp);
10926 file_drop(uap->fd);
10927 *retval = 0;
10928 return (error);
10929 }
10930
10931 /*
10932 * Retrieve the list of extended attribute names.
10933 * XXX Code duplication here.
10934 */
10935 int
10936 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
10937 {
10938 vnode_t vp;
10939 struct nameidata nd;
10940 vfs_context_t ctx = vfs_context_current();
10941 uio_t auio = NULL;
10942 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10943 size_t attrsize = 0;
10944 u_int32_t nameiflags;
10945 int error;
10946 char uio_buf[ UIO_SIZEOF(1) ];
10947
10948 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10949 return (EINVAL);
10950
10951 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10952 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
10953 if ((error = namei(&nd))) {
10954 return (error);
10955 }
10956 vp = nd.ni_vp;
10957 nameidone(&nd);
10958 if (uap->namebuf != 0 && uap->bufsize > 0) {
10959 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
10960 &uio_buf[0], sizeof(uio_buf));
10961 uio_addiov(auio, uap->namebuf, uap->bufsize);
10962 }
10963
10964 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
10965
10966 vnode_put(vp);
10967 if (auio) {
10968 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10969 } else {
10970 *retval = (user_ssize_t)attrsize;
10971 }
10972 return (error);
10973 }
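/*
 * Userspace view (illustrative sketch): vn_listxattr() fills the caller's
 * buffer with the attribute names packed back to back, each terminated by a
 * NUL.  Assuming the documented <sys/xattr.h> prototype
 *
 *	ssize_t listxattr(const char *path, char *namebuf, size_t size,
 *	    int options);
 *
 * a caller typically walks that packed list like this:
 *
 *	#include <sys/xattr.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	void
 *	print_xattr_names(const char *path)
 *	{
 *		char names[4096];	// arbitrary size for illustration
 *		ssize_t len = listxattr(path, names, sizeof(names), 0);
 *
 *		for (ssize_t off = 0; off < len; ) {
 *			printf("%s\n", &names[off]);
 *			off += strlen(&names[off]) + 1;	// skip name + NUL
 *		}
 *	}
 */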
10974
10975 /*
10976 * Retrieve the list of extended attribute names.
10977 * XXX Code duplication here.
10978 */
10979 int
10980 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
10981 {
10982 vnode_t vp;
10983 uio_t auio = NULL;
10984 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10985 size_t attrsize = 0;
10986 int error;
10987 char uio_buf[ UIO_SIZEOF(1) ];
10988
10989 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10990 return (EINVAL);
10991
10992 if ( (error = file_vnode(uap->fd, &vp)) ) {
10993 return (error);
10994 }
10995 if ( (error = vnode_getwithref(vp)) ) {
10996 file_drop(uap->fd);
10997 return(error);
10998 }
10999 if (uap->namebuf != 0 && uap->bufsize > 0) {
11000 auio = uio_createwithbuffer(1, 0, spacetype,
11001 UIO_READ, &uio_buf[0], sizeof(uio_buf));
11002 uio_addiov(auio, uap->namebuf, uap->bufsize);
11003 }
11004
11005 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
11006
11007 vnode_put(vp);
11008 file_drop(uap->fd);
11009 if (auio) {
11010 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
11011 } else {
11012 *retval = (user_ssize_t)attrsize;
11013 }
11014 return (error);
11015 }
11016
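/*
 * Resolve a (volfs id, object id) pair to an absolute path.  The caller
 * supplies the output buffer (at most PAGE_SIZE bytes); on success *pathlen
 * is set to the length produced by build_path().  Object id 2 is treated as
 * the volume root, and for union mounts the lookup falls through to the
 * mounted-on volume when the id is not found in the upper layer.
 */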
11017 static int fsgetpath_internal(
11018 vfs_context_t ctx, int volfs_id, uint64_t objid,
11019 vm_size_t bufsize, caddr_t buf, int *pathlen)
11020 {
11021 int error;
11022 struct mount *mp = NULL;
11023 vnode_t vp;
11024 int length;
11025 int bpflags;
11026 /* maximum number of times to retry build_path */
11027 unsigned int retries = 0x10;
11028
11029 if (bufsize > PAGE_SIZE) {
11030 return (EINVAL);
11031 }
11032
11033 if (buf == NULL) {
11034 return (ENOMEM);
11035 }
11036
11037 retry:
11038 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
11039 /* unexpected failure */
11040 return ENOTSUP;
11041 }
11042
11043 unionget:
11044 if (objid == 2) {
11045 error = VFS_ROOT(mp, &vp, ctx);
11046 } else {
11047 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
11048 }
11049
11050 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
11051 /*
11052 * If the fileid isn't found and we're in a union
11053 * mount volume, then see if the fileid is in the
11054 * mounted-on volume.
11055 */
11056 struct mount *tmp = mp;
11057 mp = vnode_mount(tmp->mnt_vnodecovered);
11058 vfs_unbusy(tmp);
11059 if (vfs_busy(mp, LK_NOWAIT) == 0)
11060 goto unionget;
11061 } else {
11062 vfs_unbusy(mp);
11063 }
11064
11065 if (error) {
11066 return error;
11067 }
11068
11069 #if CONFIG_MACF
11070 error = mac_vnode_check_fsgetpath(ctx, vp);
11071 if (error) {
11072 vnode_put(vp);
11073 return error;
11074 }
11075 #endif
11076
11077 /* Obtain the absolute path to this vnode. */
11078 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
11079 bpflags |= BUILDPATH_CHECK_MOVED;
11080 error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
11081 vnode_put(vp);
11082
11083 if (error) {
11084 /* there was a race building the path, try a few more times */
11085 if (error == EAGAIN) {
11086 --retries;
11087 if (retries > 0)
11088 goto retry;
11089
11090 error = ENOENT;
11091 }
11092 goto out;
11093 }
11094
11095 AUDIT_ARG(text, buf);
11096
11097 if (kdebug_enable) {
11098 long dbg_parms[NUMPARMS];
11099 int dbg_namelen;
11100
11101 dbg_namelen = (int)sizeof(dbg_parms);
11102
11103 if (length < dbg_namelen) {
11104 memcpy((char *)dbg_parms, buf, length);
11105 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
11106
11107 dbg_namelen = length;
11108 } else {
11109 memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
11110 }
11111
11112 kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
11113 }
11114
11115 *pathlen = (user_ssize_t)length; /* may be superseded by error */
11116
11117 out:
11118 return (error);
11119 }
11120
11121 /*
11122 * Obtain the full pathname of a file system object by id.
11123 */
11124 int
11125 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
11126 {
11127 vfs_context_t ctx = vfs_context_current();
11128 fsid_t fsid;
11129 char *realpath;
11130 int length;
11131 int error;
11132
11133 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
11134 return (error);
11135 }
11136 AUDIT_ARG(value32, fsid.val[0]);
11137 AUDIT_ARG(value64, uap->objid);
11138 /* Restrict output buffer size for now. */
11139
11140 if (uap->bufsize > PAGE_SIZE) {
11141 return (EINVAL);
11142 }
11143 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
11144 if (realpath == NULL) {
11145 return (ENOMEM);
11146 }
11147
11148 error = fsgetpath_internal(
11149 ctx, fsid.val[0], uap->objid,
11150 uap->bufsize, realpath, &length);
11151
11152 if (error) {
11153 goto out;
11154 }
11155
11156 error = copyout((caddr_t)realpath, uap->buf, length);
11157
11158 *retval = (user_ssize_t)length; /* may be superseded by error */
11159 out:
11160 if (realpath) {
11161 FREE(realpath, M_TEMP);
11162 }
11163 return (error);
11164 }
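/*
 * Userspace view (illustrative sketch): a wrapper with the shape below is
 * exposed to user code; the prototype and header are assumptions and should
 * be checked against the target SDK (<sys/attr.h> has historically carried
 * the declaration):
 *
 *	ssize_t fsgetpath(char *buf, size_t bufsize, fsid_t *fsid,
 *	    uint64_t obj_id);
 *
 * Given an fsid_t (e.g. statfs()'s f_fsid) and a file id (e.g. stat()'s
 * st_ino), it resolves the id back to a path, mirroring
 * fsgetpath_internal() above:
 *
 *	#include <sys/attr.h>
 *	#include <sys/mount.h>
 *	#include <stdio.h>
 *
 *	void
 *	print_path_for_id(fsid_t fsid, uint64_t obj_id)
 *	{
 *		char path[1024];	// must not exceed PAGE_SIZE, see above
 *
 *		if (fsgetpath(path, sizeof(path), &fsid, obj_id) == -1)
 *			perror("fsgetpath");
 *		else
 *			printf("%s\n", path);
 *	}
 */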
11165
11166 /*
11167 * Common routine to handle various flavors of statfs data heading out
11168 * to user space.
11169 *
11170 * Returns: 0 Success
11171 * EFAULT
11172 */
11173 static int
11174 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
11175 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
11176 boolean_t partial_copy)
11177 {
11178 int error;
11179 int my_size, copy_size;
11180
11181 if (is_64_bit) {
11182 struct user64_statfs sfs;
11183 my_size = copy_size = sizeof(sfs);
11184 bzero(&sfs, my_size);
11185 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
11186 sfs.f_type = mp->mnt_vtable->vfc_typenum;
11187 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
11188 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
11189 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
11190 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
11191 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
11192 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
11193 sfs.f_files = (user64_long_t)sfsp->f_files;
11194 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
11195 sfs.f_fsid = sfsp->f_fsid;
11196 sfs.f_owner = sfsp->f_owner;
11197 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
11198 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
11199 } else {
11200 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
11201 }
11202 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
11203 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
11204
11205 if (partial_copy) {
11206 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
11207 }
11208 error = copyout((caddr_t)&sfs, bufp, copy_size);
11209 }
11210 else {
11211 struct user32_statfs sfs;
11212
11213 my_size = copy_size = sizeof(sfs);
11214 bzero(&sfs, my_size);
11215
11216 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
11217 sfs.f_type = mp->mnt_vtable->vfc_typenum;
11218 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
11219
11220 /*
11221 * It's possible for there to be more than 2^31 blocks in the filesystem, so we
11222 * have to fudge the numbers here in that case. We inflate the blocksize in order
11223 * to reflect the filesystem size as best we can.
11224 */
11225 if ((sfsp->f_blocks > INT_MAX)
11226 /* Hack for 4061702. I think the real fix is for Carbon to
11227 * look for some volume capability and not depend on hidden
11228 * semantics agreed between a FS and Carbon.
11229 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
11230 * for Carbon to set the bNoVolumeSizes volume attribute.
11231 * Without this, webdavfs files cannot be copied onto
11232 * disk as they look huge. This change should not affect
11233 * Xsan, as it should not be setting these to -1.
11234 */
11235 && (sfsp->f_blocks != 0xffffffffffffffffULL)
11236 && (sfsp->f_bfree != 0xffffffffffffffffULL)
11237 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
11238 int shift;
11239
11240 /*
11241 * Work out how far we have to shift the block count down to make it fit.
11242 * Note that it's possible to have to shift so far that the resulting
11243 * blocksize would be unreportably large. At that point, we will clip
11244 * any values that don't fit.
11245 *
11246 * For safety's sake, we also ensure that f_iosize is never reported as
11247 * being smaller than f_bsize.
11248 */
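/*
 * Worked example (illustrative numbers): with f_bsize = 4096 and
 * f_blocks = 0x280000000 (a 40 TiB volume), f_blocks exceeds INT_MAX and
 * the loop below settles on shift = 3:
 *
 *	reported f_bsize  = 4096 << 3        = 32768
 *	reported f_blocks = 0x280000000 >> 3 = 0x50000000  (<= INT_MAX)
 *
 * The product of the reported values still describes a 40 TiB volume,
 * which is the best a 32-bit statfs consumer can be told.
 */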
11249 for (shift = 0; shift < 32; shift++) {
11250 if ((sfsp->f_blocks >> shift) <= INT_MAX)
11251 break;
11252 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
11253 break;
11254 }
11255 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
11256 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
11257 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
11258 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
11259 #undef __SHIFT_OR_CLIP
11260 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
11261 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
11262 } else {
11263 /* filesystem is small enough to be reported honestly */
11264 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
11265 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
11266 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
11267 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
11268 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
11269 }
11270 sfs.f_files = (user32_long_t)sfsp->f_files;
11271 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
11272 sfs.f_fsid = sfsp->f_fsid;
11273 sfs.f_owner = sfsp->f_owner;
11274 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
11275 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
11276 } else {
11277 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
11278 }
11279 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
11280 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
11281
11282 if (partial_copy) {
11283 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
11284 }
11285 error = copyout((caddr_t)&sfs, bufp, copy_size);
11286 }
11287
11288 if (sizep != NULL) {
11289 *sizep = my_size;
11290 }
11291 return(error);
11292 }
11293
11294 /*
11295 * copy stat structure into user_stat structure.
11296 */
11297 void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
11298 {
11299 bzero(usbp, sizeof(*usbp));
11300
11301 usbp->st_dev = sbp->st_dev;
11302 usbp->st_ino = sbp->st_ino;
11303 usbp->st_mode = sbp->st_mode;
11304 usbp->st_nlink = sbp->st_nlink;
11305 usbp->st_uid = sbp->st_uid;
11306 usbp->st_gid = sbp->st_gid;
11307 usbp->st_rdev = sbp->st_rdev;
11308 #ifndef _POSIX_C_SOURCE
11309 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11310 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11311 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11312 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11313 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11314 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11315 #else
11316 usbp->st_atime = sbp->st_atime;
11317 usbp->st_atimensec = sbp->st_atimensec;
11318 usbp->st_mtime = sbp->st_mtime;
11319 usbp->st_mtimensec = sbp->st_mtimensec;
11320 usbp->st_ctime = sbp->st_ctime;
11321 usbp->st_ctimensec = sbp->st_ctimensec;
11322 #endif
11323 usbp->st_size = sbp->st_size;
11324 usbp->st_blocks = sbp->st_blocks;
11325 usbp->st_blksize = sbp->st_blksize;
11326 usbp->st_flags = sbp->st_flags;
11327 usbp->st_gen = sbp->st_gen;
11328 usbp->st_lspare = sbp->st_lspare;
11329 usbp->st_qspare[0] = sbp->st_qspare[0];
11330 usbp->st_qspare[1] = sbp->st_qspare[1];
11331 }
11332
11333 void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
11334 {
11335 bzero(usbp, sizeof(*usbp));
11336
11337 usbp->st_dev = sbp->st_dev;
11338 usbp->st_ino = sbp->st_ino;
11339 usbp->st_mode = sbp->st_mode;
11340 usbp->st_nlink = sbp->st_nlink;
11341 usbp->st_uid = sbp->st_uid;
11342 usbp->st_gid = sbp->st_gid;
11343 usbp->st_rdev = sbp->st_rdev;
11344 #ifndef _POSIX_C_SOURCE
11345 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11346 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11347 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11348 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11349 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11350 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11351 #else
11352 usbp->st_atime = sbp->st_atime;
11353 usbp->st_atimensec = sbp->st_atimensec;
11354 usbp->st_mtime = sbp->st_mtime;
11355 usbp->st_mtimensec = sbp->st_mtimensec;
11356 usbp->st_ctime = sbp->st_ctime;
11357 usbp->st_ctimensec = sbp->st_ctimensec;
11358 #endif
11359 usbp->st_size = sbp->st_size;
11360 usbp->st_blocks = sbp->st_blocks;
11361 usbp->st_blksize = sbp->st_blksize;
11362 usbp->st_flags = sbp->st_flags;
11363 usbp->st_gen = sbp->st_gen;
11364 usbp->st_lspare = sbp->st_lspare;
11365 usbp->st_qspare[0] = sbp->st_qspare[0];
11366 usbp->st_qspare[1] = sbp->st_qspare[1];
11367 }
11368
11369 /*
11370 * copy stat64 structure into user_stat64 structure.
11371 */
11372 void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
11373 {
11374 bzero(usbp, sizeof(*usbp));
11375
11376 usbp->st_dev = sbp->st_dev;
11377 usbp->st_ino = sbp->st_ino;
11378 usbp->st_mode = sbp->st_mode;
11379 usbp->st_nlink = sbp->st_nlink;
11380 usbp->st_uid = sbp->st_uid;
11381 usbp->st_gid = sbp->st_gid;
11382 usbp->st_rdev = sbp->st_rdev;
11383 #ifndef _POSIX_C_SOURCE
11384 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11385 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11386 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11387 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11388 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11389 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11390 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11391 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
11392 #else
11393 usbp->st_atime = sbp->st_atime;
11394 usbp->st_atimensec = sbp->st_atimensec;
11395 usbp->st_mtime = sbp->st_mtime;
11396 usbp->st_mtimensec = sbp->st_mtimensec;
11397 usbp->st_ctime = sbp->st_ctime;
11398 usbp->st_ctimensec = sbp->st_ctimensec;
11399 usbp->st_birthtime = sbp->st_birthtime;
11400 usbp->st_birthtimensec = sbp->st_birthtimensec;
11401 #endif
11402 usbp->st_size = sbp->st_size;
11403 usbp->st_blocks = sbp->st_blocks;
11404 usbp->st_blksize = sbp->st_blksize;
11405 usbp->st_flags = sbp->st_flags;
11406 usbp->st_gen = sbp->st_gen;
11407 usbp->st_lspare = sbp->st_lspare;
11408 usbp->st_qspare[0] = sbp->st_qspare[0];
11409 usbp->st_qspare[1] = sbp->st_qspare[1];
11410 }
11411
11412 void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
11413 {
11414 bzero(usbp, sizeof(*usbp));
11415
11416 usbp->st_dev = sbp->st_dev;
11417 usbp->st_ino = sbp->st_ino;
11418 usbp->st_mode = sbp->st_mode;
11419 usbp->st_nlink = sbp->st_nlink;
11420 usbp->st_uid = sbp->st_uid;
11421 usbp->st_gid = sbp->st_gid;
11422 usbp->st_rdev = sbp->st_rdev;
11423 #ifndef _POSIX_C_SOURCE
11424 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
11425 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
11426 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
11427 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
11428 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
11429 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
11430 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
11431 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
11432 #else
11433 usbp->st_atime = sbp->st_atime;
11434 usbp->st_atimensec = sbp->st_atimensec;
11435 usbp->st_mtime = sbp->st_mtime;
11436 usbp->st_mtimensec = sbp->st_mtimensec;
11437 usbp->st_ctime = sbp->st_ctime;
11438 usbp->st_ctimensec = sbp->st_ctimensec;
11439 usbp->st_birthtime = sbp->st_birthtime;
11440 usbp->st_birthtimensec = sbp->st_birthtimensec;
11441 #endif
11442 usbp->st_size = sbp->st_size;
11443 usbp->st_blocks = sbp->st_blocks;
11444 usbp->st_blksize = sbp->st_blksize;
11445 usbp->st_flags = sbp->st_flags;
11446 usbp->st_gen = sbp->st_gen;
11447 usbp->st_lspare = sbp->st_lspare;
11448 usbp->st_qspare[0] = sbp->st_qspare[0];
11449 usbp->st_qspare[1] = sbp->st_qspare[1];
11450 }
11451
11452 /*
11453 * Purge buffer cache for simulating cold starts
11454 */
11455 static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
11456 {
11457 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
11458
11459 return VNODE_RETURNED;
11460 }
11461
11462 static int vfs_purge_callback(mount_t mp, __unused void * arg)
11463 {
11464 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
11465
11466 return VFS_RETURNED;
11467 }
11468
11469 int
11470 vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
11471 {
11472 if (!kauth_cred_issuser(kauth_cred_get()))
11473 return EPERM;
11474
11475 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
11476
11477 return 0;
11478 }
11479
11480 /*
11481 * gets the vnode associated with the (unnamed) snapshot directory
11482 * for a Filesystem. The snapshot directory vnode is returned with
11483 * an iocount on it.
11484 */
11485 int
11486 vnode_get_snapdir(vnode_t rvp, vnode_t *sdvpp, vfs_context_t ctx)
11487 {
11488 return (VFS_VGET_SNAPDIR(vnode_mount(rvp), sdvpp, ctx));
11489 }
11490
11491 /*
11492 * Get the snapshot vnode.
11493 *
11494 * If successful, the call returns with an iocount on *rvpp and *sdvpp, and
11495 * the caller needs to call nameidone() on ndp.
11496 *
11497 * If the snapshot vnode exists it is returned in ndp->ni_vp.
11498 *
11499 * If it returns with an error, *rvpp, *sdvpp are NULL and nameidone() is
11500 * not needed.
11501 */
11502 static int
11503 vnode_get_snapshot(int dirfd, vnode_t *rvpp, vnode_t *sdvpp,
11504 user_addr_t name, struct nameidata *ndp, int32_t op,
11505 #if !CONFIG_TRIGGERS
11506 __unused
11507 #endif
11508 enum path_operation pathop,
11509 vfs_context_t ctx)
11510 {
11511 int error, i;
11512 caddr_t name_buf;
11513 size_t name_len;
11514 struct vfs_attr vfa;
11515
11516 *sdvpp = NULLVP;
11517 *rvpp = NULLVP;
11518
11519 error = vnode_getfromfd(ctx, dirfd, rvpp);
11520 if (error)
11521 return (error);
11522
11523 if (!vnode_isvroot(*rvpp)) {
11524 error = EINVAL;
11525 goto out;
11526 }
11527
11528 /* Make sure the filesystem supports snapshots */
11529 VFSATTR_INIT(&vfa);
11530 VFSATTR_WANTED(&vfa, f_capabilities);
11531 if ((vfs_getattr(vnode_mount(*rvpp), &vfa, ctx) != 0) ||
11532 !VFSATTR_IS_SUPPORTED(&vfa, f_capabilities) ||
11533 !((vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] &
11534 VOL_CAP_INT_SNAPSHOT)) ||
11535 !((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &
11536 VOL_CAP_INT_SNAPSHOT))) {
11537 error = ENOTSUP;
11538 goto out;
11539 }
11540
11541 error = vnode_get_snapdir(*rvpp, sdvpp, ctx);
11542 if (error)
11543 goto out;
11544
11545 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11546 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11547 if (error)
11548 goto out1;
11549
11550 /*
11551 * Some sanity checks: the name can't be empty, ".", or "..", and can't contain slashes.
11552 * (The length returned by copyinstr includes the terminating NUL.)
11553 */
11554 if ((name_len == 1) || (name_len == 2 && name_buf[0] == '.') ||
11555 (name_len == 3 && name_buf[0] == '.' && name_buf[1] == '.')) {
11556 error = EINVAL;
11557 goto out1;
11558 }
11559 for (i = 0; i < (int)name_len && name_buf[i] != '/'; i++);
11560 if (i < (int)name_len) {
11561 error = EINVAL;
11562 goto out1;
11563 }
11564
11565 #if CONFIG_MACF
11566 if (op == CREATE) {
11567 error = mac_mount_check_snapshot_create(ctx, vnode_mount(*rvpp),
11568 name_buf);
11569 } else if (op == DELETE) {
11570 error = mac_mount_check_snapshot_delete(ctx, vnode_mount(*rvpp),
11571 name_buf);
11572 }
11573 if (error)
11574 goto out1;
11575 #endif
11576
11577 /* Check if the snapshot already exists ... */
11578 NDINIT(ndp, op, pathop, USEDVP | NOCACHE | AUDITVNPATH1,
11579 UIO_SYSSPACE, CAST_USER_ADDR_T(name_buf), ctx);
11580 ndp->ni_dvp = *sdvpp;
11581
11582 error = namei(ndp);
11583 out1:
11584 FREE(name_buf, M_TEMP);
11585 out:
11586 if (error) {
11587 if (*sdvpp) {
11588 vnode_put(*sdvpp);
11589 *sdvpp = NULLVP;
11590 }
11591 if (*rvpp) {
11592 vnode_put(*rvpp);
11593 *rvpp = NULLVP;
11594 }
11595 }
11596 return (error);
11597 }
11598
11599 /*
11600 * create a filesystem snapshot (for supporting filesystems)
11601 *
11602 * A much simplified version of openat(dirfd, name, O_CREAT | O_EXCL)
11603 * We get to the (unnamed) snapshot directory vnode and create the vnode
11604 * for the snapshot in it.
11605 *
11606 * Restrictions:
11607 *
11608 * a) The passed-in snapshot name cannot contain slashes.
11609 * b) The name can't be "." or "..".
11610 *
11611 * Since this requires superuser privileges, vnode_authorize calls are not
11612 * made.
11613 */
11614 static int
11615 snapshot_create(int dirfd, user_addr_t name, __unused uint32_t flags,
11616 vfs_context_t ctx)
11617 {
11618 vnode_t rvp, snapdvp;
11619 int error;
11620 struct nameidata namend;
11621
11622 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, CREATE,
11623 OP_LINK, ctx);
11624 if (error)
11625 return (error);
11626
11627 if (namend.ni_vp) {
11628 vnode_put(namend.ni_vp);
11629 error = EEXIST;
11630 } else {
11631 struct vnode_attr va;
11632 vnode_t vp = NULLVP;
11633
11634 VATTR_INIT(&va);
11635 VATTR_SET(&va, va_type, VREG);
11636 VATTR_SET(&va, va_mode, 0);
11637
11638 error = vn_create(snapdvp, &vp, &namend, &va,
11639 VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, 0, NULL, ctx);
11640 if (!error && vp)
11641 vnode_put(vp);
11642 }
11643
11644 nameidone(&namend);
11645 vnode_put(snapdvp);
11646 vnode_put(rvp);
11647 return (error);
11648 }
11649
11650 /*
11651 * Delete a Filesystem snapshot
11652 *
11653 * get the vnode for the unnamed snapshot directory and the snapshot and
11654 * delete the snapshot.
11655 */
11656 static int
11657 snapshot_delete(int dirfd, user_addr_t name, __unused uint32_t flags,
11658 vfs_context_t ctx)
11659 {
11660 vnode_t rvp, snapdvp;
11661 int error;
11662 struct nameidata namend;
11663
11664 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, DELETE,
11665 OP_UNLINK, ctx);
11666 if (error)
11667 goto out;
11668
11669 error = VNOP_REMOVE(snapdvp, namend.ni_vp, &namend.ni_cnd,
11670 VNODE_REMOVE_SKIP_NAMESPACE_EVENT, ctx);
11671
11672 vnode_put(namend.ni_vp);
11673 nameidone(&namend);
11674 vnode_put(snapdvp);
11675 vnode_put(rvp);
11676 out:
11677 return (error);
11678 }
11679
11680 /*
11681 * Revert a filesystem to a snapshot
11682 *
11683 * Marks the filesystem to revert to the given snapshot on next mount.
11684 */
11685 static int
11686 snapshot_revert(int dirfd, user_addr_t name, __unused uint32_t flags,
11687 vfs_context_t ctx)
11688 {
11689 int error;
11690 vnode_t rvp;
11691 mount_t mp;
11692 struct fs_snapshot_revert_args revert_data;
11693 struct componentname cnp;
11694 caddr_t name_buf;
11695 size_t name_len;
11696
11697 error = vnode_getfromfd(ctx, dirfd, &rvp);
11698 if (error) {
11699 return (error);
11700 }
11701 mp = vnode_mount(rvp);
11702
11703 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11704 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11705 if (error) {
11706 FREE(name_buf, M_TEMP);
11707 vnode_put(rvp);
11708 return (error);
11709 }
11710
11711 #if CONFIG_MACF
11712 error = mac_mount_check_snapshot_revert(ctx, mp, name_buf);
11713 if (error) {
11714 FREE(name_buf, M_TEMP);
11715 vnode_put(rvp);
11716 return (error);
11717 }
11718 #endif
11719
11720 /*
11721 * Grab mount_iterref so that we can release the vnode,
11722 * since VFSIOC_REVERT_SNAPSHOT could conceivably cause a sync.
11723 */
11724 error = mount_iterref (mp, 0);
11725 vnode_put(rvp);
11726 if (error) {
11727 FREE(name_buf, M_TEMP);
11728 return (error);
11729 }
11730
11731 memset(&cnp, 0, sizeof(cnp));
11732 cnp.cn_pnbuf = (char *)name_buf;
11733 cnp.cn_nameiop = LOOKUP;
11734 cnp.cn_flags = ISLASTCN | HASBUF;
11735 cnp.cn_pnlen = MAXPATHLEN;
11736 cnp.cn_nameptr = cnp.cn_pnbuf;
11737 cnp.cn_namelen = (int)name_len;
11738 revert_data.sr_cnp = &cnp;
11739
11740 error = VFS_IOCTL(mp, VFSIOC_REVERT_SNAPSHOT, (caddr_t)&revert_data, 0, ctx);
11741 mount_iterdrop(mp);
11742 FREE(name_buf, M_TEMP);
11743
11744 if (error) {
11745 /* If there was any error, try again using VNOP_IOCTL */
11746
11747 vnode_t snapdvp;
11748 struct nameidata namend;
11749
11750 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, &namend, LOOKUP,
11751 OP_LOOKUP, ctx);
11752 if (error) {
11753 return (error);
11754 }
11755
11756
11757 error = VNOP_IOCTL(namend.ni_vp, APFSIOC_REVERT_TO_SNAPSHOT, (caddr_t) NULL,
11758 0, ctx);
11759
11760 vnode_put(namend.ni_vp);
11761 nameidone(&namend);
11762 vnode_put(snapdvp);
11763 vnode_put(rvp);
11764 }
11765
11766 return (error);
11767 }
11768
11769 /*
11770 * rename a Filesystem snapshot
11771 *
11772 * get the vnode for the unnamed snapshot directory and the snapshot and
11773 * rename the snapshot. This is a very specialised (and simple) case of
11774 * rename(2) (which has to deal with a lot more complications). It differs
11775 * slightly from rename(2) in that EEXIST is returned if the new name exists.
11776 */
11777 static int
11778 snapshot_rename(int dirfd, user_addr_t old, user_addr_t new,
11779 __unused uint32_t flags, vfs_context_t ctx)
11780 {
11781 vnode_t rvp, snapdvp;
11782 int error, i;
11783 caddr_t newname_buf;
11784 size_t name_len;
11785 vnode_t fvp;
11786 struct nameidata *fromnd, *tond;
11787 /* carving out a chunk for structs that are too big to be on stack. */
11788 struct {
11789 struct nameidata from_node;
11790 struct nameidata to_node;
11791 } * __rename_data;
11792
11793 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
11794 fromnd = &__rename_data->from_node;
11795 tond = &__rename_data->to_node;
11796
11797 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, old, fromnd, DELETE,
11798 OP_UNLINK, ctx);
11799 if (error)
11800 goto out;
11801 fvp = fromnd->ni_vp;
11802
11803 MALLOC(newname_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11804 error = copyinstr(new, newname_buf, MAXPATHLEN, &name_len);
11805 if (error)
11806 goto out1;
11807
11808 /*
11809 * Some sanity checks: the new name can't be empty, ".", or "..", and
11810 * can't contain slashes.
11811 * (The length returned by copyinstr includes the terminating NUL.)
11812 *
11813 * The FS rename VNOP is supposed to handle this, but we pick it
11814 * off here as well.
11815 */
11816 if ((name_len == 1) || (name_len == 2 && newname_buf[0] == '.') ||
11817 (name_len == 3 && newname_buf[0] == '.' && newname_buf[1] == '.')) {
11818 error = EINVAL;
11819 goto out1;
11820 }
11821 for (i = 0; i < (int)name_len && newname_buf[i] != '/'; i++);
11822 if (i < (int)name_len) {
11823 error = EINVAL;
11824 goto out1;
11825 }
11826
11827 #if CONFIG_MACF
11828 error = mac_mount_check_snapshot_create(ctx, vnode_mount(rvp),
11829 newname_buf);
11830 if (error)
11831 goto out1;
11832 #endif
11833
11834 NDINIT(tond, RENAME, OP_RENAME, USEDVP | NOCACHE | AUDITVNPATH2,
11835 UIO_SYSSPACE, CAST_USER_ADDR_T(newname_buf), ctx);
11836 tond->ni_dvp = snapdvp;
11837
11838 error = namei(tond);
11839 if (error) {
11840 goto out2;
11841 } else if (tond->ni_vp) {
11842 /*
11843 * snapshot rename behaves differently than rename(2) - if the
11844 * new name exists, EEXIST is returned.
11845 */
11846 vnode_put(tond->ni_vp);
11847 error = EEXIST;
11848 goto out2;
11849 }
11850
11851 error = VNOP_RENAME(snapdvp, fvp, &fromnd->ni_cnd, snapdvp, NULLVP,
11852 &tond->ni_cnd, ctx);
11853
11854 out2:
11855 nameidone(tond);
11856 out1:
11857 FREE(newname_buf, M_TEMP);
11858 vnode_put(fvp);
11859 vnode_put(snapdvp);
11860 vnode_put(rvp);
11861 nameidone(fromnd);
11862 out:
11863 FREE(__rename_data, M_TEMP);
11864 return (error);
11865 }
11866
11867 /*
11868 * Mount a Filesystem snapshot
11869 *
11870 * get the vnode for the unnamed snapshot directory and the snapshot and
11871 * mount the snapshot.
11872 */
11873 static int
11874 snapshot_mount(int dirfd, user_addr_t name, user_addr_t directory,
11875 __unused user_addr_t mnt_data, __unused uint32_t flags, vfs_context_t ctx)
11876 {
11877 vnode_t rvp, snapdvp, snapvp, vp, pvp;
11878 int error;
11879 struct nameidata *snapndp, *dirndp;
11880 /* carving out a chunk for structs that are too big to be on stack. */
11881 struct {
11882 struct nameidata snapnd;
11883 struct nameidata dirnd;
11884 } * __snapshot_mount_data;
11885
11886 MALLOC(__snapshot_mount_data, void *, sizeof(*__snapshot_mount_data),
11887 M_TEMP, M_WAITOK);
11888 snapndp = &__snapshot_mount_data->snapnd;
11889 dirndp = &__snapshot_mount_data->dirnd;
11890
11891 error = vnode_get_snapshot(dirfd, &rvp, &snapdvp, name, snapndp, LOOKUP,
11892 OP_LOOKUP, ctx);
11893 if (error)
11894 goto out;
11895
11896 snapvp = snapndp->ni_vp;
11897 if (!vnode_mount(rvp) || (vnode_mount(rvp) == dead_mountp)) {
11898 error = EIO;
11899 goto out1;
11900 }
11901
11902 /* Get the vnode to be covered */
11903 NDINIT(dirndp, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
11904 UIO_USERSPACE, directory, ctx);
11905 error = namei(dirndp);
11906 if (error)
11907 goto out1;
11908
11909 vp = dirndp->ni_vp;
11910 pvp = dirndp->ni_dvp;
11911
11912 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
11913 error = EINVAL;
11914 } else {
11915 mount_t mp = vnode_mount(rvp);
11916 struct fs_snapshot_mount_args smnt_data;
11917
11918 smnt_data.sm_mp = mp;
11919 smnt_data.sm_cnp = &snapndp->ni_cnd;
11920 error = mount_common(mp->mnt_vfsstat.f_fstypename, pvp, vp,
11921 &dirndp->ni_cnd, CAST_USER_ADDR_T(&smnt_data), flags & MNT_DONTBROWSE,
11922 KERNEL_MOUNT_SNAPSHOT, NULL, FALSE, ctx);
11923 }
11924
11925 vnode_put(vp);
11926 vnode_put(pvp);
11927 nameidone(dirndp);
11928 out1:
11929 vnode_put(snapvp);
11930 vnode_put(snapdvp);
11931 vnode_put(rvp);
11932 nameidone(snapndp);
11933 out:
11934 FREE(__snapshot_mount_data, M_TEMP);
11935 return (error);
11936 }
11937
11938 /*
11939 * Root from a snapshot of the filesystem
11940 *
11941 * Marks the filesystem to root from the given snapshot on next boot.
11942 */
11943 static int
11944 snapshot_root(int dirfd, user_addr_t name, __unused uint32_t flags,
11945 vfs_context_t ctx)
11946 {
11947 int error;
11948 vnode_t rvp;
11949 mount_t mp;
11950 struct fs_snapshot_root_args root_data;
11951 struct componentname cnp;
11952 caddr_t name_buf;
11953 size_t name_len;
11954
11955 error = vnode_getfromfd(ctx, dirfd, &rvp);
11956 if (error) {
11957 return (error);
11958 }
11959 mp = vnode_mount(rvp);
11960
11961 MALLOC(name_buf, caddr_t, MAXPATHLEN, M_TEMP, M_WAITOK);
11962 error = copyinstr(name, name_buf, MAXPATHLEN, &name_len);
11963 if (error) {
11964 FREE(name_buf, M_TEMP);
11965 vnode_put(rvp);
11966 return (error);
11967 }
11968
11969 // XXX MAC checks ?
11970
11971 /*
11972 * Grab mount_iterref so that we can release the vnode,
11973 * since VFSIOC_ROOT_SNAPSHOT could conceivably cause a sync.
11974 */
11975 error = mount_iterref (mp, 0);
11976 vnode_put(rvp);
11977 if (error) {
11978 FREE(name_buf, M_TEMP);
11979 return (error);
11980 }
11981
11982 memset(&cnp, 0, sizeof(cnp));
11983 cnp.cn_pnbuf = (char *)name_buf;
11984 cnp.cn_nameiop = LOOKUP;
11985 cnp.cn_flags = ISLASTCN | HASBUF;
11986 cnp.cn_pnlen = MAXPATHLEN;
11987 cnp.cn_nameptr = cnp.cn_pnbuf;
11988 cnp.cn_namelen = (int)name_len;
11989 root_data.sr_cnp = &cnp;
11990
11991 error = VFS_IOCTL(mp, VFSIOC_ROOT_SNAPSHOT, (caddr_t)&root_data, 0, ctx);
11992
11993 mount_iterdrop(mp);
11994 FREE(name_buf, M_TEMP);
11995
11996 return (error);
11997 }
11998
11999 /*
12000 * FS snapshot operations dispatcher
12001 */
12002 int
12003 fs_snapshot(__unused proc_t p, struct fs_snapshot_args *uap,
12004 __unused int32_t *retval)
12005 {
12006 int error;
12007 vfs_context_t ctx = vfs_context_current();
12008
12009 AUDIT_ARG(fd, uap->dirfd);
12010 AUDIT_ARG(value32, uap->op);
12011
12012 error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_SNAPSHOT, 0);
12013 if (error)
12014 return (error);
12015
12016 switch (uap->op) {
12017 case SNAPSHOT_OP_CREATE:
12018 error = snapshot_create(uap->dirfd, uap->name1, uap->flags, ctx);
12019 break;
12020 case SNAPSHOT_OP_DELETE:
12021 error = snapshot_delete(uap->dirfd, uap->name1, uap->flags, ctx);
12022 break;
12023 case SNAPSHOT_OP_RENAME:
12024 error = snapshot_rename(uap->dirfd, uap->name1, uap->name2,
12025 uap->flags, ctx);
12026 break;
12027 case SNAPSHOT_OP_MOUNT:
12028 error = snapshot_mount(uap->dirfd, uap->name1, uap->name2,
12029 uap->data, uap->flags, ctx);
12030 break;
12031 case SNAPSHOT_OP_REVERT:
12032 error = snapshot_revert(uap->dirfd, uap->name1, uap->flags, ctx);
12033 break;
12034 #if !TARGET_OS_OSX
12035 case SNAPSHOT_OP_ROOT:
12036 error = snapshot_root(uap->dirfd, uap->name1, uap->flags, ctx);
12037 break;
12038 #endif /* !TARGET_OS_OSX */
12039 default:
12040 error = ENOSYS;
12041 }
12042
12043 return (error);
12044 }
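/*
 * Userspace view (illustrative sketch): recent SDKs ship thin wrappers over
 * this syscall in <sys/snapshot.h> (fs_snapshot_create(), fs_snapshot_delete(),
 * fs_snapshot_rename(), fs_snapshot_mount(), fs_snapshot_revert()).  The
 * prototypes below are assumptions based on that header, and callers must
 * also satisfy the PRIV_VFS_SNAPSHOT check enforced above:
 *
 *	int fs_snapshot_create(int dirfd, const char *name, uint32_t flags);
 *	int fs_snapshot_delete(int dirfd, const char *name, uint32_t flags);
 *
 * Creating and then removing a snapshot of the volume whose root is open at
 * dirfd might look like:
 *
 *	#include <sys/snapshot.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int
 *	snapshot_roundtrip(const char *volume_root, const char *snap_name)
 *	{
 *		int dirfd = open(volume_root, O_RDONLY);
 *		int error = -1;
 *
 *		if (dirfd == -1)
 *			return -1;
 *		// dirfd must reference the volume root (vnode_isvroot above)
 *		if (fs_snapshot_create(dirfd, snap_name, 0) == 0)
 *			error = fs_snapshot_delete(dirfd, snap_name, 0);
 *		close(dirfd);
 *		return error;
 *	}
 */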