]> git.saurik.com Git - apple/xnu.git/blob - bsd/vfs/vfs_syscalls.c
521cb571366c8f1cc3490b021a40fe5666064802
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
1 /*
2 * Copyright (c) 1995-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
80 #include <sys/stat.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
87 #include <sys/mman.h>
88 #include <sys/dirent.h>
89 #include <sys/attr.h>
90 #include <sys/sysctl.h>
91 #include <sys/ubc.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/imgsrc.h>
96 #include <sys/sysproto.h>
97 #include <sys/xattr.h>
98 #include <sys/fcntl.h>
99 #include <sys/fsctl.h>
100 #include <sys/ubc_internal.h>
101 #include <sys/disk.h>
102 #include <machine/cons.h>
103 #include <machine/limits.h>
104 #include <miscfs/specfs/specdev.h>
105
106 #include <security/audit/audit.h>
107 #include <bsm/audit_kevents.h>
108
109 #include <mach/mach_types.h>
110 #include <kern/kern_types.h>
111 #include <kern/kalloc.h>
112 #include <kern/task.h>
113
114 #include <vm/vm_pageout.h>
115
116 #include <libkern/OSAtomic.h>
117 #include <pexpert/pexpert.h>
118
119 #if CONFIG_MACF
120 #include <security/mac.h>
121 #include <security/mac_framework.h>
122 #endif
123
/*
 * Scratch path-buffer helpers.
 *
 * When CONFIG_FSE is set the buffer is obtained from get_pathbuff() and
 * returned with release_pathbuff(); otherwise a MAXPATHLEN-byte buffer is
 * taken from / returned to the M_NAMEI zone.
 *
 * Note: each expansion already ends in a ';'.
 */
#if CONFIG_FSE
#define GET_PATH(x)	(x) = get_pathbuff();
#define RELEASE_PATH(x)	release_pathbuff(x);
#else /* !CONFIG_FSE */
#define GET_PATH(x)	MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
#define RELEASE_PATH(x)	FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
#endif /* CONFIG_FSE */
135
136 /* struct for checkdirs iteration */
137 struct cdirargs {
138 vnode_t olddp;
139 vnode_t newdp;
140 };
141 /* callback for checkdirs iteration */
142 static int checkdirs_callback(proc_t p, void * arg);
143
144 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
145 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
146 void enablequotas(struct mount *mp, vfs_context_t ctx);
147 static int getfsstat_callback(mount_t mp, void * arg);
148 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
149 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
150 static int sync_callback(mount_t, void *);
151 static void sync_thread(void *, __unused wait_result_t);
152 static int sync_async(int);
153 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
154 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
155 boolean_t partial_copy);
156 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
157 user_addr_t bufp);
158 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
159 static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
160 struct componentname *cnp, user_addr_t fsmountargs,
161 int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
162 vfs_context_t ctx);
163 void vfs_notify_mount(vnode_t pdvp);
164
165 int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);
166
167 struct fd_vn_data * fg_vn_data_alloc(void);
168
169 static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);
170
171 static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);
172
173 #ifdef CONFIG_IMGSRC_ACCESS
174 static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
175 static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
176 static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
177 static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
178 static void mount_end_update(mount_t mp);
179 static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
180 #endif /* CONFIG_IMGSRC_ACCESS */
181
182 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
183
184 __private_extern__
185 int sync_internal(void);
186
187 __private_extern__
188 int unlink1(vfs_context_t, struct nameidata *, int);
189
190 extern lck_grp_t *fd_vn_lck_grp;
191 extern lck_grp_attr_t *fd_vn_lck_grp_attr;
192 extern lck_attr_t *fd_vn_lck_attr;
193
194 /*
195 * incremented each time a mount or unmount operation occurs
196 * used to invalidate the cached value of the rootvp in the
197 * mount structure utilized by cache_lookup_path
198 */
199 uint32_t mount_generation = 0;
200
201 /* counts number of mount and unmount operations */
202 unsigned int vfs_nummntops=0;
203
204 extern const struct fileops vnops;
205 #if CONFIG_APPLEDOUBLE
206 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
207 #endif /* CONFIG_APPLEDOUBLE */
208
209 typedef uint32_t vfs_rename_flags_t;
210 #if CONFIG_SECLUDED_RENAME
211 enum {
212 VFS_SECLUDE_RENAME = 0x00000001
213 };
214 #endif
215
216 /*
217 * Virtual File System System Calls
218 */
219
220 #if NFSCLIENT || DEVFS
/*
 * Private in-kernel mounting SPI (built only when NFSCLIENT or DEVFS is
 * configured; not exported)
 */
224 __private_extern__
225 boolean_t
226 vfs_iskernelmount(mount_t mp)
227 {
228 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
229 }
230
231 __private_extern__
232 int
233 kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
234 void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
235 {
236 struct nameidata nd;
237 boolean_t did_namei;
238 int error;
239
240 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
241 UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
242
243 /*
244 * Get the vnode to be covered if it's not supplied
245 */
246 if (vp == NULLVP) {
247 error = namei(&nd);
248 if (error)
249 return (error);
250 vp = nd.ni_vp;
251 pvp = nd.ni_dvp;
252 did_namei = TRUE;
253 } else {
254 char *pnbuf = CAST_DOWN(char *, path);
255
256 nd.ni_cnd.cn_pnbuf = pnbuf;
257 nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
258 did_namei = FALSE;
259 }
260
261 error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
262 syscall_flags, kern_flags, NULL, TRUE, ctx);
263
264 if (did_namei) {
265 vnode_put(vp);
266 vnode_put(pvp);
267 nameidone(&nd);
268 }
269
270 return (error);
271 }
272 #endif /* NFSCLIENT || DEVFS */
273
274 /*
275 * Mount a file system.
276 */
277 /* ARGSUSED */
278 int
279 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
280 {
281 struct __mac_mount_args muap;
282
283 muap.type = uap->type;
284 muap.path = uap->path;
285 muap.flags = uap->flags;
286 muap.data = uap->data;
287 muap.mac_p = USER_ADDR_NULL;
288 return (__mac_mount(p, &muap, retval));
289 }
290
291 void
292 vfs_notify_mount(vnode_t pdvp)
293 {
294 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
295 lock_vnode_and_post(pdvp, NOTE_WRITE);
296 }
297
298 /*
299 * __mac_mount:
300 * Mount a file system taking into account MAC label behavior.
301 * See mount(2) man page for more information
302 *
303 * Parameters: p Process requesting the mount
304 * uap User argument descriptor (see below)
305 * retval (ignored)
306 *
307 * Indirect: uap->type Filesystem type
308 * uap->path Path to mount
309 * uap->data Mount arguments
310 * uap->mac_p MAC info
311 * uap->flags Mount flags
312 *
313 *
314 * Returns: 0 Success
315 * !0 Not success
316 */
317 boolean_t root_fs_upgrade_try = FALSE;
318
319 int
320 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
321 {
322 vnode_t pvp = NULL;
323 vnode_t vp = NULL;
324 int need_nameidone = 0;
325 vfs_context_t ctx = vfs_context_current();
326 char fstypename[MFSNAMELEN];
327 struct nameidata nd;
328 size_t dummy=0;
329 char *labelstr = NULL;
330 int flags = uap->flags;
331 int error;
332 #if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
333 boolean_t is_64bit = IS_64BIT_PROCESS(p);
334 #else
335 #pragma unused(p)
336 #endif
337 /*
338 * Get the fs type name from user space
339 */
340 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
341 if (error)
342 return (error);
343
344 /*
345 * Get the vnode to be covered
346 */
347 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
348 UIO_USERSPACE, uap->path, ctx);
349 error = namei(&nd);
350 if (error) {
351 goto out;
352 }
353 need_nameidone = 1;
354 vp = nd.ni_vp;
355 pvp = nd.ni_dvp;
356
357 #ifdef CONFIG_IMGSRC_ACCESS
358 /* Mounting image source cannot be batched with other operations */
359 if (flags == MNT_IMGSRC_BY_INDEX) {
360 error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
361 ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
362 goto out;
363 }
364 #endif /* CONFIG_IMGSRC_ACCESS */
365
366 #if CONFIG_MACF
367 /*
368 * Get the label string (if any) from user space
369 */
370 if (uap->mac_p != USER_ADDR_NULL) {
371 struct user_mac mac;
372 size_t ulen = 0;
373
374 if (is_64bit) {
375 struct user64_mac mac64;
376 error = copyin(uap->mac_p, &mac64, sizeof(mac64));
377 mac.m_buflen = mac64.m_buflen;
378 mac.m_string = mac64.m_string;
379 } else {
380 struct user32_mac mac32;
381 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
382 mac.m_buflen = mac32.m_buflen;
383 mac.m_string = mac32.m_string;
384 }
385 if (error)
386 goto out;
387 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
388 (mac.m_buflen < 2)) {
389 error = EINVAL;
390 goto out;
391 }
392 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
393 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
394 if (error) {
395 goto out;
396 }
397 AUDIT_ARG(mac_string, labelstr);
398 }
399 #endif /* CONFIG_MACF */
400
401 AUDIT_ARG(fflags, flags);
402
403 if ((vp->v_flag & VROOT) &&
404 (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
405 if (!(flags & MNT_UNION)) {
406 flags |= MNT_UPDATE;
407 }
408 else {
409 /*
410 * For a union mount on '/', treat it as fresh
411 * mount instead of update.
412 * Otherwise, union mouting on '/' used to panic the
413 * system before, since mnt_vnodecovered was found to
414 * be NULL for '/' which is required for unionlookup
415 * after it gets ENOENT on union mount.
416 */
417 flags = (flags & ~(MNT_UPDATE));
418 }
419
420 #ifdef SECURE_KERNEL
421 if ((flags & MNT_RDONLY) == 0) {
422 /* Release kernels are not allowed to mount "/" as rw */
423 error = EPERM;
424 goto out;
425 }
426 #endif
427 /*
428 * See 7392553 for more details on why this check exists.
429 * Suffice to say: If this check is ON and something tries
430 * to mount the rootFS RW, we'll turn off the codesign
431 * bitmap optimization.
432 */
433 #if CHECK_CS_VALIDATION_BITMAP
434 if ((flags & MNT_RDONLY) == 0 ) {
435 root_fs_upgrade_try = TRUE;
436 }
437 #endif
438 }
439
440 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
441 labelstr, FALSE, ctx);
442
443 out:
444
445 #if CONFIG_MACF
446 if (labelstr)
447 FREE(labelstr, M_MACTEMP);
448 #endif /* CONFIG_MACF */
449
450 if (vp) {
451 vnode_put(vp);
452 }
453 if (pvp) {
454 vnode_put(pvp);
455 }
456 if (need_nameidone) {
457 nameidone(&nd);
458 }
459
460 return (error);
461 }
462
463 /*
464 * common mount implementation (final stage of mounting)
465
466 * Arguments:
467 * fstypename file system type (ie it's vfs name)
468 * pvp parent of covered vnode
469 * vp covered vnode
470 * cnp component name (ie path) of covered vnode
471 * flags generic mount flags
472 * fsmountargs file system specific data
473 * labelstr optional MAC label
474 * kernelmount TRUE for mounts initiated from inside the kernel
475 * ctx caller's context
476 */
477 static int
478 mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
479 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
480 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
481 {
482 #if !CONFIG_MACF
483 #pragma unused(labelstr)
484 #endif
485 struct vnode *devvp = NULLVP;
486 struct vnode *device_vnode = NULLVP;
487 #if CONFIG_MACF
488 struct vnode *rvp;
489 #endif
490 struct mount *mp;
491 struct vfstable *vfsp = (struct vfstable *)0;
492 struct proc *p = vfs_context_proc(ctx);
493 int error, flag = 0;
494 user_addr_t devpath = USER_ADDR_NULL;
495 int ronly = 0;
496 int mntalloc = 0;
497 boolean_t vfsp_ref = FALSE;
498 boolean_t is_rwlock_locked = FALSE;
499 boolean_t did_rele = FALSE;
500 boolean_t have_usecount = FALSE;
501
502 /*
503 * Process an update for an existing mount
504 */
505 if (flags & MNT_UPDATE) {
506 if ((vp->v_flag & VROOT) == 0) {
507 error = EINVAL;
508 goto out1;
509 }
510 mp = vp->v_mount;
511
512 /* unmount in progress return error */
513 mount_lock_spin(mp);
514 if (mp->mnt_lflag & MNT_LUNMOUNT) {
515 mount_unlock(mp);
516 error = EBUSY;
517 goto out1;
518 }
519 mount_unlock(mp);
520 lck_rw_lock_exclusive(&mp->mnt_rwlock);
521 is_rwlock_locked = TRUE;
522 /*
523 * We only allow the filesystem to be reloaded if it
524 * is currently mounted read-only.
525 */
526 if ((flags & MNT_RELOAD) &&
527 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
528 error = ENOTSUP;
529 goto out1;
530 }
531
532 /*
533 * If content protection is enabled, update mounts are not
534 * allowed to turn it off.
535 */
536 if ((mp->mnt_flag & MNT_CPROTECT) &&
537 ((flags & MNT_CPROTECT) == 0)) {
538 error = EINVAL;
539 goto out1;
540 }
541
542 #ifdef CONFIG_IMGSRC_ACCESS
543 /* Can't downgrade the backer of the root FS */
544 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
545 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
546 error = ENOTSUP;
547 goto out1;
548 }
549 #endif /* CONFIG_IMGSRC_ACCESS */
550
551 /*
552 * Only root, or the user that did the original mount is
553 * permitted to update it.
554 */
555 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
556 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
557 goto out1;
558 }
559 #if CONFIG_MACF
560 error = mac_mount_check_remount(ctx, mp);
561 if (error != 0) {
562 goto out1;
563 }
564 #endif
565 /*
566 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
567 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
568 */
569 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
570 flags |= MNT_NOSUID | MNT_NODEV;
571 if (mp->mnt_flag & MNT_NOEXEC)
572 flags |= MNT_NOEXEC;
573 }
574 flag = mp->mnt_flag;
575
576
577
578 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
579
580 vfsp = mp->mnt_vtable;
581 goto update;
582 }
583 /*
584 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
585 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
586 */
587 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
588 flags |= MNT_NOSUID | MNT_NODEV;
589 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
590 flags |= MNT_NOEXEC;
591 }
592
593 /* XXXAUDIT: Should we capture the type on the error path as well? */
594 AUDIT_ARG(text, fstypename);
595 mount_list_lock();
596 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
597 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
598 vfsp->vfc_refcount++;
599 vfsp_ref = TRUE;
600 break;
601 }
602 mount_list_unlock();
603 if (vfsp == NULL) {
604 error = ENODEV;
605 goto out1;
606 }
607
608 /*
609 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
610 */
611 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
612 error = EINVAL; /* unsupported request */
613 goto out1;
614 }
615
616 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
617 if (error != 0) {
618 goto out1;
619 }
620
621 /*
622 * Allocate and initialize the filesystem (mount_t)
623 */
624 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
625 M_MOUNT, M_WAITOK);
626 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
627 mntalloc = 1;
628
629 /* Initialize the default IO constraints */
630 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
631 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
632 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
633 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
634 mp->mnt_devblocksize = DEV_BSIZE;
635 mp->mnt_alignmentmask = PAGE_MASK;
636 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
637 mp->mnt_ioscale = 1;
638 mp->mnt_ioflags = 0;
639 mp->mnt_realrootvp = NULLVP;
640 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
641
642 TAILQ_INIT(&mp->mnt_vnodelist);
643 TAILQ_INIT(&mp->mnt_workerqueue);
644 TAILQ_INIT(&mp->mnt_newvnodes);
645 mount_lock_init(mp);
646 lck_rw_lock_exclusive(&mp->mnt_rwlock);
647 is_rwlock_locked = TRUE;
648 mp->mnt_op = vfsp->vfc_vfsops;
649 mp->mnt_vtable = vfsp;
650 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
651 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
652 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
653 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
654 mp->mnt_vnodecovered = vp;
655 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
656 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
657 mp->mnt_devbsdunit = 0;
658
659 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
660 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
661
662 #if NFSCLIENT || DEVFS
663 if (kernelmount)
664 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
665 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
666 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
667 #endif /* NFSCLIENT || DEVFS */
668
669 update:
670 /*
671 * Set the mount level flags.
672 */
673 if (flags & MNT_RDONLY)
674 mp->mnt_flag |= MNT_RDONLY;
675 else if (mp->mnt_flag & MNT_RDONLY) {
676 // disallow read/write upgrades of file systems that
677 // had the TYPENAME_OVERRIDE feature set.
678 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
679 error = EPERM;
680 goto out1;
681 }
682 mp->mnt_kern_flag |= MNTK_WANTRDWR;
683 }
684 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
685 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
686 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
687 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
688 MNT_QUARANTINE | MNT_CPROTECT);
689 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
690 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
691 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
692 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
693 MNT_QUARANTINE | MNT_CPROTECT);
694
695 #if CONFIG_MACF
696 if (flags & MNT_MULTILABEL) {
697 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
698 error = EINVAL;
699 goto out1;
700 }
701 mp->mnt_flag |= MNT_MULTILABEL;
702 }
703 #endif
704 /*
705 * Process device path for local file systems if requested
706 */
707 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
708 if (vfs_context_is64bit(ctx)) {
709 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
710 goto out1;
711 fsmountargs += sizeof(devpath);
712 } else {
713 user32_addr_t tmp;
714 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
715 goto out1;
716 /* munge into LP64 addr */
717 devpath = CAST_USER_ADDR_T(tmp);
718 fsmountargs += sizeof(tmp);
719 }
720
721 /* Lookup device and authorize access to it */
722 if ((devpath)) {
723 struct nameidata nd;
724
725 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
726 if ( (error = namei(&nd)) )
727 goto out1;
728
729 strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
730 devvp = nd.ni_vp;
731
732 nameidone(&nd);
733
734 if (devvp->v_type != VBLK) {
735 error = ENOTBLK;
736 goto out2;
737 }
738 if (major(devvp->v_rdev) >= nblkdev) {
739 error = ENXIO;
740 goto out2;
741 }
742 /*
743 * If mount by non-root, then verify that user has necessary
744 * permissions on the device.
745 */
746 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
747 mode_t accessmode = KAUTH_VNODE_READ_DATA;
748
749 if ((mp->mnt_flag & MNT_RDONLY) == 0)
750 accessmode |= KAUTH_VNODE_WRITE_DATA;
751 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
752 goto out2;
753 }
754 }
755 /* On first mount, preflight and open device */
756 if (devpath && ((flags & MNT_UPDATE) == 0)) {
757 if ( (error = vnode_ref(devvp)) )
758 goto out2;
759 /*
760 * Disallow multiple mounts of the same device.
761 * Disallow mounting of a device that is currently in use
762 * (except for root, which might share swap device for miniroot).
763 * Flush out any old buffers remaining from a previous use.
764 */
765 if ( (error = vfs_mountedon(devvp)) )
766 goto out3;
767
768 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
769 error = EBUSY;
770 goto out3;
771 }
772 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
773 error = ENOTBLK;
774 goto out3;
775 }
776 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
777 goto out3;
778
779 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
780 #if CONFIG_MACF
781 error = mac_vnode_check_open(ctx,
782 devvp,
783 ronly ? FREAD : FREAD|FWRITE);
784 if (error)
785 goto out3;
786 #endif /* MAC */
787 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
788 goto out3;
789
790 mp->mnt_devvp = devvp;
791 device_vnode = devvp;
792
793 } else if ((mp->mnt_flag & MNT_RDONLY) &&
794 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
795 (device_vnode = mp->mnt_devvp)) {
796 dev_t dev;
797 int maj;
798 /*
799 * If upgrade to read-write by non-root, then verify
800 * that user has necessary permissions on the device.
801 */
802 vnode_getalways(device_vnode);
803
804 if (suser(vfs_context_ucred(ctx), NULL) &&
805 (error = vnode_authorize(device_vnode, NULL,
806 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
807 ctx)) != 0) {
808 vnode_put(device_vnode);
809 goto out2;
810 }
811
812 /* Tell the device that we're upgrading */
813 dev = (dev_t)device_vnode->v_rdev;
814 maj = major(dev);
815
816 if ((u_int)maj >= (u_int)nblkdev)
817 panic("Volume mounted on a device with invalid major number.");
818
819 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
820 vnode_put(device_vnode);
821 device_vnode = NULLVP;
822 if (error != 0) {
823 goto out2;
824 }
825 }
826 }
827 #if CONFIG_MACF
828 if ((flags & MNT_UPDATE) == 0) {
829 mac_mount_label_init(mp);
830 mac_mount_label_associate(ctx, mp);
831 }
832 if (labelstr) {
833 if ((flags & MNT_UPDATE) != 0) {
834 error = mac_mount_check_label_update(ctx, mp);
835 if (error != 0)
836 goto out3;
837 }
838 }
839 #endif
840 /*
841 * Mount the filesystem.
842 */
843 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
844
845 if (flags & MNT_UPDATE) {
846 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
847 mp->mnt_flag &= ~MNT_RDONLY;
848 mp->mnt_flag &=~
849 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
850 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
851 if (error)
852 mp->mnt_flag = flag; /* restore flag value */
853 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
854 lck_rw_done(&mp->mnt_rwlock);
855 is_rwlock_locked = FALSE;
856 if (!error)
857 enablequotas(mp, ctx);
858 goto exit;
859 }
860
861 /*
862 * Put the new filesystem on the mount list after root.
863 */
864 if (error == 0) {
865 struct vfs_attr vfsattr;
866 #if CONFIG_MACF
867 if (vfs_flags(mp) & MNT_MULTILABEL) {
868 error = VFS_ROOT(mp, &rvp, ctx);
869 if (error) {
870 printf("%s() VFS_ROOT returned %d\n", __func__, error);
871 goto out3;
872 }
873 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
874 /*
875 * drop reference provided by VFS_ROOT
876 */
877 vnode_put(rvp);
878
879 if (error)
880 goto out3;
881 }
882 #endif /* MAC */
883
884 vnode_lock_spin(vp);
885 CLR(vp->v_flag, VMOUNT);
886 vp->v_mountedhere = mp;
887 vnode_unlock(vp);
888
889 /*
890 * taking the name_cache_lock exclusively will
891 * insure that everyone is out of the fast path who
892 * might be trying to use a now stale copy of
893 * vp->v_mountedhere->mnt_realrootvp
894 * bumping mount_generation causes the cached values
895 * to be invalidated
896 */
897 name_cache_lock();
898 mount_generation++;
899 name_cache_unlock();
900
901 error = vnode_ref(vp);
902 if (error != 0) {
903 goto out4;
904 }
905
906 have_usecount = TRUE;
907
908 error = checkdirs(vp, ctx);
909 if (error != 0) {
910 /* Unmount the filesystem as cdir/rdirs cannot be updated */
911 goto out4;
912 }
913 /*
914 * there is no cleanup code here so I have made it void
915 * we need to revisit this
916 */
917 (void)VFS_START(mp, 0, ctx);
918
919 if (mount_list_add(mp) != 0) {
920 /*
921 * The system is shutting down trying to umount
922 * everything, so fail with a plausible errno.
923 */
924 error = EBUSY;
925 goto out4;
926 }
927 lck_rw_done(&mp->mnt_rwlock);
928 is_rwlock_locked = FALSE;
929
930 /* Check if this mounted file system supports EAs or named streams. */
931 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
932 VFSATTR_INIT(&vfsattr);
933 VFSATTR_WANTED(&vfsattr, f_capabilities);
934 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
935 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
936 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
937 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
938 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
939 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
940 }
941 #if NAMEDSTREAMS
942 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
943 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
944 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
945 }
946 #endif
947 /* Check if this file system supports path from id lookups. */
948 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
949 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
950 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
951 } else if (mp->mnt_flag & MNT_DOVOLFS) {
952 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
953 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
954 }
955 }
956 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
957 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
958 }
959 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
960 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
961 }
962 /* increment the operations count */
963 OSAddAtomic(1, &vfs_nummntops);
964 enablequotas(mp, ctx);
965
966 if (device_vnode) {
967 device_vnode->v_specflags |= SI_MOUNTEDON;
968
969 /*
970 * cache the IO attributes for the underlying physical media...
971 * an error return indicates the underlying driver doesn't
972 * support all the queries necessary... however, reasonable
973 * defaults will have been set, so no reason to bail or care
974 */
975 vfs_init_io_attributes(device_vnode, mp);
976 }
977
978 /* Now that mount is setup, notify the listeners */
979 vfs_notify_mount(pvp);
980 } else {
981 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
982 if (mp->mnt_vnodelist.tqh_first != NULL) {
983 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
984 mp->mnt_vtable->vfc_name, error);
985 }
986
987 vnode_lock_spin(vp);
988 CLR(vp->v_flag, VMOUNT);
989 vnode_unlock(vp);
990 mount_list_lock();
991 mp->mnt_vtable->vfc_refcount--;
992 mount_list_unlock();
993
994 if (device_vnode ) {
995 vnode_rele(device_vnode);
996 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
997 }
998 lck_rw_done(&mp->mnt_rwlock);
999 is_rwlock_locked = FALSE;
1000
1001 /*
1002 * if we get here, we have a mount structure that needs to be freed,
1003 * but since the coveredvp hasn't yet been updated to point at it,
1004 * no need to worry about other threads holding a crossref on this mp
1005 * so it's ok to just free it
1006 */
1007 mount_lock_destroy(mp);
1008 #if CONFIG_MACF
1009 mac_mount_label_destroy(mp);
1010 #endif
1011 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1012 }
1013 exit:
1014 /*
1015 * drop I/O count on the device vp if there was one
1016 */
1017 if (devpath && devvp)
1018 vnode_put(devvp);
1019
1020 return(error);
1021
1022 /* Error condition exits */
1023 out4:
1024 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
1025
1026 /*
1027 * If the mount has been placed on the covered vp,
1028 * it may have been discovered by now, so we have
1029 * to treat this just like an unmount
1030 */
1031 mount_lock_spin(mp);
1032 mp->mnt_lflag |= MNT_LDEAD;
1033 mount_unlock(mp);
1034
1035 if (device_vnode != NULLVP) {
1036 vnode_rele(device_vnode);
1037 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1038 ctx);
1039 did_rele = TRUE;
1040 }
1041
1042 vnode_lock_spin(vp);
1043
1044 mp->mnt_crossref++;
1045 vp->v_mountedhere = (mount_t) 0;
1046
1047 vnode_unlock(vp);
1048
1049 if (have_usecount) {
1050 vnode_rele(vp);
1051 }
1052 out3:
1053 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
1054 vnode_rele(devvp);
1055 out2:
1056 if (devpath && devvp)
1057 vnode_put(devvp);
1058 out1:
1059 /* Release mnt_rwlock only when it was taken */
1060 if (is_rwlock_locked == TRUE) {
1061 lck_rw_done(&mp->mnt_rwlock);
1062 }
1063
1064 if (mntalloc) {
1065 if (mp->mnt_crossref)
1066 mount_dropcrossref(mp, vp, 0);
1067 else {
1068 mount_lock_destroy(mp);
1069 #if CONFIG_MACF
1070 mac_mount_label_destroy(mp);
1071 #endif
1072 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1073 }
1074 }
1075 if (vfsp_ref) {
1076 mount_list_lock();
1077 vfsp->vfc_refcount--;
1078 mount_list_unlock();
1079 }
1080
1081 return(error);
1082 }
1083
1084 /*
1085 * Flush in-core data, check for competing mount attempts,
1086 * and set VMOUNT
1087 */
1088 int
1089 prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
1090 {
1091 #if !CONFIG_MACF
1092 #pragma unused(cnp,fsname)
1093 #endif
1094 struct vnode_attr va;
1095 int error;
1096
1097 if (!skip_auth) {
1098 /*
1099 * If the user is not root, ensure that they own the directory
1100 * onto which we are attempting to mount.
1101 */
1102 VATTR_INIT(&va);
1103 VATTR_WANTED(&va, va_uid);
1104 if ((error = vnode_getattr(vp, &va, ctx)) ||
1105 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1106 (!vfs_context_issuser(ctx)))) {
1107 error = EPERM;
1108 goto out;
1109 }
1110 }
1111
1112 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
1113 goto out;
1114
1115 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
1116 goto out;
1117
1118 if (vp->v_type != VDIR) {
1119 error = ENOTDIR;
1120 goto out;
1121 }
1122
1123 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
1124 error = EBUSY;
1125 goto out;
1126 }
1127
1128 #if CONFIG_MACF
1129 error = mac_mount_check_mount(ctx, vp,
1130 cnp, fsname);
1131 if (error != 0)
1132 goto out;
1133 #endif
1134
1135 vnode_lock_spin(vp);
1136 SET(vp->v_flag, VMOUNT);
1137 vnode_unlock(vp);
1138
1139 out:
1140 return error;
1141 }
1142
1143 #if CONFIG_IMGSRC_ACCESS
1144
/*
 * IMGSRC_DEBUG(): diagnostic output for the imageboot-source relocation
 * code.  Forwards its arguments to printf() when DEBUG is set and expands
 * to an empty statement otherwise.
 */
1145 #if DEBUG
1146 #define IMGSRC_DEBUG(args...) printf(args)
1147 #else
1148 #define IMGSRC_DEBUG(args...) do { } while(0)
1149 #endif
1150
1151 static int
1152 authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1153 {
1154 struct nameidata nd;
1155 vnode_t vp, realdevvp;
1156 mode_t accessmode;
1157 int error;
1158
1159 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1160 if ( (error = namei(&nd)) ) {
1161 IMGSRC_DEBUG("namei() failed with %d\n", error);
1162 return error;
1163 }
1164
1165 vp = nd.ni_vp;
1166
1167 if (!vnode_isblk(vp)) {
1168 IMGSRC_DEBUG("Not block device.\n");
1169 error = ENOTBLK;
1170 goto out;
1171 }
1172
1173 realdevvp = mp->mnt_devvp;
1174 if (realdevvp == NULLVP) {
1175 IMGSRC_DEBUG("No device backs the mount.\n");
1176 error = ENXIO;
1177 goto out;
1178 }
1179
1180 error = vnode_getwithref(realdevvp);
1181 if (error != 0) {
1182 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1183 goto out;
1184 }
1185
1186 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1187 IMGSRC_DEBUG("Wrong dev_t.\n");
1188 error = ENXIO;
1189 goto out1;
1190 }
1191
1192 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1193
1194 /*
1195 * If mount by non-root, then verify that user has necessary
1196 * permissions on the device.
1197 */
1198 if (!vfs_context_issuser(ctx)) {
1199 accessmode = KAUTH_VNODE_READ_DATA;
1200 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1201 accessmode |= KAUTH_VNODE_WRITE_DATA;
1202 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1203 IMGSRC_DEBUG("Access denied.\n");
1204 goto out1;
1205 }
1206 }
1207
1208 *devvpp = vp;
1209
1210 out1:
1211 vnode_put(realdevvp);
1212 out:
1213 nameidone(&nd);
1214 if (error) {
1215 vnode_put(vp);
1216 }
1217
1218 return error;
1219 }
1220
1221 /*
1222 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1223 * and call checkdirs()
1224 */
1225 static int
1226 place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1227 {
1228 int error;
1229
1230 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1231
1232 vnode_lock_spin(vp);
1233 CLR(vp->v_flag, VMOUNT);
1234 vp->v_mountedhere = mp;
1235 vnode_unlock(vp);
1236
1237 /*
1238 * taking the name_cache_lock exclusively will
1239 * insure that everyone is out of the fast path who
1240 * might be trying to use a now stale copy of
1241 * vp->v_mountedhere->mnt_realrootvp
1242 * bumping mount_generation causes the cached values
1243 * to be invalidated
1244 */
1245 name_cache_lock();
1246 mount_generation++;
1247 name_cache_unlock();
1248
1249 error = vnode_ref(vp);
1250 if (error != 0) {
1251 goto out;
1252 }
1253
1254 error = checkdirs(vp, ctx);
1255 if (error != 0) {
1256 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1257 vnode_rele(vp);
1258 goto out;
1259 }
1260
1261 out:
1262 if (error != 0) {
1263 mp->mnt_vnodecovered = NULLVP;
1264 }
1265 return error;
1266 }
1267
1268 static void
1269 undo_place_on_covered_vp(mount_t mp, vnode_t vp)
1270 {
1271 vnode_rele(vp);
1272 vnode_lock_spin(vp);
1273 vp->v_mountedhere = (mount_t)NULL;
1274 vnode_unlock(vp);
1275
1276 mp->mnt_vnodecovered = NULLVP;
1277 }
1278
1279 static int
1280 mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
1281 {
1282 int error;
1283
1284 /* unmount in progress return error */
1285 mount_lock_spin(mp);
1286 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1287 mount_unlock(mp);
1288 return EBUSY;
1289 }
1290 mount_unlock(mp);
1291 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1292
1293 /*
1294 * We only allow the filesystem to be reloaded if it
1295 * is currently mounted read-only.
1296 */
1297 if ((flags & MNT_RELOAD) &&
1298 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
1299 error = ENOTSUP;
1300 goto out;
1301 }
1302
1303 /*
1304 * Only root, or the user that did the original mount is
1305 * permitted to update it.
1306 */
1307 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
1308 (!vfs_context_issuser(ctx))) {
1309 error = EPERM;
1310 goto out;
1311 }
1312 #if CONFIG_MACF
1313 error = mac_mount_check_remount(ctx, mp);
1314 if (error != 0) {
1315 goto out;
1316 }
1317 #endif
1318
1319 out:
1320 if (error) {
1321 lck_rw_done(&mp->mnt_rwlock);
1322 }
1323
1324 return error;
1325 }
1326
1327 static void
1328 mount_end_update(mount_t mp)
1329 {
1330 lck_rw_done(&mp->mnt_rwlock);
1331 }
1332
1333 static int
1334 get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1335 {
1336 vnode_t vp;
1337
1338 if (height >= MAX_IMAGEBOOT_NESTING) {
1339 return EINVAL;
1340 }
1341
1342 vp = imgsrc_rootvnodes[height];
1343 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1344 *rvpp = vp;
1345 return 0;
1346 } else {
1347 return ENOENT;
1348 }
1349 }
1350
1351 static int
1352 relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
1353 const char *fsname, vfs_context_t ctx,
1354 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
1355 {
1356 int error;
1357 mount_t mp;
1358 boolean_t placed = FALSE;
1359 vnode_t devvp = NULLVP;
1360 struct vfstable *vfsp;
1361 user_addr_t devpath;
1362 char *old_mntonname;
1363 vnode_t rvp;
1364 uint32_t height;
1365 uint32_t flags;
1366
1367 /* If we didn't imageboot, nothing to move */
1368 if (imgsrc_rootvnodes[0] == NULLVP) {
1369 return EINVAL;
1370 }
1371
1372 /* Only root can do this */
1373 if (!vfs_context_issuser(ctx)) {
1374 return EPERM;
1375 }
1376
1377 IMGSRC_DEBUG("looking for root vnode.\n");
1378
1379 /*
1380 * Get root vnode of filesystem we're moving.
1381 */
1382 if (by_index) {
1383 if (is64bit) {
1384 struct user64_mnt_imgsrc_args mia64;
1385 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1386 if (error != 0) {
1387 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1388 return error;
1389 }
1390
1391 height = mia64.mi_height;
1392 flags = mia64.mi_flags;
1393 devpath = mia64.mi_devpath;
1394 } else {
1395 struct user32_mnt_imgsrc_args mia32;
1396 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1397 if (error != 0) {
1398 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1399 return error;
1400 }
1401
1402 height = mia32.mi_height;
1403 flags = mia32.mi_flags;
1404 devpath = mia32.mi_devpath;
1405 }
1406 } else {
1407 /*
1408 * For binary compatibility--assumes one level of nesting.
1409 */
1410 if (is64bit) {
1411 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1412 return error;
1413 } else {
1414 user32_addr_t tmp;
1415 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1416 return error;
1417
1418 /* munge into LP64 addr */
1419 devpath = CAST_USER_ADDR_T(tmp);
1420 }
1421
1422 height = 0;
1423 flags = 0;
1424 }
1425
1426 if (flags != 0) {
1427 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1428 return EINVAL;
1429 }
1430
1431 error = get_imgsrc_rootvnode(height, &rvp);
1432 if (error != 0) {
1433 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
1434 return error;
1435 }
1436
1437 IMGSRC_DEBUG("got root vnode.\n");
1438
1439 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1440
1441 /* Can only move once */
1442 mp = vnode_mount(rvp);
1443 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1444 IMGSRC_DEBUG("Already moved.\n");
1445 error = EBUSY;
1446 goto out0;
1447 }
1448
1449 IMGSRC_DEBUG("Starting updated.\n");
1450
1451 /* Get exclusive rwlock on mount, authorize update on mp */
1452 error = mount_begin_update(mp , ctx, 0);
1453 if (error != 0) {
1454 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
1455 goto out0;
1456 }
1457
1458 /*
1459 * It can only be moved once. Flag is set under the rwlock,
1460 * so we're now safe to proceed.
1461 */
1462 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1463 IMGSRC_DEBUG("Already moved [2]\n");
1464 goto out1;
1465 }
1466
1467
1468 IMGSRC_DEBUG("Preparing coveredvp.\n");
1469
1470 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1471 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
1472 if (error != 0) {
1473 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
1474 goto out1;
1475 }
1476
1477 IMGSRC_DEBUG("Covered vp OK.\n");
1478
1479 /* Sanity check the name caller has provided */
1480 vfsp = mp->mnt_vtable;
1481 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
1482 IMGSRC_DEBUG("Wrong fs name.\n");
1483 error = EINVAL;
1484 goto out2;
1485 }
1486
1487 /* Check the device vnode and update mount-from name, for local filesystems */
1488 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
1489 IMGSRC_DEBUG("Local, doing device validation.\n");
1490
1491 if (devpath != USER_ADDR_NULL) {
1492 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1493 if (error) {
1494 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1495 goto out2;
1496 }
1497
1498 vnode_put(devvp);
1499 }
1500 }
1501
1502 /*
1503 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1504 * and increment the name cache's mount generation
1505 */
1506
1507 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1508 error = place_mount_and_checkdirs(mp, vp, ctx);
1509 if (error != 0) {
1510 goto out2;
1511 }
1512
1513 placed = TRUE;
1514
1515 strncpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1516 strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1517
1518 /* Forbid future moves */
1519 mount_lock(mp);
1520 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1521 mount_unlock(mp);
1522
1523 /* Finally, add to mount list, completely ready to go */
1524 if (mount_list_add(mp) != 0) {
1525 /*
1526 * The system is shutting down trying to umount
1527 * everything, so fail with a plausible errno.
1528 */
1529 error = EBUSY;
1530 goto out3;
1531 }
1532
1533 mount_end_update(mp);
1534 vnode_put(rvp);
1535 FREE(old_mntonname, M_TEMP);
1536
1537 vfs_notify_mount(pvp);
1538
1539 return 0;
1540 out3:
1541 strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1542
1543 mount_lock(mp);
1544 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1545 mount_unlock(mp);
1546
1547 out2:
1548 /*
1549 * Placing the mp on the vnode clears VMOUNT,
1550 * so cleanup is different after that point
1551 */
1552 if (placed) {
1553 /* Rele the vp, clear VMOUNT and v_mountedhere */
1554 undo_place_on_covered_vp(mp, vp);
1555 } else {
1556 vnode_lock_spin(vp);
1557 CLR(vp->v_flag, VMOUNT);
1558 vnode_unlock(vp);
1559 }
1560 out1:
1561 mount_end_update(mp);
1562
1563 out0:
1564 vnode_put(rvp);
1565 FREE(old_mntonname, M_TEMP);
1566 return error;
1567 }
1568
1569 #endif /* CONFIG_IMGSRC_ACCESS */
1570
1571 void
1572 enablequotas(struct mount *mp, vfs_context_t ctx)
1573 {
1574 struct nameidata qnd;
1575 int type;
1576 char qfpath[MAXPATHLEN];
1577 const char *qfname = QUOTAFILENAME;
1578 const char *qfopsname = QUOTAOPSNAME;
1579 const char *qfextension[] = INITQFNAMES;
1580
1581 /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
1582 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1583 return;
1584 }
1585 /*
1586 * Enable filesystem disk quotas if necessary.
1587 * We ignore errors as this should not interfere with final mount
1588 */
1589 for (type=0; type < MAXQUOTAS; type++) {
1590 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
1591 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1592 CAST_USER_ADDR_T(qfpath), ctx);
1593 if (namei(&qnd) != 0)
1594 continue; /* option file to trigger quotas is not present */
1595 vnode_put(qnd.ni_vp);
1596 nameidone(&qnd);
1597 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
1598
1599 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
1600 }
1601 return;
1602 }
1603
1604
1605 static int
1606 checkdirs_callback(proc_t p, void * arg)
1607 {
1608 struct cdirargs * cdrp = (struct cdirargs * )arg;
1609 vnode_t olddp = cdrp->olddp;
1610 vnode_t newdp = cdrp->newdp;
1611 struct filedesc *fdp;
1612 vnode_t tvp;
1613 vnode_t fdp_cvp;
1614 vnode_t fdp_rvp;
1615 int cdir_changed = 0;
1616 int rdir_changed = 0;
1617
1618 /*
1619 * XXX Also needs to iterate each thread in the process to see if it
1620 * XXX is using a per-thread current working directory, and, if so,
1621 * XXX update that as well.
1622 */
1623
1624 proc_fdlock(p);
1625 fdp = p->p_fd;
1626 if (fdp == (struct filedesc *)0) {
1627 proc_fdunlock(p);
1628 return(PROC_RETURNED);
1629 }
1630 fdp_cvp = fdp->fd_cdir;
1631 fdp_rvp = fdp->fd_rdir;
1632 proc_fdunlock(p);
1633
1634 if (fdp_cvp == olddp) {
1635 vnode_ref(newdp);
1636 tvp = fdp->fd_cdir;
1637 fdp_cvp = newdp;
1638 cdir_changed = 1;
1639 vnode_rele(tvp);
1640 }
1641 if (fdp_rvp == olddp) {
1642 vnode_ref(newdp);
1643 tvp = fdp->fd_rdir;
1644 fdp_rvp = newdp;
1645 rdir_changed = 1;
1646 vnode_rele(tvp);
1647 }
1648 if (cdir_changed || rdir_changed) {
1649 proc_fdlock(p);
1650 fdp->fd_cdir = fdp_cvp;
1651 fdp->fd_rdir = fdp_rvp;
1652 proc_fdunlock(p);
1653 }
1654 return(PROC_RETURNED);
1655 }
1656
1657
1658
1659 /*
1660 * Scan all active processes to see if any of them have a current
1661 * or root directory onto which the new filesystem has just been
1662 * mounted. If so, replace them with the new mount point.
1663 */
1664 static int
1665 checkdirs(vnode_t olddp, vfs_context_t ctx)
1666 {
1667 vnode_t newdp;
1668 vnode_t tvp;
1669 int err;
1670 struct cdirargs cdr;
1671
1672 if (olddp->v_usecount == 1)
1673 return(0);
1674 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
1675
1676 if (err != 0) {
1677 #if DIAGNOSTIC
1678 panic("mount: lost mount: error %d", err);
1679 #endif
1680 return(err);
1681 }
1682
1683 cdr.olddp = olddp;
1684 cdr.newdp = newdp;
1685 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1686 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
1687
1688 if (rootvnode == olddp) {
1689 vnode_ref(newdp);
1690 tvp = rootvnode;
1691 rootvnode = newdp;
1692 vnode_rele(tvp);
1693 }
1694
1695 vnode_put(newdp);
1696 return(0);
1697 }
1698
1699 /*
1700 * Unmount a file system.
1701 *
1702 * Note: unmount takes a path to the vnode mounted on as argument,
1703 * not special file (as before).
1704 */
1705 /* ARGSUSED */
1706 int
1707 unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1708 {
1709 vnode_t vp;
1710 struct mount *mp;
1711 int error;
1712 struct nameidata nd;
1713 vfs_context_t ctx = vfs_context_current();
1714
1715 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
1716 UIO_USERSPACE, uap->path, ctx);
1717 error = namei(&nd);
1718 if (error)
1719 return (error);
1720 vp = nd.ni_vp;
1721 mp = vp->v_mount;
1722 nameidone(&nd);
1723
1724 #if CONFIG_MACF
1725 error = mac_mount_check_umount(ctx, mp);
1726 if (error != 0) {
1727 vnode_put(vp);
1728 return (error);
1729 }
1730 #endif
1731 /*
1732 * Must be the root of the filesystem
1733 */
1734 if ((vp->v_flag & VROOT) == 0) {
1735 vnode_put(vp);
1736 return (EINVAL);
1737 }
1738 mount_ref(mp, 0);
1739 vnode_put(vp);
1740 /* safedounmount consumes the mount ref */
1741 return (safedounmount(mp, uap->flags, ctx));
1742 }
1743
1744 int
1745 vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
1746 {
1747 mount_t mp;
1748
1749 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1750 if (mp == (mount_t)0) {
1751 return(ENOENT);
1752 }
1753 mount_ref(mp, 0);
1754 mount_iterdrop(mp);
1755 /* safedounmount consumes the mount ref */
1756 return(safedounmount(mp, flags, ctx));
1757 }
1758
1759
1760 /*
1761 * The mount struct comes with a mount ref which will be consumed.
1762 * Do the actual file system unmount, prevent some common foot shooting.
1763 */
1764 int
1765 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1766 {
1767 int error;
1768 proc_t p = vfs_context_proc(ctx);
1769
1770 /*
1771 * If the file system is not responding and MNT_NOBLOCK
1772 * is set and not a forced unmount then return EBUSY.
1773 */
1774 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1775 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1776 error = EBUSY;
1777 goto out;
1778 }
1779
1780 /*
1781 * Skip authorization if the mount is tagged as permissive and
1782 * this is not a forced-unmount attempt.
1783 */
1784 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1785 /*
1786 * Only root, or the user that did the original mount is
1787 * permitted to unmount this filesystem.
1788 */
1789 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1790 (error = suser(kauth_cred_get(), &p->p_acflag)))
1791 goto out;
1792 }
1793 /*
1794 * Don't allow unmounting the root file system.
1795 */
1796 if (mp->mnt_flag & MNT_ROOTFS) {
1797 error = EBUSY; /* the root is always busy */
1798 goto out;
1799 }
1800
1801 #ifdef CONFIG_IMGSRC_ACCESS
1802 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1803 error = EBUSY;
1804 goto out;
1805 }
1806 #endif /* CONFIG_IMGSRC_ACCESS */
1807
1808 return (dounmount(mp, flags, 1, ctx));
1809
1810 out:
1811 mount_drop(mp, 0);
1812 return(error);
1813 }
1814
1815 /*
1816 * Do the actual file system unmount.
1817 */
/*
 * dounmount: perform the unmount of 'mp'.
 *
 * flags   - MNT_* unmount flags.  MNT_FORCE makes this a forced unmount:
 *           submounts are unmounted first (unless MNT_LNOSUB is set),
 *           the pre-unmount sync is skipped, vflush() is called with
 *           FORCECLOSE and its error does not abort the unmount.
 * withref - non-zero if the caller holds a mount ref on 'mp'; it is
 *           dropped here, either on the early EBUSY return or once
 *           mnt_rwlock has been taken exclusively.
 * ctx     - context passed through to the VFS/VNOP calls.
 *
 * Returns EBUSY if an unmount of 'mp' is already in progress, otherwise
 * 0 or the error from VFS_SYNC(), vflush() (non-forced only) or
 * VFS_UNMOUNT().  On any such error the MNTK_UNMOUNT, MNT_LUNMOUNT and
 * MNT_LFORCE bits are cleared again and the mount stays in place.
 */
1818 int
1819 dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1820 {
1821 vnode_t coveredvp = (vnode_t)0;
1822 int error;
1823 int needwakeup = 0;
1824 int forcedunmount = 0;
1825 int lflags = 0;
1826 struct vnode *devvp = NULLVP;
1827 #if CONFIG_TRIGGERS
1828 proc_t p = vfs_context_proc(ctx);
1829 int did_vflush = 0;
1830 int pflags_save = 0;
1831 #endif /* CONFIG_TRIGGERS */
1832
1833 mount_lock(mp);
1834
1835 /*
1836 * If already an unmount in progress just return EBUSY.
1837 * Even a forced unmount cannot override.
1838 */
1839 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1840 if (withref != 0)
1841 mount_drop(mp, 1);
1842 mount_unlock(mp);
1843 return (EBUSY);
1844 }
1845
1846 if (flags & MNT_FORCE) {
1847 forcedunmount = 1;
1848 mp->mnt_lflag |= MNT_LFORCE;
1849 }
1850
1851 #if CONFIG_TRIGGERS
/* Remember the previous p_flag value so P_NOREMOTEHANG can be restored at 'out' */
1852 if (flags & MNT_NOBLOCK && p != kernproc)
1853 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
1854 #endif
1855
/* Mark the unmount as in progress; every error path below undoes these bits */
1856 mp->mnt_kern_flag |= MNTK_UNMOUNT;
1857 mp->mnt_lflag |= MNT_LUNMOUNT;
1858 mp->mnt_flag &=~ MNT_ASYNC;
1859 /*
1860 * anyone currently in the fast path that
1861 * trips over the cached rootvp will be
1862 * dumped out and forced into the slow path
1863 * to regenerate a new cached value
1864 */
1865 mp->mnt_realrootvp = NULLVP;
1866 mount_unlock(mp);
1867
1868 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
1869 /*
1870 * Force unmount any mounts in this filesystem.
1871 * If any unmounts fail - just leave them dangling.
1872 * Avoids recursion.
1873 */
1874 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
1875 }
1876
1877 /*
1878 * taking the name_cache_lock exclusively will
1879 * insure that everyone is out of the fast path who
1880 * might be trying to use a now stale copy of
1881 * vp->v_mountedhere->mnt_realrootvp
1882 * bumping mount_generation causes the cached values
1883 * to be invalidated
1884 */
1885 name_cache_lock();
1886 mount_generation++;
1887 name_cache_unlock();
1888
1889
1890 lck_rw_lock_exclusive(&mp->mnt_rwlock);
/* The caller's mount ref (if any) is given up once the rwlock is held */
1891 if (withref != 0)
1892 mount_drop(mp, 0);
1893 #if CONFIG_FSE
1894 fsevent_unmount(mp); /* has to come first! */
1895 #endif
1896 error = 0;
/* Non-forced unmounts sync writable filesystems first and give up if that fails */
1897 if (forcedunmount == 0) {
1898 ubc_umount(mp); /* release cached vnodes */
1899 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1900 error = VFS_SYNC(mp, MNT_WAIT, ctx);
1901 if (error) {
1902 mount_lock(mp);
1903 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1904 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1905 mp->mnt_lflag &= ~MNT_LFORCE;
1906 goto out;
1907 }
1908 }
1909 }
1910
1911 #if CONFIG_TRIGGERS
1912 vfs_nested_trigger_unmounts(mp, flags, ctx);
1913 did_vflush = 1;
1914 #endif
1915 if (forcedunmount)
1916 lflags |= FORCECLOSE;
1917 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
/* A vflush() failure only aborts the unmount when it is not forced */
1918 if ((forcedunmount == 0) && error) {
1919 mount_lock(mp);
1920 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1921 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1922 mp->mnt_lflag &= ~MNT_LFORCE;
1923 goto out;
1924 }
1925
1926 /* make sure there are no one in the mount iterations or lookup */
1927 mount_iterdrain(mp);
1928
1929 error = VFS_UNMOUNT(mp, flags, ctx);
1930 if (error) {
1931 mount_iterreset(mp);
1932 mount_lock(mp);
1933 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1934 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1935 mp->mnt_lflag &= ~MNT_LFORCE;
1936 goto out;
1937 }
1938
1939 /* increment the operations count */
1940 if (!error)
1941 OSAddAtomic(1, &vfs_nummntops);
1942
1943 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
1944 /* hold an io reference and drop the usecount before close */
1945 devvp = mp->mnt_devvp;
1946 vnode_getalways(devvp);
1947 vnode_rele(devvp);
1948 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1949 ctx);
1950 vnode_clearmountedon(devvp);
1951 vnode_put(devvp);
1952 }
/* mnt_rwlock is released around mount_list_remove() and re-taken afterwards */
1953 lck_rw_done(&mp->mnt_rwlock);
1954 mount_list_remove(mp);
1955 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1956
1957 /* mark the mount point hook in the vp but not drop the ref yet */
1958 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
1959 /*
1960 * The covered vnode needs special handling. Trying to get an
1961 * iocount must not block here as this may lead to deadlocks
1962 * if the Filesystem to which the covered vnode belongs is
1963 * undergoing forced unmounts. Since we hold a usecount, the
1964 * vnode cannot be reused (it can, however, still be terminated)
1965 */
1966 vnode_getalways(coveredvp);
1967 vnode_lock_spin(coveredvp);
1968
1969 mp->mnt_crossref++;
1970 coveredvp->v_mountedhere = (struct mount *)0;
1971 CLR(coveredvp->v_flag, VMOUNT);
1972
1973 vnode_unlock(coveredvp);
1974 vnode_put(coveredvp);
1975 }
1976
1977 mount_list_lock();
1978 mp->mnt_vtable->vfc_refcount--;
1979 mount_list_unlock();
1980
1981 cache_purgevfs(mp); /* remove cache entries for this file sys */
1982 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
1983 mount_lock(mp);
1984 mp->mnt_lflag |= MNT_LDEAD;
1985
1986 if (mp->mnt_lflag & MNT_LWAIT) {
1987 /*
1988 * do the wakeup here
1989 * in case we block in mount_refdrain
1990 * which will drop the mount lock
1991 * and allow anyone blocked in vfs_busy
1992 * to wakeup and see the LDEAD state
1993 */
1994 mp->mnt_lflag &= ~MNT_LWAIT;
1995 wakeup((caddr_t)mp);
1996 }
1997 mount_refdrain(mp);
/* Common exit: reached with the mount lock and mnt_rwlock both held */
1998 out:
1999 if (mp->mnt_lflag & MNT_LWAIT) {
2000 mp->mnt_lflag &= ~MNT_LWAIT;
2001 needwakeup = 1;
2002 }
2003
2004 #if CONFIG_TRIGGERS
2005 if (flags & MNT_NOBLOCK && p != kernproc) {
2006 // Restore P_NOREMOTEHANG bit to its previous value
2007 if ((pflags_save & P_NOREMOTEHANG) == 0)
2008 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2009 }
2010
2011 /*
2012 * Callback and context are set together under the mount lock, and
2013 * never cleared, so we're safe to examine them here, drop the lock,
2014 * and call out.
2015 */
2016 if (mp->mnt_triggercallback != NULL) {
2017 mount_unlock(mp);
2018 if (error == 0) {
2019 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2020 } else if (did_vflush) {
2021 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2022 }
2023 } else {
2024 mount_unlock(mp);
2025 }
2026 #else
2027 mount_unlock(mp);
2028 #endif /* CONFIG_TRIGGERS */
2029
2030 lck_rw_done(&mp->mnt_rwlock);
2031
2032 if (needwakeup)
2033 wakeup((caddr_t)mp);
2034
/* On success, finish with the covered vnode, or free the mount of a root filesystem */
2035 if (!error) {
2036 if ((coveredvp != NULLVP)) {
2037 vnode_t pvp = NULLVP;
2038
2039 /*
2040 * The covered vnode needs special handling. Trying to
2041 * get an iocount must not block here as this may lead
2042 * to deadlocks if the Filesystem to which the covered
2043 * vnode belongs is undergoing forced unmounts. Since we
2044 * hold a usecount, the vnode cannot be reused
2045 * (it can, however, still be terminated).
2046 */
2047 vnode_getalways(coveredvp);
2048
2049 mount_dropcrossref(mp, coveredvp, 0);
2050 /*
2051 * We'll _try_ to detect if this really needs to be
2052 * done. The coveredvp can only be in termination (or
2053 * terminated) if the coveredvp's mount point is in a
2054 * forced unmount (or has been) since we still hold the
2055 * ref.
2056 */
2057 if (!vnode_isrecycled(coveredvp)) {
2058 pvp = vnode_getparent(coveredvp);
2059 #if CONFIG_TRIGGERS
2060 if (coveredvp->v_resolve) {
2061 vnode_trigger_rearm(coveredvp, ctx);
2062 }
2063 #endif
2064 }
2065
2066 vnode_rele(coveredvp);
2067 vnode_put(coveredvp);
2068 coveredvp = NULLVP;
2069
2070 if (pvp) {
2071 lock_vnode_and_post(pvp, NOTE_WRITE);
2072 vnode_put(pvp);
2073 }
2074 } else if (mp->mnt_flag & MNT_ROOTFS) {
2075 mount_lock_destroy(mp);
2076 #if CONFIG_MACF
2077 mac_mount_label_destroy(mp);
2078 #endif
2079 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2080 } else
2081 panic("dounmount: no coveredvp");
2082 }
2083 return (error);
2084 }
2085
2086 /*
2087 * Unmount any mounts in this filesystem.
2088 */
2089 void
2090 dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2091 {
2092 mount_t smp;
2093 fsid_t *fsids, fsid;
2094 int fsids_sz;
2095 int count = 0, i, m = 0;
2096 vnode_t vp;
2097
2098 mount_list_lock();
2099
2100 // Get an array to hold the submounts fsids.
2101 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2102 count++;
2103 fsids_sz = count * sizeof(fsid_t);
2104 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2105 if (fsids == NULL) {
2106 mount_list_unlock();
2107 goto out;
2108 }
2109 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2110
2111 /*
2112 * Fill the array with submount fsids.
2113 * Since mounts are always added to the tail of the mount list, the
2114 * list is always in mount order.
2115 * For each mount check if the mounted-on vnode belongs to a
2116 * mount that's already added to our array of mounts to be unmounted.
2117 */
2118 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2119 vp = smp->mnt_vnodecovered;
2120 if (vp == NULL)
2121 continue;
2122 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2123 for (i = 0; i <= m; i++) {
2124 if (fsids[i].val[0] == fsid.val[0] &&
2125 fsids[i].val[1] == fsid.val[1]) {
2126 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2127 break;
2128 }
2129 }
2130 }
2131 mount_list_unlock();
2132
2133 // Unmount the submounts in reverse order. Ignore errors.
2134 for (i = m; i > 0; i--) {
2135 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2136 if (smp) {
2137 mount_ref(smp, 0);
2138 mount_iterdrop(smp);
2139 (void) dounmount(smp, flags, 1, ctx);
2140 }
2141 }
2142 out:
2143 if (fsids)
2144 FREE(fsids, M_TEMP);
2145 }
2146
2147 void
2148 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
2149 {
2150 vnode_lock(dp);
2151 mp->mnt_crossref--;
2152
2153 if (mp->mnt_crossref < 0)
2154 panic("mount cross refs -ve");
2155
2156 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
2157
2158 if (need_put)
2159 vnode_put_locked(dp);
2160 vnode_unlock(dp);
2161
2162 mount_lock_destroy(mp);
2163 #if CONFIG_MACF
2164 mac_mount_label_destroy(mp);
2165 #endif
2166 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2167 return;
2168 }
2169 if (need_put)
2170 vnode_put_locked(dp);
2171 vnode_unlock(dp);
2172 }
2173
2174
2175 /*
2176 * Sync each mounted filesystem.
2177 */
/* When non-zero (DIAGNOSTIC kernels only), the sync paths call vfs_bufstats() */
2178 #if DIAGNOSTIC
2179 int syncprt = 0;
2180 #endif
2181
/* When non-zero, the sync paths call vm_countdirtypages() after syncing */
2182 int print_vmpage_stat=0;
2183 int sync_timeout = 60; // Sync time limit (sec)
2184
2185 static int
2186 sync_callback(mount_t mp, __unused void *arg)
2187 {
2188 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2189 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2190
2191 mp->mnt_flag &= ~MNT_ASYNC;
2192 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
2193 if (asyncflag)
2194 mp->mnt_flag |= MNT_ASYNC;
2195 }
2196
2197 return (VFS_RETURNED);
2198 }
2199
2200 /* ARGSUSED */
2201 int
2202 sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
2203 {
2204 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2205
2206 if (print_vmpage_stat) {
2207 vm_countdirtypages();
2208 }
2209
2210 #if DIAGNOSTIC
2211 if (syncprt)
2212 vfs_bufstats();
2213 #endif /* DIAGNOSTIC */
2214 return 0;
2215 }
2216
2217 static void
2218 sync_thread(void *arg, __unused wait_result_t wr)
2219 {
2220 int *timeout = (int *) arg;
2221
2222 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2223
2224 if (timeout)
2225 wakeup((caddr_t) timeout);
2226 if (print_vmpage_stat) {
2227 vm_countdirtypages();
2228 }
2229
2230 #if DIAGNOSTIC
2231 if (syncprt)
2232 vfs_bufstats();
2233 #endif /* DIAGNOSTIC */
2234 }
2235
2236 /*
2237 * Sync in a separate thread so we can time out if it blocks.
2238 */
2239 static int
2240 sync_async(int timeout)
2241 {
2242 thread_t thd;
2243 int error;
2244 struct timespec ts = {timeout, 0};
2245
2246 lck_mtx_lock(sync_mtx_lck);
2247 if (kernel_thread_start(sync_thread, &timeout, &thd) != KERN_SUCCESS) {
2248 printf("sync_thread failed\n");
2249 lck_mtx_unlock(sync_mtx_lck);
2250 return (0);
2251 }
2252
2253 error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "sync_thread", &ts);
2254 if (error) {
2255 printf("sync timed out: %d sec\n", timeout);
2256 }
2257 thread_deallocate(thd);
2258
2259 return (0);
2260 }
2261
2262 /*
2263 * An in-kernel sync for power management to call.
2264 */
2265 __private_extern__ int
2266 sync_internal(void)
2267 {
2268 (void) sync_async(sync_timeout);
2269
2270 return 0;
2271 } /* end of sync_internal call */
2272
2273 /*
2274 * Change filesystem quotas.
2275 */
2276 #if QUOTA
2277 int
2278 quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
2279 {
2280 struct mount *mp;
2281 int error, quota_cmd, quota_status;
2282 caddr_t datap;
2283 size_t fnamelen;
2284 struct nameidata nd;
2285 vfs_context_t ctx = vfs_context_current();
2286 struct dqblk my_dqblk;
2287
2288 AUDIT_ARG(uid, uap->uid);
2289 AUDIT_ARG(cmd, uap->cmd);
2290 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
2291 uap->path, ctx);
2292 error = namei(&nd);
2293 if (error)
2294 return (error);
2295 mp = nd.ni_vp->v_mount;
2296 vnode_put(nd.ni_vp);
2297 nameidone(&nd);
2298
2299 /* copyin any data we will need for downstream code */
2300 quota_cmd = uap->cmd >> SUBCMDSHIFT;
2301
2302 switch (quota_cmd) {
2303 case Q_QUOTAON:
2304 /* uap->arg specifies a file from which to take the quotas */
2305 fnamelen = MAXPATHLEN;
2306 datap = kalloc(MAXPATHLEN);
2307 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
2308 break;
2309 case Q_GETQUOTA:
2310 /* uap->arg is a pointer to a dqblk structure. */
2311 datap = (caddr_t) &my_dqblk;
2312 break;
2313 case Q_SETQUOTA:
2314 case Q_SETUSE:
2315 /* uap->arg is a pointer to a dqblk structure. */
2316 datap = (caddr_t) &my_dqblk;
2317 if (proc_is64bit(p)) {
2318 struct user_dqblk my_dqblk64;
2319 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
2320 if (error == 0) {
2321 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
2322 }
2323 }
2324 else {
2325 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
2326 }
2327 break;
2328 case Q_QUOTASTAT:
2329 /* uap->arg is a pointer to an integer */
2330 datap = (caddr_t) &quota_status;
2331 break;
2332 default:
2333 datap = NULL;
2334 break;
2335 } /* switch */
2336
2337 if (error == 0) {
2338 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
2339 }
2340
2341 switch (quota_cmd) {
2342 case Q_QUOTAON:
2343 if (datap != NULL)
2344 kfree(datap, MAXPATHLEN);
2345 break;
2346 case Q_GETQUOTA:
2347 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
2348 if (error == 0) {
2349 if (proc_is64bit(p)) {
2350 struct user_dqblk my_dqblk64 = {.dqb_bhardlimit = 0};
2351 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
2352 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
2353 }
2354 else {
2355 error = copyout(datap, uap->arg, sizeof (struct dqblk));
2356 }
2357 }
2358 break;
2359 case Q_QUOTASTAT:
2360 /* uap->arg is a pointer to an integer */
2361 if (error == 0) {
2362 error = copyout(datap, uap->arg, sizeof(quota_status));
2363 }
2364 break;
2365 default:
2366 break;
2367 } /* switch */
2368
2369 return (error);
2370 }
2371 #else
2372 int
2373 quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
2374 {
2375 return (EOPNOTSUPP);
2376 }
2377 #endif /* QUOTA */
2378
2379 /*
2380 * Get filesystem statistics.
2381 *
2382 * Returns: 0 Success
2383 * namei:???
2384 * vfs_update_vfsstat:???
2385 * munge_statfs:EFAULT
2386 */
2387 /* ARGSUSED */
2388 int
2389 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
2390 {
2391 struct mount *mp;
2392 struct vfsstatfs *sp;
2393 int error;
2394 struct nameidata nd;
2395 vfs_context_t ctx = vfs_context_current();
2396 vnode_t vp;
2397
2398 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2399 UIO_USERSPACE, uap->path, ctx);
2400 error = namei(&nd);
2401 if (error)
2402 return (error);
2403 vp = nd.ni_vp;
2404 mp = vp->v_mount;
2405 sp = &mp->mnt_vfsstat;
2406 nameidone(&nd);
2407
2408 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
2409 if (error != 0) {
2410 vnode_put(vp);
2411 return (error);
2412 }
2413
2414 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2415 vnode_put(vp);
2416 return (error);
2417 }
2418
2419 /*
2420 * Get filesystem statistics.
2421 */
2422 /* ARGSUSED */
2423 int
2424 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
2425 {
2426 vnode_t vp;
2427 struct mount *mp;
2428 struct vfsstatfs *sp;
2429 int error;
2430
2431 AUDIT_ARG(fd, uap->fd);
2432
2433 if ( (error = file_vnode(uap->fd, &vp)) )
2434 return (error);
2435
2436 error = vnode_getwithref(vp);
2437 if (error) {
2438 file_drop(uap->fd);
2439 return (error);
2440 }
2441
2442 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2443
2444 mp = vp->v_mount;
2445 if (!mp) {
2446 error = EBADF;
2447 goto out;
2448 }
2449 sp = &mp->mnt_vfsstat;
2450 if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
2451 goto out;
2452 }
2453
2454 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
2455
2456 out:
2457 file_drop(uap->fd);
2458 vnode_put(vp);
2459
2460 return (error);
2461 }
2462
2463 /*
2464 * Common routine to handle copying of statfs64 data to user space
2465 */
2466 static int
2467 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
2468 {
2469 int error;
2470 struct statfs64 sfs;
2471
2472 bzero(&sfs, sizeof(sfs));
2473
2474 sfs.f_bsize = sfsp->f_bsize;
2475 sfs.f_iosize = (int32_t)sfsp->f_iosize;
2476 sfs.f_blocks = sfsp->f_blocks;
2477 sfs.f_bfree = sfsp->f_bfree;
2478 sfs.f_bavail = sfsp->f_bavail;
2479 sfs.f_files = sfsp->f_files;
2480 sfs.f_ffree = sfsp->f_ffree;
2481 sfs.f_fsid = sfsp->f_fsid;
2482 sfs.f_owner = sfsp->f_owner;
2483 sfs.f_type = mp->mnt_vtable->vfc_typenum;
2484 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2485 sfs.f_fssubtype = sfsp->f_fssubtype;
2486 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
2487 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
2488 } else {
2489 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
2490 }
2491 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
2492 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
2493
2494 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
2495
2496 return(error);
2497 }
2498
2499 /*
2500 * Get file system statistics in 64-bit mode
2501 */
2502 int
2503 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
2504 {
2505 struct mount *mp;
2506 struct vfsstatfs *sp;
2507 int error;
2508 struct nameidata nd;
2509 vfs_context_t ctxp = vfs_context_current();
2510 vnode_t vp;
2511
2512 NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
2513 UIO_USERSPACE, uap->path, ctxp);
2514 error = namei(&nd);
2515 if (error)
2516 return (error);
2517 vp = nd.ni_vp;
2518 mp = vp->v_mount;
2519 sp = &mp->mnt_vfsstat;
2520 nameidone(&nd);
2521
2522 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
2523 if (error != 0) {
2524 vnode_put(vp);
2525 return (error);
2526 }
2527
2528 error = statfs64_common(mp, sp, uap->buf);
2529 vnode_put(vp);
2530
2531 return (error);
2532 }
2533
2534 /*
2535 * Get file system statistics in 64-bit mode
2536 */
2537 int
2538 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
2539 {
2540 struct vnode *vp;
2541 struct mount *mp;
2542 struct vfsstatfs *sp;
2543 int error;
2544
2545 AUDIT_ARG(fd, uap->fd);
2546
2547 if ( (error = file_vnode(uap->fd, &vp)) )
2548 return (error);
2549
2550 error = vnode_getwithref(vp);
2551 if (error) {
2552 file_drop(uap->fd);
2553 return (error);
2554 }
2555
2556 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
2557
2558 mp = vp->v_mount;
2559 if (!mp) {
2560 error = EBADF;
2561 goto out;
2562 }
2563 sp = &mp->mnt_vfsstat;
2564 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
2565 goto out;
2566 }
2567
2568 error = statfs64_common(mp, sp, uap->buf);
2569
2570 out:
2571 file_drop(uap->fd);
2572 vnode_put(vp);
2573
2574 return (error);
2575 }
2576
2577 struct getfsstat_struct {
2578 user_addr_t sfsp;
2579 user_addr_t *mp;
2580 int count;
2581 int maxcount;
2582 int flags;
2583 int error;
2584 };
2585
2586
2587 static int
2588 getfsstat_callback(mount_t mp, void * arg)
2589 {
2590
2591 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2592 struct vfsstatfs *sp;
2593 int error, my_size;
2594 vfs_context_t ctx = vfs_context_current();
2595
2596 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2597 sp = &mp->mnt_vfsstat;
2598 /*
2599 * If MNT_NOWAIT is specified, do not refresh the
2600 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
2601 */
2602 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2603 (error = vfs_update_vfsstat(mp, ctx,
2604 VFS_USER_EVENT))) {
2605 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2606 return(VFS_RETURNED);
2607 }
2608
2609 /*
2610 * Need to handle LP64 version of struct statfs
2611 */
2612 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
2613 if (error) {
2614 fstp->error = error;
2615 return(VFS_RETURNED_DONE);
2616 }
2617 fstp->sfsp += my_size;
2618
2619 if (fstp->mp) {
2620 #if CONFIG_MACF
2621 error = mac_mount_label_get(mp, *fstp->mp);
2622 if (error) {
2623 fstp->error = error;
2624 return(VFS_RETURNED_DONE);
2625 }
2626 #endif
2627 fstp->mp++;
2628 }
2629 }
2630 fstp->count++;
2631 return(VFS_RETURNED);
2632 }
2633
2634 /*
2635 * Get statistics on all filesystems.
2636 */
2637 int
2638 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2639 {
2640 struct __mac_getfsstat_args muap;
2641
2642 muap.buf = uap->buf;
2643 muap.bufsize = uap->bufsize;
2644 muap.mac = USER_ADDR_NULL;
2645 muap.macsize = 0;
2646 muap.flags = uap->flags;
2647
2648 return (__mac_getfsstat(p, &muap, retval));
2649 }
2650
2651 /*
2652 * __mac_getfsstat: Get MAC-related file system statistics
2653 *
2654 * Parameters: p (ignored)
2655 * uap User argument descriptor (see below)
2656 * retval Count of file system statistics (N stats)
2657 *
2658 * Indirect: uap->bufsize Buffer size
2659 * uap->macsize MAC info size
2660 * uap->buf Buffer where information will be returned
2661 * uap->mac MAC info
2662 * uap->flags File system flags
2663 *
2664 *
2665 * Returns: 0 Success
2666 * !0 Not success
2667 *
2668 */
2669 int
2670 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
2671 {
2672 user_addr_t sfsp;
2673 user_addr_t *mp;
2674 size_t count, maxcount, bufsize, macsize;
2675 struct getfsstat_struct fst;
2676
2677 bufsize = (size_t) uap->bufsize;
2678 macsize = (size_t) uap->macsize;
2679
2680 if (IS_64BIT_PROCESS(p)) {
2681 maxcount = bufsize / sizeof(struct user64_statfs);
2682 }
2683 else {
2684 maxcount = bufsize / sizeof(struct user32_statfs);
2685 }
2686 sfsp = uap->buf;
2687 count = 0;
2688
2689 mp = NULL;
2690
2691 #if CONFIG_MACF
2692 if (uap->mac != USER_ADDR_NULL) {
2693 u_int32_t *mp0;
2694 int error;
2695 unsigned int i;
2696
2697 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
2698 if (count != maxcount)
2699 return (EINVAL);
2700
2701 /* Copy in the array */
2702 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
2703 if (mp0 == NULL) {
2704 return (ENOMEM);
2705 }
2706
2707 error = copyin(uap->mac, mp0, macsize);
2708 if (error) {
2709 FREE(mp0, M_MACTEMP);
2710 return (error);
2711 }
2712
2713 /* Normalize to an array of user_addr_t */
2714 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
2715 if (mp == NULL) {
2716 FREE(mp0, M_MACTEMP);
2717 return (ENOMEM);
2718 }
2719
2720 for (i = 0; i < count; i++) {
2721 if (IS_64BIT_PROCESS(p))
2722 mp[i] = ((user_addr_t *)mp0)[i];
2723 else
2724 mp[i] = (user_addr_t)mp0[i];
2725 }
2726 FREE(mp0, M_MACTEMP);
2727 }
2728 #endif
2729
2730
2731 fst.sfsp = sfsp;
2732 fst.mp = mp;
2733 fst.flags = uap->flags;
2734 fst.count = 0;
2735 fst.error = 0;
2736 fst.maxcount = maxcount;
2737
2738
2739 vfs_iterate(0, getfsstat_callback, &fst);
2740
2741 if (mp)
2742 FREE(mp, M_MACTEMP);
2743
2744 if (fst.error ) {
2745 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2746 return(fst.error);
2747 }
2748
2749 if (fst.sfsp && fst.count > fst.maxcount)
2750 *retval = fst.maxcount;
2751 else
2752 *retval = fst.count;
2753 return (0);
2754 }
2755
2756 static int
2757 getfsstat64_callback(mount_t mp, void * arg)
2758 {
2759 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
2760 struct vfsstatfs *sp;
2761 int error;
2762
2763 if (fstp->sfsp && fstp->count < fstp->maxcount) {
2764 sp = &mp->mnt_vfsstat;
2765 /*
2766 * If MNT_NOWAIT is specified, do not refresh the fsstat
2767 * cache. MNT_WAIT overrides MNT_NOWAIT.
2768 *
2769 * We treat MNT_DWAIT as MNT_WAIT for all instances of
2770 * getfsstat, since the constants are out of the same
2771 * namespace.
2772 */
2773 if (((fstp->flags & MNT_NOWAIT) == 0 ||
2774 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
2775 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
2776 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
2777 return(VFS_RETURNED);
2778 }
2779
2780 error = statfs64_common(mp, sp, fstp->sfsp);
2781 if (error) {
2782 fstp->error = error;
2783 return(VFS_RETURNED_DONE);
2784 }
2785 fstp->sfsp += sizeof(struct statfs64);
2786 }
2787 fstp->count++;
2788 return(VFS_RETURNED);
2789 }
2790
2791 /*
2792 * Get statistics on all file systems in 64 bit mode.
2793 */
2794 int
2795 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2796 {
2797 user_addr_t sfsp;
2798 int count, maxcount;
2799 struct getfsstat_struct fst;
2800
2801 maxcount = uap->bufsize / sizeof(struct statfs64);
2802
2803 sfsp = uap->buf;
2804 count = 0;
2805
2806 fst.sfsp = sfsp;
2807 fst.flags = uap->flags;
2808 fst.count = 0;
2809 fst.error = 0;
2810 fst.maxcount = maxcount;
2811
2812 vfs_iterate(0, getfsstat64_callback, &fst);
2813
2814 if (fst.error ) {
2815 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2816 return(fst.error);
2817 }
2818
2819 if (fst.sfsp && fst.count > fst.maxcount)
2820 *retval = fst.maxcount;
2821 else
2822 *retval = fst.count;
2823
2824 return (0);
2825 }
2826
2827 /*
2828 * gets the associated vnode with the file descriptor passed.
2829 * as input
2830 *
2831 * INPUT
2832 * ctx - vfs context of caller
2833 * fd - file descriptor for which vnode is required.
2834 * vpp - Pointer to pointer to vnode to be returned.
2835 *
2836 * The vnode is returned with an iocount so any vnode obtained
2837 * by this call needs a vnode_put
2838 *
2839 */
2840 static int
2841 vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
2842 {
2843 int error;
2844 vnode_t vp;
2845 struct fileproc *fp;
2846 proc_t p = vfs_context_proc(ctx);
2847
2848 *vpp = NULLVP;
2849
2850 error = fp_getfvp(p, fd, &fp, &vp);
2851 if (error)
2852 return (error);
2853
2854 error = vnode_getwithref(vp);
2855 if (error) {
2856 (void)fp_drop(p, fd, fp, 0);
2857 return (error);
2858 }
2859
2860 (void)fp_drop(p, fd, fp, 0);
2861 *vpp = vp;
2862 return (error);
2863 }
2864
2865 /*
2866 * Wrapper function around namei to start lookup from a directory
2867 * specified by a file descriptor ni_dirfd.
2868 *
2869 * In addition to all the errors returned by namei, this call can
2870 * return ENOTDIR if the file descriptor does not refer to a directory.
2871 * and EBADF if the file descriptor is not valid.
2872 */
2873 int
2874 nameiat(struct nameidata *ndp, int dirfd)
2875 {
2876 if ((dirfd != AT_FDCWD) &&
2877 !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
2878 !(ndp->ni_cnd.cn_flags & USEDVP)) {
2879 int error = 0;
2880 char c;
2881
2882 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
2883 error = copyin(ndp->ni_dirp, &c, sizeof(char));
2884 if (error)
2885 return (error);
2886 } else {
2887 c = *((char *)(ndp->ni_dirp));
2888 }
2889
2890 if (c != '/') {
2891 vnode_t dvp_at;
2892
2893 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
2894 &dvp_at);
2895 if (error)
2896 return (error);
2897
2898 if (vnode_vtype(dvp_at) != VDIR) {
2899 vnode_put(dvp_at);
2900 return (ENOTDIR);
2901 }
2902
2903 ndp->ni_dvp = dvp_at;
2904 ndp->ni_cnd.cn_flags |= USEDVP;
2905 error = namei(ndp);
2906 ndp->ni_cnd.cn_flags &= ~USEDVP;
2907 vnode_put(dvp_at);
2908 return (error);
2909 }
2910 }
2911
2912 return (namei(ndp));
2913 }
2914
2915 /*
2916 * Change current working directory to a given file descriptor.
2917 */
2918 /* ARGSUSED */
2919 static int
2920 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
2921 {
2922 struct filedesc *fdp = p->p_fd;
2923 vnode_t vp;
2924 vnode_t tdp;
2925 vnode_t tvp;
2926 struct mount *mp;
2927 int error;
2928 vfs_context_t ctx = vfs_context_current();
2929
2930 AUDIT_ARG(fd, uap->fd);
2931 if (per_thread && uap->fd == -1) {
2932 /*
2933 * Switching back from per-thread to per process CWD; verify we
2934 * in fact have one before proceeding. The only success case
2935 * for this code path is to return 0 preemptively after zapping
2936 * the thread structure contents.
2937 */
2938 thread_t th = vfs_context_thread(ctx);
2939 if (th) {
2940 uthread_t uth = get_bsdthread_info(th);
2941 tvp = uth->uu_cdir;
2942 uth->uu_cdir = NULLVP;
2943 if (tvp != NULLVP) {
2944 vnode_rele(tvp);
2945 return (0);
2946 }
2947 }
2948 return (EBADF);
2949 }
2950
2951 if ( (error = file_vnode(uap->fd, &vp)) )
2952 return(error);
2953 if ( (error = vnode_getwithref(vp)) ) {
2954 file_drop(uap->fd);
2955 return(error);
2956 }
2957
2958 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
2959
2960 if (vp->v_type != VDIR) {
2961 error = ENOTDIR;
2962 goto out;
2963 }
2964
2965 #if CONFIG_MACF
2966 error = mac_vnode_check_chdir(ctx, vp);
2967 if (error)
2968 goto out;
2969 #endif
2970 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2971 if (error)
2972 goto out;
2973
2974 while (!error && (mp = vp->v_mountedhere) != NULL) {
2975 if (vfs_busy(mp, LK_NOWAIT)) {
2976 error = EACCES;
2977 goto out;
2978 }
2979 error = VFS_ROOT(mp, &tdp, ctx);
2980 vfs_unbusy(mp);
2981 if (error)
2982 break;
2983 vnode_put(vp);
2984 vp = tdp;
2985 }
2986 if (error)
2987 goto out;
2988 if ( (error = vnode_ref(vp)) )
2989 goto out;
2990 vnode_put(vp);
2991
2992 if (per_thread) {
2993 thread_t th = vfs_context_thread(ctx);
2994 if (th) {
2995 uthread_t uth = get_bsdthread_info(th);
2996 tvp = uth->uu_cdir;
2997 uth->uu_cdir = vp;
2998 OSBitOrAtomic(P_THCWD, &p->p_flag);
2999 } else {
3000 vnode_rele(vp);
3001 return (ENOENT);
3002 }
3003 } else {
3004 proc_fdlock(p);
3005 tvp = fdp->fd_cdir;
3006 fdp->fd_cdir = vp;
3007 proc_fdunlock(p);
3008 }
3009
3010 if (tvp)
3011 vnode_rele(tvp);
3012 file_drop(uap->fd);
3013
3014 return (0);
3015 out:
3016 vnode_put(vp);
3017 file_drop(uap->fd);
3018
3019 return(error);
3020 }
3021
3022 int
3023 fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
3024 {
3025 return common_fchdir(p, uap, 0);
3026 }
3027
3028 int
3029 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
3030 {
3031 return common_fchdir(p, (void *)uap, 1);
3032 }
3033
3034 /*
3035 * Change current working directory (".").
3036 *
3037 * Returns: 0 Success
3038 * change_dir:ENOTDIR
3039 * change_dir:???
3040 * vnode_ref:ENOENT No such file or directory
3041 */
3042 /* ARGSUSED */
3043 static int
3044 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
3045 {
3046 struct filedesc *fdp = p->p_fd;
3047 int error;
3048 struct nameidata nd;
3049 vnode_t tvp;
3050 vfs_context_t ctx = vfs_context_current();
3051
3052 NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
3053 UIO_USERSPACE, uap->path, ctx);
3054 error = change_dir(&nd, ctx);
3055 if (error)
3056 return (error);
3057 if ( (error = vnode_ref(nd.ni_vp)) ) {
3058 vnode_put(nd.ni_vp);
3059 return (error);
3060 }
3061 /*
3062 * drop the iocount we picked up in change_dir
3063 */
3064 vnode_put(nd.ni_vp);
3065
3066 if (per_thread) {
3067 thread_t th = vfs_context_thread(ctx);
3068 if (th) {
3069 uthread_t uth = get_bsdthread_info(th);
3070 tvp = uth->uu_cdir;
3071 uth->uu_cdir = nd.ni_vp;
3072 OSBitOrAtomic(P_THCWD, &p->p_flag);
3073 } else {
3074 vnode_rele(nd.ni_vp);
3075 return (ENOENT);
3076 }
3077 } else {
3078 proc_fdlock(p);
3079 tvp = fdp->fd_cdir;
3080 fdp->fd_cdir = nd.ni_vp;
3081 proc_fdunlock(p);
3082 }
3083
3084 if (tvp)
3085 vnode_rele(tvp);
3086
3087 return (0);
3088 }
3089
3090
3091 /*
3092 * chdir
3093 *
3094 * Change current working directory (".") for the entire process
3095 *
3096 * Parameters: p Process requesting the call
3097 * uap User argument descriptor (see below)
3098 * retval (ignored)
3099 *
3100 * Indirect parameters: uap->path Directory path
3101 *
3102 * Returns: 0 Success
3103 * common_chdir: ENOTDIR
3104 * common_chdir: ENOENT No such file or directory
3105 * common_chdir: ???
3106 *
3107 */
3108 int
3109 chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
3110 {
3111 return common_chdir(p, (void *)uap, 0);
3112 }
3113
3114 /*
3115 * __pthread_chdir
3116 *
3117 * Change current working directory (".") for a single thread
3118 *
3119 * Parameters: p Process requesting the call
3120 * uap User argument descriptor (see below)
3121 * retval (ignored)
3122 *
3123 * Indirect parameters: uap->path Directory path
3124 *
3125 * Returns: 0 Success
3126 * common_chdir: ENOTDIR
3127 * common_chdir: ENOENT No such file or directory
3128 * common_chdir: ???
3129 *
3130 */
3131 int
3132 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
3133 {
3134 return common_chdir(p, (void *)uap, 1);
3135 }
3136
3137
3138 /*
3139 * Change notion of root (``/'') directory.
3140 */
3141 /* ARGSUSED */
3142 int
3143 chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
3144 {
3145 struct filedesc *fdp = p->p_fd;
3146 int error;
3147 struct nameidata nd;
3148 vnode_t tvp;
3149 vfs_context_t ctx = vfs_context_current();
3150
3151 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
3152 return (error);
3153
3154 NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
3155 UIO_USERSPACE, uap->path, ctx);
3156 error = change_dir(&nd, ctx);
3157 if (error)
3158 return (error);
3159
3160 #if CONFIG_MACF
3161 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
3162 &nd.ni_cnd);
3163 if (error) {
3164 vnode_put(nd.ni_vp);
3165 return (error);
3166 }
3167 #endif
3168
3169 if ( (error = vnode_ref(nd.ni_vp)) ) {
3170 vnode_put(nd.ni_vp);
3171 return (error);
3172 }
3173 vnode_put(nd.ni_vp);
3174
3175 proc_fdlock(p);
3176 tvp = fdp->fd_rdir;
3177 fdp->fd_rdir = nd.ni_vp;
3178 fdp->fd_flags |= FD_CHROOT;
3179 proc_fdunlock(p);
3180
3181 if (tvp != NULL)
3182 vnode_rele(tvp);
3183
3184 return (0);
3185 }
3186
3187 /*
3188 * Common routine for chroot and chdir.
3189 *
3190 * Returns: 0 Success
3191 * ENOTDIR Not a directory
3192 * namei:??? [anything namei can return]
3193 * vnode_authorize:??? [anything vnode_authorize can return]
3194 */
3195 static int
3196 change_dir(struct nameidata *ndp, vfs_context_t ctx)
3197 {
3198 vnode_t vp;
3199 int error;
3200
3201 if ((error = namei(ndp)))
3202 return (error);
3203 nameidone(ndp);
3204 vp = ndp->ni_vp;
3205
3206 if (vp->v_type != VDIR) {
3207 vnode_put(vp);
3208 return (ENOTDIR);
3209 }
3210
3211 #if CONFIG_MACF
3212 error = mac_vnode_check_chdir(ctx, vp);
3213 if (error) {
3214 vnode_put(vp);
3215 return (error);
3216 }
3217 #endif
3218
3219 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
3220 if (error) {
3221 vnode_put(vp);
3222 return (error);
3223 }
3224
3225 return (error);
3226 }
3227
3228 /*
3229 * Free the vnode data (for directories) associated with the file glob.
3230 */
3231 struct fd_vn_data *
3232 fg_vn_data_alloc(void)
3233 {
3234 struct fd_vn_data *fvdata;
3235
3236 /* Allocate per fd vnode data */
3237 MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
3238 M_FD_VN_DATA, M_WAITOK | M_ZERO);
3239 lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
3240 return fvdata;
3241 }
3242
3243 /*
3244 * Free the vnode data (for directories) associated with the file glob.
3245 */
3246 void
3247 fg_vn_data_free(void *fgvndata)
3248 {
3249 struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
3250
3251 if (fvdata->fv_buf)
3252 FREE(fvdata->fv_buf, M_FD_DIRBUF);
3253 lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
3254 FREE(fvdata, M_FD_VN_DATA);
3255 }
3256
3257 /*
3258 * Check permissions, allocate an open file structure,
3259 * and call the device open routine if any.
3260 *
3261 * Returns: 0 Success
3262 * EINVAL
3263 * EINTR
3264 * falloc:ENFILE
3265 * falloc:EMFILE
3266 * falloc:ENOMEM
3267 * vn_open_auth:???
3268 * dupfdopen:???
3269 * VNOP_ADVLOCK:???
3270 * vnode_setsize:???
3271 *
3272 * XXX Need to implement uid, gid
3273 */
3274 int
3275 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3276 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
3277 int32_t *retval)
3278 {
3279 proc_t p = vfs_context_proc(ctx);
3280 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
3281 struct fileproc *fp;
3282 vnode_t vp;
3283 int flags, oflags;
3284 int type, indx, error;
3285 struct flock lf;
3286 int no_controlling_tty = 0;
3287 int deny_controlling_tty = 0;
3288 struct session *sessp = SESSION_NULL;
3289
3290 oflags = uflags;
3291
3292 if ((oflags & O_ACCMODE) == O_ACCMODE)
3293 return(EINVAL);
3294 flags = FFLAGS(uflags);
3295
3296 AUDIT_ARG(fflags, oflags);
3297 AUDIT_ARG(mode, vap->va_mode);
3298
3299 if ((error = falloc_withalloc(p,
3300 &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
3301 return (error);
3302 }
3303 uu->uu_dupfd = -indx - 1;
3304
3305 if (!(p->p_flag & P_CONTROLT)) {
3306 sessp = proc_session(p);
3307 no_controlling_tty = 1;
3308 /*
3309 * If conditions would warrant getting a controlling tty if
3310 * the device being opened is a tty (see ttyopen in tty.c),
3311 * but the open flags deny it, set a flag in the session to
3312 * prevent it.
3313 */
3314 if (SESS_LEADER(p, sessp) &&
3315 sessp->s_ttyvp == NULL &&
3316 (flags & O_NOCTTY)) {
3317 session_lock(sessp);
3318 sessp->s_flags |= S_NOCTTY;
3319 session_unlock(sessp);
3320 deny_controlling_tty = 1;
3321 }
3322 }
3323
3324 if ((error = vn_open_auth(ndp, &flags, vap))) {
3325 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
3326 if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
3327 fp_drop(p, indx, NULL, 0);
3328 *retval = indx;
3329 if (deny_controlling_tty) {
3330 session_lock(sessp);
3331 sessp->s_flags &= ~S_NOCTTY;
3332 session_unlock(sessp);
3333 }
3334 if (sessp != SESSION_NULL)
3335 session_rele(sessp);
3336 return (0);
3337 }
3338 }
3339 if (error == ERESTART)
3340 error = EINTR;
3341 fp_free(p, indx, fp);
3342
3343 if (deny_controlling_tty) {
3344 session_lock(sessp);
3345 sessp->s_flags &= ~S_NOCTTY;
3346 session_unlock(sessp);
3347 }
3348 if (sessp != SESSION_NULL)
3349 session_rele(sessp);
3350 return (error);
3351 }
3352 uu->uu_dupfd = 0;
3353 vp = ndp->ni_vp;
3354
3355 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
3356 fp->f_fglob->fg_ops = &vnops;
3357 fp->f_fglob->fg_data = (caddr_t)vp;
3358
3359 #if CONFIG_PROTECT
3360 if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) {
3361 if (vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) {
3362 fp->f_fglob->fg_flag |= FENCRYPTED;
3363 }
3364 }
3365 #endif
3366
3367 if (flags & (O_EXLOCK | O_SHLOCK)) {
3368 lf.l_whence = SEEK_SET;
3369 lf.l_start = 0;
3370 lf.l_len = 0;
3371 if (flags & O_EXLOCK)
3372 lf.l_type = F_WRLCK;
3373 else
3374 lf.l_type = F_RDLCK;
3375 type = F_FLOCK;
3376 if ((flags & FNONBLOCK) == 0)
3377 type |= F_WAIT;
3378 #if CONFIG_MACF
3379 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
3380 F_SETLK, &lf);
3381 if (error)
3382 goto bad;
3383 #endif
3384 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
3385 goto bad;
3386 fp->f_fglob->fg_flag |= FHASLOCK;
3387 }
3388
3389 /* try to truncate by setting the size attribute */
3390 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
3391 goto bad;
3392
3393 /*
3394 * If the open flags denied the acquisition of a controlling tty,
3395 * clear the flag in the session structure that prevented the lower
3396 * level code from assigning one.
3397 */
3398 if (deny_controlling_tty) {
3399 session_lock(sessp);
3400 sessp->s_flags &= ~S_NOCTTY;
3401 session_unlock(sessp);
3402 }
3403
3404 /*
3405 * If a controlling tty was set by the tty line discipline, then we
3406 * want to set the vp of the tty into the session structure. We have
3407 * a race here because we can't get to the vp for the tp in ttyopen,
3408 * because it's not passed as a parameter in the open path.
3409 */
3410 if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
3411 vnode_t ttyvp;
3412
3413 session_lock(sessp);
3414 ttyvp = sessp->s_ttyvp;
3415 sessp->s_ttyvp = vp;
3416 sessp->s_ttyvid = vnode_vid(vp);
3417 session_unlock(sessp);
3418 }
3419
3420 /*
3421 * For directories we hold some additional information in the fd.
3422 */
3423 if (vnode_vtype(vp) == VDIR) {
3424 fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
3425 } else {
3426 fp->f_fglob->fg_vn_data = NULL;
3427 }
3428
3429 vnode_put(vp);
3430
3431 proc_fdlock(p);
3432 if (flags & O_CLOEXEC)
3433 *fdflags(p, indx) |= UF_EXCLOSE;
3434 if (flags & O_CLOFORK)
3435 *fdflags(p, indx) |= UF_FORKCLOSE;
3436 procfdtbl_releasefd(p, indx, NULL);
3437 fp_drop(p, indx, fp, 1);
3438 proc_fdunlock(p);
3439
3440 *retval = indx;
3441
3442 if (sessp != SESSION_NULL)
3443 session_rele(sessp);
3444 return (0);
3445 bad:
3446 if (deny_controlling_tty) {
3447 session_lock(sessp);
3448 sessp->s_flags &= ~S_NOCTTY;
3449 session_unlock(sessp);
3450 }
3451 if (sessp != SESSION_NULL)
3452 session_rele(sessp);
3453
3454 struct vfs_context context = *vfs_context_current();
3455 context.vc_ucred = fp->f_fglob->fg_cred;
3456
3457 if ((fp->f_fglob->fg_flag & FHASLOCK) &&
3458 (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
3459 lf.l_whence = SEEK_SET;
3460 lf.l_start = 0;
3461 lf.l_len = 0;
3462 lf.l_type = F_UNLCK;
3463
3464 (void)VNOP_ADVLOCK(
3465 vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
3466 }
3467
3468 vn_close(vp, fp->f_fglob->fg_flag, &context);
3469 vnode_put(vp);
3470 fp_free(p, indx, fp);
3471
3472 return (error);
3473 }
3474
3475 /*
3476 * While most of the *at syscall handlers can call nameiat() which
3477 * is a wrapper around namei, the use of namei and initialisation
3478 * of nameidata are far removed and in different functions - namei
3479 * gets called in vn_open_auth for open1. So we'll just do here what
3480 * nameiat() does.
3481 */
3482 static int
3483 open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
3484 struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
3485 int dirfd)
3486 {
3487 if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
3488 int error;
3489 char c;
3490
3491 if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
3492 error = copyin(ndp->ni_dirp, &c, sizeof(char));
3493 if (error)
3494 return (error);
3495 } else {
3496 c = *((char *)(ndp->ni_dirp));
3497 }
3498
3499 if (c != '/') {
3500 vnode_t dvp_at;
3501
3502 error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
3503 &dvp_at);
3504 if (error)
3505 return (error);
3506
3507 if (vnode_vtype(dvp_at) != VDIR) {
3508 vnode_put(dvp_at);
3509 return (ENOTDIR);
3510 }
3511
3512 ndp->ni_dvp = dvp_at;
3513 ndp->ni_cnd.cn_flags |= USEDVP;
3514 error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
3515 retval);
3516 vnode_put(dvp_at);
3517 return (error);
3518 }
3519 }
3520
3521 return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
3522 }
3523
3524 /*
3525 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3526 *
3527 * Parameters: p Process requesting the open
3528 * uap User argument descriptor (see below)
3529 * retval Pointer to an area to receive the
3530 * return calue from the system call
3531 *
3532 * Indirect: uap->path Path to open (same as 'open')
3533 * uap->flags Flags to open (same as 'open'
3534 * uap->uid UID to set, if creating
3535 * uap->gid GID to set, if creating
3536 * uap->mode File mode, if creating (same as 'open')
3537 * uap->xsecurity ACL to set, if creating
3538 *
3539 * Returns: 0 Success
3540 * !0 errno value
3541 *
3542 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3543 *
3544 * XXX: We should enummerate the possible errno values here, and where
3545 * in the code they originated.
3546 */
3547 int
3548 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
3549 {
3550 struct filedesc *fdp = p->p_fd;
3551 int ciferror;
3552 kauth_filesec_t xsecdst;
3553 struct vnode_attr va;
3554 struct nameidata nd;
3555 int cmode;
3556
3557 AUDIT_ARG(owner, uap->uid, uap->gid);
3558
3559 xsecdst = NULL;
3560 if ((uap->xsecurity != USER_ADDR_NULL) &&
3561 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
3562 return ciferror;
3563
3564 VATTR_INIT(&va);
3565 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3566 VATTR_SET(&va, va_mode, cmode);
3567 if (uap->uid != KAUTH_UID_NONE)
3568 VATTR_SET(&va, va_uid, uap->uid);
3569 if (uap->gid != KAUTH_GID_NONE)
3570 VATTR_SET(&va, va_gid, uap->gid);
3571 if (xsecdst != NULL)
3572 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3573
3574 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3575 uap->path, vfs_context_current());
3576
3577 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
3578 fileproc_alloc_init, NULL, retval);
3579 if (xsecdst != NULL)
3580 kauth_filesec_free(xsecdst);
3581
3582 return ciferror;
3583 }
3584
3585 /*
3586 * Go through the data-protected atomically controlled open (2)
3587 *
3588 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3589 */
3590 int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3591 int flags = uap->flags;
3592 int class = uap->class;
3593 int dpflags = uap->dpflags;
3594
3595 /*
3596 * Follow the same path as normal open(2)
3597 * Look up the item if it exists, and acquire the vnode.
3598 */
3599 struct filedesc *fdp = p->p_fd;
3600 struct vnode_attr va;
3601 struct nameidata nd;
3602 int cmode;
3603 int error;
3604
3605 VATTR_INIT(&va);
3606 /* Mask off all but regular access permissions */
3607 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3608 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3609
3610 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3611 uap->path, vfs_context_current());
3612
3613 /*
3614 * Initialize the extra fields in vnode_attr to pass down our
3615 * extra fields.
3616 * 1. target cprotect class.
3617 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3618 */
3619 if (flags & O_CREAT) {
3620 VATTR_SET(&va, va_dataprotect_class, class);
3621 }
3622
3623 if (dpflags & O_DP_GETRAWENCRYPTED) {
3624 if ( flags & (O_RDWR | O_WRONLY)) {
3625 /* Not allowed to write raw encrypted bytes */
3626 return EINVAL;
3627 }
3628 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3629 }
3630
3631 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3632 fileproc_alloc_init, NULL, retval);
3633
3634 return error;
3635 }
3636
3637 static int
3638 openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
3639 int fd, enum uio_seg segflg, int *retval)
3640 {
3641 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
3642 struct vnode_attr va;
3643 struct nameidata nd;
3644 int cmode;
3645
3646 VATTR_INIT(&va);
3647 /* Mask off all but regular access permissions */
3648 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3649 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3650
3651 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
3652 segflg, path, ctx);
3653
3654 return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
3655 retval, fd));
3656 }
3657
3658 int
3659 open(proc_t p, struct open_args *uap, int32_t *retval)
3660 {
3661 __pthread_testcancel(1);
3662 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3663 }
3664
3665 int
3666 open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3667 int32_t *retval)
3668 {
3669 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3670 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3671 }
3672
3673 int
3674 openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3675 int32_t *retval)
3676 {
3677 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3678 uap->mode, uap->fd, UIO_USERSPACE, retval));
3679 }
3680
3681 int
3682 openat(proc_t p, struct openat_args *uap, int32_t *retval)
3683 {
3684 __pthread_testcancel(1);
3685 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3686 }
3687
3688 /*
3689 * openbyid_np: open a file given a file system id and a file system object id
3690 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3691 * file systems that don't support object ids it is a node id (uint64_t).
3692 *
3693 * Parameters: p Process requesting the open
3694 * uap User argument descriptor (see below)
3695 * retval Pointer to an area to receive the
3696 * return calue from the system call
3697 *
3698 * Indirect: uap->path Path to open (same as 'open')
3699 *
3700 * uap->fsid id of target file system
3701 * uap->objid id of target file system object
3702 * uap->flags Flags to open (same as 'open')
3703 *
3704 * Returns: 0 Success
3705 * !0 errno value
3706 *
3707 *
3708 * XXX: We should enummerate the possible errno values here, and where
3709 * in the code they originated.
3710 */
3711 int
3712 openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
3713 {
3714 fsid_t fsid;
3715 uint64_t objid;
3716 int error;
3717 char *buf = NULL;
3718 int buflen = MAXPATHLEN;
3719 int pathlen = 0;
3720 vfs_context_t ctx = vfs_context_current();
3721
3722 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
3723 return (error);
3724 }
3725
3726 /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
3727 if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
3728 return (error);
3729 }
3730
3731 AUDIT_ARG(value32, fsid.val[0]);
3732 AUDIT_ARG(value64, objid);
3733
3734 /*resolve path from fsis, objid*/
3735 do {
3736 MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
3737 if (buf == NULL) {
3738 return (ENOMEM);
3739 }
3740
3741 error = fsgetpath_internal(
3742 ctx, fsid.val[0], objid,
3743 buflen, buf, &pathlen);
3744
3745 if (error) {
3746 FREE(buf, M_TEMP);
3747 buf = NULL;
3748 }
3749 } while (error == ENOSPC && (buflen += MAXPATHLEN));
3750
3751 if (error) {
3752 return error;
3753 }
3754
3755 buf[pathlen] = 0;
3756
3757 error = openat_internal(
3758 ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
3759
3760 FREE(buf, M_TEMP);
3761
3762 return error;
3763 }
3764
3765
3766 /*
3767 * Create a special file.
3768 */
3769 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3770
3771 int
3772 mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
3773 {
3774 struct vnode_attr va;
3775 vfs_context_t ctx = vfs_context_current();
3776 int error;
3777 struct nameidata nd;
3778 vnode_t vp, dvp;
3779
3780 VATTR_INIT(&va);
3781 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3782 VATTR_SET(&va, va_rdev, uap->dev);
3783
3784 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
3785 if ((uap->mode & S_IFMT) == S_IFIFO)
3786 return(mkfifo1(ctx, uap->path, &va));
3787
3788 AUDIT_ARG(mode, uap->mode);
3789 AUDIT_ARG(value32, uap->dev);
3790
3791 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
3792 return (error);
3793 NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
3794 UIO_USERSPACE, uap->path, ctx);
3795 error = namei(&nd);
3796 if (error)
3797 return (error);
3798 dvp = nd.ni_dvp;
3799 vp = nd.ni_vp;
3800
3801 if (vp != NULL) {
3802 error = EEXIST;
3803 goto out;
3804 }
3805
3806 switch (uap->mode & S_IFMT) {
3807 case S_IFMT: /* used by badsect to flag bad sectors */
3808 VATTR_SET(&va, va_type, VBAD);
3809 break;
3810 case S_IFCHR:
3811 VATTR_SET(&va, va_type, VCHR);
3812 break;
3813 case S_IFBLK:
3814 VATTR_SET(&va, va_type, VBLK);
3815 break;
3816 default:
3817 error = EINVAL;
3818 goto out;
3819 }
3820
3821 #if CONFIG_MACF
3822 error = mac_vnode_check_create(ctx,
3823 nd.ni_dvp, &nd.ni_cnd, &va);
3824 if (error)
3825 goto out;
3826 #endif
3827
3828 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
3829 goto out;
3830
3831 if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
3832 goto out;
3833
3834 if (vp) {
3835 int update_flags = 0;
3836
3837 // Make sure the name & parent pointers are hooked up
3838 if (vp->v_name == NULL)
3839 update_flags |= VNODE_UPDATE_NAME;
3840 if (vp->v_parent == NULLVP)
3841 update_flags |= VNODE_UPDATE_PARENT;
3842
3843 if (update_flags)
3844 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3845
3846 #if CONFIG_FSE
3847 add_fsevent(FSE_CREATE_FILE, ctx,
3848 FSE_ARG_VNODE, vp,
3849 FSE_ARG_DONE);
3850 #endif
3851 }
3852
3853 out:
3854 /*
3855 * nameidone has to happen before we vnode_put(dvp)
3856 * since it may need to release the fs_nodelock on the dvp
3857 */
3858 nameidone(&nd);
3859
3860 if (vp)
3861 vnode_put(vp);
3862 vnode_put(dvp);
3863
3864 return (error);
3865 }
3866
3867 /*
3868 * Create a named pipe.
3869 *
3870 * Returns: 0 Success
3871 * EEXIST
3872 * namei:???
3873 * vnode_authorize:???
3874 * vn_create:???
3875 */
3876 static int
3877 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
3878 {
3879 vnode_t vp, dvp;
3880 int error;
3881 struct nameidata nd;
3882
3883 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
3884 UIO_USERSPACE, upath, ctx);
3885 error = namei(&nd);
3886 if (error)
3887 return (error);
3888 dvp = nd.ni_dvp;
3889 vp = nd.ni_vp;
3890
3891 /* check that this is a new file and authorize addition */
3892 if (vp != NULL) {
3893 error = EEXIST;
3894 goto out;
3895 }
3896 VATTR_SET(vap, va_type, VFIFO);
3897
3898 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
3899 goto out;
3900
3901 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
3902 out:
3903 /*
3904 * nameidone has to happen before we vnode_put(dvp)
3905 * since it may need to release the fs_nodelock on the dvp
3906 */
3907 nameidone(&nd);
3908
3909 if (vp)
3910 vnode_put(vp);
3911 vnode_put(dvp);
3912
3913 return error;
3914 }
3915
3916
3917 /*
3918 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
3919 *
3920 * Parameters: p Process requesting the open
3921 * uap User argument descriptor (see below)
3922 * retval (Ignored)
3923 *
3924 * Indirect: uap->path Path to fifo (same as 'mkfifo')
3925 * uap->uid UID to set
3926 * uap->gid GID to set
3927 * uap->mode File mode to set (same as 'mkfifo')
3928 * uap->xsecurity ACL to set, if creating
3929 *
3930 * Returns: 0 Success
3931 * !0 errno value
3932 *
3933 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3934 *
3935 * XXX: We should enummerate the possible errno values here, and where
3936 * in the code they originated.
3937 */
3938 int
3939 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
3940 {
3941 int ciferror;
3942 kauth_filesec_t xsecdst;
3943 struct vnode_attr va;
3944
3945 AUDIT_ARG(owner, uap->uid, uap->gid);
3946
3947 xsecdst = KAUTH_FILESEC_NONE;
3948 if (uap->xsecurity != USER_ADDR_NULL) {
3949 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
3950 return ciferror;
3951 }
3952
3953 VATTR_INIT(&va);
3954 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3955 if (uap->uid != KAUTH_UID_NONE)
3956 VATTR_SET(&va, va_uid, uap->uid);
3957 if (uap->gid != KAUTH_GID_NONE)
3958 VATTR_SET(&va, va_gid, uap->gid);
3959 if (xsecdst != KAUTH_FILESEC_NONE)
3960 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3961
3962 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
3963
3964 if (xsecdst != KAUTH_FILESEC_NONE)
3965 kauth_filesec_free(xsecdst);
3966 return ciferror;
3967 }
3968
3969 /* ARGSUSED */
3970 int
3971 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
3972 {
3973 struct vnode_attr va;
3974
3975 VATTR_INIT(&va);
3976 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3977
3978 return(mkfifo1(vfs_context_current(), uap->path, &va));
3979 }
3980
3981
/*
 * my_strrchr: return a pointer to the last occurrence of 'ch' in the
 * NUL-terminated string 'p', or NULL if it does not occur.  The
 * terminator itself is examined, so searching for 0 returns a pointer
 * to the end of the string.
 */
static char *
my_strrchr(char *p, int ch)
{
	char *last = NULL;

	while (1) {
		if (*p == ch) {
			last = p;
		}
		if (*p == '\0') {
			break;
		}
		p++;
	}

	return last;
}
3995
3996 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
3997
3998 int
3999 safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
4000 {
4001 int ret, len = _len;
4002
4003 *truncated_path = 0;
4004 ret = vn_getpath(dvp, path, &len);
4005 if (ret == 0 && len < (MAXPATHLEN - 1)) {
4006 if (leafname) {
4007 path[len-1] = '/';
4008 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
4009 if (len > MAXPATHLEN) {
4010 char *ptr;
4011
4012 // the string got truncated!
4013 *truncated_path = 1;
4014 ptr = my_strrchr(path, '/');
4015 if (ptr) {
4016 *ptr = '\0'; // chop off the string at the last directory component
4017 }
4018 len = strlen(path) + 1;
4019 }
4020 }
4021 } else if (ret == 0) {
4022 *truncated_path = 1;
4023 } else if (ret != 0) {
4024 struct vnode *mydvp=dvp;
4025
4026 if (ret != ENOSPC) {
4027 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
4028 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
4029 }
4030 *truncated_path = 1;
4031
4032 do {
4033 if (mydvp->v_parent != NULL) {
4034 mydvp = mydvp->v_parent;
4035 } else if (mydvp->v_mount) {
4036 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
4037 break;
4038 } else {
4039 // no parent and no mount point? only thing is to punt and say "/" changed
4040 strlcpy(path, "/", _len);
4041 len = 2;
4042 mydvp = NULL;
4043 }
4044
4045 if (mydvp == NULL) {
4046 break;
4047 }
4048
4049 len = _len;
4050 ret = vn_getpath(mydvp, path, &len);
4051 } while (ret == ENOSPC);
4052 }
4053
4054 return len;
4055 }
4056
4057
4058 /*
4059 * Make a hard file link.
4060 *
4061 * Returns: 0 Success
4062 * EPERM
4063 * EEXIST
4064 * EXDEV
4065 * namei:???
4066 * vnode_authorize:???
4067 * VNOP_LINK:???
4068 */
4069 /* ARGSUSED */
4070 static int
4071 linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
4072 user_addr_t link, int flag, enum uio_seg segflg)
4073 {
4074 vnode_t vp, dvp, lvp;
4075 struct nameidata nd;
4076 int follow;
4077 int error;
4078 #if CONFIG_FSE
4079 fse_info finfo;
4080 #endif
4081 int need_event, has_listeners;
4082 char *target_path = NULL;
4083 int truncated=0;
4084
4085 vp = dvp = lvp = NULLVP;
4086
4087 /* look up the object we are linking to */
4088 follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
4089 NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
4090 segflg, path, ctx);
4091
4092 error = nameiat(&nd, fd1);
4093 if (error)
4094 return (error);
4095 vp = nd.ni_vp;
4096
4097 nameidone(&nd);
4098
4099 /*
4100 * Normally, linking to directories is not supported.
4101 * However, some file systems may have limited support.
4102 */
4103 if (vp->v_type == VDIR) {
4104 if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
4105 error = EPERM; /* POSIX */
4106 goto out;
4107 }
4108 /* Linking to a directory requires ownership. */
4109 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
4110 struct vnode_attr dva;
4111
4112 VATTR_INIT(&dva);
4113 VATTR_WANTED(&dva, va_uid);
4114 if (vnode_getattr(vp, &dva, ctx) != 0 ||
4115 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
4116 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
4117 error = EACCES;
4118 goto out;
4119 }
4120 }
4121 }
4122
4123 /* lookup the target node */
4124 #if CONFIG_TRIGGERS
4125 nd.ni_op = OP_LINK;
4126 #endif
4127 nd.ni_cnd.cn_nameiop = CREATE;
4128 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
4129 nd.ni_dirp = link;
4130 error = nameiat(&nd, fd2);
4131 if (error != 0)
4132 goto out;
4133 dvp = nd.ni_dvp;
4134 lvp = nd.ni_vp;
4135
4136 #if CONFIG_MACF
4137 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
4138 goto out2;
4139 #endif
4140
4141 /* or to anything that kauth doesn't want us to (eg. immutable items) */
4142 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
4143 goto out2;
4144
4145 /* target node must not exist */
4146 if (lvp != NULLVP) {
4147 error = EEXIST;
4148 goto out2;
4149 }
4150 /* cannot link across mountpoints */
4151 if (vnode_mount(vp) != vnode_mount(dvp)) {
4152 error = EXDEV;
4153 goto out2;
4154 }
4155
4156 /* authorize creation of the target note */
4157 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
4158 goto out2;
4159
4160 /* and finally make the link */
4161 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
4162 if (error)
4163 goto out2;
4164
4165 #if CONFIG_MACF
4166 (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
4167 #endif
4168
4169 #if CONFIG_FSE
4170 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
4171 #else
4172 need_event = 0;
4173 #endif
4174 has_listeners = kauth_authorize_fileop_has_listeners();
4175
4176 if (need_event || has_listeners) {
4177 char *link_to_path = NULL;
4178 int len, link_name_len;
4179
4180 /* build the path to the new link file */
4181 GET_PATH(target_path);
4182 if (target_path == NULL) {
4183 error = ENOMEM;
4184 goto out2;
4185 }
4186
4187 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
4188
4189 if (has_listeners) {
4190 /* build the path to file we are linking to */
4191 GET_PATH(link_to_path);
4192 if (link_to_path == NULL) {
4193 error = ENOMEM;
4194 goto out2;
4195 }
4196
4197 link_name_len = MAXPATHLEN;
4198 if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
4199 /*
4200 * Call out to allow 3rd party notification of rename.
4201 * Ignore result of kauth_authorize_fileop call.
4202 */
4203 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
4204 (uintptr_t)link_to_path,
4205 (uintptr_t)target_path);
4206 }
4207 if (link_to_path != NULL) {
4208 RELEASE_PATH(link_to_path);
4209 }
4210 }
4211 #if CONFIG_FSE
4212 if (need_event) {
4213 /* construct fsevent */
4214 if (get_fse_info(vp, &finfo, ctx) == 0) {
4215 if (truncated) {
4216 finfo.mode |= FSE_TRUNCATED_PATH;
4217 }
4218
4219 // build the path to the destination of the link
4220 add_fsevent(FSE_CREATE_FILE, ctx,
4221 FSE_ARG_STRING, len, target_path,
4222 FSE_ARG_FINFO, &finfo,
4223 FSE_ARG_DONE);
4224 }
4225 if (vp->v_parent) {
4226 add_fsevent(FSE_STAT_CHANGED, ctx,
4227 FSE_ARG_VNODE, vp->v_parent,
4228 FSE_ARG_DONE);
4229 }
4230 }
4231 #endif
4232 }
4233 out2:
4234 /*
4235 * nameidone has to happen before we vnode_put(dvp)
4236 * since it may need to release the fs_nodelock on the dvp
4237 */
4238 nameidone(&nd);
4239 if (target_path != NULL) {
4240 RELEASE_PATH(target_path);
4241 }
4242 out:
4243 if (lvp)
4244 vnode_put(lvp);
4245 if (dvp)
4246 vnode_put(dvp);
4247 vnode_put(vp);
4248 return (error);
4249 }
4250
4251 int
4252 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4253 {
4254 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4255 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4256 }
4257
4258 int
4259 linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4260 {
4261 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4262 return (EINVAL);
4263
4264 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4265 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4266 }
4267
4268 /*
4269 * Make a symbolic link.
4270 *
4271 * We could add support for ACLs here too...
4272 */
4273 /* ARGSUSED */
4274 static int
4275 symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
4276 user_addr_t link, enum uio_seg segflg)
4277 {
4278 struct vnode_attr va;
4279 char *path;
4280 int error;
4281 struct nameidata nd;
4282 vnode_t vp, dvp;
4283 uint32_t dfflags; // Directory file flags
4284 size_t dummy=0;
4285 proc_t p;
4286
4287 error = 0;
4288 if (UIO_SEG_IS_USER_SPACE(segflg)) {
4289 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
4290 error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
4291 } else {
4292 path = (char *)path_data;
4293 }
4294 if (error)
4295 goto out;
4296 AUDIT_ARG(text, path); /* This is the link string */
4297
4298 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
4299 segflg, link, ctx);
4300
4301 error = nameiat(&nd, fd);
4302 if (error)
4303 goto out;
4304 dvp = nd.ni_dvp;
4305 vp = nd.ni_vp;
4306
4307 p = vfs_context_proc(ctx);
4308 VATTR_INIT(&va);
4309 VATTR_SET(&va, va_type, VLNK);
4310 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
4311
4312 /*
4313 * Handle inheritance of restricted flag
4314 */
4315 error = vnode_flags(dvp, &dfflags, ctx);
4316 if (error)
4317 goto skipit;
4318 if (dfflags & SF_RESTRICTED)
4319 VATTR_SET(&va, va_flags, SF_RESTRICTED);
4320
4321 #if CONFIG_MACF
4322 error = mac_vnode_check_create(ctx,
4323 dvp, &nd.ni_cnd, &va);
4324 #endif
4325 if (error != 0) {
4326 goto skipit;
4327 }
4328
4329 if (vp != NULL) {
4330 error = EEXIST;
4331 goto skipit;
4332 }
4333
4334 /* authorize */
4335 if (error == 0)
4336 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
4337 /* get default ownership, etc. */
4338 if (error == 0)
4339 error = vnode_authattr_new(dvp, &va, 0, ctx);
4340 if (error == 0)
4341 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
4342
4343 #if CONFIG_MACF
4344 if (error == 0)
4345 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
4346 #endif
4347
4348 /* do fallback attribute handling */
4349 if (error == 0)
4350 error = vnode_setattr_fallback(vp, &va, ctx);
4351
4352 if (error == 0) {
4353 int update_flags = 0;
4354
4355 if (vp == NULL) {
4356 nd.ni_cnd.cn_nameiop = LOOKUP;
4357 #if CONFIG_TRIGGERS
4358 nd.ni_op = OP_LOOKUP;
4359 #endif
4360 nd.ni_cnd.cn_flags = 0;
4361 error = nameiat(&nd, fd);
4362 vp = nd.ni_vp;
4363
4364 if (vp == NULL)
4365 goto skipit;
4366 }
4367
4368 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
4369 /* call out to allow 3rd party notification of rename.
4370 * Ignore result of kauth_authorize_fileop call.
4371 */
4372 if (kauth_authorize_fileop_has_listeners() &&
4373 namei(&nd) == 0) {
4374 char *new_link_path = NULL;
4375 int len;
4376
4377 /* build the path to the new link file */
4378 new_link_path = get_pathbuff();
4379 len = MAXPATHLEN;
4380 vn_getpath(dvp, new_link_path, &len);
4381 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
4382 new_link_path[len - 1] = '/';
4383 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
4384 }
4385
4386 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
4387 (uintptr_t)path, (uintptr_t)new_link_path);
4388 if (new_link_path != NULL)
4389 release_pathbuff(new_link_path);
4390 }
4391 #endif
4392 // Make sure the name & parent pointers are hooked up
4393 if (vp->v_name == NULL)
4394 update_flags |= VNODE_UPDATE_NAME;
4395 if (vp->v_parent == NULLVP)
4396 update_flags |= VNODE_UPDATE_PARENT;
4397
4398 if (update_flags)
4399 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
4400
4401 #if CONFIG_FSE
4402 add_fsevent(FSE_CREATE_FILE, ctx,
4403 FSE_ARG_VNODE, vp,
4404 FSE_ARG_DONE);
4405 #endif
4406 }
4407
4408 skipit:
4409 /*
4410 * nameidone has to happen before we vnode_put(dvp)
4411 * since it may need to release the fs_nodelock on the dvp
4412 */
4413 nameidone(&nd);
4414
4415 if (vp)
4416 vnode_put(vp);
4417 vnode_put(dvp);
4418 out:
4419 if (path && (path != (char *)path_data))
4420 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
4421
4422 return (error);
4423 }
4424
4425 int
4426 symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4427 {
4428 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4429 uap->link, UIO_USERSPACE));
4430 }
4431
4432 int
4433 symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4434 __unused int32_t *retval)
4435 {
4436 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4437 uap->path2, UIO_USERSPACE));
4438 }
4439
4440 /*
4441 * Delete a whiteout from the filesystem.
4442 * No longer supported.
4443 */
4444 int
4445 undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
4446 {
4447 return (ENOTSUP);
4448 }
4449
4450 /*
4451 * Delete a name from the filesystem.
4452 */
4453 /* ARGSUSED */
4454 static int
4455 unlink1at(vfs_context_t ctx, struct nameidata *ndp, int unlink_flags, int fd)
4456 {
4457 vnode_t vp, dvp;
4458 int error;
4459 struct componentname *cnp;
4460 char *path = NULL;
4461 int len=0;
4462 #if CONFIG_FSE
4463 fse_info finfo;
4464 struct vnode_attr va;
4465 #endif
4466 int flags = 0;
4467 int need_event = 0;
4468 int has_listeners = 0;
4469 int truncated_path=0;
4470 int batched;
4471 struct vnode_attr *vap = NULL;
4472
4473 #if NAMEDRSRCFORK
4474 /* unlink or delete is allowed on rsrc forks and named streams */
4475 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
4476 #endif
4477
4478 ndp->ni_cnd.cn_flags |= LOCKPARENT;
4479 ndp->ni_flag |= NAMEI_COMPOUNDREMOVE;
4480 cnp = &ndp->ni_cnd;
4481
4482 lookup_continue:
4483 error = nameiat(ndp, fd);
4484 if (error)
4485 return (error);
4486
4487 dvp = ndp->ni_dvp;
4488 vp = ndp->ni_vp;
4489
4490
4491 /* With Carbon delete semantics, busy files cannot be deleted */
4492 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
4493 flags |= VNODE_REMOVE_NODELETEBUSY;
4494 }
4495
4496 /* Skip any potential upcalls if told to. */
4497 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4498 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4499 }
4500
4501 if (vp) {
4502 batched = vnode_compound_remove_available(vp);
4503 /*
4504 * The root of a mounted filesystem cannot be deleted.
4505 */
4506 if (vp->v_flag & VROOT) {
4507 error = EBUSY;
4508 }
4509
4510 if (!batched) {
4511 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4512 if (error) {
4513 goto out;
4514 }
4515 }
4516 } else {
4517 batched = 1;
4518
4519 if (!vnode_compound_remove_available(dvp)) {
4520 panic("No vp, but no compound remove?");
4521 }
4522 }
4523
4524 #if CONFIG_FSE
4525 need_event = need_fsevent(FSE_DELETE, dvp);
4526 if (need_event) {
4527 if (!batched) {
4528 if ((vp->v_flag & VISHARDLINK) == 0) {
4529 /* XXX need to get these data in batched VNOP */
4530 get_fse_info(vp, &finfo, ctx);
4531 }
4532 } else {
4533 error = vfs_get_notify_attributes(&va);
4534 if (error) {
4535 goto out;
4536 }
4537
4538 vap = &va;
4539 }
4540 }
4541 #endif
4542 has_listeners = kauth_authorize_fileop_has_listeners();
4543 if (need_event || has_listeners) {
4544 if (path == NULL) {
4545 GET_PATH(path);
4546 if (path == NULL) {
4547 error = ENOMEM;
4548 goto out;
4549 }
4550 }
4551 len = safe_getpath(dvp, ndp->ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
4552 }
4553
4554 #if NAMEDRSRCFORK
4555 if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK)
4556 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4557 else
4558 #endif
4559 {
4560 error = vn_remove(dvp, &ndp->ni_vp, ndp, flags, vap, ctx);
4561 vp = ndp->ni_vp;
4562 if (error == EKEEPLOOKING) {
4563 if (!batched) {
4564 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4565 }
4566
4567 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
4568 panic("EKEEPLOOKING, but continue flag not set?");
4569 }
4570
4571 if (vnode_isdir(vp)) {
4572 error = EISDIR;
4573 goto out;
4574 }
4575 goto lookup_continue;
4576 }
4577 }
4578
4579 /*
4580 * Call out to allow 3rd party notification of delete.
4581 * Ignore result of kauth_authorize_fileop call.
4582 */
4583 if (!error) {
4584 if (has_listeners) {
4585 kauth_authorize_fileop(vfs_context_ucred(ctx),
4586 KAUTH_FILEOP_DELETE,
4587 (uintptr_t)vp,
4588 (uintptr_t)path);
4589 }
4590
4591 if (vp->v_flag & VISHARDLINK) {
4592 //
4593 // if a hardlink gets deleted we want to blow away the
4594 // v_parent link because the path that got us to this
4595 // instance of the link is no longer valid. this will
4596 // force the next call to get the path to ask the file
4597 // system instead of just following the v_parent link.
4598 //
4599 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
4600 }
4601
4602 #if CONFIG_FSE
4603 if (need_event) {
4604 if (vp->v_flag & VISHARDLINK) {
4605 get_fse_info(vp, &finfo, ctx);
4606 } else if (vap) {
4607 vnode_get_fse_info_from_vap(vp, &finfo, vap);
4608 }
4609 if (truncated_path) {
4610 finfo.mode |= FSE_TRUNCATED_PATH;
4611 }
4612 add_fsevent(FSE_DELETE, ctx,
4613 FSE_ARG_STRING, len, path,
4614 FSE_ARG_FINFO, &finfo,
4615 FSE_ARG_DONE);
4616 }
4617 #endif
4618 }
4619
4620 out:
4621 if (path != NULL)
4622 RELEASE_PATH(path);
4623
4624 #if NAMEDRSRCFORK
4625 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4626 * will cause its shadow file to go away if necessary.
4627 */
4628 if (vp && (vnode_isnamedstream(vp)) &&
4629 (vp->v_parent != NULLVP) &&
4630 vnode_isshadow(vp)) {
4631 vnode_recycle(vp);
4632 }
4633 #endif
4634 /*
4635 * nameidone has to happen before we vnode_put(dvp)
4636 * since it may need to release the fs_nodelock on the dvp
4637 */
4638 nameidone(ndp);
4639 vnode_put(dvp);
4640 if (vp) {
4641 vnode_put(vp);
4642 }
4643 return (error);
4644 }
4645
4646 int
4647 unlink1(vfs_context_t ctx, struct nameidata *ndp, int unlink_flags)
4648 {
4649 return (unlink1at(ctx, ndp, unlink_flags, AT_FDCWD));
4650 }
4651
4652 /*
4653 * Delete a name from the filesystem using POSIX semantics.
4654 */
4655 static int
4656 unlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
4657 enum uio_seg segflg)
4658 {
4659 struct nameidata nd;
4660
4661 NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, segflg,
4662 path, ctx);
4663 return (unlink1at(ctx, &nd, 0, fd));
4664 }
4665
4666 int
4667 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
4668 {
4669 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4670 UIO_USERSPACE));
4671 }
4672
4673 int
4674 unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
4675 {
4676 if (uap->flag & ~AT_REMOVEDIR)
4677 return (EINVAL);
4678
4679 if (uap->flag & AT_REMOVEDIR)
4680 return (rmdirat_internal(vfs_context_current(), uap->fd,
4681 uap->path, UIO_USERSPACE));
4682 else
4683 return (unlinkat_internal(vfs_context_current(), uap->fd,
4684 uap->path, UIO_USERSPACE));
4685 }
4686
4687 /*
4688 * Delete a name from the filesystem using Carbon semantics.
4689 */
4690 int
4691 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
4692 {
4693 struct nameidata nd;
4694 vfs_context_t ctx = vfs_context_current();
4695
4696 NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
4697 uap->path, ctx);
4698 return unlink1(ctx, &nd, VNODE_REMOVE_NODELETEBUSY);
4699 }
4700
4701 /*
4702 * Reposition read/write file offset.
4703 */
4704 int
4705 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
4706 {
4707 struct fileproc *fp;
4708 vnode_t vp;
4709 struct vfs_context *ctx;
4710 off_t offset = uap->offset, file_size;
4711 int error;
4712
4713 if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
4714 if (error == ENOTSUP)
4715 return (ESPIPE);
4716 return (error);
4717 }
4718 if (vnode_isfifo(vp)) {
4719 file_drop(uap->fd);
4720 return(ESPIPE);
4721 }
4722
4723
4724 ctx = vfs_context_current();
4725 #if CONFIG_MACF
4726 if (uap->whence == L_INCR && uap->offset == 0)
4727 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
4728 fp->f_fglob);
4729 else
4730 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
4731 fp->f_fglob);
4732 if (error) {
4733 file_drop(uap->fd);
4734 return (error);
4735 }
4736 #endif
4737 if ( (error = vnode_getwithref(vp)) ) {
4738 file_drop(uap->fd);
4739 return(error);
4740 }
4741
4742 switch (uap->whence) {
4743 case L_INCR:
4744 offset += fp->f_fglob->fg_offset;
4745 break;
4746 case L_XTND:
4747 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
4748 break;
4749 offset += file_size;
4750 break;
4751 case L_SET:
4752 break;
4753 default:
4754 error = EINVAL;
4755 }
4756 if (error == 0) {
4757 if (uap->offset > 0 && offset < 0) {
4758 /* Incremented/relative move past max size */
4759 error = EOVERFLOW;
4760 } else {
4761 /*
4762 * Allow negative offsets on character devices, per
4763 * POSIX 1003.1-2001. Most likely for writing disk
4764 * labels.
4765 */
4766 if (offset < 0 && vp->v_type != VCHR) {
4767 /* Decremented/relative move before start */
4768 error = EINVAL;
4769 } else {
4770 /* Success */
4771 fp->f_fglob->fg_offset = offset;
4772 *retval = fp->f_fglob->fg_offset;
4773 }
4774 }
4775 }
4776
4777 /*
4778 * An lseek can affect whether data is "available to read." Use
4779 * hint of NOTE_NONE so no EVFILT_VNODE events fire
4780 */
4781 post_event_if_success(vp, error, NOTE_NONE);
4782 (void)vnode_put(vp);
4783 file_drop(uap->fd);
4784 return (error);
4785 }
4786
4787
4788 /*
4789 * Check access permissions.
4790 *
4791 * Returns: 0 Success
4792 * vnode_authorize:???
4793 */
4794 static int
4795 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
4796 {
4797 kauth_action_t action;
4798 int error;
4799
4800 /*
4801 * If just the regular access bits, convert them to something
4802 * that vnode_authorize will understand.
4803 */
4804 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4805 action = 0;
4806 if (uflags & R_OK)
4807 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4808 if (uflags & W_OK) {
4809 if (vnode_isdir(vp)) {
4810 action |= KAUTH_VNODE_ADD_FILE |
4811 KAUTH_VNODE_ADD_SUBDIRECTORY;
4812 /* might want delete rights here too */
4813 } else {
4814 action |= KAUTH_VNODE_WRITE_DATA;
4815 }
4816 }
4817 if (uflags & X_OK) {
4818 if (vnode_isdir(vp)) {
4819 action |= KAUTH_VNODE_SEARCH;
4820 } else {
4821 action |= KAUTH_VNODE_EXECUTE;
4822 }
4823 }
4824 } else {
4825 /* take advantage of definition of uflags */
4826 action = uflags >> 8;
4827 }
4828
4829 #if CONFIG_MACF
4830 error = mac_vnode_check_access(ctx, vp, uflags);
4831 if (error)
4832 return (error);
4833 #endif /* MAC */
4834
4835 /* action == 0 means only check for existence */
4836 if (action != 0) {
4837 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
4838 } else {
4839 error = 0;
4840 }
4841
4842 return(error);
4843 }
4844
4845
4846
4847 /*
4848 * access_extended: Check access permissions in bulk.
4849 *
4850 * Description: uap->entries Pointer to an array of accessx
4851 * descriptor structs, plus one or
4852 * more NULL terminated strings (see
4853 * "Notes" section below).
4854 * uap->size Size of the area pointed to by
4855 * uap->entries.
4856 * uap->results Pointer to the results array.
4857 *
4858 * Returns: 0 Success
4859 * ENOMEM Insufficient memory
4860 * EINVAL Invalid arguments
4861 * namei:EFAULT Bad address
4862 * namei:ENAMETOOLONG Filename too long
4863 * namei:ENOENT No such file or directory
4864 * namei:ELOOP Too many levels of symbolic links
4865 * namei:EBADF Bad file descriptor
4866 * namei:ENOTDIR Not a directory
4867 * namei:???
4868 * access1:
4869 *
4870 * Implicit returns:
4871 * uap->results Array contents modified
4872 *
4873 * Notes: The uap->entries are structured as an arbitrary length array
4874 * of accessx descriptors, followed by one or more NULL terminated
4875 * strings
4876 *
4877 * struct accessx_descriptor[0]
4878 * ...
4879 * struct accessx_descriptor[n]
4880 * char name_data[0];
4881 *
4882 * We determine the entry count by walking the buffer containing
4883 * the uap->entries argument descriptor. For each descriptor we
4884 * see, the valid values for the offset ad_name_offset will be
4885 * in the byte range:
4886 *
4887 * [ uap->entries + sizeof(struct accessx_descriptor) ]
4888 * to
4889 * [ uap->entries + uap->size - 2 ]
4890 *
4891 * since we must have at least one string, and the string must
4892 * be at least one character plus the NULL terminator in length.
4893 *
4894 * XXX: Need to support the check-as uid argument
4895 */
4896 int
4897 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
4898 {
4899 struct accessx_descriptor *input = NULL;
4900 errno_t *result = NULL;
4901 errno_t error = 0;
4902 int wantdelete = 0;
4903 unsigned int desc_max, desc_actual, i, j;
4904 struct vfs_context context;
4905 struct nameidata nd;
4906 int niopts;
4907 vnode_t vp = NULL;
4908 vnode_t dvp = NULL;
4909 #define ACCESSX_MAX_DESCR_ON_STACK 10
4910 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
4911
4912 context.vc_ucred = NULL;
4913
4914 /*
4915 * Validate parameters; if valid, copy the descriptor array and string
4916 * arguments into local memory. Before proceeding, the following
4917 * conditions must have been met:
4918 *
4919 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
4920 * o There must be sufficient room in the request for at least one
4921 * descriptor and a one yte NUL terminated string.
4922 * o The allocation of local storage must not fail.
4923 */
4924 if (uap->size > ACCESSX_MAX_TABLESIZE)
4925 return(ENOMEM);
4926 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
4927 return(EINVAL);
4928 if (uap->size <= sizeof (stack_input)) {
4929 input = stack_input;
4930 } else {
4931 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
4932 if (input == NULL) {
4933 error = ENOMEM;
4934 goto out;
4935 }
4936 }
4937 error = copyin(uap->entries, input, uap->size);
4938 if (error)
4939 goto out;
4940
4941 AUDIT_ARG(opaque, input, uap->size);
4942
4943 /*
4944 * Force NUL termination of the copyin buffer to avoid nami() running
4945 * off the end. If the caller passes us bogus data, they may get a
4946 * bogus result.
4947 */
4948 ((char *)input)[uap->size - 1] = 0;
4949
4950 /*
4951 * Access is defined as checking against the process' real identity,
4952 * even if operations are checking the effective identity. This
4953 * requires that we use a local vfs context.
4954 */
4955 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
4956 context.vc_thread = current_thread();
4957
4958 /*
4959 * Find out how many entries we have, so we can allocate the result
4960 * array by walking the list and adjusting the count downward by the
4961 * earliest string offset we see.
4962 */
4963 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
4964 desc_actual = desc_max;
4965 for (i = 0; i < desc_actual; i++) {
4966 /*
4967 * Take the offset to the name string for this entry and
4968 * convert to an input array index, which would be one off
4969 * the end of the array if this entry was the lowest-addressed
4970 * name string.
4971 */
4972 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
4973
4974 /*
4975 * An offset greater than the max allowable offset is an error.
4976 * It is also an error for any valid entry to point
4977 * to a location prior to the end of the current entry, if
4978 * it's not a reference to the string of the previous entry.
4979 */
4980 if (j > desc_max || (j != 0 && j <= i)) {
4981 error = EINVAL;
4982 goto out;
4983 }
4984
4985 /*
4986 * An offset of 0 means use the previous descriptor's offset;
4987 * this is used to chain multiple requests for the same file
4988 * to avoid multiple lookups.
4989 */
4990 if (j == 0) {
4991 /* This is not valid for the first entry */
4992 if (i == 0) {
4993 error = EINVAL;
4994 goto out;
4995 }
4996 continue;
4997 }
4998
4999 /*
5000 * If the offset of the string for this descriptor is before
5001 * what we believe is the current actual last descriptor,
5002 * then we need to adjust our estimate downward; this permits
5003 * the string table following the last descriptor to be out
5004 * of order relative to the descriptor list.
5005 */
5006 if (j < desc_actual)
5007 desc_actual = j;
5008 }
5009
5010 /*
5011 * We limit the actual number of descriptors we are willing to process
5012 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
5013 * requested does not exceed this limit,
5014 */
5015 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
5016 error = ENOMEM;
5017 goto out;
5018 }
5019 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
5020 if (result == NULL) {
5021 error = ENOMEM;
5022 goto out;
5023 }
5024
5025 /*
5026 * Do the work by iterating over the descriptor entries we know to
5027 * at least appear to contain valid data.
5028 */
5029 error = 0;
5030 for (i = 0; i < desc_actual; i++) {
5031 /*
5032 * If the ad_name_offset is 0, then we use the previous
5033 * results to make the check; otherwise, we are looking up
5034 * a new file name.
5035 */
5036 if (input[i].ad_name_offset != 0) {
5037 /* discard old vnodes */
5038 if (vp) {
5039 vnode_put(vp);
5040 vp = NULL;
5041 }
5042 if (dvp) {
5043 vnode_put(dvp);
5044 dvp = NULL;
5045 }
5046
5047 /*
5048 * Scan forward in the descriptor list to see if we
5049 * need the parent vnode. We will need it if we are
5050 * deleting, since we must have rights to remove
5051 * entries in the parent directory, as well as the
5052 * rights to delete the object itself.
5053 */
5054 wantdelete = input[i].ad_flags & _DELETE_OK;
5055 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
5056 if (input[j].ad_flags & _DELETE_OK)
5057 wantdelete = 1;
5058
5059 niopts = FOLLOW | AUDITVNPATH1;
5060
5061 /* need parent for vnode_authorize for deletion test */
5062 if (wantdelete)
5063 niopts |= WANTPARENT;
5064
5065 /* do the lookup */
5066 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
5067 CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
5068 &context);
5069 error = namei(&nd);
5070 if (!error) {
5071 vp = nd.ni_vp;
5072 if (wantdelete)
5073 dvp = nd.ni_dvp;
5074 }
5075 nameidone(&nd);
5076 }
5077
5078 /*
5079 * Handle lookup errors.
5080 */
5081 switch(error) {
5082 case ENOENT:
5083 case EACCES:
5084 case EPERM:
5085 case ENOTDIR:
5086 result[i] = error;
5087 break;
5088 case 0:
5089 /* run this access check */
5090 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
5091 break;
5092 default:
5093 /* fatal lookup error */
5094
5095 goto out;
5096 }
5097 }
5098
5099 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
5100
5101 /* copy out results */
5102 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
5103
5104 out:
5105 if (input && input != stack_input)
5106 FREE(input, M_TEMP);
5107 if (result)
5108 FREE(result, M_TEMP);
5109 if (vp)
5110 vnode_put(vp);
5111 if (dvp)
5112 vnode_put(dvp);
5113 if (IS_VALID_CRED(context.vc_ucred))
5114 kauth_cred_unref(&context.vc_ucred);
5115 return(error);
5116 }
5117
5118
5119 /*
5120 * Returns: 0 Success
5121 * namei:EFAULT Bad address
5122 * namei:ENAMETOOLONG Filename too long
5123 * namei:ENOENT No such file or directory
5124 * namei:ELOOP Too many levels of symbolic links
5125 * namei:EBADF Bad file descriptor
5126 * namei:ENOTDIR Not a directory
5127 * namei:???
5128 * access1:
5129 */
5130 static int
5131 faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5132 int flag, enum uio_seg segflg)
5133 {
5134 int error;
5135 struct nameidata nd;
5136 int niopts;
5137 struct vfs_context context;
5138 #if NAMEDRSRCFORK
5139 int is_namedstream = 0;
5140 #endif
5141
5142 /*
5143 * Unless the AT_EACCESS option is used, Access is defined as checking
5144 * against the process' real identity, even if operations are checking
5145 * the effective identity. So we need to tweak the credential
5146 * in the context for that case.
5147 */
5148 if (!(flag & AT_EACCESS))
5149 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5150 else
5151 context.vc_ucred = ctx->vc_ucred;
5152 context.vc_thread = ctx->vc_thread;
5153
5154
5155 niopts = FOLLOW | AUDITVNPATH1;
5156 /* need parent for vnode_authorize for deletion test */
5157 if (amode & _DELETE_OK)
5158 niopts |= WANTPARENT;
5159 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5160 path, &context);
5161
5162 #if NAMEDRSRCFORK
5163 /* access(F_OK) calls are allowed for resource forks. */
5164 if (amode == F_OK)
5165 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5166 #endif
5167 error = nameiat(&nd, fd);
5168 if (error)
5169 goto out;
5170
5171 #if NAMEDRSRCFORK
5172 /* Grab reference on the shadow stream file vnode to
5173 * force an inactive on release which will mark it
5174 * for recycle.
5175 */
5176 if (vnode_isnamedstream(nd.ni_vp) &&
5177 (nd.ni_vp->v_parent != NULLVP) &&
5178 vnode_isshadow(nd.ni_vp)) {
5179 is_namedstream = 1;
5180 vnode_ref(nd.ni_vp);
5181 }
5182 #endif
5183
5184 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
5185
5186 #if NAMEDRSRCFORK
5187 if (is_namedstream) {
5188 vnode_rele(nd.ni_vp);
5189 }
5190 #endif
5191
5192 vnode_put(nd.ni_vp);
5193 if (amode & _DELETE_OK)
5194 vnode_put(nd.ni_dvp);
5195 nameidone(&nd);
5196
5197 out:
5198 if (!(flag & AT_EACCESS))
5199 kauth_cred_unref(&context.vc_ucred);
5200 return (error);
5201 }
5202
5203 int
5204 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5205 {
5206 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5207 uap->path, uap->flags, 0, UIO_USERSPACE));
5208 }
5209
5210 int
5211 faccessat(__unused proc_t p, struct faccessat_args *uap,
5212 __unused int32_t *retval)
5213 {
5214 if (uap->flag & ~AT_EACCESS)
5215 return (EINVAL);
5216
5217 return (faccessat_internal(vfs_context_current(), uap->fd,
5218 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5219 }
5220
5221 /*
5222 * Returns: 0 Success
5223 * EFAULT
5224 * copyout:EFAULT
5225 * namei:???
5226 * vn_stat:???
5227 */
5228 static int
5229 fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5230 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5231 enum uio_seg segflg, int fd, int flag)
5232 {
5233 struct nameidata nd;
5234 int follow;
5235 union {
5236 struct stat sb;
5237 struct stat64 sb64;
5238 } source;
5239 union {
5240 struct user64_stat user64_sb;
5241 struct user32_stat user32_sb;
5242 struct user64_stat64 user64_sb64;
5243 struct user32_stat64 user32_sb64;
5244 } dest;
5245 caddr_t sbp;
5246 int error, my_size;
5247 kauth_filesec_t fsec;
5248 size_t xsecurity_bufsize;
5249 void * statptr;
5250
5251 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5252 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5253 segflg, path, ctx);
5254
5255 #if NAMEDRSRCFORK
5256 int is_namedstream = 0;
5257 /* stat calls are allowed for resource forks. */
5258 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5259 #endif
5260 error = nameiat(&nd, fd);
5261 if (error)
5262 return (error);
5263 fsec = KAUTH_FILESEC_NONE;
5264
5265 statptr = (void *)&source;
5266
5267 #if NAMEDRSRCFORK
5268 /* Grab reference on the shadow stream file vnode to
5269 * force an inactive on release which will mark it
5270 * for recycle.
5271 */
5272 if (vnode_isnamedstream(nd.ni_vp) &&
5273 (nd.ni_vp->v_parent != NULLVP) &&
5274 vnode_isshadow(nd.ni_vp)) {
5275 is_namedstream = 1;
5276 vnode_ref(nd.ni_vp);
5277 }
5278 #endif
5279
5280 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
5281
5282 #if NAMEDRSRCFORK
5283 if (is_namedstream) {
5284 vnode_rele(nd.ni_vp);
5285 }
5286 #endif
5287 vnode_put(nd.ni_vp);
5288 nameidone(&nd);
5289
5290 if (error)
5291 return (error);
5292 /* Zap spare fields */
5293 if (isstat64 != 0) {
5294 source.sb64.st_lspare = 0;
5295 source.sb64.st_qspare[0] = 0LL;
5296 source.sb64.st_qspare[1] = 0LL;
5297 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5298 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
5299 my_size = sizeof(dest.user64_sb64);
5300 sbp = (caddr_t)&dest.user64_sb64;
5301 } else {
5302 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
5303 my_size = sizeof(dest.user32_sb64);
5304 sbp = (caddr_t)&dest.user32_sb64;
5305 }
5306 /*
5307 * Check if we raced (post lookup) against the last unlink of a file.
5308 */
5309 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5310 source.sb64.st_nlink = 1;
5311 }
5312 } else {
5313 source.sb.st_lspare = 0;
5314 source.sb.st_qspare[0] = 0LL;
5315 source.sb.st_qspare[1] = 0LL;
5316 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5317 munge_user64_stat(&source.sb, &dest.user64_sb);
5318 my_size = sizeof(dest.user64_sb);
5319 sbp = (caddr_t)&dest.user64_sb;
5320 } else {
5321 munge_user32_stat(&source.sb, &dest.user32_sb);
5322 my_size = sizeof(dest.user32_sb);
5323 sbp = (caddr_t)&dest.user32_sb;
5324 }
5325
5326 /*
5327 * Check if we raced (post lookup) against the last unlink of a file.
5328 */
5329 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5330 source.sb.st_nlink = 1;
5331 }
5332 }
5333 if ((error = copyout(sbp, ub, my_size)) != 0)
5334 goto out;
5335
5336 /* caller wants extended security information? */
5337 if (xsecurity != USER_ADDR_NULL) {
5338
5339 /* did we get any? */
5340 if (fsec == KAUTH_FILESEC_NONE) {
5341 if (susize(xsecurity_size, 0) != 0) {
5342 error = EFAULT;
5343 goto out;
5344 }
5345 } else {
5346 /* find the user buffer size */
5347 xsecurity_bufsize = fusize(xsecurity_size);
5348
5349 /* copy out the actual data size */
5350 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5351 error = EFAULT;
5352 goto out;
5353 }
5354
5355 /* if the caller supplied enough room, copy out to it */
5356 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5357 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5358 }
5359 }
5360 out:
5361 if (fsec != KAUTH_FILESEC_NONE)
5362 kauth_filesec_free(fsec);
5363 return (error);
5364 }
5365
5366 /*
5367 * stat_extended: Get file status; with extended security (ACL).
5368 *
5369 * Parameters: p (ignored)
5370 * uap User argument descriptor (see below)
5371 * retval (ignored)
5372 *
5373 * Indirect: uap->path Path of file to get status from
5374 * uap->ub User buffer (holds file status info)
5375 * uap->xsecurity ACL to get (extended security)
5376 * uap->xsecurity_size Size of ACL
5377 *
5378 * Returns: 0 Success
5379 * !0 errno value
5380 *
5381 */
5382 int
5383 stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5384 __unused int32_t *retval)
5385 {
5386 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5387 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5388 0));
5389 }
5390
5391 /*
5392 * Returns: 0 Success
5393 * fstatat_internal:??? [see fstatat_internal() in this file]
5394 */
5395 int
5396 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
5397 {
5398 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5399 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
5400 }
5401
5402 int
5403 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
5404 {
5405 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5406 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
5407 }
5408
5409 /*
5410 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5411 *
5412 * Parameters: p (ignored)
5413 * uap User argument descriptor (see below)
5414 * retval (ignored)
5415 *
5416 * Indirect: uap->path Path of file to get status from
5417 * uap->ub User buffer (holds file status info)
5418 * uap->xsecurity ACL to get (extended security)
5419 * uap->xsecurity_size Size of ACL
5420 *
5421 * Returns: 0 Success
5422 * !0 errno value
5423 *
5424 */
5425 int
5426 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
5427 {
5428 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5429 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5430 0));
5431 }
5432
5433 /*
5434 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5435 *
5436 * Parameters: p (ignored)
5437 * uap User argument descriptor (see below)
5438 * retval (ignored)
5439 *
5440 * Indirect: uap->path Path of file to get status from
5441 * uap->ub User buffer (holds file status info)
5442 * uap->xsecurity ACL to get (extended security)
5443 * uap->xsecurity_size Size of ACL
5444 *
5445 * Returns: 0 Success
5446 * !0 errno value
5447 *
5448 */
5449 int
5450 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
5451 {
5452 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5453 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5454 AT_SYMLINK_NOFOLLOW));
5455 }
5456
5457 /*
5458 * Get file status; this version does not follow links.
5459 */
5460 int
5461 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
5462 {
5463 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5464 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5465 }
5466
5467 int
5468 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
5469 {
5470 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5471 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5472 }
5473
5474 /*
5475 * lstat64_extended: Get file status; can handle large inode numbers; does not
5476 * follow links; with extended security (ACL).
5477 *
5478 * Parameters: p (ignored)
5479 * uap User argument descriptor (see below)
5480 * retval (ignored)
5481 *
5482 * Indirect: uap->path Path of file to get status from
5483 * uap->ub User buffer (holds file status info)
5484 * uap->xsecurity ACL to get (extended security)
5485 * uap->xsecurity_size Size of ACL
5486 *
5487 * Returns: 0 Success
5488 * !0 errno value
5489 *
5490 */
5491 int
5492 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
5493 {
5494 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5495 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5496 AT_SYMLINK_NOFOLLOW));
5497 }
5498
5499 int
5500 fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5501 {
5502 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5503 return (EINVAL);
5504
5505 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5506 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5507 }
5508
5509 int
5510 fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5511 __unused int32_t *retval)
5512 {
5513 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5514 return (EINVAL);
5515
5516 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5517 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
5518 }
5519
5520 /*
5521 * Get configurable pathname variables.
5522 *
5523 * Returns: 0 Success
5524 * namei:???
5525 * vn_pathconf:???
5526 *
5527 * Notes: Global implementation constants are intended to be
5528 * implemented in this function directly; all other constants
5529 * are per-FS implementation, and therefore must be handled in
5530 * each respective FS, instead.
5531 *
5532 * XXX We implement some things globally right now that should actually be
5533 * XXX per-FS; we will need to deal with this at some point.
5534 */
5535 /* ARGSUSED */
5536 int
5537 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
5538 {
5539 int error;
5540 struct nameidata nd;
5541 vfs_context_t ctx = vfs_context_current();
5542
5543 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
5544 UIO_USERSPACE, uap->path, ctx);
5545 error = namei(&nd);
5546 if (error)
5547 return (error);
5548
5549 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
5550
5551 vnode_put(nd.ni_vp);
5552 nameidone(&nd);
5553 return (error);
5554 }
5555
5556 /*
5557 * Return target name of a symbolic link.
5558 */
5559 /* ARGSUSED */
5560 static int
5561 readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
5562 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
5563 int *retval)
5564 {
5565 vnode_t vp;
5566 uio_t auio;
5567 int error;
5568 struct nameidata nd;
5569 char uio_buf[ UIO_SIZEOF(1) ];
5570
5571 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5572 seg, path, ctx);
5573
5574 error = nameiat(&nd, fd);
5575 if (error)
5576 return (error);
5577 vp = nd.ni_vp;
5578
5579 nameidone(&nd);
5580
5581 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
5582 &uio_buf[0], sizeof(uio_buf));
5583 uio_addiov(auio, buf, bufsize);
5584 if (vp->v_type != VLNK) {
5585 error = EINVAL;
5586 } else {
5587 #if CONFIG_MACF
5588 error = mac_vnode_check_readlink(ctx, vp);
5589 #endif
5590 if (error == 0)
5591 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
5592 ctx);
5593 if (error == 0)
5594 error = VNOP_READLINK(vp, auio, ctx);
5595 }
5596 vnode_put(vp);
5597
5598 *retval = bufsize - (int)uio_resid(auio);
5599 return (error);
5600 }
5601
5602 int
5603 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5604 {
5605 enum uio_seg procseg;
5606
5607 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5608 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5609 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5610 uap->count, procseg, retval));
5611 }
5612
5613 int
5614 readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5615 {
5616 enum uio_seg procseg;
5617
5618 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5619 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5620 procseg, uap->buf, uap->bufsize, procseg, retval));
5621 }
5622
5623 /*
5624 * Change file flags.
5625 */
5626 static int
5627 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5628 {
5629 struct vnode_attr va;
5630 kauth_action_t action;
5631 int error;
5632
5633 VATTR_INIT(&va);
5634 VATTR_SET(&va, va_flags, flags);
5635
5636 #if CONFIG_MACF
5637 error = mac_vnode_check_setflags(ctx, vp, flags);
5638 if (error)
5639 goto out;
5640 #endif
5641
5642 /* request authorisation, disregard immutability */
5643 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5644 goto out;
5645 /*
5646 * Request that the auth layer disregard those file flags it's allowed to when
5647 * authorizing this operation; we need to do this in order to be able to
5648 * clear immutable flags.
5649 */
5650 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
5651 goto out;
5652 error = vnode_setattr(vp, &va, ctx);
5653
5654 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5655 error = ENOTSUP;
5656 }
5657 out:
5658 vnode_put(vp);
5659 return(error);
5660 }
5661
5662 /*
5663 * Change flags of a file given a path name.
5664 */
5665 /* ARGSUSED */
5666 int
5667 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
5668 {
5669 vnode_t vp;
5670 vfs_context_t ctx = vfs_context_current();
5671 int error;
5672 struct nameidata nd;
5673
5674 AUDIT_ARG(fflags, uap->flags);
5675 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5676 UIO_USERSPACE, uap->path, ctx);
5677 error = namei(&nd);
5678 if (error)
5679 return (error);
5680 vp = nd.ni_vp;
5681 nameidone(&nd);
5682
5683 error = chflags1(vp, uap->flags, ctx);
5684
5685 return(error);
5686 }
5687
5688 /*
5689 * Change flags of a file given a file descriptor.
5690 */
5691 /* ARGSUSED */
5692 int
5693 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
5694 {
5695 vnode_t vp;
5696 int error;
5697
5698 AUDIT_ARG(fd, uap->fd);
5699 AUDIT_ARG(fflags, uap->flags);
5700 if ( (error = file_vnode(uap->fd, &vp)) )
5701 return (error);
5702
5703 if ((error = vnode_getwithref(vp))) {
5704 file_drop(uap->fd);
5705 return(error);
5706 }
5707
5708 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5709
5710 error = chflags1(vp, uap->flags, vfs_context_current());
5711
5712 file_drop(uap->fd);
5713 return (error);
5714 }
5715
5716 /*
5717 * Change security information on a filesystem object.
5718 *
5719 * Returns: 0 Success
5720 * EPERM Operation not permitted
5721 * vnode_authattr:??? [anything vnode_authattr can return]
5722 * vnode_authorize:??? [anything vnode_authorize can return]
5723 * vnode_setattr:??? [anything vnode_setattr can return]
5724 *
5725 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5726 * translated to EPERM before being returned.
5727 */
5728 static int
5729 chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
5730 {
5731 kauth_action_t action;
5732 int error;
5733
5734 AUDIT_ARG(mode, vap->va_mode);
5735 /* XXX audit new args */
5736
5737 #if NAMEDSTREAMS
5738 /* chmod calls are not allowed for resource forks. */
5739 if (vp->v_flag & VISNAMEDSTREAM) {
5740 return (EPERM);
5741 }
5742 #endif
5743
5744 #if CONFIG_MACF
5745 if (VATTR_IS_ACTIVE(vap, va_mode) &&
5746 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
5747 return (error);
5748 #endif
5749
5750 /* make sure that the caller is allowed to set this security information */
5751 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
5752 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5753 if (error == EACCES)
5754 error = EPERM;
5755 return(error);
5756 }
5757
5758 error = vnode_setattr(vp, vap, ctx);
5759
5760 return (error);
5761 }
5762
5763
5764 /*
5765 * Change mode of a file given a path name.
5766 *
5767 * Returns: 0 Success
5768 * namei:??? [anything namei can return]
5769 * chmod_vnode:??? [anything chmod_vnode can return]
5770 */
5771 static int
5772 chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
5773 int fd, int flag, enum uio_seg segflg)
5774 {
5775 struct nameidata nd;
5776 int follow, error;
5777
5778 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5779 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
5780 segflg, path, ctx);
5781 if ((error = nameiat(&nd, fd)))
5782 return (error);
5783 error = chmod_vnode(ctx, nd.ni_vp, vap);
5784 vnode_put(nd.ni_vp);
5785 nameidone(&nd);
5786 return(error);
5787 }
5788
5789 /*
5790 * chmod_extended: Change the mode of a file given a path name; with extended
5791 * argument list (including extended security (ACL)).
5792 *
5793 * Parameters: p Process requesting the open
5794 * uap User argument descriptor (see below)
5795 * retval (ignored)
5796 *
5797 * Indirect: uap->path Path to object (same as 'chmod')
5798 * uap->uid UID to set
5799 * uap->gid GID to set
5800 * uap->mode File mode to set (same as 'chmod')
5801 * uap->xsecurity ACL to set (or delete)
5802 *
5803 * Returns: 0 Success
5804 * !0 errno value
5805 *
5806 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5807 *
5808 * XXX: We should enumerate the possible errno values here, and where
5809 * in the code they originated.
5810 */
5811 int
5812 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
5813 {
5814 int error;
5815 struct vnode_attr va;
5816 kauth_filesec_t xsecdst;
5817
5818 AUDIT_ARG(owner, uap->uid, uap->gid);
5819
5820 VATTR_INIT(&va);
5821 if (uap->mode != -1)
5822 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5823 if (uap->uid != KAUTH_UID_NONE)
5824 VATTR_SET(&va, va_uid, uap->uid);
5825 if (uap->gid != KAUTH_GID_NONE)
5826 VATTR_SET(&va, va_gid, uap->gid);
5827
5828 xsecdst = NULL;
5829 switch(uap->xsecurity) {
5830 /* explicit remove request */
5831 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5832 VATTR_SET(&va, va_acl, NULL);
5833 break;
5834 /* not being set */
5835 case USER_ADDR_NULL:
5836 break;
5837 default:
5838 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5839 return(error);
5840 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5841 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
5842 }
5843
5844 error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
5845 UIO_USERSPACE);
5846
5847 if (xsecdst != NULL)
5848 kauth_filesec_free(xsecdst);
5849 return(error);
5850 }
5851
5852 /*
5853 * Returns: 0 Success
5854 * chmodat:??? [anything chmodat can return]
5855 */
5856 static int
5857 fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
5858 int flag, enum uio_seg segflg)
5859 {
5860 struct vnode_attr va;
5861
5862 VATTR_INIT(&va);
5863 VATTR_SET(&va, va_mode, mode & ALLPERMS);
5864
5865 return (chmodat(ctx, path, &va, fd, flag, segflg));
5866 }
5867
5868 int
5869 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
5870 {
5871 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
5872 AT_FDCWD, 0, UIO_USERSPACE));
5873 }
5874
5875 int
5876 fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
5877 {
5878 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5879 return (EINVAL);
5880
5881 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
5882 uap->fd, uap->flag, UIO_USERSPACE));
5883 }
5884
5885 /*
5886 * Change mode of a file given a file descriptor.
5887 */
5888 static int
5889 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
5890 {
5891 vnode_t vp;
5892 int error;
5893
5894 AUDIT_ARG(fd, fd);
5895
5896 if ((error = file_vnode(fd, &vp)) != 0)
5897 return (error);
5898 if ((error = vnode_getwithref(vp)) != 0) {
5899 file_drop(fd);
5900 return(error);
5901 }
5902 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5903
5904 error = chmod_vnode(vfs_context_current(), vp, vap);
5905 (void)vnode_put(vp);
5906 file_drop(fd);
5907
5908 return (error);
5909 }
5910
5911 /*
5912 * fchmod_extended: Change mode of a file given a file descriptor; with
5913 * extended argument list (including extended security (ACL)).
5914 *
5915 * Parameters: p Process requesting to change file mode
5916 * uap User argument descriptor (see below)
5917 * retval (ignored)
5918 *
5919 * Indirect: uap->mode File mode to set (same as 'chmod')
5920 * uap->uid UID to set
5921 * uap->gid GID to set
5922 * uap->xsecurity ACL to set (or delete)
5923 * uap->fd File descriptor of file to change mode
5924 *
5925 * Returns: 0 Success
5926 * !0 errno value
5927 *
5928 */
5929 int
5930 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
5931 {
5932 int error;
5933 struct vnode_attr va;
5934 kauth_filesec_t xsecdst;
5935
5936 AUDIT_ARG(owner, uap->uid, uap->gid);
5937
5938 VATTR_INIT(&va);
5939 if (uap->mode != -1)
5940 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5941 if (uap->uid != KAUTH_UID_NONE)
5942 VATTR_SET(&va, va_uid, uap->uid);
5943 if (uap->gid != KAUTH_GID_NONE)
5944 VATTR_SET(&va, va_gid, uap->gid);
5945
5946 xsecdst = NULL;
5947 switch(uap->xsecurity) {
5948 case USER_ADDR_NULL:
5949 VATTR_SET(&va, va_acl, NULL);
5950 break;
5951 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
5952 VATTR_SET(&va, va_acl, NULL);
5953 break;
5954 /* not being set */
5955 case CAST_USER_ADDR_T(-1):
5956 break;
5957 default:
5958 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
5959 return(error);
5960 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5961 }
5962
5963 error = fchmod1(p, uap->fd, &va);
5964
5965
5966 switch(uap->xsecurity) {
5967 case USER_ADDR_NULL:
5968 case CAST_USER_ADDR_T(-1):
5969 break;
5970 default:
5971 if (xsecdst != NULL)
5972 kauth_filesec_free(xsecdst);
5973 }
5974 return(error);
5975 }
5976
5977 int
5978 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
5979 {
5980 struct vnode_attr va;
5981
5982 VATTR_INIT(&va);
5983 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5984
5985 return(fchmod1(p, uap->fd, &va));
5986 }
5987
5988
5989 /*
5990 * Set ownership given a path name.
5991 */
5992 /* ARGSUSED */
5993 static int
5994 fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
5995 gid_t gid, int flag, enum uio_seg segflg)
5996 {
5997 vnode_t vp;
5998 struct vnode_attr va;
5999 int error;
6000 struct nameidata nd;
6001 int follow;
6002 kauth_action_t action;
6003
6004 AUDIT_ARG(owner, uid, gid);
6005
6006 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
6007 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
6008 path, ctx);
6009 error = nameiat(&nd, fd);
6010 if (error)
6011 return (error);
6012 vp = nd.ni_vp;
6013
6014 nameidone(&nd);
6015
6016 VATTR_INIT(&va);
6017 if (uid != (uid_t)VNOVAL)
6018 VATTR_SET(&va, va_uid, uid);
6019 if (gid != (gid_t)VNOVAL)
6020 VATTR_SET(&va, va_gid, gid);
6021
6022 #if CONFIG_MACF
6023 error = mac_vnode_check_setowner(ctx, vp, uid, gid);
6024 if (error)
6025 goto out;
6026 #endif
6027
6028 /* preflight and authorize attribute changes */
6029 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6030 goto out;
6031 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6032 goto out;
6033 error = vnode_setattr(vp, &va, ctx);
6034
6035 out:
6036 /*
6037 * EACCES is only allowed from namei(); permissions failure should
6038 * return EPERM, so we need to translate the error code.
6039 */
6040 if (error == EACCES)
6041 error = EPERM;
6042
6043 vnode_put(vp);
6044 return (error);
6045 }
6046
6047 int
6048 chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
6049 {
6050 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6051 uap->uid, uap->gid, 0, UIO_USERSPACE));
6052 }
6053
6054 int
6055 lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
6056 {
6057 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6058 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6059 }
6060
6061 int
6062 fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6063 {
6064 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6065 return (EINVAL);
6066
6067 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6068 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
6069 }
6070
6071 /*
6072 * Set ownership given a file descriptor.
6073 */
6074 /* ARGSUSED */
6075 int
6076 fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
6077 {
6078 struct vnode_attr va;
6079 vfs_context_t ctx = vfs_context_current();
6080 vnode_t vp;
6081 int error;
6082 kauth_action_t action;
6083
6084 AUDIT_ARG(owner, uap->uid, uap->gid);
6085 AUDIT_ARG(fd, uap->fd);
6086
6087 if ( (error = file_vnode(uap->fd, &vp)) )
6088 return (error);
6089
6090 if ( (error = vnode_getwithref(vp)) ) {
6091 file_drop(uap->fd);
6092 return(error);
6093 }
6094 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6095
6096 VATTR_INIT(&va);
6097 if (uap->uid != VNOVAL)
6098 VATTR_SET(&va, va_uid, uap->uid);
6099 if (uap->gid != VNOVAL)
6100 VATTR_SET(&va, va_gid, uap->gid);
6101
6102 #if NAMEDSTREAMS
6103 /* chown calls are not allowed for resource forks. */
6104 if (vp->v_flag & VISNAMEDSTREAM) {
6105 error = EPERM;
6106 goto out;
6107 }
6108 #endif
6109
6110 #if CONFIG_MACF
6111 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
6112 if (error)
6113 goto out;
6114 #endif
6115
6116 /* preflight and authorize attribute changes */
6117 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6118 goto out;
6119 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6120 if (error == EACCES)
6121 error = EPERM;
6122 goto out;
6123 }
6124 error = vnode_setattr(vp, &va, ctx);
6125
6126 out:
6127 (void)vnode_put(vp);
6128 file_drop(uap->fd);
6129 return (error);
6130 }
6131
6132 static int
6133 getutimes(user_addr_t usrtvp, struct timespec *tsp)
6134 {
6135 int error;
6136
6137 if (usrtvp == USER_ADDR_NULL) {
6138 struct timeval old_tv;
6139 /* XXX Y2038 bug because of microtime argument */
6140 microtime(&old_tv);
6141 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
6142 tsp[1] = tsp[0];
6143 } else {
6144 if (IS_64BIT_PROCESS(current_proc())) {
6145 struct user64_timeval tv[2];
6146 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6147 if (error)
6148 return (error);
6149 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6150 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6151 } else {
6152 struct user32_timeval tv[2];
6153 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6154 if (error)
6155 return (error);
6156 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6157 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6158 }
6159 }
6160 return 0;
6161 }
6162
6163 static int
6164 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
6165 int nullflag)
6166 {
6167 int error;
6168 struct vnode_attr va;
6169 kauth_action_t action;
6170
6171 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6172
6173 VATTR_INIT(&va);
6174 VATTR_SET(&va, va_access_time, ts[0]);
6175 VATTR_SET(&va, va_modify_time, ts[1]);
6176 if (nullflag)
6177 va.va_vaflags |= VA_UTIMES_NULL;
6178
6179 #if NAMEDSTREAMS
6180 /* utimes calls are not allowed for resource forks. */
6181 if (vp->v_flag & VISNAMEDSTREAM) {
6182 error = EPERM;
6183 goto out;
6184 }
6185 #endif
6186
6187 #if CONFIG_MACF
6188 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
6189 if (error)
6190 goto out;
6191 #endif
6192 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
6193 if (!nullflag && error == EACCES)
6194 error = EPERM;
6195 goto out;
6196 }
6197
6198 /* since we may not need to auth anything, check here */
6199 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
6200 if (!nullflag && error == EACCES)
6201 error = EPERM;
6202 goto out;
6203 }
6204 error = vnode_setattr(vp, &va, ctx);
6205
6206 out:
6207 return error;
6208 }
6209
6210 /*
6211 * Set the access and modification times of a file.
6212 */
6213 /* ARGSUSED */
6214 int
6215 utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
6216 {
6217 struct timespec ts[2];
6218 user_addr_t usrtvp;
6219 int error;
6220 struct nameidata nd;
6221 vfs_context_t ctx = vfs_context_current();
6222
6223 /*
6224 * AUDIT: Needed to change the order of operations to do the
6225 * name lookup first because auditing wants the path.
6226 */
6227 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
6228 UIO_USERSPACE, uap->path, ctx);
6229 error = namei(&nd);
6230 if (error)
6231 return (error);
6232 nameidone(&nd);
6233
6234 /*
6235 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
6236 * the current time instead.
6237 */
6238 usrtvp = uap->tptr;
6239 if ((error = getutimes(usrtvp, ts)) != 0)
6240 goto out;
6241
6242 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
6243
6244 out:
6245 vnode_put(nd.ni_vp);
6246 return (error);
6247 }
6248
6249 /*
6250 * Set the access and modification times of a file.
6251 */
6252 /* ARGSUSED */
6253 int
6254 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
6255 {
6256 struct timespec ts[2];
6257 vnode_t vp;
6258 user_addr_t usrtvp;
6259 int error;
6260
6261 AUDIT_ARG(fd, uap->fd);
6262 usrtvp = uap->tptr;
6263 if ((error = getutimes(usrtvp, ts)) != 0)
6264 return (error);
6265 if ((error = file_vnode(uap->fd, &vp)) != 0)
6266 return (error);
6267 if((error = vnode_getwithref(vp))) {
6268 file_drop(uap->fd);
6269 return(error);
6270 }
6271
6272 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
6273 vnode_put(vp);
6274 file_drop(uap->fd);
6275 return(error);
6276 }
6277
6278 /*
6279 * Truncate a file given its path name.
6280 */
6281 /* ARGSUSED */
6282 int
6283 truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
6284 {
6285 vnode_t vp;
6286 struct vnode_attr va;
6287 vfs_context_t ctx = vfs_context_current();
6288 int error;
6289 struct nameidata nd;
6290 kauth_action_t action;
6291
6292 if (uap->length < 0)
6293 return(EINVAL);
6294 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
6295 UIO_USERSPACE, uap->path, ctx);
6296 if ((error = namei(&nd)))
6297 return (error);
6298 vp = nd.ni_vp;
6299
6300 nameidone(&nd);
6301
6302 VATTR_INIT(&va);
6303 VATTR_SET(&va, va_data_size, uap->length);
6304
6305 #if CONFIG_MACF
6306 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
6307 if (error)
6308 goto out;
6309 #endif
6310
6311 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
6312 goto out;
6313 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
6314 goto out;
6315 error = vnode_setattr(vp, &va, ctx);
6316 out:
6317 vnode_put(vp);
6318 return (error);
6319 }
6320
6321 /*
6322 * Truncate a file given a file descriptor.
6323 */
6324 /* ARGSUSED */
6325 int
6326 ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
6327 {
6328 vfs_context_t ctx = vfs_context_current();
6329 struct vnode_attr va;
6330 vnode_t vp;
6331 struct fileproc *fp;
6332 int error ;
6333 int fd = uap->fd;
6334
6335 AUDIT_ARG(fd, uap->fd);
6336 if (uap->length < 0)
6337 return(EINVAL);
6338
6339 if ( (error = fp_lookup(p,fd,&fp,0)) ) {
6340 return(error);
6341 }
6342
6343 switch (FILEGLOB_DTYPE(fp->f_fglob)) {
6344 case DTYPE_PSXSHM:
6345 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
6346 goto out;
6347 case DTYPE_VNODE:
6348 break;
6349 default:
6350 error = EINVAL;
6351 goto out;
6352 }
6353
6354 vp = (vnode_t)fp->f_fglob->fg_data;
6355
6356 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
6357 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6358 error = EINVAL;
6359 goto out;
6360 }
6361
6362 if ((error = vnode_getwithref(vp)) != 0) {
6363 goto out;
6364 }
6365
6366 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6367
6368 #if CONFIG_MACF
6369 error = mac_vnode_check_truncate(ctx,
6370 fp->f_fglob->fg_cred, vp);
6371 if (error) {
6372 (void)vnode_put(vp);
6373 goto out;
6374 }
6375 #endif
6376 VATTR_INIT(&va);
6377 VATTR_SET(&va, va_data_size, uap->length);
6378 error = vnode_setattr(vp, &va, ctx);
6379 (void)vnode_put(vp);
6380 out:
6381 file_drop(fd);
6382 return (error);
6383 }
6384
6385
6386 /*
6387 * Sync an open file with synchronized I/O _file_ integrity completion
6388 */
6389 /* ARGSUSED */
6390 int
6391 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
6392 {
6393 __pthread_testcancel(1);
6394 return(fsync_common(p, uap, MNT_WAIT));
6395 }
6396
6397
6398 /*
6399 * Sync an open file with synchronized I/O _file_ integrity completion
6400 *
6401 * Notes: This is a legacy support function that does not test for
6402 * thread cancellation points.
6403 */
6404 /* ARGSUSED */
6405 int
6406 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6407 {
6408 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
6409 }
6410
6411
6412 /*
6413 * Sync an open file with synchronized I/O _data_ integrity completion
6414 */
6415 /* ARGSUSED */
6416 int
6417 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6418 {
6419 __pthread_testcancel(1);
6420 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6421 }
6422
6423
6424 /*
6425 * fsync_common
6426 *
6427 * Common fsync code to support both synchronized I/O file integrity completion
6428 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6429 *
6430 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6431 * will only guarantee that the file data contents are retrievable. If
6432 * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
6433 * includes additional metadata unnecessary for retrieving the file data
6434 * contents, such as atime, mtime, ctime, etc., also be committed to stable
6435 * storage.
6436 *
6437 * Parameters: p The process
6438 * uap->fd The descriptor to synchronize
6439 * flags The data integrity flags
6440 *
6441 * Returns: int Success
6442 * fp_getfvp:EBADF Bad file descriptor
6443 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6444 * VNOP_FSYNC:??? unspecified
6445 *
6446 * Notes: We use struct fsync_args because it is a short name, and all
6447 * caller argument structures are otherwise identical.
6448 */
6449 static int
6450 fsync_common(proc_t p, struct fsync_args *uap, int flags)
6451 {
6452 vnode_t vp;
6453 struct fileproc *fp;
6454 vfs_context_t ctx = vfs_context_current();
6455 int error;
6456
6457 AUDIT_ARG(fd, uap->fd);
6458
6459 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
6460 return (error);
6461 if ( (error = vnode_getwithref(vp)) ) {
6462 file_drop(uap->fd);
6463 return(error);
6464 }
6465
6466 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6467
6468 error = VNOP_FSYNC(vp, flags, ctx);
6469
6470 #if NAMEDRSRCFORK
6471 /* Sync resource fork shadow file if necessary. */
6472 if ((error == 0) &&
6473 (vp->v_flag & VISNAMEDSTREAM) &&
6474 (vp->v_parent != NULLVP) &&
6475 vnode_isshadow(vp) &&
6476 (fp->f_flags & FP_WRITTEN)) {
6477 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
6478 }
6479 #endif
6480
6481 (void)vnode_put(vp);
6482 file_drop(uap->fd);
6483 return (error);
6484 }
6485
6486 /*
6487 * Duplicate files. Source must be a file, target must be a file or
6488 * must not exist.
6489 *
6490 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6491 * perform inheritance correctly.
6492 */
6493 /* ARGSUSED */
6494 int
6495 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
6496 {
6497 vnode_t tvp, fvp, tdvp, sdvp;
6498 struct nameidata fromnd, tond;
6499 int error;
6500 vfs_context_t ctx = vfs_context_current();
6501
6502 /* Check that the flags are valid. */
6503
6504 if (uap->flags & ~CPF_MASK) {
6505 return(EINVAL);
6506 }
6507
6508 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1,
6509 UIO_USERSPACE, uap->from, ctx);
6510 if ((error = namei(&fromnd)))
6511 return (error);
6512 fvp = fromnd.ni_vp;
6513
6514 NDINIT(&tond, CREATE, OP_LINK,
6515 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6516 UIO_USERSPACE, uap->to, ctx);
6517 if ((error = namei(&tond))) {
6518 goto out1;
6519 }
6520 tdvp = tond.ni_dvp;
6521 tvp = tond.ni_vp;
6522
6523 if (tvp != NULL) {
6524 if (!(uap->flags & CPF_OVERWRITE)) {
6525 error = EEXIST;
6526 goto out;
6527 }
6528 }
6529 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
6530 error = EISDIR;
6531 goto out;
6532 }
6533
6534 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
6535 goto out;
6536
6537 if (fvp == tdvp)
6538 error = EINVAL;
6539 /*
6540 * If source is the same as the destination (that is the
6541 * same inode number) then there is nothing to do.
6542 * (fixed to have POSIX semantics - CSM 3/2/98)
6543 */
6544 if (fvp == tvp)
6545 error = -1;
6546 if (!error)
6547 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
6548 out:
6549 sdvp = tond.ni_startdir;
6550 /*
6551 * nameidone has to happen before we vnode_put(tdvp)
6552 * since it may need to release the fs_nodelock on the tdvp
6553 */
6554 nameidone(&tond);
6555
6556 if (tvp)
6557 vnode_put(tvp);
6558 vnode_put(tdvp);
6559 vnode_put(sdvp);
6560 out1:
6561 vnode_put(fvp);
6562
6563 if (fromnd.ni_startdir)
6564 vnode_put(fromnd.ni_startdir);
6565 nameidone(&fromnd);
6566
6567 if (error == -1)
6568 return (0);
6569 return (error);
6570 }
6571
6572
6573 /*
6574 * Rename files. Source and destination must either both be directories,
6575 * or both not be directories. If target is a directory, it must be empty.
6576 */
6577 /* ARGSUSED */
6578 static int
6579 renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
6580 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
6581 {
6582 vnode_t tvp, tdvp;
6583 vnode_t fvp, fdvp;
6584 struct nameidata *fromnd, *tond;
6585 int error;
6586 int do_retry;
6587 int mntrename;
6588 int need_event;
6589 const char *oname = NULL;
6590 char *from_name = NULL, *to_name = NULL;
6591 int from_len=0, to_len=0;
6592 int holding_mntlock;
6593 mount_t locked_mp = NULL;
6594 vnode_t oparent = NULLVP;
6595 #if CONFIG_FSE
6596 fse_info from_finfo, to_finfo;
6597 #endif
6598 int from_truncated=0, to_truncated;
6599 int batched = 0;
6600 struct vnode_attr *fvap, *tvap;
6601 int continuing = 0;
6602 /* carving out a chunk for structs that are too big to be on stack. */
6603 struct {
6604 struct nameidata from_node, to_node;
6605 struct vnode_attr fv_attr, tv_attr;
6606 } * __rename_data;
6607 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
6608 fromnd = &__rename_data->from_node;
6609 tond = &__rename_data->to_node;
6610
6611 holding_mntlock = 0;
6612 do_retry = 0;
6613 retry:
6614 fvp = tvp = NULL;
6615 fdvp = tdvp = NULL;
6616 fvap = tvap = NULL;
6617 mntrename = FALSE;
6618
6619 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
6620 segflg, from, ctx);
6621 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
6622
6623 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6624 segflg, to, ctx);
6625 tond->ni_flag = NAMEI_COMPOUNDRENAME;
6626
6627 continue_lookup:
6628 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6629 if ( (error = nameiat(fromnd, fromfd)) )
6630 goto out1;
6631 fdvp = fromnd->ni_dvp;
6632 fvp = fromnd->ni_vp;
6633
6634 if (fvp && fvp->v_type == VDIR)
6635 tond->ni_cnd.cn_flags |= WILLBEDIR;
6636 }
6637
6638 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6639 if ( (error = nameiat(tond, tofd)) ) {
6640 /*
6641 * Translate error code for rename("dir1", "dir2/.").
6642 */
6643 if (error == EISDIR && fvp->v_type == VDIR)
6644 error = EINVAL;
6645 goto out1;
6646 }
6647 tdvp = tond->ni_dvp;
6648 tvp = tond->ni_vp;
6649 }
6650
6651 batched = vnode_compound_rename_available(fdvp);
6652 if (!fvp) {
6653 /*
6654 * Claim: this check will never reject a valid rename.
6655 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
6656 * Suppose fdvp and tdvp are not on the same mount.
6657 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6658 * then you can't move it to within another dir on the same mountpoint.
6659 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
6660 *
6661 * If this check passes, then we are safe to pass these vnodes to the same FS.
6662 */
6663 if (fdvp->v_mount != tdvp->v_mount) {
6664 error = EXDEV;
6665 goto out1;
6666 }
6667 goto skipped_lookup;
6668 }
6669
6670 if (!batched) {
6671 error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
6672 if (error) {
6673 if (error == ENOENT) {
6674 /*
6675 * We encountered a race where after doing the namei, tvp stops
6676 * being valid. If so, simply re-drive the rename call from the
6677 * top.
6678 */
6679 do_retry = 1;
6680 }
6681 goto out1;
6682 }
6683 }
6684
6685 /*
6686 * If the source and destination are the same (i.e. they're
6687 * links to the same vnode) and the target file system is
6688 * case sensitive, then there is nothing to do.
6689 *
6690 * XXX Come back to this.
6691 */
6692 if (fvp == tvp) {
6693 int pathconf_val;
6694
6695 /*
6696 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
6697 * then assume that this file system is case sensitive.
6698 */
6699 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
6700 pathconf_val != 0) {
6701 goto out1;
6702 }
6703 }
6704
6705 /*
6706 * Allow the renaming of mount points.
6707 * - target must not exist
6708 * - target must reside in the same directory as source
6709 * - union mounts cannot be renamed
6710 * - "/" cannot be renamed
6711 *
6712 * XXX Handle this in VFS after a continued lookup (if we missed
6713 * in the cache to start off)
6714 */
6715 if ((fvp->v_flag & VROOT) &&
6716 (fvp->v_type == VDIR) &&
6717 (tvp == NULL) &&
6718 (fvp->v_mountedhere == NULL) &&
6719 (fdvp == tdvp) &&
6720 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
6721 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
6722 vnode_t coveredvp;
6723
6724 /* switch fvp to the covered vnode */
6725 coveredvp = fvp->v_mount->mnt_vnodecovered;
6726 if ( (vnode_getwithref(coveredvp)) ) {
6727 error = ENOENT;
6728 goto out1;
6729 }
6730 vnode_put(fvp);
6731
6732 fvp = coveredvp;
6733 mntrename = TRUE;
6734 }
6735 /*
6736 * Check for cross-device rename.
6737 */
6738 if ((fvp->v_mount != tdvp->v_mount) ||
6739 (tvp && (fvp->v_mount != tvp->v_mount))) {
6740 error = EXDEV;
6741 goto out1;
6742 }
6743
6744 /*
6745 * If source is the same as the destination (that is the
6746 * same inode number) then there is nothing to do...
6747 * EXCEPT if the underlying file system supports case
6748 * insensitivity and is case preserving. In this case
6749 * the file system needs to handle the special case of
6750 * getting the same vnode as target (fvp) and source (tvp).
6751 *
6752 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
6753 * and _PC_CASE_PRESERVING can have this exception, and they need to
6754 * handle the special case of getting the same vnode as target and
6755 * source. NOTE: Then the target is unlocked going into vnop_rename,
6756 * so not to cause locking problems. There is a single reference on tvp.
6757 *
6758 * NOTE - that fvp == tvp also occurs if they are hard linked and
6759 * that correct behaviour then is just to return success without doing
6760 * anything.
6761 *
6762 * XXX filesystem should take care of this itself, perhaps...
6763 */
6764 if (fvp == tvp && fdvp == tdvp) {
6765 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
6766 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
6767 fromnd->ni_cnd.cn_namelen)) {
6768 goto out1;
6769 }
6770 }
6771
6772 if (holding_mntlock && fvp->v_mount != locked_mp) {
6773 /*
6774 * we're holding a reference and lock
6775 * on locked_mp, but it no longer matches
6776 * what we want to do... so drop our hold
6777 */
6778 mount_unlock_renames(locked_mp);
6779 mount_drop(locked_mp, 0);
6780 holding_mntlock = 0;
6781 }
6782 if (tdvp != fdvp && fvp->v_type == VDIR) {
6783 /*
6784 * serialize renames that re-shape
6785 * the tree... if holding_mntlock is
6786 * set, then we're ready to go...
6787 * otherwise we
6788 * first need to drop the iocounts
6789 * we picked up, second take the
6790 * lock to serialize the access,
6791 * then finally start the lookup
6792 * process over with the lock held
6793 */
6794 if (!holding_mntlock) {
6795 /*
6796 * need to grab a reference on
6797 * the mount point before we
6798 * drop all the iocounts... once
6799 * the iocounts are gone, the mount
6800 * could follow
6801 */
6802 locked_mp = fvp->v_mount;
6803 mount_ref(locked_mp, 0);
6804
6805 /*
6806 * nameidone has to happen before we vnode_put(tvp)
6807 * since it may need to release the fs_nodelock on the tvp
6808 */
6809 nameidone(tond);
6810
6811 if (tvp)
6812 vnode_put(tvp);
6813 vnode_put(tdvp);
6814
6815 /*
6816 * nameidone has to happen before we vnode_put(fdvp)
6817 * since it may need to release the fs_nodelock on the fvp
6818 */
6819 nameidone(fromnd);
6820
6821 vnode_put(fvp);
6822 vnode_put(fdvp);
6823
6824 mount_lock_renames(locked_mp);
6825 holding_mntlock = 1;
6826
6827 goto retry;
6828 }
6829 } else {
6830 /*
6831 * when we dropped the iocounts to take
6832 * the lock, we allowed the identity of
6833 * the various vnodes to change... if they did,
6834 * we may no longer be dealing with a rename
6835 * that reshapes the tree... once we're holding
6836 * the iocounts, the vnodes can't change type
6837 * so we're free to drop the lock at this point
6838 * and continue on
6839 */
6840 if (holding_mntlock) {
6841 mount_unlock_renames(locked_mp);
6842 mount_drop(locked_mp, 0);
6843 holding_mntlock = 0;
6844 }
6845 }
6846
6847 // save these off so we can later verify that fvp is the same
6848 oname = fvp->v_name;
6849 oparent = fvp->v_parent;
6850
6851 skipped_lookup:
6852 #if CONFIG_FSE
6853 need_event = need_fsevent(FSE_RENAME, fdvp);
6854 if (need_event) {
6855 if (fvp) {
6856 get_fse_info(fvp, &from_finfo, ctx);
6857 } else {
6858 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
6859 if (error) {
6860 goto out1;
6861 }
6862
6863 fvap = &__rename_data->fv_attr;
6864 }
6865
6866 if (tvp) {
6867 get_fse_info(tvp, &to_finfo, ctx);
6868 } else if (batched) {
6869 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
6870 if (error) {
6871 goto out1;
6872 }
6873
6874 tvap = &__rename_data->tv_attr;
6875 }
6876 }
6877 #else
6878 need_event = 0;
6879 #endif /* CONFIG_FSE */
6880
6881 if (need_event || kauth_authorize_fileop_has_listeners()) {
6882 if (from_name == NULL) {
6883 GET_PATH(from_name);
6884 if (from_name == NULL) {
6885 error = ENOMEM;
6886 goto out1;
6887 }
6888 }
6889
6890 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
6891
6892 if (to_name == NULL) {
6893 GET_PATH(to_name);
6894 if (to_name == NULL) {
6895 error = ENOMEM;
6896 goto out1;
6897 }
6898 }
6899
6900 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
6901 }
6902 #if CONFIG_SECLUDED_RENAME
6903 if (flags & VFS_SECLUDE_RENAME) {
6904 fromnd->ni_cnd.cn_flags |= CN_SECLUDE_RENAME;
6905 }
6906 #else
6907 #pragma unused(flags)
6908 #endif
6909 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
6910 tdvp, &tvp, &tond->ni_cnd, tvap,
6911 0, ctx);
6912
6913 if (holding_mntlock) {
6914 /*
6915 * we can drop our serialization
6916 * lock now
6917 */
6918 mount_unlock_renames(locked_mp);
6919 mount_drop(locked_mp, 0);
6920 holding_mntlock = 0;
6921 }
6922 if (error) {
6923 if (error == EKEEPLOOKING) {
6924 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6925 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6926 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
6927 }
6928 }
6929
6930 fromnd->ni_vp = fvp;
6931 tond->ni_vp = tvp;
6932
6933 goto continue_lookup;
6934 }
6935
6936 /*
6937 * We may encounter a race in the VNOP where the destination didn't
6938 * exist when we did the namei, but it does by the time we go and
6939 * try to create the entry. In this case, we should re-drive this rename
6940 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
6941 * but other filesystems susceptible to this race could return it, too.
6942 */
6943 if (error == ERECYCLE) {
6944 do_retry = 1;
6945 }
6946
6947 goto out1;
6948 }
6949
6950 /* call out to allow 3rd party notification of rename.
6951 * Ignore result of kauth_authorize_fileop call.
6952 */
6953 kauth_authorize_fileop(vfs_context_ucred(ctx),
6954 KAUTH_FILEOP_RENAME,
6955 (uintptr_t)from_name, (uintptr_t)to_name);
6956
6957 #if CONFIG_FSE
6958 if (from_name != NULL && to_name != NULL) {
6959 if (from_truncated || to_truncated) {
6960 // set it here since only the from_finfo gets reported up to user space
6961 from_finfo.mode |= FSE_TRUNCATED_PATH;
6962 }
6963
6964 if (tvap && tvp) {
6965 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
6966 }
6967 if (fvap) {
6968 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
6969 }
6970
6971 if (tvp) {
6972 add_fsevent(FSE_RENAME, ctx,
6973 FSE_ARG_STRING, from_len, from_name,
6974 FSE_ARG_FINFO, &from_finfo,
6975 FSE_ARG_STRING, to_len, to_name,
6976 FSE_ARG_FINFO, &to_finfo,
6977 FSE_ARG_DONE);
6978 } else {
6979 add_fsevent(FSE_RENAME, ctx,
6980 FSE_ARG_STRING, from_len, from_name,
6981 FSE_ARG_FINFO, &from_finfo,
6982 FSE_ARG_STRING, to_len, to_name,
6983 FSE_ARG_DONE);
6984 }
6985 }
6986 #endif /* CONFIG_FSE */
6987
6988 /*
6989 * update filesystem's mount point data
6990 */
6991 if (mntrename) {
6992 char *cp, *pathend, *mpname;
6993 char * tobuf;
6994 struct mount *mp;
6995 int maxlen;
6996 size_t len = 0;
6997
6998 mp = fvp->v_mountedhere;
6999
7000 if (vfs_busy(mp, LK_NOWAIT)) {
7001 error = EBUSY;
7002 goto out1;
7003 }
7004 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
7005
7006 if (UIO_SEG_IS_USER_SPACE(segflg))
7007 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
7008 else
7009 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
7010 if (!error) {
7011 /* find current mount point prefix */
7012 pathend = &mp->mnt_vfsstat.f_mntonname[0];
7013 for (cp = pathend; *cp != '\0'; ++cp) {
7014 if (*cp == '/')
7015 pathend = cp + 1;
7016 }
7017 /* find last component of target name */
7018 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
7019 if (*cp == '/')
7020 mpname = cp + 1;
7021 }
7022 /* append name to prefix */
7023 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
7024 bzero(pathend, maxlen);
7025 strlcpy(pathend, mpname, maxlen);
7026 }
7027 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
7028
7029 vfs_unbusy(mp);
7030 }
7031 /*
7032 * fix up name & parent pointers. note that we first
7033 * check that fvp has the same name/parent pointers it
7034 * had before the rename call... this is a 'weak' check
7035 * at best...
7036 *
7037 * XXX oparent and oname may not be set in the compound vnop case
7038 */
7039 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
7040 int update_flags;
7041
7042 update_flags = VNODE_UPDATE_NAME;
7043
7044 if (fdvp != tdvp)
7045 update_flags |= VNODE_UPDATE_PARENT;
7046
7047 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
7048 }
7049 out1:
7050 if (to_name != NULL) {
7051 RELEASE_PATH(to_name);
7052 to_name = NULL;
7053 }
7054 if (from_name != NULL) {
7055 RELEASE_PATH(from_name);
7056 from_name = NULL;
7057 }
7058 if (holding_mntlock) {
7059 mount_unlock_renames(locked_mp);
7060 mount_drop(locked_mp, 0);
7061 holding_mntlock = 0;
7062 }
7063 if (tdvp) {
7064 /*
7065 * nameidone has to happen before we vnode_put(tdvp)
7066 * since it may need to release the fs_nodelock on the tdvp
7067 */
7068 nameidone(tond);
7069
7070 if (tvp)
7071 vnode_put(tvp);
7072 vnode_put(tdvp);
7073 }
7074 if (fdvp) {
7075 /*
7076 * nameidone has to happen before we vnode_put(fdvp)
7077 * since it may need to release the fs_nodelock on the fdvp
7078 */
7079 nameidone(fromnd);
7080
7081 if (fvp)
7082 vnode_put(fvp);
7083 vnode_put(fdvp);
7084 }
7085
7086 /*
7087 * If things changed after we did the namei, then we will re-drive
7088 * this rename call from the top.
7089 */
7090 if (do_retry) {
7091 do_retry = 0;
7092 goto retry;
7093 }
7094
7095 FREE(__rename_data, M_TEMP);
7096 return (error);
7097 }
7098
7099 int
7100 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7101 {
7102 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7103 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7104 }
7105
#if CONFIG_SECLUDED_RENAME
/*
 * rename_ext: like rename(), but passes the caller-supplied uap->flags
 * through to renameat_internal().  Both paths are looked up with
 * AT_FDCWD as the directory descriptor.
 *
 * Returns:	whatever renameat_internal() returns
 */
int
rename_ext(__unused proc_t p, struct rename_ext_args *uap, __unused int32_t *retval)
{
	int error;

	error = renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
	    AT_FDCWD, uap->to, UIO_USERSPACE, uap->flags);

	return (error);
}
#endif
7116
7117 int
7118 renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7119 {
7120 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7121 uap->tofd, uap->to, UIO_USERSPACE, 0));
7122 }
7123
7124 /*
7125 * Make a directory file.
7126 *
7127 * Returns: 0 Success
7128 * EEXIST
7129 * namei:???
7130 * vnode_authorize:???
7131 * vn_create:???
7132 */
7133 /* ARGSUSED */
7134 static int
7135 mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
7136 enum uio_seg segflg)
7137 {
7138 vnode_t vp, dvp;
7139 int error;
7140 int update_flags = 0;
7141 int batched;
7142 struct nameidata nd;
7143
7144 AUDIT_ARG(mode, vap->va_mode);
7145 NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
7146 path, ctx);
7147 nd.ni_cnd.cn_flags |= WILLBEDIR;
7148 nd.ni_flag = NAMEI_COMPOUNDMKDIR;
7149
7150 continue_lookup:
7151 error = nameiat(&nd, fd);
7152 if (error)
7153 return (error);
7154 dvp = nd.ni_dvp;
7155 vp = nd.ni_vp;
7156
7157 if (vp != NULL) {
7158 error = EEXIST;
7159 goto out;
7160 }
7161
7162 batched = vnode_compound_mkdir_available(dvp);
7163
7164 VATTR_SET(vap, va_type, VDIR);
7165
7166 /*
7167 * XXX
7168 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
7169 * only get EXISTS or EISDIR for existing path components, and not that it could see
7170 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
7171 * it will fail in a spurious manner. Need to figure out if this is valid behavior.
7172 */
7173 if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
7174 if (error == EACCES || error == EPERM) {
7175 int error2;
7176
7177 nameidone(&nd);
7178 vnode_put(dvp);
7179 dvp = NULLVP;
7180
7181 /*
7182 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
7183 * rather than EACCESS if the target exists.
7184 */
7185 NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
7186 path, ctx);
7187 error2 = nameiat(&nd, fd);
7188 if (error2) {
7189 goto out;
7190 } else {
7191 vp = nd.ni_vp;
7192 error = EEXIST;
7193 goto out;
7194 }
7195 }
7196
7197 goto out;
7198 }
7199
7200 /*
7201 * make the directory
7202 */
7203 if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
7204 if (error == EKEEPLOOKING) {
7205 nd.ni_vp = vp;
7206 goto continue_lookup;
7207 }
7208
7209 goto out;
7210 }
7211
7212 // Make sure the name & parent pointers are hooked up
7213 if (vp->v_name == NULL)
7214 update_flags |= VNODE_UPDATE_NAME;
7215 if (vp->v_parent == NULLVP)
7216 update_flags |= VNODE_UPDATE_PARENT;
7217
7218 if (update_flags)
7219 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
7220
7221 #if CONFIG_FSE
7222 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
7223 #endif
7224
7225 out:
7226 /*
7227 * nameidone has to happen before we vnode_put(dvp)
7228 * since it may need to release the fs_nodelock on the dvp
7229 */
7230 nameidone(&nd);
7231
7232 if (vp)
7233 vnode_put(vp);
7234 if (dvp)
7235 vnode_put(dvp);
7236
7237 return (error);
7238 }
7239
7240 /*
7241 * mkdir_extended: Create a directory; with extended security (ACL).
7242 *
7243 * Parameters: p Process requesting to create the directory
7244 * uap User argument descriptor (see below)
7245 * retval (ignored)
7246 *
7247 * Indirect: uap->path Path of directory to create
7248 * uap->mode Access permissions to set
7249 * uap->xsecurity ACL to set
7250 *
7251 * Returns: 0 Success
7252 * !0 Not success
7253 *
7254 */
7255 int
7256 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
7257 {
7258 int ciferror;
7259 kauth_filesec_t xsecdst;
7260 struct vnode_attr va;
7261
7262 AUDIT_ARG(owner, uap->uid, uap->gid);
7263
7264 xsecdst = NULL;
7265 if ((uap->xsecurity != USER_ADDR_NULL) &&
7266 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
7267 return ciferror;
7268
7269 VATTR_INIT(&va);
7270 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7271 if (xsecdst != NULL)
7272 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
7273
7274 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7275 UIO_USERSPACE);
7276 if (xsecdst != NULL)
7277 kauth_filesec_free(xsecdst);
7278 return ciferror;
7279 }
7280
7281 int
7282 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
7283 {
7284 struct vnode_attr va;
7285
7286 VATTR_INIT(&va);
7287 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7288
7289 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7290 UIO_USERSPACE));
7291 }
7292
7293 int
7294 mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
7295 {
7296 struct vnode_attr va;
7297
7298 VATTR_INIT(&va);
7299 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7300
7301 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
7302 UIO_USERSPACE));
7303 }
7304
/*
 * rmdirat_internal: common worker that removes a directory.
 *
 * Parameters:	ctx		VFS context used for lookup, authorization,
 *				notification and removal
 *		fd		Descriptor handed to nameiat() for the lookup
 *		dirpath		Path of the directory to remove
 *		segflg		Address space in which dirpath lives
 *
 * Returns:	0		Success
 *		EBUSY		Target is the root of a mounted filesystem
 *		ENOMEM		No path buffer for the notification path
 *	nameiat:???
 *	vn_authorize_rmdir:???
 *	vfs_get_notify_attributes:???
 *	vn_rmdir:???
 *	rmdir_remove_orphaned_appleDouble:???
 *
 * Notes:	The whole operation sits in a do/while loop that is repeated
 *		while restart_flag is non-zero; the only place that can set
 *		it is rmdir_remove_orphaned_appleDouble().  The label
 *		continue_lookup lies inside the loop body and is re-entered
 *		when vn_rmdir() returns EKEEPLOOKING.
 */
7305 static int
7306 rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
7307 enum uio_seg segflg)
7308 {
7309 vnode_t vp, dvp;
7310 int error;
7311 struct nameidata nd;
7312 char *path = NULL; /* notification path buffer, obtained lazily with GET_PATH() */
7313 int len=0;
7314 int has_listeners = 0;
7315 int need_event = 0;
7316 int truncated = 0;
7317 #if CONFIG_FSE
7318 struct vnode_attr va;
7319 #endif /* CONFIG_FSE */
7320 struct vnode_attr *vap = NULL; /* non-NULL only when attributes are requested from vn_rmdir() */
7321 int batched;
7322
7323 int restart_flag;
7324
7325 /*
7326 * This loop exists to restart rmdir in the unlikely case that two
7327 * processes are simultaneously trying to remove the same directory
7328 * containing orphaned appleDouble files.
7329 */
7330 do {
7331 NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
7332 segflg, dirpath, ctx);
7333 nd.ni_flag = NAMEI_COMPOUNDRMDIR;
7334 continue_lookup:
/* Per-pass state is reset both on a fresh pass and on an EKEEPLOOKING re-entry. */
7335 restart_flag = 0;
7336 vap = NULL;
7337
7338 error = nameiat(&nd, fd);
7339 if (error)
7340 return (error);
7341
7342 dvp = nd.ni_dvp;
7343 vp = nd.ni_vp;
7344
7345 if (vp) {
7346 batched = vnode_compound_rmdir_available(vp);
7347
7348 if (vp->v_flag & VROOT) {
7349 /*
7350 * The root of a mounted filesystem cannot be deleted.
7351 */
7352 error = EBUSY;
7353 goto out;
7354 }
7355
7356 /*
7357 * Removed a check here; we used to abort if vp's vid
7358 * was not the same as what we'd seen the last time around.
7359 * I do not think that check was valid, because if we retry
7360 * and all dirents are gone, the directory could legitimately
7361 * be recycled but still be present in a situation where we would
7362 * have had permission to delete. Therefore, we won't make
7363 * an effort to preserve that check now that we may not have a
7364 * vp here.
7365 */
7366
/* Authorization is done here only when the compound VNOP is not used. */
7367 if (!batched) {
7368 error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
7369 if (error) {
7370 goto out;
7371 }
7372 }
7373 } else {
/*
 * The lookup succeeded without returning a vnode; this is only
 * accepted when compound rmdir is available on the parent.
 */
7374 batched = 1;
7375
7376 if (!vnode_compound_rmdir_available(dvp)) {
7377 panic("No error, but no compound rmdir?");
7378 }
7379 }
7380
7381 #if CONFIG_FSE
7382 fse_info finfo;
7383
7384 need_event = need_fsevent(FSE_DELETE, dvp);
7385 if (need_event) {
/*
 * Non-batched: collect the fsevent info from vp now.
 * Batched: prepare 'va' so that vn_rmdir() is handed an attribute
 * request; finfo is then filled from it after the removal.
 */
7386 if (!batched) {
7387 get_fse_info(vp, &finfo, ctx);
7388 } else {
7389 error = vfs_get_notify_attributes(&va);
7390 if (error) {
7391 goto out;
7392 }
7393
7394 vap = &va;
7395 }
7396 }
7397 #endif
7398 has_listeners = kauth_authorize_fileop_has_listeners();
/* The path string is only built when someone will consume it. */
7399 if (need_event || has_listeners) {
7400 if (path == NULL) {
7401 GET_PATH(path);
7402 if (path == NULL) {
7403 error = ENOMEM;
7404 goto out;
7405 }
7406 }
7407
7408 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
7409 #if CONFIG_FSE
/*
 * NOTE(review): finfo has only been filled at this point in the
 * non-batched need_event case; in the batched case it is filled later
 * by vnode_get_fse_info_from_vap() -- confirm that the truncated-path
 * flag set here survives on that path.
 */
7410 if (truncated) {
7411 finfo.mode |= FSE_TRUNCATED_PATH;
7412 }
7413 #endif
7414 }
7415
7416 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
/* vn_rmdir() takes &vp and may change it; keep nd in step with the result. */
7417 nd.ni_vp = vp;
7418 if (vp == NULLVP) {
7419 /* Couldn't find a vnode */
7420 goto out;
7421 }
7422
7423 if (error == EKEEPLOOKING) {
7424 goto continue_lookup;
7425 }
7426 #if CONFIG_APPLEDOUBLE
7427 /*
7428 * Special case to remove orphaned AppleDouble
7429 * files. I don't like putting this in the kernel,
7430 * but carbon does not like putting this in carbon either,
7431 * so here we are.
7432 */
7433 if (error == ENOTEMPTY) {
7434 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
7435 if (error == EBUSY) {
7436 goto out;
7437 }
7438
7439
7440 /*
7441 * Assuming everything went well, we will try the RMDIR again
7442 */
7443 if (!error)
7444 error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
7445 }
7446 #endif /* CONFIG_APPLEDOUBLE */
7447 /*
7448 * Call out to allow 3rd party notification of delete.
7449 * Ignore result of kauth_authorize_fileop call.
7450 */
7451 if (!error) {
7452 if (has_listeners) {
7453 kauth_authorize_fileop(vfs_context_ucred(ctx),
7454 KAUTH_FILEOP_DELETE,
7455 (uintptr_t)vp,
7456 (uintptr_t)path);
7457 }
7458
7459 if (vp->v_flag & VISHARDLINK) {
7460 // see the comment in unlink1() about why we update
7461 // the parent of a hard link when it is removed
7462 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
7463 }
7464
7465 #if CONFIG_FSE
7466 if (need_event) {
7467 if (vap) {
7468 vnode_get_fse_info_from_vap(vp, &finfo, vap);
7469 }
7470 add_fsevent(FSE_DELETE, ctx,
7471 FSE_ARG_STRING, len, path,
7472 FSE_ARG_FINFO, &finfo,
7473 FSE_ARG_DONE);
7474 }
7475 #endif
7476 }
7477
7478 out:
/* Everything acquired during this pass is released before returning or retrying. */
7479 if (path != NULL) {
7480 RELEASE_PATH(path);
7481 path = NULL;
7482 }
7483 /*
7484 * nameidone has to happen before we vnode_put(dvp)
7485 * since it may need to release the fs_nodelock on the dvp
7486 */
7487 nameidone(&nd);
7488 vnode_put(dvp);
7489
7490 if (vp)
7491 vnode_put(vp);
7492
/*
 * From here on vp is passed only to wakeup_one() and tsleep(); the
 * iocount taken by the lookup has already been dropped above.
 */
7493 if (restart_flag == 0) {
7494 wakeup_one((caddr_t)vp);
7495 return (error);
7496 }
7497 tsleep(vp, PVFS, "rm AD", 1);
7498
7499 } while (restart_flag != 0);
7500
7501 return (error);
7502
7503 }
7504
7505 /*
7506 * Remove a directory file.
7507 */
7508 /* ARGSUSED */
7509 int
7510 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
7511 {
7512 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
7513 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
7514 }
7515
7516 /* Get direntry length padded to 8 byte alignment */
7517 #define DIRENT64_LEN(namlen) \
7518 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
7519
7520 errno_t
7521 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
7522 int *numdirent, vfs_context_t ctxp)
7523 {
7524 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
7525 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
7526 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
7527 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
7528 } else {
7529 size_t bufsize;
7530 void * bufptr;
7531 uio_t auio;
7532 struct direntry *entry64;
7533 struct dirent *dep;
7534 int bytesread;
7535 int error;
7536
7537 /*
7538 * Our kernel buffer needs to be smaller since re-packing
7539 * will expand each dirent. The worse case (when the name
7540 * length is 3) corresponds to a struct direntry size of 32
7541 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
7542 * (4-byte aligned). So having a buffer that is 3/8 the size
7543 * will prevent us from reading more than we can pack.
7544 *
7545 * Since this buffer is wired memory, we will limit the
7546 * buffer size to a maximum of 32K. We would really like to
7547 * use 32K in the MIN(), but we use magic number 87371 to
7548 * prevent uio_resid() * 3 / 8 from overflowing.
7549 */
7550 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
7551 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
7552 if (bufptr == NULL) {
7553 return ENOMEM;
7554 }
7555
7556 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
7557 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
7558 auio->uio_offset = uio->uio_offset;
7559
7560 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
7561
7562 dep = (struct dirent *)bufptr;
7563 bytesread = bufsize - uio_resid(auio);
7564
7565 MALLOC(entry64, struct direntry *, sizeof(struct direntry),
7566 M_TEMP, M_WAITOK);
7567 /*
7568 * Convert all the entries and copy them out to user's buffer.
7569 */
7570 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
7571 size_t enbufsize = DIRENT64_LEN(dep->d_namlen);
7572
7573 bzero(entry64, enbufsize);
7574 /* Convert a dirent to a dirent64. */
7575 entry64->d_ino = dep->d_ino;
7576 entry64->d_seekoff = 0;
7577 entry64->d_reclen = enbufsize;
7578 entry64->d_namlen = dep->d_namlen;
7579 entry64->d_type = dep->d_type;
7580 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);
7581
7582 /* Move to next entry. */
7583 dep = (struct dirent *)((char *)dep + dep->d_reclen);
7584
7585 /* Copy entry64 to user's buffer. */
7586 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
7587 }
7588
7589 /* Update the real offset using the offset we got from VNOP_READDIR. */
7590 if (error == 0) {
7591 uio->uio_offset = auio->uio_offset;
7592 }
7593 uio_free(auio);
7594 FREE(bufptr, M_TEMP);
7595 FREE(entry64, M_TEMP);
7596 return (error);
7597 }
7598 }
7599
/* Upper bound applied to the caller's buffer size in getdirentries_common(). */
7600 #define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
7601
7602 /*
7603 * Read a block of directory entries in a file system independent format.
7604 */
/*
 * getdirentries_common: shared worker for getdirentries() and
 * getdirentries64().
 *
 * Parameters:	fd		Descriptor of the directory to read
 *		bufp		User buffer that receives the entries
 *		bufsize		Size of bufp; clamped to
 *				GETDIRENTRIES_MAXBUFSIZE
 *		bytesread	Out: bufsize minus the residual count of the
 *				final read
 *		offset		Out (may be NULL): the file offset that was
 *				current when the final read started
 *		flags		VNODE_READDIR_EXTENDED selects
 *				vnode_readdir64(); otherwise VNOP_READDIR()
 *				is called directly
 *
 * Returns:	0		Success
 *		EBADF		Descriptor is not open for reading
 *		EINVAL		The vnode is not a directory
 *	fp_getfvp:???
 *	mac_file_check_change_offset:???
 *	vnode_getwithref:???
 *	mac_vnode_check_readdir:???
 *	vnode_readdir64:???
 *	VNOP_READDIR:???
 *	union_dircheckp:???
 *
 * Notes:	The file offset in the fileglob is updated from the uio
 *		after the read, whether or not the read reported an error.
 *		*bytesread and *offset are only written on the success path.
 */
7605 static int
7606 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
7607 off_t *offset, int flags)
7608 {
7609 vnode_t vp;
7610 struct vfs_context context = *vfs_context_current(); /* local copy */
7611 struct fileproc *fp;
7612 uio_t auio;
7613 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7614 off_t loff;
7615 int error, eofflag, numdirent;
7616 char uio_buf[ UIO_SIZEOF(1) ]; /* backing storage for the single-iovec uio built below */
7617
7618 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
7619 if (error) {
7620 return (error);
7621 }
/* Every exit after this point goes through 'out', which calls file_drop(fd). */
7622 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7623 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7624 error = EBADF;
7625 goto out;
7626 }
7627
7628 if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
7629 bufsize = GETDIRENTRIES_MAXBUFSIZE;
7630
7631 #if CONFIG_MACF
7632 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
7633 if (error)
7634 goto out;
7635 #endif
7636 if ( (error = vnode_getwithref(vp)) ) {
7637 goto out;
7638 }
7639 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7640
/* Re-entered with a different vp when the read moves on within a union mount. */
7641 unionread:
7642 if (vp->v_type != VDIR) {
7643 (void)vnode_put(vp);
7644 error = EINVAL;
7645 goto out;
7646 }
7647
7648 #if CONFIG_MACF
7649 error = mac_vnode_check_readdir(&context, vp);
7650 if (error != 0) {
7651 (void)vnode_put(vp);
7652 goto out;
7653 }
7654 #endif /* MAC */
7655
/* Remember the starting offset; it is what gets reported through *offset. */
7656 loff = fp->f_fglob->fg_offset;
7657 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
7658 uio_addiov(auio, bufp, bufsize);
7659
7660 if (flags & VNODE_READDIR_EXTENDED) {
7661 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
7662 fp->f_fglob->fg_offset = uio_offset(auio);
7663 } else {
7664 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
7665 fp->f_fglob->fg_offset = uio_offset(auio);
7666 }
7667 if (error) {
7668 (void)vnode_put(vp);
7669 goto out;
7670 }
7671
/* Residual equal to the buffer size means this read transferred nothing. */
7672 if ((user_ssize_t)bufsize == uio_resid(auio)){
7673 if (union_dircheckp) {
7674 error = union_dircheckp(&vp, fp, &context);
7675 if (error == -1)
7676 goto unionread;
/*
 * NOTE(review): this error exit does not vnode_put(vp), unlike the
 * other error exits above -- confirm what union_dircheckp() leaves
 * held when it fails.
 */
7677 if (error)
7678 goto out;
7679 }
7680
7681 if ((vp->v_mount->mnt_flag & MNT_UNION)) {
7682 struct vnode *tvp = vp;
/*
 * On a successful traverse the descriptor is switched over to the new
 * vnode: it gets a reference, becomes fg_data with the offset reset to
 * 0, and the old vnode's reference and iocount are dropped.
 */
7683 if (lookup_traverse_union(tvp, &vp, &context) == 0) {
7684 vnode_ref(vp);
7685 fp->f_fglob->fg_data = (caddr_t) vp;
7686 fp->f_fglob->fg_offset = 0;
7687 vnode_rele(tvp);
7688 vnode_put(tvp);
7689 goto unionread;
7690 }
7691 vp = tvp;
7692 }
7693 }
7694
7695 vnode_put(vp);
7696 if (offset) {
7697 *offset = loff;
7698 }
7699
7700 *bytesread = bufsize - uio_resid(auio);
7701 out:
7702 file_drop(fd);
7703 return (error);
7704 }
7705
7706
7707 int
7708 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
7709 {
7710 off_t offset;
7711 ssize_t bytesread;
7712 int error;
7713
7714 AUDIT_ARG(fd, uap->fd);
7715 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
7716
7717 if (error == 0) {
7718 if (proc_is64bit(p)) {
7719 user64_long_t base = (user64_long_t)offset;
7720 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
7721 } else {
7722 user32_long_t base = (user32_long_t)offset;
7723 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
7724 }
7725 *retval = bytesread;
7726 }
7727 return (error);
7728 }
7729
7730 int
7731 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
7732 {
7733 off_t offset;
7734 ssize_t bytesread;
7735 int error;
7736
7737 AUDIT_ARG(fd, uap->fd);
7738 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
7739
7740 if (error == 0) {
7741 *retval = bytesread;
7742 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
7743 }
7744 return (error);
7745 }
7746
7747
7748 /*
7749 * Set the mode mask for creation of filesystem nodes.
7750 * XXX implement xsecurity
7751 */
7752 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
7753 static int
7754 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
7755 {
7756 struct filedesc *fdp;
7757
7758 AUDIT_ARG(mask, newmask);
7759 proc_fdlock(p);
7760 fdp = p->p_fd;
7761 *retval = fdp->fd_cmask;
7762 fdp->fd_cmask = newmask & ALLPERMS;
7763 proc_fdunlock(p);
7764 return (0);
7765 }
7766
7767 /*
7768 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
7769 *
7770 * Parameters: p Process requesting to set the umask
7771 * uap User argument descriptor (see below)
7772 * retval umask of the process (parameter p)
7773 *
7774 * Indirect: uap->newmask umask to set
7775 * uap->xsecurity ACL to set
7776 *
7777 * Returns: 0 Success
7778 * !0 Not success
7779 *
7780 */
7781 int
7782 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
7783 {
7784 int ciferror;
7785 kauth_filesec_t xsecdst;
7786
7787 xsecdst = KAUTH_FILESEC_NONE;
7788 if (uap->xsecurity != USER_ADDR_NULL) {
7789 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
7790 return ciferror;
7791 } else {
7792 xsecdst = KAUTH_FILESEC_NONE;
7793 }
7794
7795 ciferror = umask1(p, uap->newmask, xsecdst, retval);
7796
7797 if (xsecdst != KAUTH_FILESEC_NONE)
7798 kauth_filesec_free(xsecdst);
7799 return ciferror;
7800 }
7801
7802 int
7803 umask(proc_t p, struct umask_args *uap, int32_t *retval)
7804 {
7805 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
7806 }
7807
7808 /*
7809 * Void all references to file by ripping underlying filesystem
7810 * away from vnode.
7811 */
7812 /* ARGSUSED */
7813 int
7814 revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
7815 {
7816 vnode_t vp;
7817 struct vnode_attr va;
7818 vfs_context_t ctx = vfs_context_current();
7819 int error;
7820 struct nameidata nd;
7821
7822 NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
7823 uap->path, ctx);
7824 error = namei(&nd);
7825 if (error)
7826 return (error);
7827 vp = nd.ni_vp;
7828
7829 nameidone(&nd);
7830
7831 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
7832 error = ENOTSUP;
7833 goto out;
7834 }
7835
7836 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
7837 error = EBUSY;
7838 goto out;
7839 }
7840
7841 #if CONFIG_MACF
7842 error = mac_vnode_check_revoke(ctx, vp);
7843 if (error)
7844 goto out;
7845 #endif
7846
7847 VATTR_INIT(&va);
7848 VATTR_WANTED(&va, va_uid);
7849 if ((error = vnode_getattr(vp, &va, ctx)))
7850 goto out;
7851 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
7852 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
7853 goto out;
7854 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
7855 VNOP_REVOKE(vp, REVOKEALL, ctx);
7856 out:
7857 vnode_put(vp);
7858 return (error);
7859 }
7860
7861
7862 /*
7863 * HFS/HFS PLUS SPECIFIC SYSTEM CALLS
7864 * The following system calls are designed to support features
7865 * which are specific to the HFS & HFS Plus volume formats
7866 */
7867
7868
7869 /*
7870 * Obtain attribute information on objects in a directory while enumerating
7871 * the directory.
7872 */
/*
 * getdirentriesattr: system call entry point.
 *
 * Indirect:	uap->fd		Descriptor of the directory to enumerate
 *		uap->alist	In: struct attrlist naming the wanted attributes
 *		uap->buffer	Out: user buffer for the results
 *		uap->buffersize	Size of uap->buffer
 *		uap->count	In/out: 32-bit count handed to and returned
 *				by VNOP_READDIRATTR()
 *		uap->basep	Out: 32-bit file offset at which this read
 *				started
 *		uap->newstate	Out: 32-bit state value returned by
 *				VNOP_READDIRATTR()
 *		uap->options	Only the low 32 bits are passed on
 *
 * Returns:	0		Success; *retval holds eofflag
 *		EBADF		Descriptor is not open for reading
 *		EINVAL		The vnode is not a directory
 *	copyin:???
 *	fp_getfvp:???
 *	mac_file_check_change_offset:???
 *	vnode_getwithref:???
 *	mac_vnode_check_readdir:???
 *	vnode_authorize:???
 *	VNOP_READDIRATTR:???
 *	copyout:???
 */
7873 /* ARGSUSED */
7874 int
7875 getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
7876 {
7877 vnode_t vp;
7878 struct fileproc *fp;
7879 uio_t auio = NULL;
7880 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7881 uint32_t count, savecount;
7882 uint32_t newstate;
7883 int error, eofflag;
/* NOTE(review): loff is 32 bits wide but is loaded from fg_offset -- confirm larger offsets cannot occur here. */
7884 uint32_t loff;
7885 struct attrlist attributelist;
7886 vfs_context_t ctx = vfs_context_current();
7887 int fd = uap->fd;
7888 char uio_buf[ UIO_SIZEOF(1) ]; /* backing storage for the single-iovec uio built below */
7889 kauth_action_t action;
7890
7891 AUDIT_ARG(fd, fd);
7892
7893 /* Get the attributes into kernel space */
7894 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
7895 return(error);
7896 }
7897 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
7898 return(error);
7899 }
/* Kept so that the request can be restarted with the original count after a union traverse. */
7900 savecount = count;
7901 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
7902 return (error);
7903 }
/* Every exit after this point goes through 'out', which calls file_drop(fd). */
7904 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
7905 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
7906 error = EBADF;
7907 goto out;
7908 }
7909
7910
7911 #if CONFIG_MACF
7912 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
7913 fp->f_fglob);
7914 if (error)
7915 goto out;
7916 #endif
7917
7918
7919 if ( (error = vnode_getwithref(vp)) )
7920 goto out;
7921
7922 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
7923
/* Re-entered with a different vp after a successful union traverse below. */
7924 unionread:
7925 if (vp->v_type != VDIR) {
7926 (void)vnode_put(vp);
7927 error = EINVAL;
7928 goto out;
7929 }
7930
7931 #if CONFIG_MACF
7932 error = mac_vnode_check_readdir(ctx, vp);
7933 if (error != 0) {
7934 (void)vnode_put(vp);
7935 goto out;
7936 }
7937 #endif /* MAC */
7938
7939 /* set up the uio structure which will contain the users return buffer */
7940 loff = fp->f_fglob->fg_offset;
7941 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
7942 uio_addiov(auio, uap->buffer, uap->buffersize);
7943
7944 /*
7945 * If the only item requested is file names, we can let that past with
7946 * just LIST_DIRECTORY. If they want any other attributes, that means
7947 * they need SEARCH as well.
7948 */
7949 action = KAUTH_VNODE_LIST_DIRECTORY;
7950 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
7951 attributelist.fileattr || attributelist.dirattr)
7952 action |= KAUTH_VNODE_SEARCH;
7953
7954 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
7955
7956 /* Believe it or not, uap->options only has 32-bits of valid
7957 * info, so truncate before extending again */
7958
7959 error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
7960 (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
7961 }
7962
/* Covers both an authorization failure and a VNOP_READDIRATTR() failure. */
7963 if (error) {
7964 (void) vnode_put(vp);
7965 goto out;
7966 }
7967
7968 /*
7969 * If we've got the last entry of a directory in a union mount
7970 * then reset the eofflag and pretend there's still more to come.
7971 * The next call will again set eofflag and the buffer will be empty,
7972 * so traverse to the underlying directory and do the directory
7973 * read there.
7974 */
7975 if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
7976 if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
7977 eofflag = 0;
7978 } else { // Empty buffer
7979 struct vnode *tvp = vp;
/*
 * On a successful traverse the descriptor is switched over to the new
 * vnode; the reference taken and the one released both pass the
 * O_EVTONLY bit of the open flags.
 */
7980 if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
7981 vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
7982 fp->f_fglob->fg_data = (caddr_t) vp;
7983 fp->f_fglob->fg_offset = 0; // reset index for new dir
7984 count = savecount;
7985 vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
7986 vnode_put(tvp);
7987 goto unionread;
7988 }
7989 vp = tvp;
7990 }
7991 }
7992
7993 (void)vnode_put(vp);
7994
/* error is always 0 here: the non-zero case already jumped to 'out' above. */
7995 if (error)
7996 goto out;
7997 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
7998
7999 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
8000 goto out;
8001 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
8002 goto out;
8003 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
8004 goto out;
8005
8006 *retval = eofflag; /* similar to getdirentries */
8007 error = 0;
8008 out:
8009 file_drop(fd);
8010 return (error); /* return error earlier, a retval of 0 or 1 now */
8011
8012 } /* end of getdirentriesattr system call */
8013
8014 /*
8015 * Exchange data between two files
8016 */
8017
/*
 * exchangedata: system call entry point.
 *
 * Indirect:	uap->path1	First file
 *		uap->path2	Second file
 *		uap->options	FSOPT_NOFOLLOW suppresses FOLLOW on both
 *				lookups
 *
 * Returns:	0		Success
 *		EINVAL		Both paths name the same vnode, or one of
 *				them is not a regular file
 *		EXDEV		The two files are on different mounts
 *		ENOMEM		No path buffer for the notification paths
 *	namei:???
 *	mac_vnode_check_exchangedata:???
 *	vnode_authorize:???
 *	VNOP_EXCHANGE:???
 *
 * Notes:	Both files must be authorized for KAUTH_VNODE_READ_DATA and
 *		KAUTH_VNODE_WRITE_DATA.  After a successful VNOP_EXCHANGE()
 *		the v_name and v_parent fields of the two vnodes are swapped
 *		under the name cache lock.
 */
8018 /* ARGSUSED */
8019 int
8020 exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
8021 {
8022
8023 struct nameidata fnd, snd;
8024 vfs_context_t ctx = vfs_context_current();
8025 vnode_t fvp;
8026 vnode_t svp;
8027 int error;
8028 u_int32_t nameiflags;
/* fpath/spath stay NULL unless a notification consumer exists; later code keys off that. */
8029 char *fpath = NULL;
8030 char *spath = NULL;
8031 int flen=0, slen=0;
8032 int from_truncated=0, to_truncated=0;
8033 #if CONFIG_FSE
8034 fse_info f_finfo, s_finfo;
8035 #endif
8036
8037 nameiflags = 0;
8038 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8039
8040 NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
8041 UIO_USERSPACE, uap->path1, ctx);
8042
8043 error = namei(&fnd);
8044 if (error)
8045 goto out2;
8046
8047 nameidone(&fnd);
8048 fvp = fnd.ni_vp;
8049
8050 NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
8051 UIO_USERSPACE, uap->path2, ctx);
8052
8053 error = namei(&snd);
8054 if (error) {
/* Only the first vnode is held at this point, so release it before bailing out. */
8055 vnode_put(fvp);
8056 goto out2;
8057 }
8058 nameidone(&snd);
8059 svp = snd.ni_vp;
8060
8061 /*
8062 * if the files are the same, return an inval error
8063 */
8064 if (svp == fvp) {
8065 error = EINVAL;
8066 goto out;
8067 }
8068
8069 /*
8070 * if the files are on different volumes, return an error
8071 */
8072 if (svp->v_mount != fvp->v_mount) {
8073 error = EXDEV;
8074 goto out;
8075 }
8076
8077 /* If they're not files, return an error */
8078 if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
8079 error = EINVAL;
8080 goto out;
8081 }
8082
8083 #if CONFIG_MACF
8084 error = mac_vnode_check_exchangedata(ctx,
8085 fvp, svp);
8086 if (error)
8087 goto out;
8088 #endif
8089 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
8090 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
8091 goto out;
8092
/* Paths (and fsevent info) are gathered before the exchange, and only if someone will consume them. */
8093 if (
8094 #if CONFIG_FSE
8095 need_fsevent(FSE_EXCHANGE, fvp) ||
8096 #endif
8097 kauth_authorize_fileop_has_listeners()) {
8098 GET_PATH(fpath);
8099 GET_PATH(spath);
8100 if (fpath == NULL || spath == NULL) {
8101 error = ENOMEM;
8102 goto out;
8103 }
8104
8105 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
8106 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
8107
8108 #if CONFIG_FSE
8109 get_fse_info(fvp, &f_finfo, ctx);
8110 get_fse_info(svp, &s_finfo, ctx);
8111 if (from_truncated || to_truncated) {
8112 // set it here since only the f_finfo gets reported up to user space
8113 f_finfo.mode |= FSE_TRUNCATED_PATH;
8114 }
8115 #endif
8116 }
8117 /* Ok, make the call */
8118 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
8119
8120 if (error == 0) {
8121 const char *tmpname;
8122
8123 if (fpath != NULL && spath != NULL) {
8124 /* call out to allow 3rd party notification of exchangedata.
8125 * Ignore result of kauth_authorize_fileop call.
8126 */
8127 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
8128 (uintptr_t)fpath, (uintptr_t)spath);
8129 }
/* Swap the cached name and parent of the two vnodes while holding the name cache lock. */
8130 name_cache_lock();
8131
8132 tmpname = fvp->v_name;
8133 fvp->v_name = svp->v_name;
8134 svp->v_name = tmpname;
8135
8136 if (fvp->v_parent != svp->v_parent) {
8137 vnode_t tmp;
8138
8139 tmp = fvp->v_parent;
8140 fvp->v_parent = svp->v_parent;
8141 svp->v_parent = tmp;
8142 }
8143 name_cache_unlock();
8144
8145 #if CONFIG_FSE
8146 if (fpath != NULL && spath != NULL) {
8147 add_fsevent(FSE_EXCHANGE, ctx,
8148 FSE_ARG_STRING, flen, fpath,
8149 FSE_ARG_FINFO, &f_finfo,
8150 FSE_ARG_STRING, slen, spath,
8151 FSE_ARG_FINFO, &s_finfo,
8152 FSE_ARG_DONE);
8153 }
8154 #endif
8155 }
8156
/* 'out' releases both vnodes and any path buffers; 'out2' is for exits that hold nothing. */
8157 out:
8158 if (fpath != NULL)
8159 RELEASE_PATH(fpath);
8160 if (spath != NULL)
8161 RELEASE_PATH(spath);
8162 vnode_put(svp);
8163 vnode_put(fvp);
8164 out2:
8165 return (error);
8166 }
8167
8168 /*
8169 * Return (in MB) the amount of freespace on the given vnode's volume.
8170 */
8171 uint32_t freespace_mb(vnode_t vp);
8172
8173 uint32_t
8174 freespace_mb(vnode_t vp)
8175 {
8176 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
8177 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
8178 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
8179 }
8180
8181 #if CONFIG_SEARCHFS
8182
8183 /* ARGSUSED */
8184
8185 int
8186 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
8187 {
8188 vnode_t vp, tvp;
8189 int i, error=0;
8190 int fserror = 0;
8191 struct nameidata nd;
8192 struct user64_fssearchblock searchblock;
8193 struct searchstate *state;
8194 struct attrlist *returnattrs;
8195 struct timeval timelimit;
8196 void *searchparams1,*searchparams2;
8197 uio_t auio = NULL;
8198 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8199 uint32_t nummatches;
8200 int mallocsize;
8201 uint32_t nameiflags;
8202 vfs_context_t ctx = vfs_context_current();
8203 char uio_buf[ UIO_SIZEOF(1) ];
8204
8205 /* Start by copying in fsearchblock parameter list */
8206 if (IS_64BIT_PROCESS(p)) {
8207 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
8208 timelimit.tv_sec = searchblock.timelimit.tv_sec;
8209 timelimit.tv_usec = searchblock.timelimit.tv_usec;
8210 }
8211 else {
8212 struct user32_fssearchblock tmp_searchblock;
8213
8214 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
8215 // munge into 64-bit version
8216 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
8217 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
8218 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
8219 searchblock.maxmatches = tmp_searchblock.maxmatches;
8220 /*
8221 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8222 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8223 */
8224 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
8225 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
8226 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
8227 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
8228 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
8229 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
8230 searchblock.searchattrs = tmp_searchblock.searchattrs;
8231 }
8232 if (error)
8233 return(error);
8234
8235 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
8236 */
8237 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
8238 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
8239 return(EINVAL);
8240
8241 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8242 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
8243 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8244 /* block. */
8245 /* */
8246 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8247 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8248 /* assumes the size is still 556 bytes it will continue to work */
8249
8250 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
8251 sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
8252
8253 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
8254
8255 /* Now set up the various pointers to the correct place in our newly allocated memory */
8256
8257 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
8258 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
8259 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
8260
8261 /* Now copy in the stuff given our local variables. */
8262
8263 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
8264 goto freeandexit;
8265
8266 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
8267 goto freeandexit;
8268
8269 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
8270 goto freeandexit;
8271
8272 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
8273 goto freeandexit;
8274
8275 /*
8276 * When searching a union mount, need to set the
8277 * start flag at the first call on each layer to
8278 * reset state for the new volume.
8279 */
8280 if (uap->options & SRCHFS_START)
8281 state->ss_union_layer = 0;
8282 else
8283 uap->options |= state->ss_union_flags;
8284 state->ss_union_flags = 0;
8285
8286 /*
8287 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8288 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
8289 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8290 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
8291 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8292 */
8293
8294 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
8295 attrreference_t* string_ref;
8296 u_int32_t* start_length;
8297 user64_size_t param_length;
8298
8299 /* validate searchparams1 */
8300 param_length = searchblock.sizeofsearchparams1;
8301 /* skip the word that specifies length of the buffer */
8302 start_length= (u_int32_t*) searchparams1;
8303 start_length= start_length+1;
8304 string_ref= (attrreference_t*) start_length;
8305
8306 /* ensure no negative offsets or too big offsets */
8307 if (string_ref->attr_dataoffset < 0 ) {
8308 error = EINVAL;
8309 goto freeandexit;
8310 }
8311 if (string_ref->attr_length > MAXPATHLEN) {
8312 error = EINVAL;
8313 goto freeandexit;
8314 }
8315
8316 /* Check for pointer overflow in the string ref */
8317 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
8318 error = EINVAL;
8319 goto freeandexit;
8320 }
8321
8322 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
8323 error = EINVAL;
8324 goto freeandexit;
8325 }
8326 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
8327 error = EINVAL;
8328 goto freeandexit;
8329 }
8330 }
8331
8332 /* set up the uio structure which will contain the users return buffer */
8333 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8334 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
8335
8336 nameiflags = 0;
8337 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8338 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
8339 UIO_USERSPACE, uap->path, ctx);
8340
8341 error = namei(&nd);
8342 if (error)
8343 goto freeandexit;
8344 vp = nd.ni_vp;
8345 nameidone(&nd);
8346
8347 /*
8348 * Switch to the root vnode for the volume
8349 */
8350 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
8351 vnode_put(vp);
8352 if (error)
8353 goto freeandexit;
8354 vp = tvp;
8355
8356 /*
8357 * If it's a union mount, the path lookup takes
8358 * us to the top layer. But we may need to descend
8359 * to a lower layer. For non-union mounts the layer
8360 * is always zero.
8361 */
8362 for (i = 0; i < (int) state->ss_union_layer; i++) {
8363 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
8364 break;
8365 tvp = vp;
8366 vp = vp->v_mount->mnt_vnodecovered;
8367 if (vp == NULL) {
8368 vnode_put(tvp);
8369 error = ENOENT;
8370 goto freeandexit;
8371 }
8372 vnode_getwithref(vp);
8373 vnode_put(tvp);
8374 }
8375
8376 #if CONFIG_MACF
8377 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
8378 if (error) {
8379 vnode_put(vp);
8380 goto freeandexit;
8381 }
8382 #endif
8383
8384
8385 /*
8386 * If searchblock.maxmatches == 0, then skip the search. This has happened
8387 * before and sometimes the underlying code doesnt deal with it well.
8388 */
8389 if (searchblock.maxmatches == 0) {
8390 nummatches = 0;
8391 goto saveandexit;
8392 }
8393
8394 /*
8395 * Allright, we have everything we need, so lets make that call.
8396 *
8397 * We keep special track of the return value from the file system:
8398 * EAGAIN is an acceptable error condition that shouldn't keep us
8399 * from copying out any results...
8400 */
8401
8402 fserror = VNOP_SEARCHFS(vp,
8403 searchparams1,
8404 searchparams2,
8405 &searchblock.searchattrs,
8406 (u_long)searchblock.maxmatches,
8407 &timelimit,
8408 returnattrs,
8409 &nummatches,
8410 (u_long)uap->scriptcode,
8411 (u_long)uap->options,
8412 auio,
8413 (struct searchstate *) &state->ss_fsstate,
8414 ctx);
8415
8416 /*
8417 * If it's a union mount we need to be called again
8418 * to search the mounted-on filesystem.
8419 */
8420 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
8421 state->ss_union_flags = SRCHFS_START;
8422 state->ss_union_layer++; // search next layer down
8423 fserror = EAGAIN;
8424 }
8425
8426 saveandexit:
8427
8428 vnode_put(vp);
8429
8430 /* Now copy out the stuff that needs copying out. That means the number of matches, the
8431 search state. Everything was already put into he return buffer by the vop call. */
8432
8433 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
8434 goto freeandexit;
8435
8436 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
8437 goto freeandexit;
8438
8439 error = fserror;
8440
8441 freeandexit:
8442
8443 FREE(searchparams1,M_TEMP);
8444
8445 return(error);
8446
8447
8448 } /* end of searchfs system call */
8449
8450 #else /* CONFIG_SEARCHFS */
8451
8452 int
8453 searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
8454 {
8455 return (ENOTSUP);
8456 }
8457
8458 #endif /* CONFIG_SEARCHFS */
8459
8460
/*
 * Lock attributes and lock group for the two namespace-handler mutexes;
 * allocated and initialized by nspace_handler_init().
 */
8461 lck_grp_attr_t * nspace_group_attr;
8462 lck_attr_t * nspace_lock_attr;
8463 lck_grp_t * nspace_mutex_group;
8464
/*
 * nspace_handler_lock is held around accesses to nspace_items[] and is the
 * mutex handed to msleep() by both waiters and handlers.
 * nspace_handler_exclusion_lock guards the per-type handler_busy flag.
 */
8465 lck_mtx_t nspace_handler_lock;
8466 lck_mtx_t nspace_handler_exclusion_lock;
8467
/*
 * snapshot_timestamp: while it is 0 or ~0 the snapshot handler entry points
 * fail with EINVAL; it is reset to 0 when the snapshot handler process exits.
 * nspace_allow_virtual_devs: when non-zero, snapshot events are also raised
 * for vnodes on MNTK_VIRTUALDEV mounts.
 */
8468 time_t snapshot_timestamp=0;
8469 int nspace_allow_virtual_devs=0;
8470
8471 void nspace_handler_init(void);
8472
/*
 * One pending namespace/snapshot event.  A slot is free when flags == 0.
 */
8473 typedef struct nspace_item_info {
8474 struct vnode *vp; /* vnode the event is about; its address is the waiters' sleep channel */
8475 void *arg; /* optional uio whose offset/resid are reported to the handler, or NULL */
8476 uint64_t op; /* operation bits as passed to resolve_nspace_item_ext() */
8477 uint32_t vid; /* vnode_vid() captured at enqueue time, used with vnode_getwithvid() */
8478 uint32_t flags; /* NSPACE_ITEM_* state and event-type bits */
8479 uint32_t token; /* id assigned when a handler picks the item up; returned to the waiter as its error when the item is CANCELLED */
8480 uint32_t refcount; /* number of threads waiting on this slot */
8481 } nspace_item_info;
8482
8483 #define MAX_NSPACE_ITEMS 128
8484 nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
8485 uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
/* incremented for every item handed to a handler; its address is the sleep channel for threads waiting for a free slot */
8486 uint32_t nspace_token_id=0;
8487 uint32_t nspace_handler_timeout = 15; // seconds
8488
/* item state bits */
8489 #define NSPACE_ITEM_NEW 0x0001
8490 #define NSPACE_ITEM_PROCESSING 0x0002
8491 #define NSPACE_ITEM_DEAD 0x0004
8492 #define NSPACE_ITEM_CANCELLED 0x0008
8493 #define NSPACE_ITEM_DONE 0x0010
8494 #define NSPACE_ITEM_RESET_TIMER 0x0020
8495
/* event-type bits: select which handler type an item belongs to */
8496 #define NSPACE_ITEM_NSPACE_EVENT 0x0040
8497 #define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
8498
8499 #define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
8500
8501 //#pragma optimization_level 0
8502
/* handler kinds; the values index nspace_handlers[] */
8503 typedef enum {
8504 NSPACE_HANDLER_NSPACE = 0,
8505 NSPACE_HANDLER_SNAPSHOT = 1,
8506
8507 NSPACE_HANDLER_COUNT,
8508 } nspace_type_t;
8509
/* registration record for one handler kind */
8510 typedef struct {
8511 uint64_t handler_tid; /* thread_tid() of the thread that registered */
8512 struct proc *handler_proc; /* registered handler process, NULL when none */
8513 int handler_busy; /* set while a thread is inside wait_for_namespace_event() for this kind */
8514 } nspace_handler_t;
8515
8516 nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
8517
8518 /* namespace fsctl functions */
/* forward declarations for the file-local namespace handler helpers defined below */
8519 static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
8520 static int nspace_item_flags_for_type(nspace_type_t nspace_type);
8521 static int nspace_open_flags_for_type(nspace_type_t nspace_type);
8522 static nspace_type_t nspace_type_for_op(uint64_t op);
8523 static int nspace_is_special_process(struct proc *proc);
8524 static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
8525 static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
8526 static int validate_namespace_args (int is64bit, int size);
8527 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
8528
8529
8530 static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
8531 {
8532 switch(nspace_type) {
8533 case NSPACE_HANDLER_NSPACE:
8534 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
8535 case NSPACE_HANDLER_SNAPSHOT:
8536 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
8537 default:
8538 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
8539 return 0;
8540 }
8541 }
8542
8543 static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
8544 {
8545 switch(nspace_type) {
8546 case NSPACE_HANDLER_NSPACE:
8547 return NSPACE_ITEM_NSPACE_EVENT;
8548 case NSPACE_HANDLER_SNAPSHOT:
8549 return NSPACE_ITEM_SNAPSHOT_EVENT;
8550 default:
8551 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
8552 return 0;
8553 }
8554 }
8555
8556 static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
8557 {
8558 switch(nspace_type) {
8559 case NSPACE_HANDLER_NSPACE:
8560 return FREAD | FWRITE | O_EVTONLY;
8561 case NSPACE_HANDLER_SNAPSHOT:
8562 return FREAD | O_EVTONLY;
8563 default:
8564 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
8565 return 0;
8566 }
8567 }
8568
8569 static inline nspace_type_t nspace_type_for_op(uint64_t op)
8570 {
8571 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
8572 case NAMESPACE_HANDLER_NSPACE_EVENT:
8573 return NSPACE_HANDLER_NSPACE;
8574 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
8575 return NSPACE_HANDLER_SNAPSHOT;
8576 default:
8577 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
8578 return NSPACE_HANDLER_NSPACE;
8579 }
8580 }
8581
8582 static inline int nspace_is_special_process(struct proc *proc)
8583 {
8584 int i;
8585 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
8586 if (proc == nspace_handlers[i].handler_proc)
8587 return 1;
8588 }
8589 return 0;
8590 }
8591
8592 void
8593 nspace_handler_init(void)
8594 {
8595 nspace_lock_attr = lck_attr_alloc_init();
8596 nspace_group_attr = lck_grp_attr_alloc_init();
8597 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
8598 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
8599 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
8600 memset(&nspace_items[0], 0, sizeof(nspace_items));
8601 }
8602
8603 void
8604 nspace_proc_exit(struct proc *p)
8605 {
8606 int i, event_mask = 0;
8607
8608 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
8609 if (p == nspace_handlers[i].handler_proc) {
8610 event_mask |= nspace_item_flags_for_type(i);
8611 nspace_handlers[i].handler_tid = 0;
8612 nspace_handlers[i].handler_proc = NULL;
8613 }
8614 }
8615
8616 if (event_mask == 0) {
8617 return;
8618 }
8619
8620 if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
8621 // if this process was the snapshot handler, zero snapshot_timeout
8622 snapshot_timestamp = 0;
8623 }
8624
8625 //
8626 // unblock anyone that's waiting for the handler that died
8627 //
8628 lck_mtx_lock(&nspace_handler_lock);
8629 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8630 if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {
8631
8632 if ( nspace_items[i].flags & event_mask ) {
8633
8634 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
8635 vnode_lock_spin(nspace_items[i].vp);
8636 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
8637 vnode_unlock(nspace_items[i].vp);
8638 }
8639 nspace_items[i].vp = NULL;
8640 nspace_items[i].vid = 0;
8641 nspace_items[i].flags = NSPACE_ITEM_DONE;
8642 nspace_items[i].token = 0;
8643
8644 wakeup((caddr_t)&(nspace_items[i].vp));
8645 }
8646 }
8647 }
8648
8649 wakeup((caddr_t)&nspace_item_idx);
8650 lck_mtx_unlock(&nspace_handler_lock);
8651 }
8652
8653
/*
 * Convenience form of resolve_nspace_item_ext() for callers that have no
 * extra argument to attach to the event.
 */
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	void *no_arg = NULL;

	return resolve_nspace_item_ext(vp, op, no_arg);
}
8659
/*
 * resolve_nspace_item_ext
 *
 * Queue a namespace/snapshot event for `vp' and block until the registered
 * handler resolves it, cancels it, or the wait times out.
 *
 * Parameters:	vp	vnode the event concerns
 *		op	operation bits; the event-type bits select the handler
 *		arg	stored with a newly created item (NSPACE_REARM_NO_ARG
 *			is stored as NULL)
 *
 * Returns:	0 when nothing needs to be done (unsupported vnode type,
 *		snapshot event on a virtual device, no handler registered)
 *		or when the item was marked DONE; EDEADLK when the caller is
 *		itself a handler process; the item's token value when the
 *		item was CANCELLED; ETIMEDOUT on timeout; otherwise the
 *		msleep() error.
 */
8660 int
8661 resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
8662 {
8663 int i, error, keep_waiting;
8664 struct timespec ts;
8665 nspace_type_t nspace_type = nspace_type_for_op(op);
8666
8667 // only allow namespace events on regular files, directories and symlinks.
8668 if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
8669 return 0;
8670 }
8671
8672 //
8673 // if this is a snapshot event and the vnode is on a
8674 // disk image just pretend nothing happened since any
8675 // change to the disk image will cause the disk image
8676 // itself to get backed up and this avoids multi-way
8677 // deadlocks between the snapshot handler and the ever
8678 // popular diskimages-helper process. the variable
8679 // nspace_allow_virtual_devs allows this behavior to
8680 // be overridden (for use by the Mobile TimeMachine
8681 // testing infrastructure which uses disk images)
8682 //
8683 if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
8684 && (vp->v_mount != NULL)
8685 && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
8686 && !nspace_allow_virtual_devs) {
8687
8688 return 0;
8689 }
8690
8691 // if (thread_tid(current_thread()) == namespace_handler_tid) {
// no handler registered for this event type: nothing to wait for
8692 if (nspace_handlers[nspace_type].handler_proc == NULL) {
8693 return 0;
8694 }
8695
// a handler process must never block waiting on a handler
8696 if (nspace_is_special_process(current_proc())) {
8697 return EDEADLK;
8698 }
8699
8700 lck_mtx_lock(&nspace_handler_lock);
8701
8702 retry:
// first look for an item already queued for the same vnode and operation
8703 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8704 if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
8705 break;
8706 }
8707 }
8708
// none queued: look for a free slot; otherwise join the existing item as another waiter
8709 if (i >= MAX_NSPACE_ITEMS) {
8710 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8711 if (nspace_items[i].flags == 0) {
8712 break;
8713 }
8714 }
8715 } else {
8716 nspace_items[i].refcount++;
8717 }
8718
// table full: sleep (with timeout) until a waiter signals nspace_token_id, then rescan
8719 if (i >= MAX_NSPACE_ITEMS) {
8720 ts.tv_sec = nspace_handler_timeout;
8721 ts.tv_nsec = 0;
8722
8723 error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
8724 if (error == 0) {
8725 // an entry got free'd up, go see if we can get a slot
8726 goto retry;
8727 } else {
8728 lck_mtx_unlock(&nspace_handler_lock);
8729 return error;
8730 }
8731 }
8732
8733 //
8734 // if it didn't already exist, add it. if it did exist
8735 // we'll get woken up when someone does a wakeup() on
8736 // the slot in the nspace_items table.
8737 //
8738 if (vp != nspace_items[i].vp) {
8739 nspace_items[i].vp = vp;
8740 nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
8741 nspace_items[i].op = op;
8742 nspace_items[i].vid = vnode_vid(vp);
8743 nspace_items[i].flags = NSPACE_ITEM_NEW;
8744 nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
// snapshot events carrying a non-NULL arg tag the vnode with VNEEDSSNAPSHOT
8745 if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
8746 if (arg) {
8747 vnode_lock_spin(vp);
8748 vp->v_flag |= VNEEDSSNAPSHOT;
8749 vnode_unlock(vp);
8750 }
8751 }
8752
8753 nspace_items[i].token = 0;
8754 nspace_items[i].refcount = 1;
8755
// wake a handler sleeping in wait_for_namespace_event()
8756 wakeup((caddr_t)&nspace_item_idx);
8757 }
8758
8759 //
8760 // Now go to sleep until the handler does a wakeup on this
8761 // slot in the nspace_items table (or we timeout).
8762 //
8763 keep_waiting = 1;
8764 while(keep_waiting) {
8765 ts.tv_sec = nspace_handler_timeout;
8766 ts.tv_nsec = 0;
8767 error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
8768
// the item's flags take precedence over the msleep() result
8769 if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
8770 error = 0;
8771 } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
// for a cancelled item the token field is returned as the error
8772 error = nspace_items[i].token;
8773 } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
// RESET_TIMER set: clear it and start a fresh timeout period
8774 if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
8775 nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
8776 continue;
8777 } else {
8778 error = ETIMEDOUT;
8779 }
8780 } else if (error == 0) {
8781 // hmmm, why did we get woken up?
8782 printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
8783 nspace_items[i].token);
8784 }
8785
// the last waiter to leave releases the slot
8786 if (--nspace_items[i].refcount == 0) {
8787 nspace_items[i].vp = NULL; // clear this so that no one will match on it again
8788 nspace_items[i].arg = NULL;
8789 nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
8790 nspace_items[i].flags = 0; // this clears it for re-use
8791 }
// let threads blocked in the "nspace-no-space" sleep rescan for a free slot
8792 wakeup(&nspace_token_id);
8793 keep_waiting = 0;
8794 }
8795
8796 lck_mtx_unlock(&nspace_handler_lock);
8797
8798 return error;
8799 }
8800
8801
8802 int
8803 get_nspace_item_status(struct vnode *vp, int32_t *status)
8804 {
8805 int i;
8806
8807 lck_mtx_lock(&nspace_handler_lock);
8808 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8809 if (nspace_items[i].vp == vp) {
8810 break;
8811 }
8812 }
8813
8814 if (i >= MAX_NSPACE_ITEMS) {
8815 lck_mtx_unlock(&nspace_handler_lock);
8816 return ENOENT;
8817 }
8818
8819 *status = nspace_items[i].flags;
8820 lck_mtx_unlock(&nspace_handler_lock);
8821 return 0;
8822 }
8823
8824
#if 0
/*
 * Compiled out.  Formats a "/.vol/<fsid>/<fileid>" path for `vp' into
 * `path' (capacity *len) and stores the snprintf() result plus one in *len.
 * Returns 0 on success, or -1 (with a placeholder path) when the
 * attributes cannot be fetched.
 */
static int
build_volfs_path(struct vnode *vp, char *path, int *len)
{
	struct vnode_attr va;

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_fileid);

	if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
		*len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
		return -1;
	}

	*len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
	return 0;
}
#endif
8847
8848 //
8849 // Note: this function does NOT check permissions on all of the
8850 // parent directories leading to this vnode. It should only be
8851 // called on behalf of a root process. Otherwise a process may
8852 // get access to a file because the file itself is readable even
8853 // though its parent directories would prevent access.
8854 //
8855 static int
8856 vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
8857 {
8858 int error, action;
8859
8860 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8861 return error;
8862 }
8863
8864 #if CONFIG_MACF
8865 error = mac_vnode_check_open(ctx, vp, fmode);
8866 if (error)
8867 return error;
8868 #endif
8869
8870 /* compute action to be authorized */
8871 action = 0;
8872 if (fmode & FREAD) {
8873 action |= KAUTH_VNODE_READ_DATA;
8874 }
8875 if (fmode & (FWRITE | O_TRUNC)) {
8876 /*
8877 * If we are writing, appending, and not truncating,
8878 * indicate that we are appending so that if the
8879 * UF_APPEND or SF_APPEND bits are set, we do not deny
8880 * the open.
8881 */
8882 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
8883 action |= KAUTH_VNODE_APPEND_DATA;
8884 } else {
8885 action |= KAUTH_VNODE_WRITE_DATA;
8886 }
8887 }
8888
8889 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
8890 return error;
8891
8892
8893 //
8894 // if the vnode is tagged VOPENEVT and the current process
8895 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
8896 // flag to the open mode so that this open won't count against
8897 // the vnode when carbon delete() does a vnode_isinuse() to see
8898 // if a file is currently in use. this allows spotlight
8899 // importers to not interfere with carbon apps that depend on
8900 // the no-delete-if-busy semantics of carbon delete().
8901 //
8902 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
8903 fmode |= O_EVTONLY;
8904 }
8905
8906 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
8907 return error;
8908 }
8909 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
8910 VNOP_CLOSE(vp, fmode, ctx);
8911 return error;
8912 }
8913
8914 /* Call out to allow 3rd party notification of open.
8915 * Ignore result of kauth_authorize_fileop call.
8916 */
8917 #if CONFIG_MACF
8918 mac_vnode_notify_open(ctx, vp, fmode);
8919 #endif
8920 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
8921 (uintptr_t)vp, 0);
8922
8923
8924 return 0;
8925 }
8926
8927 static int
8928 wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
8929 {
8930 int i, error=0, unblock=0;
8931 task_t curtask;
8932
8933 lck_mtx_lock(&nspace_handler_exclusion_lock);
8934 if (nspace_handlers[nspace_type].handler_busy) {
8935 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8936 return EBUSY;
8937 }
8938 nspace_handlers[nspace_type].handler_busy = 1;
8939 lck_mtx_unlock(&nspace_handler_exclusion_lock);
8940
8941 /*
8942 * Any process that gets here will be one of the namespace handlers.
8943 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
8944 * as we can cause deadlocks to occur, because the namespace handler may prevent
8945 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
8946 * process.
8947 */
8948 curtask = current_task();
8949 bsd_set_dependency_capable (curtask);
8950
8951 lck_mtx_lock(&nspace_handler_lock);
8952 if (nspace_handlers[nspace_type].handler_proc == NULL) {
8953 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
8954 nspace_handlers[nspace_type].handler_proc = current_proc();
8955 }
8956
8957 while (error == 0) {
8958
8959 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8960 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
8961 if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
8962 continue;
8963 }
8964 break;
8965 }
8966 }
8967
8968 if (i < MAX_NSPACE_ITEMS) {
8969 nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
8970 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
8971 nspace_items[i].token = ++nspace_token_id;
8972
8973 if (nspace_items[i].vp) {
8974 struct fileproc *fp;
8975 int32_t indx, fmode;
8976 struct proc *p = current_proc();
8977 vfs_context_t ctx = vfs_context_current();
8978 struct vnode_attr va;
8979
8980
8981 /*
8982 * Use vnode pointer to acquire a file descriptor for
8983 * hand-off to userland
8984 */
8985 fmode = nspace_open_flags_for_type(nspace_type);
8986 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
8987 if (error) {
8988 unblock = 1;
8989 break;
8990 }
8991 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
8992 if (error) {
8993 unblock = 1;
8994 vnode_put(nspace_items[i].vp);
8995 break;
8996 }
8997
8998 if ((error = falloc(p, &fp, &indx, ctx))) {
8999 vn_close(nspace_items[i].vp, fmode, ctx);
9000 vnode_put(nspace_items[i].vp);
9001 unblock = 1;
9002 break;
9003 }
9004
9005 fp->f_fglob->fg_flag = fmode;
9006 fp->f_fglob->fg_ops = &vnops;
9007 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;
9008
9009 proc_fdlock(p);
9010 procfdtbl_releasefd(p, indx, NULL);
9011 fp_drop(p, indx, fp, 1);
9012 proc_fdunlock(p);
9013
9014 /*
9015 * All variants of the namespace handler struct support these three fields:
9016 * token, flags, and the FD pointer
9017 */
9018 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
9019 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
9020 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));
9021
9022 /*
9023 * Handle optional fields:
9024 * extended version support an info ptr (offset, length), and the
9025 *
9026 * namedata version supports a unique per-link object ID
9027 *
9028 */
9029 if (nhd->infoptr) {
9030 uio_t uio = (uio_t)nspace_items[i].arg;
9031 uint64_t u_offset, u_length;
9032
9033 if (uio) {
9034 u_offset = uio_offset(uio);
9035 u_length = uio_resid(uio);
9036 } else {
9037 u_offset = 0;
9038 u_length = 0;
9039 }
9040 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
9041 error = copyout(&u_length, nhd->infoptr+sizeof(uint64_t), sizeof(uint64_t));
9042 }
9043
9044 if (nhd->objid) {
9045 VATTR_INIT(&va);
9046 VATTR_WANTED(&va, va_linkid);
9047 error = vnode_getattr(nspace_items[i].vp, &va, ctx);
9048 if (error == 0 ) {
9049 uint64_t linkid = 0;
9050 if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
9051 linkid = (uint64_t)va.va_linkid;
9052 }
9053 error = copyout (&linkid, nhd->objid, sizeof(uint64_t));
9054 }
9055 }
9056
9057 if (error) {
9058 vn_close(nspace_items[i].vp, fmode, ctx);
9059 fp_free(p, indx, fp);
9060 unblock = 1;
9061 }
9062
9063 vnode_put(nspace_items[i].vp);
9064
9065 break;
9066 } else {
9067 printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
9068 i, nspace_items[i].vp, error, nspace_items[i].vp->v_name);
9069 }
9070
9071 } else {
9072 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
9073 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9074 error = EINVAL;
9075 break;
9076 }
9077
9078 }
9079 }
9080
9081 if (unblock) {
9082 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
9083 vnode_lock_spin(nspace_items[i].vp);
9084 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9085 vnode_unlock(nspace_items[i].vp);
9086 }
9087 nspace_items[i].vp = NULL;
9088 nspace_items[i].vid = 0;
9089 nspace_items[i].flags = NSPACE_ITEM_DONE;
9090 nspace_items[i].token = 0;
9091
9092 wakeup((caddr_t)&(nspace_items[i].vp));
9093 }
9094
9095 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
9096 // just go through every snapshot event and unblock it immediately.
9097 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9098 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9099 if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
9100 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
9101 nspace_items[i].vp = NULL;
9102 nspace_items[i].vid = 0;
9103 nspace_items[i].flags = NSPACE_ITEM_DONE;
9104 nspace_items[i].token = 0;
9105
9106 wakeup((caddr_t)&(nspace_items[i].vp));
9107 }
9108 }
9109 }
9110 }
9111 }
9112
9113 lck_mtx_unlock(&nspace_handler_lock);
9114
9115 lck_mtx_lock(&nspace_handler_exclusion_lock);
9116 nspace_handlers[nspace_type].handler_busy = 0;
9117 lck_mtx_unlock(&nspace_handler_exclusion_lock);
9118
9119 return error;
9120 }
9121
9122 static inline int validate_namespace_args (int is64bit, int size) {
9123
9124 if (is64bit) {
9125 /* Must be one of these */
9126 if (size == sizeof(user64_namespace_handler_info)) {
9127 goto sizeok;
9128 }
9129 if (size == sizeof(user64_namespace_handler_info_ext)) {
9130 goto sizeok;
9131 }
9132 if (size == sizeof(user64_namespace_handler_data)) {
9133 goto sizeok;
9134 }
9135 return EINVAL;
9136 }
9137 else {
9138 /* 32 bit -- must be one of these */
9139 if (size == sizeof(user32_namespace_handler_info)) {
9140 goto sizeok;
9141 }
9142 if (size == sizeof(user32_namespace_handler_info_ext)) {
9143 goto sizeok;
9144 }
9145 if (size == sizeof(user32_namespace_handler_data)) {
9146 goto sizeok;
9147 }
9148 return EINVAL;
9149 }
9150
9151 sizeok:
9152
9153 return 0;
9154
9155 }
9156
9157 static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
9158 {
9159 int error = 0;
9160 namespace_handler_data nhd;
9161
9162 bzero (&nhd, sizeof(namespace_handler_data));
9163
9164 if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
9165 (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
9166 return EINVAL;
9167 }
9168
9169 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9170 return error;
9171 }
9172
9173 error = validate_namespace_args (is64bit, size);
9174 if (error) {
9175 return error;
9176 }
9177
9178 /* Copy in the userland pointers into our kernel-only struct */
9179
9180 if (is64bit) {
9181 /* 64 bit userland structures */
9182 nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
9183 nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
9184 nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;
9185
9186 /* If the size is greater than the standard info struct, add in extra fields */
9187 if (size > (sizeof(user64_namespace_handler_info))) {
9188 if (size >= (sizeof(user64_namespace_handler_info_ext))) {
9189 nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
9190 }
9191 if (size == (sizeof(user64_namespace_handler_data))) {
9192 nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
9193 }
9194 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9195 }
9196 }
9197 else {
9198 /* 32 bit userland structures */
9199 nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
9200 nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
9201 nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);
9202
9203 if (size > (sizeof(user32_namespace_handler_info))) {
9204 if (size >= (sizeof(user32_namespace_handler_info_ext))) {
9205 nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
9206 }
9207 if (size == (sizeof(user32_namespace_handler_data))) {
9208 nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
9209 }
9210 /* Otherwise the fields were pre-zeroed when we did the bzero above. */
9211 }
9212 }
9213
9214 return wait_for_namespace_event(&nhd, nspace_type);
9215 }
9216
9217 /*
9218 * Make a filesystem-specific control call:
9219 */
9220 /* ARGSUSED */
9221 static int
9222 fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
9223 {
9224 int error=0;
9225 boolean_t is64bit;
9226 u_int size;
9227 #define STK_PARAMS 128
9228 char stkbuf[STK_PARAMS];
9229 caddr_t data, memp;
9230 vnode_t vp = *arg_vp;
9231
9232 size = IOCPARM_LEN(cmd);
9233 if (size > IOCPARM_MAX) return (EINVAL);
9234
9235 is64bit = proc_is64bit(p);
9236
9237 memp = NULL;
9238 if (size > sizeof (stkbuf)) {
9239 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
9240 data = memp;
9241 } else {
9242 data = &stkbuf[0];
9243 };
9244
9245 if (cmd & IOC_IN) {
9246 if (size) {
9247 error = copyin(udata, data, size);
9248 if (error) {
9249 if (memp) {
9250 kfree (memp, size);
9251 }
9252 return error;
9253 }
9254 } else {
9255 if (is64bit) {
9256 *(user_addr_t *)data = udata;
9257 }
9258 else {
9259 *(uint32_t *)data = (uint32_t)udata;
9260 }
9261 };
9262 } else if ((cmd & IOC_OUT) && size) {
9263 /*
9264 * Zero the buffer so the user always
9265 * gets back something deterministic.
9266 */
9267 bzero(data, size);
9268 } else if (cmd & IOC_VOID) {
9269 if (is64bit) {
9270 *(user_addr_t *)data = udata;
9271 }
9272 else {
9273 *(uint32_t *)data = (uint32_t)udata;
9274 }
9275 }
9276
9277 /* Check to see if it's a generic command */
9278 switch (IOCBASECMD(cmd)) {
9279
9280 case FSCTL_SYNC_VOLUME: {
9281 mount_t mp = vp->v_mount;
9282 int arg = *(uint32_t*)data;
9283
9284 /* record vid of vp so we can drop it below. */
9285 uint32_t vvid = vp->v_id;
9286
9287 /*
9288 * Then grab mount_iterref so that we can release the vnode.
9289 * Without this, a thread may call vnode_iterate_prepare then
9290 * get into a deadlock because we've never released the root vp
9291 */
9292 error = mount_iterref (mp, 0);
9293 if (error) {
9294 break;
9295 }
9296 vnode_put(vp);
9297
9298 /* issue the sync for this volume */
9299 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
9300
9301 /*
9302 * Then release the mount_iterref once we're done syncing; it's not
9303 * needed for the VNOP_IOCTL below
9304 */
9305 mount_iterdrop(mp);
9306
9307 if (arg & FSCTL_SYNC_FULLSYNC) {
9308 /* re-obtain vnode iocount on the root vp, if possible */
9309 error = vnode_getwithvid (vp, vvid);
9310 if (error == 0) {
9311 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
9312 vnode_put (vp);
9313 }
9314 }
9315 /* mark the argument VP as having been released */
9316 *arg_vp = NULL;
9317 }
9318 break;
9319
9320 case FSCTL_SET_PACKAGE_EXTS: {
9321 user_addr_t ext_strings;
9322 uint32_t num_entries;
9323 uint32_t max_width;
9324
9325 if ( (is64bit && size != sizeof(user64_package_ext_info))
9326 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
9327
9328 // either you're 64-bit and passed a 64-bit struct or
9329 // you're 32-bit and passed a 32-bit struct. otherwise
9330 // it's not ok.
9331 error = EINVAL;
9332 break;
9333 }
9334
9335 if (is64bit) {
9336 ext_strings = ((user64_package_ext_info *)data)->strings;
9337 num_entries = ((user64_package_ext_info *)data)->num_entries;
9338 max_width = ((user64_package_ext_info *)data)->max_width;
9339 } else {
9340 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
9341 num_entries = ((user32_package_ext_info *)data)->num_entries;
9342 max_width = ((user32_package_ext_info *)data)->max_width;
9343 }
9344 error = set_package_extensions_table(ext_strings, num_entries, max_width);
9345 }
9346 break;
9347
9348 /* namespace handlers */
9349 case FSCTL_NAMESPACE_HANDLER_GET: {
9350 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
9351 }
9352 break;
9353
9354 /* Snapshot handlers */
9355 case FSCTL_OLD_SNAPSHOT_HANDLER_GET: {
9356 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
9357 }
9358 break;
9359
9360 case FSCTL_SNAPSHOT_HANDLER_GET_EXT: {
9361 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
9362 }
9363 break;
9364
9365 case FSCTL_NAMESPACE_HANDLER_UPDATE: {
9366 uint32_t token, val;
9367 int i;
9368
9369 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
9370 break;
9371 }
9372
9373 if (!nspace_is_special_process(p)) {
9374 error = EINVAL;
9375 break;
9376 }
9377
9378 token = ((uint32_t *)data)[0];
9379 val = ((uint32_t *)data)[1];
9380
9381 lck_mtx_lock(&nspace_handler_lock);
9382
9383 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9384 if (nspace_items[i].token == token) {
9385 break; /* exit for loop, not case stmt */
9386 }
9387 }
9388
9389 if (i >= MAX_NSPACE_ITEMS) {
9390 error = ENOENT;
9391 } else {
9392 //
9393 // if this bit is set, when resolve_nspace_item() times out
9394 // it will loop and go back to sleep.
9395 //
9396 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
9397 }
9398
9399 lck_mtx_unlock(&nspace_handler_lock);
9400
9401 if (error) {
9402 printf("nspace-handler-update: did not find token %u\n", token);
9403 }
9404 }
9405 break;
9406
9407 case FSCTL_NAMESPACE_HANDLER_UNBLOCK: {
9408 uint32_t token, val;
9409 int i;
9410
9411 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
9412 break;
9413 }
9414
9415 if (!nspace_is_special_process(p)) {
9416 error = EINVAL;
9417 break;
9418 }
9419
9420 token = ((uint32_t *)data)[0];
9421 val = ((uint32_t *)data)[1];
9422
9423 lck_mtx_lock(&nspace_handler_lock);
9424
9425 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9426 if (nspace_items[i].token == token) {
9427 break; /* exit for loop, not case statement */
9428 }
9429 }
9430
9431 if (i >= MAX_NSPACE_ITEMS) {
9432 printf("nspace-handler-unblock: did not find token %u\n", token);
9433 error = ENOENT;
9434 } else {
9435 if (val == 0 && nspace_items[i].vp) {
9436 vnode_lock_spin(nspace_items[i].vp);
9437 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9438 vnode_unlock(nspace_items[i].vp);
9439 }
9440
9441 nspace_items[i].vp = NULL;
9442 nspace_items[i].arg = NULL;
9443 nspace_items[i].op = 0;
9444 nspace_items[i].vid = 0;
9445 nspace_items[i].flags = NSPACE_ITEM_DONE;
9446 nspace_items[i].token = 0;
9447
9448 wakeup((caddr_t)&(nspace_items[i].vp));
9449 }
9450
9451 lck_mtx_unlock(&nspace_handler_lock);
9452 }
9453 break;
9454
9455 case FSCTL_NAMESPACE_HANDLER_CANCEL: {
9456 uint32_t token, val;
9457 int i;
9458
9459 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
9460 break;
9461 }
9462
9463 if (!nspace_is_special_process(p)) {
9464 error = EINVAL;
9465 break;
9466 }
9467
9468 token = ((uint32_t *)data)[0];
9469 val = ((uint32_t *)data)[1];
9470
9471 lck_mtx_lock(&nspace_handler_lock);
9472
9473 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9474 if (nspace_items[i].token == token) {
9475 break; /* exit for loop, not case stmt */
9476 }
9477 }
9478
9479 if (i >= MAX_NSPACE_ITEMS) {
9480 printf("nspace-handler-cancel: did not find token %u\n", token);
9481 error = ENOENT;
9482 } else {
9483 if (nspace_items[i].vp) {
9484 vnode_lock_spin(nspace_items[i].vp);
9485 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9486 vnode_unlock(nspace_items[i].vp);
9487 }
9488
9489 nspace_items[i].vp = NULL;
9490 nspace_items[i].arg = NULL;
9491 nspace_items[i].vid = 0;
9492 nspace_items[i].token = val;
9493 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
9494 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
9495
9496 wakeup((caddr_t)&(nspace_items[i].vp));
9497 }
9498
9499 lck_mtx_unlock(&nspace_handler_lock);
9500 }
9501 break;
9502
9503 case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
9504 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9505 break;
9506 }
9507
9508 // we explicitly do not do the namespace_handler_proc check here
9509
9510 lck_mtx_lock(&nspace_handler_lock);
9511 snapshot_timestamp = ((uint32_t *)data)[0];
9512 wakeup(&nspace_item_idx);
9513 lck_mtx_unlock(&nspace_handler_lock);
9514 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
9515
9516 }
9517 break;
9518
9519 case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
9520 {
9521 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9522 break;
9523 }
9524
9525 lck_mtx_lock(&nspace_handler_lock);
9526 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
9527 lck_mtx_unlock(&nspace_handler_lock);
9528 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
9529 nspace_allow_virtual_devs ? "" : " NOT");
9530 error = 0;
9531
9532 }
9533 break;
9534
9535 case FSCTL_SET_FSTYPENAME_OVERRIDE:
9536 {
9537 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9538 break;
9539 }
9540 if (vp->v_mount) {
9541 mount_lock(vp->v_mount);
9542 if (data[0] != 0) {
9543 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
9544 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
9545 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
9546 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
9547 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
9548 }
9549 } else {
9550 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
9551 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
9552 }
9553 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
9554 vp->v_mount->fstypename_override[0] = '\0';
9555 }
9556 mount_unlock(vp->v_mount);
9557 }
9558 }
9559 break;
9560
9561 default: {
9562 /* Invoke the filesystem-specific code */
9563 error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
9564 }
9565
9566 } /* end switch stmt */
9567
9568 /*
9569 * if no errors, copy any data to user. Size was
9570 * already set and checked above.
9571 */
9572 if (error == 0 && (cmd & IOC_OUT) && size)
9573 error = copyout(data, udata, size);
9574
9575 if (memp) {
9576 kfree(memp, size);
9577 }
9578
9579 return error;
9580 }
9581
9582 /* ARGSUSED */
9583 int
9584 fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
9585 {
9586 int error;
9587 struct nameidata nd;
9588 u_long nameiflags;
9589 vnode_t vp = NULL;
9590 vfs_context_t ctx = vfs_context_current();
9591
9592 AUDIT_ARG(cmd, uap->cmd);
9593 AUDIT_ARG(value32, uap->options);
9594 /* Get the vnode for the file we are getting info on: */
9595 nameiflags = 0;
9596 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
9597 NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
9598 UIO_USERSPACE, uap->path, ctx);
9599 if ((error = namei(&nd))) goto done;
9600 vp = nd.ni_vp;
9601 nameidone(&nd);
9602
9603 #if CONFIG_MACF
9604 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
9605 if (error) {
9606 goto done;
9607 }
9608 #endif
9609
9610 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
9611
9612 done:
9613 if (vp)
9614 vnode_put(vp);
9615 return error;
9616 }
9617 /* ARGSUSED */
9618 int
9619 ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
9620 {
9621 int error;
9622 vnode_t vp = NULL;
9623 vfs_context_t ctx = vfs_context_current();
9624 int fd = -1;
9625
9626 AUDIT_ARG(fd, uap->fd);
9627 AUDIT_ARG(cmd, uap->cmd);
9628 AUDIT_ARG(value32, uap->options);
9629
9630 /* Get the vnode for the file we are getting info on: */
9631 if ((error = file_vnode(uap->fd, &vp)))
9632 goto done;
9633 fd = uap->fd;
9634 if ((error = vnode_getwithref(vp))) {
9635 goto done;
9636 }
9637
9638 #if CONFIG_MACF
9639 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
9640 if (error) {
9641 goto done;
9642 }
9643 #endif
9644
9645 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
9646
9647 done:
9648 if (fd != -1)
9649 file_drop(fd);
9650
9651 if (vp)
9652 vnode_put(vp);
9653 return error;
9654 }
9655 /* end of fsctl system call */
9656
9657 /*
9658 * Retrieve the data of an extended attribute.
9659 */
9660 int
9661 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
9662 {
9663 vnode_t vp;
9664 struct nameidata nd;
9665 char attrname[XATTR_MAXNAMELEN+1];
9666 vfs_context_t ctx = vfs_context_current();
9667 uio_t auio = NULL;
9668 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9669 size_t attrsize = 0;
9670 size_t namelen;
9671 u_int32_t nameiflags;
9672 int error;
9673 char uio_buf[ UIO_SIZEOF(1) ];
9674
9675 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9676 return (EINVAL);
9677
9678 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9679 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
9680 if ((error = namei(&nd))) {
9681 return (error);
9682 }
9683 vp = nd.ni_vp;
9684 nameidone(&nd);
9685
9686 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9687 goto out;
9688 }
9689 if (xattr_protected(attrname)) {
9690 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
9691 error = EPERM;
9692 goto out;
9693 }
9694 }
9695 /*
9696 * the specific check for 0xffffffff is a hack to preserve
9697 * binaray compatibilty in K64 with applications that discovered
9698 * that passing in a buf pointer and a size of -1 resulted in
9699 * just the size of the indicated extended attribute being returned.
9700 * this isn't part of the documented behavior, but because of the
9701 * original implemtation's check for "uap->size > 0", this behavior
9702 * was allowed. In K32 that check turned into a signed comparison
9703 * even though uap->size is unsigned... in K64, we blow by that
9704 * check because uap->size is unsigned and doesn't get sign smeared
9705 * in the munger for a 32 bit user app. we also need to add a
9706 * check to limit the maximum size of the buffer being passed in...
9707 * unfortunately, the underlying fileystems seem to just malloc
9708 * the requested size even if the actual extended attribute is tiny.
9709 * because that malloc is for kernel wired memory, we have to put a
9710 * sane limit on it.
9711 *
9712 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
9713 * U64 running on K64 will yield -1 (64 bits wide)
9714 * U32/U64 running on K32 will yield -1 (32 bits wide)
9715 */
9716 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
9717 goto no_uio;
9718
9719 if (uap->value) {
9720 if (uap->size > (size_t)XATTR_MAXSIZE)
9721 uap->size = XATTR_MAXSIZE;
9722
9723 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9724 &uio_buf[0], sizeof(uio_buf));
9725 uio_addiov(auio, uap->value, uap->size);
9726 }
9727 no_uio:
9728 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
9729 out:
9730 vnode_put(vp);
9731
9732 if (auio) {
9733 *retval = uap->size - uio_resid(auio);
9734 } else {
9735 *retval = (user_ssize_t)attrsize;
9736 }
9737
9738 return (error);
9739 }
9740
9741 /*
9742 * Retrieve the data of an extended attribute.
9743 */
9744 int
9745 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
9746 {
9747 vnode_t vp;
9748 char attrname[XATTR_MAXNAMELEN+1];
9749 uio_t auio = NULL;
9750 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9751 size_t attrsize = 0;
9752 size_t namelen;
9753 int error;
9754 char uio_buf[ UIO_SIZEOF(1) ];
9755
9756 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9757 return (EINVAL);
9758
9759 if ( (error = file_vnode(uap->fd, &vp)) ) {
9760 return (error);
9761 }
9762 if ( (error = vnode_getwithref(vp)) ) {
9763 file_drop(uap->fd);
9764 return(error);
9765 }
9766 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9767 goto out;
9768 }
9769 if (xattr_protected(attrname)) {
9770 error = EPERM;
9771 goto out;
9772 }
9773 if (uap->value && uap->size > 0) {
9774 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9775 &uio_buf[0], sizeof(uio_buf));
9776 uio_addiov(auio, uap->value, uap->size);
9777 }
9778
9779 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
9780 out:
9781 (void)vnode_put(vp);
9782 file_drop(uap->fd);
9783
9784 if (auio) {
9785 *retval = uap->size - uio_resid(auio);
9786 } else {
9787 *retval = (user_ssize_t)attrsize;
9788 }
9789 return (error);
9790 }
9791
9792 /*
9793 * Set the data of an extended attribute.
9794 */
9795 int
9796 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
9797 {
9798 vnode_t vp;
9799 struct nameidata nd;
9800 char attrname[XATTR_MAXNAMELEN+1];
9801 vfs_context_t ctx = vfs_context_current();
9802 uio_t auio = NULL;
9803 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9804 size_t namelen;
9805 u_int32_t nameiflags;
9806 int error;
9807 char uio_buf[ UIO_SIZEOF(1) ];
9808
9809 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9810 return (EINVAL);
9811
9812 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9813 if (error == EPERM) {
9814 /* if the string won't fit in attrname, copyinstr emits EPERM */
9815 return (ENAMETOOLONG);
9816 }
9817 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9818 return error;
9819 }
9820 if (xattr_protected(attrname))
9821 return(EPERM);
9822 if (uap->size != 0 && uap->value == 0) {
9823 return (EINVAL);
9824 }
9825
9826 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9827 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
9828 if ((error = namei(&nd))) {
9829 return (error);
9830 }
9831 vp = nd.ni_vp;
9832 nameidone(&nd);
9833
9834 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9835 &uio_buf[0], sizeof(uio_buf));
9836 uio_addiov(auio, uap->value, uap->size);
9837
9838 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
9839 #if CONFIG_FSE
9840 if (error == 0) {
9841 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9842 FSE_ARG_VNODE, vp,
9843 FSE_ARG_DONE);
9844 }
9845 #endif
9846 vnode_put(vp);
9847 *retval = 0;
9848 return (error);
9849 }
9850
9851 /*
9852 * Set the data of an extended attribute.
9853 */
9854 int
9855 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
9856 {
9857 vnode_t vp;
9858 char attrname[XATTR_MAXNAMELEN+1];
9859 uio_t auio = NULL;
9860 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9861 size_t namelen;
9862 int error;
9863 char uio_buf[ UIO_SIZEOF(1) ];
9864 #if CONFIG_FSE
9865 vfs_context_t ctx = vfs_context_current();
9866 #endif
9867
9868 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9869 return (EINVAL);
9870
9871 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9872 return (error);
9873 }
9874 if (xattr_protected(attrname))
9875 return(EPERM);
9876 if (uap->size != 0 && uap->value == 0) {
9877 return (EINVAL);
9878 }
9879 if ( (error = file_vnode(uap->fd, &vp)) ) {
9880 return (error);
9881 }
9882 if ( (error = vnode_getwithref(vp)) ) {
9883 file_drop(uap->fd);
9884 return(error);
9885 }
9886 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9887 &uio_buf[0], sizeof(uio_buf));
9888 uio_addiov(auio, uap->value, uap->size);
9889
9890 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
9891 #if CONFIG_FSE
9892 if (error == 0) {
9893 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9894 FSE_ARG_VNODE, vp,
9895 FSE_ARG_DONE);
9896 }
9897 #endif
9898 vnode_put(vp);
9899 file_drop(uap->fd);
9900 *retval = 0;
9901 return (error);
9902 }
9903
9904 /*
9905 * Remove an extended attribute.
9906 * XXX Code duplication here.
9907 */
9908 int
9909 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
9910 {
9911 vnode_t vp;
9912 struct nameidata nd;
9913 char attrname[XATTR_MAXNAMELEN+1];
9914 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9915 vfs_context_t ctx = vfs_context_current();
9916 size_t namelen;
9917 u_int32_t nameiflags;
9918 int error;
9919
9920 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9921 return (EINVAL);
9922
9923 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
9924 if (error != 0) {
9925 return (error);
9926 }
9927 if (xattr_protected(attrname))
9928 return(EPERM);
9929 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9930 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
9931 if ((error = namei(&nd))) {
9932 return (error);
9933 }
9934 vp = nd.ni_vp;
9935 nameidone(&nd);
9936
9937 error = vn_removexattr(vp, attrname, uap->options, ctx);
9938 #if CONFIG_FSE
9939 if (error == 0) {
9940 add_fsevent(FSE_XATTR_REMOVED, ctx,
9941 FSE_ARG_VNODE, vp,
9942 FSE_ARG_DONE);
9943 }
9944 #endif
9945 vnode_put(vp);
9946 *retval = 0;
9947 return (error);
9948 }
9949
9950 /*
9951 * Remove an extended attribute.
9952 * XXX Code duplication here.
9953 */
9954 int
9955 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
9956 {
9957 vnode_t vp;
9958 char attrname[XATTR_MAXNAMELEN+1];
9959 size_t namelen;
9960 int error;
9961 #if CONFIG_FSE
9962 vfs_context_t ctx = vfs_context_current();
9963 #endif
9964
9965 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9966 return (EINVAL);
9967
9968 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
9969 if (error != 0) {
9970 return (error);
9971 }
9972 if (xattr_protected(attrname))
9973 return(EPERM);
9974 if ( (error = file_vnode(uap->fd, &vp)) ) {
9975 return (error);
9976 }
9977 if ( (error = vnode_getwithref(vp)) ) {
9978 file_drop(uap->fd);
9979 return(error);
9980 }
9981
9982 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
9983 #if CONFIG_FSE
9984 if (error == 0) {
9985 add_fsevent(FSE_XATTR_REMOVED, ctx,
9986 FSE_ARG_VNODE, vp,
9987 FSE_ARG_DONE);
9988 }
9989 #endif
9990 vnode_put(vp);
9991 file_drop(uap->fd);
9992 *retval = 0;
9993 return (error);
9994 }
9995
9996 /*
9997 * Retrieve the list of extended attribute names.
9998 * XXX Code duplication here.
9999 */
10000 int
10001 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
10002 {
10003 vnode_t vp;
10004 struct nameidata nd;
10005 vfs_context_t ctx = vfs_context_current();
10006 uio_t auio = NULL;
10007 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10008 size_t attrsize = 0;
10009 u_int32_t nameiflags;
10010 int error;
10011 char uio_buf[ UIO_SIZEOF(1) ];
10012
10013 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10014 return (EINVAL);
10015
10016 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10017 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
10018 if ((error = namei(&nd))) {
10019 return (error);
10020 }
10021 vp = nd.ni_vp;
10022 nameidone(&nd);
10023 if (uap->namebuf != 0 && uap->bufsize > 0) {
10024 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
10025 &uio_buf[0], sizeof(uio_buf));
10026 uio_addiov(auio, uap->namebuf, uap->bufsize);
10027 }
10028
10029 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
10030
10031 vnode_put(vp);
10032 if (auio) {
10033 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10034 } else {
10035 *retval = (user_ssize_t)attrsize;
10036 }
10037 return (error);
10038 }
10039
10040 /*
10041 * Retrieve the list of extended attribute names.
10042 * XXX Code duplication here.
10043 */
10044 int
10045 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
10046 {
10047 vnode_t vp;
10048 uio_t auio = NULL;
10049 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10050 size_t attrsize = 0;
10051 int error;
10052 char uio_buf[ UIO_SIZEOF(1) ];
10053
10054 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10055 return (EINVAL);
10056
10057 if ( (error = file_vnode(uap->fd, &vp)) ) {
10058 return (error);
10059 }
10060 if ( (error = vnode_getwithref(vp)) ) {
10061 file_drop(uap->fd);
10062 return(error);
10063 }
10064 if (uap->namebuf != 0 && uap->bufsize > 0) {
10065 auio = uio_createwithbuffer(1, 0, spacetype,
10066 UIO_READ, &uio_buf[0], sizeof(uio_buf));
10067 uio_addiov(auio, uap->namebuf, uap->bufsize);
10068 }
10069
10070 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
10071
10072 vnode_put(vp);
10073 file_drop(uap->fd);
10074 if (auio) {
10075 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10076 } else {
10077 *retval = (user_ssize_t)attrsize;
10078 }
10079 return (error);
10080 }
10081
10082 static int fsgetpath_internal(
10083 vfs_context_t ctx, int volfs_id, uint64_t objid,
10084 vm_size_t bufsize, caddr_t buf, int *pathlen)
10085 {
10086 int error;
10087 struct mount *mp = NULL;
10088 vnode_t vp;
10089 int length;
10090 int bpflags;
10091
10092 if (bufsize > PAGE_SIZE) {
10093 return (EINVAL);
10094 }
10095
10096 if (buf == NULL) {
10097 return (ENOMEM);
10098 }
10099
10100 if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
10101 error = ENOTSUP; /* unexpected failure */
10102 return ENOTSUP;
10103 }
10104
10105 unionget:
10106 if (objid == 2) {
10107 error = VFS_ROOT(mp, &vp, ctx);
10108 } else {
10109 error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
10110 }
10111
10112 if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
10113 /*
10114 * If the fileid isn't found and we're in a union
10115 * mount volume, then see if the fileid is in the
10116 * mounted-on volume.
10117 */
10118 struct mount *tmp = mp;
10119 mp = vnode_mount(tmp->mnt_vnodecovered);
10120 vfs_unbusy(tmp);
10121 if (vfs_busy(mp, LK_NOWAIT) == 0)
10122 goto unionget;
10123 } else {
10124 vfs_unbusy(mp);
10125 }
10126
10127 if (error) {
10128 return error;
10129 }
10130
10131 #if CONFIG_MACF
10132 error = mac_vnode_check_fsgetpath(ctx, vp);
10133 if (error) {
10134 vnode_put(vp);
10135 return error;
10136 }
10137 #endif
10138
10139 /* Obtain the absolute path to this vnode. */
10140 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
10141 bpflags |= BUILDPATH_CHECK_MOVED;
10142 error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
10143 vnode_put(vp);
10144
10145 if (error) {
10146 goto out;
10147 }
10148
10149 AUDIT_ARG(text, buf);
10150
10151 if (kdebug_enable) {
10152 long dbg_parms[NUMPARMS];
10153 int dbg_namelen;
10154
10155 dbg_namelen = (int)sizeof(dbg_parms);
10156
10157 if (length < dbg_namelen) {
10158 memcpy((char *)dbg_parms, buf, length);
10159 memset((char *)dbg_parms + length, 0, dbg_namelen - length);
10160
10161 dbg_namelen = length;
10162 } else {
10163 memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
10164 }
10165
10166 kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
10167 }
10168
10169 *pathlen = (user_ssize_t)length; /* may be superseded by error */
10170
10171 out:
10172 return (error);
10173 }
10174
10175 /*
10176 * Obtain the full pathname of a file system object by id.
10177 *
10178 * This is a private SPI used by the File Manager.
10179 */
10180 __private_extern__
10181 int
10182 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
10183 {
10184 vfs_context_t ctx = vfs_context_current();
10185 fsid_t fsid;
10186 char *realpath;
10187 int length;
10188 int error;
10189
10190 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
10191 return (error);
10192 }
10193 AUDIT_ARG(value32, fsid.val[0]);
10194 AUDIT_ARG(value64, uap->objid);
10195 /* Restrict output buffer size for now. */
10196
10197 if (uap->bufsize > PAGE_SIZE) {
10198 return (EINVAL);
10199 }
10200 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
10201 if (realpath == NULL) {
10202 return (ENOMEM);
10203 }
10204
10205 error = fsgetpath_internal(
10206 ctx, fsid.val[0], uap->objid,
10207 uap->bufsize, realpath, &length);
10208
10209 if (error) {
10210 goto out;
10211 }
10212
10213 error = copyout((caddr_t)realpath, uap->buf, length);
10214
10215 *retval = (user_ssize_t)length; /* may be superseded by error */
10216 out:
10217 if (realpath) {
10218 FREE(realpath, M_TEMP);
10219 }
10220 return (error);
10221 }
10222
10223 /*
10224 * Common routine to handle various flavors of statfs data heading out
10225 * to user space.
10226 *
10227 * Returns: 0 Success
10228 * EFAULT
10229 */
10230 static int
10231 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
10232 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
10233 boolean_t partial_copy)
10234 {
10235 int error;
10236 int my_size, copy_size;
10237
10238 if (is_64_bit) {
10239 struct user64_statfs sfs;
10240 my_size = copy_size = sizeof(sfs);
10241 bzero(&sfs, my_size);
10242 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
10243 sfs.f_type = mp->mnt_vtable->vfc_typenum;
10244 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
10245 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
10246 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
10247 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
10248 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
10249 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
10250 sfs.f_files = (user64_long_t)sfsp->f_files;
10251 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
10252 sfs.f_fsid = sfsp->f_fsid;
10253 sfs.f_owner = sfsp->f_owner;
10254 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
10255 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
10256 } else {
10257 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
10258 }
10259 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
10260 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
10261
10262 if (partial_copy) {
10263 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
10264 }
10265 error = copyout((caddr_t)&sfs, bufp, copy_size);
10266 }
10267 else {
10268 struct user32_statfs sfs;
10269
10270 my_size = copy_size = sizeof(sfs);
10271 bzero(&sfs, my_size);
10272
10273 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
10274 sfs.f_type = mp->mnt_vtable->vfc_typenum;
10275 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
10276
10277 /*
10278 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
10279 * have to fudge the numbers here in that case. We inflate the blocksize in order
10280 * to reflect the filesystem size as best we can.
10281 */
10282 if ((sfsp->f_blocks > INT_MAX)
10283 /* Hack for 4061702 . I think the real fix is for Carbon to
10284 * look for some volume capability and not depend on hidden
10285 * semantics agreed between a FS and carbon.
10286 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
10287 * for Carbon to set bNoVolumeSizes volume attribute.
10288 * Without this the webdavfs files cannot be copied onto
10289 * disk as they look huge. This change should not affect
10290 * XSAN as they should not setting these to -1..
10291 */
10292 && (sfsp->f_blocks != 0xffffffffffffffffULL)
10293 && (sfsp->f_bfree != 0xffffffffffffffffULL)
10294 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
10295 int shift;
10296
10297 /*
10298 * Work out how far we have to shift the block count down to make it fit.
10299 * Note that it's possible to have to shift so far that the resulting
10300 * blocksize would be unreportably large. At that point, we will clip
10301 * any values that don't fit.
10302 *
10303 * For safety's sake, we also ensure that f_iosize is never reported as
10304 * being smaller than f_bsize.
10305 */
10306 for (shift = 0; shift < 32; shift++) {
10307 if ((sfsp->f_blocks >> shift) <= INT_MAX)
10308 break;
10309 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
10310 break;
10311 }
10312 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
10313 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
10314 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
10315 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
10316 #undef __SHIFT_OR_CLIP
10317 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
10318 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
10319 } else {
10320 /* filesystem is small enough to be reported honestly */
10321 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
10322 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
10323 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
10324 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
10325 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
10326 }
10327 sfs.f_files = (user32_long_t)sfsp->f_files;
10328 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
10329 sfs.f_fsid = sfsp->f_fsid;
10330 sfs.f_owner = sfsp->f_owner;
10331 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
10332 strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
10333 } else {
10334 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
10335 }
10336 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
10337 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
10338
10339 if (partial_copy) {
10340 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
10341 }
10342 error = copyout((caddr_t)&sfs, bufp, copy_size);
10343 }
10344
10345 if (sizep != NULL) {
10346 *sizep = my_size;
10347 }
10348 return(error);
10349 }
10350
10351 /*
10352 * copy stat structure into user_stat structure.
10353 */
10354 void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
10355 {
10356 bzero(usbp, sizeof(*usbp));
10357
10358 usbp->st_dev = sbp->st_dev;
10359 usbp->st_ino = sbp->st_ino;
10360 usbp->st_mode = sbp->st_mode;
10361 usbp->st_nlink = sbp->st_nlink;
10362 usbp->st_uid = sbp->st_uid;
10363 usbp->st_gid = sbp->st_gid;
10364 usbp->st_rdev = sbp->st_rdev;
10365 #ifndef _POSIX_C_SOURCE
10366 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10367 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10368 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10369 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10370 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10371 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10372 #else
10373 usbp->st_atime = sbp->st_atime;
10374 usbp->st_atimensec = sbp->st_atimensec;
10375 usbp->st_mtime = sbp->st_mtime;
10376 usbp->st_mtimensec = sbp->st_mtimensec;
10377 usbp->st_ctime = sbp->st_ctime;
10378 usbp->st_ctimensec = sbp->st_ctimensec;
10379 #endif
10380 usbp->st_size = sbp->st_size;
10381 usbp->st_blocks = sbp->st_blocks;
10382 usbp->st_blksize = sbp->st_blksize;
10383 usbp->st_flags = sbp->st_flags;
10384 usbp->st_gen = sbp->st_gen;
10385 usbp->st_lspare = sbp->st_lspare;
10386 usbp->st_qspare[0] = sbp->st_qspare[0];
10387 usbp->st_qspare[1] = sbp->st_qspare[1];
10388 }
10389
10390 void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
10391 {
10392 bzero(usbp, sizeof(*usbp));
10393
10394 usbp->st_dev = sbp->st_dev;
10395 usbp->st_ino = sbp->st_ino;
10396 usbp->st_mode = sbp->st_mode;
10397 usbp->st_nlink = sbp->st_nlink;
10398 usbp->st_uid = sbp->st_uid;
10399 usbp->st_gid = sbp->st_gid;
10400 usbp->st_rdev = sbp->st_rdev;
10401 #ifndef _POSIX_C_SOURCE
10402 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10403 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10404 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10405 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10406 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10407 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10408 #else
10409 usbp->st_atime = sbp->st_atime;
10410 usbp->st_atimensec = sbp->st_atimensec;
10411 usbp->st_mtime = sbp->st_mtime;
10412 usbp->st_mtimensec = sbp->st_mtimensec;
10413 usbp->st_ctime = sbp->st_ctime;
10414 usbp->st_ctimensec = sbp->st_ctimensec;
10415 #endif
10416 usbp->st_size = sbp->st_size;
10417 usbp->st_blocks = sbp->st_blocks;
10418 usbp->st_blksize = sbp->st_blksize;
10419 usbp->st_flags = sbp->st_flags;
10420 usbp->st_gen = sbp->st_gen;
10421 usbp->st_lspare = sbp->st_lspare;
10422 usbp->st_qspare[0] = sbp->st_qspare[0];
10423 usbp->st_qspare[1] = sbp->st_qspare[1];
10424 }
10425
10426 /*
10427 * copy stat64 structure into user_stat64 structure.
10428 */
10429 void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
10430 {
10431 bzero(usbp, sizeof(*usbp));
10432
10433 usbp->st_dev = sbp->st_dev;
10434 usbp->st_ino = sbp->st_ino;
10435 usbp->st_mode = sbp->st_mode;
10436 usbp->st_nlink = sbp->st_nlink;
10437 usbp->st_uid = sbp->st_uid;
10438 usbp->st_gid = sbp->st_gid;
10439 usbp->st_rdev = sbp->st_rdev;
10440 #ifndef _POSIX_C_SOURCE
10441 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10442 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10443 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10444 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10445 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10446 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10447 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
10448 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
10449 #else
10450 usbp->st_atime = sbp->st_atime;
10451 usbp->st_atimensec = sbp->st_atimensec;
10452 usbp->st_mtime = sbp->st_mtime;
10453 usbp->st_mtimensec = sbp->st_mtimensec;
10454 usbp->st_ctime = sbp->st_ctime;
10455 usbp->st_ctimensec = sbp->st_ctimensec;
10456 usbp->st_birthtime = sbp->st_birthtime;
10457 usbp->st_birthtimensec = sbp->st_birthtimensec;
10458 #endif
10459 usbp->st_size = sbp->st_size;
10460 usbp->st_blocks = sbp->st_blocks;
10461 usbp->st_blksize = sbp->st_blksize;
10462 usbp->st_flags = sbp->st_flags;
10463 usbp->st_gen = sbp->st_gen;
10464 usbp->st_lspare = sbp->st_lspare;
10465 usbp->st_qspare[0] = sbp->st_qspare[0];
10466 usbp->st_qspare[1] = sbp->st_qspare[1];
10467 }
10468
10469 void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
10470 {
10471 bzero(usbp, sizeof(*usbp));
10472
10473 usbp->st_dev = sbp->st_dev;
10474 usbp->st_ino = sbp->st_ino;
10475 usbp->st_mode = sbp->st_mode;
10476 usbp->st_nlink = sbp->st_nlink;
10477 usbp->st_uid = sbp->st_uid;
10478 usbp->st_gid = sbp->st_gid;
10479 usbp->st_rdev = sbp->st_rdev;
10480 #ifndef _POSIX_C_SOURCE
10481 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10482 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10483 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10484 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10485 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10486 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10487 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
10488 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
10489 #else
10490 usbp->st_atime = sbp->st_atime;
10491 usbp->st_atimensec = sbp->st_atimensec;
10492 usbp->st_mtime = sbp->st_mtime;
10493 usbp->st_mtimensec = sbp->st_mtimensec;
10494 usbp->st_ctime = sbp->st_ctime;
10495 usbp->st_ctimensec = sbp->st_ctimensec;
10496 usbp->st_birthtime = sbp->st_birthtime;
10497 usbp->st_birthtimensec = sbp->st_birthtimensec;
10498 #endif
10499 usbp->st_size = sbp->st_size;
10500 usbp->st_blocks = sbp->st_blocks;
10501 usbp->st_blksize = sbp->st_blksize;
10502 usbp->st_flags = sbp->st_flags;
10503 usbp->st_gen = sbp->st_gen;
10504 usbp->st_lspare = sbp->st_lspare;
10505 usbp->st_qspare[0] = sbp->st_qspare[0];
10506 usbp->st_qspare[1] = sbp->st_qspare[1];
10507 }
10508
10509 /*
10510 * Purge buffer cache for simulating cold starts
10511 */
10512 static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
10513 {
10514 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
10515
10516 return VNODE_RETURNED;
10517 }
10518
10519 static int vfs_purge_callback(mount_t mp, __unused void * arg)
10520 {
10521 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
10522
10523 return VFS_RETURNED;
10524 }
10525
10526 int
10527 vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
10528 {
10529 if (!kauth_cred_issuser(kauth_cred_get()))
10530 return EPERM;
10531
10532 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
10533
10534 return 0;
10535 }
10536