]> git.saurik.com Git - apple/xnu.git/blob - bsd/vfs/vfs_syscalls.c
23653799fbcec651a7849d6e7822fe97b0b0914e
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
1 /*
2 * Copyright (c) 1995-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file_internal.h>
80 #include <sys/stat.h>
81 #include <sys/vnode_internal.h>
82 #include <sys/mount_internal.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/uio_internal.h>
86 #include <sys/malloc.h>
87 #include <sys/mman.h>
88 #include <sys/dirent.h>
89 #include <sys/attr.h>
90 #include <sys/sysctl.h>
91 #include <sys/ubc.h>
92 #include <sys/quota.h>
93 #include <sys/kdebug.h>
94 #include <sys/fsevents.h>
95 #include <sys/sysproto.h>
96 #include <sys/xattr.h>
97 #include <sys/fcntl.h>
98 #include <sys/fsctl.h>
99 #include <sys/ubc_internal.h>
100 #include <sys/disk.h>
101 #include <machine/cons.h>
102 #include <machine/limits.h>
103 #include <miscfs/specfs/specdev.h>
104 #include <miscfs/union/union.h>
105
106 #include <security/audit/audit.h>
107 #include <bsm/audit_kevents.h>
108
109 #include <mach/mach_types.h>
110 #include <kern/kern_types.h>
111 #include <kern/kalloc.h>
112
113 #include <vm/vm_pageout.h>
114
115 #include <libkern/OSAtomic.h>
116 #include <pexpert/pexpert.h>
117
118 #if CONFIG_MACF
119 #include <security/mac.h>
120 #include <security/mac_framework.h>
121 #endif
122
/*
 * GET_PATH(x) / RELEASE_PATH(x)
 *
 * Obtain and give back a scratch path buffer through the pointer 'x'.
 * When CONFIG_FSE is set the buffer comes from get_pathbuff() and is
 * returned with release_pathbuff(); otherwise a MAXPATHLEN-byte buffer is
 * allocated from the M_NAMEI zone with M_WAITOK and freed back to it.
 *
 * NOTE: both expansions already end in ';', so the macros expand to a
 * complete statement on their own.
 */
123 #if CONFIG_FSE
124 #define GET_PATH(x) \
125 (x) = get_pathbuff();
126 #define RELEASE_PATH(x) \
127 release_pathbuff(x);
128 #else
129 #define GET_PATH(x) \
130 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
131 #define RELEASE_PATH(x) \
132 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
133 #endif /* CONFIG_FSE */
134
/*
 * Argument bundle that checkdirs() hands to checkdirs_callback() through
 * proc_iterate().
 */
135 /* struct for checkdirs iteration */
136 struct cdirargs {
137 vnode_t olddp; /* directory vnode the new filesystem was mounted on */
138 vnode_t newdp; /* root vnode of the filesystem mounted on olddp */
139 };
/*
 * Forward declarations.  checkdirs_callback(), checkdirs() and
 * enablequotas() are defined further down in this file; the remaining
 * static helpers are presumably defined later in the file as well
 * (not visible from here -- TODO confirm).
 */
140 /* callback for checkdirs iteration */
141 static int checkdirs_callback(proc_t p, void * arg);
142
143 static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
144 static int checkdirs(vnode_t olddp, vfs_context_t ctx);
145 void enablequotas(struct mount *mp, vfs_context_t ctx);
146 static int getfsstat_callback(mount_t mp, void * arg);
147 static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
148 static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
149 static int sync_callback(mount_t, void *);
150 static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
151 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
152 boolean_t partial_copy);
153 static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
154 user_addr_t bufp);
155 static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
/*
 * Function-pointer variable (a definition, not a prototype).  It is not
 * assigned anywhere in the visible part of this file.
 */
156 int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);
157
/* Non-static entry points declared with __private_extern__ linkage. */
158 __private_extern__
159 int sync_internal(void);
160
161 __private_extern__
162 int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, int32_t *);
163
164 __private_extern__
165 int unlink1(vfs_context_t, struct nameidata *, int);
166
167
/*
 * Argument structures and prototypes for the fstatv/lstatv/mkcomplex/statv
 * calls.  Everything in this section is compiled only when
 * __APPLE_API_OBSOLETE is defined.
 */
168 #ifdef __APPLE_API_OBSOLETE
169 struct fstatv_args {
170 int fd; /* file descriptor of the target file */
171 struct vstat *vsb; /* vstat structure for returned info */
172 };
173 struct lstatv_args {
174 const char *path; /* pathname of the target file */
175 struct vstat *vsb; /* vstat structure for returned info */
176 };
177 struct mkcomplex_args {
178 const char *path; /* pathname of the file to be created */
179 mode_t mode; /* access mode for the newly created file */
180 u_int32_t type; /* format of the complex file */
181 };
182 struct statv_args {
183 const char *path; /* pathname of the target file */
184 struct vstat *vsb; /* vstat structure for returned info */
185 };
186
/* Each handler takes the calling process, its argument struct and a retval slot. */
187 int fstatv(proc_t p, struct fstatv_args *uap, int32_t *retval);
188 int lstatv(proc_t p, struct lstatv_args *uap, int32_t *retval);
189 int mkcomplex(proc_t p, struct mkcomplex_args *uap, int32_t *retval);
190 int statv(proc_t p, struct statv_args *uap, int32_t *retval);
191
192 #endif /* __APPLE_API_OBSOLETE */
193
194 /*
195 * incremented each time a mount or unmount operation occurs
196 * used to invalidate the cached value of the rootvp in the
197 * mount structure utilized by cache_lookup_path
198 */
/* In __mac_mount() the increment is done while holding name_cache_lock(). */
199 uint32_t mount_generation = 0;
200
201 /* counts number of mount and unmount operations */
/* __mac_mount() bumps this with OSAddAtomic() after a successful new mount. */
202 unsigned int vfs_nummntops=0;
203
/* Symbols provided by other translation units. */
204 extern struct fileops vnops;
205 extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
206
207
208 /*
209 * Virtual File System System Calls
210 */
211
212 /*
213 * Mount a file system.
214 */
215 /* ARGSUSED */
216 int
217 mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
218 {
219 struct __mac_mount_args muap;
220
221 muap.type = uap->type;
222 muap.path = uap->path;
223 muap.flags = uap->flags;
224 muap.data = uap->data;
225 muap.mac_p = USER_ADDR_NULL;
226 return (__mac_mount(p, &muap, retval));
227 }
228
229 /*
230 * __mac_mount:
231 * Mount a file system taking into account MAC label behavior.
232 * See mount(2) man page for more information
233 *
234 * Parameters: p Process requesting the mount
235 * uap User argument descriptor (see below)
236 * retval (ignored)
237 *
238 * Indirect: uap->type Filesystem type
239 * uap->path Path to mount
240 * uap->data Mount arguments
241 * uap->mac_p MAC info
242 * uap->flags Mount flags
243 *
244 *
245 * Returns: 0 Success
246 * !0 Not success
247 */
248 int
249 __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
250 {
251 struct vnode *vp, *pvp;
252 struct vnode *devvp = NULLVP;
253 struct vnode *device_vnode = NULLVP;
254 #if CONFIG_MACF
255 struct vnode *rvp;
256 #endif
257 struct mount *mp;
258 struct vfstable *vfsp = (struct vfstable *)0;
259 int error, flag = 0;
260 struct vnode_attr va;
261 vfs_context_t ctx = vfs_context_current();
262 struct nameidata nd;
263 struct nameidata nd1;
264 char fstypename[MFSNAMELEN];
265 size_t dummy=0;
266 user_addr_t devpath = USER_ADDR_NULL;
267 user_addr_t fsmountargs = uap->data;
268 int ronly = 0;
269 int mntalloc = 0;
270 boolean_t vfsp_ref = FALSE;
271 mode_t accessmode;
272 boolean_t is_64bit;
273 boolean_t is_rwlock_locked = FALSE;
274 boolean_t did_rele = FALSE;
275 boolean_t have_usecount = FALSE;
276
277 AUDIT_ARG(fflags, uap->flags);
278
279 is_64bit = proc_is64bit(p);
280
281 /*
282 * Get vnode to be covered
283 */
284 NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1 | WANTPARENT,
285 UIO_USERSPACE, uap->path, ctx);
286 error = namei(&nd);
287 if (error)
288 return (error);
289 vp = nd.ni_vp;
290 pvp = nd.ni_dvp;
291
292 if ((vp->v_flag & VROOT) &&
293 (vp->v_mount->mnt_flag & MNT_ROOTFS))
294 uap->flags |= MNT_UPDATE;
295
296 error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
297 if (error)
298 goto out1;
299
300 if (uap->flags & MNT_UPDATE) {
301 if ((vp->v_flag & VROOT) == 0) {
302 error = EINVAL;
303 goto out1;
304 }
305 mp = vp->v_mount;
306
307 /* unmount in progress return error */
308 mount_lock_spin(mp);
309 if (mp->mnt_lflag & MNT_LUNMOUNT) {
310 mount_unlock(mp);
311 error = EBUSY;
312 goto out1;
313 }
314 mount_unlock(mp);
315 lck_rw_lock_exclusive(&mp->mnt_rwlock);
316 is_rwlock_locked = TRUE;
317 /*
318 * We only allow the filesystem to be reloaded if it
319 * is currently mounted read-only.
320 */
321 if ((uap->flags & MNT_RELOAD) &&
322 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
323 error = ENOTSUP;
324 goto out1;
325 }
326 /*
327 * Only root, or the user that did the original mount is
328 * permitted to update it.
329 */
330 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
331 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
332 goto out1;
333 }
334 #if CONFIG_MACF
335 error = mac_mount_check_remount(ctx, mp);
336 if (error != 0) {
337 lck_rw_done(&mp->mnt_rwlock);
338 goto out1;
339 }
340 #endif
341 /*
342 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
343 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
344 */
345 if (suser(vfs_context_ucred(ctx), NULL)) {
346 uap->flags |= MNT_NOSUID | MNT_NODEV;
347 if (mp->mnt_flag & MNT_NOEXEC)
348 uap->flags |= MNT_NOEXEC;
349 }
350 flag = mp->mnt_flag;
351
352 mp->mnt_flag |=
353 uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
354
355 vfsp = mp->mnt_vtable;
356 goto update;
357 }
358 /*
359 * If the user is not root, ensure that they own the directory
360 * onto which we are attempting to mount.
361 */
362 VATTR_INIT(&va);
363 VATTR_WANTED(&va, va_uid);
364 if ((error = vnode_getattr(vp, &va, ctx)) ||
365 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
366 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))) {
367 goto out1;
368 }
369 /*
370 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
371 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
372 */
373 if (suser(vfs_context_ucred(ctx), NULL)) {
374 uap->flags |= MNT_NOSUID | MNT_NODEV;
375 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
376 uap->flags |= MNT_NOEXEC;
377 }
378 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
379 goto out1;
380
381 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
382 goto out1;
383
384 if (vp->v_type != VDIR) {
385 error = ENOTDIR;
386 goto out1;
387 }
388
389 /* XXXAUDIT: Should we capture the type on the error path as well? */
390 AUDIT_ARG(text, fstypename);
391 mount_list_lock();
392 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
393 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
394 vfsp->vfc_refcount++;
395 vfsp_ref = TRUE;
396 break;
397 }
398 mount_list_unlock();
399 if (vfsp == NULL) {
400 error = ENODEV;
401 goto out1;
402 }
403 #if CONFIG_MACF
404 error = mac_mount_check_mount(ctx, vp,
405 &nd.ni_cnd, vfsp->vfc_name);
406 if (error != 0)
407 goto out1;
408 #endif
409 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
410 error = EBUSY;
411 goto out1;
412 }
413 vnode_lock_spin(vp);
414 SET(vp->v_flag, VMOUNT);
415 vnode_unlock(vp);
416
417 /*
418 * Allocate and initialize the filesystem.
419 */
420 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
421 M_MOUNT, M_WAITOK);
422 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
423 mntalloc = 1;
424
425 /* Initialize the default IO constraints */
426 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
427 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
428 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
429 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
430 mp->mnt_devblocksize = DEV_BSIZE;
431 mp->mnt_alignmentmask = PAGE_MASK;
432 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
433 mp->mnt_ioscale = 1;
434 mp->mnt_ioflags = 0;
435 mp->mnt_realrootvp = NULLVP;
436 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
437
438 TAILQ_INIT(&mp->mnt_vnodelist);
439 TAILQ_INIT(&mp->mnt_workerqueue);
440 TAILQ_INIT(&mp->mnt_newvnodes);
441 mount_lock_init(mp);
442 lck_rw_lock_exclusive(&mp->mnt_rwlock);
443 is_rwlock_locked = TRUE;
444 mp->mnt_op = vfsp->vfc_vfsops;
445 mp->mnt_vtable = vfsp;
446 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
447 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
448 strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
449 strncpy(mp->mnt_vfsstat.f_mntonname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
450 mp->mnt_vnodecovered = vp;
451 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
452 mp->mnt_devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
453
454 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
455 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
456
457 update:
458 /*
459 * Set the mount level flags.
460 */
461 if (uap->flags & MNT_RDONLY)
462 mp->mnt_flag |= MNT_RDONLY;
463 else if (mp->mnt_flag & MNT_RDONLY)
464 mp->mnt_kern_flag |= MNTK_WANTRDWR;
465 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
466 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
467 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED |
468 MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE);
469 mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
470 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
471 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED |
472 MNT_DEFWRITE | MNT_NOATIME | MNT_QUARANTINE);
473
474 #if CONFIG_MACF
475 if (uap->flags & MNT_MULTILABEL) {
476 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
477 error = EINVAL;
478 goto out1;
479 }
480 mp->mnt_flag |= MNT_MULTILABEL;
481 }
482 #endif
483
484 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
485 if (is_64bit) {
486 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
487 goto out1;
488 fsmountargs += sizeof(devpath);
489 } else {
490 user32_addr_t tmp;
491 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
492 goto out1;
493 /* munge into LP64 addr */
494 devpath = CAST_USER_ADDR_T(tmp);
495 fsmountargs += sizeof(tmp);
496 }
497
498 /* if it is not update and device name needs to be parsed */
499 if ((devpath)) {
500 NDINIT(&nd1, LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
501 if ( (error = namei(&nd1)) )
502 goto out1;
503
504 strncpy(mp->mnt_vfsstat.f_mntfromname, nd1.ni_cnd.cn_pnbuf, MAXPATHLEN);
505 devvp = nd1.ni_vp;
506
507 nameidone(&nd1);
508
509 if (devvp->v_type != VBLK) {
510 error = ENOTBLK;
511 goto out2;
512 }
513 if (major(devvp->v_rdev) >= nblkdev) {
514 error = ENXIO;
515 goto out2;
516 }
517 /*
518 * If mount by non-root, then verify that user has necessary
519 * permissions on the device.
520 */
521 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
522 accessmode = KAUTH_VNODE_READ_DATA;
523 if ((mp->mnt_flag & MNT_RDONLY) == 0)
524 accessmode |= KAUTH_VNODE_WRITE_DATA;
525 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
526 goto out2;
527 }
528 }
529 if (devpath && ((uap->flags & MNT_UPDATE) == 0)) {
530 if ( (error = vnode_ref(devvp)) )
531 goto out2;
532 /*
533 * Disallow multiple mounts of the same device.
534 * Disallow mounting of a device that is currently in use
535 * (except for root, which might share swap device for miniroot).
536 * Flush out any old buffers remaining from a previous use.
537 */
538 if ( (error = vfs_mountedon(devvp)) )
539 goto out3;
540
541 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
542 error = EBUSY;
543 goto out3;
544 }
545 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
546 error = ENOTBLK;
547 goto out3;
548 }
549 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
550 goto out3;
551
552 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
553 #if CONFIG_MACF
554 error = mac_vnode_check_open(ctx,
555 devvp,
556 ronly ? FREAD : FREAD|FWRITE);
557 if (error)
558 goto out3;
559 #endif /* MAC */
560 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
561 goto out3;
562
563 mp->mnt_devvp = devvp;
564 device_vnode = devvp;
565 } else {
566 if ((mp->mnt_flag & MNT_RDONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
567 dev_t dev;
568 int maj;
569 /*
570 * If upgrade to read-write by non-root, then verify
571 * that user has necessary permissions on the device.
572 */
573 device_vnode = mp->mnt_devvp;
574
575 if (device_vnode) {
576 vnode_getalways(device_vnode);
577
578 if (suser(vfs_context_ucred(ctx), NULL)) {
579 if ((error = vnode_authorize(device_vnode, NULL,
580 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) {
581 vnode_put(device_vnode);
582 goto out2;
583 }
584 }
585
586 /* Tell the device that we're upgrading */
587 dev = (dev_t)device_vnode->v_rdev;
588 maj = major(dev);
589
590 if ((u_int)maj >= (u_int)nblkdev)
591 panic("Volume mounted on a device with invalid major number.\n");
592
593 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
594
595 vnode_put(device_vnode);
596 if (error != 0) {
597 goto out2;
598 }
599 }
600 }
601 device_vnode = NULLVP;
602 }
603 }
604 #if CONFIG_MACF
605 if ((uap->flags & MNT_UPDATE) == 0) {
606 mac_mount_label_init(mp);
607 mac_mount_label_associate(ctx, mp);
608 }
609 if (uap->mac_p != USER_ADDR_NULL) {
610 struct user_mac mac;
611 char *labelstr = NULL;
612 size_t ulen = 0;
613
614 if ((uap->flags & MNT_UPDATE) != 0) {
615 error = mac_mount_check_label_update(
616 ctx, mp);
617 if (error != 0)
618 goto out3;
619 }
620 if (is_64bit) {
621 error = copyin(uap->mac_p, &mac, sizeof(mac));
622 } else {
623 struct mac mac32;
624 error = copyin(uap->mac_p, &mac32, sizeof(mac32));
625 mac.m_buflen = mac32.m_buflen;
626 mac.m_string = CAST_USER_ADDR_T(mac32.m_string);
627 }
628 if (error != 0)
629 goto out3;
630 if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
631 (mac.m_buflen < 2)) {
632 error = EINVAL;
633 goto out3;
634 }
635 MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
636 error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
637 if (error != 0) {
638 FREE(labelstr, M_MACTEMP);
639 goto out3;
640 }
641 AUDIT_ARG(mac_string, labelstr);
642 error = mac_mount_label_internalize(mp->mnt_mntlabel, labelstr);
643 FREE(labelstr, M_MACTEMP);
644 if (error != 0)
645 goto out3;
646 }
647 #endif
648 if (device_vnode != NULL) {
649 VNOP_IOCTL(device_vnode, DKIOCGETBSDUNIT, (caddr_t)&mp->mnt_devbsdunit, 0, NULL);
650 mp->mnt_devbsdunit %= LOWPRI_MAX_NUM_DEV;
651 }
652
653 /*
654 * Mount the filesystem.
655 */
656 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
657
658 if (uap->flags & MNT_UPDATE) {
659 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
660 mp->mnt_flag &= ~MNT_RDONLY;
661 mp->mnt_flag &=~
662 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
663 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
664 if (error)
665 mp->mnt_flag = flag;
666 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
667 lck_rw_done(&mp->mnt_rwlock);
668 is_rwlock_locked = FALSE;
669 if (!error)
670 enablequotas(mp, ctx);
671 goto out2;
672 }
673 /*
674 * Put the new filesystem on the mount list after root.
675 */
676 if (error == 0) {
677 struct vfs_attr vfsattr;
678 #if CONFIG_MACF
679 if (vfs_flags(mp) & MNT_MULTILABEL) {
680 error = VFS_ROOT(mp, &rvp, ctx);
681 if (error) {
682 printf("%s() VFS_ROOT returned %d\n", __func__, error);
683 goto out3;
684 }
685 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
686 /*
687 * drop reference provided by VFS_ROOT
688 */
689 vnode_put(rvp);
690
691 if (error)
692 goto out3;
693 }
694 #endif /* MAC */
695
696 vnode_lock_spin(vp);
697 CLR(vp->v_flag, VMOUNT);
698 vp->v_mountedhere = mp;
699 vnode_unlock(vp);
700
701 /*
702 * taking the name_cache_lock exclusively will
703 * insure that everyone is out of the fast path who
704 * might be trying to use a now stale copy of
705 * vp->v_mountedhere->mnt_realrootvp
706 * bumping mount_generation causes the cached values
707 * to be invalidated
708 */
709 name_cache_lock();
710 mount_generation++;
711 name_cache_unlock();
712
713 error = vnode_ref(vp);
714 if (error != 0) {
715 goto out4;
716 }
717
718 have_usecount = TRUE;
719
720 error = checkdirs(vp, ctx);
721 if (error != 0) {
722 /* Unmount the filesystem as cdir/rdirs cannot be updated */
723 goto out4;
724 }
725 /*
726 * there is no cleanup code here so I have made it void
727 * we need to revisit this
728 */
729 (void)VFS_START(mp, 0, ctx);
730
731 error = mount_list_add(mp);
732 if (error != 0) {
733 goto out4;
734 }
735
736 lck_rw_done(&mp->mnt_rwlock);
737 is_rwlock_locked = FALSE;
738
739 /* Check if this mounted file system supports EAs or named streams. */
740 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
741 VFSATTR_INIT(&vfsattr);
742 VFSATTR_WANTED(&vfsattr, f_capabilities);
743 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
744 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
745 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
746 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
747 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
748 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
749 }
750 #if NAMEDSTREAMS
751 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
752 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
753 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
754 }
755 #endif
756 /* Check if this file system supports path from id lookups. */
757 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
758 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
759 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
760 } else if (mp->mnt_flag & MNT_DOVOLFS) {
761 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
762 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
763 }
764 }
765 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
766 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
767 }
768 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
769 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
770 }
771 /* increment the operations count */
772 OSAddAtomic(1, &vfs_nummntops);
773 enablequotas(mp, ctx);
774
775 if (device_vnode) {
776 device_vnode->v_specflags |= SI_MOUNTEDON;
777
778 /*
779 * cache the IO attributes for the underlying physical media...
780 * an error return indicates the underlying driver doesn't
781 * support all the queries necessary... however, reasonable
782 * defaults will have been set, so no reason to bail or care
783 */
784 vfs_init_io_attributes(device_vnode, mp);
785 }
786
787 /* Now that mount is setup, notify the listeners */
788 vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
789 } else {
790 vnode_lock_spin(vp);
791 CLR(vp->v_flag, VMOUNT);
792 vnode_unlock(vp);
793 mount_list_lock();
794 mp->mnt_vtable->vfc_refcount--;
795 mount_list_unlock();
796
797 if (device_vnode ) {
798 vnode_rele(device_vnode);
799 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
800 }
801 lck_rw_done(&mp->mnt_rwlock);
802 is_rwlock_locked = FALSE;
803 mount_lock_destroy(mp);
804 #if CONFIG_MACF
805 mac_mount_label_destroy(mp);
806 #endif
807 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
808 }
809 nameidone(&nd);
810
811 /*
812 * drop I/O count on covered 'vp' and
813 * on the device vp if there was one
814 */
815 if (devpath && devvp)
816 vnode_put(devvp);
817 vnode_put(vp);
818
819 /* Note that we've changed something in the parent directory */
820 post_event_if_success(pvp, error, NOTE_WRITE);
821 vnode_put(pvp);
822
823 return(error);
824
825 out4:
826 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
827 if (device_vnode != NULLVP) {
828 vnode_rele(device_vnode);
829 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
830 ctx);
831 did_rele = TRUE;
832 }
833 vnode_lock_spin(vp);
834 vp->v_mountedhere = (mount_t) 0;
835 vnode_unlock(vp);
836
837 if (have_usecount) {
838 vnode_rele(vp);
839 }
840 out3:
841 if (devpath && ((uap->flags & MNT_UPDATE) == 0) && (!did_rele))
842 vnode_rele(devvp);
843 out2:
844 if (devpath && devvp)
845 vnode_put(devvp);
846 out1:
847 /* Release mnt_rwlock only when it was taken */
848 if (is_rwlock_locked == TRUE) {
849 lck_rw_done(&mp->mnt_rwlock);
850 }
851 if (mntalloc) {
852 #if CONFIG_MACF
853 mac_mount_label_destroy(mp);
854 #endif
855 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
856 }
857
858 if (vfsp_ref) {
859 mount_list_lock();
860 vfsp->vfc_refcount--;
861 mount_list_unlock();
862 }
863 vnode_put(vp);
864 vnode_put(pvp);
865 nameidone(&nd);
866
867 return(error);
868 }
869
870 void
871 enablequotas(struct mount *mp, vfs_context_t ctx)
872 {
873 struct nameidata qnd;
874 int type;
875 char qfpath[MAXPATHLEN];
876 const char *qfname = QUOTAFILENAME;
877 const char *qfopsname = QUOTAOPSNAME;
878 const char *qfextension[] = INITQFNAMES;
879
880 /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
881 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
882 return;
883 }
884 /*
885 * Enable filesystem disk quotas if necessary.
886 * We ignore errors as this should not interfere with final mount
887 */
888 for (type=0; type < MAXQUOTAS; type++) {
889 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
890 NDINIT(&qnd, LOOKUP, FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(qfpath), ctx);
891 if (namei(&qnd) != 0)
892 continue; /* option file to trigger quotas is not present */
893 vnode_put(qnd.ni_vp);
894 nameidone(&qnd);
895 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
896
897 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
898 }
899 return;
900 }
901
902
903 static int
904 checkdirs_callback(proc_t p, void * arg)
905 {
906 struct cdirargs * cdrp = (struct cdirargs * )arg;
907 vnode_t olddp = cdrp->olddp;
908 vnode_t newdp = cdrp->newdp;
909 struct filedesc *fdp;
910 vnode_t tvp;
911 vnode_t fdp_cvp;
912 vnode_t fdp_rvp;
913 int cdir_changed = 0;
914 int rdir_changed = 0;
915
916 /*
917 * XXX Also needs to iterate each thread in the process to see if it
918 * XXX is using a per-thread current working directory, and, if so,
919 * XXX update that as well.
920 */
921
922 proc_fdlock(p);
923 fdp = p->p_fd;
924 if (fdp == (struct filedesc *)0) {
925 proc_fdunlock(p);
926 return(PROC_RETURNED);
927 }
928 fdp_cvp = fdp->fd_cdir;
929 fdp_rvp = fdp->fd_rdir;
930 proc_fdunlock(p);
931
932 if (fdp_cvp == olddp) {
933 vnode_ref(newdp);
934 tvp = fdp->fd_cdir;
935 fdp_cvp = newdp;
936 cdir_changed = 1;
937 vnode_rele(tvp);
938 }
939 if (fdp_rvp == olddp) {
940 vnode_ref(newdp);
941 tvp = fdp->fd_rdir;
942 fdp_rvp = newdp;
943 rdir_changed = 1;
944 vnode_rele(tvp);
945 }
946 if (cdir_changed || rdir_changed) {
947 proc_fdlock(p);
948 fdp->fd_cdir = fdp_cvp;
949 fdp->fd_rdir = fdp_rvp;
950 proc_fdunlock(p);
951 }
952 return(PROC_RETURNED);
953 }
954
955
956
957 /*
958 * Scan all active processes to see if any of them have a current
959 * or root directory onto which the new filesystem has just been
960 * mounted. If so, replace them with the new mount point.
961 */
962 static int
963 checkdirs(vnode_t olddp, vfs_context_t ctx)
964 {
965 vnode_t newdp;
966 vnode_t tvp;
967 int err;
968 struct cdirargs cdr;
969 struct uthread * uth = get_bsdthread_info(current_thread());
970
971 if (olddp->v_usecount == 1)
972 return(0);
973 if (uth != (struct uthread *)0)
974 uth->uu_notrigger = 1;
975 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
976 if (uth != (struct uthread *)0)
977 uth->uu_notrigger = 0;
978
979 if (err != 0) {
980 #if DIAGNOSTIC
981 panic("mount: lost mount: error %d", err);
982 #endif
983 return(err);
984 }
985
986 cdr.olddp = olddp;
987 cdr.newdp = newdp;
988 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
989 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
990
991 if (rootvnode == olddp) {
992 vnode_ref(newdp);
993 tvp = rootvnode;
994 rootvnode = newdp;
995 vnode_rele(tvp);
996 }
997
998 vnode_put(newdp);
999 return(0);
1000 }
1001
1002 /*
1003 * Unmount a file system.
1004 *
1005 * Note: unmount takes a path to the vnode mounted on as argument,
1006 * not special file (as before).
1007 */
1008 /* ARGSUSED */
1009 int
1010 unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1011 {
1012 vnode_t vp;
1013 struct mount *mp;
1014 int error;
1015 struct nameidata nd;
1016 vfs_context_t ctx = vfs_context_current();
1017
1018 NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1019 UIO_USERSPACE, uap->path, ctx);
1020 error = namei(&nd);
1021 if (error)
1022 return (error);
1023 vp = nd.ni_vp;
1024 mp = vp->v_mount;
1025 nameidone(&nd);
1026
1027 #if CONFIG_MACF
1028 error = mac_mount_check_umount(ctx, mp);
1029 if (error != 0) {
1030 vnode_put(vp);
1031 return (error);
1032 }
1033 #endif
1034 /*
1035 * Must be the root of the filesystem
1036 */
1037 if ((vp->v_flag & VROOT) == 0) {
1038 vnode_put(vp);
1039 return (EINVAL);
1040 }
1041 mount_ref(mp, 0);
1042 vnode_put(vp);
1043 /* safedounmount consumes the mount ref */
1044 return (safedounmount(mp, uap->flags, ctx));
1045 }
1046
1047 int
1048 vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
1049 {
1050 mount_t mp;
1051
1052 mp = mount_list_lookupby_fsid(fsid, 0, 1);
1053 if (mp == (mount_t)0) {
1054 return(ENOENT);
1055 }
1056 mount_ref(mp, 0);
1057 mount_iterdrop(mp);
1058 /* safedounmount consumes the mount ref */
1059 return(safedounmount(mp, flags, ctx));
1060 }
1061
1062
1063 /*
1064 * The mount struct comes with a mount ref which will be consumed.
1065 * Do the actual file system unmount, prevent some common foot shooting.
1066 */
1067 int
1068 safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1069 {
1070 int error;
1071 proc_t p = vfs_context_proc(ctx);
1072
1073 /*
1074 * Only root, or the user that did the original mount is
1075 * permitted to unmount this filesystem.
1076 */
1077 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1078 (error = suser(kauth_cred_get(), &p->p_acflag)))
1079 goto out;
1080
1081 /*
1082 * Don't allow unmounting the root file system.
1083 */
1084 if (mp->mnt_flag & MNT_ROOTFS) {
1085 error = EBUSY; /* the root is always busy */
1086 goto out;
1087 }
1088
1089 return (dounmount(mp, flags, 1, ctx));
1090
1091 out:
1092 mount_drop(mp, 0);
1093 return(error);
1094 }
1095
1096 /*
1097 * Do the actual file system unmount.
 *
 * Parameters:	mp	mount to tear down
 *		flags	unmount flags; MNT_FORCE selects a forced unmount
 *		withref	non-zero when the caller holds a mount ref on mp;
 *			that ref is dropped here with mount_drop()
 *		ctx	context handed to VFS_SYNC, VFS_UNMOUNT, VNOP_CLOSE
 *
 * Returns:	0	Success; mp has been freed, or its covered vnode has
 *			been marked VL_MOUNTDEAD because mnt_crossref != 0
 *		EBUSY	Another unmount of mp was already in progress
 *		VFS_SYNC:???	(non-forced, writable mounts only)
 *		vflush:???	(non-forced only; ignored when forced)
 *		VFS_UNMOUNT:???
 *
 * When VFS_SYNC, vflush or VFS_UNMOUNT fails, MNTK_UNMOUNT, MNT_LUNMOUNT
 * and MNT_LFORCE are cleared again before returning.
1098 */
1099 int
1100 dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1101 {
1102 vnode_t coveredvp = (vnode_t)0;
1103 int error;
1104 int needwakeup = 0;
1105 int forcedunmount = 0;
1106 int lflags = 0;
1107 struct vnode *devvp = NULLVP;
1108
1109 if (flags & MNT_FORCE)
1110 forcedunmount = 1;
1111 mount_lock(mp);
1112 /* XXX post jaguar fix LK_DRAIN - then clean this up */
1113 if ((flags & MNT_FORCE)) {
1114 mp->mnt_kern_flag |= MNTK_FRCUNMOUNT;
1115 mp->mnt_lflag |= MNT_LFORCE;
1116 }
/* someone else is already unmounting mp: wait for them, then give up */
1117 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1118 mp->mnt_lflag |= MNT_LWAIT;
1119 if(withref != 0)
1120 mount_drop(mp, 1);
/* PDROP: the mount lock is not reacquired when msleep returns */
1121 msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "dounmount", NULL);
1122 /*
1123 * The prior unmount attempt has probably succeeded.
1124 * Do not dereference mp here - returning EBUSY is safest.
1125 */
1126 return (EBUSY);
1127 }
/* mark the unmount as in progress */
1128 mp->mnt_kern_flag |= MNTK_UNMOUNT;
1129 mp->mnt_lflag |= MNT_LUNMOUNT;
1130 mp->mnt_flag &=~ MNT_ASYNC;
1131 /*
1132 * anyone currently in the fast path that
1133 * trips over the cached rootvp will be
1134 * dumped out and forced into the slow path
1135 * to regenerate a new cached value
1136 */
1137 mp->mnt_realrootvp = NULLVP;
1138 mount_unlock(mp);
1139
1140 /*
1141 * taking the name_cache_lock exclusively will
1142 * ensure that everyone is out of the fast path who
1143 * might be trying to use a now stale copy of
1144 * vp->v_mountedhere->mnt_realrootvp
1145 * bumping mount_generation causes the cached values
1146 * to be invalidated
1147 */
1148 name_cache_lock();
1149 mount_generation++;
1150 name_cache_unlock();
1151
1152
1153 lck_rw_lock_exclusive(&mp->mnt_rwlock);
/* the caller's mount ref is released once the rwlock is held exclusively */
1154 if (withref != 0)
1155 mount_drop(mp, 0);
1156 #if CONFIG_FSE
1157 fsevent_unmount(mp); /* has to come first! */
1158 #endif
1159 error = 0;
/* non-forced: release cached vnodes and sync a writable mount first */
1160 if (forcedunmount == 0) {
1161 ubc_umount(mp); /* release cached vnodes */
1162 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1163 error = VFS_SYNC(mp, MNT_WAIT, ctx);
1164 if (error) {
/* undo the in-progress marking; "out" expects the mount lock held */
1165 mount_lock(mp);
1166 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1167 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1168 mp->mnt_lflag &= ~MNT_LFORCE;
1169 goto out;
1170 }
1171 }
1172 }
1173
1174 if (forcedunmount)
1175 lflags |= FORCECLOSE;
1176 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
/* a vflush failure only aborts a non-forced unmount */
1177 if ((forcedunmount == 0) && error) {
1178 mount_lock(mp);
1179 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1180 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1181 mp->mnt_lflag &= ~MNT_LFORCE;
1182 goto out;
1183 }
1184
1185 /* make sure no one is in a mount iteration or lookup */
1186 mount_iterdrain(mp);
1187
1188 error = VFS_UNMOUNT(mp, flags, ctx);
1189 if (error) {
/* the file system refused: reset the iteration drain and the flags */
1190 mount_iterreset(mp);
1191 mount_lock(mp);
1192 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1193 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1194 mp->mnt_lflag &= ~MNT_LFORCE;
1195 goto out;
1196 }
1197
1198 /* increment the operations count */
1199 if (!error)
1200 OSAddAtomic(1, &vfs_nummntops);
1201
1202 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
1203 /* hold an io reference and drop the usecount before close */
1204 devvp = mp->mnt_devvp;
1205 vnode_getalways(devvp);
1206 vnode_rele(devvp);
1207 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1208 ctx);
1209 vnode_clearmountedon(devvp);
1210 vnode_put(devvp);
1211 }
/* the rwlock is released around mount_list_remove() and then retaken */
1212 lck_rw_done(&mp->mnt_rwlock);
1213 mount_list_remove(mp);
1214 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1215
1216 /* mark the mount point hook in the vp but not drop the ref yet */
1217 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
1218 vnode_getwithref(coveredvp);
1219 vnode_lock_spin(coveredvp);
1220 coveredvp->v_mountedhere = (struct mount *)0;
1221 vnode_unlock(coveredvp);
1222 vnode_put(coveredvp);
1223 }
1224
1225 mount_list_lock();
1226 mp->mnt_vtable->vfc_refcount--;
1227 mount_list_unlock();
1228
1229 cache_purgevfs(mp); /* remove cache entries for this file sys */
1230 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
1231 mount_lock(mp);
1232 mp->mnt_lflag |= MNT_LDEAD;
1233
1234 if (mp->mnt_lflag & MNT_LWAIT) {
1235 /*
1236 * do the wakeup here
1237 * in case we block in mount_refdrain
1238 * which will drop the mount lock
1239 * and allow anyone blocked in vfs_busy
1240 * to wakeup and see the LDEAD state
1241 */
1242 mp->mnt_lflag &= ~MNT_LWAIT;
1243 wakeup((caddr_t)mp);
1244 }
1245 mount_refdrain(mp);
/* common exit: entered with the mount lock and mnt_rwlock held */
1246 out:
1247 if (mp->mnt_lflag & MNT_LWAIT) {
1248 mp->mnt_lflag &= ~MNT_LWAIT;
1249 needwakeup = 1;
1250 }
1251 mount_unlock(mp);
1252 lck_rw_done(&mp->mnt_rwlock);
1253
/* the wakeup is issued only after both locks have been released */
1254 if (needwakeup)
1255 wakeup((caddr_t)mp);
1256 if (!error) {
1257 if ((coveredvp != NULLVP)) {
1258 vnode_t pvp;
1259
/* trade the usecount on the covered vnode for an iocount */
1260 vnode_getwithref(coveredvp);
1261 pvp = vnode_getparent(coveredvp);
1262 vnode_rele(coveredvp);
1263 vnode_lock_spin(coveredvp);
/* no cross references left: free mp now, otherwise flag the covered vnode */
1264 if(mp->mnt_crossref == 0) {
1265 vnode_unlock(coveredvp);
1266 mount_lock_destroy(mp);
1267 #if CONFIG_MACF
1268 mac_mount_label_destroy(mp);
1269 #endif
1270 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1271 } else {
1272 coveredvp->v_lflag |= VL_MOUNTDEAD;
1273 vnode_unlock(coveredvp);
1274 }
1275 vnode_put(coveredvp);
1276
/* post NOTE_WRITE on the covered vnode's parent directory */
1277 if (pvp) {
1278 lock_vnode_and_post(pvp, NOTE_WRITE);
1279 vnode_put(pvp);
1280 }
1281 } else if (mp->mnt_flag & MNT_ROOTFS) {
/* the root file system covers no vnode; free mp directly */
1282 mount_lock_destroy(mp);
1283 #if CONFIG_MACF
1284 mac_mount_label_destroy(mp);
1285 #endif
1286 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1287 } else
1288 panic("dounmount: no coveredvp");
1289 }
1290 return (error);
1291 }
1292
1293 void
1294 mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
1295 {
1296 vnode_lock(dp);
1297 mp->mnt_crossref--;
1298 if (mp->mnt_crossref < 0)
1299 panic("mount cross refs -ve");
1300 if (((dp->v_lflag & VL_MOUNTDEAD) == VL_MOUNTDEAD) && (mp->mnt_crossref == 0)) {
1301 dp->v_lflag &= ~VL_MOUNTDEAD;
1302 if (need_put)
1303 vnode_put_locked(dp);
1304 vnode_unlock(dp);
1305 mount_lock_destroy(mp);
1306 #if CONFIG_MACF
1307 mac_mount_label_destroy(mp);
1308 #endif
1309 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1310 return;
1311 }
1312 if (need_put)
1313 vnode_put_locked(dp);
1314 vnode_unlock(dp);
1315 }
1316
1317
1318 /*
1319 * Sync each mounted filesystem.
1320 */
1321 #if DIAGNOSTIC
/* DIAGNOSTIC only: when non-zero, sync() calls vfs_bufstats() */
1322 int syncprt = 0;
/* binds the name "syncprt" to the variable above */
1323 struct ctldebug debug0 = { "syncprt", &syncprt };
1324 #endif
1325
/* when non-zero, sync() calls vm_countdirtypages() */
1326 int print_vmpage_stat=0;
1327
1328 static int
1329 sync_callback(mount_t mp, void * arg)
1330 {
1331 int asyncflag;
1332
1333 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1334 asyncflag = mp->mnt_flag & MNT_ASYNC;
1335 mp->mnt_flag &= ~MNT_ASYNC;
1336 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_current());
1337 if (asyncflag)
1338 mp->mnt_flag |= MNT_ASYNC;
1339 }
1340 return(VFS_RETURNED);
1341 }
1342
1343
1344 #include <kern/clock.h>
1345
/* sync() stores the calendar seconds here and does a wakeup() on its address */
1346 clock_sec_t sync_wait_time = 0;
1347
1348 /* ARGSUSED */
1349 int
1350 sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
1351 {
1352 clock_nsec_t nsecs;
1353
1354 vfs_iterate(LK_NOWAIT, sync_callback, (void *)0);
1355
1356 {
1357 static fsid_t fsid = { { 0, 0 } };
1358
1359 clock_get_calendar_microtime(&sync_wait_time, &nsecs);
1360 vfs_event_signal(&fsid, VQ_SYNCEVENT, (intptr_t)NULL);
1361 wakeup((caddr_t)&sync_wait_time);
1362 }
1363
1364 {
1365 if(print_vmpage_stat) {
1366 vm_countdirtypages();
1367 }
1368 }
1369 #if DIAGNOSTIC
1370 if (syncprt)
1371 vfs_bufstats();
1372 #endif /* DIAGNOSTIC */
1373 return (0);
1374 }
1375
1376 /*
1377 * Change filesystem quotas.
1378 */
1379 #if QUOTA
1380 static int quotactl_funneled(proc_t p, struct quotactl_args *uap, int32_t *retval);
1381
1382 int
1383 quotactl(proc_t p, struct quotactl_args *uap, int32_t *retval)
1384 {
1385 boolean_t funnel_state;
1386 int error;
1387
1388 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1389 error = quotactl_funneled(p, uap, retval);
1390 thread_funnel_set(kernel_flock, funnel_state);
1391 return(error);
1392 }
1393
1394 static int
1395 quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
1396 {
1397 struct mount *mp;
1398 int error, quota_cmd, quota_status;
1399 caddr_t datap;
1400 size_t fnamelen;
1401 struct nameidata nd;
1402 vfs_context_t ctx = vfs_context_current();
1403 struct dqblk my_dqblk;
1404
1405 AUDIT_ARG(uid, uap->uid);
1406 AUDIT_ARG(cmd, uap->cmd);
1407 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
1408 UIO_USERSPACE, uap->path, ctx);
1409 error = namei(&nd);
1410 if (error)
1411 return (error);
1412 mp = nd.ni_vp->v_mount;
1413 vnode_put(nd.ni_vp);
1414 nameidone(&nd);
1415
1416 /* copyin any data we will need for downstream code */
1417 quota_cmd = uap->cmd >> SUBCMDSHIFT;
1418
1419 switch (quota_cmd) {
1420 case Q_QUOTAON:
1421 /* uap->arg specifies a file from which to take the quotas */
1422 fnamelen = MAXPATHLEN;
1423 datap = kalloc(MAXPATHLEN);
1424 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
1425 break;
1426 case Q_GETQUOTA:
1427 /* uap->arg is a pointer to a dqblk structure. */
1428 datap = (caddr_t) &my_dqblk;
1429 break;
1430 case Q_SETQUOTA:
1431 case Q_SETUSE:
1432 /* uap->arg is a pointer to a dqblk structure. */
1433 datap = (caddr_t) &my_dqblk;
1434 if (proc_is64bit(p)) {
1435 struct user_dqblk my_dqblk64;
1436 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
1437 if (error == 0) {
1438 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
1439 }
1440 }
1441 else {
1442 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
1443 }
1444 break;
1445 case Q_QUOTASTAT:
1446 /* uap->arg is a pointer to an integer */
1447 datap = (caddr_t) &quota_status;
1448 break;
1449 default:
1450 datap = NULL;
1451 break;
1452 } /* switch */
1453
1454 if (error == 0) {
1455 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
1456 }
1457
1458 switch (quota_cmd) {
1459 case Q_QUOTAON:
1460 if (datap != NULL)
1461 kfree(datap, MAXPATHLEN);
1462 break;
1463 case Q_GETQUOTA:
1464 /* uap->arg is a pointer to a dqblk structure we need to copy out to */
1465 if (error == 0) {
1466 if (proc_is64bit(p)) {
1467 struct user_dqblk my_dqblk64;
1468 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
1469 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
1470 }
1471 else {
1472 error = copyout(datap, uap->arg, sizeof (struct dqblk));
1473 }
1474 }
1475 break;
1476 case Q_QUOTASTAT:
1477 /* uap->arg is a pointer to an integer */
1478 if (error == 0) {
1479 error = copyout(datap, uap->arg, sizeof(quota_status));
1480 }
1481 break;
1482 default:
1483 break;
1484 } /* switch */
1485
1486 return (error);
1487 }
1488 #else
1489 int
1490 quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
1491 {
1492 return (EOPNOTSUPP);
1493 }
1494 #endif /* QUOTA */
1495
1496 /*
1497 * Get filesystem statistics.
1498 *
1499 * Returns: 0 Success
1500 * namei:???
1501 * vfs_update_vfsstat:???
1502 * munge_statfs:EFAULT
1503 */
1504 /* ARGSUSED */
1505 int
1506 statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
1507 {
1508 struct mount *mp;
1509 struct vfsstatfs *sp;
1510 int error;
1511 struct nameidata nd;
1512 vfs_context_t ctx = vfs_context_current();
1513 vnode_t vp;
1514
1515 NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1516 UIO_USERSPACE, uap->path, ctx);
1517 error = namei(&nd);
1518 if (error)
1519 return (error);
1520 vp = nd.ni_vp;
1521 mp = vp->v_mount;
1522 sp = &mp->mnt_vfsstat;
1523 nameidone(&nd);
1524
1525 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
1526 vnode_put(vp);
1527 if (error != 0)
1528 return (error);
1529
1530 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
1531 return (error);
1532 }
1533
1534 /*
1535 * Get filesystem statistics.
1536 */
1537 /* ARGSUSED */
1538 int
1539 fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
1540 {
1541 vnode_t vp;
1542 struct mount *mp;
1543 struct vfsstatfs *sp;
1544 int error;
1545
1546 AUDIT_ARG(fd, uap->fd);
1547
1548 if ( (error = file_vnode(uap->fd, &vp)) )
1549 return (error);
1550
1551 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
1552
1553 mp = vp->v_mount;
1554 if (!mp) {
1555 file_drop(uap->fd);
1556 return (EBADF);
1557 }
1558 sp = &mp->mnt_vfsstat;
1559 if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
1560 file_drop(uap->fd);
1561 return (error);
1562 }
1563 file_drop(uap->fd);
1564
1565 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
1566
1567 return (error);
1568 }
1569
1570 /*
1571 * Common routine to handle copying of statfs64 data to user space
1572 */
1573 static int
1574 statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
1575 {
1576 int error;
1577 struct statfs64 sfs;
1578
1579 bzero(&sfs, sizeof(sfs));
1580
1581 sfs.f_bsize = sfsp->f_bsize;
1582 sfs.f_iosize = (int32_t)sfsp->f_iosize;
1583 sfs.f_blocks = sfsp->f_blocks;
1584 sfs.f_bfree = sfsp->f_bfree;
1585 sfs.f_bavail = sfsp->f_bavail;
1586 sfs.f_files = sfsp->f_files;
1587 sfs.f_ffree = sfsp->f_ffree;
1588 sfs.f_fsid = sfsp->f_fsid;
1589 sfs.f_owner = sfsp->f_owner;
1590 sfs.f_type = mp->mnt_vtable->vfc_typenum;
1591 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1592 sfs.f_fssubtype = sfsp->f_fssubtype;
1593 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
1594 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
1595 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
1596
1597 error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));
1598
1599 return(error);
1600 }
1601
1602 /*
1603 * Get file system statistics in 64-bit mode
1604 */
1605 int
1606 statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
1607 {
1608 struct mount *mp;
1609 struct vfsstatfs *sp;
1610 int error;
1611 struct nameidata nd;
1612 vfs_context_t ctxp = vfs_context_current();
1613 vnode_t vp;
1614
1615 NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
1616 UIO_USERSPACE, uap->path, ctxp);
1617 error = namei(&nd);
1618 if (error)
1619 return (error);
1620 vp = nd.ni_vp;
1621 mp = vp->v_mount;
1622 sp = &mp->mnt_vfsstat;
1623 nameidone(&nd);
1624
1625 error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
1626 vnode_put(vp);
1627 if (error != 0)
1628 return (error);
1629
1630 error = statfs64_common(mp, sp, uap->buf);
1631
1632 return (error);
1633 }
1634
1635 /*
1636 * Get file system statistics in 64-bit mode
1637 */
1638 int
1639 fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
1640 {
1641 struct vnode *vp;
1642 struct mount *mp;
1643 struct vfsstatfs *sp;
1644 int error;
1645
1646 AUDIT_ARG(fd, uap->fd);
1647
1648 if ( (error = file_vnode(uap->fd, &vp)) )
1649 return (error);
1650
1651 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
1652
1653 mp = vp->v_mount;
1654 if (!mp) {
1655 file_drop(uap->fd);
1656 return (EBADF);
1657 }
1658 sp = &mp->mnt_vfsstat;
1659 if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
1660 file_drop(uap->fd);
1661 return (error);
1662 }
1663 file_drop(uap->fd);
1664
1665 error = statfs64_common(mp, sp, uap->buf);
1666
1667 return (error);
1668 }
1669
/* State shared between the getfsstat entry points and their vfs_iterate() callbacks. */
1670 struct getfsstat_struct {
1671 user_addr_t sfsp; /* user address for the next record; advanced after each one written */
1672 user_addr_t *mp; /* cursor into an array of user addresses passed to mac_mount_label_get(), or NULL */
1673 int count; /* incremented by the callbacks for each mount they count */
1674 int maxcount; /* records are written only while count < maxcount */
1675 int flags; /* MNT_NOWAIT / MNT_WAIT / MNT_DWAIT, tested by the callbacks */
1676 int error; /* set by a callback when it stops the iteration with an error */
1677 };
1678
1679
1680 static int
1681 getfsstat_callback(mount_t mp, void * arg)
1682 {
1683
1684 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
1685 struct vfsstatfs *sp;
1686 int error, my_size;
1687 vfs_context_t ctx = vfs_context_current();
1688
1689 if (fstp->sfsp && fstp->count < fstp->maxcount) {
1690 sp = &mp->mnt_vfsstat;
1691 /*
1692 * If MNT_NOWAIT is specified, do not refresh the
1693 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
1694 */
1695 if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
1696 (error = vfs_update_vfsstat(mp, ctx,
1697 VFS_USER_EVENT))) {
1698 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
1699 return(VFS_RETURNED);
1700 }
1701
1702 /*
1703 * Need to handle LP64 version of struct statfs
1704 */
1705 error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
1706 if (error) {
1707 fstp->error = error;
1708 return(VFS_RETURNED_DONE);
1709 }
1710 fstp->sfsp += my_size;
1711
1712 if (fstp->mp) {
1713 error = mac_mount_label_get(mp, *fstp->mp);
1714 if (error) {
1715 fstp->error = error;
1716 return(VFS_RETURNED_DONE);
1717 }
1718 fstp->mp++;
1719 }
1720 }
1721 fstp->count++;
1722 return(VFS_RETURNED);
1723 }
1724
1725 /*
1726 * Get statistics on all filesystems.
1727 */
1728 int
1729 getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
1730 {
1731 struct __mac_getfsstat_args muap;
1732
1733 muap.buf = uap->buf;
1734 muap.bufsize = uap->bufsize;
1735 muap.mac = USER_ADDR_NULL;
1736 muap.macsize = 0;
1737 muap.flags = uap->flags;
1738
1739 return (__mac_getfsstat(p, &muap, retval));
1740 }
1741
1742 /*
1743 * __mac_getfsstat: Get MAC-related file system statistics
1744 *
1745 * Parameters: p (ignored)
1746 * uap User argument descriptor (see below)
1747 * retval Count of file system statistics (N stats)
1748 *
1749 * Indirect: uap->bufsize Buffer size
1750 * uap->macsize MAC info size
1751 * uap->buf Buffer where information will be returned
1752 * uap->mac MAC info
1753 * uap->flags File system flags
1754 *
1755 *
1756 * Returns: 0 Success
1757 * !0 Not success
1758 *
1759 */
1760 int
1761 __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
1762 {
1763 user_addr_t sfsp;
1764 user_addr_t *mp;
1765 size_t count, maxcount, bufsize, macsize;
1766 struct getfsstat_struct fst;
1767
1768 bufsize = (size_t) uap->bufsize;
1769 macsize = (size_t) uap->macsize;
1770
1771 if (IS_64BIT_PROCESS(p)) {
1772 maxcount = bufsize / sizeof(struct user64_statfs);
1773 }
1774 else {
1775 maxcount = bufsize / sizeof(struct user32_statfs);
1776 }
1777 sfsp = uap->buf;
1778 count = 0;
1779
1780 mp = NULL;
1781
1782 #if CONFIG_MACF
1783 if (uap->mac != USER_ADDR_NULL) {
1784 u_int32_t *mp0;
1785 int error;
1786 unsigned int i;
1787
1788 count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
1789 if (count != maxcount)
1790 return (EINVAL);
1791
1792 /* Copy in the array */
1793 MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
1794 if (mp0 == NULL) {
1795 return (ENOMEM);
1796 }
1797
1798 error = copyin(uap->mac, mp0, macsize);
1799 if (error) {
1800 FREE(mp0, M_MACTEMP);
1801 return (error);
1802 }
1803
1804 /* Normalize to an array of user_addr_t */
1805 MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
1806 if (mp == NULL) {
1807 FREE(mp0, M_MACTEMP);
1808 return (ENOMEM);
1809 }
1810
1811 for (i = 0; i < count; i++) {
1812 if (IS_64BIT_PROCESS(p))
1813 mp[i] = ((user_addr_t *)mp0)[i];
1814 else
1815 mp[i] = (user_addr_t)mp0[i];
1816 }
1817 FREE(mp0, M_MACTEMP);
1818 }
1819 #endif
1820
1821
1822 fst.sfsp = sfsp;
1823 fst.mp = mp;
1824 fst.flags = uap->flags;
1825 fst.count = 0;
1826 fst.error = 0;
1827 fst.maxcount = maxcount;
1828
1829
1830 vfs_iterate(0, getfsstat_callback, &fst);
1831
1832 if (mp)
1833 FREE(mp, M_MACTEMP);
1834
1835 if (fst.error ) {
1836 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
1837 return(fst.error);
1838 }
1839
1840 if (fst.sfsp && fst.count > fst.maxcount)
1841 *retval = fst.maxcount;
1842 else
1843 *retval = fst.count;
1844 return (0);
1845 }
1846
1847 static int
1848 getfsstat64_callback(mount_t mp, void * arg)
1849 {
1850 struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
1851 struct vfsstatfs *sp;
1852 int error;
1853
1854 if (fstp->sfsp && fstp->count < fstp->maxcount) {
1855 sp = &mp->mnt_vfsstat;
1856 /*
1857 * If MNT_NOWAIT is specified, do not refresh the fsstat
1858 * cache. MNT_WAIT overrides MNT_NOWAIT.
1859 *
1860 * We treat MNT_DWAIT as MNT_WAIT for all instances of
1861 * getfsstat, since the constants are out of the same
1862 * namespace.
1863 */
1864 if (((fstp->flags & MNT_NOWAIT) == 0 ||
1865 (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
1866 (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
1867 KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
1868 return(VFS_RETURNED);
1869 }
1870
1871 error = statfs64_common(mp, sp, fstp->sfsp);
1872 if (error) {
1873 fstp->error = error;
1874 return(VFS_RETURNED_DONE);
1875 }
1876 fstp->sfsp += sizeof(struct statfs64);
1877 }
1878 fstp->count++;
1879 return(VFS_RETURNED);
1880 }
1881
1882 /*
1883 * Get statistics on all file systems in 64 bit mode.
1884 */
1885 int
1886 getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
1887 {
1888 user_addr_t sfsp;
1889 int count, maxcount;
1890 struct getfsstat_struct fst;
1891
1892 maxcount = uap->bufsize / sizeof(struct statfs64);
1893
1894 sfsp = uap->buf;
1895 count = 0;
1896
1897 fst.sfsp = sfsp;
1898 fst.flags = uap->flags;
1899 fst.count = 0;
1900 fst.error = 0;
1901 fst.maxcount = maxcount;
1902
1903 vfs_iterate(0, getfsstat64_callback, &fst);
1904
1905 if (fst.error ) {
1906 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
1907 return(fst.error);
1908 }
1909
1910 if (fst.sfsp && fst.count > fst.maxcount)
1911 *retval = fst.maxcount;
1912 else
1913 *retval = fst.count;
1914
1915 return (0);
1916 }
1917
1918 /*
1919 * Change current working directory to a given file descriptor.
1920 */
1921 /* ARGSUSED */
1922 static int
1923 common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
1924 {
1925 struct filedesc *fdp = p->p_fd;
1926 vnode_t vp;
1927 vnode_t tdp;
1928 vnode_t tvp;
1929 struct mount *mp;
1930 int error;
1931 vfs_context_t ctx = vfs_context_current();
1932
1933 AUDIT_ARG(fd, uap->fd);
1934 if (per_thread && uap->fd == -1) {
1935 /*
1936 * Switching back from per-thread to per process CWD; verify we
1937 * in fact have one before proceeding. The only success case
1938 * for this code path is to return 0 preemptively after zapping
1939 * the thread structure contents.
1940 */
1941 thread_t th = vfs_context_thread(ctx);
1942 if (th) {
1943 uthread_t uth = get_bsdthread_info(th);
1944 tvp = uth->uu_cdir;
1945 uth->uu_cdir = NULLVP;
1946 if (tvp != NULLVP) {
1947 vnode_rele(tvp);
1948 return (0);
1949 }
1950 }
1951 return (EBADF);
1952 }
1953
1954 if ( (error = file_vnode(uap->fd, &vp)) )
1955 return(error);
1956 if ( (error = vnode_getwithref(vp)) ) {
1957 file_drop(uap->fd);
1958 return(error);
1959 }
1960
1961 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
1962
1963 if (vp->v_type != VDIR) {
1964 error = ENOTDIR;
1965 goto out;
1966 }
1967
1968 #if CONFIG_MACF
1969 error = mac_vnode_check_chdir(ctx, vp);
1970 if (error)
1971 goto out;
1972 #endif
1973 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
1974 if (error)
1975 goto out;
1976
1977 while (!error && (mp = vp->v_mountedhere) != NULL) {
1978 if (vfs_busy(mp, LK_NOWAIT)) {
1979 error = EACCES;
1980 goto out;
1981 }
1982 error = VFS_ROOT(mp, &tdp, ctx);
1983 vfs_unbusy(mp);
1984 if (error)
1985 break;
1986 vnode_put(vp);
1987 vp = tdp;
1988 }
1989 if (error)
1990 goto out;
1991 if ( (error = vnode_ref(vp)) )
1992 goto out;
1993 vnode_put(vp);
1994
1995 if (per_thread) {
1996 thread_t th = vfs_context_thread(ctx);
1997 if (th) {
1998 uthread_t uth = get_bsdthread_info(th);
1999 tvp = uth->uu_cdir;
2000 uth->uu_cdir = vp;
2001 OSBitOrAtomic(P_THCWD, &p->p_flag);
2002 } else {
2003 vnode_rele(vp);
2004 return (ENOENT);
2005 }
2006 } else {
2007 proc_fdlock(p);
2008 tvp = fdp->fd_cdir;
2009 fdp->fd_cdir = vp;
2010 proc_fdunlock(p);
2011 }
2012
2013 if (tvp)
2014 vnode_rele(tvp);
2015 file_drop(uap->fd);
2016
2017 return (0);
2018 out:
2019 vnode_put(vp);
2020 file_drop(uap->fd);
2021
2022 return(error);
2023 }
2024
2025 int
2026 fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
2027 {
2028 return common_fchdir(p, uap, 0);
2029 }
2030
2031 int
2032 __pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
2033 {
2034 return common_fchdir(p, (void *)uap, 1);
2035 }
2036
2037 /*
2038 * Change current working directory (".").
2039 *
2040 * Returns: 0 Success
2041 * change_dir:ENOTDIR
2042 * change_dir:???
2043 * vnode_ref:ENOENT No such file or directory
2044 */
2045 /* ARGSUSED */
2046 static int
2047 common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
2048 {
2049 struct filedesc *fdp = p->p_fd;
2050 int error;
2051 struct nameidata nd;
2052 vnode_t tvp;
2053 vfs_context_t ctx = vfs_context_current();
2054
2055 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
2056 UIO_USERSPACE, uap->path, ctx);
2057 error = change_dir(&nd, ctx);
2058 if (error)
2059 return (error);
2060 if ( (error = vnode_ref(nd.ni_vp)) ) {
2061 vnode_put(nd.ni_vp);
2062 return (error);
2063 }
2064 /*
2065 * drop the iocount we picked up in change_dir
2066 */
2067 vnode_put(nd.ni_vp);
2068
2069 if (per_thread) {
2070 thread_t th = vfs_context_thread(ctx);
2071 if (th) {
2072 uthread_t uth = get_bsdthread_info(th);
2073 tvp = uth->uu_cdir;
2074 uth->uu_cdir = nd.ni_vp;
2075 OSBitOrAtomic(P_THCWD, &p->p_flag);
2076 } else {
2077 vnode_rele(nd.ni_vp);
2078 return (ENOENT);
2079 }
2080 } else {
2081 proc_fdlock(p);
2082 tvp = fdp->fd_cdir;
2083 fdp->fd_cdir = nd.ni_vp;
2084 proc_fdunlock(p);
2085 }
2086
2087 if (tvp)
2088 vnode_rele(tvp);
2089
2090 return (0);
2091 }
2092
2093
2094 /*
2095 * chdir
2096 *
2097 * Change current working directory (".") for the entire process
2098 *
2099 * Parameters: p Process requesting the call
2100 * uap User argument descriptor (see below)
2101 * retval (ignored)
2102 *
2103 * Indirect parameters: uap->path Directory path
2104 *
2105 * Returns: 0 Success
2106 * common_chdir: ENOTDIR
2107 * common_chdir: ENOENT No such file or directory
2108 * common_chdir: ???
2109 *
2110 */
2111 int
2112 chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
2113 {
2114 return common_chdir(p, (void *)uap, 0);
2115 }
2116
2117 /*
2118 * __pthread_chdir
2119 *
2120 * Change current working directory (".") for a single thread
2121 *
2122 * Parameters: p Process requesting the call
2123 * uap User argument descriptor (see below)
2124 * retval (ignored)
2125 *
2126 * Indirect parameters: uap->path Directory path
2127 *
2128 * Returns: 0 Success
2129 * common_chdir: ENOTDIR
2130 * common_chdir: ENOENT No such file or directory
2131 * common_chdir: ???
2132 *
2133 */
2134 int
2135 __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
2136 {
2137 return common_chdir(p, (void *)uap, 1);
2138 }
2139
2140
2141 /*
2142 * Change notion of root (``/'') directory.
2143 */
2144 /* ARGSUSED */
2145 int
2146 chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
2147 {
2148 struct filedesc *fdp = p->p_fd;
2149 int error;
2150 struct nameidata nd;
2151 vnode_t tvp;
2152 vfs_context_t ctx = vfs_context_current();
2153
2154 if ((error = suser(kauth_cred_get(), &p->p_acflag)))
2155 return (error);
2156
2157 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
2158 UIO_USERSPACE, uap->path, ctx);
2159 error = change_dir(&nd, ctx);
2160 if (error)
2161 return (error);
2162
2163 #if CONFIG_MACF
2164 error = mac_vnode_check_chroot(ctx, nd.ni_vp,
2165 &nd.ni_cnd);
2166 if (error) {
2167 vnode_put(nd.ni_vp);
2168 return (error);
2169 }
2170 #endif
2171
2172 if ( (error = vnode_ref(nd.ni_vp)) ) {
2173 vnode_put(nd.ni_vp);
2174 return (error);
2175 }
2176 vnode_put(nd.ni_vp);
2177
2178 proc_fdlock(p);
2179 tvp = fdp->fd_rdir;
2180 fdp->fd_rdir = nd.ni_vp;
2181 fdp->fd_flags |= FD_CHROOT;
2182 proc_fdunlock(p);
2183
2184 if (tvp != NULL)
2185 vnode_rele(tvp);
2186
2187 return (0);
2188 }
2189
2190 /*
2191 * Common routine for chroot and chdir.
2192 *
2193 * Returns: 0 Success
2194 * ENOTDIR Not a directory
2195 * namei:??? [anything namei can return]
2196 * vnode_authorize:??? [anything vnode_authorize can return]
2197 */
2198 static int
2199 change_dir(struct nameidata *ndp, vfs_context_t ctx)
2200 {
2201 vnode_t vp;
2202 int error;
2203
2204 if ((error = namei(ndp)))
2205 return (error);
2206 nameidone(ndp);
2207 vp = ndp->ni_vp;
2208
2209 if (vp->v_type != VDIR) {
2210 vnode_put(vp);
2211 return (ENOTDIR);
2212 }
2213
2214 #if CONFIG_MACF
2215 error = mac_vnode_check_chdir(ctx, vp);
2216 if (error) {
2217 vnode_put(vp);
2218 return (error);
2219 }
2220 #endif
2221
2222 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
2223 if (error) {
2224 vnode_put(vp);
2225 return (error);
2226 }
2227
2228 return (error);
2229 }
2230
2231 /*
2232 * Check permissions, allocate an open file structure,
2233 * and call the device open routine if any.
2234 *
2235 * Returns: 0 Success
2236 * EINVAL
2237 * EINTR
2238 * falloc:ENFILE
2239 * falloc:EMFILE
2240 * falloc:ENOMEM
2241 * vn_open_auth:???
2242 * dupfdopen:???
2243 * VNOP_ADVLOCK:???
2244 * vnode_setsize:???
2245 *
2246 * XXX Need to implement uid, gid
2247 */
2248 int
2249 open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *vap, int32_t *retval)
2250 {
2251 proc_t p = vfs_context_proc(ctx);
2252 uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
2253 struct filedesc *fdp = p->p_fd;
2254 struct fileproc *fp;
2255 vnode_t vp;
2256 int flags, oflags;
2257 struct fileproc *nfp;
2258 int type, indx, error;
2259 struct flock lf;
2260 int no_controlling_tty = 0;
2261 int deny_controlling_tty = 0;
2262 struct session *sessp = SESSION_NULL;
2263 struct vfs_context context = *vfs_context_current(); /* local copy */
2264
2265 oflags = uflags;
2266
2267 if ((oflags & O_ACCMODE) == O_ACCMODE)
2268 return(EINVAL);
2269 flags = FFLAGS(uflags);
2270
2271 AUDIT_ARG(fflags, oflags);
2272 AUDIT_ARG(mode, vap->va_mode);
2273
2274 if ( (error = falloc(p, &nfp, &indx, ctx)) ) {
2275 return (error);
2276 }
2277 fp = nfp;
2278 uu->uu_dupfd = -indx - 1;
2279
2280 if (!(p->p_flag & P_CONTROLT)) {
2281 sessp = proc_session(p);
2282 no_controlling_tty = 1;
2283 /*
2284 * If conditions would warrant getting a controlling tty if
2285 * the device being opened is a tty (see ttyopen in tty.c),
2286 * but the open flags deny it, set a flag in the session to
2287 * prevent it.
2288 */
2289 if (SESS_LEADER(p, sessp) &&
2290 sessp->s_ttyvp == NULL &&
2291 (flags & O_NOCTTY)) {
2292 session_lock(sessp);
2293 sessp->s_flags |= S_NOCTTY;
2294 session_unlock(sessp);
2295 deny_controlling_tty = 1;
2296 }
2297 }
2298
2299 if ((error = vn_open_auth(ndp, &flags, vap))) {
2300 if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
2301 if ((error = dupfdopen(fdp, indx, uu->uu_dupfd, flags, error)) == 0) {
2302 fp_drop(p, indx, NULL, 0);
2303 *retval = indx;
2304 if (deny_controlling_tty) {
2305 session_lock(sessp);
2306 sessp->s_flags &= ~S_NOCTTY;
2307 session_unlock(sessp);
2308 }
2309 if (sessp != SESSION_NULL)
2310 session_rele(sessp);
2311 return (0);
2312 }
2313 }
2314 if (error == ERESTART)
2315 error = EINTR;
2316 fp_free(p, indx, fp);
2317
2318 if (deny_controlling_tty) {
2319 session_lock(sessp);
2320 sessp->s_flags &= ~S_NOCTTY;
2321 session_unlock(sessp);
2322 }
2323 if (sessp != SESSION_NULL)
2324 session_rele(sessp);
2325 return (error);
2326 }
2327 uu->uu_dupfd = 0;
2328 vp = ndp->ni_vp;
2329
2330 fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
2331 fp->f_fglob->fg_type = DTYPE_VNODE;
2332 fp->f_fglob->fg_ops = &vnops;
2333 fp->f_fglob->fg_data = (caddr_t)vp;
2334
2335 if (flags & (O_EXLOCK | O_SHLOCK)) {
2336 lf.l_whence = SEEK_SET;
2337 lf.l_start = 0;
2338 lf.l_len = 0;
2339 if (flags & O_EXLOCK)
2340 lf.l_type = F_WRLCK;
2341 else
2342 lf.l_type = F_RDLCK;
2343 type = F_FLOCK;
2344 if ((flags & FNONBLOCK) == 0)
2345 type |= F_WAIT;
2346 #if CONFIG_MACF
2347 error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
2348 F_SETLK, &lf);
2349 if (error)
2350 goto bad;
2351 #endif
2352 if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx)))
2353 goto bad;
2354 fp->f_fglob->fg_flag |= FHASLOCK;
2355 }
2356
2357 /* try to truncate by setting the size attribute */
2358 if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
2359 goto bad;
2360
2361 /*
2362 * If the open flags denied the acquisition of a controlling tty,
2363 * clear the flag in the session structure that prevented the lower
2364 * level code from assigning one.
2365 */
2366 if (deny_controlling_tty) {
2367 session_lock(sessp);
2368 sessp->s_flags &= ~S_NOCTTY;
2369 session_unlock(sessp);
2370 }
2371
2372 /*
2373 * If a controlling tty was set by the tty line discipline, then we
2374 * want to set the vp of the tty into the session structure. We have
2375 * a race here because we can't get to the vp for the tp in ttyopen,
2376 * because it's not passed as a parameter in the open path.
2377 */
2378 if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
2379 vnode_t ttyvp;
2380 vnode_ref(vp);
2381 session_lock(sessp);
2382 ttyvp = sessp->s_ttyvp;
2383 sessp->s_ttyvp = vp;
2384 sessp->s_ttyvid = vnode_vid(vp);
2385 session_unlock(sessp);
2386 if (ttyvp != NULLVP)
2387 vnode_rele(ttyvp);
2388 }
2389
2390 vnode_put(vp);
2391
2392 proc_fdlock(p);
2393 procfdtbl_releasefd(p, indx, NULL);
2394 fp_drop(p, indx, fp, 1);
2395 proc_fdunlock(p);
2396
2397 *retval = indx;
2398
2399 if (sessp != SESSION_NULL)
2400 session_rele(sessp);
2401 return (0);
2402 bad:
2403 if (deny_controlling_tty) {
2404 session_lock(sessp);
2405 sessp->s_flags &= ~S_NOCTTY;
2406 session_unlock(sessp);
2407 }
2408 if (sessp != SESSION_NULL)
2409 session_rele(sessp);
2410
2411 /* Modify local copy (to not damage thread copy) */
2412 context.vc_ucred = fp->f_fglob->fg_cred;
2413
2414 vn_close(vp, fp->f_fglob->fg_flag, &context);
2415 vnode_put(vp);
2416 fp_free(p, indx, fp);
2417
2418 return (error);
2419
2420 }
2421
2422 /*
2423 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
2424 *
2425 * Parameters: p Process requesting the open
2426 * uap User argument descriptor (see below)
2427 * retval Pointer to an area to receive the
2428 * return calue from the system call
2429 *
2430 * Indirect: uap->path Path to open (same as 'open')
2431 * uap->flags Flags to open (same as 'open'
2432 * uap->uid UID to set, if creating
2433 * uap->gid GID to set, if creating
2434 * uap->mode File mode, if creating (same as 'open')
2435 * uap->xsecurity ACL to set, if creating
2436 *
2437 * Returns: 0 Success
2438 * !0 errno value
2439 *
2440 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
2441 *
2442 * XXX: We should enummerate the possible errno values here, and where
2443 * in the code they originated.
2444 */
2445 int
2446 open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
2447 {
2448 struct filedesc *fdp = p->p_fd;
2449 int ciferror;
2450 kauth_filesec_t xsecdst;
2451 struct vnode_attr va;
2452 struct nameidata nd;
2453 int cmode;
2454
2455 AUDIT_ARG(owner, uap->uid, uap->gid);
2456
2457 xsecdst = NULL;
2458 if ((uap->xsecurity != USER_ADDR_NULL) &&
2459 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
2460 return ciferror;
2461
2462 VATTR_INIT(&va);
2463 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2464 VATTR_SET(&va, va_mode, cmode);
2465 if (uap->uid != KAUTH_UID_NONE)
2466 VATTR_SET(&va, va_uid, uap->uid);
2467 if (uap->gid != KAUTH_GID_NONE)
2468 VATTR_SET(&va, va_gid, uap->gid);
2469 if (xsecdst != NULL)
2470 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
2471
2472 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
2473
2474 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va, retval);
2475 if (xsecdst != NULL)
2476 kauth_filesec_free(xsecdst);
2477
2478 return ciferror;
2479 }
2480
2481 int
2482 open(proc_t p, struct open_args *uap, int32_t *retval)
2483 {
2484 __pthread_testcancel(1);
2485 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
2486 }
2487
2488 int
2489 open_nocancel(proc_t p, struct open_nocancel_args *uap, int32_t *retval)
2490 {
2491 struct filedesc *fdp = p->p_fd;
2492 struct vnode_attr va;
2493 struct nameidata nd;
2494 int cmode;
2495
2496 VATTR_INIT(&va);
2497 /* Mask off all but regular access permissions */
2498 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
2499 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
2500
2501 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current());
2502
2503 return(open1(vfs_context_current(), &nd, uap->flags, &va, retval));
2504 }
2505
2506
2507 /*
2508 * Create a special file.
2509 */
2510 static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
2511
2512 int
2513 mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
2514 {
2515 struct vnode_attr va;
2516 vfs_context_t ctx = vfs_context_current();
2517 int error;
2518 int whiteout = 0;
2519 struct nameidata nd;
2520 vnode_t vp, dvp;
2521
2522 VATTR_INIT(&va);
2523 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2524 VATTR_SET(&va, va_rdev, uap->dev);
2525
2526 /* If it's a mknod() of a FIFO, call mkfifo1() instead */
2527 if ((uap->mode & S_IFMT) == S_IFIFO)
2528 return(mkfifo1(ctx, uap->path, &va));
2529
2530 AUDIT_ARG(mode, uap->mode);
2531 AUDIT_ARG(value32, uap->dev);
2532
2533 if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
2534 return (error);
2535 NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
2536 UIO_USERSPACE, uap->path, ctx);
2537 error = namei(&nd);
2538 if (error)
2539 return (error);
2540 dvp = nd.ni_dvp;
2541 vp = nd.ni_vp;
2542
2543 if (vp != NULL) {
2544 error = EEXIST;
2545 goto out;
2546 }
2547
2548 switch (uap->mode & S_IFMT) {
2549 case S_IFMT: /* used by badsect to flag bad sectors */
2550 VATTR_SET(&va, va_type, VBAD);
2551 break;
2552 case S_IFCHR:
2553 VATTR_SET(&va, va_type, VCHR);
2554 break;
2555 case S_IFBLK:
2556 VATTR_SET(&va, va_type, VBLK);
2557 break;
2558 case S_IFWHT:
2559 whiteout = 1;
2560 break;
2561 default:
2562 error = EINVAL;
2563 goto out;
2564 }
2565
2566 #if CONFIG_MACF
2567 if (!whiteout) {
2568 error = mac_vnode_check_create(ctx,
2569 nd.ni_dvp, &nd.ni_cnd, &va);
2570 if (error)
2571 goto out;
2572 }
2573 #endif
2574
2575 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2576 goto out;
2577
2578 if (whiteout) {
2579 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, CREATE, ctx);
2580 } else {
2581 error = vn_create(dvp, &vp, &nd.ni_cnd, &va, 0, ctx);
2582 }
2583 if (error)
2584 goto out;
2585
2586 if (vp) {
2587 int update_flags = 0;
2588
2589 // Make sure the name & parent pointers are hooked up
2590 if (vp->v_name == NULL)
2591 update_flags |= VNODE_UPDATE_NAME;
2592 if (vp->v_parent == NULLVP)
2593 update_flags |= VNODE_UPDATE_PARENT;
2594
2595 if (update_flags)
2596 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
2597
2598 #if CONFIG_FSE
2599 add_fsevent(FSE_CREATE_FILE, ctx,
2600 FSE_ARG_VNODE, vp,
2601 FSE_ARG_DONE);
2602 #endif
2603 }
2604
2605 out:
2606 /*
2607 * nameidone has to happen before we vnode_put(dvp)
2608 * since it may need to release the fs_nodelock on the dvp
2609 */
2610 nameidone(&nd);
2611
2612 if (vp)
2613 vnode_put(vp);
2614 vnode_put(dvp);
2615
2616 return (error);
2617 }
2618
2619 /*
2620 * Create a named pipe.
2621 *
2622 * Returns: 0 Success
2623 * EEXIST
2624 * namei:???
2625 * vnode_authorize:???
2626 * vn_create:???
2627 */
2628 static int
2629 mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
2630 {
2631 vnode_t vp, dvp;
2632 int error;
2633 struct nameidata nd;
2634
2635 NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
2636 UIO_USERSPACE, upath, ctx);
2637 error = namei(&nd);
2638 if (error)
2639 return (error);
2640 dvp = nd.ni_dvp;
2641 vp = nd.ni_vp;
2642
2643 /* check that this is a new file and authorize addition */
2644 if (vp != NULL) {
2645 error = EEXIST;
2646 goto out;
2647 }
2648 VATTR_SET(vap, va_type, VFIFO);
2649
2650 #if CONFIG_MACF
2651 error = mac_vnode_check_create(ctx, nd.ni_dvp,
2652 &nd.ni_cnd, vap);
2653 if (error)
2654 goto out;
2655 #endif
2656
2657
2658 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2659 goto out;
2660
2661
2662 error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx);
2663 out:
2664 /*
2665 * nameidone has to happen before we vnode_put(dvp)
2666 * since it may need to release the fs_nodelock on the dvp
2667 */
2668 nameidone(&nd);
2669
2670 if (vp)
2671 vnode_put(vp);
2672 vnode_put(dvp);
2673
2674 return error;
2675 }
2676
2677
2678 /*
2679 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
2680 *
2681 * Parameters: p Process requesting the open
2682 * uap User argument descriptor (see below)
2683 * retval (Ignored)
2684 *
2685 * Indirect: uap->path Path to fifo (same as 'mkfifo')
2686 * uap->uid UID to set
2687 * uap->gid GID to set
2688 * uap->mode File mode to set (same as 'mkfifo')
2689 * uap->xsecurity ACL to set, if creating
2690 *
2691 * Returns: 0 Success
2692 * !0 errno value
2693 *
2694 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
2695 *
2696 * XXX: We should enummerate the possible errno values here, and where
2697 * in the code they originated.
2698 */
2699 int
2700 mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
2701 {
2702 int ciferror;
2703 kauth_filesec_t xsecdst;
2704 struct vnode_attr va;
2705
2706 AUDIT_ARG(owner, uap->uid, uap->gid);
2707
2708 xsecdst = KAUTH_FILESEC_NONE;
2709 if (uap->xsecurity != USER_ADDR_NULL) {
2710 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
2711 return ciferror;
2712 }
2713
2714 VATTR_INIT(&va);
2715 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2716 if (uap->uid != KAUTH_UID_NONE)
2717 VATTR_SET(&va, va_uid, uap->uid);
2718 if (uap->gid != KAUTH_GID_NONE)
2719 VATTR_SET(&va, va_gid, uap->gid);
2720 if (xsecdst != KAUTH_FILESEC_NONE)
2721 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
2722
2723 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
2724
2725 if (xsecdst != KAUTH_FILESEC_NONE)
2726 kauth_filesec_free(xsecdst);
2727 return ciferror;
2728 }
2729
2730 /* ARGSUSED */
2731 int
2732 mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
2733 {
2734 struct vnode_attr va;
2735
2736 VATTR_INIT(&va);
2737 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
2738
2739 return(mkfifo1(vfs_context_current(), uap->path, &va));
2740 }
2741
2742
/*
 * my_strrchr: return a pointer to the last occurrence of 'ch' in the
 * NUL-terminated string 'p', or NULL if it does not occur.  The terminator
 * itself takes part in the comparison, so searching for 0 yields a pointer
 * to the end of the string.
 */
static char *
my_strrchr(char *p, int ch)
{
	char *last = NULL;

	do {
		if (*p == ch)
			last = p;
	} while (*p++ != '\0');

	return (last);
}
2756
2757 extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
2758
2759 int
2760 safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
2761 {
2762 int ret, len = _len;
2763
2764 *truncated_path = 0;
2765 ret = vn_getpath(dvp, path, &len);
2766 if (ret == 0 && len < (MAXPATHLEN - 1)) {
2767 if (leafname) {
2768 path[len-1] = '/';
2769 len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
2770 if (len > MAXPATHLEN) {
2771 char *ptr;
2772
2773 // the string got truncated!
2774 *truncated_path = 1;
2775 ptr = my_strrchr(path, '/');
2776 if (ptr) {
2777 *ptr = '\0'; // chop off the string at the last directory component
2778 }
2779 len = strlen(path) + 1;
2780 }
2781 }
2782 } else if (ret == 0) {
2783 *truncated_path = 1;
2784 } else if (ret != 0) {
2785 struct vnode *mydvp=dvp;
2786
2787 if (ret != ENOSPC) {
2788 printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
2789 dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
2790 }
2791 *truncated_path = 1;
2792
2793 do {
2794 if (mydvp->v_parent != NULL) {
2795 mydvp = mydvp->v_parent;
2796 } else if (mydvp->v_mount) {
2797 strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
2798 break;
2799 } else {
2800 // no parent and no mount point? only thing is to punt and say "/" changed
2801 strlcpy(path, "/", _len);
2802 len = 2;
2803 mydvp = NULL;
2804 }
2805
2806 if (mydvp == NULL) {
2807 break;
2808 }
2809
2810 len = _len;
2811 ret = vn_getpath(mydvp, path, &len);
2812 } while (ret == ENOSPC);
2813 }
2814
2815 return len;
2816 }
2817
2818
2819 /*
2820 * Make a hard file link.
2821 *
2822 * Returns: 0 Success
2823 * EPERM
2824 * EEXIST
2825 * EXDEV
2826 * namei:???
2827 * vnode_authorize:???
2828 * VNOP_LINK:???
2829 */
2830 /* ARGSUSED */
2831 int
2832 link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
2833 {
2834 vnode_t vp, dvp, lvp;
2835 struct nameidata nd;
2836 vfs_context_t ctx = vfs_context_current();
2837 int error;
2838 #if CONFIG_FSE
2839 fse_info finfo;
2840 #endif
2841 int need_event, has_listeners;
2842 char *target_path = NULL;
2843 int truncated=0;
2844
2845 vp = dvp = lvp = NULLVP;
2846
2847 /* look up the object we are linking to */
2848 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
2849 UIO_USERSPACE, uap->path, ctx);
2850 error = namei(&nd);
2851 if (error)
2852 return (error);
2853 vp = nd.ni_vp;
2854
2855 nameidone(&nd);
2856
2857 /*
2858 * Normally, linking to directories is not supported.
2859 * However, some file systems may have limited support.
2860 */
2861 if (vp->v_type == VDIR) {
2862 if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
2863 error = EPERM; /* POSIX */
2864 goto out;
2865 }
2866 /* Linking to a directory requires ownership. */
2867 if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
2868 struct vnode_attr dva;
2869
2870 VATTR_INIT(&dva);
2871 VATTR_WANTED(&dva, va_uid);
2872 if (vnode_getattr(vp, &dva, ctx) != 0 ||
2873 !VATTR_IS_SUPPORTED(&dva, va_uid) ||
2874 (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
2875 error = EACCES;
2876 goto out;
2877 }
2878 }
2879 }
2880
2881 /* lookup the target node */
2882 nd.ni_cnd.cn_nameiop = CREATE;
2883 nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
2884 nd.ni_dirp = uap->link;
2885 error = namei(&nd);
2886 if (error != 0)
2887 goto out;
2888 dvp = nd.ni_dvp;
2889 lvp = nd.ni_vp;
2890
2891 #if CONFIG_MACF
2892 if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
2893 goto out2;
2894 #endif
2895
2896 /* or to anything that kauth doesn't want us to (eg. immutable items) */
2897 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
2898 goto out2;
2899
2900 /* target node must not exist */
2901 if (lvp != NULLVP) {
2902 error = EEXIST;
2903 goto out2;
2904 }
2905 /* cannot link across mountpoints */
2906 if (vnode_mount(vp) != vnode_mount(dvp)) {
2907 error = EXDEV;
2908 goto out2;
2909 }
2910
2911 /* authorize creation of the target note */
2912 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
2913 goto out2;
2914
2915 /* and finally make the link */
2916 error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
2917 if (error)
2918 goto out2;
2919
2920 #if CONFIG_FSE
2921 need_event = need_fsevent(FSE_CREATE_FILE, dvp);
2922 #else
2923 need_event = 0;
2924 #endif
2925 has_listeners = kauth_authorize_fileop_has_listeners();
2926
2927 if (need_event || has_listeners) {
2928 char *link_to_path = NULL;
2929 int len, link_name_len;
2930
2931 /* build the path to the new link file */
2932 GET_PATH(target_path);
2933 if (target_path == NULL) {
2934 error = ENOMEM;
2935 goto out2;
2936 }
2937
2938 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
2939
2940 if (has_listeners) {
2941 /* build the path to file we are linking to */
2942 GET_PATH(link_to_path);
2943 if (link_to_path == NULL) {
2944 error = ENOMEM;
2945 goto out2;
2946 }
2947
2948 link_name_len = MAXPATHLEN;
2949 vn_getpath(vp, link_to_path, &link_name_len);
2950
2951 /*
2952 * Call out to allow 3rd party notification of rename.
2953 * Ignore result of kauth_authorize_fileop call.
2954 */
2955 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
2956 (uintptr_t)link_to_path, (uintptr_t)target_path);
2957 if (link_to_path != NULL) {
2958 RELEASE_PATH(link_to_path);
2959 }
2960 }
2961 #if CONFIG_FSE
2962 if (need_event) {
2963 /* construct fsevent */
2964 if (get_fse_info(vp, &finfo, ctx) == 0) {
2965 if (truncated) {
2966 finfo.mode |= FSE_TRUNCATED_PATH;
2967 }
2968
2969 // build the path to the destination of the link
2970 add_fsevent(FSE_CREATE_FILE, ctx,
2971 FSE_ARG_STRING, len, target_path,
2972 FSE_ARG_FINFO, &finfo,
2973 FSE_ARG_DONE);
2974 }
2975 if (vp->v_parent) {
2976 add_fsevent(FSE_STAT_CHANGED, ctx,
2977 FSE_ARG_VNODE, vp->v_parent,
2978 FSE_ARG_DONE);
2979 }
2980 }
2981 #endif
2982 }
2983 out2:
2984 /*
2985 * nameidone has to happen before we vnode_put(dvp)
2986 * since it may need to release the fs_nodelock on the dvp
2987 */
2988 nameidone(&nd);
2989 if (target_path != NULL) {
2990 RELEASE_PATH(target_path);
2991 }
2992 out:
2993 if (lvp)
2994 vnode_put(lvp);
2995 if (dvp)
2996 vnode_put(dvp);
2997 vnode_put(vp);
2998 return (error);
2999 }
3000
3001 /*
3002 * Make a symbolic link.
3003 *
3004 * We could add support for ACLs here too...
3005 */
3006 /* ARGSUSED */
3007 int
3008 symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval)
3009 {
3010 struct vnode_attr va;
3011 char *path;
3012 int error;
3013 struct nameidata nd;
3014 vfs_context_t ctx = vfs_context_current();
3015 vnode_t vp, dvp;
3016 size_t dummy=0;
3017
3018 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
3019 error = copyinstr(uap->path, path, MAXPATHLEN, &dummy);
3020 if (error)
3021 goto out;
3022 AUDIT_ARG(text, path); /* This is the link string */
3023
3024 NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
3025 UIO_USERSPACE, uap->link, ctx);
3026 error = namei(&nd);
3027 if (error)
3028 goto out;
3029 dvp = nd.ni_dvp;
3030 vp = nd.ni_vp;
3031
3032 VATTR_INIT(&va);
3033 VATTR_SET(&va, va_type, VLNK);
3034 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
3035 #if CONFIG_MACF
3036 error = mac_vnode_check_create(ctx,
3037 dvp, &nd.ni_cnd, &va);
3038 #endif
3039 if (error != 0) {
3040 goto skipit;
3041 }
3042
3043 if (vp != NULL) {
3044 error = EEXIST;
3045 goto skipit;
3046 }
3047
3048 /* authorize */
3049 if (error == 0)
3050 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
3051 /* get default ownership, etc. */
3052 if (error == 0)
3053 error = vnode_authattr_new(dvp, &va, 0, ctx);
3054 if (error == 0)
3055 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
3056
3057 /* do fallback attribute handling */
3058 if (error == 0)
3059 error = vnode_setattr_fallback(vp, &va, ctx);
3060
3061 if (error == 0) {
3062 int update_flags = 0;
3063
3064 if (vp == NULL) {
3065 nd.ni_cnd.cn_nameiop = LOOKUP;
3066 nd.ni_cnd.cn_flags = 0;
3067 error = namei(&nd);
3068 vp = nd.ni_vp;
3069
3070 if (vp == NULL)
3071 goto skipit;
3072 }
3073
3074 #if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
3075 /* call out to allow 3rd party notification of rename.
3076 * Ignore result of kauth_authorize_fileop call.
3077 */
3078 if (kauth_authorize_fileop_has_listeners() &&
3079 namei(&nd) == 0) {
3080 char *new_link_path = NULL;
3081 int len;
3082
3083 /* build the path to the new link file */
3084 new_link_path = get_pathbuff();
3085 len = MAXPATHLEN;
3086 vn_getpath(dvp, new_link_path, &len);
3087 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
3088 new_link_path[len - 1] = '/';
3089 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
3090 }
3091
3092 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
3093 (uintptr_t)path, (uintptr_t)new_link_path);
3094 if (new_link_path != NULL)
3095 release_pathbuff(new_link_path);
3096 }
3097 #endif
3098 // Make sure the name & parent pointers are hooked up
3099 if (vp->v_name == NULL)
3100 update_flags |= VNODE_UPDATE_NAME;
3101 if (vp->v_parent == NULLVP)
3102 update_flags |= VNODE_UPDATE_PARENT;
3103
3104 if (update_flags)
3105 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
3106
3107 #if CONFIG_FSE
3108 add_fsevent(FSE_CREATE_FILE, ctx,
3109 FSE_ARG_VNODE, vp,
3110 FSE_ARG_DONE);
3111 #endif
3112 }
3113
3114 skipit:
3115 /*
3116 * nameidone has to happen before we vnode_put(dvp)
3117 * since it may need to release the fs_nodelock on the dvp
3118 */
3119 nameidone(&nd);
3120
3121 if (vp)
3122 vnode_put(vp);
3123 vnode_put(dvp);
3124 out:
3125 FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
3126
3127 return (error);
3128 }
3129
3130 /*
3131 * Delete a whiteout from the filesystem.
3132 * XXX authorization not implmented for whiteouts
3133 */
3134 int
3135 undelete(__unused proc_t p, struct undelete_args *uap, __unused int32_t *retval)
3136 {
3137 int error;
3138 struct nameidata nd;
3139 vfs_context_t ctx = vfs_context_current();
3140 vnode_t vp, dvp;
3141
3142 NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT|AUDITVNPATH1,
3143 UIO_USERSPACE, uap->path, ctx);
3144 error = namei(&nd);
3145 if (error)
3146 return (error);
3147 dvp = nd.ni_dvp;
3148 vp = nd.ni_vp;
3149
3150 if (vp == NULLVP && (nd.ni_cnd.cn_flags & ISWHITEOUT)) {
3151 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, DELETE, ctx);
3152 } else
3153 error = EEXIST;
3154
3155 /*
3156 * nameidone has to happen before we vnode_put(dvp)
3157 * since it may need to release the fs_nodelock on the dvp
3158 */
3159 nameidone(&nd);
3160
3161 if (vp)
3162 vnode_put(vp);
3163 vnode_put(dvp);
3164
3165 return (error);
3166 }
3167
3168
3169 /*
3170 * Delete a name from the filesystem.
3171 */
3172 /* ARGSUSED */
3173 int
3174 unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy)
3175 {
3176 vnode_t vp, dvp;
3177 int error;
3178 struct componentname *cnp;
3179 char *path = NULL;
3180 int len=0;
3181 #if CONFIG_FSE
3182 fse_info finfo;
3183 #endif
3184 int flags = 0;
3185 int need_event = 0;
3186 int has_listeners = 0;
3187 int truncated_path=0;
3188 #if NAMEDRSRCFORK
3189 /* unlink or delete is allowed on rsrc forks and named streams */
3190 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
3191 #endif
3192
3193 ndp->ni_cnd.cn_flags |= LOCKPARENT;
3194 cnp = &ndp->ni_cnd;
3195
3196 error = namei(ndp);
3197 if (error)
3198 return (error);
3199
3200 dvp = ndp->ni_dvp;
3201 vp = ndp->ni_vp;
3202
3203 /* With Carbon delete semantics, busy files cannot be deleted */
3204 if (nodelbusy) {
3205 flags |= VNODE_REMOVE_NODELETEBUSY;
3206 }
3207
3208 /*
3209 * Normally, unlinking of directories is not supported.
3210 * However, some file systems may have limited support.
3211 */
3212 if ((vp->v_type == VDIR) &&
3213 !(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
3214 error = EPERM; /* POSIX */
3215 }
3216
3217 /*
3218 * The root of a mounted filesystem cannot be deleted.
3219 */
3220 if (vp->v_flag & VROOT) {
3221 error = EBUSY;
3222 }
3223 if (error)
3224 goto out;
3225
3226
3227 /* authorize the delete operation */
3228 #if CONFIG_MACF
3229 if (!error)
3230 error = mac_vnode_check_unlink(ctx,
3231 dvp, vp, cnp);
3232 #endif /* MAC */
3233 if (!error)
3234 error = vnode_authorize(vp, ndp->ni_dvp, KAUTH_VNODE_DELETE, ctx);
3235 if (error)
3236 goto out;
3237
3238 #if CONFIG_FSE
3239 need_event = need_fsevent(FSE_DELETE, dvp);
3240 if (need_event) {
3241 if ((vp->v_flag & VISHARDLINK) == 0) {
3242 get_fse_info(vp, &finfo, ctx);
3243 }
3244 }
3245 #endif
3246 has_listeners = kauth_authorize_fileop_has_listeners();
3247 if (need_event || has_listeners) {
3248 GET_PATH(path);
3249 if (path == NULL) {
3250 error = ENOMEM;
3251 goto out;
3252 }
3253
3254 len = safe_getpath(dvp, ndp->ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
3255 }
3256
3257 #if NAMEDRSRCFORK
3258 if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK)
3259 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
3260 else
3261 #endif
3262 error = VNOP_REMOVE(dvp, vp, &ndp->ni_cnd, flags, ctx);
3263
3264 /*
3265 * Call out to allow 3rd party notification of delete.
3266 * Ignore result of kauth_authorize_fileop call.
3267 */
3268 if (!error) {
3269 if (has_listeners) {
3270 kauth_authorize_fileop(vfs_context_ucred(ctx),
3271 KAUTH_FILEOP_DELETE,
3272 (uintptr_t)vp,
3273 (uintptr_t)path);
3274 }
3275
3276 if (vp->v_flag & VISHARDLINK) {
3277 //
3278 // if a hardlink gets deleted we want to blow away the
3279 // v_parent link because the path that got us to this
3280 // instance of the link is no longer valid. this will
3281 // force the next call to get the path to ask the file
3282 // system instead of just following the v_parent link.
3283 //
3284 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
3285 }
3286
3287 #if CONFIG_FSE
3288 if (need_event) {
3289 if (vp->v_flag & VISHARDLINK) {
3290 get_fse_info(vp, &finfo, ctx);
3291 }
3292 if (truncated_path) {
3293 finfo.mode |= FSE_TRUNCATED_PATH;
3294 }
3295 add_fsevent(FSE_DELETE, ctx,
3296 FSE_ARG_STRING, len, path,
3297 FSE_ARG_FINFO, &finfo,
3298 FSE_ARG_DONE);
3299 }
3300 #endif
3301 }
3302 if (path != NULL)
3303 RELEASE_PATH(path);
3304
3305 /*
3306 * nameidone has to happen before we vnode_put(dvp)
3307 * since it may need to release the fs_nodelock on the dvp
3308 */
3309 out:
3310 #if NAMEDRSRCFORK
3311 /* recycle the deleted rsrc fork vnode to force a reclaim, which
3312 * will cause its shadow file to go away if necessary.
3313 */
3314 if ((vnode_isnamedstream(ndp->ni_vp)) &&
3315 (ndp->ni_vp->v_parent != NULLVP) &&
3316 vnode_isshadow(ndp->ni_vp)) {
3317 vnode_recycle(ndp->ni_vp);
3318 }
3319 #endif
3320 nameidone(ndp);
3321 vnode_put(dvp);
3322 vnode_put(vp);
3323 return (error);
3324 }
3325
3326 /*
3327 * Delete a name from the filesystem using POSIX semantics.
3328 */
3329 int
3330 unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
3331 {
3332 struct nameidata nd;
3333 vfs_context_t ctx = vfs_context_current();
3334
3335 NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
3336 return unlink1(ctx, &nd, 0);
3337 }
3338
3339 /*
3340 * Delete a name from the filesystem using Carbon semantics.
3341 */
3342 int
3343 delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
3344 {
3345 struct nameidata nd;
3346 vfs_context_t ctx = vfs_context_current();
3347
3348 NDINIT(&nd, DELETE, AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx);
3349 return unlink1(ctx, &nd, 1);
3350 }
3351
3352 /*
3353 * Reposition read/write file offset.
3354 */
3355 int
3356 lseek(proc_t p, struct lseek_args *uap, off_t *retval)
3357 {
3358 struct fileproc *fp;
3359 vnode_t vp;
3360 struct vfs_context *ctx;
3361 off_t offset = uap->offset, file_size;
3362 int error;
3363
3364 if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
3365 if (error == ENOTSUP)
3366 return (ESPIPE);
3367 return (error);
3368 }
3369 if (vnode_isfifo(vp)) {
3370 file_drop(uap->fd);
3371 return(ESPIPE);
3372 }
3373
3374
3375 ctx = vfs_context_current();
3376 #if CONFIG_MACF
3377 if (uap->whence == L_INCR && uap->offset == 0)
3378 error = mac_file_check_get_offset(vfs_context_ucred(ctx),
3379 fp->f_fglob);
3380 else
3381 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
3382 fp->f_fglob);
3383 if (error) {
3384 file_drop(uap->fd);
3385 return (error);
3386 }
3387 #endif
3388 if ( (error = vnode_getwithref(vp)) ) {
3389 file_drop(uap->fd);
3390 return(error);
3391 }
3392
3393 switch (uap->whence) {
3394 case L_INCR:
3395 offset += fp->f_fglob->fg_offset;
3396 break;
3397 case L_XTND:
3398 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
3399 break;
3400 offset += file_size;
3401 break;
3402 case L_SET:
3403 break;
3404 default:
3405 error = EINVAL;
3406 }
3407 if (error == 0) {
3408 if (uap->offset > 0 && offset < 0) {
3409 /* Incremented/relative move past max size */
3410 error = EOVERFLOW;
3411 } else {
3412 /*
3413 * Allow negative offsets on character devices, per
3414 * POSIX 1003.1-2001. Most likely for writing disk
3415 * labels.
3416 */
3417 if (offset < 0 && vp->v_type != VCHR) {
3418 /* Decremented/relative move before start */
3419 error = EINVAL;
3420 } else {
3421 /* Success */
3422 fp->f_fglob->fg_offset = offset;
3423 *retval = fp->f_fglob->fg_offset;
3424 }
3425 }
3426 }
3427
3428 /*
3429 * An lseek can affect whether data is "available to read." Use
3430 * hint of NOTE_NONE so no EVFILT_VNODE events fire
3431 */
3432 post_event_if_success(vp, error, NOTE_NONE);
3433 (void)vnode_put(vp);
3434 file_drop(uap->fd);
3435 return (error);
3436 }
3437
3438
3439 /*
3440 * Check access permissions.
3441 *
3442 * Returns: 0 Success
3443 * vnode_authorize:???
3444 */
3445 static int
3446 access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
3447 {
3448 kauth_action_t action;
3449 int error;
3450
3451 /*
3452 * If just the regular access bits, convert them to something
3453 * that vnode_authorize will understand.
3454 */
3455 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
3456 action = 0;
3457 if (uflags & R_OK)
3458 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
3459 if (uflags & W_OK) {
3460 if (vnode_isdir(vp)) {
3461 action |= KAUTH_VNODE_ADD_FILE |
3462 KAUTH_VNODE_ADD_SUBDIRECTORY;
3463 /* might want delete rights here too */
3464 } else {
3465 action |= KAUTH_VNODE_WRITE_DATA;
3466 }
3467 }
3468 if (uflags & X_OK) {
3469 if (vnode_isdir(vp)) {
3470 action |= KAUTH_VNODE_SEARCH;
3471 } else {
3472 action |= KAUTH_VNODE_EXECUTE;
3473 }
3474 }
3475 } else {
3476 /* take advantage of definition of uflags */
3477 action = uflags >> 8;
3478 }
3479
3480 #if CONFIG_MACF
3481 error = mac_vnode_check_access(ctx, vp, uflags);
3482 if (error)
3483 return (error);
3484 #endif /* MAC */
3485
3486 /* action == 0 means only check for existence */
3487 if (action != 0) {
3488 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
3489 } else {
3490 error = 0;
3491 }
3492
3493 return(error);
3494 }
3495
3496
3497
3498 /*
3499 * access_extended: Check access permissions in bulk.
3500 *
3501 * Description: uap->entries Pointer to an array of accessx
3502 * descriptor structs, plus one or
3503 * more NULL terminated strings (see
3504 * "Notes" section below).
3505 * uap->size Size of the area pointed to by
3506 * uap->entries.
3507 * uap->results Pointer to the results array.
3508 *
3509 * Returns: 0 Success
3510 * ENOMEM Insufficient memory
3511 * EINVAL Invalid arguments
3512 * namei:EFAULT Bad address
3513 * namei:ENAMETOOLONG Filename too long
3514 * namei:ENOENT No such file or directory
3515 * namei:ELOOP Too many levels of symbolic links
3516 * namei:EBADF Bad file descriptor
3517 * namei:ENOTDIR Not a directory
3518 * namei:???
3519 * access1:
3520 *
3521 * Implicit returns:
3522 * uap->results Array contents modified
3523 *
3524 * Notes: The uap->entries are structured as an arbitrary length array
3525 * of accessx descriptors, followed by one or more NULL terminated
3526 * strings
3527 *
3528 * struct accessx_descriptor[0]
3529 * ...
3530 * struct accessx_descriptor[n]
3531 * char name_data[0];
3532 *
3533 * We determine the entry count by walking the buffer containing
3534 * the uap->entries argument descriptor. For each descriptor we
3535 * see, the valid values for the offset ad_name_offset will be
3536 * in the byte range:
3537 *
3538 * [ uap->entries + sizeof(struct accessx_descriptor) ]
3539 * to
3540 * [ uap->entries + uap->size - 2 ]
3541 *
3542 * since we must have at least one string, and the string must
3543 * be at least one character plus the NULL terminator in length.
3544 *
3545 * XXX: Need to support the check-as uid argument
3546 */
3547 int
3548 access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
3549 {
3550 struct accessx_descriptor *input = NULL;
3551 errno_t *result = NULL;
3552 errno_t error = 0;
3553 int wantdelete = 0;
3554 unsigned int desc_max, desc_actual, i, j;
3555 struct vfs_context context;
3556 struct nameidata nd;
3557 int niopts;
3558 vnode_t vp = NULL;
3559 vnode_t dvp = NULL;
3560 #define ACCESSX_MAX_DESCR_ON_STACK 10
3561 struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
3562
3563 context.vc_ucred = NULL;
3564
3565 /*
3566 * Validate parameters; if valid, copy the descriptor array and string
3567 * arguments into local memory. Before proceeding, the following
3568 * conditions must have been met:
3569 *
3570 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
3571 * o There must be sufficient room in the request for at least one
3572 * descriptor and a one yte NUL terminated string.
3573 * o The allocation of local storage must not fail.
3574 */
3575 if (uap->size > ACCESSX_MAX_TABLESIZE)
3576 return(ENOMEM);
3577 if (uap->size < (sizeof(struct accessx_descriptor) + 2))
3578 return(EINVAL);
3579 if (uap->size <= sizeof (stack_input)) {
3580 input = stack_input;
3581 } else {
3582 MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
3583 if (input == NULL) {
3584 error = ENOMEM;
3585 goto out;
3586 }
3587 }
3588 error = copyin(uap->entries, input, uap->size);
3589 if (error)
3590 goto out;
3591
3592 AUDIT_ARG(opaque, input, uap->size);
3593
3594 /*
3595 * Force NUL termination of the copyin buffer to avoid nami() running
3596 * off the end. If the caller passes us bogus data, they may get a
3597 * bogus result.
3598 */
3599 ((char *)input)[uap->size - 1] = 0;
3600
3601 /*
3602 * Access is defined as checking against the process' real identity,
3603 * even if operations are checking the effective identity. This
3604 * requires that we use a local vfs context.
3605 */
3606 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
3607 context.vc_thread = current_thread();
3608
3609 /*
3610 * Find out how many entries we have, so we can allocate the result
3611 * array by walking the list and adjusting the count downward by the
3612 * earliest string offset we see.
3613 */
3614 desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
3615 desc_actual = desc_max;
3616 for (i = 0; i < desc_actual; i++) {
3617 /*
3618 * Take the offset to the name string for this entry and
3619 * convert to an input array index, which would be one off
3620 * the end of the array if this entry was the lowest-addressed
3621 * name string.
3622 */
3623 j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
3624
3625 /*
3626 * An offset greater than the max allowable offset is an error.
3627 * It is also an error for any valid entry to point
3628 * to a location prior to the end of the current entry, if
3629 * it's not a reference to the string of the previous entry.
3630 */
3631 if (j > desc_max || (j != 0 && j <= i)) {
3632 error = EINVAL;
3633 goto out;
3634 }
3635
3636 /*
3637 * An offset of 0 means use the previous descriptor's offset;
3638 * this is used to chain multiple requests for the same file
3639 * to avoid multiple lookups.
3640 */
3641 if (j == 0) {
3642 /* This is not valid for the first entry */
3643 if (i == 0) {
3644 error = EINVAL;
3645 goto out;
3646 }
3647 continue;
3648 }
3649
3650 /*
3651 * If the offset of the string for this descriptor is before
3652 * what we believe is the current actual last descriptor,
3653 * then we need to adjust our estimate downward; this permits
3654 * the string table following the last descriptor to be out
3655 * of order relative to the descriptor list.
3656 */
3657 if (j < desc_actual)
3658 desc_actual = j;
3659 }
3660
3661 /*
3662 * We limit the actual number of descriptors we are willing to process
3663 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
3664 * requested does not exceed this limit,
3665 */
3666 if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
3667 error = ENOMEM;
3668 goto out;
3669 }
3670 MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
3671 if (result == NULL) {
3672 error = ENOMEM;
3673 goto out;
3674 }
3675
3676 /*
3677 * Do the work by iterating over the descriptor entries we know to
3678 * at least appear to contain valid data.
3679 */
3680 error = 0;
3681 for (i = 0; i < desc_actual; i++) {
3682 /*
3683 * If the ad_name_offset is 0, then we use the previous
3684 * results to make the check; otherwise, we are looking up
3685 * a new file name.
3686 */
3687 if (input[i].ad_name_offset != 0) {
3688 /* discard old vnodes */
3689 if (vp) {
3690 vnode_put(vp);
3691 vp = NULL;
3692 }
3693 if (dvp) {
3694 vnode_put(dvp);
3695 dvp = NULL;
3696 }
3697
3698 /*
3699 * Scan forward in the descriptor list to see if we
3700 * need the parent vnode. We will need it if we are
3701 * deleting, since we must have rights to remove
3702 * entries in the parent directory, as well as the
3703 * rights to delete the object itself.
3704 */
3705 wantdelete = input[i].ad_flags & _DELETE_OK;
3706 for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
3707 if (input[j].ad_flags & _DELETE_OK)
3708 wantdelete = 1;
3709
3710 niopts = FOLLOW | AUDITVNPATH1;
3711
3712 /* need parent for vnode_authorize for deletion test */
3713 if (wantdelete)
3714 niopts |= WANTPARENT;
3715
3716 /* do the lookup */
3717 NDINIT(&nd, LOOKUP, niopts, UIO_SYSSPACE, CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset), &context);
3718 error = namei(&nd);
3719 if (!error) {
3720 vp = nd.ni_vp;
3721 if (wantdelete)
3722 dvp = nd.ni_dvp;
3723 }
3724 nameidone(&nd);
3725 }
3726
3727 /*
3728 * Handle lookup errors.
3729 */
3730 switch(error) {
3731 case ENOENT:
3732 case EACCES:
3733 case EPERM:
3734 case ENOTDIR:
3735 result[i] = error;
3736 break;
3737 case 0:
3738 /* run this access check */
3739 result[i] = access1(vp, dvp, input[i].ad_flags, &context);
3740 break;
3741 default:
3742 /* fatal lookup error */
3743
3744 goto out;
3745 }
3746 }
3747
3748 AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
3749
3750 /* copy out results */
3751 error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
3752
3753 out:
3754 if (input && input != stack_input)
3755 FREE(input, M_TEMP);
3756 if (result)
3757 FREE(result, M_TEMP);
3758 if (vp)
3759 vnode_put(vp);
3760 if (dvp)
3761 vnode_put(dvp);
3762 if (IS_VALID_CRED(context.vc_ucred))
3763 kauth_cred_unref(&context.vc_ucred);
3764 return(error);
3765 }
3766
3767
3768 /*
3769 * Returns: 0 Success
3770 * namei:EFAULT Bad address
3771 * namei:ENAMETOOLONG Filename too long
3772 * namei:ENOENT No such file or directory
3773 * namei:ELOOP Too many levels of symbolic links
3774 * namei:EBADF Bad file descriptor
3775 * namei:ENOTDIR Not a directory
3776 * namei:???
3777 * access1:
3778 */
3779 int
3780 access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
3781 {
3782 int error;
3783 struct nameidata nd;
3784 int niopts;
3785 struct vfs_context context;
3786 #if NAMEDRSRCFORK
3787 int is_namedstream = 0;
3788 #endif
3789
3790 /*
3791 * Access is defined as checking against the process'
3792 * real identity, even if operations are checking the
3793 * effective identity. So we need to tweak the credential
3794 * in the context.
3795 */
3796 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
3797 context.vc_thread = current_thread();
3798
3799 niopts = FOLLOW | AUDITVNPATH1;
3800 /* need parent for vnode_authorize for deletion test */
3801 if (uap->flags & _DELETE_OK)
3802 niopts |= WANTPARENT;
3803 NDINIT(&nd, LOOKUP, niopts, UIO_USERSPACE, uap->path, &context);
3804
3805 #if NAMEDRSRCFORK
3806 /* access(F_OK) calls are allowed for resource forks. */
3807 if (uap->flags == F_OK)
3808 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
3809 #endif
3810 error = namei(&nd);
3811 if (error)
3812 goto out;
3813
3814 #if NAMEDRSRCFORK
3815 /* Grab reference on the shadow stream file vnode to
3816 * force an inactive on release which will mark it
3817 * for recycle.
3818 */
3819 if (vnode_isnamedstream(nd.ni_vp) &&
3820 (nd.ni_vp->v_parent != NULLVP) &&
3821 vnode_isshadow(nd.ni_vp)) {
3822 is_namedstream = 1;
3823 vnode_ref(nd.ni_vp);
3824 }
3825 #endif
3826
3827 error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context);
3828
3829 #if NAMEDRSRCFORK
3830 if (is_namedstream) {
3831 vnode_rele(nd.ni_vp);
3832 }
3833 #endif
3834
3835 vnode_put(nd.ni_vp);
3836 if (uap->flags & _DELETE_OK)
3837 vnode_put(nd.ni_dvp);
3838 nameidone(&nd);
3839
3840 out:
3841 kauth_cred_unref(&context.vc_ucred);
3842 return(error);
3843 }
3844
3845
3846 /*
3847 * Returns: 0 Success
3848 * EFAULT
3849 * copyout:EFAULT
3850 * namei:???
3851 * vn_stat:???
3852 */
3853 static int
3854 stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3855 {
3856 union {
3857 struct stat sb;
3858 struct stat64 sb64;
3859 } source;
3860 union {
3861 struct user64_stat user64_sb;
3862 struct user32_stat user32_sb;
3863 struct user64_stat64 user64_sb64;
3864 struct user32_stat64 user32_sb64;
3865 } dest;
3866 caddr_t sbp;
3867 int error, my_size;
3868 kauth_filesec_t fsec;
3869 size_t xsecurity_bufsize;
3870 void * statptr;
3871
3872 #if NAMEDRSRCFORK
3873 int is_namedstream = 0;
3874 /* stat calls are allowed for resource forks. */
3875 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
3876 #endif
3877 error = namei(ndp);
3878 if (error)
3879 return (error);
3880 fsec = KAUTH_FILESEC_NONE;
3881
3882 statptr = (void *)&source;
3883
3884 #if NAMEDRSRCFORK
3885 /* Grab reference on the shadow stream file vnode to
3886 * force an inactive on release which will mark it
3887 * for recycle.
3888 */
3889 if (vnode_isnamedstream(ndp->ni_vp) &&
3890 (ndp->ni_vp->v_parent != NULLVP) &&
3891 vnode_isshadow(ndp->ni_vp)) {
3892 is_namedstream = 1;
3893 vnode_ref(ndp->ni_vp);
3894 }
3895 #endif
3896
3897 error = vn_stat(ndp->ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
3898
3899 #if NAMEDRSRCFORK
3900 if (is_namedstream) {
3901 vnode_rele(ndp->ni_vp);
3902 }
3903 #endif
3904 vnode_put(ndp->ni_vp);
3905 nameidone(ndp);
3906
3907 if (error)
3908 return (error);
3909 /* Zap spare fields */
3910 if (isstat64 != 0) {
3911 source.sb64.st_lspare = 0;
3912 source.sb64.st_qspare[0] = 0LL;
3913 source.sb64.st_qspare[1] = 0LL;
3914 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
3915 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
3916 my_size = sizeof(dest.user64_sb64);
3917 sbp = (caddr_t)&dest.user64_sb64;
3918 } else {
3919 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
3920 my_size = sizeof(dest.user32_sb64);
3921 sbp = (caddr_t)&dest.user32_sb64;
3922 }
3923 /*
3924 * Check if we raced (post lookup) against the last unlink of a file.
3925 */
3926 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
3927 source.sb64.st_nlink = 1;
3928 }
3929 } else {
3930 source.sb.st_lspare = 0;
3931 source.sb.st_qspare[0] = 0LL;
3932 source.sb.st_qspare[1] = 0LL;
3933 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
3934 munge_user64_stat(&source.sb, &dest.user64_sb);
3935 my_size = sizeof(dest.user64_sb);
3936 sbp = (caddr_t)&dest.user64_sb;
3937 } else {
3938 munge_user32_stat(&source.sb, &dest.user32_sb);
3939 my_size = sizeof(dest.user32_sb);
3940 sbp = (caddr_t)&dest.user32_sb;
3941 }
3942
3943 /*
3944 * Check if we raced (post lookup) against the last unlink of a file.
3945 */
3946 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
3947 source.sb.st_nlink = 1;
3948 }
3949 }
3950 if ((error = copyout(sbp, ub, my_size)) != 0)
3951 goto out;
3952
3953 /* caller wants extended security information? */
3954 if (xsecurity != USER_ADDR_NULL) {
3955
3956 /* did we get any? */
3957 if (fsec == KAUTH_FILESEC_NONE) {
3958 if (susize(xsecurity_size, 0) != 0) {
3959 error = EFAULT;
3960 goto out;
3961 }
3962 } else {
3963 /* find the user buffer size */
3964 xsecurity_bufsize = fusize(xsecurity_size);
3965
3966 /* copy out the actual data size */
3967 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
3968 error = EFAULT;
3969 goto out;
3970 }
3971
3972 /* if the caller supplied enough room, copy out to it */
3973 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
3974 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
3975 }
3976 }
3977 out:
3978 if (fsec != KAUTH_FILESEC_NONE)
3979 kauth_filesec_free(fsec);
3980 return (error);
3981 }
3982
3983 /*
3984 * Get file status; this version follows links.
3985 *
3986 * Returns: 0 Success
3987 * stat2:??? [see stat2() in this file]
3988 */
3989 static int
3990 stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
3991 {
3992 struct nameidata nd;
3993 vfs_context_t ctx = vfs_context_current();
3994
3995 NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1,
3996 UIO_USERSPACE, path, ctx);
3997 return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
3998 }
3999
4000 /*
4001 * stat_extended: Get file status; with extended security (ACL).
4002 *
4003 * Parameters: p (ignored)
4004 * uap User argument descriptor (see below)
4005 * retval (ignored)
4006 *
4007 * Indirect: uap->path Path of file to get status from
4008 * uap->ub User buffer (holds file status info)
4009 * uap->xsecurity ACL to get (extended security)
4010 * uap->xsecurity_size Size of ACL
4011 *
4012 * Returns: 0 Success
4013 * !0 errno value
4014 *
4015 */
4016 int
4017 stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused int32_t *retval)
4018 {
4019 return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
4020 }
4021
4022 /*
4023 * Returns: 0 Success
4024 * stat1:??? [see stat1() in this file]
4025 */
4026 int
4027 stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
4028 {
4029 return(stat1(uap->path, uap->ub, 0, 0, 0));
4030 }
4031
4032 int
4033 stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
4034 {
4035 return(stat1(uap->path, uap->ub, 0, 0, 1));
4036 }
4037
4038 /*
4039 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
4040 *
4041 * Parameters: p (ignored)
4042 * uap User argument descriptor (see below)
4043 * retval (ignored)
4044 *
4045 * Indirect: uap->path Path of file to get status from
4046 * uap->ub User buffer (holds file status info)
4047 * uap->xsecurity ACL to get (extended security)
4048 * uap->xsecurity_size Size of ACL
4049 *
4050 * Returns: 0 Success
4051 * !0 errno value
4052 *
4053 */
4054 int
4055 stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
4056 {
4057 return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
4058 }
4059 /*
4060 * Get file status; this version does not follow links.
4061 */
4062 static int
4063 lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64)
4064 {
4065 struct nameidata nd;
4066 vfs_context_t ctx = vfs_context_current();
4067
4068 NDINIT(&nd, LOOKUP, NOTRIGGER | NOFOLLOW | AUDITVNPATH1,
4069 UIO_USERSPACE, path, ctx);
4070
4071 return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64));
4072 }
4073
4074 /*
4075 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
4076 *
4077 * Parameters: p (ignored)
4078 * uap User argument descriptor (see below)
4079 * retval (ignored)
4080 *
4081 * Indirect: uap->path Path of file to get status from
4082 * uap->ub User buffer (holds file status info)
4083 * uap->xsecurity ACL to get (extended security)
4084 * uap->xsecurity_size Size of ACL
4085 *
4086 * Returns: 0 Success
4087 * !0 errno value
4088 *
4089 */
4090 int
4091 lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
4092 {
4093 return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0));
4094 }
4095
4096 int
4097 lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
4098 {
4099 return(lstat1(uap->path, uap->ub, 0, 0, 0));
4100 }
4101
4102 int
4103 lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
4104 {
4105 return(lstat1(uap->path, uap->ub, 0, 0, 1));
4106 }
4107
4108 /*
4109 * lstat64_extended: Get file status; can handle large inode numbers; does not
4110 * follow links; with extended security (ACL).
4111 *
4112 * Parameters: p (ignored)
4113 * uap User argument descriptor (see below)
4114 * retval (ignored)
4115 *
4116 * Indirect: uap->path Path of file to get status from
4117 * uap->ub User buffer (holds file status info)
4118 * uap->xsecurity ACL to get (extended security)
4119 * uap->xsecurity_size Size of ACL
4120 *
4121 * Returns: 0 Success
4122 * !0 errno value
4123 *
4124 */
4125 int
4126 lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
4127 {
4128 return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1));
4129 }
4130
4131 /*
4132 * Get configurable pathname variables.
4133 *
4134 * Returns: 0 Success
4135 * namei:???
4136 * vn_pathconf:???
4137 *
4138 * Notes: Global implementation constants are intended to be
4139 * implemented in this function directly; all other constants
4140 * are per-FS implementation, and therefore must be handled in
4141 * each respective FS, instead.
4142 *
4143 * XXX We implement some things globally right now that should actually be
4144 * XXX per-FS; we will need to deal with this at some point.
4145 */
4146 /* ARGSUSED */
4147 int
4148 pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
4149 {
4150 int error;
4151 struct nameidata nd;
4152 vfs_context_t ctx = vfs_context_current();
4153
4154 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4155 UIO_USERSPACE, uap->path, ctx);
4156 error = namei(&nd);
4157 if (error)
4158 return (error);
4159
4160 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
4161
4162 vnode_put(nd.ni_vp);
4163 nameidone(&nd);
4164 return (error);
4165 }
4166
4167 /*
4168 * Return target name of a symbolic link.
4169 */
4170 /* ARGSUSED */
4171 int
4172 readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
4173 {
4174 vnode_t vp;
4175 uio_t auio;
4176 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
4177 int error;
4178 struct nameidata nd;
4179 vfs_context_t ctx = vfs_context_current();
4180 char uio_buf[ UIO_SIZEOF(1) ];
4181
4182 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNPATH1,
4183 UIO_USERSPACE, uap->path, ctx);
4184 error = namei(&nd);
4185 if (error)
4186 return (error);
4187 vp = nd.ni_vp;
4188
4189 nameidone(&nd);
4190
4191 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
4192 &uio_buf[0], sizeof(uio_buf));
4193 uio_addiov(auio, uap->buf, uap->count);
4194 if (vp->v_type != VLNK)
4195 error = EINVAL;
4196 else {
4197 #if CONFIG_MACF
4198 error = mac_vnode_check_readlink(ctx,
4199 vp);
4200 #endif
4201 if (error == 0)
4202 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx);
4203 if (error == 0)
4204 error = VNOP_READLINK(vp, auio, ctx);
4205 }
4206 vnode_put(vp);
4207
4208 /* Safe: uio_resid() is bounded above by "count", and "count" is an int */
4209 *retval = uap->count - (int)uio_resid(auio);
4210 return (error);
4211 }
4212
4213 /*
4214 * Change file flags.
4215 */
4216 static int
4217 chflags1(vnode_t vp, int flags, vfs_context_t ctx)
4218 {
4219 struct vnode_attr va;
4220 kauth_action_t action;
4221 int error;
4222
4223 VATTR_INIT(&va);
4224 VATTR_SET(&va, va_flags, flags);
4225
4226 #if CONFIG_MACF
4227 error = mac_vnode_check_setflags(ctx, vp, flags);
4228 if (error)
4229 goto out;
4230 #endif
4231
4232 /* request authorisation, disregard immutability */
4233 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4234 goto out;
4235 /*
4236 * Request that the auth layer disregard those file flags it's allowed to when
4237 * authorizing this operation; we need to do this in order to be able to
4238 * clear immutable flags.
4239 */
4240 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
4241 goto out;
4242 error = vnode_setattr(vp, &va, ctx);
4243
4244 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
4245 error = ENOTSUP;
4246 }
4247 out:
4248 vnode_put(vp);
4249 return(error);
4250 }
4251
4252 /*
4253 * Change flags of a file given a path name.
4254 */
4255 /* ARGSUSED */
4256 int
4257 chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
4258 {
4259 vnode_t vp;
4260 vfs_context_t ctx = vfs_context_current();
4261 int error;
4262 struct nameidata nd;
4263
4264 AUDIT_ARG(fflags, uap->flags);
4265 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4266 UIO_USERSPACE, uap->path, ctx);
4267 error = namei(&nd);
4268 if (error)
4269 return (error);
4270 vp = nd.ni_vp;
4271 nameidone(&nd);
4272
4273 error = chflags1(vp, uap->flags, ctx);
4274
4275 return(error);
4276 }
4277
4278 /*
4279 * Change flags of a file given a file descriptor.
4280 */
4281 /* ARGSUSED */
4282 int
4283 fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
4284 {
4285 vnode_t vp;
4286 int error;
4287
4288 AUDIT_ARG(fd, uap->fd);
4289 AUDIT_ARG(fflags, uap->flags);
4290 if ( (error = file_vnode(uap->fd, &vp)) )
4291 return (error);
4292
4293 if ((error = vnode_getwithref(vp))) {
4294 file_drop(uap->fd);
4295 return(error);
4296 }
4297
4298 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4299
4300 error = chflags1(vp, uap->flags, vfs_context_current());
4301
4302 file_drop(uap->fd);
4303 return (error);
4304 }
4305
4306 /*
4307 * Change security information on a filesystem object.
4308 *
4309 * Returns: 0 Success
4310 * EPERM Operation not permitted
4311 * vnode_authattr:??? [anything vnode_authattr can return]
4312 * vnode_authorize:??? [anything vnode_authorize can return]
4313 * vnode_setattr:??? [anything vnode_setattr can return]
4314 *
4315 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
4316 * translated to EPERM before being returned.
4317 */
4318 static int
4319 chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
4320 {
4321 kauth_action_t action;
4322 int error;
4323
4324 AUDIT_ARG(mode, vap->va_mode);
4325 /* XXX audit new args */
4326
4327 #if NAMEDSTREAMS
4328 /* chmod calls are not allowed for resource forks. */
4329 if (vp->v_flag & VISNAMEDSTREAM) {
4330 return (EPERM);
4331 }
4332 #endif
4333
4334 #if CONFIG_MACF
4335 error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode);
4336 if (error)
4337 return (error);
4338 #endif
4339
4340 /* make sure that the caller is allowed to set this security information */
4341 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
4342 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
4343 if (error == EACCES)
4344 error = EPERM;
4345 return(error);
4346 }
4347
4348 error = vnode_setattr(vp, vap, ctx);
4349
4350 return (error);
4351 }
4352
4353
4354 /*
4355 * Change mode of a file given a path name.
4356 *
4357 * Returns: 0 Success
4358 * namei:??? [anything namei can return]
4359 * chmod2:??? [anything chmod2 can return]
4360 */
4361 static int
4362 chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
4363 {
4364 struct nameidata nd;
4365 int error;
4366
4367 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4368 UIO_USERSPACE, path, ctx);
4369 if ((error = namei(&nd)))
4370 return (error);
4371 error = chmod2(ctx, nd.ni_vp, vap);
4372 vnode_put(nd.ni_vp);
4373 nameidone(&nd);
4374 return(error);
4375 }
4376
4377 /*
4378 * chmod_extended: Change the mode of a file given a path name; with extended
4379 * argument list (including extended security (ACL)).
4380 *
4381 * Parameters: p Process requesting the open
4382 * uap User argument descriptor (see below)
4383 * retval (ignored)
4384 *
4385 * Indirect: uap->path Path to object (same as 'chmod')
4386 * uap->uid UID to set
4387 * uap->gid GID to set
4388 * uap->mode File mode to set (same as 'chmod')
4389 * uap->xsecurity ACL to set (or delete)
4390 *
4391 * Returns: 0 Success
4392 * !0 errno value
4393 *
4394 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
4395 *
4396 * XXX: We should enummerate the possible errno values here, and where
4397 * in the code they originated.
4398 */
4399 int
4400 chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
4401 {
4402 int error;
4403 struct vnode_attr va;
4404 kauth_filesec_t xsecdst;
4405
4406 AUDIT_ARG(owner, uap->uid, uap->gid);
4407
4408 VATTR_INIT(&va);
4409 if (uap->mode != -1)
4410 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4411 if (uap->uid != KAUTH_UID_NONE)
4412 VATTR_SET(&va, va_uid, uap->uid);
4413 if (uap->gid != KAUTH_GID_NONE)
4414 VATTR_SET(&va, va_gid, uap->gid);
4415
4416 xsecdst = NULL;
4417 switch(uap->xsecurity) {
4418 /* explicit remove request */
4419 case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */
4420 VATTR_SET(&va, va_acl, NULL);
4421 break;
4422 /* not being set */
4423 case USER_ADDR_NULL:
4424 break;
4425 default:
4426 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4427 return(error);
4428 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4429 KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
4430 }
4431
4432 error = chmod1(vfs_context_current(), uap->path, &va);
4433
4434 if (xsecdst != NULL)
4435 kauth_filesec_free(xsecdst);
4436 return(error);
4437 }
4438
4439 /*
4440 * Returns: 0 Success
4441 * chmod1:??? [anything chmod1 can return]
4442 */
4443 int
4444 chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
4445 {
4446 struct vnode_attr va;
4447
4448 VATTR_INIT(&va);
4449 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4450
4451 return(chmod1(vfs_context_current(), uap->path, &va));
4452 }
4453
4454 /*
4455 * Change mode of a file given a file descriptor.
4456 */
4457 static int
4458 fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
4459 {
4460 vnode_t vp;
4461 int error;
4462
4463 AUDIT_ARG(fd, fd);
4464
4465 if ((error = file_vnode(fd, &vp)) != 0)
4466 return (error);
4467 if ((error = vnode_getwithref(vp)) != 0) {
4468 file_drop(fd);
4469 return(error);
4470 }
4471 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4472
4473 error = chmod2(vfs_context_current(), vp, vap);
4474 (void)vnode_put(vp);
4475 file_drop(fd);
4476
4477 return (error);
4478 }
4479
4480 /*
4481 * fchmod_extended: Change mode of a file given a file descriptor; with
4482 * extended argument list (including extended security (ACL)).
4483 *
4484 * Parameters: p Process requesting to change file mode
4485 * uap User argument descriptor (see below)
4486 * retval (ignored)
4487 *
4488 * Indirect: uap->mode File mode to set (same as 'chmod')
4489 * uap->uid UID to set
4490 * uap->gid GID to set
4491 * uap->xsecurity ACL to set (or delete)
4492 * uap->fd File descriptor of file to change mode
4493 *
4494 * Returns: 0 Success
4495 * !0 errno value
4496 *
4497 */
4498 int
4499 fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
4500 {
4501 int error;
4502 struct vnode_attr va;
4503 kauth_filesec_t xsecdst;
4504
4505 AUDIT_ARG(owner, uap->uid, uap->gid);
4506
4507 VATTR_INIT(&va);
4508 if (uap->mode != -1)
4509 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4510 if (uap->uid != KAUTH_UID_NONE)
4511 VATTR_SET(&va, va_uid, uap->uid);
4512 if (uap->gid != KAUTH_GID_NONE)
4513 VATTR_SET(&va, va_gid, uap->gid);
4514
4515 xsecdst = NULL;
4516 switch(uap->xsecurity) {
4517 case USER_ADDR_NULL:
4518 VATTR_SET(&va, va_acl, NULL);
4519 break;
4520 case CAST_USER_ADDR_T(-1):
4521 break;
4522 default:
4523 if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
4524 return(error);
4525 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
4526 }
4527
4528 error = fchmod1(p, uap->fd, &va);
4529
4530
4531 switch(uap->xsecurity) {
4532 case USER_ADDR_NULL:
4533 case CAST_USER_ADDR_T(-1):
4534 break;
4535 default:
4536 if (xsecdst != NULL)
4537 kauth_filesec_free(xsecdst);
4538 }
4539 return(error);
4540 }
4541
4542 int
4543 fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
4544 {
4545 struct vnode_attr va;
4546
4547 VATTR_INIT(&va);
4548 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
4549
4550 return(fchmod1(p, uap->fd, &va));
4551 }
4552
4553
4554 /*
4555 * Set ownership given a path name.
4556 */
4557 /* ARGSUSED */
4558 static int
4559 chown1(vfs_context_t ctx, struct chown_args *uap, __unused int32_t *retval, int follow)
4560 {
4561 vnode_t vp;
4562 struct vnode_attr va;
4563 int error;
4564 struct nameidata nd;
4565 kauth_action_t action;
4566
4567 AUDIT_ARG(owner, uap->uid, uap->gid);
4568
4569 NDINIT(&nd, LOOKUP, (follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1,
4570 UIO_USERSPACE, uap->path, ctx);
4571 error = namei(&nd);
4572 if (error)
4573 return (error);
4574 vp = nd.ni_vp;
4575
4576 nameidone(&nd);
4577
4578 VATTR_INIT(&va);
4579 if (uap->uid != VNOVAL)
4580 VATTR_SET(&va, va_uid, uap->uid);
4581 if (uap->gid != VNOVAL)
4582 VATTR_SET(&va, va_gid, uap->gid);
4583
4584 #if CONFIG_MACF
4585 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
4586 if (error)
4587 goto out;
4588 #endif
4589
4590 /* preflight and authorize attribute changes */
4591 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4592 goto out;
4593 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
4594 goto out;
4595 error = vnode_setattr(vp, &va, ctx);
4596
4597 out:
4598 /*
4599 * EACCES is only allowed from namei(); permissions failure should
4600 * return EPERM, so we need to translate the error code.
4601 */
4602 if (error == EACCES)
4603 error = EPERM;
4604
4605 vnode_put(vp);
4606 return (error);
4607 }
4608
4609 int
4610 chown(__unused proc_t p, struct chown_args *uap, int32_t *retval)
4611 {
4612 return chown1(vfs_context_current(), uap, retval, 1);
4613 }
4614
4615 int
4616 lchown(__unused proc_t p, struct lchown_args *uap, int32_t *retval)
4617 {
4618 /* Argument list identical, but machine generated; cast for chown1() */
4619 return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0);
4620 }
4621
4622 /*
4623 * Set ownership given a file descriptor.
4624 */
4625 /* ARGSUSED */
4626 int
4627 fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
4628 {
4629 struct vnode_attr va;
4630 vfs_context_t ctx = vfs_context_current();
4631 vnode_t vp;
4632 int error;
4633 kauth_action_t action;
4634
4635 AUDIT_ARG(owner, uap->uid, uap->gid);
4636 AUDIT_ARG(fd, uap->fd);
4637
4638 if ( (error = file_vnode(uap->fd, &vp)) )
4639 return (error);
4640
4641 if ( (error = vnode_getwithref(vp)) ) {
4642 file_drop(uap->fd);
4643 return(error);
4644 }
4645 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4646
4647 VATTR_INIT(&va);
4648 if (uap->uid != VNOVAL)
4649 VATTR_SET(&va, va_uid, uap->uid);
4650 if (uap->gid != VNOVAL)
4651 VATTR_SET(&va, va_gid, uap->gid);
4652
4653 #if NAMEDSTREAMS
4654 /* chown calls are not allowed for resource forks. */
4655 if (vp->v_flag & VISNAMEDSTREAM) {
4656 error = EPERM;
4657 goto out;
4658 }
4659 #endif
4660
4661 #if CONFIG_MACF
4662 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
4663 if (error)
4664 goto out;
4665 #endif
4666
4667 /* preflight and authorize attribute changes */
4668 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4669 goto out;
4670 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
4671 if (error == EACCES)
4672 error = EPERM;
4673 goto out;
4674 }
4675 error = vnode_setattr(vp, &va, ctx);
4676
4677 out:
4678 (void)vnode_put(vp);
4679 file_drop(uap->fd);
4680 return (error);
4681 }
4682
4683 static int
4684 getutimes(user_addr_t usrtvp, struct timespec *tsp)
4685 {
4686 int error;
4687
4688 if (usrtvp == USER_ADDR_NULL) {
4689 struct timeval old_tv;
4690 /* XXX Y2038 bug because of microtime argument */
4691 microtime(&old_tv);
4692 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
4693 tsp[1] = tsp[0];
4694 } else {
4695 if (IS_64BIT_PROCESS(current_proc())) {
4696 struct user64_timeval tv[2];
4697 error = copyin(usrtvp, (void *)tv, sizeof(tv));
4698 if (error)
4699 return (error);
4700 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
4701 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
4702 } else {
4703 struct user32_timeval tv[2];
4704 error = copyin(usrtvp, (void *)tv, sizeof(tv));
4705 if (error)
4706 return (error);
4707 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
4708 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
4709 }
4710 }
4711 return 0;
4712 }
4713
4714 static int
4715 setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
4716 int nullflag)
4717 {
4718 int error;
4719 struct vnode_attr va;
4720 kauth_action_t action;
4721
4722 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4723
4724 VATTR_INIT(&va);
4725 VATTR_SET(&va, va_access_time, ts[0]);
4726 VATTR_SET(&va, va_modify_time, ts[1]);
4727 if (nullflag)
4728 va.va_vaflags |= VA_UTIMES_NULL;
4729
4730 #if NAMEDSTREAMS
4731 /* utimes calls are not allowed for resource forks. */
4732 if (vp->v_flag & VISNAMEDSTREAM) {
4733 error = EPERM;
4734 goto out;
4735 }
4736 #endif
4737
4738 #if CONFIG_MACF
4739 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
4740 if (error)
4741 goto out;
4742 #endif
4743 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
4744 if (!nullflag && error == EACCES)
4745 error = EPERM;
4746 goto out;
4747 }
4748
4749 /* since we may not need to auth anything, check here */
4750 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
4751 if (!nullflag && error == EACCES)
4752 error = EPERM;
4753 goto out;
4754 }
4755 error = vnode_setattr(vp, &va, ctx);
4756
4757 out:
4758 return error;
4759 }
4760
4761 /*
4762 * Set the access and modification times of a file.
4763 */
4764 /* ARGSUSED */
4765 int
4766 utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
4767 {
4768 struct timespec ts[2];
4769 user_addr_t usrtvp;
4770 int error;
4771 struct nameidata nd;
4772 vfs_context_t ctx = vfs_context_current();
4773
4774 /*
4775 * AUDIT: Needed to change the order of operations to do the
4776 * name lookup first because auditing wants the path.
4777 */
4778 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4779 UIO_USERSPACE, uap->path, ctx);
4780 error = namei(&nd);
4781 if (error)
4782 return (error);
4783 nameidone(&nd);
4784
4785 /*
4786 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
4787 * the current time instead.
4788 */
4789 usrtvp = uap->tptr;
4790 if ((error = getutimes(usrtvp, ts)) != 0)
4791 goto out;
4792
4793 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);
4794
4795 out:
4796 vnode_put(nd.ni_vp);
4797 return (error);
4798 }
4799
4800 /*
4801 * Set the access and modification times of a file.
4802 */
4803 /* ARGSUSED */
4804 int
4805 futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
4806 {
4807 struct timespec ts[2];
4808 vnode_t vp;
4809 user_addr_t usrtvp;
4810 int error;
4811
4812 AUDIT_ARG(fd, uap->fd);
4813 usrtvp = uap->tptr;
4814 if ((error = getutimes(usrtvp, ts)) != 0)
4815 return (error);
4816 if ((error = file_vnode(uap->fd, &vp)) != 0)
4817 return (error);
4818 if((error = vnode_getwithref(vp))) {
4819 file_drop(uap->fd);
4820 return(error);
4821 }
4822
4823 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
4824 vnode_put(vp);
4825 file_drop(uap->fd);
4826 return(error);
4827 }
4828
4829 /*
4830 * Truncate a file given its path name.
4831 */
4832 /* ARGSUSED */
4833 int
4834 truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
4835 {
4836 vnode_t vp;
4837 struct vnode_attr va;
4838 vfs_context_t ctx = vfs_context_current();
4839 int error;
4840 struct nameidata nd;
4841 kauth_action_t action;
4842
4843 if (uap->length < 0)
4844 return(EINVAL);
4845 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
4846 UIO_USERSPACE, uap->path, ctx);
4847 if ((error = namei(&nd)))
4848 return (error);
4849 vp = nd.ni_vp;
4850
4851 nameidone(&nd);
4852
4853 VATTR_INIT(&va);
4854 VATTR_SET(&va, va_data_size, uap->length);
4855
4856 #if CONFIG_MACF
4857 error = mac_vnode_check_truncate(ctx, NOCRED, vp);
4858 if (error)
4859 goto out;
4860 #endif
4861
4862 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
4863 goto out;
4864 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
4865 goto out;
4866 error = vnode_setattr(vp, &va, ctx);
4867 out:
4868 vnode_put(vp);
4869 return (error);
4870 }
4871
4872 /*
4873 * Truncate a file given a file descriptor.
4874 */
4875 /* ARGSUSED */
4876 int
4877 ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
4878 {
4879 vfs_context_t ctx = vfs_context_current();
4880 struct vnode_attr va;
4881 vnode_t vp;
4882 struct fileproc *fp;
4883 int error ;
4884 int fd = uap->fd;
4885
4886 AUDIT_ARG(fd, uap->fd);
4887 if (uap->length < 0)
4888 return(EINVAL);
4889
4890 if ( (error = fp_lookup(p,fd,&fp,0)) ) {
4891 return(error);
4892 }
4893
4894 if (fp->f_fglob->fg_type == DTYPE_PSXSHM) {
4895 error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
4896 goto out;
4897 }
4898 if (fp->f_fglob->fg_type != DTYPE_VNODE) {
4899 error = EINVAL;
4900 goto out;
4901 }
4902
4903 vp = (vnode_t)fp->f_fglob->fg_data;
4904
4905 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
4906 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
4907 error = EINVAL;
4908 goto out;
4909 }
4910
4911 if ((error = vnode_getwithref(vp)) != 0) {
4912 goto out;
4913 }
4914
4915 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
4916
4917 #if CONFIG_MACF
4918 error = mac_vnode_check_truncate(ctx,
4919 fp->f_fglob->fg_cred, vp);
4920 if (error) {
4921 (void)vnode_put(vp);
4922 goto out;
4923 }
4924 #endif
4925 VATTR_INIT(&va);
4926 VATTR_SET(&va, va_data_size, uap->length);
4927 error = vnode_setattr(vp, &va, ctx);
4928 (void)vnode_put(vp);
4929 out:
4930 file_drop(fd);
4931 return (error);
4932 }
4933
4934
4935 /*
4936 * Sync an open file with synchronized I/O _file_ integrity completion
4937 */
4938 /* ARGSUSED */
4939 int
4940 fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
4941 {
4942 __pthread_testcancel(1);
4943 return(fsync_common(p, uap, MNT_WAIT));
4944 }
4945
4946
4947 /*
4948 * Sync an open file with synchronized I/O _file_ integrity completion
4949 *
4950 * Notes: This is a legacy support function that does not test for
4951 * thread cancellation points.
4952 */
4953 /* ARGSUSED */
4954 int
4955 fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
4956 {
4957 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
4958 }
4959
4960
4961 /*
4962 * Sync an open file with synchronized I/O _data_ integrity completion
4963 */
4964 /* ARGSUSED */
4965 int
4966 fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
4967 {
4968 __pthread_testcancel(1);
4969 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
4970 }
4971
4972
4973 /*
4974 * fsync_common
4975 *
4976 * Common fsync code to support both synchronized I/O file integrity completion
4977 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
4978 *
4979 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
4980 * will only guarantee that the file data contents are retrievable. If
4981 * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also
4982 * includes additional metadata unnecessary for retrieving the file data
4983 * contents, such as atime, mtime, ctime, etc., also be committed to stable
4984 * storage.
4985 *
4986 * Parameters: p The process
4987 * uap->fd The descriptor to synchronize
4988 * flags The data integrity flags
4989 *
4990 * Returns: int Success
4991 * fp_getfvp:EBADF Bad file descriptor
4992 * fp_getfvp:ENOTSUP fd does not refer to a vnode
4993 * VNOP_FSYNC:??? unspecified
4994 *
4995 * Notes: We use struct fsync_args because it is a short name, and all
4996 * caller argument structures are otherwise identical.
4997 */
4998 static int
4999 fsync_common(proc_t p, struct fsync_args *uap, int flags)
5000 {
5001 vnode_t vp;
5002 struct fileproc *fp;
5003 vfs_context_t ctx = vfs_context_current();
5004 int error;
5005
5006 AUDIT_ARG(fd, uap->fd);
5007
5008 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
5009 return (error);
5010 if ( (error = vnode_getwithref(vp)) ) {
5011 file_drop(uap->fd);
5012 return(error);
5013 }
5014
5015 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5016
5017 error = VNOP_FSYNC(vp, flags, ctx);
5018
5019 #if NAMEDRSRCFORK
5020 /* Sync resource fork shadow file if necessary. */
5021 if ((error == 0) &&
5022 (vp->v_flag & VISNAMEDSTREAM) &&
5023 (vp->v_parent != NULLVP) &&
5024 vnode_isshadow(vp) &&
5025 (fp->f_flags & FP_WRITTEN)) {
5026 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
5027 }
5028 #endif
5029
5030 (void)vnode_put(vp);
5031 file_drop(uap->fd);
5032 return (error);
5033 }
5034
5035 /*
5036 * Duplicate files. Source must be a file, target must be a file or
5037 * must not exist.
5038 *
5039 * XXX Copyfile authorisation checking is woefully inadequate, and will not
5040 * perform inheritance correctly.
5041 */
5042 /* ARGSUSED */
5043 int
5044 copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
5045 {
5046 vnode_t tvp, fvp, tdvp, sdvp;
5047 struct nameidata fromnd, tond;
5048 int error;
5049 vfs_context_t ctx = vfs_context_current();
5050
5051 /* Check that the flags are valid. */
5052
5053 if (uap->flags & ~CPF_MASK) {
5054 return(EINVAL);
5055 }
5056
5057 NDINIT(&fromnd, LOOKUP, SAVESTART | AUDITVNPATH1,
5058 UIO_USERSPACE, uap->from, ctx);
5059 if ((error = namei(&fromnd)))
5060 return (error);
5061 fvp = fromnd.ni_vp;
5062
5063 NDINIT(&tond, CREATE, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
5064 UIO_USERSPACE, uap->to, ctx);
5065 if ((error = namei(&tond))) {
5066 goto out1;
5067 }
5068 tdvp = tond.ni_dvp;
5069 tvp = tond.ni_vp;
5070
5071 if (tvp != NULL) {
5072 if (!(uap->flags & CPF_OVERWRITE)) {
5073 error = EEXIST;
5074 goto out;
5075 }
5076 }
5077 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
5078 error = EISDIR;
5079 goto out;
5080 }
5081
5082 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
5083 goto out;
5084
5085 if (fvp == tdvp)
5086 error = EINVAL;
5087 /*
5088 * If source is the same as the destination (that is the
5089 * same inode number) then there is nothing to do.
5090 * (fixed to have POSIX semantics - CSM 3/2/98)
5091 */
5092 if (fvp == tvp)
5093 error = -1;
5094 if (!error)
5095 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
5096 out:
5097 sdvp = tond.ni_startdir;
5098 /*
5099 * nameidone has to happen before we vnode_put(tdvp)
5100 * since it may need to release the fs_nodelock on the tdvp
5101 */
5102 nameidone(&tond);
5103
5104 if (tvp)
5105 vnode_put(tvp);
5106 vnode_put(tdvp);
5107 vnode_put(sdvp);
5108 out1:
5109 vnode_put(fvp);
5110
5111 if (fromnd.ni_startdir)
5112 vnode_put(fromnd.ni_startdir);
5113 nameidone(&fromnd);
5114
5115 if (error == -1)
5116 return (0);
5117 return (error);
5118 }
5119
5120
5121 /*
5122 * Rename files. Source and destination must either both be directories,
5123 * or both not be directories. If target is a directory, it must be empty.
5124 */
5125 /* ARGSUSED */
5126 int
5127 rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
5128 {
5129 vnode_t tvp, tdvp;
5130 vnode_t fvp, fdvp;
5131 struct nameidata fromnd, tond;
5132 vfs_context_t ctx = vfs_context_current();
5133 int error;
5134 int do_retry;
5135 int mntrename;
5136 int need_event;
5137 const char *oname;
5138 char *from_name = NULL, *to_name = NULL;
5139 int from_len=0, to_len=0;
5140 int holding_mntlock;
5141 mount_t locked_mp = NULL;
5142 vnode_t oparent;
5143 #if CONFIG_FSE
5144 fse_info from_finfo, to_finfo;
5145 #endif
5146 int from_truncated=0, to_truncated;
5147
5148 holding_mntlock = 0;
5149 do_retry = 0;
5150 retry:
5151 fvp = tvp = NULL;
5152 fdvp = tdvp = NULL;
5153 mntrename = FALSE;
5154
5155 NDINIT(&fromnd, DELETE, WANTPARENT | AUDITVNPATH1, UIO_USERSPACE, uap->from, ctx);
5156
5157 if ( (error = namei(&fromnd)) )
5158 goto out1;
5159 fdvp = fromnd.ni_dvp;
5160 fvp = fromnd.ni_vp;
5161
5162 #if CONFIG_MACF
5163 error = mac_vnode_check_rename_from(ctx, fdvp, fvp, &fromnd.ni_cnd);
5164 if (error)
5165 goto out1;
5166 #endif
5167
5168 NDINIT(&tond, RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK , UIO_USERSPACE, uap->to, ctx);
5169 if (fvp->v_type == VDIR)
5170 tond.ni_cnd.cn_flags |= WILLBEDIR;
5171
5172 if ( (error = namei(&tond)) ) {
5173 /*
5174 * Translate error code for rename("dir1", "dir2/.").
5175 */
5176 if (error == EISDIR && fvp->v_type == VDIR)
5177 error = EINVAL;
5178 goto out1;
5179 }
5180 tdvp = tond.ni_dvp;
5181 tvp = tond.ni_vp;
5182
5183 #if CONFIG_MACF
5184 error = mac_vnode_check_rename_to(ctx,
5185 tdvp, tvp, fdvp == tdvp, &tond.ni_cnd);
5186 if (error)
5187 goto out1;
5188 #endif
5189
5190 if (tvp != NULL) {
5191 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
5192 error = ENOTDIR;
5193 goto out1;
5194 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
5195 error = EISDIR;
5196 goto out1;
5197 }
5198 }
5199 if (fvp == tdvp) {
5200 error = EINVAL;
5201 goto out1;
5202 }
5203 /*
5204 * If the source and destination are the same (i.e. they're
5205 * links to the same vnode) and the target file system is
5206 * case sensitive, then there is nothing to do.
5207 */
5208 if (fvp == tvp) {
5209 int pathconf_val;
5210
5211 /*
5212 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
5213 * then assume that this file system is case sensitive.
5214 */
5215 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
5216 pathconf_val != 0) {
5217 goto out1;
5218 }
5219 }
5220
5221 /*
5222 * Authorization.
5223 *
5224 * If tvp is a directory and not the same as fdvp, or tdvp is not
5225 * the same as fdvp, the node is moving between directories and we
5226 * need rights to remove from the old and add to the new.
5227 *
5228 * If tvp already exists and is not a directory, we need to be
5229 * allowed to delete it.
5230 *
5231 * Note that we do not inherit when renaming.
5232 *
5233 * XXX This needs to be revisited to implement the deferred-inherit bit
5234 */
5235 {
5236 int moving = 0;
5237
5238 error = 0;
5239 if ((tvp != NULL) && vnode_isdir(tvp)) {
5240 if (tvp != fdvp)
5241 moving = 1;
5242 } else if (tdvp != fdvp) {
5243 moving = 1;
5244 }
5245 /*
5246 * must have delete rights to remove the old name even in
5247 * the simple case of fdvp == tdvp.
5248 *
5249 * If fvp is a directory, and we are changing it's parent,
5250 * then we also need rights to rewrite its ".." entry as well.
5251 */
5252 if (vnode_isdir(fvp)) {
5253 if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE | KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
5254 goto auth_exit;
5255 } else {
5256 if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE, ctx)) != 0)
5257 goto auth_exit;
5258 }
5259 if (moving) {
5260 /* moving into tdvp or tvp, must have rights to add */
5261 if ((error = vnode_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp,
5262 NULL,
5263 vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE,
5264 ctx)) != 0) {
5265 /*
5266 * We could encounter a race where after doing the namei, tvp stops
5267 * being valid. If so, simply re-drive the rename call from the
5268 * top.
5269 */
5270 if (error == ENOENT) {
5271 do_retry = 1;
5272 }
5273 goto auth_exit;
5274 }
5275 } else {
5276 /* node staying in same directory, must be allowed to add new name */
5277 if ((error = vnode_authorize(fdvp, NULL,
5278 vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, ctx)) != 0)
5279 goto auth_exit;
5280 }
5281 /* overwriting tvp */
5282 if ((tvp != NULL) && !vnode_isdir(tvp) &&
5283 ((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx)) != 0)) {
5284 /*
5285 * We could encounter a race where after doing the namei, tvp stops
5286 * being valid. If so, simply re-drive the rename call from the
5287 * top.
5288 */
5289 if (error == ENOENT) {
5290 do_retry = 1;
5291 }
5292 goto auth_exit;
5293 }
5294
5295 /* XXX more checks? */
5296
5297 auth_exit:
5298 /* authorization denied */
5299 if (error != 0)
5300 goto out1;
5301 }
5302 /*
5303 * Allow the renaming of mount points.
5304 * - target must not exist
5305 * - target must reside in the same directory as source
5306 * - union mounts cannot be renamed
5307 * - "/" cannot be renamed
5308 */
5309 if ((fvp->v_flag & VROOT) &&
5310 (fvp->v_type == VDIR) &&
5311 (tvp == NULL) &&
5312 (fvp->v_mountedhere == NULL) &&
5313 (fdvp == tdvp) &&
5314 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
5315 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
5316 vnode_t coveredvp;
5317
5318 /* switch fvp to the covered vnode */
5319 coveredvp = fvp->v_mount->mnt_vnodecovered;
5320 if ( (vnode_getwithref(coveredvp)) ) {
5321 error = ENOENT;
5322 goto out1;
5323 }
5324 vnode_put(fvp);
5325
5326 fvp = coveredvp;
5327 mntrename = TRUE;
5328 }
5329 /*
5330 * Check for cross-device rename.
5331 */
5332 if ((fvp->v_mount != tdvp->v_mount) ||
5333 (tvp && (fvp->v_mount != tvp->v_mount))) {
5334 error = EXDEV;
5335 goto out1;
5336 }
5337 /*
5338 * Avoid renaming "." and "..".
5339 */
5340 if (fvp->v_type == VDIR &&
5341 ((fdvp == fvp) ||
5342 (fromnd.ni_cnd.cn_namelen == 1 && fromnd.ni_cnd.cn_nameptr[0] == '.') ||
5343 ((fromnd.ni_cnd.cn_flags | tond.ni_cnd.cn_flags) & ISDOTDOT)) ) {
5344 error = EINVAL;
5345 goto out1;
5346 }
5347 /*
5348 * The following edge case is caught here:
5349 * (to cannot be a descendent of from)
5350 *
5351 * o fdvp
5352 * /
5353 * /
5354 * o fvp
5355 * \
5356 * \
5357 * o tdvp
5358 * /
5359 * /
5360 * o tvp
5361 */
5362 if (tdvp->v_parent == fvp) {
5363 error = EINVAL;
5364 goto out1;
5365 }
5366
5367 /*
5368 * If source is the same as the destination (that is the
5369 * same inode number) then there is nothing to do...
5370 * EXCEPT if the underlying file system supports case
5371 * insensitivity and is case preserving. In this case
5372 * the file system needs to handle the special case of
5373 * getting the same vnode as target (fvp) and source (tvp).
5374 *
5375 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
5376 * and _PC_CASE_PRESERVING can have this exception, and they need to
5377 * handle the special case of getting the same vnode as target and
5378 * source. NOTE: Then the target is unlocked going into vnop_rename,
5379 * so not to cause locking problems. There is a single reference on tvp.
5380 *
5381 * NOTE - that fvp == tvp also occurs if they are hard linked and
5382 * that correct behaviour then is just to return success without doing
5383 * anything.
5384 */
5385 if (fvp == tvp && fdvp == tdvp) {
5386 if (fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
5387 !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
5388 fromnd.ni_cnd.cn_namelen)) {
5389 goto out1;
5390 }
5391 }
5392
5393 if (holding_mntlock && fvp->v_mount != locked_mp) {
5394 /*
5395 * we're holding a reference and lock
5396 * on locked_mp, but it no longer matches
5397 * what we want to do... so drop our hold
5398 */
5399 mount_unlock_renames(locked_mp);
5400 mount_drop(locked_mp, 0);
5401 holding_mntlock = 0;
5402 }
5403 if (tdvp != fdvp && fvp->v_type == VDIR) {
5404 /*
5405 * serialize renames that re-shape
5406 * the tree... if holding_mntlock is
5407 * set, then we're ready to go...
5408 * otherwise we
5409 * first need to drop the iocounts
5410 * we picked up, second take the
5411 * lock to serialize the access,
5412 * then finally start the lookup
5413 * process over with the lock held
5414 */
5415 if (!holding_mntlock) {
5416 /*
5417 * need to grab a reference on
5418 * the mount point before we
5419 * drop all the iocounts... once
5420 * the iocounts are gone, the mount
5421 * could follow
5422 */
5423 locked_mp = fvp->v_mount;
5424 mount_ref(locked_mp, 0);
5425
5426 /*
5427 * nameidone has to happen before we vnode_put(tvp)
5428 * since it may need to release the fs_nodelock on the tvp
5429 */
5430 nameidone(&tond);
5431
5432 if (tvp)
5433 vnode_put(tvp);
5434 vnode_put(tdvp);
5435
5436 /*
5437 * nameidone has to happen before we vnode_put(fdvp)
5438 * since it may need to release the fs_nodelock on the fvp
5439 */
5440 nameidone(&fromnd);
5441
5442 vnode_put(fvp);
5443 vnode_put(fdvp);
5444
5445 mount_lock_renames(locked_mp);
5446 holding_mntlock = 1;
5447
5448 goto retry;
5449 }
5450 } else {
5451 /*
5452 * when we dropped the iocounts to take
5453 * the lock, we allowed the identity of
5454 * the various vnodes to change... if they did,
5455 * we may no longer be dealing with a rename
5456 * that reshapes the tree... once we're holding
5457 * the iocounts, the vnodes can't change type
5458 * so we're free to drop the lock at this point
5459 * and continue on
5460 */
5461 if (holding_mntlock) {
5462 mount_unlock_renames(locked_mp);
5463 mount_drop(locked_mp, 0);
5464 holding_mntlock = 0;
5465 }
5466 }
5467 // save these off so we can later verify that fvp is the same
5468 oname = fvp->v_name;
5469 oparent = fvp->v_parent;
5470
5471 #if CONFIG_FSE
5472 need_event = need_fsevent(FSE_RENAME, fvp);
5473 if (need_event) {
5474 get_fse_info(fvp, &from_finfo, ctx);
5475
5476 if (tvp) {
5477 get_fse_info(tvp, &to_finfo, ctx);
5478 }
5479 }
5480 #else
5481 need_event = 0;
5482 #endif /* CONFIG_FSE */
5483
5484 if (need_event || kauth_authorize_fileop_has_listeners()) {
5485 GET_PATH(from_name);
5486 if (from_name == NULL) {
5487 error = ENOMEM;
5488 goto out1;
5489 }
5490
5491 from_len = safe_getpath(fdvp, fromnd.ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
5492
5493 GET_PATH(to_name);
5494 if (to_name == NULL) {
5495 error = ENOMEM;
5496 goto out1;
5497 }
5498
5499 to_len = safe_getpath(tdvp, tond.ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
5500 }
5501
5502 error = VNOP_RENAME(fdvp, fvp, &fromnd.ni_cnd,
5503 tdvp, tvp, &tond.ni_cnd,
5504 ctx);
5505
5506 if (holding_mntlock) {
5507 /*
5508 * we can drop our serialization
5509 * lock now
5510 */
5511 mount_unlock_renames(locked_mp);
5512 mount_drop(locked_mp, 0);
5513 holding_mntlock = 0;
5514 }
5515 if (error) {
5516 /*
5517 * We may encounter a race in the VNOP where the destination didn't
5518 * exist when we did the namei, but it does by the time we go and
5519 * try to create the entry. In this case, we should re-drive this rename
5520 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
5521 * but other filesystems susceptible to this race could return it, too.
5522 */
5523 if (error == ERECYCLE) {
5524 do_retry = 1;
5525 }
5526
5527 goto out1;
5528 }
5529
5530 /* call out to allow 3rd party notification of rename.
5531 * Ignore result of kauth_authorize_fileop call.
5532 */
5533 kauth_authorize_fileop(vfs_context_ucred(ctx),
5534 KAUTH_FILEOP_RENAME,
5535 (uintptr_t)from_name, (uintptr_t)to_name);
5536
5537 #if CONFIG_FSE
5538 if (from_name != NULL && to_name != NULL) {
5539 if (from_truncated || to_truncated) {
5540 // set it here since only the from_finfo gets reported up to user space
5541 from_finfo.mode |= FSE_TRUNCATED_PATH;
5542 }
5543 if (tvp) {
5544 add_fsevent(FSE_RENAME, ctx,
5545 FSE_ARG_STRING, from_len, from_name,
5546 FSE_ARG_FINFO, &from_finfo,
5547 FSE_ARG_STRING, to_len, to_name,
5548 FSE_ARG_FINFO, &to_finfo,
5549 FSE_ARG_DONE);
5550 } else {
5551 add_fsevent(FSE_RENAME, ctx,
5552 FSE_ARG_STRING, from_len, from_name,
5553 FSE_ARG_FINFO, &from_finfo,
5554 FSE_ARG_STRING, to_len, to_name,
5555 FSE_ARG_DONE);
5556 }
5557 }
5558 #endif /* CONFIG_FSE */
5559
5560 /*
5561 * update filesystem's mount point data
5562 */
5563 if (mntrename) {
5564 char *cp, *pathend, *mpname;
5565 char * tobuf;
5566 struct mount *mp;
5567 int maxlen;
5568 size_t len = 0;
5569
5570 mp = fvp->v_mountedhere;
5571
5572 if (vfs_busy(mp, LK_NOWAIT)) {
5573 error = EBUSY;
5574 goto out1;
5575 }
5576 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
5577
5578 error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len);
5579 if (!error) {
5580 /* find current mount point prefix */
5581 pathend = &mp->mnt_vfsstat.f_mntonname[0];
5582 for (cp = pathend; *cp != '\0'; ++cp) {
5583 if (*cp == '/')
5584 pathend = cp + 1;
5585 }
5586 /* find last component of target name */
5587 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
5588 if (*cp == '/')
5589 mpname = cp + 1;
5590 }
5591 /* append name to prefix */
5592 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
5593 bzero(pathend, maxlen);
5594 strlcpy(pathend, mpname, maxlen);
5595 }
5596 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
5597
5598 vfs_unbusy(mp);
5599 }
5600 /*
5601 * fix up name & parent pointers. note that we first
5602 * check that fvp has the same name/parent pointers it
5603 * had before the rename call... this is a 'weak' check
5604 * at best...
5605 */
5606 if (oname == fvp->v_name && oparent == fvp->v_parent) {
5607 int update_flags;
5608
5609 update_flags = VNODE_UPDATE_NAME;
5610
5611 if (fdvp != tdvp)
5612 update_flags |= VNODE_UPDATE_PARENT;
5613
5614 vnode_update_identity(fvp, tdvp, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen, tond.ni_cnd.cn_hash, update_flags);
5615 }
5616 out1:
5617 if (to_name != NULL) {
5618 RELEASE_PATH(to_name);
5619 to_name = NULL;
5620 }
5621 if (from_name != NULL) {
5622 RELEASE_PATH(from_name);
5623 from_name = NULL;
5624 }
5625 if (holding_mntlock) {
5626 mount_unlock_renames(locked_mp);
5627 mount_drop(locked_mp, 0);
5628 holding_mntlock = 0;
5629 }
5630 if (tdvp) {
5631 /*
5632 * nameidone has to happen before we vnode_put(tdvp)
5633 * since it may need to release the fs_nodelock on the tdvp
5634 */
5635 nameidone(&tond);
5636
5637 if (tvp)
5638 vnode_put(tvp);
5639 vnode_put(tdvp);
5640 }
5641 if (fdvp) {
5642 /*
5643 * nameidone has to happen before we vnode_put(fdvp)
5644 * since it may need to release the fs_nodelock on the fdvp
5645 */
5646 nameidone(&fromnd);
5647
5648 if (fvp)
5649 vnode_put(fvp);
5650 vnode_put(fdvp);
5651 }
5652
5653 /*
5654 * If things changed after we did the namei, then we will re-drive
5655 * this rename call from the top.
5656 */
5657 if(do_retry) {
5658 do_retry = 0;
5659 goto retry;
5660 }
5661
5662 return (error);
5663 }
5664
5665 /*
5666 * Make a directory file.
5667 *
5668 * Returns: 0 Success
5669 * EEXIST
5670 * namei:???
5671 * vnode_authorize:???
5672 * vn_create:???
5673 */
5674 /* ARGSUSED */
5675 static int
5676 mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
5677 {
5678 vnode_t vp, dvp;
5679 int error;
5680 int update_flags = 0;
5681 struct nameidata nd;
5682
5683 AUDIT_ARG(mode, vap->va_mode);
5684 NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1,
5685 UIO_USERSPACE, path, ctx);
5686 nd.ni_cnd.cn_flags |= WILLBEDIR;
5687 error = namei(&nd);
5688 if (error)
5689 return (error);
5690 dvp = nd.ni_dvp;
5691 vp = nd.ni_vp;
5692
5693 if (vp != NULL) {
5694 error = EEXIST;
5695 goto out;
5696 }
5697
5698 VATTR_SET(vap, va_type, VDIR);
5699
5700 #if CONFIG_MACF
5701 error = mac_vnode_check_create(ctx,
5702 nd.ni_dvp, &nd.ni_cnd, vap);
5703 if (error)
5704 goto out;
5705 #endif
5706
5707 /* authorize addition of a directory to the parent */
5708 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0)
5709 goto out;
5710
5711
5712 /* make the directory */
5713 if ((error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx)) != 0)
5714 goto out;
5715
5716 // Make sure the name & parent pointers are hooked up
5717 if (vp->v_name == NULL)
5718 update_flags |= VNODE_UPDATE_NAME;
5719 if (vp->v_parent == NULLVP)
5720 update_flags |= VNODE_UPDATE_PARENT;
5721
5722 if (update_flags)
5723 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
5724
5725 #if CONFIG_FSE
5726 add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
5727 #endif
5728
5729 out:
5730 /*
5731 * nameidone has to happen before we vnode_put(dvp)
5732 * since it may need to release the fs_nodelock on the dvp
5733 */
5734 nameidone(&nd);
5735
5736 if (vp)
5737 vnode_put(vp);
5738 vnode_put(dvp);
5739
5740 return (error);
5741 }
5742
5743 /*
5744 * mkdir_extended: Create a directory; with extended security (ACL).
5745 *
5746 * Parameters: p Process requesting to create the directory
5747 * uap User argument descriptor (see below)
5748 * retval (ignored)
5749 *
5750 * Indirect: uap->path Path of directory to create
5751 * uap->mode Access permissions to set
5752 * uap->xsecurity ACL to set
5753 *
5754 * Returns: 0 Success
5755 * !0 Not success
5756 *
5757 */
5758 int
5759 mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
5760 {
5761 int ciferror;
5762 kauth_filesec_t xsecdst;
5763 struct vnode_attr va;
5764
5765 AUDIT_ARG(owner, uap->uid, uap->gid);
5766
5767 xsecdst = NULL;
5768 if ((uap->xsecurity != USER_ADDR_NULL) &&
5769 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
5770 return ciferror;
5771
5772 VATTR_INIT(&va);
5773 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
5774 if (xsecdst != NULL)
5775 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
5776
5777 ciferror = mkdir1(vfs_context_current(), uap->path, &va);
5778 if (xsecdst != NULL)
5779 kauth_filesec_free(xsecdst);
5780 return ciferror;
5781 }
5782
5783 int
5784 mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
5785 {
5786 struct vnode_attr va;
5787
5788 VATTR_INIT(&va);
5789 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
5790
5791 return(mkdir1(vfs_context_current(), uap->path, &va));
5792 }
5793
5794 /*
5795 * Remove a directory file.
5796 */
5797 /* ARGSUSED */
5798 int
5799 rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
5800 {
5801 vnode_t vp, dvp;
5802 int error;
5803 struct nameidata nd;
5804 vfs_context_t ctx = vfs_context_current();
5805
5806 int restart_flag;
5807 uint32_t oldvp_id = UINT32_MAX;
5808
5809 /*
5810 * This loop exists to restart rmdir in the unlikely case that two
5811 * processes are simultaneously trying to remove the same directory
5812 * containing orphaned appleDouble files.
5813 */
5814 do {
5815 restart_flag = 0;
5816
5817 NDINIT(&nd, DELETE, LOCKPARENT | AUDITVNPATH1,
5818 UIO_USERSPACE, uap->path, ctx);
5819 error = namei(&nd);
5820 if (error)
5821 return (error);
5822
5823 dvp = nd.ni_dvp;
5824 vp = nd.ni_vp;
5825
5826
5827 /*
5828 * If being restarted check if the new vp
5829 * still has the same v_id.
5830 */
5831 if (oldvp_id != UINT32_MAX && oldvp_id != vp->v_id) {
5832 error = ENOENT;
5833 goto out;
5834 }
5835
5836 if (vp->v_type != VDIR) {
5837 /*
5838 * rmdir only deals with directories
5839 */
5840 error = ENOTDIR;
5841 } else if (dvp == vp) {
5842 /*
5843 * No rmdir "." please.
5844 */
5845 error = EINVAL;
5846 } else if (vp->v_flag & VROOT) {
5847 /*
5848 * The root of a mounted filesystem cannot be deleted.
5849 */
5850 error = EBUSY;
5851 } else {
5852 #if CONFIG_MACF
5853 error = mac_vnode_check_unlink(ctx, dvp,
5854 vp, &nd.ni_cnd);
5855 if (!error)
5856 #endif
5857 error = vnode_authorize(vp, nd.ni_dvp, KAUTH_VNODE_DELETE, ctx);
5858 }
5859 if (!error) {
5860 char *path = NULL;
5861 int len=0;
5862 int has_listeners = 0;
5863 int need_event = 0;
5864 int truncated = 0;
5865 #if CONFIG_FSE
5866 fse_info finfo;
5867
5868 need_event = need_fsevent(FSE_DELETE, dvp);
5869 if (need_event) {
5870 get_fse_info(vp, &finfo, ctx);
5871 }
5872 #endif
5873 has_listeners = kauth_authorize_fileop_has_listeners();
5874 if (need_event || has_listeners) {
5875 GET_PATH(path);
5876 if (path == NULL) {
5877 error = ENOMEM;
5878 goto out;
5879 }
5880
5881 len = safe_getpath(vp, NULL, path, MAXPATHLEN, &truncated);
5882 #if CONFIG_FSE
5883 if (truncated) {
5884 finfo.mode |= FSE_TRUNCATED_PATH;
5885 }
5886 #endif
5887 }
5888
5889 error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
5890
5891 /*
5892 * Special case to remove orphaned AppleDouble
5893 * files. I don't like putting this in the kernel,
5894 * but carbon does not like putting this in carbon either,
5895 * so here we are.
5896 */
5897 if (error == ENOTEMPTY) {
5898 error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
5899 if (error == EBUSY) {
5900 oldvp_id = vp->v_id;
5901 goto out;
5902 }
5903
5904
5905 /*
5906 * Assuming everything went well, we will try the RMDIR again
5907 */
5908 if (!error)
5909 error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx);
5910 }
5911
5912 /*
5913 * Call out to allow 3rd party notification of delete.
5914 * Ignore result of kauth_authorize_fileop call.
5915 */
5916 if (!error) {
5917 if (has_listeners) {
5918 kauth_authorize_fileop(vfs_context_ucred(ctx),
5919 KAUTH_FILEOP_DELETE,
5920 (uintptr_t)vp,
5921 (uintptr_t)path);
5922 }
5923
5924 if (vp->v_flag & VISHARDLINK) {
5925 // see the comment in unlink1() about why we update
5926 // the parent of a hard link when it is removed
5927 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
5928 }
5929
5930 #if CONFIG_FSE
5931 if (need_event) {
5932 add_fsevent(FSE_DELETE, ctx,
5933 FSE_ARG_STRING, len, path,
5934 FSE_ARG_FINFO, &finfo,
5935 FSE_ARG_DONE);
5936 }
5937 #endif
5938 }
5939 if (path != NULL)
5940 RELEASE_PATH(path);
5941 }
5942
5943 out:
5944 /*
5945 * nameidone has to happen before we vnode_put(dvp)
5946 * since it may need to release the fs_nodelock on the dvp
5947 */
5948 nameidone(&nd);
5949
5950 vnode_put(dvp);
5951 vnode_put(vp);
5952
5953 if (restart_flag == 0) {
5954 wakeup_one((caddr_t)vp);
5955 return (error);
5956 }
5957 tsleep(vp, PVFS, "rm AD", 1);
5958
5959 } while (restart_flag != 0);
5960
5961 return (error);
5962
5963 }
5964
5965 /* Get direntry length padded to 8 byte alignment */
5966 #define DIRENT64_LEN(namlen) \
5967 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
5968
5969 static errno_t
5970 vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
5971 int *numdirent, vfs_context_t ctxp)
5972 {
5973 /* Check if fs natively supports VNODE_READDIR_EXTENDED */
5974 if (vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) {
5975 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
5976 } else {
5977 size_t bufsize;
5978 void * bufptr;
5979 uio_t auio;
5980 struct direntry entry64;
5981 struct dirent *dep;
5982 int bytesread;
5983 int error;
5984
5985 /*
5986 * Our kernel buffer needs to be smaller since re-packing
5987 * will expand each dirent. The worse case (when the name
5988 * length is 3) corresponds to a struct direntry size of 32
5989 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
5990 * (4-byte aligned). So having a buffer that is 3/8 the size
5991 * will prevent us from reading more than we can pack.
5992 *
5993 * Since this buffer is wired memory, we will limit the
5994 * buffer size to a maximum of 32K. We would really like to
5995 * use 32K in the MIN(), but we use magic number 87371 to
5996 * prevent uio_resid() * 3 / 8 from overflowing.
5997 */
5998 bufsize = 3 * MIN(uio_resid(uio), 87371) / 8;
5999 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
6000 if (bufptr == NULL) {
6001 return ENOMEM;
6002 }
6003
6004 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
6005 uio_addiov(auio, (uintptr_t)bufptr, bufsize);
6006 auio->uio_offset = uio->uio_offset;
6007
6008 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);
6009
6010 dep = (struct dirent *)bufptr;
6011 bytesread = bufsize - uio_resid(auio);
6012
6013 /*
6014 * Convert all the entries and copy them out to user's buffer.
6015 */
6016 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
6017 /* Convert a dirent to a dirent64. */
6018 entry64.d_ino = dep->d_ino;
6019 entry64.d_seekoff = 0;
6020 entry64.d_reclen = DIRENT64_LEN(dep->d_namlen);
6021 entry64.d_namlen = dep->d_namlen;
6022 entry64.d_type = dep->d_type;
6023 bcopy(dep->d_name, entry64.d_name, dep->d_namlen + 1);
6024
6025 /* Move to next entry. */
6026 dep = (struct dirent *)((char *)dep + dep->d_reclen);
6027
6028 /* Copy entry64 to user's buffer. */
6029 error = uiomove((caddr_t)&entry64, entry64.d_reclen, uio);
6030 }
6031
6032 /* Update the real offset using the offset we got from VNOP_READDIR. */
6033 if (error == 0) {
6034 uio->uio_offset = auio->uio_offset;
6035 }
6036 uio_free(auio);
6037 FREE(bufptr, M_TEMP);
6038 return (error);
6039 }
6040 }
6041
6042 /*
6043 * Read a block of directory entries in a file system independent format.
6044 */
6045 static int
6046 getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
6047 off_t *offset, int flags)
6048 {
6049 vnode_t vp;
6050 struct vfs_context context = *vfs_context_current(); /* local copy */
6051 struct fileproc *fp;
6052 uio_t auio;
6053 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6054 off_t loff;
6055 int error, eofflag, numdirent;
6056 char uio_buf[ UIO_SIZEOF(1) ];
6057
6058 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
6059 if (error) {
6060 return (error);
6061 }
6062 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
6063 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6064 error = EBADF;
6065 goto out;
6066 }
6067
6068 #if CONFIG_MACF
6069 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
6070 if (error)
6071 goto out;
6072 #endif
6073 if ( (error = vnode_getwithref(vp)) ) {
6074 goto out;
6075 }
6076 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6077
6078 unionread:
6079 if (vp->v_type != VDIR) {
6080 (void)vnode_put(vp);
6081 error = EINVAL;
6082 goto out;
6083 }
6084
6085 #if CONFIG_MACF
6086 error = mac_vnode_check_readdir(&context, vp);
6087 if (error != 0) {
6088 (void)vnode_put(vp);
6089 goto out;
6090 }
6091 #endif /* MAC */
6092
6093 loff = fp->f_fglob->fg_offset;
6094 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
6095 uio_addiov(auio, bufp, bufsize);
6096
6097 if (flags & VNODE_READDIR_EXTENDED) {
6098 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
6099 fp->f_fglob->fg_offset = uio_offset(auio);
6100 } else {
6101 error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
6102 fp->f_fglob->fg_offset = uio_offset(auio);
6103 }
6104 if (error) {
6105 (void)vnode_put(vp);
6106 goto out;
6107 }
6108
6109 if ((user_ssize_t)bufsize == uio_resid(auio)){
6110 if (union_dircheckp) {
6111 error = union_dircheckp(&vp, fp, &context);
6112 if (error == -1)
6113 goto unionread;
6114 if (error)
6115 goto out;
6116 }
6117
6118 if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) {
6119 struct vnode *tvp = vp;
6120 vp = vp->v_mount->mnt_vnodecovered;
6121 vnode_getwithref(vp);
6122 vnode_ref(vp);
6123 fp->f_fglob->fg_data = (caddr_t) vp;
6124 fp->f_fglob->fg_offset = 0;
6125 vnode_rele(tvp);
6126 vnode_put(tvp);
6127 goto unionread;
6128 }
6129 }
6130
6131 vnode_put(vp);
6132 if (offset) {
6133 *offset = loff;
6134 }
6135
6136 *bytesread = bufsize - uio_resid(auio);
6137 out:
6138 file_drop(fd);
6139 return (error);
6140 }
6141
6142
6143 int
6144 getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
6145 {
6146 off_t offset;
6147 ssize_t bytesread;
6148 int error;
6149
6150 AUDIT_ARG(fd, uap->fd);
6151 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
6152
6153 if (error == 0) {
6154 if (proc_is64bit(p)) {
6155 user64_long_t base = (user64_long_t)offset;
6156 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
6157 } else {
6158 user32_long_t base = (user32_long_t)offset;
6159 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
6160 }
6161 *retval = bytesread;
6162 }
6163 return (error);
6164 }
6165
6166 int
6167 getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
6168 {
6169 off_t offset;
6170 ssize_t bytesread;
6171 int error;
6172
6173 AUDIT_ARG(fd, uap->fd);
6174 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
6175
6176 if (error == 0) {
6177 *retval = bytesread;
6178 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
6179 }
6180 return (error);
6181 }
6182
6183
6184 /*
6185 * Set the mode mask for creation of filesystem nodes.
6186 * XXX implement xsecurity
6187 */
6188 #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
6189 static int
6190 umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
6191 {
6192 struct filedesc *fdp;
6193
6194 AUDIT_ARG(mask, newmask);
6195 proc_fdlock(p);
6196 fdp = p->p_fd;
6197 *retval = fdp->fd_cmask;
6198 fdp->fd_cmask = newmask & ALLPERMS;
6199 proc_fdunlock(p);
6200 return (0);
6201 }
6202
6203 /*
6204 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
6205 *
6206 * Parameters: p Process requesting to set the umask
6207 * uap User argument descriptor (see below)
6208 * retval umask of the process (parameter p)
6209 *
6210 * Indirect: uap->newmask umask to set
6211 * uap->xsecurity ACL to set
6212 *
6213 * Returns: 0 Success
6214 * !0 Not success
6215 *
6216 */
6217 int
6218 umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
6219 {
6220 int ciferror;
6221 kauth_filesec_t xsecdst;
6222
6223 xsecdst = KAUTH_FILESEC_NONE;
6224 if (uap->xsecurity != USER_ADDR_NULL) {
6225 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
6226 return ciferror;
6227 } else {
6228 xsecdst = KAUTH_FILESEC_NONE;
6229 }
6230
6231 ciferror = umask1(p, uap->newmask, xsecdst, retval);
6232
6233 if (xsecdst != KAUTH_FILESEC_NONE)
6234 kauth_filesec_free(xsecdst);
6235 return ciferror;
6236 }
6237
6238 int
6239 umask(proc_t p, struct umask_args *uap, int32_t *retval)
6240 {
6241 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
6242 }
6243
6244 /*
6245 * Void all references to file by ripping underlying filesystem
6246 * away from vnode.
6247 */
6248 /* ARGSUSED */
6249 int
6250 revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
6251 {
6252 vnode_t vp;
6253 struct vnode_attr va;
6254 vfs_context_t ctx = vfs_context_current();
6255 int error;
6256 struct nameidata nd;
6257
6258 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1,
6259 UIO_USERSPACE, uap->path, ctx);
6260 error = namei(&nd);
6261 if (error)
6262 return (error);
6263 vp = nd.ni_vp;
6264
6265 nameidone(&nd);
6266
6267 if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
6268 error = ENOTSUP;
6269 goto out;
6270 }
6271
6272 if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
6273 error = EBUSY;
6274 goto out;
6275 }
6276
6277 #if CONFIG_MACF
6278 error = mac_vnode_check_revoke(ctx, vp);
6279 if (error)
6280 goto out;
6281 #endif
6282
6283 VATTR_INIT(&va);
6284 VATTR_WANTED(&va, va_uid);
6285 if ((error = vnode_getattr(vp, &va, ctx)))
6286 goto out;
6287 if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
6288 (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
6289 goto out;
6290 if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
6291 VNOP_REVOKE(vp, REVOKEALL, ctx);
6292 out:
6293 vnode_put(vp);
6294 return (error);
6295 }
6296
6297
6298 /*
6299 * HFS/HFS Plus SPECIFIC SYSTEM CALLS
6300 * The following system calls are designed to support features
6301 * which are specific to the HFS & HFS Plus volume formats
6302 */
6303
6304 #ifdef __APPLE_API_OBSOLETE
6305
6306 /************************************************/
6307 /* *** Following calls will be deleted soon *** */
6308 /************************************************/
6309
6310 /*
6311 * Make a complex file. A complex file is one with multiple forks (data streams)
6312 */
6313 /* ARGSUSED */
6314 int
6315 mkcomplex(__unused proc_t p, __unused struct mkcomplex_args *uap, __unused int32_t *retval)
6316 {
6317 return (ENOTSUP);
6318 }
6319
6320 /*
6321 * Extended stat call which returns volumeid and vnodeid as well as other info
6322 */
6323 /* ARGSUSED */
6324 int
6325 statv(__unused proc_t p,
6326 __unused struct statv_args *uap,
6327 __unused int32_t *retval)
6328 {
6329 return (ENOTSUP); /* We'll just return an error for now */
6330
6331 } /* end of statv system call */
6332
6333 /*
6334 * Extended lstat call which returns volumeid and vnodeid as well as other info
6335 */
6336 /* ARGSUSED */
6337 int
6338 lstatv(__unused proc_t p,
6339 __unused struct lstatv_args *uap,
6340 __unused int32_t *retval)
6341 {
6342 return (ENOTSUP); /* We'll just return an error for now */
6343 } /* end of lstatv system call */
6344
6345 /*
6346 * Extended fstat call which returns volumeid and vnodeid as well as other info
6347 */
6348 /* ARGSUSED */
6349 int
6350 fstatv(__unused proc_t p,
6351 __unused struct fstatv_args *uap,
6352 __unused int32_t *retval)
6353 {
6354 return (ENOTSUP); /* We'll just return an error for now */
6355 } /* end of fstatv system call */
6356
6357
6358 /************************************************/
6359 /* *** Preceding calls will be deleted soon *** */
6360 /************************************************/
6361
6362 #endif /* __APPLE_API_OBSOLETE */
6363
6364 /*
6365 * Obtain attribute information on objects in a directory while enumerating
6366 * the directory. This call does not yet support union mounted directories.
6367 * TO DO
6368 * 1.union mounted directories.
6369 */
6370
6371 /* ARGSUSED */
6372 int
6373 getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
6374 {
6375 vnode_t vp;
6376 struct fileproc *fp;
6377 uio_t auio = NULL;
6378 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6379 uint32_t count;
6380 uint32_t newstate;
6381 int error, eofflag;
6382 uint32_t loff;
6383 struct attrlist attributelist;
6384 vfs_context_t ctx = vfs_context_current();
6385 int fd = uap->fd;
6386 char uio_buf[ UIO_SIZEOF(1) ];
6387 kauth_action_t action;
6388
6389 AUDIT_ARG(fd, fd);
6390
6391 /* Get the attributes into kernel space */
6392 if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
6393 return(error);
6394 }
6395 if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
6396 return(error);
6397 }
6398 if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
6399 return (error);
6400 }
6401 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
6402 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
6403 error = EBADF;
6404 goto out;
6405 }
6406
6407
6408 #if CONFIG_MACF
6409 error = mac_file_check_change_offset(vfs_context_ucred(ctx),
6410 fp->f_fglob);
6411 if (error)
6412 goto out;
6413 #endif
6414
6415
6416 if ( (error = vnode_getwithref(vp)) )
6417 goto out;
6418
6419 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
6420
6421 if (vp->v_type != VDIR) {
6422 (void)vnode_put(vp);
6423 error = EINVAL;
6424 goto out;
6425 }
6426
6427 #if CONFIG_MACF
6428 error = mac_vnode_check_readdir(ctx, vp);
6429 if (error != 0) {
6430 (void)vnode_put(vp);
6431 goto out;
6432 }
6433 #endif /* MAC */
6434
6435 /* set up the uio structure which will contain the users return buffer */
6436 loff = fp->f_fglob->fg_offset;
6437 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ,
6438 &uio_buf[0], sizeof(uio_buf));
6439 uio_addiov(auio, uap->buffer, uap->buffersize);
6440
6441 /*
6442 * If the only item requested is file names, we can let that past with
6443 * just LIST_DIRECTORY. If they want any other attributes, that means
6444 * they need SEARCH as well.
6445 */
6446 action = KAUTH_VNODE_LIST_DIRECTORY;
6447 if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
6448 attributelist.fileattr || attributelist.dirattr)
6449 action |= KAUTH_VNODE_SEARCH;
6450
6451 if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {
6452
6453 /* Believe it or not, uap->options only has 32-bits of valid
6454 * info, so truncate before extending again */
6455 error = VNOP_READDIRATTR(vp, &attributelist, auio,
6456 count,
6457 (u_long)(uint32_t)uap->options, &newstate, &eofflag,
6458 &count, ctx);
6459 }
6460 (void)vnode_put(vp);
6461
6462 if (error)
6463 goto out;
6464 fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */
6465
6466 if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
6467 goto out;
6468 if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
6469 goto out;
6470 if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
6471 goto out;
6472
6473 *retval = eofflag; /* similar to getdirentries */
6474 error = 0;
6475 out:
6476 file_drop(fd);
6477 return (error); /* return error earlier, an retval of 0 or 1 now */
6478
6479 } /* end of getdirentryattr system call */
6480
6481 /*
6482 * Exchange data between two files
6483 */
6484
6485 /* ARGSUSED */
6486 int
6487 exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
6488 {
6489
6490 struct nameidata fnd, snd;
6491 vfs_context_t ctx = vfs_context_current();
6492 vnode_t fvp;
6493 vnode_t svp;
6494 int error;
6495 u_int32_t nameiflags;
6496 char *fpath = NULL;
6497 char *spath = NULL;
6498 int flen=0, slen=0;
6499 int from_truncated=0, to_truncated=0;
6500 #if CONFIG_FSE
6501 fse_info f_finfo, s_finfo;
6502 #endif
6503
6504 nameiflags = 0;
6505 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6506
6507 NDINIT(&fnd, LOOKUP, nameiflags | AUDITVNPATH1,
6508 UIO_USERSPACE, uap->path1, ctx);
6509
6510 error = namei(&fnd);
6511 if (error)
6512 goto out2;
6513
6514 nameidone(&fnd);
6515 fvp = fnd.ni_vp;
6516
6517 NDINIT(&snd, LOOKUP | CN_NBMOUNTLOOK, nameiflags | AUDITVNPATH2,
6518 UIO_USERSPACE, uap->path2, ctx);
6519
6520 error = namei(&snd);
6521 if (error) {
6522 vnode_put(fvp);
6523 goto out2;
6524 }
6525 nameidone(&snd);
6526 svp = snd.ni_vp;
6527
6528 /*
6529 * if the files are the same, return an inval error
6530 */
6531 if (svp == fvp) {
6532 error = EINVAL;
6533 goto out;
6534 }
6535
6536 /*
6537 * if the files are on different volumes, return an error
6538 */
6539 if (svp->v_mount != fvp->v_mount) {
6540 error = EXDEV;
6541 goto out;
6542 }
6543
6544 #if CONFIG_MACF
6545 error = mac_vnode_check_exchangedata(ctx,
6546 fvp, svp);
6547 if (error)
6548 goto out;
6549 #endif
6550 if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
6551 ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
6552 goto out;
6553
6554 if (
6555 #if CONFIG_FSE
6556 need_fsevent(FSE_EXCHANGE, fvp) ||
6557 #endif
6558 kauth_authorize_fileop_has_listeners()) {
6559 GET_PATH(fpath);
6560 GET_PATH(spath);
6561 if (fpath == NULL || spath == NULL) {
6562 error = ENOMEM;
6563 goto out;
6564 }
6565
6566 flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
6567 slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);
6568
6569 #if CONFIG_FSE
6570 get_fse_info(fvp, &f_finfo, ctx);
6571 get_fse_info(svp, &s_finfo, ctx);
6572 if (from_truncated || to_truncated) {
6573 // set it here since only the f_finfo gets reported up to user space
6574 f_finfo.mode |= FSE_TRUNCATED_PATH;
6575 }
6576 #endif
6577 }
6578 /* Ok, make the call */
6579 error = VNOP_EXCHANGE(fvp, svp, 0, ctx);
6580
6581 if (error == 0) {
6582 const char *tmpname;
6583
6584 if (fpath != NULL && spath != NULL) {
6585 /* call out to allow 3rd party notification of exchangedata.
6586 * Ignore result of kauth_authorize_fileop call.
6587 */
6588 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
6589 (uintptr_t)fpath, (uintptr_t)spath);
6590 }
6591 name_cache_lock();
6592
6593 tmpname = fvp->v_name;
6594 fvp->v_name = svp->v_name;
6595 svp->v_name = tmpname;
6596
6597 if (fvp->v_parent != svp->v_parent) {
6598 vnode_t tmp;
6599
6600 tmp = fvp->v_parent;
6601 fvp->v_parent = svp->v_parent;
6602 svp->v_parent = tmp;
6603 }
6604 name_cache_unlock();
6605
6606 #if CONFIG_FSE
6607 if (fpath != NULL && spath != NULL) {
6608 add_fsevent(FSE_EXCHANGE, ctx,
6609 FSE_ARG_STRING, flen, fpath,
6610 FSE_ARG_FINFO, &f_finfo,
6611 FSE_ARG_STRING, slen, spath,
6612 FSE_ARG_FINFO, &s_finfo,
6613 FSE_ARG_DONE);
6614 }
6615 #endif
6616 }
6617
6618 out:
6619 if (fpath != NULL)
6620 RELEASE_PATH(fpath);
6621 if (spath != NULL)
6622 RELEASE_PATH(spath);
6623 vnode_put(svp);
6624 vnode_put(fvp);
6625 out2:
6626 return (error);
6627 }
6628
6629
6630 /* ARGSUSED */
6631
6632 int
6633 searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
6634 {
6635 vnode_t vp;
6636 int error=0;
6637 int fserror = 0;
6638 struct nameidata nd;
6639 struct user64_fssearchblock searchblock;
6640 struct searchstate *state;
6641 struct attrlist *returnattrs;
6642 struct timeval timelimit;
6643 void *searchparams1,*searchparams2;
6644 uio_t auio = NULL;
6645 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
6646 uint32_t nummatches;
6647 int mallocsize;
6648 uint32_t nameiflags;
6649 vfs_context_t ctx = vfs_context_current();
6650 char uio_buf[ UIO_SIZEOF(1) ];
6651
6652 /* Start by copying in fsearchblock paramater list */
6653 if (IS_64BIT_PROCESS(p)) {
6654 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
6655 timelimit.tv_sec = searchblock.timelimit.tv_sec;
6656 timelimit.tv_usec = searchblock.timelimit.tv_usec;
6657 }
6658 else {
6659 struct user32_fssearchblock tmp_searchblock;
6660
6661 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
6662 // munge into 64-bit version
6663 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
6664 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
6665 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
6666 searchblock.maxmatches = tmp_searchblock.maxmatches;
6667 /*
6668 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
6669 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
6670 */
6671 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
6672 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
6673 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
6674 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
6675 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
6676 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
6677 searchblock.searchattrs = tmp_searchblock.searchattrs;
6678 }
6679 if (error)
6680 return(error);
6681
6682 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
6683 */
6684 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
6685 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
6686 return(EINVAL);
6687
6688 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
6689 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
6690 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
6691 /* block. */
6692
6693 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
6694 sizeof(struct attrlist) + sizeof(struct searchstate);
6695
6696 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
6697
6698 /* Now set up the various pointers to the correct place in our newly allocated memory */
6699
6700 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
6701 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
6702 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
6703
6704 /* Now copy in the stuff given our local variables. */
6705
6706 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
6707 goto freeandexit;
6708
6709 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
6710 goto freeandexit;
6711
6712 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
6713 goto freeandexit;
6714
6715 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
6716 goto freeandexit;
6717
6718
6719 /*
6720 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
6721 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
6722 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
6723 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
6724 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
6725 */
6726
6727 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
6728 attrreference_t* string_ref;
6729 u_int32_t* start_length;
6730 user64_size_t param_length;
6731
6732 /* validate searchparams1 */
6733 param_length = searchblock.sizeofsearchparams1;
6734 /* skip the word that specifies length of the buffer */
6735 start_length= (u_int32_t*) searchparams1;
6736 start_length= start_length+1;
6737 string_ref= (attrreference_t*) start_length;
6738
6739 /* ensure no negative offsets or too big offsets */
6740 if (string_ref->attr_dataoffset < 0 ) {
6741 error = EINVAL;
6742 goto freeandexit;
6743 }
6744 if (string_ref->attr_length > MAXPATHLEN) {
6745 error = EINVAL;
6746 goto freeandexit;
6747 }
6748
6749 /* Check for pointer overflow in the string ref */
6750 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
6751 error = EINVAL;
6752 goto freeandexit;
6753 }
6754
6755 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
6756 error = EINVAL;
6757 goto freeandexit;
6758 }
6759 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
6760 error = EINVAL;
6761 goto freeandexit;
6762 }
6763 }
6764
6765 /* set up the uio structure which will contain the users return buffer */
6766 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
6767 &uio_buf[0], sizeof(uio_buf));
6768 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
6769
6770 nameiflags = 0;
6771 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
6772 NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1,
6773 UIO_USERSPACE, uap->path, ctx);
6774
6775 error = namei(&nd);
6776 if (error)
6777 goto freeandexit;
6778
6779 nameidone(&nd);
6780 vp = nd.ni_vp;
6781
6782
6783 /*
6784 * If searchblock.maxmatches == 0, then skip the search. This has happened
6785 * before and sometimes the underlyning code doesnt deal with it well.
6786 */
6787 if (searchblock.maxmatches == 0) {
6788 nummatches = 0;
6789 goto saveandexit;
6790 }
6791
6792 /*
6793 Allright, we have everything we need, so lets make that call.
6794
6795 We keep special track of the return value from the file system:
6796 EAGAIN is an acceptable error condition that shouldn't keep us
6797 from copying out any results...
6798 */
6799
6800 fserror = VNOP_SEARCHFS(vp,
6801 searchparams1,
6802 searchparams2,
6803 &searchblock.searchattrs,
6804 (u_long)searchblock.maxmatches,
6805 &timelimit,
6806 returnattrs,
6807 &nummatches,
6808 (u_long)uap->scriptcode,
6809 (u_long)uap->options,
6810 auio,
6811 state,
6812 ctx);
6813
6814 saveandexit:
6815
6816 vnode_put(vp);
6817
6818 /* Now copy out the stuff that needs copying out. That means the number of matches, the
6819 search state. Everything was already put into he return buffer by the vop call. */
6820
6821 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
6822 goto freeandexit;
6823
6824 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
6825 goto freeandexit;
6826
6827 error = fserror;
6828
6829 freeandexit:
6830
6831 FREE(searchparams1,M_TEMP);
6832
6833 return(error);
6834
6835
6836 } /* end of searchfs system call */
6837
6838
6839 /*
6840 * Make a filesystem-specific control call:
6841 */
6842 /* ARGSUSED */
6843 static int
6844 fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
6845 {
6846 int error=0;
6847 boolean_t is64bit;
6848 u_int size;
6849 #define STK_PARAMS 128
6850 char stkbuf[STK_PARAMS];
6851 caddr_t data, memp;
6852 vnode_t vp = *arg_vp;
6853
6854 size = IOCPARM_LEN(cmd);
6855 if (size > IOCPARM_MAX) return (EINVAL);
6856
6857 is64bit = proc_is64bit(p);
6858
6859 memp = NULL;
6860 if (size > sizeof (stkbuf)) {
6861 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
6862 data = memp;
6863 } else {
6864 data = &stkbuf[0];
6865 };
6866
6867 if (cmd & IOC_IN) {
6868 if (size) {
6869 error = copyin(udata, data, size);
6870 if (error) goto FSCtl_Exit;
6871 } else {
6872 if (is64bit) {
6873 *(user_addr_t *)data = udata;
6874 }
6875 else {
6876 *(uint32_t *)data = (uint32_t)udata;
6877 }
6878 };
6879 } else if ((cmd & IOC_OUT) && size) {
6880 /*
6881 * Zero the buffer so the user always
6882 * gets back something deterministic.
6883 */
6884 bzero(data, size);
6885 } else if (cmd & IOC_VOID) {
6886 if (is64bit) {
6887 *(user_addr_t *)data = udata;
6888 }
6889 else {
6890 *(uint32_t *)data = (uint32_t)udata;
6891 }
6892 }
6893
6894 /* Check to see if it's a generic command */
6895 if (IOCBASECMD(cmd) == FSCTL_SYNC_VOLUME) {
6896 mount_t mp = vp->v_mount;
6897 int arg = *(uint32_t*)data;
6898
6899 /* record vid of vp so we can drop it below. */
6900 uint32_t vvid = vp->v_id;
6901
6902 /*
6903 * Then grab mount_iterref so that we can release the vnode.
6904 * Without this, a thread may call vnode_iterate_prepare then
6905 * get into a deadlock because we've never released the root vp
6906 */
6907 error = mount_iterref (mp, 0);
6908 if (error) {
6909 goto FSCtl_Exit;
6910 }
6911 vnode_put(vp);
6912
6913 /* issue the sync for this volume */
6914 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
6915
6916 /*
6917 * Then release the mount_iterref once we're done syncing; it's not
6918 * needed for the VNOP_IOCTL below
6919 */
6920 mount_iterdrop(mp);
6921
6922 if (arg & FSCTL_SYNC_FULLSYNC) {
6923 /* re-obtain vnode iocount on the root vp, if possible */
6924 error = vnode_getwithvid (vp, vvid);
6925 if (error == 0) {
6926 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
6927 vnode_put (vp);
6928 }
6929 }
6930 /* mark the argument VP as having been released */
6931 *arg_vp = NULL;
6932
6933 } else if (IOCBASECMD(cmd) == FSCTL_SET_PACKAGE_EXTS) {
6934 user_addr_t ext_strings;
6935 uint32_t num_entries;
6936 uint32_t max_width;
6937
6938 if ( (is64bit && size != sizeof(user64_package_ext_info))
6939 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
6940
6941 // either you're 64-bit and passed a 64-bit struct or
6942 // you're 32-bit and passed a 32-bit struct. otherwise
6943 // it's not ok.
6944 error = EINVAL;
6945 goto FSCtl_Exit;
6946 }
6947
6948 if (is64bit) {
6949 ext_strings = ((user64_package_ext_info *)data)->strings;
6950 num_entries = ((user64_package_ext_info *)data)->num_entries;
6951 max_width = ((user64_package_ext_info *)data)->max_width;
6952 } else {
6953 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
6954 num_entries = ((user32_package_ext_info *)data)->num_entries;
6955 max_width = ((user32_package_ext_info *)data)->max_width;
6956 }
6957
6958 error = set_package_extensions_table(ext_strings, num_entries, max_width);
6959
6960 } else if (IOCBASECMD(cmd) == FSCTL_WAIT_FOR_SYNC) {
6961 error = tsleep((caddr_t)&sync_wait_time, PVFS|PCATCH, "sync-wait", 0);
6962 if (error == 0) {
6963 *(uint32_t *)data = (uint32_t)sync_wait_time;
6964 error = 0;
6965 } else {
6966 error *= -1;
6967 }
6968
6969 } else {
6970 /* Invoke the filesystem-specific code */
6971 error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
6972 }
6973
6974
6975 /*
6976 * Copy any data to user, size was
6977 * already set and checked above.
6978 */
6979 if (error == 0 && (cmd & IOC_OUT) && size)
6980 error = copyout(data, udata, size);
6981
6982 FSCtl_Exit:
6983 if (memp) kfree(memp, size);
6984
6985 return error;
6986 }
6987
6988 /* ARGSUSED */
6989 int
6990 fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
6991 {
6992 int error;
6993 struct nameidata nd;
6994 u_long nameiflags;
6995 vnode_t vp = NULL;
6996 vfs_context_t ctx = vfs_context_current();
6997
6998 AUDIT_ARG(cmd, uap->cmd);
6999 AUDIT_ARG(value32, uap->options);
7000 /* Get the vnode for the file we are getting info on: */
7001 nameiflags = 0;
7002 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
7003 NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, UIO_USERSPACE,
7004 uap->path, ctx);
7005 if ((error = namei(&nd))) goto done;
7006 vp = nd.ni_vp;
7007 nameidone(&nd);
7008
7009 #if CONFIG_MACF
7010 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
7011 if (error) {
7012 goto done;
7013 }
7014 #endif
7015
7016 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
7017
7018 done:
7019 if (vp)
7020 vnode_put(vp);
7021 return error;
7022 }
7023 /* ARGSUSED */
7024 int
7025 ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
7026 {
7027 int error;
7028 vnode_t vp = NULL;
7029 vfs_context_t ctx = vfs_context_current();
7030 int fd = -1;
7031
7032 AUDIT_ARG(fd, uap->fd);
7033 AUDIT_ARG(cmd, uap->cmd);
7034 AUDIT_ARG(value32, uap->options);
7035
7036 /* Get the vnode for the file we are getting info on: */
7037 if ((error = file_vnode(uap->fd, &vp)))
7038 goto done;
7039 fd = uap->fd;
7040 if ((error = vnode_getwithref(vp))) {
7041 goto done;
7042 }
7043
7044 #if CONFIG_MACF
7045 error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
7046 if (error) {
7047 goto done;
7048 }
7049 #endif
7050
7051 error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);
7052
7053 done:
7054 if (fd != -1)
7055 file_drop(fd);
7056
7057 if (vp)
7058 vnode_put(vp);
7059 return error;
7060 }
7061 /* end of fsctl system call */
7062
7063 /*
7064 * An in-kernel sync for power management to call.
7065 */
7066 __private_extern__ int
7067 sync_internal(void)
7068 {
7069 int error;
7070
7071 struct sync_args data;
7072
7073 int retval[2];
7074
7075
7076 error = sync(current_proc(), &data, &retval[0]);
7077
7078
7079 return (error);
7080 } /* end of sync_internal call */
7081
7082
7083 /*
7084 * Retrieve the data of an extended attribute.
7085 */
7086 int
7087 getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
7088 {
7089 vnode_t vp;
7090 struct nameidata nd;
7091 char attrname[XATTR_MAXNAMELEN+1];
7092 vfs_context_t ctx = vfs_context_current();
7093 uio_t auio = NULL;
7094 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7095 size_t attrsize = 0;
7096 size_t namelen;
7097 u_int32_t nameiflags;
7098 int error;
7099 char uio_buf[ UIO_SIZEOF(1) ];
7100
7101 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
7102 return (EINVAL);
7103
7104 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
7105 NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
7106 if ((error = namei(&nd))) {
7107 return (error);
7108 }
7109 vp = nd.ni_vp;
7110 nameidone(&nd);
7111
7112 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
7113 goto out;
7114 }
7115 if (xattr_protected(attrname)) {
7116 error = EPERM;
7117 goto out;
7118 }
7119 /*
7120 * the specific check for 0xffffffff is a hack to preserve
7121 * binaray compatibilty in K64 with applications that discovered
7122 * that passing in a buf pointer and a size of -1 resulted in
7123 * just the size of the indicated extended attribute being returned.
7124 * this isn't part of the documented behavior, but because of the
7125 * original implemtation's check for "uap->size > 0", this behavior
7126 * was allowed. In K32 that check turned into a signed comparison
7127 * even though uap->size is unsigned... in K64, we blow by that
7128 * check because uap->size is unsigned and doesn't get sign smeared
7129 * in the munger for a 32 bit user app. we also need to add a
7130 * check to limit the maximum size of the buffer being passed in...
7131 * unfortunately, the underlying fileystems seem to just malloc
7132 * the requested size even if the actual extended attribute is tiny.
7133 * because that malloc is for kernel wired memory, we have to put a
7134 * sane limit on it.
7135 *
7136 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
7137 * U64 running on K64 will yield -1 (64 bits wide)
7138 * U32/U64 running on K32 will yield -1 (32 bits wide)
7139 */
7140 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
7141 goto no_uio;
7142
7143 if (uap->size > (size_t)XATTR_MAXSIZE)
7144 uap->size = XATTR_MAXSIZE;
7145
7146 if (uap->value) {
7147 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
7148 &uio_buf[0], sizeof(uio_buf));
7149 uio_addiov(auio, uap->value, uap->size);
7150 }
7151 no_uio:
7152 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
7153 out:
7154 vnode_put(vp);
7155
7156 if (auio) {
7157 *retval = uap->size - uio_resid(auio);
7158 } else {
7159 *retval = (user_ssize_t)attrsize;
7160 }
7161
7162 return (error);
7163 }
7164
7165 /*
7166 * Retrieve the data of an extended attribute.
7167 */
7168 int
7169 fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
7170 {
7171 vnode_t vp;
7172 char attrname[XATTR_MAXNAMELEN+1];
7173 uio_t auio = NULL;
7174 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7175 size_t attrsize = 0;
7176 size_t namelen;
7177 int error;
7178 char uio_buf[ UIO_SIZEOF(1) ];
7179
7180 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
7181 return (EINVAL);
7182
7183 if ( (error = file_vnode(uap->fd, &vp)) ) {
7184 return (error);
7185 }
7186 if ( (error = vnode_getwithref(vp)) ) {
7187 file_drop(uap->fd);
7188 return(error);
7189 }
7190 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
7191 goto out;
7192 }
7193 if (xattr_protected(attrname)) {
7194 error = EPERM;
7195 goto out;
7196 }
7197 if (uap->value && uap->size > 0) {
7198 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
7199 &uio_buf[0], sizeof(uio_buf));
7200 uio_addiov(auio, uap->value, uap->size);
7201 }
7202
7203 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
7204 out:
7205 (void)vnode_put(vp);
7206 file_drop(uap->fd);
7207
7208 if (auio) {
7209 *retval = uap->size - uio_resid(auio);
7210 } else {
7211 *retval = (user_ssize_t)attrsize;
7212 }
7213 return (error);
7214 }
7215
7216 /*
7217 * Set the data of an extended attribute.
7218 */
7219 int
7220 setxattr(proc_t p, struct setxattr_args *uap, int *retval)
7221 {
7222 vnode_t vp;
7223 struct nameidata nd;
7224 char attrname[XATTR_MAXNAMELEN+1];
7225 vfs_context_t ctx = vfs_context_current();
7226 uio_t auio = NULL;
7227 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7228 size_t namelen;
7229 u_int32_t nameiflags;
7230 int error;
7231 char uio_buf[ UIO_SIZEOF(1) ];
7232
7233 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
7234 return (EINVAL);
7235
7236 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
7237 return (error);
7238 }
7239 if (xattr_protected(attrname))
7240 return(EPERM);
7241 if (uap->size != 0 && uap->value == 0) {
7242 return (EINVAL);
7243 }
7244
7245 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
7246 NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
7247 if ((error = namei(&nd))) {
7248 return (error);
7249 }
7250 vp = nd.ni_vp;
7251 nameidone(&nd);
7252
7253 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
7254 &uio_buf[0], sizeof(uio_buf));
7255 uio_addiov(auio, uap->value, uap->size);
7256
7257 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
7258 #if CONFIG_FSE
7259 if (error == 0) {
7260 add_fsevent(FSE_XATTR_MODIFIED, ctx,
7261 FSE_ARG_VNODE, vp,
7262 FSE_ARG_DONE);
7263 }
7264 #endif
7265 vnode_put(vp);
7266 *retval = 0;
7267 return (error);
7268 }
7269
7270 /*
7271 * Set the data of an extended attribute.
7272 */
7273 int
7274 fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
7275 {
7276 vnode_t vp;
7277 char attrname[XATTR_MAXNAMELEN+1];
7278 uio_t auio = NULL;
7279 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7280 size_t namelen;
7281 int error;
7282 char uio_buf[ UIO_SIZEOF(1) ];
7283 vfs_context_t ctx = vfs_context_current();
7284
7285 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
7286 return (EINVAL);
7287
7288 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
7289 return (error);
7290 }
7291 if (xattr_protected(attrname))
7292 return(EPERM);
7293 if (uap->size != 0 && uap->value == 0) {
7294 return (EINVAL);
7295 }
7296 if ( (error = file_vnode(uap->fd, &vp)) ) {
7297 return (error);
7298 }
7299 if ( (error = vnode_getwithref(vp)) ) {
7300 file_drop(uap->fd);
7301 return(error);
7302 }
7303 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
7304 &uio_buf[0], sizeof(uio_buf));
7305 uio_addiov(auio, uap->value, uap->size);
7306
7307 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
7308 #if CONFIG_FSE
7309 if (error == 0) {
7310 add_fsevent(FSE_XATTR_MODIFIED, ctx,
7311 FSE_ARG_VNODE, vp,
7312 FSE_ARG_DONE);
7313 }
7314 #endif
7315 vnode_put(vp);
7316 file_drop(uap->fd);
7317 *retval = 0;
7318 return (error);
7319 }
7320
7321 /*
7322 * Remove an extended attribute.
7323 * XXX Code duplication here.
7324 */
7325 int
7326 removexattr(proc_t p, struct removexattr_args *uap, int *retval)
7327 {
7328 vnode_t vp;
7329 struct nameidata nd;
7330 char attrname[XATTR_MAXNAMELEN+1];
7331 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7332 vfs_context_t ctx = vfs_context_current();
7333 size_t namelen;
7334 u_int32_t nameiflags;
7335 int error;
7336
7337 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
7338 return (EINVAL);
7339
7340 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
7341 if (error != 0) {
7342 return (error);
7343 }
7344 if (xattr_protected(attrname))
7345 return(EPERM);
7346 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
7347 NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
7348 if ((error = namei(&nd))) {
7349 return (error);
7350 }
7351 vp = nd.ni_vp;
7352 nameidone(&nd);
7353
7354 error = vn_removexattr(vp, attrname, uap->options, ctx);
7355 #if CONFIG_FSE
7356 if (error == 0) {
7357 add_fsevent(FSE_XATTR_REMOVED, ctx,
7358 FSE_ARG_VNODE, vp,
7359 FSE_ARG_DONE);
7360 }
7361 #endif
7362 vnode_put(vp);
7363 *retval = 0;
7364 return (error);
7365 }
7366
7367 /*
7368 * Remove an extended attribute.
7369 * XXX Code duplication here.
7370 */
7371 int
7372 fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
7373 {
7374 vnode_t vp;
7375 char attrname[XATTR_MAXNAMELEN+1];
7376 size_t namelen;
7377 int error;
7378 vfs_context_t ctx = vfs_context_current();
7379
7380 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
7381 return (EINVAL);
7382
7383 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
7384 if (error != 0) {
7385 return (error);
7386 }
7387 if (xattr_protected(attrname))
7388 return(EPERM);
7389 if ( (error = file_vnode(uap->fd, &vp)) ) {
7390 return (error);
7391 }
7392 if ( (error = vnode_getwithref(vp)) ) {
7393 file_drop(uap->fd);
7394 return(error);
7395 }
7396
7397 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
7398 #if CONFIG_FSE
7399 if (error == 0) {
7400 add_fsevent(FSE_XATTR_REMOVED, ctx,
7401 FSE_ARG_VNODE, vp,
7402 FSE_ARG_DONE);
7403 }
7404 #endif
7405 vnode_put(vp);
7406 file_drop(uap->fd);
7407 *retval = 0;
7408 return (error);
7409 }
7410
7411 /*
7412 * Retrieve the list of extended attribute names.
7413 * XXX Code duplication here.
7414 */
7415 int
7416 listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
7417 {
7418 vnode_t vp;
7419 struct nameidata nd;
7420 vfs_context_t ctx = vfs_context_current();
7421 uio_t auio = NULL;
7422 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7423 size_t attrsize = 0;
7424 u_int32_t nameiflags;
7425 int error;
7426 char uio_buf[ UIO_SIZEOF(1) ];
7427
7428 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
7429 return (EINVAL);
7430
7431 nameiflags = ((uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW) | NOTRIGGER;
7432 NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, ctx);
7433 if ((error = namei(&nd))) {
7434 return (error);
7435 }
7436 vp = nd.ni_vp;
7437 nameidone(&nd);
7438 if (uap->namebuf != 0 && uap->bufsize > 0) {
7439 auio = uio_createwithbuffer(1, 0, spacetype,
7440 UIO_READ, &uio_buf[0], sizeof(uio_buf));
7441 uio_addiov(auio, uap->namebuf, uap->bufsize);
7442 }
7443
7444 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
7445
7446 vnode_put(vp);
7447 if (auio) {
7448 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
7449 } else {
7450 *retval = (user_ssize_t)attrsize;
7451 }
7452 return (error);
7453 }
7454
7455 /*
7456 * Retrieve the list of extended attribute names.
7457 * XXX Code duplication here.
7458 */
7459 int
7460 flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
7461 {
7462 vnode_t vp;
7463 uio_t auio = NULL;
7464 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
7465 size_t attrsize = 0;
7466 int error;
7467 char uio_buf[ UIO_SIZEOF(1) ];
7468
7469 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
7470 return (EINVAL);
7471
7472 if ( (error = file_vnode(uap->fd, &vp)) ) {
7473 return (error);
7474 }
7475 if ( (error = vnode_getwithref(vp)) ) {
7476 file_drop(uap->fd);
7477 return(error);
7478 }
7479 if (uap->namebuf != 0 && uap->bufsize > 0) {
7480 auio = uio_createwithbuffer(1, 0, spacetype,
7481 UIO_READ, &uio_buf[0], sizeof(uio_buf));
7482 uio_addiov(auio, uap->namebuf, uap->bufsize);
7483 }
7484
7485 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
7486
7487 vnode_put(vp);
7488 file_drop(uap->fd);
7489 if (auio) {
7490 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
7491 } else {
7492 *retval = (user_ssize_t)attrsize;
7493 }
7494 return (error);
7495 }
7496
7497 /*
7498 * Obtain the full pathname of a file system object by id.
7499 *
7500 * This is a private SPI used by the File Manager.
7501 */
7502 __private_extern__
7503 int
7504 fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
7505 {
7506 vnode_t vp;
7507 struct mount *mp = NULL;
7508 vfs_context_t ctx = vfs_context_current();
7509 fsid_t fsid;
7510 char *realpath;
7511 int bpflags;
7512 int length;
7513 int error;
7514
7515 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
7516 return (error);
7517 }
7518 AUDIT_ARG(value32, fsid.val[0]);
7519 AUDIT_ARG(value64, uap->objid);
7520 /* Restrict output buffer size for now. */
7521 if (uap->bufsize > PAGE_SIZE) {
7522 return (EINVAL);
7523 }
7524 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
7525 if (realpath == NULL) {
7526 return (ENOMEM);
7527 }
7528 /* Find the target mountpoint. */
7529 if ((mp = mount_lookupby_volfsid(fsid.val[0], 1)) == NULL) {
7530 error = ENOTSUP; /* unexpected failure */
7531 goto out;
7532 }
7533 /* Find the target vnode. */
7534 if (uap->objid == 2) {
7535 error = VFS_ROOT(mp, &vp, ctx);
7536 } else {
7537 error = VFS_VGET(mp, (ino64_t)uap->objid, &vp, ctx);
7538 }
7539 vfs_unbusy(mp);
7540 if (error) {
7541 goto out;
7542 }
7543 /* Obtain the absolute path to this vnode. */
7544 bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
7545 error = build_path(vp, realpath, uap->bufsize, &length, bpflags, ctx);
7546 vnode_put(vp);
7547 if (error) {
7548 goto out;
7549 }
7550 AUDIT_ARG(text, realpath);
7551 error = copyout((caddr_t)realpath, uap->buf, length);
7552
7553 *retval = (user_ssize_t)length; /* may be superseded by error */
7554 out:
7555 if (realpath) {
7556 FREE(realpath, M_TEMP);
7557 }
7558 return (error);
7559 }
7560
7561 /*
7562 * Common routine to handle various flavors of statfs data heading out
7563 * to user space.
7564 *
7565 * Returns: 0 Success
7566 * EFAULT
7567 */
7568 static int
7569 munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
7570 user_addr_t bufp, int *sizep, boolean_t is_64_bit,
7571 boolean_t partial_copy)
7572 {
7573 int error;
7574 int my_size, copy_size;
7575
7576 if (is_64_bit) {
7577 struct user64_statfs sfs;
7578 my_size = copy_size = sizeof(sfs);
7579 bzero(&sfs, my_size);
7580 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
7581 sfs.f_type = mp->mnt_vtable->vfc_typenum;
7582 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
7583 sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
7584 sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
7585 sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
7586 sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
7587 sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
7588 sfs.f_files = (user64_long_t)sfsp->f_files;
7589 sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
7590 sfs.f_fsid = sfsp->f_fsid;
7591 sfs.f_owner = sfsp->f_owner;
7592 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
7593 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
7594 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
7595
7596 if (partial_copy) {
7597 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
7598 }
7599 error = copyout((caddr_t)&sfs, bufp, copy_size);
7600 }
7601 else {
7602 struct user32_statfs sfs;
7603
7604 my_size = copy_size = sizeof(sfs);
7605 bzero(&sfs, my_size);
7606
7607 sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
7608 sfs.f_type = mp->mnt_vtable->vfc_typenum;
7609 sfs.f_reserved1 = (short)sfsp->f_fssubtype;
7610
7611 /*
7612 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
7613 * have to fudge the numbers here in that case. We inflate the blocksize in order
7614 * to reflect the filesystem size as best we can.
7615 */
7616 if ((sfsp->f_blocks > INT_MAX)
7617 /* Hack for 4061702 . I think the real fix is for Carbon to
7618 * look for some volume capability and not depend on hidden
7619 * semantics agreed between a FS and carbon.
7620 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
7621 * for Carbon to set bNoVolumeSizes volume attribute.
7622 * Without this the webdavfs files cannot be copied onto
7623 * disk as they look huge. This change should not affect
7624 * XSAN as they should not setting these to -1..
7625 */
7626 && (sfsp->f_blocks != 0xffffffffffffffffULL)
7627 && (sfsp->f_bfree != 0xffffffffffffffffULL)
7628 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
7629 int shift;
7630
7631 /*
7632 * Work out how far we have to shift the block count down to make it fit.
7633 * Note that it's possible to have to shift so far that the resulting
7634 * blocksize would be unreportably large. At that point, we will clip
7635 * any values that don't fit.
7636 *
7637 * For safety's sake, we also ensure that f_iosize is never reported as
7638 * being smaller than f_bsize.
7639 */
7640 for (shift = 0; shift < 32; shift++) {
7641 if ((sfsp->f_blocks >> shift) <= INT_MAX)
7642 break;
7643 if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
7644 break;
7645 }
7646 #define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
7647 sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
7648 sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
7649 sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
7650 #undef __SHIFT_OR_CLIP
7651 sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
7652 sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
7653 } else {
7654 /* filesystem is small enough to be reported honestly */
7655 sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
7656 sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
7657 sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
7658 sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
7659 sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
7660 }
7661 sfs.f_files = (user32_long_t)sfsp->f_files;
7662 sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
7663 sfs.f_fsid = sfsp->f_fsid;
7664 sfs.f_owner = sfsp->f_owner;
7665 strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
7666 strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
7667 strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);
7668
7669 if (partial_copy) {
7670 copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
7671 }
7672 error = copyout((caddr_t)&sfs, bufp, copy_size);
7673 }
7674
7675 if (sizep != NULL) {
7676 *sizep = my_size;
7677 }
7678 return(error);
7679 }
7680
7681 /*
7682 * copy stat structure into user_stat structure.
7683 */
7684 void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
7685 {
7686 bzero(usbp, sizeof(*usbp));
7687
7688 usbp->st_dev = sbp->st_dev;
7689 usbp->st_ino = sbp->st_ino;
7690 usbp->st_mode = sbp->st_mode;
7691 usbp->st_nlink = sbp->st_nlink;
7692 usbp->st_uid = sbp->st_uid;
7693 usbp->st_gid = sbp->st_gid;
7694 usbp->st_rdev = sbp->st_rdev;
7695 #ifndef _POSIX_C_SOURCE
7696 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
7697 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
7698 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
7699 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
7700 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
7701 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
7702 #else
7703 usbp->st_atime = sbp->st_atime;
7704 usbp->st_atimensec = sbp->st_atimensec;
7705 usbp->st_mtime = sbp->st_mtime;
7706 usbp->st_mtimensec = sbp->st_mtimensec;
7707 usbp->st_ctime = sbp->st_ctime;
7708 usbp->st_ctimensec = sbp->st_ctimensec;
7709 #endif
7710 usbp->st_size = sbp->st_size;
7711 usbp->st_blocks = sbp->st_blocks;
7712 usbp->st_blksize = sbp->st_blksize;
7713 usbp->st_flags = sbp->st_flags;
7714 usbp->st_gen = sbp->st_gen;
7715 usbp->st_lspare = sbp->st_lspare;
7716 usbp->st_qspare[0] = sbp->st_qspare[0];
7717 usbp->st_qspare[1] = sbp->st_qspare[1];
7718 }
7719
7720 void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
7721 {
7722 bzero(usbp, sizeof(*usbp));
7723
7724 usbp->st_dev = sbp->st_dev;
7725 usbp->st_ino = sbp->st_ino;
7726 usbp->st_mode = sbp->st_mode;
7727 usbp->st_nlink = sbp->st_nlink;
7728 usbp->st_uid = sbp->st_uid;
7729 usbp->st_gid = sbp->st_gid;
7730 usbp->st_rdev = sbp->st_rdev;
7731 #ifndef _POSIX_C_SOURCE
7732 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
7733 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
7734 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
7735 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
7736 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
7737 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
7738 #else
7739 usbp->st_atime = sbp->st_atime;
7740 usbp->st_atimensec = sbp->st_atimensec;
7741 usbp->st_mtime = sbp->st_mtime;
7742 usbp->st_mtimensec = sbp->st_mtimensec;
7743 usbp->st_ctime = sbp->st_ctime;
7744 usbp->st_ctimensec = sbp->st_ctimensec;
7745 #endif
7746 usbp->st_size = sbp->st_size;
7747 usbp->st_blocks = sbp->st_blocks;
7748 usbp->st_blksize = sbp->st_blksize;
7749 usbp->st_flags = sbp->st_flags;
7750 usbp->st_gen = sbp->st_gen;
7751 usbp->st_lspare = sbp->st_lspare;
7752 usbp->st_qspare[0] = sbp->st_qspare[0];
7753 usbp->st_qspare[1] = sbp->st_qspare[1];
7754 }
7755
7756 /*
7757 * copy stat64 structure into user_stat64 structure.
7758 */
7759 void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
7760 {
7761 bzero(usbp, sizeof(*usbp));
7762
7763 usbp->st_dev = sbp->st_dev;
7764 usbp->st_ino = sbp->st_ino;
7765 usbp->st_mode = sbp->st_mode;
7766 usbp->st_nlink = sbp->st_nlink;
7767 usbp->st_uid = sbp->st_uid;
7768 usbp->st_gid = sbp->st_gid;
7769 usbp->st_rdev = sbp->st_rdev;
7770 #ifndef _POSIX_C_SOURCE
7771 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
7772 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
7773 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
7774 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
7775 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
7776 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
7777 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
7778 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
7779 #else
7780 usbp->st_atime = sbp->st_atime;
7781 usbp->st_atimensec = sbp->st_atimensec;
7782 usbp->st_mtime = sbp->st_mtime;
7783 usbp->st_mtimensec = sbp->st_mtimensec;
7784 usbp->st_ctime = sbp->st_ctime;
7785 usbp->st_ctimensec = sbp->st_ctimensec;
7786 usbp->st_birthtime = sbp->st_birthtime;
7787 usbp->st_birthtimensec = sbp->st_birthtimensec;
7788 #endif
7789 usbp->st_size = sbp->st_size;
7790 usbp->st_blocks = sbp->st_blocks;
7791 usbp->st_blksize = sbp->st_blksize;
7792 usbp->st_flags = sbp->st_flags;
7793 usbp->st_gen = sbp->st_gen;
7794 usbp->st_lspare = sbp->st_lspare;
7795 usbp->st_qspare[0] = sbp->st_qspare[0];
7796 usbp->st_qspare[1] = sbp->st_qspare[1];
7797 }
7798
7799 void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
7800 {
7801 bzero(usbp, sizeof(*usbp));
7802
7803 usbp->st_dev = sbp->st_dev;
7804 usbp->st_ino = sbp->st_ino;
7805 usbp->st_mode = sbp->st_mode;
7806 usbp->st_nlink = sbp->st_nlink;
7807 usbp->st_uid = sbp->st_uid;
7808 usbp->st_gid = sbp->st_gid;
7809 usbp->st_rdev = sbp->st_rdev;
7810 #ifndef _POSIX_C_SOURCE
7811 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
7812 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
7813 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
7814 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
7815 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
7816 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
7817 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
7818 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
7819 #else
7820 usbp->st_atime = sbp->st_atime;
7821 usbp->st_atimensec = sbp->st_atimensec;
7822 usbp->st_mtime = sbp->st_mtime;
7823 usbp->st_mtimensec = sbp->st_mtimensec;
7824 usbp->st_ctime = sbp->st_ctime;
7825 usbp->st_ctimensec = sbp->st_ctimensec;
7826 usbp->st_birthtime = sbp->st_birthtime;
7827 usbp->st_birthtimensec = sbp->st_birthtimensec;
7828 #endif
7829 usbp->st_size = sbp->st_size;
7830 usbp->st_blocks = sbp->st_blocks;
7831 usbp->st_blksize = sbp->st_blksize;
7832 usbp->st_flags = sbp->st_flags;
7833 usbp->st_gen = sbp->st_gen;
7834 usbp->st_lspare = sbp->st_lspare;
7835 usbp->st_qspare[0] = sbp->st_qspare[0];
7836 usbp->st_qspare[1] = sbp->st_qspare[1];
7837 }