[apple/xnu.git] / bsd / vfs / vfs_vnops.c
1 /*
2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)vfs_vnops.c 8.14 (Berkeley) 6/15/95
67 *
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 #include <sys/param.h>
77 #include <sys/types.h>
78 #include <sys/systm.h>
79 #include <sys/kernel.h>
80 #include <sys/file_internal.h>
81 #include <sys/stat.h>
82 #include <sys/proc_internal.h>
83 #include <sys/kauth.h>
84 #include <sys/mount_internal.h>
85 #include <sys/namei.h>
86 #include <sys/vnode_internal.h>
87 #include <sys/ioctl.h>
88 #include <sys/tty.h>
89 /* Temporary workaround for ubc.h until <rdar://4714366> is resolved */
90 #define ubc_setcred ubc_setcred_deprecated
91 #include <sys/ubc.h>
92 #undef ubc_setcred
93 int ubc_setcred(struct vnode *, struct proc *);
94 #include <sys/conf.h>
95 #include <sys/disk.h>
96 #include <sys/fsevents.h>
97 #include <sys/kdebug.h>
98 #include <sys/xattr.h>
99 #include <sys/ubc_internal.h>
100 #include <sys/uio_internal.h>
101 #include <sys/resourcevar.h>
102 #include <sys/signalvar.h>
103
104 #include <vm/vm_kern.h>
105 #include <vm/vm_map.h>
106
107 #include <miscfs/specfs/specdev.h>
108
109 #if CONFIG_MACF
110 #include <security/mac_framework.h>
111 #endif
112
113
114 static int vn_closefile(struct fileglob *fp, vfs_context_t ctx);
115 static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data,
116 vfs_context_t ctx);
117 static int vn_read(struct fileproc *fp, struct uio *uio, int flags,
118 vfs_context_t ctx);
119 static int vn_write(struct fileproc *fp, struct uio *uio, int flags,
120 vfs_context_t ctx);
121 static int vn_select( struct fileproc *fp, int which, void * wql,
122 vfs_context_t ctx);
123 static int vn_kqfilt_add(struct fileproc *fp, struct knote *kn,
124 vfs_context_t ctx);
125 #if 0
126 static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident,
127 vfs_context_t ctx);
128 #endif
129
130 struct fileops vnops =
131 { vn_read, vn_write, vn_ioctl, vn_select, vn_closefile, vn_kqfilt_add, NULL };
132
133 /*
134 * Common code for vnode open operations.
135 * Check permissions, and call the VNOP_OPEN or VNOP_CREATE routine.
136 *
137 * XXX the profusion of interfaces here is probably a bad thing.
138 */
139 int
140 vn_open(struct nameidata *ndp, int fmode, int cmode)
141 {
142 return(vn_open_modflags(ndp, &fmode, cmode));
143 }
144
145 int
146 vn_open_modflags(struct nameidata *ndp, int *fmodep, int cmode)
147 {
148 struct vnode_attr va;
149
150 VATTR_INIT(&va);
151 VATTR_SET(&va, va_mode, cmode);
152
153 return(vn_open_auth(ndp, fmodep, &va));
154 }
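
/*
 * Editorial sketch (not part of the original xnu source): a caller that
 * needs more creation attributes than a bare mode can seed its own
 * vnode_attr and call vn_open_auth() directly, just as vn_open_modflags()
 * does above.  The extra va_uid attribute and the helper name are
 * hypothetical; any struct vnode_attr field may be requested, subject to
 * filesystem support.
 */
#if 0	/* example only, not compiled */
static int
example_open_with_attrs(struct nameidata *ndp, int *fmodep, uid_t owner)
{
	struct vnode_attr va;

	VATTR_INIT(&va);
	VATTR_SET(&va, va_mode, (mode_t)(S_IRUSR | S_IWUSR));
	VATTR_SET(&va, va_uid, owner);	/* hypothetical extra attribute */

	/* vn_open_auth() clears unsupported attributes and may adjust *fmodep */
	return vn_open_auth(ndp, fmodep, &va);
}
#endif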
155
156 /*
157 * Open a file with authorization, updating the contents of the structures
158 * pointed to by ndp, fmodep, and vap as necessary to perform the requested
159 * operation. This function is used both for opens of existing files
160 * and for creation of new files.
161 *
162 * Parameters: ndp The nameidata pointer describing the
163 * file
164 * fmodep A pointer to an int containing the mode
165 * information to be used for the open
166 * vap A pointer to the vnode attribute
167 * descriptor to be used for the open
168 *
169 * Indirect: * Contents of the data structures pointed
170 * to by the parameters are modified as
171 * necessary to the requested operation.
172 *
173 * Returns: 0 Success
174 * !0 errno value
175 *
176 * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order.
177 *
178 * The contents of '*ndp' will be modified, based on the other
179 * arguments to this function, and to return file and directory
180 * data necessary to satisfy the requested operation.
181 *
182 * If the file does not exist and we are creating it, then the
183 * O_TRUNC flag will be cleared in '*fmodep' to indicate to the
184 * caller that the file was not truncated.
185 *
186 * If the file exists and the O_EXCL flag was not specified, then
187 * the O_CREAT flag will be cleared in '*fmodep' to indicate to
188 * the caller that the existing file was merely opened rather
189 * than created.
190 *
191 * The contents of '*vap' will be modified as necessary to
192 * complete the operation, including setting of supported
193 * attributes, clearing of fields containing unsupported attributes
194 * in the request if the request proceeds without them, etc.
195 *
196 * XXX: This function is too complicated in acting on its arguments
197 *
198 * XXX: We should enumerate the possible errno values here, and where
199 * in the code they originated.
200 */
201 int
202 vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap)
203 {
204 struct vnode *vp;
205 struct vnode *dvp;
206 vfs_context_t ctx = ndp->ni_cnd.cn_context;
207 int error;
208 int fmode;
209 kauth_action_t action;
210
211 again:
212 vp = NULL;
213 dvp = NULL;
214 fmode = *fmodep;
215 if (fmode & O_CREAT) {
216 if ( (fmode & O_DIRECTORY) ) {
217 error = EINVAL;
218 goto out;
219 }
220 ndp->ni_cnd.cn_nameiop = CREATE;
221 /* Inherit USEDVP flag only */
222 ndp->ni_cnd.cn_flags &= USEDVP;
223 ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1;
224 #if NAMEDRSRCFORK
225 /* open calls are allowed for resource forks. */
226 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
227 #endif
228 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
229 ndp->ni_cnd.cn_flags |= FOLLOW;
230 if ( (error = namei(ndp)) )
231 goto out;
232 dvp = ndp->ni_dvp;
233 vp = ndp->ni_vp;
234
235 /* not found, create */
236 if (vp == NULL) {
237 /* must have attributes for a new file */
238 if (vap == NULL) {
239 error = EINVAL;
240 goto badcreate;
241 }
242
243 VATTR_SET(vap, va_type, VREG);
244 #if CONFIG_MACF
245 error = mac_vnode_check_create(ctx,
246 dvp, &ndp->ni_cnd, vap);
247 if (error)
248 goto badcreate;
249 #endif /* MAC */
250
251 /* authorize before creating */
252 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
253 goto badcreate;
254
255 if (fmode & O_EXCL)
256 vap->va_vaflags |= VA_EXCLUSIVE;
257 #if NAMEDRSRCFORK
258 if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) {
259 if ((error = vnode_makenamedstream(dvp, &ndp->ni_vp, XATTR_RESOURCEFORK_NAME, 0, ctx)) != 0)
260 goto badcreate;
261 } else
262 #endif
263 if ((error = vn_create(dvp, &ndp->ni_vp, &ndp->ni_cnd, vap, 0, ctx)) != 0)
264 goto badcreate;
265
266 vp = ndp->ni_vp;
267
268 if (vp) {
269 int update_flags = 0;
270
271 // Make sure the name & parent pointers are hooked up
272 if (vp->v_name == NULL)
273 update_flags |= VNODE_UPDATE_NAME;
274 if (vp->v_parent == NULLVP)
275 update_flags |= VNODE_UPDATE_PARENT;
276
277 if (update_flags)
278 vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags);
279
280 #if CONFIG_FSE
281 if (need_fsevent(FSE_CREATE_FILE, vp)) {
282 add_fsevent(FSE_CREATE_FILE, ctx,
283 FSE_ARG_VNODE, vp,
284 FSE_ARG_DONE);
285 }
286 #endif
287
288 }
289 /*
290 * nameidone has to happen before we vnode_put(dvp)
291 * and clear the ni_dvp field, since it may need
292 * to release the fs_nodelock on the dvp
293 */
294 badcreate:
295 nameidone(ndp);
296 ndp->ni_dvp = NULL;
297 vnode_put(dvp);
298
299 if (error) {
300 /*
301 * Check for a creation race.
302 */
303 if ((error == EEXIST) && !(fmode & O_EXCL)) {
304 goto again;
305 }
306 goto bad;
307 }
308 fmode &= ~O_TRUNC;
309 } else {
310 nameidone(ndp);
311 ndp->ni_dvp = NULL;
312 vnode_put(dvp);
313
314 if (fmode & O_EXCL) {
315 error = EEXIST;
316 goto bad;
317 }
318 fmode &= ~O_CREAT;
319 }
320 } else {
321 ndp->ni_cnd.cn_nameiop = LOOKUP;
322 /* Inherit USEDVP flag only */
323 ndp->ni_cnd.cn_flags &= USEDVP;
324 ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1;
325 #if NAMEDRSRCFORK
326 /* open calls are allowed for resource forks. */
327 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
328 #endif
329 if (fmode & O_NOFOLLOW || fmode & O_SYMLINK) {
330 ndp->ni_cnd.cn_flags &= ~FOLLOW;
331 }
332
333 if ( (error = namei(ndp)) )
334 goto out;
335 vp = ndp->ni_vp;
336 nameidone(ndp);
337 ndp->ni_dvp = NULL;
338
339 if ( (fmode & O_DIRECTORY) && vp->v_type != VDIR ) {
340 error = ENOTDIR;
341 goto bad;
342 }
343 }
344
345 if (vp->v_type == VSOCK && vp->v_tag != VT_FDESC) {
346 error = EOPNOTSUPP; /* Operation not supported on socket */
347 goto bad;
348 }
349
350 if (vp->v_type == VLNK && (fmode & O_NOFOLLOW) != 0) {
351 error = ELOOP; /* O_NOFOLLOW was specified and the target is a symbolic link */
352 goto bad;
353 }
354
355 /* authorize open of an existing file */
356 if ((fmode & O_CREAT) == 0) {
357
358 /* disallow write operations on directories */
359 if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
360 error = EISDIR;
361 goto bad;
362 }
363
364 #if CONFIG_MACF
365 error = mac_vnode_check_open(ctx, vp, fmode);
366 if (error)
367 goto bad;
368 #endif
369
370 /* compute action to be authorized */
371 action = 0;
372 if (fmode & FREAD) {
373 action |= KAUTH_VNODE_READ_DATA;
374 }
375 if (fmode & (FWRITE | O_TRUNC)) {
376 /*
377 * If we are writing, appending, and not truncating,
378 * indicate that we are appending so that if the
379 * UF_APPEND or SF_APPEND bits are set, we do not deny
380 * the open.
381 */
382 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
383 action |= KAUTH_VNODE_APPEND_DATA;
384 } else {
385 action |= KAUTH_VNODE_WRITE_DATA;
386 }
387 }
388 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
389 goto bad;
390
391
392 //
393 // If the vnode is tagged VOPENEVT and the current process
394 // has the P_CHECKOPENEVT flag set, then we OR the O_EVTONLY
395 // flag into the open mode so that this open won't count against
396 // the vnode when Carbon delete() does a vnode_isinuse() to see
397 // if a file is currently in use.  This allows Spotlight
398 // importers to avoid interfering with Carbon apps that depend on
399 // the no-delete-if-busy semantics of Carbon delete().
400 //
401 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
402 fmode |= O_EVTONLY;
403 }
404
405 }
406
407 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
408 goto bad;
409 }
410 if ( (error = vnode_ref_ext(vp, fmode)) ) {
411 goto bad2;
412 }
413
414 /* call out to allow 3rd party notification of open.
415 * Ignore result of kauth_authorize_fileop call.
416 */
417 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
418 (uintptr_t)vp, 0);
419
420 *fmodep = fmode;
421 return (0);
422 bad2:
423 VNOP_CLOSE(vp, fmode, ctx);
424 bad:
425 ndp->ni_vp = NULL;
426 if (vp) {
427 vnode_put(vp);
428 /*
429 * Check for a race against unlink. We had a vnode
430 * but according to vnode_authorize or VNOP_OPEN it
431 * no longer exists.
432 *
433 * EREDRIVEOPEN: means that we were hit by the tty allocation race.
434 */
435 if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN)) {
436 goto again;
437 }
438 }
439 out:
440 return (error);
441 }
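
/*
 * Editorial sketch (not part of the original xnu source): a typical
 * in-kernel caller reaches vn_open_auth() through vn_open() after
 * initializing a nameidata.  The 6-argument NDINIT() shown is an
 * assumption about this era's <sys/namei.h>; the macro's argument list
 * differs across xnu releases.  The helper name is hypothetical.
 */
#if 0	/* example only, not compiled */
static int
example_kernel_open(const char *path, vfs_context_t ctx, vnode_t *vpp)
{
	struct nameidata nd;
	int fmode = FREAD;
	int error;

	/* assumed NDINIT() form; check <sys/namei.h> for the exact arguments */
	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);

	error = vn_open(&nd, fmode, 0);
	if (error == 0) {
		/*
		 * The vnode now carries an iocount (drop with vnode_put())
		 * and the usecount taken by vnode_ref_ext(); the usecount is
		 * dropped later by vn_close().
		 */
		*vpp = nd.ni_vp;
	}
	return error;
}
#endif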
442
443 #if vn_access_DEPRECATED
444 /*
445 * Authorize an action against a vnode. This has been the canonical way to
446 * ensure that the credential/process/etc. referenced by a vfs_context
447 * is granted the rights called out in 'mode' against the vnode 'vp'.
448 *
449 * Unfortunately, the use of VREAD/VWRITE/VEXEC makes it very difficult
450 * to add support for more rights. As such, this interface will be deprecated
451 * and callers will use vnode_authorize instead.
452 */
453 int
454 vn_access(vnode_t vp, int mode, vfs_context_t context)
455 {
456 kauth_action_t action;
457
458 action = 0;
459 if (mode & VREAD)
460 action |= KAUTH_VNODE_READ_DATA;
461 if (mode & VWRITE)
462 action |= KAUTH_VNODE_WRITE_DATA;
463 if (mode & VEXEC)
464 action |= KAUTH_VNODE_EXECUTE;
465
466 return(vnode_authorize(vp, NULL, action, context));
467 }
468 #endif /* vn_access_DEPRECATED */
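
/*
 * Editorial sketch (not part of the original xnu source): the replacement
 * recommended above is to call vnode_authorize() with KAUTH_VNODE_* rights
 * directly, which is all vn_access() does.  The helper name is hypothetical.
 */
#if 0	/* example only, not compiled */
static int
example_check_read_exec(vnode_t vp, vfs_context_t ctx)
{
	/* equivalent of the deprecated vn_access(vp, VREAD | VEXEC, ctx) */
	return vnode_authorize(vp, NULL,
	    KAUTH_VNODE_READ_DATA | KAUTH_VNODE_EXECUTE, ctx);
}
#endif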
469
470 /*
471 * Vnode close call
472 */
473 int
474 vn_close(struct vnode *vp, int flags, vfs_context_t ctx)
475 {
476 int error;
477
478 #if CONFIG_FSE
479 if (flags & FWASWRITTEN) {
480 if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) {
481 add_fsevent(FSE_CONTENT_MODIFIED, ctx,
482 FSE_ARG_VNODE, vp,
483 FSE_ARG_DONE);
484 }
485 }
486 #endif
487
488 #if NAMEDRSRCFORK
489 /* Sync data from resource fork shadow file if needed. */
490 if ((vp->v_flag & VISNAMEDSTREAM) &&
491 (vp->v_parent != NULLVP) &&
492 !(vp->v_parent->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS)) {
493 if (flags & FWASWRITTEN) {
494 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
495 }
496 }
497 #endif
498
499 /* workaround for foxhound */
500 if (vp->v_type == VBLK)
501 (void)vnode_rele_ext(vp, flags, 0);
502
503 error = VNOP_CLOSE(vp, flags, ctx);
504
505 if (vp->v_type != VBLK)
506 (void)vnode_rele_ext(vp, flags, 0);
507
508 return (error);
509 }
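
/*
 * Editorial sketch (not part of the original xnu source): vn_close() takes
 * the flags the file was opened with, plus FWASWRITTEN when the caller
 * dirtied the file, so that the FSE_CONTENT_MODIFIED fsevent above fires.
 * In the normal descriptor path FWASWRITTEN is recorded in fg_flag by the
 * write path; setting it by hand here is only for illustration.
 */
#if 0	/* example only, not compiled */
static int
example_close_after_write(vnode_t vp, int open_flags, int wrote, vfs_context_t ctx)
{
	int flags = open_flags;

	if (wrote)
		flags |= FWASWRITTEN;	/* lets vn_close() post FSE_CONTENT_MODIFIED */

	return vn_close(vp, flags, ctx);
}
#endif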
510
511 static int
512 vn_read_swapfile(
513 struct vnode *vp,
514 uio_t uio)
515 {
516 static char *swap_read_zero_page = NULL;
517 int error;
518 off_t swap_count, this_count;
519 off_t file_end, read_end;
520 off_t prev_resid;
521
522 /*
523 * Reading from a swap file will get you all zeroes.
524 */
525 error = 0;
526 swap_count = uio_resid(uio);
527
528 file_end = ubc_getsize(vp);
529 read_end = uio->uio_offset + uio_resid(uio);
530 if (uio->uio_offset >= file_end) {
531 /* uio starts after end of file: nothing to read */
532 swap_count = 0;
533 } else if (read_end > file_end) {
534 /* uio extends beyond end of file: stop before that */
535 swap_count -= (read_end - file_end);
536 }
537
538 while (swap_count > 0) {
539 if (swap_read_zero_page == NULL) {
540 char *my_zero_page;
541 int funnel_state;
542
543 /*
544 * Take kernel funnel so that only one thread
545 * sets up "swap_read_zero_page".
546 */
547 funnel_state = thread_funnel_set(kernel_flock, TRUE);
548
549 if (swap_read_zero_page == NULL) {
550 MALLOC(my_zero_page, char *, PAGE_SIZE,
551 M_TEMP, M_WAITOK);
552 memset(my_zero_page, '?', PAGE_SIZE);
553 /*
554 * Adding a newline character here
555 * and there prevents "less(1)", for
556 * example, from getting too confused
557 * about a file with one really really
558 * long line.
559 */
560 my_zero_page[PAGE_SIZE-1] = '\n';
561 if (swap_read_zero_page == NULL) {
562 swap_read_zero_page = my_zero_page;
563 } else {
564 FREE(my_zero_page, M_TEMP);
565 }
566 } else {
567 /*
568 * Someone else raced us here and won;
569 * just use their page.
570 */
571 }
572 thread_funnel_set(kernel_flock, funnel_state);
573 }
574
575 this_count = swap_count;
576 if (this_count > PAGE_SIZE) {
577 this_count = PAGE_SIZE;
578 }
579
580 prev_resid = uio_resid(uio);
581 error = uiomove((caddr_t) swap_read_zero_page,
582 this_count,
583 uio);
584 if (error) {
585 break;
586 }
587 swap_count -= (prev_resid - uio_resid(uio));
588 }
589
590 return error;
591 }
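
/*
 * Editorial sketch (not part of the original xnu source): the clamping in
 * vn_read_swapfile() simply limits the zero-fill to the part of the request
 * that falls before end of file.  For example, a 16KB read at offset 60KB
 * into a 64KB swap file is clamped to 4KB.  The helper below mirrors that
 * arithmetic; its name is hypothetical.
 */
#if 0	/* example only, not compiled */
static off_t
example_zero_fill_count(off_t offset, off_t resid, off_t file_end)
{
	off_t read_end = offset + resid;

	if (offset >= file_end)
		return 0;				/* starts at or past EOF */
	if (read_end > file_end)
		return resid - (read_end - file_end);	/* clamp to EOF */
	return resid;
}
#endif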
592 /*
593 * Package up an I/O request on a vnode into a uio and do it.
594 */
595 int
596 vn_rdwr(
597 enum uio_rw rw,
598 struct vnode *vp,
599 caddr_t base,
600 int len,
601 off_t offset,
602 enum uio_seg segflg,
603 int ioflg,
604 kauth_cred_t cred,
605 int *aresid,
606 proc_t p)
607 {
608 return vn_rdwr_64(rw,
609 vp,
610 (uint64_t)(uintptr_t)base,
611 (int64_t)len,
612 offset,
613 segflg,
614 ioflg,
615 cred,
616 aresid,
617 p);
618 }
619
620
621 int
622 vn_rdwr_64(
623 enum uio_rw rw,
624 struct vnode *vp,
625 uint64_t base,
626 int64_t len,
627 off_t offset,
628 enum uio_seg segflg,
629 int ioflg,
630 kauth_cred_t cred,
631 int *aresid,
632 proc_t p)
633 {
634 uio_t auio;
635 int spacetype;
636 struct vfs_context context;
637 int error=0;
638 char uio_buf[ UIO_SIZEOF(1) ];
639
640 context.vc_thread = current_thread();
641 context.vc_ucred = cred;
642
643 if (UIO_SEG_IS_USER_SPACE(segflg)) {
644 spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
645 }
646 else {
647 spacetype = UIO_SYSSPACE;
648 }
649 auio = uio_createwithbuffer(1, offset, spacetype, rw,
650 &uio_buf[0], sizeof(uio_buf));
651 uio_addiov(auio, base, len);
652
653 #if CONFIG_MACF
654 /* XXXMAC
655 * IO_NOAUTH should be re-examined.
656 * Likely that mediation should be performed in caller.
657 */
658 if ((ioflg & IO_NOAUTH) == 0) {
659 /* passed cred is fp->f_cred */
660 if (rw == UIO_READ)
661 error = mac_vnode_check_read(&context, cred, vp);
662 else
663 error = mac_vnode_check_write(&context, cred, vp);
664 }
665 #endif
666
667 if (error == 0) {
668 if (rw == UIO_READ) {
669 if (vp->v_flag & VSWAP) {
670 error = vn_read_swapfile(vp, auio);
671 } else {
672 error = VNOP_READ(vp, auio, ioflg, &context);
673 }
674 } else {
675 error = VNOP_WRITE(vp, auio, ioflg, &context);
676 }
677 }
678
679 if (aresid)
680 // LP64todo - fix this
681 *aresid = uio_resid(auio);
682 else
683 if (uio_resid(auio) && error == 0)
684 error = EIO;
685 return (error);
686 }
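
/*
 * Editorial sketch (not part of the original xnu source): vn_rdwr() packages
 * a single-iovec transfer.  This reads the first 512 bytes of a vnode into a
 * kernel buffer; the zero ioflg is an assumption (callers pass IO_NODELOCKED,
 * IO_UNIT, etc. as appropriate) and the helper name is hypothetical.
 */
#if 0	/* example only, not compiled */
static int
example_read_header(vnode_t vp, vfs_context_t ctx)
{
	char buf[512];
	int resid = 0;
	int error;

	error = vn_rdwr(UIO_READ, vp, (caddr_t)buf, sizeof(buf), 0 /* offset */,
	    UIO_SYSSPACE, 0 /* ioflg */, vfs_context_ucred(ctx), &resid,
	    vfs_context_proc(ctx));
	if (error == 0 && resid != 0)
		error = EIO;			/* short read */
	return error;
}
#endif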
687
688 /*
689 * File table vnode read routine.
690 */
691 static int
692 vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
693 {
694 struct vnode *vp;
695 int error, ioflag;
696 off_t count;
697
698 vp = (struct vnode *)fp->f_fglob->fg_data;
699 if ( (error = vnode_getwithref(vp)) ) {
700 return(error);
701 }
702
703 #if CONFIG_MACF
704 error = mac_vnode_check_read(ctx, vfs_context_ucred(ctx), vp);
705 if (error) {
706 (void)vnode_put(vp);
707 return (error);
708 }
709 #endif
710
711 ioflag = 0;
712 if (fp->f_fglob->fg_flag & FNONBLOCK)
713 ioflag |= IO_NDELAY;
714 if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
715 ioflag |= IO_NOCACHE;
716 if (fp->f_fglob->fg_flag & FNORDAHEAD)
717 ioflag |= IO_RAOFF;
718
719 if ((flags & FOF_OFFSET) == 0)
720 uio->uio_offset = fp->f_fglob->fg_offset;
721 count = uio_resid(uio);
722
723 if (vp->v_flag & VSWAP) {
724 /* special case for swap files */
725 error = vn_read_swapfile(vp, uio);
726 } else {
727 error = VNOP_READ(vp, uio, ioflag, ctx);
728 }
729 if ((flags & FOF_OFFSET) == 0)
730 fp->f_fglob->fg_offset += count - uio_resid(uio);
731
732 (void)vnode_put(vp);
733 return (error);
734 }
735
736
737 /*
738 * File table vnode write routine.
739 */
740 static int
741 vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
742 {
743 struct vnode *vp;
744 int error, ioflag;
745 off_t count;
746 int clippedsize = 0;
747 int partialwrite=0;
748 int residcount, oldcount;
749 proc_t p = vfs_context_proc(ctx);
750
751 count = 0;
752 vp = (struct vnode *)fp->f_fglob->fg_data;
753 if ( (error = vnode_getwithref(vp)) ) {
754 return(error);
755 }
756
757 #if CONFIG_MACF
758 error = mac_vnode_check_write(ctx, vfs_context_ucred(ctx), vp);
759 if (error) {
760 (void)vnode_put(vp);
761 return (error);
762 }
763 #endif
764
765 ioflag = IO_UNIT;
766 if (vp->v_type == VREG && (fp->f_fglob->fg_flag & O_APPEND))
767 ioflag |= IO_APPEND;
768 if (fp->f_fglob->fg_flag & FNONBLOCK)
769 ioflag |= IO_NDELAY;
770 if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
771 ioflag |= IO_NOCACHE;
772 if ((fp->f_fglob->fg_flag & O_FSYNC) ||
773 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
774 ioflag |= IO_SYNC;
775
776 if ((flags & FOF_OFFSET) == 0) {
777 uio->uio_offset = fp->f_fglob->fg_offset;
778 count = uio_resid(uio);
779 }
780 if (((flags & FOF_OFFSET) == 0) &&
781 vfs_context_proc(ctx) && (vp->v_type == VREG) &&
782 (((rlim_t)(uio->uio_offset + uio_uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) ||
783 ((rlim_t)uio_uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)))) {
784 /*
785 * If the requested residual would cause us to go past the
786 * administrative limit, then we need to adjust the residual
787 * down to cause fewer bytes than requested to be written. If
788 * we can't do that (e.g. the residual is already 1 byte),
789 * then we fail the write with EFBIG.
790 */
791 residcount = uio_uio_resid(uio);
792 if ((rlim_t)(uio->uio_offset + uio_uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
793 clippedsize = (uio->uio_offset + uio_uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
794 } else if ((rlim_t)uio_uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)) {
795 clippedsize = (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset);
796 }
797 if (clippedsize >= residcount) {
798 psignal(p, SIGXFSZ);
799 vnode_put(vp);
800 return (EFBIG);
801 }
802 partialwrite = 1;
803 uio_setresid(uio, residcount-clippedsize);
804 }
805 if ((flags & FOF_OFFSET) != 0) {
806 /* for pwrite, append should be ignored */
807 ioflag &= ~IO_APPEND;
808 if (p && (vp->v_type == VREG) &&
809 ((rlim_t)uio->uio_offset >= p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
810 psignal(p, SIGXFSZ);
811 vnode_put(vp);
812 return (EFBIG);
813 }
814 if (p && (vp->v_type == VREG) &&
815 ((rlim_t)(uio->uio_offset + uio_uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
816 //Debugger("vn_bwrite:overstepping the bounds");
817 residcount = uio_uio_resid(uio);
818 clippedsize = (uio->uio_offset + uio_uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
819 partialwrite = 1;
820 uio_setresid(uio, residcount-clippedsize);
821 }
822 }
823
824 error = VNOP_WRITE(vp, uio, ioflag, ctx);
825
826 if (partialwrite) {
827 oldcount = uio_resid(uio);
828 uio_setresid(uio, oldcount + clippedsize);
829 }
830
831 if ((flags & FOF_OFFSET) == 0) {
832 if (ioflag & IO_APPEND)
833 fp->f_fglob->fg_offset = uio->uio_offset;
834 else
835 fp->f_fglob->fg_offset += count - uio_resid(uio);
836 }
837
838 /*
839 * Set the credentials on successful writes
840 */
841 if ((error == 0) && (vp->v_tag == VT_NFS) && (UBCINFOEXISTS(vp))) {
842 /*
843 * When called from the aio subsystem, we only have the proc from
844 * which to get the credential at this point, so use that
845 * instead. This means aio functions are incompatible with
846 * per-thread credentials (aio operations are proxied). We
847 * can't easily correct the aio vs. settid race in this case
848 * anyway, so we disallow it.
849 */
850 if ((flags & FOF_PCRED) == 0) {
851 ubc_setthreadcred(vp, p, current_thread());
852 } else {
853 ubc_setcred(vp, p);
854 }
855 }
856 (void)vnode_put(vp);
857 return (error);
858 }
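
/*
 * Editorial sketch (not part of the original xnu source): the RLIMIT_FSIZE
 * handling in vn_write() shrinks the residual rather than failing whenever at
 * least one byte still fits.  For example, with rlim_cur = 1MB, offset =
 * 1MB - 100 and resid = 300, clippedsize is 200 and only 100 bytes are
 * written; if nothing fits, SIGXFSZ is posted and EFBIG returned.  The helper
 * below mirrors the common case (offset below the limit) and ignores the
 * unsigned-comparison corner the second branch above handles; its name is
 * hypothetical.
 */
#if 0	/* example only, not compiled */
static off_t
example_fsize_clip(off_t offset, off_t resid, rlim_t limit)
{
	if ((rlim_t)(offset + resid) <= limit)
		return 0;				/* nothing to clip */
	return (offset + resid) - (off_t)limit;		/* >= resid means EFBIG */
}
#endif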
859
860 /*
861 * File table vnode stat routine.
862 *
863 * Returns: 0 Success
864 * EBADF
865 * ENOMEM
866 * vnode_getattr:???
867 */
868 int
869 vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx)
870 {
871 struct vnode_attr va;
872 int error;
873 u_short mode;
874 kauth_filesec_t fsec;
875 struct stat *sb = (struct stat *)0; /* warning avoidance; protected by isstat64 */
876 struct stat64 *sb64 = (struct stat64 *)0; /* warning avoidance; protected by isstat64 */
877
878 if (isstat64 != 0)
879 sb64 = (struct stat64 *)sbptr;
880 else
881 sb = (struct stat *)sbptr;
882
883 VATTR_INIT(&va);
884 VATTR_WANTED(&va, va_fsid);
885 VATTR_WANTED(&va, va_fileid);
886 VATTR_WANTED(&va, va_mode);
887 VATTR_WANTED(&va, va_type);
888 VATTR_WANTED(&va, va_nlink);
889 VATTR_WANTED(&va, va_uid);
890 VATTR_WANTED(&va, va_gid);
891 VATTR_WANTED(&va, va_rdev);
892 VATTR_WANTED(&va, va_data_size);
893 VATTR_WANTED(&va, va_access_time);
894 VATTR_WANTED(&va, va_modify_time);
895 VATTR_WANTED(&va, va_change_time);
896 VATTR_WANTED(&va, va_create_time);
897 VATTR_WANTED(&va, va_flags);
898 VATTR_WANTED(&va, va_gen);
899 VATTR_WANTED(&va, va_iosize);
900 /* lower layers will synthesise va_total_alloc from va_data_size if required */
901 VATTR_WANTED(&va, va_total_alloc);
902 if (xsec != NULL) {
903 VATTR_WANTED(&va, va_uuuid);
904 VATTR_WANTED(&va, va_guuid);
905 VATTR_WANTED(&va, va_acl);
906 }
907 error = vnode_getattr(vp, &va, ctx);
908 if (error)
909 goto out;
910 /*
911 * Copy from vattr table
912 */
913 if (isstat64 != 0) {
914 sb64->st_dev = va.va_fsid;
915 sb64->st_ino = (ino64_t)va.va_fileid;
916
917 } else {
918 sb->st_dev = va.va_fsid;
919 sb->st_ino = (ino_t)va.va_fileid;
920 }
921 mode = va.va_mode;
922 switch (vp->v_type) {
923 case VREG:
924 mode |= S_IFREG;
925 break;
926 case VDIR:
927 mode |= S_IFDIR;
928 break;
929 case VBLK:
930 mode |= S_IFBLK;
931 break;
932 case VCHR:
933 mode |= S_IFCHR;
934 break;
935 case VLNK:
936 mode |= S_IFLNK;
937 break;
938 case VSOCK:
939 mode |= S_IFSOCK;
940 break;
941 case VFIFO:
942 mode |= S_IFIFO;
943 break;
944 default:
945 error = EBADF;
946 goto out;
947 }
948 if (isstat64 != 0) {
949 sb64->st_mode = mode;
950 sb64->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
951 sb64->st_uid = va.va_uid;
952 sb64->st_gid = va.va_gid;
953 sb64->st_rdev = va.va_rdev;
954 sb64->st_size = va.va_data_size;
955 sb64->st_atimespec = va.va_access_time;
956 sb64->st_mtimespec = va.va_modify_time;
957 sb64->st_ctimespec = va.va_change_time;
958 sb64->st_birthtimespec =
959 VATTR_IS_SUPPORTED(&va, va_create_time) ? va.va_create_time : va.va_change_time;
960 sb64->st_blksize = va.va_iosize;
961 sb64->st_flags = va.va_flags;
962 sb64->st_blocks = roundup(va.va_total_alloc, 512) / 512;
963 } else {
964 sb->st_mode = mode;
965 sb->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
966 sb->st_uid = va.va_uid;
967 sb->st_gid = va.va_gid;
968 sb->st_rdev = va.va_rdev;
969 sb->st_size = va.va_data_size;
970 sb->st_atimespec = va.va_access_time;
971 sb->st_mtimespec = va.va_modify_time;
972 sb->st_ctimespec = va.va_change_time;
973 sb->st_blksize = va.va_iosize;
974 sb->st_flags = va.va_flags;
975 sb->st_blocks = roundup(va.va_total_alloc, 512) / 512;
976 }
977
978 /* if we're interested in extended security data and we got an ACL */
979 if (xsec != NULL) {
980 if (!VATTR_IS_SUPPORTED(&va, va_acl) &&
981 !VATTR_IS_SUPPORTED(&va, va_uuuid) &&
982 !VATTR_IS_SUPPORTED(&va, va_guuid)) {
983 *xsec = KAUTH_FILESEC_NONE;
984 } else {
985
986 if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
987 fsec = kauth_filesec_alloc(va.va_acl->acl_entrycount);
988 } else {
989 fsec = kauth_filesec_alloc(0);
990 }
991 if (fsec == NULL) {
992 error = ENOMEM;
993 goto out;
994 }
995 fsec->fsec_magic = KAUTH_FILESEC_MAGIC;
996 if (VATTR_IS_SUPPORTED(&va, va_uuuid)) {
997 fsec->fsec_owner = va.va_uuuid;
998 } else {
999 fsec->fsec_owner = kauth_null_guid;
1000 }
1001 if (VATTR_IS_SUPPORTED(&va, va_guuid)) {
1002 fsec->fsec_group = va.va_guuid;
1003 } else {
1004 fsec->fsec_group = kauth_null_guid;
1005 }
1006 if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
1007 bcopy(va.va_acl, &(fsec->fsec_acl), KAUTH_ACL_COPYSIZE(va.va_acl));
1008 } else {
1009 fsec->fsec_acl.acl_entrycount = KAUTH_FILESEC_NOACL;
1010 }
1011 *xsec = fsec;
1012 }
1013 }
1014
1015 /* Do not give the generation number out to unprivileged users */
1016 if (va.va_gen && !vfs_context_issuser(ctx)) {
1017 if (isstat64 != 0)
1018 sb64->st_gen = 0;
1019 else
1020 sb->st_gen = 0;
1021 } else {
1022 if (isstat64 != 0)
1023 sb64->st_gen = va.va_gen;
1024 else
1025 sb->st_gen = va.va_gen;
1026 }
1027
1028 error = 0;
1029 out:
1030 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL)
1031 kauth_acl_free(va.va_acl);
1032 return (error);
1033 }
1034
1035 int
1036 vn_stat(struct vnode *vp, void *sb, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx)
1037 {
1038 int error;
1039
1040 #if CONFIG_MACF
1041 error = mac_vnode_check_stat(ctx, NOCRED, vp);
1042 if (error)
1043 return (error);
1044 #endif
1045
1046 /* authorize */
1047 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_ATTRIBUTES | KAUTH_VNODE_READ_SECURITY, ctx)) != 0)
1048 return(error);
1049
1050 /* actual stat */
1051 return(vn_stat_noauth(vp, sb, xsec, isstat64, ctx));
1052 }
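
/*
 * Editorial sketch (not part of the original xnu source): a caller that asks
 * for extended security data must free the returned kauth_filesec_t unless it
 * is KAUTH_FILESEC_NONE; kauth_filesec_free() is the release routine matching
 * the kauth_filesec_alloc() used above.  The helper name is hypothetical.
 */
#if 0	/* example only, not compiled */
static int
example_stat64(vnode_t vp, struct stat64 *sb64, vfs_context_t ctx)
{
	kauth_filesec_t xsec = KAUTH_FILESEC_NONE;
	int error;

	error = vn_stat(vp, sb64, &xsec, 1 /* isstat64 */, ctx);

	if (xsec != KAUTH_FILESEC_NONE)
		kauth_filesec_free(xsec);	/* ACL / GUID data was returned */
	return error;
}
#endif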
1053
1054
1055 /*
1056 * File table vnode ioctl routine.
1057 */
1058 static int
1059 vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
1060 {
1061 struct vnode *vp = ((struct vnode *)fp->f_fglob->fg_data);
1062 off_t file_size;
1063 int error;
1064 struct vnode *ttyvp;
1065 int funnel_state;
1066 struct session * sessp;
1067
1068 if ( (error = vnode_getwithref(vp)) ) {
1069 return(error);
1070 }
1071
1072 #if CONFIG_MACF
1073 error = mac_vnode_check_ioctl(ctx, vp, com);
1074 if (error)
1075 goto out;
1076 #endif
1077
1078 switch (vp->v_type) {
1079 case VREG:
1080 case VDIR:
1081 if (com == FIONREAD) {
1082 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
1083 goto out;
1084 *(int *)data = file_size - fp->f_fglob->fg_offset;
1085 goto out;
1086 }
1087 if (com == FIONBIO || com == FIOASYNC) { /* XXX */
1088 goto out;
1089 }
1090 /* fall into ... */
1091
1092 default:
1093 error = ENOTTY;
1094 goto out;
1095
1096 case VFIFO:
1097 case VCHR:
1098 case VBLK:
1099
1100 /* Should not be able to set block size from user space */
1101 if (com == DKIOCSETBLOCKSIZE) {
1102 error = EPERM;
1103 goto out;
1104 }
1105
1106 if (com == FIODTYPE) {
1107 if (vp->v_type == VBLK) {
1108 if (major(vp->v_rdev) >= nblkdev) {
1109 error = ENXIO;
1110 goto out;
1111 }
1112 *(int *)data = bdevsw[major(vp->v_rdev)].d_type;
1113
1114 } else if (vp->v_type == VCHR) {
1115 if (major(vp->v_rdev) >= nchrdev) {
1116 error = ENXIO;
1117 goto out;
1118 }
1119 *(int *)data = cdevsw[major(vp->v_rdev)].d_type;
1120 } else {
1121 error = ENOTTY;
1122 goto out;
1123 }
1124 goto out;
1125 }
1126 error = VNOP_IOCTL(vp, com, data, fp->f_fglob->fg_flag, ctx);
1127
1128 if (error == 0 && com == TIOCSCTTY) {
1129 vnode_ref(vp);
1130
1131 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1132 sessp = proc_session(vfs_context_proc(ctx));
1133
1134 session_lock(sessp);
1135 ttyvp = sessp->s_ttyvp;
1136 sessp->s_ttyvp = vp;
1137 sessp->s_ttyvid = vnode_vid(vp);
1138 session_unlock(sessp);
1139 session_rele(sessp);
1140 thread_funnel_set(kernel_flock, funnel_state);
1141
1142 if (ttyvp)
1143 vnode_rele(ttyvp);
1144 }
1145 }
1146 out:
1147 (void)vnode_put(vp);
1148 return(error);
1149 }
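
/*
 * Editorial sketch (not part of the original xnu source): the FIONREAD case
 * above backs the classic user-space query for bytes remaining past the
 * current offset of a regular file or directory descriptor.
 */
#if 0	/* user-space example, not part of this kernel file */
/* needs <sys/ioctl.h> (FIONREAD comes in via <sys/filio.h>) */
static int
example_bytes_remaining(int fd, int *remaining)
{
	/* answered directly by vn_ioctl() above for VREG and VDIR vnodes */
	return ioctl(fd, FIONREAD, remaining);
}
#endif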
1150
1151 /*
1152 * File table vnode select routine.
1153 */
1154 static int
1155 vn_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx)
1156 {
1157 int error;
1158 struct vnode * vp = (struct vnode *)fp->f_fglob->fg_data;
1159 struct vfs_context context;
1160
1161 if ( (error = vnode_getwithref(vp)) == 0 ) {
1162 context.vc_thread = current_thread();
1163 context.vc_ucred = fp->f_fglob->fg_cred;
1164
1165 #if CONFIG_MACF
1166 /*
1167 * XXX We should use a per thread credential here; minimally,
1168 * XXX the process credential should have a persistent
1169 * XXX reference on it before being passed in here.
1170 */
1171 error = mac_vnode_check_select(ctx, vp, which);
1172 if (error == 0)
1173 #endif
1174 error = VNOP_SELECT(vp, which, fp->f_fglob->fg_flag, wql, ctx);
1175
1176 (void)vnode_put(vp);
1177 }
1178 return(error);
1179
1180 }
1181
1182 /*
1183 * Check that the vnode is still valid, and if so
1184 * acquire requested lock.
1185 */
1186 int
1187 vn_lock(__unused vnode_t vp, __unused int flags, __unused proc_t p)
1188 {
1189 return (0);
1190 }
1191
1192 /*
1193 * File table vnode close routine.
1194 */
1195 static int
1196 vn_closefile(struct fileglob *fg, vfs_context_t ctx)
1197 {
1198 struct vnode *vp = (struct vnode *)fg->fg_data;
1199 int error;
1200 struct flock lf;
1201
1202 if ( (error = vnode_getwithref(vp)) == 0 ) {
1203
1204 if ((fg->fg_flag & FHASLOCK) && fg->fg_type == DTYPE_VNODE) {
1205 lf.l_whence = SEEK_SET;
1206 lf.l_start = 0;
1207 lf.l_len = 0;
1208 lf.l_type = F_UNLCK;
1209
1210 (void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, ctx);
1211 }
1212 error = vn_close(vp, fg->fg_flag, ctx);
1213
1214 (void)vnode_put(vp);
1215 }
1216 return(error);
1217 }
1218
1219 /*
1220 * Returns: 0 Success
1221 * VNOP_PATHCONF:???
1222 */
1223 int
1224 vn_pathconf(vnode_t vp, int name, register_t *retval, vfs_context_t ctx)
1225 {
1226 int error = 0;
1227
1228 switch(name) {
1229 case _PC_EXTENDED_SECURITY_NP:
1230 *retval = vfs_extendedsecurity(vnode_mount(vp)) ? 1 : 0;
1231 break;
1232 case _PC_AUTH_OPAQUE_NP:
1233 *retval = vfs_authopaque(vnode_mount(vp));
1234 break;
1235 case _PC_2_SYMLINKS:
1236 *retval = 1; /* XXX NOTSUP on MSDOS, etc. */
1237 break;
1238 case _PC_ALLOC_SIZE_MIN:
1239 *retval = 1; /* XXX lie: 1 byte */
1240 break;
1241 case _PC_ASYNC_IO: /* unistd.h: _POSIX_ASYNCHRONOUS_IO */
1242 *retval = 1; /* [AIO] option is supported */
1243 break;
1244 case _PC_PRIO_IO: /* unistd.h: _POSIX_PRIORITIZED_IO */
1245 *retval = 0; /* [PIO] option is not supported */
1246 break;
1247 case _PC_REC_INCR_XFER_SIZE:
1248 *retval = 4096; /* XXX go from MIN to MAX 4K at a time */
1249 break;
1250 case _PC_REC_MIN_XFER_SIZE:
1251 *retval = 4096; /* XXX recommend 4K minimum reads/writes */
1252 break;
1253 case _PC_REC_MAX_XFER_SIZE:
1254 *retval = 65536; /* XXX recommend 64K maximum reads/writes */
1255 break;
1256 case _PC_REC_XFER_ALIGN:
1257 *retval = 4096; /* XXX recommend page aligned buffers */
1258 break;
1259 case _PC_SYMLINK_MAX:
1260 *retval = 255; /* Minimum acceptable POSIX value */
1261 break;
1262 case _PC_SYNC_IO: /* unistd.h: _POSIX_SYNCHRONIZED_IO */
1263 *retval = 0; /* [SIO] option is not supported */
1264 break;
1265 default:
1266 error = VNOP_PATHCONF(vp, name, retval, ctx);
1267 break;
1268 }
1269
1270 return (error);
1271 }
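
/*
 * Editorial sketch (not part of the original xnu source): the values
 * synthesized above surface through pathconf(2)/fpathconf(2); anything not
 * handled in the switch is forwarded to the filesystem via VNOP_PATHCONF().
 */
#if 0	/* user-space example, not part of this kernel file */
/* needs <unistd.h> */
static long
example_recommended_min_xfer(const char *path)
{
	/* answered directly by vn_pathconf() above: 4096 */
	return pathconf(path, _PC_REC_MIN_XFER_SIZE);
}
#endif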
1272
1273 static int
1274 vn_kqfilt_add(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
1275 {
1276 struct vnode *vp = (struct vnode *)fp->f_fglob->fg_data;
1277 int error;
1278 int funnel_state;
1279
1280 if ( (error = vnode_getwithref(vp)) == 0 ) {
1281
1282 #if CONFIG_MACF
1283 error = mac_vnode_check_kqfilter(ctx, fp->f_fglob->fg_cred, kn, vp);
1284 if (error) {
1285 (void)vnode_put(vp);
1286 return (error);
1287 }
1288 #endif
1289
1290 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1291 error = VNOP_KQFILT_ADD(vp, kn, ctx);
1292 thread_funnel_set(kernel_flock, funnel_state);
1293
1294 (void)vnode_put(vp);
1295 }
1296 return (error);
1297 }
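
/*
 * Editorial sketch (not part of the original xnu source): vn_kqfilt_add() is
 * reached when user space attaches an EVFILT_VNODE knote to an open file,
 * for example:
 */
#if 0	/* user-space example, not part of this kernel file */
/* needs <sys/event.h>, <sys/time.h>, <fcntl.h> */
static int
example_watch_vnode(int fd)
{
	struct kevent kev, out;
	int kq = kqueue();

	if (kq == -1)
		return -1;
	EV_SET(&kev, fd, EVFILT_VNODE, EV_ADD | EV_CLEAR,
	    NOTE_WRITE | NOTE_DELETE | NOTE_RENAME, 0, NULL);
	/* registration ends up in vn_kqfilt_add() via VNOP_KQFILT_ADD();
	 * this call then blocks until the first matching event fires */
	return kevent(kq, &kev, 1, &out, 1, NULL);
}
#endif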
1298
1299 #if 0
1300 /* No one calls this yet. */
1301 static int
1302 vn_kqfilt_remove(struct vnode *vp, uintptr_t ident,
1303 	vfs_context_t ctx)
1304 {
1307 int error;
1308 int funnel_state;
1309
1310 if ( (error = vnode_getwithref(vp)) == 0 ) {
1311
1312 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1313 error = VNOP_KQFILT_REMOVE(vp, ident, ctx);
1314 thread_funnel_set(kernel_flock, funnel_state);
1315
1316 (void)vnode_put(vp);
1317 }
1318 return (error);
1319 }
1320 #endif