1 /*
2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)vfs_vnops.c 8.14 (Berkeley) 6/15/95
67 *
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 #include <sys/param.h>
77 #include <sys/types.h>
78 #include <sys/systm.h>
79 #include <sys/kernel.h>
80 #include <sys/file_internal.h>
81 #include <sys/stat.h>
82 #include <sys/proc_internal.h>
83 #include <sys/kauth.h>
84 #include <sys/mount_internal.h>
85 #include <sys/namei.h>
86 #include <sys/vnode_internal.h>
87 #include <sys/ioctl.h>
88 #include <sys/tty.h>
89 /* Temporary workaround for ubc.h until <rdar://4714366> is resolved */
90 #define ubc_setcred ubc_setcred_deprecated
91 #include <sys/ubc.h>
92 #undef ubc_setcred
93 int ubc_setcred(struct vnode *, struct proc *);
94 #include <sys/conf.h>
95 #include <sys/disk.h>
96 #include <sys/fsevents.h>
97 #include <sys/kdebug.h>
98 #include <sys/xattr.h>
99 #include <sys/ubc_internal.h>
100 #include <sys/uio_internal.h>
101 #include <sys/resourcevar.h>
102 #include <sys/signalvar.h>
103
104 #include <vm/vm_kern.h>
105 #include <vm/vm_map.h>
106
107 #include <miscfs/specfs/specdev.h>
108
109 #if CONFIG_MACF
110 #include <security/mac_framework.h>
111 #endif
112
113
114 static int vn_closefile(struct fileglob *fp, vfs_context_t ctx);
115 static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data,
116 vfs_context_t ctx);
117 static int vn_read(struct fileproc *fp, struct uio *uio, int flags,
118 vfs_context_t ctx);
119 static int vn_write(struct fileproc *fp, struct uio *uio, int flags,
120 vfs_context_t ctx);
121 static int vn_select( struct fileproc *fp, int which, void * wql,
122 vfs_context_t ctx);
123 static int vn_kqfilt_add(struct fileproc *fp, struct knote *kn,
124 vfs_context_t ctx);
125 #if 0
126 static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident,
127 vfs_context_t ctx);
128 #endif
129
130 struct fileops vnops =
131 { vn_read, vn_write, vn_ioctl, vn_select, vn_closefile, vn_kqfilt_add, NULL };
132
133 /*
134 * Common code for vnode open operations.
135 * Check permissions, and call the VNOP_OPEN or VNOP_CREATE routine.
136 *
137 * XXX the profusion of interfaces here is probably a bad thing.
138 */
139 int
140 vn_open(struct nameidata *ndp, int fmode, int cmode)
141 {
142 return(vn_open_modflags(ndp, &fmode, cmode));
143 }
144
145 int
146 vn_open_modflags(struct nameidata *ndp, int *fmodep, int cmode)
147 {
148 struct vnode_attr va;
149
150 VATTR_INIT(&va);
151 VATTR_SET(&va, va_mode, cmode);
152
153 return(vn_open_auth(ndp, fmodep, &va));
154 }
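
#if 0	/* Illustrative sketch (editor's addition), not part of the original file. */
/*
 * A minimal usage outline for the wrappers above: open a file by path from
 * kernel context with vn_open_modflags(), then release it with vn_close()
 * and vnode_put().  The helper name, the path handling, and the NDINIT()
 * argument order are assumptions based on how other callers in this kernel
 * use the namei interface; treat this as a sketch, not a drop-in helper.
 */
static int
example_open_and_close(const char *path, vfs_context_t ctx)
{
	struct nameidata nd;
	int fmode = FREAD;	/* open read-only */
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(path), ctx);

	/* vn_open_modflags() may update fmode (e.g. clear O_CREAT/O_TRUNC) */
	if ((error = vn_open_modflags(&nd, &fmode, 0)))
		return (error);

	/* ... use nd.ni_vp here, e.g. via vn_rdwr() or vn_stat() ... */

	/* vn_close() drops the usecount taken by vn_open_auth(); the
	 * iocount from namei() is dropped separately with vnode_put(). */
	error = vn_close(nd.ni_vp, fmode, ctx);
	vnode_put(nd.ni_vp);
	return (error);
}
#endif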
155
156 /*
157 * Open a file with authorization, updating the contents of the structures
158 * pointed to by ndp, fmodep, and vap as necessary to perform the requested
159 * operation. This function is used for both opens of existing files, and
160 * creation of new files.
161 *
162 * Parameters: ndp The namei data pointer describing the
163 * file
164 * fmodep A pointer to an int containing the mode
165 * information to be used for the open
166 * vap A pointer to the vnode attribute
167 * descriptor to be used for the open
168 *
169 * Indirect: * Contents of the data structures pointed
170 * to by the parameters are modified as
171 * necessary to the requested operation.
172 *
173 * Returns: 0 Success
174 * !0 errno value
175 *
176 * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order.
177 *
178 * The contents of '*ndp' will be modified, based on the other
179 * arguments to this function, and to return file and directory
180 * data necessary to satisfy the requested operation.
181 *
182 * If the file does not exist and we are creating it, then the
183 * O_TRUNC flag will be cleared in '*fmodep' to indicate to the
184 * caller that the file was not truncated.
185 *
186 * If the file exists and the O_EXCL flag was not specified, then
187 * the O_CREAT flag will be cleared in '*fmodep' to indicate to
188 * the caller that the existing file was merely opened rather
189 * than created.
190 *
191 * The contents of '*vap' will be modified as necessary to
192 * complete the operation, including setting of supported
193 * attributes, clearing of fields containing unsupported attributes
194 * in the request if the request proceeds without them, etc.
195 *
196 * XXX: This function is too complicated in acting on its arguments
197 *
198 * XXX: We should enumerate the possible errno values here, and where
199 * in the code they originated.
200 */
201 int
202 vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap)
203 {
204 struct vnode *vp;
205 struct vnode *dvp;
206 vfs_context_t ctx = ndp->ni_cnd.cn_context;
207 int error;
208 int fmode;
209 kauth_action_t action;
210
211 again:
212 vp = NULL;
213 dvp = NULL;
214 fmode = *fmodep;
215 if (fmode & O_CREAT) {
216 if ( (fmode & O_DIRECTORY) ) {
217 error = EINVAL;
218 goto out;
219 }
220 ndp->ni_cnd.cn_nameiop = CREATE;
221 /* Inherit USEDVP flag only */
222 ndp->ni_cnd.cn_flags &= USEDVP;
223 ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1;
224 #if NAMEDRSRCFORK
225 /* open calls are allowed for resource forks. */
226 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
227 #endif
228 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
229 ndp->ni_cnd.cn_flags |= FOLLOW;
230 if ( (error = namei(ndp)) )
231 goto out;
232 dvp = ndp->ni_dvp;
233 vp = ndp->ni_vp;
234
235 /* not found, create */
236 if (vp == NULL) {
237 /* must have attributes for a new file */
238 if (vap == NULL) {
239 error = EINVAL;
240 goto badcreate;
241 }
242
243 VATTR_SET(vap, va_type, VREG);
244 #if CONFIG_MACF
245 error = mac_vnode_check_create(ctx,
246 dvp, &ndp->ni_cnd, vap);
247 if (error)
248 goto badcreate;
249 #endif /* MAC */
250
251 /* authorize before creating */
252 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
253 goto badcreate;
254
255 if (fmode & O_EXCL)
256 vap->va_vaflags |= VA_EXCLUSIVE;
257 #if NAMEDRSRCFORK
258 if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) {
259 if ((error = vnode_makenamedstream(dvp, &ndp->ni_vp, XATTR_RESOURCEFORK_NAME, 0, ctx)) != 0)
260 goto badcreate;
261 } else
262 #endif
263 if ((error = vn_create(dvp, &ndp->ni_vp, &ndp->ni_cnd, vap, 0, ctx)) != 0)
264 goto badcreate;
265
266 vp = ndp->ni_vp;
267
268 if (vp) {
269 int update_flags = 0;
270
271 // Make sure the name & parent pointers are hooked up
272 if (vp->v_name == NULL)
273 update_flags |= VNODE_UPDATE_NAME;
274 if (vp->v_parent == NULLVP)
275 update_flags |= VNODE_UPDATE_PARENT;
276
277 if (update_flags)
278 vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags);
279
280 #if CONFIG_FSE
281 if (need_fsevent(FSE_CREATE_FILE, vp)) {
282 add_fsevent(FSE_CREATE_FILE, ctx,
283 FSE_ARG_VNODE, vp,
284 FSE_ARG_DONE);
285 }
286 #endif
287
288 }
289 /*
290 * nameidone has to happen before we vnode_put(dvp)
291 * and clear the ni_dvp field, since it may need
292 * to release the fs_nodelock on the dvp
293 */
294 badcreate:
295 nameidone(ndp);
296 ndp->ni_dvp = NULL;
297 vnode_put(dvp);
298
299 if (error) {
300 /*
301 * Check for a creation race.
302 */
303 if ((error == EEXIST) && !(fmode & O_EXCL)) {
304 goto again;
305 }
306 goto bad;
307 }
308 fmode &= ~O_TRUNC;
309 } else {
310 nameidone(ndp);
311 ndp->ni_dvp = NULL;
312 vnode_put(dvp);
313
314 if (fmode & O_EXCL) {
315 error = EEXIST;
316 goto bad;
317 }
318 fmode &= ~O_CREAT;
319 }
320 } else {
321 ndp->ni_cnd.cn_nameiop = LOOKUP;
322 /* Inherit USEDVP flag only */
323 ndp->ni_cnd.cn_flags &= USEDVP;
324 ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1;
325 #if NAMEDRSRCFORK
326 /* open calls are allowed for resource forks. */
327 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
328 #endif
329 if (fmode & O_NOFOLLOW || fmode & O_SYMLINK) {
330 ndp->ni_cnd.cn_flags &= ~FOLLOW;
331 }
332
333 if ( (error = namei(ndp)) )
334 goto out;
335 vp = ndp->ni_vp;
336 nameidone(ndp);
337 ndp->ni_dvp = NULL;
338
339 if ( (fmode & O_DIRECTORY) && vp->v_type != VDIR ) {
340 error = ENOTDIR;
341 goto bad;
342 }
343 }
344
345 if (vp->v_type == VSOCK && vp->v_tag != VT_FDESC) {
346 error = EOPNOTSUPP; /* Operation not supported on socket */
347 goto bad;
348 }
349
350 if (vp->v_type == VLNK && (fmode & O_NOFOLLOW) != 0) {
351 error = ELOOP; /* O_NOFOLLOW was specified and the target is a symbolic link */
352 goto bad;
353 }
354
355 /* authorize open of an existing file */
356 if ((fmode & O_CREAT) == 0) {
357
358 /* disallow write operations on directories */
359 if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
360 error = EISDIR;
361 goto bad;
362 }
363
364 #if CONFIG_MACF
365 error = mac_vnode_check_open(ctx, vp, fmode);
366 if (error)
367 goto bad;
368 #endif
369
370 /* compute action to be authorized */
371 action = 0;
372 if (fmode & FREAD) {
373 action |= KAUTH_VNODE_READ_DATA;
374 }
375 if (fmode & (FWRITE | O_TRUNC)) {
376 /*
377 * If we are writing, appending, and not truncating,
378 * indicate that we are appending so that if the
379 * UF_APPEND or SF_APPEND bits are set, we do not deny
380 * the open.
381 */
382 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
383 action |= KAUTH_VNODE_APPEND_DATA;
384 } else {
385 action |= KAUTH_VNODE_WRITE_DATA;
386 }
387 }
388 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
389 goto bad;
390
391
392 //
393 // if the vnode is tagged VOPENEVT and the current process
394 // has the P_CHECKOPENEVT flag set, then we OR the O_EVTONLY
395 // flag into the open mode so that this open won't count against
396 // the vnode when Carbon delete() does a vnode_isinuse() to see
397 // if a file is currently in use. This allows Spotlight
398 // importers to not interfere with Carbon apps that depend on
399 // the no-delete-if-busy semantics of Carbon delete().
400 //
401 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
402 fmode |= O_EVTONLY;
403 }
404
405 }
406
407 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
408 goto bad;
409 }
410 if ( (error = vnode_ref_ext(vp, fmode)) ) {
411 goto bad;
412 }
413
414 /* call out to allow 3rd party notification of open.
415 * Ignore result of kauth_authorize_fileop call.
416 */
417 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
418 (uintptr_t)vp, 0);
419
420 *fmodep = fmode;
421 return (0);
422 bad:
423 ndp->ni_vp = NULL;
424 if (vp) {
425 vnode_put(vp);
426 /*
427 * Check for a race against unlink. We had a vnode
428 * but according to vnode_authorize or VNOP_OPEN it
429 * no longer exists.
430 */
431 if ((error == ENOENT) && (*fmodep & O_CREAT)) {
432 goto again;
433 }
434 }
435 out:
436 return (error);
437 }
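
#if 0	/* Illustrative sketch (editor's addition), not part of the original file. */
/*
 * How a caller can interpret the *fmodep feedback documented above when
 * opening with O_CREAT: on success, O_CREAT still set means a new file was
 * created (and O_TRUNC was cleared), while O_CREAT cleared means an existing
 * file was opened.  The names below are hypothetical, and ndp is assumed to
 * have been set up with NDINIT() as in the earlier sketch; the vnode in
 * ndp->ni_vp must still be released later via vn_close()/vnode_put().
 */
static int
example_create_or_open(struct nameidata *ndp)
{
	struct vnode_attr va;
	int fmode = FREAD | FWRITE | O_CREAT | O_TRUNC;
	int error;

	VATTR_INIT(&va);
	VATTR_SET(&va, va_mode, S_IRUSR | S_IWUSR);

	if ((error = vn_open_auth(ndp, &fmode, &va)))
		return (error);

	if (fmode & O_CREAT) {
		/* created a new file; O_TRUNC was cleared for us */
	} else {
		/* opened (and truncated) a file that already existed */
	}
	return (0);
}
#endif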
438
439 #if vn_access_DEPRECATED
440 /*
441 * Authorize an action against a vnode. This has been the canonical way to
442 * ensure that the credential/process/etc. referenced by a vfs_context
443 * is granted the rights called out in 'mode' against the vnode 'vp'.
444 *
445 * Unfortunately, the use of VREAD/VWRITE/VEXEC makes it very difficult
446 * to add support for more rights. As such, this interface will be deprecated
447 * and callers will use vnode_authorize instead.
448 */
449 int
450 vn_access(vnode_t vp, int mode, vfs_context_t context)
451 {
452 kauth_action_t action;
453
454 action = 0;
455 if (mode & VREAD)
456 action |= KAUTH_VNODE_READ_DATA;
457 if (mode & VWRITE)
458 action |= KAUTH_VNODE_WRITE_DATA;
459 if (mode & VEXEC)
460 action |= KAUTH_VNODE_EXECUTE;
461
462 return(vnode_authorize(vp, NULL, action, context));
463 }
464 #endif /* vn_access_DEPRECATED */
465
466 /*
467 * Vnode close call
468 */
469 int
470 vn_close(struct vnode *vp, int flags, vfs_context_t ctx)
471 {
472 int error;
473
474 #if CONFIG_FSE
475 if (flags & FWASWRITTEN) {
476 if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) {
477 add_fsevent(FSE_CONTENT_MODIFIED, ctx,
478 FSE_ARG_VNODE, vp,
479 FSE_ARG_DONE);
480 }
481 }
482 #endif
483
484 #if NAMEDRSRCFORK
485 /* Sync data from resource fork shadow file if needed. */
486 if ((vp->v_flag & VISNAMEDSTREAM) &&
487 (vp->v_parent != NULLVP) &&
488 !(vp->v_parent->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS)) {
489 if (flags & FWASWRITTEN) {
490 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
491 }
492 }
493 #endif
494 error = VNOP_CLOSE(vp, flags, ctx);
495 (void)vnode_rele_ext(vp, flags, 0);
496
497 return (error);
498 }
499
500 static int
501 vn_read_swapfile(
502 struct vnode *vp,
503 uio_t uio)
504 {
505 static char *swap_read_zero_page = NULL;
506 int error;
507 off_t swap_count, this_count;
508 off_t file_end, read_end;
509 off_t prev_resid;
510
511 /*
512 * Reading from a swap file returns filler data, never the actual swapped-out contents.
513 */
514 error = 0;
515 swap_count = uio_resid(uio);
516
517 file_end = ubc_getsize(vp);
518 read_end = uio->uio_offset + uio_resid(uio);
519 if (uio->uio_offset >= file_end) {
520 /* uio starts after end of file: nothing to read */
521 swap_count = 0;
522 } else if (read_end > file_end) {
523 /* uio extends beyond end of file: stop before that */
524 swap_count -= (read_end - file_end);
525 }
526
527 while (swap_count > 0) {
528 if (swap_read_zero_page == NULL) {
529 char *my_zero_page;
530 int funnel_state;
531
532 /*
533 * Take kernel funnel so that only one thread
534 * sets up "swap_read_zero_page".
535 */
536 funnel_state = thread_funnel_set(kernel_flock, TRUE);
537
538 if (swap_read_zero_page == NULL) {
539 MALLOC(my_zero_page, char *, PAGE_SIZE,
540 M_TEMP, M_WAITOK);
541 memset(my_zero_page, '?', PAGE_SIZE);
542 /*
543 * Adding a newline character here
544 * and there prevents "less(1)", for
545 * example, from getting too confused
546 * about a file with one really really
547 * long line.
548 */
549 my_zero_page[PAGE_SIZE-1] = '\n';
550 if (swap_read_zero_page == NULL) {
551 swap_read_zero_page = my_zero_page;
552 } else {
553 FREE(my_zero_page, M_TEMP);
554 }
555 } else {
556 /*
557 * Someone else raced us here and won;
558 * just use their page.
559 */
560 }
561 thread_funnel_set(kernel_flock, funnel_state);
562 }
563
564 this_count = swap_count;
565 if (this_count > PAGE_SIZE) {
566 this_count = PAGE_SIZE;
567 }
568
569 prev_resid = uio_resid(uio);
570 error = uiomove((caddr_t) swap_read_zero_page,
571 this_count,
572 uio);
573 if (error) {
574 break;
575 }
576 swap_count -= (prev_resid - uio_resid(uio));
577 }
578
579 return error;
580 }
581 /*
582 * Package up an I/O request on a vnode into a uio and do it.
583 */
584 int
585 vn_rdwr(
586 enum uio_rw rw,
587 struct vnode *vp,
588 caddr_t base,
589 int len,
590 off_t offset,
591 enum uio_seg segflg,
592 int ioflg,
593 kauth_cred_t cred,
594 int *aresid,
595 proc_t p)
596 {
597 return vn_rdwr_64(rw,
598 vp,
599 (uint64_t)(uintptr_t)base,
600 (int64_t)len,
601 offset,
602 segflg,
603 ioflg,
604 cred,
605 aresid,
606 p);
607 }
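
#if 0	/* Illustrative sketch (editor's addition), not part of the original file. */
/*
 * Typical vn_rdwr() usage: read the first 512 bytes of a vnode into a
 * kernel buffer.  The buffer size and offset are arbitrary; IO_NODELOCKED
 * reflects the convention that the caller, not vn_rdwr(), handles any node
 * locking.  The helper name is hypothetical.
 */
static int
example_read_header(vnode_t vp, vfs_context_t ctx)
{
	char buf[512];
	int resid = 0;
	int error;

	error = vn_rdwr(UIO_READ, vp, (caddr_t)buf, (int)sizeof(buf),
	    0 /* offset */, UIO_SYSSPACE, IO_NODELOCKED,
	    vfs_context_ucred(ctx), &resid, vfs_context_proc(ctx));

	if (error == 0 && resid != 0) {
		/* short read: only (sizeof(buf) - resid) bytes are valid */
	}
	return (error);
}
#endif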
608
609
610 int
611 vn_rdwr_64(
612 enum uio_rw rw,
613 struct vnode *vp,
614 uint64_t base,
615 int64_t len,
616 off_t offset,
617 enum uio_seg segflg,
618 int ioflg,
619 kauth_cred_t cred,
620 int *aresid,
621 proc_t p)
622 {
623 uio_t auio;
624 int spacetype;
625 struct vfs_context context;
626 int error=0;
627 char uio_buf[ UIO_SIZEOF(1) ];
628
629 context.vc_thread = current_thread();
630 context.vc_ucred = cred;
631
632 if (UIO_SEG_IS_USER_SPACE(segflg)) {
633 spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
634 }
635 else {
636 spacetype = UIO_SYSSPACE;
637 }
638 auio = uio_createwithbuffer(1, offset, spacetype, rw,
639 &uio_buf[0], sizeof(uio_buf));
640 uio_addiov(auio, base, len);
641
642 #if CONFIG_MACF
643 /* XXXMAC
644 * IO_NOAUTH should be re-examined.
645 * Likely that mediation should be performed in caller.
646 */
647 if ((ioflg & IO_NOAUTH) == 0) {
648 /* passed cred is fp->f_cred */
649 if (rw == UIO_READ)
650 error = mac_vnode_check_read(&context, cred, vp);
651 else
652 error = mac_vnode_check_write(&context, cred, vp);
653 }
654 #endif
655
656 if (error == 0) {
657 if (rw == UIO_READ) {
658 if (vp->v_flag & VSWAP) {
659 error = vn_read_swapfile(vp, auio);
660 } else {
661 error = VNOP_READ(vp, auio, ioflg, &context);
662 }
663 } else {
664 error = VNOP_WRITE(vp, auio, ioflg, &context);
665 }
666 }
667
668 if (aresid)
669 // LP64todo - fix this
670 *aresid = uio_resid(auio);
671 else
672 if (uio_resid(auio) && error == 0)
673 error = EIO;
674 return (error);
675 }
676
677 /*
678 * File table vnode read routine.
679 */
680 static int
681 vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
682 {
683 struct vnode *vp;
684 int error, ioflag;
685 off_t count;
686
687 vp = (struct vnode *)fp->f_fglob->fg_data;
688 if ( (error = vnode_getwithref(vp)) ) {
689 return(error);
690 }
691
692 #if CONFIG_MACF
693 error = mac_vnode_check_read(ctx, vfs_context_ucred(ctx), vp);
694 if (error) {
695 (void)vnode_put(vp);
696 return (error);
697 }
698 #endif
699
700 ioflag = 0;
701 if (fp->f_fglob->fg_flag & FNONBLOCK)
702 ioflag |= IO_NDELAY;
703 if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
704 ioflag |= IO_NOCACHE;
705 if (fp->f_fglob->fg_flag & FNORDAHEAD)
706 ioflag |= IO_RAOFF;
707
708 if ((flags & FOF_OFFSET) == 0)
709 uio->uio_offset = fp->f_fglob->fg_offset;
710 count = uio_resid(uio);
711
712 if (vp->v_flag & VSWAP) {
713 /* special case for swap files */
714 error = vn_read_swapfile(vp, uio);
715 } else {
716 error = VNOP_READ(vp, uio, ioflag, ctx);
717 }
718 if ((flags & FOF_OFFSET) == 0)
719 fp->f_fglob->fg_offset += count - uio_resid(uio);
720
721 (void)vnode_put(vp);
722 return (error);
723 }
724
725
726 /*
727 * File table vnode write routine.
728 */
729 static int
730 vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
731 {
732 struct vnode *vp;
733 int error, ioflag;
734 off_t count;
735 int clippedsize = 0;
736 int partialwrite=0;
737 int residcount, oldcount;
738 proc_t p = vfs_context_proc(ctx);
739
740 count = 0;
741 vp = (struct vnode *)fp->f_fglob->fg_data;
742 if ( (error = vnode_getwithref(vp)) ) {
743 return(error);
744 }
745
746 #if CONFIG_MACF
747 error = mac_vnode_check_write(ctx, vfs_context_ucred(ctx), vp);
748 if (error) {
749 (void)vnode_put(vp);
750 return (error);
751 }
752 #endif
753
754 ioflag = IO_UNIT;
755 if (vp->v_type == VREG && (fp->f_fglob->fg_flag & O_APPEND))
756 ioflag |= IO_APPEND;
757 if (fp->f_fglob->fg_flag & FNONBLOCK)
758 ioflag |= IO_NDELAY;
759 if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
760 ioflag |= IO_NOCACHE;
761 if ((fp->f_fglob->fg_flag & O_FSYNC) ||
762 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
763 ioflag |= IO_SYNC;
764
765 if ((flags & FOF_OFFSET) == 0) {
766 uio->uio_offset = fp->f_fglob->fg_offset;
767 count = uio_resid(uio);
768 }
769 if (((flags & FOF_OFFSET) == 0) &&
770 vfs_context_proc(ctx) && (vp->v_type == VREG) &&
771 (((rlim_t)(uio->uio_offset + uio_uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) ||
772 ((rlim_t)uio_uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)))) {
773 /*
774 * If the requested residual would cause us to go past the
775 * administrative limit, then we need to adjust the residual
776 * down to cause fewer bytes than requested to be written. If
777 * we can't do that (e.g. the residual is already 1 byte),
778 * then we fail the write with EFBIG.
779 */
780 residcount = uio_uio_resid(uio);
781 if ((rlim_t)(uio->uio_offset + uio_uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
782 clippedsize = (uio->uio_offset + uio_uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
783 } else if ((rlim_t)uio_uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)) {
784 clippedsize = (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset);
785 }
786 if (clippedsize >= residcount) {
787 psignal(p, SIGXFSZ);
788 vnode_put(vp);
789 return (EFBIG);
790 }
791 partialwrite = 1;
792 uio_setresid(uio, residcount-clippedsize);
793 }
794 if ((flags & FOF_OFFSET) != 0) {
795 /* for pwrite, append should be ignored */
796 ioflag &= ~IO_APPEND;
797 if (p && (vp->v_type == VREG) &&
798 ((rlim_t)uio->uio_offset >= p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
799 psignal(p, SIGXFSZ);
800 vnode_put(vp);
801 return (EFBIG);
802 }
803 if (p && (vp->v_type == VREG) &&
804 ((rlim_t)(uio->uio_offset + uio_uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
805 //Debugger("vn_bwrite:overstepping the bounds");
806 residcount = uio_uio_resid(uio);
807 clippedsize = (uio->uio_offset + uio_uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
808 partialwrite = 1;
809 uio_setresid(uio, residcount-clippedsize);
810 }
811 }
812
813 error = VNOP_WRITE(vp, uio, ioflag, ctx);
814
815 if (partialwrite) {
816 oldcount = uio_resid(uio);
817 uio_setresid(uio, oldcount + clippedsize);
818 }
819
820 if ((flags & FOF_OFFSET) == 0) {
821 if (ioflag & IO_APPEND)
822 fp->f_fglob->fg_offset = uio->uio_offset;
823 else
824 fp->f_fglob->fg_offset += count - uio_resid(uio);
825 }
826
827 /*
828 * Set the credentials on successful writes
829 */
830 if ((error == 0) && (vp->v_tag == VT_NFS) && (UBCINFOEXISTS(vp))) {
831 /*
832 * When called from aio subsystem, we only have the proc from
833 * which to get the credential, at this point, so use that
834 * instead. This means aio functions are incompatible with
835 * per-thread credentials (aio operations are proxied). We
836 * can't easily correct the aio vs. settid race in this case
837 * anyway, so we disallow it.
838 */
839 if ((flags & FOF_PCRED) == 0) {
840 ubc_setthreadcred(vp, p, current_thread());
841 } else {
842 ubc_setcred(vp, p);
843 }
844 }
845 (void)vnode_put(vp);
846 return (error);
847 }
848
849 /*
850 * File table vnode stat routine.
851 *
852 * Returns: 0 Success
853 * EBADF
854 * ENOMEM
855 * vnode_getattr:???
856 */
857 int
858 vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx)
859 {
860 struct vnode_attr va;
861 int error;
862 u_short mode;
863 kauth_filesec_t fsec;
864 struct stat *sb = (struct stat *)0; /* warning avoidance; protected by isstat64 */
865 struct stat64 *sb64 = (struct stat64 *)0; /* warning avoidance; protected by isstat64 */
866
867 if (isstat64 != 0)
868 sb64 = (struct stat64 *)sbptr;
869 else
870 sb = (struct stat *)sbptr;
871
872 VATTR_INIT(&va);
873 VATTR_WANTED(&va, va_fsid);
874 VATTR_WANTED(&va, va_fileid);
875 VATTR_WANTED(&va, va_mode);
876 VATTR_WANTED(&va, va_type);
877 VATTR_WANTED(&va, va_nlink);
878 VATTR_WANTED(&va, va_uid);
879 VATTR_WANTED(&va, va_gid);
880 VATTR_WANTED(&va, va_rdev);
881 VATTR_WANTED(&va, va_data_size);
882 VATTR_WANTED(&va, va_access_time);
883 VATTR_WANTED(&va, va_modify_time);
884 VATTR_WANTED(&va, va_change_time);
885 VATTR_WANTED(&va, va_create_time);
886 VATTR_WANTED(&va, va_flags);
887 VATTR_WANTED(&va, va_gen);
888 VATTR_WANTED(&va, va_iosize);
889 /* lower layers will synthesise va_total_alloc from va_data_size if required */
890 VATTR_WANTED(&va, va_total_alloc);
891 if (xsec != NULL) {
892 VATTR_WANTED(&va, va_uuuid);
893 VATTR_WANTED(&va, va_guuid);
894 VATTR_WANTED(&va, va_acl);
895 }
896 error = vnode_getattr(vp, &va, ctx);
897 if (error)
898 goto out;
899 /*
900 * Copy from vattr table
901 */
902 if (isstat64 != 0) {
903 sb64->st_dev = va.va_fsid;
904 sb64->st_ino = (ino64_t)va.va_fileid;
905
906 } else {
907 sb->st_dev = va.va_fsid;
908 sb->st_ino = (ino_t)va.va_fileid;
909 }
910 mode = va.va_mode;
911 switch (vp->v_type) {
912 case VREG:
913 mode |= S_IFREG;
914 break;
915 case VDIR:
916 mode |= S_IFDIR;
917 break;
918 case VBLK:
919 mode |= S_IFBLK;
920 break;
921 case VCHR:
922 mode |= S_IFCHR;
923 break;
924 case VLNK:
925 mode |= S_IFLNK;
926 break;
927 case VSOCK:
928 mode |= S_IFSOCK;
929 break;
930 case VFIFO:
931 mode |= S_IFIFO;
932 break;
933 default:
934 error = EBADF;
935 goto out;
936 };
937 if (isstat64 != 0) {
938 sb64->st_mode = mode;
939 sb64->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
940 sb64->st_uid = va.va_uid;
941 sb64->st_gid = va.va_gid;
942 sb64->st_rdev = va.va_rdev;
943 sb64->st_size = va.va_data_size;
944 sb64->st_atimespec = va.va_access_time;
945 sb64->st_mtimespec = va.va_modify_time;
946 sb64->st_ctimespec = va.va_change_time;
947 sb64->st_birthtimespec =
948 VATTR_IS_SUPPORTED(&va, va_create_time) ? va.va_create_time : va.va_change_time;
949 sb64->st_blksize = va.va_iosize;
950 sb64->st_flags = va.va_flags;
951 sb64->st_blocks = roundup(va.va_total_alloc, 512) / 512;
952 } else {
953 sb->st_mode = mode;
954 sb->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
955 sb->st_uid = va.va_uid;
956 sb->st_gid = va.va_gid;
957 sb->st_rdev = va.va_rdev;
958 sb->st_size = va.va_data_size;
959 sb->st_atimespec = va.va_access_time;
960 sb->st_mtimespec = va.va_modify_time;
961 sb->st_ctimespec = va.va_change_time;
962 sb->st_blksize = va.va_iosize;
963 sb->st_flags = va.va_flags;
964 sb->st_blocks = roundup(va.va_total_alloc, 512) / 512;
965 }
966
967 /* if we're interested in extended security data and we got an ACL */
968 if (xsec != NULL) {
969 if (!VATTR_IS_SUPPORTED(&va, va_acl) &&
970 !VATTR_IS_SUPPORTED(&va, va_uuuid) &&
971 !VATTR_IS_SUPPORTED(&va, va_guuid)) {
972 *xsec = KAUTH_FILESEC_NONE;
973 } else {
974
975 if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
976 fsec = kauth_filesec_alloc(va.va_acl->acl_entrycount);
977 } else {
978 fsec = kauth_filesec_alloc(0);
979 }
980 if (fsec == NULL) {
981 error = ENOMEM;
982 goto out;
983 }
984 fsec->fsec_magic = KAUTH_FILESEC_MAGIC;
985 if (VATTR_IS_SUPPORTED(&va, va_uuuid)) {
986 fsec->fsec_owner = va.va_uuuid;
987 } else {
988 fsec->fsec_owner = kauth_null_guid;
989 }
990 if (VATTR_IS_SUPPORTED(&va, va_guuid)) {
991 fsec->fsec_group = va.va_guuid;
992 } else {
993 fsec->fsec_group = kauth_null_guid;
994 }
995 if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
996 bcopy(va.va_acl, &(fsec->fsec_acl), KAUTH_ACL_COPYSIZE(va.va_acl));
997 } else {
998 fsec->fsec_acl.acl_entrycount = KAUTH_FILESEC_NOACL;
999 }
1000 *xsec = fsec;
1001 }
1002 }
1003
1004 /* Do not give the generation number out to unprivileged users */
1005 if (va.va_gen && !vfs_context_issuser(ctx)) {
1006 if (isstat64 != 0)
1007 sb64->st_gen = 0;
1008 else
1009 sb->st_gen = 0;
1010 } else {
1011 if (isstat64 != 0)
1012 sb64->st_gen = va.va_gen;
1013 else
1014 sb->st_gen = va.va_gen;
1015 }
1016
1017 error = 0;
1018 out:
1019 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL)
1020 kauth_acl_free(va.va_acl);
1021 return (error);
1022 }
1023
1024 int
1025 vn_stat(struct vnode *vp, void *sb, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx)
1026 {
1027 int error;
1028
1029 #if CONFIG_MACF
1030 error = mac_vnode_check_stat(ctx, NOCRED, vp);
1031 if (error)
1032 return (error);
1033 #endif
1034
1035 /* authorize */
1036 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_ATTRIBUTES | KAUTH_VNODE_READ_SECURITY, ctx)) != 0)
1037 return(error);
1038
1039 /* actual stat */
1040 return(vn_stat_noauth(vp, sb, xsec, isstat64, ctx));
1041 }
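
#if 0	/* Illustrative sketch (editor's addition), not part of the original file. */
/*
 * Fetching basic attributes through vn_stat(): passing xsec == NULL and
 * isstat64 == 0 requests a plain 32-bit struct stat and skips the extended
 * security data path.  The helper name is hypothetical.
 */
static int
example_get_size(vnode_t vp, off_t *sizep, vfs_context_t ctx)
{
	struct stat sb;
	int error;

	error = vn_stat(vp, &sb, NULL /* no kauth_filesec_t wanted */,
	    0 /* isstat64 */, ctx);
	if (error == 0)
		*sizep = sb.st_size;
	return (error);
}
#endif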
1042
1043
1044 /*
1045 * File table vnode ioctl routine.
1046 */
1047 static int
1048 vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
1049 {
1050 struct vnode *vp = ((struct vnode *)fp->f_fglob->fg_data);
1051 off_t file_size;
1052 int error;
1053 struct vnode *ttyvp;
1054 int funnel_state;
1055 struct session * sessp;
1056
1057 if ( (error = vnode_getwithref(vp)) ) {
1058 return(error);
1059 }
1060
1061 #if CONFIG_MACF
1062 error = mac_vnode_check_ioctl(ctx, vp, com);
1063 if (error)
1064 goto out;
1065 #endif
1066
1067 switch (vp->v_type) {
1068 case VREG:
1069 case VDIR:
1070 if (com == FIONREAD) {
1071 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
1072 goto out;
1073 *(int *)data = file_size - fp->f_fglob->fg_offset;
1074 goto out;
1075 }
1076 if (com == FIONBIO || com == FIOASYNC) { /* XXX */
1077 goto out;
1078 }
1079 /* fall into ... */
1080
1081 default:
1082 error = ENOTTY;
1083 goto out;
1084
1085 case VFIFO:
1086 case VCHR:
1087 case VBLK:
1088
1089 /* Should not be able to set block size from user space */
1090 if (com == DKIOCSETBLOCKSIZE) {
1091 error = EPERM;
1092 goto out;
1093 }
1094
1095 if (com == FIODTYPE) {
1096 if (vp->v_type == VBLK) {
1097 if (major(vp->v_rdev) >= nblkdev) {
1098 error = ENXIO;
1099 goto out;
1100 }
1101 *(int *)data = bdevsw[major(vp->v_rdev)].d_type;
1102
1103 } else if (vp->v_type == VCHR) {
1104 if (major(vp->v_rdev) >= nchrdev) {
1105 error = ENXIO;
1106 goto out;
1107 }
1108 *(int *)data = cdevsw[major(vp->v_rdev)].d_type;
1109 } else {
1110 error = ENOTTY;
1111 goto out;
1112 }
1113 goto out;
1114 }
1115 error = VNOP_IOCTL(vp, com, data, fp->f_fglob->fg_flag, ctx);
1116
1117 if (error == 0 && com == TIOCSCTTY) {
1118 vnode_ref(vp);
1119
1120 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1121 sessp = proc_session(vfs_context_proc(ctx));
1122
1123 session_lock(sessp);
1124 ttyvp = sessp->s_ttyvp;
1125 sessp->s_ttyvp = vp;
1126 sessp->s_ttyvid = vnode_vid(vp);
1127 session_unlock(sessp);
1128 session_rele(sessp);
1129 thread_funnel_set(kernel_flock, funnel_state);
1130
1131 if (ttyvp)
1132 vnode_rele(ttyvp);
1133 }
1134 }
1135 out:
1136 (void)vnode_put(vp);
1137 return(error);
1138 }
1139
1140 /*
1141 * File table vnode select routine.
1142 */
1143 static int
1144 vn_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx)
1145 {
1146 int error;
1147 struct vnode * vp = (struct vnode *)fp->f_fglob->fg_data;
1148 struct vfs_context context;
1149
1150 if ( (error = vnode_getwithref(vp)) == 0 ) {
1151 context.vc_thread = current_thread();
1152 context.vc_ucred = fp->f_fglob->fg_cred;
1153
1154 #if CONFIG_MACF
1155 /*
1156 * XXX We should use a per thread credential here; minimally,
1157 * XXX the process credential should have a persistent
1158 * XXX reference on it before being passed in here.
1159 */
1160 error = mac_vnode_check_select(ctx, vp, which);
1161 if (error == 0)
1162 #endif
1163 error = VNOP_SELECT(vp, which, fp->f_fglob->fg_flag, wql, ctx);
1164
1165 (void)vnode_put(vp);
1166 }
1167 return(error);
1168
1169 }
1170
1171 /*
1172 * Check that the vnode is still valid, and if so
1173 * acquire requested lock.
1174 */
1175 int
1176 vn_lock(__unused vnode_t vp, __unused int flags, __unused proc_t p)
1177 {
1178 return (0);
1179 }
1180
1181 /*
1182 * File table vnode close routine.
1183 */
1184 static int
1185 vn_closefile(struct fileglob *fg, vfs_context_t ctx)
1186 {
1187 struct vnode *vp = (struct vnode *)fg->fg_data;
1188 int error;
1189 struct flock lf;
1190
1191 if ( (error = vnode_getwithref(vp)) == 0 ) {
1192
1193 if ((fg->fg_flag & FHASLOCK) && fg->fg_type == DTYPE_VNODE) {
1194 lf.l_whence = SEEK_SET;
1195 lf.l_start = 0;
1196 lf.l_len = 0;
1197 lf.l_type = F_UNLCK;
1198
1199 (void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, ctx);
1200 }
1201 error = vn_close(vp, fg->fg_flag, ctx);
1202
1203 (void)vnode_put(vp);
1204 }
1205 return(error);
1206 }
1207
1208 /*
1209 * Returns: 0 Success
1210 * VNOP_PATHCONF:???
1211 */
1212 int
1213 vn_pathconf(vnode_t vp, int name, register_t *retval, vfs_context_t ctx)
1214 {
1215 int error = 0;
1216
1217 switch(name) {
1218 case _PC_EXTENDED_SECURITY_NP:
1219 *retval = vfs_extendedsecurity(vnode_mount(vp)) ? 1 : 0;
1220 break;
1221 case _PC_AUTH_OPAQUE_NP:
1222 *retval = vfs_authopaque(vnode_mount(vp));
1223 break;
1224 case _PC_2_SYMLINKS:
1225 *retval = 1; /* XXX NOTSUP on MSDOS, etc. */
1226 break;
1227 case _PC_ALLOC_SIZE_MIN:
1228 *retval = 1; /* XXX lie: 1 byte */
1229 break;
1230 case _PC_ASYNC_IO: /* unistd.h: _POSIX_ASYNCHRONOUS_IO */
1231 *retval = 1; /* [AIO] option is supported */
1232 break;
1233 case _PC_PRIO_IO: /* unistd.h: _POSIX_PRIORITIZED_IO */
1234 *retval = 0; /* [PIO] option is not supported */
1235 break;
1236 case _PC_REC_INCR_XFER_SIZE:
1237 *retval = 4096; /* XXX go from MIN to MAX 4K at a time */
1238 break;
1239 case _PC_REC_MIN_XFER_SIZE:
1240 *retval = 4096; /* XXX recommend 4K minimum reads/writes */
1241 break;
1242 case _PC_REC_MAX_XFER_SIZE:
1243 *retval = 65536; /* XXX recommend 64K maximum reads/writes */
1244 break;
1245 case _PC_REC_XFER_ALIGN:
1246 *retval = 4096; /* XXX recommend page aligned buffers */
1247 break;
1248 case _PC_SYMLINK_MAX:
1249 *retval = 255; /* Minimum acceptable POSIX value */
1250 break;
1251 case _PC_SYNC_IO: /* unistd.h: _POSIX_SYNCHRONIZED_IO */
1252 *retval = 0; /* [SIO] option is not supported */
1253 break;
1254 default:
1255 error = VNOP_PATHCONF(vp, name, retval, ctx);
1256 break;
1257 }
1258
1259 return (error);
1260 }
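
#if 0	/* Illustrative sketch (editor's addition), not part of the original file. */
/*
 * Names that are not special-cased in the switch above, such as
 * _PC_NAME_MAX, fall through to the filesystem via VNOP_PATHCONF().
 * The helper name is hypothetical.
 */
static int
example_name_max(vnode_t vp, register_t *valp, vfs_context_t ctx)
{
	return (vn_pathconf(vp, _PC_NAME_MAX, valp, ctx));
}
#endif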
1261
1262 static int
1263 vn_kqfilt_add(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
1264 {
1265 struct vnode *vp = (struct vnode *)fp->f_fglob->fg_data;
1266 int error;
1267 int funnel_state;
1268
1269 if ( (error = vnode_getwithref(vp)) == 0 ) {
1270
1271 #if CONFIG_MACF
1272 error = mac_vnode_check_kqfilter(ctx, fp->f_fglob->fg_cred, kn, vp);
1273 if (error) {
1274 (void)vnode_put(vp);
1275 return (error);
1276 }
1277 #endif
1278
1279 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1280 error = VNOP_KQFILT_ADD(vp, kn, ctx);
1281 thread_funnel_set(kernel_flock, funnel_state);
1282
1283 (void)vnode_put(vp);
1284 }
1285 return (error);
1286 }
1287
1288 #if 0
1289 /* No one calls this yet. */
1290 static int
1291 vn_kqfilt_remove(vp, ident, ctx)
1292 struct vnode *vp;
1293 uintptr_t ident;
1294 vfs_context_t ctx;
1295 {
1296 int error;
1297 int funnel_state;
1298
1299 if ( (error = vnode_getwithref(vp)) == 0 ) {
1300
1301 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1302 error = VNOP_KQFILT_REMOVE(vp, ident, ctx);
1303 thread_funnel_set(kernel_flock, funnel_state);
1304
1305 (void)vnode_put(vp);
1306 }
1307 return (error);
1308 }
1309 #endif