[apple/xnu.git] / bsd / vfs / vfs_vnops.c (xnu-1228.7.58)
1 /*
2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)vfs_vnops.c 8.14 (Berkeley) 6/15/95
67 *
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 #include <sys/param.h>
77 #include <sys/types.h>
78 #include <sys/systm.h>
79 #include <sys/kernel.h>
80 #include <sys/file_internal.h>
81 #include <sys/stat.h>
82 #include <sys/proc_internal.h>
83 #include <sys/kauth.h>
84 #include <sys/mount_internal.h>
85 #include <sys/namei.h>
86 #include <sys/vnode_internal.h>
87 #include <sys/ioctl.h>
88 #include <sys/tty.h>
89 /* Temporary workaround for ubc.h until <rdar://4714366> is resolved */
90 #define ubc_setcred ubc_setcred_deprecated
91 #include <sys/ubc.h>
92 #undef ubc_setcred
93 int ubc_setcred(struct vnode *, struct proc *);
94 #include <sys/conf.h>
95 #include <sys/disk.h>
96 #include <sys/fsevents.h>
97 #include <sys/kdebug.h>
98 #include <sys/xattr.h>
99 #include <sys/ubc_internal.h>
100 #include <sys/uio_internal.h>
101 #include <sys/resourcevar.h>
102 #include <sys/signalvar.h>
103
104 #include <vm/vm_kern.h>
105 #include <vm/vm_map.h>
106
107 #include <miscfs/specfs/specdev.h>
108
109 #if CONFIG_MACF
110 #include <security/mac_framework.h>
111 #endif
112
113
114 static int vn_closefile(struct fileglob *fp, vfs_context_t ctx);
115 static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data,
116 vfs_context_t ctx);
117 static int vn_read(struct fileproc *fp, struct uio *uio, int flags,
118 vfs_context_t ctx);
119 static int vn_write(struct fileproc *fp, struct uio *uio, int flags,
120 vfs_context_t ctx);
121 static int vn_select( struct fileproc *fp, int which, void * wql,
122 vfs_context_t ctx);
123 static int vn_kqfilt_add(struct fileproc *fp, struct knote *kn,
124 vfs_context_t ctx);
125 #if 0
126 static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident,
127 vfs_context_t ctx);
128 #endif
129
130 struct fileops vnops =
131 { vn_read, vn_write, vn_ioctl, vn_select, vn_closefile, vn_kqfilt_add, NULL };
132
133 /*
134 * Common code for vnode open operations.
135 * Check permissions, and call the VNOP_OPEN or VNOP_CREATE routine.
136 *
137 * XXX the profusion of interfaces here is probably a bad thing.
138 */
139 int
140 vn_open(struct nameidata *ndp, int fmode, int cmode)
141 {
142 return(vn_open_modflags(ndp, &fmode, cmode));
143 }
144
145 int
146 vn_open_modflags(struct nameidata *ndp, int *fmodep, int cmode)
147 {
148 struct vnode_attr va;
149
150 VATTR_INIT(&va);
151 VATTR_SET(&va, va_mode, cmode);
152
153 return(vn_open_auth(ndp, fmodep, &va));
154 }
155
156 /*
157 * Open a file with authorization, updating the contents of the structures
158 * pointed to by ndp, fmodep, and vap as necessary to perform the requested
159 * operation. This function is used for both opens of existing files, and
160 * creation of new files.
161 *
162 * Parameters: ndp The nami data pointer describing the
163 * file
164 * fmodep A pointer to an int containg the mode
165 * information to be used for the open
166 * vap A pointer to the vnode attribute
167 * descriptor to be used for the open
168 *
169 * Indirect: * Contents of the data structures pointed
170 * to by the parameters are modified as
171 * necessary to the requested operation.
172 *
173 * Returns: 0 Success
174 * !0 errno value
175 *
176 * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order.
177 *
178 * The contents of '*ndp' will be modified, based on the other
179 * arguments to this function, and to return file and directory
180 * data necessary to satisfy the requested operation.
181 *
182 * If the file does not exist and we are creating it, then the
183 * O_TRUNC flag will be cleared in '*fmodep' to indicate to the
184 * caller that the file was not truncated.
185 *
186 * If the file exists and the O_EXCL flag was not specified, then
187 * the O_CREAT flag will be cleared in '*fmodep' to indicate to
188 * the caller that the existing file was merely opened rather
189 * than created.
190 *
191 * The contents of '*vap' will be modified as necessary to
192 * complete the operation, including setting of supported
193 * attributes and clearing of fields containing unsupported attributes
194 * in the request, if the request proceeds without them, etc.
195 *
196 * XXX: This function is too complicated in acting on its arguments.
197 *
198 * XXX: We should enumerate the possible errno values here, and where
199 * in the code they originated.
200 */
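/*
 * Illustrative sketch (not part of this file): a minimal in-kernel caller
 * of the vn_open()/vn_close() pair above. "path" and "ctx" are assumed to
 * be in scope, and the NDINIT() arguments shown are an assumption; the
 * exact form is release-dependent, see <sys/namei.h>. Iocount/usecount
 * handling is omitted for brevity.
 *
 *	struct nameidata nd;
 *	vnode_t vp;
 *	int fmode = FREAD;
 *	int error;
 *
 *	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, ctx);	// hypothetical args
 *	if ((error = vn_open(&nd, fmode, 0)) == 0) {
 *		vp = nd.ni_vp;
 *		// ... read from vp, e.g. with vn_rdwr() ...
 *		error = vn_close(vp, fmode, ctx);
 *	}
 */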
201 int
202 vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap)
203 {
204 struct vnode *vp;
205 struct vnode *dvp;
206 vfs_context_t ctx = ndp->ni_cnd.cn_context;
207 int error;
208 int fmode;
209 kauth_action_t action;
210
211 again:
212 vp = NULL;
213 dvp = NULL;
214 fmode = *fmodep;
215 if (fmode & O_CREAT) {
216 if ( (fmode & O_DIRECTORY) ) {
217 error = EINVAL;
218 goto out;
219 }
220 ndp->ni_cnd.cn_nameiop = CREATE;
221 /* Inherit USEDVP flag only */
222 ndp->ni_cnd.cn_flags &= USEDVP;
223 ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1;
224 #if NAMEDRSRCFORK
225 /* open calls are allowed for resource forks. */
226 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
227 #endif
228 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
229 ndp->ni_cnd.cn_flags |= FOLLOW;
230 if ( (error = namei(ndp)) )
231 goto out;
232 dvp = ndp->ni_dvp;
233 vp = ndp->ni_vp;
234
235 /* not found, create */
236 if (vp == NULL) {
237 /* must have attributes for a new file */
238 if (vap == NULL) {
239 error = EINVAL;
240 goto badcreate;
241 }
242
243 VATTR_SET(vap, va_type, VREG);
244 #if CONFIG_MACF
245 error = mac_vnode_check_create(ctx,
246 dvp, &ndp->ni_cnd, vap);
247 if (error)
248 goto badcreate;
249 #endif /* MAC */
250
251 /* authorize before creating */
252 if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
253 goto badcreate;
254
255 if (fmode & O_EXCL)
256 vap->va_vaflags |= VA_EXCLUSIVE;
257 #if NAMEDRSRCFORK
258 if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) {
259 if ((error = vnode_makenamedstream(dvp, &ndp->ni_vp, XATTR_RESOURCEFORK_NAME, 0, ctx)) != 0)
260 goto badcreate;
261 } else
262 #endif
263 if ((error = vn_create(dvp, &ndp->ni_vp, &ndp->ni_cnd, vap, 0, ctx)) != 0)
264 goto badcreate;
265
266 vp = ndp->ni_vp;
267
268 if (vp) {
269 int update_flags = 0;
270
271 // Make sure the name & parent pointers are hooked up
272 if (vp->v_name == NULL)
273 update_flags |= VNODE_UPDATE_NAME;
274 if (vp->v_parent == NULLVP)
275 update_flags |= VNODE_UPDATE_PARENT;
276
277 if (update_flags)
278 vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags);
279
280 #if CONFIG_FSE
281 if (need_fsevent(FSE_CREATE_FILE, vp)) {
282 add_fsevent(FSE_CREATE_FILE, ctx,
283 FSE_ARG_VNODE, vp,
284 FSE_ARG_DONE);
285 }
286 #endif
287
288 }
289 /*
290 * nameidone has to happen before we vnode_put(dvp)
291 * and clear the ni_dvp field, since it may need
292 * to release the fs_nodelock on the dvp
293 */
294 badcreate:
295 nameidone(ndp);
296 ndp->ni_dvp = NULL;
297 vnode_put(dvp);
298
299 if (error) {
300 /*
301 * Check for a creation race.
302 */
303 if ((error == EEXIST) && !(fmode & O_EXCL)) {
304 goto again;
305 }
306 goto bad;
307 }
308 fmode &= ~O_TRUNC;
309 } else {
310 nameidone(ndp);
311 ndp->ni_dvp = NULL;
312 vnode_put(dvp);
313
314 if (fmode & O_EXCL) {
315 error = EEXIST;
316 goto bad;
317 }
318 fmode &= ~O_CREAT;
319 }
320 } else {
321 ndp->ni_cnd.cn_nameiop = LOOKUP;
322 /* Inherit USEDVP flag only */
323 ndp->ni_cnd.cn_flags &= USEDVP;
324 ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1;
325 #if NAMEDRSRCFORK
326 /* open calls are allowed for resource forks. */
327 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
328 #endif
329 if (fmode & O_NOFOLLOW || fmode & O_SYMLINK) {
330 ndp->ni_cnd.cn_flags &= ~FOLLOW;
331 }
332
333 if ( (error = namei(ndp)) )
334 goto out;
335 vp = ndp->ni_vp;
336 nameidone(ndp);
337 ndp->ni_dvp = NULL;
338
339 if ( (fmode & O_DIRECTORY) && vp->v_type != VDIR ) {
340 error = ENOTDIR;
341 goto bad;
342 }
343 }
344
345 if (vp->v_type == VSOCK && vp->v_tag != VT_FDESC) {
346 error = EOPNOTSUPP; /* Operation not supported on socket */
347 goto bad;
348 }
349
350 if (vp->v_type == VLNK && (fmode & O_NOFOLLOW) != 0) {
351 error = ELOOP; /* O_NOFOLLOW was specified and the target is a symbolic link */
352 goto bad;
353 }
354
355 /* authorize open of an existing file */
356 if ((fmode & O_CREAT) == 0) {
357
358 /* disallow write operations on directories */
359 if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) {
360 error = EISDIR;
361 goto bad;
362 }
363
364 #if CONFIG_MACF
365 error = mac_vnode_check_open(ctx, vp, fmode);
366 if (error)
367 goto bad;
368 #endif
369
370 /* compute action to be authorized */
371 action = 0;
372 if (fmode & FREAD) {
373 action |= KAUTH_VNODE_READ_DATA;
374 }
375 if (fmode & (FWRITE | O_TRUNC)) {
376 /*
377 * If we are writing, appending, and not truncating,
378 * indicate that we are appending so that if the
379 * UF_APPEND or SF_APPEND bits are set, we do not deny
380 * the open.
381 */
382 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
383 action |= KAUTH_VNODE_APPEND_DATA;
384 } else {
385 action |= KAUTH_VNODE_WRITE_DATA;
386 }
387 }
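/*
 * For example, per the mapping above: an open with FREAD|FWRITE requests
 * KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, while a write-only open
 * with O_APPEND and no O_TRUNC requests only KAUTH_VNODE_APPEND_DATA,
 * which keeps files marked UF_APPEND/SF_APPEND writable through
 * append-only opens.
 */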
388 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
389 goto bad;
390
391
392 //
393 // if the vnode is tagged VOPENEVT and the current process
394 // has the P_CHECKOPENEVT flag set, then we OR the O_EVTONLY flag
395 // into the open mode so that this open won't count against the
396 // vnode when Carbon delete() does a vnode_isinuse() to see if a
397 // file is currently in use. This allows Spotlight importers to
398 // avoid interfering with Carbon apps that depend on the
399 // no-delete-if-busy semantics of Carbon delete().
400 //
401 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
402 fmode |= O_EVTONLY;
403 }
404
405 }
406
407 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
408 goto bad;
409 }
410 if ( (error = vnode_ref_ext(vp, fmode)) ) {
411 goto bad;
412 }
413
414 /* call out to allow 3rd party notification of open.
415 * Ignore result of kauth_authorize_fileop call.
416 */
417 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
418 (uintptr_t)vp, 0);
419
420 *fmodep = fmode;
421 return (0);
422 bad:
423 ndp->ni_vp = NULL;
424 if (vp) {
425 vnode_put(vp);
426 /*
427 * Check for a race against unlink. We had a vnode
428 * but according to vnode_authorize or VNOP_OPEN it
429 * no longer exists.
430 *
431 * EREDRIVEOPEN: means that we were hit by the tty allocation race.
432 */
433 if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN)) {
434 goto again;
435 }
436 }
437 out:
438 return (error);
439 }
440
441 #if vn_access_DEPRECATED
442 /*
443 * Authorize an action against a vnode. This has been the canonical way to
444 * ensure that the credential/process/etc. referenced by a vfs_context
445 * is granted the rights called out in 'mode' against the vnode 'vp'.
446 *
447 * Unfortunately, the use of VREAD/VWRITE/VEXEC makes it very difficult
448 * to add support for more rights. As such, this interface will be deprecated
449 * and callers will use vnode_authorize instead.
450 */
451 int
452 vn_access(vnode_t vp, int mode, vfs_context_t context)
453 {
454 kauth_action_t action;
455
456 action = 0;
457 if (mode & VREAD)
458 action |= KAUTH_VNODE_READ_DATA;
459 if (mode & VWRITE)
460 action |= KAUTH_VNODE_WRITE_DATA;
461 if (mode & VEXEC)
462 action |= KAUTH_VNODE_EXECUTE;
463
464 return(vnode_authorize(vp, NULL, action, context));
465 }
466 #endif /* vn_access_DEPRECATED */
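/*
 * For reference, the replacement suggested above maps directly onto
 * vnode_authorize(); e.g. what vn_access(vp, VREAD | VWRITE, ctx) used to
 * do is expressed as:
 *
 *	error = vnode_authorize(vp, NULL,
 *	    KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx);
 *
 * (Illustrative only; this simply restates the mapping performed by
 * vn_access() above.)
 */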
467
468 /*
469 * Vnode close call
470 */
471 int
472 vn_close(struct vnode *vp, int flags, vfs_context_t ctx)
473 {
474 int error;
475
476 #if CONFIG_FSE
477 if (flags & FWASWRITTEN) {
478 if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) {
479 add_fsevent(FSE_CONTENT_MODIFIED, ctx,
480 FSE_ARG_VNODE, vp,
481 FSE_ARG_DONE);
482 }
483 }
484 #endif
485
486 #if NAMEDRSRCFORK
487 /* Sync data from resource fork shadow file if needed. */
488 if ((vp->v_flag & VISNAMEDSTREAM) &&
489 (vp->v_parent != NULLVP) &&
490 !(vp->v_parent->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS)) {
491 if (flags & FWASWRITTEN) {
492 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
493 }
494 }
495 #endif
496 error = VNOP_CLOSE(vp, flags, ctx);
497 (void)vnode_rele_ext(vp, flags, 0);
498
499 return (error);
500 }
501
502 static int
503 vn_read_swapfile(
504 struct vnode *vp,
505 uio_t uio)
506 {
507 static char *swap_read_zero_page = NULL;
508 int error;
509 off_t swap_count, this_count;
510 off_t file_end, read_end;
511 off_t prev_resid;
512
513 /*
514 * Reading from a swap file will get you all zeroes.
515 */
516 error = 0;
517 swap_count = uio_resid(uio);
518
519 file_end = ubc_getsize(vp);
520 read_end = uio->uio_offset + uio_resid(uio);
521 if (uio->uio_offset >= file_end) {
522 /* uio starts after end of file: nothing to read */
523 swap_count = 0;
524 } else if (read_end > file_end) {
525 /* uio extends beyond end of file: stop before that */
526 swap_count -= (read_end - file_end);
527 }
528
529 while (swap_count > 0) {
530 if (swap_read_zero_page == NULL) {
531 char *my_zero_page;
532 int funnel_state;
533
534 /*
535 * Take kernel funnel so that only one thread
536 * sets up "swap_read_zero_page".
537 */
538 funnel_state = thread_funnel_set(kernel_flock, TRUE);
539
540 if (swap_read_zero_page == NULL) {
541 MALLOC(my_zero_page, char *, PAGE_SIZE,
542 M_TEMP, M_WAITOK);
543 memset(my_zero_page, '?', PAGE_SIZE);
544 /*
545 * Adding a newline character here
546 * and there prevents "less(1)", for
547 * example, from getting too confused
548 * about a file with one really really
549 * long line.
550 */
551 my_zero_page[PAGE_SIZE-1] = '\n';
552 if (swap_read_zero_page == NULL) {
553 swap_read_zero_page = my_zero_page;
554 } else {
555 FREE(my_zero_page, M_TEMP);
556 }
557 } else {
558 /*
559 * Someone else raced us here and won;
560 * just use their page.
561 */
562 }
563 thread_funnel_set(kernel_flock, funnel_state);
564 }
565
566 this_count = swap_count;
567 if (this_count > PAGE_SIZE) {
568 this_count = PAGE_SIZE;
569 }
570
571 prev_resid = uio_resid(uio);
572 error = uiomove((caddr_t) swap_read_zero_page,
573 this_count,
574 uio);
575 if (error) {
576 break;
577 }
578 swap_count -= (prev_resid - uio_resid(uio));
579 }
580
581 return error;
582 }
583 /*
584 * Package up an I/O request on a vnode into a uio and do it.
585 */
586 int
587 vn_rdwr(
588 enum uio_rw rw,
589 struct vnode *vp,
590 caddr_t base,
591 int len,
592 off_t offset,
593 enum uio_seg segflg,
594 int ioflg,
595 kauth_cred_t cred,
596 int *aresid,
597 proc_t p)
598 {
599 return vn_rdwr_64(rw,
600 vp,
601 (uint64_t)(uintptr_t)base,
602 (int64_t)len,
603 offset,
604 segflg,
605 ioflg,
606 cred,
607 aresid,
608 p);
609 }
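/*
 * Illustrative sketch (not part of this file): reading the first bytes of
 * a vnode into a kernel buffer through the wrapper above. Everything
 * other than vn_rdwr() and its parameter list is assumed context ("vp"
 * and "ctx" in scope).
 *
 *	char buf[512];
 *	int resid = 0;
 *	int error;
 *
 *	error = vn_rdwr(UIO_READ, vp, (caddr_t)buf, sizeof(buf),
 *	    0,				// offset
 *	    UIO_SYSSPACE,		// buffer lives in kernel space
 *	    0,				// ioflg
 *	    vfs_context_ucred(ctx),	// credential used for the I/O
 *	    &resid,			// bytes NOT transferred, on return
 *	    vfs_context_proc(ctx));
 *	if (error == 0 && resid > 0) {
 *		// short read: only (sizeof(buf) - resid) bytes were available
 *	}
 */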
610
611
612 int
613 vn_rdwr_64(
614 enum uio_rw rw,
615 struct vnode *vp,
616 uint64_t base,
617 int64_t len,
618 off_t offset,
619 enum uio_seg segflg,
620 int ioflg,
621 kauth_cred_t cred,
622 int *aresid,
623 proc_t p)
624 {
625 uio_t auio;
626 int spacetype;
627 struct vfs_context context;
628 int error=0;
629 char uio_buf[ UIO_SIZEOF(1) ];
630
631 context.vc_thread = current_thread();
632 context.vc_ucred = cred;
633
634 if (UIO_SEG_IS_USER_SPACE(segflg)) {
635 spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
636 }
637 else {
638 spacetype = UIO_SYSSPACE;
639 }
640 auio = uio_createwithbuffer(1, offset, spacetype, rw,
641 &uio_buf[0], sizeof(uio_buf));
642 uio_addiov(auio, base, len);
643
644 #if CONFIG_MACF
645 /* XXXMAC
646 * IO_NOAUTH should be re-examined.
647 * It is likely that mediation should be performed in the caller.
648 */
649 if ((ioflg & IO_NOAUTH) == 0) {
650 /* passed cred is fp->f_cred */
651 if (rw == UIO_READ)
652 error = mac_vnode_check_read(&context, cred, vp);
653 else
654 error = mac_vnode_check_write(&context, cred, vp);
655 }
656 #endif
657
658 if (error == 0) {
659 if (rw == UIO_READ) {
660 if (vp->v_flag & VSWAP) {
661 error = vn_read_swapfile(vp, auio);
662 } else {
663 error = VNOP_READ(vp, auio, ioflg, &context);
664 }
665 } else {
666 error = VNOP_WRITE(vp, auio, ioflg, &context);
667 }
668 }
669
670 if (aresid)
671 // LP64todo - fix this
672 *aresid = uio_resid(auio);
673 else
674 if (uio_resid(auio) && error == 0)
675 error = EIO;
676 return (error);
677 }
678
679 /*
680 * File table vnode read routine.
681 */
682 static int
683 vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
684 {
685 struct vnode *vp;
686 int error, ioflag;
687 off_t count;
688
689 vp = (struct vnode *)fp->f_fglob->fg_data;
690 if ( (error = vnode_getwithref(vp)) ) {
691 return(error);
692 }
693
694 #if CONFIG_MACF
695 error = mac_vnode_check_read(ctx, vfs_context_ucred(ctx), vp);
696 if (error) {
697 (void)vnode_put(vp);
698 return (error);
699 }
700 #endif
701
702 ioflag = 0;
703 if (fp->f_fglob->fg_flag & FNONBLOCK)
704 ioflag |= IO_NDELAY;
705 if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
706 ioflag |= IO_NOCACHE;
707 if (fp->f_fglob->fg_flag & FNORDAHEAD)
708 ioflag |= IO_RAOFF;
709
710 if ((flags & FOF_OFFSET) == 0)
711 uio->uio_offset = fp->f_fglob->fg_offset;
712 count = uio_resid(uio);
713
714 if (vp->v_flag & VSWAP) {
715 /* special case for swap files */
716 error = vn_read_swapfile(vp, uio);
717 } else {
718 error = VNOP_READ(vp, uio, ioflag, ctx);
719 }
720 if ((flags & FOF_OFFSET) == 0)
721 fp->f_fglob->fg_offset += count - uio_resid(uio);
722
723 (void)vnode_put(vp);
724 return (error);
725 }
726
727
728 /*
729 * File table vnode write routine.
730 */
731 static int
732 vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
733 {
734 struct vnode *vp;
735 int error, ioflag;
736 off_t count;
737 int clippedsize = 0;
738 int partialwrite=0;
739 int residcount, oldcount;
740 proc_t p = vfs_context_proc(ctx);
741
742 count = 0;
743 vp = (struct vnode *)fp->f_fglob->fg_data;
744 if ( (error = vnode_getwithref(vp)) ) {
745 return(error);
746 }
747
748 #if CONFIG_MACF
749 error = mac_vnode_check_write(ctx, vfs_context_ucred(ctx), vp);
750 if (error) {
751 (void)vnode_put(vp);
752 return (error);
753 }
754 #endif
755
756 ioflag = IO_UNIT;
757 if (vp->v_type == VREG && (fp->f_fglob->fg_flag & O_APPEND))
758 ioflag |= IO_APPEND;
759 if (fp->f_fglob->fg_flag & FNONBLOCK)
760 ioflag |= IO_NDELAY;
761 if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
762 ioflag |= IO_NOCACHE;
763 if ((fp->f_fglob->fg_flag & O_FSYNC) ||
764 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
765 ioflag |= IO_SYNC;
766
767 if ((flags & FOF_OFFSET) == 0) {
768 uio->uio_offset = fp->f_fglob->fg_offset;
769 count = uio_resid(uio);
770 }
771 if (((flags & FOF_OFFSET) == 0) &&
772 vfs_context_proc(ctx) && (vp->v_type == VREG) &&
773 (((rlim_t)(uio->uio_offset + uio_uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) ||
774 ((rlim_t)uio_uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)))) {
775 /*
776 * If the requested residual would cause us to go past the
777 * administrative limit, then we need to adjust the residual
778 * down to cause fewer bytes than requested to be written. If
779 * we can't do that (e.g. the residual is already 1 byte),
780 * then we fail the write with EFBIG.
781 */
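/*
 * Worked example (illustrative): with rlim_cur = 100, uio_offset = 90 and
 * a residual of 25 bytes, the write would end at offset 115, so
 * clippedsize = 115 - 100 = 15 and only 25 - 15 = 10 bytes are written.
 * If clipping would leave nothing to write (clippedsize >= residcount),
 * we post SIGXFSZ and fail with EFBIG instead.
 */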
782 residcount = uio_uio_resid(uio);
783 if ((rlim_t)(uio->uio_offset + uio_uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
784 clippedsize = (uio->uio_offset + uio_uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
785 } else if ((rlim_t)uio_uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)) {
786 clippedsize = (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset);
787 }
788 if (clippedsize >= residcount) {
789 psignal(p, SIGXFSZ);
790 vnode_put(vp);
791 return (EFBIG);
792 }
793 partialwrite = 1;
794 uio_setresid(uio, residcount-clippedsize);
795 }
796 if ((flags & FOF_OFFSET) != 0) {
797 /* for pwrite, append should be ignored */
798 ioflag &= ~IO_APPEND;
799 if (p && (vp->v_type == VREG) &&
800 ((rlim_t)uio->uio_offset >= p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
801 psignal(p, SIGXFSZ);
802 vnode_put(vp);
803 return (EFBIG);
804 }
805 if (p && (vp->v_type == VREG) &&
806 ((rlim_t)(uio->uio_offset + uio_uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
807 //Debugger("vn_bwrite:overstepping the bounds");
808 residcount = uio_uio_resid(uio);
809 clippedsize = (uio->uio_offset + uio_uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
810 partialwrite = 1;
811 uio_setresid(uio, residcount-clippedsize);
812 }
813 }
814
815 error = VNOP_WRITE(vp, uio, ioflag, ctx);
816
817 if (partialwrite) {
818 oldcount = uio_resid(uio);
819 uio_setresid(uio, oldcount + clippedsize);
820 }
821
822 if ((flags & FOF_OFFSET) == 0) {
823 if (ioflag & IO_APPEND)
824 fp->f_fglob->fg_offset = uio->uio_offset;
825 else
826 fp->f_fglob->fg_offset += count - uio_resid(uio);
827 }
828
829 /*
830 * Set the credentials on successful writes
831 */
832 if ((error == 0) && (vp->v_tag == VT_NFS) && (UBCINFOEXISTS(vp))) {
833 /*
834 * When called from aio subsystem, we only have the proc from
835 * which to get the credential, at this point, so use that
836 * instead. This means aio functions are incompatible with
837 * per-thread credentials (aio operations are proxied). We
838 * can't easily correct the aio vs. settid race in this case
839 * anyway, so we disallow it.
840 */
841 if ((flags & FOF_PCRED) == 0) {
842 ubc_setthreadcred(vp, p, current_thread());
843 } else {
844 ubc_setcred(vp, p);
845 }
846 }
847 (void)vnode_put(vp);
848 return (error);
849 }
850
851 /*
852 * File table vnode stat routine.
853 *
854 * Returns: 0 Success
855 * EBADF
856 * ENOMEM
857 * vnode_getattr:???
858 */
859 int
860 vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx)
861 {
862 struct vnode_attr va;
863 int error;
864 u_short mode;
865 kauth_filesec_t fsec;
866 struct stat *sb = (struct stat *)0; /* warning avoidance; protected by isstat64 */
867 struct stat64 *sb64 = (struct stat64 *)0; /* warning avoidance; protected by isstat64 */
868
869 if (isstat64 != 0)
870 sb64 = (struct stat64 *)sbptr;
871 else
872 sb = (struct stat *)sbptr;
873
874 VATTR_INIT(&va);
875 VATTR_WANTED(&va, va_fsid);
876 VATTR_WANTED(&va, va_fileid);
877 VATTR_WANTED(&va, va_mode);
878 VATTR_WANTED(&va, va_type);
879 VATTR_WANTED(&va, va_nlink);
880 VATTR_WANTED(&va, va_uid);
881 VATTR_WANTED(&va, va_gid);
882 VATTR_WANTED(&va, va_rdev);
883 VATTR_WANTED(&va, va_data_size);
884 VATTR_WANTED(&va, va_access_time);
885 VATTR_WANTED(&va, va_modify_time);
886 VATTR_WANTED(&va, va_change_time);
887 VATTR_WANTED(&va, va_create_time);
888 VATTR_WANTED(&va, va_flags);
889 VATTR_WANTED(&va, va_gen);
890 VATTR_WANTED(&va, va_iosize);
891 /* lower layers will synthesise va_total_alloc from va_data_size if required */
892 VATTR_WANTED(&va, va_total_alloc);
893 if (xsec != NULL) {
894 VATTR_WANTED(&va, va_uuuid);
895 VATTR_WANTED(&va, va_guuid);
896 VATTR_WANTED(&va, va_acl);
897 }
898 error = vnode_getattr(vp, &va, ctx);
899 if (error)
900 goto out;
901 /*
902 * Copy from vattr table
903 */
904 if (isstat64 != 0) {
905 sb64->st_dev = va.va_fsid;
906 sb64->st_ino = (ino64_t)va.va_fileid;
907
908 } else {
909 sb->st_dev = va.va_fsid;
910 sb->st_ino = (ino_t)va.va_fileid;
911 }
912 mode = va.va_mode;
913 switch (vp->v_type) {
914 case VREG:
915 mode |= S_IFREG;
916 break;
917 case VDIR:
918 mode |= S_IFDIR;
919 break;
920 case VBLK:
921 mode |= S_IFBLK;
922 break;
923 case VCHR:
924 mode |= S_IFCHR;
925 break;
926 case VLNK:
927 mode |= S_IFLNK;
928 break;
929 case VSOCK:
930 mode |= S_IFSOCK;
931 break;
932 case VFIFO:
933 mode |= S_IFIFO;
934 break;
935 default:
936 error = EBADF;
937 goto out;
938 };
939 if (isstat64 != 0) {
940 sb64->st_mode = mode;
941 sb64->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
942 sb64->st_uid = va.va_uid;
943 sb64->st_gid = va.va_gid;
944 sb64->st_rdev = va.va_rdev;
945 sb64->st_size = va.va_data_size;
946 sb64->st_atimespec = va.va_access_time;
947 sb64->st_mtimespec = va.va_modify_time;
948 sb64->st_ctimespec = va.va_change_time;
949 sb64->st_birthtimespec =
950 VATTR_IS_SUPPORTED(&va, va_create_time) ? va.va_create_time : va.va_change_time;
951 sb64->st_blksize = va.va_iosize;
952 sb64->st_flags = va.va_flags;
953 sb64->st_blocks = roundup(va.va_total_alloc, 512) / 512;
954 } else {
955 sb->st_mode = mode;
956 sb->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
957 sb->st_uid = va.va_uid;
958 sb->st_gid = va.va_gid;
959 sb->st_rdev = va.va_rdev;
960 sb->st_size = va.va_data_size;
961 sb->st_atimespec = va.va_access_time;
962 sb->st_mtimespec = va.va_modify_time;
963 sb->st_ctimespec = va.va_change_time;
964 sb->st_blksize = va.va_iosize;
965 sb->st_flags = va.va_flags;
966 sb->st_blocks = roundup(va.va_total_alloc, 512) / 512;
967 }
968
969 /* if we're interested in extended security data and we got an ACL */
970 if (xsec != NULL) {
971 if (!VATTR_IS_SUPPORTED(&va, va_acl) &&
972 !VATTR_IS_SUPPORTED(&va, va_uuuid) &&
973 !VATTR_IS_SUPPORTED(&va, va_guuid)) {
974 *xsec = KAUTH_FILESEC_NONE;
975 } else {
976
977 if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
978 fsec = kauth_filesec_alloc(va.va_acl->acl_entrycount);
979 } else {
980 fsec = kauth_filesec_alloc(0);
981 }
982 if (fsec == NULL) {
983 error = ENOMEM;
984 goto out;
985 }
986 fsec->fsec_magic = KAUTH_FILESEC_MAGIC;
987 if (VATTR_IS_SUPPORTED(&va, va_uuuid)) {
988 fsec->fsec_owner = va.va_uuuid;
989 } else {
990 fsec->fsec_owner = kauth_null_guid;
991 }
992 if (VATTR_IS_SUPPORTED(&va, va_guuid)) {
993 fsec->fsec_group = va.va_guuid;
994 } else {
995 fsec->fsec_group = kauth_null_guid;
996 }
997 if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
998 bcopy(va.va_acl, &(fsec->fsec_acl), KAUTH_ACL_COPYSIZE(va.va_acl));
999 } else {
1000 fsec->fsec_acl.acl_entrycount = KAUTH_FILESEC_NOACL;
1001 }
1002 *xsec = fsec;
1003 }
1004 }
1005
1006 /* Do not give the generation number out to unprivileged users */
1007 if (va.va_gen && !vfs_context_issuser(ctx)) {
1008 if (isstat64 != 0)
1009 sb64->st_gen = 0;
1010 else
1011 sb->st_gen = 0;
1012 } else {
1013 if (isstat64 != 0)
1014 sb64->st_gen = va.va_gen;
1015 else
1016 sb->st_gen = va.va_gen;
1017 }
1018
1019 error = 0;
1020 out:
1021 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL)
1022 kauth_acl_free(va.va_acl);
1023 return (error);
1024 }
1025
1026 int
1027 vn_stat(struct vnode *vp, void *sb, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx)
1028 {
1029 int error;
1030
1031 #if CONFIG_MACF
1032 error = mac_vnode_check_stat(ctx, NOCRED, vp);
1033 if (error)
1034 return (error);
1035 #endif
1036
1037 /* authorize */
1038 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_ATTRIBUTES | KAUTH_VNODE_READ_SECURITY, ctx)) != 0)
1039 return(error);
1040
1041 /* actual stat */
1042 return(vn_stat_noauth(vp, sb, xsec, isstat64, ctx));
1043 }
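/*
 * Illustrative sketch (not part of this file): fetching basic attributes
 * through vn_stat() without requesting extended security data.
 *
 *	struct stat sb;
 *	int error;
 *
 *	error = vn_stat(vp, &sb, NULL, 0, ctx);	// xsec == NULL, 32-bit stat
 *	if (error == 0) {
 *		// sb.st_size, sb.st_mode, etc. are now valid
 *	}
 *
 * Passing a non-NULL kauth_filesec_t pointer as the third argument also
 * returns ACL/GUID data (or KAUTH_FILESEC_NONE); releasing that structure
 * is the caller's responsibility (an assumption; this file only shows the
 * allocating side).
 */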
1044
1045
1046 /*
1047 * File table vnode ioctl routine.
1048 */
1049 static int
1050 vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
1051 {
1052 struct vnode *vp = ((struct vnode *)fp->f_fglob->fg_data);
1053 off_t file_size;
1054 int error;
1055 struct vnode *ttyvp;
1056 int funnel_state;
1057 struct session * sessp;
1058
1059 if ( (error = vnode_getwithref(vp)) ) {
1060 return(error);
1061 }
1062
1063 #if CONFIG_MACF
1064 error = mac_vnode_check_ioctl(ctx, vp, com);
1065 if (error)
1066 goto out;
1067 #endif
1068
1069 switch (vp->v_type) {
1070 case VREG:
1071 case VDIR:
1072 if (com == FIONREAD) {
1073 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
1074 goto out;
1075 *(int *)data = file_size - fp->f_fglob->fg_offset;
1076 goto out;
1077 }
1078 if (com == FIONBIO || com == FIOASYNC) { /* XXX */
1079 goto out;
1080 }
1081 /* fall into ... */
1082
1083 default:
1084 error = ENOTTY;
1085 goto out;
1086
1087 case VFIFO:
1088 case VCHR:
1089 case VBLK:
1090
1091 /* Should not be able to set block size from user space */
1092 if (com == DKIOCSETBLOCKSIZE) {
1093 error = EPERM;
1094 goto out;
1095 }
1096
1097 if (com == FIODTYPE) {
1098 if (vp->v_type == VBLK) {
1099 if (major(vp->v_rdev) >= nblkdev) {
1100 error = ENXIO;
1101 goto out;
1102 }
1103 *(int *)data = bdevsw[major(vp->v_rdev)].d_type;
1104
1105 } else if (vp->v_type == VCHR) {
1106 if (major(vp->v_rdev) >= nchrdev) {
1107 error = ENXIO;
1108 goto out;
1109 }
1110 *(int *)data = cdevsw[major(vp->v_rdev)].d_type;
1111 } else {
1112 error = ENOTTY;
1113 goto out;
1114 }
1115 goto out;
1116 }
1117 error = VNOP_IOCTL(vp, com, data, fp->f_fglob->fg_flag, ctx);
1118
1119 if (error == 0 && com == TIOCSCTTY) {
1120 vnode_ref(vp);
1121
1122 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1123 sessp = proc_session(vfs_context_proc(ctx));
1124
1125 session_lock(sessp);
1126 ttyvp = sessp->s_ttyvp;
1127 sessp->s_ttyvp = vp;
1128 sessp->s_ttyvid = vnode_vid(vp);
1129 session_unlock(sessp);
1130 session_rele(sessp);
1131 thread_funnel_set(kernel_flock, funnel_state);
1132
1133 if (ttyvp)
1134 vnode_rele(ttyvp);
1135 }
1136 }
1137 out:
1138 (void)vnode_put(vp);
1139 return(error);
1140 }
1141
1142 /*
1143 * File table vnode select routine.
1144 */
1145 static int
1146 vn_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx)
1147 {
1148 int error;
1149 struct vnode * vp = (struct vnode *)fp->f_fglob->fg_data;
1150 struct vfs_context context;
1151
1152 if ( (error = vnode_getwithref(vp)) == 0 ) {
1153 context.vc_thread = current_thread();
1154 context.vc_ucred = fp->f_fglob->fg_cred;
1155
1156 #if CONFIG_MACF
1157 /*
1158 * XXX We should use a per thread credential here; minimally,
1159 * XXX the process credential should have a persistent
1160 * XXX reference on it before being passed in here.
1161 */
1162 error = mac_vnode_check_select(ctx, vp, which);
1163 if (error == 0)
1164 #endif
1165 error = VNOP_SELECT(vp, which, fp->f_fglob->fg_flag, wql, ctx);
1166
1167 (void)vnode_put(vp);
1168 }
1169 return(error);
1170
1171 }
1172
1173 /*
1174 * Check that the vnode is still valid, and if so
1175 * acquire requested lock.
1176 */
1177 int
1178 vn_lock(__unused vnode_t vp, __unused int flags, __unused proc_t p)
1179 {
1180 return (0);
1181 }
1182
1183 /*
1184 * File table vnode close routine.
1185 */
1186 static int
1187 vn_closefile(struct fileglob *fg, vfs_context_t ctx)
1188 {
1189 struct vnode *vp = (struct vnode *)fg->fg_data;
1190 int error;
1191 struct flock lf;
1192
1193 if ( (error = vnode_getwithref(vp)) == 0 ) {
1194
1195 if ((fg->fg_flag & FHASLOCK) && fg->fg_type == DTYPE_VNODE) {
1196 lf.l_whence = SEEK_SET;
1197 lf.l_start = 0;
1198 lf.l_len = 0;
1199 lf.l_type = F_UNLCK;
1200
1201 (void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, ctx);
1202 }
1203 error = vn_close(vp, fg->fg_flag, ctx);
1204
1205 (void)vnode_put(vp);
1206 }
1207 return(error);
1208 }
1209
1210 /*
1211 * Returns: 0 Success
1212 * VNOP_PATHCONF:???
1213 */
1214 int
1215 vn_pathconf(vnode_t vp, int name, register_t *retval, vfs_context_t ctx)
1216 {
1217 int error = 0;
1218
1219 switch(name) {
1220 case _PC_EXTENDED_SECURITY_NP:
1221 *retval = vfs_extendedsecurity(vnode_mount(vp)) ? 1 : 0;
1222 break;
1223 case _PC_AUTH_OPAQUE_NP:
1224 *retval = vfs_authopaque(vnode_mount(vp));
1225 break;
1226 case _PC_2_SYMLINKS:
1227 *retval = 1; /* XXX NOTSUP on MSDOS, etc. */
1228 break;
1229 case _PC_ALLOC_SIZE_MIN:
1230 *retval = 1; /* XXX lie: 1 byte */
1231 break;
1232 case _PC_ASYNC_IO: /* unistd.h: _POSIX_ASYNCHRONOUS_IO */
1233 *retval = 1; /* [AIO] option is supported */
1234 break;
1235 case _PC_PRIO_IO: /* unistd.h: _POSIX_PRIORITIZED_IO */
1236 *retval = 0; /* [PIO] option is not supported */
1237 break;
1238 case _PC_REC_INCR_XFER_SIZE:
1239 *retval = 4096; /* XXX go from MIN to MAX 4K at a time */
1240 break;
1241 case _PC_REC_MIN_XFER_SIZE:
1242 *retval = 4096; /* XXX recommend 4K minimum reads/writes */
1243 break;
1244 case _PC_REC_MAX_XFER_SIZE:
1245 *retval = 65536; /* XXX recommend 64K maximum reads/writes */
1246 break;
1247 case _PC_REC_XFER_ALIGN:
1248 *retval = 4096; /* XXX recommend page aligned buffers */
1249 break;
1250 case _PC_SYMLINK_MAX:
1251 *retval = 255; /* Minimum acceptable POSIX value */
1252 break;
1253 case _PC_SYNC_IO: /* unistd.h: _POSIX_SYNCHRONIZED_IO */
1254 *retval = 0; /* [SIO] option is not supported */
1255 break;
1256 default:
1257 error = VNOP_PATHCONF(vp, name, retval, ctx);
1258 break;
1259 }
1260
1261 return (error);
1262 }
1263
1264 static int
1265 vn_kqfilt_add(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
1266 {
1267 struct vnode *vp = (struct vnode *)fp->f_fglob->fg_data;
1268 int error;
1269 int funnel_state;
1270
1271 if ( (error = vnode_getwithref(vp)) == 0 ) {
1272
1273 #if CONFIG_MACF
1274 error = mac_vnode_check_kqfilter(ctx, fp->f_fglob->fg_cred, kn, vp);
1275 if (error) {
1276 (void)vnode_put(vp);
1277 return (error);
1278 }
1279 #endif
1280
1281 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1282 error = VNOP_KQFILT_ADD(vp, kn, ctx);
1283 thread_funnel_set(kernel_flock, funnel_state);
1284
1285 (void)vnode_put(vp);
1286 }
1287 return (error);
1288 }
1289
1290 #if 0
1291 /* No one calls this yet. */
1292 static int
1293 vn_kqfilt_remove(
1294 	struct vnode *vp,
1295 	uintptr_t ident,
1296 	vfs_context_t ctx)
1297 {
1298 int error;
1299 int funnel_state;
1300
1301 if ( (error = vnode_getwithref(vp)) == 0 ) {
1302
1303 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1304 error = VNOP_KQFILT_REMOVE(vp, ident, ctx);
1305 thread_funnel_set(kernel_flock, funnel_state);
1306
1307 (void)vnode_put(vp);
1308 }
1309 return (error);
1310 }
1311 #endif