1 /*
2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)vfs_vnops.c 8.14 (Berkeley) 6/15/95
67 *
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 #include <sys/param.h>
77 #include <sys/types.h>
78 #include <sys/systm.h>
79 #include <sys/kernel.h>
80 #include <sys/file_internal.h>
81 #include <sys/stat.h>
82 #include <sys/proc_internal.h>
83 #include <sys/kauth.h>
84 #include <sys/mount_internal.h>
85 #include <sys/namei.h>
86 #include <sys/vnode_internal.h>
87 #include <sys/ioctl.h>
88 #include <sys/tty.h>
89 /* Temporary workaround for ubc.h until <rdar://4714366> is resolved */
90 #define ubc_setcred ubc_setcred_deprecated
91 #include <sys/ubc.h>
92 #undef ubc_setcred
93 int ubc_setcred(struct vnode *, struct proc *);
94 #include <sys/conf.h>
95 #include <sys/disk.h>
96 #include <sys/fsevents.h>
97 #include <sys/kdebug.h>
98 #include <sys/xattr.h>
99 #include <sys/ubc_internal.h>
100 #include <sys/uio_internal.h>
101 #include <sys/resourcevar.h>
102 #include <sys/signalvar.h>
103
104 #include <vm/vm_kern.h>
105 #include <vm/vm_map.h>
106
107 #include <miscfs/specfs/specdev.h>
108 #include <miscfs/fifofs/fifo.h>
109
110 #if CONFIG_MACF
111 #include <security/mac_framework.h>
112 #endif
113
114
115 static int vn_closefile(struct fileglob *fp, vfs_context_t ctx);
116 static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data,
117 vfs_context_t ctx);
118 static int vn_read(struct fileproc *fp, struct uio *uio, int flags,
119 vfs_context_t ctx);
120 static int vn_write(struct fileproc *fp, struct uio *uio, int flags,
121 vfs_context_t ctx);
122 static int vn_select( struct fileproc *fp, int which, void * wql,
123 vfs_context_t ctx);
124 static int vn_kqfilt_add(struct fileproc *fp, struct knote *kn,
125 vfs_context_t ctx);
126 static void filt_vndetach(struct knote *kn);
127 static int filt_vnode(struct knote *kn, long hint);
128 static int vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx);
129 #if 0
130 static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident,
131 vfs_context_t ctx);
132 #endif
133
134 struct fileops vnops =
135 { vn_read, vn_write, vn_ioctl, vn_select, vn_closefile, vn_kqfilt_add, NULL };
136
137 struct filterops vnode_filtops = {
138 .f_isfd = 1,
139 .f_attach = NULL,
140 .f_detach = filt_vndetach,
141 .f_event = filt_vnode
142 };
143
144 /*
145 * Common code for vnode open operations.
146 * Check permissions, and call the VNOP_OPEN or VNOP_CREATE routine.
147 *
148 * XXX the profusion of interfaces here is probably a bad thing.
149 */
150 int
151 vn_open(struct nameidata *ndp, int fmode, int cmode)
152 {
153 return(vn_open_modflags(ndp, &fmode, cmode));
154 }
155
156 int
157 vn_open_modflags(struct nameidata *ndp, int *fmodep, int cmode)
158 {
159 struct vnode_attr va;
160
161 VATTR_INIT(&va);
162 VATTR_SET(&va, va_mode, cmode);
163
164 return(vn_open_auth(ndp, fmodep, &va));
165 }
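/*
 * Illustrative sketch (not part of the original file): most in-kernel
 * consumers reach vn_open()/vn_close() through the exported
 * vnode_open()/vnode_close() KPI rather than by building a nameidata by
 * hand.  The flag choices below are assumptions for the example; only the
 * call shape is the point.
 *
 *	vnode_t vp = NULLVP;
 *	vfs_context_t ctx = vfs_context_current();
 *	int error;
 *
 *	error = vnode_open("/tmp/example", FREAD | FWRITE, 0600, 0, &vp, ctx);
 *	if (error == 0) {
 *		... I/O against vp, e.g. via vn_rdwr() ...
 *		(void)vnode_close(vp, FWRITE | FWASWRITTEN, ctx);
 *	}
 */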
166
167 static int
168 vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx)
169 {
170 int error;
171
172 if ((error = vnode_ref_ext(vp, fmode, 0)) != 0) {
173 goto bad;
174 }
175
176 /* call out to allow 3rd party notification of open.
177 * Ignore result of kauth_authorize_fileop call.
178 */
179 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
180 (uintptr_t)vp, 0);
181
182 return 0;
183
184 bad:
185 return error;
186
187 }
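/*
 * Illustrative sketch (assumption, not part of the original file): the
 * KAUTH_FILEOP_OPEN call-out above is what a third-party kauth listener
 * sees.  A kext would register roughly as follows; the argument layout
 * (arg0 = vnode, arg1 = path) is how kauth_authorize_fileop() is commonly
 * documented to notify listeners and should be verified against
 * sys/kauth.h.
 *
 *	static int
 *	my_fileop_cb(kauth_cred_t cred, void *idata, kauth_action_t action,
 *	    uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
 *	{
 *		if (action == KAUTH_FILEOP_OPEN) {
 *			vnode_t vp = (vnode_t)arg0;		/* opened vnode */
 *			const char *path = (const char *)arg1;	/* its path */
 *			...
 *		}
 *		return KAUTH_RESULT_DEFER;	/* fileop scope is notify-only */
 *	}
 *
 *	kauth_listener_t l = kauth_listen_scope(KAUTH_SCOPE_FILEOP,
 *	    my_fileop_cb, NULL);
 *	...
 *	kauth_unlisten_scope(l);
 */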
188
189 /*
190 * May do nameidone() to allow safely adding an FSEvent. Cue off of ni_dvp to
191 * determine whether that has happened.
192 */
193 static int
194 vn_open_auth_do_create(struct nameidata *ndp, struct vnode_attr *vap, int fmode, boolean_t *did_create, boolean_t *did_open, vfs_context_t ctx)
195 {
196 uint32_t status = 0;
197 vnode_t dvp = ndp->ni_dvp;
198 int batched;
199 int error;
200 vnode_t vp;
201
202 batched = vnode_compound_open_available(ndp->ni_dvp);
203 *did_open = FALSE;
204
205 VATTR_SET(vap, va_type, VREG);
206 if (fmode & O_EXCL)
207 vap->va_vaflags |= VA_EXCLUSIVE;
208
209 #if NAMEDRSRCFORK
210 if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) {
211 if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0)
212 goto out;
213 if ((error = vnode_makenamedstream(dvp, &ndp->ni_vp, XATTR_RESOURCEFORK_NAME, 0, ctx)) != 0)
214 goto out;
215 *did_create = TRUE;
216 } else {
217 #endif
218 if (!batched) {
219 if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0)
220 goto out;
221 }
222
223 error = vn_create(dvp, &ndp->ni_vp, ndp, vap, VN_CREATE_DOOPEN, fmode, &status, ctx);
224 if (error != 0) {
225 if (batched) {
226 *did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? TRUE : FALSE;
227 } else {
228 *did_create = FALSE;
229 }
230
231 if (error == EKEEPLOOKING) {
232 if (*did_create) {
233 panic("EKEEPLOOKING, but we did a create?");
234 }
235 if (!batched) {
236 panic("EKEEPLOOKING from filesystem that doesn't support compound vnops?");
237 }
238 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
239 panic("EKEEPLOOKING, but continue flag not set?");
240 }
241
242 /*
243 * Do NOT drop the dvp: we need everything to continue the lookup.
244 */
245 return error;
246 }
247 } else {
248 if (batched) {
249 *did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? 1 : 0;
250 *did_open = TRUE;
251 } else {
252 *did_create = TRUE;
253 }
254 }
255 #if NAMEDRSRCFORK
256 }
257 #endif
258
259 /*
260 * Unlock the fsnode (if locked) here so that we are free
261 * to drop the dvp iocount and prevent deadlock in build_path().
262 * nameidone() will still do the right thing later.
263 */
264 vp = ndp->ni_vp;
265 namei_unlock_fsnode(ndp);
266
267 if (*did_create) {
268 int update_flags = 0;
269
270 // Make sure the name & parent pointers are hooked up
271 if (vp->v_name == NULL)
272 update_flags |= VNODE_UPDATE_NAME;
273 if (vp->v_parent == NULLVP)
274 update_flags |= VNODE_UPDATE_PARENT;
275
276 if (update_flags)
277 vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags);
278
279 vnode_put(dvp);
280 ndp->ni_dvp = NULLVP;
281
282 #if CONFIG_FSE
283 if (need_fsevent(FSE_CREATE_FILE, vp)) {
284 add_fsevent(FSE_CREATE_FILE, ctx,
285 FSE_ARG_VNODE, vp,
286 FSE_ARG_DONE);
287 }
288 #endif
289 }
290 out:
291 if (ndp->ni_dvp != NULLVP) {
292 vnode_put(dvp);
293 ndp->ni_dvp = NULLVP;
294 }
295
296 return error;
297 }
298
299 /*
300 * Open a file with authorization, updating the contents of the structures
301 * pointed to by ndp, fmodep, and vap as necessary to perform the requested
302 * operation. This function is used for both opens of existing files, and
303 * creation of new files.
304 *
305 * Parameters: ndp The nameidata pointer describing the
306 * file
307 * fmodep A pointer to an int containing the mode
308 * information to be used for the open
309 * vap A pointer to the vnode attribute
310 * descriptor to be used for the open
311 *
312 * Indirect: * Contents of the data structures pointed
313 * to by the parameters are modified as
314 * necessary to the requested operation.
315 *
316 * Returns: 0 Success
317 * !0 errno value
318 *
319 * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order.
320 *
321 * The contents of '*ndp' will be modified, based on the other
322 * arguments to this function, and to return file and directory
323 * data necessary to satisfy the requested operation.
324 *
325 * If the file does not exist and we are creating it, then the
326 * O_TRUNC flag will be cleared in '*fmodep' to indicate to the
327 * caller that the file was not truncated.
328 *
329 * If the file exists and the O_EXCL flag was not specified, then
330 * the O_CREAT flag will be cleared in '*fmodep' to indicate to
331 * the caller that the existing file was merely opened rather
332 * than created.
333 *
334 * The contents of '*vap' will be modified as necessary to
335 * complete the operation, including setting of supported
336 * attributes, clearing of fields containing unsupported attributes
337 * in the request if the request proceeds without them, etc.
338 *
339 * XXX: This function is too complicated in acting on its arguments
340 *
341 * XXX: We should enumerate the possible errno values here, and where
342 * in the code they originated.
343 */
344 int
345 vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap)
346 {
347 struct vnode *vp;
348 struct vnode *dvp;
349 vfs_context_t ctx = ndp->ni_cnd.cn_context;
350 int error;
351 int fmode;
352 uint32_t origcnflags;
353 boolean_t did_create;
354 boolean_t did_open;
355 boolean_t need_vnop_open;
356 boolean_t batched;
357 boolean_t ref_failed;
358
359 again:
360 vp = NULL;
361 dvp = NULL;
362 batched = FALSE;
363 did_create = FALSE;
364 need_vnop_open = TRUE;
365 ref_failed = FALSE;
366 fmode = *fmodep;
367 origcnflags = ndp->ni_cnd.cn_flags;
368
369 /*
370 * O_CREAT
371 */
372 if (fmode & O_CREAT) {
373 if ( (fmode & O_DIRECTORY) ) {
374 error = EINVAL;
375 goto out;
376 }
377 ndp->ni_cnd.cn_nameiop = CREATE;
378 #if CONFIG_TRIGGERS
379 ndp->ni_op = OP_LINK;
380 #endif
381 /* Inherit USEDVP, vnode_open() supported flags only */
382 ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT);
383 ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1;
384 ndp->ni_flag = NAMEI_COMPOUNDOPEN;
385 #if NAMEDRSRCFORK
386 /* open calls are allowed for resource forks. */
387 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
388 #endif
389 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0 && (origcnflags & FOLLOW) != 0)
390 ndp->ni_cnd.cn_flags |= FOLLOW;
391
392 continue_create_lookup:
393 if ( (error = namei(ndp)) )
394 goto out;
395
396 dvp = ndp->ni_dvp;
397 vp = ndp->ni_vp;
398
399 batched = vnode_compound_open_available(dvp);
400
401 /* not found, create */
402 if (vp == NULL) {
403 /* must have attributes for a new file */
404 if (vap == NULL) {
405 error = EINVAL;
406 goto out;
407 }
408 /*
409 * Attempt a create. For a system supporting compound VNOPs, we may
410 * find an existing file or create one; in either case, we will already
411 * have the file open and no VNOP_OPEN() will be needed.
412 */
413 error = vn_open_auth_do_create(ndp, vap, fmode, &did_create, &did_open, ctx);
414
415 dvp = ndp->ni_dvp;
416 vp = ndp->ni_vp;
417
418 /*
419 * Detected a node that the filesystem couldn't handle. Don't call
420 * nameidone() yet, because we need that path buffer.
421 */
422 if (error == EKEEPLOOKING) {
423 if (!batched) {
424 panic("EKEEPLOOKING from a filesystem that doesn't support compound VNOPs?");
425 }
426 goto continue_create_lookup;
427 }
428
429 nameidone(ndp);
430 if (dvp) {
431 panic("Shouldn't have a dvp here.");
432 }
433
434 if (error) {
435 /*
436 * Check for a creation or unlink race.
437 */
438 if (((error == EEXIST) && !(fmode & O_EXCL)) ||
439 ((error == ENOENT) && (fmode & O_CREAT))){
440 if (vp)
441 vnode_put(vp);
442 goto again;
443 }
444 goto bad;
445 }
446
447 need_vnop_open = !did_open;
448 } else {
449 if (fmode & O_EXCL)
450 error = EEXIST;
451
452 /*
453 * We have a vnode. Use compound open if available
454 * or else fall through to "traditional" path. Note: can't
455 * do a compound open for root, because the parent belongs
456 * to a different FS.
457 */
458 if (error == 0 && batched && (vnode_mount(dvp) == vnode_mount(vp))) {
459 error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx);
460
461 if (error == 0) {
462 vp = ndp->ni_vp;
463 need_vnop_open = FALSE;
464 } else if (error == EKEEPLOOKING) {
465 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
466 panic("EKEEPLOOKING, but continue flag not set?");
467 }
468 goto continue_create_lookup;
469 }
470 }
471 nameidone(ndp);
472 vnode_put(dvp);
473 ndp->ni_dvp = NULLVP;
474
475 if (error) {
476 goto bad;
477 }
478
479 fmode &= ~O_CREAT;
480
481 /* Fall through */
482 }
483 } else {
484 /*
485 * Not O_CREAT
486 */
487 ndp->ni_cnd.cn_nameiop = LOOKUP;
488 /* Inherit USEDVP, vnode_open() supported flags only */
489 ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT);
490 ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1 | WANTPARENT;
491 #if NAMEDRSRCFORK
492 /* open calls are allowed for resource forks. */
493 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
494 #endif
495 ndp->ni_flag = NAMEI_COMPOUNDOPEN;
496
497 /* preserve NOFOLLOW from vnode_open() */
498 if (fmode & O_NOFOLLOW || fmode & O_SYMLINK || (origcnflags & FOLLOW) == 0) {
499 ndp->ni_cnd.cn_flags &= ~FOLLOW;
500 }
501
502 /* Do a lookup, possibly going directly to filesystem for compound operation */
503 do {
504 if ( (error = namei(ndp)) )
505 goto out;
506 vp = ndp->ni_vp;
507 dvp = ndp->ni_dvp;
508
509 /* Check for batched lookup-open */
510 batched = vnode_compound_open_available(dvp);
511 if (batched && ((vp == NULLVP) || (vnode_mount(dvp) == vnode_mount(vp)))) {
512 error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx);
513 vp = ndp->ni_vp;
514 if (error == 0) {
515 need_vnop_open = FALSE;
516 } else if (error == EKEEPLOOKING) {
517 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
518 panic("EKEEPLOOKING, but continue flag not set?");
519 }
520 }
521 }
522 } while (error == EKEEPLOOKING);
523
524 nameidone(ndp);
525 vnode_put(dvp);
526 ndp->ni_dvp = NULLVP;
527
528 if (error) {
529 goto bad;
530 }
531 }
532
533 /*
534 * By this point, nameidone() is called, dvp iocount is dropped,
535 * and dvp pointer is cleared.
536 */
537 if (ndp->ni_dvp != NULLVP) {
538 panic("Haven't cleaned up adequately in vn_open_auth()");
539 }
540
541 /*
542 * Expect to use this code for filesystems without compound VNOPs, for the root
543 * of a filesystem, which can't be "looked up" in the sense of VNOP_LOOKUP(),
544 * and for shadow files, which do not live on the same filesystems as their "parents."
545 */
546 if (need_vnop_open) {
547 if (batched && !vnode_isvroot(vp) && !vnode_isnamedstream(vp)) {
548 panic("Why am I trying to use VNOP_OPEN() on anything other than the root or a named stream?");
549 }
550
551 if (!did_create) {
552 error = vn_authorize_open_existing(vp, &ndp->ni_cnd, fmode, ctx, NULL);
553 if (error) {
554 goto bad;
555 }
556 }
557
558 error = VNOP_OPEN(vp, fmode, ctx);
559 if (error) {
560 goto bad;
561 }
562 need_vnop_open = FALSE;
563 }
564
565 // if the vnode is tagged VOPENEVT and the current process
566 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
567 // flag to the open mode so that this open won't count against
568 // the vnode when carbon delete() does a vnode_isinuse() to see
569 // if a file is currently in use. this allows spotlight
570 // importers to not interfere with carbon apps that depend on
571 // the no-delete-if-busy semantics of carbon delete().
572 //
573 if (!did_create && (vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
574 fmode |= O_EVTONLY;
575 }
576
577 /*
578 * Grab reference, etc.
579 */
580 error = vn_open_auth_finish(vp, fmode, ctx);
581 if (error) {
582 ref_failed = TRUE;
583 goto bad;
584 }
585
586 /* Compound VNOP open is responsible for doing the truncate */
587 if (batched || did_create)
588 fmode &= ~O_TRUNC;
589
590 *fmodep = fmode;
591 return (0);
592
593 bad:
594 /* Opened either explicitly or by a batched create */
595 if (!need_vnop_open) {
596 VNOP_CLOSE(vp, fmode, ctx);
597 }
598
599 ndp->ni_vp = NULL;
600 if (vp) {
601 #if NAMEDRSRCFORK
602 /* Aggressively recycle shadow files if we error'd out during open() */
603 if ((vnode_isnamedstream(vp)) &&
604 (vp->v_parent != NULLVP) &&
605 (vnode_isshadow(vp))) {
606 vnode_recycle(vp);
607 }
608 #endif
609 vnode_put(vp);
610 /*
611 * Check for a race against unlink. We had a vnode
612 * but according to vnode_authorize or VNOP_OPEN it
613 * no longer exists.
614 *
615 * EREDRIVEOPEN: means that we were hit by the tty allocation race.
616 */
617 if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN) || ref_failed) {
618 goto again;
619 }
620 }
621
622 out:
623 return (error);
624 }
625
626 #if vn_access_DEPRECATED
627 /*
628 * Authorize an action against a vnode. This has been the canonical way to
629 * ensure that the credential/process/etc. referenced by a vfs_context
630 * is granted the rights called out in 'mode' against the vnode 'vp'.
631 *
632 * Unfortunately, the use of VREAD/VWRITE/VEXEC makes it very difficult
633 * to add support for more rights. As such, this interface will be deprecated
634 * and callers will use vnode_authorize instead.
635 */
636 int
637 vn_access(vnode_t vp, int mode, vfs_context_t context)
638 {
639 kauth_action_t action;
640
641 action = 0;
642 if (mode & VREAD)
643 action |= KAUTH_VNODE_READ_DATA;
644 if (mode & VWRITE)
645 action |= KAUTH_VNODE_WRITE_DATA;
646 if (mode & VEXEC)
647 action |= KAUTH_VNODE_EXECUTE;
648
649 return(vnode_authorize(vp, NULL, action, context));
650 }
651 #endif /* vn_access_DEPRECATED */
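/*
 * Illustrative sketch (not part of the original file): the replacement the
 * deprecation note above points to.  Instead of vn_access(vp, VREAD | VWRITE,
 * ctx), new callers spell the rights out directly:
 *
 *	error = vnode_authorize(vp, NULL,
 *	    KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx);
 */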
652
653 /*
654 * Vnode close call
655 */
656 int
657 vn_close(struct vnode *vp, int flags, vfs_context_t ctx)
658 {
659 int error;
660
661 #if NAMEDRSRCFORK
662 /* Sync data from resource fork shadow file if needed. */
663 if ((vp->v_flag & VISNAMEDSTREAM) &&
664 (vp->v_parent != NULLVP) &&
665 vnode_isshadow(vp)) {
666 if (flags & FWASWRITTEN) {
667 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
668 }
669 }
670 #endif
671
672 /* work around for foxhound */
673 if (vnode_isspec(vp))
674 (void)vnode_rele_ext(vp, flags, 0);
675
676 error = VNOP_CLOSE(vp, flags, ctx);
677
678 #if CONFIG_FSE
679 if (flags & FWASWRITTEN) {
680 if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) {
681 add_fsevent(FSE_CONTENT_MODIFIED, ctx,
682 FSE_ARG_VNODE, vp,
683 FSE_ARG_DONE);
684 }
685 }
686 #endif
687
688 if (!vnode_isspec(vp))
689 (void)vnode_rele_ext(vp, flags, 0);
690
691 return (error);
692 }
693
694 static int
695 vn_read_swapfile(
696 struct vnode *vp,
697 uio_t uio)
698 {
699 int error;
700 off_t swap_count, this_count;
701 off_t file_end, read_end;
702 off_t prev_resid;
703 char *my_swap_page;
704
705 /*
706 * Reading from a swap file will get you zeroes.
707 */
708
709 my_swap_page = NULL;
710 error = 0;
711 swap_count = uio_resid(uio);
712
713 file_end = ubc_getsize(vp);
714 read_end = uio->uio_offset + uio_resid(uio);
715 if (uio->uio_offset >= file_end) {
716 /* uio starts after end of file: nothing to read */
717 swap_count = 0;
718 } else if (read_end > file_end) {
719 /* uio extends beyond end of file: stop before that */
720 swap_count -= (read_end - file_end);
721 }
722
723 while (swap_count > 0) {
724 if (my_swap_page == NULL) {
725 MALLOC(my_swap_page, char *, PAGE_SIZE,
726 M_TEMP, M_WAITOK);
727 memset(my_swap_page, '\0', PAGE_SIZE);
728 /* add an end-of-line to keep line counters happy */
729 my_swap_page[PAGE_SIZE-1] = '\n';
730 }
731 this_count = swap_count;
732 if (this_count > PAGE_SIZE) {
733 this_count = PAGE_SIZE;
734 }
735
736 prev_resid = uio_resid(uio);
737 error = uiomove((caddr_t) my_swap_page,
738 this_count,
739 uio);
740 if (error) {
741 break;
742 }
743 swap_count -= (prev_resid - uio_resid(uio));
744 }
745 if (my_swap_page != NULL) {
746 FREE(my_swap_page, M_TEMP);
747 my_swap_page = NULL;
748 }
749
750 return error;
751 }
752 /*
753 * Package up an I/O request on a vnode into a uio and do it.
754 */
755 int
756 vn_rdwr(
757 enum uio_rw rw,
758 struct vnode *vp,
759 caddr_t base,
760 int len,
761 off_t offset,
762 enum uio_seg segflg,
763 int ioflg,
764 kauth_cred_t cred,
765 int *aresid,
766 proc_t p)
767 {
768 int64_t resid;
769 int result;
770
771 result = vn_rdwr_64(rw,
772 vp,
773 (uint64_t)(uintptr_t)base,
774 (int64_t)len,
775 offset,
776 segflg,
777 ioflg,
778 cred,
779 &resid,
780 p);
781
782 /* "resid" should be bounded above by "len," which is an int */
783 if (aresid != NULL) {
784 *aresid = resid;
785 }
786
787 return result;
788 }
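/*
 * Illustrative sketch (assumption, not part of the original file): reading
 * the first 512 bytes of an already-referenced vnode into a kernel buffer
 * with vn_rdwr().  The ioflg and credential choices are examples only.
 *
 *	char buf[512];
 *	int resid = 0;
 *	int error;
 *
 *	error = vn_rdwr(UIO_READ, vp, (caddr_t)buf, sizeof(buf),
 *	    0,			/* offset */
 *	    UIO_SYSSPACE,	/* buf is a kernel address */
 *	    IO_NOCACHE,
 *	    vfs_context_ucred(ctx), &resid, vfs_context_proc(ctx));
 *	if (error == 0) {
 *		... sizeof(buf) - resid bytes are now valid in buf ...
 *	}
 */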
789
790
791 int
792 vn_rdwr_64(
793 enum uio_rw rw,
794 struct vnode *vp,
795 uint64_t base,
796 int64_t len,
797 off_t offset,
798 enum uio_seg segflg,
799 int ioflg,
800 kauth_cred_t cred,
801 int64_t *aresid,
802 proc_t p)
803 {
804 uio_t auio;
805 int spacetype;
806 struct vfs_context context;
807 int error=0;
808 char uio_buf[ UIO_SIZEOF(1) ];
809
810 context.vc_thread = current_thread();
811 context.vc_ucred = cred;
812
813 if (UIO_SEG_IS_USER_SPACE(segflg)) {
814 spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
815 }
816 else {
817 spacetype = UIO_SYSSPACE;
818 }
819 auio = uio_createwithbuffer(1, offset, spacetype, rw,
820 &uio_buf[0], sizeof(uio_buf));
821 uio_addiov(auio, base, len);
822
823 #if CONFIG_MACF
824 /* XXXMAC
825 * IO_NOAUTH should be re-examined.
826 * Likely that mediation should be performed in caller.
827 */
828 if ((ioflg & IO_NOAUTH) == 0) {
829 /* passed cred is fp->f_cred */
830 if (rw == UIO_READ)
831 error = mac_vnode_check_read(&context, cred, vp);
832 else
833 error = mac_vnode_check_write(&context, cred, vp);
834 }
835 #endif
836
837 if (error == 0) {
838 if (rw == UIO_READ) {
839 if (vnode_isswap(vp)) {
840 error = vn_read_swapfile(vp, auio);
841 } else {
842 error = VNOP_READ(vp, auio, ioflg, &context);
843 }
844 } else {
845 error = VNOP_WRITE(vp, auio, ioflg, &context);
846 }
847 }
848
849 if (aresid)
850 *aresid = uio_resid(auio);
851 else
852 if (uio_resid(auio) && error == 0)
853 error = EIO;
854 return (error);
855 }
856
857 /*
858 * File table vnode read routine.
859 */
860 static int
861 vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
862 {
863 struct vnode *vp;
864 int error, ioflag;
865 off_t count;
866
867 vp = (struct vnode *)fp->f_fglob->fg_data;
868 if ( (error = vnode_getwithref(vp)) ) {
869 return(error);
870 }
871
872 #if CONFIG_MACF
873 error = mac_vnode_check_read(ctx, vfs_context_ucred(ctx), vp);
874 if (error) {
875 (void)vnode_put(vp);
876 return (error);
877 }
878 #endif
879
880 ioflag = 0;
881 if (fp->f_fglob->fg_flag & FNONBLOCK)
882 ioflag |= IO_NDELAY;
883 if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
884 ioflag |= IO_NOCACHE;
885 if (fp->f_fglob->fg_flag & FNORDAHEAD)
886 ioflag |= IO_RAOFF;
887
888 if ((flags & FOF_OFFSET) == 0)
889 uio->uio_offset = fp->f_fglob->fg_offset;
890 count = uio_resid(uio);
891
892 if (vnode_isswap(vp)) {
893 /* special case for swap files */
894 error = vn_read_swapfile(vp, uio);
895 } else {
896 error = VNOP_READ(vp, uio, ioflag, ctx);
897 }
898 if ((flags & FOF_OFFSET) == 0)
899 fp->f_fglob->fg_offset += count - uio_resid(uio);
900
901 (void)vnode_put(vp);
902 return (error);
903 }
904
905
906 /*
907 * File table vnode write routine.
908 */
909 static int
910 vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
911 {
912 struct vnode *vp;
913 int error, ioflag;
914 off_t count;
915 int clippedsize = 0;
916 int partialwrite=0;
917 int residcount, oldcount;
918 proc_t p = vfs_context_proc(ctx);
919
920 count = 0;
921 vp = (struct vnode *)fp->f_fglob->fg_data;
922 if ( (error = vnode_getwithref(vp)) ) {
923 return(error);
924 }
925
926 #if CONFIG_MACF
927 error = mac_vnode_check_write(ctx, vfs_context_ucred(ctx), vp);
928 if (error) {
929 (void)vnode_put(vp);
930 return (error);
931 }
932 #endif
933
934 ioflag = IO_UNIT;
935 if (vp->v_type == VREG && (fp->f_fglob->fg_flag & O_APPEND))
936 ioflag |= IO_APPEND;
937 if (fp->f_fglob->fg_flag & FNONBLOCK)
938 ioflag |= IO_NDELAY;
939 if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
940 ioflag |= IO_NOCACHE;
941 if (fp->f_fglob->fg_flag & FNODIRECT)
942 ioflag |= IO_NODIRECT;
943
944 /*
945 * Treat synchronous mounts and O_FSYNC on the fd as equivalent.
946 *
947 * XXX We treat O_DSYNC as O_FSYNC for now, since we can not delay
948 * XXX the non-essential metadata without some additional VFS work;
949 * XXX the intent at this point is to plumb the interface for it.
950 */
951 if ((fp->f_fglob->fg_flag & (O_FSYNC|O_DSYNC)) ||
952 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) {
953 ioflag |= IO_SYNC;
954 }
955
956 if ((flags & FOF_OFFSET) == 0) {
957 uio->uio_offset = fp->f_fglob->fg_offset;
958 count = uio_resid(uio);
959 }
960 if (((flags & FOF_OFFSET) == 0) &&
961 vfs_context_proc(ctx) && (vp->v_type == VREG) &&
962 (((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) ||
963 ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)))) {
964 /*
965 * If the requested residual would cause us to go past the
966 * administrative limit, then we need to adjust the residual
967 * down to cause fewer bytes than requested to be written. If
968 * we can't do that (e.g. the residual is already 1 byte),
969 * then we fail the write with EFBIG.
970 */
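/*
 * Worked example (illustrative numbers only): with rlim_cur = 1MB,
 * uio_offset = 1MB - 100 and uio_resid = 300, the request overshoots
 * the limit by clippedsize = 200.  Since 200 < 300, the resid is
 * trimmed to 100 and a partial write proceeds; only if clippedsize
 * were >= the resid would SIGXFSZ be posted and EFBIG returned.
 */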
971 residcount = uio_resid(uio);
972 if ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
973 clippedsize = (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
974 } else if ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)) {
975 clippedsize = (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset);
976 }
977 if (clippedsize >= residcount) {
978 psignal(p, SIGXFSZ);
979 vnode_put(vp);
980 return (EFBIG);
981 }
982 partialwrite = 1;
983 uio_setresid(uio, residcount-clippedsize);
984 }
985 if ((flags & FOF_OFFSET) != 0) {
986 /* for pwrite, append should be ignored */
987 ioflag &= ~IO_APPEND;
988 if (p && (vp->v_type == VREG) &&
989 ((rlim_t)uio->uio_offset >= p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
990 psignal(p, SIGXFSZ);
991 vnode_put(vp);
992 return (EFBIG);
993 }
994 if (p && (vp->v_type == VREG) &&
995 ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
996 //Debugger("vn_bwrite:overstepping the bounds");
997 residcount = uio_resid(uio);
998 clippedsize = (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
999 partialwrite = 1;
1000 uio_setresid(uio, residcount-clippedsize);
1001 }
1002 }
1003
1004 error = VNOP_WRITE(vp, uio, ioflag, ctx);
1005
1006 if (partialwrite) {
1007 oldcount = uio_resid(uio);
1008 uio_setresid(uio, oldcount + clippedsize);
1009 }
1010
1011 if ((flags & FOF_OFFSET) == 0) {
1012 if (ioflag & IO_APPEND)
1013 fp->f_fglob->fg_offset = uio->uio_offset;
1014 else
1015 fp->f_fglob->fg_offset += count - uio_resid(uio);
1016 }
1017
1018 /*
1019 * Set the credentials on successful writes
1020 */
1021 if ((error == 0) && (vp->v_tag == VT_NFS) && (UBCINFOEXISTS(vp))) {
1022 /*
1023 * When called from aio subsystem, we only have the proc from
1024 * which to get the credential, at this point, so use that
1025 * instead. This means aio functions are incompatible with
1026 * per-thread credentials (aio operations are proxied). We
1027 * can't easily correct the aio vs. settid race in this case
1028 * anyway, so we disallow it.
1029 */
1030 if ((flags & FOF_PCRED) == 0) {
1031 ubc_setthreadcred(vp, p, current_thread());
1032 } else {
1033 ubc_setcred(vp, p);
1034 }
1035 }
1036 (void)vnode_put(vp);
1037 return (error);
1038 }
1039
1040 /*
1041 * File table vnode stat routine.
1042 *
1043 * Returns: 0 Success
1044 * EBADF
1045 * ENOMEM
1046 * vnode_getattr:???
1047 */
1048 int
1049 vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx)
1050 {
1051 struct vnode_attr va;
1052 int error;
1053 u_short mode;
1054 kauth_filesec_t fsec;
1055 struct stat *sb = (struct stat *)0; /* warning avoidance ; protected by isstat64 */
1056 struct stat64 * sb64 = (struct stat64 *)0; /* warning avoidance ; protected by isstat64 */
1057
1058 if (isstat64 != 0)
1059 sb64 = (struct stat64 *)sbptr;
1060 else
1061 sb = (struct stat *)sbptr;
1062 memset(&va, 0, sizeof(va));
1063 VATTR_INIT(&va);
1064 VATTR_WANTED(&va, va_fsid);
1065 VATTR_WANTED(&va, va_fileid);
1066 VATTR_WANTED(&va, va_mode);
1067 VATTR_WANTED(&va, va_type);
1068 VATTR_WANTED(&va, va_nlink);
1069 VATTR_WANTED(&va, va_uid);
1070 VATTR_WANTED(&va, va_gid);
1071 VATTR_WANTED(&va, va_rdev);
1072 VATTR_WANTED(&va, va_data_size);
1073 VATTR_WANTED(&va, va_access_time);
1074 VATTR_WANTED(&va, va_modify_time);
1075 VATTR_WANTED(&va, va_change_time);
1076 VATTR_WANTED(&va, va_create_time);
1077 VATTR_WANTED(&va, va_flags);
1078 VATTR_WANTED(&va, va_gen);
1079 VATTR_WANTED(&va, va_iosize);
1080 /* lower layers will synthesise va_total_alloc from va_data_size if required */
1081 VATTR_WANTED(&va, va_total_alloc);
1082 if (xsec != NULL) {
1083 VATTR_WANTED(&va, va_uuuid);
1084 VATTR_WANTED(&va, va_guuid);
1085 VATTR_WANTED(&va, va_acl);
1086 }
1087 error = vnode_getattr(vp, &va, ctx);
1088 if (error)
1089 goto out;
1090 /*
1091 * Copy from vattr table
1092 */
1093 if (isstat64 != 0) {
1094 sb64->st_dev = va.va_fsid;
1095 sb64->st_ino = (ino64_t)va.va_fileid;
1096
1097 } else {
1098 sb->st_dev = va.va_fsid;
1099 sb->st_ino = (ino_t)va.va_fileid;
1100 }
1101 mode = va.va_mode;
1102 switch (vp->v_type) {
1103 case VREG:
1104 mode |= S_IFREG;
1105 break;
1106 case VDIR:
1107 mode |= S_IFDIR;
1108 break;
1109 case VBLK:
1110 mode |= S_IFBLK;
1111 break;
1112 case VCHR:
1113 mode |= S_IFCHR;
1114 break;
1115 case VLNK:
1116 mode |= S_IFLNK;
1117 break;
1118 case VSOCK:
1119 mode |= S_IFSOCK;
1120 break;
1121 case VFIFO:
1122 mode |= S_IFIFO;
1123 break;
1124 default:
1125 error = EBADF;
1126 goto out;
1127 };
1128 if (isstat64 != 0) {
1129 sb64->st_mode = mode;
1130 sb64->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
1131 sb64->st_uid = va.va_uid;
1132 sb64->st_gid = va.va_gid;
1133 sb64->st_rdev = va.va_rdev;
1134 sb64->st_size = va.va_data_size;
1135 sb64->st_atimespec = va.va_access_time;
1136 sb64->st_mtimespec = va.va_modify_time;
1137 sb64->st_ctimespec = va.va_change_time;
1138 sb64->st_birthtimespec =
1139 VATTR_IS_SUPPORTED(&va, va_create_time) ? va.va_create_time : va.va_change_time;
1140 sb64->st_blksize = va.va_iosize;
1141 sb64->st_flags = va.va_flags;
1142 sb64->st_blocks = roundup(va.va_total_alloc, 512) / 512;
1143 } else {
1144 sb->st_mode = mode;
1145 sb->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
1146 sb->st_uid = va.va_uid;
1147 sb->st_gid = va.va_gid;
1148 sb->st_rdev = va.va_rdev;
1149 sb->st_size = va.va_data_size;
1150 sb->st_atimespec = va.va_access_time;
1151 sb->st_mtimespec = va.va_modify_time;
1152 sb->st_ctimespec = va.va_change_time;
1153 sb->st_blksize = va.va_iosize;
1154 sb->st_flags = va.va_flags;
1155 sb->st_blocks = roundup(va.va_total_alloc, 512) / 512;
1156 }
1157
1158 /* if we're interested in extended security data and we got an ACL */
1159 if (xsec != NULL) {
1160 if (!VATTR_IS_SUPPORTED(&va, va_acl) &&
1161 !VATTR_IS_SUPPORTED(&va, va_uuuid) &&
1162 !VATTR_IS_SUPPORTED(&va, va_guuid)) {
1163 *xsec = KAUTH_FILESEC_NONE;
1164 } else {
1165
1166 if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
1167 fsec = kauth_filesec_alloc(va.va_acl->acl_entrycount);
1168 } else {
1169 fsec = kauth_filesec_alloc(0);
1170 }
1171 if (fsec == NULL) {
1172 error = ENOMEM;
1173 goto out;
1174 }
1175 fsec->fsec_magic = KAUTH_FILESEC_MAGIC;
1176 if (VATTR_IS_SUPPORTED(&va, va_uuuid)) {
1177 fsec->fsec_owner = va.va_uuuid;
1178 } else {
1179 fsec->fsec_owner = kauth_null_guid;
1180 }
1181 if (VATTR_IS_SUPPORTED(&va, va_guuid)) {
1182 fsec->fsec_group = va.va_guuid;
1183 } else {
1184 fsec->fsec_group = kauth_null_guid;
1185 }
1186 if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
1187 bcopy(va.va_acl, &(fsec->fsec_acl), KAUTH_ACL_COPYSIZE(va.va_acl));
1188 } else {
1189 fsec->fsec_acl.acl_entrycount = KAUTH_FILESEC_NOACL;
1190 }
1191 *xsec = fsec;
1192 }
1193 }
1194
1195 /* Do not give the generation number out to unprivileged users */
1196 if (va.va_gen && !vfs_context_issuser(ctx)) {
1197 if (isstat64 != 0)
1198 sb64->st_gen = 0;
1199 else
1200 sb->st_gen = 0;
1201 } else {
1202 if (isstat64 != 0)
1203 sb64->st_gen = va.va_gen;
1204 else
1205 sb->st_gen = va.va_gen;
1206 }
1207
1208 error = 0;
1209 out:
1210 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL)
1211 kauth_acl_free(va.va_acl);
1212 return (error);
1213 }
1214
1215 int
1216 vn_stat(struct vnode *vp, void *sb, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx)
1217 {
1218 int error;
1219
1220 #if CONFIG_MACF
1221 error = mac_vnode_check_stat(ctx, NOCRED, vp);
1222 if (error)
1223 return (error);
1224 #endif
1225
1226 /* authorize */
1227 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_ATTRIBUTES | KAUTH_VNODE_READ_SECURITY, ctx)) != 0)
1228 return(error);
1229
1230 /* actual stat */
1231 return(vn_stat_noauth(vp, sb, xsec, isstat64, ctx));
1232 }
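/*
 * Illustrative sketch (not part of the original file): fetching 64-bit
 * stat data without the extended security blob.  Passing a NULL xsec
 * skips the ACL/UUID work in vn_stat_noauth() above.
 *
 *	struct stat64 sb64;
 *	int error;
 *
 *	error = vn_stat(vp, &sb64, NULL, 1, ctx);
 */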
1233
1234
1235 /*
1236 * File table vnode ioctl routine.
1237 */
1238 static int
1239 vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
1240 {
1241 struct vnode *vp = ((struct vnode *)fp->f_fglob->fg_data);
1242 off_t file_size;
1243 int error;
1244 struct vnode *ttyvp;
1245 int funnel_state;
1246 struct session * sessp;
1247
1248 if ( (error = vnode_getwithref(vp)) ) {
1249 return(error);
1250 }
1251
1252 #if CONFIG_MACF
1253 error = mac_vnode_check_ioctl(ctx, vp, com);
1254 if (error)
1255 goto out;
1256 #endif
1257
1258 switch (vp->v_type) {
1259 case VREG:
1260 case VDIR:
1261 if (com == FIONREAD) {
1262 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
1263 goto out;
1264 *(int *)data = file_size - fp->f_fglob->fg_offset;
1265 goto out;
1266 }
1267 if (com == FIONBIO || com == FIOASYNC) { /* XXX */
1268 goto out;
1269 }
1270 /* fall into ... */
1271
1272 default:
1273 error = ENOTTY;
1274 goto out;
1275
1276 case VFIFO:
1277 case VCHR:
1278 case VBLK:
1279
1280 /* Should not be able to set block size from user space */
1281 if (com == DKIOCSETBLOCKSIZE) {
1282 error = EPERM;
1283 goto out;
1284 }
1285
1286 if (com == FIODTYPE) {
1287 if (vp->v_type == VBLK) {
1288 if (major(vp->v_rdev) >= nblkdev) {
1289 error = ENXIO;
1290 goto out;
1291 }
1292 *(int *)data = bdevsw[major(vp->v_rdev)].d_type;
1293
1294 } else if (vp->v_type == VCHR) {
1295 if (major(vp->v_rdev) >= nchrdev) {
1296 error = ENXIO;
1297 goto out;
1298 }
1299 *(int *)data = cdevsw[major(vp->v_rdev)].d_type;
1300 } else {
1301 error = ENOTTY;
1302 goto out;
1303 }
1304 goto out;
1305 }
1306 error = VNOP_IOCTL(vp, com, data, fp->f_fglob->fg_flag, ctx);
1307
1308 if (error == 0 && com == TIOCSCTTY) {
1309 error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE);
1310 if (error != 0) {
1311 panic("vnode_ref_ext() failed despite VNODE_REF_FORCE?!");
1312 }
1313
1314 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1315 sessp = proc_session(vfs_context_proc(ctx));
1316
1317 session_lock(sessp);
1318 ttyvp = sessp->s_ttyvp;
1319 sessp->s_ttyvp = vp;
1320 sessp->s_ttyvid = vnode_vid(vp);
1321 session_unlock(sessp);
1322 session_rele(sessp);
1323 thread_funnel_set(kernel_flock, funnel_state);
1324
1325 if (ttyvp)
1326 vnode_rele(ttyvp);
1327 }
1328 }
1329 out:
1330 (void)vnode_put(vp);
1331 return(error);
1332 }
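/*
 * Illustrative sketch (assumption, not part of the original file): the
 * FIONREAD case above is what backs the familiar user-space idiom
 *
 *	int nread;
 *	if (ioctl(fd, FIONREAD, &nread) == 0)
 *		... nread == file size minus the descriptor's offset ...
 *
 * for regular files and directories; most other commands fall through
 * to VNOP_IOCTL().
 */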
1333
1334 /*
1335 * File table vnode select routine.
1336 */
1337 static int
1338 vn_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx)
1339 {
1340 int error;
1341 struct vnode * vp = (struct vnode *)fp->f_fglob->fg_data;
1342 struct vfs_context context;
1343
1344 if ( (error = vnode_getwithref(vp)) == 0 ) {
1345 context.vc_thread = current_thread();
1346 context.vc_ucred = fp->f_fglob->fg_cred;
1347
1348 #if CONFIG_MACF
1349 /*
1350 * XXX We should use a per thread credential here; minimally,
1351 * XXX the process credential should have a persistent
1352 * XXX reference on it before being passed in here.
1353 */
1354 error = mac_vnode_check_select(ctx, vp, which);
1355 if (error == 0)
1356 #endif
1357 error = VNOP_SELECT(vp, which, fp->f_fglob->fg_flag, wql, ctx);
1358
1359 (void)vnode_put(vp);
1360 }
1361 return(error);
1362
1363 }
1364
1365 /*
1366 * File table vnode close routine.
1367 */
1368 static int
1369 vn_closefile(struct fileglob *fg, vfs_context_t ctx)
1370 {
1371 struct vnode *vp = (struct vnode *)fg->fg_data;
1372 int error;
1373 struct flock lf;
1374
1375 if ( (error = vnode_getwithref(vp)) == 0 ) {
1376
1377 if ((fg->fg_flag & FHASLOCK) && fg->fg_type == DTYPE_VNODE) {
1378 lf.l_whence = SEEK_SET;
1379 lf.l_start = 0;
1380 lf.l_len = 0;
1381 lf.l_type = F_UNLCK;
1382
1383 (void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, ctx);
1384 }
1385 error = vn_close(vp, fg->fg_flag, ctx);
1386
1387 (void)vnode_put(vp);
1388 }
1389 return(error);
1390 }
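/*
 * Illustrative sketch (assumption, not part of the original file): the
 * FHASLOCK path above is what releases an advisory lock taken with
 * flock(2) when the last reference to the open file goes away, e.g.
 *
 *	int fd = open("/tmp/lockfile", O_RDWR);	/* hypothetical path */
 *	flock(fd, LOCK_EX);	/* marks the fileglob FHASLOCK */
 *	close(fd);		/* last close reaches vn_closefile(),
 *				   which issues the F_UNLCK above */
 */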
1391
1392 /*
1393 * Returns: 0 Success
1394 * VNOP_PATHCONF:???
1395 */
1396 int
1397 vn_pathconf(vnode_t vp, int name, int32_t *retval, vfs_context_t ctx)
1398 {
1399 int error = 0;
1400 struct vfs_attr vfa;
1401
1402 switch(name) {
1403 case _PC_EXTENDED_SECURITY_NP:
1404 *retval = vfs_extendedsecurity(vnode_mount(vp)) ? 1 : 0;
1405 break;
1406 case _PC_AUTH_OPAQUE_NP:
1407 *retval = vfs_authopaque(vnode_mount(vp));
1408 break;
1409 case _PC_2_SYMLINKS:
1410 *retval = 1; /* XXX NOTSUP on MSDOS, etc. */
1411 break;
1412 case _PC_ALLOC_SIZE_MIN:
1413 *retval = 1; /* XXX lie: 1 byte */
1414 break;
1415 case _PC_ASYNC_IO: /* unistd.h: _POSIX_ASYNCHRONOUS_IO */
1416 *retval = 1; /* [AIO] option is supported */
1417 break;
1418 case _PC_PRIO_IO: /* unistd.h: _POSIX_PRIORITIZED_IO */
1419 *retval = 0; /* [PIO] option is not supported */
1420 break;
1421 case _PC_REC_INCR_XFER_SIZE:
1422 *retval = 4096; /* XXX go from MIN to MAX 4K at a time */
1423 break;
1424 case _PC_REC_MIN_XFER_SIZE:
1425 *retval = 4096; /* XXX recommend 4K minimum reads/writes */
1426 break;
1427 case _PC_REC_MAX_XFER_SIZE:
1428 *retval = 65536; /* XXX recommend 64K maximum reads/writes */
1429 break;
1430 case _PC_REC_XFER_ALIGN:
1431 *retval = 4096; /* XXX recommend page aligned buffers */
1432 break;
1433 case _PC_SYMLINK_MAX:
1434 *retval = 255; /* Minimum acceptable POSIX value */
1435 break;
1436 case _PC_SYNC_IO: /* unistd.h: _POSIX_SYNCHRONIZED_IO */
1437 *retval = 0; /* [SIO] option is not supported */
1438 break;
1439 case _PC_XATTR_SIZE_BITS:
1440 /* The number of bits used to store maximum extended
1441 * attribute size in bytes. For example, if the maximum
1442 * attribute size supported by a file system is 128K, the
1443 * value returned will be 18. However, a value of 18 only means
1444 * that the maximum attribute size can be anywhere from
1445 * 128KB to (256KB - 1). As a special case, the resource
1446 * fork can have much larger size, and some file system
1447 * specific extended attributes can have smaller and preset
1448 * size; for example, Finder Info is always 32 bytes.
1449 */
1450 memset(&vfa, 0, sizeof(vfa));
1451 VFSATTR_INIT(&vfa);
1452 VFSATTR_WANTED(&vfa, f_capabilities);
1453 if (vfs_getattr(vnode_mount(vp), &vfa, ctx) == 0 &&
1454 (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) &&
1455 (vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
1456 (vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
1457 /* Supports native extended attributes */
1458 error = VNOP_PATHCONF(vp, name, retval, ctx);
1459 } else {
1460 /* Number of bits used to represent the maximum size of
1461 * extended attribute stored in an Apple Double file.
1462 */
1463 *retval = AD_XATTR_SIZE_BITS;
1464 }
1465 break;
1466 default:
1467 error = VNOP_PATHCONF(vp, name, retval, ctx);
1468 break;
1469 }
1470
1471 return (error);
1472 }
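/*
 * Illustrative sketch (assumption, not part of the original file): the
 * _PC_XATTR_SIZE_BITS case above surfaces through pathconf(2), assuming
 * the _PC_ name is exported to user space unchanged:
 *
 *	long bits = pathconf("/Volumes/SomeVolume", _PC_XATTR_SIZE_BITS);
 *
 * A volume with native extended attributes answers via VNOP_PATHCONF();
 * everything else reports AD_XATTR_SIZE_BITS (the AppleDouble limit).
 */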
1473
1474 static int
1475 vn_kqfilt_add(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
1476 {
1477 int error;
1478 struct vnode *vp;
1479
1480 vp = (struct vnode *)fp->f_fglob->fg_data;
1481
1482 /*
1483 * Don't attach a knote to a dead vnode.
1484 */
1485 if ((error = vget_internal(vp, 0, VNODE_NODEAD)) == 0) {
1486 switch (kn->kn_filter) {
1487 case EVFILT_READ:
1488 case EVFILT_WRITE:
1489 if (vnode_isfifo(vp)) {
1490 /* We'll only watch FIFOs that use our fifofs */
1491 if (!(vp->v_fifoinfo && vp->v_fifoinfo->fi_readsock)) {
1492 error = ENOTSUP;
1493 }
1494
1495 } else if (!vnode_isreg(vp)) {
1496 if (vnode_ischr(vp) &&
1497 (error = spec_kqfilter(vp, kn)) == 0) {
1498 /* claimed by a special device */
1499 vnode_put(vp);
1500 return 0;
1501 }
1502
1503 error = EINVAL;
1504 }
1505 break;
1506 case EVFILT_VNODE:
1507 break;
1508 default:
1509 error = EINVAL;
1510 }
1511
1512 if (error) {
1513 vnode_put(vp);
1514 return error;
1515 }
1516
1517 #if CONFIG_MACF
1518 error = mac_vnode_check_kqfilter(ctx, fp->f_fglob->fg_cred, kn, vp);
1519 if (error) {
1520 vnode_put(vp);
1521 return error;
1522 }
1523 #endif
1524
1525 kn->kn_hook = (void*)vp;
1526 kn->kn_hookid = vnode_vid(vp);
1527 kn->kn_fop = &vnode_filtops;
1528
1529 vnode_lock(vp);
1530 KNOTE_ATTACH(&vp->v_knotes, kn);
1531 vnode_unlock(vp);
1532
1533 /* Ask the filesystem to provide remove notifications, but ignore failure */
1534 VNOP_MONITOR(vp, 0, VNODE_MONITOR_BEGIN, (void*) kn, ctx);
1535
1536 vnode_put(vp);
1537 }
1538
1539 return (error);
1540 }
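/*
 * Illustrative sketch (assumption, not part of the original file): the
 * EVFILT_VNODE attach path above is driven by the usual kqueue idiom in
 * user space, e.g. watching a file for writes or deletion:
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *	int fd = open("/tmp/watched", O_EVTONLY);	/* hypothetical path */
 *
 *	EV_SET(&kev, fd, EVFILT_VNODE, EV_ADD | EV_CLEAR,
 *	    NOTE_DELETE | NOTE_WRITE | NOTE_REVOKE, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *	... a later kevent(kq, NULL, 0, &kev, 1, NULL) reports the events,
 *	    which are raised here via filt_vnode() below ...
 */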
1541
1542 static void
1543 filt_vndetach(struct knote *kn)
1544 {
1545 vfs_context_t ctx = vfs_context_current();
1546 struct vnode *vp;
1547 vp = (struct vnode *)kn->kn_hook;
1548 if (vnode_getwithvid(vp, kn->kn_hookid))
1549 return;
1550
1551 vnode_lock(vp);
1552 KNOTE_DETACH(&vp->v_knotes, kn);
1553 vnode_unlock(vp);
1554
1555 /*
1556 * Tell a (generally networked) filesystem that we're no longer watching.
1557 * If the FS wants to track contexts, it should still be using the one from
1558 * the VNODE_MONITOR_BEGIN.
1559 */
1560 VNOP_MONITOR(vp, 0, VNODE_MONITOR_END, (void*)kn, ctx);
1561 vnode_put(vp);
1562 }
1563
1564
1565 /*
1566 * Used for EVFILT_READ
1567 *
1568 * Takes only VFIFO or VREG. vnode is locked. We handle the "poll" case
1569 * differently than the regular case for VREG files. If not in poll(),
1570 * then we need to know current fileproc offset for VREG.
1571 */
1572 static intptr_t
1573 vnode_readable_data_count(vnode_t vp, off_t current_offset, int ispoll)
1574 {
1575 if (vnode_isfifo(vp)) {
1576 int cnt;
1577 int err = fifo_charcount(vp, &cnt);
1578 if (err == 0) {
1579 return (intptr_t)cnt;
1580 } else {
1581 return (intptr_t)0;
1582 }
1583 } else if (vnode_isreg(vp)) {
1584 if (ispoll) {
1585 return (intptr_t)1;
1586 }
1587
1588 off_t amount;
1589 amount = vp->v_un.vu_ubcinfo->ui_size - current_offset;
1590 if (amount > (off_t)INTPTR_MAX) {
1591 return INTPTR_MAX;
1592 } else if (amount < (off_t)INTPTR_MIN) {
1593 return INTPTR_MIN;
1594 } else {
1595 return (intptr_t)amount;
1596 }
1597 } else {
1598 panic("Should never have an EVFILT_READ except for reg or fifo.");
1599 return 0;
1600 }
1601 }
1602
1603 /*
1604 * Used for EVFILT_WRITE.
1605 *
1606 * For regular vnodes, we can always write (1). For named pipes,
1607 * see how much space there is in the buffer. Nothing else is covered.
1608 */
1609 static intptr_t
1610 vnode_writable_space_count(vnode_t vp)
1611 {
1612 if (vnode_isfifo(vp)) {
1613 long spc;
1614 int err = fifo_freespace(vp, &spc);
1615 if (err == 0) {
1616 return (intptr_t)spc;
1617 } else {
1618 return (intptr_t)0;
1619 }
1620 } else if (vnode_isreg(vp)) {
1621 return (intptr_t)1;
1622 } else {
1623 panic("Should never have an EVFILT_WRITE except for reg or fifo.");
1624 return 0;
1625 }
1626 }
1627
1628 /*
1629 * Determine whether this knote should be active
1630 *
1631 * This is kind of subtle.
1632 * --First, notice if the vnode has been revoked: if so, override hint
1633 * --EVFILT_READ knotes are checked no matter what the hint is
1634 * --Other knotes activate based on hint.
1635 * --If hint is revoke, set special flags and activate
1636 */
1637 static int
1638 filt_vnode(struct knote *kn, long hint)
1639 {
1640 vnode_t vp = (struct vnode *)kn->kn_hook;
1641 int activate = 0;
1642 long orig_hint = hint;
1643
1644 if (0 == hint) {
1645 vnode_lock(vp);
1646
1647 if (vnode_getiocount(vp, kn->kn_hookid, VNODE_NODEAD | VNODE_WITHID) != 0) {
1648 /* Is recycled */
1649 hint = NOTE_REVOKE;
1650 }
1651 } else {
1652 lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED);
1653 }
1654
1655 /* Special handling for vnodes that are in recycle or already gone */
1656 if (NOTE_REVOKE == hint) {
1657 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
1658 activate = 1;
1659
1660 if ((kn->kn_filter == EVFILT_VNODE) && (kn->kn_sfflags & NOTE_REVOKE)) {
1661 kn->kn_fflags |= NOTE_REVOKE;
1662 }
1663 } else {
1664 switch(kn->kn_filter) {
1665 case EVFILT_READ:
1666 kn->kn_data = vnode_readable_data_count(vp, kn->kn_fp->f_fglob->fg_offset, (kn->kn_flags & EV_POLL));
1667
1668 if (kn->kn_data != 0) {
1669 activate = 1;
1670 }
1671 break;
1672 case EVFILT_WRITE:
1673 kn->kn_data = vnode_writable_space_count(vp);
1674
1675 if (kn->kn_data != 0) {
1676 activate = 1;
1677 }
1678 break;
1679 case EVFILT_VNODE:
1680 /* Check events this note matches against the hint */
1681 if (kn->kn_sfflags & hint) {
1682 kn->kn_fflags |= hint; /* Set which event occurred */
1683 }
1684 if (kn->kn_fflags != 0) {
1685 activate = 1;
1686 }
1687 break;
1688 default:
1689 panic("Invalid knote filter on a vnode!\n");
1690 }
1691 }
1692
1693 if (orig_hint == 0) {
1694 /*
1695 * Definitely need to unlock, may need to put
1696 */
1697 if (hint == 0) {
1698 vnode_put_locked(vp);
1699 }
1700 vnode_unlock(vp);
1701 }
1702
1703 return (activate);
1704 }