/*
 * Source: apple/xnu (tag xnu-2050.7.9) — bsd/vfs/vfs_vnops.c
 * (provenance note recovered from git web-viewer scrape residue)
 */
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)vfs_vnops.c 8.14 (Berkeley) 6/15/95
67 *
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 #include <sys/param.h>
77 #include <sys/types.h>
78 #include <sys/systm.h>
79 #include <sys/kernel.h>
80 #include <sys/file_internal.h>
81 #include <sys/stat.h>
82 #include <sys/proc_internal.h>
83 #include <sys/kauth.h>
84 #include <sys/mount_internal.h>
85 #include <sys/namei.h>
86 #include <sys/vnode_internal.h>
87 #include <sys/ioctl.h>
88 #include <sys/tty.h>
/* Temporary workaround for ubc.h until <rdar://4714366> is resolved */
90 #define ubc_setcred ubc_setcred_deprecated
91 #include <sys/ubc.h>
92 #undef ubc_setcred
93 int ubc_setcred(struct vnode *, struct proc *);
94 #include <sys/conf.h>
95 #include <sys/disk.h>
96 #include <sys/fsevents.h>
97 #include <sys/kdebug.h>
98 #include <sys/xattr.h>
99 #include <sys/ubc_internal.h>
100 #include <sys/uio_internal.h>
101 #include <sys/resourcevar.h>
102 #include <sys/signalvar.h>
103
104 #include <vm/vm_kern.h>
105 #include <vm/vm_map.h>
106
107 #include <miscfs/specfs/specdev.h>
108 #include <miscfs/fifofs/fifo.h>
109
110 #if CONFIG_MACF
111 #include <security/mac_framework.h>
112 #endif
113
114 #if CONFIG_PROTECT
115 #include <sys/cprotect.h>
116 #endif
117
118
119 static int vn_closefile(struct fileglob *fp, vfs_context_t ctx);
120 static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data,
121 vfs_context_t ctx);
122 static int vn_read(struct fileproc *fp, struct uio *uio, int flags,
123 vfs_context_t ctx);
124 static int vn_write(struct fileproc *fp, struct uio *uio, int flags,
125 vfs_context_t ctx);
126 static int vn_select( struct fileproc *fp, int which, void * wql,
127 vfs_context_t ctx);
128 static int vn_kqfilt_add(struct fileproc *fp, struct knote *kn,
129 vfs_context_t ctx);
130 static void filt_vndetach(struct knote *kn);
131 static int filt_vnode(struct knote *kn, long hint);
132 static int vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx);
133 #if 0
134 static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident,
135 vfs_context_t ctx);
136 #endif
137
/*
 * File-descriptor operations table for vnode-backed files: routes the
 * generic read/write/ioctl/select/close/kqfilter file operations to the
 * vn_* implementations in this file.  The final slot is NULL (unused here).
 */
struct fileops vnops =
	{ vn_read, vn_write, vn_ioctl, vn_select, vn_closefile, vn_kqfilt_add, NULL };
140
/*
 * Knote filter operations for vnode kevents.  f_attach is NULL;
 * attachment is presumably handled by vn_kqfilt_add() via the fileops
 * table above (NOTE(review): confirm against kevent dispatch code).
 * Detach and event delivery go to the filt_vnode* routines.
 */
struct filterops vnode_filtops = {
	.f_isfd = 1,
	.f_attach = NULL,
	.f_detach = filt_vndetach,
	.f_event = filt_vnode
};
147
/*
 * Common code for vnode open operations: check permissions and invoke
 * VNOP_OPEN or VNOP_CREATE.  This entry point discards any flag updates
 * made during the open; use vn_open_modflags() to observe them.
 *
 * XXX the profusion of interfaces here is probably a bad thing.
 */
int
vn_open(struct nameidata *ndp, int fmode, int cmode)
{
	int mode = fmode;

	return vn_open_modflags(ndp, &mode, cmode);
}
159
160 int
161 vn_open_modflags(struct nameidata *ndp, int *fmodep, int cmode)
162 {
163 struct vnode_attr va;
164
165 VATTR_INIT(&va);
166 VATTR_SET(&va, va_mode, cmode);
167
168 return(vn_open_auth(ndp, fmodep, &va));
169 }
170
171 static int
172 vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx)
173 {
174 int error;
175
176 if ((error = vnode_ref_ext(vp, fmode, 0)) != 0) {
177 goto bad;
178 }
179
180 /* call out to allow 3rd party notification of open.
181 * Ignore result of kauth_authorize_fileop call.
182 */
183 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
184 (uintptr_t)vp, 0);
185
186 return 0;
187
188 bad:
189 return error;
190
191 }
192
/*
 * Create (and possibly open, via a compound VNOP) the regular file named
 * by ndp, as part of an open(O_CREAT).
 *
 * May do nameidone() to allow safely adding an FSEvent.  Cue off of ni_dvp to
 * determine whether that has happened: on return, ndp->ni_dvp is NULLVP and
 * its iocount dropped, EXCEPT when EKEEPLOOKING is returned, in which case
 * everything is left intact so the caller can continue the lookup.
 *
 * On success, *did_create reports whether a new node was created and
 * *did_open reports whether a compound VNOP already opened it (in which
 * case the caller must not issue a separate VNOP_OPEN()).
 */
static int
vn_open_auth_do_create(struct nameidata *ndp, struct vnode_attr *vap, int fmode, boolean_t *did_create, boolean_t *did_open, vfs_context_t ctx)
{
	uint32_t status = 0;
	vnode_t dvp = ndp->ni_dvp;
	int batched;
	int error;
	vnode_t vp;

	/* Can this filesystem do lookup+create+open in a single VNOP? */
	batched = vnode_compound_open_available(ndp->ni_dvp);
	*did_open = FALSE;

	VATTR_SET(vap, va_type, VREG);
	if (fmode & O_EXCL)
		vap->va_vaflags |= VA_EXCLUSIVE;

#if NAMEDRSRCFORK
	/* Resource forks are materialized as named streams, not via vn_create(). */
	if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) {
		if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0)
			goto out;
		if ((error = vnode_makenamedstream(dvp, &ndp->ni_vp, XATTR_RESOURCEFORK_NAME, 0, ctx)) != 0)
			goto out;
		*did_create = TRUE;
	} else {
#endif
		if (!batched) {
			/* Non-compound path: authorize here; compound FSes authorize internally. */
			if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0)
				goto out;
		}

		error = vn_create(dvp, &ndp->ni_vp, ndp, vap, VN_CREATE_DOOPEN, fmode, &status, ctx);
		if (error != 0) {
			if (batched) {
				/* A compound VNOP may have created the node before failing. */
				*did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? TRUE : FALSE;
			} else {
				*did_create = FALSE;
			}

			if (error == EKEEPLOOKING) {
				/* Sanity-check the "keep looking" contract before returning. */
				if (*did_create) {
					panic("EKEEPLOOKING, but we did a create?");
				}
				if (!batched) {
					panic("EKEEPLOOKING from filesystem that doesn't support compound vnops?");
				}
				if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
					panic("EKEEPLOOKING, but continue flag not set?");
				}

				/*
				 * Do NOT drop the dvp: we need everything to continue the lookup.
				 */
				return error;
			}
		} else {
			if (batched) {
				*did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? 1 : 0;
				/* A successful compound open leaves the file already open. */
				*did_open = TRUE;
			} else {
				*did_create = TRUE;
			}
		}
#if NAMEDRSRCFORK
	}
#endif

	/*
	 * Unlock the fsnode (if locked) here so that we are free
	 * to drop the dvp iocount and prevent deadlock in build_path().
	 * nameidone() will still do the right thing later.
	 */
	vp = ndp->ni_vp;
	namei_unlock_fsnode(ndp);

	if (*did_create) {
		int update_flags = 0;

		// Make sure the name & parent pointers are hooked up
		if (vp->v_name == NULL)
			update_flags |= VNODE_UPDATE_NAME;
		if (vp->v_parent == NULLVP)
			update_flags |= VNODE_UPDATE_PARENT;

		if (update_flags)
			vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags);

		/* Drop the directory iocount before posting the fsevent. */
		vnode_put(dvp);
		ndp->ni_dvp = NULLVP;

#if CONFIG_FSE
		if (need_fsevent(FSE_CREATE_FILE, vp)) {
			add_fsevent(FSE_CREATE_FILE, ctx,
				FSE_ARG_VNODE, vp,
				FSE_ARG_DONE);
		}
#endif
	}
out:
	/* All exits except EKEEPLOOKING drop the dvp iocount and clear ni_dvp. */
	if (ndp->ni_dvp != NULLVP) {
		vnode_put(dvp);
		ndp->ni_dvp = NULLVP;
	}

	return error;
}
302
/*
 * Open a file with authorization, updating the contents of the structures
 * pointed to by ndp, fmodep, and vap as necessary to perform the requested
 * operation.  This function is used for both opens of existing files, and
 * creation of new files.
 *
 * Parameters:	ndp			The namei data pointer describing the
 *					file
 *		fmodep			A pointer to an int containing the mode
 *					information to be used for the open
 *		vap			A pointer to the vnode attribute
 *					descriptor to be used for the open
 *
 * Indirect:	*			Contents of the data structures pointed
 *					to by the parameters are modified as
 *					necessary to the requested operation.
 *
 * Returns:	0			Success
 *		!0			errno value
 *
 * Notes:	The kauth_filesec_t in 'vap', if any, is in host byte order.
 *
 *		The contents of '*ndp' will be modified, based on the other
 *		arguments to this function, and to return file and directory
 *		data necessary to satisfy the requested operation.
 *
 *		If the file does not exist and we are creating it, then the
 *		O_TRUNC flag will be cleared in '*fmodep' to indicate to the
 *		caller that the file was not truncated.
 *
 *		If the file exists and the O_EXCL flag was not specified, then
 *		the O_CREAT flag will be cleared in '*fmodep' to indicate to
 *		the caller that the existing file was merely opened rather
 *		than created.
 *
 *		The contents of '*vap' will be modified as necessary to
 *		complete the operation, including setting of supported
 *		attribute, clearing of fields containing unsupported attributes
 *		in the request, if the request proceeds without them, etc..
 *
 * XXX:		This function is too complicated in acting on its arguments
 *
 * XXX:		We should enumerate the possible errno values here, and where
 *		in the code they originated.
 */
int
vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap)
{
	struct vnode *vp;
	struct vnode *dvp;
	vfs_context_t ctx = ndp->ni_cnd.cn_context;
	int error;
	int fmode;
	uint32_t origcnflags;
	boolean_t did_create;
	boolean_t did_open;
	boolean_t need_vnop_open;
	boolean_t batched;
	boolean_t ref_failed;

again:
	/*
	 * Restart point: taken on create/unlink races, EREDRIVEOPEN
	 * (tty allocation race), and failed reference grabs.  All
	 * per-attempt state is reset here.
	 */
	vp = NULL;
	dvp = NULL;
	batched = FALSE;
	did_create = FALSE;
	need_vnop_open = TRUE;
	ref_failed = FALSE;
	fmode = *fmodep;
	origcnflags = ndp->ni_cnd.cn_flags;

	/*
	 * O_CREAT
	 */
	if (fmode & O_CREAT) {
		if ( (fmode & O_DIRECTORY) ) {
			/* O_CREAT and O_DIRECTORY are mutually exclusive */
			error = EINVAL;
			goto out;
		}
		ndp->ni_cnd.cn_nameiop = CREATE;
#if CONFIG_TRIGGERS
		ndp->ni_op = OP_LINK;
#endif
		/* Inherit USEDVP, vnode_open() supported flags only */
		ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT);
		ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1;
		ndp->ni_flag = NAMEI_COMPOUNDOPEN;
#if NAMEDRSRCFORK
		/* open calls are allowed for resource forks. */
		ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
#endif
		/*
		 * Follow a trailing symlink only for non-exclusive creates,
		 * when O_NOFOLLOW wasn't given and the caller originally
		 * asked for FOLLOW semantics.
		 */
		if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0 && (origcnflags & FOLLOW) != 0)
			ndp->ni_cnd.cn_flags |= FOLLOW;

continue_create_lookup:
		if ( (error = namei(ndp)) )
			goto out;

		dvp = ndp->ni_dvp;
		vp = ndp->ni_vp;

		batched = vnode_compound_open_available(dvp);

		/* not found, create */
		if (vp == NULL) {
			/* must have attributes for a new file */
			if (vap == NULL) {
				error = EINVAL;
				goto out;
			}
			/*
			 * Attempt a create.   For a system supporting compound VNOPs, we may
			 * find an existing file or create one; in either case, we will already
			 * have the file open and no VNOP_OPEN() will be needed.
			 */
			error = vn_open_auth_do_create(ndp, vap, fmode, &did_create, &did_open, ctx);

			/* ni_dvp/ni_vp may have been changed by the create; reload */
			dvp = ndp->ni_dvp;
			vp = ndp->ni_vp;

			/*
			 * Detected a node that the filesystem couldn't handle.  Don't call
			 * nameidone() yet, because we need that path buffer.
			 */
			if (error == EKEEPLOOKING) {
				if (!batched) {
					panic("EKEEPLOOKING from a filesystem that doesn't support compound VNOPs?");
				}
				goto continue_create_lookup;
			}

			nameidone(ndp);
			if (dvp) {
				/* vn_open_auth_do_create() must have dropped dvp already */
				panic("Shouldn't have a dvp here.");
			}

			if (error) {
				/*
				 * Check for a creation or unlink race.
				 */
				if (((error == EEXIST) && !(fmode & O_EXCL)) ||
						((error == ENOENT) && (fmode & O_CREAT))){
					/* lost the race: retry the whole open from scratch */
					if (vp)
						vnode_put(vp);
					goto again;
				}
				goto bad;
			}

			/* A batched create may have opened the file already. */
			need_vnop_open = !did_open;
		}
		else {
			if (fmode & O_EXCL)
				error = EEXIST;

			/*
			 * We have a vnode.  Use compound open if available
			 * or else fall through to "traditional" path.  Note: can't
			 * do a compound open for root, because the parent belongs
			 * to a different FS.
			 */
			if (error == 0 && batched && (vnode_mount(dvp) == vnode_mount(vp))) {
				error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx);

				if (error == 0) {
					vp = ndp->ni_vp;
					need_vnop_open = FALSE;
				} else if (error == EKEEPLOOKING) {
					if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
						panic("EKEEPLOOKING, but continue flag not set?");
					}
					goto continue_create_lookup;
				}
			}
			nameidone(ndp);
			vnode_put(dvp);
			ndp->ni_dvp = NULLVP;

			if (error) {
				goto bad;
			}

			/* File existed: report "opened", not "created". */
			fmode &= ~O_CREAT;

			/* Fall through */
		}
	} else {
		/*
		 * Not O_CREAT
		 */
		ndp->ni_cnd.cn_nameiop = LOOKUP;
		/* Inherit USEDVP, vnode_open() supported flags only */
		ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT);
		ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1 | WANTPARENT;
#if NAMEDRSRCFORK
		/* open calls are allowed for resource forks. */
		ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
#endif
		ndp->ni_flag = NAMEI_COMPOUNDOPEN;

		/* preserve NOFOLLOW from vnode_open() */
		if (fmode & O_NOFOLLOW || fmode & O_SYMLINK || (origcnflags & FOLLOW) == 0) {
			ndp->ni_cnd.cn_flags &= ~FOLLOW;
		}

		/* Do a lookup, possibly going directly to filesystem for compound operation */
		do {
			if ( (error = namei(ndp)) )
				goto out;
			vp = ndp->ni_vp;
			dvp = ndp->ni_dvp;

			/* Check for batched lookup-open */
			batched = vnode_compound_open_available(dvp);
			if (batched && ((vp == NULLVP) || (vnode_mount(dvp) == vnode_mount(vp)))) {
				error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx);
				vp = ndp->ni_vp;
				if (error == 0) {
					need_vnop_open = FALSE;
				} else if (error == EKEEPLOOKING) {
					if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
						panic("EKEEPLOOKING, but continue flag not set?");
					}
				}
			}
		} while (error == EKEEPLOOKING);

		nameidone(ndp);
		vnode_put(dvp);
		ndp->ni_dvp = NULLVP;

		if (error) {
			goto bad;
		}
	}

	/*
	 * By this point, nameidone() is called, dvp iocount is dropped,
	 * and dvp pointer is cleared.
	 */
	if (ndp->ni_dvp != NULLVP) {
		panic("Haven't cleaned up adequately in vn_open_auth()");
	}

	/*
	 * Expect to use this code for filesystems without compound VNOPs, for the root
	 * of a filesystem, which can't be "looked up" in the sense of VNOP_LOOKUP(),
	 * and for shadow files, which do not live on the same filesystems as their "parents."
	 */
	if (need_vnop_open) {
		if (batched && !vnode_isvroot(vp) && !vnode_isnamedstream(vp)) {
			panic("Why am I trying to use VNOP_OPEN() on anything other than the root or a named stream?");
		}

		if (!did_create) {
			/* freshly-created nodes were authorized in vn_open_auth_do_create() */
			error = vn_authorize_open_existing(vp, &ndp->ni_cnd, fmode, ctx, NULL);
			if (error) {
				goto bad;
			}
		}

#if CONFIG_PROTECT
		/*
		 * Perform any content protection access checks prior to calling
		 * into the filesystem, if the raw encrypted mode was not
		 * requested.
		 *
		 * If the va_dataprotect_flags are NOT active, or if they are,
		 * but they do not have the VA_DP_RAWENCRYPTED bit set, then we need
		 * to perform the checks.
		 */
		if (!(VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) ||
		    ((vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) == 0)) {
			error = cp_handle_open (vp, fmode);
			if (error) {
				goto bad;
			}
		}
#endif

		error = VNOP_OPEN(vp, fmode, ctx);
		if (error) {
			goto bad;
		}
		need_vnop_open = FALSE;
	}

	// if the vnode is tagged VOPENEVT and the current process
	// has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
	// flag to the open mode so that this open won't count against
	// the vnode when carbon delete() does a vnode_isinuse() to see
	// if a file is currently in use.  this allows spotlight
	// importers to not interfere with carbon apps that depend on
	// the no-delete-if-busy semantics of carbon delete().
	//
	if (!did_create && (vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
		fmode |= O_EVTONLY;
	}

	/*
	 * Grab reference, etc.
	 */
	error = vn_open_auth_finish(vp, fmode, ctx);
	if (error) {
		ref_failed = TRUE;
		goto bad;
	}

	/* Compound VNOP open is responsible for doing the truncate */
	if (batched || did_create)
		fmode &= ~O_TRUNC;

	*fmodep = fmode;
	return (0);

bad:
	/* Opened either explicitly or by a batched create */
	if (!need_vnop_open) {
		VNOP_CLOSE(vp, fmode, ctx);
	}

	ndp->ni_vp = NULL;
	if (vp) {
#if NAMEDRSRCFORK
		/* Aggressively recycle shadow files if we error'd out during open() */
		if ((vnode_isnamedstream(vp)) &&
			(vp->v_parent != NULLVP) &&
			(vnode_isshadow(vp))) {
			vnode_recycle(vp);
		}
#endif
		vnode_put(vp);
		/*
		 * Check for a race against unlink.  We had a vnode
		 * but according to vnode_authorize or VNOP_OPEN it
		 * no longer exists.
		 *
		 * EREDRIVEOPEN: means that we were hit by the tty allocation race.
		 */
		if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN) || ref_failed) {
			goto again;
		}
	}

out:
	return (error);
}
649
#if vn_access_DEPRECATED
/*
 * Authorize an action against a vnode.  This has been the canonical way to
 * ensure that the credential/process/etc. referenced by a vfs_context
 * is granted the rights called out in 'mode' against the vnode 'vp'.
 *
 * Unfortunately, the use of VREAD/VWRITE/VEXEC makes it very difficult
 * to add support for more rights.  As such, this interface will be deprecated
 * and callers will use vnode_authorize instead.
 */
int
vn_access(vnode_t vp, int mode, vfs_context_t context)
{
	kauth_action_t action = 0;

	/* Translate the legacy V* bits into kauth vnode rights. */
	action |= (mode & VREAD)  ? KAUTH_VNODE_READ_DATA  : 0;
	action |= (mode & VWRITE) ? KAUTH_VNODE_WRITE_DATA : 0;
	action |= (mode & VEXEC)  ? KAUTH_VNODE_EXECUTE    : 0;

	return vnode_authorize(vp, NULL, action, context);
}
#endif	/* vn_access_DEPRECATED */
676
/*
 * Vnode close call
 *
 * Releases the open reference (vnode_rele_ext) and issues VNOP_CLOSE().
 * For special (device) vnodes the reference is dropped BEFORE the close
 * ("foxhound" workaround); for all other vnodes it is dropped after.
 * If the file was written (FWASWRITTEN), named-stream shadow data is
 * flushed first and an FSE_CONTENT_MODIFIED fsevent is posted.
 */
int
vn_close(struct vnode *vp, int flags, vfs_context_t ctx)
{
	int error;

#if NAMEDRSRCFORK
	/* Sync data from resource fork shadow file if needed. */
	if ((vp->v_flag & VISNAMEDSTREAM) &&
	    (vp->v_parent != NULLVP) &&
	    vnode_isshadow(vp)) {
		if (flags & FWASWRITTEN) {
			(void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
		}
	}
#endif

	/* work around for foxhound */
	if (vnode_isspec(vp))
		(void)vnode_rele_ext(vp, flags, 0);

	error = VNOP_CLOSE(vp, flags, ctx);

#if CONFIG_FSE
	if (flags & FWASWRITTEN) {
		if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) {
			add_fsevent(FSE_CONTENT_MODIFIED, ctx,
				FSE_ARG_VNODE, vp,
				FSE_ARG_DONE);
		}
	}
#endif

	/* Non-spec vnodes drop their open reference after VNOP_CLOSE(). */
	if (!vnode_isspec(vp))
		(void)vnode_rele_ext(vp, flags, 0);

	return (error);
}
717
718 static int
719 vn_read_swapfile(
720 struct vnode *vp,
721 uio_t uio)
722 {
723 int error;
724 off_t swap_count, this_count;
725 off_t file_end, read_end;
726 off_t prev_resid;
727 char *my_swap_page;
728
729 /*
730 * Reading from a swap file will get you zeroes.
731 */
732
733 my_swap_page = NULL;
734 error = 0;
735 swap_count = uio_resid(uio);
736
737 file_end = ubc_getsize(vp);
738 read_end = uio->uio_offset + uio_resid(uio);
739 if (uio->uio_offset >= file_end) {
740 /* uio starts after end of file: nothing to read */
741 swap_count = 0;
742 } else if (read_end > file_end) {
743 /* uio extends beyond end of file: stop before that */
744 swap_count -= (read_end - file_end);
745 }
746
747 while (swap_count > 0) {
748 if (my_swap_page == NULL) {
749 MALLOC(my_swap_page, char *, PAGE_SIZE,
750 M_TEMP, M_WAITOK);
751 memset(my_swap_page, '\0', PAGE_SIZE);
752 /* add an end-of-line to keep line counters happy */
753 my_swap_page[PAGE_SIZE-1] = '\n';
754 }
755 this_count = swap_count;
756 if (this_count > PAGE_SIZE) {
757 this_count = PAGE_SIZE;
758 }
759
760 prev_resid = uio_resid(uio);
761 error = uiomove((caddr_t) my_swap_page,
762 this_count,
763 uio);
764 if (error) {
765 break;
766 }
767 swap_count -= (prev_resid - uio_resid(uio));
768 }
769 if (my_swap_page != NULL) {
770 FREE(my_swap_page, M_TEMP);
771 my_swap_page = NULL;
772 }
773
774 return error;
775 }
776 /*
777 * Package up an I/O request on a vnode into a uio and do it.
778 */
779 int
780 vn_rdwr(
781 enum uio_rw rw,
782 struct vnode *vp,
783 caddr_t base,
784 int len,
785 off_t offset,
786 enum uio_seg segflg,
787 int ioflg,
788 kauth_cred_t cred,
789 int *aresid,
790 proc_t p)
791 {
792 int64_t resid;
793 int result;
794
795 result = vn_rdwr_64(rw,
796 vp,
797 (uint64_t)(uintptr_t)base,
798 (int64_t)len,
799 offset,
800 segflg,
801 ioflg,
802 cred,
803 &resid,
804 p);
805
806 /* "resid" should be bounded above by "len," which is an int */
807 if (aresid != NULL) {
808 *aresid = resid;
809 }
810
811 return result;
812 }
813
814
/*
 * 64-bit-clean core of vn_rdwr(): package an I/O request on a vnode into
 * a single-iovec uio and issue it under an ad-hoc vfs_context built from
 * the current thread and the supplied credential.
 *
 * Parameters:	rw	UIO_READ or UIO_WRITE
 *		vp	vnode to perform I/O against
 *		base	buffer address (user or kernel, per segflg)
 *		len	number of bytes to transfer
 *		offset	starting file offset
 *		segflg	address space of 'base'
 *		ioflg	IO_* flags passed to the VNOP; IO_NOAUTH skips the
 *			MACF check
 *		cred	credential for the operation
 *		aresid	if non-NULL, receives the residual byte count;
 *			if NULL, a short transfer with no error becomes EIO
 *		p	process, used to select 32- vs 64-bit user uio layout
 *
 * Returns:	0	Success
 *		!0	errno value
 */
int
vn_rdwr_64(
	enum uio_rw rw,
	struct vnode *vp,
	uint64_t base,
	int64_t len,
	off_t offset,
	enum uio_seg segflg,
	int ioflg,
	kauth_cred_t cred,
	int64_t *aresid,
	proc_t p)
{
	uio_t auio;
	int spacetype;
	struct vfs_context context;
	int error=0;
	char uio_buf[ UIO_SIZEOF(1) ];	/* stack storage for the one-iovec uio */

	context.vc_thread = current_thread();
	context.vc_ucred = cred;

	if (UIO_SEG_IS_USER_SPACE(segflg)) {
		/* user addresses need the layout matching the process's ABI */
		spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	}
	else {
		spacetype = UIO_SYSSPACE;
	}
	auio = uio_createwithbuffer(1, offset, spacetype, rw,
				  &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, base, len);

#if CONFIG_MACF
	/* XXXMAC
	 * 	IO_NOAUTH should be re-examined.
 	 *	Likely that mediation should be performed in caller.
	 */
	if ((ioflg & IO_NOAUTH) == 0) {
		/* passed cred is fp->f_cred */
		if (rw == UIO_READ)
			error = mac_vnode_check_read(&context, cred, vp);
		else
			error = mac_vnode_check_write(&context, cred, vp);
	}
#endif

	if (error == 0) {
		if (rw == UIO_READ) {
			if (vnode_isswap(vp)) {
				/* swap files must read back as zeroes */
				error = vn_read_swapfile(vp, auio);
			} else {
				error = VNOP_READ(vp, auio, ioflg, &context);
			}
		} else {
			error = VNOP_WRITE(vp, auio, ioflg, &context);
		}
	}

	if (aresid)
		*aresid = uio_resid(auio);
	else
		if (uio_resid(auio) && error == 0)
			error = EIO;	/* short transfer with nowhere to report it */
	return (error);
}
880
/*
 * File table vnode read routine.
 *
 * Translates fileglob flags (FNONBLOCK/FNOCACHE/FENCRYPTED/FNORDAHEAD)
 * into IO_* flags, performs the MACF read check, and dispatches to
 * VNOP_READ() — or vn_read_swapfile() for swap files, which must read
 * back zeroes.  Unless FOF_OFFSET is set, the shared file offset in the
 * fileglob is used as the starting position and advanced by the number
 * of bytes actually transferred.
 */
static int
vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
{
	struct vnode *vp;
	int error, ioflag;
	off_t count;

	vp = (struct vnode *)fp->f_fglob->fg_data;
	/* hold the vnode across the I/O; fail if it can't be obtained */
	if ( (error = vnode_getwithref(vp)) ) {
		return(error);
	}

#if CONFIG_MACF
	error = mac_vnode_check_read(ctx, vfs_context_ucred(ctx), vp);
	if (error) {
		(void)vnode_put(vp);
		return (error);
	}
#endif

	/* This signals to VNOP handlers that this read came from a file table read */
	ioflag = IO_SYSCALL_DISPATCH;

	if (fp->f_fglob->fg_flag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
		ioflag |= IO_NOCACHE;
	if (fp->f_fglob->fg_flag & FENCRYPTED) {
		ioflag |= IO_ENCRYPTED;
	}
	if (fp->f_fglob->fg_flag & FNORDAHEAD)
		ioflag |= IO_RAOFF;

	if ((flags & FOF_OFFSET) == 0)
		/* regular read(2): start at the shared file offset */
		uio->uio_offset = fp->f_fglob->fg_offset;
	count = uio_resid(uio);

	if (vnode_isswap(vp)) {
		/* special case for swap files */
		error = vn_read_swapfile(vp, uio);
	} else {
		error = VNOP_READ(vp, uio, ioflag, ctx);
	}
	if ((flags & FOF_OFFSET) == 0)
		/* advance the shared offset by the bytes actually read */
		fp->f_fglob->fg_offset += count - uio_resid(uio);

	(void)vnode_put(vp);
	return (error);
}
933
934
/*
 * File table vnode write routine.
 *
 * Translates fileglob flags into IO_* flags, performs the MACF write
 * check, enforces RLIMIT_FSIZE — clipping the request (and restoring the
 * clipped residual afterward) or failing with SIGXFSZ/EFBIG when nothing
 * can be written — then issues VNOP_WRITE().  Unless FOF_OFFSET is set,
 * the shared fileglob offset is used and advanced; for O_APPEND the
 * resulting uio offset (chosen by the filesystem) is adopted.  On NFS
 * vnodes with UBC info, the writing credential is cached on the vnode
 * via ubc_setthreadcred()/ubc_setcred().
 */
static int
vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
{
	struct vnode *vp;
	int error, ioflag;
	off_t count;
	int clippedsize = 0;	/* bytes removed from the request by RLIMIT_FSIZE */
	int partialwrite=0;	/* set when the request was clipped */
	int residcount, oldcount;
	proc_t p = vfs_context_proc(ctx);

	count = 0;
	vp = (struct vnode *)fp->f_fglob->fg_data;
	/* hold the vnode across the I/O; fail if it can't be obtained */
	if ( (error = vnode_getwithref(vp)) ) {
		return(error);
	}

#if CONFIG_MACF
	error = mac_vnode_check_write(ctx, vfs_context_ucred(ctx), vp);
	if (error) {
		(void)vnode_put(vp);
		return (error);
	}
#endif

	/*
	 * IO_SYSCALL_DISPATCH signals to VNOP handlers that this write originated
	 * from a file table write.
	 */
	ioflag = (IO_UNIT | IO_SYSCALL_DISPATCH);

	if (vp->v_type == VREG && (fp->f_fglob->fg_flag & O_APPEND))
		ioflag |= IO_APPEND;
	if (fp->f_fglob->fg_flag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
	        ioflag |= IO_NOCACHE;
	if (fp->f_fglob->fg_flag & FNODIRECT)
		ioflag |= IO_NODIRECT;
	if (fp->f_fglob->fg_flag & FSINGLE_WRITER)
		ioflag |= IO_SINGLE_WRITER;

	/*
	 * Treat synchronous mounts and O_FSYNC on the fd as equivalent.
	 *
	 * XXX We treat O_DSYNC as O_FSYNC for now, since we can not delay
	 * XXX the non-essential metadata without some additional VFS work;
	 * XXX the intent at this point is to plumb the interface for it.
	 */
	if ((fp->f_fglob->fg_flag & (O_FSYNC|O_DSYNC)) ||
		(vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) {
		ioflag |= IO_SYNC;
	}

	if ((flags & FOF_OFFSET) == 0) {
		/* regular write(2): start at the shared file offset */
		uio->uio_offset = fp->f_fglob->fg_offset;
		count = uio_resid(uio);
	}
	if (((flags & FOF_OFFSET) == 0) &&
	 	vfs_context_proc(ctx) && (vp->v_type == VREG) &&
            (((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) ||
	     ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)))) {
	     	/*
		 * If the requested residual would cause us to go past the
		 * administrative limit, then we need to adjust the residual
		 * down to cause fewer bytes than requested to be written.  If
		 * we can't do that (e.g. the residual is already 1 byte),
		 * then we fail the write with EFBIG.
		 */
		residcount = uio_resid(uio);
		if ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
			clippedsize = (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
		} else if ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)) {
			clippedsize = (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset);
		}
		if (clippedsize >= residcount) {
			/* not even one byte can be written within the limit */
			psignal(p, SIGXFSZ);
			vnode_put(vp);
			return (EFBIG);
		}
		partialwrite = 1;
		uio_setresid(uio, residcount-clippedsize);
	}
	if ((flags & FOF_OFFSET) != 0) {
		/* for pwrite, append should be ignored */
		ioflag &= ~IO_APPEND;
		if (p && (vp->v_type == VREG) &&
			((rlim_t)uio->uio_offset >= p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
			/* starting offset is already at/over the limit */
			psignal(p, SIGXFSZ);
			vnode_put(vp);
			return (EFBIG);
		}
		if (p && (vp->v_type == VREG) &&
			((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
			//Debugger("vn_bwrite:overstepping the bounds");
			residcount = uio_resid(uio);
			clippedsize =  (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
			partialwrite = 1;
			uio_setresid(uio, residcount-clippedsize);
		}
	}

	error = VNOP_WRITE(vp, uio, ioflag, ctx);

	if (partialwrite) {
		/* restore the clipped bytes so the caller sees them as unwritten */
		oldcount = uio_resid(uio);
		uio_setresid(uio, oldcount + clippedsize);
	}

	if ((flags & FOF_OFFSET) == 0) {
		if (ioflag & IO_APPEND)
			/* append: adopt the offset the filesystem chose */
			fp->f_fglob->fg_offset = uio->uio_offset;
		else
			fp->f_fglob->fg_offset += count - uio_resid(uio);
	}

	/*
	 * Set the credentials on successful writes
	 */
	if ((error == 0) && (vp->v_tag == VT_NFS) && (UBCINFOEXISTS(vp))) {
		/*
		 * When called from aio subsystem, we only have the proc from
		 * which to get the credential, at this point, so use that
		 * instead.  This means aio functions are incompatible with
		 * per-thread credentials (aio operations are proxied).  We
		 * can't easily correct the aio vs. settid race in this case
		 * anyway, so we disallow it.
		 */
		if ((flags & FOF_PCRED) == 0) {
			ubc_setthreadcred(vp, p, current_thread());
		} else {
			ubc_setcred(vp, p);
		}
	}
	(void)vnode_put(vp);
	return (error);
}
1075
1076 /*
1077 * File table vnode stat routine.
1078 *
1079 * Returns: 0 Success
1080 * EBADF
1081 * ENOMEM
1082 * vnode_getattr:???
1083 */
int
vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx)
{
	struct vnode_attr va;
	int error;
	u_short mode;
	kauth_filesec_t fsec;
	struct stat *sb = (struct stat *)0;	/* warning avoidance ; protected by isstat64 */
	struct stat64 * sb64 = (struct stat64 *)0;  /* warning avoidance ; protected by isstat64 */

	/* sbptr points at either a struct stat or a struct stat64; isstat64 selects which */
	if (isstat64 != 0)
		sb64 = (struct stat64 *)sbptr;
	else
		sb = (struct stat *)sbptr;
	memset(&va, 0, sizeof(va));
	VATTR_INIT(&va);
	/* request every attribute needed to populate the stat buffer */
	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_fileid);
	VATTR_WANTED(&va, va_mode);
	VATTR_WANTED(&va, va_type);
	VATTR_WANTED(&va, va_nlink);
	VATTR_WANTED(&va, va_uid);
	VATTR_WANTED(&va, va_gid);
	VATTR_WANTED(&va, va_rdev);
	VATTR_WANTED(&va, va_data_size);
	VATTR_WANTED(&va, va_access_time);
	VATTR_WANTED(&va, va_modify_time);
	VATTR_WANTED(&va, va_change_time);
	VATTR_WANTED(&va, va_create_time);
	VATTR_WANTED(&va, va_flags);
	VATTR_WANTED(&va, va_gen);
	VATTR_WANTED(&va, va_iosize);
	/* lower layers will synthesise va_total_alloc from va_data_size if required */
	VATTR_WANTED(&va, va_total_alloc);
	if (xsec != NULL) {
		/* caller also wants extended security data (ACL, owner/group GUIDs) */
		VATTR_WANTED(&va, va_uuuid);
		VATTR_WANTED(&va, va_guuid);
		VATTR_WANTED(&va, va_acl);
	}
	error = vnode_getattr(vp, &va, ctx);
	if (error)
		goto out;
	/*
	 * Copy from vattr table
	 */
	if (isstat64 != 0) {
		sb64->st_dev = va.va_fsid;
		sb64->st_ino = (ino64_t)va.va_fileid;

	} else {
		sb->st_dev = va.va_fsid;
		sb->st_ino = (ino_t)va.va_fileid;
	}
	mode = va.va_mode;
	/* fold the vnode type into the S_IF* bits of the file mode */
	switch (vp->v_type) {
	case VREG:
		mode |= S_IFREG;
		break;
	case VDIR:
		mode |= S_IFDIR;
		break;
	case VBLK:
		mode |= S_IFBLK;
		break;
	case VCHR:
		mode |= S_IFCHR;
		break;
	case VLNK:
		mode |= S_IFLNK;
		break;
	case VSOCK:
		mode |= S_IFSOCK;
		break;
	case VFIFO:
		mode |= S_IFIFO;
		break;
	default:
		/* any other vnode type cannot be represented in a stat buffer */
		error = EBADF;
		goto out;
	};
	if (isstat64 != 0) {
		sb64->st_mode = mode;
		/* synthesise a link count of 1 if the FS did not supply one */
		sb64->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
		sb64->st_uid = va.va_uid;
		sb64->st_gid = va.va_gid;
		sb64->st_rdev = va.va_rdev;
		sb64->st_size = va.va_data_size;
		sb64->st_atimespec = va.va_access_time;
		sb64->st_mtimespec = va.va_modify_time;
		sb64->st_ctimespec = va.va_change_time;
		/* fall back to the change time if the FS has no creation time */
		sb64->st_birthtimespec =
				VATTR_IS_SUPPORTED(&va, va_create_time) ? va.va_create_time : va.va_change_time;
		sb64->st_blksize = va.va_iosize;
		sb64->st_flags = va.va_flags;
		/* st_blocks is expressed in 512-byte units */
		sb64->st_blocks = roundup(va.va_total_alloc, 512) / 512;
	} else {
		sb->st_mode = mode;
		sb->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
		sb->st_uid = va.va_uid;
		sb->st_gid = va.va_gid;
		sb->st_rdev = va.va_rdev;
		sb->st_size = va.va_data_size;
		sb->st_atimespec = va.va_access_time;
		sb->st_mtimespec = va.va_modify_time;
		sb->st_ctimespec = va.va_change_time;
		sb->st_blksize = va.va_iosize;
		sb->st_flags = va.va_flags;
		sb->st_blocks = roundup(va.va_total_alloc, 512) / 512;
	}

	/* if we're interested in extended security data and we got an ACL */
	if (xsec != NULL) {
		if (!VATTR_IS_SUPPORTED(&va, va_acl) &&
		    !VATTR_IS_SUPPORTED(&va, va_uuuid) &&
		    !VATTR_IS_SUPPORTED(&va, va_guuid)) {
			/* FS supports none of the extended security attributes */
			*xsec = KAUTH_FILESEC_NONE;
		} else {

			/* size the filesec by the ACL entry count (0 entries if no ACL) */
			if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
				fsec = kauth_filesec_alloc(va.va_acl->acl_entrycount);
			} else {
				fsec = kauth_filesec_alloc(0);
			}
			if (fsec == NULL) {
				error = ENOMEM;
				goto out;
			}
			fsec->fsec_magic = KAUTH_FILESEC_MAGIC;
			/* use the null GUID for owner/group the FS could not supply */
			if (VATTR_IS_SUPPORTED(&va, va_uuuid)) {
				fsec->fsec_owner = va.va_uuuid;
			} else {
				fsec->fsec_owner = kauth_null_guid;
			}
			if (VATTR_IS_SUPPORTED(&va, va_guuid)) {
				fsec->fsec_group = va.va_guuid;
			} else {
				fsec->fsec_group = kauth_null_guid;
			}
			if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
				bcopy(va.va_acl, &(fsec->fsec_acl), KAUTH_ACL_COPYSIZE(va.va_acl));
			} else {
				fsec->fsec_acl.acl_entrycount = KAUTH_FILESEC_NOACL;
			}
			/* ownership of the filesec transfers to the caller */
			*xsec = fsec;
		}
	}

	/* Do not give the generation number out to unprivileged users */
	if (va.va_gen && !vfs_context_issuser(ctx)) {
		if (isstat64 != 0)
			sb64->st_gen = 0;
		else
			sb->st_gen = 0;
	} else {
		if (isstat64 != 0)
			sb64->st_gen = va.va_gen;
		else
			sb->st_gen = va.va_gen;
	}

	error = 0;
out:
	/* release the ACL the FS allocated for us in vnode_getattr(), if any */
	if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL)
		kauth_acl_free(va.va_acl);
	return (error);
}
1250
1251 int
1252 vn_stat(struct vnode *vp, void *sb, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx)
1253 {
1254 int error;
1255
1256 #if CONFIG_MACF
1257 error = mac_vnode_check_stat(ctx, NOCRED, vp);
1258 if (error)
1259 return (error);
1260 #endif
1261
1262 /* authorize */
1263 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_ATTRIBUTES | KAUTH_VNODE_READ_SECURITY, ctx)) != 0)
1264 return(error);
1265
1266 /* actual stat */
1267 return(vn_stat_noauth(vp, sb, xsec, isstat64, ctx));
1268 }
1269
1270
1271 /*
1272 * File table vnode ioctl routine.
1273 */
static int
vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
{
	struct vnode *vp = ((struct vnode *)fp->f_fglob->fg_data);
	off_t file_size;
	int error;
	struct vnode *ttyvp;
	int funnel_state;
	struct session * sessp;

	/* take an iocount; fails if the vnode is being reclaimed */
	if ( (error = vnode_getwithref(vp)) ) {
		return(error);
	}

#if CONFIG_MACF
	error = mac_vnode_check_ioctl(ctx, vp, com);
	if (error)
		goto out;
#endif

	switch (vp->v_type) {
	case VREG:
	case VDIR:
		if (com == FIONREAD) {
			/* report bytes between the current file offset and EOF */
			if ((error = vnode_size(vp, &file_size, ctx)) != 0)
				goto out;
			*(int *)data = file_size - fp->f_fglob->fg_offset;
			goto out;
		}
		if (com == FIONBIO || com == FIOASYNC) {	/* XXX */
			goto out;
		}
		/* fall into ... */

	default:
		error = ENOTTY;
		goto out;

	case VFIFO:
	case VCHR:
	case VBLK:

		/* Should not be able to set block size from user space */
		if (com == DKIOCSETBLOCKSIZE) {
			error = EPERM;
			goto out;
		}

		if (com == FIODTYPE) {
			/* return the device type bits from the block/char device switch */
			if (vp->v_type == VBLK) {
				if (major(vp->v_rdev) >= nblkdev) {
					error = ENXIO;
					goto out;
				}
				*(int *)data = D_TYPEMASK & bdevsw[major(vp->v_rdev)].d_type;

			} else if (vp->v_type == VCHR) {
				if (major(vp->v_rdev) >= nchrdev) {
					error = ENXIO;
					goto out;
				}
				*(int *)data = D_TYPEMASK & cdevsw[major(vp->v_rdev)].d_type;
			} else {
				error = ENOTTY;
				goto out;
			}
			goto out;
		}
		error = VNOP_IOCTL(vp, com, data, fp->f_fglob->fg_flag, ctx);

		if (error == 0 && com == TIOCSCTTY) {
			/*
			 * This vnode is becoming the session's controlling
			 * terminal: take a forced usecount reference that
			 * will be held via sessp->s_ttyvp.
			 */
			error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE);
			if (error != 0) {
				panic("vnode_ref_ext() failed despite VNODE_REF_FORCE?!");
			}

			funnel_state = thread_funnel_set(kernel_flock, TRUE);
			sessp = proc_session(vfs_context_proc(ctx));

			/* swap the session's controlling tty under the session lock */
			session_lock(sessp);
			ttyvp = sessp->s_ttyvp;
			sessp->s_ttyvp = vp;
			sessp->s_ttyvid = vnode_vid(vp);
			session_unlock(sessp);
			session_rele(sessp);
			thread_funnel_set(kernel_flock, funnel_state);

			/* drop the reference held on the previous controlling tty, if any */
			if (ttyvp)
				vnode_rele(ttyvp);
		}
	}
out:
	(void)vnode_put(vp);
	return(error);
}
1369
1370 /*
1371 * File table vnode select routine.
1372 */
static int
vn_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx)
{
	int error;
	struct vnode * vp = (struct vnode *)fp->f_fglob->fg_data;
	struct vfs_context context;

	if ( (error = vnode_getwithref(vp)) == 0 ) {
		/*
		 * NOTE(review): 'context' is initialized here but never used
		 * in this function; VNOP_SELECT is passed the caller's ctx.
		 */
		context.vc_thread = current_thread();
		context.vc_ucred = fp->f_fglob->fg_cred;

#if CONFIG_MACF
		/*
		 * XXX We should use a per thread credential here; minimally,
		 * XXX the process credential should have a persistent
		 * XXX reference on it before being passed in here.
		 */
		error = mac_vnode_check_select(ctx, vp, which);
		if (error == 0)
#endif
		error = VNOP_SELECT(vp, which, fp->f_fglob->fg_flag, wql, ctx);

		(void)vnode_put(vp);
	}
	return(error);

}
1400
1401 /*
1402 * File table vnode close routine.
1403 */
1404 static int
1405 vn_closefile(struct fileglob *fg, vfs_context_t ctx)
1406 {
1407 struct vnode *vp = (struct vnode *)fg->fg_data;
1408 int error;
1409 struct flock lf;
1410
1411 if ( (error = vnode_getwithref(vp)) == 0 ) {
1412
1413 if ((fg->fg_flag & FHASLOCK) && fg->fg_type == DTYPE_VNODE) {
1414 lf.l_whence = SEEK_SET;
1415 lf.l_start = 0;
1416 lf.l_len = 0;
1417 lf.l_type = F_UNLCK;
1418
1419 (void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, ctx);
1420 }
1421 error = vn_close(vp, fg->fg_flag, ctx);
1422
1423 (void)vnode_put(vp);
1424 }
1425 return(error);
1426 }
1427
1428 /*
1429 * Returns: 0 Success
1430 * VNOP_PATHCONF:???
1431 */
int
vn_pathconf(vnode_t vp, int name, int32_t *retval, vfs_context_t ctx)
{
	int error = 0;
	struct vfs_attr vfa;

	/* answer the variables we can from generic knowledge; punt the rest to the FS */
	switch(name) {
	case _PC_EXTENDED_SECURITY_NP:
		*retval = vfs_extendedsecurity(vnode_mount(vp)) ? 1 : 0;
		break;
	case _PC_AUTH_OPAQUE_NP:
		*retval = vfs_authopaque(vnode_mount(vp));
		break;
	case _PC_2_SYMLINKS:
		*retval = 1;	/* XXX NOTSUP on MSDOS, etc. */
		break;
	case _PC_ALLOC_SIZE_MIN:
		*retval = 1;	/* XXX lie: 1 byte */
		break;
	case _PC_ASYNC_IO:	/* unistd.h: _POSIX_ASYNCHRONUS_IO */
		*retval = 1;	/* [AIO] option is supported */
		break;
	case _PC_PRIO_IO:	/* unistd.h: _POSIX_PRIORITIZED_IO */
		*retval = 0;	/* [PIO] option is not supported */
		break;
	case _PC_REC_INCR_XFER_SIZE:
		*retval = 4096;	/* XXX go from MIN to MAX 4K at a time */
		break;
	case _PC_REC_MIN_XFER_SIZE:
		*retval = 4096;	/* XXX recommend 4K minimum reads/writes */
		break;
	case _PC_REC_MAX_XFER_SIZE:
		*retval = 65536; /* XXX recommend 64K maximum reads/writes */
		break;
	case _PC_REC_XFER_ALIGN:
		*retval = 4096;	/* XXX recommend page aligned buffers */
		break;
	case _PC_SYMLINK_MAX:
		*retval = 255;	/* Minimum acceptable POSIX value */
		break;
	case _PC_SYNC_IO:	/* unistd.h: _POSIX_SYNCHRONIZED_IO */
		*retval = 0;	/* [SIO] option is not supported */
		break;
	case _PC_XATTR_SIZE_BITS:
		/* The number of bits used to store maximum extended
		 * attribute size in bytes.  For example, if the maximum
		 * attribute size supported by a file system is 128K, the
		 * value returned will be 18.  However a value 18 can mean
		 * that the maximum attribute size can be anywhere from
		 * (256KB - 1) to 128KB.  As a special case, the resource
		 * fork can have much larger size, and some file system
		 * specific extended attributes can have smaller and preset
		 * size; for example, Finder Info is always 32 bytes.
		 */
		memset(&vfa, 0, sizeof(vfa));
		VFSATTR_INIT(&vfa);
		VFSATTR_WANTED(&vfa, f_capabilities);
		if (vfs_getattr(vnode_mount(vp), &vfa, ctx) == 0 &&
		    (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) &&
		    (vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
		    (vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
			/* Supports native extended attributes */
			error = VNOP_PATHCONF(vp, name, retval, ctx);
		} else {
			/* Number of bits used to represent the maximum size of
			 * extended attribute stored in an Apple Double file.
			 */
			*retval = AD_XATTR_SIZE_BITS;
		}
		break;
	default:
		/* everything else is filesystem-specific */
		error = VNOP_PATHCONF(vp, name, retval, ctx);
		break;
	}

	return (error);
}
1509
static int
vn_kqfilt_add(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
{
	int error;
	struct vnode *vp;

	vp = (struct vnode *)fp->f_fglob->fg_data;

	/*
	 * Don't attach a knote to a dead vnode.
	 */
	if ((error = vget_internal(vp, 0, VNODE_NODEAD)) == 0) {
		/* validate the filter/vnode-type combination before attaching */
		switch (kn->kn_filter) {
		case EVFILT_READ:
		case EVFILT_WRITE:
			if (vnode_isfifo(vp)) {
				/* We'll only watch FIFOs that use our fifofs */
				if (!(vp->v_fifoinfo && vp->v_fifoinfo->fi_readsock)) {
					error = ENOTSUP;
				}

			} else if (!vnode_isreg(vp)) {
				if (vnode_ischr(vp) &&
						(error = spec_kqfilter(vp, kn)) == 0) {
					/* claimed by a special device */
					vnode_put(vp);
					return 0;
				}

				/* read/write filters only apply to regular files and FIFOs */
				error = EINVAL;
			}
			break;
		case EVFILT_VNODE:
			break;
		default:
			error = EINVAL;
		}

		if (error) {
			vnode_put(vp);
			return error;
		}

#if CONFIG_MACF
		error = mac_vnode_check_kqfilter(ctx, fp->f_fglob->fg_cred, kn, vp);
		if (error) {
			vnode_put(vp);
			return error;
		}
#endif

		/* record the vnode and its vid so the filter can re-validate it later */
		kn->kn_hook = (void*)vp;
		kn->kn_hookid = vnode_vid(vp);
		kn->kn_fop = &vnode_filtops;

		vnode_lock(vp);
		KNOTE_ATTACH(&vp->v_knotes, kn);
		vnode_unlock(vp);

		/* Ask the filesystem to provide remove notifications, but ignore failure */
		VNOP_MONITOR(vp, 0, VNODE_MONITOR_BEGIN, (void*) kn, ctx);

		vnode_put(vp);
	}

	return (error);
}
1577
1578 static void
1579 filt_vndetach(struct knote *kn)
1580 {
1581 vfs_context_t ctx = vfs_context_current();
1582 struct vnode *vp;
1583 vp = (struct vnode *)kn->kn_hook;
1584 if (vnode_getwithvid(vp, kn->kn_hookid))
1585 return;
1586
1587 vnode_lock(vp);
1588 KNOTE_DETACH(&vp->v_knotes, kn);
1589 vnode_unlock(vp);
1590
1591 /*
1592 * Tell a (generally networked) filesystem that we're no longer watching
1593 * If the FS wants to track contexts, it should still be using the one from
1594 * the VNODE_MONITOR_BEGIN.
1595 */
1596 VNOP_MONITOR(vp, 0, VNODE_MONITOR_END, (void*)kn, ctx);
1597 vnode_put(vp);
1598 }
1599
1600
1601 /*
1602 * Used for EVFILT_READ
1603 *
1604 * Takes only VFIFO or VREG. vnode is locked. We handle the "poll" case
1605 * differently than the regular case for VREG files. If not in poll(),
1606 * then we need to know current fileproc offset for VREG.
1607 */
1608 static intptr_t
1609 vnode_readable_data_count(vnode_t vp, off_t current_offset, int ispoll)
1610 {
1611 if (vnode_isfifo(vp)) {
1612 int cnt;
1613 int err = fifo_charcount(vp, &cnt);
1614 if (err == 0) {
1615 return (intptr_t)cnt;
1616 } else {
1617 return (intptr_t)0;
1618 }
1619 } else if (vnode_isreg(vp)) {
1620 if (ispoll) {
1621 return (intptr_t)1;
1622 }
1623
1624 off_t amount;
1625 amount = vp->v_un.vu_ubcinfo->ui_size - current_offset;
1626 if (amount > (off_t)INTPTR_MAX) {
1627 return INTPTR_MAX;
1628 } else if (amount < (off_t)INTPTR_MIN) {
1629 return INTPTR_MIN;
1630 } else {
1631 return (intptr_t)amount;
1632 }
1633 } else {
1634 panic("Should never have an EVFILT_READ except for reg or fifo.");
1635 return 0;
1636 }
1637 }
1638
1639 /*
1640 * Used for EVFILT_WRITE.
1641 *
1642 * For regular vnodes, we can always write (1). For named pipes,
1643 * see how much space there is in the buffer. Nothing else is covered.
1644 */
1645 static intptr_t
1646 vnode_writable_space_count(vnode_t vp)
1647 {
1648 if (vnode_isfifo(vp)) {
1649 long spc;
1650 int err = fifo_freespace(vp, &spc);
1651 if (err == 0) {
1652 return (intptr_t)spc;
1653 } else {
1654 return (intptr_t)0;
1655 }
1656 } else if (vnode_isreg(vp)) {
1657 return (intptr_t)1;
1658 } else {
1659 panic("Should never have an EVFILT_READ except for reg or fifo.");
1660 return 0;
1661 }
1662 }
1663
1664 /*
1665 * Determine whether this knote should be active
1666 *
1667 * This is kind of subtle.
 * --First, notice if the vnode has been revoked: if so, override hint
1669 * --EVFILT_READ knotes are checked no matter what the hint is
1670 * --Other knotes activate based on hint.
1671 * --If hint is revoke, set special flags and activate
1672 */
static int
filt_vnode(struct knote *kn, long hint)
{
	vnode_t vp = (struct vnode *)kn->kn_hook;
	int activate = 0;
	long orig_hint = hint;

	/*
	 * hint == 0 means we were called from above (kevent scan) rather
	 * than from a VNOP notification, so we must take the vnode lock
	 * and an iocount ourselves; a nonzero hint arrives with the vnode
	 * already locked by the caller.
	 */
	if (0 == hint) {
		vnode_lock(vp);

		if (vnode_getiocount(vp, kn->kn_hookid, VNODE_NODEAD | VNODE_WITHID) != 0) {
			/* Is recycled */
			hint = NOTE_REVOKE;
		}
	} else {
		lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED);
	}

	/* Special handling for vnodes that are in recycle or already gone */
	if (NOTE_REVOKE == hint) {
		/* force-fire once and mark EOF; the vnode is going away */
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		activate = 1;

		if ((kn->kn_filter == EVFILT_VNODE) && (kn->kn_sfflags & NOTE_REVOKE)) {
			kn->kn_fflags |= NOTE_REVOKE;
		}
	} else {
		switch(kn->kn_filter) {
		case EVFILT_READ:
			kn->kn_data = vnode_readable_data_count(vp, kn->kn_fp->f_fglob->fg_offset, (kn->kn_flags & EV_POLL));

			if (kn->kn_data != 0) {
				activate = 1;
			}
			break;
		case EVFILT_WRITE:
			kn->kn_data = vnode_writable_space_count(vp);

			if (kn->kn_data != 0) {
				activate = 1;
			}
			break;
		case EVFILT_VNODE:
			/* Check events this note matches against the hint */
			if (kn->kn_sfflags & hint) {
				kn->kn_fflags |= hint;	/* Set which event occurred */
			}
			if (kn->kn_fflags != 0) {
				activate = 1;
			}
			break;
		default:
			panic("Invalid knote filter on a vnode!\n");
		}
	}

	if (orig_hint == 0) {
		/*
		 * Definitely need to unlock, may need to put
		 */
		if (hint == 0) {
			/* hint still 0: the iocount we took above succeeded; drop it */
			vnode_put_locked(vp);
		}
		vnode_unlock(vp);
	}

	return (activate);
}