]> git.saurik.com Git - apple/xnu.git/blob - bsd/vfs/vfs_vnops.c
671a51fc39e21f8f07280d378065ef391515d9e6
[apple/xnu.git] / bsd / vfs / vfs_vnops.c
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1982, 1986, 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)vfs_vnops.c 8.14 (Berkeley) 6/15/95
67 *
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 #include <sys/param.h>
77 #include <sys/types.h>
78 #include <sys/systm.h>
79 #include <sys/kernel.h>
80 #include <sys/file_internal.h>
81 #include <sys/stat.h>
82 #include <sys/proc_internal.h>
83 #include <sys/kauth.h>
84 #include <sys/mount_internal.h>
85 #include <sys/namei.h>
86 #include <sys/vnode_internal.h>
87 #include <sys/ioctl.h>
88 #include <sys/tty.h>
89 /* Temporary workaround for ubc.h until <rdar://4714366 is resolved */
90 #define ubc_setcred ubc_setcred_deprecated
91 #include <sys/ubc.h>
92 #undef ubc_setcred
93 int ubc_setcred(struct vnode *, struct proc *);
94 #include <sys/conf.h>
95 #include <sys/disk.h>
96 #include <sys/fsevents.h>
97 #include <sys/kdebug.h>
98 #include <sys/xattr.h>
99 #include <sys/ubc_internal.h>
100 #include <sys/uio_internal.h>
101 #include <sys/resourcevar.h>
102 #include <sys/signalvar.h>
103
104 #include <vm/vm_kern.h>
105 #include <vm/vm_map.h>
106
107 #include <miscfs/specfs/specdev.h>
108 #include <miscfs/fifofs/fifo.h>
109
110 #if CONFIG_MACF
111 #include <security/mac_framework.h>
112 #endif
113
114 #if CONFIG_PROTECT
115 #include <sys/cprotect.h>
116 #endif
117
118
119 static int vn_closefile(struct fileglob *fp, vfs_context_t ctx);
120 static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data,
121 vfs_context_t ctx);
122 static int vn_read(struct fileproc *fp, struct uio *uio, int flags,
123 vfs_context_t ctx);
124 static int vn_write(struct fileproc *fp, struct uio *uio, int flags,
125 vfs_context_t ctx);
126 static int vn_select( struct fileproc *fp, int which, void * wql,
127 vfs_context_t ctx);
128 static int vn_kqfilt_add(struct fileproc *fp, struct knote *kn,
129 vfs_context_t ctx);
130 static void filt_vndetach(struct knote *kn);
131 static int filt_vnode(struct knote *kn, long hint);
132 static int vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx);
133 #if 0
134 static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident,
135 vfs_context_t ctx);
136 #endif
137
138 struct fileops vnops =
139 { vn_read, vn_write, vn_ioctl, vn_select, vn_closefile, vn_kqfilt_add, NULL };
140
141 struct filterops vnode_filtops = {
142 .f_isfd = 1,
143 .f_attach = NULL,
144 .f_detach = filt_vndetach,
145 .f_event = filt_vnode
146 };
147
/*
 * vn_open
 *
 * Simplest of the vnode open entry points: forwards to
 * vn_open_modflags().  'fmode' is handed down by address, but since it
 * is this function's own by-value copy, any flag changes made further
 * down are not reported back to the caller.
 *
 * XXX the profusion of open interfaces here is probably a bad thing.
 */
int
vn_open(struct nameidata *ndp, int fmode, int cmode)
{
	int *flagsp = &fmode;

	return vn_open_modflags(ndp, flagsp, cmode);
}
159
160 int
161 vn_open_modflags(struct nameidata *ndp, int *fmodep, int cmode)
162 {
163 struct vnode_attr va;
164
165 VATTR_INIT(&va);
166 VATTR_SET(&va, va_mode, cmode);
167
168 return(vn_open_auth(ndp, fmodep, &va));
169 }
170
171 static int
172 vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx)
173 {
174 int error;
175
176 if ((error = vnode_ref_ext(vp, fmode, 0)) != 0) {
177 goto bad;
178 }
179
180 /* Call out to allow 3rd party notification of open.
181 * Ignore result of kauth_authorize_fileop call.
182 */
183 #if CONFIG_MACF
184 mac_vnode_notify_open(ctx, vp, fmode);
185 #endif
186 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
187 (uintptr_t)vp, 0);
188
189 return 0;
190
191 bad:
192 return error;
193
194 }
195
196 /*
197 * May do nameidone() to allow safely adding an FSEvent. Cue off of ni_dvp to
198 * determine whether that has happened.
199 */
200 static int
201 vn_open_auth_do_create(struct nameidata *ndp, struct vnode_attr *vap, int fmode, boolean_t *did_create, boolean_t *did_open, vfs_context_t ctx)
202 {
203 uint32_t status = 0;
204 vnode_t dvp = ndp->ni_dvp;
205 int batched;
206 int error;
207 vnode_t vp;
208
209 batched = vnode_compound_open_available(ndp->ni_dvp);
210 *did_open = FALSE;
211
212 VATTR_SET(vap, va_type, VREG);
213 if (fmode & O_EXCL)
214 vap->va_vaflags |= VA_EXCLUSIVE;
215
216 #if NAMEDRSRCFORK
217 if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) {
218 if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0)
219 goto out;
220 if ((error = vnode_makenamedstream(dvp, &ndp->ni_vp, XATTR_RESOURCEFORK_NAME, 0, ctx)) != 0)
221 goto out;
222 *did_create = TRUE;
223 } else {
224 #endif
225 if (!batched) {
226 if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0)
227 goto out;
228 }
229
230 error = vn_create(dvp, &ndp->ni_vp, ndp, vap, VN_CREATE_DOOPEN, fmode, &status, ctx);
231 if (error != 0) {
232 if (batched) {
233 *did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? TRUE : FALSE;
234 } else {
235 *did_create = FALSE;
236 }
237
238 if (error == EKEEPLOOKING) {
239 if (*did_create) {
240 panic("EKEEPLOOKING, but we did a create?");
241 }
242 if (!batched) {
243 panic("EKEEPLOOKING from filesystem that doesn't support compound vnops?");
244 }
245 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
246 panic("EKEEPLOOKING, but continue flag not set?");
247 }
248
249 /*
250 * Do NOT drop the dvp: we need everything to continue the lookup.
251 */
252 return error;
253 }
254 } else {
255 if (batched) {
256 *did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? 1 : 0;
257 *did_open = TRUE;
258 } else {
259 *did_create = TRUE;
260 }
261 }
262 #if NAMEDRSRCFORK
263 }
264 #endif
265
266 /*
267 * Unlock the fsnode (if locked) here so that we are free
268 * to drop the dvp iocount and prevent deadlock in build_path().
269 * nameidone() will still do the right thing later.
270 */
271 vp = ndp->ni_vp;
272 namei_unlock_fsnode(ndp);
273
274 if (*did_create) {
275 int update_flags = 0;
276
277 // Make sure the name & parent pointers are hooked up
278 if (vp->v_name == NULL)
279 update_flags |= VNODE_UPDATE_NAME;
280 if (vp->v_parent == NULLVP)
281 update_flags |= VNODE_UPDATE_PARENT;
282
283 if (update_flags)
284 vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags);
285
286 vnode_put(dvp);
287 ndp->ni_dvp = NULLVP;
288
289 #if CONFIG_FSE
290 if (need_fsevent(FSE_CREATE_FILE, vp)) {
291 add_fsevent(FSE_CREATE_FILE, ctx,
292 FSE_ARG_VNODE, vp,
293 FSE_ARG_DONE);
294 }
295 #endif
296 }
297 out:
298 if (ndp->ni_dvp != NULLVP) {
299 vnode_put(dvp);
300 ndp->ni_dvp = NULLVP;
301 }
302
303 return error;
304 }
305
306 /*
307 * Open a file with authorization, updating the contents of the structures
308 * pointed to by ndp, fmodep, and vap as necessary to perform the requested
309 * operation. This function is used for both opens of existing files, and
310 * creation of new files.
311 *
312 * Parameters: ndp The nami data pointer describing the
313 * file
314 * fmodep A pointer to an int containg the mode
315 * information to be used for the open
316 * vap A pointer to the vnode attribute
317 * descriptor to be used for the open
318 *
319 * Indirect: * Contents of the data structures pointed
320 * to by the parameters are modified as
321 * necessary to the requested operation.
322 *
323 * Returns: 0 Success
324 * !0 errno value
325 *
326 * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order.
327 *
328 * The contents of '*ndp' will be modified, based on the other
329 * arguments to this function, and to return file and directory
330 * data necessary to satisfy the requested operation.
331 *
332 * If the file does not exist and we are creating it, then the
333 * O_TRUNC flag will be cleared in '*fmodep' to indicate to the
334 * caller that the file was not truncated.
335 *
336 * If the file exists and the O_EXCL flag was not specified, then
337 * the O_CREAT flag will be cleared in '*fmodep' to indicate to
338 * the caller that the existing file was merely opened rather
339 * than created.
340 *
341 * The contents of '*vap' will be modified as necessary to
342 * complete the operation, including setting of supported
343 * attribute, clearing of fields containing unsupported attributes
344 * in the request, if the request proceeds without them, etc..
345 *
346 * XXX: This function is too complicated in actings on its arguments
347 *
348 * XXX: We should enummerate the possible errno values here, and where
349 * in the code they originated.
350 */
351 int
352 vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap)
353 {
354 struct vnode *vp;
355 struct vnode *dvp;
356 vfs_context_t ctx = ndp->ni_cnd.cn_context;
357 int error;
358 int fmode;
359 uint32_t origcnflags;
360 boolean_t did_create;
361 boolean_t did_open;
362 boolean_t need_vnop_open;
363 boolean_t batched;
364 boolean_t ref_failed;
365
366 again:
367 vp = NULL;
368 dvp = NULL;
369 batched = FALSE;
370 did_create = FALSE;
371 need_vnop_open = TRUE;
372 ref_failed = FALSE;
373 fmode = *fmodep;
374 origcnflags = ndp->ni_cnd.cn_flags;
375
376 /*
377 * O_CREAT
378 */
379 if (fmode & O_CREAT) {
380 if ( (fmode & O_DIRECTORY) ) {
381 error = EINVAL;
382 goto out;
383 }
384 ndp->ni_cnd.cn_nameiop = CREATE;
385 #if CONFIG_TRIGGERS
386 ndp->ni_op = OP_LINK;
387 #endif
388 /* Inherit USEDVP, vnode_open() supported flags only */
389 ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT);
390 ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1;
391 ndp->ni_flag = NAMEI_COMPOUNDOPEN;
392 #if NAMEDRSRCFORK
393 /* open calls are allowed for resource forks. */
394 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
395 #endif
396 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0 && (origcnflags & FOLLOW) != 0)
397 ndp->ni_cnd.cn_flags |= FOLLOW;
398
399 continue_create_lookup:
400 if ( (error = namei(ndp)) )
401 goto out;
402
403 dvp = ndp->ni_dvp;
404 vp = ndp->ni_vp;
405
406 batched = vnode_compound_open_available(dvp);
407
408 /* not found, create */
409 if (vp == NULL) {
410 /* must have attributes for a new file */
411 if (vap == NULL) {
412 error = EINVAL;
413 goto out;
414 }
415 /*
416 * Attempt a create. For a system supporting compound VNOPs, we may
417 * find an existing file or create one; in either case, we will already
418 * have the file open and no VNOP_OPEN() will be needed.
419 */
420 error = vn_open_auth_do_create(ndp, vap, fmode, &did_create, &did_open, ctx);
421
422 dvp = ndp->ni_dvp;
423 vp = ndp->ni_vp;
424
425 /*
426 * Detected a node that the filesystem couldn't handle. Don't call
427 * nameidone() yet, because we need that path buffer.
428 */
429 if (error == EKEEPLOOKING) {
430 if (!batched) {
431 panic("EKEEPLOOKING from a filesystem that doesn't support compound VNOPs?");
432 }
433 goto continue_create_lookup;
434 }
435
436 nameidone(ndp);
437 if (dvp) {
438 panic("Shouldn't have a dvp here.");
439 }
440
441 if (error) {
442 /*
443 * Check for a creation or unlink race.
444 */
445 if (((error == EEXIST) && !(fmode & O_EXCL)) ||
446 ((error == ENOENT) && (fmode & O_CREAT))){
447 if (vp)
448 vnode_put(vp);
449 goto again;
450 }
451 goto bad;
452 }
453
454 need_vnop_open = !did_open;
455 }
456 else {
457 if (fmode & O_EXCL)
458 error = EEXIST;
459
460 /*
461 * We have a vnode. Use compound open if available
462 * or else fall through to "traditional" path. Note: can't
463 * do a compound open for root, because the parent belongs
464 * to a different FS.
465 */
466 if (error == 0 && batched && (vnode_mount(dvp) == vnode_mount(vp))) {
467 error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx);
468
469 if (error == 0) {
470 vp = ndp->ni_vp;
471 need_vnop_open = FALSE;
472 } else if (error == EKEEPLOOKING) {
473 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
474 panic("EKEEPLOOKING, but continue flag not set?");
475 }
476 goto continue_create_lookup;
477 }
478 }
479 nameidone(ndp);
480 vnode_put(dvp);
481 ndp->ni_dvp = NULLVP;
482
483 if (error) {
484 goto bad;
485 }
486
487 fmode &= ~O_CREAT;
488
489 /* Fall through */
490 }
491 } else {
492 /*
493 * Not O_CREAT
494 */
495 ndp->ni_cnd.cn_nameiop = LOOKUP;
496 /* Inherit USEDVP, vnode_open() supported flags only */
497 ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT);
498 ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1 | WANTPARENT;
499 #if NAMEDRSRCFORK
500 /* open calls are allowed for resource forks. */
501 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
502 #endif
503 ndp->ni_flag = NAMEI_COMPOUNDOPEN;
504
505 /* preserve NOFOLLOW from vnode_open() */
506 if (fmode & O_NOFOLLOW || fmode & O_SYMLINK || (origcnflags & FOLLOW) == 0) {
507 ndp->ni_cnd.cn_flags &= ~FOLLOW;
508 }
509
510 /* Do a lookup, possibly going directly to filesystem for compound operation */
511 do {
512 if ( (error = namei(ndp)) )
513 goto out;
514 vp = ndp->ni_vp;
515 dvp = ndp->ni_dvp;
516
517 /* Check for batched lookup-open */
518 batched = vnode_compound_open_available(dvp);
519 if (batched && ((vp == NULLVP) || (vnode_mount(dvp) == vnode_mount(vp)))) {
520 error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx);
521 vp = ndp->ni_vp;
522 if (error == 0) {
523 need_vnop_open = FALSE;
524 } else if (error == EKEEPLOOKING) {
525 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
526 panic("EKEEPLOOKING, but continue flag not set?");
527 }
528 }
529 }
530 } while (error == EKEEPLOOKING);
531
532 nameidone(ndp);
533 vnode_put(dvp);
534 ndp->ni_dvp = NULLVP;
535
536 if (error) {
537 goto bad;
538 }
539 }
540
541 /*
542 * By this point, nameidone() is called, dvp iocount is dropped,
543 * and dvp pointer is cleared.
544 */
545 if (ndp->ni_dvp != NULLVP) {
546 panic("Haven't cleaned up adequately in vn_open_auth()");
547 }
548
549 /*
550 * Expect to use this code for filesystems without compound VNOPs, for the root
551 * of a filesystem, which can't be "looked up" in the sense of VNOP_LOOKUP(),
552 * and for shadow files, which do not live on the same filesystems as their "parents."
553 */
554 if (need_vnop_open) {
555 if (batched && !vnode_isvroot(vp) && !vnode_isnamedstream(vp)) {
556 panic("Why am I trying to use VNOP_OPEN() on anything other than the root or a named stream?");
557 }
558
559 if (!did_create) {
560 error = vn_authorize_open_existing(vp, &ndp->ni_cnd, fmode, ctx, NULL);
561 if (error) {
562 goto bad;
563 }
564 }
565
566 #if CONFIG_PROTECT
567 /*
568 * Perform any content protection access checks prior to calling
569 * into the filesystem, if the raw encrypted mode was not
570 * requested.
571 *
572 * If the va_dataprotect_flags are NOT active, or if they are,
573 * but they do not have the VA_DP_RAWENCRYPTED bit set, then we need
574 * to perform the checks.
575 */
576 if (!(VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) ||
577 ((vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) == 0)) {
578 error = cp_handle_open (vp, fmode);
579 if (error) {
580 goto bad;
581 }
582 }
583 #endif
584
585 error = VNOP_OPEN(vp, fmode, ctx);
586 if (error) {
587 goto bad;
588 }
589 need_vnop_open = FALSE;
590 }
591
592 // if the vnode is tagged VOPENEVT and the current process
593 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
594 // flag to the open mode so that this open won't count against
595 // the vnode when carbon delete() does a vnode_isinuse() to see
596 // if a file is currently in use. this allows spotlight
597 // importers to not interfere with carbon apps that depend on
598 // the no-delete-if-busy semantics of carbon delete().
599 //
600 if (!did_create && (vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
601 fmode |= O_EVTONLY;
602 }
603
604 /*
605 * Grab reference, etc.
606 */
607 error = vn_open_auth_finish(vp, fmode, ctx);
608 if (error) {
609 ref_failed = TRUE;
610 goto bad;
611 }
612
613 /* Compound VNOP open is responsible for doing the truncate */
614 if (batched || did_create)
615 fmode &= ~O_TRUNC;
616
617 *fmodep = fmode;
618 return (0);
619
620 bad:
621 /* Opened either explicitly or by a batched create */
622 if (!need_vnop_open) {
623 VNOP_CLOSE(vp, fmode, ctx);
624 }
625
626 ndp->ni_vp = NULL;
627 if (vp) {
628 #if NAMEDRSRCFORK
629 /* Aggressively recycle shadow files if we error'd out during open() */
630 if ((vnode_isnamedstream(vp)) &&
631 (vp->v_parent != NULLVP) &&
632 (vnode_isshadow(vp))) {
633 vnode_recycle(vp);
634 }
635 #endif
636 vnode_put(vp);
637 /*
638 * Check for a race against unlink. We had a vnode
639 * but according to vnode_authorize or VNOP_OPEN it
640 * no longer exists.
641 *
642 * EREDRIVEOPEN: means that we were hit by the tty allocation race.
643 */
644 if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN) || ref_failed) {
645 goto again;
646 }
647 }
648
649 out:
650 return (error);
651 }
652
#if vn_access_DEPRECATED
/*
 * vn_access
 *
 * Legacy authorization entry point: translates the VREAD / VWRITE /
 * VEXEC bits in 'mode' into the corresponding kauth vnode actions and
 * asks vnode_authorize() whether 'context' may perform them on 'vp'.
 *
 * Because the VREAD/VWRITE/VEXEC vocabulary makes it difficult to
 * express additional rights, this interface is slated for deprecation;
 * callers should use vnode_authorize() directly instead.
 */
int
vn_access(vnode_t vp, int mode, vfs_context_t context)
{
	kauth_action_t rights = 0;

	rights |= (mode & VREAD)  ? KAUTH_VNODE_READ_DATA  : 0;
	rights |= (mode & VWRITE) ? KAUTH_VNODE_WRITE_DATA : 0;
	rights |= (mode & VEXEC)  ? KAUTH_VNODE_EXECUTE    : 0;

	return vnode_authorize(vp, NULL, rights, context);
}
#endif /* vn_access_DEPRECATED */
679
680 /*
681 * Vnode close call
682 */
683 int
684 vn_close(struct vnode *vp, int flags, vfs_context_t ctx)
685 {
686 int error;
687
688 #if NAMEDRSRCFORK
689 /* Sync data from resource fork shadow file if needed. */
690 if ((vp->v_flag & VISNAMEDSTREAM) &&
691 (vp->v_parent != NULLVP) &&
692 vnode_isshadow(vp)) {
693 if (flags & FWASWRITTEN) {
694 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
695 }
696 }
697 #endif
698
699 /* work around for foxhound */
700 if (vnode_isspec(vp))
701 (void)vnode_rele_ext(vp, flags, 0);
702
703 error = VNOP_CLOSE(vp, flags, ctx);
704
705 #if CONFIG_FSE
706 if (flags & FWASWRITTEN) {
707 if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) {
708 add_fsevent(FSE_CONTENT_MODIFIED, ctx,
709 FSE_ARG_VNODE, vp,
710 FSE_ARG_DONE);
711 }
712 }
713 #endif
714
715 if (!vnode_isspec(vp))
716 (void)vnode_rele_ext(vp, flags, 0);
717
718 return (error);
719 }
720
721 static int
722 vn_read_swapfile(
723 struct vnode *vp,
724 uio_t uio)
725 {
726 int error;
727 off_t swap_count, this_count;
728 off_t file_end, read_end;
729 off_t prev_resid;
730 char *my_swap_page;
731
732 /*
733 * Reading from a swap file will get you zeroes.
734 */
735
736 my_swap_page = NULL;
737 error = 0;
738 swap_count = uio_resid(uio);
739
740 file_end = ubc_getsize(vp);
741 read_end = uio->uio_offset + uio_resid(uio);
742 if (uio->uio_offset >= file_end) {
743 /* uio starts after end of file: nothing to read */
744 swap_count = 0;
745 } else if (read_end > file_end) {
746 /* uio extends beyond end of file: stop before that */
747 swap_count -= (read_end - file_end);
748 }
749
750 while (swap_count > 0) {
751 if (my_swap_page == NULL) {
752 MALLOC(my_swap_page, char *, PAGE_SIZE,
753 M_TEMP, M_WAITOK);
754 memset(my_swap_page, '\0', PAGE_SIZE);
755 /* add an end-of-line to keep line counters happy */
756 my_swap_page[PAGE_SIZE-1] = '\n';
757 }
758 this_count = swap_count;
759 if (this_count > PAGE_SIZE) {
760 this_count = PAGE_SIZE;
761 }
762
763 prev_resid = uio_resid(uio);
764 error = uiomove((caddr_t) my_swap_page,
765 this_count,
766 uio);
767 if (error) {
768 break;
769 }
770 swap_count -= (prev_resid - uio_resid(uio));
771 }
772 if (my_swap_page != NULL) {
773 FREE(my_swap_page, M_TEMP);
774 my_swap_page = NULL;
775 }
776
777 return error;
778 }
779 /*
780 * Package up an I/O request on a vnode into a uio and do it.
781 */
782 int
783 vn_rdwr(
784 enum uio_rw rw,
785 struct vnode *vp,
786 caddr_t base,
787 int len,
788 off_t offset,
789 enum uio_seg segflg,
790 int ioflg,
791 kauth_cred_t cred,
792 int *aresid,
793 proc_t p)
794 {
795 int64_t resid;
796 int result;
797
798 result = vn_rdwr_64(rw,
799 vp,
800 (uint64_t)(uintptr_t)base,
801 (int64_t)len,
802 offset,
803 segflg,
804 ioflg,
805 cred,
806 &resid,
807 p);
808
809 /* "resid" should be bounded above by "len," which is an int */
810 if (aresid != NULL) {
811 *aresid = resid;
812 }
813
814 return result;
815 }
816
817
818 int
819 vn_rdwr_64(
820 enum uio_rw rw,
821 struct vnode *vp,
822 uint64_t base,
823 int64_t len,
824 off_t offset,
825 enum uio_seg segflg,
826 int ioflg,
827 kauth_cred_t cred,
828 int64_t *aresid,
829 proc_t p)
830 {
831 uio_t auio;
832 int spacetype;
833 struct vfs_context context;
834 int error=0;
835 char uio_buf[ UIO_SIZEOF(1) ];
836
837 context.vc_thread = current_thread();
838 context.vc_ucred = cred;
839
840 if (UIO_SEG_IS_USER_SPACE(segflg)) {
841 spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
842 }
843 else {
844 spacetype = UIO_SYSSPACE;
845 }
846 auio = uio_createwithbuffer(1, offset, spacetype, rw,
847 &uio_buf[0], sizeof(uio_buf));
848 uio_addiov(auio, base, len);
849
850 #if CONFIG_MACF
851 /* XXXMAC
852 * IO_NOAUTH should be re-examined.
853 * Likely that mediation should be performed in caller.
854 */
855 if ((ioflg & IO_NOAUTH) == 0) {
856 /* passed cred is fp->f_cred */
857 if (rw == UIO_READ)
858 error = mac_vnode_check_read(&context, cred, vp);
859 else
860 error = mac_vnode_check_write(&context, cred, vp);
861 }
862 #endif
863
864 if (error == 0) {
865 if (rw == UIO_READ) {
866 if (vnode_isswap(vp)) {
867 error = vn_read_swapfile(vp, auio);
868 } else {
869 error = VNOP_READ(vp, auio, ioflg, &context);
870 }
871 } else {
872 error = VNOP_WRITE(vp, auio, ioflg, &context);
873 }
874 }
875
876 if (aresid)
877 *aresid = uio_resid(auio);
878 else
879 if (uio_resid(auio) && error == 0)
880 error = EIO;
881 return (error);
882 }
883
884 /*
885 * File table vnode read routine.
886 */
887 static int
888 vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
889 {
890 struct vnode *vp;
891 int error, ioflag;
892 off_t count;
893
894 vp = (struct vnode *)fp->f_fglob->fg_data;
895 if ( (error = vnode_getwithref(vp)) ) {
896 return(error);
897 }
898
899 #if CONFIG_MACF
900 error = mac_vnode_check_read(ctx, vfs_context_ucred(ctx), vp);
901 if (error) {
902 (void)vnode_put(vp);
903 return (error);
904 }
905 #endif
906
907 /* This signals to VNOP handlers that this read came from a file table read */
908 ioflag = IO_SYSCALL_DISPATCH;
909
910 if (fp->f_fglob->fg_flag & FNONBLOCK)
911 ioflag |= IO_NDELAY;
912 if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
913 ioflag |= IO_NOCACHE;
914 if (fp->f_fglob->fg_flag & FENCRYPTED) {
915 ioflag |= IO_ENCRYPTED;
916 }
917 if (fp->f_fglob->fg_flag & FNORDAHEAD)
918 ioflag |= IO_RAOFF;
919
920 if ((flags & FOF_OFFSET) == 0)
921 uio->uio_offset = fp->f_fglob->fg_offset;
922 count = uio_resid(uio);
923
924 if (vnode_isswap(vp)) {
925 /* special case for swap files */
926 error = vn_read_swapfile(vp, uio);
927 } else {
928 error = VNOP_READ(vp, uio, ioflag, ctx);
929 }
930 if ((flags & FOF_OFFSET) == 0)
931 fp->f_fglob->fg_offset += count - uio_resid(uio);
932
933 (void)vnode_put(vp);
934 return (error);
935 }
936
937
938 /*
939 * File table vnode write routine.
940 */
941 static int
942 vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
943 {
944 struct vnode *vp;
945 int error, ioflag;
946 off_t count;
947 int clippedsize = 0;
948 int partialwrite=0;
949 int residcount, oldcount;
950 proc_t p = vfs_context_proc(ctx);
951
952 count = 0;
953 vp = (struct vnode *)fp->f_fglob->fg_data;
954 if ( (error = vnode_getwithref(vp)) ) {
955 return(error);
956 }
957
958 #if CONFIG_MACF
959 error = mac_vnode_check_write(ctx, vfs_context_ucred(ctx), vp);
960 if (error) {
961 (void)vnode_put(vp);
962 return (error);
963 }
964 #endif
965
966 /*
967 * IO_SYSCALL_DISPATCH signals to VNOP handlers that this write originated
968 * from a file table write.
969 */
970 ioflag = (IO_UNIT | IO_SYSCALL_DISPATCH);
971
972 if (vp->v_type == VREG && (fp->f_fglob->fg_flag & O_APPEND))
973 ioflag |= IO_APPEND;
974 if (fp->f_fglob->fg_flag & FNONBLOCK)
975 ioflag |= IO_NDELAY;
976 if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
977 ioflag |= IO_NOCACHE;
978 if (fp->f_fglob->fg_flag & FNODIRECT)
979 ioflag |= IO_NODIRECT;
980 if (fp->f_fglob->fg_flag & FSINGLE_WRITER)
981 ioflag |= IO_SINGLE_WRITER;
982
983 /*
984 * Treat synchronous mounts and O_FSYNC on the fd as equivalent.
985 *
986 * XXX We treat O_DSYNC as O_FSYNC for now, since we can not delay
987 * XXX the non-essential metadata without some additional VFS work;
988 * XXX the intent at this point is to plumb the interface for it.
989 */
990 if ((fp->f_fglob->fg_flag & (O_FSYNC|O_DSYNC)) ||
991 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) {
992 ioflag |= IO_SYNC;
993 }
994
995 if ((flags & FOF_OFFSET) == 0) {
996 uio->uio_offset = fp->f_fglob->fg_offset;
997 count = uio_resid(uio);
998 }
999 if (((flags & FOF_OFFSET) == 0) &&
1000 vfs_context_proc(ctx) && (vp->v_type == VREG) &&
1001 (((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) ||
1002 ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)))) {
1003 /*
1004 * If the requested residual would cause us to go past the
1005 * administrative limit, then we need to adjust the residual
1006 * down to cause fewer bytes than requested to be written. If
1007 * we can't do that (e.g. the residual is already 1 byte),
1008 * then we fail the write with EFBIG.
1009 */
1010 residcount = uio_resid(uio);
1011 if ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
1012 clippedsize = (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
1013 } else if ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)) {
1014 clippedsize = (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset);
1015 }
1016 if (clippedsize >= residcount) {
1017 psignal(p, SIGXFSZ);
1018 vnode_put(vp);
1019 return (EFBIG);
1020 }
1021 partialwrite = 1;
1022 uio_setresid(uio, residcount-clippedsize);
1023 }
1024 if ((flags & FOF_OFFSET) != 0) {
1025 /* for pwrite, append should be ignored */
1026 ioflag &= ~IO_APPEND;
1027 if (p && (vp->v_type == VREG) &&
1028 ((rlim_t)uio->uio_offset >= p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
1029 psignal(p, SIGXFSZ);
1030 vnode_put(vp);
1031 return (EFBIG);
1032 }
1033 if (p && (vp->v_type == VREG) &&
1034 ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
1035 //Debugger("vn_bwrite:overstepping the bounds");
1036 residcount = uio_resid(uio);
1037 clippedsize = (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
1038 partialwrite = 1;
1039 uio_setresid(uio, residcount-clippedsize);
1040 }
1041 }
1042
1043 error = VNOP_WRITE(vp, uio, ioflag, ctx);
1044
1045 if (partialwrite) {
1046 oldcount = uio_resid(uio);
1047 uio_setresid(uio, oldcount + clippedsize);
1048 }
1049
1050 if ((flags & FOF_OFFSET) == 0) {
1051 if (ioflag & IO_APPEND)
1052 fp->f_fglob->fg_offset = uio->uio_offset;
1053 else
1054 fp->f_fglob->fg_offset += count - uio_resid(uio);
1055 }
1056
1057 /*
1058 * Set the credentials on successful writes
1059 */
1060 if ((error == 0) && (vp->v_tag == VT_NFS) && (UBCINFOEXISTS(vp))) {
1061 /*
1062 * When called from aio subsystem, we only have the proc from
1063 * which to get the credential, at this point, so use that
1064 * instead. This means aio functions are incompatible with
1065 * per-thread credentials (aio operations are proxied). We
1066 * can't easily correct the aio vs. settid race in this case
1067 * anyway, so we disallow it.
1068 */
1069 if ((flags & FOF_PCRED) == 0) {
1070 ubc_setthreadcred(vp, p, current_thread());
1071 } else {
1072 ubc_setcred(vp, p);
1073 }
1074 }
1075 (void)vnode_put(vp);
1076 return (error);
1077 }
1078
1079 /*
1080 * File table vnode stat routine.
1081 *
1082 * Returns: 0 Success
1083 * EBADF
1084 * ENOMEM
1085 * vnode_getattr:???
1086 */
1087 int
1088 vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx)
1089 {
1090 struct vnode_attr va;
1091 int error;
1092 u_short mode;
1093 kauth_filesec_t fsec;
1094 struct stat *sb = (struct stat *)0; /* warning avoidance ; protected by isstat64 */
1095 struct stat64 * sb64 = (struct stat64 *)0; /* warning avoidance ; protected by isstat64 */
1096
1097 if (isstat64 != 0)
1098 sb64 = (struct stat64 *)sbptr;
1099 else
1100 sb = (struct stat *)sbptr;
1101 memset(&va, 0, sizeof(va));
1102 VATTR_INIT(&va);
1103 VATTR_WANTED(&va, va_fsid);
1104 VATTR_WANTED(&va, va_fileid);
1105 VATTR_WANTED(&va, va_mode);
1106 VATTR_WANTED(&va, va_type);
1107 VATTR_WANTED(&va, va_nlink);
1108 VATTR_WANTED(&va, va_uid);
1109 VATTR_WANTED(&va, va_gid);
1110 VATTR_WANTED(&va, va_rdev);
1111 VATTR_WANTED(&va, va_data_size);
1112 VATTR_WANTED(&va, va_access_time);
1113 VATTR_WANTED(&va, va_modify_time);
1114 VATTR_WANTED(&va, va_change_time);
1115 VATTR_WANTED(&va, va_create_time);
1116 VATTR_WANTED(&va, va_flags);
1117 VATTR_WANTED(&va, va_gen);
1118 VATTR_WANTED(&va, va_iosize);
1119 /* lower layers will synthesise va_total_alloc from va_data_size if required */
1120 VATTR_WANTED(&va, va_total_alloc);
1121 if (xsec != NULL) {
1122 VATTR_WANTED(&va, va_uuuid);
1123 VATTR_WANTED(&va, va_guuid);
1124 VATTR_WANTED(&va, va_acl);
1125 }
1126 error = vnode_getattr(vp, &va, ctx);
1127 if (error)
1128 goto out;
1129 /*
1130 * Copy from vattr table
1131 */
1132 if (isstat64 != 0) {
1133 sb64->st_dev = va.va_fsid;
1134 sb64->st_ino = (ino64_t)va.va_fileid;
1135
1136 } else {
1137 sb->st_dev = va.va_fsid;
1138 sb->st_ino = (ino_t)va.va_fileid;
1139 }
1140 mode = va.va_mode;
1141 switch (vp->v_type) {
1142 case VREG:
1143 mode |= S_IFREG;
1144 break;
1145 case VDIR:
1146 mode |= S_IFDIR;
1147 break;
1148 case VBLK:
1149 mode |= S_IFBLK;
1150 break;
1151 case VCHR:
1152 mode |= S_IFCHR;
1153 break;
1154 case VLNK:
1155 mode |= S_IFLNK;
1156 break;
1157 case VSOCK:
1158 mode |= S_IFSOCK;
1159 break;
1160 case VFIFO:
1161 mode |= S_IFIFO;
1162 break;
1163 default:
1164 error = EBADF;
1165 goto out;
1166 };
1167 if (isstat64 != 0) {
1168 sb64->st_mode = mode;
1169 sb64->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
1170 sb64->st_uid = va.va_uid;
1171 sb64->st_gid = va.va_gid;
1172 sb64->st_rdev = va.va_rdev;
1173 sb64->st_size = va.va_data_size;
1174 sb64->st_atimespec = va.va_access_time;
1175 sb64->st_mtimespec = va.va_modify_time;
1176 sb64->st_ctimespec = va.va_change_time;
1177 sb64->st_birthtimespec =
1178 VATTR_IS_SUPPORTED(&va, va_create_time) ? va.va_create_time : va.va_change_time;
1179 sb64->st_blksize = va.va_iosize;
1180 sb64->st_flags = va.va_flags;
1181 sb64->st_blocks = roundup(va.va_total_alloc, 512) / 512;
1182 } else {
1183 sb->st_mode = mode;
1184 sb->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
1185 sb->st_uid = va.va_uid;
1186 sb->st_gid = va.va_gid;
1187 sb->st_rdev = va.va_rdev;
1188 sb->st_size = va.va_data_size;
1189 sb->st_atimespec = va.va_access_time;
1190 sb->st_mtimespec = va.va_modify_time;
1191 sb->st_ctimespec = va.va_change_time;
1192 sb->st_blksize = va.va_iosize;
1193 sb->st_flags = va.va_flags;
1194 sb->st_blocks = roundup(va.va_total_alloc, 512) / 512;
1195 }
1196
1197 /* if we're interested in extended security data and we got an ACL */
1198 if (xsec != NULL) {
1199 if (!VATTR_IS_SUPPORTED(&va, va_acl) &&
1200 !VATTR_IS_SUPPORTED(&va, va_uuuid) &&
1201 !VATTR_IS_SUPPORTED(&va, va_guuid)) {
1202 *xsec = KAUTH_FILESEC_NONE;
1203 } else {
1204
1205 if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
1206 fsec = kauth_filesec_alloc(va.va_acl->acl_entrycount);
1207 } else {
1208 fsec = kauth_filesec_alloc(0);
1209 }
1210 if (fsec == NULL) {
1211 error = ENOMEM;
1212 goto out;
1213 }
1214 fsec->fsec_magic = KAUTH_FILESEC_MAGIC;
1215 if (VATTR_IS_SUPPORTED(&va, va_uuuid)) {
1216 fsec->fsec_owner = va.va_uuuid;
1217 } else {
1218 fsec->fsec_owner = kauth_null_guid;
1219 }
1220 if (VATTR_IS_SUPPORTED(&va, va_guuid)) {
1221 fsec->fsec_group = va.va_guuid;
1222 } else {
1223 fsec->fsec_group = kauth_null_guid;
1224 }
1225 if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
1226 bcopy(va.va_acl, &(fsec->fsec_acl), KAUTH_ACL_COPYSIZE(va.va_acl));
1227 } else {
1228 fsec->fsec_acl.acl_entrycount = KAUTH_FILESEC_NOACL;
1229 }
1230 *xsec = fsec;
1231 }
1232 }
1233
1234 /* Do not give the generation number out to unpriviledged users */
1235 if (va.va_gen && !vfs_context_issuser(ctx)) {
1236 if (isstat64 != 0)
1237 sb64->st_gen = 0;
1238 else
1239 sb->st_gen = 0;
1240 } else {
1241 if (isstat64 != 0)
1242 sb64->st_gen = va.va_gen;
1243 else
1244 sb->st_gen = va.va_gen;
1245 }
1246
1247 error = 0;
1248 out:
1249 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL)
1250 kauth_acl_free(va.va_acl);
1251 return (error);
1252 }
1253
1254 int
1255 vn_stat(struct vnode *vp, void *sb, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx)
1256 {
1257 int error;
1258
1259 #if CONFIG_MACF
1260 error = mac_vnode_check_stat(ctx, NOCRED, vp);
1261 if (error)
1262 return (error);
1263 #endif
1264
1265 /* authorize */
1266 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_ATTRIBUTES | KAUTH_VNODE_READ_SECURITY, ctx)) != 0)
1267 return(error);
1268
1269 /* actual stat */
1270 return(vn_stat_noauth(vp, sb, xsec, isstat64, ctx));
1271 }
1272
1273
1274 /*
1275 * File table vnode ioctl routine.
1276 */
1277 static int
1278 vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx)
1279 {
1280 struct vnode *vp = ((struct vnode *)fp->f_fglob->fg_data);
1281 off_t file_size;
1282 int error;
1283 struct vnode *ttyvp;
1284 int funnel_state;
1285 struct session * sessp;
1286
1287 if ( (error = vnode_getwithref(vp)) ) {
1288 return(error);
1289 }
1290
1291 #if CONFIG_MACF
1292 error = mac_vnode_check_ioctl(ctx, vp, com);
1293 if (error)
1294 goto out;
1295 #endif
1296
1297 switch (vp->v_type) {
1298 case VREG:
1299 case VDIR:
1300 if (com == FIONREAD) {
1301 if ((error = vnode_size(vp, &file_size, ctx)) != 0)
1302 goto out;
1303 *(int *)data = file_size - fp->f_fglob->fg_offset;
1304 goto out;
1305 }
1306 if (com == FIONBIO || com == FIOASYNC) { /* XXX */
1307 goto out;
1308 }
1309 /* fall into ... */
1310
1311 default:
1312 error = ENOTTY;
1313 goto out;
1314
1315 case VFIFO:
1316 case VCHR:
1317 case VBLK:
1318
1319 /* Should not be able to set block size from user space */
1320 if (com == DKIOCSETBLOCKSIZE) {
1321 error = EPERM;
1322 goto out;
1323 }
1324
1325 if (com == FIODTYPE) {
1326 if (vp->v_type == VBLK) {
1327 if (major(vp->v_rdev) >= nblkdev) {
1328 error = ENXIO;
1329 goto out;
1330 }
1331 *(int *)data = D_TYPEMASK & bdevsw[major(vp->v_rdev)].d_type;
1332
1333 } else if (vp->v_type == VCHR) {
1334 if (major(vp->v_rdev) >= nchrdev) {
1335 error = ENXIO;
1336 goto out;
1337 }
1338 *(int *)data = D_TYPEMASK & cdevsw[major(vp->v_rdev)].d_type;
1339 } else {
1340 error = ENOTTY;
1341 goto out;
1342 }
1343 goto out;
1344 }
1345 error = VNOP_IOCTL(vp, com, data, fp->f_fglob->fg_flag, ctx);
1346
1347 if (error == 0 && com == TIOCSCTTY) {
1348 error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE);
1349 if (error != 0) {
1350 panic("vnode_ref_ext() failed despite VNODE_REF_FORCE?!");
1351 }
1352
1353 funnel_state = thread_funnel_set(kernel_flock, TRUE);
1354 sessp = proc_session(vfs_context_proc(ctx));
1355
1356 session_lock(sessp);
1357 ttyvp = sessp->s_ttyvp;
1358 sessp->s_ttyvp = vp;
1359 sessp->s_ttyvid = vnode_vid(vp);
1360 session_unlock(sessp);
1361 session_rele(sessp);
1362 thread_funnel_set(kernel_flock, funnel_state);
1363
1364 if (ttyvp)
1365 vnode_rele(ttyvp);
1366 }
1367 }
1368 out:
1369 (void)vnode_put(vp);
1370 return(error);
1371 }
1372
1373 /*
1374 * File table vnode select routine.
1375 */
1376 static int
1377 vn_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx)
1378 {
1379 int error;
1380 struct vnode * vp = (struct vnode *)fp->f_fglob->fg_data;
1381 struct vfs_context context;
1382
1383 if ( (error = vnode_getwithref(vp)) == 0 ) {
1384 context.vc_thread = current_thread();
1385 context.vc_ucred = fp->f_fglob->fg_cred;
1386
1387 #if CONFIG_MACF
1388 /*
1389 * XXX We should use a per thread credential here; minimally,
1390 * XXX the process credential should have a persistent
1391 * XXX reference on it before being passed in here.
1392 */
1393 error = mac_vnode_check_select(ctx, vp, which);
1394 if (error == 0)
1395 #endif
1396 error = VNOP_SELECT(vp, which, fp->f_fglob->fg_flag, wql, ctx);
1397
1398 (void)vnode_put(vp);
1399 }
1400 return(error);
1401
1402 }
1403
1404 /*
1405 * File table vnode close routine.
1406 */
1407 static int
1408 vn_closefile(struct fileglob *fg, vfs_context_t ctx)
1409 {
1410 struct vnode *vp = (struct vnode *)fg->fg_data;
1411 int error;
1412 struct flock lf;
1413
1414 if ( (error = vnode_getwithref(vp)) == 0 ) {
1415
1416 if ((fg->fg_flag & FHASLOCK) && fg->fg_type == DTYPE_VNODE) {
1417 lf.l_whence = SEEK_SET;
1418 lf.l_start = 0;
1419 lf.l_len = 0;
1420 lf.l_type = F_UNLCK;
1421
1422 (void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, ctx);
1423 }
1424 error = vn_close(vp, fg->fg_flag, ctx);
1425
1426 (void)vnode_put(vp);
1427 }
1428 return(error);
1429 }
1430
1431 /*
1432 * Returns: 0 Success
1433 * VNOP_PATHCONF:???
1434 */
1435 int
1436 vn_pathconf(vnode_t vp, int name, int32_t *retval, vfs_context_t ctx)
1437 {
1438 int error = 0;
1439 struct vfs_attr vfa;
1440
1441 switch(name) {
1442 case _PC_EXTENDED_SECURITY_NP:
1443 *retval = vfs_extendedsecurity(vnode_mount(vp)) ? 1 : 0;
1444 break;
1445 case _PC_AUTH_OPAQUE_NP:
1446 *retval = vfs_authopaque(vnode_mount(vp));
1447 break;
1448 case _PC_2_SYMLINKS:
1449 *retval = 1; /* XXX NOTSUP on MSDOS, etc. */
1450 break;
1451 case _PC_ALLOC_SIZE_MIN:
1452 *retval = 1; /* XXX lie: 1 byte */
1453 break;
1454 case _PC_ASYNC_IO: /* unistd.h: _POSIX_ASYNCHRONUS_IO */
1455 *retval = 1; /* [AIO] option is supported */
1456 break;
1457 case _PC_PRIO_IO: /* unistd.h: _POSIX_PRIORITIZED_IO */
1458 *retval = 0; /* [PIO] option is not supported */
1459 break;
1460 case _PC_REC_INCR_XFER_SIZE:
1461 *retval = 4096; /* XXX go from MIN to MAX 4K at a time */
1462 break;
1463 case _PC_REC_MIN_XFER_SIZE:
1464 *retval = 4096; /* XXX recommend 4K minimum reads/writes */
1465 break;
1466 case _PC_REC_MAX_XFER_SIZE:
1467 *retval = 65536; /* XXX recommend 64K maximum reads/writes */
1468 break;
1469 case _PC_REC_XFER_ALIGN:
1470 *retval = 4096; /* XXX recommend page aligned buffers */
1471 break;
1472 case _PC_SYMLINK_MAX:
1473 *retval = 255; /* Minimum acceptable POSIX value */
1474 break;
1475 case _PC_SYNC_IO: /* unistd.h: _POSIX_SYNCHRONIZED_IO */
1476 *retval = 0; /* [SIO] option is not supported */
1477 break;
1478 case _PC_XATTR_SIZE_BITS:
1479 /* The number of bits used to store maximum extended
1480 * attribute size in bytes. For example, if the maximum
1481 * attribute size supported by a file system is 128K, the
1482 * value returned will be 18. However a value 18 can mean
1483 * that the maximum attribute size can be anywhere from
1484 * (256KB - 1) to 128KB. As a special case, the resource
1485 * fork can have much larger size, and some file system
1486 * specific extended attributes can have smaller and preset
1487 * size; for example, Finder Info is always 32 bytes.
1488 */
1489 memset(&vfa, 0, sizeof(vfa));
1490 VFSATTR_INIT(&vfa);
1491 VFSATTR_WANTED(&vfa, f_capabilities);
1492 if (vfs_getattr(vnode_mount(vp), &vfa, ctx) == 0 &&
1493 (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) &&
1494 (vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
1495 (vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
1496 /* Supports native extended attributes */
1497 error = VNOP_PATHCONF(vp, name, retval, ctx);
1498 } else {
1499 /* Number of bits used to represent the maximum size of
1500 * extended attribute stored in an Apple Double file.
1501 */
1502 *retval = AD_XATTR_SIZE_BITS;
1503 }
1504 break;
1505 default:
1506 error = VNOP_PATHCONF(vp, name, retval, ctx);
1507 break;
1508 }
1509
1510 return (error);
1511 }
1512
1513 static int
1514 vn_kqfilt_add(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
1515 {
1516 int error;
1517 struct vnode *vp;
1518
1519 vp = (struct vnode *)fp->f_fglob->fg_data;
1520
1521 /*
1522 * Don't attach a knote to a dead vnode.
1523 */
1524 if ((error = vget_internal(vp, 0, VNODE_NODEAD)) == 0) {
1525 switch (kn->kn_filter) {
1526 case EVFILT_READ:
1527 case EVFILT_WRITE:
1528 if (vnode_isfifo(vp)) {
1529 /* We'll only watch FIFOs that use our fifofs */
1530 if (!(vp->v_fifoinfo && vp->v_fifoinfo->fi_readsock)) {
1531 error = ENOTSUP;
1532 }
1533
1534 } else if (!vnode_isreg(vp)) {
1535 if (vnode_ischr(vp) &&
1536 (error = spec_kqfilter(vp, kn)) == 0) {
1537 /* claimed by a special device */
1538 vnode_put(vp);
1539 return 0;
1540 }
1541
1542 error = EINVAL;
1543 }
1544 break;
1545 case EVFILT_VNODE:
1546 break;
1547 default:
1548 error = EINVAL;
1549 }
1550
1551 if (error) {
1552 vnode_put(vp);
1553 return error;
1554 }
1555
1556 #if CONFIG_MACF
1557 error = mac_vnode_check_kqfilter(ctx, fp->f_fglob->fg_cred, kn, vp);
1558 if (error) {
1559 vnode_put(vp);
1560 return error;
1561 }
1562 #endif
1563
1564 kn->kn_hook = (void*)vp;
1565 kn->kn_hookid = vnode_vid(vp);
1566 kn->kn_fop = &vnode_filtops;
1567
1568 vnode_lock(vp);
1569 KNOTE_ATTACH(&vp->v_knotes, kn);
1570 vnode_unlock(vp);
1571
1572 /* Ask the filesystem to provide remove notifications, but ignore failure */
1573 VNOP_MONITOR(vp, 0, VNODE_MONITOR_BEGIN, (void*) kn, ctx);
1574
1575 vnode_put(vp);
1576 }
1577
1578 return (error);
1579 }
1580
1581 static void
1582 filt_vndetach(struct knote *kn)
1583 {
1584 vfs_context_t ctx = vfs_context_current();
1585 struct vnode *vp;
1586 vp = (struct vnode *)kn->kn_hook;
1587 if (vnode_getwithvid(vp, kn->kn_hookid))
1588 return;
1589
1590 vnode_lock(vp);
1591 KNOTE_DETACH(&vp->v_knotes, kn);
1592 vnode_unlock(vp);
1593
1594 /*
1595 * Tell a (generally networked) filesystem that we're no longer watching
1596 * If the FS wants to track contexts, it should still be using the one from
1597 * the VNODE_MONITOR_BEGIN.
1598 */
1599 VNOP_MONITOR(vp, 0, VNODE_MONITOR_END, (void*)kn, ctx);
1600 vnode_put(vp);
1601 }
1602
1603
1604 /*
1605 * Used for EVFILT_READ
1606 *
1607 * Takes only VFIFO or VREG. vnode is locked. We handle the "poll" case
1608 * differently than the regular case for VREG files. If not in poll(),
1609 * then we need to know current fileproc offset for VREG.
1610 */
1611 static intptr_t
1612 vnode_readable_data_count(vnode_t vp, off_t current_offset, int ispoll)
1613 {
1614 if (vnode_isfifo(vp)) {
1615 int cnt;
1616 int err = fifo_charcount(vp, &cnt);
1617 if (err == 0) {
1618 return (intptr_t)cnt;
1619 } else {
1620 return (intptr_t)0;
1621 }
1622 } else if (vnode_isreg(vp)) {
1623 if (ispoll) {
1624 return (intptr_t)1;
1625 }
1626
1627 off_t amount;
1628 amount = vp->v_un.vu_ubcinfo->ui_size - current_offset;
1629 if (amount > (off_t)INTPTR_MAX) {
1630 return INTPTR_MAX;
1631 } else if (amount < (off_t)INTPTR_MIN) {
1632 return INTPTR_MIN;
1633 } else {
1634 return (intptr_t)amount;
1635 }
1636 } else {
1637 panic("Should never have an EVFILT_READ except for reg or fifo.");
1638 return 0;
1639 }
1640 }
1641
1642 /*
1643 * Used for EVFILT_WRITE.
1644 *
1645 * For regular vnodes, we can always write (1). For named pipes,
1646 * see how much space there is in the buffer. Nothing else is covered.
1647 */
1648 static intptr_t
1649 vnode_writable_space_count(vnode_t vp)
1650 {
1651 if (vnode_isfifo(vp)) {
1652 long spc;
1653 int err = fifo_freespace(vp, &spc);
1654 if (err == 0) {
1655 return (intptr_t)spc;
1656 } else {
1657 return (intptr_t)0;
1658 }
1659 } else if (vnode_isreg(vp)) {
1660 return (intptr_t)1;
1661 } else {
1662 panic("Should never have an EVFILT_READ except for reg or fifo.");
1663 return 0;
1664 }
1665 }
1666
1667 /*
1668 * Determine whether this knote should be active
1669 *
1670 * This is kind of subtle.
1671 * --First, notice if the vnode has been revoked: in so, override hint
1672 * --EVFILT_READ knotes are checked no matter what the hint is
1673 * --Other knotes activate based on hint.
1674 * --If hint is revoke, set special flags and activate
1675 */
1676 static int
1677 filt_vnode(struct knote *kn, long hint)
1678 {
1679 vnode_t vp = (struct vnode *)kn->kn_hook;
1680 int activate = 0;
1681 long orig_hint = hint;
1682
1683 if (0 == hint) {
1684 vnode_lock(vp);
1685
1686 if (vnode_getiocount(vp, kn->kn_hookid, VNODE_NODEAD | VNODE_WITHID) != 0) {
1687 /* Is recycled */
1688 hint = NOTE_REVOKE;
1689 }
1690 } else {
1691 lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED);
1692 }
1693
1694 /* Special handling for vnodes that are in recycle or already gone */
1695 if (NOTE_REVOKE == hint) {
1696 kn->kn_flags |= (EV_EOF | EV_ONESHOT);
1697 activate = 1;
1698
1699 if ((kn->kn_filter == EVFILT_VNODE) && (kn->kn_sfflags & NOTE_REVOKE)) {
1700 kn->kn_fflags |= NOTE_REVOKE;
1701 }
1702 } else {
1703 switch(kn->kn_filter) {
1704 case EVFILT_READ:
1705 kn->kn_data = vnode_readable_data_count(vp, kn->kn_fp->f_fglob->fg_offset, (kn->kn_flags & EV_POLL));
1706
1707 if (kn->kn_data != 0) {
1708 activate = 1;
1709 }
1710 break;
1711 case EVFILT_WRITE:
1712 kn->kn_data = vnode_writable_space_count(vp);
1713
1714 if (kn->kn_data != 0) {
1715 activate = 1;
1716 }
1717 break;
1718 case EVFILT_VNODE:
1719 /* Check events this note matches against the hint */
1720 if (kn->kn_sfflags & hint) {
1721 kn->kn_fflags |= hint; /* Set which event occurred */
1722 }
1723 if (kn->kn_fflags != 0) {
1724 activate = 1;
1725 }
1726 break;
1727 default:
1728 panic("Invalid knote filter on a vnode!\n");
1729 }
1730 }
1731
1732 if (orig_hint == 0) {
1733 /*
1734 * Definitely need to unlock, may need to put
1735 */
1736 if (hint == 0) {
1737 vnode_put_locked(vp);
1738 }
1739 vnode_unlock(vp);
1740 }
1741
1742 return (activate);
1743 }