]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2000-2014 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ | |
29 | /* | |
30 | * Copyright (c) 1982, 1986, 1989, 1993 | |
31 | * The Regents of the University of California. All rights reserved. | |
32 | * (c) UNIX System Laboratories, Inc. | |
33 | * All or some portions of this file are derived from material licensed | |
34 | * to the University of California by American Telephone and Telegraph | |
35 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
36 | * the permission of UNIX System Laboratories, Inc. | |
37 | * | |
38 | * Redistribution and use in source and binary forms, with or without | |
39 | * modification, are permitted provided that the following conditions | |
40 | * are met: | |
41 | * 1. Redistributions of source code must retain the above copyright | |
42 | * notice, this list of conditions and the following disclaimer. | |
43 | * 2. Redistributions in binary form must reproduce the above copyright | |
44 | * notice, this list of conditions and the following disclaimer in the | |
45 | * documentation and/or other materials provided with the distribution. | |
46 | * 3. All advertising materials mentioning features or use of this software | |
47 | * must display the following acknowledgement: | |
48 | * This product includes software developed by the University of | |
49 | * California, Berkeley and its contributors. | |
50 | * 4. Neither the name of the University nor the names of its contributors | |
51 | * may be used to endorse or promote products derived from this software | |
52 | * without specific prior written permission. | |
53 | * | |
54 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
55 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
56 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
57 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
58 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
59 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
60 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
61 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
62 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
63 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
64 | * SUCH DAMAGE. | |
65 | * | |
66 | * @(#)vfs_vnops.c 8.14 (Berkeley) 6/15/95 | |
67 | * | |
68 | */ | |
69 | /* | |
70 | * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce | |
71 | * support for mandatory and extensible security protections. This notice | |
72 | * is included in support of clause 2.2 (b) of the Apple Public License, | |
73 | * Version 2.0. | |
74 | */ | |
75 | ||
76 | #include <sys/param.h> | |
77 | #include <sys/types.h> | |
78 | #include <sys/systm.h> | |
79 | #include <sys/kernel.h> | |
80 | #include <sys/file_internal.h> | |
81 | #include <sys/stat.h> | |
82 | #include <sys/proc_internal.h> | |
83 | #include <sys/kauth.h> | |
84 | #include <sys/mount_internal.h> | |
85 | #include <sys/namei.h> | |
86 | #include <sys/vnode_internal.h> | |
87 | #include <sys/ioctl.h> | |
88 | #include <sys/tty.h> | |
89 | /* Temporary workaround for ubc.h until <rdar://4714366> is resolved */ | |
90 | #define ubc_setcred ubc_setcred_deprecated | |
91 | #include <sys/ubc.h> | |
92 | #undef ubc_setcred | |
93 | int ubc_setcred(struct vnode *, struct proc *); | |
94 | #include <sys/conf.h> | |
95 | #include <sys/disk.h> | |
96 | #include <sys/fsevents.h> | |
97 | #include <sys/kdebug.h> | |
98 | #include <sys/xattr.h> | |
99 | #include <sys/ubc_internal.h> | |
100 | #include <sys/uio_internal.h> | |
101 | #include <sys/resourcevar.h> | |
102 | #include <sys/signalvar.h> | |
103 | ||
104 | #include <vm/vm_kern.h> | |
105 | #include <vm/vm_map.h> | |
106 | ||
107 | #include <miscfs/specfs/specdev.h> | |
108 | #include <miscfs/fifofs/fifo.h> | |
109 | ||
110 | #if CONFIG_MACF | |
111 | #include <security/mac_framework.h> | |
112 | #endif | |
113 | ||
114 | #if CONFIG_PROTECT | |
115 | #include <sys/cprotect.h> | |
116 | #endif | |
117 | ||
118 | extern void sigpup_attach_vnode(vnode_t); /* XXX */ | |
119 | ||
/*
 * Forward declarations for the file-local routines referenced by the
 * vnops and vnode_filtops tables defined below, plus the helper used by
 * vn_open_auth().
 */
static int vn_closefile(struct fileglob *fp, vfs_context_t ctx);
static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data,
    vfs_context_t ctx);
static int vn_read(struct fileproc *fp, struct uio *uio, int flags,
    vfs_context_t ctx);
static int vn_write(struct fileproc *fp, struct uio *uio, int flags,
    vfs_context_t ctx);
static int vn_select( struct fileproc *fp, int which, void * wql,
    vfs_context_t ctx);
static int vn_kqfilt_add(struct fileproc *fp, struct knote *kn,
    vfs_context_t ctx);
static void filt_vndetach(struct knote *kn);
static int filt_vnode(struct knote *kn, long hint);
static int vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx);
/* Compiled out: no vn_kqfilt_remove is wired into the tables below. */
#if 0
static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident,
    vfs_context_t ctx);
#endif
138 | ||
/*
 * File-operations table tagged DTYPE_VNODE.  The initializer is
 * positional, so the entries must stay in the order in which
 * struct fileops declares its members.
 */
const struct fileops vnops = {
	DTYPE_VNODE,
	vn_read,
	vn_write,
	vn_ioctl,
	vn_select,
	vn_closefile,
	vn_kqfilt_add,
	NULL	/* last slot intentionally empty -- NOTE(review): confirm which member this is in struct fileops */
};
149 | ||
/*
 * kqueue filter operations for vnodes.  No attach hook is supplied
 * (f_attach is NULL); detach and event evaluation are handled by
 * filt_vndetach() and filt_vnode().
 */
struct filterops vnode_filtops = {
	.f_isfd = 1,	/* NOTE(review): presumably marks the knote ident as a file descriptor -- confirm */
	.f_attach = NULL,
	.f_detach = filt_vndetach,
	.f_event = filt_vnode
};
156 | ||
/*
 * Common entry point for vnode open operations.
 *
 * Thin wrapper around vn_open_modflags(): the open flags are passed by
 * value here, so any adjustments the lower layers make to them are not
 * visible to the caller.
 *
 * XXX the profusion of interfaces here is probably a bad thing.
 */
int
vn_open(struct nameidata *ndp, int fmode, int cmode)
{
	int error;

	/* &fmode refers to our private copy of the caller's flags. */
	error = vn_open_modflags(ndp, &fmode, cmode);

	return (error);
}
168 | ||
169 | int | |
170 | vn_open_modflags(struct nameidata *ndp, int *fmodep, int cmode) | |
171 | { | |
172 | struct vnode_attr va; | |
173 | ||
174 | VATTR_INIT(&va); | |
175 | VATTR_SET(&va, va_mode, cmode); | |
176 | ||
177 | return(vn_open_auth(ndp, fmodep, &va)); | |
178 | } | |
179 | ||
180 | static int | |
181 | vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx) | |
182 | { | |
183 | int error; | |
184 | ||
185 | if ((error = vnode_ref_ext(vp, fmode, 0)) != 0) { | |
186 | goto bad; | |
187 | } | |
188 | ||
189 | /* Call out to allow 3rd party notification of open. | |
190 | * Ignore result of kauth_authorize_fileop call. | |
191 | */ | |
192 | #if CONFIG_MACF | |
193 | mac_vnode_notify_open(ctx, vp, fmode); | |
194 | #endif | |
195 | kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN, | |
196 | (uintptr_t)vp, 0); | |
197 | ||
198 | sigpup_attach_vnode(vp); | |
199 | ||
200 | return 0; | |
201 | ||
202 | bad: | |
203 | return error; | |
204 | ||
205 | } | |
206 | ||
207 | /* | |
208 | * May do nameidone() to allow safely adding an FSEvent. Cue off of ni_dvp to | |
209 | * determine whether that has happened. | |
210 | */ | |
211 | static int | |
212 | vn_open_auth_do_create(struct nameidata *ndp, struct vnode_attr *vap, int fmode, boolean_t *did_create, boolean_t *did_open, vfs_context_t ctx) | |
213 | { | |
214 | uint32_t status = 0; | |
215 | vnode_t dvp = ndp->ni_dvp; | |
216 | int batched; | |
217 | int error; | |
218 | vnode_t vp; | |
219 | ||
220 | batched = vnode_compound_open_available(ndp->ni_dvp); | |
221 | *did_open = FALSE; | |
222 | ||
223 | VATTR_SET(vap, va_type, VREG); | |
224 | if (fmode & O_EXCL) | |
225 | vap->va_vaflags |= VA_EXCLUSIVE; | |
226 | ||
227 | #if NAMEDRSRCFORK | |
228 | if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) { | |
229 | if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0) | |
230 | goto out; | |
231 | if ((error = vnode_makenamedstream(dvp, &ndp->ni_vp, XATTR_RESOURCEFORK_NAME, 0, ctx)) != 0) | |
232 | goto out; | |
233 | *did_create = TRUE; | |
234 | } else { | |
235 | #endif | |
236 | if (!batched) { | |
237 | if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0) | |
238 | goto out; | |
239 | } | |
240 | ||
241 | error = vn_create(dvp, &ndp->ni_vp, ndp, vap, VN_CREATE_DOOPEN, fmode, &status, ctx); | |
242 | if (error != 0) { | |
243 | if (batched) { | |
244 | *did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? TRUE : FALSE; | |
245 | } else { | |
246 | *did_create = FALSE; | |
247 | } | |
248 | ||
249 | if (error == EKEEPLOOKING) { | |
250 | if (*did_create) { | |
251 | panic("EKEEPLOOKING, but we did a create?"); | |
252 | } | |
253 | if (!batched) { | |
254 | panic("EKEEPLOOKING from filesystem that doesn't support compound vnops?"); | |
255 | } | |
256 | if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) { | |
257 | panic("EKEEPLOOKING, but continue flag not set?"); | |
258 | } | |
259 | ||
260 | /* | |
261 | * Do NOT drop the dvp: we need everything to continue the lookup. | |
262 | */ | |
263 | return error; | |
264 | } | |
265 | } else { | |
266 | if (batched) { | |
267 | *did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? 1 : 0; | |
268 | *did_open = TRUE; | |
269 | } else { | |
270 | *did_create = TRUE; | |
271 | } | |
272 | } | |
273 | #if NAMEDRSRCFORK | |
274 | } | |
275 | #endif | |
276 | ||
277 | vp = ndp->ni_vp; | |
278 | ||
279 | if (*did_create) { | |
280 | int update_flags = 0; | |
281 | ||
282 | // Make sure the name & parent pointers are hooked up | |
283 | if (vp->v_name == NULL) | |
284 | update_flags |= VNODE_UPDATE_NAME; | |
285 | if (vp->v_parent == NULLVP) | |
286 | update_flags |= VNODE_UPDATE_PARENT; | |
287 | ||
288 | if (update_flags) | |
289 | vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags); | |
290 | ||
291 | vnode_put(dvp); | |
292 | ndp->ni_dvp = NULLVP; | |
293 | ||
294 | #if CONFIG_FSE | |
295 | if (need_fsevent(FSE_CREATE_FILE, vp)) { | |
296 | add_fsevent(FSE_CREATE_FILE, ctx, | |
297 | FSE_ARG_VNODE, vp, | |
298 | FSE_ARG_DONE); | |
299 | } | |
300 | #endif | |
301 | } | |
302 | out: | |
303 | if (ndp->ni_dvp != NULLVP) { | |
304 | vnode_put(dvp); | |
305 | ndp->ni_dvp = NULLVP; | |
306 | } | |
307 | ||
308 | return error; | |
309 | } | |
310 | ||
311 | /* | |
312 | * Open a file with authorization, updating the contents of the structures | |
313 | * pointed to by ndp, fmodep, and vap as necessary to perform the requested | |
314 | * operation. This function is used for both opens of existing files, and | |
315 | * creation of new files. | |
316 | * | |
317 | * Parameters: ndp The nami data pointer describing the | |
318 | * file | |
319 | * fmodep A pointer to an int containg the mode | |
320 | * information to be used for the open | |
321 | * vap A pointer to the vnode attribute | |
322 | * descriptor to be used for the open | |
323 | * | |
324 | * Indirect: * Contents of the data structures pointed | |
325 | * to by the parameters are modified as | |
326 | * necessary to the requested operation. | |
327 | * | |
328 | * Returns: 0 Success | |
329 | * !0 errno value | |
330 | * | |
331 | * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order. | |
332 | * | |
333 | * The contents of '*ndp' will be modified, based on the other | |
334 | * arguments to this function, and to return file and directory | |
335 | * data necessary to satisfy the requested operation. | |
336 | * | |
337 | * If the file does not exist and we are creating it, then the | |
338 | * O_TRUNC flag will be cleared in '*fmodep' to indicate to the | |
339 | * caller that the file was not truncated. | |
340 | * | |
341 | * If the file exists and the O_EXCL flag was not specified, then | |
342 | * the O_CREAT flag will be cleared in '*fmodep' to indicate to | |
343 | * the caller that the existing file was merely opened rather | |
344 | * than created. | |
345 | * | |
346 | * The contents of '*vap' will be modified as necessary to | |
347 | * complete the operation, including setting of supported | |
348 | * attribute, clearing of fields containing unsupported attributes | |
349 | * in the request, if the request proceeds without them, etc.. | |
350 | * | |
351 | * XXX: This function is too complicated in actings on its arguments | |
352 | * | |
353 | * XXX: We should enummerate the possible errno values here, and where | |
354 | * in the code they originated. | |
355 | */ | |
356 | int | |
357 | vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap) | |
358 | { | |
359 | struct vnode *vp; | |
360 | struct vnode *dvp; | |
361 | vfs_context_t ctx = ndp->ni_cnd.cn_context; | |
362 | int error; | |
363 | int fmode; | |
364 | uint32_t origcnflags; | |
365 | boolean_t did_create; | |
366 | boolean_t did_open; | |
367 | boolean_t need_vnop_open; | |
368 | boolean_t batched; | |
369 | boolean_t ref_failed; | |
370 | ||
371 | again: | |
372 | vp = NULL; | |
373 | dvp = NULL; | |
374 | batched = FALSE; | |
375 | did_create = FALSE; | |
376 | need_vnop_open = TRUE; | |
377 | ref_failed = FALSE; | |
378 | fmode = *fmodep; | |
379 | origcnflags = ndp->ni_cnd.cn_flags; | |
380 | ||
381 | /* | |
382 | * O_CREAT | |
383 | */ | |
384 | if (fmode & O_CREAT) { | |
385 | if ( (fmode & O_DIRECTORY) ) { | |
386 | error = EINVAL; | |
387 | goto out; | |
388 | } | |
389 | ndp->ni_cnd.cn_nameiop = CREATE; | |
390 | #if CONFIG_TRIGGERS | |
391 | ndp->ni_op = OP_LINK; | |
392 | #endif | |
393 | /* Inherit USEDVP, vnode_open() supported flags only */ | |
394 | ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT); | |
395 | ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1; | |
396 | ndp->ni_flag = NAMEI_COMPOUNDOPEN; | |
397 | #if NAMEDRSRCFORK | |
398 | /* open calls are allowed for resource forks. */ | |
399 | ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; | |
400 | #endif | |
401 | if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0 && (origcnflags & FOLLOW) != 0) | |
402 | ndp->ni_cnd.cn_flags |= FOLLOW; | |
403 | ||
404 | continue_create_lookup: | |
405 | if ( (error = namei(ndp)) ) | |
406 | goto out; | |
407 | ||
408 | dvp = ndp->ni_dvp; | |
409 | vp = ndp->ni_vp; | |
410 | ||
411 | batched = vnode_compound_open_available(dvp); | |
412 | ||
413 | /* not found, create */ | |
414 | if (vp == NULL) { | |
415 | /* must have attributes for a new file */ | |
416 | if (vap == NULL) { | |
417 | vnode_put(dvp); | |
418 | error = EINVAL; | |
419 | goto out; | |
420 | } | |
421 | /* | |
422 | * Attempt a create. For a system supporting compound VNOPs, we may | |
423 | * find an existing file or create one; in either case, we will already | |
424 | * have the file open and no VNOP_OPEN() will be needed. | |
425 | */ | |
426 | error = vn_open_auth_do_create(ndp, vap, fmode, &did_create, &did_open, ctx); | |
427 | ||
428 | dvp = ndp->ni_dvp; | |
429 | vp = ndp->ni_vp; | |
430 | ||
431 | /* | |
432 | * Detected a node that the filesystem couldn't handle. Don't call | |
433 | * nameidone() yet, because we need that path buffer. | |
434 | */ | |
435 | if (error == EKEEPLOOKING) { | |
436 | if (!batched) { | |
437 | panic("EKEEPLOOKING from a filesystem that doesn't support compound VNOPs?"); | |
438 | } | |
439 | goto continue_create_lookup; | |
440 | } | |
441 | ||
442 | nameidone(ndp); | |
443 | if (dvp) { | |
444 | panic("Shouldn't have a dvp here."); | |
445 | } | |
446 | ||
447 | if (error) { | |
448 | /* | |
449 | * Check for a creation or unlink race. | |
450 | */ | |
451 | if (((error == EEXIST) && !(fmode & O_EXCL)) || | |
452 | ((error == ENOENT) && (fmode & O_CREAT))){ | |
453 | if (vp) | |
454 | vnode_put(vp); | |
455 | goto again; | |
456 | } | |
457 | goto bad; | |
458 | } | |
459 | ||
460 | need_vnop_open = !did_open; | |
461 | } | |
462 | else { | |
463 | if (fmode & O_EXCL) | |
464 | error = EEXIST; | |
465 | ||
466 | /* | |
467 | * We have a vnode. Use compound open if available | |
468 | * or else fall through to "traditional" path. Note: can't | |
469 | * do a compound open for root, because the parent belongs | |
470 | * to a different FS. | |
471 | */ | |
472 | if (error == 0 && batched && (vnode_mount(dvp) == vnode_mount(vp))) { | |
473 | error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx); | |
474 | ||
475 | if (error == 0) { | |
476 | vp = ndp->ni_vp; | |
477 | need_vnop_open = FALSE; | |
478 | } else if (error == EKEEPLOOKING) { | |
479 | if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) { | |
480 | panic("EKEEPLOOKING, but continue flag not set?"); | |
481 | } | |
482 | goto continue_create_lookup; | |
483 | } | |
484 | } | |
485 | nameidone(ndp); | |
486 | vnode_put(dvp); | |
487 | ndp->ni_dvp = NULLVP; | |
488 | ||
489 | if (error) { | |
490 | goto bad; | |
491 | } | |
492 | ||
493 | fmode &= ~O_CREAT; | |
494 | ||
495 | /* Fall through */ | |
496 | } | |
497 | } | |
498 | else { | |
499 | /* | |
500 | * Not O_CREAT | |
501 | */ | |
502 | ndp->ni_cnd.cn_nameiop = LOOKUP; | |
503 | /* Inherit USEDVP, vnode_open() supported flags only */ | |
504 | ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT); | |
505 | ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1 | WANTPARENT; | |
506 | #if NAMEDRSRCFORK | |
507 | /* open calls are allowed for resource forks. */ | |
508 | ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; | |
509 | #endif | |
510 | ndp->ni_flag = NAMEI_COMPOUNDOPEN; | |
511 | ||
512 | /* preserve NOFOLLOW from vnode_open() */ | |
513 | if (fmode & O_NOFOLLOW || fmode & O_SYMLINK || (origcnflags & FOLLOW) == 0) { | |
514 | ndp->ni_cnd.cn_flags &= ~FOLLOW; | |
515 | } | |
516 | ||
517 | /* Do a lookup, possibly going directly to filesystem for compound operation */ | |
518 | do { | |
519 | if ( (error = namei(ndp)) ) | |
520 | goto out; | |
521 | vp = ndp->ni_vp; | |
522 | dvp = ndp->ni_dvp; | |
523 | ||
524 | /* Check for batched lookup-open */ | |
525 | batched = vnode_compound_open_available(dvp); | |
526 | if (batched && ((vp == NULLVP) || (vnode_mount(dvp) == vnode_mount(vp)))) { | |
527 | error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx); | |
528 | vp = ndp->ni_vp; | |
529 | if (error == 0) { | |
530 | need_vnop_open = FALSE; | |
531 | } else if (error == EKEEPLOOKING) { | |
532 | if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) { | |
533 | panic("EKEEPLOOKING, but continue flag not set?"); | |
534 | } | |
535 | } | |
536 | } | |
537 | } while (error == EKEEPLOOKING); | |
538 | ||
539 | nameidone(ndp); | |
540 | vnode_put(dvp); | |
541 | ndp->ni_dvp = NULLVP; | |
542 | ||
543 | if (error) { | |
544 | goto bad; | |
545 | } | |
546 | } | |
547 | ||
548 | /* | |
549 | * By this point, nameidone() is called, dvp iocount is dropped, | |
550 | * and dvp pointer is cleared. | |
551 | */ | |
552 | if (ndp->ni_dvp != NULLVP) { | |
553 | panic("Haven't cleaned up adequately in vn_open_auth()"); | |
554 | } | |
555 | ||
556 | /* | |
557 | * Expect to use this code for filesystems without compound VNOPs, for the root | |
558 | * of a filesystem, which can't be "looked up" in the sense of VNOP_LOOKUP(), | |
559 | * and for shadow files, which do not live on the same filesystems as their "parents." | |
560 | */ | |
561 | if (need_vnop_open) { | |
562 | if (batched && !vnode_isvroot(vp) && !vnode_isnamedstream(vp)) { | |
563 | panic("Why am I trying to use VNOP_OPEN() on anything other than the root or a named stream?"); | |
564 | } | |
565 | ||
566 | if (!did_create) { | |
567 | error = vn_authorize_open_existing(vp, &ndp->ni_cnd, fmode, ctx, NULL); | |
568 | if (error) { | |
569 | goto bad; | |
570 | } | |
571 | } | |
572 | ||
573 | #if CONFIG_PROTECT | |
574 | /* | |
575 | * Perform any content protection access checks prior to calling | |
576 | * into the filesystem, if the raw encrypted mode was not | |
577 | * requested. | |
578 | * | |
579 | * If the va_dataprotect_flags are NOT active, or if they are, | |
580 | * but they do not have the VA_DP_RAWENCRYPTED bit set, then we need | |
581 | * to perform the checks. | |
582 | */ | |
583 | if (!(VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) || | |
584 | ((vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) == 0)) { | |
585 | error = cp_handle_open (vp, fmode); | |
586 | if (error) { | |
587 | goto bad; | |
588 | } | |
589 | } | |
590 | #endif | |
591 | ||
592 | error = VNOP_OPEN(vp, fmode, ctx); | |
593 | if (error) { | |
594 | goto bad; | |
595 | } | |
596 | need_vnop_open = FALSE; | |
597 | } | |
598 | ||
599 | // if the vnode is tagged VOPENEVT and the current process | |
600 | // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY | |
601 | // flag to the open mode so that this open won't count against | |
602 | // the vnode when carbon delete() does a vnode_isinuse() to see | |
603 | // if a file is currently in use. this allows spotlight | |
604 | // importers to not interfere with carbon apps that depend on | |
605 | // the no-delete-if-busy semantics of carbon delete(). | |
606 | // | |
607 | if (!did_create && (vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) { | |
608 | fmode |= O_EVTONLY; | |
609 | } | |
610 | ||
611 | /* | |
612 | * Grab reference, etc. | |
613 | */ | |
614 | error = vn_open_auth_finish(vp, fmode, ctx); | |
615 | if (error) { | |
616 | ref_failed = TRUE; | |
617 | goto bad; | |
618 | } | |
619 | ||
620 | /* Compound VNOP open is responsible for doing the truncate */ | |
621 | if (batched || did_create) | |
622 | fmode &= ~O_TRUNC; | |
623 | ||
624 | *fmodep = fmode; | |
625 | return (0); | |
626 | ||
627 | bad: | |
628 | /* Opened either explicitly or by a batched create */ | |
629 | if (!need_vnop_open) { | |
630 | VNOP_CLOSE(vp, fmode, ctx); | |
631 | } | |
632 | ||
633 | ndp->ni_vp = NULL; | |
634 | if (vp) { | |
635 | #if NAMEDRSRCFORK | |
636 | /* Aggressively recycle shadow files if we error'd out during open() */ | |
637 | if ((vnode_isnamedstream(vp)) && | |
638 | (vp->v_parent != NULLVP) && | |
639 | (vnode_isshadow(vp))) { | |
640 | vnode_recycle(vp); | |
641 | } | |
642 | #endif | |
643 | vnode_put(vp); | |
644 | /* | |
645 | * Check for a race against unlink. We had a vnode | |
646 | * but according to vnode_authorize or VNOP_OPEN it | |
647 | * no longer exists. | |
648 | * | |
649 | * EREDRIVEOPEN: means that we were hit by the tty allocation race. | |
650 | */ | |
651 | if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN) || ref_failed) { | |
652 | goto again; | |
653 | } | |
654 | } | |
655 | ||
656 | out: | |
657 | return (error); | |
658 | } | |
659 | ||
660 | #if vn_access_DEPRECATED | |
661 | /* | |
662 | * Authorize an action against a vnode. This has been the canonical way to | |
663 | * ensure that the credential/process/etc. referenced by a vfs_context | |
664 | * is granted the rights called out in 'mode' against the vnode 'vp'. | |
665 | * | |
666 | * Unfortunately, the use of VREAD/VWRITE/VEXEC makes it very difficult | |
667 | * to add support for more rights. As such, this interface will be deprecated | |
668 | * and callers will use vnode_authorize instead. | |
669 | */ | |
670 | int | |
671 | vn_access(vnode_t vp, int mode, vfs_context_t context) | |
672 | { | |
673 | kauth_action_t action; | |
674 | ||
675 | action = 0; | |
676 | if (mode & VREAD) | |
677 | action |= KAUTH_VNODE_READ_DATA; | |
678 | if (mode & VWRITE) | |
679 | action |= KAUTH_VNODE_WRITE_DATA; | |
680 | if (mode & VEXEC) | |
681 | action |= KAUTH_VNODE_EXECUTE; | |
682 | ||
683 | return(vnode_authorize(vp, NULL, action, context)); | |
684 | } | |
685 | #endif /* vn_access_DEPRECATED */ | |
686 | ||
687 | /* | |
688 | * Vnode close call | |
689 | */ | |
690 | int | |
691 | vn_close(struct vnode *vp, int flags, vfs_context_t ctx) | |
692 | { | |
693 | int error; | |
694 | int flusherror = 0; | |
695 | ||
696 | #if NAMEDRSRCFORK | |
697 | /* Sync data from resource fork shadow file if needed. */ | |
698 | if ((vp->v_flag & VISNAMEDSTREAM) && | |
699 | (vp->v_parent != NULLVP) && | |
700 | vnode_isshadow(vp)) { | |
701 | if (flags & FWASWRITTEN) { | |
702 | flusherror = vnode_flushnamedstream(vp->v_parent, vp, ctx); | |
703 | } | |
704 | } | |
705 | #endif | |
706 | ||
707 | /* work around for foxhound */ | |
708 | if (vnode_isspec(vp)) | |
709 | (void)vnode_rele_ext(vp, flags, 0); | |
710 | ||
711 | /* | |
712 | * On HFS, we flush when the last writer closes. We do this | |
713 | * because resource fork vnodes hold a reference on data fork | |
714 | * vnodes and that will prevent them from getting VNOP_INACTIVE | |
715 | * which will delay when we flush cached data. In future, we | |
716 | * might find it beneficial to do this for all file systems. | |
717 | * Note that it's OK to access v_writecount without the lock | |
718 | * in this context. | |
719 | */ | |
720 | if (vp->v_tag == VT_HFS && (flags & FWRITE) && vp->v_writecount == 1) | |
721 | VNOP_FSYNC(vp, MNT_NOWAIT, ctx); | |
722 | ||
723 | error = VNOP_CLOSE(vp, flags, ctx); | |
724 | ||
725 | #if CONFIG_FSE | |
726 | if (flags & FWASWRITTEN) { | |
727 | if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) { | |
728 | add_fsevent(FSE_CONTENT_MODIFIED, ctx, | |
729 | FSE_ARG_VNODE, vp, | |
730 | FSE_ARG_DONE); | |
731 | } | |
732 | } | |
733 | #endif | |
734 | ||
735 | if (!vnode_isspec(vp)) | |
736 | (void)vnode_rele_ext(vp, flags, 0); | |
737 | ||
738 | if (flusherror) { | |
739 | error = flusherror; | |
740 | } | |
741 | return (error); | |
742 | } | |
743 | ||
744 | static int | |
745 | vn_read_swapfile( | |
746 | struct vnode *vp, | |
747 | uio_t uio) | |
748 | { | |
749 | int error; | |
750 | off_t swap_count, this_count; | |
751 | off_t file_end, read_end; | |
752 | off_t prev_resid; | |
753 | char *my_swap_page; | |
754 | ||
755 | /* | |
756 | * Reading from a swap file will get you zeroes. | |
757 | */ | |
758 | ||
759 | my_swap_page = NULL; | |
760 | error = 0; | |
761 | swap_count = uio_resid(uio); | |
762 | ||
763 | file_end = ubc_getsize(vp); | |
764 | read_end = uio->uio_offset + uio_resid(uio); | |
765 | if (uio->uio_offset >= file_end) { | |
766 | /* uio starts after end of file: nothing to read */ | |
767 | swap_count = 0; | |
768 | } else if (read_end > file_end) { | |
769 | /* uio extends beyond end of file: stop before that */ | |
770 | swap_count -= (read_end - file_end); | |
771 | } | |
772 | ||
773 | while (swap_count > 0) { | |
774 | if (my_swap_page == NULL) { | |
775 | MALLOC(my_swap_page, char *, PAGE_SIZE, | |
776 | M_TEMP, M_WAITOK); | |
777 | memset(my_swap_page, '\0', PAGE_SIZE); | |
778 | /* add an end-of-line to keep line counters happy */ | |
779 | my_swap_page[PAGE_SIZE-1] = '\n'; | |
780 | } | |
781 | this_count = swap_count; | |
782 | if (this_count > PAGE_SIZE) { | |
783 | this_count = PAGE_SIZE; | |
784 | } | |
785 | ||
786 | prev_resid = uio_resid(uio); | |
787 | error = uiomove((caddr_t) my_swap_page, | |
788 | this_count, | |
789 | uio); | |
790 | if (error) { | |
791 | break; | |
792 | } | |
793 | swap_count -= (prev_resid - uio_resid(uio)); | |
794 | } | |
795 | if (my_swap_page != NULL) { | |
796 | FREE(my_swap_page, M_TEMP); | |
797 | my_swap_page = NULL; | |
798 | } | |
799 | ||
800 | return error; | |
801 | } | |
802 | /* | |
803 | * Package up an I/O request on a vnode into a uio and do it. | |
804 | */ | |
805 | int | |
806 | vn_rdwr( | |
807 | enum uio_rw rw, | |
808 | struct vnode *vp, | |
809 | caddr_t base, | |
810 | int len, | |
811 | off_t offset, | |
812 | enum uio_seg segflg, | |
813 | int ioflg, | |
814 | kauth_cred_t cred, | |
815 | int *aresid, | |
816 | proc_t p) | |
817 | { | |
818 | int64_t resid; | |
819 | int result; | |
820 | ||
821 | result = vn_rdwr_64(rw, | |
822 | vp, | |
823 | (uint64_t)(uintptr_t)base, | |
824 | (int64_t)len, | |
825 | offset, | |
826 | segflg, | |
827 | ioflg, | |
828 | cred, | |
829 | &resid, | |
830 | p); | |
831 | ||
832 | /* "resid" should be bounded above by "len," which is an int */ | |
833 | if (aresid != NULL) { | |
834 | *aresid = resid; | |
835 | } | |
836 | ||
837 | return result; | |
838 | } | |
839 | ||
840 | ||
841 | int | |
842 | vn_rdwr_64( | |
843 | enum uio_rw rw, | |
844 | struct vnode *vp, | |
845 | uint64_t base, | |
846 | int64_t len, | |
847 | off_t offset, | |
848 | enum uio_seg segflg, | |
849 | int ioflg, | |
850 | kauth_cred_t cred, | |
851 | int64_t *aresid, | |
852 | proc_t p) | |
853 | { | |
854 | uio_t auio; | |
855 | int spacetype; | |
856 | struct vfs_context context; | |
857 | int error=0; | |
858 | char uio_buf[ UIO_SIZEOF(1) ]; | |
859 | ||
860 | context.vc_thread = current_thread(); | |
861 | context.vc_ucred = cred; | |
862 | ||
863 | if (UIO_SEG_IS_USER_SPACE(segflg)) { | |
864 | spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; | |
865 | } | |
866 | else { | |
867 | spacetype = UIO_SYSSPACE; | |
868 | } | |
869 | auio = uio_createwithbuffer(1, offset, spacetype, rw, | |
870 | &uio_buf[0], sizeof(uio_buf)); | |
871 | uio_addiov(auio, base, len); | |
872 | ||
873 | #if CONFIG_MACF | |
874 | /* XXXMAC | |
875 | * IO_NOAUTH should be re-examined. | |
876 | * Likely that mediation should be performed in caller. | |
877 | */ | |
878 | if ((ioflg & IO_NOAUTH) == 0) { | |
879 | /* passed cred is fp->f_cred */ | |
880 | if (rw == UIO_READ) | |
881 | error = mac_vnode_check_read(&context, cred, vp); | |
882 | else | |
883 | error = mac_vnode_check_write(&context, cred, vp); | |
884 | } | |
885 | #endif | |
886 | ||
887 | if (error == 0) { | |
888 | if (rw == UIO_READ) { | |
889 | if (vnode_isswap(vp) && ((ioflg & IO_SWAP_DISPATCH) == 0)) { | |
890 | error = vn_read_swapfile(vp, auio); | |
891 | } else { | |
892 | error = VNOP_READ(vp, auio, ioflg, &context); | |
893 | } | |
894 | } else { | |
895 | error = VNOP_WRITE(vp, auio, ioflg, &context); | |
896 | } | |
897 | } | |
898 | ||
899 | if (aresid) | |
900 | *aresid = uio_resid(auio); | |
901 | else | |
902 | if (uio_resid(auio) && error == 0) | |
903 | error = EIO; | |
904 | return (error); | |
905 | } | |
906 | ||
907 | static inline void | |
908 | vn_offset_lock(struct fileglob *fg) | |
909 | { | |
910 | lck_mtx_lock_spin(&fg->fg_lock); | |
911 | while (fg->fg_lflags & FG_OFF_LOCKED) { | |
912 | fg->fg_lflags |= FG_OFF_LOCKWANT; | |
913 | msleep(&fg->fg_lflags, &fg->fg_lock, PVFS | PSPIN, | |
914 | "fg_offset_lock_wait", 0); | |
915 | } | |
916 | fg->fg_lflags |= FG_OFF_LOCKED; | |
917 | lck_mtx_unlock(&fg->fg_lock); | |
918 | } | |
919 | ||
920 | static inline void | |
921 | vn_offset_unlock(struct fileglob *fg) | |
922 | { | |
923 | int lock_wanted = 0; | |
924 | ||
925 | lck_mtx_lock_spin(&fg->fg_lock); | |
926 | if (fg->fg_lflags & FG_OFF_LOCKWANT) { | |
927 | lock_wanted = 1; | |
928 | } | |
929 | fg->fg_lflags &= ~(FG_OFF_LOCKED | FG_OFF_LOCKWANT); | |
930 | lck_mtx_unlock(&fg->fg_lock); | |
931 | if (lock_wanted) { | |
932 | wakeup(&fg->fg_lflags); | |
933 | } | |
934 | } | |
935 | ||
936 | /* | |
937 | * File table vnode read routine. | |
938 | */ | |
939 | static int | |
940 | vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) | |
941 | { | |
942 | struct vnode *vp; | |
943 | int error; | |
944 | int ioflag; | |
945 | off_t count; | |
946 | int offset_locked = 0; | |
947 | ||
948 | vp = (struct vnode *)fp->f_fglob->fg_data; | |
949 | if ( (error = vnode_getwithref(vp)) ) { | |
950 | return(error); | |
951 | } | |
952 | ||
953 | #if CONFIG_MACF | |
954 | error = mac_vnode_check_read(ctx, vfs_context_ucred(ctx), vp); | |
955 | if (error) { | |
956 | (void)vnode_put(vp); | |
957 | return (error); | |
958 | } | |
959 | #endif | |
960 | ||
961 | /* This signals to VNOP handlers that this read came from a file table read */ | |
962 | ioflag = IO_SYSCALL_DISPATCH; | |
963 | ||
964 | if (fp->f_fglob->fg_flag & FNONBLOCK) | |
965 | ioflag |= IO_NDELAY; | |
966 | if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp)) | |
967 | ioflag |= IO_NOCACHE; | |
968 | if (fp->f_fglob->fg_flag & FENCRYPTED) { | |
969 | ioflag |= IO_ENCRYPTED; | |
970 | } | |
971 | if (fp->f_fglob->fg_flag & FNORDAHEAD) | |
972 | ioflag |= IO_RAOFF; | |
973 | ||
974 | if ((flags & FOF_OFFSET) == 0) { | |
975 | if ((vnode_vtype(vp) == VREG) && !vnode_isswap(vp)) { | |
976 | vn_offset_lock(fp->f_fglob); | |
977 | offset_locked = 1; | |
978 | } | |
979 | uio->uio_offset = fp->f_fglob->fg_offset; | |
980 | } | |
981 | count = uio_resid(uio); | |
982 | ||
983 | if (vnode_isswap(vp)) { | |
984 | /* special case for swap files */ | |
985 | error = vn_read_swapfile(vp, uio); | |
986 | } else { | |
987 | error = VNOP_READ(vp, uio, ioflag, ctx); | |
988 | } | |
989 | if ((flags & FOF_OFFSET) == 0) { | |
990 | fp->f_fglob->fg_offset += count - uio_resid(uio); | |
991 | if (offset_locked) { | |
992 | vn_offset_unlock(fp->f_fglob); | |
993 | offset_locked = 0; | |
994 | } | |
995 | } | |
996 | ||
997 | (void)vnode_put(vp); | |
998 | return (error); | |
999 | } | |
1000 | ||
1001 | ||
1002 | /* | |
1003 | * File table vnode write routine. | |
1004 | */ | |
1005 | static int | |
1006 | vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) | |
1007 | { | |
1008 | struct vnode *vp; | |
1009 | int error, ioflag; | |
1010 | off_t count; | |
1011 | int clippedsize = 0; | |
1012 | int partialwrite=0; | |
1013 | int residcount, oldcount; | |
1014 | int offset_locked = 0; | |
1015 | proc_t p = vfs_context_proc(ctx); | |
1016 | ||
1017 | count = 0; | |
1018 | vp = (struct vnode *)fp->f_fglob->fg_data; | |
1019 | if ( (error = vnode_getwithref(vp)) ) { | |
1020 | return(error); | |
1021 | } | |
1022 | ||
1023 | #if CONFIG_MACF | |
1024 | error = mac_vnode_check_write(ctx, vfs_context_ucred(ctx), vp); | |
1025 | if (error) { | |
1026 | (void)vnode_put(vp); | |
1027 | return (error); | |
1028 | } | |
1029 | #endif | |
1030 | ||
1031 | /* | |
1032 | * IO_SYSCALL_DISPATCH signals to VNOP handlers that this write came from | |
1033 | * a file table write | |
1034 | */ | |
1035 | ioflag = (IO_UNIT | IO_SYSCALL_DISPATCH); | |
1036 | ||
1037 | if (vp->v_type == VREG && (fp->f_fglob->fg_flag & O_APPEND)) | |
1038 | ioflag |= IO_APPEND; | |
1039 | if (fp->f_fglob->fg_flag & FNONBLOCK) | |
1040 | ioflag |= IO_NDELAY; | |
1041 | if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp)) | |
1042 | ioflag |= IO_NOCACHE; | |
1043 | if (fp->f_fglob->fg_flag & FNODIRECT) | |
1044 | ioflag |= IO_NODIRECT; | |
1045 | if (fp->f_fglob->fg_flag & FSINGLE_WRITER) | |
1046 | ioflag |= IO_SINGLE_WRITER; | |
1047 | ||
1048 | /* | |
1049 | * Treat synchronous mounts and O_FSYNC on the fd as equivalent. | |
1050 | * | |
1051 | * XXX We treat O_DSYNC as O_FSYNC for now, since we can not delay | |
1052 | * XXX the non-essential metadata without some additional VFS work; | |
1053 | * XXX the intent at this point is to plumb the interface for it. | |
1054 | */ | |
1055 | if ((fp->f_fglob->fg_flag & (O_FSYNC|O_DSYNC)) || | |
1056 | (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) { | |
1057 | ioflag |= IO_SYNC; | |
1058 | } | |
1059 | ||
1060 | if ((flags & FOF_OFFSET) == 0) { | |
1061 | if ((vnode_vtype(vp) == VREG) && !vnode_isswap(vp)) { | |
1062 | vn_offset_lock(fp->f_fglob); | |
1063 | offset_locked = 1; | |
1064 | } | |
1065 | uio->uio_offset = fp->f_fglob->fg_offset; | |
1066 | count = uio_resid(uio); | |
1067 | } | |
1068 | if (((flags & FOF_OFFSET) == 0) && | |
1069 | vfs_context_proc(ctx) && (vp->v_type == VREG) && | |
1070 | (((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) || | |
1071 | ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)))) { | |
1072 | /* | |
1073 | * If the requested residual would cause us to go past the | |
1074 | * administrative limit, then we need to adjust the residual | |
1075 | * down to cause fewer bytes than requested to be written. If | |
1076 | * we can't do that (e.g. the residual is already 1 byte), | |
1077 | * then we fail the write with EFBIG. | |
1078 | */ | |
1079 | residcount = uio_resid(uio); | |
1080 | if ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { | |
1081 | clippedsize = (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur; | |
1082 | } else if ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)) { | |
1083 | clippedsize = (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset); | |
1084 | } | |
1085 | if (clippedsize >= residcount) { | |
1086 | psignal(p, SIGXFSZ); | |
1087 | error = EFBIG; | |
1088 | goto error_out; | |
1089 | } | |
1090 | partialwrite = 1; | |
1091 | uio_setresid(uio, residcount-clippedsize); | |
1092 | } | |
1093 | if ((flags & FOF_OFFSET) != 0) { | |
1094 | /* for pwrite, append should be ignored */ | |
1095 | ioflag &= ~IO_APPEND; | |
1096 | if (p && (vp->v_type == VREG) && | |
1097 | ((rlim_t)uio->uio_offset >= p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) { | |
1098 | psignal(p, SIGXFSZ); | |
1099 | error = EFBIG; | |
1100 | goto error_out; | |
1101 | } | |
1102 | if (p && (vp->v_type == VREG) && | |
1103 | ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) { | |
1104 | //Debugger("vn_bwrite:overstepping the bounds"); | |
1105 | residcount = uio_resid(uio); | |
1106 | clippedsize = (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur; | |
1107 | partialwrite = 1; | |
1108 | uio_setresid(uio, residcount-clippedsize); | |
1109 | } | |
1110 | } | |
1111 | ||
1112 | error = VNOP_WRITE(vp, uio, ioflag, ctx); | |
1113 | ||
1114 | if (partialwrite) { | |
1115 | oldcount = uio_resid(uio); | |
1116 | uio_setresid(uio, oldcount + clippedsize); | |
1117 | } | |
1118 | ||
1119 | if ((flags & FOF_OFFSET) == 0) { | |
1120 | if (ioflag & IO_APPEND) | |
1121 | fp->f_fglob->fg_offset = uio->uio_offset; | |
1122 | else | |
1123 | fp->f_fglob->fg_offset += count - uio_resid(uio); | |
1124 | if (offset_locked) { | |
1125 | vn_offset_unlock(fp->f_fglob); | |
1126 | offset_locked = 0; | |
1127 | } | |
1128 | } | |
1129 | ||
1130 | /* | |
1131 | * Set the credentials on successful writes | |
1132 | */ | |
1133 | if ((error == 0) && (vp->v_tag == VT_NFS) && (UBCINFOEXISTS(vp))) { | |
1134 | /* | |
1135 | * When called from aio subsystem, we only have the proc from | |
1136 | * which to get the credential, at this point, so use that | |
1137 | * instead. This means aio functions are incompatible with | |
1138 | * per-thread credentials (aio operations are proxied). We | |
1139 | * can't easily correct the aio vs. settid race in this case | |
1140 | * anyway, so we disallow it. | |
1141 | */ | |
1142 | if ((flags & FOF_PCRED) == 0) { | |
1143 | ubc_setthreadcred(vp, p, current_thread()); | |
1144 | } else { | |
1145 | ubc_setcred(vp, p); | |
1146 | } | |
1147 | } | |
1148 | (void)vnode_put(vp); | |
1149 | return (error); | |
1150 | ||
1151 | error_out: | |
1152 | if (offset_locked) { | |
1153 | vn_offset_unlock(fp->f_fglob); | |
1154 | } | |
1155 | (void)vnode_put(vp); | |
1156 | return (error); | |
1157 | } | |
1158 | ||
1159 | /* | |
1160 | * File table vnode stat routine. | |
1161 | * | |
1162 | * Returns: 0 Success | |
1163 | * EBADF | |
1164 | * ENOMEM | |
1165 | * vnode_getattr:??? | |
1166 | */ | |
1167 | int | |
1168 | vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx) | |
1169 | { | |
1170 | struct vnode_attr va; | |
1171 | int error; | |
1172 | u_short mode; | |
1173 | kauth_filesec_t fsec; | |
1174 | struct stat *sb = (struct stat *)0; /* warning avoidance ; protected by isstat64 */ | |
1175 | struct stat64 * sb64 = (struct stat64 *)0; /* warning avoidance ; protected by isstat64 */ | |
1176 | ||
1177 | if (isstat64 != 0) | |
1178 | sb64 = (struct stat64 *)sbptr; | |
1179 | else | |
1180 | sb = (struct stat *)sbptr; | |
1181 | memset(&va, 0, sizeof(va)); | |
1182 | VATTR_INIT(&va); | |
1183 | VATTR_WANTED(&va, va_fsid); | |
1184 | VATTR_WANTED(&va, va_fileid); | |
1185 | VATTR_WANTED(&va, va_mode); | |
1186 | VATTR_WANTED(&va, va_type); | |
1187 | VATTR_WANTED(&va, va_nlink); | |
1188 | VATTR_WANTED(&va, va_uid); | |
1189 | VATTR_WANTED(&va, va_gid); | |
1190 | VATTR_WANTED(&va, va_rdev); | |
1191 | VATTR_WANTED(&va, va_data_size); | |
1192 | VATTR_WANTED(&va, va_access_time); | |
1193 | VATTR_WANTED(&va, va_modify_time); | |
1194 | VATTR_WANTED(&va, va_change_time); | |
1195 | VATTR_WANTED(&va, va_create_time); | |
1196 | VATTR_WANTED(&va, va_flags); | |
1197 | VATTR_WANTED(&va, va_gen); | |
1198 | VATTR_WANTED(&va, va_iosize); | |
1199 | /* lower layers will synthesise va_total_alloc from va_data_size if required */ | |
1200 | VATTR_WANTED(&va, va_total_alloc); | |
1201 | if (xsec != NULL) { | |
1202 | VATTR_WANTED(&va, va_uuuid); | |
1203 | VATTR_WANTED(&va, va_guuid); | |
1204 | VATTR_WANTED(&va, va_acl); | |
1205 | } | |
1206 | error = vnode_getattr(vp, &va, ctx); | |
1207 | if (error) | |
1208 | goto out; | |
1209 | /* | |
1210 | * Copy from vattr table | |
1211 | */ | |
1212 | if (isstat64 != 0) { | |
1213 | sb64->st_dev = va.va_fsid; | |
1214 | sb64->st_ino = (ino64_t)va.va_fileid; | |
1215 | ||
1216 | } else { | |
1217 | sb->st_dev = va.va_fsid; | |
1218 | sb->st_ino = (ino_t)va.va_fileid; | |
1219 | } | |
1220 | mode = va.va_mode; | |
1221 | switch (vp->v_type) { | |
1222 | case VREG: | |
1223 | mode |= S_IFREG; | |
1224 | break; | |
1225 | case VDIR: | |
1226 | mode |= S_IFDIR; | |
1227 | break; | |
1228 | case VBLK: | |
1229 | mode |= S_IFBLK; | |
1230 | break; | |
1231 | case VCHR: | |
1232 | mode |= S_IFCHR; | |
1233 | break; | |
1234 | case VLNK: | |
1235 | mode |= S_IFLNK; | |
1236 | break; | |
1237 | case VSOCK: | |
1238 | mode |= S_IFSOCK; | |
1239 | break; | |
1240 | case VFIFO: | |
1241 | mode |= S_IFIFO; | |
1242 | break; | |
1243 | default: | |
1244 | error = EBADF; | |
1245 | goto out; | |
1246 | }; | |
1247 | if (isstat64 != 0) { | |
1248 | sb64->st_mode = mode; | |
1249 | sb64->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1; | |
1250 | sb64->st_uid = va.va_uid; | |
1251 | sb64->st_gid = va.va_gid; | |
1252 | sb64->st_rdev = va.va_rdev; | |
1253 | sb64->st_size = va.va_data_size; | |
1254 | sb64->st_atimespec = va.va_access_time; | |
1255 | sb64->st_mtimespec = va.va_modify_time; | |
1256 | sb64->st_ctimespec = va.va_change_time; | |
1257 | sb64->st_birthtimespec = | |
1258 | VATTR_IS_SUPPORTED(&va, va_create_time) ? va.va_create_time : va.va_change_time; | |
1259 | sb64->st_blksize = va.va_iosize; | |
1260 | sb64->st_flags = va.va_flags; | |
1261 | sb64->st_blocks = roundup(va.va_total_alloc, 512) / 512; | |
1262 | } else { | |
1263 | sb->st_mode = mode; | |
1264 | sb->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1; | |
1265 | sb->st_uid = va.va_uid; | |
1266 | sb->st_gid = va.va_gid; | |
1267 | sb->st_rdev = va.va_rdev; | |
1268 | sb->st_size = va.va_data_size; | |
1269 | sb->st_atimespec = va.va_access_time; | |
1270 | sb->st_mtimespec = va.va_modify_time; | |
1271 | sb->st_ctimespec = va.va_change_time; | |
1272 | sb->st_blksize = va.va_iosize; | |
1273 | sb->st_flags = va.va_flags; | |
1274 | sb->st_blocks = roundup(va.va_total_alloc, 512) / 512; | |
1275 | } | |
1276 | ||
1277 | /* if we're interested in extended security data and we got an ACL */ | |
1278 | if (xsec != NULL) { | |
1279 | if (!VATTR_IS_SUPPORTED(&va, va_acl) && | |
1280 | !VATTR_IS_SUPPORTED(&va, va_uuuid) && | |
1281 | !VATTR_IS_SUPPORTED(&va, va_guuid)) { | |
1282 | *xsec = KAUTH_FILESEC_NONE; | |
1283 | } else { | |
1284 | ||
1285 | if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) { | |
1286 | fsec = kauth_filesec_alloc(va.va_acl->acl_entrycount); | |
1287 | } else { | |
1288 | fsec = kauth_filesec_alloc(0); | |
1289 | } | |
1290 | if (fsec == NULL) { | |
1291 | error = ENOMEM; | |
1292 | goto out; | |
1293 | } | |
1294 | fsec->fsec_magic = KAUTH_FILESEC_MAGIC; | |
1295 | if (VATTR_IS_SUPPORTED(&va, va_uuuid)) { | |
1296 | fsec->fsec_owner = va.va_uuuid; | |
1297 | } else { | |
1298 | fsec->fsec_owner = kauth_null_guid; | |
1299 | } | |
1300 | if (VATTR_IS_SUPPORTED(&va, va_guuid)) { | |
1301 | fsec->fsec_group = va.va_guuid; | |
1302 | } else { | |
1303 | fsec->fsec_group = kauth_null_guid; | |
1304 | } | |
1305 | if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) { | |
1306 | bcopy(va.va_acl, &(fsec->fsec_acl), KAUTH_ACL_COPYSIZE(va.va_acl)); | |
1307 | } else { | |
1308 | fsec->fsec_acl.acl_entrycount = KAUTH_FILESEC_NOACL; | |
1309 | } | |
1310 | *xsec = fsec; | |
1311 | } | |
1312 | } | |
1313 | ||
1314 | /* Do not give the generation number out to unpriviledged users */ | |
1315 | if (va.va_gen && !vfs_context_issuser(ctx)) { | |
1316 | if (isstat64 != 0) | |
1317 | sb64->st_gen = 0; | |
1318 | else | |
1319 | sb->st_gen = 0; | |
1320 | } else { | |
1321 | if (isstat64 != 0) | |
1322 | sb64->st_gen = va.va_gen; | |
1323 | else | |
1324 | sb->st_gen = va.va_gen; | |
1325 | } | |
1326 | ||
1327 | error = 0; | |
1328 | out: | |
1329 | if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) | |
1330 | kauth_acl_free(va.va_acl); | |
1331 | return (error); | |
1332 | } | |
1333 | ||
1334 | int | |
1335 | vn_stat(struct vnode *vp, void *sb, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx) | |
1336 | { | |
1337 | int error; | |
1338 | ||
1339 | #if CONFIG_MACF | |
1340 | error = mac_vnode_check_stat(ctx, NOCRED, vp); | |
1341 | if (error) | |
1342 | return (error); | |
1343 | #endif | |
1344 | ||
1345 | /* authorize */ | |
1346 | if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_ATTRIBUTES | KAUTH_VNODE_READ_SECURITY, ctx)) != 0) | |
1347 | return(error); | |
1348 | ||
1349 | /* actual stat */ | |
1350 | return(vn_stat_noauth(vp, sb, xsec, isstat64, ctx)); | |
1351 | } | |
1352 | ||
1353 | ||
1354 | /* | |
1355 | * File table vnode ioctl routine. | |
1356 | */ | |
1357 | static int | |
1358 | vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx) | |
1359 | { | |
1360 | struct vnode *vp = ((struct vnode *)fp->f_fglob->fg_data); | |
1361 | off_t file_size; | |
1362 | int error; | |
1363 | struct vnode *ttyvp; | |
1364 | struct session * sessp; | |
1365 | ||
1366 | if ( (error = vnode_getwithref(vp)) ) { | |
1367 | return(error); | |
1368 | } | |
1369 | ||
1370 | #if CONFIG_MACF | |
1371 | error = mac_vnode_check_ioctl(ctx, vp, com); | |
1372 | if (error) | |
1373 | goto out; | |
1374 | #endif | |
1375 | ||
1376 | switch (vp->v_type) { | |
1377 | case VREG: | |
1378 | case VDIR: | |
1379 | if (com == FIONREAD) { | |
1380 | if ((error = vnode_size(vp, &file_size, ctx)) != 0) | |
1381 | goto out; | |
1382 | *(int *)data = file_size - fp->f_fglob->fg_offset; | |
1383 | goto out; | |
1384 | } | |
1385 | if (com == FIONBIO || com == FIOASYNC) { /* XXX */ | |
1386 | goto out; | |
1387 | } | |
1388 | /* fall into ... */ | |
1389 | ||
1390 | default: | |
1391 | error = ENOTTY; | |
1392 | goto out; | |
1393 | ||
1394 | case VFIFO: | |
1395 | case VCHR: | |
1396 | case VBLK: | |
1397 | ||
1398 | /* Should not be able to set block size from user space */ | |
1399 | if (com == DKIOCSETBLOCKSIZE) { | |
1400 | error = EPERM; | |
1401 | goto out; | |
1402 | } | |
1403 | ||
1404 | if (com == FIODTYPE) { | |
1405 | if (vp->v_type == VBLK) { | |
1406 | if (major(vp->v_rdev) >= nblkdev) { | |
1407 | error = ENXIO; | |
1408 | goto out; | |
1409 | } | |
1410 | *(int *)data = bdevsw[major(vp->v_rdev)].d_type; | |
1411 | ||
1412 | } else if (vp->v_type == VCHR) { | |
1413 | if (major(vp->v_rdev) >= nchrdev) { | |
1414 | error = ENXIO; | |
1415 | goto out; | |
1416 | } | |
1417 | *(int *)data = cdevsw[major(vp->v_rdev)].d_type; | |
1418 | } else { | |
1419 | error = ENOTTY; | |
1420 | goto out; | |
1421 | } | |
1422 | goto out; | |
1423 | } | |
1424 | error = VNOP_IOCTL(vp, com, data, fp->f_fglob->fg_flag, ctx); | |
1425 | ||
1426 | if (error == 0 && com == TIOCSCTTY) { | |
1427 | sessp = proc_session(vfs_context_proc(ctx)); | |
1428 | ||
1429 | session_lock(sessp); | |
1430 | ttyvp = sessp->s_ttyvp; | |
1431 | sessp->s_ttyvp = vp; | |
1432 | sessp->s_ttyvid = vnode_vid(vp); | |
1433 | session_unlock(sessp); | |
1434 | session_rele(sessp); | |
1435 | } | |
1436 | } | |
1437 | out: | |
1438 | (void)vnode_put(vp); | |
1439 | return(error); | |
1440 | } | |
1441 | ||
1442 | /* | |
1443 | * File table vnode select routine. | |
1444 | */ | |
1445 | static int | |
1446 | vn_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx) | |
1447 | { | |
1448 | int error; | |
1449 | struct vnode * vp = (struct vnode *)fp->f_fglob->fg_data; | |
1450 | struct vfs_context context; | |
1451 | ||
1452 | if ( (error = vnode_getwithref(vp)) == 0 ) { | |
1453 | context.vc_thread = current_thread(); | |
1454 | context.vc_ucred = fp->f_fglob->fg_cred; | |
1455 | ||
1456 | #if CONFIG_MACF | |
1457 | /* | |
1458 | * XXX We should use a per thread credential here; minimally, | |
1459 | * XXX the process credential should have a persistent | |
1460 | * XXX reference on it before being passed in here. | |
1461 | */ | |
1462 | error = mac_vnode_check_select(ctx, vp, which); | |
1463 | if (error == 0) | |
1464 | #endif | |
1465 | error = VNOP_SELECT(vp, which, fp->f_fglob->fg_flag, wql, ctx); | |
1466 | ||
1467 | (void)vnode_put(vp); | |
1468 | } | |
1469 | return(error); | |
1470 | ||
1471 | } | |
1472 | ||
1473 | /* | |
1474 | * File table vnode close routine. | |
1475 | */ | |
1476 | static int | |
1477 | vn_closefile(struct fileglob *fg, vfs_context_t ctx) | |
1478 | { | |
1479 | struct vnode *vp = (struct vnode *)fg->fg_data; | |
1480 | int error; | |
1481 | struct flock lf; | |
1482 | ||
1483 | if ( (error = vnode_getwithref(vp)) == 0 ) { | |
1484 | ||
1485 | if ((fg->fg_flag & FHASLOCK) && | |
1486 | FILEGLOB_DTYPE(fg) == DTYPE_VNODE) { | |
1487 | lf.l_whence = SEEK_SET; | |
1488 | lf.l_start = 0; | |
1489 | lf.l_len = 0; | |
1490 | lf.l_type = F_UNLCK; | |
1491 | ||
1492 | (void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, ctx, NULL); | |
1493 | } | |
1494 | error = vn_close(vp, fg->fg_flag, ctx); | |
1495 | ||
1496 | (void)vnode_put(vp); | |
1497 | } | |
1498 | return(error); | |
1499 | } | |
1500 | ||
1501 | /* | |
1502 | * Returns: 0 Success | |
1503 | * VNOP_PATHCONF:??? | |
1504 | */ | |
1505 | int | |
1506 | vn_pathconf(vnode_t vp, int name, int32_t *retval, vfs_context_t ctx) | |
1507 | { | |
1508 | int error = 0; | |
1509 | struct vfs_attr vfa; | |
1510 | ||
1511 | switch(name) { | |
1512 | case _PC_EXTENDED_SECURITY_NP: | |
1513 | *retval = vfs_extendedsecurity(vnode_mount(vp)) ? 1 : 0; | |
1514 | break; | |
1515 | case _PC_AUTH_OPAQUE_NP: | |
1516 | *retval = vfs_authopaque(vnode_mount(vp)); | |
1517 | break; | |
1518 | case _PC_2_SYMLINKS: | |
1519 | *retval = 1; /* XXX NOTSUP on MSDOS, etc. */ | |
1520 | break; | |
1521 | case _PC_ALLOC_SIZE_MIN: | |
1522 | *retval = 1; /* XXX lie: 1 byte */ | |
1523 | break; | |
1524 | case _PC_ASYNC_IO: /* unistd.h: _POSIX_ASYNCHRONUS_IO */ | |
1525 | *retval = 1; /* [AIO] option is supported */ | |
1526 | break; | |
1527 | case _PC_PRIO_IO: /* unistd.h: _POSIX_PRIORITIZED_IO */ | |
1528 | *retval = 0; /* [PIO] option is not supported */ | |
1529 | break; | |
1530 | case _PC_REC_INCR_XFER_SIZE: | |
1531 | *retval = 4096; /* XXX go from MIN to MAX 4K at a time */ | |
1532 | break; | |
1533 | case _PC_REC_MIN_XFER_SIZE: | |
1534 | *retval = 4096; /* XXX recommend 4K minimum reads/writes */ | |
1535 | break; | |
1536 | case _PC_REC_MAX_XFER_SIZE: | |
1537 | *retval = 65536; /* XXX recommend 64K maximum reads/writes */ | |
1538 | break; | |
1539 | case _PC_REC_XFER_ALIGN: | |
1540 | *retval = 4096; /* XXX recommend page aligned buffers */ | |
1541 | break; | |
1542 | case _PC_SYMLINK_MAX: | |
1543 | *retval = 255; /* Minimum acceptable POSIX value */ | |
1544 | break; | |
1545 | case _PC_SYNC_IO: /* unistd.h: _POSIX_SYNCHRONIZED_IO */ | |
1546 | *retval = 0; /* [SIO] option is not supported */ | |
1547 | break; | |
1548 | case _PC_XATTR_SIZE_BITS: | |
1549 | /* The number of bits used to store maximum extended | |
1550 | * attribute size in bytes. For example, if the maximum | |
1551 | * attribute size supported by a file system is 128K, the | |
1552 | * value returned will be 18. However a value 18 can mean | |
1553 | * that the maximum attribute size can be anywhere from | |
1554 | * (256KB - 1) to 128KB. As a special case, the resource | |
1555 | * fork can have much larger size, and some file system | |
1556 | * specific extended attributes can have smaller and preset | |
1557 | * size; for example, Finder Info is always 32 bytes. | |
1558 | */ | |
1559 | memset(&vfa, 0, sizeof(vfa)); | |
1560 | VFSATTR_INIT(&vfa); | |
1561 | VFSATTR_WANTED(&vfa, f_capabilities); | |
1562 | if (vfs_getattr(vnode_mount(vp), &vfa, ctx) == 0 && | |
1563 | (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) && | |
1564 | (vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) && | |
1565 | (vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) { | |
1566 | /* Supports native extended attributes */ | |
1567 | error = VNOP_PATHCONF(vp, name, retval, ctx); | |
1568 | } else { | |
1569 | /* Number of bits used to represent the maximum size of | |
1570 | * extended attribute stored in an Apple Double file. | |
1571 | */ | |
1572 | *retval = AD_XATTR_SIZE_BITS; | |
1573 | } | |
1574 | break; | |
1575 | default: | |
1576 | error = VNOP_PATHCONF(vp, name, retval, ctx); | |
1577 | break; | |
1578 | } | |
1579 | ||
1580 | return (error); | |
1581 | } | |
1582 | ||
1583 | static int | |
1584 | vn_kqfilt_add(struct fileproc *fp, struct knote *kn, vfs_context_t ctx) | |
1585 | { | |
1586 | int error; | |
1587 | struct vnode *vp; | |
1588 | ||
1589 | vp = (struct vnode *)fp->f_fglob->fg_data; | |
1590 | ||
1591 | /* | |
1592 | * Don't attach a knote to a dead vnode. | |
1593 | */ | |
1594 | if ((error = vget_internal(vp, 0, VNODE_NODEAD)) == 0) { | |
1595 | switch (kn->kn_filter) { | |
1596 | case EVFILT_READ: | |
1597 | case EVFILT_WRITE: | |
1598 | if (vnode_isfifo(vp)) { | |
1599 | /* We'll only watch FIFOs that use our fifofs */ | |
1600 | if (!(vp->v_fifoinfo && vp->v_fifoinfo->fi_readsock)) { | |
1601 | error = ENOTSUP; | |
1602 | } | |
1603 | ||
1604 | } else if (!vnode_isreg(vp)) { | |
1605 | if (vnode_ischr(vp) && | |
1606 | (error = spec_kqfilter(vp, kn)) == 0) { | |
1607 | /* claimed by a special device */ | |
1608 | vnode_put(vp); | |
1609 | return 0; | |
1610 | } | |
1611 | ||
1612 | error = EINVAL; | |
1613 | } | |
1614 | break; | |
1615 | case EVFILT_VNODE: | |
1616 | break; | |
1617 | default: | |
1618 | error = EINVAL; | |
1619 | } | |
1620 | ||
1621 | if (error) { | |
1622 | vnode_put(vp); | |
1623 | return error; | |
1624 | } | |
1625 | ||
1626 | #if CONFIG_MACF | |
1627 | error = mac_vnode_check_kqfilter(ctx, fp->f_fglob->fg_cred, kn, vp); | |
1628 | if (error) { | |
1629 | vnode_put(vp); | |
1630 | return error; | |
1631 | } | |
1632 | #endif | |
1633 | ||
1634 | kn->kn_hook = (void*)vp; | |
1635 | kn->kn_hookid = vnode_vid(vp); | |
1636 | kn->kn_fop = &vnode_filtops; | |
1637 | ||
1638 | vnode_lock(vp); | |
1639 | KNOTE_ATTACH(&vp->v_knotes, kn); | |
1640 | vnode_unlock(vp); | |
1641 | ||
1642 | /* Ask the filesystem to provide remove notifications, but ignore failure */ | |
1643 | VNOP_MONITOR(vp, 0, VNODE_MONITOR_BEGIN, (void*) kn, ctx); | |
1644 | ||
1645 | vnode_put(vp); | |
1646 | } | |
1647 | ||
1648 | return (error); | |
1649 | } | |
1650 | ||
1651 | static void | |
1652 | filt_vndetach(struct knote *kn) | |
1653 | { | |
1654 | vfs_context_t ctx = vfs_context_current(); | |
1655 | struct vnode *vp; | |
1656 | vp = (struct vnode *)kn->kn_hook; | |
1657 | if (vnode_getwithvid(vp, kn->kn_hookid)) | |
1658 | return; | |
1659 | ||
1660 | vnode_lock(vp); | |
1661 | KNOTE_DETACH(&vp->v_knotes, kn); | |
1662 | vnode_unlock(vp); | |
1663 | ||
1664 | /* | |
1665 | * Tell a (generally networked) filesystem that we're no longer watching | |
1666 | * If the FS wants to track contexts, it should still be using the one from | |
1667 | * the VNODE_MONITOR_BEGIN. | |
1668 | */ | |
1669 | VNOP_MONITOR(vp, 0, VNODE_MONITOR_END, (void*)kn, ctx); | |
1670 | vnode_put(vp); | |
1671 | } | |
1672 | ||
1673 | ||
1674 | /* | |
1675 | * Used for EVFILT_READ | |
1676 | * | |
1677 | * Takes only VFIFO or VREG. vnode is locked. We handle the "poll" case | |
1678 | * differently than the regular case for VREG files. If not in poll(), | |
1679 | * then we need to know current fileproc offset for VREG. | |
1680 | */ | |
1681 | static intptr_t | |
1682 | vnode_readable_data_count(vnode_t vp, off_t current_offset, int ispoll) | |
1683 | { | |
1684 | if (vnode_isfifo(vp)) { | |
1685 | #if FIFO | |
1686 | int cnt; | |
1687 | int err = fifo_charcount(vp, &cnt); | |
1688 | if (err == 0) { | |
1689 | return (intptr_t)cnt; | |
1690 | } else | |
1691 | #endif | |
1692 | { | |
1693 | return (intptr_t)0; | |
1694 | } | |
1695 | } else if (vnode_isreg(vp)) { | |
1696 | if (ispoll) { | |
1697 | return (intptr_t)1; | |
1698 | } | |
1699 | ||
1700 | off_t amount; | |
1701 | amount = vp->v_un.vu_ubcinfo->ui_size - current_offset; | |
1702 | if (amount > (off_t)INTPTR_MAX) { | |
1703 | return INTPTR_MAX; | |
1704 | } else if (amount < (off_t)INTPTR_MIN) { | |
1705 | return INTPTR_MIN; | |
1706 | } else { | |
1707 | return (intptr_t)amount; | |
1708 | } | |
1709 | } else { | |
1710 | panic("Should never have an EVFILT_READ except for reg or fifo."); | |
1711 | return 0; | |
1712 | } | |
1713 | } | |
1714 | ||
1715 | /* | |
1716 | * Used for EVFILT_WRITE. | |
1717 | * | |
1718 | * For regular vnodes, we can always write (1). For named pipes, | |
1719 | * see how much space there is in the buffer. Nothing else is covered. | |
1720 | */ | |
1721 | static intptr_t | |
1722 | vnode_writable_space_count(vnode_t vp) | |
1723 | { | |
1724 | if (vnode_isfifo(vp)) { | |
1725 | #if FIFO | |
1726 | long spc; | |
1727 | int err = fifo_freespace(vp, &spc); | |
1728 | if (err == 0) { | |
1729 | return (intptr_t)spc; | |
1730 | } else | |
1731 | #endif | |
1732 | { | |
1733 | return (intptr_t)0; | |
1734 | } | |
1735 | } else if (vnode_isreg(vp)) { | |
1736 | return (intptr_t)1; | |
1737 | } else { | |
1738 | panic("Should never have an EVFILT_READ except for reg or fifo."); | |
1739 | return 0; | |
1740 | } | |
1741 | } | |
1742 | ||
1743 | /* | |
1744 | * Determine whether this knote should be active | |
1745 | * | |
1746 | * This is kind of subtle. | |
1747 | * --First, notice if the vnode has been revoked: in so, override hint | |
1748 | * --EVFILT_READ knotes are checked no matter what the hint is | |
1749 | * --Other knotes activate based on hint. | |
1750 | * --If hint is revoke, set special flags and activate | |
1751 | */ | |
1752 | static int | |
1753 | filt_vnode(struct knote *kn, long hint) | |
1754 | { | |
1755 | vnode_t vp = (struct vnode *)kn->kn_hook; | |
1756 | int activate = 0; | |
1757 | long orig_hint = hint; | |
1758 | ||
1759 | if (0 == hint) { | |
1760 | vnode_lock(vp); | |
1761 | ||
1762 | if (vnode_getiocount(vp, kn->kn_hookid, VNODE_NODEAD | VNODE_WITHID) != 0) { | |
1763 | /* Is recycled */ | |
1764 | hint = NOTE_REVOKE; | |
1765 | } | |
1766 | } else { | |
1767 | lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED); | |
1768 | } | |
1769 | ||
1770 | /* Special handling for vnodes that are in recycle or already gone */ | |
1771 | if (NOTE_REVOKE == hint) { | |
1772 | kn->kn_flags |= (EV_EOF | EV_ONESHOT); | |
1773 | activate = 1; | |
1774 | ||
1775 | if ((kn->kn_filter == EVFILT_VNODE) && (kn->kn_sfflags & NOTE_REVOKE)) { | |
1776 | kn->kn_fflags |= NOTE_REVOKE; | |
1777 | } | |
1778 | } else { | |
1779 | switch(kn->kn_filter) { | |
1780 | case EVFILT_READ: | |
1781 | kn->kn_data = vnode_readable_data_count(vp, kn->kn_fp->f_fglob->fg_offset, (kn->kn_flags & EV_POLL)); | |
1782 | ||
1783 | if (kn->kn_data != 0) { | |
1784 | activate = 1; | |
1785 | } | |
1786 | break; | |
1787 | case EVFILT_WRITE: | |
1788 | kn->kn_data = vnode_writable_space_count(vp); | |
1789 | ||
1790 | if (kn->kn_data != 0) { | |
1791 | activate = 1; | |
1792 | } | |
1793 | break; | |
1794 | case EVFILT_VNODE: | |
1795 | /* Check events this note matches against the hint */ | |
1796 | if (kn->kn_sfflags & hint) { | |
1797 | kn->kn_fflags |= hint; /* Set which event occurred */ | |
1798 | } | |
1799 | if (kn->kn_fflags != 0) { | |
1800 | activate = 1; | |
1801 | } | |
1802 | break; | |
1803 | default: | |
1804 | panic("Invalid knote filter on a vnode!\n"); | |
1805 | } | |
1806 | } | |
1807 | ||
1808 | if (orig_hint == 0) { | |
1809 | /* | |
1810 | * Definitely need to unlock, may need to put | |
1811 | */ | |
1812 | if (hint == 0) { | |
1813 | vnode_put_locked(vp); | |
1814 | } | |
1815 | vnode_unlock(vp); | |
1816 | } | |
1817 | ||
1818 | return (activate); | |
1819 | } |