]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
f427ee49 | 2 | * Copyright (c) 2000-2020 Apple Inc. All rights reserved. |
5d5c5d0d | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
0a7de745 | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
0a7de745 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
0a7de745 | 17 | * |
2d21ac55 A |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
0a7de745 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A |
27 | */ |
28 | /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ | |
29 | /* | |
30 | * Copyright (c) 1982, 1986, 1989, 1993 | |
31 | * The Regents of the University of California. All rights reserved. | |
32 | * (c) UNIX System Laboratories, Inc. | |
33 | * All or some portions of this file are derived from material licensed | |
34 | * to the University of California by American Telephone and Telegraph | |
35 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
36 | * the permission of UNIX System Laboratories, Inc. | |
37 | * | |
38 | * Redistribution and use in source and binary forms, with or without | |
39 | * modification, are permitted provided that the following conditions | |
40 | * are met: | |
41 | * 1. Redistributions of source code must retain the above copyright | |
42 | * notice, this list of conditions and the following disclaimer. | |
43 | * 2. Redistributions in binary form must reproduce the above copyright | |
44 | * notice, this list of conditions and the following disclaimer in the | |
45 | * documentation and/or other materials provided with the distribution. | |
46 | * 3. All advertising materials mentioning features or use of this software | |
47 | * must display the following acknowledgement: | |
48 | * This product includes software developed by the University of | |
49 | * California, Berkeley and its contributors. | |
50 | * 4. Neither the name of the University nor the names of its contributors | |
51 | * may be used to endorse or promote products derived from this software | |
52 | * without specific prior written permission. | |
53 | * | |
54 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
55 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
56 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
57 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
58 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
59 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
60 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
61 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
62 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
63 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
64 | * SUCH DAMAGE. | |
65 | * | |
66 | * @(#)vfs_vnops.c 8.14 (Berkeley) 6/15/95 | |
67 | * | |
1c79356b | 68 | */ |
2d21ac55 A |
69 | /* |
70 | * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce | |
71 | * support for mandatory and extensible security protections. This notice | |
72 | * is included in support of clause 2.2 (b) of the Apple Public License, | |
73 | * Version 2.0. | |
74 | */ | |
1c79356b A |
75 | |
76 | #include <sys/param.h> | |
91447636 | 77 | #include <sys/types.h> |
1c79356b A |
78 | #include <sys/systm.h> |
79 | #include <sys/kernel.h> | |
91447636 | 80 | #include <sys/file_internal.h> |
1c79356b | 81 | #include <sys/stat.h> |
91447636 A |
82 | #include <sys/proc_internal.h> |
83 | #include <sys/kauth.h> | |
84 | #include <sys/mount_internal.h> | |
1c79356b | 85 | #include <sys/namei.h> |
91447636 | 86 | #include <sys/vnode_internal.h> |
1c79356b A |
87 | #include <sys/ioctl.h> |
88 | #include <sys/tty.h> | |
2d21ac55 A |
89 | /* Temporary workaround for ubc.h until <rdar://4714366 is resolved */ |
90 | #define ubc_setcred ubc_setcred_deprecated | |
1c79356b | 91 | #include <sys/ubc.h> |
2d21ac55 | 92 | #undef ubc_setcred |
0a7de745 | 93 | int ubc_setcred(struct vnode *, struct proc *); |
9bccf70c A |
94 | #include <sys/conf.h> |
95 | #include <sys/disk.h> | |
91447636 A |
96 | #include <sys/fsevents.h> |
97 | #include <sys/kdebug.h> | |
98 | #include <sys/xattr.h> | |
99 | #include <sys/ubc_internal.h> | |
100 | #include <sys/uio_internal.h> | |
0c530ab8 | 101 | #include <sys/resourcevar.h> |
2d21ac55 | 102 | #include <sys/signalvar.h> |
9bccf70c A |
103 | |
104 | #include <vm/vm_kern.h> | |
91447636 | 105 | #include <vm/vm_map.h> |
9bccf70c A |
106 | |
107 | #include <miscfs/specfs/specdev.h> | |
b0d623f7 | 108 | #include <miscfs/fifofs/fifo.h> |
9bccf70c | 109 | |
2d21ac55 A |
110 | #if CONFIG_MACF |
111 | #include <security/mac_framework.h> | |
112 | #endif | |
91447636 | 113 | |
3e170ce0 | 114 | #include <IOKit/IOBSD.h> |
5ba3f43e | 115 | #include <libkern/section_keywords.h> |
91447636 | 116 | |
2d21ac55 A |
117 | static int vn_closefile(struct fileglob *fp, vfs_context_t ctx); |
118 | static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, | |
0a7de745 | 119 | vfs_context_t ctx); |
2d21ac55 | 120 | static int vn_read(struct fileproc *fp, struct uio *uio, int flags, |
0a7de745 | 121 | vfs_context_t ctx); |
2d21ac55 | 122 | static int vn_write(struct fileproc *fp, struct uio *uio, int flags, |
0a7de745 | 123 | vfs_context_t ctx); |
2d21ac55 | 124 | static int vn_select( struct fileproc *fp, int which, void * wql, |
0a7de745 | 125 | vfs_context_t ctx); |
cb323159 A |
126 | static int vn_kqfilter(struct fileproc *fp, struct knote *kn, |
127 | struct kevent_qos_s *kev); | |
b0d623f7 A |
128 | static void filt_vndetach(struct knote *kn); |
129 | static int filt_vnode(struct knote *kn, long hint); | |
cb323159 A |
130 | static int filt_vnode_common(struct knote *kn, struct kevent_qos_s *kev, |
131 | vnode_t vp, long hint); | |
6d2010ae | 132 | static int vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx); |
1c79356b | 133 | |
39236c6e | 134 | const struct fileops vnops = { |
cb323159 A |
135 | .fo_type = DTYPE_VNODE, |
136 | .fo_read = vn_read, | |
137 | .fo_write = vn_write, | |
138 | .fo_ioctl = vn_ioctl, | |
139 | .fo_select = vn_select, | |
140 | .fo_close = vn_closefile, | |
141 | .fo_drain = fo_no_drain, | |
142 | .fo_kqfilter = vn_kqfilter, | |
39236c6e | 143 | }; |
1c79356b | 144 | |
cb323159 A |
145 | static int filt_vntouch(struct knote *kn, struct kevent_qos_s *kev); |
146 | static int filt_vnprocess(struct knote *kn, struct kevent_qos_s*kev); | |
39037602 | 147 | |
5ba3f43e A |
148 | SECURITY_READ_ONLY_EARLY(struct filterops) vnode_filtops = { |
149 | .f_isfd = 1, | |
150 | .f_attach = NULL, | |
151 | .f_detach = filt_vndetach, | |
39037602 A |
152 | .f_event = filt_vnode, |
153 | .f_touch = filt_vntouch, | |
154 | .f_process = filt_vnprocess, | |
b0d623f7 A |
155 | }; |
156 | ||
/*
 * Common entry point for vnode open operations.
 *
 * Thin wrapper around vn_open_modflags(): it hands that routine the address
 * of this function's own copy of 'fmode', so any adjustment made to the open
 * flags during the open is not reported back to the caller.
 *
 * Parameters:	ndp	Name lookup state describing the file
 *		fmode	Open flags
 *		cmode	Mode to use if the file is created
 *
 * Returns:	Whatever vn_open_modflags() returns.
 *
 * XXX the profusion of interfaces here is probably a bad thing.
 */
int
vn_open(struct nameidata *ndp, int fmode, int cmode)
{
	int *flagsp = &fmode;

	return vn_open_modflags(ndp, flagsp, cmode);
}
168 | ||
91447636 A |
169 | int |
170 | vn_open_modflags(struct nameidata *ndp, int *fmodep, int cmode) | |
1c79356b | 171 | { |
f427ee49 A |
172 | int error; |
173 | struct vnode_attr *vap; | |
91447636 | 174 | |
f427ee49 A |
175 | vap = kheap_alloc(KHEAP_TEMP, sizeof(struct vnode_attr), M_WAITOK); |
176 | ||
177 | VATTR_INIT(vap); | |
178 | VATTR_SET(vap, va_mode, (mode_t)cmode); | |
179 | ||
180 | error = vn_open_auth(ndp, fmodep, vap); | |
181 | ||
182 | kheap_free(KHEAP_TEMP, vap, sizeof(struct vnode_attr)); | |
0a7de745 | 183 | |
f427ee49 | 184 | return error; |
91447636 A |
185 | } |
186 | ||
6d2010ae A |
187 | static int |
188 | vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx) | |
189 | { | |
190 | int error; | |
191 | ||
192 | if ((error = vnode_ref_ext(vp, fmode, 0)) != 0) { | |
193 | goto bad; | |
194 | } | |
195 | ||
0a7de745 | 196 | /* Call out to allow 3rd party notification of open. |
6d2010ae A |
197 | * Ignore result of kauth_authorize_fileop call. |
198 | */ | |
4b17d6b6 A |
199 | #if CONFIG_MACF |
200 | mac_vnode_notify_open(ctx, vp, fmode); | |
201 | #endif | |
0a7de745 A |
202 | kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN, |
203 | (uintptr_t)vp, 0); | |
6d2010ae A |
204 | |
205 | return 0; | |
206 | ||
207 | bad: | |
208 | return error; | |
6d2010ae A |
209 | } |
210 | ||
211 | /* | |
212 | * May do nameidone() to allow safely adding an FSEvent. Cue off of ni_dvp to | |
0a7de745 | 213 | * determine whether that has happened. |
6d2010ae A |
214 | */ |
215 | static int | |
216 | vn_open_auth_do_create(struct nameidata *ndp, struct vnode_attr *vap, int fmode, boolean_t *did_create, boolean_t *did_open, vfs_context_t ctx) | |
217 | { | |
218 | uint32_t status = 0; | |
219 | vnode_t dvp = ndp->ni_dvp; | |
220 | int batched; | |
221 | int error; | |
222 | vnode_t vp; | |
223 | ||
224 | batched = vnode_compound_open_available(ndp->ni_dvp); | |
225 | *did_open = FALSE; | |
226 | ||
227 | VATTR_SET(vap, va_type, VREG); | |
0a7de745 | 228 | if (fmode & O_EXCL) { |
6d2010ae | 229 | vap->va_vaflags |= VA_EXCLUSIVE; |
0a7de745 | 230 | } |
6d2010ae A |
231 | |
232 | #if NAMEDRSRCFORK | |
233 | if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) { | |
0a7de745 | 234 | if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0) { |
6d2010ae | 235 | goto out; |
0a7de745 A |
236 | } |
237 | if ((error = vnode_makenamedstream(dvp, &ndp->ni_vp, XATTR_RESOURCEFORK_NAME, 0, ctx)) != 0) { | |
6d2010ae | 238 | goto out; |
0a7de745 | 239 | } |
6d2010ae A |
240 | *did_create = TRUE; |
241 | } else { | |
242 | #endif | |
0a7de745 A |
243 | if (!batched) { |
244 | if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0) { | |
245 | goto out; | |
6d2010ae | 246 | } |
0a7de745 | 247 | } |
6d2010ae | 248 | |
0a7de745 A |
249 | error = vn_create(dvp, &ndp->ni_vp, ndp, vap, VN_CREATE_DOOPEN, fmode, &status, ctx); |
250 | if (error != 0) { | |
251 | if (batched) { | |
252 | *did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? TRUE : FALSE; | |
253 | } else { | |
254 | *did_create = FALSE; | |
255 | } | |
6d2010ae | 256 | |
0a7de745 A |
257 | if (error == EKEEPLOOKING) { |
258 | if (*did_create) { | |
259 | panic("EKEEPLOOKING, but we did a create?"); | |
6d2010ae | 260 | } |
0a7de745 A |
261 | if (!batched) { |
262 | panic("EKEEPLOOKING from filesystem that doesn't support compound vnops?"); | |
263 | } | |
264 | if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) { | |
265 | panic("EKEEPLOOKING, but continue flag not set?"); | |
6d2010ae | 266 | } |
0a7de745 A |
267 | |
268 | /* | |
269 | * Do NOT drop the dvp: we need everything to continue the lookup. | |
270 | */ | |
271 | return error; | |
272 | } | |
273 | } else { | |
274 | if (batched) { | |
275 | *did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? 1 : 0; | |
276 | *did_open = TRUE; | |
277 | } else { | |
278 | *did_create = TRUE; | |
6d2010ae | 279 | } |
6d2010ae | 280 | } |
0a7de745 A |
281 | #if NAMEDRSRCFORK |
282 | } | |
6d2010ae A |
283 | #endif |
284 | ||
6d2010ae | 285 | vp = ndp->ni_vp; |
6d2010ae A |
286 | |
287 | if (*did_create) { | |
0a7de745 | 288 | int update_flags = 0; |
6d2010ae A |
289 | |
290 | // Make sure the name & parent pointers are hooked up | |
0a7de745 | 291 | if (vp->v_name == NULL) { |
6d2010ae | 292 | update_flags |= VNODE_UPDATE_NAME; |
0a7de745 A |
293 | } |
294 | if (vp->v_parent == NULLVP) { | |
6d2010ae | 295 | update_flags |= VNODE_UPDATE_PARENT; |
0a7de745 | 296 | } |
6d2010ae | 297 | |
0a7de745 | 298 | if (update_flags) { |
6d2010ae | 299 | vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags); |
0a7de745 | 300 | } |
6d2010ae A |
301 | |
302 | vnode_put(dvp); | |
303 | ndp->ni_dvp = NULLVP; | |
304 | ||
305 | #if CONFIG_FSE | |
306 | if (need_fsevent(FSE_CREATE_FILE, vp)) { | |
307 | add_fsevent(FSE_CREATE_FILE, ctx, | |
0a7de745 A |
308 | FSE_ARG_VNODE, vp, |
309 | FSE_ARG_DONE); | |
6d2010ae A |
310 | } |
311 | #endif | |
312 | } | |
313 | out: | |
314 | if (ndp->ni_dvp != NULLVP) { | |
315 | vnode_put(dvp); | |
316 | ndp->ni_dvp = NULLVP; | |
317 | } | |
318 | ||
319 | return error; | |
320 | } | |
321 | ||
3e170ce0 A |
322 | /* |
323 | * This is the number of times we'll loop in vn_open_auth without explicitly | |
324 | * yielding the CPU when we determine we have to retry. | |
325 | */ | |
0a7de745 | 326 | #define RETRY_NO_YIELD_COUNT 5 |
3e170ce0 | 327 | |
0c530ab8 A |
328 | /* |
329 | * Open a file with authorization, updating the contents of the structures | |
330 | * pointed to by ndp, fmodep, and vap as necessary to perform the requested | |
331 | * operation. This function is used for both opens of existing files, and | |
332 | * creation of new files. | |
333 | * | |
334 | * Parameters: ndp The nami data pointer describing the | |
335 | * file | |
336 | * fmodep A pointer to an int containg the mode | |
337 | * information to be used for the open | |
338 | * vap A pointer to the vnode attribute | |
339 | * descriptor to be used for the open | |
340 | * | |
341 | * Indirect: * Contents of the data structures pointed | |
342 | * to by the parameters are modified as | |
343 | * necessary to the requested operation. | |
344 | * | |
345 | * Returns: 0 Success | |
346 | * !0 errno value | |
347 | * | |
348 | * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order. | |
349 | * | |
350 | * The contents of '*ndp' will be modified, based on the other | |
351 | * arguments to this function, and to return file and directory | |
352 | * data necessary to satisfy the requested operation. | |
353 | * | |
354 | * If the file does not exist and we are creating it, then the | |
355 | * O_TRUNC flag will be cleared in '*fmodep' to indicate to the | |
356 | * caller that the file was not truncated. | |
357 | * | |
358 | * If the file exists and the O_EXCL flag was not specified, then | |
359 | * the O_CREAT flag will be cleared in '*fmodep' to indicate to | |
360 | * the caller that the existing file was merely opened rather | |
361 | * than created. | |
362 | * | |
363 | * The contents of '*vap' will be modified as necessary to | |
364 | * complete the operation, including setting of supported | |
365 | * attribute, clearing of fields containing unsupported attributes | |
366 | * in the request, if the request proceeds without them, etc.. | |
367 | * | |
368 | * XXX: This function is too complicated in actings on its arguments | |
369 | * | |
370 | * XXX: We should enummerate the possible errno values here, and where | |
371 | * in the code they originated. | |
372 | */ | |
91447636 A |
373 | int |
374 | vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap) | |
375 | { | |
376 | struct vnode *vp; | |
377 | struct vnode *dvp; | |
378 | vfs_context_t ctx = ndp->ni_cnd.cn_context; | |
1c79356b | 379 | int error; |
91447636 | 380 | int fmode; |
b0d623f7 | 381 | uint32_t origcnflags; |
6d2010ae A |
382 | boolean_t did_create; |
383 | boolean_t did_open; | |
384 | boolean_t need_vnop_open; | |
385 | boolean_t batched; | |
386 | boolean_t ref_failed; | |
3e170ce0 | 387 | int nretries = 0; |
1c79356b | 388 | |
91447636 A |
389 | again: |
390 | vp = NULL; | |
391 | dvp = NULL; | |
6d2010ae A |
392 | batched = FALSE; |
393 | did_create = FALSE; | |
394 | need_vnop_open = TRUE; | |
395 | ref_failed = FALSE; | |
91447636 | 396 | fmode = *fmodep; |
b0d623f7 | 397 | origcnflags = ndp->ni_cnd.cn_flags; |
6d2010ae | 398 | |
39037602 | 399 | // If raw encrypted mode is requested, handle that here |
0a7de745 A |
400 | if (VATTR_IS_ACTIVE(vap, va_dataprotect_flags) |
401 | && ISSET(vap->va_dataprotect_flags, VA_DP_RAWENCRYPTED)) { | |
39037602 A |
402 | fmode |= FENCRYPTED; |
403 | } | |
404 | ||
f427ee49 A |
405 | if ((fmode & O_NOFOLLOW_ANY) && (fmode & (O_SYMLINK | O_NOFOLLOW))) { |
406 | error = EINVAL; | |
407 | goto out; | |
408 | } | |
409 | ||
6d2010ae A |
410 | /* |
411 | * O_CREAT | |
412 | */ | |
1c79356b | 413 | if (fmode & O_CREAT) { |
0a7de745 A |
414 | if ((fmode & O_DIRECTORY)) { |
415 | error = EINVAL; | |
0c530ab8 A |
416 | goto out; |
417 | } | |
1c79356b | 418 | ndp->ni_cnd.cn_nameiop = CREATE; |
6d2010ae A |
419 | #if CONFIG_TRIGGERS |
420 | ndp->ni_op = OP_LINK; | |
421 | #endif | |
b0d623f7 | 422 | /* Inherit USEDVP, vnode_open() supported flags only */ |
fe8ab488 | 423 | ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT); |
2d21ac55 | 424 | ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1; |
6d2010ae | 425 | ndp->ni_flag = NAMEI_COMPOUNDOPEN; |
2d21ac55 A |
426 | #if NAMEDRSRCFORK |
427 | /* open calls are allowed for resource forks. */ | |
428 | ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; | |
429 | #endif | |
0a7de745 | 430 | if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0 && (origcnflags & FOLLOW) != 0) { |
1c79356b | 431 | ndp->ni_cnd.cn_flags |= FOLLOW; |
0a7de745 | 432 | } |
f427ee49 A |
433 | if (fmode & O_NOFOLLOW_ANY) { |
434 | /* will return ELOOP on the first symlink to be hit */ | |
435 | ndp->ni_flag |= NAMEI_NOFOLLOW_ANY; | |
436 | } | |
6d2010ae A |
437 | |
438 | continue_create_lookup: | |
0a7de745 | 439 | if ((error = namei(ndp))) { |
91447636 | 440 | goto out; |
0a7de745 | 441 | } |
6d2010ae | 442 | |
91447636 A |
443 | dvp = ndp->ni_dvp; |
444 | vp = ndp->ni_vp; | |
445 | ||
6d2010ae | 446 | batched = vnode_compound_open_available(dvp); |
2d21ac55 | 447 | |
6d2010ae A |
448 | /* not found, create */ |
449 | if (vp == NULL) { | |
450 | /* must have attributes for a new file */ | |
451 | if (vap == NULL) { | |
fe8ab488 | 452 | vnode_put(dvp); |
6d2010ae A |
453 | error = EINVAL; |
454 | goto out; | |
455 | } | |
456 | /* | |
457 | * Attempt a create. For a system supporting compound VNOPs, we may | |
458 | * find an existing file or create one; in either case, we will already | |
459 | * have the file open and no VNOP_OPEN() will be needed. | |
460 | */ | |
461 | error = vn_open_auth_do_create(ndp, vap, fmode, &did_create, &did_open, ctx); | |
91447636 | 462 | |
6d2010ae | 463 | dvp = ndp->ni_dvp; |
91447636 A |
464 | vp = ndp->ni_vp; |
465 | ||
0a7de745 | 466 | /* |
6d2010ae A |
467 | * Detected a node that the filesystem couldn't handle. Don't call |
468 | * nameidone() yet, because we need that path buffer. | |
469 | */ | |
470 | if (error == EKEEPLOOKING) { | |
471 | if (!batched) { | |
472 | panic("EKEEPLOOKING from a filesystem that doesn't support compound VNOPs?"); | |
91447636 | 473 | } |
6d2010ae | 474 | goto continue_create_lookup; |
91447636 | 475 | } |
b0d623f7 | 476 | |
6d2010ae | 477 | nameidone(ndp); |
b0d623f7 | 478 | if (dvp) { |
6d2010ae | 479 | panic("Shouldn't have a dvp here."); |
b0d623f7 | 480 | } |
91447636 A |
481 | |
482 | if (error) { | |
483 | /* | |
3e170ce0 | 484 | * Check for a create race. |
91447636 | 485 | */ |
0a7de745 A |
486 | if ((error == EEXIST) && !(fmode & O_EXCL)) { |
487 | if (vp) { | |
6d2010ae | 488 | vnode_put(vp); |
0a7de745 | 489 | } |
91447636 A |
490 | goto again; |
491 | } | |
492 | goto bad; | |
55e303ae | 493 | } |
6d2010ae A |
494 | |
495 | need_vnop_open = !did_open; | |
0a7de745 A |
496 | } else { |
497 | if (fmode & O_EXCL) { | |
6d2010ae | 498 | error = EEXIST; |
0a7de745 | 499 | } |
6d2010ae | 500 | |
0a7de745 A |
501 | /* |
502 | * We have a vnode. Use compound open if available | |
6d2010ae A |
503 | * or else fall through to "traditional" path. Note: can't |
504 | * do a compound open for root, because the parent belongs | |
505 | * to a different FS. | |
506 | */ | |
507 | if (error == 0 && batched && (vnode_mount(dvp) == vnode_mount(vp))) { | |
508 | error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx); | |
509 | ||
510 | if (error == 0) { | |
511 | vp = ndp->ni_vp; | |
512 | need_vnop_open = FALSE; | |
513 | } else if (error == EKEEPLOOKING) { | |
514 | if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) { | |
515 | panic("EKEEPLOOKING, but continue flag not set?"); | |
516 | } | |
517 | goto continue_create_lookup; | |
0a7de745 | 518 | } |
6d2010ae | 519 | } |
91447636 | 520 | nameidone(ndp); |
91447636 | 521 | vnode_put(dvp); |
6d2010ae | 522 | ndp->ni_dvp = NULLVP; |
91447636 | 523 | |
6d2010ae | 524 | if (error) { |
1c79356b A |
525 | goto bad; |
526 | } | |
6d2010ae | 527 | |
1c79356b | 528 | fmode &= ~O_CREAT; |
6d2010ae A |
529 | |
530 | /* Fall through */ | |
1c79356b | 531 | } |
0a7de745 | 532 | } else { |
6d2010ae A |
533 | /* |
534 | * Not O_CREAT | |
535 | */ | |
1c79356b | 536 | ndp->ni_cnd.cn_nameiop = LOOKUP; |
b0d623f7 | 537 | /* Inherit USEDVP, vnode_open() supported flags only */ |
fe8ab488 | 538 | ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT); |
6d2010ae | 539 | ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1 | WANTPARENT; |
2d21ac55 A |
540 | #if NAMEDRSRCFORK |
541 | /* open calls are allowed for resource forks. */ | |
542 | ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; | |
543 | #endif | |
0a7de745 | 544 | if (fmode & FENCRYPTED) { |
39037602 | 545 | ndp->ni_cnd.cn_flags |= CN_RAW_ENCRYPTED | CN_SKIPNAMECACHE; |
0a7de745 | 546 | } |
6d2010ae A |
547 | ndp->ni_flag = NAMEI_COMPOUNDOPEN; |
548 | ||
b0d623f7 A |
549 | /* preserve NOFOLLOW from vnode_open() */ |
550 | if (fmode & O_NOFOLLOW || fmode & O_SYMLINK || (origcnflags & FOLLOW) == 0) { | |
6d2010ae | 551 | ndp->ni_cnd.cn_flags &= ~FOLLOW; |
2d21ac55 | 552 | } |
f427ee49 A |
553 | if (fmode & O_NOFOLLOW_ANY) { |
554 | /* will return ELOOP on the first symlink to be hit */ | |
555 | ndp->ni_flag |= NAMEI_NOFOLLOW_ANY; | |
556 | } | |
2d21ac55 | 557 | |
6d2010ae A |
558 | /* Do a lookup, possibly going directly to filesystem for compound operation */ |
559 | do { | |
0a7de745 | 560 | if ((error = namei(ndp))) { |
6d2010ae | 561 | goto out; |
0a7de745 | 562 | } |
6d2010ae A |
563 | vp = ndp->ni_vp; |
564 | dvp = ndp->ni_dvp; | |
565 | ||
566 | /* Check for batched lookup-open */ | |
567 | batched = vnode_compound_open_available(dvp); | |
568 | if (batched && ((vp == NULLVP) || (vnode_mount(dvp) == vnode_mount(vp)))) { | |
569 | error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx); | |
570 | vp = ndp->ni_vp; | |
571 | if (error == 0) { | |
572 | need_vnop_open = FALSE; | |
573 | } else if (error == EKEEPLOOKING) { | |
574 | if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) { | |
575 | panic("EKEEPLOOKING, but continue flag not set?"); | |
576 | } | |
577 | } | |
578 | } | |
579 | } while (error == EKEEPLOOKING); | |
580 | ||
91447636 | 581 | nameidone(ndp); |
6d2010ae A |
582 | vnode_put(dvp); |
583 | ndp->ni_dvp = NULLVP; | |
0c530ab8 | 584 | |
6d2010ae | 585 | if (error) { |
0c530ab8 A |
586 | goto bad; |
587 | } | |
1c79356b | 588 | } |
2d21ac55 | 589 | |
0a7de745 | 590 | /* |
6d2010ae A |
591 | * By this point, nameidone() is called, dvp iocount is dropped, |
592 | * and dvp pointer is cleared. | |
593 | */ | |
594 | if (ndp->ni_dvp != NULLVP) { | |
595 | panic("Haven't cleaned up adequately in vn_open_auth()"); | |
2d21ac55 | 596 | } |
9bccf70c | 597 | |
6d2010ae | 598 | /* |
0a7de745 | 599 | * Expect to use this code for filesystems without compound VNOPs, for the root |
6d2010ae A |
600 | * of a filesystem, which can't be "looked up" in the sense of VNOP_LOOKUP(), |
601 | * and for shadow files, which do not live on the same filesystems as their "parents." | |
602 | */ | |
603 | if (need_vnop_open) { | |
604 | if (batched && !vnode_isvroot(vp) && !vnode_isnamedstream(vp)) { | |
605 | panic("Why am I trying to use VNOP_OPEN() on anything other than the root or a named stream?"); | |
1c79356b | 606 | } |
91447636 | 607 | |
6d2010ae A |
608 | if (!did_create) { |
609 | error = vn_authorize_open_existing(vp, &ndp->ni_cnd, fmode, ctx, NULL); | |
610 | if (error) { | |
611 | goto bad; | |
2d21ac55 A |
612 | } |
613 | } | |
2d21ac55 | 614 | |
0a7de745 A |
615 | if (VATTR_IS_ACTIVE(vap, va_dataprotect_flags) |
616 | && ISSET(vap->va_dataprotect_flags, VA_DP_RAWUNENCRYPTED)) { | |
3e170ce0 A |
617 | /* Don't allow unencrypted io request from user space unless entitled */ |
618 | boolean_t entitled = FALSE; | |
619 | #if !SECURE_KERNEL | |
620 | entitled = IOTaskHasEntitlement(current_task(), "com.apple.private.security.file-unencrypt-access"); | |
621 | #endif | |
622 | if (!entitled) { | |
623 | error = EPERM; | |
316670eb A |
624 | goto bad; |
625 | } | |
3e170ce0 A |
626 | fmode |= FUNENCRYPTED; |
627 | } | |
628 | ||
6d2010ae A |
629 | error = VNOP_OPEN(vp, fmode, ctx); |
630 | if (error) { | |
631 | goto bad; | |
2d21ac55 | 632 | } |
6d2010ae A |
633 | need_vnop_open = FALSE; |
634 | } | |
2d21ac55 | 635 | |
6d2010ae A |
636 | // if the vnode is tagged VOPENEVT and the current process |
637 | // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY | |
638 | // flag to the open mode so that this open won't count against | |
639 | // the vnode when carbon delete() does a vnode_isinuse() to see | |
640 | // if a file is currently in use. this allows spotlight | |
641 | // importers to not interfere with carbon apps that depend on | |
642 | // the no-delete-if-busy semantics of carbon delete(). | |
643 | // | |
644 | if (!did_create && (vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) { | |
645 | fmode |= O_EVTONLY; | |
1c79356b | 646 | } |
0b4e3aa0 | 647 | |
6d2010ae A |
648 | /* |
649 | * Grab reference, etc. | |
650 | */ | |
651 | error = vn_open_auth_finish(vp, fmode, ctx); | |
652 | if (error) { | |
653 | ref_failed = TRUE; | |
0b4e3aa0 A |
654 | goto bad; |
655 | } | |
91447636 | 656 | |
6d2010ae | 657 | /* Compound VNOP open is responsible for doing the truncate */ |
0a7de745 | 658 | if (batched || did_create) { |
6d2010ae | 659 | fmode &= ~O_TRUNC; |
0a7de745 | 660 | } |
0b4e3aa0 | 661 | |
55e303ae | 662 | *fmodep = fmode; |
0a7de745 | 663 | return 0; |
6d2010ae | 664 | |
1c79356b | 665 | bad: |
6d2010ae A |
666 | /* Opened either explicitly or by a batched create */ |
667 | if (!need_vnop_open) { | |
668 | VNOP_CLOSE(vp, fmode, ctx); | |
669 | } | |
670 | ||
55e303ae | 671 | ndp->ni_vp = NULL; |
91447636 | 672 | if (vp) { |
c910b4d9 | 673 | #if NAMEDRSRCFORK |
b0d623f7 A |
674 | /* Aggressively recycle shadow files if we error'd out during open() */ |
675 | if ((vnode_isnamedstream(vp)) && | |
0a7de745 A |
676 | (vp->v_parent != NULLVP) && |
677 | (vnode_isshadow(vp))) { | |
678 | vnode_recycle(vp); | |
c910b4d9 A |
679 | } |
680 | #endif | |
681 | vnode_put(vp); | |
91447636 A |
682 | /* |
683 | * Check for a race against unlink. We had a vnode | |
684 | * but according to vnode_authorize or VNOP_OPEN it | |
685 | * no longer exists. | |
935ed37a A |
686 | * |
687 | * EREDRIVEOPEN: means that we were hit by the tty allocation race. | |
91447636 | 688 | */ |
6d2010ae | 689 | if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN) || ref_failed) { |
3e170ce0 A |
690 | /* |
691 | * We'll retry here but it may be possible that we get | |
692 | * into a retry "spin" inside the kernel and not allow | |
693 | * threads, which need to run in order for the retry | |
694 | * loop to end, to run. An example is an open of a | |
695 | * terminal which is getting revoked and we spin here | |
696 | * without yielding becasue namei and VNOP_OPEN are | |
697 | * successful but vnode_ref fails. The revoke needs | |
698 | * threads with an iocount to run but if spin here we | |
699 | * may possibly be blcoking other threads from running. | |
700 | * | |
701 | * We start yielding the CPU after some number of | |
702 | * retries for increasing durations. Note that this is | |
703 | * still a loop without an exit condition. | |
704 | */ | |
705 | nretries += 1; | |
706 | if (nretries > RETRY_NO_YIELD_COUNT) { | |
707 | /* Every hz/100 secs is 10 msecs ... */ | |
708 | tsleep(&nretries, PVFS, "vn_open_auth_retry", | |
0a7de745 | 709 | MIN((nretries * (hz / 100)), hz)); |
3e170ce0 | 710 | } |
91447636 A |
711 | goto again; |
712 | } | |
713 | } | |
6d2010ae | 714 | |
91447636 | 715 | out: |
0a7de745 | 716 | return error; |
1c79356b A |
717 | } |
718 | ||
#if vn_access_DEPRECATED
/*
 * Authorize an action against a vnode.  This has been the canonical way to
 * ensure that the credential/process/etc. referenced by a vfs_context
 * is granted the rights called out in 'mode' against the vnode 'vp'.
 *
 * The VREAD, VWRITE and VEXEC bits of 'mode' are translated to
 * KAUTH_VNODE_READ_DATA, KAUTH_VNODE_WRITE_DATA and KAUTH_VNODE_EXECUTE
 * respectively, and the combined action is handed to vnode_authorize(),
 * whose result is returned.
 *
 * Unfortunately, the use of VREAD/VWRITE/VEXEC makes it very difficult
 * to add support for more rights.  As such, this interface will be deprecated
 * and callers will use vnode_authorize instead.
 */
int
vn_access(vnode_t vp, int mode, vfs_context_t context)
{
	kauth_action_t rights = 0;

	rights |= (mode & VREAD) ? KAUTH_VNODE_READ_DATA : 0;
	rights |= (mode & VWRITE) ? KAUTH_VNODE_WRITE_DATA : 0;
	rights |= (mode & VEXEC) ? KAUTH_VNODE_EXECUTE : 0;

	return vnode_authorize(vp, NULL, rights, context);
}
#endif  /* vn_access_DEPRECATED */
1c79356b A |
748 | |
749 | /* | |
750 | * Vnode close call | |
751 | */ | |
9bccf70c | 752 | int |
2d21ac55 | 753 | vn_close(struct vnode *vp, int flags, vfs_context_t ctx) |
1c79356b A |
754 | { |
755 | int error; | |
39236c6e | 756 | int flusherror = 0; |
1c79356b | 757 | |
2d21ac55 | 758 | #if NAMEDRSRCFORK |
cf7d32b8 | 759 | /* Sync data from resource fork shadow file if needed. */ |
0a7de745 | 760 | if ((vp->v_flag & VISNAMEDSTREAM) && |
2d21ac55 | 761 | (vp->v_parent != NULLVP) && |
b0d623f7 | 762 | vnode_isshadow(vp)) { |
2d21ac55 | 763 | if (flags & FWASWRITTEN) { |
39236c6e | 764 | flusherror = vnode_flushnamedstream(vp->v_parent, vp, ctx); |
2d21ac55 | 765 | } |
2d21ac55 A |
766 | } |
767 | #endif | |
cb323159 A |
768 | /* |
769 | * If vnode @vp belongs to a chardev or a blkdev then it is handled | |
770 | * specially. We first drop its user reference count @vp->v_usecount | |
771 | * before calling VNOP_CLOSE(). This was done historically to ensure | |
772 | * that the last close of a special device vnode performed some | |
773 | * conditional cleanups. Now we still need to drop this reference here | |
774 | * to ensure that devfsspec_close() can check if the vnode is still in | |
775 | * use. | |
776 | */ | |
0a7de745 | 777 | if (vnode_isspec(vp)) { |
593a1d5f | 778 | (void)vnode_rele_ext(vp, flags, 0); |
0a7de745 | 779 | } |
593a1d5f | 780 | |
fe8ab488 A |
781 | /* |
782 | * On HFS, we flush when the last writer closes. We do this | |
783 | * because resource fork vnodes hold a reference on data fork | |
784 | * vnodes and that will prevent them from getting VNOP_INACTIVE | |
785 | * which will delay when we flush cached data. In future, we | |
786 | * might find it beneficial to do this for all file systems. | |
787 | * Note that it's OK to access v_writecount without the lock | |
788 | * in this context. | |
789 | */ | |
0a7de745 | 790 | if (vp->v_tag == VT_HFS && (flags & FWRITE) && vp->v_writecount == 1) { |
fe8ab488 | 791 | VNOP_FSYNC(vp, MNT_NOWAIT, ctx); |
0a7de745 | 792 | } |
fe8ab488 | 793 | |
2d21ac55 | 794 | error = VNOP_CLOSE(vp, flags, ctx); |
91447636 | 795 | |
6d2010ae A |
796 | #if CONFIG_FSE |
797 | if (flags & FWASWRITTEN) { | |
0a7de745 A |
798 | if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) { |
799 | add_fsevent(FSE_CONTENT_MODIFIED, ctx, | |
800 | FSE_ARG_VNODE, vp, | |
801 | FSE_ARG_DONE); | |
6d2010ae A |
802 | } |
803 | } | |
804 | #endif | |
805 | ||
0a7de745 | 806 | if (!vnode_isspec(vp)) { |
593a1d5f | 807 | (void)vnode_rele_ext(vp, flags, 0); |
0a7de745 A |
808 | } |
809 | ||
39236c6e A |
810 | if (flusherror) { |
811 | error = flusherror; | |
812 | } | |
0a7de745 | 813 | return error; |
1c79356b A |
814 | } |
815 | ||
91447636 A |
816 | static int |
817 | vn_read_swapfile( | |
0a7de745 A |
818 | struct vnode *vp, |
819 | uio_t uio) | |
91447636 | 820 | { |
0a7de745 A |
821 | int error; |
822 | off_t swap_count, this_count; | |
823 | off_t file_end, read_end; | |
824 | off_t prev_resid; | |
825 | char *my_swap_page; | |
91447636 A |
826 | |
827 | /* | |
b0d623f7 | 828 | * Reading from a swap file will get you zeroes. |
91447636 | 829 | */ |
b0d623f7 A |
830 | |
831 | my_swap_page = NULL; | |
91447636 A |
832 | error = 0; |
833 | swap_count = uio_resid(uio); | |
834 | ||
835 | file_end = ubc_getsize(vp); | |
836 | read_end = uio->uio_offset + uio_resid(uio); | |
837 | if (uio->uio_offset >= file_end) { | |
838 | /* uio starts after end of file: nothing to read */ | |
839 | swap_count = 0; | |
840 | } else if (read_end > file_end) { | |
841 | /* uio extends beyond end of file: stop before that */ | |
842 | swap_count -= (read_end - file_end); | |
843 | } | |
844 | ||
845 | while (swap_count > 0) { | |
b0d623f7 | 846 | if (my_swap_page == NULL) { |
f427ee49 | 847 | my_swap_page = kheap_alloc(KHEAP_TEMP, PAGE_SIZE, Z_WAITOK | Z_ZERO); |
b0d623f7 | 848 | /* add an end-of-line to keep line counters happy */ |
0a7de745 | 849 | my_swap_page[PAGE_SIZE - 1] = '\n'; |
91447636 | 850 | } |
91447636 A |
851 | this_count = swap_count; |
852 | if (this_count > PAGE_SIZE) { | |
853 | this_count = PAGE_SIZE; | |
854 | } | |
855 | ||
856 | prev_resid = uio_resid(uio); | |
b0d623f7 | 857 | error = uiomove((caddr_t) my_swap_page, |
f427ee49 | 858 | (int)this_count, |
0a7de745 | 859 | uio); |
91447636 A |
860 | if (error) { |
861 | break; | |
862 | } | |
863 | swap_count -= (prev_resid - uio_resid(uio)); | |
864 | } | |
f427ee49 | 865 | kheap_free(KHEAP_TEMP, my_swap_page, PAGE_SIZE); |
91447636 A |
866 | |
867 | return error; | |
868 | } | |
1c79356b A |
869 | /* |
870 | * Package up an I/O request on a vnode into a uio and do it. | |
871 | */ | |
9bccf70c | 872 | int |
91447636 A |
873 | vn_rdwr( |
874 | enum uio_rw rw, | |
875 | struct vnode *vp, | |
876 | caddr_t base, | |
877 | int len, | |
878 | off_t offset, | |
879 | enum uio_seg segflg, | |
880 | int ioflg, | |
881 | kauth_cred_t cred, | |
882 | int *aresid, | |
2d21ac55 | 883 | proc_t p) |
1c79356b | 884 | { |
b0d623f7 A |
885 | int64_t resid; |
886 | int result; | |
0a7de745 | 887 | |
f427ee49 A |
888 | if (len < 0) { |
889 | return EINVAL; | |
890 | } | |
891 | ||
b0d623f7 | 892 | result = vn_rdwr_64(rw, |
0a7de745 A |
893 | vp, |
894 | (uint64_t)(uintptr_t)base, | |
895 | (int64_t)len, | |
896 | offset, | |
897 | segflg, | |
898 | ioflg, | |
899 | cred, | |
900 | &resid, | |
901 | p); | |
b0d623f7 A |
902 | |
903 | /* "resid" should be bounded above by "len," which is an int */ | |
904 | if (aresid != NULL) { | |
f427ee49 | 905 | *aresid = (int)resid; |
b0d623f7 A |
906 | } |
907 | ||
908 | return result; | |
91447636 A |
909 | } |
910 | ||
911 | ||
912 | int | |
913 | vn_rdwr_64( | |
914 | enum uio_rw rw, | |
915 | struct vnode *vp, | |
916 | uint64_t base, | |
917 | int64_t len, | |
918 | off_t offset, | |
919 | enum uio_seg segflg, | |
920 | int ioflg, | |
921 | kauth_cred_t cred, | |
b0d623f7 | 922 | int64_t *aresid, |
2d21ac55 | 923 | proc_t p) |
91447636 A |
924 | { |
925 | uio_t auio; | |
926 | int spacetype; | |
927 | struct vfs_context context; | |
0a7de745 A |
928 | int error = 0; |
929 | char uio_buf[UIO_SIZEOF(1)]; | |
91447636 | 930 | |
2d21ac55 | 931 | context.vc_thread = current_thread(); |
91447636 | 932 | context.vc_ucred = cred; |
1c79356b | 933 | |
91447636 A |
934 | if (UIO_SEG_IS_USER_SPACE(segflg)) { |
935 | spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; | |
0a7de745 | 936 | } else { |
91447636 A |
937 | spacetype = UIO_SYSSPACE; |
938 | } | |
f427ee49 A |
939 | |
940 | if (len < 0) { | |
941 | return EINVAL; | |
942 | } | |
943 | ||
0a7de745 A |
944 | auio = uio_createwithbuffer(1, offset, spacetype, rw, |
945 | &uio_buf[0], sizeof(uio_buf)); | |
f427ee49 | 946 | uio_addiov(auio, CAST_USER_ADDR_T(base), (user_size_t)len); |
91447636 | 947 | |
2d21ac55 A |
948 | #if CONFIG_MACF |
949 | /* XXXMAC | |
0a7de745 A |
950 | * IO_NOAUTH should be re-examined. |
951 | * Likely that mediation should be performed in caller. | |
2d21ac55 A |
952 | */ |
953 | if ((ioflg & IO_NOAUTH) == 0) { | |
0a7de745 A |
954 | /* passed cred is fp->f_cred */ |
955 | if (rw == UIO_READ) { | |
2d21ac55 | 956 | error = mac_vnode_check_read(&context, cred, vp); |
0a7de745 | 957 | } else { |
2d21ac55 | 958 | error = mac_vnode_check_write(&context, cred, vp); |
0a7de745 | 959 | } |
2d21ac55 A |
960 | } |
961 | #endif | |
962 | ||
963 | if (error == 0) { | |
964 | if (rw == UIO_READ) { | |
39236c6e | 965 | if (vnode_isswap(vp) && ((ioflg & IO_SWAP_DISPATCH) == 0)) { |
2d21ac55 A |
966 | error = vn_read_swapfile(vp, auio); |
967 | } else { | |
968 | error = VNOP_READ(vp, auio, ioflg, &context); | |
969 | } | |
91447636 | 970 | } else { |
2d21ac55 | 971 | error = VNOP_WRITE(vp, auio, ioflg, &context); |
91447636 | 972 | } |
91447636 | 973 | } |
1c79356b | 974 | |
0a7de745 | 975 | if (aresid) { |
91447636 | 976 | *aresid = uio_resid(auio); |
f427ee49 | 977 | assert(*aresid <= len); |
0a7de745 A |
978 | } else if (uio_resid(auio) && error == 0) { |
979 | error = EIO; | |
980 | } | |
981 | return error; | |
1c79356b A |
982 | } |
983 | ||
fe8ab488 A |
984 | static inline void |
985 | vn_offset_lock(struct fileglob *fg) | |
986 | { | |
987 | lck_mtx_lock_spin(&fg->fg_lock); | |
988 | while (fg->fg_lflags & FG_OFF_LOCKED) { | |
989 | fg->fg_lflags |= FG_OFF_LOCKWANT; | |
990 | msleep(&fg->fg_lflags, &fg->fg_lock, PVFS | PSPIN, | |
991 | "fg_offset_lock_wait", 0); | |
992 | } | |
993 | fg->fg_lflags |= FG_OFF_LOCKED; | |
994 | lck_mtx_unlock(&fg->fg_lock); | |
995 | } | |
996 | ||
997 | static inline void | |
998 | vn_offset_unlock(struct fileglob *fg) | |
999 | { | |
1000 | int lock_wanted = 0; | |
1001 | ||
1002 | lck_mtx_lock_spin(&fg->fg_lock); | |
1003 | if (fg->fg_lflags & FG_OFF_LOCKWANT) { | |
1004 | lock_wanted = 1; | |
1005 | } | |
1006 | fg->fg_lflags &= ~(FG_OFF_LOCKED | FG_OFF_LOCKWANT); | |
1007 | lck_mtx_unlock(&fg->fg_lock); | |
1008 | if (lock_wanted) { | |
1009 | wakeup(&fg->fg_lflags); | |
1010 | } | |
1011 | } | |
1012 | ||
1c79356b A |
1013 | /* |
1014 | * File table vnode read routine. | |
1015 | */ | |
9bccf70c | 1016 | static int |
2d21ac55 | 1017 | vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) |
1c79356b | 1018 | { |
9bccf70c | 1019 | struct vnode *vp; |
39236c6e A |
1020 | int error; |
1021 | int ioflag; | |
f427ee49 A |
1022 | off_t read_offset; |
1023 | user_ssize_t read_len; | |
1024 | user_ssize_t adjusted_read_len; | |
1025 | user_ssize_t clippedsize; | |
1026 | bool offset_locked; | |
9bccf70c | 1027 | |
f427ee49 A |
1028 | read_len = uio_resid(uio); |
1029 | if (read_len < 0 || read_len > INT_MAX) { | |
1030 | return EINVAL; | |
1031 | } | |
1032 | adjusted_read_len = read_len; | |
1033 | clippedsize = 0; | |
1034 | offset_locked = false; | |
1035 | ||
1036 | vp = (struct vnode *)fp->fp_glob->fg_data; | |
0a7de745 A |
1037 | if ((error = vnode_getwithref(vp))) { |
1038 | return error; | |
91447636 | 1039 | } |
2d21ac55 A |
1040 | |
1041 | #if CONFIG_MACF | |
1042 | error = mac_vnode_check_read(ctx, vfs_context_ucred(ctx), vp); | |
1043 | if (error) { | |
1044 | (void)vnode_put(vp); | |
0a7de745 | 1045 | return error; |
2d21ac55 A |
1046 | } |
1047 | #endif | |
1048 | ||
316670eb A |
1049 | /* This signals to VNOP handlers that this read came from a file table read */ |
1050 | ioflag = IO_SYSCALL_DISPATCH; | |
1051 | ||
f427ee49 | 1052 | if (fp->fp_glob->fg_flag & FNONBLOCK) { |
9bccf70c | 1053 | ioflag |= IO_NDELAY; |
0a7de745 | 1054 | } |
f427ee49 | 1055 | if ((fp->fp_glob->fg_flag & FNOCACHE) || vnode_isnocache(vp)) { |
0a7de745 A |
1056 | ioflag |= IO_NOCACHE; |
1057 | } | |
f427ee49 | 1058 | if (fp->fp_glob->fg_flag & FENCRYPTED) { |
316670eb A |
1059 | ioflag |= IO_ENCRYPTED; |
1060 | } | |
f427ee49 | 1061 | if (fp->fp_glob->fg_flag & FUNENCRYPTED) { |
3e170ce0 A |
1062 | ioflag |= IO_SKIP_ENCRYPTION; |
1063 | } | |
f427ee49 | 1064 | if (fp->fp_glob->fg_flag & O_EVTONLY) { |
3e170ce0 A |
1065 | ioflag |= IO_EVTONLY; |
1066 | } | |
f427ee49 | 1067 | if (fp->fp_glob->fg_flag & FNORDAHEAD) { |
0a7de745 A |
1068 | ioflag |= IO_RAOFF; |
1069 | } | |
91447636 | 1070 | |
fe8ab488 A |
1071 | if ((flags & FOF_OFFSET) == 0) { |
1072 | if ((vnode_vtype(vp) == VREG) && !vnode_isswap(vp)) { | |
f427ee49 A |
1073 | vn_offset_lock(fp->fp_glob); |
1074 | offset_locked = true; | |
1075 | } | |
1076 | read_offset = fp->fp_glob->fg_offset; | |
1077 | uio_setoffset(uio, read_offset); | |
1078 | } else { | |
1079 | read_offset = uio_offset(uio); | |
1080 | /* POSIX allows negative offsets for character devices. */ | |
1081 | if ((read_offset < 0) && (vnode_vtype(vp) != VCHR)) { | |
1082 | error = EINVAL; | |
1083 | goto error_out; | |
fe8ab488 | 1084 | } |
fe8ab488 | 1085 | } |
f427ee49 A |
1086 | |
1087 | if (read_offset == INT64_MAX) { | |
1088 | /* can't read any more */ | |
1089 | error = 0; | |
1090 | goto error_out; | |
1091 | } | |
1092 | ||
1093 | /* | |
1094 | * If offset + len will cause overflow, reduce the len to a value | |
1095 | * (adjusted_read_len) where it won't | |
1096 | */ | |
1097 | if ((read_offset >= 0) && (INT64_MAX - read_offset) < read_len) { | |
1098 | /* | |
1099 | * 0 read_offset INT64_MAX | |
1100 | * |-----------------------------------------------|----------|~~~ | |
1101 | * <--read_len--> | |
1102 | * <-adjusted-> | |
1103 | */ | |
1104 | adjusted_read_len = (user_ssize_t)(INT64_MAX - read_offset); | |
1105 | } | |
1106 | ||
1107 | if (adjusted_read_len < read_len) { | |
1108 | uio_setresid(uio, adjusted_read_len); | |
1109 | clippedsize = read_len - adjusted_read_len; | |
1110 | } | |
91447636 | 1111 | |
3e170ce0 | 1112 | if (vnode_isswap(vp) && !(IO_SKIP_ENCRYPTION & ioflag)) { |
91447636 A |
1113 | /* special case for swap files */ |
1114 | error = vn_read_swapfile(vp, uio); | |
1115 | } else { | |
2d21ac55 | 1116 | error = VNOP_READ(vp, uio, ioflag, ctx); |
9bccf70c | 1117 | } |
00867663 | 1118 | |
f427ee49 A |
1119 | if (clippedsize) { |
1120 | uio_setresid(uio, (uio_resid(uio) + clippedsize)); | |
1121 | } | |
1122 | ||
fe8ab488 | 1123 | if ((flags & FOF_OFFSET) == 0) { |
f427ee49 A |
1124 | fp->fp_glob->fg_offset += read_len - uio_resid(uio); |
1125 | } | |
1126 | ||
1127 | error_out: | |
1128 | if (offset_locked) { | |
1129 | vn_offset_unlock(fp->fp_glob); | |
1130 | offset_locked = false; | |
fe8ab488 | 1131 | } |
91447636 A |
1132 | |
1133 | (void)vnode_put(vp); | |
0a7de745 | 1134 | return error; |
1c79356b A |
1135 | } |
1136 | ||
1137 | ||
1138 | /* | |
1139 | * File table vnode write routine. | |
1140 | */ | |
9bccf70c | 1141 | static int |
2d21ac55 | 1142 | vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) |
1c79356b | 1143 | { |
9bccf70c A |
1144 | struct vnode *vp; |
1145 | int error, ioflag; | |
f427ee49 A |
1146 | off_t write_offset; |
1147 | off_t write_end_offset; | |
1148 | user_ssize_t write_len; | |
1149 | user_ssize_t adjusted_write_len; | |
1150 | user_ssize_t clippedsize; | |
1151 | bool offset_locked; | |
2d21ac55 | 1152 | proc_t p = vfs_context_proc(ctx); |
f427ee49 | 1153 | rlim_t rlim_cur_fsize = p ? proc_limitgetcur(p, RLIMIT_FSIZE, TRUE) : 0; |
91447636 | 1154 | |
f427ee49 A |
1155 | write_len = uio_resid(uio); |
1156 | if (write_len < 0 || write_len > INT_MAX) { | |
1157 | return EINVAL; | |
1158 | } | |
1159 | adjusted_write_len = write_len; | |
1160 | clippedsize = 0; | |
1161 | offset_locked = false; | |
1162 | ||
1163 | vp = (struct vnode *)fp->fp_glob->fg_data; | |
0a7de745 A |
1164 | if ((error = vnode_getwithref(vp))) { |
1165 | return error; | |
91447636 | 1166 | } |
2d21ac55 A |
1167 | |
1168 | #if CONFIG_MACF | |
1169 | error = mac_vnode_check_write(ctx, vfs_context_ucred(ctx), vp); | |
1170 | if (error) { | |
1171 | (void)vnode_put(vp); | |
0a7de745 | 1172 | return error; |
2d21ac55 A |
1173 | } |
1174 | #endif | |
1175 | ||
39236c6e A |
1176 | /* |
1177 | * IO_SYSCALL_DISPATCH signals to VNOP handlers that this write came from | |
1178 | * a file table write | |
316670eb A |
1179 | */ |
1180 | ioflag = (IO_UNIT | IO_SYSCALL_DISPATCH); | |
1181 | ||
f427ee49 | 1182 | if (vp->v_type == VREG && (fp->fp_glob->fg_flag & O_APPEND)) { |
1c79356b | 1183 | ioflag |= IO_APPEND; |
0a7de745 | 1184 | } |
f427ee49 | 1185 | if (fp->fp_glob->fg_flag & FNONBLOCK) { |
1c79356b | 1186 | ioflag |= IO_NDELAY; |
0a7de745 | 1187 | } |
f427ee49 | 1188 | if ((fp->fp_glob->fg_flag & FNOCACHE) || vnode_isnocache(vp)) { |
0a7de745 A |
1189 | ioflag |= IO_NOCACHE; |
1190 | } | |
f427ee49 | 1191 | if (fp->fp_glob->fg_flag & FNODIRECT) { |
6d2010ae | 1192 | ioflag |= IO_NODIRECT; |
0a7de745 | 1193 | } |
f427ee49 | 1194 | if (fp->fp_glob->fg_flag & FSINGLE_WRITER) { |
316670eb | 1195 | ioflag |= IO_SINGLE_WRITER; |
0a7de745 | 1196 | } |
f427ee49 | 1197 | if (fp->fp_glob->fg_flag & O_EVTONLY) { |
3e170ce0 | 1198 | ioflag |= IO_EVTONLY; |
0a7de745 | 1199 | } |
6d2010ae | 1200 | |
b0d623f7 A |
1201 | /* |
1202 | * Treat synchronous mounts and O_FSYNC on the fd as equivalent. | |
1203 | * | |
1204 | * XXX We treat O_DSYNC as O_FSYNC for now, since we can not delay | |
1205 | * XXX the non-essential metadata without some additional VFS work; | |
1206 | * XXX the intent at this point is to plumb the interface for it. | |
1207 | */ | |
f427ee49 | 1208 | if ((fp->fp_glob->fg_flag & (O_FSYNC | O_DSYNC)) || |
0a7de745 | 1209 | (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) { |
1c79356b | 1210 | ioflag |= IO_SYNC; |
b0d623f7 | 1211 | } |
91447636 | 1212 | |
9bccf70c | 1213 | if ((flags & FOF_OFFSET) == 0) { |
fe8ab488 | 1214 | if ((vnode_vtype(vp) == VREG) && !vnode_isswap(vp)) { |
f427ee49 A |
1215 | vn_offset_lock(fp->fp_glob); |
1216 | offset_locked = true; | |
fe8ab488 | 1217 | } |
f427ee49 A |
1218 | write_offset = fp->fp_glob->fg_offset; |
1219 | uio_setoffset(uio, write_offset); | |
1220 | } else { | |
1221 | /* for pwrite, append should be ignored */ | |
1222 | ioflag &= ~IO_APPEND; | |
1223 | write_offset = uio_offset(uio); | |
1224 | /* POSIX allows negative offsets for character devices. */ | |
1225 | if ((write_offset < 0) && (vnode_vtype(vp) != VCHR)) { | |
1226 | error = EINVAL; | |
1227 | goto error_out; | |
1228 | } | |
1229 | } | |
1230 | ||
1231 | if (write_offset == INT64_MAX) { | |
1232 | /* writes are not possible */ | |
1233 | error = EFBIG; | |
1234 | goto error_out; | |
1235 | } | |
1236 | ||
1237 | /* | |
1238 | * write_len is the original write length that was requested. | |
1239 | * We may however need to reduce that becasue of two reasons | |
1240 | * | |
1241 | * 1) If write_offset + write_len will exceed OFF_T_MAX (i.e. INT64_MAX) | |
1242 | * and/or | |
1243 | * 2) If write_offset + write_len will exceed the administrative | |
1244 | * limit for the maximum file size. | |
1245 | * | |
1246 | * In both cases the write will be denied if we can't write even a single | |
1247 | * byte otherwise it will be "clipped" (i.e. a short write). | |
1248 | */ | |
1249 | ||
1250 | /* | |
1251 | * If offset + len will cause overflow, reduce the len | |
1252 | * to a value (adjusted_write_len) where it won't | |
1253 | */ | |
1254 | if ((write_offset >= 0) && (INT64_MAX - write_offset) < write_len) { | |
1255 | /* | |
1256 | * 0 write_offset INT64_MAX | |
1257 | * |-----------------------------------------------|----------|~~~ | |
1258 | * <--write_len--> | |
1259 | * <-adjusted-> | |
1260 | */ | |
1261 | adjusted_write_len = (user_ssize_t)(INT64_MAX - write_offset); | |
9bccf70c | 1262 | } |
f427ee49 A |
1263 | |
1264 | /* write_end_offset will always be [0, INT64_MAX] */ | |
1265 | write_end_offset = write_offset + adjusted_write_len; | |
1266 | ||
1267 | if (p && (vp->v_type == VREG) && | |
1268 | (rlim_cur_fsize != RLIM_INFINITY) && | |
1269 | (rlim_cur_fsize <= INT64_MAX) && | |
1270 | (write_end_offset > (off_t)rlim_cur_fsize)) { | |
0a7de745 | 1271 | /* |
2d21ac55 A |
1272 | * If the requested residual would cause us to go past the |
1273 | * administrative limit, then we need to adjust the residual | |
1274 | * down to cause fewer bytes than requested to be written. If | |
1275 | * we can't do that (e.g. the residual is already 1 byte), | |
1276 | * then we fail the write with EFBIG. | |
1277 | */ | |
f427ee49 A |
1278 | if (write_offset >= (off_t)rlim_cur_fsize) { |
1279 | /* | |
1280 | * 0 rlim_fsize write_offset write_end INT64_MAX | |
1281 | * |------------------------|----------|-------------|--------| | |
1282 | * <--write_len--> | |
1283 | * | |
1284 | * write not permitted | |
1285 | */ | |
2d21ac55 | 1286 | psignal(p, SIGXFSZ); |
fe8ab488 A |
1287 | error = EFBIG; |
1288 | goto error_out; | |
2d21ac55 | 1289 | } |
f427ee49 A |
1290 | |
1291 | /* | |
1292 | * 0 write_offset rlim_fsize write_end INT64_MAX | |
1293 | * |------------------------|-----------|---------|------------| | |
1294 | * <------write_len------> | |
1295 | * <-adjusted--> | |
1296 | */ | |
1297 | adjusted_write_len = (user_ssize_t)((off_t)rlim_cur_fsize - write_offset); | |
1298 | assert((adjusted_write_len > 0) && (adjusted_write_len < write_len)); | |
2d21ac55 | 1299 | } |
f427ee49 A |
1300 | |
1301 | if (adjusted_write_len < write_len) { | |
1302 | uio_setresid(uio, adjusted_write_len); | |
1303 | clippedsize = write_len - adjusted_write_len; | |
2d21ac55 A |
1304 | } |
1305 | ||
1306 | error = VNOP_WRITE(vp, uio, ioflag, ctx); | |
91447636 | 1307 | |
f427ee49 A |
1308 | /* |
1309 | * If we had to reduce the size of write requested either because | |
1310 | * of rlimit or because it would have exceeded | |
1311 | * maximum file size, we have to add that back to the residual so | |
1312 | * it correctly reflects what we did in this function. | |
1313 | */ | |
1314 | if (clippedsize) { | |
1315 | uio_setresid(uio, (uio_resid(uio) + clippedsize)); | |
2d21ac55 | 1316 | } |
1c79356b | 1317 | |
9bccf70c | 1318 | if ((flags & FOF_OFFSET) == 0) { |
0a7de745 | 1319 | if (ioflag & IO_APPEND) { |
f427ee49 | 1320 | fp->fp_glob->fg_offset = uio_offset(uio); |
0a7de745 | 1321 | } else { |
f427ee49 | 1322 | fp->fp_glob->fg_offset += (write_len - uio_resid(uio)); |
0a7de745 | 1323 | } |
fe8ab488 | 1324 | if (offset_locked) { |
f427ee49 A |
1325 | vn_offset_unlock(fp->fp_glob); |
1326 | offset_locked = false; | |
fe8ab488 | 1327 | } |
9bccf70c A |
1328 | } |
1329 | ||
1c79356b A |
1330 | /* |
1331 | * Set the credentials on successful writes | |
1332 | */ | |
1333 | if ((error == 0) && (vp->v_tag == VT_NFS) && (UBCINFOEXISTS(vp))) { | |
0a7de745 | 1334 | /* |
13fec989 A |
1335 | * When called from aio subsystem, we only have the proc from |
1336 | * which to get the credential, at this point, so use that | |
1337 | * instead. This means aio functions are incompatible with | |
1338 | * per-thread credentials (aio operations are proxied). We | |
1339 | * can't easily correct the aio vs. settid race in this case | |
1340 | * anyway, so we disallow it. | |
1341 | */ | |
1342 | if ((flags & FOF_PCRED) == 0) { | |
1343 | ubc_setthreadcred(vp, p, current_thread()); | |
1344 | } else { | |
1345 | ubc_setcred(vp, p); | |
1346 | } | |
1c79356b | 1347 | } |
91447636 | 1348 | (void)vnode_put(vp); |
0a7de745 | 1349 | return error; |
fe8ab488 A |
1350 | |
1351 | error_out: | |
1352 | if (offset_locked) { | |
f427ee49 | 1353 | vn_offset_unlock(fp->fp_glob); |
fe8ab488 A |
1354 | } |
1355 | (void)vnode_put(vp); | |
0a7de745 | 1356 | return error; |
1c79356b A |
1357 | } |
1358 | ||
1359 | /* | |
1360 | * File table vnode stat routine. | |
2d21ac55 A |
1361 | * |
1362 | * Returns: 0 Success | |
1363 | * EBADF | |
1364 | * ENOMEM | |
1365 | * vnode_getattr:??? | |
1c79356b | 1366 | */ |
9bccf70c | 1367 | int |
743345f9 | 1368 | vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat64, |
cb323159 | 1369 | int needsrealdev, vfs_context_t ctx, struct ucred *file_cred) |
1c79356b | 1370 | { |
91447636 | 1371 | struct vnode_attr va; |
1c79356b A |
1372 | int error; |
1373 | u_short mode; | |
91447636 | 1374 | kauth_filesec_t fsec; |
0a7de745 | 1375 | struct stat *sb = (struct stat *)0; /* warning avoidance ; protected by isstat64 */ |
2d21ac55 A |
1376 | struct stat64 * sb64 = (struct stat64 *)0; /* warning avoidance ; protected by isstat64 */ |
1377 | ||
0a7de745 | 1378 | if (isstat64 != 0) { |
2d21ac55 | 1379 | sb64 = (struct stat64 *)sbptr; |
0a7de745 | 1380 | } else { |
2d21ac55 | 1381 | sb = (struct stat *)sbptr; |
0a7de745 | 1382 | } |
b0d623f7 | 1383 | memset(&va, 0, sizeof(va)); |
91447636 A |
1384 | VATTR_INIT(&va); |
1385 | VATTR_WANTED(&va, va_fsid); | |
1386 | VATTR_WANTED(&va, va_fileid); | |
1387 | VATTR_WANTED(&va, va_mode); | |
1388 | VATTR_WANTED(&va, va_type); | |
1389 | VATTR_WANTED(&va, va_nlink); | |
1390 | VATTR_WANTED(&va, va_uid); | |
1391 | VATTR_WANTED(&va, va_gid); | |
1392 | VATTR_WANTED(&va, va_rdev); | |
1393 | VATTR_WANTED(&va, va_data_size); | |
1394 | VATTR_WANTED(&va, va_access_time); | |
1395 | VATTR_WANTED(&va, va_modify_time); | |
1396 | VATTR_WANTED(&va, va_change_time); | |
2d21ac55 | 1397 | VATTR_WANTED(&va, va_create_time); |
91447636 A |
1398 | VATTR_WANTED(&va, va_flags); |
1399 | VATTR_WANTED(&va, va_gen); | |
1400 | VATTR_WANTED(&va, va_iosize); | |
1401 | /* lower layers will synthesise va_total_alloc from va_data_size if required */ | |
1402 | VATTR_WANTED(&va, va_total_alloc); | |
1403 | if (xsec != NULL) { | |
1404 | VATTR_WANTED(&va, va_uuuid); | |
1405 | VATTR_WANTED(&va, va_guuid); | |
1406 | VATTR_WANTED(&va, va_acl); | |
1407 | } | |
cb323159 A |
1408 | if (needsrealdev) { |
1409 | va.va_vaflags = VA_REALFSID; | |
1410 | } | |
91447636 | 1411 | error = vnode_getattr(vp, &va, ctx); |
0a7de745 | 1412 | if (error) { |
91447636 | 1413 | goto out; |
0a7de745 | 1414 | } |
743345f9 A |
1415 | #if CONFIG_MACF |
1416 | /* | |
1417 | * Give MAC polices a chance to reject or filter the attributes | |
1418 | * returned by the filesystem. Note that MAC policies are consulted | |
1419 | * *after* calling the filesystem because filesystems can return more | |
1420 | * attributes than were requested so policies wouldn't be authoritative | |
1421 | * is consulted beforehand. This also gives policies an opportunity | |
1422 | * to change the values of attributes retrieved. | |
1423 | */ | |
1424 | error = mac_vnode_check_getattr(ctx, file_cred, vp, &va); | |
0a7de745 | 1425 | if (error) { |
743345f9 | 1426 | goto out; |
0a7de745 | 1427 | } |
743345f9 | 1428 | #endif |
1c79356b A |
1429 | /* |
1430 | * Copy from vattr table | |
1431 | */ | |
2d21ac55 A |
1432 | if (isstat64 != 0) { |
1433 | sb64->st_dev = va.va_fsid; | |
1434 | sb64->st_ino = (ino64_t)va.va_fileid; | |
2d21ac55 A |
1435 | } else { |
1436 | sb->st_dev = va.va_fsid; | |
1437 | sb->st_ino = (ino_t)va.va_fileid; | |
1438 | } | |
91447636 | 1439 | mode = va.va_mode; |
1c79356b A |
1440 | switch (vp->v_type) { |
1441 | case VREG: | |
1442 | mode |= S_IFREG; | |
1443 | break; | |
1444 | case VDIR: | |
1445 | mode |= S_IFDIR; | |
1446 | break; | |
1447 | case VBLK: | |
1448 | mode |= S_IFBLK; | |
1449 | break; | |
1450 | case VCHR: | |
1451 | mode |= S_IFCHR; | |
1452 | break; | |
1453 | case VLNK: | |
1454 | mode |= S_IFLNK; | |
1455 | break; | |
1456 | case VSOCK: | |
1457 | mode |= S_IFSOCK; | |
1458 | break; | |
1459 | case VFIFO: | |
1460 | mode |= S_IFIFO; | |
1461 | break; | |
1462 | default: | |
91447636 A |
1463 | error = EBADF; |
1464 | goto out; | |
0a7de745 A |
1465 | } |
1466 | ; | |
2d21ac55 A |
1467 | if (isstat64 != 0) { |
1468 | sb64->st_mode = mode; | |
5ba3f43e | 1469 | sb64->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? va.va_nlink > UINT16_MAX ? UINT16_MAX : (u_int16_t)va.va_nlink : 1; |
2d21ac55 A |
1470 | sb64->st_uid = va.va_uid; |
1471 | sb64->st_gid = va.va_gid; | |
1472 | sb64->st_rdev = va.va_rdev; | |
1473 | sb64->st_size = va.va_data_size; | |
1474 | sb64->st_atimespec = va.va_access_time; | |
1475 | sb64->st_mtimespec = va.va_modify_time; | |
1476 | sb64->st_ctimespec = va.va_change_time; | |
3e170ce0 A |
1477 | if (VATTR_IS_SUPPORTED(&va, va_create_time)) { |
1478 | sb64->st_birthtimespec = va.va_create_time; | |
1479 | } else { | |
1480 | sb64->st_birthtimespec.tv_sec = sb64->st_birthtimespec.tv_nsec = 0; | |
1481 | } | |
2d21ac55 A |
1482 | sb64->st_blksize = va.va_iosize; |
1483 | sb64->st_flags = va.va_flags; | |
1484 | sb64->st_blocks = roundup(va.va_total_alloc, 512) / 512; | |
1485 | } else { | |
1486 | sb->st_mode = mode; | |
5ba3f43e | 1487 | sb->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? va.va_nlink > UINT16_MAX ? UINT16_MAX : (u_int16_t)va.va_nlink : 1; |
2d21ac55 A |
1488 | sb->st_uid = va.va_uid; |
1489 | sb->st_gid = va.va_gid; | |
1490 | sb->st_rdev = va.va_rdev; | |
1491 | sb->st_size = va.va_data_size; | |
1492 | sb->st_atimespec = va.va_access_time; | |
1493 | sb->st_mtimespec = va.va_modify_time; | |
1494 | sb->st_ctimespec = va.va_change_time; | |
1495 | sb->st_blksize = va.va_iosize; | |
1496 | sb->st_flags = va.va_flags; | |
1497 | sb->st_blocks = roundup(va.va_total_alloc, 512) / 512; | |
1498 | } | |
91447636 | 1499 | |
6d2010ae | 1500 | /* if we're interested in extended security data and we got an ACL */ |
91447636 A |
1501 | if (xsec != NULL) { |
1502 | if (!VATTR_IS_SUPPORTED(&va, va_acl) && | |
1503 | !VATTR_IS_SUPPORTED(&va, va_uuuid) && | |
1504 | !VATTR_IS_SUPPORTED(&va, va_guuid)) { | |
1505 | *xsec = KAUTH_FILESEC_NONE; | |
1506 | } else { | |
91447636 A |
1507 | if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) { |
1508 | fsec = kauth_filesec_alloc(va.va_acl->acl_entrycount); | |
1509 | } else { | |
1510 | fsec = kauth_filesec_alloc(0); | |
1511 | } | |
1512 | if (fsec == NULL) { | |
1513 | error = ENOMEM; | |
1514 | goto out; | |
1515 | } | |
1516 | fsec->fsec_magic = KAUTH_FILESEC_MAGIC; | |
1517 | if (VATTR_IS_SUPPORTED(&va, va_uuuid)) { | |
1518 | fsec->fsec_owner = va.va_uuuid; | |
1519 | } else { | |
1520 | fsec->fsec_owner = kauth_null_guid; | |
1521 | } | |
1522 | if (VATTR_IS_SUPPORTED(&va, va_guuid)) { | |
1523 | fsec->fsec_group = va.va_guuid; | |
1524 | } else { | |
1525 | fsec->fsec_group = kauth_null_guid; | |
1526 | } | |
1527 | if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) { | |
cb323159 | 1528 | __nochk_bcopy(va.va_acl, &(fsec->fsec_acl), KAUTH_ACL_COPYSIZE(va.va_acl)); |
91447636 A |
1529 | } else { |
1530 | fsec->fsec_acl.acl_entrycount = KAUTH_FILESEC_NOACL; | |
1531 | } | |
1532 | *xsec = fsec; | |
1533 | } | |
1534 | } | |
0a7de745 | 1535 | |
1c79356b | 1536 | /* Do not give the generation number out to unpriviledged users */ |
2d21ac55 | 1537 | if (va.va_gen && !vfs_context_issuser(ctx)) { |
0a7de745 A |
1538 | if (isstat64 != 0) { |
1539 | sb64->st_gen = 0; | |
1540 | } else { | |
1541 | sb->st_gen = 0; | |
1542 | } | |
2d21ac55 | 1543 | } else { |
0a7de745 A |
1544 | if (isstat64 != 0) { |
1545 | sb64->st_gen = va.va_gen; | |
1546 | } else { | |
2d21ac55 | 1547 | sb->st_gen = va.va_gen; |
0a7de745 | 1548 | } |
2d21ac55 | 1549 | } |
91447636 A |
1550 | |
1551 | error = 0; | |
1552 | out: | |
0a7de745 | 1553 | if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) { |
91447636 | 1554 | kauth_acl_free(va.va_acl); |
0a7de745 A |
1555 | } |
1556 | return error; | |
91447636 A |
1557 | } |
1558 | ||
1559 | int | |
cb323159 | 1560 | vn_stat(struct vnode *vp, void *sb, kauth_filesec_t *xsec, int isstat64, int needsrealdev, vfs_context_t ctx) |
91447636 A |
1561 | { |
1562 | int error; | |
1563 | ||
2d21ac55 A |
1564 | #if CONFIG_MACF |
1565 | error = mac_vnode_check_stat(ctx, NOCRED, vp); | |
0a7de745 A |
1566 | if (error) { |
1567 | return error; | |
1568 | } | |
2d21ac55 A |
1569 | #endif |
1570 | ||
91447636 | 1571 | /* authorize */ |
0a7de745 A |
1572 | if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_ATTRIBUTES | KAUTH_VNODE_READ_SECURITY, ctx)) != 0) { |
1573 | return error; | |
1574 | } | |
91447636 A |
1575 | |
1576 | /* actual stat */ | |
cb323159 | 1577 | return vn_stat_noauth(vp, sb, xsec, isstat64, needsrealdev, ctx, NOCRED); |
1c79356b A |
1578 | } |
1579 | ||
91447636 | 1580 | |
1c79356b A |
1581 | /* |
1582 | * File table vnode ioctl routine. | |
1583 | */ | |
9bccf70c | 1584 | static int |
2d21ac55 | 1585 | vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx) |
1c79356b | 1586 | { |
f427ee49 | 1587 | struct vnode *vp = ((struct vnode *)fp->fp_glob->fg_data); |
91447636 | 1588 | off_t file_size; |
1c79356b | 1589 | int error; |
fa4905b1 | 1590 | struct vnode *ttyvp; |
2d21ac55 | 1591 | struct session * sessp; |
0a7de745 A |
1592 | |
1593 | if ((error = vnode_getwithref(vp))) { | |
1594 | return error; | |
91447636 | 1595 | } |
91447636 | 1596 | |
2d21ac55 A |
1597 | #if CONFIG_MACF |
1598 | error = mac_vnode_check_ioctl(ctx, vp, com); | |
0a7de745 | 1599 | if (error) { |
2d21ac55 | 1600 | goto out; |
0a7de745 | 1601 | } |
2d21ac55 | 1602 | #endif |
1c79356b | 1603 | |
2d21ac55 | 1604 | switch (vp->v_type) { |
1c79356b A |
1605 | case VREG: |
1606 | case VDIR: | |
1607 | if (com == FIONREAD) { | |
f427ee49 | 1608 | off_t temp_nbytes; |
0a7de745 | 1609 | if ((error = vnode_size(vp, &file_size, ctx)) != 0) { |
91447636 | 1610 | goto out; |
0a7de745 | 1611 | } |
f427ee49 A |
1612 | temp_nbytes = file_size - fp->fp_glob->fg_offset; |
1613 | if (temp_nbytes > INT_MAX) { | |
1614 | *(int *)data = INT_MAX; | |
1615 | } else if (temp_nbytes < 0) { | |
1616 | *(int *)data = 0; | |
1617 | } else { | |
1618 | *(int *)data = (int)temp_nbytes; | |
1619 | } | |
91447636 A |
1620 | goto out; |
1621 | } | |
0a7de745 | 1622 | if (com == FIONBIO || com == FIOASYNC) { /* XXX */ |
91447636 | 1623 | goto out; |
1c79356b | 1624 | } |
f427ee49 | 1625 | OS_FALLTHROUGH; |
1c79356b A |
1626 | |
1627 | default: | |
91447636 A |
1628 | error = ENOTTY; |
1629 | goto out; | |
1c79356b A |
1630 | |
1631 | case VFIFO: | |
1632 | case VCHR: | |
1633 | case VBLK: | |
9bccf70c | 1634 | |
f427ee49 | 1635 | if (com == TIOCREVOKE || com == TIOCREVOKECLEAR) { |
cb323159 A |
1636 | error = ENOTTY; |
1637 | goto out; | |
1638 | } | |
1639 | ||
91447636 A |
1640 | /* Should not be able to set block size from user space */ |
1641 | if (com == DKIOCSETBLOCKSIZE) { | |
1642 | error = EPERM; | |
1643 | goto out; | |
1644 | } | |
1645 | ||
1646 | if (com == FIODTYPE) { | |
1647 | if (vp->v_type == VBLK) { | |
1648 | if (major(vp->v_rdev) >= nblkdev) { | |
1649 | error = ENXIO; | |
1650 | goto out; | |
1651 | } | |
39236c6e | 1652 | *(int *)data = bdevsw[major(vp->v_rdev)].d_type; |
91447636 A |
1653 | } else if (vp->v_type == VCHR) { |
1654 | if (major(vp->v_rdev) >= nchrdev) { | |
1655 | error = ENXIO; | |
1656 | goto out; | |
1657 | } | |
39236c6e | 1658 | *(int *)data = cdevsw[major(vp->v_rdev)].d_type; |
91447636 A |
1659 | } else { |
1660 | error = ENOTTY; | |
1661 | goto out; | |
1662 | } | |
1663 | goto out; | |
1664 | } | |
f427ee49 | 1665 | error = VNOP_IOCTL(vp, com, data, fp->fp_glob->fg_flag, ctx); |
91447636 A |
1666 | |
1667 | if (error == 0 && com == TIOCSCTTY) { | |
2d21ac55 A |
1668 | sessp = proc_session(vfs_context_proc(ctx)); |
1669 | ||
1670 | session_lock(sessp); | |
1671 | ttyvp = sessp->s_ttyvp; | |
1672 | sessp->s_ttyvp = vp; | |
1673 | sessp->s_ttyvid = vnode_vid(vp); | |
1674 | session_unlock(sessp); | |
1675 | session_rele(sessp); | |
91447636 | 1676 | } |
1c79356b | 1677 | } |
91447636 A |
1678 | out: |
1679 | (void)vnode_put(vp); | |
0a7de745 | 1680 | return error; |
1c79356b A |
1681 | } |
1682 | ||
1683 | /* | |
1684 | * File table vnode select routine. | |
1685 | */ | |
9bccf70c | 1686 | static int |
2d21ac55 | 1687 | vn_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx) |
1c79356b | 1688 | { |
91447636 | 1689 | int error; |
f427ee49 | 1690 | struct vnode * vp = (struct vnode *)fp->fp_glob->fg_data; |
91447636 A |
1691 | struct vfs_context context; |
1692 | ||
0a7de745 | 1693 | if ((error = vnode_getwithref(vp)) == 0) { |
2d21ac55 | 1694 | context.vc_thread = current_thread(); |
f427ee49 | 1695 | context.vc_ucred = fp->fp_glob->fg_cred; |
91447636 | 1696 | |
2d21ac55 A |
1697 | #if CONFIG_MACF |
1698 | /* | |
1699 | * XXX We should use a per thread credential here; minimally, | |
1700 | * XXX the process credential should have a persistent | |
1701 | * XXX reference on it before being passed in here. | |
1702 | */ | |
1703 | error = mac_vnode_check_select(ctx, vp, which); | |
1704 | if (error == 0) | |
1705 | #endif | |
f427ee49 | 1706 | error = VNOP_SELECT(vp, which, fp->fp_glob->fg_flag, wql, ctx); |
1c79356b | 1707 | |
91447636 A |
1708 | (void)vnode_put(vp); |
1709 | } | |
0a7de745 | 1710 | return error; |
1c79356b A |
1711 | } |
1712 | ||
1c79356b A |
1713 | /* |
1714 | * File table vnode close routine. | |
1715 | */ | |
9bccf70c | 1716 | static int |
2d21ac55 | 1717 | vn_closefile(struct fileglob *fg, vfs_context_t ctx) |
1c79356b | 1718 | { |
3e170ce0 | 1719 | struct vnode *vp = fg->fg_data; |
91447636 A |
1720 | int error; |
1721 | ||
0a7de745 | 1722 | if ((error = vnode_getwithref(vp)) == 0) { |
3e170ce0 | 1723 | if (FILEGLOB_DTYPE(fg) == DTYPE_VNODE && |
f427ee49 | 1724 | ((fg->fg_flag & FWASLOCKED) != 0 || |
3e170ce0 A |
1725 | (fg->fg_lflags & FG_HAS_OFDLOCK) != 0)) { |
1726 | struct flock lf = { | |
1727 | .l_whence = SEEK_SET, | |
1728 | .l_start = 0, | |
1729 | .l_len = 0, | |
1730 | .l_type = F_UNLCK | |
1731 | }; | |
1732 | ||
f427ee49 | 1733 | if ((fg->fg_flag & FWASLOCKED) != 0) { |
3e170ce0 A |
1734 | (void) VNOP_ADVLOCK(vp, (caddr_t)fg, |
1735 | F_UNLCK, &lf, F_FLOCK, ctx, NULL); | |
0a7de745 | 1736 | } |
3e170ce0 | 1737 | |
0a7de745 | 1738 | if ((fg->fg_lflags & FG_HAS_OFDLOCK) != 0) { |
3e170ce0 A |
1739 | (void) VNOP_ADVLOCK(vp, (caddr_t)fg, |
1740 | F_UNLCK, &lf, F_OFD_LOCK, ctx, NULL); | |
0a7de745 | 1741 | } |
2d21ac55 | 1742 | } |
0a7de745 | 1743 | error = vn_close(vp, fg->fg_flag, ctx); |
3e170ce0 | 1744 | (void) vnode_put(vp); |
91447636 | 1745 | } |
0a7de745 | 1746 | return error; |
91447636 A |
1747 | } |
1748 | ||
2d21ac55 A |
1749 | /* |
1750 | * Returns: 0 Success | |
1751 | * VNOP_PATHCONF:??? | |
1752 | */ | |
91447636 | 1753 | int |
b0d623f7 | 1754 | vn_pathconf(vnode_t vp, int name, int32_t *retval, vfs_context_t ctx) |
91447636 | 1755 | { |
0a7de745 | 1756 | int error = 0; |
6d2010ae | 1757 | struct vfs_attr vfa; |
91447636 | 1758 | |
0a7de745 | 1759 | switch (name) { |
91447636 | 1760 | case _PC_EXTENDED_SECURITY_NP: |
2d21ac55 | 1761 | *retval = vfs_extendedsecurity(vnode_mount(vp)) ? 1 : 0; |
91447636 A |
1762 | break; |
1763 | case _PC_AUTH_OPAQUE_NP: | |
1764 | *retval = vfs_authopaque(vnode_mount(vp)); | |
1765 | break; | |
2d21ac55 | 1766 | case _PC_2_SYMLINKS: |
0a7de745 | 1767 | *retval = 1; /* XXX NOTSUP on MSDOS, etc. */ |
2d21ac55 A |
1768 | break; |
1769 | case _PC_ALLOC_SIZE_MIN: | |
0a7de745 | 1770 | *retval = 1; /* XXX lie: 1 byte */ |
2d21ac55 | 1771 | break; |
0a7de745 A |
1772 | case _PC_ASYNC_IO: /* unistd.h: _POSIX_ASYNCHRONUS_IO */ |
1773 | *retval = 1; /* [AIO] option is supported */ | |
2d21ac55 | 1774 | break; |
0a7de745 A |
1775 | case _PC_PRIO_IO: /* unistd.h: _POSIX_PRIORITIZED_IO */ |
1776 | *retval = 0; /* [PIO] option is not supported */ | |
2d21ac55 A |
1777 | break; |
1778 | case _PC_REC_INCR_XFER_SIZE: | |
0a7de745 | 1779 | *retval = 4096; /* XXX go from MIN to MAX 4K at a time */ |
2d21ac55 A |
1780 | break; |
1781 | case _PC_REC_MIN_XFER_SIZE: | |
0a7de745 | 1782 | *retval = 4096; /* XXX recommend 4K minimum reads/writes */ |
2d21ac55 A |
1783 | break; |
1784 | case _PC_REC_MAX_XFER_SIZE: | |
1785 | *retval = 65536; /* XXX recommend 64K maximum reads/writes */ | |
1786 | break; | |
1787 | case _PC_REC_XFER_ALIGN: | |
0a7de745 | 1788 | *retval = 4096; /* XXX recommend page aligned buffers */ |
2d21ac55 A |
1789 | break; |
1790 | case _PC_SYMLINK_MAX: | |
0a7de745 | 1791 | *retval = 255; /* Minimum acceptable POSIX value */ |
2d21ac55 | 1792 | break; |
0a7de745 A |
1793 | case _PC_SYNC_IO: /* unistd.h: _POSIX_SYNCHRONIZED_IO */ |
1794 | *retval = 0; /* [SIO] option is not supported */ | |
2d21ac55 | 1795 | break; |
6d2010ae | 1796 | case _PC_XATTR_SIZE_BITS: |
0a7de745 A |
1797 | /* The number of bits used to store maximum extended |
1798 | * attribute size in bytes. For example, if the maximum | |
1799 | * attribute size supported by a file system is 128K, the | |
1800 | * value returned will be 18. However a value 18 can mean | |
1801 | * that the maximum attribute size can be anywhere from | |
1802 | * (256KB - 1) to 128KB. As a special case, the resource | |
1803 | * fork can have much larger size, and some file system | |
1804 | * specific extended attributes can have smaller and preset | |
6d2010ae A |
1805 | * size; for example, Finder Info is always 32 bytes. |
1806 | */ | |
1807 | memset(&vfa, 0, sizeof(vfa)); | |
1808 | VFSATTR_INIT(&vfa); | |
1809 | VFSATTR_WANTED(&vfa, f_capabilities); | |
1810 | if (vfs_getattr(vnode_mount(vp), &vfa, ctx) == 0 && | |
0a7de745 A |
1811 | (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) && |
1812 | (vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) && | |
6d2010ae A |
1813 | (vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) { |
1814 | /* Supports native extended attributes */ | |
1815 | error = VNOP_PATHCONF(vp, name, retval, ctx); | |
1816 | } else { | |
0a7de745 | 1817 | /* Number of bits used to represent the maximum size of |
6d2010ae A |
1818 | * extended attribute stored in an Apple Double file. |
1819 | */ | |
1820 | *retval = AD_XATTR_SIZE_BITS; | |
1821 | } | |
1822 | break; | |
91447636 A |
1823 | default: |
1824 | error = VNOP_PATHCONF(vp, name, retval, ctx); | |
1825 | break; | |
1826 | } | |
1c79356b | 1827 | |
0a7de745 | 1828 | return error; |
1c79356b | 1829 | } |
55e303ae A |
1830 | |
1831 | static int | |
cb323159 | 1832 | vn_kqfilter(struct fileproc *fp, struct knote *kn, struct kevent_qos_s *kev) |
55e303ae | 1833 | { |
cb323159 | 1834 | vfs_context_t ctx = vfs_context_current(); |
b0d623f7 | 1835 | struct vnode *vp; |
39037602 A |
1836 | int error = 0; |
1837 | int result = 0; | |
5ba3f43e | 1838 | |
f427ee49 | 1839 | vp = (struct vnode *)fp->fp_glob->fg_data; |
b0d623f7 A |
1840 | |
1841 | /* | |
1842 | * Don't attach a knote to a dead vnode. | |
1843 | */ | |
1844 | if ((error = vget_internal(vp, 0, VNODE_NODEAD)) == 0) { | |
1845 | switch (kn->kn_filter) { | |
0a7de745 A |
1846 | case EVFILT_READ: |
1847 | case EVFILT_WRITE: | |
1848 | if (vnode_isfifo(vp)) { | |
1849 | /* We'll only watch FIFOs that use our fifofs */ | |
1850 | if (!(vp->v_fifoinfo && vp->v_fifoinfo->fi_readsock)) { | |
1851 | error = ENOTSUP; | |
1852 | } | |
1853 | } else if (!vnode_isreg(vp)) { | |
1854 | if (vnode_ischr(vp)) { | |
1855 | result = spec_kqfilter(vp, kn, kev); | |
1856 | if ((kn->kn_flags & EV_ERROR) == 0) { | |
1857 | /* claimed by a special device */ | |
1858 | vnode_put(vp); | |
1859 | return result; | |
b0d623f7 | 1860 | } |
b0d623f7 | 1861 | } |
b0d623f7 | 1862 | error = EINVAL; |
0a7de745 A |
1863 | } |
1864 | break; | |
1865 | case EVFILT_VNODE: | |
1866 | break; | |
1867 | default: | |
1868 | error = EINVAL; | |
b0d623f7 A |
1869 | } |
1870 | ||
39037602 | 1871 | if (error == 0) { |
2d21ac55 | 1872 | #if CONFIG_MACF |
f427ee49 | 1873 | error = mac_vnode_check_kqfilter(ctx, fp->fp_glob->fg_cred, kn, vp); |
39037602 A |
1874 | if (error) { |
1875 | vnode_put(vp); | |
1876 | goto out; | |
1877 | } | |
2d21ac55 | 1878 | #endif |
91447636 | 1879 | |
39037602 | 1880 | kn->kn_hook = (void*)vp; |
39037602 | 1881 | kn->kn_filtid = EVFILTID_VN; |
91447636 | 1882 | |
39037602 A |
1883 | vnode_lock(vp); |
1884 | KNOTE_ATTACH(&vp->v_knotes, kn); | |
cb323159 | 1885 | result = filt_vnode_common(kn, NULL, vp, 0); |
39037602 | 1886 | vnode_unlock(vp); |
b0d623f7 | 1887 | |
39037602 A |
1888 | /* |
1889 | * Ask the filesystem to provide remove notifications, | |
1890 | * but ignore failure | |
1891 | */ | |
0a7de745 | 1892 | VNOP_MONITOR(vp, 0, VNODE_MONITOR_BEGIN, (void*) kn, ctx); |
39037602 | 1893 | } |
b0d623f7 A |
1894 | |
1895 | vnode_put(vp); | |
91447636 | 1896 | } |
b0d623f7 | 1897 | |
0a7de745 | 1898 | out: |
39037602 | 1899 | if (error) { |
cb323159 | 1900 | knote_set_error(kn, error); |
39037602 A |
1901 | } |
1902 | ||
1903 | return result; | |
55e303ae A |
1904 | } |
1905 | ||
b0d623f7 A |
1906 | static void |
1907 | filt_vndetach(struct knote *kn) | |
55e303ae | 1908 | { |
b0d623f7 | 1909 | vfs_context_t ctx = vfs_context_current(); |
cb323159 A |
1910 | struct vnode *vp = (struct vnode *)kn->kn_hook; |
1911 | uint32_t vid = vnode_vid(vp); | |
1912 | if (vnode_getwithvid(vp, vid)) { | |
b0d623f7 | 1913 | return; |
0a7de745 | 1914 | } |
b0d623f7 A |
1915 | |
1916 | vnode_lock(vp); | |
1917 | KNOTE_DETACH(&vp->v_knotes, kn); | |
1918 | vnode_unlock(vp); | |
0a7de745 A |
1919 | |
1920 | /* | |
1921 | * Tell a (generally networked) filesystem that we're no longer watching | |
b0d623f7 A |
1922 | * If the FS wants to track contexts, it should still be using the one from |
1923 | * the VNODE_MONITOR_BEGIN. | |
1924 | */ | |
1925 | VNOP_MONITOR(vp, 0, VNODE_MONITOR_END, (void*)kn, ctx); | |
1926 | vnode_put(vp); | |
1927 | } | |
91447636 | 1928 | |
91447636 | 1929 | |
b0d623f7 A |
1930 | /* |
1931 | * Used for EVFILT_READ | |
1932 | * | |
1933 | * Takes only VFIFO or VREG. vnode is locked. We handle the "poll" case | |
1934 | * differently than the regular case for VREG files. If not in poll(), | |
1935 | * then we need to know current fileproc offset for VREG. | |
1936 | */ | |
d9a64523 | 1937 | static int64_t |
b0d623f7 A |
1938 | vnode_readable_data_count(vnode_t vp, off_t current_offset, int ispoll) |
1939 | { | |
1940 | if (vnode_isfifo(vp)) { | |
39236c6e | 1941 | #if FIFO |
b0d623f7 A |
1942 | int cnt; |
1943 | int err = fifo_charcount(vp, &cnt); | |
1944 | if (err == 0) { | |
d9a64523 | 1945 | return (int64_t)cnt; |
0a7de745 | 1946 | } else |
39236c6e A |
1947 | #endif |
1948 | { | |
d9a64523 | 1949 | return 0; |
b0d623f7 A |
1950 | } |
1951 | } else if (vnode_isreg(vp)) { | |
1952 | if (ispoll) { | |
d9a64523 | 1953 | return 1; |
b0d623f7 A |
1954 | } |
1955 | ||
1956 | off_t amount; | |
1957 | amount = vp->v_un.vu_ubcinfo->ui_size - current_offset; | |
d9a64523 A |
1958 | if (amount > INT64_MAX) { |
1959 | return INT64_MAX; | |
1960 | } else if (amount < INT64_MIN) { | |
1961 | return INT64_MIN; | |
b0d623f7 | 1962 | } else { |
d9a64523 | 1963 | return (int64_t)amount; |
0a7de745 | 1964 | } |
b0d623f7 A |
1965 | } else { |
1966 | panic("Should never have an EVFILT_READ except for reg or fifo."); | |
1967 | return 0; | |
91447636 | 1968 | } |
55e303ae | 1969 | } |
b0d623f7 A |
1970 | |
1971 | /* | |
0a7de745 | 1972 | * Used for EVFILT_WRITE. |
b0d623f7 A |
1973 | * |
1974 | * For regular vnodes, we can always write (1). For named pipes, | |
1975 | * see how much space there is in the buffer. Nothing else is covered. | |
1976 | */ | |
1977 | static intptr_t | |
0a7de745 | 1978 | vnode_writable_space_count(vnode_t vp) |
b0d623f7 A |
1979 | { |
1980 | if (vnode_isfifo(vp)) { | |
39236c6e | 1981 | #if FIFO |
b0d623f7 A |
1982 | long spc; |
1983 | int err = fifo_freespace(vp, &spc); | |
1984 | if (err == 0) { | |
1985 | return (intptr_t)spc; | |
0a7de745 | 1986 | } else |
39236c6e A |
1987 | #endif |
1988 | { | |
b0d623f7 A |
1989 | return (intptr_t)0; |
1990 | } | |
1991 | } else if (vnode_isreg(vp)) { | |
1992 | return (intptr_t)1; | |
1993 | } else { | |
1994 | panic("Should never have an EVFILT_READ except for reg or fifo."); | |
1995 | return 0; | |
1996 | } | |
1997 | } | |
1998 | ||
0a7de745 | 1999 | /* |
b0d623f7 | 2000 | * Determine whether this knote should be active |
0a7de745 A |
2001 | * |
2002 | * This is kind of subtle. | |
2003 | * --First, notice if the vnode has been revoked: in so, override hint | |
2004 | * --EVFILT_READ knotes are checked no matter what the hint is | |
2005 | * --Other knotes activate based on hint. | |
2006 | * --If hint is revoke, set special flags and activate | |
b0d623f7 A |
2007 | */ |
2008 | static int | |
cb323159 | 2009 | filt_vnode_common(struct knote *kn, struct kevent_qos_s *kev, vnode_t vp, long hint) |
b0d623f7 | 2010 | { |
b0d623f7 | 2011 | int activate = 0; |
cb323159 | 2012 | int64_t data = 0; |
b0d623f7 | 2013 | |
39037602 | 2014 | lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED); |
b0d623f7 | 2015 | |
6d2010ae | 2016 | /* Special handling for vnodes that are in recycle or already gone */ |
b0d623f7 A |
2017 | if (NOTE_REVOKE == hint) { |
2018 | kn->kn_flags |= (EV_EOF | EV_ONESHOT); | |
2019 | activate = 1; | |
2020 | ||
2021 | if ((kn->kn_filter == EVFILT_VNODE) && (kn->kn_sfflags & NOTE_REVOKE)) { | |
2022 | kn->kn_fflags |= NOTE_REVOKE; | |
2023 | } | |
2024 | } else { | |
0a7de745 A |
2025 | switch (kn->kn_filter) { |
2026 | case EVFILT_READ: | |
f427ee49 | 2027 | data = vnode_readable_data_count(vp, kn->kn_fp->fp_glob->fg_offset, (kn->kn_flags & EV_POLL)); |
cb323159 | 2028 | activate = (data != 0); |
0a7de745 A |
2029 | break; |
2030 | case EVFILT_WRITE: | |
cb323159 A |
2031 | data = vnode_writable_space_count(vp); |
2032 | activate = (data != 0); | |
0a7de745 A |
2033 | break; |
2034 | case EVFILT_VNODE: | |
2035 | /* Check events this note matches against the hint */ | |
2036 | if (kn->kn_sfflags & hint) { | |
2037 | kn->kn_fflags |= hint; /* Set which event occurred */ | |
2038 | } | |
cb323159 | 2039 | activate = (kn->kn_fflags != 0); |
0a7de745 A |
2040 | break; |
2041 | default: | |
2042 | panic("Invalid knote filter on a vnode!\n"); | |
b0d623f7 A |
2043 | } |
2044 | } | |
cb323159 A |
2045 | |
2046 | if (kev && activate) { | |
2047 | knote_fill_kevent(kn, kev, data); | |
2048 | } | |
2049 | ||
0a7de745 | 2050 | return activate; |
39037602 | 2051 | } |
b0d623f7 | 2052 | |
39037602 A |
2053 | static int |
2054 | filt_vnode(struct knote *kn, long hint) | |
2055 | { | |
2056 | vnode_t vp = (struct vnode *)kn->kn_hook; | |
2057 | ||
cb323159 | 2058 | return filt_vnode_common(kn, NULL, vp, hint); |
39037602 A |
2059 | } |
2060 | ||
2061 | static int | |
cb323159 | 2062 | filt_vntouch(struct knote *kn, struct kevent_qos_s *kev) |
39037602 A |
2063 | { |
2064 | vnode_t vp = (struct vnode *)kn->kn_hook; | |
cb323159 | 2065 | uint32_t vid = vnode_vid(vp); |
39037602 A |
2066 | int activate; |
2067 | int hint = 0; | |
2068 | ||
2069 | vnode_lock(vp); | |
cb323159 | 2070 | if (vnode_getiocount(vp, vid, VNODE_NODEAD | VNODE_WITHID) != 0) { |
39037602 A |
2071 | /* is recycled */ |
2072 | hint = NOTE_REVOKE; | |
2073 | } | |
2074 | ||
2075 | /* accept new input fflags mask */ | |
2076 | kn->kn_sfflags = kev->fflags; | |
39037602 | 2077 | |
cb323159 | 2078 | activate = filt_vnode_common(kn, NULL, vp, hint); |
39037602 | 2079 | |
0a7de745 | 2080 | if (hint == 0) { |
39037602 | 2081 | vnode_put_locked(vp); |
0a7de745 | 2082 | } |
39037602 A |
2083 | vnode_unlock(vp); |
2084 | ||
2085 | return activate; | |
2086 | } | |
2087 | ||
2088 | static int | |
cb323159 | 2089 | filt_vnprocess(struct knote *kn, struct kevent_qos_s *kev) |
39037602 | 2090 | { |
39037602 | 2091 | vnode_t vp = (struct vnode *)kn->kn_hook; |
cb323159 | 2092 | uint32_t vid = vnode_vid(vp); |
39037602 A |
2093 | int activate; |
2094 | int hint = 0; | |
2095 | ||
2096 | vnode_lock(vp); | |
cb323159 | 2097 | if (vnode_getiocount(vp, vid, VNODE_NODEAD | VNODE_WITHID) != 0) { |
39037602 A |
2098 | /* Is recycled */ |
2099 | hint = NOTE_REVOKE; | |
0a7de745 | 2100 | } |
cb323159 | 2101 | activate = filt_vnode_common(kn, kev, vp, hint); |
6d2010ae | 2102 | |
39037602 | 2103 | /* Definitely need to unlock, may need to put */ |
0a7de745 | 2104 | if (hint == 0) { |
39037602 | 2105 | vnode_put_locked(vp); |
0a7de745 | 2106 | } |
39037602 A |
2107 | vnode_unlock(vp); |
2108 | ||
2109 | return activate; | |
b0d623f7 | 2110 | } |