1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Copyright (c) 1989, 1993, 1995
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Poul-Henning Kamp of the FreeBSD Project.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 *
65 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
66 */
67 /*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/time.h>
76 #include <sys/mount_internal.h>
77 #include <sys/vnode_internal.h>
78 #include <miscfs/specfs/specdev.h>
79 #include <sys/namei.h>
80 #include <sys/errno.h>
81 #include <sys/malloc.h>
82 #include <sys/kauth.h>
83 #include <sys/user.h>
84 #include <sys/paths.h>
85 #include <os/overflow.h>
86
87 #if CONFIG_MACF
88 #include <security/mac_framework.h>
89 #endif
90
91 /*
92 * Name caching works as follows:
93 *
94 * Names found by directory scans are retained in a cache
95 * for future reference. It is managed LRU, so frequently
96 * used names will hang around. The cache is indexed by a hash value
97 * obtained from (vp, name) where vp refers to the directory
98 * containing name.
99 *
100 * If it is a "negative" entry (i.e. for a name that is known NOT to
101 * exist) the vnode pointer will be NULL.
102 *
103 * Upon reaching the last segment of a path, if the reference
104 * is for DELETE, or NOCACHE is set (rewrite), and the
105 * name is located in the cache, it will be dropped.
106 */
107
108 /*
109 * Structures associated with name caching.
110 */
111
112 LIST_HEAD(nchashhead, namecache) * nchashtbl; /* Hash Table */
113 u_long nchashmask;
114 u_long nchash; /* size of hash table - 1 */
115 long numcache; /* number of cache entries allocated */
116 int desiredNodes;
117 int desiredNegNodes;
118 int ncs_negtotal;
119 int nc_disabled = 0;
120 TAILQ_HEAD(, namecache) nchead; /* chain of all name cache entries */
121 TAILQ_HEAD(, namecache) neghead; /* chain of only negative cache entries */
122
123
124 #if COLLECT_STATS
125
126 struct nchstats nchstats; /* cache effectiveness statistics */
127
128 #define NCHSTAT(v) { \
129 nchstats.v++; \
130 }
131 #define NAME_CACHE_LOCK() name_cache_lock()
132 #define NAME_CACHE_UNLOCK() name_cache_unlock()
133 #define NAME_CACHE_LOCK_SHARED() name_cache_lock()
134
135 #else
136
137 #define NCHSTAT(v)
138 #define NAME_CACHE_LOCK() name_cache_lock()
139 #define NAME_CACHE_UNLOCK() name_cache_unlock()
140 #define NAME_CACHE_LOCK_SHARED() name_cache_lock_shared()
141
142 #endif
143
144
145 /* vars for name cache list lock */
146 lck_grp_t * namecache_lck_grp;
147 lck_grp_attr_t * namecache_lck_grp_attr;
148 lck_attr_t * namecache_lck_attr;
149
150 lck_grp_t * strcache_lck_grp;
151 lck_grp_attr_t * strcache_lck_grp_attr;
152 lck_attr_t * strcache_lck_attr;
153
154 lck_rw_t * namecache_rw_lock;
155 lck_rw_t * strtable_rw_lock;
156
157 #define NUM_STRCACHE_LOCKS 1024
158
159 lck_mtx_t strcache_mtx_locks[NUM_STRCACHE_LOCKS];
160
161
162 static vnode_t cache_lookup_locked(vnode_t dvp, struct componentname *cnp);
163 static const char *add_name_internal(const char *, uint32_t, u_int, boolean_t, u_int);
164 static void init_string_table(void);
165 static void cache_delete(struct namecache *, int);
166 static void cache_enter_locked(vnode_t dvp, vnode_t vp, struct componentname *cnp, const char *strname);
167
168 #ifdef DUMP_STRING_TABLE
169 /*
170 * Internal dump function used for debugging
171 */
172 void dump_string_table(void);
173 #endif /* DUMP_STRING_TABLE */
174
175 static void init_crc32(void);
176 static unsigned int crc32tab[256];
177
178
179 #define NCHHASH(dvp, hash_val) \
180 (&nchashtbl[(dvp->v_id ^ (hash_val)) & nchashmask])
181
182 /*
183 * This function tries to check if a directory vp is a subdirectory of dvp
184 * only from valid v_parent pointers. It is called with the name cache lock
185 * held and does not drop the lock anytime inside the function.
186 *
187 * It returns a boolean that indicates whether the decision had to be
188 * deferred: TRUE if the parent/descendant relationship could not be
189 * inferred from the v_parent pointers alone and the decision must be
190 * delegated to the owning filesystem, FALSE otherwise.
191 *
192 * If it does not defer the decision, i.e. it was successfully able to
193 * determine the parent/descendant relationship, *is_subdir tells the
194 * caller if vp is a subdirectory of dvp.
195 *
196 * If the decision is deferred, *next_vp is where it stopped, i.e. *next_vp
197 * is the vnode whose parent is to be determined from the filesystem.
198 * *is_subdir, in this case, is not indicative of anything and should be
199 * ignored.
200 *
201 * The return value and output args should be used as follows :
202 *
203 * defer = cache_check_vnode_issubdir(vp, dvp, is_subdir, next_vp);
204 * if (!defer) {
205 * if (*is_subdir)
206 * vp is subdirectory;
207 * else
208 * vp is not a subdirectory;
209 * } else {
210 * if (*next_vp)
211 * check this vnode's parent from the filesystem
212 * else
213 * error (likely because of forced unmount).
214 * }
215 *
216 */
217 static boolean_t
218 cache_check_vnode_issubdir(vnode_t vp, vnode_t dvp, boolean_t *is_subdir,
219 vnode_t *next_vp)
220 {
221 vnode_t tvp = vp;
222 int defer = FALSE;
223
224 *is_subdir = FALSE;
225 *next_vp = NULLVP;
226 while (1) {
227 mount_t tmp;
228
229 if (tvp == dvp) {
230 *is_subdir = TRUE;
231 break;
232 } else if (tvp == rootvnode) {
233 /* *is_subdir = FALSE */
234 break;
235 }
236
237 tmp = tvp->v_mount;
238 while ((tvp->v_flag & VROOT) && tmp && tmp->mnt_vnodecovered &&
239 tvp != dvp && tvp != rootvnode) {
240 tvp = tmp->mnt_vnodecovered;
241 tmp = tvp->v_mount;
242 }
243
244 /*
245 * If dvp is not at the top of a mount "stack" then
246 * vp is not a subdirectory of dvp either.
247 */
248 if (tvp == dvp || tvp == rootvnode) {
249 /* *is_subdir = FALSE */
250 break;
251 }
252
253 if (!tmp) {
254 defer = TRUE;
255 *next_vp = NULLVP;
256 break;
257 }
258
259 if ((tvp->v_flag & VISHARDLINK) || !(tvp->v_parent)) {
260 defer = TRUE;
261 *next_vp = tvp;
262 break;
263 }
264
265 tvp = tvp->v_parent;
266 }
267
268 return defer;
269 }
270
271 /* maximum number of retries for potentially transient errors in vnode_issubdir */
272 #define MAX_ERROR_RETRY 3
273
274 /*
275 * This function checks if a given directory (vp) is a subdirectory of dvp.
276 * It walks backwards from vp and if it hits dvp in its parent chain,
277 * it is a subdirectory. If it encounters the root directory, it is not
278 * a subdirectory.
279 *
280 * This function returns an error if it is unsuccessful and 0 on success.
281 *
282 * On entry (and exit) vp has an iocount and if this function has to take
283 * any iocounts on other vnodes in the parent chain traversal, it releases them.
284 */
285 int
286 vnode_issubdir(vnode_t vp, vnode_t dvp, int *is_subdir, vfs_context_t ctx)
287 {
288 vnode_t start_vp, tvp;
289 vnode_t vp_with_iocount;
290 int error = 0;
291 char dotdotbuf[] = "..";
292 int error_retry_count = 0; /* retry count for potentially transient
293 * errors */
294
295 *is_subdir = FALSE;
296 tvp = start_vp = vp;
297 /*
298 * Anytime we acquire an iocount in this function, we save the vnode
299 * in this variable and release it before exiting.
300 */
301 vp_with_iocount = NULLVP;
302
303 while (1) {
304 boolean_t defer;
305 vnode_t pvp;
306 uint32_t vid;
307 struct componentname cn;
308 boolean_t is_subdir_locked = FALSE;
309
310 if (tvp == dvp) {
311 *is_subdir = TRUE;
312 break;
313 } else if (tvp == rootvnode) {
314 /* *is_subdir = FALSE */
315 break;
316 }
317
318 NAME_CACHE_LOCK_SHARED();
319
320 defer = cache_check_vnode_issubdir(tvp, dvp, &is_subdir_locked,
321 &tvp);
322
323 if (defer && tvp) {
324 vid = vnode_vid(tvp);
325 }
326
327 NAME_CACHE_UNLOCK();
328
329 if (!defer) {
330 *is_subdir = is_subdir_locked;
331 break;
332 }
333
334 if (!tvp) {
335 if (error_retry_count++ < MAX_ERROR_RETRY) {
336 tvp = vp;
337 continue;
338 }
339 error = ENOENT;
340 break;
341 }
342
343 if (tvp != start_vp) {
344 if (vp_with_iocount) {
345 vnode_put(vp_with_iocount);
346 vp_with_iocount = NULLVP;
347 }
348
349 error = vnode_getwithvid(tvp, vid);
350 if (error) {
351 if (error_retry_count++ < MAX_ERROR_RETRY) {
352 tvp = vp;
353 error = 0;
354 continue;
355 }
356 break;
357 }
358
359 vp_with_iocount = tvp;
360 }
361
362 bzero(&cn, sizeof(cn));
363 cn.cn_nameiop = LOOKUP;
364 cn.cn_flags = ISLASTCN | ISDOTDOT;
365 cn.cn_context = ctx;
366 cn.cn_pnbuf = &dotdotbuf[0];
367 cn.cn_pnlen = sizeof(dotdotbuf);
368 cn.cn_nameptr = cn.cn_pnbuf;
369 cn.cn_namelen = 2;
370
371 pvp = NULLVP;
372 if ((error = VNOP_LOOKUP(tvp, &pvp, &cn, ctx))) {
373 break;
374 }
375
376 if (!(tvp->v_flag & VISHARDLINK) && tvp->v_parent != pvp) {
377 (void)vnode_update_identity(tvp, pvp, NULL, 0, 0,
378 VNODE_UPDATE_PARENT);
379 }
380
381 if (vp_with_iocount) {
382 vnode_put(vp_with_iocount);
383 }
384
385 vp_with_iocount = tvp = pvp;
386 }
387
388 if (vp_with_iocount) {
389 vnode_put(vp_with_iocount);
390 }
391
392 return error;
393 }
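/*
 * Illustrative sketch (not part of the original source): a caller that
 * already holds iocounts on both vnodes might use vnode_issubdir to
 * reject an attempt to move a directory beneath itself; 'tdvp', 'fvp'
 * and the surrounding error handling are hypothetical.
 *
 *	int is_subdir = 0;
 *	int error;
 *
 *	error = vnode_issubdir(tdvp, fvp, &is_subdir, ctx);
 *	if (error)
 *		return error;		// couldn't walk the parent chain
 *	if (is_subdir)
 *		return EINVAL;		// tdvp lives beneath fvp, disallow
 */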
394
395 /*
396 * This function builds the path in "buff" from the supplied vnode.
397 * The length of the buffer *INCLUDING* the trailing zero byte is
398 * returned in outlen. NOTE: the length includes the trailing zero
399 * byte and thus the length is one greater than what strlen would
400 * return. This is important and lots of code elsewhere in the kernel
401 * assumes this behavior.
402 *
403 * This function can call vnop in file system if the parent vnode
404 * does not exist or when called for hardlinks via volfs path.
405 * If BUILDPATH_NO_FS_ENTER is set in flags, it only uses values present
406 * in the name cache and does not enter the file system.
407 *
408 * If BUILDPATH_CHECK_MOVED is set in flags, we return EAGAIN when
409 * we encounter ENOENT during path reconstruction. ENOENT means that
410 * one of the parents moved while we were building the path. The
411 * caller can special handle this case by calling build_path again.
412 *
413 * If BUILDPATH_VOLUME_RELATIVE is set in flags, we return path
414 * that is relative to the nearest mount point, i.e. do not
415 * cross over mount points during building the path.
416 *
417 * The passed-in vp must have a valid io_count reference.
418 *
419 * If parent vnode is non-NULL it also must have an io count. This
420 * allows build_path_with_parent to be safely called for operations
421 * unlink, rmdir and rename that already have io counts on the target
422 * and the directory. In this way build_path_with_parent does not have
423 * to try and obtain an additional io count on the parent. Taking an
424 * io count on the parent can lead to deadlock if a forced unmount
425 * occurs at the right moment. For a fuller explanation of how this
426 * can occur, see the comment for vn_getpath_with_parent.
427 *
428 */
429 int
430 build_path_with_parent(vnode_t first_vp, vnode_t parent_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx)
431 {
432 vnode_t vp, tvp;
433 vnode_t vp_with_iocount;
434 vnode_t proc_root_dir_vp;
435 char *end;
436 const char *str;
437 int len;
438 int ret = 0;
439 int fixhardlink;
440
441 if (first_vp == NULLVP) {
442 return EINVAL;
443 }
444
445 if (buflen <= 1) {
446 return ENOSPC;
447 }
448
449 /*
450 * Grab the process fd so we can evaluate fd_rdir.
451 */
452 if (vfs_context_proc(ctx)->p_fd) {
453 proc_root_dir_vp = vfs_context_proc(ctx)->p_fd->fd_rdir;
454 } else {
455 proc_root_dir_vp = NULL;
456 }
457
458 vp_with_iocount = NULLVP;
459 again:
460 vp = first_vp;
461
462 end = &buff[buflen - 1];
463 *end = '\0';
464
465 /*
466 * holding the NAME_CACHE_LOCK in shared mode is
467 * sufficient to stabilize both the vp->v_parent chain
468 * and the 'vp->v_mount->mnt_vnodecovered' chain
469 *
470 * if we need to drop this lock, we must first grab the v_id
471 * from the vnode we're currently working with... if that
472 * vnode doesn't already have an io_count reference (the vp
473 * passed in comes with one), we must grab a reference
474 * after we drop the NAME_CACHE_LOCK via vnode_getwithvid...
475 * deadlocks may result if you call vnode_get while holding
476 * the NAME_CACHE_LOCK... we lazily release the reference
477 * we pick up the next time we encounter a need to drop
478 * the NAME_CACHE_LOCK or before we return from this routine
479 */
480 NAME_CACHE_LOCK_SHARED();
481
482 /*
483 * Check if this is the root of a file system.
484 */
485 while (vp && vp->v_flag & VROOT) {
486 if (vp->v_mount == NULL) {
487 ret = EINVAL;
488 goto out_unlock;
489 }
490 if ((vp->v_mount->mnt_flag & MNT_ROOTFS) || (vp == proc_root_dir_vp)) {
491 /*
492 * It's the root of the root file system, so it's
493 * just "/".
494 */
495 *--end = '/';
496
497 goto out_unlock;
498 } else {
499 /*
500 * This is the root of the volume and the caller does not
501 * want to cross mount points. Therefore just return
502 * '/' as the relative path.
503 */
504 if (flags & BUILDPATH_VOLUME_RELATIVE) {
505 *--end = '/';
506 goto out_unlock;
507 } else {
508 vp = vp->v_mount->mnt_vnodecovered;
509 }
510 }
511 }
512
513 while ((vp != NULLVP) && (vp->v_parent != vp)) {
514 int vid;
515
516 /*
517 * For hardlinks the v_name may be stale, so if it's OK
518 * to enter a file system, ask the file system for the
519 * name and parent (below).
520 */
521 fixhardlink = (vp->v_flag & VISHARDLINK) &&
522 (vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID) &&
523 !(flags & BUILDPATH_NO_FS_ENTER);
524
525 if (!fixhardlink) {
526 str = vp->v_name;
527
528 if (str == NULL || *str == '\0') {
529 if (vp->v_parent != NULL) {
530 ret = EINVAL;
531 } else {
532 ret = ENOENT;
533 }
534 goto out_unlock;
535 }
536 len = strlen(str);
537 /*
538 * Check that there's enough space (including space for the '/')
539 */
540 if ((end - buff) < (len + 1)) {
541 ret = ENOSPC;
542 goto out_unlock;
543 }
544 /*
545 * Copy the name backwards.
546 */
547 str += len;
548
549 for (; len > 0; len--) {
550 *--end = *--str;
551 }
552 /*
553 * Add a path separator.
554 */
555 *--end = '/';
556 }
557
558 /*
559 * Walk up the parent chain.
560 */
561 if (((vp->v_parent != NULLVP) && !fixhardlink) ||
562 (flags & BUILDPATH_NO_FS_ENTER)) {
563 /*
564 * In this if () block we are not allowed to enter the filesystem
565 * to conclusively get the most accurate parent identifier.
566 * As a result, if 'vp' does not identify '/' and it
567 * does not have a valid v_parent, then error out
568 * and disallow further path construction
569 */
570 if ((vp->v_parent == NULLVP) && (rootvnode != vp)) {
571 /*
572 * Only '/' is allowed to have a NULL parent
573 * pointer. Upper level callers should ideally
574 * re-drive name lookup on receiving a ENOENT.
575 */
576 ret = ENOENT;
577
578 /* The code below will exit early if 'tvp = vp' == NULL */
579 }
580 vp = vp->v_parent;
581
582 /*
583 * if the vnode we have in hand isn't a directory and it
584 * has a v_parent, then we started with the resource fork
585 * so skip up to avoid getting a duplicate copy of the
586 * file name in the path.
587 */
588 if (vp && !vnode_isdir(vp) && vp->v_parent) {
589 vp = vp->v_parent;
590 }
591 } else {
592 /*
593 * No parent, go get it if supported.
594 */
595 struct vnode_attr va;
596 vnode_t dvp;
597
598 /*
599 * Make sure file system supports obtaining a path from id.
600 */
601 if (!(vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID)) {
602 ret = ENOENT;
603 goto out_unlock;
604 }
605 vid = vp->v_id;
606
607 NAME_CACHE_UNLOCK();
608
609 if (vp != first_vp && vp != parent_vp && vp != vp_with_iocount) {
610 if (vp_with_iocount) {
611 vnode_put(vp_with_iocount);
612 vp_with_iocount = NULLVP;
613 }
614 if (vnode_getwithvid(vp, vid)) {
615 goto again;
616 }
617 vp_with_iocount = vp;
618 }
619 VATTR_INIT(&va);
620 VATTR_WANTED(&va, va_parentid);
621
622 if (fixhardlink) {
623 VATTR_WANTED(&va, va_name);
624 MALLOC_ZONE(va.va_name, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
625 } else {
626 va.va_name = NULL;
627 }
628 /*
629 * Ask the file system for its parent id and for its name (optional).
630 */
631 ret = vnode_getattr(vp, &va, ctx);
632
633 if (fixhardlink) {
634 if ((ret == 0) && (VATTR_IS_SUPPORTED(&va, va_name))) {
635 str = va.va_name;
636 vnode_update_identity(vp, NULL, str, strlen(str), 0, VNODE_UPDATE_NAME);
637 } else if (vp->v_name) {
638 str = vp->v_name;
639 ret = 0;
640 } else {
641 ret = ENOENT;
642 goto bad_news;
643 }
644 len = strlen(str);
645
646 /*
647 * Check that there's enough space.
648 */
649 if ((end - buff) < (len + 1)) {
650 ret = ENOSPC;
651 } else {
652 /* Copy the name backwards. */
653 str += len;
654
655 for (; len > 0; len--) {
656 *--end = *--str;
657 }
658 /*
659 * Add a path separator.
660 */
661 *--end = '/';
662 }
663 bad_news:
664 FREE_ZONE(va.va_name, MAXPATHLEN, M_NAMEI);
665 }
666 if (ret || !VATTR_IS_SUPPORTED(&va, va_parentid)) {
667 ret = ENOENT;
668 goto out;
669 }
670 /*
671 * Ask the file system for the parent vnode.
672 */
673 if ((ret = VFS_VGET(vp->v_mount, (ino64_t)va.va_parentid, &dvp, ctx))) {
674 goto out;
675 }
676
677 if (!fixhardlink && (vp->v_parent != dvp)) {
678 vnode_update_identity(vp, dvp, NULL, 0, 0, VNODE_UPDATE_PARENT);
679 }
680
681 if (vp_with_iocount) {
682 vnode_put(vp_with_iocount);
683 }
684 vp = dvp;
685 vp_with_iocount = vp;
686
687 NAME_CACHE_LOCK_SHARED();
688
689 /*
690 * if the vnode we have in hand isn't a directory and it
691 * has a v_parent, then we started with the resource fork
692 * so skip up to avoid getting a duplicate copy of the
693 * file name in the path.
694 */
695 if (vp && !vnode_isdir(vp) && vp->v_parent) {
696 vp = vp->v_parent;
697 }
698 }
699
700 if (vp && (flags & BUILDPATH_CHECKACCESS)) {
701 vid = vp->v_id;
702
703 NAME_CACHE_UNLOCK();
704
705 if (vp != first_vp && vp != parent_vp && vp != vp_with_iocount) {
706 if (vp_with_iocount) {
707 vnode_put(vp_with_iocount);
708 vp_with_iocount = NULLVP;
709 }
710 if (vnode_getwithvid(vp, vid)) {
711 goto again;
712 }
713 vp_with_iocount = vp;
714 }
715 if ((ret = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx))) {
716 goto out; /* no peeking */
717 }
718 NAME_CACHE_LOCK_SHARED();
719 }
720
721 /*
722 * When a mount point is crossed switch the vp.
723 * Continue until we find the root or we find
724 * a vnode that's not the root of a mounted
725 * file system.
726 */
727 tvp = vp;
728
729 while (tvp) {
730 if (tvp == proc_root_dir_vp) {
731 goto out_unlock; /* encountered the root */
732 }
733 if (!(tvp->v_flag & VROOT) || !tvp->v_mount) {
734 break; /* not the root of a mounted FS */
735 }
736 if (flags & BUILDPATH_VOLUME_RELATIVE) {
737 /* Do not cross over mount points */
738 tvp = NULL;
739 } else {
740 tvp = tvp->v_mount->mnt_vnodecovered;
741 }
742 }
743 if (tvp == NULLVP) {
744 goto out_unlock;
745 }
746 vp = tvp;
747 }
748 out_unlock:
749 NAME_CACHE_UNLOCK();
750 out:
751 if (vp_with_iocount) {
752 vnode_put(vp_with_iocount);
753 }
754 /*
755 * Slide the name down to the beginning of the buffer.
756 */
757 memmove(buff, end, &buff[buflen] - end);
758
759 /*
760 * length includes the trailing zero byte
761 */
762 *outlen = &buff[buflen] - end;
763
764 /* One of the parents was moved during path reconstruction.
765 * The caller is interested in knowing whether any of the
766 * parents moved via BUILDPATH_CHECK_MOVED, so return EAGAIN.
767 */
768 if ((ret == ENOENT) && (flags & BUILDPATH_CHECK_MOVED)) {
769 ret = EAGAIN;
770 }
771
772 return ret;
773 }
774
775 int
776 build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx)
777 {
778 return build_path_with_parent(first_vp, NULL, buff, buflen, outlen, flags, ctx);
779 }
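/*
 * Illustrative sketch (an assumption, not original source): a typical
 * caller allocates a MAXPATHLEN buffer and, having asked for
 * BUILDPATH_CHECK_MOVED, retries on EAGAIN since that signals a
 * concurrent rename of one of the parents. 'vp' is assumed to carry
 * an iocount, as required above.
 *
 *	char *path;
 *	int len, error;
 *
 *	MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
 *	do {
 *		error = build_path(vp, path, MAXPATHLEN, &len,
 *		    BUILDPATH_CHECK_MOVED, ctx);
 *	} while (error == EAGAIN);
 *	// on success, 'len' counts the trailing NUL byte
 *	FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
 */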
780
781 /*
782 * return NULLVP if vp's parent doesn't
783 * exist, or we can't get a valid iocount
784 * else return the parent of vp
785 */
786 vnode_t
787 vnode_getparent(vnode_t vp)
788 {
789 vnode_t pvp = NULLVP;
790 int pvid;
791
792 NAME_CACHE_LOCK_SHARED();
793 /*
794 * v_parent is stable behind the name_cache lock
795 * however, the only thing we can really guarantee
796 * is that we've grabbed a valid iocount on the
797 * parent of 'vp' at the time we took the name_cache lock...
798 * once we drop the lock, vp could get re-parented
799 */
800 if ((pvp = vp->v_parent) != NULLVP) {
801 pvid = pvp->v_id;
802
803 NAME_CACHE_UNLOCK();
804
805 if (vnode_getwithvid(pvp, pvid) != 0) {
806 pvp = NULL;
807 }
808 } else {
809 NAME_CACHE_UNLOCK();
810 }
811 return pvp;
812 }
813
814 const char *
815 vnode_getname(vnode_t vp)
816 {
817 const char *name = NULL;
818
819 NAME_CACHE_LOCK_SHARED();
820
821 if (vp->v_name) {
822 name = vfs_addname(vp->v_name, strlen(vp->v_name), 0, 0);
823 }
824 NAME_CACHE_UNLOCK();
825
826 return name;
827 }
828
829 void
830 vnode_putname(const char *name)
831 {
832 vfs_removename(name);
833 }
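/*
 * Illustrative sketch (not original source): vnode_getname returns a
 * referenced copy of v_name (or NULL), and each successful call must
 * be balanced with vnode_putname to drop the string reference.
 *
 *	const char *name = vnode_getname(vp);
 *	if (name != NULL) {
 *		printf("operating on %s\n", name);
 *		vnode_putname(name);
 *	}
 */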
834
835 static const char unknown_vnodename[] = "(unknown vnode name)";
836
837 const char *
838 vnode_getname_printable(vnode_t vp)
839 {
840 const char *name = vnode_getname(vp);
841 if (name != NULL) {
842 return name;
843 }
844
845 switch (vp->v_type) {
846 case VCHR:
847 case VBLK:
848 {
849 /*
850 * Create an artificial dev name from
851 * major and minor device number
852 */
853 char dev_name[64];
854 (void) snprintf(dev_name, sizeof(dev_name),
855 "%c(%u, %u)", VCHR == vp->v_type ? 'c':'b',
856 major(vp->v_rdev), minor(vp->v_rdev));
857 /*
858 * Add the newly created dev name to the name
859 * cache to allow easier cleanup. Also,
860 * vfs_addname allocates memory for the new name
861 * and returns it.
862 */
863 NAME_CACHE_LOCK_SHARED();
864 name = vfs_addname(dev_name, strlen(dev_name), 0, 0);
865 NAME_CACHE_UNLOCK();
866 return name;
867 }
868 default:
869 return unknown_vnodename;
870 }
871 }
872
873 void
874 vnode_putname_printable(const char *name)
875 {
876 if (name == unknown_vnodename) {
877 return;
878 }
879 vnode_putname(name);
880 }
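/*
 * Illustrative sketch (not original source): unlike vnode_getname,
 * vnode_getname_printable never returns NULL (it falls back to a
 * generated device name or a placeholder), so it can be used directly
 * in diagnostics; it must still be balanced with
 * vnode_putname_printable.
 *
 *	const char *name = vnode_getname_printable(vp);
 *	printf("vnode %p (%s)\n", vp, name);
 *	vnode_putname_printable(name);
 */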
881
882
883 /*
884 * if VNODE_UPDATE_PARENT, and we can take
885 * a reference on dvp, then update vp with
886 * its new parent... if vp already has a parent,
887 * then drop the reference vp held on it
888 *
889 * if VNODE_UPDATE_NAME,
890 * then drop string ref on v_name if it exists, and if name is non-NULL
891 * then pick up a string reference on name and record it in v_name...
892 * optionally pass in the length and hashval of name if known
893 *
894 * if VNODE_UPDATE_CACHE, flush the name cache entries associated with vp
895 */
896 void
897 vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, uint32_t name_hashval, int flags)
898 {
899 struct namecache *ncp;
900 vnode_t old_parentvp = NULLVP;
901 int isstream = (vp->v_flag & VISNAMEDSTREAM);
902 int kusecountbumped = 0;
903 kauth_cred_t tcred = NULL;
904 const char *vname = NULL;
905 const char *tname = NULL;
906
907 if (flags & VNODE_UPDATE_PARENT) {
908 if (dvp && vnode_ref(dvp) != 0) {
909 dvp = NULLVP;
910 }
911 /* Don't count a stream's parent ref during unmounts */
912 if (isstream && dvp && (dvp != vp) && (dvp != vp->v_parent) && (dvp->v_type == VREG)) {
913 vnode_lock_spin(dvp);
914 ++dvp->v_kusecount;
915 kusecountbumped = 1;
916 vnode_unlock(dvp);
917 }
918 } else {
919 dvp = NULLVP;
920 }
921 if ((flags & VNODE_UPDATE_NAME)) {
922 if (name != vp->v_name) {
923 if (name && *name) {
924 if (name_len == 0) {
925 name_len = strlen(name);
926 }
927 tname = vfs_addname(name, name_len, name_hashval, 0);
928 }
929 } else {
930 flags &= ~VNODE_UPDATE_NAME;
931 }
932 }
933 if ((flags & (VNODE_UPDATE_PURGE | VNODE_UPDATE_PARENT | VNODE_UPDATE_CACHE | VNODE_UPDATE_NAME))) {
934 NAME_CACHE_LOCK();
935
936 if ((flags & VNODE_UPDATE_PURGE)) {
937 if (vp->v_parent) {
938 vp->v_parent->v_nc_generation++;
939 }
940
941 while ((ncp = LIST_FIRST(&vp->v_nclinks))) {
942 cache_delete(ncp, 1);
943 }
944
945 while ((ncp = TAILQ_FIRST(&vp->v_ncchildren))) {
946 cache_delete(ncp, 1);
947 }
948
949 /*
950 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
951 */
952 tcred = vp->v_cred;
953 vp->v_cred = NOCRED;
954 vp->v_authorized_actions = 0;
955 vp->v_cred_timestamp = 0;
956 }
957 if ((flags & VNODE_UPDATE_NAME)) {
958 vname = vp->v_name;
959 vp->v_name = tname;
960 }
961 if (flags & VNODE_UPDATE_PARENT) {
962 if (dvp != vp && dvp != vp->v_parent) {
963 old_parentvp = vp->v_parent;
964 vp->v_parent = dvp;
965 dvp = NULLVP;
966
967 if (old_parentvp) {
968 flags |= VNODE_UPDATE_CACHE;
969 }
970 }
971 }
972 if (flags & VNODE_UPDATE_CACHE) {
973 while ((ncp = LIST_FIRST(&vp->v_nclinks))) {
974 cache_delete(ncp, 1);
975 }
976 }
977 NAME_CACHE_UNLOCK();
978
979 if (vname != NULL) {
980 vfs_removename(vname);
981 }
982
983 if (IS_VALID_CRED(tcred)) {
984 kauth_cred_unref(&tcred);
985 }
986 }
987 if (dvp != NULLVP) {
988 /* Back-out the ref we took if we lost a race for vp->v_parent. */
989 if (kusecountbumped) {
990 vnode_lock_spin(dvp);
991 if (dvp->v_kusecount > 0) {
992 --dvp->v_kusecount;
993 }
994 vnode_unlock(dvp);
995 }
996 vnode_rele(dvp);
997 }
998 if (old_parentvp) {
999 struct uthread *ut;
1000
1001 if (isstream) {
1002 vnode_lock_spin(old_parentvp);
1003 if ((old_parentvp->v_type != VDIR) && (old_parentvp->v_kusecount > 0)) {
1004 --old_parentvp->v_kusecount;
1005 }
1006 vnode_unlock(old_parentvp);
1007 }
1008 ut = get_bsdthread_info(current_thread());
1009
1010 /*
1011 * indicate to vnode_rele that it shouldn't do a
1012 * vnode_reclaim at this time... instead it will
1013 * chain the vnode to the uu_vreclaims list...
1014 * we'll be responsible for calling vnode_reclaim
1015 * on each of the vnodes in this list...
1016 */
1017 ut->uu_defer_reclaims = 1;
1018 ut->uu_vreclaims = NULLVP;
1019
1020 while ((vp = old_parentvp) != NULLVP) {
1021 vnode_lock_spin(vp);
1022 vnode_rele_internal(vp, 0, 0, 1);
1023
1024 /*
1025 * check to see if the vnode is now in the state
1026 * that would have triggered a vnode_reclaim in vnode_rele
1027 * if it is, we save its parent pointer and then NULL
1028 * out the v_parent field... we'll drop the reference
1029 * that was held on the next iteration of this loop...
1030 * this short circuits a potential deep recursion if we
1031 * have a long chain of parents in this state...
1032 * we'll sit in this loop until we run into
1033 * a parent in this chain that is not in this state
1034 *
1035 * make our check and the vnode_rele atomic
1036 * with respect to the current vnode we're working on
1037 * by holding the vnode lock
1038 * if vnode_rele deferred the vnode_reclaim and has put
1039 * this vnode on the list to be reaped by us, then
1040 * it has left this vnode with an iocount == 1
1041 */
1042 if ((vp->v_iocount == 1) && (vp->v_usecount == 0) &&
1043 ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM)) {
1044 /*
1045 * vnode_rele wanted to do a vnode_reclaim on this vnode
1046 * it should be sitting on the head of the uu_vreclaims chain
1047 * pull the parent pointer now so that when we do the
1048 * vnode_reclaim for each of the vnodes in the uu_vreclaims
1049 * list, we won't recurse back through here
1050 *
1051 * need to do a convert here in case vnode_rele_internal
1052 * returns with the lock held in the spin mode... it
1053 * can drop and retake the lock under certain circumstances
1054 */
1055 vnode_lock_convert(vp);
1056
1057 NAME_CACHE_LOCK();
1058 old_parentvp = vp->v_parent;
1059 vp->v_parent = NULLVP;
1060 NAME_CACHE_UNLOCK();
1061 } else {
1062 /*
1063 * we're done... we ran into a vnode that isn't
1064 * being terminated
1065 */
1066 old_parentvp = NULLVP;
1067 }
1068 vnode_unlock(vp);
1069 }
1070 ut->uu_defer_reclaims = 0;
1071
1072 while ((vp = ut->uu_vreclaims) != NULLVP) {
1073 ut->uu_vreclaims = vp->v_defer_reclaimlist;
1074
1075 /*
1076 * vnode_put will drive the vnode_reclaim if
1077 * we are still the only reference on this vnode
1078 */
1079 vnode_put(vp);
1080 }
1081 }
1082 }
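/*
 * Illustrative sketch (an assumption, not original source): a file
 * system that has just renamed 'vp' from directory 'fdvp' into 'tdvp'
 * under the component name in 'tcnp' might refresh the vnode's cached
 * identity, and flush stale name cache entries, in a single call:
 *
 *	vnode_update_identity(vp, tdvp, tcnp->cn_nameptr,
 *	    tcnp->cn_namelen, tcnp->cn_hash,
 *	    VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME | VNODE_UPDATE_CACHE);
 */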
1083
1084
1085 /*
1086 * Mark a vnode as having multiple hard links. HFS makes use of this
1087 * because it keeps track of each link separately, and wants to know
1088 * which link was actually used.
1089 *
1090 * This will cause the name cache to force a VNOP_LOOKUP on the vnode
1091 * so that HFS can post-process the lookup. Also, volfs will call
1092 * VNOP_GETATTR2 to determine the parent, instead of using v_parent.
1093 */
1094 void
1095 vnode_setmultipath(vnode_t vp)
1096 {
1097 vnode_lock_spin(vp);
1098
1099 /*
1100 * In theory, we're changing the vnode's identity as far as the
1101 * name cache is concerned, so we ought to grab the name cache lock
1102 * here. However, there is already a race, and grabbing the name
1103 * cache lock only makes the race window slightly smaller.
1104 *
1105 * The race happens because the vnode already exists in the name
1106 * cache, and could be found by one thread before another thread
1107 * can set the hard link flag.
1108 */
1109
1110 vp->v_flag |= VISHARDLINK;
1111
1112 vnode_unlock(vp);
1113 }
1114
1115
1116
1117 /*
1118 * backwards compatibility
1119 */
1120 void
1121 vnode_uncache_credentials(vnode_t vp)
1122 {
1123 vnode_uncache_authorized_action(vp, KAUTH_INVALIDATE_CACHED_RIGHTS);
1124 }
1125
1126
1127 /*
1128 * use the exclusive form of NAME_CACHE_LOCK to protect the update of the
1129 * following fields in the vnode: v_cred_timestamp, v_cred, v_authorized_actions
1130 * we use this lock so that we can look at the v_cred and v_authorized_actions
1131 * atomically while behind the NAME_CACHE_LOCK in shared mode in 'cache_lookup_path',
1132 * which is the super-hot path... if we are updating the authorized actions for this
1133 * vnode, we are already in the super-slow and far less frequented path so it's not
1134 * that bad that we take the lock exclusive for this case... of course we strive
1135 * to hold it for the minimum amount of time possible
1136 */
1137
1138 void
1139 vnode_uncache_authorized_action(vnode_t vp, kauth_action_t action)
1140 {
1141 kauth_cred_t tcred = NOCRED;
1142
1143 NAME_CACHE_LOCK();
1144
1145 vp->v_authorized_actions &= ~action;
1146
1147 if (action == KAUTH_INVALIDATE_CACHED_RIGHTS &&
1148 IS_VALID_CRED(vp->v_cred)) {
1149 /*
1150 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
1151 */
1152 tcred = vp->v_cred;
1153 vp->v_cred = NOCRED;
1154 }
1155 NAME_CACHE_UNLOCK();
1156
1157 if (tcred != NOCRED) {
1158 kauth_cred_unref(&tcred);
1159 }
1160 }
1161
1162
1163 extern int bootarg_vnode_cache_defeat; /* default = 0, from bsd_init.c */
1164
1165 boolean_t
1166 vnode_cache_is_authorized(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
1167 {
1168 kauth_cred_t ucred;
1169 boolean_t retval = FALSE;
1170
1171 /* Boot argument to defeat rights caching */
1172 if (bootarg_vnode_cache_defeat) {
1173 return FALSE;
1174 }
1175
1176 if ((vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL))) {
1177 /*
1178 * a TTL is enabled on the rights cache... handle it here
1179 * a TTL of 0 indicates that no rights should be cached
1180 */
1181 if (vp->v_mount->mnt_authcache_ttl) {
1182 if (!(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL)) {
1183 /*
1184 * For filesystems marked only MNTK_AUTH_OPAQUE (generally network ones),
1185 * we will only allow a SEARCH right on a directory to be cached...
1186 * that cached right always has a default TTL associated with it
1187 */
1188 if (action != KAUTH_VNODE_SEARCH || vp->v_type != VDIR) {
1189 vp = NULLVP;
1190 }
1191 }
1192 if (vp != NULLVP && vnode_cache_is_stale(vp) == TRUE) {
1193 vnode_uncache_authorized_action(vp, vp->v_authorized_actions);
1194 vp = NULLVP;
1195 }
1196 } else {
1197 vp = NULLVP;
1198 }
1199 }
1200 if (vp != NULLVP) {
1201 ucred = vfs_context_ucred(ctx);
1202
1203 NAME_CACHE_LOCK_SHARED();
1204
1205 if (vp->v_cred == ucred && (vp->v_authorized_actions & action) == action) {
1206 retval = TRUE;
1207 }
1208
1209 NAME_CACHE_UNLOCK();
1210 }
1211 return retval;
1212 }
1213
1214
1215 void
1216 vnode_cache_authorized_action(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
1217 {
1218 kauth_cred_t tcred = NOCRED;
1219 kauth_cred_t ucred;
1220 struct timeval tv;
1221 boolean_t ttl_active = FALSE;
1222
1223 ucred = vfs_context_ucred(ctx);
1224
1225 if (!IS_VALID_CRED(ucred) || action == 0) {
1226 return;
1227 }
1228
1229 if ((vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL))) {
1230 /*
1231 * a TTL is enabled on the rights cache... handle it here
1232 * a TTL of 0 indicates that no rights should be cached
1233 */
1234 if (vp->v_mount->mnt_authcache_ttl == 0) {
1235 return;
1236 }
1237
1238 if (!(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL)) {
1239 /*
1240 * only cache SEARCH action for filesystems marked
1241 * MNTK_AUTH_OPAQUE on VDIRs...
1242 * the lookup_path code will time these out
1243 */
1244 if ((action & ~KAUTH_VNODE_SEARCH) || vp->v_type != VDIR) {
1245 return;
1246 }
1247 }
1248 ttl_active = TRUE;
1249
1250 microuptime(&tv);
1251 }
1252 NAME_CACHE_LOCK();
1253
1254 if (vp->v_cred != ucred) {
1255 kauth_cred_ref(ucred);
1256 /*
1257 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
1258 */
1259 tcred = vp->v_cred;
1260 vp->v_cred = ucred;
1261 vp->v_authorized_actions = 0;
1262 }
1263 if (ttl_active == TRUE && vp->v_authorized_actions == 0) {
1264 /*
1265 * only reset the timestamp on the
1266 * first authorization cached after the previous
1267 * timer has expired or we're switching creds...
1268 * 'vnode_cache_is_authorized' will clear the
1269 * authorized actions if the TTL is active and
1270 * it has expired
1271 */
1272 vp->v_cred_timestamp = tv.tv_sec;
1273 }
1274 vp->v_authorized_actions |= action;
1275
1276 NAME_CACHE_UNLOCK();
1277
1278 if (IS_VALID_CRED(tcred)) {
1279 kauth_cred_unref(&tcred);
1280 }
1281 }
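/*
 * Illustrative sketch (not original source): the intended pairing of
 * the two routines above in an authorization path: consult the cache
 * first, and on a miss perform the full (expensive) check and then
 * record the granted rights. The full check is left abstract here.
 *
 *	if (vnode_cache_is_authorized(vp, ctx, action) == TRUE)
 *		return 0;	// cached grant, fast path
 *	error = ...;		// full kauth evaluation of 'action'
 *	if (error == 0)
 *		vnode_cache_authorized_action(vp, ctx, action);
 */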
1282
1283
1284 boolean_t
1285 vnode_cache_is_stale(vnode_t vp)
1286 {
1287 struct timeval tv;
1288 boolean_t retval;
1289
1290 microuptime(&tv);
1291
1292 if ((tv.tv_sec - vp->v_cred_timestamp) > vp->v_mount->mnt_authcache_ttl) {
1293 retval = TRUE;
1294 } else {
1295 retval = FALSE;
1296 }
1297
1298 return retval;
1299 }
1300
1301
1302
1303 /*
1304 * Returns: 0 Success
1305 * ERECYCLE vnode was recycled from underneath us. Force lookup to be re-driven from namei.
1306 * This errno value should not be seen by anyone outside of the kernel.
1307 */
1308 int
1309 cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
1310 vfs_context_t ctx, int *dp_authorized, vnode_t last_dp)
1311 {
1312 char *cp; /* pointer into pathname argument */
1313 int vid;
1314 int vvid = 0; /* protected by vp != NULLVP */
1315 vnode_t vp = NULLVP;
1316 vnode_t tdp = NULLVP;
1317 kauth_cred_t ucred;
1318 boolean_t ttl_enabled = FALSE;
1319 struct timeval tv;
1320 mount_t mp;
1321 unsigned int hash;
1322 int error = 0;
1323 boolean_t dotdotchecked = FALSE;
1324
1325 #if CONFIG_TRIGGERS
1326 vnode_t trigger_vp;
1327 #endif /* CONFIG_TRIGGERS */
1328
1329 ucred = vfs_context_ucred(ctx);
1330 ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH);
1331
1332 NAME_CACHE_LOCK_SHARED();
1333
1334 if (dp->v_mount && (dp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL))) {
1335 ttl_enabled = TRUE;
1336 microuptime(&tv);
1337 }
1338 for (;;) {
1339 /*
1340 * Search a directory.
1341 *
1342 * The cn_hash value is for use by cache_lookup
1343 * The last component of the filename is left accessible via
1344 * cnp->cn_nameptr for callers that need the name.
1345 */
1346 hash = 0;
1347 cp = cnp->cn_nameptr;
1348
1349 while (*cp && (*cp != '/')) {
1350 hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
1351 }
1352 /*
1353 * the crc generator can legitimately generate
1354 * a 0... however, 0 for us means that we
1355 * haven't computed a hash, so use 1 instead
1356 */
1357 if (hash == 0) {
1358 hash = 1;
1359 }
1360 cnp->cn_hash = hash;
1361 cnp->cn_namelen = cp - cnp->cn_nameptr;
1362
1363 ndp->ni_pathlen -= cnp->cn_namelen;
1364 ndp->ni_next = cp;
1365
1366 /*
1367 * Replace multiple slashes by a single slash and trailing slashes
1368 * by a null. This must be done before VNOP_LOOKUP() because some
1369 * fs's don't know about trailing slashes. Remember if there were
1370 * trailing slashes to handle symlinks, existing non-directories
1371 * and non-existing files that won't be directories specially later.
1372 */
1373 while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) {
1374 cp++;
1375 ndp->ni_pathlen--;
1376
1377 if (*cp == '\0') {
1378 ndp->ni_flag |= NAMEI_TRAILINGSLASH;
1379 *ndp->ni_next = '\0';
1380 }
1381 }
1382 ndp->ni_next = cp;
1383
1384 cnp->cn_flags &= ~(MAKEENTRY | ISLASTCN | ISDOTDOT);
1385
1386 if (*cp == '\0') {
1387 cnp->cn_flags |= ISLASTCN;
1388 }
1389
1390 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') {
1391 cnp->cn_flags |= ISDOTDOT;
1392 }
1393
1394 *dp_authorized = 0;
1395 #if NAMEDRSRCFORK
1396 /*
1397 * Process a request for a file's resource fork.
1398 *
1399 * Consume the _PATH_RSRCFORKSPEC suffix and tag the path.
1400 */
1401 if ((ndp->ni_pathlen == sizeof(_PATH_RSRCFORKSPEC)) &&
1402 (cp[1] == '.' && cp[2] == '.') &&
1403 bcmp(cp, _PATH_RSRCFORKSPEC, sizeof(_PATH_RSRCFORKSPEC)) == 0) {
1404 /* Skip volfs file systems that don't support native streams. */
1405 if ((dp->v_mount != NULL) &&
1406 (dp->v_mount->mnt_flag & MNT_DOVOLFS) &&
1407 (dp->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0) {
1408 goto skiprsrcfork;
1409 }
1410 cnp->cn_flags |= CN_WANTSRSRCFORK;
1411 cnp->cn_flags |= ISLASTCN;
1412 ndp->ni_next[0] = '\0';
1413 ndp->ni_pathlen = 1;
1414 }
1415 skiprsrcfork:
1416 #endif
1417
1418 #if CONFIG_MACF
1419
1420 /*
1421 * Name cache provides authorization caching (see below)
1422 * that will short circuit MAC checks in lookup().
1423 * We must perform the MAC check here. On denial,
1424 * dp_authorized will remain 0 and a second check will
1425 * be performed in lookup().
1426 */
1427 if (!(cnp->cn_flags & DONOTAUTH)) {
1428 error = mac_vnode_check_lookup(ctx, dp, cnp);
1429 if (error) {
1430 NAME_CACHE_UNLOCK();
1431 goto errorout;
1432 }
1433 }
1434 #endif /* MAC */
1435 if (ttl_enabled &&
1436 (dp->v_mount->mnt_authcache_ttl == 0 ||
1437 ((tv.tv_sec - dp->v_cred_timestamp) > dp->v_mount->mnt_authcache_ttl))) {
1438 break;
1439 }
1440
1441 /*
1442 * NAME_CACHE_LOCK holds these fields stable
1443 *
1444 * We can't cache KAUTH_VNODE_SEARCHBYANYONE for root correctly
1445 * so we make an ugly check for root here. root is always
1446 * allowed, and breaking out of here only to find out that it is
1447 * authorized by virtue of being root is very, very expensive.
1448 * However, the check for not root is valid only for filesystems
1449 * which use local authorization.
1450 *
1451 * XXX: Remove the check for root when we can reliably set
1452 * KAUTH_VNODE_SEARCHBYANYONE as root.
1453 */
1454 if ((dp->v_cred != ucred || !(dp->v_authorized_actions & KAUTH_VNODE_SEARCH)) &&
1455 !(dp->v_authorized_actions & KAUTH_VNODE_SEARCHBYANYONE) &&
1456 (ttl_enabled || !vfs_context_issuser(ctx))) {
1457 break;
1458 }
1459
1460 /*
1461 * indicate that we're allowed to traverse this directory...
1462 * even if we fail the cache lookup or decide to bail for
1463 * some other reason, this information is valid and is used
1464 * to avoid doing a vnode_authorize before the call to VNOP_LOOKUP
1465 */
1466 *dp_authorized = 1;
1467
1468 if ((cnp->cn_flags & (ISLASTCN | ISDOTDOT))) {
1469 if (cnp->cn_nameiop != LOOKUP) {
1470 break;
1471 }
1472 if (cnp->cn_flags & LOCKPARENT) {
1473 break;
1474 }
1475 if (cnp->cn_flags & NOCACHE) {
1476 break;
1477 }
1478 if (cnp->cn_flags & ISDOTDOT) {
1479 /*
1480 * Force directory hardlinks to go to
1481 * file system for ".." requests.
1482 */
1483 if ((dp->v_flag & VISHARDLINK)) {
1484 break;
1485 }
1486 /*
1487 * Quit here only if we can't use
1488 * the parent directory pointer or
1489 * don't have one. Otherwise, we'll
1490 * use it below.
1491 */
1492 if ((dp->v_flag & VROOT) ||
1493 dp == ndp->ni_rootdir ||
1494 dp->v_parent == NULLVP) {
1495 break;
1496 }
1497 }
1498 }
1499
1500 if ((cnp->cn_flags & CN_SKIPNAMECACHE)) {
1501 /*
1502 * Force lookup to go to the filesystem with
1503 * all cnp fields set up.
1504 */
1505 break;
1506 }
1507
1508 /*
1509 * "." and ".." aren't supposed to be cached, so check
1510 * for them before checking the cache.
1511 */
1512 if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
1513 vp = dp;
1514 } else if ((cnp->cn_flags & ISDOTDOT)) {
1515 /*
1516 * If this is a chrooted process, we need to check if
1517 * the process is trying to break out of its chrooted
1518 * jail. We do that by trying to determine if dp is
1519 * a subdirectory of ndp->ni_rootdir. If we aren't
1520 * able to determine that by the v_parent pointers, we
1521 * will leave the fast path.
1522 *
1523 * Since this function may see dotdot components
1524 * many times and it has the name cache lock held for
1525 * the entire duration, we optimise this by doing this
1526 * check only once per cache_lookup_path call.
1527 * If dotdotchecked is set, it means we've done this
1528 * check once already and don't need to do it again.
1529 */
1530 if (!dotdotchecked && (ndp->ni_rootdir != rootvnode)) {
1531 vnode_t tvp = dp;
1532 boolean_t defer = FALSE;
1533 boolean_t is_subdir = FALSE;
1534
1535 defer = cache_check_vnode_issubdir(tvp,
1536 ndp->ni_rootdir, &is_subdir, &tvp);
1537
1538 if (defer) {
1539 /* defer to Filesystem */
1540 break;
1541 } else if (!is_subdir) {
1542 /*
1543 * This process is trying to break out
1544 * of its chrooted jail, so all its
1545 * dotdot accesses will be translated to
1546 * its root directory.
1547 */
1548 vp = ndp->ni_rootdir;
1549 } else {
1550 /*
1551 * All good, let this dotdot access
1552 * proceed normally
1553 */
1554 vp = dp->v_parent;
1555 }
1556 dotdotchecked = TRUE;
1557 } else {
1558 vp = dp->v_parent;
1559 }
1560 } else {
1561 if ((vp = cache_lookup_locked(dp, cnp)) == NULLVP) {
1562 break;
1563 }
1564
1565 if ((vp->v_flag & VISHARDLINK)) {
1566 /*
1567 * The file system wants a VNOP_LOOKUP on this vnode
1568 */
1569 vp = NULL;
1570 break;
1571 }
1572 }
1573 if ((cnp->cn_flags & ISLASTCN)) {
1574 break;
1575 }
1576
1577 if (vp->v_type != VDIR) {
1578 if (vp->v_type != VLNK) {
1579 vp = NULL;
1580 }
1581 break;
1582 }
1583
1584 if ((mp = vp->v_mountedhere) && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
1585 vnode_t tmp_vp = mp->mnt_realrootvp;
1586 if (tmp_vp == NULLVP || mp->mnt_generation != mount_generation ||
1587 mp->mnt_realrootvp_vid != tmp_vp->v_id) {
1588 break;
1589 }
1590 vp = tmp_vp;
1591 }
1592
1593 #if CONFIG_TRIGGERS
1594 /*
1595 * After traversing all mountpoints stacked here, if we have a
1596 * trigger in hand, resolve it. Note that we don't need to
1597 * leave the fast path if the mount has already happened.
1598 */
1599 if (vp->v_resolve) {
1600 break;
1601 }
1602 #endif /* CONFIG_TRIGGERS */
1603
1604
1605 dp = vp;
1606 vp = NULLVP;
1607
1608 cnp->cn_nameptr = ndp->ni_next + 1;
1609 ndp->ni_pathlen--;
1610 while (*cnp->cn_nameptr == '/') {
1611 cnp->cn_nameptr++;
1612 ndp->ni_pathlen--;
1613 }
1614 }
1615 if (vp != NULLVP) {
1616 vvid = vp->v_id;
1617 }
1618 vid = dp->v_id;
1619
1620 NAME_CACHE_UNLOCK();
1621
1622 if ((vp != NULLVP) && (vp->v_type != VLNK) &&
1623 ((cnp->cn_flags & (ISLASTCN | LOCKPARENT | WANTPARENT | SAVESTART)) == ISLASTCN)) {
1624 /*
1625 * if we've got a child and it's the last component, and
1626 * the lookup doesn't need to return the parent then we
1627 * can skip grabbing an iocount on the parent, since all
1628 * we're going to do with it is a vnode_put just before
1629 * we return from 'lookup'. If it's a symbolic link,
1630 * we need the parent in case the link happens to be
1631 * a relative pathname.
1632 */
1633 tdp = dp;
1634 dp = NULLVP;
1635 } else {
1636 need_dp:
1637 /*
1638 * return the last directory we looked at
1639 * with an io reference held. If it was the one passed
1640 * in as a result of the last iteration of VNOP_LOOKUP,
1641 * it should already hold an io ref. No need to increase ref.
1642 */
1643 if (last_dp != dp) {
1644 if (dp == ndp->ni_usedvp) {
1645 /*
1646 * if this vnode matches the one passed in via USEDVP
1647 * then this context already holds an io_count... just
1648 * use vnode_get to get an extra ref for lookup to play
1649 * with... can't use the getwithvid variant here because
1650 * it will block behind a vnode_drain which would result
1651 * in a deadlock (since we already own an io_count that the
1652 * vnode_drain is waiting on)... vnode_get grabs the io_count
1653 * immediately w/o waiting... it always succeeds
1654 */
1655 vnode_get(dp);
1656 } else if ((error = vnode_getwithvid_drainok(dp, vid))) {
1657 /*
1658 * failure indicates the vnode
1659 * changed identity or is being
1660 * TERMINATED... in either case
1661 * punt this lookup.
1662 *
1663 * don't necessarily return ENOENT, though, because
1664 * we really want to go back to disk and make sure it's
1665 * there or not if someone else is changing this
1666 * vnode. That being said, the one case where we do want
1667 * to return ENOENT is when the vnode's mount point is
1668 * in the process of unmounting and we might cause a deadlock
1669 * in our attempt to take an iocount. An ENODEV error return
1670 * from vnode_get* is an indication of this, but we change it to
1671 * ENOENT for the upper layers.
1672 */
1673 if (error == ENODEV) {
1674 error = ENOENT;
1675 } else {
1676 error = ERECYCLE;
1677 }
1678 goto errorout;
1679 }
1680 }
1681 }
1682 if (vp != NULLVP) {
1683 if ((vnode_getwithvid_drainok(vp, vvid))) {
1684 vp = NULLVP;
1685
1686 /*
1687 * can't get reference on the vp we'd like
1688 * to return... if we didn't grab a reference
1689 * on the directory (due to fast path bypass),
1690 * then we need to do it now... we can't return
1691 * with both ni_dvp and ni_vp NULL, and no
1692 * error condition
1693 */
1694 if (dp == NULLVP) {
1695 dp = tdp;
1696 goto need_dp;
1697 }
1698 }
1699 }
1700
1701 ndp->ni_dvp = dp;
1702 ndp->ni_vp = vp;
1703
1704 #if CONFIG_TRIGGERS
1705 trigger_vp = vp ? vp : dp;
1706 if ((error == 0) && (trigger_vp != NULLVP) && vnode_isdir(trigger_vp)) {
1707 error = vnode_trigger_resolve(trigger_vp, ndp, ctx);
1708 if (error) {
1709 if (vp) {
1710 vnode_put(vp);
1711 }
1712 if (dp) {
1713 vnode_put(dp);
1714 }
1715 goto errorout;
1716 }
1717 }
1718 #endif /* CONFIG_TRIGGERS */
1719
1720 errorout:
1721 /*
1722 * If we came into cache_lookup_path after an iteration of the lookup loop that
1723 * resulted in a call to VNOP_LOOKUP, then VNOP_LOOKUP returned a vnode with a io ref
1724 * on it. It is now the job of cache_lookup_path to drop the ref on this vnode
1725 * when it is no longer needed. If we get to this point, and last_dp is not NULL
1726 * and it is ALSO not the dvp we want to return to caller of this function, it MUST be
1727 * the case that we got to a subsequent path component and this previous vnode is
1728 * no longer needed. We can then drop the io ref on it.
1729 */
1730 if ((last_dp != NULLVP) && (last_dp != ndp->ni_dvp)) {
1731 vnode_put(last_dp);
1732 }
1733
1734 // 'error' was initialized to 0 and remains 0 if no error case occurred.
1735 return error;
1736 }
1737
1738
1739 static vnode_t
1740 cache_lookup_locked(vnode_t dvp, struct componentname *cnp)
1741 {
1742 struct namecache *ncp;
1743 struct nchashhead *ncpp;
1744 long namelen = cnp->cn_namelen;
1745 unsigned int hashval = cnp->cn_hash;
1746
1747 if (nc_disabled) {
1748 return NULL;
1749 }
1750
1751 ncpp = NCHHASH(dvp, cnp->cn_hash);
1752 LIST_FOREACH(ncp, ncpp, nc_hash) {
1753 if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
1754 if (strncmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0) {
1755 break;
1756 }
1757 }
1758 }
1759 if (ncp == 0) {
1760 /*
1761 * We failed to find an entry
1762 */
1763 NCHSTAT(ncs_miss);
1764 return NULL;
1765 }
1766 NCHSTAT(ncs_goodhits);
1767
1768 return ncp->nc_vp;
1769 }
1770
1771
1772 unsigned int hash_string(const char *cp, int len);
1773 //
1774 // Have to take a len argument because we may only need to
1775 // hash part of a componentname.
1776 //
1777 unsigned int
1778 hash_string(const char *cp, int len)
1779 {
1780 unsigned hash = 0;
1781
1782 if (len) {
1783 while (len--) {
1784 hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
1785 }
1786 } else {
1787 while (*cp != '\0') {
1788 hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
1789 }
1790 }
1791 /*
1792 * the crc generator can legitimately generate
1793 * a 0... however, 0 for us means that we
1794 * haven't computed a hash, so use 1 instead
1795 */
1796 if (hash == 0) {
1797 hash = 1;
1798 }
1799 return hash;
1800 }
1801
1802
1803 /*
1804 * Lookup an entry in the cache
1805 *
1806 * We don't do this if the segment name is long, simply so the cache
1807 * can avoid holding long names (which would either waste space, or
1808 * add greatly to the complexity).
1809 *
1810 * Lookup is called with dvp pointing to the directory to search,
1811 * cnp pointing to the name of the entry being sought. If the lookup
1812 * succeeds, the vnode is returned in *vpp, and a status of -1 is
1813 * returned. If the lookup determines that the name does not exist
1814 * (negative caching), a status of ENOENT is returned. If the lookup
1815 * fails, a status of zero is returned.
1816 */
1817
1818 int
1819 cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
1820 {
1821 struct namecache *ncp;
1822 struct nchashhead *ncpp;
1823 long namelen = cnp->cn_namelen;
1824 unsigned int hashval;
1825 boolean_t have_exclusive = FALSE;
1826 uint32_t vid;
1827 vnode_t vp;
1828
1829 if (cnp->cn_hash == 0) {
1830 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
1831 }
1832 hashval = cnp->cn_hash;
1833
1834 if (nc_disabled) {
1835 return 0;
1836 }
1837
1838 NAME_CACHE_LOCK_SHARED();
1839
1840 relook:
1841 ncpp = NCHHASH(dvp, cnp->cn_hash);
1842 LIST_FOREACH(ncp, ncpp, nc_hash) {
1843 if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
1844 if (strncmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0) {
1845 break;
1846 }
1847 }
1848 }
1849 /* We failed to find an entry */
1850 if (ncp == 0) {
1851 NCHSTAT(ncs_miss);
1852 NAME_CACHE_UNLOCK();
1853 return 0;
1854 }
1855
1856 /* We don't want to have an entry, so dump it */
1857 if ((cnp->cn_flags & MAKEENTRY) == 0) {
1858 if (have_exclusive == TRUE) {
1859 NCHSTAT(ncs_badhits);
1860 cache_delete(ncp, 1);
1861 NAME_CACHE_UNLOCK();
1862 return 0;
1863 }
1864 NAME_CACHE_UNLOCK();
1865 NAME_CACHE_LOCK();
1866 have_exclusive = TRUE;
1867 goto relook;
1868 }
1869 vp = ncp->nc_vp;
1870
1871 /* We found a "positive" match, return the vnode */
1872 if (vp) {
1873 NCHSTAT(ncs_goodhits);
1874
1875 vid = vp->v_id;
1876 NAME_CACHE_UNLOCK();
1877
1878 if (vnode_getwithvid(vp, vid)) {
1879 #if COLLECT_STATS
1880 NAME_CACHE_LOCK();
1881 NCHSTAT(ncs_badvid);
1882 NAME_CACHE_UNLOCK();
1883 #endif
1884 return 0;
1885 }
1886 *vpp = vp;
1887 return -1;
1888 }
1889
1890 /* We found a negative match, and want to create it, so purge */
1891 if (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) {
1892 if (have_exclusive == TRUE) {
1893 NCHSTAT(ncs_badhits);
1894 cache_delete(ncp, 1);
1895 NAME_CACHE_UNLOCK();
1896 return 0;
1897 }
1898 NAME_CACHE_UNLOCK();
1899 NAME_CACHE_LOCK();
1900 have_exclusive = TRUE;
1901 goto relook;
1902 }
1903
1904 /*
1905 * We found a "negative" match, ENOENT notifies client of this match.
1906 */
1907 NCHSTAT(ncs_neghits);
1908
1909 NAME_CACHE_UNLOCK();
1910 return ENOENT;
1911 }
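/*
 * Illustrative sketch (not original source): interpreting the three
 * possible cache_lookup results described above.
 *
 *	switch (cache_lookup(dvp, &vp, cnp)) {
 *	case -1:
 *		// positive hit: vp is returned with an iocount held
 *		break;
 *	case ENOENT:
 *		// negative hit: the name is known not to exist
 *		break;
 *	case 0:
 *		// miss: fall through to VNOP_LOOKUP
 *		break;
 *	}
 */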
1912
1913 const char *
1914 cache_enter_create(vnode_t dvp, vnode_t vp, struct componentname *cnp)
1915 {
1916 const char *strname;
1917
1918 if (cnp->cn_hash == 0) {
1919 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
1920 }
1921
1922 /*
1923 * grab 2 references on the string entered
1924 * one for the cache_enter_locked to consume
1925 * and the second to be consumed by v_name (vnode_create call point)
1926 */
1927 strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, TRUE, 0);
1928
1929 NAME_CACHE_LOCK();
1930
1931 cache_enter_locked(dvp, vp, cnp, strname);
1932
1933 NAME_CACHE_UNLOCK();
1934
1935 return strname;
1936 }
1937
1938
1939 /*
1940 * Add an entry to the cache...
1941 * but first check to see if the directory
1942 * that this entry is to be associated with has
1943 * had any cache_purges applied since we took
1944 * our identity snapshot... this check needs to
1945 * be done behind the name cache lock
1946 */
1947 void
1948 cache_enter_with_gen(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, int gen)
1949 {
1950 if (cnp->cn_hash == 0) {
1951 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
1952 }
1953
1954 NAME_CACHE_LOCK();
1955
1956 if (dvp->v_nc_generation == gen) {
1957 (void)cache_enter_locked(dvp, vp, cnp, NULL);
1958 }
1959
1960 NAME_CACHE_UNLOCK();
1961 }


/*
 * Add an entry to the cache.
 */
void
cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
	const char *strname;

	if (cnp->cn_hash == 0) {
		cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
	}

	/*
	 * grab one reference on the string entered,
	 * for cache_enter_locked to consume
	 */
	strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);

	NAME_CACHE_LOCK();

	cache_enter_locked(dvp, vp, cnp, strname);

	NAME_CACHE_UNLOCK();
}

static void
cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, const char *strname)
{
	struct namecache *ncp, *negp;
	struct nchashhead *ncpp;

	if (nc_disabled) {
		return;
	}

	/*
	 * if the entry is for negative caching, vp is NULL
	 */
	if ((vp != NULLVP) && (LIST_FIRST(&vp->v_nclinks))) {
		/*
		 * someone beat us to the punch...
		 * this vnode is already in the cache
		 */
		if (strname != NULL) {
			vfs_removename(strname);
		}
		return;
	}
	/*
	 * We allocate a new entry if we are less than the maximum
	 * allowed and the one at the front of the list is in use.
	 * Otherwise we use the one at the front of the list.
	 */
	if (numcache < desiredNodes &&
	    ((ncp = nchead.tqh_first) == NULL ||
	    ncp->nc_hash.le_prev != 0)) {
		/*
		 * Allocate one more entry
		 */
		ncp = (struct namecache *)_MALLOC_ZONE(sizeof(*ncp), M_CACHE, M_WAITOK);
		numcache++;
	} else {
		/*
		 * reuse an old entry
		 */
		ncp = TAILQ_FIRST(&nchead);
		TAILQ_REMOVE(&nchead, ncp, nc_entry);

		if (ncp->nc_hash.le_prev != 0) {
			/*
			 * still in use... we need to
			 * delete it before re-using it
			 */
			NCHSTAT(ncs_stolen);
			cache_delete(ncp, 0);
		}
	}
	NCHSTAT(ncs_enters);

	/*
	 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
	 */
	ncp->nc_vp = vp;
	ncp->nc_dvp = dvp;
	ncp->nc_hashval = cnp->cn_hash;

	if (strname == NULL) {
		ncp->nc_name = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);
	} else {
		ncp->nc_name = strname;
	}
	//
	// If the bytes of the name associated with the vnode differ,
	// use the name associated with the vnode since the file system
	// may have set that explicitly in the case of a lookup on a
	// case-insensitive file system where the case of the looked up
	// name differs from what is on disk.  For more details, see:
	//   <rdar://problem/8044697> FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories
	//
	const char *vn_name = vp ? vp->v_name : NULL;
	unsigned int len = vn_name ? strlen(vn_name) : 0;
	if (vn_name && ncp && ncp->nc_name && strncmp(ncp->nc_name, vn_name, len) != 0) {
		unsigned int hash = hash_string(vn_name, len);

		vfs_removename(ncp->nc_name);
		ncp->nc_name = add_name_internal(vn_name, len, hash, FALSE, 0);
		ncp->nc_hashval = hash;
	}

	/*
	 * make us the newest entry in the cache
	 * i.e. we'll be the last to be stolen
	 */
	TAILQ_INSERT_TAIL(&nchead, ncp, nc_entry);

	ncpp = NCHHASH(dvp, cnp->cn_hash);
#if DIAGNOSTIC
	{
		struct namecache *p;

		for (p = ncpp->lh_first; p != 0; p = p->nc_hash.le_next) {
			if (p == ncp) {
				panic("cache_enter: duplicate");
			}
		}
	}
#endif
	/*
	 * make us available to be found via lookup
	 */
	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);

	if (vp) {
		/*
		 * add to the list of name cache entries
		 * that point at vp
		 */
		LIST_INSERT_HEAD(&vp->v_nclinks, ncp, nc_un.nc_link);
	} else {
		/*
		 * this is a negative cache entry (vp == NULL)
		 * stick it on the negative cache list.
		 */
		TAILQ_INSERT_TAIL(&neghead, ncp, nc_un.nc_negentry);

		ncs_negtotal++;

		if (ncs_negtotal > desiredNegNodes) {
			/*
			 * if we've reached our desired limit
			 * of negative cache entries, delete
			 * the oldest
			 */
			negp = TAILQ_FIRST(&neghead);
			cache_delete(negp, 1);
		}
	}
	/*
	 * add us to the list of name cache entries that are
	 * children of dvp... negative entries go at the head
	 * and positive entries at the tail, so all of dvp's
	 * negative children cluster at the front of the list
	 * (cache_purge_negatives() relies on this ordering)
	 */
	if (vp) {
		TAILQ_INSERT_TAIL(&dvp->v_ncchildren, ncp, nc_child);
	} else {
		TAILQ_INSERT_HEAD(&dvp->v_ncchildren, ncp, nc_child);
	}
}
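
/*
 * Editor's sketch: a negative entry is created simply by entering a NULL
 * vnode, e.g. after a filesystem lookup fails with ENOENT.  Hypothetical
 * fragment.
 */
#if 0
	if (example_fs_lookup(dvp, &vp, cnp) == ENOENT && (cnp->cn_flags & MAKEENTRY)) {
		cache_enter(dvp, NULLVP, cnp);	/* future lookups hit the negative entry */
	}
#endif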


/*
 * Initialize CRC-32 remainder table.
 */
static void
init_crc32(void)
{
	/*
	 * the CRC-32 generator polynomial is:
	 * x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^10
	 *     + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
	 */
	unsigned int crc32_polynomial = 0x04c11db7;
	unsigned int i, j;

	/*
	 * pre-calculate the CRC-32 remainder for each possible octet encoding
	 */
	for (i = 0; i < 256; i++) {
		unsigned int crc_rem = i << 24;

		for (j = 0; j < 8; j++) {
			if (crc_rem & 0x80000000) {
				crc_rem = (crc_rem << 1) ^ crc32_polynomial;
			} else {
				crc_rem = (crc_rem << 1);
			}
		}
		crc32tab[i] = crc_rem;
	}
}
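
/*
 * Editor's sketch: the table built above supports the classic MSB-first,
 * byte-at-a-time CRC-32 update shown below.  Whether hash_string()
 * (defined earlier in this file) uses exactly this step is not shown
 * here; this is simply the canonical consumer of such a table.
 */
#if 0
static unsigned int
example_crc32_update(unsigned int crc, const char *buf, size_t len)
{
	while (len--) {
		crc = (crc << 8) ^ crc32tab[((crc >> 24) ^ (unsigned char)*buf++) & 0xff];
	}
	return crc;
}
#endif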


/*
 * Name cache initialization, from vfs_init() when we are booting
 */
void
nchinit(void)
{
	int i;

	desiredNegNodes = (desiredvnodes / 10);
	desiredNodes = desiredvnodes + desiredNegNodes;

	TAILQ_INIT(&nchead);
	TAILQ_INIT(&neghead);

	init_crc32();

	/*
	 * hashinit() hands back the hash mask (bucket count - 1);
	 * remember the mask in nchashmask and turn nchash into the
	 * bucket count itself
	 */
	nchashtbl = hashinit(MAX(CONFIG_NC_HASH, (2 * desiredNodes)), M_CACHE, &nchash);
	nchashmask = nchash;
	nchash++;

	init_string_table();

	/* Allocate name cache lock group attribute and group */
	namecache_lck_grp_attr = lck_grp_attr_alloc_init();

	namecache_lck_grp = lck_grp_alloc_init("Name Cache", namecache_lck_grp_attr);

	/* Allocate name cache lock attribute */
	namecache_lck_attr = lck_attr_alloc_init();

	/* Allocate name cache lock */
	namecache_rw_lock = lck_rw_alloc_init(namecache_lck_grp, namecache_lck_attr);


	/* Allocate string cache lock group attribute and group */
	strcache_lck_grp_attr = lck_grp_attr_alloc_init();

	strcache_lck_grp = lck_grp_alloc_init("String Cache", strcache_lck_grp_attr);

	/* Allocate string cache lock attribute */
	strcache_lck_attr = lck_attr_alloc_init();

	/* Allocate string cache lock */
	strtable_rw_lock = lck_rw_alloc_init(strcache_lck_grp, strcache_lck_attr);

	for (i = 0; i < NUM_STRCACHE_LOCKS; i++) {
		lck_mtx_init(&strcache_mtx_locks[i], strcache_lck_grp, strcache_lck_attr);
	}
}

void
name_cache_lock_shared(void)
{
	lck_rw_lock_shared(namecache_rw_lock);
}

void
name_cache_lock(void)
{
	lck_rw_lock_exclusive(namecache_rw_lock);
}

void
name_cache_unlock(void)
{
	lck_rw_done(namecache_rw_lock);
}


int
resize_namecache(int newsize)
{
	struct nchashhead *new_table;
	struct nchashhead *old_table;
	struct nchashhead *old_head, *head;
	struct namecache *entry, *next;
	uint32_t i, hashval;
	int dNodes, dNegNodes, nelements;
	u_long new_size, old_size;

	if (newsize < 0) {
		return EINVAL;
	}

	dNegNodes = (newsize / 10);
	dNodes = newsize + dNegNodes;
	// we don't support shrinking yet
	if (dNodes <= desiredNodes) {
		return 0;
	}

	if (os_mul_overflow(dNodes, 2, &nelements)) {
		return EINVAL;
	}

	new_table = hashinit(nelements, M_CACHE, &nchashmask);
	new_size = nchashmask + 1;

	if (new_table == NULL) {
		return ENOMEM;
	}

	NAME_CACHE_LOCK();
	// do the switch!
	old_table = nchashtbl;
	nchashtbl = new_table;
	old_size = nchash;
	nchash = new_size;

	// walk the old table and insert all the entries into
	// the new table
	//
	for (i = 0; i < old_size; i++) {
		old_head = &old_table[i];
		for (entry = old_head->lh_first; entry != NULL; entry = next) {
			//
			// XXXdbg - Beware: this assumes that hash_string() does
			//          the same thing as what happens in
			//          lookup() over in vfs_lookup.c
			hashval = hash_string(entry->nc_name, 0);
			entry->nc_hashval = hashval;
			head = NCHHASH(entry->nc_dvp, hashval);

			next = entry->nc_hash.le_next;
			LIST_INSERT_HEAD(head, entry, nc_hash);
		}
	}
	desiredNodes = dNodes;
	desiredNegNodes = dNegNodes;

	NAME_CACHE_UNLOCK();
	FREE(old_table, M_CACHE);

	return 0;
}
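
/*
 * Editor's sketch: resize_namecache() takes a desiredvnodes-style count,
 * so a caller reacting to a raised vnode limit might do the following
 * (error handling per the return values above).  Hypothetical fragment.
 */
#if 0
	error = resize_namecache(new_desiredvnodes);	/* hypothetical new limit */
	if (error) {
		/*
		 * EINVAL: negative or overflowing size
		 * ENOMEM: new hash table could not be allocated
		 * (a shrink request returns 0 without resizing)
		 */
	}
#endif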

static void
cache_delete(struct namecache *ncp, int free_entry)
{
	NCHSTAT(ncs_deletes);

	if (ncp->nc_vp) {
		LIST_REMOVE(ncp, nc_un.nc_link);
	} else {
		TAILQ_REMOVE(&neghead, ncp, nc_un.nc_negentry);
		ncs_negtotal--;
	}
	TAILQ_REMOVE(&(ncp->nc_dvp->v_ncchildren), ncp, nc_child);

	LIST_REMOVE(ncp, nc_hash);
	/*
	 * this field is used to indicate
	 * that the entry is in use and
	 * must be deleted before it can
	 * be reused...
	 */
	ncp->nc_hash.le_prev = NULL;

	vfs_removename(ncp->nc_name);
	ncp->nc_name = NULL;
	if (free_entry) {
		TAILQ_REMOVE(&nchead, ncp, nc_entry);
		FREE_ZONE(ncp, sizeof(*ncp), M_CACHE);
		numcache--;
	}
}


/*
 * purge the entry associated with the
 * specified vnode from the name cache
 */
void
cache_purge(vnode_t vp)
{
	struct namecache *ncp;
	kauth_cred_t tcred = NULL;

	if ((LIST_FIRST(&vp->v_nclinks) == NULL) &&
	    (TAILQ_FIRST(&vp->v_ncchildren) == NULL) &&
	    (vp->v_cred == NOCRED) &&
	    (vp->v_parent == NULLVP)) {
		return;
	}

	NAME_CACHE_LOCK();

	if (vp->v_parent) {
		vp->v_parent->v_nc_generation++;
	}

	while ((ncp = LIST_FIRST(&vp->v_nclinks))) {
		cache_delete(ncp, 1);
	}

	while ((ncp = TAILQ_FIRST(&vp->v_ncchildren))) {
		cache_delete(ncp, 1);
	}

	/*
	 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
	 */
	tcred = vp->v_cred;
	vp->v_cred = NOCRED;
	vp->v_authorized_actions = 0;

	NAME_CACHE_UNLOCK();

	if (IS_VALID_CRED(tcred)) {
		kauth_cred_unref(&tcred);
	}
}
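
/*
 * Editor's sketch: a filesystem would typically call cache_purge() once a
 * vnode's identity goes stale -- e.g. after removing the on-disk object --
 * so no later lookup can resolve the dead name.  Hypothetical fragment.
 */
#if 0
	error = example_fs_remove(dvp, vp, cnp);
	if (error == 0) {
		cache_purge(vp);	/* drop every name cache entry naming vp */
	}
#endif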

/*
 * Purge all negative cache entries that are children of the
 * given vnode.  A case-insensitive file system (or any file
 * system that has multiple equivalent names for the same
 * directory entry) can use this when creating or renaming
 * to remove negative entries that may no longer apply.
 */
void
cache_purge_negatives(vnode_t vp)
{
	struct namecache *ncp, *next_ncp;

	NAME_CACHE_LOCK();

	TAILQ_FOREACH_SAFE(ncp, &vp->v_ncchildren, nc_child, next_ncp) {
		if (ncp->nc_vp) {
			/*
			 * negative entries are kept at the head of
			 * v_ncchildren, so the first positive entry
			 * means there are no negatives left
			 */
			break;
		}

		cache_delete(ncp, 1);
	}

	NAME_CACHE_UNLOCK();
}
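
/*
 * Editor's sketch: per the comment above, a case-insensitive filesystem
 * that just created "FOO" must also invalidate any cached negative entry
 * for "foo", "Foo", etc.; since negative entries can't be matched
 * case-insensitively here, the simple fix is to drop them all.
 * Hypothetical fragment.
 */
#if 0
	error = example_fs_create(dvp, &vp, cnp);
	if (error == 0) {
		cache_purge_negatives(dvp);	/* some equivalent name may now exist */
	}
#endif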

/*
 * Flush all entries referencing a particular filesystem.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct nchashhead *ncpp;
	struct namecache *ncp;

	NAME_CACHE_LOCK();
	/* Scan hash tables for applicable entries */
	for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
restart:
		for (ncp = ncpp->lh_first; ncp != 0; ncp = ncp->nc_hash.le_next) {
			if (ncp->nc_dvp->v_mount == mp) {
				cache_delete(ncp, 0);
				goto restart;
			}
		}
	}
	NAME_CACHE_UNLOCK();
}


//
// String ref routines
//
static LIST_HEAD(stringhead, string_t) * string_ref_table;
static u_long string_table_mask;
static uint32_t filled_buckets = 0;


typedef struct string_t {
	LIST_ENTRY(string_t)    hash_chain;
	const char              *str;
	uint32_t                refcount;
} string_t;


static void
resize_string_ref_table(void)
{
	struct stringhead *new_table;
	struct stringhead *old_table;
	struct stringhead *old_head, *head;
	string_t *entry, *next;
	uint32_t i, hashval;
	u_long new_mask, old_mask;

	/*
	 * need to hold the table lock exclusively in order to grow
	 * the table... recheck whether a resize is still needed once
	 * we've taken the lock exclusively, in case some other thread
	 * beat us to the punch
	 */
	lck_rw_lock_exclusive(strtable_rw_lock);

	if (4 * filled_buckets < ((string_table_mask + 1) * 3)) {
		lck_rw_done(strtable_rw_lock);
		return;
	}
	new_table = hashinit((string_table_mask + 1) * 2, M_CACHE, &new_mask);

	if (new_table == NULL) {
		printf("failed to resize the hash table.\n");
		lck_rw_done(strtable_rw_lock);
		return;
	}

	// do the switch!
	old_table = string_ref_table;
	string_ref_table = new_table;
	old_mask = string_table_mask;
	string_table_mask = new_mask;
	filled_buckets = 0;

	// walk the old table and insert all the entries into
	// the new table
	//
	for (i = 0; i <= old_mask; i++) {
		old_head = &old_table[i];
		for (entry = old_head->lh_first; entry != NULL; entry = next) {
			hashval = hash_string((const char *)entry->str, 0);
			head = &string_ref_table[hashval & string_table_mask];
			if (head->lh_first == NULL) {
				filled_buckets++;
			}
			next = entry->hash_chain.le_next;
			LIST_INSERT_HEAD(head, entry, hash_chain);
		}
	}
	lck_rw_done(strtable_rw_lock);

	FREE(old_table, M_CACHE);
}


static void
init_string_table(void)
{
	string_ref_table = hashinit(CONFIG_VFS_NAMES, M_CACHE, &string_table_mask);
}


const char *
vfs_addname(const char *name, uint32_t len, u_int hashval, u_int flags)
{
	return add_name_internal(name, len, hashval, FALSE, flags);
}


static const char *
add_name_internal(const char *name, uint32_t len, u_int hashval, boolean_t need_extra_ref, __unused u_int flags)
{
	struct stringhead *head;
	string_t *entry;
	uint32_t chain_len = 0;
	uint32_t hash_index;
	uint32_t lock_index;
	char *ptr;

	if (len > MAXPATHLEN) {
		len = MAXPATHLEN;
	}

	/*
	 * if the length already accounts for the null-byte, then
	 * subtract one so later on we don't index past the end
	 * of the string.
	 */
	if (len > 0 && name[len - 1] == '\0') {
		len--;
	}
	if (hashval == 0) {
		hashval = hash_string(name, len);
	}

	/*
	 * take this lock 'shared' to keep the hash stable;
	 * anyone who decides to grow the pool will take
	 * it exclusively
	 */
	lck_rw_lock_shared(strtable_rw_lock);

	/*
	 * if the table is more than 3/4 full, resize it
	 * (i.e. filled_buckets / (mask + 1) >= 3/4)
	 */
	if (4 * filled_buckets >= ((string_table_mask + 1) * 3)) {
		lck_rw_done(strtable_rw_lock);

		resize_string_ref_table();

		lck_rw_lock_shared(strtable_rw_lock);
	}
	hash_index = hashval & string_table_mask;
	lock_index = hash_index % NUM_STRCACHE_LOCKS;

	head = &string_ref_table[hash_index];

	lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);

	for (entry = head->lh_first; entry != NULL; chain_len++, entry = entry->hash_chain.le_next) {
		if (strncmp(entry->str, name, len) == 0 && entry->str[len] == 0) {
			entry->refcount++;
			break;
		}
	}
	if (entry == NULL) {
		lck_mtx_convert_spin(&strcache_mtx_locks[lock_index]);
		/*
		 * it wasn't already there so add it.
		 */
		MALLOC(entry, string_t *, sizeof(string_t) + len + 1, M_TEMP, M_WAITOK);

		if (head->lh_first == NULL) {
			OSAddAtomic(1, &filled_buckets);
		}
		ptr = (char *)((char *)entry + sizeof(string_t));
		strncpy(ptr, name, len);
		ptr[len] = '\0';
		entry->str = ptr;
		entry->refcount = 1;
		LIST_INSERT_HEAD(head, entry, hash_chain);
	}
	if (need_extra_ref == TRUE) {
		entry->refcount++;
	}

	lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
	lck_rw_done(strtable_rw_lock);

	return (const char *)entry->str;
}


int
vfs_removename(const char *nameref)
{
	struct stringhead *head;
	string_t *entry;
	uint32_t hashval;
	uint32_t hash_index;
	uint32_t lock_index;
	int retval = ENOENT;

	hashval = hash_string(nameref, 0);

	/*
	 * take this lock 'shared' to keep the hash stable;
	 * anyone who decides to grow the pool will take
	 * it exclusively
	 */
	lck_rw_lock_shared(strtable_rw_lock);
	/*
	 * must compute the head behind the table lock
	 * since the size and location of the table
	 * can change on the fly
	 */
	hash_index = hashval & string_table_mask;
	lock_index = hash_index % NUM_STRCACHE_LOCKS;

	head = &string_ref_table[hash_index];

	lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);

	for (entry = head->lh_first; entry != NULL; entry = entry->hash_chain.le_next) {
		if (entry->str == nameref) {
			entry->refcount--;

			if (entry->refcount == 0) {
				LIST_REMOVE(entry, hash_chain);

				if (head->lh_first == NULL) {
					OSAddAtomic(-1, &filled_buckets);
				}
			} else {
				/*
				 * clear entry so we don't free a
				 * string that still has references
				 */
				entry = NULL;
			}
			retval = 0;
			break;
		}
	}
	lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
	lck_rw_done(strtable_rw_lock);

	if (entry != NULL) {
		FREE(entry, M_TEMP);
	}

	return retval;
}
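
/*
 * Editor's sketch: vfs_addname()/vfs_removename() implement a
 * reference-counted string intern table -- equal strings share one
 * allocation, which is why the loop above can compare pointers
 * (entry->str == nameref) rather than bytes.  Hypothetical fragment.
 */
#if 0
	const char *n1 = vfs_addname("foo", 3, 0, 0);
	const char *n2 = vfs_addname("foo", 3, 0, 0);
	/* n1 == n2: one interned copy, refcount == 2 */
	vfs_removename(n1);	/* refcount drops to 1 */
	vfs_removename(n2);	/* refcount hits 0: entry is freed */
#endif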


#ifdef DUMP_STRING_TABLE
void
dump_string_table(void)
{
	struct stringhead *head;
	string_t *entry;
	u_long i;

	lck_rw_lock_shared(strtable_rw_lock);

	for (i = 0; i <= string_table_mask; i++) {
		head = &string_ref_table[i];
		for (entry = head->lh_first; entry != NULL; entry = entry->hash_chain.le_next) {
			printf("%6u - %s\n", entry->refcount, entry->str);
		}
	}
	lck_rw_done(strtable_rw_lock);
}
#endif  /* DUMP_STRING_TABLE */