/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
 * Copyright (c) 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Poul-Henning Kamp of the FreeBSD Project.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *
 *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mount_internal.h>
#include <sys/vnode_internal.h>
#include <miscfs/specfs/specdev.h>
#include <sys/namei.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/kauth.h>
#include <sys/user.h>
#include <sys/paths.h>

#if CONFIG_MACF
#include <security/mac_framework.h>
#endif

/*
 * Name caching works as follows:
 *
 * Names found by directory scans are retained in a cache
 * for future reference.  It is managed LRU, so frequently
 * used names will hang around.  The cache is indexed by a hash value
 * obtained from (vp, name) where vp refers to the directory
 * containing name.
 *
 * If it is a "negative" entry (i.e. for a name that is known NOT to
 * exist), the vnode pointer will be NULL.
 *
 * Upon reaching the last segment of a path, if the reference
 * is for DELETE, or NOCACHE is set (rewrite), and the
 * name is located in the cache, it will be dropped.
 */

/*
 * Structures associated with name caching.
 */

LIST_HEAD(nchashhead, namecache) *nchashtbl;    /* Hash Table */
u_long nchashmask;
u_long nchash;                  /* size of hash table - 1 */
long numcache;                  /* number of cache entries allocated */
int desiredNodes;
int desiredNegNodes;
int ncs_negtotal;
int nc_disabled = 0;
TAILQ_HEAD(, namecache) nchead;     /* chain of all name cache entries */
TAILQ_HEAD(, namecache) neghead;    /* chain of only negative cache entries */


#if COLLECT_STATS

struct nchstats nchstats;       /* cache effectiveness statistics */

#define NCHSTAT(v) {    \
    nchstats.v++;       \
}
#define NAME_CACHE_LOCK()           name_cache_lock()
#define NAME_CACHE_UNLOCK()         name_cache_unlock()
#define NAME_CACHE_LOCK_SHARED()    name_cache_lock()

#else

#define NCHSTAT(v)
#define NAME_CACHE_LOCK()           name_cache_lock()
#define NAME_CACHE_UNLOCK()         name_cache_unlock()
#define NAME_CACHE_LOCK_SHARED()    name_cache_lock_shared()

#endif


/* vars for name cache list lock */
lck_grp_t *namecache_lck_grp;
lck_grp_attr_t *namecache_lck_grp_attr;
lck_attr_t *namecache_lck_attr;

lck_grp_t *strcache_lck_grp;
lck_grp_attr_t *strcache_lck_grp_attr;
lck_attr_t *strcache_lck_attr;

lck_rw_t *namecache_rw_lock;
lck_rw_t *strtable_rw_lock;

#define NUM_STRCACHE_LOCKS 1024

lck_mtx_t strcache_mtx_locks[NUM_STRCACHE_LOCKS];


static vnode_t cache_lookup_locked(vnode_t dvp, struct componentname *cnp);
static const char *add_name_internal(const char *, uint32_t, u_int, boolean_t, u_int);
static void init_string_table(void);
static void cache_delete(struct namecache *, int);
static void cache_enter_locked(vnode_t dvp, vnode_t vp, struct componentname *cnp, const char *strname);

#ifdef DUMP_STRING_TABLE
/*
 * Internal dump function used for debugging
 */
void dump_string_table(void);
#endif  /* DUMP_STRING_TABLE */

static void init_crc32(void);
static unsigned int crc32tab[256];


#define NCHHASH(dvp, hash_val) \
    (&nchashtbl[(dvp->v_id ^ (hash_val)) & nchashmask])
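
/*
 * Illustrative note (added commentary, not from the original source):
 * a bucket is picked from the parent directory's v_id XOR'd with the
 * CRC-32 hash of the component name, e.g. a lookup of "usr" under the
 * directory vnode 'dvp' probes the chain at:
 *
 *	struct nchashhead *bucket = NCHHASH(dvp, hash_string("usr", 3));
 *
 * Entries for the same name under different directories therefore land
 * in different buckets.
 */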


/*
 * This function builds the path to a filename in "buff".  The
 * length of the path *INCLUDING* the trailing zero byte is
 * returned in outlen.  NOTE: the length includes the trailing
 * zero byte and thus the length is one greater than what strlen
 * would return.  This is important and lots of code elsewhere
 * in the kernel assumes this behavior.
 *
 * This function can call VNOPs in the file system if the parent vnode
 * does not exist, or when called for hardlinks via the volfs path.
 * If BUILDPATH_NO_FS_ENTER is set in flags, it only uses values present
 * in the name cache and does not enter the file system.
 *
 * If BUILDPATH_CHECK_MOVED is set in flags, we return EAGAIN when
 * we encounter ENOENT during path reconstruction.  ENOENT means that
 * one of the parents moved while we were building the path.  The
 * caller can handle this case specially by calling build_path again.
 *
 * If BUILDPATH_VOLUME_RELATIVE is set in flags, we return a path
 * that is relative to the nearest mount point, i.e. we do not
 * cross mount points while building the path.
 *
 * The passed-in vp must have a valid io_count reference.
 */
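/*
 * Illustrative caller sketch (added commentary; 'vp' and 'ctx' are
 * assumed to come from the caller): a typical user supplies a
 * MAXPATHLEN buffer and a vnode that already holds an io_count:
 *
 *	char path[MAXPATHLEN];
 *	int  pathlen;
 *
 *	if (build_path(vp, path, MAXPATHLEN, &pathlen, 0, ctx) == 0) {
 *		// pathlen counts the trailing NUL:
 *		// strlen(path) == pathlen - 1
 *	}
 */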
int
build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx)
{
    vnode_t vp, tvp;
    vnode_t vp_with_iocount;
    vnode_t proc_root_dir_vp;
    char *end;
    const char *str;
    int len;
    int ret = 0;
    int fixhardlink;

    if (first_vp == NULLVP)
        return (EINVAL);

    if (buflen <= 1)
        return (ENOSPC);

    /*
     * Grab the process fd so we can evaluate fd_rdir.
     */
    if (vfs_context_proc(ctx)->p_fd)
        proc_root_dir_vp = vfs_context_proc(ctx)->p_fd->fd_rdir;
    else
        proc_root_dir_vp = NULL;

    vp_with_iocount = NULLVP;
again:
    vp = first_vp;

    end = &buff[buflen-1];
    *end = '\0';

    /*
     * holding the NAME_CACHE_LOCK in shared mode is
     * sufficient to stabilize both the vp->v_parent chain
     * and the 'vp->v_mount->mnt_vnodecovered' chain
     *
     * if we need to drop this lock, we must first grab the v_id
     * from the vnode we're currently working with... if that
     * vnode doesn't already have an io_count reference (the vp
     * passed in comes with one), we must grab a reference
     * after we drop the NAME_CACHE_LOCK via vnode_getwithvid...
     * deadlocks may result if you call vnode_get while holding
     * the NAME_CACHE_LOCK... we lazily release the reference
     * we pick up the next time we encounter a need to drop
     * the NAME_CACHE_LOCK or before we return from this routine
     */
    NAME_CACHE_LOCK_SHARED();

    /*
     * Check if this is the root of a file system.
     */
    while (vp && vp->v_flag & VROOT) {
        if (vp->v_mount == NULL) {
            ret = EINVAL;
            goto out_unlock;
        }
        if ((vp->v_mount->mnt_flag & MNT_ROOTFS) || (vp == proc_root_dir_vp)) {
            /*
             * It's the root of the root file system, so it's
             * just "/".
             */
            *--end = '/';

            goto out_unlock;
        } else {
            /*
             * This is the root of the volume and the caller does not
             * want to cross mount points.  Therefore just return
             * '/' as the relative path.
             */
            if (flags & BUILDPATH_VOLUME_RELATIVE) {
                *--end = '/';
                goto out_unlock;
            } else {
                vp = vp->v_mount->mnt_vnodecovered;
            }
        }
    }

    while ((vp != NULLVP) && (vp->v_parent != vp)) {
        int vid;

        /*
         * For hardlinks the v_name may be stale, so if it's OK
         * to enter a file system, ask the file system for the
         * name and parent (below).
         */
        fixhardlink = (vp->v_flag & VISHARDLINK) &&
            (vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID) &&
            !(flags & BUILDPATH_NO_FS_ENTER);

        if (!fixhardlink) {
            str = vp->v_name;

            if (str == NULL || *str == '\0') {
                if (vp->v_parent != NULL)
                    ret = EINVAL;
                else
                    ret = ENOENT;
                goto out_unlock;
            }
            len = strlen(str);
            /*
             * Check that there's enough space (including space for the '/')
             */
            if ((end - buff) < (len + 1)) {
                ret = ENOSPC;
                goto out_unlock;
            }
            /*
             * Copy the name backwards.
             */
            str += len;

            for (; len > 0; len--)
                *--end = *--str;
            /*
             * Add a path separator.
             */
            *--end = '/';
        }

        /*
         * Walk up the parent chain.
         */
        if (((vp->v_parent != NULLVP) && !fixhardlink) ||
            (flags & BUILDPATH_NO_FS_ENTER)) {

            /*
             * In this if () block we are not allowed to enter the filesystem
             * to conclusively get the most accurate parent identifier.
             * As a result, if 'vp' does not identify '/' and it
             * does not have a valid v_parent, then error out
             * and disallow further path construction
             */
            if ((vp->v_parent == NULLVP) && (rootvnode != vp)) {
                /*
                 * Only '/' is allowed to have a NULL parent
                 * pointer.  Upper level callers should ideally
                 * re-drive name lookup on receiving an ENOENT.
                 */
                ret = ENOENT;

                /* The code below will exit early if 'tvp = vp' == NULL */
            }
            vp = vp->v_parent;

            /*
             * if the vnode we have in hand isn't a directory and it
             * has a v_parent, then we started with the resource fork
             * so skip up to avoid getting a duplicate copy of the
             * file name in the path.
             */
            if (vp && !vnode_isdir(vp) && vp->v_parent) {
                vp = vp->v_parent;
            }
        } else {
            /*
             * No parent, go get it if supported.
             */
            struct vnode_attr va;
            vnode_t dvp;

            /*
             * Make sure file system supports obtaining a path from id.
             */
            if (!(vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID)) {
                ret = ENOENT;
                goto out_unlock;
            }
            vid = vp->v_id;

            NAME_CACHE_UNLOCK();

            if (vp != first_vp && vp != vp_with_iocount) {
                if (vp_with_iocount) {
                    vnode_put(vp_with_iocount);
                    vp_with_iocount = NULLVP;
                }
                if (vnode_getwithvid(vp, vid))
                    goto again;
                vp_with_iocount = vp;
            }
            VATTR_INIT(&va);
            VATTR_WANTED(&va, va_parentid);

            if (fixhardlink) {
                VATTR_WANTED(&va, va_name);
                MALLOC_ZONE(va.va_name, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
            } else {
                va.va_name = NULL;
            }
            /*
             * Ask the file system for its parent id and for its name (optional).
             */
            ret = vnode_getattr(vp, &va, ctx);

            if (fixhardlink) {
                if ((ret == 0) && (VATTR_IS_SUPPORTED(&va, va_name))) {
                    str = va.va_name;
                    vnode_update_identity(vp, NULL, str, strlen(str), 0, VNODE_UPDATE_NAME);
                } else if (vp->v_name) {
                    str = vp->v_name;
                    ret = 0;
                } else {
                    ret = ENOENT;
                    goto bad_news;
                }
                len = strlen(str);

                /*
                 * Check that there's enough space.
                 */
                if ((end - buff) < (len + 1)) {
                    ret = ENOSPC;
                } else {
                    /* Copy the name backwards. */
                    str += len;

                    for (; len > 0; len--) {
                        *--end = *--str;
                    }
                    /*
                     * Add a path separator.
                     */
                    *--end = '/';
                }
bad_news:
                FREE_ZONE(va.va_name, MAXPATHLEN, M_NAMEI);
            }
            if (ret || !VATTR_IS_SUPPORTED(&va, va_parentid)) {
                ret = ENOENT;
                goto out;
            }
            /*
             * Ask the file system for the parent vnode.
             */
            if ((ret = VFS_VGET(vp->v_mount, (ino64_t)va.va_parentid, &dvp, ctx)))
                goto out;

            if (!fixhardlink && (vp->v_parent != dvp))
                vnode_update_identity(vp, dvp, NULL, 0, 0, VNODE_UPDATE_PARENT);

            if (vp_with_iocount)
                vnode_put(vp_with_iocount);
            vp = dvp;
            vp_with_iocount = vp;

            NAME_CACHE_LOCK_SHARED();

            /*
             * if the vnode we have in hand isn't a directory and it
             * has a v_parent, then we started with the resource fork
             * so skip up to avoid getting a duplicate copy of the
             * file name in the path.
             */
            if (vp && !vnode_isdir(vp) && vp->v_parent)
                vp = vp->v_parent;
        }

        /*
         * When a mount point is crossed switch the vp.
         * Continue until we find the root or we find
         * a vnode that's not the root of a mounted
         * file system.
         */
        tvp = vp;

        while (tvp) {
            if (tvp == proc_root_dir_vp)
                goto out_unlock;    /* encountered the root */

            if (!(tvp->v_flag & VROOT) || !tvp->v_mount)
                break;              /* not the root of a mounted FS */

            if (flags & BUILDPATH_VOLUME_RELATIVE) {
                /* Do not cross over mount points */
                tvp = NULL;
            } else {
                tvp = tvp->v_mount->mnt_vnodecovered;
            }
        }
        if (tvp == NULLVP)
            goto out_unlock;
        vp = tvp;

        if (vp && (flags & BUILDPATH_CHECKACCESS)) {
            vid = vp->v_id;

            NAME_CACHE_UNLOCK();

            if (vp != first_vp && vp != vp_with_iocount) {
                if (vp_with_iocount) {
                    vnode_put(vp_with_iocount);
                    vp_with_iocount = NULLVP;
                }
                if (vnode_getwithvid(vp, vid))
                    goto again;
                vp_with_iocount = vp;
            }
            if ((ret = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx)))
                goto out;   /* no peeking */

            NAME_CACHE_LOCK_SHARED();
        }
    }
out_unlock:
    NAME_CACHE_UNLOCK();
out:
    if (vp_with_iocount)
        vnode_put(vp_with_iocount);
    /*
     * Slide the name down to the beginning of the buffer.
     */
    memmove(buff, end, &buff[buflen] - end);

    /*
     * length includes the trailing zero byte
     */
    *outlen = &buff[buflen] - end;

    /*
     * One of the parents was moved during path reconstruction.
     * The caller is interested in knowing whether any of the
     * parents moved via BUILDPATH_CHECK_MOVED, so return EAGAIN.
     */
    if ((ret == ENOENT) && (flags & BUILDPATH_CHECK_MOVED)) {
        ret = EAGAIN;
    }

    return (ret);
}


/*
 * return NULLVP if vp's parent doesn't
 * exist, or we can't get a valid iocount
 * else return the parent of vp
 */
vnode_t
vnode_getparent(vnode_t vp)
{
    vnode_t pvp = NULLVP;
    int pvid;

    NAME_CACHE_LOCK_SHARED();
    /*
     * v_parent is stable behind the name_cache lock
     * however, the only thing we can really guarantee
     * is that we've grabbed a valid iocount on the
     * parent of 'vp' at the time we took the name_cache lock...
     * once we drop the lock, vp could get re-parented
     */
    if ( (pvp = vp->v_parent) != NULLVP ) {
        pvid = pvp->v_id;

        NAME_CACHE_UNLOCK();

        if (vnode_getwithvid(pvp, pvid) != 0)
            pvp = NULL;
    } else
        NAME_CACHE_UNLOCK();
    return (pvp);
}

const char *
vnode_getname(vnode_t vp)
{
    const char *name = NULL;

    NAME_CACHE_LOCK_SHARED();

    if (vp->v_name)
        name = vfs_addname(vp->v_name, strlen(vp->v_name), 0, 0);
    NAME_CACHE_UNLOCK();

    return (name);
}

void
vnode_putname(const char *name)
{
    vfs_removename(name);
}
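
/*
 * Illustrative pairing (added commentary; 'vp' is assumed to come from
 * the caller): a name obtained via vnode_getname() holds a reference
 * in the string table and must be released with vnode_putname():
 *
 *	const char *nm = vnode_getname(vp);
 *	if (nm != NULL) {
 *		printf("vnode name: %s\n", nm);
 *		vnode_putname(nm);
 *	}
 */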

static const char unknown_vnodename[] = "(unknown vnode name)";

const char *
vnode_getname_printable(vnode_t vp)
{
    const char *name = vnode_getname(vp);
    if (name != NULL)
        return name;

    switch (vp->v_type) {
    case VCHR:
    case VBLK:
        {
        /*
         * Create an artificial dev name from
         * major and minor device number
         */
        char dev_name[64];
        (void) snprintf(dev_name, sizeof(dev_name),
            "%c(%u, %u)", VCHR == vp->v_type ? 'c':'b',
            major(vp->v_rdev), minor(vp->v_rdev));
        /*
         * Add the newly created dev name to the name
         * cache to allow easier cleanup.  Also,
         * vfs_addname allocates memory for the new name
         * and returns it.
         */
        NAME_CACHE_LOCK_SHARED();
        name = vfs_addname(dev_name, strlen(dev_name), 0, 0);
        NAME_CACHE_UNLOCK();
        return name;
        }
    default:
        return unknown_vnodename;
    }
}

void
vnode_putname_printable(const char *name)
{
    if (name == unknown_vnodename)
        return;
    vnode_putname(name);
}


/*
 * if VNODE_UPDATE_PARENT, and we can take
 * a reference on dvp, then update vp with
 * its new parent... if vp already has a parent,
 * then drop the reference vp held on it
 *
 * if VNODE_UPDATE_NAME,
 * then drop string ref on v_name if it exists, and if name is non-NULL
 * then pick up a string reference on name and record it in v_name...
 * optionally pass in the length and hashval of name if known
 *
 * if VNODE_UPDATE_CACHE, flush the name cache entries associated with vp
 */
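/*
 * Illustrative sketch (added commentary; the rename scenario and the
 * variable names are assumptions): after a rename, a file system might
 * refresh both the parent link and the cached name in one call:
 *
 *	vnode_update_identity(vp, new_dvp, new_name, strlen(new_name), 0,
 *	    VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME);
 *
 * Passing 0 for the hash value lets the string-table code compute it.
 */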
void
vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, uint32_t name_hashval, int flags)
{
    struct namecache *ncp;
    vnode_t old_parentvp = NULLVP;
#if NAMEDSTREAMS
    int isstream = (vp->v_flag & VISNAMEDSTREAM);
    int kusecountbumped = 0;
#endif
    kauth_cred_t tcred = NULL;
    const char *vname = NULL;
    const char *tname = NULL;

    if (flags & VNODE_UPDATE_PARENT) {
        if (dvp && vnode_ref(dvp) != 0) {
            dvp = NULLVP;
        }
#if NAMEDSTREAMS
        /* Don't count a stream's parent ref during unmounts */
        if (isstream && dvp && (dvp != vp) && (dvp != vp->v_parent) && (dvp->v_type == VREG)) {
            vnode_lock_spin(dvp);
            ++dvp->v_kusecount;
            kusecountbumped = 1;
            vnode_unlock(dvp);
        }
#endif
    } else {
        dvp = NULLVP;
    }
    if ( (flags & VNODE_UPDATE_NAME) ) {
        if (name != vp->v_name) {
            if (name && *name) {
                if (name_len == 0)
                    name_len = strlen(name);
                tname = vfs_addname(name, name_len, name_hashval, 0);
            }
        } else
            flags &= ~VNODE_UPDATE_NAME;
    }
    if ( (flags & (VNODE_UPDATE_PURGE | VNODE_UPDATE_PARENT | VNODE_UPDATE_CACHE | VNODE_UPDATE_NAME)) ) {

        NAME_CACHE_LOCK();

        if ( (flags & VNODE_UPDATE_PURGE) ) {

            if (vp->v_parent)
                vp->v_parent->v_nc_generation++;

            while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
                cache_delete(ncp, 1);

            while ( (ncp = LIST_FIRST(&vp->v_ncchildren)) )
                cache_delete(ncp, 1);

            /*
             * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
             */
            tcred = vp->v_cred;
            vp->v_cred = NOCRED;
            vp->v_authorized_actions = 0;
        }
        if ( (flags & VNODE_UPDATE_NAME) ) {
            vname = vp->v_name;
            vp->v_name = tname;
        }
        if (flags & VNODE_UPDATE_PARENT) {
            if (dvp != vp && dvp != vp->v_parent) {
                old_parentvp = vp->v_parent;
                vp->v_parent = dvp;
                dvp = NULLVP;

                if (old_parentvp)
                    flags |= VNODE_UPDATE_CACHE;
            }
        }
        if (flags & VNODE_UPDATE_CACHE) {
            while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
                cache_delete(ncp, 1);
        }
        NAME_CACHE_UNLOCK();

        if (vname != NULL)
            vfs_removename(vname);

        if (IS_VALID_CRED(tcred))
            kauth_cred_unref(&tcred);
    }
    if (dvp != NULLVP) {
#if NAMEDSTREAMS
        /* Back-out the ref we took if we lost a race for vp->v_parent. */
        if (kusecountbumped) {
            vnode_lock_spin(dvp);
            if (dvp->v_kusecount > 0)
                --dvp->v_kusecount;
            vnode_unlock(dvp);
        }
#endif
        vnode_rele(dvp);
    }
    if (old_parentvp) {
        struct uthread *ut;

#if NAMEDSTREAMS
        if (isstream) {
            vnode_lock_spin(old_parentvp);
            if ((old_parentvp->v_type != VDIR) && (old_parentvp->v_kusecount > 0))
                --old_parentvp->v_kusecount;
            vnode_unlock(old_parentvp);
        }
#endif
        ut = get_bsdthread_info(current_thread());

        /*
         * indicate to vnode_rele that it shouldn't do a
         * vnode_reclaim at this time... instead it will
         * chain the vnode to the uu_vreclaims list...
         * we'll be responsible for calling vnode_reclaim
         * on each of the vnodes in this list...
         */
        ut->uu_defer_reclaims = 1;
        ut->uu_vreclaims = NULLVP;

        while ( (vp = old_parentvp) != NULLVP ) {

            vnode_lock_spin(vp);
            vnode_rele_internal(vp, 0, 0, 1);

            /*
             * check to see if the vnode is now in the state
             * that would have triggered a vnode_reclaim in vnode_rele
             * if it is, we save its parent pointer and then NULL
             * out the v_parent field... we'll drop the reference
             * that was held on the next iteration of this loop...
             * this short circuits a potential deep recursion if we
             * have a long chain of parents in this state...
             * we'll sit in this loop until we run into
             * a parent in this chain that is not in this state
             *
             * make our check and the vnode_rele atomic
             * with respect to the current vnode we're working on
             * by holding the vnode lock
             * if vnode_rele deferred the vnode_reclaim and has put
             * this vnode on the list to be reaped by us, then
             * it has left this vnode with an iocount == 1
             */
            if ( (vp->v_iocount == 1) && (vp->v_usecount == 0) &&
                ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM)) {
                /*
                 * vnode_rele wanted to do a vnode_reclaim on this vnode
                 * it should be sitting on the head of the uu_vreclaims chain
                 * pull the parent pointer now so that when we do the
                 * vnode_reclaim for each of the vnodes in the uu_vreclaims
                 * list, we won't recurse back through here
                 *
                 * need to do a convert here in case vnode_rele_internal
                 * returns with the lock held in the spin mode... it
                 * can drop and retake the lock under certain circumstances
                 */
                vnode_lock_convert(vp);

                NAME_CACHE_LOCK();
                old_parentvp = vp->v_parent;
                vp->v_parent = NULLVP;
                NAME_CACHE_UNLOCK();
            } else {
                /*
                 * we're done... we ran into a vnode that isn't
                 * being terminated
                 */
                old_parentvp = NULLVP;
            }
            vnode_unlock(vp);
        }
        ut->uu_defer_reclaims = 0;

        while ( (vp = ut->uu_vreclaims) != NULLVP) {
            ut->uu_vreclaims = vp->v_defer_reclaimlist;

            /*
             * vnode_put will drive the vnode_reclaim if
             * we are still the only reference on this vnode
             */
            vnode_put(vp);
        }
    }
}


/*
 * Mark a vnode as having multiple hard links.  HFS makes use of this
 * because it keeps track of each link separately, and wants to know
 * which link was actually used.
 *
 * This will cause the name cache to force a VNOP_LOOKUP on the vnode
 * so that HFS can post-process the lookup.  Also, volfs will call
 * VNOP_GETATTR2 to determine the parent, instead of using v_parent.
 */
void vnode_setmultipath(vnode_t vp)
{
    vnode_lock_spin(vp);

    /*
     * In theory, we're changing the vnode's identity as far as the
     * name cache is concerned, so we ought to grab the name cache lock
     * here.  However, there is already a race, and grabbing the name
     * cache lock only makes the race window slightly smaller.
     *
     * The race happens because the vnode already exists in the name
     * cache, and could be found by one thread before another thread
     * can set the hard link flag.
     */

    vp->v_flag |= VISHARDLINK;

    vnode_unlock(vp);
}



/*
 * backwards compatibility
 */
void vnode_uncache_credentials(vnode_t vp)
{
    vnode_uncache_authorized_action(vp, KAUTH_INVALIDATE_CACHED_RIGHTS);
}


/*
 * use the exclusive form of NAME_CACHE_LOCK to protect the update of the
 * following fields in the vnode: v_cred_timestamp, v_cred, v_authorized_actions
 * we use this lock so that we can look at the v_cred and v_authorized_actions
 * atomically while behind the NAME_CACHE_LOCK in shared mode in 'cache_lookup_path',
 * which is the super-hot path... if we are updating the authorized actions for this
 * vnode, we are already in the super-slow and far less frequented path so it's not
 * that bad that we take the lock exclusive for this case... of course we strive
 * to hold it for the minimum amount of time possible
 */

void vnode_uncache_authorized_action(vnode_t vp, kauth_action_t action)
{
    kauth_cred_t tcred = NOCRED;

    NAME_CACHE_LOCK();

    vp->v_authorized_actions &= ~action;

    if (action == KAUTH_INVALIDATE_CACHED_RIGHTS &&
        IS_VALID_CRED(vp->v_cred)) {
        /*
         * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
         */
        tcred = vp->v_cred;
        vp->v_cred = NOCRED;
    }
    NAME_CACHE_UNLOCK();

    if (tcred != NOCRED)
        kauth_cred_unref(&tcred);
}


extern int bootarg_vnode_cache_defeat;  /* default = 0, from bsd_init.c */

boolean_t
vnode_cache_is_authorized(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
{
    kauth_cred_t ucred;
    boolean_t retval = FALSE;

    /* Boot argument to defeat rights caching */
    if (bootarg_vnode_cache_defeat)
        return FALSE;

    if ( (vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
        /*
         * a TTL is enabled on the rights cache... handle it here
         * a TTL of 0 indicates that no rights should be cached
         */
        if (vp->v_mount->mnt_authcache_ttl) {
            if ( !(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL) ) {
                /*
                 * For filesystems marked only MNTK_AUTH_OPAQUE (generally network ones),
                 * we will only allow a SEARCH right on a directory to be cached...
                 * that cached right always has a default TTL associated with it
                 */
                if (action != KAUTH_VNODE_SEARCH || vp->v_type != VDIR)
                    vp = NULLVP;
            }
            if (vp != NULLVP && vnode_cache_is_stale(vp) == TRUE) {
                vnode_uncache_authorized_action(vp, vp->v_authorized_actions);
                vp = NULLVP;
            }
        } else
            vp = NULLVP;
    }
    if (vp != NULLVP) {
        ucred = vfs_context_ucred(ctx);

        NAME_CACHE_LOCK_SHARED();

        if (vp->v_cred == ucred && (vp->v_authorized_actions & action) == action)
            retval = TRUE;

        NAME_CACHE_UNLOCK();
    }
    return retval;
}


void vnode_cache_authorized_action(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
{
    kauth_cred_t tcred = NOCRED;
    kauth_cred_t ucred;
    struct timeval tv;
    boolean_t ttl_active = FALSE;

    ucred = vfs_context_ucred(ctx);

    if (!IS_VALID_CRED(ucred) || action == 0)
        return;

    if ( (vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
        /*
         * a TTL is enabled on the rights cache... handle it here
         * a TTL of 0 indicates that no rights should be cached
         */
        if (vp->v_mount->mnt_authcache_ttl == 0)
            return;

        if ( !(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL) ) {
            /*
             * only cache SEARCH action for filesystems marked
             * MNTK_AUTH_OPAQUE on VDIRs...
             * the lookup_path code will time these out
             */
            if ( (action & ~KAUTH_VNODE_SEARCH) || vp->v_type != VDIR )
                return;
        }
        ttl_active = TRUE;

        microuptime(&tv);
    }
    NAME_CACHE_LOCK();

    if (vp->v_cred != ucred) {
        kauth_cred_ref(ucred);
        /*
         * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
         */
        tcred = vp->v_cred;
        vp->v_cred = ucred;
        vp->v_authorized_actions = 0;
    }
    if (ttl_active == TRUE && vp->v_authorized_actions == 0) {
        /*
         * only reset the timestamp on the
         * first authorization cached after the previous
         * timer has expired or we're switching creds...
         * 'vnode_cache_is_authorized' will clear the
         * authorized actions if the TTL is active and
         * it has expired
         */
        vp->v_cred_timestamp = tv.tv_sec;
    }
    vp->v_authorized_actions |= action;

    NAME_CACHE_UNLOCK();

    if (IS_VALID_CRED(tcred))
        kauth_cred_unref(&tcred);
}
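
/*
 * Illustrative flow (added commentary; the surrounding authorization
 * logic lives outside this file): callers such as vnode_authorize()
 * consult the cache first and record granted rights afterwards,
 * roughly:
 *
 *	if (vnode_cache_is_authorized(vp, ctx, action))
 *		return (0);	// cached grant, skip the full check
 *	error = <full authorization check>;
 *	if (error == 0)
 *		vnode_cache_authorized_action(vp, ctx, action);
 */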

boolean_t vnode_cache_is_stale(vnode_t vp)
{
    struct timeval tv;
    boolean_t retval;

    microuptime(&tv);

    if ((tv.tv_sec - vp->v_cred_timestamp) > vp->v_mount->mnt_authcache_ttl)
        retval = TRUE;
    else
        retval = FALSE;

    return retval;
}



/*
 * Returns:	0		Success
 *		ERECYCLE	vnode was recycled from underneath us.  Force lookup to be re-driven from namei.
 *				This errno value should not be seen by anyone outside of the kernel.
 */
int
cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
    vfs_context_t ctx, int *dp_authorized, vnode_t last_dp)
{
    char *cp;               /* pointer into pathname argument */
    int vid;
    int vvid = 0;           /* protected by vp != NULLVP */
    vnode_t vp = NULLVP;
    vnode_t tdp = NULLVP;
    kauth_cred_t ucred;
    boolean_t ttl_enabled = FALSE;
    struct timeval tv;
    mount_t mp;
    unsigned int hash;
    int error = 0;

#if CONFIG_TRIGGERS
    vnode_t trigger_vp;
#endif /* CONFIG_TRIGGERS */

    ucred = vfs_context_ucred(ctx);
    ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH);

    NAME_CACHE_LOCK_SHARED();

    if ( dp->v_mount && (dp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
        ttl_enabled = TRUE;
        microuptime(&tv);
    }
    for (;;) {
        /*
         * Search a directory.
         *
         * The cn_hash value is for use by cache_lookup
         * The last component of the filename is left accessible via
         * cnp->cn_nameptr for callers that need the name.
         */
        hash = 0;
        cp = cnp->cn_nameptr;

        while (*cp && (*cp != '/')) {
            hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
        }
        /*
         * the crc generator can legitimately generate
         * a 0... however, 0 for us means that we
         * haven't computed a hash, so use 1 instead
         */
        if (hash == 0)
            hash = 1;
        cnp->cn_hash = hash;
        cnp->cn_namelen = cp - cnp->cn_nameptr;

        ndp->ni_pathlen -= cnp->cn_namelen;
        ndp->ni_next = cp;

        /*
         * Replace multiple slashes by a single slash and trailing slashes
         * by a null.  This must be done before VNOP_LOOKUP() because some
         * fs's don't know about trailing slashes.  Remember if there were
         * trailing slashes to handle symlinks, existing non-directories
         * and non-existing files that won't be directories specially later.
         */
        while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) {
            cp++;
            ndp->ni_pathlen--;

            if (*cp == '\0') {
                ndp->ni_flag |= NAMEI_TRAILINGSLASH;
                *ndp->ni_next = '\0';
            }
        }
        ndp->ni_next = cp;

        cnp->cn_flags &= ~(MAKEENTRY | ISLASTCN | ISDOTDOT);

        if (*cp == '\0')
            cnp->cn_flags |= ISLASTCN;

        if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
            cnp->cn_flags |= ISDOTDOT;

        *dp_authorized = 0;
#if NAMEDRSRCFORK
        /*
         * Process a request for a file's resource fork.
         *
         * Consume the _PATH_RSRCFORKSPEC suffix and tag the path.
         */
        if ((ndp->ni_pathlen == sizeof(_PATH_RSRCFORKSPEC)) &&
            (cp[1] == '.' && cp[2] == '.') &&
            bcmp(cp, _PATH_RSRCFORKSPEC, sizeof(_PATH_RSRCFORKSPEC)) == 0) {
            /* Skip volfs file systems that don't support native streams. */
            if ((dp->v_mount != NULL) &&
                (dp->v_mount->mnt_flag & MNT_DOVOLFS) &&
                (dp->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0) {
                goto skiprsrcfork;
            }
            cnp->cn_flags |= CN_WANTSRSRCFORK;
            cnp->cn_flags |= ISLASTCN;
            ndp->ni_next[0] = '\0';
            ndp->ni_pathlen = 1;
        }
skiprsrcfork:
#endif

#if CONFIG_MACF

        /*
         * Name cache provides authorization caching (see below)
         * that will short circuit MAC checks in lookup().
         * We must perform MAC check here.  On denial
         * dp_authorized will remain 0 and second check will
         * be performed in lookup().
         */
        if (!(cnp->cn_flags & DONOTAUTH)) {
            error = mac_vnode_check_lookup(ctx, dp, cnp);
            if (error) {
                NAME_CACHE_UNLOCK();
                goto errorout;
            }
        }
#endif /* MAC */
        if (ttl_enabled && ((tv.tv_sec - dp->v_cred_timestamp) > dp->v_mount->mnt_authcache_ttl))
            break;

        /*
         * NAME_CACHE_LOCK holds these fields stable
         *
         * We can't cache KAUTH_VNODE_SEARCHBYANYONE for root correctly
         * so we make an ugly check for root here.  root is always
         * allowed and breaking out of here only to find out that it is
         * authorized by virtue of being root is very very expensive.
         */
        if ((dp->v_cred != ucred || !(dp->v_authorized_actions & KAUTH_VNODE_SEARCH)) &&
            !(dp->v_authorized_actions & KAUTH_VNODE_SEARCHBYANYONE) &&
            !vfs_context_issuser(ctx))
            break;

        /*
         * indicate that we're allowed to traverse this directory...
         * even if we fail the cache lookup or decide to bail for
         * some other reason, this information is valid and is used
         * to avoid doing a vnode_authorize before the call to VNOP_LOOKUP
         */
        *dp_authorized = 1;

        if ( (cnp->cn_flags & (ISLASTCN | ISDOTDOT)) ) {
            if (cnp->cn_nameiop != LOOKUP)
                break;
            if (cnp->cn_flags & LOCKPARENT)
                break;
            if (cnp->cn_flags & NOCACHE)
                break;
            if (cnp->cn_flags & ISDOTDOT) {
                /*
                 * Force directory hardlinks to go to
                 * file system for ".." requests.
                 */
                if (dp && (dp->v_flag & VISHARDLINK)) {
                    break;
                }
                /*
                 * Quit here only if we can't use
                 * the parent directory pointer or
                 * don't have one.  Otherwise, we'll
                 * use it below.
                 */
                if ((dp->v_flag & VROOT) ||
                    dp == ndp->ni_rootdir ||
                    dp->v_parent == NULLVP)
                    break;
            }
        }

        if ((cnp->cn_flags & CN_SKIPNAMECACHE)) {
            /*
             * Force lookup to go to the filesystem with
             * all cnp fields set up.
             */
            break;
        }

        /*
         * "." and ".." aren't supposed to be cached, so check
         * for them before checking the cache.
         */
        if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')
            vp = dp;
        else if ( (cnp->cn_flags & ISDOTDOT) )
            vp = dp->v_parent;
        else {
            if ( (vp = cache_lookup_locked(dp, cnp)) == NULLVP)
                break;

            if ( (vp->v_flag & VISHARDLINK) ) {
                /*
                 * The file system wants a VNOP_LOOKUP on this vnode
                 */
                vp = NULL;
                break;
            }
        }
        if ( (cnp->cn_flags & ISLASTCN) )
            break;

        if (vp->v_type != VDIR) {
            if (vp->v_type != VLNK)
                vp = NULL;
            break;
        }

        if ( (mp = vp->v_mountedhere) && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
            vnode_t tmp_vp = mp->mnt_realrootvp;
            if (tmp_vp == NULLVP || mp->mnt_generation != mount_generation ||
                mp->mnt_realrootvp_vid != tmp_vp->v_id)
                break;
            vp = tmp_vp;
        }

#if CONFIG_TRIGGERS
        /*
         * After traversing all mountpoints stacked here, if we have a
         * trigger in hand, resolve it.  Note that we don't need to
         * leave the fast path if the mount has already happened.
         */
        if (vp->v_resolve)
            break;
#endif /* CONFIG_TRIGGERS */


        dp = vp;
        vp = NULLVP;

        cnp->cn_nameptr = ndp->ni_next + 1;
        ndp->ni_pathlen--;
        while (*cnp->cn_nameptr == '/') {
            cnp->cn_nameptr++;
            ndp->ni_pathlen--;
        }
    }
    if (vp != NULLVP)
        vvid = vp->v_id;
    vid = dp->v_id;

    NAME_CACHE_UNLOCK();

    if ((vp != NULLVP) && (vp->v_type != VLNK) &&
        ((cnp->cn_flags & (ISLASTCN | LOCKPARENT | WANTPARENT | SAVESTART)) == ISLASTCN)) {
        /*
         * if we've got a child and it's the last component, and
         * the lookup doesn't need to return the parent then we
         * can skip grabbing an iocount on the parent, since all
         * we're going to do with it is a vnode_put just before
         * we return from 'lookup'.  If it's a symbolic link,
         * we need the parent in case the link happens to be
         * a relative pathname.
         */
        tdp = dp;
        dp = NULLVP;
    } else {
need_dp:
        /*
         * return the last directory we looked at
         * with an io reference held.  If it was the one passed
         * in as a result of the last iteration of VNOP_LOOKUP,
         * it should already hold an io ref.  No need to increase ref.
         */
        if (last_dp != dp) {

            if (dp == ndp->ni_usedvp) {
                /*
                 * if this vnode matches the one passed in via USEDVP
                 * then this context already holds an io_count... just
                 * use vnode_get to get an extra ref for lookup to play
                 * with... can't use the getwithvid variant here because
                 * it will block behind a vnode_drain which would result
                 * in a deadlock (since we already own an io_count that the
                 * vnode_drain is waiting on)... vnode_get grabs the io_count
                 * immediately w/o waiting... it always succeeds
                 */
                vnode_get(dp);
            } else if ((error = vnode_getwithvid_drainok(dp, vid))) {
                /*
                 * failure indicates the vnode
                 * changed identity or is being
                 * TERMINATED... in either case
                 * punt this lookup.
                 *
                 * don't necessarily return ENOENT, though, because
                 * we really want to go back to disk and make sure it's
                 * there or not if someone else is changing this
                 * vnode.  That being said, the one case where we do want
                 * to return ENOENT is when the vnode's mount point is
                 * in the process of unmounting and we might cause a deadlock
                 * in our attempt to take an iocount.  An ENODEV error return
                 * from vnode_get* is an indication of this, but we change that
                 * to ENOENT for upper layers.
                 */
                if (error == ENODEV) {
                    error = ENOENT;
                } else {
                    error = ERECYCLE;
                }
                goto errorout;
            }
        }
    }
    if (vp != NULLVP) {
        if ( (vnode_getwithvid_drainok(vp, vvid)) ) {
            vp = NULLVP;

            /*
             * can't get reference on the vp we'd like
             * to return... if we didn't grab a reference
             * on the directory (due to fast path bypass),
             * then we need to do it now... we can't return
             * with both ni_dvp and ni_vp NULL, and no
             * error condition
             */
            if (dp == NULLVP) {
                dp = tdp;
                goto need_dp;
            }
        }
    }

    ndp->ni_dvp = dp;
    ndp->ni_vp = vp;

#if CONFIG_TRIGGERS
    trigger_vp = vp ? vp : dp;
    if ((error == 0) && (trigger_vp != NULLVP) && vnode_isdir(trigger_vp)) {
        error = vnode_trigger_resolve(trigger_vp, ndp, ctx);
        if (error) {
            if (vp)
                vnode_put(vp);
            if (dp)
                vnode_put(dp);
            goto errorout;
        }
    }
#endif /* CONFIG_TRIGGERS */

errorout:
    /*
     * If we came into cache_lookup_path after an iteration of the lookup loop that
     * resulted in a call to VNOP_LOOKUP, then VNOP_LOOKUP returned a vnode with an io ref
     * on it.  It is now the job of cache_lookup_path to drop the ref on this vnode
     * when it is no longer needed.  If we get to this point, and last_dp is not NULL
     * and it is ALSO not the dvp we want to return to the caller of this function, it MUST be
     * the case that we got to a subsequent path component and this previous vnode is
     * no longer needed.  We can then drop the io ref on it.
     */
    if ((last_dp != NULLVP) && (last_dp != ndp->ni_dvp)) {
        vnode_put(last_dp);
    }

    // 'error' was initialized to 0 and is returned unchanged if no error case occurred.
    return error;
}


static vnode_t
cache_lookup_locked(vnode_t dvp, struct componentname *cnp)
{
    struct namecache *ncp;
    struct nchashhead *ncpp;
    long namelen = cnp->cn_namelen;
    unsigned int hashval = cnp->cn_hash;

    if (nc_disabled) {
        return NULL;
    }

    ncpp = NCHHASH(dvp, cnp->cn_hash);
    LIST_FOREACH(ncp, ncpp, nc_hash) {
        if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
            if (memcmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0)
                break;
        }
    }
    if (ncp == 0) {
        /*
         * We failed to find an entry
         */
        NCHSTAT(ncs_miss);
        return (NULL);
    }
    NCHSTAT(ncs_goodhits);

    return (ncp->nc_vp);
}


unsigned int hash_string(const char *cp, int len);
//
// Have to take a len argument because we may only need to
// hash part of a componentname.
//
unsigned int
hash_string(const char *cp, int len)
{
    unsigned hash = 0;

    if (len) {
        while (len--) {
            hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
        }
    } else {
        while (*cp != '\0') {
            hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
        }
    }
    /*
     * the crc generator can legitimately generate
     * a 0... however, 0 for us means that we
     * haven't computed a hash, so use 1 instead
     */
    if (hash == 0)
        hash = 1;
    return hash;
}
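
/*
 * Illustrative note (added commentary): a non-zero len hashes exactly
 * len bytes, while len == 0 means "hash up to the NUL", so for a
 * NUL-terminated component the two forms agree:
 *
 *	hash_string("usr", 3) == hash_string("usr", 0)
 *
 * cache_lookup_path() relies on the len form to hash one component of
 * a longer path in place, without copying it out first.
 */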


/*
 * Lookup an entry in the cache
 *
 * We don't do this if the segment name is long, simply so the cache
 * can avoid holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought.  If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned.  If the lookup determines that the name does not exist
 * (negative caching), a status of ENOENT is returned.  If the lookup
 * fails, a status of zero is returned.
 */

int
cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
    struct namecache *ncp;
    struct nchashhead *ncpp;
    long namelen = cnp->cn_namelen;
    unsigned int hashval;
    boolean_t have_exclusive = FALSE;
    uint32_t vid;
    vnode_t vp;

    if (cnp->cn_hash == 0)
        cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
    hashval = cnp->cn_hash;

    if (nc_disabled) {
        return 0;
    }

    NAME_CACHE_LOCK_SHARED();

relook:
    ncpp = NCHHASH(dvp, cnp->cn_hash);
    LIST_FOREACH(ncp, ncpp, nc_hash) {
        if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
            if (memcmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0)
                break;
        }
    }
    /* We failed to find an entry */
    if (ncp == 0) {
        NCHSTAT(ncs_miss);
        NAME_CACHE_UNLOCK();
        return (0);
    }

    /* We don't want to have an entry, so dump it */
    if ((cnp->cn_flags & MAKEENTRY) == 0) {
        if (have_exclusive == TRUE) {
            NCHSTAT(ncs_badhits);
            cache_delete(ncp, 1);
            NAME_CACHE_UNLOCK();
            return (0);
        }
        NAME_CACHE_UNLOCK();
        NAME_CACHE_LOCK();
        have_exclusive = TRUE;
        goto relook;
    }
    vp = ncp->nc_vp;

    /* We found a "positive" match, return the vnode */
    if (vp) {
        NCHSTAT(ncs_goodhits);

        vid = vp->v_id;
        NAME_CACHE_UNLOCK();

        if (vnode_getwithvid(vp, vid)) {
#if COLLECT_STATS
            NAME_CACHE_LOCK();
            NCHSTAT(ncs_badvid);
            NAME_CACHE_UNLOCK();
#endif
            return (0);
        }
        *vpp = vp;
        return (-1);
    }

    /* We found a negative match, and want to create it, so purge */
    if (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) {
        if (have_exclusive == TRUE) {
            NCHSTAT(ncs_badhits);
            cache_delete(ncp, 1);
            NAME_CACHE_UNLOCK();
            return (0);
        }
        NAME_CACHE_UNLOCK();
        NAME_CACHE_LOCK();
        have_exclusive = TRUE;
        goto relook;
    }

    /*
     * We found a "negative" match, ENOENT notifies client of this match.
     */
    NCHSTAT(ncs_neghits);

    NAME_CACHE_UNLOCK();
    return (ENOENT);
}
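
/*
 * Illustrative use of the tri-state return (added commentary; 'dvp',
 * 'vp' and 'cnp' are assumed to come from the caller):
 *
 *	switch (cache_lookup(dvp, &vp, cnp)) {
 *	case -1:	// positive hit: vp holds an iocount'd vnode
 *		break;
 *	case ENOENT:	// negative hit: name is known not to exist
 *		break;
 *	case 0:		// miss: fall through to VNOP_LOOKUP()
 *		break;
 *	}
 */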

const char *
cache_enter_create(vnode_t dvp, vnode_t vp, struct componentname *cnp)
{
    const char *strname;

    if (cnp->cn_hash == 0)
        cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);

    /*
     * grab 2 references on the string entered
     * one for the cache_enter_locked to consume
     * and the second to be consumed by v_name (vnode_create call point)
     */
    strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, TRUE, 0);

    NAME_CACHE_LOCK();

    cache_enter_locked(dvp, vp, cnp, strname);

    NAME_CACHE_UNLOCK();

    return (strname);
}


/*
 * Add an entry to the cache...
 * but first check to see if the directory
 * that this entry is to be associated with has
 * had any cache_purges applied since we took
 * our identity snapshot... this check needs to
 * be done behind the name cache lock
 */
void
cache_enter_with_gen(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, int gen)
{

    if (cnp->cn_hash == 0)
        cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);

    NAME_CACHE_LOCK();

    if (dvp->v_nc_generation == gen)
        (void)cache_enter_locked(dvp, vp, cnp, NULL);

    NAME_CACHE_UNLOCK();
}


/*
 * Add an entry to the cache.
 */
void
cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
    const char *strname;

    if (cnp->cn_hash == 0)
        cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);

    /*
     * grab 1 reference on the string entered
     * for the cache_enter_locked to consume
     */
    strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);

    NAME_CACHE_LOCK();

    cache_enter_locked(dvp, vp, cnp, strname);

    NAME_CACHE_UNLOCK();
}


static void
cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, const char *strname)
{
    struct namecache *ncp, *negp;
    struct nchashhead *ncpp;

    if (nc_disabled)
        return;

    /*
     * if the entry is for -ve caching vp is null
     */
    if ((vp != NULLVP) && (LIST_FIRST(&vp->v_nclinks))) {
        /*
         * someone beat us to the punch..
         * this vnode is already in the cache
         */
        if (strname != NULL)
            vfs_removename(strname);
        return;
    }
    /*
     * We allocate a new entry if we are less than the maximum
     * allowed and the one at the front of the list is in use.
     * Otherwise we use the one at the front of the list.
     */
    if (numcache < desiredNodes &&
        ((ncp = nchead.tqh_first) == NULL ||
         ncp->nc_hash.le_prev != 0)) {
        /*
         * Allocate one more entry
         */
        ncp = (struct namecache *)_MALLOC_ZONE(sizeof(*ncp), M_CACHE, M_WAITOK);
        numcache++;
    } else {
        /*
         * reuse an old entry
         */
        ncp = TAILQ_FIRST(&nchead);
        TAILQ_REMOVE(&nchead, ncp, nc_entry);

        if (ncp->nc_hash.le_prev != 0) {
            /*
             * still in use... we need to
             * delete it before re-using it
             */
            NCHSTAT(ncs_stolen);
            cache_delete(ncp, 0);
        }
    }
    NCHSTAT(ncs_enters);

    /*
     * Fill in cache info, if vp is NULL this is a "negative" cache entry.
     */
    ncp->nc_vp = vp;
    ncp->nc_dvp = dvp;
    ncp->nc_hashval = cnp->cn_hash;

    if (strname == NULL)
        ncp->nc_name = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);
    else
        ncp->nc_name = strname;

    //
    // If the bytes of the name associated with the vnode differ,
    // use the name associated with the vnode since the file system
    // may have set that explicitly in the case of a lookup on a
    // case-insensitive file system where the case of the looked up
    // name differs from what is on disk.  For more details, see:
    //   <rdar://problem/8044697> FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories
    //
    const char *vn_name = vp ? vp->v_name : NULL;
    unsigned int len = vn_name ? strlen(vn_name) : 0;
    if (vn_name && ncp && ncp->nc_name && strncmp(ncp->nc_name, vn_name, len) != 0) {
        unsigned int hash = hash_string(vn_name, len);

        vfs_removename(ncp->nc_name);
        ncp->nc_name = add_name_internal(vn_name, len, hash, FALSE, 0);
        ncp->nc_hashval = hash;
    }

    /*
     * make us the newest entry in the cache
     * i.e. we'll be the last to be stolen
     */
    TAILQ_INSERT_TAIL(&nchead, ncp, nc_entry);

    ncpp = NCHHASH(dvp, cnp->cn_hash);
#if DIAGNOSTIC
    {
        struct namecache *p;

        for (p = ncpp->lh_first; p != 0; p = p->nc_hash.le_next)
            if (p == ncp)
                panic("cache_enter: duplicate");
    }
#endif
    /*
     * make us available to be found via lookup
     */
    LIST_INSERT_HEAD(ncpp, ncp, nc_hash);

    if (vp) {
        /*
         * add to the list of name cache entries
         * that point at vp
         */
        LIST_INSERT_HEAD(&vp->v_nclinks, ncp, nc_un.nc_link);
    } else {
        /*
         * this is a negative cache entry (vp == NULL)
         * stick it on the negative cache list.
         */
        TAILQ_INSERT_TAIL(&neghead, ncp, nc_un.nc_negentry);

        ncs_negtotal++;

        if (ncs_negtotal > desiredNegNodes) {
            /*
             * if we've reached our desired limit
             * of negative cache entries, delete
             * the oldest
             */
            negp = TAILQ_FIRST(&neghead);
            cache_delete(negp, 1);
        }
    }
    /*
     * add us to the list of name cache entries that
     * are children of dvp
     */
    LIST_INSERT_HEAD(&dvp->v_ncchildren, ncp, nc_child);
}


/*
 * Initialize CRC-32 remainder table.
 */
static void init_crc32(void)
{
    /*
     * the CRC-32 generator polynomial is:
     *    x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^10
     *         + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
     */
    unsigned int crc32_polynomial = 0x04c11db7;
    unsigned int i, j;

    /*
     * pre-calculate the CRC-32 remainder for each possible octet encoding
     */
    for (i = 0; i < 256; i++) {
        unsigned int crc_rem = i << 24;

        for (j = 0; j < 8; j++) {
            if (crc_rem & 0x80000000)
                crc_rem = (crc_rem << 1) ^ crc32_polynomial;
            else
                crc_rem = (crc_rem << 1);
        }
        crc32tab[i] = crc_rem;
    }
}
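
/*
 * Illustrative note (added commentary): with the table above, one
 * MSB-first CRC-32 update step over a byte 'c' is exactly the
 * expression used by hash_string() earlier in this file:
 *
 *	hash = crc32tab[(hash >> 24) ^ c] ^ (hash << 8);
 */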


/*
 * Name cache initialization, from vfs_init() when we are booting
 */
void
nchinit(void)
{
    int i;

    desiredNegNodes = (desiredvnodes / 10);
    desiredNodes = desiredvnodes + desiredNegNodes;
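    /*
     * Worked example (added commentary): with desiredvnodes == 10000
     * this yields desiredNegNodes == 1000 and desiredNodes == 11000;
     * the hash table below is then sized from 2 * desiredNodes buckets
     * (hashinit rounds to a power of 2 and returns the mask).
     */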
1831
1832 TAILQ_INIT(&nchead);
1833 TAILQ_INIT(&neghead);
1834
1835 init_crc32();
1836
1837 nchashtbl = hashinit(MAX(CONFIG_NC_HASH, (2 *desiredNodes)), M_CACHE, &nchash);
1838 nchashmask = nchash;
1839 nchash++;
1840
1841 init_string_table();
1842
1843 /* Allocate name cache lock group attribute and group */
1844 namecache_lck_grp_attr= lck_grp_attr_alloc_init();
1845
1846 namecache_lck_grp = lck_grp_alloc_init("Name Cache", namecache_lck_grp_attr);
1847
1848 /* Allocate name cache lock attribute */
1849 namecache_lck_attr = lck_attr_alloc_init();
1850
1851 /* Allocate name cache lock */
1852 namecache_rw_lock = lck_rw_alloc_init(namecache_lck_grp, namecache_lck_attr);
1853
1854
1855 /* Allocate string cache lock group attribute and group */
1856 strcache_lck_grp_attr= lck_grp_attr_alloc_init();
1857
1858 strcache_lck_grp = lck_grp_alloc_init("String Cache", strcache_lck_grp_attr);
1859
1860 /* Allocate string cache lock attribute */
1861 strcache_lck_attr = lck_attr_alloc_init();
1862
1863 /* Allocate string cache lock */
1864 strtable_rw_lock = lck_rw_alloc_init(strcache_lck_grp, strcache_lck_attr);
1865
1866 for (i = 0; i < NUM_STRCACHE_LOCKS; i++)
1867 lck_mtx_init(&strcache_mtx_locks[i], strcache_lck_grp, strcache_lck_attr);
1868 }
1869
1870 void
1871 name_cache_lock_shared(void)
1872 {
1873 lck_rw_lock_shared(namecache_rw_lock);
1874 }
1875
1876 void
1877 name_cache_lock(void)
1878 {
1879 lck_rw_lock_exclusive(namecache_rw_lock);
1880 }
1881
1882 void
1883 name_cache_unlock(void)
1884 {
1885 lck_rw_done(namecache_rw_lock);
1886 }
1887
1888
1889 int
1890 resize_namecache(u_int newsize)
1891 {
1892 struct nchashhead *new_table;
1893 struct nchashhead *old_table;
1894 struct nchashhead *old_head, *head;
1895 struct namecache *entry, *next;
1896 uint32_t i, hashval;
1897 int dNodes, dNegNodes;
1898 u_long new_size, old_size;
1899
1900 dNegNodes = (newsize / 10);
1901 dNodes = newsize + dNegNodes;
1902
1903 // we don't support shrinking yet
1904 if (dNodes <= desiredNodes) {
1905 return 0;
1906 }
1907 new_table = hashinit(2 * dNodes, M_CACHE, &nchashmask);
1908 new_size = nchashmask + 1;
1909
1910 if (new_table == NULL) {
1911 return ENOMEM;
1912 }
1913
1914 NAME_CACHE_LOCK();
1915 // do the switch!
1916 old_table = nchashtbl;
1917 nchashtbl = new_table;
1918 old_size = nchash;
1919 nchash = new_size;
1920
1921 // walk the old table and insert all the entries into
1922 // the new table
1923 //
1924 for(i=0; i < old_size; i++) {
1925 old_head = &old_table[i];
1926 for (entry=old_head->lh_first; entry != NULL; entry=next) {
1927 //
1928 // XXXdbg - Beware: this assumes that hash_string() does
1929 // the same thing as what happens in
1930 // lookup() over in vfs_lookup.c
1931 hashval = hash_string(entry->nc_name, 0);
1932 entry->nc_hashval = hashval;
1933 head = NCHHASH(entry->nc_dvp, hashval);
1934
1935 next = entry->nc_hash.le_next;
1936 LIST_INSERT_HEAD(head, entry, nc_hash);
1937 }
1938 }
1939 desiredNodes = dNodes;
1940 desiredNegNodes = dNegNodes;
1941
1942 NAME_CACHE_UNLOCK();
1943 FREE(old_table, M_CACHE);
1944
1945 return 0;
1946 }
1947
static void
cache_delete(struct namecache *ncp, int age_entry)
{
	NCHSTAT(ncs_deletes);

	if (ncp->nc_vp) {
		LIST_REMOVE(ncp, nc_un.nc_link);
	} else {
		TAILQ_REMOVE(&neghead, ncp, nc_un.nc_negentry);
		ncs_negtotal--;
	}
	LIST_REMOVE(ncp, nc_child);

	LIST_REMOVE(ncp, nc_hash);
	/*
	 * this field is used to indicate
	 * that the entry is in use and
	 * must be deleted before it can
	 * be reused...
	 */
	ncp->nc_hash.le_prev = NULL;

	if (age_entry) {
		/*
		 * make it the next one available
		 * for cache_enter's use
		 */
		TAILQ_REMOVE(&nchead, ncp, nc_entry);
		TAILQ_INSERT_HEAD(&nchead, ncp, nc_entry);
	}
	vfs_removename(ncp->nc_name);
	ncp->nc_name = NULL;
}


/*
 * purge all entries associated with the
 * specified vnode from the name cache;
 * the vnode's cached credential and
 * authorized actions are reset as well
 */
void
cache_purge(vnode_t vp)
{
	struct namecache *ncp;
	kauth_cred_t tcred = NULL;

	if ((LIST_FIRST(&vp->v_nclinks) == NULL) &&
	    (LIST_FIRST(&vp->v_ncchildren) == NULL) &&
	    (vp->v_cred == NOCRED) &&
	    (vp->v_parent == NULLVP))
		return;

	NAME_CACHE_LOCK();

	/*
	 * bump the parent's generation count so that
	 * in-flight lookups through that directory
	 * can detect the change
	 */
	if (vp->v_parent)
		vp->v_parent->v_nc_generation++;

	while ((ncp = LIST_FIRST(&vp->v_nclinks)))
		cache_delete(ncp, 1);

	while ((ncp = LIST_FIRST(&vp->v_ncchildren)))
		cache_delete(ncp, 1);

	/*
	 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
	 */
	tcred = vp->v_cred;
	vp->v_cred = NOCRED;
	vp->v_authorized_actions = 0;

	NAME_CACHE_UNLOCK();

	if (IS_VALID_CRED(tcred))
		kauth_cred_unref(&tcred);
}

/*
 * Purge all negative cache entries that are children of the
 * given vnode.  A case-insensitive file system (or any file
 * system that has multiple equivalent names for the same
 * directory entry) can use this when creating or renaming
 * to remove negative entries that may no longer apply.
 */
void
cache_purge_negatives(vnode_t vp)
{
	struct namecache *ncp, *next_ncp;

	NAME_CACHE_LOCK();

	LIST_FOREACH_SAFE(ncp, &vp->v_ncchildren, nc_child, next_ncp) {
		if (ncp->nc_vp == NULL)
			cache_delete(ncp, 1);
	}

	NAME_CACHE_UNLOCK();
}

/*
 * Flush all entries referencing a particular filesystem
 * by scanning every hash chain for entries whose parent
 * directory lives on the given mount.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct nchashhead *ncpp;
	struct namecache *ncp;

	NAME_CACHE_LOCK();
	/* Scan hash tables for applicable entries */
	for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
restart:
		for (ncp = ncpp->lh_first; ncp != 0; ncp = ncp->nc_hash.le_next) {
			if (ncp->nc_dvp->v_mount == mp) {
				/*
				 * cache_delete() unlinks ncp from this chain,
				 * so restart the scan of the current bucket
				 */
				cache_delete(ncp, 0);
				goto restart;
			}
		}
	}
	NAME_CACHE_UNLOCK();
}




//
// String ref routines
//
static LIST_HEAD(stringhead, string_t) *string_ref_table;
static u_long string_table_mask;
static uint32_t filled_buckets = 0;

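/*
 * A string_t is a refcounted, interned copy of a name.  The string
 * bytes themselves are allocated inline, immediately after the
 * structure (see the MALLOC in add_name_internal()), so freeing the
 * entry frees the string as well.
 */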
typedef struct string_t {
	LIST_ENTRY(string_t) hash_chain;
	const char *str;
	uint32_t refcount;
} string_t;


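/*
 * Locking scheme for the string table: strtable_rw_lock is held
 * shared by anyone reading or updating individual chains (the
 * chains themselves are serialized by one of the NUM_STRCACHE_LOCKS
 * bucket mutexes, selected by the bucket index), and held exclusive
 * only here, where the table itself is replaced.
 */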
static void
resize_string_ref_table(void)
{
	struct stringhead *new_table;
	struct stringhead *old_table;
	struct stringhead *old_head, *head;
	string_t *entry, *next;
	uint32_t i, hashval;
	u_long new_mask, old_mask;

	/*
	 * need to hold the table lock exclusively
	 * in order to grow the table... also need to
	 * recheck whether a resize is still needed once
	 * we've taken the lock exclusively, in case some
	 * other thread beat us to the punch
	 */
	lck_rw_lock_exclusive(strtable_rw_lock);

	if (4 * filled_buckets < ((string_table_mask + 1) * 3)) {
		/* still under 3/4 full; someone else must have resized it */
		lck_rw_done(strtable_rw_lock);
		return;
	}
	new_table = hashinit((string_table_mask + 1) * 2, M_CACHE, &new_mask);

	if (new_table == NULL) {
		printf("failed to resize the string ref hash table.\n");
		lck_rw_done(strtable_rw_lock);
		return;
	}

	// do the switch!
	old_table = string_ref_table;
	string_ref_table = new_table;
	old_mask = string_table_mask;
	string_table_mask = new_mask;
	filled_buckets = 0;

	// walk the old table and insert all the entries into
	// the new table
	//
	for (i = 0; i <= old_mask; i++) {
		old_head = &old_table[i];
		for (entry = old_head->lh_first; entry != NULL; entry = next) {
			hashval = hash_string((const char *)entry->str, 0);
			head = &string_ref_table[hashval & string_table_mask];
			if (head->lh_first == NULL) {
				filled_buckets++;
			}
			next = entry->hash_chain.le_next;
			LIST_INSERT_HEAD(head, entry, hash_chain);
		}
	}
	lck_rw_done(strtable_rw_lock);

	FREE(old_table, M_CACHE);
}


static void
init_string_table(void)
{
	string_ref_table = hashinit(CONFIG_VFS_NAMES, M_CACHE, &string_table_mask);
}


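/*
 * Intern 'name' (len bytes, not necessarily null-terminated) and
 * return a pointer to the shared, refcounted copy.  A hashval of 0
 * means "compute it for me".  Each successful call must eventually
 * be balanced by a call to vfs_removename().
 */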
const char *
vfs_addname(const char *name, uint32_t len, u_int hashval, u_int flags)
{
	return (add_name_internal(name, len, hashval, FALSE, flags));
}


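/*
 * Look the name up in the string table, bumping the refcount if it
 * is already present and inserting a new inline-allocated copy if
 * it is not.  The table lock is taken shared to keep the hash
 * stable; the individual chain is protected by its bucket mutex,
 * taken as a spinlock and converted to a full mutex only when we
 * may have to block in MALLOC.
 */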
static const char *
add_name_internal(const char *name, uint32_t len, u_int hashval, boolean_t need_extra_ref, __unused u_int flags)
{
	struct stringhead *head;
	string_t *entry;
	uint32_t chain_len = 0;
	uint32_t hash_index;
	uint32_t lock_index;
	char *ptr;

	/*
	 * if the length already accounts for the null-byte, then
	 * subtract one so later on we don't index past the end
	 * of the string.
	 */
	if (len > 0 && name[len-1] == '\0') {
		len--;
	}
	if (hashval == 0) {
		hashval = hash_string(name, len);
	}

	/*
	 * take this lock 'shared' to keep the hash table stable;
	 * anyone who decides to grow the table will take it
	 * exclusively instead
	 */
	lck_rw_lock_shared(strtable_rw_lock);

	/*
	 * If the table gets more than 3/4 full, resize it
	 */
	if (4 * filled_buckets >= ((string_table_mask + 1) * 3)) {
		lck_rw_done(strtable_rw_lock);

		resize_string_ref_table();

		lck_rw_lock_shared(strtable_rw_lock);
	}
	hash_index = hashval & string_table_mask;
	lock_index = hash_index % NUM_STRCACHE_LOCKS;

	head = &string_ref_table[hash_index];

	lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);

	for (entry = head->lh_first; entry != NULL; chain_len++, entry = entry->hash_chain.le_next) {
		if (memcmp(entry->str, name, len) == 0 && entry->str[len] == 0) {
			entry->refcount++;
			break;
		}
	}
	if (entry == NULL) {
		/* MALLOC may block, so trade the spinlock for a full mutex */
		lck_mtx_convert_spin(&strcache_mtx_locks[lock_index]);
		/*
		 * it wasn't already there so add it.
		 */
		MALLOC(entry, string_t *, sizeof(string_t) + len + 1, M_TEMP, M_WAITOK);

		if (head->lh_first == NULL) {
			OSAddAtomic(1, &filled_buckets);
		}
		/* the string is stored inline, right after the header */
		ptr = (char *)((char *)entry + sizeof(string_t));
		strncpy(ptr, name, len);
		ptr[len] = '\0';
		entry->str = ptr;
		entry->refcount = 1;
		LIST_INSERT_HEAD(head, entry, hash_chain);
	}
	if (need_extra_ref == TRUE)
		entry->refcount++;

	lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
	lck_rw_done(strtable_rw_lock);

	return (const char *)entry->str;
}


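/*
 * Drop one reference on an interned name previously returned by
 * vfs_addname().  The match is by pointer identity, not by string
 * contents.  When the last reference goes away the entry is
 * unhashed here and freed once the locks have been dropped.
 * Returns 0 on success, ENOENT if the pointer was not found.
 */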
int
vfs_removename(const char *nameref)
{
	struct stringhead *head;
	string_t *entry;
	uint32_t hashval;
	uint32_t hash_index;
	uint32_t lock_index;
	int retval = ENOENT;

	hashval = hash_string(nameref, 0);

	/*
	 * take this lock 'shared' to keep the hash table stable;
	 * anyone who decides to grow the table will take it
	 * exclusively instead
	 */
	lck_rw_lock_shared(strtable_rw_lock);
	/*
	 * must compute the head behind the table lock
	 * since the size and location of the table
	 * can change on the fly
	 */
	hash_index = hashval & string_table_mask;
	lock_index = hash_index % NUM_STRCACHE_LOCKS;

	head = &string_ref_table[hash_index];

	lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);

	for (entry = head->lh_first; entry != NULL; entry = entry->hash_chain.le_next) {
		if (entry->str == nameref) {
			entry->refcount--;

			if (entry->refcount == 0) {
				LIST_REMOVE(entry, hash_chain);

				if (head->lh_first == NULL) {
					OSAddAtomic(-1, &filled_buckets);
				}
			} else {
				/*
				 * still referenced; clear 'entry' so
				 * we don't free it below
				 */
				entry = NULL;
			}
			retval = 0;
			break;
		}
	}
	lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
	lck_rw_done(strtable_rw_lock);

	if (entry != NULL)
		FREE(entry, M_TEMP);

	return retval;
}


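/*
 * Usage sketch (illustrative only, not part of this file): callers
 * pair the two routines around the lifetime of a name reference,
 * e.g.
 *
 *	const char *nm = vfs_addname("example", 7, 0, 0);
 *	...
 *	(void) vfs_removename(nm);
 *
 * Because vfs_removename() matches by pointer, the value passed in
 * must be exactly the pointer that vfs_addname() returned.
 */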
#ifdef DUMP_STRING_TABLE
void
dump_string_table(void)
{
	struct stringhead *head;
	string_t *entry;
	u_long i;

	lck_rw_lock_shared(strtable_rw_lock);

	for (i = 0; i <= string_table_mask; i++) {
		head = &string_ref_table[i];
		for (entry = head->lh_first; entry != NULL; entry = entry->hash_chain.le_next) {
			printf("%6u - %s\n", entry->refcount, entry->str);
		}
	}
	lck_rw_done(strtable_rw_lock);
}
#endif /* DUMP_STRING_TABLE */