]> git.saurik.com Git - apple/xnu.git/blame_incremental - bsd/vfs/vfs_cache.c
xnu-517.3.15.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_cache.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
26/*
27 * Copyright (c) 1989, 1993, 1995
28 * The Regents of the University of California. All rights reserved.
29 *
30 * This code is derived from software contributed to Berkeley by
31 * Poul-Henning Kamp of the FreeBSD Project.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 * must display the following acknowledgement:
43 * This product includes software developed by the University of
44 * California, Berkeley and its contributors.
45 * 4. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 *
62 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
63 */
64#include <sys/param.h>
65#include <sys/systm.h>
66#include <sys/time.h>
67#include <sys/mount.h>
68#include <sys/vnode.h>
69#include <sys/namei.h>
70#include <sys/errno.h>
71#include <sys/malloc.h>
72
73/*
74 * Name caching works as follows:
75 *
76 * Names found by directory scans are retained in a cache
77 * for future reference. It is managed LRU, so frequently
78 * used names will hang around. Cache is indexed by hash value
79 * obtained from (vp, name) where vp refers to the directory
80 * containing name.
81 *
82 * If it is a "negative" entry, (i.e. for a name that is known NOT to
83 * exist) the vnode pointer will be NULL.
84 *
85 * For simplicity (and economy of storage), names longer than
86 * a maximum length of NCHNAMLEN are not cached; they occur
87 * infrequently in any case, and are almost never of interest.
88 *
89 * Upon reaching the last segment of a path, if the reference
90 * is for DELETE, or NOCACHE is set (rewrite), and the
91 * name is located in the cache, it will be dropped.
92 */
93
/*
 * Structures associated with name cacheing.
 */

/*
 * Map a (directory vnode, name hash) pair to a hash bucket.  The
 * directory's pointer value and its v_id generation number are folded
 * into the name hash so entries for a recycled directory vnode do not
 * alias entries for its previous identity.
 */
#define NCHHASH(dvp, hash_val) \
	(&nchashtbl[((u_long)(dvp) ^ ((dvp)->v_id ^ (hash_val))) & nchash])
LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
u_long	nchash;				/* size of hash table - 1 (used as a mask) */
long	numcache;			/* number of cache entries allocated */
TAILQ_HEAD(, namecache) nclruhead;	/* LRU chain */
struct	nchstats nchstats;		/* cache effectiveness statistics */
u_long	nextvnodeid = 0;		/* generation counter; cache_purge() hands out ++nextvnodeid as the new v_id */
int	doingcache = 1;			/* 1 => enable the cache */

/*
 * PURGE(ncp): delete an entry from its hash list and move it to the
 * front of the LRU list for immediate reuse.  Its nc_name string
 * reference is dropped and nc_name is cleared; le_prev == 0 marks the
 * entry as off its hash chain.
 *
 * NOTE: THESE MACROS CAN BLOCK (in the call to remove_name())
 * SO BE CAREFUL IF YOU HOLD POINTERS TO nclruhead OR
 * nchashtbl.
 */
#if DIAGNOSTIC
#define PURGE(ncp) { \
	if (ncp->nc_hash.le_prev == 0) \
		panic("namecache purge le_prev"); \
	if (ncp->nc_hash.le_next == ncp) \
		panic("namecache purge le_next"); \
	LIST_REMOVE(ncp, nc_hash); \
	ncp->nc_hash.le_prev = 0; \
	TAILQ_REMOVE(&nclruhead, ncp, nc_lru); \
	TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru); \
	/* this has to come last because it could block */ \
	remove_name(ncp->nc_name); \
	ncp->nc_name = NULL; \
}
#else
#define PURGE(ncp) { \
	LIST_REMOVE(ncp, nc_hash); \
	ncp->nc_hash.le_prev = 0; \
	TAILQ_REMOVE(&nclruhead, ncp, nc_lru); \
	TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru); \
	/* this has to come last because it could block */ \
	remove_name(ncp->nc_name); \
	ncp->nc_name = NULL; \
}
#endif /* DIAGNOSTIC */

/*
 * Move an entry that has been used to the tail of the LRU list
 * so that it will be preserved for future use.
 * An entry whose tqe_next is 0 (already the LRU tail) is left in place.
 */
#define TOUCH(ncp) { \
	if (ncp->nc_lru.tqe_next != 0) { \
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru); \
		TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); \
	} \
}
151
152
//
// Simple additive string hash: each byte is weighted by its 1-based
// position in the string.  A len of 0 means "hash up to the NUL
// terminator"; a non-zero len hashes exactly that many bytes, which is
// needed when only part of a componentname should be hashed.
//
static unsigned int
hash_string(const char *str, int len)
{
	unsigned int hashval = 0;

	if (len == 0) {
		unsigned int pos = 1;

		while (*str != '\0') {
			hashval += (unsigned char)*str * pos;
			str++;
			pos++;
		}
	} else {
		int pos;

		for (pos = 1; pos <= len; pos++, str++) {
			hashval += (unsigned char)*str * pos;
		}
	}

	return hashval;
}
174
175
176
177
/*
 * Lookup an entry in the cache
 *
 * We don't do this if the segment name is long, simply so the cache
 * can avoid holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought. If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned. If the lookup determines that the name does not exist
 * (negative cacheing), a status of ENOENT is returned. If the lookup
 * fails, a status of zero is returned.
 */

int
cache_lookup(dvp, vpp, cnp)
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
{
	register struct namecache *ncp, *nnp;
	register struct nchashhead *ncpp;
	register long namelen = cnp->cn_namelen;
	char *nameptr = cnp->cn_nameptr;

	/* Cache disabled: also strip MAKEENTRY so the caller won't enter one. */
	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		return (0);
	}

	/* Scan the hash chain for (dvp, name); cn_hash was precomputed. */
	ncpp = NCHHASH(dvp, cnp->cn_hash);
	for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) {
		/* capture the successor first: PURGE() unlinks ncp */
		nnp = ncp->nc_hash.le_next;

		if (ncp->nc_dvp == dvp &&
		    strncmp(ncp->nc_name, nameptr, namelen) == 0 &&
		    ncp->nc_name[namelen] == 0) {
			/* Make sure the vp isn't stale. */
			if ((ncp->nc_dvpid != dvp->v_id) ||
			    (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id)) {
				nchstats.ncs_falsehits++;
				PURGE(ncp);
				continue;
			}
			break;
		}
	}

	/* We failed to find an entry */
	if (ncp == 0) {
		nchstats.ncs_miss++;
		return (0);
	}

	/* We don't want to have an entry, so dump it */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		nchstats.ncs_badhits++;
		PURGE(ncp);
		return (0);
	}

	/* We found a "positive" match, return the vnode */
	if (ncp->nc_vp) {
		/* Don't return a vnode that is being initialized, locked,
		 * terminated or reclaimed; drop the entry instead. */
		if (ncp->nc_vp->v_flag & (VUINIT|VXLOCK|VTERMINATE|VORECLAIM)) {
			PURGE(ncp);
			return (0);
		}

		nchstats.ncs_goodhits++;
		TOUCH(ncp);	/* refresh LRU position */
		*vpp = ncp->nc_vp;
		return (-1);
	}

	/* We found a negative match, and want to create it, so purge */
	if (cnp->cn_nameiop == CREATE) {
		nchstats.ncs_badhits++;
		PURGE(ncp);
		return (0);
	}

	/*
	 * We found a "negative" match, ENOENT notifies client of this match.
	 * The nc_vpid field records whether this is a whiteout
	 * (it holds ISWHITEOUT, set by cache_enter(), since nc_vpid is
	 * otherwise unused for negative entries).
	 */
	nchstats.ncs_neghits++;
	TOUCH(ncp);
	cnp->cn_flags |= ncp->nc_vpid;
	return (ENOENT);
}
269
/*
 * Add an entry to the cache.
 *
 * dvp is the directory, cnp the component name, and vp the resolved
 * vnode -- or NULL to enter a negative ("known absent") entry.
 */
void
cache_enter(dvp, vp, cnp)
	struct vnode *dvp;
	struct vnode *vp;
	struct componentname *cnp;
{
	register struct namecache *ncp;
	register struct nchashhead *ncpp;

	if (!doingcache)
		return;

	/*
	 * We allocate a new entry if we are less than the maximum
	 * allowed and the one at the front of the LRU list is in use.
	 * Otherwise we use the one at the front of the LRU list.
	 */
	if (numcache < desiredvnodes &&
	    ((ncp = nclruhead.tqh_first) == NULL ||
	    ncp->nc_hash.le_prev != 0)) {
		/* Add one more entry */
		ncp = (struct namecache *)
			_MALLOC_ZONE((u_long)sizeof *ncp, M_CACHE, M_WAITOK);
		numcache++;
	} else if (ncp = nclruhead.tqh_first) {	/* assignment intended */
		/* reuse an old entry */
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
		/* le_prev != 0 means it is still on a hash chain: unlink it
		 * and drop its string reference before reusing it. */
		if (ncp->nc_hash.le_prev != 0) {
#if DIAGNOSTIC
			if (ncp->nc_hash.le_next == ncp)
				panic("cache_enter: le_next");
#endif
			LIST_REMOVE(ncp, nc_hash);
			remove_name(ncp->nc_name);
			ncp->nc_name = NULL;
			ncp->nc_hash.le_prev = 0;
		}
	} else {
		/* give up */
		return;
	}

	/*
	 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
	 * For negative entries, we have to record whether it is a whiteout.
	 * the whiteout flag is stored in the nc_vpid field which is
	 * otherwise unused.
	 */
	ncp->nc_vp = vp;
	if (vp)
		ncp->nc_vpid = vp->v_id;	/* generation for staleness checks */
	else
		ncp->nc_vpid = cnp->cn_flags & ISWHITEOUT;
	ncp->nc_dvp = dvp;
	ncp->nc_dvpid = dvp->v_id;
	/* shared, refcounted copy of the component name */
	ncp->nc_name = add_name(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0);
	TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
	ncpp = NCHHASH(dvp, cnp->cn_hash);
#if DIAGNOSTIC
	{
		register struct namecache *p;

		for (p = ncpp->lh_first; p != 0; p = p->nc_hash.le_next)
			if (p == ncp)
				panic("cache_enter: duplicate");
	}
#endif
	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
}
342
/*
 * Name cache initialization, from vfs_init() when we are booting
 */
void
nchinit()
{
	/* forward declaration for the string-table initializer below */
	static void init_string_table(void);

	TAILQ_INIT(&nclruhead);
	/* at least 4096 buckets; hashinit() stores the mask (size - 1) in nchash */
	nchashtbl = hashinit(MAX(4096, desiredvnodes), M_CACHE, &nchash);

	init_string_table();
}
356
357
/*
 * Grow the name cache hash table to at least newsize buckets, rehashing
 * every existing entry into the new table.  Returns 0 on success or
 * ENOMEM if the new table cannot be allocated.
 */
int
resize_namecache(u_int newsize)
{
	struct nchashhead *new_table;
	struct nchashhead *old_table;
	struct nchashhead *old_head, *head;
	struct namecache *entry, *next;
	uint32_t i;
	u_long new_mask, old_mask;

	// we don't support shrinking yet
	// NOTE(review): nchash is the mask (size - 1), not the size, so
	// this compares newsize against size - 1 -- off by one, but only
	// makes the no-shrink guard slightly more permissive; confirm intent.
	if (newsize < nchash) {
		return 0;
	}

	new_table = hashinit(newsize, M_CACHE, &new_mask);
	if (new_table == NULL) {
		return ENOMEM;
	}

	// do the switch!
	// (swap table and mask first so NCHHASH below indexes the new table)
	old_table = nchashtbl;
	nchashtbl = new_table;
	old_mask = nchash;
	nchash = new_mask;

	// walk the old table and insert all the entries into
	// the new table
	//
	for(i=0; i <= old_mask; i++) {
		old_head = &old_table[i];
		for (entry=old_head->lh_first; entry != NULL; entry=next) {
			//
			// XXXdbg - Beware: this assumes that hash_string() does
			//          the same thing as what happens in
			//          lookup() over in vfs_lookup.c
			head = NCHHASH(entry->nc_dvp, hash_string(entry->nc_name, 0));

			/* capture the successor before the insert rewrites le_next */
			next = entry->nc_hash.le_next;
			LIST_INSERT_HEAD(head, entry, nc_hash);
		}
	}

	FREE(old_table, M_CACHE);

	return 0;
}
405
406
407
408
/*
 * Invalidate all entries referring to a particular vnode.
 *
 * We actually just increment the v_id, that will do it. The entries will
 * be purged by lookup as they get found. If the v_id wraps around, we
 * need to ditch the entire cache, to avoid confusion. No valid vnode will
 * ever have (v_id == 0).
 */
void
cache_purge(vp)
	struct vnode *vp;
{
	struct namecache *ncp;
	struct nchashhead *ncpp;

	/* New generation number; stale entries fail the v_id checks in
	 * cache_lookup() and are purged there lazily. */
	vp->v_id = ++nextvnodeid;
	if (nextvnodeid != 0)
		return;
	/* Counter wrapped to 0 (never a valid id): flush every bucket,
	 * then give vp the first id of the new cycle (1). */
	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
		while (ncp = ncpp->lh_first)
			PURGE(ncp);
	}
	vp->v_id = ++nextvnodeid;
}
433
/*
 * Flush all entries referencing a particular filesystem.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 */
void
cache_purgevfs(mp)
	struct mount *mp;
{
	struct nchashhead *ncpp;
	struct namecache *ncp, *nnp;

	/* Scan hash tables for applicable entries */
	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
		for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) {
			/* capture successor first: PURGE() unlinks ncp */
			nnp = ncp->nc_hash.le_next;
			/* drop entries that are stale (v_id mismatch) or
			 * that belong to the filesystem being flushed */
			if (ncp->nc_dvpid != ncp->nc_dvp->v_id ||
			    (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id) ||
			    ncp->nc_dvp->v_mount == mp) {
				PURGE(ncp);
			}
		}
	}
}
459
460
461
//
// String ref routines
//
// Reference-counted string storage: identical names share a single
// copy.  add_name() returns a pointer that remove_name() later matches
// by pointer identity, not by content.
//
static LIST_HEAD(stringhead, string_t) *string_ref_table;
static u_long string_table_mask;		/* table size - 1, used as a mask */
static uint32_t max_chain_len=0;		/* longest bucket chain seen since last resize */
static struct stringhead *long_chain_head=NULL;	/* bucket holding that longest chain */
static uint32_t filled_buckets=0;		/* buckets with at least one entry (drives resizing) */
static uint32_t num_dups=0;			/* add_name() calls that matched an existing string */
static uint32_t nstrings=0;			/* distinct strings currently stored */

typedef struct string_t {
    LIST_ENTRY(string_t)  hash_chain;	/* bucket linkage */
    unsigned char        *str;		/* points just past this struct; same allocation */
    uint32_t              refcount;	/* outstanding add_name() references */
} string_t;
478
479
480
481static int
482resize_string_ref_table()
483{
484 struct stringhead *new_table;
485 struct stringhead *old_table;
486 struct stringhead *old_head, *head;
487 string_t *entry, *next;
488 uint32_t i, hashval;
489 u_long new_mask, old_mask;
490
491 new_table = hashinit((string_table_mask + 1) * 2, M_CACHE, &new_mask);
492 if (new_table == NULL) {
493 return ENOMEM;
494 }
495
496 // do the switch!
497 old_table = string_ref_table;
498 string_ref_table = new_table;
499 old_mask = string_table_mask;
500 string_table_mask = new_mask;
501
502 printf("resize: max chain len %d, new table size %d\n",
503 max_chain_len, new_mask + 1);
504 max_chain_len = 0;
505 long_chain_head = NULL;
506 filled_buckets = 0;
507
508 // walk the old table and insert all the entries into
509 // the new table
510 //
511 for(i=0; i <= old_mask; i++) {
512 old_head = &old_table[i];
513 for (entry=old_head->lh_first; entry != NULL; entry=next) {
514 hashval = hash_string(entry->str, 0);
515 head = &string_ref_table[hashval & string_table_mask];
516 if (head->lh_first == NULL) {
517 filled_buckets++;
518 }
519
520 next = entry->hash_chain.le_next;
521 LIST_INSERT_HEAD(head, entry, hash_chain);
522 }
523 }
524
525 FREE(old_table, M_CACHE);
526
527 return 0;
528}
529
530
/*
 * Allocate the initial string reference table (4096 buckets);
 * string_table_mask receives the bucket mask (size - 1).
 */
static void
init_string_table(void)
{
	string_ref_table = hashinit(4096, M_CACHE, &string_table_mask);
}
536
537
/*
 * Intern a string: return a shared, reference-counted copy of the first
 * len bytes of name.  hashval, if non-zero, is the caller's precomputed
 * hash of the name (must agree with hash_string()); pass 0 to have it
 * computed here.  flags is currently unused.
 *
 * The returned pointer must eventually be released with remove_name().
 */
char *
add_name(const char *name, size_t len, u_int hashval, u_int flags)
{
	struct stringhead *head;
	string_t *entry;
	int chain_len = 0;

	//
	// If the table gets more than 3/4 full, resize it
	//
	if (4*filled_buckets >= ((string_table_mask + 1) * 3)) {
		if (resize_string_ref_table() != 0) {
			printf("failed to resize the hash table.\n");
		}
	}

	if (hashval == 0) {
		hashval = hash_string(name, len);
	}

	/* search the bucket for an existing copy of this exact string */
	head = &string_ref_table[hashval & string_table_mask];
	for (entry=head->lh_first; entry != NULL; chain_len++, entry=entry->hash_chain.le_next) {
		if (strncmp(entry->str, name, len) == 0 && entry->str[len] == '\0') {
			entry->refcount++;
			num_dups++;
			break;
		}
	}

	if (entry == NULL) {
		// it wasn't already there so add it.
		// Single allocation holds the string_t header and the
		// string bytes (len + 1 for the terminating NUL).
		MALLOC(entry, string_t *, sizeof(string_t) + len + 1, M_TEMP, M_WAITOK);

		// have to get "head" again because we could have blocked
		// in malloc and thus head could have changed.
		//
		head = &string_ref_table[hashval & string_table_mask];
		if (head->lh_first == NULL) {
			filled_buckets++;
		}

		LIST_INSERT_HEAD(head, entry, hash_chain);
		/* string storage lives immediately after the header */
		entry->str = (char *)((char *)entry + sizeof(string_t));
		strncpy(entry->str, name, len);
		entry->str[len] = '\0';
		entry->refcount = 1;

		/* chain_len counted the full bucket above (entry == NULL
		 * means the search walked the whole chain) */
		if (chain_len > max_chain_len) {
			max_chain_len = chain_len;
			long_chain_head = head;
		}

		nstrings++;
	}

	return entry->str;
}
595
/*
 * Release one reference on a string previously returned by add_name().
 * nameref is matched by pointer identity, not by content.  When the
 * last reference goes away the entry is unlinked and freed.
 * Returns 0 on success, ENOENT if nameref is not in the table.
 */
int
remove_name(const char *nameref)
{
	struct stringhead *head;
	string_t *entry;
	uint32_t hashval;

	hashval = hash_string(nameref, 0);
	head = &string_ref_table[hashval & string_table_mask];
	for (entry=head->lh_first; entry != NULL; entry=entry->hash_chain.le_next) {
		/* identity compare: nameref must be the pointer add_name() returned */
		if (entry->str == (unsigned char *)nameref) {
			entry->refcount--;
			if (entry->refcount == 0) {
				LIST_REMOVE(entry, hash_chain);
				/* bucket just became empty? keep the stat honest */
				if (head->lh_first == NULL) {
					filled_buckets--;
				}
				entry->str = NULL;
				nstrings--;

				FREE(entry, M_TEMP);
			} else {
				num_dups--;
			}

			return 0;
		}
	}

	return ENOENT;
}
627
628
629void
630dump_string_table(void)
631{
632 struct stringhead *head;
633 string_t *entry;
634 int i;
635
636 for(i=0; i <= string_table_mask; i++) {
637 head = &string_ref_table[i];
638 for (entry=head->lh_first; entry != NULL; entry=entry->hash_chain.le_next) {
639 printf("%6d - %s\n", entry->refcount, entry->str);
640 }
641 }
642}