]> git.saurik.com Git - apple/xnu.git/blame - bsd/vfs/vfs_cache.c
xnu-517.9.4.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_cache.c
CommitLineData
1c79356b 1/*
55e303ae 2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
1c79356b
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
e5568f75
A
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
1c79356b 11 *
e5568f75
A
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b
A
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
e5568f75
A
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
1c79356b
A
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23/*
24 * Copyright (c) 1989, 1993, 1995
25 * The Regents of the University of California. All rights reserved.
26 *
27 * This code is derived from software contributed to Berkeley by
28 * Poul-Henning Kamp of the FreeBSD Project.
29 *
30 * Redistribution and use in source and binary forms, with or without
31 * modification, are permitted provided that the following conditions
32 * are met:
33 * 1. Redistributions of source code must retain the above copyright
34 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in the
37 * documentation and/or other materials provided with the distribution.
38 * 3. All advertising materials mentioning features or use of this software
39 * must display the following acknowledgement:
40 * This product includes software developed by the University of
41 * California, Berkeley and its contributors.
42 * 4. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
45 *
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56 * SUCH DAMAGE.
57 *
58 *
59 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
60 */
61#include <sys/param.h>
62#include <sys/systm.h>
63#include <sys/time.h>
64#include <sys/mount.h>
65#include <sys/vnode.h>
66#include <sys/namei.h>
67#include <sys/errno.h>
68#include <sys/malloc.h>
69
70/*
71 * Name caching works as follows:
72 *
73 * Names found by directory scans are retained in a cache
74 * for future reference. It is managed LRU, so frequently
75 * used names will hang around. Cache is indexed by hash value
76 * obtained from (vp, name) where vp refers to the directory
77 * containing name.
78 *
79 * If it is a "negative" entry, (i.e. for a name that is known NOT to
80 * exist) the vnode pointer will be NULL.
81 *
82 * For simplicity (and economy of storage), names longer than
83 * a maximum length of NCHNAMLEN are not cached; they occur
84 * infrequently in any case, and are almost never of interest.
85 *
86 * Upon reaching the last segment of a path, if the reference
87 * is for DELETE, or NOCACHE is set (rewrite), and the
88 * name is located in the cache, it will be dropped.
89 */
90
/*
 * Structures associated with name cacheing.
 */
/*
 * Hash a (directory vnode, name-hash) pair to a bucket head.
 * Mixes the dvp pointer, the dvp's capability id (v_id), and the
 * precomputed name hash, then masks into the table.
 */
#define NCHHASH(dvp, hash_val) \
	(&nchashtbl[((u_long)(dvp) ^ ((dvp)->v_id ^ (hash_val))) & nchash])
LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
u_long	nchash;				/* size of hash table - 1 */
long	numcache;			/* number of cache entries allocated */
TAILQ_HEAD(, namecache) nclruhead;	/* LRU chain */
struct	nchstats nchstats;		/* cache effectiveness statistics */
u_long	nextvnodeid = 0;		/* next v_id handed out by cache_purge() */
int 	doingcache = 1;			/* 1 => enable the cache */
103
/*
 * Delete an entry from its hash list and move it to the front
 * of the LRU list for immediate reuse.
 *
 * An entry with nc_hash.le_prev == 0 is treated as "not on any
 * hash chain" throughout this file; PURGE clears le_prev after
 * unhashing so cache_enter() can recognize free entries.
 *
 * NOTE: THESE MACROS CAN BLOCK (in the call to remove_name())
 *	 SO BE CAREFUL IF YOU HOLD POINTERS TO nclruhead OR
 *	 nchashtbl.
 */
#if DIAGNOSTIC
#define PURGE(ncp)  {						\
	if (ncp->nc_hash.le_prev == 0)				\
		panic("namecache purge le_prev");		\
	if (ncp->nc_hash.le_next == ncp)			\
		panic("namecache purge le_next");		\
	LIST_REMOVE(ncp, nc_hash);				\
	ncp->nc_hash.le_prev = 0;				\
	TAILQ_REMOVE(&nclruhead, ncp, nc_lru);			\
	TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);		\
	/* this has to come last because it could block */	\
	remove_name(ncp->nc_name);				\
	ncp->nc_name = NULL;					\
}
#else
#define PURGE(ncp)  {						\
	LIST_REMOVE(ncp, nc_hash);				\
	ncp->nc_hash.le_prev = 0;				\
	TAILQ_REMOVE(&nclruhead, ncp, nc_lru);			\
	TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);		\
	/* this has to come last because it could block */	\
	remove_name(ncp->nc_name);				\
	ncp->nc_name = NULL;					\
}
#endif /* DIAGNOSTIC */
137
/*
 * Move an entry that has been used to the tail of the LRU list
 * so that it will be preserved for future use.
 *
 * The tqe_next != 0 check skips the entry already at the tail
 * (its tqe_next is NULL), avoiding a pointless remove/insert.
 */
#define TOUCH(ncp)  {				\
	if (ncp->nc_lru.tqe_next != 0) {	\
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);		\
		TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);	\
	}					\
}
148
55e303ae
A
149
//
// Position-weighted string hash: character k (1-based) contributes
// (unsigned char)c * k to the sum.
//
// Have to take a len argument because we may only need to
// hash part of a componentname: len == 0 means "hash up to the
// NUL terminator", len > 0 means "hash exactly len bytes".
//
static unsigned int
hash_string(const char *str, int len)
{
	unsigned int sum = 0;
	unsigned int weight = 1;

	if (len == 0) {
		// NUL-terminated: walk until the terminator.
		while (*str != '\0') {
			sum += (unsigned char)*str++ * weight++;
		}
	} else {
		// Counted: consume exactly len bytes.
		while (len-- > 0) {
			sum += (unsigned char)*str++ * weight++;
		}
	}

	return sum;
}
171
172
173
174
1c79356b
A
/*
 * Lookup an entry in the cache
 *
 * We don't do this if the segment name is long, simply so the cache
 * can avoid holding long names (which would either waste space, or
 * add greatly to the complexity).
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought. If the lookup
 * succeeds, the vnode is returned in *vpp, and a status of -1 is
 * returned. If the lookup determines that the name does not exist
 * (negative cacheing), a status of ENOENT is returned. If the lookup
 * fails, a status of zero is returned.
 */

int
cache_lookup(dvp, vpp, cnp)
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
{
	register struct namecache *ncp, *nnp;
	register struct nchashhead *ncpp;
	register long namelen = cnp->cn_namelen;
	char *nameptr = cnp->cn_nameptr;

	/* Cache globally disabled: tell namei not to make an entry either. */
	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		return (0);
	}

	ncpp = NCHHASH(dvp, cnp->cn_hash);
	/* nnp is saved up front because PURGE() unlinks ncp from the chain. */
	for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) {
		nnp = ncp->nc_hash.le_next;

		if (ncp->nc_dvp == dvp &&
		    strncmp(ncp->nc_name, nameptr, namelen) == 0 &&
		    ncp->nc_name[namelen] == 0) {
			/* Make sure the vp isn't stale. */
			if ((ncp->nc_dvpid != dvp->v_id) ||
			    (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id)) {
				/* v_id changed since the entry was made. */
				nchstats.ncs_falsehits++;
				PURGE(ncp);
				continue;
			}
			break;
		}
	}

	/* We failed to find an entry */
	if (ncp == 0) {
		nchstats.ncs_miss++;
		return (0);
	}

	/* We don't want to have an entry, so dump it */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		nchstats.ncs_badhits++;
		PURGE(ncp);
		return (0);
	}

	/* We found a "positive" match, return the vnode */
	if (ncp->nc_vp) {
		/* Don't return vnodes that are initializing or being torn down. */
		if (ncp->nc_vp->v_flag & (VUINIT|VXLOCK|VTERMINATE|VORECLAIM)) {
			PURGE(ncp);
			return (0);
		}

		nchstats.ncs_goodhits++;
		TOUCH(ncp);
		*vpp = ncp->nc_vp;
		return (-1);
	}

	/* We found a negative match, and want to create it, so purge */
	if (cnp->cn_nameiop == CREATE) {
		nchstats.ncs_badhits++;
		PURGE(ncp);
		return (0);
	}

	/*
	 * We found a "negative" match, ENOENT notifies client of this match.
	 * The nc_vpid field records whether this is a whiteout
	 * (it holds ISWHITEOUT or 0 for negative entries -- see cache_enter()).
	 */
	nchstats.ncs_neghits++;
	TOUCH(ncp);
	cnp->cn_flags |= ncp->nc_vpid;
	return (ENOENT);
}
266
267/*
268 * Add an entry to the cache.
269 */
270void
271cache_enter(dvp, vp, cnp)
272 struct vnode *dvp;
273 struct vnode *vp;
274 struct componentname *cnp;
275{
276 register struct namecache *ncp;
277 register struct nchashhead *ncpp;
278
279 if (!doingcache)
280 return;
281
1c79356b
A
282 /*
283 * We allocate a new entry if we are less than the maximum
284 * allowed and the one at the front of the LRU list is in use.
285 * Otherwise we use the one at the front of the LRU list.
286 */
287 if (numcache < desiredvnodes &&
288 ((ncp = nclruhead.tqh_first) == NULL ||
289 ncp->nc_hash.le_prev != 0)) {
290 /* Add one more entry */
291 ncp = (struct namecache *)
292 _MALLOC_ZONE((u_long)sizeof *ncp, M_CACHE, M_WAITOK);
293 numcache++;
294 } else if (ncp = nclruhead.tqh_first) {
295 /* reuse an old entry */
296 TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
297 if (ncp->nc_hash.le_prev != 0) {
298#if DIAGNOSTIC
299 if (ncp->nc_hash.le_next == ncp)
300 panic("cache_enter: le_next");
301#endif
302 LIST_REMOVE(ncp, nc_hash);
55e303ae
A
303 remove_name(ncp->nc_name);
304 ncp->nc_name = NULL;
1c79356b
A
305 ncp->nc_hash.le_prev = 0;
306 }
307 } else {
308 /* give up */
309 return;
310 }
311
312 /*
313 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
314 * For negative entries, we have to record whether it is a whiteout.
315 * the whiteout flag is stored in the nc_vpid field which is
316 * otherwise unused.
317 */
318 ncp->nc_vp = vp;
319 if (vp)
320 ncp->nc_vpid = vp->v_id;
321 else
322 ncp->nc_vpid = cnp->cn_flags & ISWHITEOUT;
323 ncp->nc_dvp = dvp;
324 ncp->nc_dvpid = dvp->v_id;
55e303ae 325 ncp->nc_name = add_name(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0);
1c79356b 326 TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
55e303ae 327 ncpp = NCHHASH(dvp, cnp->cn_hash);
1c79356b
A
328#if DIAGNOSTIC
329 {
330 register struct namecache *p;
331
332 for (p = ncpp->lh_first; p != 0; p = p->nc_hash.le_next)
333 if (p == ncp)
334 panic("cache_enter: duplicate");
335 }
336#endif
337 LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
338}
339
/*
 * Name cache initialization, from vfs_init() when we are booting
 *
 * Sizes the hash table from desiredvnodes (minimum 4096 buckets)
 * and brings up the shared string-reference table used by
 * add_name()/remove_name().
 */
void
nchinit()
{
	/* forward declaration; init_string_table is static, defined below */
	static void init_string_table(void);

	TAILQ_INIT(&nclruhead);
	/* hashinit() returns the table and stores (size - 1) into nchash */
	nchashtbl = hashinit(MAX(4096, desiredvnodes), M_CACHE, &nchash);

	init_string_table();
}
353
55e303ae
A
354
/*
 * Grow the name cache hash table to at least newsize buckets.
 * Returns 0 on success, ENOMEM if the new table cannot be allocated.
 * Shrinking is not supported; note the guard compares newsize against
 * nchash, which is the mask (size - 1), not the size itself.
 */
int
resize_namecache(u_int newsize)
{
    struct nchashhead	*new_table;
    struct nchashhead	*old_table;
    struct nchashhead	*old_head, *head;
    struct namecache 	*entry, *next;
    uint32_t		i;
    u_long		new_mask, old_mask;

    // we don't support shrinking yet
    if (newsize < nchash) {
	return 0;
    }

    new_table = hashinit(newsize, M_CACHE, &new_mask);
    if (new_table == NULL) {
	return ENOMEM;
    }

    // do the switch!
    old_table = nchashtbl;
    nchashtbl = new_table;
    old_mask  = nchash;
    nchash    = new_mask;

    // walk the old table and insert all the entries into
    // the new table
    //
    for(i=0; i <= old_mask; i++) {
	old_head = &old_table[i];
	for (entry=old_head->lh_first; entry != NULL; entry=next) {
	    //
	    // XXXdbg - Beware: this assumes that hash_string() does
	    //                  the same thing as what happens in
	    //                  lookup() over in vfs_lookup.c
	    head = NCHHASH(entry->nc_dvp, hash_string(entry->nc_name, 0));

	    /* save the successor before LIST_INSERT_HEAD rewrites le_next */
	    next = entry->nc_hash.le_next;
	    LIST_INSERT_HEAD(head, entry, nc_hash);
	}
    }

    FREE(old_table, M_CACHE);

    return 0;
}
402
403
404
405
1c79356b
A
/*
 * Invalidate a all entries to particular vnode.
 *
 * We actually just increment the v_id, that will do it. The entries will
 * be purged by lookup as they get found. If the v_id wraps around, we
 * need to ditch the entire cache, to avoid confusion. No valid vnode will
 * ever have (v_id == 0).
 */
void
cache_purge(vp)
	struct vnode *vp;
{
	struct namecache *ncp;
	struct nchashhead *ncpp;

	vp->v_id = ++nextvnodeid;
	/* Common case: no wraparound, the bumped v_id is enough. */
	if (nextvnodeid != 0)
		return;
	/*
	 * nextvnodeid wrapped to 0: stale entries could now collide with
	 * recycled ids, so drop every entry in the whole cache.
	 */
	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
		while (ncp = ncpp->lh_first)
			PURGE(ncp);
	}
	/* Re-bump so vp never keeps the reserved v_id of 0. */
	vp->v_id = ++nextvnodeid;
}
430
/*
 * Flush all entries referencing a particular filesystem.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entriess at the same time.
 *
 * NOTE(review): PURGE() can block in remove_name(); nnp is captured
 * before the PURGE, so this walk presumably relies on single-threaded
 * access to the cache while blocked -- verify against locking model.
 */
void
cache_purgevfs(mp)
	struct mount *mp;
{
	struct nchashhead *ncpp;
	struct namecache *ncp, *nnp;

	/* Scan hash tables for applicable entries */
	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
		for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) {
			nnp = ncp->nc_hash.le_next;
			/* purge if stale (dvp or vp id mismatch) or on mp */
			if (ncp->nc_dvpid != ncp->nc_dvp->v_id ||
			    (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id) ||
			    ncp->nc_dvp->v_mount == mp) {
				PURGE(ncp);
			}
		}
	}
}
55e303ae
A
456
457
458
//
// String ref routines
//
// A refcounted, hashed pool of NUL-terminated name strings shared by
// namecache entries (see add_name()/remove_name()).
//
static LIST_HEAD(stringhead, string_t) *string_ref_table;
static u_long   string_table_mask;		// table size - 1
static uint32_t max_chain_len=0;		// longest bucket chain seen
static struct stringhead *long_chain_head=NULL;	// bucket owning that chain
static uint32_t filled_buckets=0;		// non-empty buckets (drives resize)
static uint32_t num_dups=0;			// strings with refcount > 1
static uint32_t nstrings=0;			// distinct strings in the table

typedef struct string_t {
    LIST_ENTRY(string_t)  hash_chain;	// bucket linkage
    unsigned char        *str;		// points just past this header
    uint32_t              refcount;	// references from namecache entries
} string_t;
475
476
477
478static int
479resize_string_ref_table()
480{
481 struct stringhead *new_table;
482 struct stringhead *old_table;
483 struct stringhead *old_head, *head;
484 string_t *entry, *next;
485 uint32_t i, hashval;
486 u_long new_mask, old_mask;
487
488 new_table = hashinit((string_table_mask + 1) * 2, M_CACHE, &new_mask);
489 if (new_table == NULL) {
490 return ENOMEM;
491 }
492
493 // do the switch!
494 old_table = string_ref_table;
495 string_ref_table = new_table;
496 old_mask = string_table_mask;
497 string_table_mask = new_mask;
498
499 printf("resize: max chain len %d, new table size %d\n",
500 max_chain_len, new_mask + 1);
501 max_chain_len = 0;
502 long_chain_head = NULL;
503 filled_buckets = 0;
504
505 // walk the old table and insert all the entries into
506 // the new table
507 //
508 for(i=0; i <= old_mask; i++) {
509 old_head = &old_table[i];
510 for (entry=old_head->lh_first; entry != NULL; entry=next) {
511 hashval = hash_string(entry->str, 0);
512 head = &string_ref_table[hashval & string_table_mask];
513 if (head->lh_first == NULL) {
514 filled_buckets++;
515 }
516
517 next = entry->hash_chain.le_next;
518 LIST_INSERT_HEAD(head, entry, hash_chain);
519 }
520 }
521
522 FREE(old_table, M_CACHE);
523
524 return 0;
525}
526
527
528static void
529init_string_table(void)
530{
531 string_ref_table = hashinit(4096, M_CACHE, &string_table_mask);
532}
533
534
/*
 * Return a refcounted, shared copy of `name' (len bytes, not required
 * to be NUL-terminated).  If the string is already in the table its
 * refcount is bumped; otherwise a new entry is allocated with the
 * string stored inline after the string_t header.
 *
 * hashval, if non-zero, must equal hash_string(name, len); pass 0 to
 * have it computed here.  `flags' is currently unused.
 * May block in MALLOC and in resize_string_ref_table().
 */
char *
add_name(const char *name, size_t len, u_int hashval, u_int flags)
{
    struct stringhead *head;
    string_t          *entry;
    int                chain_len = 0;
    
    //
    // If the table gets more than 3/4 full, resize it
    //
    if (4*filled_buckets >= ((string_table_mask + 1) * 3)) {
	if (resize_string_ref_table() != 0) {
	    printf("failed to resize the hash table.\n");
	}
    }

    if (hashval == 0) {
	hashval = hash_string(name, len);
    }

    head = &string_ref_table[hashval & string_table_mask];
    for (entry=head->lh_first; entry != NULL; chain_len++, entry=entry->hash_chain.le_next) {
	/* match on len bytes plus the stored NUL (exact-length compare) */
	if (strncmp(entry->str, name, len) == 0 && entry->str[len] == '\0') {
	    entry->refcount++;
	    num_dups++;
	    break;
	}
    }

    if (entry == NULL) {
	// it wasn't already there so add it.
	MALLOC(entry, string_t *, sizeof(string_t) + len + 1, M_TEMP, M_WAITOK);

	// have to get "head" again because we could have blocked
	// in malloc and thus head could have changed.
	//
	head = &string_ref_table[hashval & string_table_mask];
	if (head->lh_first == NULL) {
	    filled_buckets++;
	}

	LIST_INSERT_HEAD(head, entry, hash_chain);
	/* string storage lives immediately after the header */
	entry->str = (char *)((char *)entry + sizeof(string_t));
	strncpy(entry->str, name, len);
	entry->str[len] = '\0';
	entry->refcount = 1;

	/* track chain-length high-water mark for diagnostics */
	if (chain_len > max_chain_len) {
	    max_chain_len   = chain_len;
	    long_chain_head = head;
	}

	nstrings++;
    }
    
    return entry->str;
}
592
/*
 * Drop one reference on a string previously returned by add_name().
 * nameref must be the exact pointer add_name() returned (matching is
 * by pointer identity, not string contents).  Frees the entry when
 * the refcount hits zero.  Returns 0 on success, ENOENT if the
 * pointer is not in the table.
 */
int
remove_name(const char *nameref)
{
    struct stringhead *head;
    string_t          *entry;
    uint32_t           hashval;

    hashval = hash_string(nameref, 0);
    head = &string_ref_table[hashval & string_table_mask];
    for (entry=head->lh_first; entry != NULL; entry=entry->hash_chain.le_next) {
	/* pointer-identity match: shared strings are unique in the table */
	if (entry->str == (unsigned char *)nameref) {
	    entry->refcount--;
	    if (entry->refcount == 0) {
		LIST_REMOVE(entry, hash_chain);
		if (head->lh_first == NULL) {
		    filled_buckets--;
		}
		entry->str = NULL;
		nstrings--;

		FREE(entry, M_TEMP);
	    } else {
		num_dups--;
	    }

	    return 0;
	}
    }

    return ENOENT;
}
624
625
626void
627dump_string_table(void)
628{
629 struct stringhead *head;
630 string_t *entry;
631 int i;
632
633 for(i=0; i <= string_table_mask; i++) {
634 head = &string_ref_table[i];
635 for (entry=head->lh_first; entry != NULL; entry=entry->hash_chain.le_next) {
636 printf("%6d - %s\n", entry->refcount, entry->str);
637 }
638 }
639}