]> git.saurik.com Git - apple/xnu.git/blame - bsd/vfs/vfs_cache.c
xnu-4903.241.1.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_cache.c
CommitLineData
1c79356b 1/*
c18c124e 2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29/*
30 * Copyright (c) 1989, 1993, 1995
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Poul-Henning Kamp of the FreeBSD Project.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 *
65 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95
66 */
2d21ac55
A
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
1c79356b
A
73#include <sys/param.h>
74#include <sys/systm.h>
75#include <sys/time.h>
91447636
A
76#include <sys/mount_internal.h>
77#include <sys/vnode_internal.h>
39236c6e 78#include <miscfs/specfs/specdev.h>
1c79356b
A
79#include <sys/namei.h>
80#include <sys/errno.h>
81#include <sys/malloc.h>
91447636
A
82#include <sys/kauth.h>
83#include <sys/user.h>
2d21ac55 84#include <sys/paths.h>
d9a64523 85#include <os/overflow.h>
2d21ac55
A
86
87#if CONFIG_MACF
88#include <security/mac_framework.h>
89#endif
1c79356b
A
90
91/*
92 * Name caching works as follows:
93 *
94 * Names found by directory scans are retained in a cache
95 * for future reference. It is managed LRU, so frequently
96 * used names will hang around. Cache is indexed by hash value
97 * obtained from (vp, name) where vp refers to the directory
98 * containing name.
99 *
100 * If it is a "negative" entry, (i.e. for a name that is known NOT to
101 * exist) the vnode pointer will be NULL.
102 *
1c79356b
A
103 * Upon reaching the last segment of a path, if the reference
104 * is for DELETE, or NOCACHE is set (rewrite), and the
105 * name is located in the cache, it will be dropped.
106 */
107
108/*
109 * Structures associated with name cacheing.
110 */
91447636 111
1c79356b 112LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */
91447636 113u_long nchashmask;
1c79356b
A
114u_long nchash; /* size of hash table - 1 */
115long numcache; /* number of cache entries allocated */
91447636
A
116int desiredNodes;
117int desiredNegNodes;
0c530ab8 118int ncs_negtotal;
6d2010ae 119int nc_disabled = 0;
91447636
A
120TAILQ_HEAD(, namecache) nchead; /* chain of all name cache entries */
121TAILQ_HEAD(, namecache) neghead; /* chain of only negative cache entries */
0c530ab8
A
122
123
124#if COLLECT_STATS
125
1c79356b 126struct nchstats nchstats; /* cache effectiveness statistics */
91447636 127
0c530ab8
A
128#define NCHSTAT(v) { \
129 nchstats.v++; \
130}
131#define NAME_CACHE_LOCK() name_cache_lock()
132#define NAME_CACHE_UNLOCK() name_cache_unlock()
133#define NAME_CACHE_LOCK_SHARED() name_cache_lock()
134
135#else
136
137#define NCHSTAT(v)
138#define NAME_CACHE_LOCK() name_cache_lock()
139#define NAME_CACHE_UNLOCK() name_cache_unlock()
140#define NAME_CACHE_LOCK_SHARED() name_cache_lock_shared()
141
142#endif
143
144
91447636
A
145/* vars for name cache list lock */
146lck_grp_t * namecache_lck_grp;
147lck_grp_attr_t * namecache_lck_grp_attr;
148lck_attr_t * namecache_lck_attr;
b0d623f7
A
149
150lck_grp_t * strcache_lck_grp;
151lck_grp_attr_t * strcache_lck_grp_attr;
152lck_attr_t * strcache_lck_attr;
153
154lck_rw_t * namecache_rw_lock;
155lck_rw_t * strtable_rw_lock;
156
157#define NUM_STRCACHE_LOCKS 1024
158
159lck_mtx_t strcache_mtx_locks[NUM_STRCACHE_LOCKS];
160
91447636
A
161
162static vnode_t cache_lookup_locked(vnode_t dvp, struct componentname *cnp);
b0d623f7 163static const char *add_name_internal(const char *, uint32_t, u_int, boolean_t, u_int);
39236c6e 164static void init_string_table(void);
91447636 165static void cache_delete(struct namecache *, int);
b0d623f7 166static void cache_enter_locked(vnode_t dvp, vnode_t vp, struct componentname *cnp, const char *strname);
2d21ac55
A
167
168#ifdef DUMP_STRING_TABLE
169/*
170 * Internal dump function used for debugging
171 */
172void dump_string_table(void);
173#endif /* DUMP_STRING_TABLE */
91447636 174
39236c6e 175static void init_crc32(void);
91447636
A
176static unsigned int crc32tab[256];
177
178
179#define NCHHASH(dvp, hash_val) \
180 (&nchashtbl[(dvp->v_id ^ (hash_val)) & nchashmask])
181
39037602
A
182/*
183 * This function tries to check if a directory vp is a subdirectory of dvp
184 * only from valid v_parent pointers. It is called with the name cache lock
185 * held and does not drop the lock anytime inside the function.
186 *
187 * It returns a boolean that indicates whether or not it was able to
188 * successfully infer the parent/descendent relationship via the v_parent
189 * pointers, or if it could not infer such relationship and that the decision
190 * must be delegated to the owning filesystem.
191 *
192 * If it does not defer the decision, i.e. it was successfuly able to determine
193 * the parent/descendent relationship, *is_subdir tells the caller if vp is a
194 * subdirectory of dvp.
195 *
196 * If the decision is deferred, *next_vp is where it stopped i.e. *next_vp
197 * is the vnode whose parent is to be determined from the filesystem.
198 * *is_subdir, in this case, is not indicative of anything and should be
199 * ignored.
200 *
201 * The return value and output args should be used as follows :
202 *
203 * defer = cache_check_vnode_issubdir(vp, dvp, is_subdir, next_vp);
204 * if (!defer) {
205 * if (*is_subdir)
206 * vp is subdirectory;
207 * else
208 * vp is not a subdirectory;
209 * } else {
210 * if (*next_vp)
211 * check this vnode's parent from the filesystem
212 * else
213 * error (likely because of forced unmount).
214 * }
215 *
216 */
217static boolean_t
218cache_check_vnode_issubdir(vnode_t vp, vnode_t dvp, boolean_t *is_subdir,
219 vnode_t *next_vp)
220{
221 vnode_t tvp = vp;
222 int defer = FALSE;
223
224 *is_subdir = FALSE;
225 *next_vp = NULLVP;
226 while (1) {
227 mount_t tmp;
228
229 if (tvp == dvp) {
230 *is_subdir = TRUE;
231 break;
232 } else if (tvp == rootvnode) {
233 /* *is_subdir = FALSE */
234 break;
235 }
236
237 tmp = tvp->v_mount;
238 while ((tvp->v_flag & VROOT) && tmp && tmp->mnt_vnodecovered &&
239 tvp != dvp && tvp != rootvnode) {
240 tvp = tmp->mnt_vnodecovered;
241 tmp = tvp->v_mount;
242 }
243
244 /*
245 * If dvp is not at the top of a mount "stack" then
246 * vp is not a subdirectory of dvp either.
247 */
248 if (tvp == dvp || tvp == rootvnode) {
249 /* *is_subdir = FALSE */
250 break;
251 }
252
253 if (!tmp) {
254 defer = TRUE;
255 *next_vp = NULLVP;
256 break;
257 }
258
259 if ((tvp->v_flag & VISHARDLINK) || !(tvp->v_parent)) {
260 defer = TRUE;
261 *next_vp = tvp;
262 break;
263 }
91447636 264
39037602
A
265 tvp = tvp->v_parent;
266 }
267
268 return (defer);
269}
270
271/* maximum times retry from potentially transient errors in vnode_issubdir */
272#define MAX_ERROR_RETRY 3
273
274/*
275 * This function checks if a given directory (vp) is a subdirectory of dvp.
276 * It walks backwards from vp and if it hits dvp in its parent chain,
277 * it is a subdirectory. If it encounters the root directory, it is not
278 * a subdirectory.
279 *
280 * This function returns an error if it is unsuccessful and 0 on success.
281 *
282 * On entry (and exit) vp has an iocount and if this function has to take
283 * any iocounts on other vnodes in the parent chain traversal, it releases them.
284 */
285int
286vnode_issubdir(vnode_t vp, vnode_t dvp, int *is_subdir, vfs_context_t ctx)
287{
288 vnode_t start_vp, tvp;
289 vnode_t vp_with_iocount;
290 int error = 0;
291 char dotdotbuf[] = "..";
292 int error_retry_count = 0; /* retry count for potentially transient
293 errors */
294
295 *is_subdir = FALSE;
296 tvp = start_vp = vp;
297 /*
298 * Anytime we acquire an iocount in this function, we save the vnode
299 * in this variable and release it before exiting.
300 */
301 vp_with_iocount = NULLVP;
302
303 while (1) {
304 boolean_t defer;
305 vnode_t pvp;
306 uint32_t vid;
307 struct componentname cn;
308 boolean_t is_subdir_locked = FALSE;
309
310 if (tvp == dvp) {
311 *is_subdir = TRUE;
312 break;
313 } else if (tvp == rootvnode) {
314 /* *is_subdir = FALSE */
315 break;
316 }
317
318 NAME_CACHE_LOCK_SHARED();
319
320 defer = cache_check_vnode_issubdir(tvp, dvp, &is_subdir_locked,
321 &tvp);
322
323 if (defer && tvp)
324 vid = vnode_vid(tvp);
325
326 NAME_CACHE_UNLOCK();
327
328 if (!defer) {
329 *is_subdir = is_subdir_locked;
330 break;
331 }
332
333 if (!tvp) {
334 if (error_retry_count++ < MAX_ERROR_RETRY) {
335 tvp = vp;
336 continue;
337 }
338 error = ENOENT;
339 break;
340 }
341
342 if (tvp != start_vp) {
343 if (vp_with_iocount) {
344 vnode_put(vp_with_iocount);
345 vp_with_iocount = NULLVP;
346 }
347
348 error = vnode_getwithvid(tvp, vid);
349 if (error) {
350 if (error_retry_count++ < MAX_ERROR_RETRY) {
351 tvp = vp;
352 error = 0;
353 continue;
354 }
355 break;
356 }
357
358 vp_with_iocount = tvp;
359 }
360
361 bzero(&cn, sizeof(cn));
362 cn.cn_nameiop = LOOKUP;
363 cn.cn_flags = ISLASTCN | ISDOTDOT;
364 cn.cn_context = ctx;
365 cn.cn_pnbuf = &dotdotbuf[0];
366 cn.cn_pnlen = sizeof(dotdotbuf);
367 cn.cn_nameptr = cn.cn_pnbuf;
368 cn.cn_namelen = 2;
369
370 pvp = NULLVP;
371 if ((error = VNOP_LOOKUP(tvp, &pvp, &cn, ctx)))
372 break;
373
374 if (!(tvp->v_flag & VISHARDLINK) && tvp->v_parent != pvp) {
375 (void)vnode_update_identity(tvp, pvp, NULL, 0, 0,
376 VNODE_UPDATE_PARENT);
377 }
378
379 if (vp_with_iocount)
380 vnode_put(vp_with_iocount);
381
382 vp_with_iocount = tvp = pvp;
383 }
384
385 if (vp_with_iocount)
386 vnode_put(vp_with_iocount);
387
388 return (error);
389}
91447636 390
b0d623f7 391/*
5ba3f43e
A
392 * This function builds the path in "buff" from the supplied vnode.
393 * The length of the buffer *INCLUDING* the trailing zero byte is
394 * returned in outlen. NOTE: the length includes the trailing zero
395 * byte and thus the length is one greater than what strlen would
396 * return. This is important and lots of code elsewhere in the kernel
397 * assumes this behavior.
b0d623f7
A
398 *
399 * This function can call vnop in file system if the parent vnode
400 * does not exist or when called for hardlinks via volfs path.
401 * If BUILDPATH_NO_FS_ENTER is set in flags, it only uses values present
402 * in the name cache and does not enter the file system.
403 *
316670eb
A
404 * If BUILDPATH_CHECK_MOVED is set in flags, we return EAGAIN when
405 * we encounter ENOENT during path reconstruction. ENOENT means that
406 * one of the parents moved while we were building the path. The
407 * caller can special handle this case by calling build_path again.
408 *
39236c6e
A
409 * If BUILDPATH_VOLUME_RELATIVE is set in flags, we return path
410 * that is relative to the nearest mount point, i.e. do not
411 * cross over mount points during building the path.
412 *
b0d623f7 413 * passed in vp must have a valid io_count reference
5ba3f43e
A
414 *
415 * If parent vnode is non-NULL it also must have an io count. This
416 * allows build_path_with_parent to be safely called for operations
417 * unlink, rmdir and rename that already have io counts on the target
418 * and the directory. In this way build_path_with_parent does not have
419 * to try and obtain an additional io count on the parent. Taking an
420 * io count ont the parent can lead to dead lock if a forced unmount
421 * occures at the right moment. For a fuller explaination on how this
422 * can occur see the comment for vn_getpath_with_parent.
423 *
b0d623f7 424 */
91447636 425int
5ba3f43e 426build_path_with_parent(vnode_t first_vp, vnode_t parent_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx)
91447636 427{
b0d623f7
A
428 vnode_t vp, tvp;
429 vnode_t vp_with_iocount;
2d21ac55
A
430 vnode_t proc_root_dir_vp;
431 char *end;
432 const char *str;
433 int len;
434 int ret = 0;
435 int fixhardlink;
436
b0d623f7 437 if (first_vp == NULLVP)
2d21ac55 438 return (EINVAL);
fe8ab488
A
439
440 if (buflen <= 1)
441 return (ENOSPC);
b0d623f7
A
442
443 /*
444 * Grab the process fd so we can evaluate fd_rdir.
445 */
446 if (vfs_context_proc(ctx)->p_fd)
447 proc_root_dir_vp = vfs_context_proc(ctx)->p_fd->fd_rdir;
448 else
449 proc_root_dir_vp = NULL;
450
451 vp_with_iocount = NULLVP;
2d21ac55
A
452again:
453 vp = first_vp;
b0d623f7 454
91447636
A
455 end = &buff[buflen-1];
456 *end = '\0';
457
b0d623f7
A
458 /*
459 * holding the NAME_CACHE_LOCK in shared mode is
460 * sufficient to stabilize both the vp->v_parent chain
461 * and the 'vp->v_mount->mnt_vnodecovered' chain
462 *
463 * if we need to drop this lock, we must first grab the v_id
464 * from the vnode we're currently working with... if that
465 * vnode doesn't already have an io_count reference (the vp
466 * passed in comes with one), we must grab a reference
467 * after we drop the NAME_CACHE_LOCK via vnode_getwithvid...
468 * deadlocks may result if you call vnode_get while holding
469 * the NAME_CACHE_LOCK... we lazily release the reference
470 * we pick up the next time we encounter a need to drop
471 * the NAME_CACHE_LOCK or before we return from this routine
472 */
473 NAME_CACHE_LOCK_SHARED();
474
475 /*
476 * Check if this is the root of a file system.
477 */
2d21ac55
A
478 while (vp && vp->v_flag & VROOT) {
479 if (vp->v_mount == NULL) {
b0d623f7
A
480 ret = EINVAL;
481 goto out_unlock;
2d21ac55
A
482 }
483 if ((vp->v_mount->mnt_flag & MNT_ROOTFS) || (vp == proc_root_dir_vp)) {
484 /*
485 * It's the root of the root file system, so it's
486 * just "/".
487 */
91447636 488 *--end = '/';
b0d623f7
A
489
490 goto out_unlock;
91447636 491 } else {
39236c6e
A
492 /*
493 * This the root of the volume and the caller does not
494 * want to cross mount points. Therefore just return
495 * '/' as the relative path.
496 */
497 if (flags & BUILDPATH_VOLUME_RELATIVE) {
498 *--end = '/';
499 goto out_unlock;
500 } else {
501 vp = vp->v_mount->mnt_vnodecovered;
502 }
91447636
A
503 }
504 }
91447636 505
2d21ac55 506 while ((vp != NULLVP) && (vp->v_parent != vp)) {
b0d623f7
A
507 int vid;
508
91447636 509 /*
2d21ac55
A
510 * For hardlinks the v_name may be stale, so if its OK
511 * to enter a file system, ask the file system for the
512 * name and parent (below).
91447636 513 */
2d21ac55
A
514 fixhardlink = (vp->v_flag & VISHARDLINK) &&
515 (vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID) &&
516 !(flags & BUILDPATH_NO_FS_ENTER);
b0d623f7 517
2d21ac55
A
518 if (!fixhardlink) {
519 str = vp->v_name;
b0d623f7 520
2d21ac55 521 if (str == NULL || *str == '\0') {
b0d623f7 522 if (vp->v_parent != NULL)
2d21ac55 523 ret = EINVAL;
b0d623f7 524 else
2d21ac55 525 ret = ENOENT;
b0d623f7 526 goto out_unlock;
2d21ac55
A
527 }
528 len = strlen(str);
529 /*
530 * Check that there's enough space (including space for the '/')
531 */
532 if ((end - buff) < (len + 1)) {
533 ret = ENOSPC;
b0d623f7 534 goto out_unlock;
2d21ac55 535 }
b0d623f7
A
536 /*
537 * Copy the name backwards.
538 */
2d21ac55
A
539 str += len;
540
b0d623f7 541 for (; len > 0; len--)
2d21ac55 542 *--end = *--str;
b0d623f7
A
543 /*
544 * Add a path separator.
545 */
2d21ac55 546 *--end = '/';
91447636 547 }
91447636 548
91447636 549 /*
2d21ac55 550 * Walk up the parent chain.
91447636 551 */
2d21ac55 552 if (((vp->v_parent != NULLVP) && !fixhardlink) ||
316670eb
A
553 (flags & BUILDPATH_NO_FS_ENTER)) {
554
6d2010ae
A
555 /*
556 * In this if () block we are not allowed to enter the filesystem
557 * to conclusively get the most accurate parent identifier.
558 * As a result, if 'vp' does not identify '/' and it
559 * does not have a valid v_parent, then error out
560 * and disallow further path construction
561 */
562 if ((vp->v_parent == NULLVP) && (rootvnode != vp)) {
c18c124e
A
563 /*
564 * Only '/' is allowed to have a NULL parent
565 * pointer. Upper level callers should ideally
566 * re-drive name lookup on receiving a ENOENT.
567 */
568 ret = ENOENT;
6d2010ae
A
569
570 /* The code below will exit early if 'tvp = vp' == NULL */
571 }
6d2010ae 572 vp = vp->v_parent;
316670eb 573
b0d623f7
A
574 /*
575 * if the vnode we have in hand isn't a directory and it
576 * has a v_parent, then we started with the resource fork
577 * so skip up to avoid getting a duplicate copy of the
578 * file name in the path.
579 */
316670eb 580 if (vp && !vnode_isdir(vp) && vp->v_parent) {
b0d623f7 581 vp = vp->v_parent;
316670eb 582 }
b0d623f7
A
583 } else {
584 /*
585 * No parent, go get it if supported.
586 */
2d21ac55
A
587 struct vnode_attr va;
588 vnode_t dvp;
2d21ac55 589
b0d623f7
A
590 /*
591 * Make sure file system supports obtaining a path from id.
592 */
2d21ac55
A
593 if (!(vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID)) {
594 ret = ENOENT;
b0d623f7 595 goto out_unlock;
2d21ac55 596 }
b0d623f7
A
597 vid = vp->v_id;
598
2d21ac55 599 NAME_CACHE_UNLOCK();
91447636 600
5ba3f43e 601 if (vp != first_vp && vp != parent_vp && vp != vp_with_iocount) {
b0d623f7
A
602 if (vp_with_iocount) {
603 vnode_put(vp_with_iocount);
604 vp_with_iocount = NULLVP;
605 }
606 if (vnode_getwithvid(vp, vid))
607 goto again;
608 vp_with_iocount = vp;
609 }
2d21ac55
A
610 VATTR_INIT(&va);
611 VATTR_WANTED(&va, va_parentid);
b0d623f7 612
2d21ac55
A
613 if (fixhardlink) {
614 VATTR_WANTED(&va, va_name);
615 MALLOC_ZONE(va.va_name, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
91447636 616 } else {
2d21ac55
A
617 va.va_name = NULL;
618 }
b0d623f7
A
619 /*
620 * Ask the file system for its parent id and for its name (optional).
621 */
2d21ac55 622 ret = vnode_getattr(vp, &va, ctx);
935ed37a 623
2d21ac55 624 if (fixhardlink) {
935ed37a
A
625 if ((ret == 0) && (VATTR_IS_SUPPORTED(&va, va_name))) {
626 str = va.va_name;
b0d623f7 627 vnode_update_identity(vp, NULL, str, strlen(str), 0, VNODE_UPDATE_NAME);
935ed37a
A
628 } else if (vp->v_name) {
629 str = vp->v_name;
630 ret = 0;
631 } else {
632 ret = ENOENT;
633 goto bad_news;
634 }
635 len = strlen(str);
2d21ac55 636
b0d623f7
A
637 /*
638 * Check that there's enough space.
639 */
935ed37a
A
640 if ((end - buff) < (len + 1)) {
641 ret = ENOSPC;
642 } else {
643 /* Copy the name backwards. */
644 str += len;
b0d623f7 645
935ed37a
A
646 for (; len > 0; len--) {
647 *--end = *--str;
2d21ac55 648 }
b0d623f7
A
649 /*
650 * Add a path separator.
651 */
935ed37a 652 *--end = '/';
2d21ac55 653 }
b0d623f7 654bad_news:
2d21ac55
A
655 FREE_ZONE(va.va_name, MAXPATHLEN, M_NAMEI);
656 }
657 if (ret || !VATTR_IS_SUPPORTED(&va, va_parentid)) {
2d21ac55
A
658 ret = ENOENT;
659 goto out;
660 }
b0d623f7
A
661 /*
662 * Ask the file system for the parent vnode.
663 */
664 if ((ret = VFS_VGET(vp->v_mount, (ino64_t)va.va_parentid, &dvp, ctx)))
2d21ac55 665 goto out;
b0d623f7
A
666
667 if (!fixhardlink && (vp->v_parent != dvp))
2d21ac55 668 vnode_update_identity(vp, dvp, NULL, 0, 0, VNODE_UPDATE_PARENT);
b0d623f7
A
669
670 if (vp_with_iocount)
671 vnode_put(vp_with_iocount);
2d21ac55 672 vp = dvp;
b0d623f7 673 vp_with_iocount = vp;
2d21ac55 674
b0d623f7 675 NAME_CACHE_LOCK_SHARED();
2d21ac55 676
b0d623f7
A
677 /*
678 * if the vnode we have in hand isn't a directory and it
679 * has a v_parent, then we started with the resource fork
680 * so skip up to avoid getting a duplicate copy of the
681 * file name in the path.
682 */
683 if (vp && !vnode_isdir(vp) && vp->v_parent)
684 vp = vp->v_parent;
91447636 685 }
39236c6e 686
39037602
A
687 if (vp && (flags & BUILDPATH_CHECKACCESS)) {
688 vid = vp->v_id;
689
690 NAME_CACHE_UNLOCK();
691
5ba3f43e 692 if (vp != first_vp && vp != parent_vp && vp != vp_with_iocount) {
39037602
A
693 if (vp_with_iocount) {
694 vnode_put(vp_with_iocount);
695 vp_with_iocount = NULLVP;
696 }
697 if (vnode_getwithvid(vp, vid))
698 goto again;
699 vp_with_iocount = vp;
700 }
701 if ((ret = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx)))
702 goto out; /* no peeking */
703
704 NAME_CACHE_LOCK_SHARED();
705 }
706
91447636 707 /*
2d21ac55
A
708 * When a mount point is crossed switch the vp.
709 * Continue until we find the root or we find
710 * a vnode that's not the root of a mounted
711 * file system.
91447636 712 */
b0d623f7
A
713 tvp = vp;
714
715 while (tvp) {
716 if (tvp == proc_root_dir_vp)
717 goto out_unlock; /* encountered the root */
718
719 if (!(tvp->v_flag & VROOT) || !tvp->v_mount)
720 break; /* not the root of a mounted FS */
39236c6e
A
721
722 if (flags & BUILDPATH_VOLUME_RELATIVE) {
723 /* Do not cross over mount points */
724 tvp = NULL;
725 } else {
726 tvp = tvp->v_mount->mnt_vnodecovered;
727 }
b0d623f7
A
728 }
729 if (tvp == NULLVP)
730 goto out_unlock;
731 vp = tvp;
91447636 732 }
b0d623f7 733out_unlock:
0c530ab8 734 NAME_CACHE_UNLOCK();
91447636 735out:
b0d623f7
A
736 if (vp_with_iocount)
737 vnode_put(vp_with_iocount);
738 /*
739 * Slide the name down to the beginning of the buffer.
740 */
91447636 741 memmove(buff, end, &buff[buflen] - end);
b0d623f7
A
742
743 /*
744 * length includes the trailing zero byte
745 */
746 *outlen = &buff[buflen] - end;
91447636 747
316670eb
A
748 /* One of the parents was moved during path reconstruction.
749 * The caller is interested in knowing whether any of the
750 * parents moved via BUILDPATH_CHECK_MOVED, so return EAGAIN.
751 */
752 if ((ret == ENOENT) && (flags & BUILDPATH_CHECK_MOVED)) {
753 ret = EAGAIN;
754 }
755
2d21ac55 756 return (ret);
91447636
A
757}
758
5ba3f43e
A
759int
760build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx)
761{
762 return (build_path_with_parent(first_vp, NULL, buff, buflen, outlen, flags, ctx));
763}
1c79356b
A
764
765/*
91447636
A
766 * return NULLVP if vp's parent doesn't
767 * exist, or we can't get a valid iocount
768 * else return the parent of vp
1c79356b 769 */
91447636
A
770vnode_t
771vnode_getparent(vnode_t vp)
772{
773 vnode_t pvp = NULLVP;
774 int pvid;
775
0c530ab8 776 NAME_CACHE_LOCK_SHARED();
91447636
A
777 /*
778 * v_parent is stable behind the name_cache lock
779 * however, the only thing we can really guarantee
780 * is that we've grabbed a valid iocount on the
781 * parent of 'vp' at the time we took the name_cache lock...
782 * once we drop the lock, vp could get re-parented
783 */
784 if ( (pvp = vp->v_parent) != NULLVP ) {
785 pvid = pvp->v_id;
786
0c530ab8 787 NAME_CACHE_UNLOCK();
91447636
A
788
789 if (vnode_getwithvid(pvp, pvid) != 0)
790 pvp = NULL;
791 } else
0c530ab8 792 NAME_CACHE_UNLOCK();
91447636
A
793 return (pvp);
794}
795
2d21ac55 796const char *
91447636
A
797vnode_getname(vnode_t vp)
798{
2d21ac55 799 const char *name = NULL;
b0d623f7
A
800
801 NAME_CACHE_LOCK_SHARED();
91447636
A
802
803 if (vp->v_name)
b0d623f7 804 name = vfs_addname(vp->v_name, strlen(vp->v_name), 0, 0);
0c530ab8 805 NAME_CACHE_UNLOCK();
91447636
A
806
807 return (name);
1c79356b 808}
91447636
A
809
810void
2d21ac55 811vnode_putname(const char *name)
91447636 812{
b0d623f7 813 vfs_removename(name);
91447636
A
814}
815
39236c6e
A
816static const char unknown_vnodename[] = "(unknown vnode name)";
817
818const char *
819vnode_getname_printable(vnode_t vp)
820{
821 const char *name = vnode_getname(vp);
822 if (name != NULL)
823 return name;
824
825 switch (vp->v_type) {
826 case VCHR:
827 case VBLK:
828 {
829 /*
830 * Create an artificial dev name from
831 * major and minor device number
832 */
833 char dev_name[64];
834 (void) snprintf(dev_name, sizeof(dev_name),
835 "%c(%u, %u)", VCHR == vp->v_type ? 'c':'b',
836 major(vp->v_rdev), minor(vp->v_rdev));
837 /*
838 * Add the newly created dev name to the name
839 * cache to allow easier cleanup. Also,
840 * vfs_addname allocates memory for the new name
841 * and returns it.
842 */
843 NAME_CACHE_LOCK_SHARED();
844 name = vfs_addname(dev_name, strlen(dev_name), 0, 0);
845 NAME_CACHE_UNLOCK();
846 return name;
847 }
848 default:
849 return unknown_vnodename;
850 }
851}
852
853void
854vnode_putname_printable(const char *name)
855{
856 if (name == unknown_vnodename)
857 return;
858 vnode_putname(name);
859}
860
91447636
A
861
862/*
863 * if VNODE_UPDATE_PARENT, and we can take
864 * a reference on dvp, then update vp with
865 * it's new parent... if vp already has a parent,
866 * then drop the reference vp held on it
867 *
868 * if VNODE_UPDATE_NAME,
869 * then drop string ref on v_name if it exists, and if name is non-NULL
870 * then pick up a string reference on name and record it in v_name...
871 * optionally pass in the length and hashval of name if known
872 *
873 * if VNODE_UPDATE_CACHE, flush the name cache entries associated with vp
874 */
875void
b0d623f7 876vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, uint32_t name_hashval, int flags)
91447636
A
877{
878 struct namecache *ncp;
879 vnode_t old_parentvp = NULLVP;
2d21ac55
A
880 int isstream = (vp->v_flag & VISNAMEDSTREAM);
881 int kusecountbumped = 0;
b0d623f7
A
882 kauth_cred_t tcred = NULL;
883 const char *vname = NULL;
884 const char *tname = NULL;
91447636
A
885
886 if (flags & VNODE_UPDATE_PARENT) {
2d21ac55
A
887 if (dvp && vnode_ref(dvp) != 0) {
888 dvp = NULLVP;
889 }
2d21ac55
A
890 /* Don't count a stream's parent ref during unmounts */
891 if (isstream && dvp && (dvp != vp) && (dvp != vp->v_parent) && (dvp->v_type == VREG)) {
892 vnode_lock_spin(dvp);
893 ++dvp->v_kusecount;
894 kusecountbumped = 1;
895 vnode_unlock(dvp);
896 }
2d21ac55 897 } else {
91447636 898 dvp = NULLVP;
2d21ac55 899 }
b0d623f7
A
900 if ( (flags & VNODE_UPDATE_NAME) ) {
901 if (name != vp->v_name) {
902 if (name && *name) {
903 if (name_len == 0)
904 name_len = strlen(name);
905 tname = vfs_addname(name, name_len, name_hashval, 0);
906 }
907 } else
908 flags &= ~VNODE_UPDATE_NAME;
909 }
910 if ( (flags & (VNODE_UPDATE_PURGE | VNODE_UPDATE_PARENT | VNODE_UPDATE_CACHE | VNODE_UPDATE_NAME)) ) {
91447636 911
b0d623f7
A
912 NAME_CACHE_LOCK();
913
914 if ( (flags & VNODE_UPDATE_PURGE) ) {
915
916 if (vp->v_parent)
917 vp->v_parent->v_nc_generation++;
918
919 while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
920 cache_delete(ncp, 1);
921
39037602 922 while ( (ncp = TAILQ_FIRST(&vp->v_ncchildren)) )
b0d623f7
A
923 cache_delete(ncp, 1);
924
925 /*
926 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
927 */
928 tcred = vp->v_cred;
929 vp->v_cred = NOCRED;
930 vp->v_authorized_actions = 0;
813fb2f6 931 vp->v_cred_timestamp = 0;
91447636 932 }
b0d623f7
A
933 if ( (flags & VNODE_UPDATE_NAME) ) {
934 vname = vp->v_name;
935 vp->v_name = tname;
91447636 936 }
b0d623f7
A
937 if (flags & VNODE_UPDATE_PARENT) {
938 if (dvp != vp && dvp != vp->v_parent) {
939 old_parentvp = vp->v_parent;
940 vp->v_parent = dvp;
941 dvp = NULLVP;
91447636 942
b0d623f7
A
943 if (old_parentvp)
944 flags |= VNODE_UPDATE_CACHE;
945 }
91447636 946 }
b0d623f7
A
947 if (flags & VNODE_UPDATE_CACHE) {
948 while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
949 cache_delete(ncp, 1);
950 }
951 NAME_CACHE_UNLOCK();
91447636 952
b0d623f7
A
953 if (vname != NULL)
954 vfs_removename(vname);
955
956 if (IS_VALID_CRED(tcred))
957 kauth_cred_unref(&tcred);
958 }
2d21ac55 959 if (dvp != NULLVP) {
2d21ac55
A
960 /* Back-out the ref we took if we lost a race for vp->v_parent. */
961 if (kusecountbumped) {
962 vnode_lock_spin(dvp);
963 if (dvp->v_kusecount > 0)
964 --dvp->v_kusecount;
965 vnode_unlock(dvp);
966 }
91447636 967 vnode_rele(dvp);
2d21ac55 968 }
91447636
A
969 if (old_parentvp) {
970 struct uthread *ut;
971
2d21ac55
A
972 if (isstream) {
973 vnode_lock_spin(old_parentvp);
974 if ((old_parentvp->v_type != VDIR) && (old_parentvp->v_kusecount > 0))
975 --old_parentvp->v_kusecount;
976 vnode_unlock(old_parentvp);
977 }
91447636
A
978 ut = get_bsdthread_info(current_thread());
979
980 /*
981 * indicated to vnode_rele that it shouldn't do a
982 * vnode_reclaim at this time... instead it will
983 * chain the vnode to the uu_vreclaims list...
984 * we'll be responsible for calling vnode_reclaim
985 * on each of the vnodes in this list...
986 */
987 ut->uu_defer_reclaims = 1;
988 ut->uu_vreclaims = NULLVP;
989
990 while ( (vp = old_parentvp) != NULLVP ) {
991
b0d623f7 992 vnode_lock_spin(vp);
91447636
A
993 vnode_rele_internal(vp, 0, 0, 1);
994
995 /*
996 * check to see if the vnode is now in the state
997 * that would have triggered a vnode_reclaim in vnode_rele
998 * if it is, we save it's parent pointer and then NULL
999 * out the v_parent field... we'll drop the reference
1000 * that was held on the next iteration of this loop...
1001 * this short circuits a potential deep recursion if we
1002 * have a long chain of parents in this state...
1003 * we'll sit in this loop until we run into
1004 * a parent in this chain that is not in this state
1005 *
b0d623f7 1006 * make our check and the vnode_rele atomic
91447636
A
1007 * with respect to the current vnode we're working on
1008 * by holding the vnode lock
1009 * if vnode_rele deferred the vnode_reclaim and has put
1010 * this vnode on the list to be reaped by us, than
1011 * it has left this vnode with an iocount == 1
1012 */
1013 if ( (vp->v_iocount == 1) && (vp->v_usecount == 0) &&
1014 ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM)) {
1015 /*
1016 * vnode_rele wanted to do a vnode_reclaim on this vnode
1017 * it should be sitting on the head of the uu_vreclaims chain
1018 * pull the parent pointer now so that when we do the
1019 * vnode_reclaim for each of the vnodes in the uu_vreclaims
1020 * list, we won't recurse back through here
2d21ac55
A
1021 *
1022 * need to do a convert here in case vnode_rele_internal
1023 * returns with the lock held in the spin mode... it
1024 * can drop and retake the lock under certain circumstances
91447636 1025 */
2d21ac55
A
1026 vnode_lock_convert(vp);
1027
0c530ab8 1028 NAME_CACHE_LOCK();
91447636
A
1029 old_parentvp = vp->v_parent;
1030 vp->v_parent = NULLVP;
0c530ab8 1031 NAME_CACHE_UNLOCK();
91447636
A
1032 } else {
1033 /*
1034 * we're done... we ran into a vnode that isn't
1035 * being terminated
1036 */
1037 old_parentvp = NULLVP;
1038 }
1039 vnode_unlock(vp);
1040 }
1041 ut->uu_defer_reclaims = 0;
1042
1043 while ( (vp = ut->uu_vreclaims) != NULLVP) {
1044 ut->uu_vreclaims = vp->v_defer_reclaimlist;
1045
1046 /*
1047 * vnode_put will drive the vnode_reclaim if
1048 * we are still the only reference on this vnode
1049 */
1050 vnode_put(vp);
1051 }
1052 }
1c79356b 1053}
91447636 1054
1c79356b
A
1055
1056/*
91447636
A
1057 * Mark a vnode as having multiple hard links. HFS makes use of this
1058 * because it keeps track of each link separately, and wants to know
1059 * which link was actually used.
1060 *
1061 * This will cause the name cache to force a VNOP_LOOKUP on the vnode
1062 * so that HFS can post-process the lookup. Also, volfs will call
1063 * VNOP_GETATTR2 to determine the parent, instead of using v_parent.
1c79356b 1064 */
2d21ac55 1065void vnode_setmultipath(vnode_t vp)
91447636 1066{
2d21ac55 1067 vnode_lock_spin(vp);
91447636
A
1068
1069 /*
1070 * In theory, we're changing the vnode's identity as far as the
1071 * name cache is concerned, so we ought to grab the name cache lock
1072 * here. However, there is already a race, and grabbing the name
1073 * cache lock only makes the race window slightly smaller.
1074 *
1075 * The race happens because the vnode already exists in the name
1076 * cache, and could be found by one thread before another thread
1077 * can set the hard link flag.
1078 */
1079
1080 vp->v_flag |= VISHARDLINK;
1081
1082 vnode_unlock(vp);
1083}
1084
1085
2d21ac55
A
1086
1087/*
1088 * backwards compatibility
1089 */
91447636
A
1090void vnode_uncache_credentials(vnode_t vp)
1091{
2d21ac55 1092 vnode_uncache_authorized_action(vp, KAUTH_INVALIDATE_CACHED_RIGHTS);
91447636
A
1093}
1094
1095
2d21ac55
A
1096/*
1097 * use the exclusive form of NAME_CACHE_LOCK to protect the update of the
1098 * following fields in the vnode: v_cred_timestamp, v_cred, v_authorized_actions
1099 * we use this lock so that we can look at the v_cred and v_authorized_actions
1100 * atomically while behind the NAME_CACHE_LOCK in shared mode in 'cache_lookup_path',
1101 * which is the super-hot path... if we are updating the authorized actions for this
1102 * vnode, we are already in the super-slow and far less frequented path so its not
1103 * that bad that we take the lock exclusive for this case... of course we strive
1104 * to hold it for the minimum amount of time possible
1105 */
91447636 1106
2d21ac55
A
1107void vnode_uncache_authorized_action(vnode_t vp, kauth_action_t action)
1108{
1109 kauth_cred_t tcred = NOCRED;
91447636 1110
2d21ac55 1111 NAME_CACHE_LOCK();
91447636 1112
2d21ac55 1113 vp->v_authorized_actions &= ~action;
91447636 1114
2d21ac55
A
1115 if (action == KAUTH_INVALIDATE_CACHED_RIGHTS &&
1116 IS_VALID_CRED(vp->v_cred)) {
1117 /*
1118 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
1119 */
1120 tcred = vp->v_cred;
1121 vp->v_cred = NOCRED;
91447636 1122 }
2d21ac55 1123 NAME_CACHE_UNLOCK();
0c530ab8 1124
2d21ac55 1125 if (tcred != NOCRED)
0c530ab8 1126 kauth_cred_unref(&tcred);
91447636
A
1127}
1128
2d21ac55 1129
6d2010ae
A
1130extern int bootarg_vnode_cache_defeat; /* default = 0, from bsd_init.c */
1131
1132boolean_t
1133vnode_cache_is_authorized(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
91447636 1134{
91447636 1135 kauth_cred_t ucred;
2d21ac55 1136 boolean_t retval = FALSE;
91447636 1137
6d2010ae
A
1138 /* Boot argument to defeat rights caching */
1139 if (bootarg_vnode_cache_defeat)
1140 return FALSE;
1141
2d21ac55
A
1142 if ( (vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
1143 /*
1144 * a TTL is enabled on the rights cache... handle it here
1145 * a TTL of 0 indicates that no rights should be cached
1146 */
1147 if (vp->v_mount->mnt_authcache_ttl) {
1148 if ( !(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL) ) {
1149 /*
1150 * For filesystems marked only MNTK_AUTH_OPAQUE (generally network ones),
1151 * we will only allow a SEARCH right on a directory to be cached...
1152 * that cached right always has a default TTL associated with it
1153 */
1154 if (action != KAUTH_VNODE_SEARCH || vp->v_type != VDIR)
1155 vp = NULLVP;
1156 }
1157 if (vp != NULLVP && vnode_cache_is_stale(vp) == TRUE) {
1158 vnode_uncache_authorized_action(vp, vp->v_authorized_actions);
1159 vp = NULLVP;
1160 }
1161 } else
1162 vp = NULLVP;
1163 }
1164 if (vp != NULLVP) {
1165 ucred = vfs_context_ucred(ctx);
91447636 1166
2d21ac55 1167 NAME_CACHE_LOCK_SHARED();
91447636 1168
2d21ac55
A
1169 if (vp->v_cred == ucred && (vp->v_authorized_actions & action) == action)
1170 retval = TRUE;
1171
1172 NAME_CACHE_UNLOCK();
91447636 1173 }
2d21ac55
A
1174 return retval;
1175}
91447636 1176
2d21ac55
A
1177
1178void vnode_cache_authorized_action(vnode_t vp, vfs_context_t ctx, kauth_action_t action)
1179{
1180 kauth_cred_t tcred = NOCRED;
1181 kauth_cred_t ucred;
1182 struct timeval tv;
1183 boolean_t ttl_active = FALSE;
1184
1185 ucred = vfs_context_ucred(ctx);
1186
1187 if (!IS_VALID_CRED(ucred) || action == 0)
1188 return;
1189
1190 if ( (vp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
1191 /*
1192 * a TTL is enabled on the rights cache... handle it here
1193 * a TTL of 0 indicates that no rights should be cached
91447636 1194 */
2d21ac55
A
1195 if (vp->v_mount->mnt_authcache_ttl == 0)
1196 return;
91447636 1197
2d21ac55
A
1198 if ( !(vp->v_mount->mnt_kern_flag & MNTK_AUTH_CACHE_TTL) ) {
1199 /*
1200 * only cache SEARCH action for filesystems marked
1201 * MNTK_AUTH_OPAQUE on VDIRs...
1202 * the lookup_path code will time these out
1203 */
1204 if ( (action & ~KAUTH_VNODE_SEARCH) || vp->v_type != VDIR )
1205 return;
91447636 1206 }
2d21ac55 1207 ttl_active = TRUE;
91447636 1208
2d21ac55
A
1209 microuptime(&tv);
1210 }
1211 NAME_CACHE_LOCK();
91447636 1212
2d21ac55
A
1213 if (vp->v_cred != ucred) {
1214 kauth_cred_ref(ucred);
1215 /*
1216 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
1217 */
1218 tcred = vp->v_cred;
1219 vp->v_cred = ucred;
1220 vp->v_authorized_actions = 0;
1221 }
1222 if (ttl_active == TRUE && vp->v_authorized_actions == 0) {
1223 /*
1224 * only reset the timestamnp on the
1225 * first authorization cached after the previous
1226 * timer has expired or we're switching creds...
1227 * 'vnode_cache_is_authorized' will clear the
1228 * authorized actions if the TTL is active and
1229 * it has expired
1230 */
1231 vp->v_cred_timestamp = tv.tv_sec;
1232 }
1233 vp->v_authorized_actions |= action;
91447636 1234
0c530ab8 1235 NAME_CACHE_UNLOCK();
91447636 1236
2d21ac55
A
1237 if (IS_VALID_CRED(tcred))
1238 kauth_cred_unref(&tcred);
91447636
A
1239}
1240
2d21ac55
A
1241
1242boolean_t vnode_cache_is_stale(vnode_t vp)
1243{
1244 struct timeval tv;
1245 boolean_t retval;
1246
1247 microuptime(&tv);
1248
1249 if ((tv.tv_sec - vp->v_cred_timestamp) > vp->v_mount->mnt_authcache_ttl)
1250 retval = TRUE;
1251 else
1252 retval = FALSE;
1253
1254 return retval;
1255}
1256
1257
1258
1259/*
1260 * Returns: 0 Success
4a3eedf9
A
1261 * ERECYCLE vnode was recycled from underneath us. Force lookup to be re-driven from namei.
1262 * This errno value should not be seen by anyone outside of the kernel.
2d21ac55 1263 */
91447636 1264int
4a3eedf9 1265cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp,
6d2010ae 1266 vfs_context_t ctx, int *dp_authorized, vnode_t last_dp)
91447636
A
1267{
1268 char *cp; /* pointer into pathname argument */
2d21ac55
A
1269 int vid;
1270 int vvid = 0; /* protected by vp != NULLVP */
91447636
A
1271 vnode_t vp = NULLVP;
1272 vnode_t tdp = NULLVP;
1273 kauth_cred_t ucred;
2d21ac55
A
1274 boolean_t ttl_enabled = FALSE;
1275 struct timeval tv;
b0d623f7 1276 mount_t mp;
91447636 1277 unsigned int hash;
4a3eedf9 1278 int error = 0;
39037602 1279 boolean_t dotdotchecked = FALSE;
91447636 1280
6d2010ae
A
1281#if CONFIG_TRIGGERS
1282 vnode_t trigger_vp;
1283#endif /* CONFIG_TRIGGERS */
1284
2d21ac55 1285 ucred = vfs_context_ucred(ctx);
6d2010ae 1286 ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH);
91447636 1287
0c530ab8 1288 NAME_CACHE_LOCK_SHARED();
91447636 1289
2d21ac55
A
1290 if ( dp->v_mount && (dp->v_mount->mnt_kern_flag & (MNTK_AUTH_OPAQUE | MNTK_AUTH_CACHE_TTL)) ) {
1291 ttl_enabled = TRUE;
91447636
A
1292 microuptime(&tv);
1293 }
1294 for (;;) {
39236c6e 1295 /*
91447636
A
1296 * Search a directory.
1297 *
1298 * The cn_hash value is for use by cache_lookup
1299 * The last component of the filename is left accessible via
1300 * cnp->cn_nameptr for callers that need the name.
1301 */
1302 hash = 0;
1303 cp = cnp->cn_nameptr;
1304
1305 while (*cp && (*cp != '/')) {
b0d623f7 1306 hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
91447636
A
1307 }
1308 /*
1309 * the crc generator can legitimately generate
1310 * a 0... however, 0 for us means that we
1311 * haven't computed a hash, so use 1 instead
1312 */
1313 if (hash == 0)
1314 hash = 1;
1315 cnp->cn_hash = hash;
1316 cnp->cn_namelen = cp - cnp->cn_nameptr;
1317
1318 ndp->ni_pathlen -= cnp->cn_namelen;
1319 ndp->ni_next = cp;
1320
1321 /*
1322 * Replace multiple slashes by a single slash and trailing slashes
1323 * by a null. This must be done before VNOP_LOOKUP() because some
1324 * fs's don't know about trailing slashes. Remember if there were
1325 * trailing slashes to handle symlinks, existing non-directories
1326 * and non-existing files that won't be directories specially later.
1327 */
1328 while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) {
1329 cp++;
1330 ndp->ni_pathlen--;
1331
1332 if (*cp == '\0') {
6d2010ae 1333 ndp->ni_flag |= NAMEI_TRAILINGSLASH;
91447636
A
1334 *ndp->ni_next = '\0';
1335 }
1336 }
1337 ndp->ni_next = cp;
1338
1339 cnp->cn_flags &= ~(MAKEENTRY | ISLASTCN | ISDOTDOT);
1340
1341 if (*cp == '\0')
1342 cnp->cn_flags |= ISLASTCN;
1343
1344 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
1345 cnp->cn_flags |= ISDOTDOT;
1346
1347 *dp_authorized = 0;
2d21ac55
A
1348#if NAMEDRSRCFORK
1349 /*
1350 * Process a request for a file's resource fork.
1351 *
1352 * Consume the _PATH_RSRCFORKSPEC suffix and tag the path.
1353 */
1354 if ((ndp->ni_pathlen == sizeof(_PATH_RSRCFORKSPEC)) &&
1355 (cp[1] == '.' && cp[2] == '.') &&
1356 bcmp(cp, _PATH_RSRCFORKSPEC, sizeof(_PATH_RSRCFORKSPEC)) == 0) {
1357 /* Skip volfs file systems that don't support native streams. */
1358 if ((dp->v_mount != NULL) &&
1359 (dp->v_mount->mnt_flag & MNT_DOVOLFS) &&
1360 (dp->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0) {
1361 goto skiprsrcfork;
1362 }
1363 cnp->cn_flags |= CN_WANTSRSRCFORK;
1364 cnp->cn_flags |= ISLASTCN;
1365 ndp->ni_next[0] = '\0';
1366 ndp->ni_pathlen = 1;
1367 }
1368skiprsrcfork:
1369#endif
91447636 1370
2d21ac55
A
1371#if CONFIG_MACF
1372
5ba3f43e 1373 /*
2d21ac55
A
1374 * Name cache provides authorization caching (see below)
1375 * that will short circuit MAC checks in lookup().
1376 * We must perform MAC check here. On denial
1377 * dp_authorized will remain 0 and second check will
1378 * be perfomed in lookup().
1379 */
1380 if (!(cnp->cn_flags & DONOTAUTH)) {
1381 error = mac_vnode_check_lookup(ctx, dp, cnp);
1382 if (error) {
b0d623f7 1383 NAME_CACHE_UNLOCK();
4a3eedf9 1384 goto errorout;
2d21ac55
A
1385 }
1386 }
1387#endif /* MAC */
813fb2f6
A
1388 if (ttl_enabled &&
1389 (dp->v_mount->mnt_authcache_ttl == 0 ||
1390 ((tv.tv_sec - dp->v_cred_timestamp) > dp->v_mount->mnt_authcache_ttl))) {
91447636 1391 break;
813fb2f6 1392 }
91447636 1393
2d21ac55
A
1394 /*
1395 * NAME_CACHE_LOCK holds these fields stable
490019cf
A
1396 *
1397 * We can't cache KAUTH_VNODE_SEARCHBYANYONE for root correctly
39037602 1398 * so we make an ugly check for root here. root is always
490019cf
A
1399 * allowed and breaking out of here only to find out that is
1400 * authorized by virtue of being root is very very expensive.
813fb2f6
A
1401 * However, the check for not root is valid only for filesystems
1402 * which use local authorization.
1403 *
1404 * XXX: Remove the check for root when we can reliably set
1405 * KAUTH_VNODE_SEARCHBYANYONE as root.
2d21ac55
A
1406 */
1407 if ((dp->v_cred != ucred || !(dp->v_authorized_actions & KAUTH_VNODE_SEARCH)) &&
490019cf 1408 !(dp->v_authorized_actions & KAUTH_VNODE_SEARCHBYANYONE) &&
813fb2f6 1409 (ttl_enabled || !vfs_context_issuser(ctx))) {
91447636 1410 break;
813fb2f6 1411 }
2d21ac55 1412
91447636
A
1413 /*
1414 * indicate that we're allowed to traverse this directory...
1415 * even if we fail the cache lookup or decide to bail for
1416 * some other reason, this information is valid and is used
1417 * to avoid doing a vnode_authorize before the call to VNOP_LOOKUP
1418 */
1419 *dp_authorized = 1;
1420
1421 if ( (cnp->cn_flags & (ISLASTCN | ISDOTDOT)) ) {
6d2010ae
A
1422 if (cnp->cn_nameiop != LOOKUP)
1423 break;
1424 if (cnp->cn_flags & LOCKPARENT)
1425 break;
1426 if (cnp->cn_flags & NOCACHE)
1427 break;
743b1565 1428 if (cnp->cn_flags & ISDOTDOT) {
2d21ac55
A
1429 /*
1430 * Force directory hardlinks to go to
1431 * file system for ".." requests.
1432 */
d9a64523 1433 if ((dp->v_flag & VISHARDLINK)) {
2d21ac55
A
1434 break;
1435 }
743b1565
A
1436 /*
1437 * Quit here only if we can't use
1438 * the parent directory pointer or
1439 * don't have one. Otherwise, we'll
1440 * use it below.
1441 */
6601e61a
A
1442 if ((dp->v_flag & VROOT) ||
1443 dp == ndp->ni_rootdir ||
743b1565
A
1444 dp->v_parent == NULLVP)
1445 break;
1446 }
1447 }
1448
39236c6e
A
1449 if ((cnp->cn_flags & CN_SKIPNAMECACHE)) {
1450 /*
1451 * Force lookup to go to the filesystem with
1452 * all cnp fields set up.
1453 */
1454 break;
1455 }
1456
743b1565
A
1457 /*
1458 * "." and ".." aren't supposed to be cached, so check
1459 * for them before checking the cache.
1460 */
1461 if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')
1462 vp = dp;
39037602
A
1463 else if ( (cnp->cn_flags & ISDOTDOT) ) {
1464 /*
1465 * If this is a chrooted process, we need to check if
1466 * the process is trying to break out of its chrooted
1467 * jail. We do that by trying to determine if dp is
1468 * a subdirectory of ndp->ni_rootdir. If we aren't
1469 * able to determine that by the v_parent pointers, we
1470 * will leave the fast path.
1471 *
1472 * Since this function may see dotdot components
1473 * many times and it has the name cache lock held for
1474 * the entire duration, we optimise this by doing this
1475 * check only once per cache_lookup_path call.
1476 * If dotdotchecked is set, it means we've done this
1477 * check once already and don't need to do it again.
1478 */
1479 if (!dotdotchecked && (ndp->ni_rootdir != rootvnode)) {
1480 vnode_t tvp = dp;
1481 boolean_t defer = FALSE;
1482 boolean_t is_subdir = FALSE;
1483
1484 defer = cache_check_vnode_issubdir(tvp,
1485 ndp->ni_rootdir, &is_subdir, &tvp);
1486
1487 if (defer) {
1488 /* defer to Filesystem */
1489 break;
1490 } else if (!is_subdir) {
1491 /*
1492 * This process is trying to break out
1493 * of its chrooted jail, so all its
1494 * dotdot accesses will be translated to
1495 * its root directory.
1496 */
1497 vp = ndp->ni_rootdir;
1498 } else {
1499 /*
1500 * All good, let this dotdot access
1501 * proceed normally
1502 */
1503 vp = dp->v_parent;
1504 }
1505 dotdotchecked = TRUE;
1506 } else {
1507 vp = dp->v_parent;
1508 }
1509 } else {
743b1565
A
1510 if ( (vp = cache_lookup_locked(dp, cnp)) == NULLVP)
1511 break;
91447636 1512
b0d623f7
A
1513 if ( (vp->v_flag & VISHARDLINK) ) {
1514 /*
1515 * The file system wants a VNOP_LOOKUP on this vnode
1516 */
1517 vp = NULL;
1518 break;
1519 }
1520 }
91447636
A
1521 if ( (cnp->cn_flags & ISLASTCN) )
1522 break;
1523
1524 if (vp->v_type != VDIR) {
1525 if (vp->v_type != VLNK)
1526 vp = NULL;
1527 break;
1528 }
6d2010ae 1529
2d21ac55 1530 if ( (mp = vp->v_mountedhere) && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
3e170ce0
A
1531 vnode_t tmp_vp = mp->mnt_realrootvp;
1532 if (tmp_vp == NULLVP || mp->mnt_generation != mount_generation ||
1533 mp->mnt_realrootvp_vid != tmp_vp->v_id)
1534 break;
1535 vp = tmp_vp;
2d21ac55 1536 }
6d2010ae
A
1537
1538#if CONFIG_TRIGGERS
1539 /*
1540 * After traversing all mountpoints stacked here, if we have a
1541 * trigger in hand, resolve it. Note that we don't need to
1542 * leave the fast path if the mount has already happened.
1543 */
3e170ce0 1544 if (vp->v_resolve)
6d2010ae 1545 break;
6d2010ae
A
1546#endif /* CONFIG_TRIGGERS */
1547
1548
91447636
A
1549 dp = vp;
1550 vp = NULLVP;
1551
1552 cnp->cn_nameptr = ndp->ni_next + 1;
1553 ndp->ni_pathlen--;
1554 while (*cnp->cn_nameptr == '/') {
1555 cnp->cn_nameptr++;
1556 ndp->ni_pathlen--;
1557 }
1558 }
1559 if (vp != NULLVP)
1560 vvid = vp->v_id;
1561 vid = dp->v_id;
1562
0c530ab8 1563 NAME_CACHE_UNLOCK();
91447636 1564
91447636
A
1565 if ((vp != NULLVP) && (vp->v_type != VLNK) &&
1566 ((cnp->cn_flags & (ISLASTCN | LOCKPARENT | WANTPARENT | SAVESTART)) == ISLASTCN)) {
1567 /*
1568 * if we've got a child and it's the last component, and
1569 * the lookup doesn't need to return the parent then we
1570 * can skip grabbing an iocount on the parent, since all
1571 * we're going to do with it is a vnode_put just before
1572 * we return from 'lookup'. If it's a symbolic link,
1573 * we need the parent in case the link happens to be
1574 * a relative pathname.
1575 */
1576 tdp = dp;
1577 dp = NULLVP;
1578 } else {
1579need_dp:
4a3eedf9 1580 /*
91447636 1581 * return the last directory we looked at
4a3eedf9
A
1582 * with an io reference held. If it was the one passed
1583 * in as a result of the last iteration of VNOP_LOOKUP,
1584 * it should already hold an io ref. No need to increase ref.
91447636 1585 */
4a3eedf9
A
1586 if (last_dp != dp){
1587
1588 if (dp == ndp->ni_usedvp) {
1589 /*
1590 * if this vnode matches the one passed in via USEDVP
1591 * than this context already holds an io_count... just
1592 * use vnode_get to get an extra ref for lookup to play
1593 * with... can't use the getwithvid variant here because
1594 * it will block behind a vnode_drain which would result
1595 * in a deadlock (since we already own an io_count that the
1596 * vnode_drain is waiting on)... vnode_get grabs the io_count
1597 * immediately w/o waiting... it always succeeds
1598 */
1599 vnode_get(dp);
39236c6e 1600 } else if ((error = vnode_getwithvid_drainok(dp, vid))) {
4a3eedf9
A
1601 /*
1602 * failure indicates the vnode
1603 * changed identity or is being
1604 * TERMINATED... in either case
1605 * punt this lookup.
1606 *
1607 * don't necessarily return ENOENT, though, because
1608 * we really want to go back to disk and make sure it's
1609 * there or not if someone else is changing this
39236c6e
A
1610 * vnode. That being said, the one case where we do want
1611 * to return ENOENT is when the vnode's mount point is
1612 * in the process of unmounting and we might cause a deadlock
1613 * in our attempt to take an iocount. An ENODEV error return
1614 * is from vnode_get* is an indication this but we change that
1615 * ENOENT for upper layers.
4a3eedf9 1616 */
39236c6e
A
1617 if (error == ENODEV) {
1618 error = ENOENT;
1619 } else {
1620 error = ERECYCLE;
1621 }
4a3eedf9
A
1622 goto errorout;
1623 }
91447636
A
1624 }
1625 }
1626 if (vp != NULLVP) {
6d2010ae 1627 if ( (vnode_getwithvid_drainok(vp, vvid)) ) {
91447636
A
1628 vp = NULLVP;
1629
1630 /*
1631 * can't get reference on the vp we'd like
1632 * to return... if we didn't grab a reference
1633 * on the directory (due to fast path bypass),
1634 * then we need to do it now... we can't return
1635 * with both ni_dvp and ni_vp NULL, and no
1636 * error condition
1637 */
1638 if (dp == NULLVP) {
1639 dp = tdp;
1640 goto need_dp;
1641 }
1642 }
1643 }
6d2010ae 1644
91447636
A
1645 ndp->ni_dvp = dp;
1646 ndp->ni_vp = vp;
1647
6d2010ae
A
1648#if CONFIG_TRIGGERS
1649 trigger_vp = vp ? vp : dp;
1650 if ((error == 0) && (trigger_vp != NULLVP) && vnode_isdir(trigger_vp)) {
1651 error = vnode_trigger_resolve(trigger_vp, ndp, ctx);
1652 if (error) {
1653 if (vp)
1654 vnode_put(vp);
1655 if (dp)
1656 vnode_put(dp);
1657 goto errorout;
1658 }
1659 }
1660#endif /* CONFIG_TRIGGERS */
1661
4a3eedf9
A
1662errorout:
1663 /*
1664 * If we came into cache_lookup_path after an iteration of the lookup loop that
1665 * resulted in a call to VNOP_LOOKUP, then VNOP_LOOKUP returned a vnode with a io ref
1666 * on it. It is now the job of cache_lookup_path to drop the ref on this vnode
1667 * when it is no longer needed. If we get to this point, and last_dp is not NULL
1668 * and it is ALSO not the dvp we want to return to caller of this function, it MUST be
1669 * the case that we got to a subsequent path component and this previous vnode is
1670 * no longer needed. We can then drop the io ref on it.
1671 */
1672 if ((last_dp != NULLVP) && (last_dp != ndp->ni_dvp)){
1673 vnode_put(last_dp);
1674 }
1675
1676 //initialized to 0, should be the same if no error cases occurred.
1677 return error;
91447636
A
1678}
1679
1680
1681static vnode_t
1682cache_lookup_locked(vnode_t dvp, struct componentname *cnp)
1683{
2d21ac55
A
1684 struct namecache *ncp;
1685 struct nchashhead *ncpp;
1686 long namelen = cnp->cn_namelen;
fe8ab488 1687 unsigned int hashval = cnp->cn_hash;
91447636 1688
6d2010ae
A
1689 if (nc_disabled) {
1690 return NULL;
1691 }
1692
91447636
A
1693 ncpp = NCHHASH(dvp, cnp->cn_hash);
1694 LIST_FOREACH(ncp, ncpp, nc_hash) {
1695 if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
5ba3f43e 1696 if (strncmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0)
91447636
A
1697 break;
1698 }
1699 }
0c530ab8 1700 if (ncp == 0) {
91447636
A
1701 /*
1702 * We failed to find an entry
1703 */
0c530ab8 1704 NCHSTAT(ncs_miss);
91447636 1705 return (NULL);
0c530ab8
A
1706 }
1707 NCHSTAT(ncs_goodhits);
91447636 1708
b0d623f7 1709 return (ncp->nc_vp);
1c79356b
A
1710}
1711
55e303ae 1712
39236c6e 1713unsigned int hash_string(const char *cp, int len);
55e303ae
A
1714//
1715// Have to take a len argument because we may only need to
1716// hash part of a componentname.
1717//
39236c6e 1718unsigned int
91447636 1719hash_string(const char *cp, int len)
55e303ae 1720{
91447636 1721 unsigned hash = 0;
55e303ae 1722
91447636
A
1723 if (len) {
1724 while (len--) {
b0d623f7 1725 hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
91447636 1726 }
55e303ae 1727 } else {
91447636 1728 while (*cp != '\0') {
b0d623f7 1729 hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8;
91447636 1730 }
55e303ae 1731 }
91447636
A
1732 /*
1733 * the crc generator can legitimately generate
1734 * a 0... however, 0 for us means that we
1735 * haven't computed a hash, so use 1 instead
1736 */
1737 if (hash == 0)
1738 hash = 1;
1739 return hash;
55e303ae
A
1740}
1741
1742
1c79356b
A
1743/*
1744 * Lookup an entry in the cache
1745 *
1746 * We don't do this if the segment name is long, simply so the cache
1747 * can avoid holding long names (which would either waste space, or
1748 * add greatly to the complexity).
1749 *
1750 * Lookup is called with dvp pointing to the directory to search,
1751 * cnp pointing to the name of the entry being sought. If the lookup
1752 * succeeds, the vnode is returned in *vpp, and a status of -1 is
1753 * returned. If the lookup determines that the name does not exist
1754 * (negative cacheing), a status of ENOENT is returned. If the lookup
1755 * fails, a status of zero is returned.
1756 */
1757
1758int
2d21ac55 1759cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
1c79356b 1760{
2d21ac55
A
1761 struct namecache *ncp;
1762 struct nchashhead *ncpp;
1763 long namelen = cnp->cn_namelen;
b0d623f7 1764 unsigned int hashval;
0c530ab8 1765 boolean_t have_exclusive = FALSE;
91447636
A
1766 uint32_t vid;
1767 vnode_t vp;
1c79356b 1768
b0d623f7
A
1769 if (cnp->cn_hash == 0)
1770 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
fe8ab488 1771 hashval = cnp->cn_hash;
b0d623f7 1772
6d2010ae
A
1773 if (nc_disabled) {
1774 return 0;
1775 }
1776
0c530ab8 1777 NAME_CACHE_LOCK_SHARED();
1c79356b 1778
0c530ab8 1779relook:
b0d623f7 1780 ncpp = NCHHASH(dvp, cnp->cn_hash);
91447636
A
1781 LIST_FOREACH(ncp, ncpp, nc_hash) {
1782 if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) {
5ba3f43e 1783 if (strncmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0)
91447636 1784 break;
55e303ae 1785 }
1c79356b 1786 }
1c79356b
A
1787 /* We failed to find an entry */
1788 if (ncp == 0) {
0c530ab8
A
1789 NCHSTAT(ncs_miss);
1790 NAME_CACHE_UNLOCK();
1c79356b
A
1791 return (0);
1792 }
1793
1794 /* We don't want to have an entry, so dump it */
1795 if ((cnp->cn_flags & MAKEENTRY) == 0) {
0c530ab8
A
1796 if (have_exclusive == TRUE) {
1797 NCHSTAT(ncs_badhits);
1798 cache_delete(ncp, 1);
1799 NAME_CACHE_UNLOCK();
1800 return (0);
1801 }
1802 NAME_CACHE_UNLOCK();
1803 NAME_CACHE_LOCK();
1804 have_exclusive = TRUE;
1805 goto relook;
1c79356b 1806 }
91447636 1807 vp = ncp->nc_vp;
1c79356b
A
1808
1809 /* We found a "positive" match, return the vnode */
91447636 1810 if (vp) {
0c530ab8 1811 NCHSTAT(ncs_goodhits);
91447636
A
1812
1813 vid = vp->v_id;
0c530ab8 1814 NAME_CACHE_UNLOCK();
91447636
A
1815
1816 if (vnode_getwithvid(vp, vid)) {
0c530ab8
A
1817#if COLLECT_STATS
1818 NAME_CACHE_LOCK();
1819 NCHSTAT(ncs_badvid);
1820 NAME_CACHE_UNLOCK();
1821#endif
91447636
A
1822 return (0);
1823 }
1824 *vpp = vp;
1c79356b
A
1825 return (-1);
1826 }
1827
1828 /* We found a negative match, and want to create it, so purge */
91447636 1829 if (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) {
0c530ab8
A
1830 if (have_exclusive == TRUE) {
1831 NCHSTAT(ncs_badhits);
1832 cache_delete(ncp, 1);
1833 NAME_CACHE_UNLOCK();
1834 return (0);
1835 }
1836 NAME_CACHE_UNLOCK();
1837 NAME_CACHE_LOCK();
1838 have_exclusive = TRUE;
1839 goto relook;
1c79356b
A
1840 }
1841
1842 /*
1843 * We found a "negative" match, ENOENT notifies client of this match.
1c79356b 1844 */
0c530ab8 1845 NCHSTAT(ncs_neghits);
91447636 1846
0c530ab8 1847 NAME_CACHE_UNLOCK();
1c79356b
A
1848 return (ENOENT);
1849}
1850
b0d623f7
A
1851const char *
1852cache_enter_create(vnode_t dvp, vnode_t vp, struct componentname *cnp)
1853{
1854 const char *strname;
1855
1856 if (cnp->cn_hash == 0)
1857 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
1858
1859 /*
1860 * grab 2 references on the string entered
1861 * one for the cache_enter_locked to consume
1862 * and the second to be consumed by v_name (vnode_create call point)
1863 */
1864 strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, TRUE, 0);
1865
1866 NAME_CACHE_LOCK();
1867
1868 cache_enter_locked(dvp, vp, cnp, strname);
1869
1870 NAME_CACHE_UNLOCK();
1871
1872 return (strname);
1873}
1874
2d21ac55 1875
1c79356b 1876/*
2d21ac55
A
1877 * Add an entry to the cache...
1878 * but first check to see if the directory
1879 * that this entry is to be associated with has
1880 * had any cache_purges applied since we took
1881 * our identity snapshot... this check needs to
1882 * be done behind the name cache lock
1c79356b
A
1883 */
1884void
2d21ac55 1885cache_enter_with_gen(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, int gen)
1c79356b 1886{
1c79356b 1887
91447636
A
1888 if (cnp->cn_hash == 0)
1889 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
1890
0c530ab8 1891 NAME_CACHE_LOCK();
1c79356b 1892
2d21ac55 1893 if (dvp->v_nc_generation == gen)
b0d623f7 1894 (void)cache_enter_locked(dvp, vp, cnp, NULL);
2d21ac55
A
1895
1896 NAME_CACHE_UNLOCK();
1897}
1898
1899
1900/*
1901 * Add an entry to the cache.
1902 */
1903void
1904cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
1905{
b0d623f7
A
1906 const char *strname;
1907
2d21ac55
A
1908 if (cnp->cn_hash == 0)
1909 cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen);
1910
b0d623f7
A
1911 /*
1912 * grab 1 reference on the string entered
1913 * for the cache_enter_locked to consume
1914 */
1915 strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);
1916
2d21ac55
A
1917 NAME_CACHE_LOCK();
1918
b0d623f7 1919 cache_enter_locked(dvp, vp, cnp, strname);
2d21ac55
A
1920
1921 NAME_CACHE_UNLOCK();
1922}
1923
1924
1925static void
b0d623f7 1926cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, const char *strname)
2d21ac55
A
1927{
1928 struct namecache *ncp, *negp;
1929 struct nchashhead *ncpp;
1930
6d2010ae
A
1931 if (nc_disabled)
1932 return;
1933
2d21ac55
A
1934 /*
1935 * if the entry is for -ve caching vp is null
1936 */
91447636
A
1937 if ((vp != NULLVP) && (LIST_FIRST(&vp->v_nclinks))) {
1938 /*
1939 * someone beat us to the punch..
1940 * this vnode is already in the cache
1941 */
b0d623f7
A
1942 if (strname != NULL)
1943 vfs_removename(strname);
0c530ab8 1944 return;
91447636 1945 }
1c79356b
A
1946 /*
1947 * We allocate a new entry if we are less than the maximum
91447636
A
1948 * allowed and the one at the front of the list is in use.
1949 * Otherwise we use the one at the front of the list.
1c79356b 1950 */
91447636
A
1951 if (numcache < desiredNodes &&
1952 ((ncp = nchead.tqh_first) == NULL ||
1953 ncp->nc_hash.le_prev != 0)) {
1954 /*
1955 * Allocate one more entry
1956 */
b0d623f7 1957 ncp = (struct namecache *)_MALLOC_ZONE(sizeof(*ncp), M_CACHE, M_WAITOK);
1c79356b 1958 numcache++;
91447636
A
1959 } else {
1960 /*
1961 * reuse an old entry
1962 */
1963 ncp = TAILQ_FIRST(&nchead);
1964 TAILQ_REMOVE(&nchead, ncp, nc_entry);
1965
1c79356b 1966 if (ncp->nc_hash.le_prev != 0) {
91447636
A
1967 /*
1968 * still in use... we need to
1969 * delete it before re-using it
1970 */
0c530ab8 1971 NCHSTAT(ncs_stolen);
91447636 1972 cache_delete(ncp, 0);
1c79356b 1973 }
1c79356b 1974 }
0c530ab8 1975 NCHSTAT(ncs_enters);
1c79356b
A
1976
1977 /*
1978 * Fill in cache info, if vp is NULL this is a "negative" cache entry.
1c79356b
A
1979 */
1980 ncp->nc_vp = vp;
1c79356b 1981 ncp->nc_dvp = dvp;
91447636 1982 ncp->nc_hashval = cnp->cn_hash;
91447636 1983
b0d623f7
A
1984 if (strname == NULL)
1985 ncp->nc_name = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0);
1986 else
1987 ncp->nc_name = strname;
3e170ce0
A
1988
1989 //
1990 // If the bytes of the name associated with the vnode differ,
1991 // use the name associated with the vnode since the file system
1992 // may have set that explicitly in the case of a lookup on a
1993 // case-insensitive file system where the case of the looked up
1994 // name differs from what is on disk. For more details, see:
1995 // <rdar://problem/8044697> FSEvents doesn't always decompose diacritical unicode chars in the paths of the changed directories
1996 //
1997 const char *vn_name = vp ? vp->v_name : NULL;
1998 unsigned int len = vn_name ? strlen(vn_name) : 0;
1999 if (vn_name && ncp && ncp->nc_name && strncmp(ncp->nc_name, vn_name, len) != 0) {
2000 unsigned int hash = hash_string(vn_name, len);
2001
2002 vfs_removename(ncp->nc_name);
2003 ncp->nc_name = add_name_internal(vn_name, len, hash, FALSE, 0);
2004 ncp->nc_hashval = hash;
2005 }
2006
91447636
A
2007 /*
2008 * make us the newest entry in the cache
2009 * i.e. we'll be the last to be stolen
2010 */
2011 TAILQ_INSERT_TAIL(&nchead, ncp, nc_entry);
2012
55e303ae 2013 ncpp = NCHHASH(dvp, cnp->cn_hash);
1c79356b
A
2014#if DIAGNOSTIC
2015 {
2d21ac55 2016 struct namecache *p;
1c79356b
A
2017
2018 for (p = ncpp->lh_first; p != 0; p = p->nc_hash.le_next)
2019 if (p == ncp)
2020 panic("cache_enter: duplicate");
2021 }
2022#endif
91447636
A
2023 /*
2024 * make us available to be found via lookup
2025 */
1c79356b 2026 LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
91447636
A
2027
2028 if (vp) {
2029 /*
2030 * add to the list of name cache entries
2031 * that point at vp
2032 */
2033 LIST_INSERT_HEAD(&vp->v_nclinks, ncp, nc_un.nc_link);
2034 } else {
2035 /*
2036 * this is a negative cache entry (vp == NULL)
fe8ab488 2037 * stick it on the negative cache list.
91447636
A
2038 */
2039 TAILQ_INSERT_TAIL(&neghead, ncp, nc_un.nc_negentry);
2040
0c530ab8 2041 ncs_negtotal++;
91447636 2042
0c530ab8 2043 if (ncs_negtotal > desiredNegNodes) {
91447636
A
2044 /*
2045 * if we've reached our desired limit
2046 * of negative cache entries, delete
2047 * the oldest
2048 */
2049 negp = TAILQ_FIRST(&neghead);
91447636
A
2050 cache_delete(negp, 1);
2051 }
2052 }
2053 /*
2054 * add us to the list of name cache entries that
2055 * are children of dvp
2056 */
39037602
A
2057 if (vp)
2058 TAILQ_INSERT_TAIL(&dvp->v_ncchildren, ncp, nc_child);
2059 else
2060 TAILQ_INSERT_HEAD(&dvp->v_ncchildren, ncp, nc_child);
1c79356b
A
2061}
2062
91447636
A
2063
2064/*
2065 * Initialize CRC-32 remainder table.
2066 */
2067static void init_crc32(void)
2068{
2069 /*
2070 * the CRC-32 generator polynomial is:
2071 * x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^10
2072 * + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
2073 */
2074 unsigned int crc32_polynomial = 0x04c11db7;
2075 unsigned int i,j;
2076
2077 /*
2078 * pre-calculate the CRC-32 remainder for each possible octet encoding
2079 */
2080 for (i = 0; i < 256; i++) {
2081 unsigned int crc_rem = i << 24;
2082
2083 for (j = 0; j < 8; j++) {
2084 if (crc_rem & 0x80000000)
2085 crc_rem = (crc_rem << 1) ^ crc32_polynomial;
2086 else
2087 crc_rem = (crc_rem << 1);
2088 }
2089 crc32tab[i] = crc_rem;
2090 }
2091}
2092
2093
1c79356b
A
2094/*
2095 * Name cache initialization, from vfs_init() when we are booting
2096 */
2097void
91447636
A
2098nchinit(void)
2099{
b0d623f7
A
2100 int i;
2101
91447636
A
2102 desiredNegNodes = (desiredvnodes / 10);
2103 desiredNodes = desiredvnodes + desiredNegNodes;
2104
2105 TAILQ_INIT(&nchead);
2106 TAILQ_INIT(&neghead);
2107
2108 init_crc32();
2109
2d21ac55 2110 nchashtbl = hashinit(MAX(CONFIG_NC_HASH, (2 *desiredNodes)), M_CACHE, &nchash);
91447636
A
2111 nchashmask = nchash;
2112 nchash++;
2113
2114 init_string_table();
2115
b0d623f7 2116 /* Allocate name cache lock group attribute and group */
91447636 2117 namecache_lck_grp_attr= lck_grp_attr_alloc_init();
91447636
A
2118
2119 namecache_lck_grp = lck_grp_alloc_init("Name Cache", namecache_lck_grp_attr);
2120
b0d623f7 2121 /* Allocate name cache lock attribute */
91447636 2122 namecache_lck_attr = lck_attr_alloc_init();
91447636 2123
b0d623f7 2124 /* Allocate name cache lock */
0c530ab8 2125 namecache_rw_lock = lck_rw_alloc_init(namecache_lck_grp, namecache_lck_attr);
91447636
A
2126
2127
b0d623f7
A
2128 /* Allocate string cache lock group attribute and group */
2129 strcache_lck_grp_attr= lck_grp_attr_alloc_init();
2130
2131 strcache_lck_grp = lck_grp_alloc_init("String Cache", strcache_lck_grp_attr);
2132
2133 /* Allocate string cache lock attribute */
2134 strcache_lck_attr = lck_attr_alloc_init();
2135
2136 /* Allocate string cache lock */
2137 strtable_rw_lock = lck_rw_alloc_init(strcache_lck_grp, strcache_lck_attr);
2138
2139 for (i = 0; i < NUM_STRCACHE_LOCKS; i++)
2140 lck_mtx_init(&strcache_mtx_locks[i], strcache_lck_grp, strcache_lck_attr);
91447636
A
2141}
2142
0c530ab8
A
2143void
2144name_cache_lock_shared(void)
2145{
2146 lck_rw_lock_shared(namecache_rw_lock);
2147}
2148
91447636
A
2149void
2150name_cache_lock(void)
1c79356b 2151{
0c530ab8 2152 lck_rw_lock_exclusive(namecache_rw_lock);
91447636 2153}
55e303ae 2154
91447636
A
2155void
2156name_cache_unlock(void)
2157{
0c530ab8 2158 lck_rw_done(namecache_rw_lock);
1c79356b
A
2159}
2160
55e303ae
A
2161
2162int
d9a64523 2163resize_namecache(int newsize)
55e303ae 2164{
91447636
A
2165 struct nchashhead *new_table;
2166 struct nchashhead *old_table;
2167 struct nchashhead *old_head, *head;
2168 struct namecache *entry, *next;
2169 uint32_t i, hashval;
d9a64523 2170 int dNodes, dNegNodes, nelements;
91447636
A
2171 u_long new_size, old_size;
2172
d9a64523
A
2173 if (newsize < 0)
2174 return EINVAL;
2175
91447636
A
2176 dNegNodes = (newsize / 10);
2177 dNodes = newsize + dNegNodes;
55e303ae 2178 // we don't support shrinking yet
b0d623f7 2179 if (dNodes <= desiredNodes) {
d9a64523
A
2180 return 0;
2181 }
2182
2183 if (os_mul_overflow(dNodes, 2, &nelements)) {
2184 return EINVAL;
55e303ae 2185 }
d9a64523
A
2186
2187 new_table = hashinit(nelements, M_CACHE, &nchashmask);
91447636 2188 new_size = nchashmask + 1;
55e303ae 2189
55e303ae 2190 if (new_table == NULL) {
d9a64523 2191 return ENOMEM;
55e303ae
A
2192 }
2193
0c530ab8 2194 NAME_CACHE_LOCK();
55e303ae
A
2195 // do the switch!
2196 old_table = nchashtbl;
2197 nchashtbl = new_table;
91447636
A
2198 old_size = nchash;
2199 nchash = new_size;
55e303ae
A
2200
2201 // walk the old table and insert all the entries into
2202 // the new table
2203 //
91447636 2204 for(i=0; i < old_size; i++) {
55e303ae
A
2205 old_head = &old_table[i];
2206 for (entry=old_head->lh_first; entry != NULL; entry=next) {
2207 //
2208 // XXXdbg - Beware: this assumes that hash_string() does
2209 // the same thing as what happens in
2210 // lookup() over in vfs_lookup.c
91447636
A
2211 hashval = hash_string(entry->nc_name, 0);
2212 entry->nc_hashval = hashval;
2213 head = NCHHASH(entry->nc_dvp, hashval);
2214
55e303ae
A
2215 next = entry->nc_hash.le_next;
2216 LIST_INSERT_HEAD(head, entry, nc_hash);
2217 }
2218 }
91447636
A
2219 desiredNodes = dNodes;
2220 desiredNegNodes = dNegNodes;
55e303ae 2221
0c530ab8 2222 NAME_CACHE_UNLOCK();
55e303ae
A
2223 FREE(old_table, M_CACHE);
2224
2225 return 0;
2226}
2227
91447636 2228static void
5ba3f43e 2229cache_delete(struct namecache *ncp, int free_entry)
91447636 2230{
0c530ab8 2231 NCHSTAT(ncs_deletes);
91447636
A
2232
2233 if (ncp->nc_vp) {
2234 LIST_REMOVE(ncp, nc_un.nc_link);
2235 } else {
2236 TAILQ_REMOVE(&neghead, ncp, nc_un.nc_negentry);
0c530ab8 2237 ncs_negtotal--;
91447636 2238 }
39037602 2239 TAILQ_REMOVE(&(ncp->nc_dvp->v_ncchildren), ncp, nc_child);
91447636
A
2240
2241 LIST_REMOVE(ncp, nc_hash);
2242 /*
2243 * this field is used to indicate
2244 * that the entry is in use and
2245 * must be deleted before it can
2246 * be reused...
2247 */
2248 ncp->nc_hash.le_prev = NULL;
2249
b0d623f7 2250 vfs_removename(ncp->nc_name);
91447636 2251 ncp->nc_name = NULL;
5ba3f43e
A
2252 if (free_entry) {
2253 TAILQ_REMOVE(&nchead, ncp, nc_entry);
2254 FREE_ZONE(ncp, sizeof(*ncp), M_CACHE);
2255 numcache--;
2256 }
91447636
A
2257}
2258
2259
2260/*
2261 * purge the entry associated with the
2262 * specified vnode from the name cache
2263 */
2264void
2265cache_purge(vnode_t vp)
2266{
2267 struct namecache *ncp;
2d21ac55 2268 kauth_cred_t tcred = NULL;
91447636 2269
6d2010ae 2270 if ((LIST_FIRST(&vp->v_nclinks) == NULL) &&
39037602 2271 (TAILQ_FIRST(&vp->v_ncchildren) == NULL) &&
6d2010ae
A
2272 (vp->v_cred == NOCRED) &&
2273 (vp->v_parent == NULLVP))
91447636
A
2274 return;
2275
0c530ab8 2276 NAME_CACHE_LOCK();
55e303ae 2277
2d21ac55
A
2278 if (vp->v_parent)
2279 vp->v_parent->v_nc_generation++;
2280
91447636
A
2281 while ( (ncp = LIST_FIRST(&vp->v_nclinks)) )
2282 cache_delete(ncp, 1);
55e303ae 2283
39037602 2284 while ( (ncp = TAILQ_FIRST(&vp->v_ncchildren)) )
91447636
A
2285 cache_delete(ncp, 1);
2286
2d21ac55
A
2287 /*
2288 * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held
2289 */
2290 tcred = vp->v_cred;
2291 vp->v_cred = NOCRED;
2292 vp->v_authorized_actions = 0;
2293
0c530ab8 2294 NAME_CACHE_UNLOCK();
2d21ac55
A
2295
2296 if (IS_VALID_CRED(tcred))
2297 kauth_cred_unref(&tcred);
91447636 2298}
55e303ae 2299
1c79356b 2300/*
91447636
A
2301 * Purge all negative cache entries that are children of the
2302 * given vnode. A case-insensitive file system (or any file
2303 * system that has multiple equivalent names for the same
2304 * directory entry) can use this when creating or renaming
2305 * to remove negative entries that may no longer apply.
1c79356b
A
2306 */
2307void
91447636 2308cache_purge_negatives(vnode_t vp)
1c79356b 2309{
b0d623f7 2310 struct namecache *ncp, *next_ncp;
1c79356b 2311
0c530ab8 2312 NAME_CACHE_LOCK();
91447636 2313
39037602
A
2314 TAILQ_FOREACH_SAFE(ncp, &vp->v_ncchildren, nc_child, next_ncp) {
2315 if (ncp->nc_vp)
2316 break;
2317
2318 cache_delete(ncp, 1);
2319 }
91447636 2320
0c530ab8 2321 NAME_CACHE_UNLOCK();
1c79356b
A
2322}
2323
2324/*
2325 * Flush all entries referencing a particular filesystem.
2326 *
2327 * Since we need to check it anyway, we will flush all the invalid
91447636 2328 * entries at the same time.
1c79356b
A
2329 */
2330void
2d21ac55 2331cache_purgevfs(struct mount *mp)
1c79356b
A
2332{
2333 struct nchashhead *ncpp;
91447636 2334 struct namecache *ncp;
1c79356b 2335
0c530ab8 2336 NAME_CACHE_LOCK();
1c79356b 2337 /* Scan hash tables for applicable entries */
91447636
A
2338 for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) {
2339restart:
2340 for (ncp = ncpp->lh_first; ncp != 0; ncp = ncp->nc_hash.le_next) {
2341 if (ncp->nc_dvp->v_mount == mp) {
2342 cache_delete(ncp, 0);
2343 goto restart;
1c79356b
A
2344 }
2345 }
2346 }
0c530ab8 2347 NAME_CACHE_UNLOCK();
1c79356b 2348}
55e303ae
A
2349
2350
2351
2352//
2353// String ref routines
2354//
2355static LIST_HEAD(stringhead, string_t) *string_ref_table;
2356static u_long string_table_mask;
55e303ae 2357static uint32_t filled_buckets=0;
b0d623f7 2358
55e303ae
A
2359
2360typedef struct string_t {
2361 LIST_ENTRY(string_t) hash_chain;
2d21ac55 2362 const char *str;
55e303ae
A
2363 uint32_t refcount;
2364} string_t;
2365
2366
b0d623f7 2367static void
91447636 2368resize_string_ref_table(void)
55e303ae 2369{
b0d623f7
A
2370 struct stringhead *new_table;
2371 struct stringhead *old_table;
2372 struct stringhead *old_head, *head;
2373 string_t *entry, *next;
2374 uint32_t i, hashval;
2375 u_long new_mask, old_mask;
55e303ae 2376
b0d623f7
A
2377 /*
2378 * need to hold the table lock exclusively
2379 * in order to grow the table... need to recheck
2380 * the need to resize again after we've taken
2381 * the lock exclusively in case some other thread
2382 * beat us to the punch
2383 */
2384 lck_rw_lock_exclusive(strtable_rw_lock);
55e303ae 2385
b0d623f7
A
2386 if (4 * filled_buckets < ((string_table_mask + 1) * 3)) {
2387 lck_rw_done(strtable_rw_lock);
2388 return;
2389 }
2390 new_table = hashinit((string_table_mask + 1) * 2, M_CACHE, &new_mask);
55e303ae 2391
b0d623f7
A
2392 if (new_table == NULL) {
2393 printf("failed to resize the hash table.\n");
2394 lck_rw_done(strtable_rw_lock);
2395 return;
2396 }
2397
2398 // do the switch!
2399 old_table = string_ref_table;
2400 string_ref_table = new_table;
2401 old_mask = string_table_mask;
2402 string_table_mask = new_mask;
2403 filled_buckets = 0;
55e303ae 2404
b0d623f7
A
2405 // walk the old table and insert all the entries into
2406 // the new table
2407 //
2408 for (i = 0; i <= old_mask; i++) {
2409 old_head = &old_table[i];
2410 for (entry = old_head->lh_first; entry != NULL; entry = next) {
2411 hashval = hash_string((const char *)entry->str, 0);
2412 head = &string_ref_table[hashval & string_table_mask];
2413 if (head->lh_first == NULL) {
2414 filled_buckets++;
2415 }
2416 next = entry->hash_chain.le_next;
2417 LIST_INSERT_HEAD(head, entry, hash_chain);
2418 }
55e303ae 2419 }
b0d623f7 2420 lck_rw_done(strtable_rw_lock);
55e303ae 2421
b0d623f7 2422 FREE(old_table, M_CACHE);
55e303ae
A
2423}
2424
2425
2426static void
2427init_string_table(void)
2428{
2d21ac55 2429 string_ref_table = hashinit(CONFIG_VFS_NAMES, M_CACHE, &string_table_mask);
55e303ae
A
2430}
2431
2432
2d21ac55 2433const char *
b0d623f7 2434vfs_addname(const char *name, uint32_t len, u_int hashval, u_int flags)
91447636 2435{
b0d623f7 2436 return (add_name_internal(name, len, hashval, FALSE, flags));
91447636
A
2437}
2438
b0d623f7 2439
2d21ac55 2440static const char *
b0d623f7 2441add_name_internal(const char *name, uint32_t len, u_int hashval, boolean_t need_extra_ref, __unused u_int flags)
55e303ae 2442{
b0d623f7
A
2443 struct stringhead *head;
2444 string_t *entry;
2445 uint32_t chain_len = 0;
2446 uint32_t hash_index;
2447 uint32_t lock_index;
2448 char *ptr;
55e303ae 2449
39037602
A
2450 if (len > MAXPATHLEN)
2451 len = MAXPATHLEN;
2452
b0d623f7
A
2453 /*
2454 * if the length already accounts for the null-byte, then
2455 * subtract one so later on we don't index past the end
2456 * of the string.
2457 */
2458 if (len > 0 && name[len-1] == '\0') {
2459 len--;
2460 }
6d2010ae
A
2461 if (hashval == 0) {
2462 hashval = hash_string(name, len);
2463 }
2464
b0d623f7
A
2465 /*
2466 * take this lock 'shared' to keep the hash stable
2467 * if someone else decides to grow the pool they
2468 * will take this lock exclusively
2469 */
2470 lck_rw_lock_shared(strtable_rw_lock);
55e303ae 2471
b0d623f7
A
2472 /*
2473 * If the table gets more than 3/4 full, resize it
2474 */
2475 if (4 * filled_buckets >= ((string_table_mask + 1) * 3)) {
2476 lck_rw_done(strtable_rw_lock);
55e303ae 2477
b0d623f7
A
2478 resize_string_ref_table();
2479
2480 lck_rw_lock_shared(strtable_rw_lock);
55e303ae 2481 }
b0d623f7
A
2482 hash_index = hashval & string_table_mask;
2483 lock_index = hash_index % NUM_STRCACHE_LOCKS;
2484
2485 head = &string_ref_table[hash_index];
55e303ae 2486
b0d623f7 2487 lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);
55e303ae 2488
b0d623f7 2489 for (entry = head->lh_first; entry != NULL; chain_len++, entry = entry->hash_chain.le_next) {
5ba3f43e 2490 if (strncmp(entry->str, name, len) == 0 && entry->str[len] == 0) {
b0d623f7
A
2491 entry->refcount++;
2492 break;
2493 }
55e303ae 2494 }
b0d623f7
A
2495 if (entry == NULL) {
2496 lck_mtx_convert_spin(&strcache_mtx_locks[lock_index]);
2497 /*
2498 * it wasn't already there so add it.
2499 */
2500 MALLOC(entry, string_t *, sizeof(string_t) + len + 1, M_TEMP, M_WAITOK);
55e303ae 2501
b0d623f7
A
2502 if (head->lh_first == NULL) {
2503 OSAddAtomic(1, &filled_buckets);
2504 }
2505 ptr = (char *)((char *)entry + sizeof(string_t));
2506 strncpy(ptr, name, len);
2507 ptr[len] = '\0';
2508 entry->str = ptr;
2509 entry->refcount = 1;
2510 LIST_INSERT_HEAD(head, entry, hash_chain);
2511 }
2512 if (need_extra_ref == TRUE)
2513 entry->refcount++;
55e303ae 2514
b0d623f7
A
2515 lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
2516 lck_rw_done(strtable_rw_lock);
2517
2518 return (const char *)entry->str;
55e303ae
A
2519}
2520
b0d623f7 2521
55e303ae 2522int
91447636
A
2523vfs_removename(const char *nameref)
2524{
b0d623f7
A
2525 struct stringhead *head;
2526 string_t *entry;
2527 uint32_t hashval;
2528 uint32_t hash_index;
2529 uint32_t lock_index;
2530 int retval = ENOENT;
91447636 2531
b0d623f7 2532 hashval = hash_string(nameref, 0);
91447636 2533
b0d623f7
A
2534 /*
2535 * take this lock 'shared' to keep the hash stable
2536 * if someone else decides to grow the pool they
2537 * will take this lock exclusively
2538 */
2539 lck_rw_lock_shared(strtable_rw_lock);
2540 /*
2541 * must compute the head behind the table lock
2542 * since the size and location of the table
2543 * can change on the fly
2544 */
2545 hash_index = hashval & string_table_mask;
2546 lock_index = hash_index % NUM_STRCACHE_LOCKS;
91447636 2547
b0d623f7 2548 head = &string_ref_table[hash_index];
91447636 2549
b0d623f7 2550 lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]);
55e303ae 2551
b0d623f7
A
2552 for (entry = head->lh_first; entry != NULL; entry = entry->hash_chain.le_next) {
2553 if (entry->str == nameref) {
2554 entry->refcount--;
55e303ae 2555
b0d623f7
A
2556 if (entry->refcount == 0) {
2557 LIST_REMOVE(entry, hash_chain);
2558
2559 if (head->lh_first == NULL) {
2560 OSAddAtomic(-1, &filled_buckets);
2561 }
2562 } else {
2563 entry = NULL;
2564 }
2565 retval = 0;
2566 break;
2567 }
55e303ae 2568 }
b0d623f7
A
2569 lck_mtx_unlock(&strcache_mtx_locks[lock_index]);
2570 lck_rw_done(strtable_rw_lock);
55e303ae 2571
b0d623f7
A
2572 if (entry != NULL)
2573 FREE(entry, M_TEMP);
2574
2575 return retval;
55e303ae
A
2576}
2577
2578
2d21ac55 2579#ifdef DUMP_STRING_TABLE
55e303ae
A
2580void
2581dump_string_table(void)
2582{
2583 struct stringhead *head;
2584 string_t *entry;
91447636 2585 u_long i;
55e303ae 2586
b0d623f7 2587 lck_rw_lock_shared(strtable_rw_lock);
0c530ab8 2588
91447636 2589 for (i = 0; i <= string_table_mask; i++) {
55e303ae
A
2590 head = &string_ref_table[i];
2591 for (entry=head->lh_first; entry != NULL; entry=entry->hash_chain.le_next) {
2592 printf("%6d - %s\n", entry->refcount, entry->str);
2593 }
2594 }
b0d623f7 2595 lck_rw_done(strtable_rw_lock);
55e303ae 2596}
2d21ac55 2597#endif /* DUMP_STRING_TABLE */