]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfs_cnode.c
970f2648eafc53586bee35e7893f68ddb8ae4078
[apple/xnu.git] / bsd / hfs / hfs_cnode.c
1 /*
2 * Copyright (c) 2002-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/proc.h>
31 #include <sys/vnode.h>
32 #include <sys/mount.h>
33 #include <sys/kernel.h>
34 #include <sys/malloc.h>
35 #include <sys/time.h>
36 #include <sys/ubc.h>
37 #include <sys/quota.h>
38 #include <sys/kdebug.h>
39 #include <libkern/OSByteOrder.h>
40 #include <sys/buf_internal.h>
41
42 #include <kern/locks.h>
43
44 #include <miscfs/specfs/specdev.h>
45 #include <miscfs/fifofs/fifo.h>
46
47 #include <hfs/hfs.h>
48 #include <hfs/hfs_catalog.h>
49 #include <hfs/hfs_cnode.h>
50 #include <hfs/hfs_quota.h>
51 #include <hfs/hfs_format.h>
52
/* Declared here, defined elsewhere. NOTE(review): not referenced in the part of the file reviewed here. */
53 extern int prtactive;
54
/*
 * Lock attribute and lock groups defined elsewhere.
 * NOTE(review): presumably used when initialising cnode locks -- confirm against the rest of the file.
 */
55 extern lck_attr_t * hfs_lock_attr;
56 extern lck_grp_t * hfs_mutex_group;
57 extern lck_grp_t * hfs_rwlock_group;
58
/* File-private helpers. hfs_reclaim_cnode is called from hfs_vnop_reclaim when the last fork goes away. */
59 static void hfs_reclaim_cnode(struct cnode *);
/* Shared teardown path used by both hfs_vnop_inactive (reclaim == 0) and hfs_vnop_reclaim (reclaim == 1). */
60 static int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim);
61 static int hfs_isordered(struct cnode *, struct cnode *);
62
/* Callback handed to buf_iterate() in hfs_cnode_teardown for symlinks on journaled volumes. */
63 extern int hfs_removefile_callback(struct buf *bp, void *hfsmp);
64
65 __inline__ int hfs_checkdeleted (struct cnode *cp) {
66 return ((cp->c_flag & (C_DELETED | C_NOEXISTS)) ? ENOENT : 0);
67 }
68
69 /*
70 * Function used by a special fcntl() that decorates a cnode/vnode that
71 * indicates it is backing another filesystem, like a disk image.
72 *
73 * the argument 'val' indicates whether or not to set the bit in the cnode flags
74 *
75 * Returns non-zero on failure. 0 on success
76 */
77 int hfs_set_backingstore (struct vnode *vp, int val) {
78 struct cnode *cp = NULL;
79 int err = 0;
80
81 cp = VTOC(vp);
82 if (!vnode_isreg(vp) && !vnode_isdir(vp)) {
83 return EINVAL;
84 }
85
86 /* lock the cnode */
87 err = hfs_lock (cp, HFS_EXCLUSIVE_LOCK);
88 if (err) {
89 return err;
90 }
91
92 if (val) {
93 cp->c_flag |= C_BACKINGSTORE;
94 }
95 else {
96 cp->c_flag &= ~C_BACKINGSTORE;
97 }
98
99 /* unlock everything */
100 hfs_unlock (cp);
101
102 return err;
103 }
104
105 /*
106 * Function used by a special fcntl() that check to see if a cnode/vnode
107 * indicates it is backing another filesystem, like a disk image.
108 *
109 * the argument 'val' is an output argument for whether or not the bit is set
110 *
111 * Returns non-zero on failure. 0 on success
112 */
113
114 int hfs_is_backingstore (struct vnode *vp, int *val) {
115 struct cnode *cp = NULL;
116 int err = 0;
117
118 if (!vnode_isreg(vp) && !vnode_isdir(vp)) {
119 *val = 0;
120 return 0;
121 }
122
123 cp = VTOC(vp);
124
125 /* lock the cnode */
126 err = hfs_lock (cp, HFS_SHARED_LOCK);
127 if (err) {
128 return err;
129 }
130
131 if (cp->c_flag & C_BACKINGSTORE) {
132 *val = 1;
133 }
134 else {
135 *val = 0;
136 }
137
138 /* unlock everything */
139 hfs_unlock (cp);
140
141 return err;
142 }
143
144
145 /*
146 * hfs_cnode_teardown
147 *
148 * This is an internal function that is invoked from both hfs_vnop_inactive
149 * and hfs_vnop_reclaim. As VNOP_INACTIVE is not necessarily called from vnodes
150 * being recycled and reclaimed, it is important that we do any post-processing
151 * necessary for the cnode in both places. Important tasks include things such as
152 * releasing the blocks from an open-unlinked file when all references to it have dropped,
153 * and handling resource forks separately from data forks.
154 *
155 * Note that we take only the vnode as an argument here (rather than the cnode).
156 * Recall that each cnode supports two forks (rsrc/data), and we can always get the right
157 * cnode from either of the vnodes, but the reverse is not true -- we can't determine which
158 * vnode we need to reclaim if only the cnode is supplied.
159 *
160 * This function is idempotent and safe to call from both hfs_vnop_inactive and hfs_vnop_reclaim
161 * if both are invoked right after the other. In the second call, most of this function's if()
162 * conditions will fail, since they apply generally to cnodes still marked with C_DELETED.
163 * As a quick check to see if this function is necessary, determine if the cnode is already
164 * marked C_NOEXISTS. If it is, then it is safe to skip this function. The only tasks that
165 * remain for cnodes marked in such a fashion is to teardown their fork references and
166 * release all directory hints and hardlink origins. However, both of those are done
167 * in hfs_vnop_reclaim. hfs_update, by definition, is not necessary if the cnode's catalog
168 * entry is no longer there.
169 *
170 * 'reclaim' argument specifies whether or not we were called from hfs_vnop_reclaim. If we are
171 * invoked from hfs_vnop_reclaim, we can not call functions that cluster_push since the UBC info
172 * is totally gone by that point.
173 *
174 * Assumes that both truncate and cnode locks for 'cp' are held.
 * NOTE(review): hfs_vnop_reclaim calls this holding only the cnode lock (it deliberately
 * skips the truncate lock), and hfs_vnop_inactive takes the truncate lock only for
 * VREG/VLNK vnodes, so the statement above does not hold for every caller.
 *
 * Parameters:
 *   vp      - vnode of the fork being torn down; the cnode and the mount are derived from it.
 *   ctx     - VFS context; passed to hfs_filedone and hfs_truncate, and used to obtain the
 *             proc handed to cat_preflight/cat_postflight.
 *   reclaim - non-zero when called from hfs_vnop_reclaim; suppresses the ubc_setsize and
 *             hfs_filedone steps.
 *
 * Returns 0 on success. Otherwise returns EINVAL if a transaction could not be started,
 * ENOMEM if the temporary lookup buffer could not be allocated, or the error reported by
 * hfs_truncate, cat_lookup, hfs_release_storage, cat_preflight or cat_delete. The result of
 * hfs_removeallattr is not part of the return value.
175 */
176 static
177 int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) {
178
179 int forkcount = 0;
180 enum vtype v_type;
181 struct cnode *cp;
182 int error = 0;
183 int started_tr = 0;
184 struct hfsmount *hfsmp = VTOHFS(vp);
185 struct proc *p = vfs_context_proc(ctx);
186 int truncated = 0;
187 cat_cookie_t cookie;
188 int cat_reserve = 0;
189 int lockflags;
190 int ea_error = 0;
191
192 v_type = vnode_vtype(vp);
193 cp = VTOC(vp);
194
/* Count how many fork structures (data and/or resource) are still attached to the cnode. */
195 if (cp->c_datafork) {
196 ++forkcount;
197 }
198 if (cp->c_rsrcfork) {
199 ++forkcount;
200 }
201
202
203 /*
204 * Skip the call to ubc_setsize if we're being invoked on behalf of reclaim.
205 * The dirty regions would have already been synced to disk, so informing UBC
206 * that they can toss the pages doesn't help anyone at this point.
207 *
208 * Note that this is a performance problem if the vnode goes straight to reclaim
209 * (and skips inactive), since there would be no way for anyone to notify the UBC
210 * that all pages in this file are basically useless.
211 */
212 if (reclaim == 0) {
213 /*
214 * Check whether we are tearing down a cnode with only one remaining fork.
215 * If there are blocks in its filefork, then we need to unlock the cnode
216 * before calling ubc_setsize. The cluster layer may re-enter the filesystem
217 * (i.e. VNOP_BLOCKMAP), and if we retain the cnode lock, we could double-lock
218 * panic.
219 */
220
221 if ((v_type == VREG || v_type == VLNK) &&
222 (cp->c_flag & C_DELETED) &&
223 (VTOF(vp)->ff_blocks != 0) && (forkcount == 1)) {
224 hfs_unlock(cp);
225 /* ubc_setsize just fails if we were to call this from VNOP_RECLAIM */
226 ubc_setsize(vp, 0);
/* Re-take the cnode lock dropped above; the result of hfs_lock is deliberately ignored. */
227 (void) hfs_lock(cp, HFS_FORCE_LOCK);
228 }
229 }
230
231 /*
232 * Push file data out for normal files that haven't been evicted from
233 * the namespace. We only do this if this function was not called from reclaim,
234 * because by that point the UBC information has been totally torn down.
235 *
236 * There should also be no way that a normal file that has NOT been deleted from
237 * the namespace to skip INACTIVE and go straight to RECLAIM. That race only happens
238 * when the file becomes open-unlinked.
239 */
240 if ((v_type == VREG) &&
241 (!ISSET(cp->c_flag, C_DELETED)) &&
242 (!ISSET(cp->c_flag, C_NOEXISTS)) &&
243 (VTOF(vp)->ff_blocks) &&
244 (reclaim == 0)) {
245 /*
246 * Note that if content protection is enabled, then this is where we will
247 * attempt to issue IOs for all dirty regions of this file.
248 *
249 * If we're called from hfs_vnop_inactive, all this means is at the time
250 * the logic for deciding to call this function, there were not any lingering
251 * mmap/fd references for this file. However, there is nothing preventing the system
252 * from creating a new reference in between the time that logic was checked
253 * and we entered hfs_vnop_inactive. As a result, the only time we can guarantee
254 * that there aren't any references is during vnop_reclaim.
255 */
/* hfs_filedone drops and re-takes the cnode lock around its cluster I/O; its result is not checked here. */
256 hfs_filedone(vp, ctx);
257 }
258
259 /*
260 * We're holding the cnode lock now. Stall behind any shadow BPs that may
261 * be involved with this vnode if it is a symlink. We don't want to allow
262 * the blocks that we're about to release to be put back into the pool if there
263 * is pending I/O to them.
264 */
265 if (v_type == VLNK) {
266 /*
267 * This will block if the asynchronous journal flush is in progress.
268 * If this symlink is not being renamed over and doesn't have any open FDs,
269 * then we'll remove it from the journal's bufs below in kill_block.
270 */
271 buf_wait_for_shadow_io (vp, 0);
272 }
273
274 /*
275 * Remove any directory hints or cached origins
276 */
277 if (v_type == VDIR) {
278 hfs_reldirhints(cp, 0);
279 }
280 if (cp->c_flag & C_HARDLINK) {
281 hfs_relorigins(cp);
282 }
283
284 /*
285 * This check is slightly complicated. We should only truncate data
286 * in very specific cases for open-unlinked files. This is because
287 * we want to ensure that the resource fork continues to be available
288 * if the caller has the data fork open. However, this is not symmetric;
289 * someone who has the resource fork open need not be able to access the data
290 * fork once the data fork has gone inactive.
291 *
292 * If we're the last fork, then we have cleaning up to do.
293 *
294 * A) last fork, and vp == c_vp
295 * Truncate away own fork data. If rsrc fork is not in core, truncate it too.
296 *
297 * B) last fork, and vp == c_rsrc_vp
298 * Truncate ourselves, assume data fork has been cleaned due to C).
299 *
300 * If we're not the last fork, then things are a little different:
301 *
302 * C) not the last fork, vp == c_vp
303 * Truncate ourselves. Once the file has gone out of the namespace,
304 * it cannot be further opened. Further access to the rsrc fork may
305 * continue, however.
306 *
307 * D) not the last fork, vp == c_rsrc_vp
308 * Don't enter the block below, just clean up vnode and push it out of core.
309 */
310
311 if ((v_type == VREG || v_type == VLNK) &&
312 (cp->c_flag & C_DELETED) &&
313 ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) {
314
315 /* Start a transaction here. We're about to change file sizes */
316 if (started_tr == 0) {
317 if (hfs_start_transaction(hfsmp) != 0) {
318 error = EINVAL;
319 goto out;
320 }
321 else {
322 started_tr = 1;
323 }
324 }
325
326 /* Truncate away our own fork data. (Case A, B, C above) */
327 if (VTOF(vp)->ff_blocks != 0) {
328
329 /*
330 * At this point, we have decided that this cnode is
331 * suitable for full removal. We are about to deallocate
332 * its blocks and remove its entry from the catalog.
333 * If it was a symlink, then it's possible that the operation
334 * which created it is still in the current transaction group
335 * due to coalescing. Take action here to kill the data blocks
336 * of the symlink out of the journal before moving to
337 * deallocate the blocks. We need to be in the middle of
338 * a transaction before calling buf_iterate like this.
339 *
340 * Note: we have to kill any potential symlink buffers out of
341 * the journal prior to deallocating their blocks. This is so
342 * that we don't race with another thread that may be doing an
343 * allocation concurrently and pick up these blocks. It could
344 * generate I/O against them which could go out ahead of our journal
345 * transaction.
346 */
347
348 if (hfsmp->jnl && vnode_islnk(vp)) {
349 buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp);
350 }
351
352 /*
353 * Since we're already inside a transaction,
354 * tell hfs_truncate to skip the ubc_setsize.
355 *
356 * This truncate call (and the one below) is fine from VNOP_RECLAIM's
357 * context because we're only removing blocks, not zero-filling new
358 * ones. The C_DELETED check above makes things much simpler.
359 */
360 error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, 0, ctx);
361 if (error) {
362 goto out;
363 }
/* Remembered so a later cat_delete failure on an already-truncated file can be reported. */
364 truncated = 1;
365 }
366
367 /*
368 * Truncate away the resource fork, if we represent the data fork and
369 * it is the last fork. That means, by definition, the rsrc fork is not in
370 * core. To avoid bringing a vnode into core for the sole purpose of deleting the
371 * data in the resource fork, we call cat_lookup directly, then hfs_release_storage
372 * to get rid of the resource fork's data.
373 *
374 * This is invoked via case A above only.
375 */
376 if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) {
377 struct cat_lookup_buffer *lookup_rsrc = NULL;
378 struct cat_desc *desc_ptr = NULL;
379 lockflags = 0;
380
381 MALLOC(lookup_rsrc, struct cat_lookup_buffer*, sizeof (struct cat_lookup_buffer), M_TEMP, M_WAITOK);
382 if (lookup_rsrc == NULL) {
383 printf("hfs_cnode_teardown: ENOMEM from MALLOC\n");
384 error = ENOMEM;
385 goto out;
386 }
387 else {
388 bzero (lookup_rsrc, sizeof (struct cat_lookup_buffer));
389 }
390
/*
 * With no name in the cnode's descriptor, build a lookup descriptor from the
 * deleted-file name (derived from c_fileid) under the FILE_HARDLINKS private directory;
 * otherwise look the record up through the cnode's own descriptor.
 */
391 if (cp->c_desc.cd_namelen == 0) {
392 /* Initialize the rsrc descriptor for lookup if necessary*/
393 MAKE_DELETED_NAME (lookup_rsrc->lookup_name, HFS_TEMPLOOKUP_NAMELEN, cp->c_fileid);
394
395 lookup_rsrc->lookup_desc.cd_nameptr = (const uint8_t*) lookup_rsrc->lookup_name;
396 lookup_rsrc->lookup_desc.cd_namelen = strlen (lookup_rsrc->lookup_name);
397 lookup_rsrc->lookup_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
398 lookup_rsrc->lookup_desc.cd_cnid = cp->c_cnid;
399
400 desc_ptr = &lookup_rsrc->lookup_desc;
401 }
402 else {
403 desc_ptr = &cp->c_desc;
404 }
405
406 lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
407
408 error = cat_lookup (hfsmp, desc_ptr, 1, (struct cat_desc *) NULL,
409 (struct cat_attr*) NULL, &lookup_rsrc->lookup_fork.ff_data, NULL);
410
411 hfs_systemfile_unlock (hfsmp, lockflags);
412
413 if (error) {
/* The temporary buffer must be freed on every exit from this block. */
414 FREE (lookup_rsrc, M_TEMP);
415 goto out;
416 }
417
418 /*
419 * Make the filefork in our temporary struct look like a real
420 * filefork. Fill in the cp, sysfileinfo and rangelist fields..
421 */
422 rl_init (&lookup_rsrc->lookup_fork.ff_invalidranges);
423 lookup_rsrc->lookup_fork.ff_cp = cp;
424
425 /*
426 * If there were no errors, then we have the catalog's fork information
427 * for the resource fork in question. Go ahead and delete the data in it now.
428 */
429
430 error = hfs_release_storage (hfsmp, NULL, &lookup_rsrc->lookup_fork, cp->c_fileid);
431 FREE(lookup_rsrc, M_TEMP);
432
433 if (error) {
434 goto out;
435 }
436
437 /*
438 * This fileid's resource fork extents have now been fully deleted on-disk
439 * and this CNID is no longer valid. At this point, we should be able to
440 * zero out cp->c_blocks to indicate there is no data left in this file.
441 */
442 cp->c_blocks = 0;
443 }
444
445 /* End the transaction from the start of the file truncation segment */
446 if (started_tr) {
447 hfs_end_transaction(hfsmp);
448 started_tr = 0;
449 }
450 }
451
452 /*
453 * If we represent the last fork (or none in the case of a dir),
454 * and the cnode has become open-unlinked,
455 * AND it has EA's, then we need to get rid of them.
456 *
457 * Note that this must happen outside of any other transactions
458 * because it starts/ends its own transactions and grabs its
459 * own locks. This is to prevent a file with a lot of attributes
460 * from creating a transaction that is too large (which panics).
461 */
462 if ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0 &&
463 (cp->c_flag & C_DELETED) &&
464 (forkcount <= 1)) {
465
/* NOTE(review): ea_error is stored but never read afterwards, so a failure here is silently dropped. */
466 ea_error = hfs_removeallattr(hfsmp, cp->c_fileid);
467 }
468
469
470 /*
471 * If the cnode represented an open-unlinked file, then now
472 * actually remove the cnode's catalog entry and release all blocks
473 * it may have been using.
474 */
475 if ((cp->c_flag & C_DELETED) && (forkcount <= 1)) {
476 /*
477 * Mark cnode in transit so that no one can get this
478 * cnode from cnode hash.
479 */
480 // hfs_chash_mark_in_transit(hfsmp, cp);
481 // XXXdbg - remove the cnode from the hash table since it's deleted
482 // otherwise someone could go to sleep on the cnode and not
483 // be woken up until this vnode gets recycled which could be
484 // a very long time...
485 hfs_chashremove(hfsmp, cp);
486
487 cp->c_flag |= C_NOEXISTS; // XXXdbg
488 cp->c_rdev = 0;
489
/*
 * NOTE(review): if the transaction (or cat_preflight / cat_delete below) fails, the cnode
 * has already been removed from the hash and flagged C_NOEXISTS above, while C_DELETED
 * stays set and the catalog entry has not been deleted.
 */
490 if (started_tr == 0) {
491 if (hfs_start_transaction(hfsmp) != 0) {
492 error = EINVAL;
493 goto out;
494 }
495 started_tr = 1;
496 }
497
498 /*
499 * Reserve some space in the Catalog file.
500 */
501 if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) {
502 goto out;
503 }
/* From here on the 'out' path must call cat_postflight to release the reservation. */
504 cat_reserve = 1;
505
506 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
507
508 if (cp->c_blocks > 0) {
509 printf("hfs_inactive: deleting non-empty%sfile %d, "
510 "blks %d\n", VNODE_IS_RSRC(vp) ? " rsrc " : " ",
511 (int)cp->c_fileid, (int)cp->c_blocks);
512 }
513
514 //
515 // release the name pointer in the descriptor so that
516 // cat_delete() will use the file-id to do the deletion.
517 // in the case of hard links this is imperative (in the
518 // case of regular files the fileid and cnid are the
519 // same so it doesn't matter).
520 //
521 cat_releasedesc(&cp->c_desc);
522
523 /*
524 * The descriptor name may be zero,
525 * in which case the fileid is used.
526 */
527 error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr);
528
/* NOTE(review): the message below has no trailing newline, unlike the other printf calls here. */
529 if (error && truncated && (error != ENXIO))
530 printf("hfs_inactive: couldn't delete a truncated file!");
531
532 /* Update HFS Private Data dir */
533 if (error == 0) {
534 hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
535 if (vnode_isdir(vp)) {
536 DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
537 }
538 (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
539 &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
540 }
541
542 hfs_systemfile_unlock(hfsmp, lockflags);
543
544 if (error) {
545 goto out;
546 }
547
548 #if QUOTA
549 if (hfsmp->hfs_flags & HFS_QUOTAS)
550 (void)hfs_chkiq(cp, -1, NOCRED, 0);
551 #endif /* QUOTA */
552
553 /* Already set C_NOEXISTS at the beginning of this block */
554 cp->c_flag &= ~C_DELETED;
555 cp->c_touch_chgtime = TRUE;
556 cp->c_touch_modtime = TRUE;
557
/* error is necessarily 0 here: any non-zero value already took the 'goto out' above. */
558 if (error == 0)
559 hfs_volupdate(hfsmp, (v_type == VDIR) ? VOL_RMDIR : VOL_RMFILE, 0);
560 }
561
562 /*
563 * A file may have had delayed allocations, in which case hfs_update
564 * would not have updated the catalog record (cat_update). We need
565 * to do that now, before we lose our fork data. We also need to
566 * force the update, or hfs_update will again skip the cat_update.
567 *
568 * If the file has C_NOEXISTS set, then we can skip the hfs_update call
569 * because the catalog entry has already been removed. There would be no point
570 * to looking up the entry in the catalog to modify it when we already know it's gone
571 */
572 if ((!ISSET(cp->c_flag, C_NOEXISTS)) &&
573 ((cp->c_flag & C_MODIFIED) || cp->c_touch_acctime ||
574 cp->c_touch_chgtime || cp->c_touch_modtime)) {
575
576 if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){
577 cp->c_flag |= C_FORCEUPDATE;
578 }
/* The result of hfs_update is not checked, so it does not affect the return value. */
579 hfs_update(vp, 0);
580 }
581
/* Common exit path: release the catalog reservation (if taken) and close any open transaction. */
582 out:
583 if (cat_reserve)
584 cat_postflight(hfsmp, &cookie, p);
585
586 // XXXdbg - have to do this because a goto could have come here
587 if (started_tr) {
588 hfs_end_transaction(hfsmp);
589 started_tr = 0;
590 }
591
/* The content-protection key flush below is compiled out by the enclosing '#if 0'. */
592 #if 0
593 #if CONFIG_PROTECT
594 /*
595 * cnode truncate lock and cnode lock are both held exclusive here.
596 *
597 * Go ahead and flush the keys out if this cnode is the last fork
598 * and it is not class F. Class F keys should not be purged because they only
599 * exist in memory and have no persistent keys. Only do this
600 * if we haven't already done it yet (maybe a vnode skipped inactive
601 * and went straight to reclaim). This function gets called from both reclaim and
602 * inactive, so it will happen first in inactive if possible.
603 *
604 * We need to be mindful that all pending IO for this file has already been
605 * issued and completed before we bzero out the key. This is because
606 * if it isn't, tossing the key here could result in garbage IO being
607 * written (by using the bzero'd key) if the writes are happening asynchronously.
608 *
609 * In addition, class A files may have already been purged due to the
610 * lock event occurring.
611 */
612 if (forkcount == 1) {
613 struct cprotect *entry = cp->c_cpentry;
614 if ((entry) && (entry->cp_pclass != PROTECTION_CLASS_F)) {
615 if ((cp->c_cpentry->cp_flags & CP_KEY_FLUSHED) == 0) {
616 cp->c_cpentry->cp_flags |= CP_KEY_FLUSHED;
617 bzero (cp->c_cpentry->cp_cache_key, cp->c_cpentry->cp_cache_key_len);
618 bzero (cp->c_cpentry->cp_cache_iv_ctx, sizeof(aes_encrypt_ctx));
619 }
620 }
621 }
622 #endif
623 #endif
624
625 return error;
626 }
627
628
629 /*
630 * hfs_vnop_inactive
631 *
632 * The last usecount on the vnode has gone away, so we need to tear down
633 * any remaining data still residing in the cnode. If necessary, write out
634 * remaining blocks or delete the cnode's entry in the catalog.
635 */
636 int
637 hfs_vnop_inactive(struct vnop_inactive_args *ap)
638 {
639 struct vnode *vp = ap->a_vp;
640 struct cnode *cp;
641 struct hfsmount *hfsmp = VTOHFS(vp);
642 struct proc *p = vfs_context_proc(ap->a_context);
643 int error = 0;
644 int took_trunc_lock = 0;
645 enum vtype v_type;
646
647 v_type = vnode_vtype(vp);
648 cp = VTOC(vp);
649
650 if ((hfsmp->hfs_flags & HFS_READ_ONLY) || vnode_issystem(vp) ||
651 (hfsmp->hfs_freezing_proc == p)) {
652 error = 0;
653 goto inactive_done;
654 }
655
656 /*
657 * For safety, do NOT call vnode_recycle from inside this function. This can cause
658 * problems in the following scenario:
659 *
660 * vnode_create -> vnode_reclaim_internal -> vclean -> VNOP_INACTIVE
661 *
662 * If we're being invoked as a result of a reclaim that was already in-flight, then we
663 * cannot call vnode_recycle again. Being in reclaim means that there are no usecounts or
664 * iocounts by definition. As a result, if we were to call vnode_recycle, it would immediately
665 * try to re-enter reclaim again and panic.
666 *
667 * Currently, there are three things that can cause us (VNOP_INACTIVE) to get called.
668 * 1) last usecount goes away on the vnode (vnode_rele)
669 * 2) last iocount goes away on a vnode that previously had usecounts but didn't have
670 * vnode_recycle called (vnode_put)
671 * 3) vclean by way of reclaim
672 *
673 * In this function we would generally want to call vnode_recycle to speed things
674 * along to ensure that we don't leak blocks due to open-unlinked files. However, by
675 * virtue of being in this function already, we can call hfs_cnode_teardown, which
676 * will release blocks held by open-unlinked files, and mark them C_NOEXISTS so that
677 * there's no entry in the catalog and no backing store anymore. If that's the case,
678 * then we really don't care all that much when the vnode actually goes through reclaim.
679 * Further, the HFS VNOPs that manipulated the namespace in order to create the open-
680 * unlinked file in the first place should have already called vnode_recycle on the vnode
681 * to guarantee that it would go through reclaim in a speedy way.
682 */
683
684 if (cp->c_flag & C_NOEXISTS) {
685 /*
686 * If the cnode has already had its cat entry removed, then
687 * just skip to the end. We don't need to do anything here.
688 */
689 error = 0;
690 goto inactive_done;
691 }
692
693 if ((v_type == VREG || v_type == VLNK)) {
694 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
695 took_trunc_lock = 1;
696 }
697
698 (void) hfs_lock(cp, HFS_FORCE_LOCK);
699
700 /*
701 * Call cnode_teardown to push out dirty blocks to disk, release open-unlinked
702 * files' blocks from being in use, and move the cnode from C_DELETED to C_NOEXISTS.
703 */
704 error = hfs_cnode_teardown (vp, ap->a_context, 0);
705
706 /*
707 * Drop the truncate lock before unlocking the cnode
708 * (which can potentially perform a vnode_put and
709 * recycle the vnode which in turn might require the
710 * truncate lock)
711 */
712 if (took_trunc_lock) {
713 hfs_unlock_truncate(cp, 0);
714 }
715
716 hfs_unlock(cp);
717
718 inactive_done:
719
720 return error;
721 }
722
723
724 /*
725 * File clean-up (zero fill and shrink peof).
726 */
727
728 int
729 hfs_filedone(struct vnode *vp, vfs_context_t context)
730 {
731 struct cnode *cp;
732 struct filefork *fp;
733 struct hfsmount *hfsmp;
734 struct rl_entry *invalid_range;
735 off_t leof;
736 u_int32_t blks, blocksize;
737 /* flags for zero-filling sparse ranges */
738 int cluster_flags = IO_CLOSE;
739 int cluster_zero_flags = IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE;
740
741 cp = VTOC(vp);
742 fp = VTOF(vp);
743 hfsmp = VTOHFS(vp);
744 leof = fp->ff_size;
745
746 if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (fp->ff_blocks == 0))
747 return (0);
748
749 #if CONFIG_PROTECT
750 /*
751 * Figure out if we need to do synchronous IO.
752 *
753 * If the file represents a content-protected file, we may need
754 * to issue synchronous IO when we dispatch to the cluster layer.
755 * If we didn't, then the IO would go out to the disk asynchronously.
756 * If the vnode hits the end of inactive before getting reclaimed, the
757 * content protection keys would be wiped/bzeroed out, and we'd end up
758 * trying to issue the IO with an invalid key. This will lead to file
759 * corruption. IO_SYNC will force the cluster_push to wait until all IOs
760 * have completed (though they may be in the track cache).
761 */
762 if (cp_fs_protected(VTOVFS(vp))) {
763 cluster_flags |= IO_SYNC;
764 cluster_zero_flags |= IO_SYNC;
765 }
766 #endif
767
768 /*
769 * If we are being invoked from F_SWAPDATAEXTENTS, then we
770 * need to issue synchronous IO; Unless we are sure that all
771 * of the data has been written to the disk, we won't know
772 * that all of the blocks have been allocated properly.
773 */
774 if (cp->c_flag & C_SWAPINPROGRESS) {
775 cluster_flags |= IO_SYNC;
776 }
777
778 hfs_unlock(cp);
779 (void) cluster_push(vp, cluster_flags);
780 hfs_lock(cp, HFS_FORCE_LOCK);
781
782 /*
783 * Explicitly zero out the areas of file
784 * that are currently marked invalid.
785 */
786 while ((invalid_range = TAILQ_FIRST(&fp->ff_invalidranges))) {
787 off_t start = invalid_range->rl_start;
788 off_t end = invalid_range->rl_end;
789
790 /* The range about to be written must be validated
791 * first, so that VNOP_BLOCKMAP() will return the
792 * appropriate mapping for the cluster code:
793 */
794 rl_remove(start, end, &fp->ff_invalidranges);
795
796 hfs_unlock(cp);
797 (void) cluster_write(vp, (struct uio *) 0,
798 leof, end + 1, start, (off_t)0, cluster_zero_flags);
799 hfs_lock(cp, HFS_FORCE_LOCK);
800 cp->c_flag |= C_MODIFIED;
801 }
802 cp->c_flag &= ~C_ZFWANTSYNC;
803 cp->c_zftimeout = 0;
804 blocksize = VTOVCB(vp)->blockSize;
805 blks = leof / blocksize;
806 if (((off_t)blks * (off_t)blocksize) != leof)
807 blks++;
808 /*
809 * Shrink the peof to the smallest size neccessary to contain the leof.
810 */
811 if (blks < fp->ff_blocks) {
812 (void) hfs_truncate(vp, leof, IO_NDELAY, 0, 0, context);
813 }
814
815 hfs_unlock(cp);
816 (void) cluster_push(vp, cluster_flags);
817 hfs_lock(cp, HFS_FORCE_LOCK);
818
819 /*
820 * If the hfs_truncate didn't happen to flush the vnode's
821 * information out to disk, force it to be updated now that
822 * all invalid ranges have been zero-filled and validated:
823 */
824 if (cp->c_flag & C_MODIFIED) {
825 hfs_update(vp, 0);
826 }
827 return (0);
828 }
829
830
831 /*
832 * Reclaim a cnode so that it can be used for other purposes.
833 */
834 int
835 hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
836 {
837 struct vnode *vp = ap->a_vp;
838 struct cnode *cp;
839 struct filefork *fp = NULL;
840 struct filefork *altfp = NULL;
841 struct hfsmount *hfsmp = VTOHFS(vp);
842 vfs_context_t ctx = ap->a_context;
843 int reclaim_cnode = 0;
844 int err = 0;
845 enum vtype v_type;
846
847 v_type = vnode_vtype(vp);
848 cp = VTOC(vp);
849
850 /*
851 * We don't take the truncate lock since by the time reclaim comes along,
852 * all dirty pages have been synced and nobody should be competing
853 * with us for this thread.
854 */
855 (void) hfs_lock (cp, HFS_FORCE_LOCK);
856
857 /*
858 * Sync to disk any remaining data in the cnode/vnode. This includes
859 * a call to hfs_update if the cnode has outbound data.
860 *
861 * If C_NOEXISTS is set on the cnode, then there's nothing teardown needs to do
862 * because the catalog entry for this cnode is already gone.
863 */
864 if (!ISSET(cp->c_flag, C_NOEXISTS)) {
865 err = hfs_cnode_teardown(vp, ctx, 1);
866 }
867
868 /*
869 * Keep track of an inactive hot file.
870 */
871 if (!vnode_isdir(vp) &&
872 !vnode_issystem(vp) &&
873 !(cp->c_flag & (C_DELETED | C_NOEXISTS)) ) {
874 (void) hfs_addhotfile(vp);
875 }
876 vnode_removefsref(vp);
877
878 /*
879 * Find file fork for this vnode (if any)
880 * Also check if another fork is active
881 */
882 if (cp->c_vp == vp) {
883 fp = cp->c_datafork;
884 altfp = cp->c_rsrcfork;
885
886 cp->c_datafork = NULL;
887 cp->c_vp = NULL;
888 } else if (cp->c_rsrc_vp == vp) {
889 fp = cp->c_rsrcfork;
890 altfp = cp->c_datafork;
891
892 cp->c_rsrcfork = NULL;
893 cp->c_rsrc_vp = NULL;
894 } else {
895 panic("hfs_vnop_reclaim: vp points to wrong cnode (vp=%p cp->c_vp=%p cp->c_rsrc_vp=%p)\n", vp, cp->c_vp, cp->c_rsrc_vp);
896 }
897 /*
898 * On the last fork, remove the cnode from its hash chain.
899 */
900 if (altfp == NULL) {
901 /* If we can't remove it then the cnode must persist! */
902 if (hfs_chashremove(hfsmp, cp) == 0)
903 reclaim_cnode = 1;
904 /*
905 * Remove any directory hints
906 */
907 if (vnode_isdir(vp)) {
908 hfs_reldirhints(cp, 0);
909 }
910
911 if(cp->c_flag & C_HARDLINK) {
912 hfs_relorigins(cp);
913 }
914 }
915 /* Release the file fork and related data */
916 if (fp) {
917 /* Dump cached symlink data */
918 if (vnode_islnk(vp) && (fp->ff_symlinkptr != NULL)) {
919 FREE(fp->ff_symlinkptr, M_TEMP);
920 }
921 FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK);
922 }
923
924 /*
925 * If there was only one active fork then we can release the cnode.
926 */
927 if (reclaim_cnode) {
928 hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_TRANSIT);
929 hfs_reclaim_cnode(cp);
930 }
931 else {
932 /*
933 * cnode in use. If it is a directory, it could have
934 * no live forks. Just release the lock.
935 */
936 hfs_unlock(cp);
937 }
938
939 vnode_clearfsnode(vp);
940 return (0);
941 }
942
943
/*
 * Vnode operation vectors defined elsewhere.
 * NOTE(review): presumably hfs_getnewvnode picks one of these for each new vnode -- confirm
 * against the remainder of that function. The FIFO vector is declared only when FIFO is enabled.
 */
944 extern int (**hfs_vnodeop_p) (void *);
945 extern int (**hfs_std_vnodeop_p) (void *);
946 extern int (**hfs_specop_p) (void *);
947 #if FIFO
948 extern int (**hfs_fifoop_p) (void *);
949 #endif
950
951 /*
952 * hfs_getnewvnode - get new default vnode
953 *
954 * The vnode is returned with an iocount and the cnode locked
955 */
956 int
957 hfs_getnewvnode(
958 struct hfsmount *hfsmp,
959 struct vnode *dvp,
960 struct componentname *cnp,
961 struct cat_desc *descp,
962 int flags,
963 struct cat_attr *attrp,
964 struct cat_fork *forkp,
965 struct vnode **vpp,
966 int *out_flags)
967 {
968 struct mount *mp = HFSTOVFS(hfsmp);
969 struct vnode *vp = NULL;
970 struct vnode **cvpp;
971 struct vnode *tvp = NULLVP;
972 struct cnode *cp = NULL;
973 struct filefork *fp = NULL;
974 int hfs_standard = 0;
975 int retval;
976 int issystemfile;
977 int wantrsrc;
978 int hflags = 0;
979 struct vnode_fsparam vfsp;
980 enum vtype vtype;
981 #if QUOTA
982 int i;
983 #endif /* QUOTA */
984
985 hfs_standard = (hfsmp->hfs_flags & HFS_STANDARD);
986
987 if (attrp->ca_fileid == 0) {
988 *vpp = NULL;
989 return (ENOENT);
990 }
991
992 #if !FIFO
993 if (IFTOVT(attrp->ca_mode) == VFIFO) {
994 *vpp = NULL;
995 return (ENOTSUP);
996 }
997 #endif /* !FIFO */
998 vtype = IFTOVT(attrp->ca_mode);
999 issystemfile = (descp->cd_flags & CD_ISMETA) && (vtype == VREG);
1000 wantrsrc = flags & GNV_WANTRSRC;
1001
1002 /* Zero out the out_flags */
1003 *out_flags = 0;
1004
1005 #ifdef HFS_CHECK_LOCK_ORDER
1006 /*
1007 * The only case were its permissible to hold the parent cnode
1008 * lock is during a create operation (hfs_makenode) or when
1009 * we don't need the cnode lock (GNV_SKIPLOCK).
1010 */
1011 if ((dvp != NULL) &&
1012 (flags & (GNV_CREATE | GNV_SKIPLOCK)) == 0 &&
1013 VTOC(dvp)->c_lockowner == current_thread()) {
1014 panic("hfs_getnewvnode: unexpected hold of parent cnode %p", VTOC(dvp));
1015 }
1016 #endif /* HFS_CHECK_LOCK_ORDER */
1017
1018 /*
1019 * Get a cnode (new or existing)
1020 */
1021 cp = hfs_chash_getcnode(hfsmp, attrp->ca_fileid, vpp, wantrsrc,
1022 (flags & GNV_SKIPLOCK), out_flags, &hflags);
1023
1024 /*
1025 * If the id is no longer valid for lookups we'll get back a NULL cp.
1026 */
1027 if (cp == NULL) {
1028 return (ENOENT);
1029 }
1030
1031 /*
1032 * If we get a cnode/vnode pair out of hfs_chash_getcnode, then update the
1033 * descriptor in the cnode as needed if the cnode represents a hardlink.
1034 * We want the caller to get the most up-to-date copy of the descriptor
1035 * as possible. However, we only do anything here if there was a valid vnode.
1036 * If there isn't a vnode, then the cnode is brand new and needs to be initialized
1037 * as it doesn't have a descriptor or cat_attr yet.
1038 *
1039 * If we are about to replace the descriptor with the user-supplied one, then validate
1040 * that the descriptor correctly acknowledges this item is a hardlink. We could be
1041 * subject to a race where the calling thread invoked cat_lookup, got a valid lookup
1042 * result but the file was not yet a hardlink. With sufficient delay between there
1043 * and here, we might accidentally copy in the raw inode ID into the descriptor in the
1044 * call below. If the descriptor's CNID is the same as the fileID then it must
1045 * not yet have been a hardlink when the lookup occurred.
1046 */
1047
1048 if (!(hfs_checkdeleted(cp))) {
1049 if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) {
1050 /* If cnode is uninitialized, its c_attr will be zeroed out; cnids wont match. */
1051 if ((descp->cd_cnid == cp->c_attr.ca_fileid) &&
1052 (attrp->ca_linkcount != cp->c_attr.ca_linkcount)){
1053 if ((flags & GNV_SKIPLOCK) == 0) {
1054 /*
1055 * Then we took the lock. Drop it before calling
1056 * vnode_put, which may invoke hfs_vnop_inactive and need to take
1057 * the cnode lock again.
1058 */
1059 hfs_unlock(cp);
1060 }
1061
1062 /*
1063 * Emit ERECYCLE and GNV_CAT_ATTRCHANGED to
1064 * force a re-drive in the lookup routine.
1065 * Drop the iocount on the vnode obtained from
1066 * chash_getcnode if needed.
1067 */
1068 if (*vpp != NULL) {
1069 vnode_put (*vpp);
1070 *vpp = NULL;
1071 }
1072
1073 /*
1074 * If we raced with VNOP_RECLAIM for this vnode, the hash code could
1075 * have observed it after the c_vp or c_rsrc_vp fields had been torn down;
1076 * the hash code peeks at those fields without holding the cnode lock because
1077 * it needs to be fast. As a result, we may have set H_ATTACH in the chash
1078 * call above. Since we're bailing out, unset whatever flags we just set, and
1079 * wake up all waiters for this cnode.
1080 */
1081 if (hflags) {
1082 hfs_chashwakeup(hfsmp, cp, hflags);
1083 }
1084
1085 *out_flags = GNV_CAT_ATTRCHANGED;
1086 return ERECYCLE;
1087 }
1088 else {
1089 /*
1090 * Otherwise, CNID != fileid. Go ahead and copy in the new descriptor.
1091 *
1092 * Replacing the descriptor here is fine because we looked up the item without
1093 * a vnode in hand before. If a vnode existed, its identity must be attached to this
1094 * item. We are not susceptible to the lookup fastpath issue at this point.
1095 */
1096 replace_desc(cp, descp);
1097 }
1098 }
1099 }
1100
1101 /* Check if we found a matching vnode */
1102 if (*vpp != NULL) {
1103 return (0);
1104 }
1105
1106 /*
1107 * If this is a new cnode then initialize it.
1108 */
1109 if (ISSET(cp->c_hflag, H_ALLOC)) {
1110 lck_rw_init(&cp->c_truncatelock, hfs_rwlock_group, hfs_lock_attr);
1111 #if HFS_COMPRESSION
1112 cp->c_decmp = NULL;
1113 #endif
1114
1115 /* Make sure its still valid (ie exists on disk). */
1116 if (!(flags & GNV_CREATE)) {
1117 int error = 0;
1118 if (!hfs_valid_cnode (hfsmp, dvp, (wantrsrc ? NULL : cnp), cp->c_fileid, attrp, &error)) {
1119 hfs_chash_abort(hfsmp, cp);
1120 hfs_reclaim_cnode(cp);
1121 *vpp = NULL;
1122 /*
1123 * If we hit this case, that means that the entry was there in the catalog when
1124 * we did a cat_lookup earlier. Think hfs_lookup. However, in between the time
1125 * that we checked the catalog and the time we went to get a vnode/cnode for it,
1126 * it had been removed from the namespace and the vnode totally reclaimed. As a result,
1127 * it's not there in the catalog during the check in hfs_valid_cnode and we bubble out
1128 * an ENOENT. To indicate to the caller that they should really double-check the
1129 * entry (it could have been renamed over and gotten a new fileid), we mark a bit
1130 * in the output flags.
1131 */
1132 if (error == ENOENT) {
1133 *out_flags = GNV_CAT_DELETED;
1134 return ENOENT;
1135 }
1136
1137 /*
1138 * Also, we need to protect the cat_attr acquired during hfs_lookup and passed into
1139 * this function as an argument because the catalog may have changed w.r.t hardlink
1140 * link counts and the firstlink field. If that validation check fails, then let
1141 * lookup re-drive itself to get valid/consistent data with the same failure condition below.
1142 */
1143 if (error == ERECYCLE) {
1144 *out_flags = GNV_CAT_ATTRCHANGED;
1145 return (ERECYCLE);
1146 }
1147 }
1148 }
1149 bcopy(attrp, &cp->c_attr, sizeof(struct cat_attr));
1150 bcopy(descp, &cp->c_desc, sizeof(struct cat_desc));
1151
1152 /* The name was inherited so clear descriptor state... */
1153 descp->cd_namelen = 0;
1154 descp->cd_nameptr = NULL;
1155 descp->cd_flags &= ~CD_HASBUF;
1156
1157 /* Tag hardlinks */
1158 if ((vtype == VREG || vtype == VDIR) &&
1159 ((descp->cd_cnid != attrp->ca_fileid) ||
1160 (attrp->ca_recflags & kHFSHasLinkChainMask))) {
1161 cp->c_flag |= C_HARDLINK;
1162 }
1163 /*
1164 * Fix-up dir link counts.
1165 *
1166 * Earlier versions of Leopard used ca_linkcount for posix
1167 * nlink support (effectively the sub-directory count + 2).
1168 * That is now accomplished using the ca_dircount field with
1169 * the corresponding kHFSHasFolderCountMask flag.
1170 *
1171 * For directories the ca_linkcount is the true link count,
1172 * tracking the number of actual hardlinks to a directory.
1173 *
1174 * We only do this if the mount has HFS_FOLDERCOUNT set;
1175 * at the moment, we only set that for HFSX volumes.
1176 */
1177 if ((hfsmp->hfs_flags & HFS_FOLDERCOUNT) &&
1178 (vtype == VDIR) &&
1179 !(attrp->ca_recflags & kHFSHasFolderCountMask) &&
1180 (cp->c_attr.ca_linkcount > 1)) {
1181 if (cp->c_attr.ca_entries == 0)
1182 cp->c_attr.ca_dircount = 0;
1183 else
1184 cp->c_attr.ca_dircount = cp->c_attr.ca_linkcount - 2;
1185
1186 cp->c_attr.ca_linkcount = 1;
1187 cp->c_attr.ca_recflags |= kHFSHasFolderCountMask;
1188 if ( !(hfsmp->hfs_flags & HFS_READ_ONLY) )
1189 cp->c_flag |= C_MODIFIED;
1190 }
1191 #if QUOTA
1192 if (hfsmp->hfs_flags & HFS_QUOTAS) {
1193 for (i = 0; i < MAXQUOTAS; i++)
1194 cp->c_dquot[i] = NODQUOT;
1195 }
1196 #endif /* QUOTA */
1197 /* Mark the output flag that we're vending a new cnode */
1198 *out_flags |= GNV_NEW_CNODE;
1199 }
1200
1201 if (vtype == VDIR) {
1202 if (cp->c_vp != NULL)
1203 panic("hfs_getnewvnode: orphaned vnode (data)");
1204 cvpp = &cp->c_vp;
1205 } else {
1206 if (forkp && attrp->ca_blocks < forkp->cf_blocks)
1207 panic("hfs_getnewvnode: bad ca_blocks (too small)");
1208 /*
1209 * Allocate and initialize a file fork...
1210 */
1211 MALLOC_ZONE(fp, struct filefork *, sizeof(struct filefork),
1212 M_HFSFORK, M_WAITOK);
1213 fp->ff_cp = cp;
1214 if (forkp)
1215 bcopy(forkp, &fp->ff_data, sizeof(struct cat_fork));
1216 else
1217 bzero(&fp->ff_data, sizeof(struct cat_fork));
1218 rl_init(&fp->ff_invalidranges);
1219 fp->ff_sysfileinfo = 0;
1220
1221 if (wantrsrc) {
1222 if (cp->c_rsrcfork != NULL)
1223 panic("hfs_getnewvnode: orphaned rsrc fork");
1224 if (cp->c_rsrc_vp != NULL)
1225 panic("hfs_getnewvnode: orphaned vnode (rsrc)");
1226 cp->c_rsrcfork = fp;
1227 cvpp = &cp->c_rsrc_vp;
1228 if ( (tvp = cp->c_vp) != NULLVP )
1229 cp->c_flag |= C_NEED_DVNODE_PUT;
1230 } else {
1231 if (cp->c_datafork != NULL)
1232 panic("hfs_getnewvnode: orphaned data fork");
1233 if (cp->c_vp != NULL)
1234 panic("hfs_getnewvnode: orphaned vnode (data)");
1235 cp->c_datafork = fp;
1236 cvpp = &cp->c_vp;
1237 if ( (tvp = cp->c_rsrc_vp) != NULLVP)
1238 cp->c_flag |= C_NEED_RVNODE_PUT;
1239 }
1240 }
1241 if (tvp != NULLVP) {
1242 /*
1243 * grab an iocount on the vnode we weren't
1244 * interested in (i.e. we want the resource fork
1245 * but the cnode already has the data fork)
1246 * to prevent it from being
1247 * recycled by us when we call vnode_create
1248 * which will result in a deadlock when we
1249 * try to take the cnode lock in hfs_vnop_fsync or
1250 * hfs_vnop_reclaim... vnode_get can be called here
1251 * because we already hold the cnode lock which will
1252 * prevent the vnode from changing identity until
1253 * we drop it.. vnode_get will not block waiting for
1254 * a change of state... however, it will return an
1255 * error if the current iocount == 0 and we've already
1256 * started to terminate the vnode... we don't need/want to
1257 * grab an iocount in the case since we can't cause
1258 * the fileystem to be re-entered on this thread for this vp
1259 *
1260 * the matching vnode_put will happen in hfs_unlock
1261 * after we've dropped the cnode lock
1262 */
1263 if ( vnode_get(tvp) != 0)
1264 cp->c_flag &= ~(C_NEED_RVNODE_PUT | C_NEED_DVNODE_PUT);
1265 }
1266 vfsp.vnfs_mp = mp;
1267 vfsp.vnfs_vtype = vtype;
1268 vfsp.vnfs_str = "hfs";
1269 if ((cp->c_flag & C_HARDLINK) && (vtype == VDIR)) {
1270 vfsp.vnfs_dvp = NULL; /* no parent for me! */
1271 vfsp.vnfs_cnp = NULL; /* no name for me! */
1272 } else {
1273 vfsp.vnfs_dvp = dvp;
1274 vfsp.vnfs_cnp = cnp;
1275 }
1276 vfsp.vnfs_fsnode = cp;
1277
1278 /*
1279 * Special Case HFS Standard VNOPs from HFS+, since
1280 * HFS standard is readonly/deprecated as of 10.6
1281 */
1282
1283 #if FIFO
1284 if (vtype == VFIFO )
1285 vfsp.vnfs_vops = hfs_fifoop_p;
1286 else
1287 #endif
1288 if (vtype == VBLK || vtype == VCHR)
1289 vfsp.vnfs_vops = hfs_specop_p;
1290 else if (hfs_standard)
1291 vfsp.vnfs_vops = hfs_std_vnodeop_p;
1292 else
1293 vfsp.vnfs_vops = hfs_vnodeop_p;
1294
1295 if (vtype == VBLK || vtype == VCHR)
1296 vfsp.vnfs_rdev = attrp->ca_rdev;
1297 else
1298 vfsp.vnfs_rdev = 0;
1299
1300 if (forkp)
1301 vfsp.vnfs_filesize = forkp->cf_size;
1302 else
1303 vfsp.vnfs_filesize = 0;
1304
1305 vfsp.vnfs_flags = VNFS_ADDFSREF;
1306 if (dvp == NULLVP || cnp == NULL || !(cnp->cn_flags & MAKEENTRY) || (flags & GNV_NOCACHE))
1307 vfsp.vnfs_flags |= VNFS_NOCACHE;
1308
1309 /* Tag system files */
1310 vfsp.vnfs_marksystem = issystemfile;
1311
1312 /* Tag root directory */
1313 if (descp->cd_cnid == kHFSRootFolderID)
1314 vfsp.vnfs_markroot = 1;
1315 else
1316 vfsp.vnfs_markroot = 0;
1317
1318 if ((retval = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, cvpp))) {
1319 if (fp) {
1320 if (fp == cp->c_datafork)
1321 cp->c_datafork = NULL;
1322 else
1323 cp->c_rsrcfork = NULL;
1324
1325 FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK);
1326 }
1327 /*
1328 * If this is a newly created cnode or a vnode reclaim
1329 * occurred during the attachment, then cleanup the cnode.
1330 */
1331 if ((cp->c_vp == NULL) && (cp->c_rsrc_vp == NULL)) {
1332 hfs_chash_abort(hfsmp, cp);
1333 hfs_reclaim_cnode(cp);
1334 }
1335 else {
1336 hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH);
1337 if ((flags & GNV_SKIPLOCK) == 0){
1338 hfs_unlock(cp);
1339 }
1340 }
1341 *vpp = NULL;
1342 return (retval);
1343 }
1344 vp = *cvpp;
1345 vnode_settag(vp, VT_HFS);
1346 if (cp->c_flag & C_HARDLINK) {
1347 vnode_setmultipath(vp);
1348 }
1349 /*
1350 * Tag resource fork vnodes as needing an VNOP_INACTIVE
1351 * so that any deferred removes (open unlinked files)
1352 * have the chance to process the resource fork.
1353 */
1354 if (VNODE_IS_RSRC(vp)) {
1355 int err;
1356 KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW, 37)), cp->c_vp, cp->c_rsrc_vp, 0, 0, 0);
1357
1358 /* Force VL_NEEDINACTIVE on this vnode */
1359 err = vnode_ref(vp);
1360 if (err == 0) {
1361 vnode_rele(vp);
1362 }
1363 }
1364 hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH);
1365
1366 /*
1367 * Stop tracking an active hot file.
1368 */
1369 if (!(flags & GNV_CREATE) && (vtype != VDIR) && !issystemfile) {
1370 (void) hfs_removehotfile(vp);
1371 }
1372
1373 #if CONFIG_PROTECT
1374 /* Initialize the cp data structures. The key should be in place now. */
1375 if (!issystemfile && (*out_flags & GNV_NEW_CNODE)) {
1376 cp_entry_init(cp, mp);
1377 }
1378 #endif
1379
1380 *vpp = vp;
1381 return (0);
1382 }
1383
1384
1385 static void
1386 hfs_reclaim_cnode(struct cnode *cp)
1387 {
1388 #if QUOTA
1389 int i;
1390
1391 for (i = 0; i < MAXQUOTAS; i++) {
1392 if (cp->c_dquot[i] != NODQUOT) {
1393 dqreclaim(cp->c_dquot[i]);
1394 cp->c_dquot[i] = NODQUOT;
1395 }
1396 }
1397 #endif /* QUOTA */
1398
1399 /*
1400 * If the descriptor has a name then release it
1401 */
1402 if ((cp->c_desc.cd_flags & CD_HASBUF) && (cp->c_desc.cd_nameptr != 0)) {
1403 const char *nameptr;
1404
1405 nameptr = (const char *) cp->c_desc.cd_nameptr;
1406 cp->c_desc.cd_nameptr = 0;
1407 cp->c_desc.cd_flags &= ~CD_HASBUF;
1408 cp->c_desc.cd_namelen = 0;
1409 vfs_removename(nameptr);
1410 }
1411
1412 /*
1413 * We only call this function if we are in hfs_vnop_reclaim and
1414 * attempting to reclaim a cnode with only one live fork. Because the vnode
1415 * went through reclaim, any future attempts to use this item will have to
1416 * go through lookup again, which will need to create a new vnode. Thus,
1417 * destroying the locks below (while they were still held during our parent
1418 * function hfs_vnop_reclaim) is safe.
1419 */
1420
1421 lck_rw_destroy(&cp->c_rwlock, hfs_rwlock_group);
1422 lck_rw_destroy(&cp->c_truncatelock, hfs_rwlock_group);
1423 #if HFS_COMPRESSION
1424 if (cp->c_decmp) {
1425 decmpfs_cnode_destroy(cp->c_decmp);
1426 FREE_ZONE(cp->c_decmp, sizeof(*(cp->c_decmp)), M_DECMPFS_CNODE);
1427 }
1428 #endif
1429 #if CONFIG_PROTECT
1430 cp_entry_destroy(&cp->c_cpentry);
1431 #endif
1432
1433
1434 bzero(cp, sizeof(struct cnode));
1435 FREE_ZONE(cp, sizeof(struct cnode), M_HFSNODE);
1436 }
1437
1438
1439 /*
1440 * hfs_valid_cnode
1441 *
1442 * This function is used to validate data that is stored in-core against what is contained
1443 * in the catalog. Common uses include validating that the parent-child relationship still exist
1444 * for a specific directory entry (guaranteeing it has not been renamed into a different spot) at
1445 * the point of the check.
1446 */
1447 int
1448 hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp,
1449 cnid_t cnid, struct cat_attr *cattr, int *error)
1450 {
1451 struct cat_attr attr;
1452 struct cat_desc cndesc;
1453 int stillvalid = 0;
1454 int lockflags;
1455
1456 /* System files are always valid */
1457 if (cnid < kHFSFirstUserCatalogNodeID) {
1458 *error = 0;
1459 return (1);
1460 }
1461
1462 /* XXX optimization: check write count in dvp */
1463
1464 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1465
1466 if (dvp && cnp) {
1467 int lookup = 0;
1468 struct cat_fork fork;
1469 bzero(&cndesc, sizeof(cndesc));
1470 cndesc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr;
1471 cndesc.cd_namelen = cnp->cn_namelen;
1472 cndesc.cd_parentcnid = VTOC(dvp)->c_fileid;
1473 cndesc.cd_hint = VTOC(dvp)->c_childhint;
1474
1475 /*
1476 * We have to be careful when calling cat_lookup. The result argument
1477 * 'attr' may get different results based on whether or not you ask
1478 * for the filefork to be supplied as output. This is because cat_lookupbykey
1479 * will attempt to do basic validation/smoke tests against the resident
1480 * extents if there are no overflow extent records, but it needs someplace
1481 * in memory to store the on-disk fork structures.
1482 *
1483 * Since hfs_lookup calls cat_lookup with a filefork argument, we should
1484 * do the same here, to verify that block count differences are not
1485 * due to calling the function with different styles. cat_lookupbykey
1486 * will request the volume be fsck'd if there is true on-disk corruption
1487 * where the number of blocks does not match the number generated by
1488 * summing the number of blocks in the resident extents.
1489 */
1490
1491 lookup = cat_lookup (hfsmp, &cndesc, 0, NULL, &attr, &fork, NULL);
1492
1493 if ((lookup == 0) && (cnid == attr.ca_fileid)) {
1494 stillvalid = 1;
1495 *error = 0;
1496 }
1497 else {
1498 *error = ENOENT;
1499 }
1500
1501 /*
1502 * In hfs_getnewvnode, we may encounter a time-of-check vs. time-of-vnode creation
1503 * race. Specifically, if there is no vnode/cnode pair for the directory entry
1504 * being looked up, we have to go to the catalog. But since we don't hold any locks (aside
1505 * from the dvp in 'shared' mode) there is nothing to protect us against the catalog record
1506 * changing in between the time we do the cat_lookup there and the time we re-grab the
1507 * catalog lock above to do another cat_lookup.
1508 *
1509 * However, we need to check more than just the CNID and parent-child name relationships above.
1510 * Hardlinks can suffer the same race in the following scenario: Suppose we do a
1511 * cat_lookup, and find a leaf record and a raw inode for a hardlink. Now, we have
1512 * the cat_attr in hand (passed in above). But in between then and now, the vnode was
1513 * created by a competing hfs_getnewvnode call, and is manipulated and reclaimed before we get
1514 * a chance to do anything. This is possible if there are a lot of threads thrashing around
1515 * with the cnode hash. In this case, if we don't check/validate the cat_attr in-hand, we will
1516 * blindly stuff it into the cnode, which will make the in-core data inconsistent with what is
1517 * on disk. So validate the cat_attr below, if required. This race cannot happen if the cnode/vnode
1518 * already exists, as it does in the case of rename and delete.
1519 */
1520 if (stillvalid && cattr != NULL) {
1521 if (cattr->ca_linkcount != attr.ca_linkcount) {
1522 stillvalid = 0;
1523 *error = ERECYCLE;
1524 goto notvalid;
1525 }
1526
1527 if (cattr->ca_union1.cau_linkref != attr.ca_union1.cau_linkref) {
1528 stillvalid = 0;
1529 *error = ERECYCLE;
1530 goto notvalid;
1531 }
1532
1533 if (cattr->ca_union3.cau_firstlink != attr.ca_union3.cau_firstlink) {
1534 stillvalid = 0;
1535 *error = ERECYCLE;
1536 goto notvalid;
1537 }
1538
1539 if (cattr->ca_union2.cau_blocks != attr.ca_union2.cau_blocks) {
1540 stillvalid = 0;
1541 *error = ERECYCLE;
1542 goto notvalid;
1543 }
1544 }
1545 } else {
1546 if (cat_idlookup(hfsmp, cnid, 0, NULL, NULL, NULL) == 0) {
1547 stillvalid = 1;
1548 *error = 0;
1549 }
1550 else {
1551 *error = ENOENT;
1552 }
1553 }
1554 notvalid:
1555 hfs_systemfile_unlock(hfsmp, lockflags);
1556
1557 return (stillvalid);
1558 }
1559
1560
1561 /*
1562 * Per HI and Finder requirements, HFS should add in the
1563 * date/time that a particular directory entry was added
1564 * to the containing directory.
1565 * This is stored in the extended Finder Info for the
1566 * item in question.
1567 *
1568 * Note that this field is also set explicitly in the hfs_vnop_setxattr code.
1569 * We must ignore user attempts to set this part of the finderinfo, and
1570 * so we need to save a local copy of the date added, write in the user
1571 * finderinfo, then stuff the value back in.
1572 */
1573 void hfs_write_dateadded (struct cat_attr *attrp, u_int32_t dateadded) {
1574 u_int8_t *finfo = NULL;
1575
1576 /* overlay the FinderInfo to the correct pointer, and advance */
1577 finfo = (u_int8_t*)attrp->ca_finderinfo;
1578 finfo = finfo + 16;
1579
1580 /*
1581 * Make sure to write it out as big endian, since that's how
1582 * finder info is defined.
1583 *
1584 * NOTE: This is a Unix-epoch timestamp, not a HFS/Traditional Mac timestamp.
1585 */
1586 if (S_ISREG(attrp->ca_mode)) {
1587 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
1588 extinfo->date_added = OSSwapHostToBigInt32(dateadded);
1589 attrp->ca_recflags |= kHFSHasDateAddedMask;
1590 }
1591 else if (S_ISDIR(attrp->ca_mode)) {
1592 struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
1593 extinfo->date_added = OSSwapHostToBigInt32(dateadded);
1594 attrp->ca_recflags |= kHFSHasDateAddedMask;
1595 }
1596 /* If it were neither directory/file, then we'd bail out */
1597 return;
1598 }
1599
1600
1601 u_int32_t hfs_get_dateadded (struct cnode *cp) {
1602 u_int8_t *finfo = NULL;
1603 u_int32_t dateadded = 0;
1604
1605 if ((cp->c_attr.ca_recflags & kHFSHasDateAddedMask) == 0) {
1606 /* Date added was never set. Return 0. */
1607 return dateadded;
1608 }
1609
1610
1611 /* overlay the FinderInfo to the correct pointer, and advance */
1612 finfo = (u_int8_t*)cp->c_finderinfo;
1613 finfo = finfo + 16;
1614
1615 /*
1616 * FinderInfo is written out in big endian... make sure to convert it to host
1617 * native before we use it.
1618 */
1619 if (S_ISREG(cp->c_attr.ca_mode)) {
1620 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo;
1621 dateadded = OSSwapBigToHostInt32 (extinfo->date_added);
1622 }
1623 else if (S_ISDIR(cp->c_attr.ca_mode)) {
1624 struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo;
1625 dateadded = OSSwapBigToHostInt32 (extinfo->date_added);
1626 }
1627
1628 return dateadded;
1629 }
1630
1631 /*
1632 * Touch cnode times based on c_touch_xxx flags
1633 *
1634 * cnode must be locked exclusive
1635 *
1636 * This will also update the volume modify time
1637 */
1638 void
1639 hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp)
1640 {
1641 vfs_context_t ctx;
1642 /* don't modify times if volume is read-only */
1643 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1644 cp->c_touch_acctime = FALSE;
1645 cp->c_touch_chgtime = FALSE;
1646 cp->c_touch_modtime = FALSE;
1647 return;
1648 }
1649 else if (hfsmp->hfs_flags & HFS_STANDARD) {
1650 /* HFS Standard doesn't support access times */
1651 cp->c_touch_acctime = FALSE;
1652 }
1653
1654 ctx = vfs_context_current();
1655 /*
1656 * Skip access time updates if:
1657 * . MNT_NOATIME is set
1658 * . a file system freeze is in progress
1659 * . a file system resize is in progress
1660 * . the vnode associated with this cnode is marked for rapid aging
1661 */
1662 if (cp->c_touch_acctime) {
1663 if ((vfs_flags(hfsmp->hfs_mp) & MNT_NOATIME) ||
1664 (hfsmp->hfs_freezing_proc != NULL) ||
1665 (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) ||
1666 (cp->c_vp && ((vnode_israge(cp->c_vp) || (vfs_ctx_skipatime(ctx)))))) {
1667
1668 cp->c_touch_acctime = FALSE;
1669 }
1670 }
1671 if (cp->c_touch_acctime || cp->c_touch_chgtime ||
1672 cp->c_touch_modtime || (cp->c_flag & C_NEEDS_DATEADDED)) {
1673 struct timeval tv;
1674 int touchvol = 0;
1675
1676 microtime(&tv);
1677
1678 if (cp->c_touch_acctime) {
1679 cp->c_atime = tv.tv_sec;
1680 /*
1681 * When the access time is the only thing changing
1682 * then make sure its sufficiently newer before
1683 * committing it to disk.
1684 */
1685 if ((((u_int32_t)cp->c_atime - (u_int32_t)(cp)->c_attr.ca_atimeondisk) >
1686 ATIME_ONDISK_ACCURACY)) {
1687 cp->c_flag |= C_MODIFIED;
1688 }
1689 cp->c_touch_acctime = FALSE;
1690 }
1691 if (cp->c_touch_modtime) {
1692 cp->c_mtime = tv.tv_sec;
1693 cp->c_touch_modtime = FALSE;
1694 cp->c_flag |= C_MODIFIED;
1695 touchvol = 1;
1696 #if 1
1697 /*
1698 * HFS dates that WE set must be adjusted for DST
1699 */
1700 if ((hfsmp->hfs_flags & HFS_STANDARD) && gTimeZone.tz_dsttime) {
1701 cp->c_mtime += 3600;
1702 }
1703 #endif
1704 }
1705 if (cp->c_touch_chgtime) {
1706 cp->c_ctime = tv.tv_sec;
1707 cp->c_touch_chgtime = FALSE;
1708 cp->c_flag |= C_MODIFIED;
1709 touchvol = 1;
1710 }
1711
1712 if (cp->c_flag & C_NEEDS_DATEADDED) {
1713 hfs_write_dateadded (&(cp->c_attr), tv.tv_sec);
1714 cp->c_flag |= C_MODIFIED;
1715 /* untwiddle the bit */
1716 cp->c_flag &= ~C_NEEDS_DATEADDED;
1717 touchvol = 1;
1718 }
1719
1720 /* Touch the volume modtime if needed */
1721 if (touchvol) {
1722 MarkVCBDirty(hfsmp);
1723 HFSTOVCB(hfsmp)->vcbLsMod = tv.tv_sec;
1724 }
1725 }
1726 }
1727
1728 /*
1729 * Lock a cnode.
1730 */
1731 int
1732 hfs_lock(struct cnode *cp, enum hfslocktype locktype)
1733 {
1734 void * thread = current_thread();
1735
1736 if (cp->c_lockowner == thread) {
1737 /*
1738 * Only the extents and bitmap file's support lock recursion.
1739 */
1740 if ((cp->c_fileid == kHFSExtentsFileID) ||
1741 (cp->c_fileid == kHFSAllocationFileID)) {
1742 cp->c_syslockcount++;
1743 } else {
1744 panic("hfs_lock: locking against myself!");
1745 }
1746 } else if (locktype == HFS_SHARED_LOCK) {
1747 lck_rw_lock_shared(&cp->c_rwlock);
1748 cp->c_lockowner = HFS_SHARED_OWNER;
1749
1750 } else /* HFS_EXCLUSIVE_LOCK */ {
1751 lck_rw_lock_exclusive(&cp->c_rwlock);
1752 cp->c_lockowner = thread;
1753
1754 /*
1755 * Only the extents and bitmap file's support lock recursion.
1756 */
1757 if ((cp->c_fileid == kHFSExtentsFileID) ||
1758 (cp->c_fileid == kHFSAllocationFileID)) {
1759 cp->c_syslockcount = 1;
1760 }
1761 }
1762
1763 #ifdef HFS_CHECK_LOCK_ORDER
1764 /*
1765 * Regular cnodes (non-system files) cannot be locked
1766 * while holding the journal lock or a system file lock.
1767 */
1768 if (!(cp->c_desc.cd_flags & CD_ISMETA) &&
1769 ((cp->c_fileid > kHFSFirstUserCatalogNodeID) || (cp->c_fileid == kHFSRootFolderID))) {
1770 vnode_t vp = NULLVP;
1771
1772 /* Find corresponding vnode. */
1773 if (cp->c_vp != NULLVP && VTOC(cp->c_vp) == cp) {
1774 vp = cp->c_vp;
1775 } else if (cp->c_rsrc_vp != NULLVP && VTOC(cp->c_rsrc_vp) == cp) {
1776 vp = cp->c_rsrc_vp;
1777 }
1778 if (vp != NULLVP) {
1779 struct hfsmount *hfsmp = VTOHFS(vp);
1780
1781 if (hfsmp->jnl && (journal_owner(hfsmp->jnl) == thread)) {
1782 /* This will eventually be a panic here. */
1783 printf("hfs_lock: bad lock order (cnode after journal)\n");
1784 }
1785 if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
1786 panic("hfs_lock: bad lock order (cnode after catalog)");
1787 }
1788 if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
1789 panic("hfs_lock: bad lock order (cnode after attribute)");
1790 }
1791 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
1792 panic("hfs_lock: bad lock order (cnode after extents)");
1793 }
1794 }
1795 }
1796 #endif /* HFS_CHECK_LOCK_ORDER */
1797
1798 /*
1799 * Skip cnodes that no longer exist (were deleted).
1800 */
1801 if ((locktype != HFS_FORCE_LOCK) &&
1802 ((cp->c_desc.cd_flags & CD_ISMETA) == 0) &&
1803 (cp->c_flag & C_NOEXISTS)) {
1804 hfs_unlock(cp);
1805 return (ENOENT);
1806 }
1807 return (0);
1808 }
1809
1810 /*
1811 * Lock a pair of cnodes.
1812 */
1813 int
1814 hfs_lockpair(struct cnode *cp1, struct cnode *cp2, enum hfslocktype locktype)
1815 {
1816 struct cnode *first, *last;
1817 int error;
1818
1819 /*
1820 * If cnodes match then just lock one.
1821 */
1822 if (cp1 == cp2) {
1823 return hfs_lock(cp1, locktype);
1824 }
1825
1826 /*
1827 * Lock in cnode address order.
1828 */
1829 if (cp1 < cp2) {
1830 first = cp1;
1831 last = cp2;
1832 } else {
1833 first = cp2;
1834 last = cp1;
1835 }
1836
1837 if ( (error = hfs_lock(first, locktype))) {
1838 return (error);
1839 }
1840 if ( (error = hfs_lock(last, locktype))) {
1841 hfs_unlock(first);
1842 return (error);
1843 }
1844 return (0);
1845 }
1846
/*
 * Check ordering of two cnodes.  Returns non-zero if (cp1, cp2) are
 * in locking order, zero otherwise (including when they are equal).
 *
 * NULL sorts before everything else and the sentinel value 0xffffffff
 * sorts after everything else; real cnodes are ordered by address.
 */
static int
hfs_isordered(struct cnode *cp1, struct cnode *cp2)
{
	struct cnode * const sentinel = (struct cnode *)0xffffffff;
	int inorder;

	if (cp1 == cp2) {
		inorder = 0;
	} else if (cp1 == NULL || cp2 == sentinel) {
		inorder = 1;
	} else if (cp2 == NULL || cp1 == sentinel) {
		inorder = 0;
	} else {
		/* Locking order is cnode address order. */
		inorder = (cp1 < cp2);
	}
	return (inorder);
}
1864
1865 /*
1866 * Acquire 4 cnode locks.
1867 * - locked in cnode address order (lesser address first).
1868 * - all or none of the locks are taken
1869 * - only one lock taken per cnode (dup cnodes are skipped)
1870 * - some of the cnode pointers may be null
1871 */
1872 int
1873 hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3,
1874 struct cnode *cp4, enum hfslocktype locktype, struct cnode **error_cnode)
1875 {
1876 struct cnode * a[3];
1877 struct cnode * b[3];
1878 struct cnode * list[4];
1879 struct cnode * tmp;
1880 int i, j, k;
1881 int error;
1882 if (error_cnode) {
1883 *error_cnode = NULL;
1884 }
1885
1886 if (hfs_isordered(cp1, cp2)) {
1887 a[0] = cp1; a[1] = cp2;
1888 } else {
1889 a[0] = cp2; a[1] = cp1;
1890 }
1891 if (hfs_isordered(cp3, cp4)) {
1892 b[0] = cp3; b[1] = cp4;
1893 } else {
1894 b[0] = cp4; b[1] = cp3;
1895 }
1896 a[2] = (struct cnode *)0xffffffff; /* sentinel value */
1897 b[2] = (struct cnode *)0xffffffff; /* sentinel value */
1898
1899 /*
1900 * Build the lock list, skipping over duplicates
1901 */
1902 for (i = 0, j = 0, k = 0; (i < 2 || j < 2); ) {
1903 tmp = hfs_isordered(a[i], b[j]) ? a[i++] : b[j++];
1904 if (k == 0 || tmp != list[k-1])
1905 list[k++] = tmp;
1906 }
1907
1908 /*
1909 * Now we can lock using list[0 - k].
1910 * Skip over NULL entries.
1911 */
1912 for (i = 0; i < k; ++i) {
1913 if (list[i])
1914 if ((error = hfs_lock(list[i], locktype))) {
1915 /* Only stuff error_cnode if requested */
1916 if (error_cnode) {
1917 *error_cnode = list[i];
1918 }
1919 /* Drop any locks we acquired. */
1920 while (--i >= 0) {
1921 if (list[i])
1922 hfs_unlock(list[i]);
1923 }
1924 return (error);
1925 }
1926 }
1927 return (0);
1928 }
1929
1930
1931 /*
1932 * Unlock a cnode.
1933 */
1934 void
1935 hfs_unlock(struct cnode *cp)
1936 {
1937 vnode_t rvp = NULLVP;
1938 vnode_t vp = NULLVP;
1939 u_int32_t c_flag;
1940 void *lockowner;
1941
1942 /*
1943 * Only the extents and bitmap file's support lock recursion.
1944 */
1945 if ((cp->c_fileid == kHFSExtentsFileID) ||
1946 (cp->c_fileid == kHFSAllocationFileID)) {
1947 if (--cp->c_syslockcount > 0) {
1948 return;
1949 }
1950 }
1951 c_flag = cp->c_flag;
1952 cp->c_flag &= ~(C_NEED_DVNODE_PUT | C_NEED_RVNODE_PUT | C_NEED_DATA_SETSIZE | C_NEED_RSRC_SETSIZE);
1953
1954 if (c_flag & (C_NEED_DVNODE_PUT | C_NEED_DATA_SETSIZE)) {
1955 vp = cp->c_vp;
1956 }
1957 if (c_flag & (C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE)) {
1958 rvp = cp->c_rsrc_vp;
1959 }
1960
1961 lockowner = cp->c_lockowner;
1962 if (lockowner == current_thread()) {
1963 cp->c_lockowner = NULL;
1964 lck_rw_unlock_exclusive(&cp->c_rwlock);
1965 } else {
1966 lck_rw_unlock_shared(&cp->c_rwlock);
1967 }
1968
1969 /* Perform any vnode post processing after cnode lock is dropped. */
1970 if (vp) {
1971 if (c_flag & C_NEED_DATA_SETSIZE)
1972 ubc_setsize(vp, 0);
1973 if (c_flag & C_NEED_DVNODE_PUT)
1974 vnode_put(vp);
1975 }
1976 if (rvp) {
1977 if (c_flag & C_NEED_RSRC_SETSIZE)
1978 ubc_setsize(rvp, 0);
1979 if (c_flag & C_NEED_RVNODE_PUT)
1980 vnode_put(rvp);
1981 }
1982 }
1983
/*
 * Unlock two cnodes.
 *
 * cp1 is always unlocked. cp2 is unlocked as well unless it is the
 * same cnode as cp1, in which case it is unlocked only once.
 */
void
hfs_unlockpair(struct cnode *cp1, struct cnode *cp2)
{
	hfs_unlock(cp1);

	if (cp1 == cp2) {
		return;
	}
	hfs_unlock(cp2);
}
1994
/*
 * Unlock up to four cnodes.
 *
 * The arguments are processed in order (cp1, cp2, cp3, cp4). NULL
 * entries are ignored, and a cnode that appears more than once is
 * unlocked only the first time it is seen.
 */
void
hfs_unlockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, struct cnode *cp4)
{
	struct cnode *candidates[4];
	struct cnode *released[4];
	struct cnode *cur;
	int nreleased = 0;
	int idx, seen, duplicate;

	candidates[0] = cp1;
	candidates[1] = cp2;
	candidates[2] = cp3;
	candidates[3] = cp4;

	for (idx = 0; idx < 4; ++idx) {
		cur = candidates[idx];
		if (!cur) {
			continue;
		}

		/* Has this cnode already been unlocked by an earlier argument? */
		duplicate = 0;
		for (seen = 0; seen < nreleased; ++seen) {
			if (released[seen] == cur) {
				duplicate = 1;
				break;
			}
		}
		if (duplicate) {
			continue;
		}

		hfs_unlock(cur);
		released[nreleased++] = cur;
	}
}
2034
2035
2036 /*
2037 * Protect a cnode against a truncation.
2038 *
2039 * Used mainly by read/write since they don't hold the
2040 * cnode lock across calls to the cluster layer.
2041 *
2042 * The process doing a truncation must take the lock
2043 * exclusive. The read/write processes can take it
2044 * shared. The locktype argument is the same as supplied to
2045 * hfs_lock.
2046 */
2047 void
2048 hfs_lock_truncate(struct cnode *cp, enum hfslocktype locktype)
2049 {
2050 void * thread = current_thread();
2051
2052 if (cp->c_truncatelockowner == thread) {
2053 /*
2054 * Only HFS_RECURSE_TRUNCLOCK is allowed to recurse.
2055 *
2056 * This is needed on the hfs_vnop_pagein path where we need to ensure
2057 * the file does not change sizes while we are paging in. However,
2058 * we may already hold the lock exclusive due to another
2059 * VNOP from earlier in the call stack. So if we already hold
2060 * the truncate lock exclusive, allow it to proceed, but ONLY if
2061 * it's in the recursive case.
2062 */
2063 if (locktype != HFS_RECURSE_TRUNCLOCK) {
2064 panic("hfs_lock_truncate: cnode %p locked!", cp);
2065 }
2066 }
2067 /* HFS_RECURSE_TRUNCLOCK takes a shared lock if it is not already locked */
2068 else if ((locktype == HFS_SHARED_LOCK) || (locktype == HFS_RECURSE_TRUNCLOCK)) {
2069 lck_rw_lock_shared(&cp->c_truncatelock);
2070 cp->c_truncatelockowner = HFS_SHARED_OWNER;
2071 }
2072 else { /* must be an HFS_EXCLUSIVE_LOCK */
2073 lck_rw_lock_exclusive(&cp->c_truncatelock);
2074 cp->c_truncatelockowner = thread;
2075 }
2076 }
2077
2078
2079 /*
2080 * Attempt to get the truncate lock. If it cannot be acquired, error out.
2081 * This function is needed in the degenerate hfs_vnop_pagein during force unmount
2082 * case. To prevent deadlocks while a VM copy object is moving pages, HFS vnop pagein will
2083 * temporarily need to disable V2 semantics.
2084 */
2085 int hfs_try_trunclock (struct cnode *cp, enum hfslocktype locktype) {
2086 void * thread = current_thread();
2087 boolean_t didlock = false;
2088
2089 if (cp->c_truncatelockowner == thread) {
2090 /*
2091 * Only HFS_RECURSE_TRUNCLOCK is allowed to recurse.
2092 *
2093 * This is needed on the hfs_vnop_pagein path where we need to ensure
2094 * the file does not change sizes while we are paging in. However,
2095 * we may already hold the lock exclusive due to another
2096 * VNOP from earlier in the call stack. So if we already hold
2097 * the truncate lock exclusive, allow it to proceed, but ONLY if
2098 * it's in the recursive case.
2099 */
2100 if (locktype != HFS_RECURSE_TRUNCLOCK) {
2101 panic("hfs_lock_truncate: cnode %p locked!", cp);
2102 }
2103 }
2104 /* HFS_RECURSE_TRUNCLOCK takes a shared lock if it is not already locked */
2105 else if ((locktype == HFS_SHARED_LOCK) || (locktype == HFS_RECURSE_TRUNCLOCK)) {
2106 didlock = lck_rw_try_lock(&cp->c_truncatelock, LCK_RW_TYPE_SHARED);
2107 if (didlock) {
2108 cp->c_truncatelockowner = HFS_SHARED_OWNER;
2109 }
2110 }
2111 else { /* must be an HFS_EXCLUSIVE_LOCK */
2112 didlock = lck_rw_try_lock (&cp->c_truncatelock, LCK_RW_TYPE_EXCLUSIVE);
2113 if (didlock) {
2114 cp->c_truncatelockowner = thread;
2115 }
2116 }
2117
2118 return didlock;
2119 }
2120
2121
2122 /*
2123 * Unlock the truncate lock, which protects against size changes.
2124 *
2125 * The been_recursed argument is used when we may need to return
2126 * from this function without actually unlocking the truncate lock.
2127 */
2128 void
2129 hfs_unlock_truncate(struct cnode *cp, int been_recursed)
2130 {
2131 void *thread = current_thread();
2132
2133 /*
2134 * If been_recursed is nonzero AND the current lock owner of the
2135 * truncate lock is our current thread, then we must have recursively
2136 * taken the lock earlier on. If the lock were unlocked,
2137 * HFS_RECURSE_TRUNCLOCK took a shared lock and it would fall through
2138 * to the SHARED case below.
2139 *
2140 * If been_recursed is zero (most of the time) then we check the
2141 * lockowner field to infer whether the lock was taken exclusively or
2142 * shared in order to know what underlying lock routine to call.
2143 */
2144 if (been_recursed) {
2145 if (cp->c_truncatelockowner == thread) {
2146 return;
2147 }
2148 }
2149
2150 /* HFS_LOCK_EXCLUSIVE */
2151 if (thread == cp->c_truncatelockowner) {
2152 cp->c_truncatelockowner = NULL;
2153 lck_rw_unlock_exclusive(&cp->c_truncatelock);
2154 }
2155 /* HFS_LOCK_SHARED */
2156 else {
2157 lck_rw_unlock_shared(&cp->c_truncatelock);
2158 }
2159 }