2 * Copyright (c) 2002-2014 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 #include <sys/param.h>
29 #include <sys/systm.h>
31 #include <sys/vnode.h>
32 #include <sys/mount.h>
33 #include <sys/kernel.h>
34 #include <sys/malloc.h>
37 #include <sys/quota.h>
38 #include <sys/kdebug.h>
39 #include <libkern/OSByteOrder.h>
40 #include <sys/buf_internal.h>
42 #include <kern/locks.h>
44 #include <miscfs/specfs/specdev.h>
45 #include <miscfs/fifofs/fifo.h>
48 #include <hfs/hfs_catalog.h>
49 #include <hfs/hfs_cnode.h>
50 #include <hfs/hfs_quota.h>
51 #include <hfs/hfs_format.h>
52 #include <hfs/hfs_kdebug.h>
56 extern lck_attr_t
* hfs_lock_attr
;
57 extern lck_grp_t
* hfs_mutex_group
;
58 extern lck_grp_t
* hfs_rwlock_group
;
60 static void hfs_reclaim_cnode(struct cnode
*);
61 static int hfs_cnode_teardown (struct vnode
*vp
, vfs_context_t ctx
, int reclaim
);
62 static int hfs_isordered(struct cnode
*, struct cnode
*);
64 extern int hfs_removefile_callback(struct buf
*bp
, void *hfsmp
);
67 __inline__
int hfs_checkdeleted (struct cnode
*cp
) {
68 return ((cp
->c_flag
& (C_DELETED
| C_NOEXISTS
)) ? ENOENT
: 0);
/*
 * Function used by a special fcntl() that decorates a cnode/vnode to
 * indicate that it is backing another filesystem, like a disk image.
 *
 * The argument 'val' indicates whether to set or clear the bit in the
 * cnode flags.
 *
 * Returns non-zero on failure, 0 on success.
 */
79 int hfs_set_backingstore (struct vnode
*vp
, int val
) {
80 struct cnode
*cp
= NULL
;
84 if (!vnode_isreg(vp
) && !vnode_isdir(vp
)) {
89 err
= hfs_lock (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
95 cp
->c_flag
|= C_BACKINGSTORE
;
98 cp
->c_flag
&= ~C_BACKINGSTORE
;
101 /* unlock everything */
/*
 * Function used by a special fcntl() that checks whether a cnode/vnode
 * indicates it is backing another filesystem, like a disk image.
 *
 * The argument 'val' is an output argument reporting whether or not the
 * bit is set.
 *
 * Returns non-zero on failure, 0 on success.
 */
116 int hfs_is_backingstore (struct vnode
*vp
, int *val
) {
117 struct cnode
*cp
= NULL
;
120 if (!vnode_isreg(vp
) && !vnode_isdir(vp
)) {
128 err
= hfs_lock (cp
, HFS_SHARED_LOCK
, HFS_LOCK_DEFAULT
);
133 if (cp
->c_flag
& C_BACKINGSTORE
) {
140 /* unlock everything */
/*
 * This is an internal function that is invoked from both hfs_vnop_inactive
 * and hfs_vnop_reclaim.  As VNOP_INACTIVE is not necessarily called for vnodes
 * being recycled and reclaimed, it is important that we do any post-processing
 * necessary for the cnode in both places.  Important tasks include things such as
 * releasing the blocks from an open-unlinked file when all references to it have dropped,
 * and handling resource forks separately from data forks.
 *
 * Note that we take only the vnode as an argument here (rather than the cnode).
 * Recall that each cnode supports two forks (rsrc/data), and we can always get the right
 * cnode from either of the vnodes, but the reverse is not true -- we can't determine which
 * vnode we need to reclaim if only the cnode is supplied.
 *
 * This function is idempotent and safe to call from both hfs_vnop_inactive and hfs_vnop_reclaim
 * if both are invoked right after the other.  In the second call, most of this function's if()
 * conditions will fail, since they apply generally to cnodes still marked with C_DELETED.
 * As a quick check to see if this function is necessary, determine if the cnode is already
 * marked C_NOEXISTS.  If it is, then it is safe to skip this function.  The only tasks that
 * remain for cnodes marked in such a fashion are to tear down their fork references and
 * release all directory hints and hardlink origins.  However, both of those are done
 * in hfs_vnop_reclaim.  hfs_update, by definition, is not necessary if the cnode's catalog
 * entry is no longer there.
 *
 * The 'reclaim' argument specifies whether or not we were called from hfs_vnop_reclaim.  If we are
 * invoked from hfs_vnop_reclaim, we cannot call functions that cluster_push since the UBC info
 * is totally gone by that point.
 *
 * Assumes that both truncate and cnode locks for 'cp' are held.
 */
179 int hfs_cnode_teardown (struct vnode
*vp
, vfs_context_t ctx
, int reclaim
)
186 struct hfsmount
*hfsmp
= VTOHFS(vp
);
187 struct proc
*p
= vfs_context_proc(ctx
);
194 v_type
= vnode_vtype(vp
);
197 if (cp
->c_datafork
) {
200 if (cp
->c_rsrcfork
) {
206 * Skip the call to ubc_setsize if we're being invoked on behalf of reclaim.
207 * The dirty regions would have already been synced to disk, so informing UBC
208 * that they can toss the pages doesn't help anyone at this point.
210 * Note that this is a performance problem if the vnode goes straight to reclaim
211 * (and skips inactive), since there would be no way for anyone to notify the UBC
212 * that all pages in this file are basically useless.
/*
 * Check whether we are tearing down a cnode with only one remaining fork.
 * If there are blocks in its filefork, then we need to unlock the cnode
 * before calling ubc_setsize.  The cluster layer may re-enter the filesystem
 * (i.e. VNOP_BLOCKMAP), and if we retain the cnode lock, we could double-lock
 * ourselves.
 */
223 if ((v_type
== VREG
|| v_type
== VLNK
) &&
224 (cp
->c_flag
& C_DELETED
) &&
225 (VTOF(vp
)->ff_blocks
!= 0) && (forkcount
== 1)) {
227 /* ubc_setsize just fails if we were to call this from VNOP_RECLAIM */
229 (void) hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
/*
 * Push file data out for normal files that haven't been evicted from
 * the namespace.  We only do this if this function was not called from reclaim,
 * because by that point the UBC information has been totally torn down.
 *
 * There should also be no way for a normal file that has NOT been deleted from
 * the namespace to skip INACTIVE and go straight to RECLAIM.  That race only happens
 * when the file becomes open-unlinked.
 */
242 if ((v_type
== VREG
) &&
243 (!ISSET(cp
->c_flag
, C_DELETED
)) &&
244 (!ISSET(cp
->c_flag
, C_NOEXISTS
)) &&
245 (VTOF(vp
)->ff_blocks
) &&
/*
 * Note that if content protection is enabled, then this is where we will
 * attempt to issue IOs for all dirty regions of this file.
 *
 * If we're called from hfs_vnop_inactive, all this means is that, at the time
 * the decision to call this function was made, there were no lingering
 * mmap/fd references for this file.  However, nothing prevents the system
 * from creating a new reference between the time that check was made
 * and the time we entered hfs_vnop_inactive.  As a result, the only time we can
 * guarantee that there aren't any references is during vnop_reclaim.
 */
258 hfs_filedone(vp
, ctx
, 0);
262 * We're holding the cnode lock now. Stall behind any shadow BPs that may
263 * be involved with this vnode if it is a symlink. We don't want to allow
264 * the blocks that we're about to release to be put back into the pool if there
265 * is pending I/O to them.
267 if (v_type
== VLNK
) {
269 * This will block if the asynchronous journal flush is in progress.
270 * If this symlink is not being renamed over and doesn't have any open FDs,
271 * then we'll remove it from the journal's bufs below in kill_block.
273 buf_wait_for_shadow_io (vp
, 0);
277 * Remove any directory hints or cached origins
279 if (v_type
== VDIR
) {
280 hfs_reldirhints(cp
, 0);
282 if (cp
->c_flag
& C_HARDLINK
) {
/*
 * This check is slightly complicated.  We should only truncate data
 * in very specific cases for open-unlinked files.  This is because
 * we want to ensure that the resource fork continues to be available
 * if the caller has the data fork open.  However, this is not symmetric;
 * someone who has the resource fork open need not be able to access the data
 * fork once the data fork has gone inactive.
 *
 * If we're the last fork, then we have cleaning up to do.
 *
 * A) last fork, and vp == c_vp
 *	Truncate away own fork data.  If rsrc fork is not in core, truncate it too.
 *
 * B) last fork, and vp == c_rsrc_vp
 *	Truncate ourselves, assume data fork has been cleaned due to C).
 *
 * If we're not the last fork, then things are a little different:
 *
 * C) not the last fork, vp == c_vp
 *	Truncate ourselves.  Once the file has gone out of the namespace,
 *	it cannot be further opened.  Further access to the rsrc fork may
 *	continue, however.
 *
 * D) not the last fork, vp == c_rsrc_vp
 *	Don't enter the block below, just clean up vnode and push it out of core.
 */
313 if ((v_type
== VREG
|| v_type
== VLNK
) &&
314 (cp
->c_flag
& C_DELETED
) &&
315 ((forkcount
== 1) || (!VNODE_IS_RSRC(vp
)))) {
317 /* Truncate away our own fork data. (Case A, B, C above) */
318 if (VTOF(vp
)->ff_blocks
!= 0) {
/*
 * Encapsulate the entire change (including truncating the link) in
 * nested transactions if we are modifying a symlink, because we know that its
 * file length will be at most 4k, and we can fit both the truncation and
 * any relevant bitmap changes into a single journal transaction.  We also want
 * the kill_block code to execute in the same transaction so that any dirty symlink
 * blocks will not be written.  Otherwise, rely on
 * hfs_truncate doing its own transactions to ensure that we don't blow up
 * the journal.
 */
332 if ((started_tr
== 0) && (v_type
== VLNK
)) {
333 if (hfs_start_transaction(hfsmp
) != 0) {
/*
 * At this point, we have decided that this cnode is
 * suitable for full removal.  We are about to deallocate
 * its blocks and remove its entry from the catalog.
 * If it was a symlink, then it's possible that the operation
 * which created it is still in the current transaction group
 * due to coalescing.  Take action here to kill the data blocks
 * of the symlink out of the journal before moving to
 * deallocate the blocks.  We need to be in the middle of
 * a transaction before calling buf_iterate like this.
 *
 * Note: we have to kill any potential symlink buffers out of
 * the journal prior to deallocating their blocks.  This is so
 * that we don't race with another thread that may be doing
 * an allocation concurrently and pick up these blocks.  It could
 * generate I/O against them which could go out ahead of our journal
 * transaction.
 */
361 if (hfsmp
->jnl
&& vnode_islnk(vp
)) {
362 buf_iterate(vp
, hfs_removefile_callback
, BUF_SKIP_NONLOCKED
, (void *)hfsmp
);
367 * This truncate call (and the one below) is fine from VNOP_RECLAIM's
368 * context because we're only removing blocks, not zero-filling new
369 * ones. The C_DELETED check above makes things much simpler.
371 error
= hfs_truncate(vp
, (off_t
)0, IO_NDELAY
, 0, ctx
);
377 /* (SYMLINKS ONLY): Close/End our transaction after truncating the file record */
379 hfs_end_transaction(hfsmp
);
386 * Truncate away the resource fork, if we represent the data fork and
387 * it is the last fork. That means, by definition, the rsrc fork is not in
388 * core. To avoid bringing a vnode into core for the sole purpose of deleting the
389 * data in the resource fork, we call cat_lookup directly, then hfs_release_storage
390 * to get rid of the resource fork's data. Note that because we are holding the
391 * cnode lock, it is impossible for a competing thread to create the resource fork
392 * vnode from underneath us while we do this.
394 * This is invoked via case A above only.
396 if ((cp
->c_blocks
> 0) && (forkcount
== 1) && (vp
!= cp
->c_rsrc_vp
)) {
397 struct cat_lookup_buffer
*lookup_rsrc
= NULL
;
398 struct cat_desc
*desc_ptr
= NULL
;
401 MALLOC(lookup_rsrc
, struct cat_lookup_buffer
*, sizeof (struct cat_lookup_buffer
), M_TEMP
, M_WAITOK
);
402 if (lookup_rsrc
== NULL
) {
403 printf("hfs_cnode_teardown: ENOMEM from MALLOC\n");
408 bzero (lookup_rsrc
, sizeof (struct cat_lookup_buffer
));
411 if (cp
->c_desc
.cd_namelen
== 0) {
412 /* Initialize the rsrc descriptor for lookup if necessary*/
413 MAKE_DELETED_NAME (lookup_rsrc
->lookup_name
, HFS_TEMPLOOKUP_NAMELEN
, cp
->c_fileid
);
415 lookup_rsrc
->lookup_desc
.cd_nameptr
= (const uint8_t*) lookup_rsrc
->lookup_name
;
416 lookup_rsrc
->lookup_desc
.cd_namelen
= strlen (lookup_rsrc
->lookup_name
);
417 lookup_rsrc
->lookup_desc
.cd_parentcnid
= hfsmp
->hfs_private_desc
[FILE_HARDLINKS
].cd_cnid
;
418 lookup_rsrc
->lookup_desc
.cd_cnid
= cp
->c_cnid
;
420 desc_ptr
= &lookup_rsrc
->lookup_desc
;
423 desc_ptr
= &cp
->c_desc
;
426 lockflags
= hfs_systemfile_lock (hfsmp
, SFL_CATALOG
, HFS_SHARED_LOCK
);
428 error
= cat_lookup (hfsmp
, desc_ptr
, 1, 0, (struct cat_desc
*) NULL
,
429 (struct cat_attr
*) NULL
, &lookup_rsrc
->lookup_fork
.ff_data
, NULL
);
431 hfs_systemfile_unlock (hfsmp
, lockflags
);
434 FREE (lookup_rsrc
, M_TEMP
);
439 * Make the filefork in our temporary struct look like a real
440 * filefork. Fill in the cp, sysfileinfo and rangelist fields..
442 rl_init (&lookup_rsrc
->lookup_fork
.ff_invalidranges
);
443 lookup_rsrc
->lookup_fork
.ff_cp
= cp
;
446 * If there were no errors, then we have the catalog's fork information
447 * for the resource fork in question. Go ahead and delete the data in it now.
450 error
= hfs_release_storage (hfsmp
, NULL
, &lookup_rsrc
->lookup_fork
, cp
->c_fileid
);
451 FREE(lookup_rsrc
, M_TEMP
);
458 * This fileid's resource fork extents have now been fully deleted on-disk
459 * and this CNID is no longer valid. At this point, we should be able to
460 * zero out cp->c_blocks to indicate there is no data left in this file.
467 * If we represent the last fork (or none in the case of a dir),
468 * and the cnode has become open-unlinked,
469 * AND it has EA's, then we need to get rid of them.
471 * Note that this must happen outside of any other transactions
472 * because it starts/ends its own transactions and grabs its
473 * own locks. This is to prevent a file with a lot of attributes
474 * from creating a transaction that is too large (which panics).
476 if ((cp
->c_attr
.ca_recflags
& kHFSHasAttributesMask
) != 0 &&
477 (cp
->c_flag
& C_DELETED
) &&
480 ea_error
= hfs_removeallattr(hfsmp
, cp
->c_fileid
);
485 * If the cnode represented an open-unlinked file, then now
486 * actually remove the cnode's catalog entry and release all blocks
487 * it may have been using.
489 if ((cp
->c_flag
& C_DELETED
) && (forkcount
<= 1)) {
491 * Mark cnode in transit so that no one can get this
492 * cnode from cnode hash.
494 // hfs_chash_mark_in_transit(hfsmp, cp);
495 // XXXdbg - remove the cnode from the hash table since it's deleted
496 // otherwise someone could go to sleep on the cnode and not
497 // be woken up until this vnode gets recycled which could be
498 // a very long time...
499 hfs_chashremove(hfsmp
, cp
);
501 cp
->c_flag
|= C_NOEXISTS
; // XXXdbg
504 if (started_tr
== 0) {
505 if (hfs_start_transaction(hfsmp
) != 0) {
513 * Reserve some space in the Catalog file.
515 if ((error
= cat_preflight(hfsmp
, CAT_DELETE
, &cookie
, p
))) {
520 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_CATALOG
| SFL_ATTRIBUTE
, HFS_EXCLUSIVE_LOCK
);
522 if (cp
->c_blocks
> 0) {
523 printf("hfs_inactive: deleting non-empty%sfile %d, "
524 "blks %d\n", VNODE_IS_RSRC(vp
) ? " rsrc " : " ",
525 (int)cp
->c_fileid
, (int)cp
->c_blocks
);
529 // release the name pointer in the descriptor so that
530 // cat_delete() will use the file-id to do the deletion.
531 // in the case of hard links this is imperative (in the
532 // case of regular files the fileid and cnid are the
533 // same so it doesn't matter).
535 cat_releasedesc(&cp
->c_desc
);
538 * The descriptor name may be zero,
539 * in which case the fileid is used.
541 error
= cat_delete(hfsmp
, &cp
->c_desc
, &cp
->c_attr
);
543 if (error
&& truncated
&& (error
!= ENXIO
)) {
544 printf("hfs_inactive: couldn't delete a truncated file!");
547 /* Update HFS Private Data dir */
549 hfsmp
->hfs_private_attr
[FILE_HARDLINKS
].ca_entries
--;
550 if (vnode_isdir(vp
)) {
551 DEC_FOLDERCOUNT(hfsmp
, hfsmp
->hfs_private_attr
[FILE_HARDLINKS
]);
553 (void)cat_update(hfsmp
, &hfsmp
->hfs_private_desc
[FILE_HARDLINKS
],
554 &hfsmp
->hfs_private_attr
[FILE_HARDLINKS
], NULL
, NULL
);
557 hfs_systemfile_unlock(hfsmp
, lockflags
);
564 if (hfsmp
->hfs_flags
& HFS_QUOTAS
)
565 (void)hfs_chkiq(cp
, -1, NOCRED
, 0);
568 /* Already set C_NOEXISTS at the beginning of this block */
569 cp
->c_flag
&= ~C_DELETED
;
570 cp
->c_touch_chgtime
= TRUE
;
571 cp
->c_touch_modtime
= TRUE
;
574 hfs_volupdate(hfsmp
, (v_type
== VDIR
) ? VOL_RMDIR
: VOL_RMFILE
, 0);
578 * A file may have had delayed allocations, in which case hfs_update
579 * would not have updated the catalog record (cat_update). We need
580 * to do that now, before we lose our fork data. We also need to
581 * force the update, or hfs_update will again skip the cat_update.
583 * If the file has C_NOEXISTS set, then we can skip the hfs_update call
584 * because the catalog entry has already been removed. There would be no point
585 * to looking up the entry in the catalog to modify it when we already know it's gone
587 if ((!ISSET(cp
->c_flag
, C_NOEXISTS
)) &&
588 ((cp
->c_flag
& C_MODIFIED
) || cp
->c_touch_acctime
||
589 cp
->c_touch_chgtime
|| cp
->c_touch_modtime
)) {
591 if ((cp
->c_flag
& C_MODIFIED
) || cp
->c_touch_modtime
){
592 cp
->c_flag
|= C_FORCEUPDATE
;
/*
 * Since we are about to finish what might be an inactive call, propagate
 * any remaining modified or touch bits from the cnode to the vnode.  This
 * serves as a hint to vnode recycling that we shouldn't recycle this vnode
 * synchronously.
 */
603 if (ISSET(cp
->c_flag
, C_MODIFIED
) || ISSET(cp
->c_flag
, C_FORCEUPDATE
) ||
604 cp
->c_touch_acctime
|| cp
->c_touch_chgtime
||
605 cp
->c_touch_modtime
|| ISSET(cp
->c_flag
, C_NEEDS_DATEADDED
) ||
606 ISSET(cp
->c_flag
, C_DELETED
)) {
609 vnode_cleardirty(vp
);
614 cat_postflight(hfsmp
, &cookie
, p
);
616 // XXXdbg - have to do this because a goto could have come here
618 hfs_end_transaction(hfsmp
);
625 * cnode truncate lock and cnode lock are both held exclusive here.
627 * Go ahead and flush the keys out if this cnode is the last fork
628 * and it is not class F. Class F keys should not be purged because they only
629 * exist in memory and have no persistent keys. Only do this
630 * if we haven't already done it yet (maybe a vnode skipped inactive
631 * and went straight to reclaim). This function gets called from both reclaim and
632 * inactive, so it will happen first in inactive if possible.
634 * We need to be mindful that all pending IO for this file has already been
635 * issued and completed before we bzero out the key. This is because
636 * if it isn't, tossing the key here could result in garbage IO being
637 * written (by using the bzero'd key) if the writes are happening asynchronously.
639 * In addition, class A files may have already been purged due to the
640 * lock event occurring.
642 if (forkcount
== 1) {
643 struct cprotect
*entry
= cp
->c_cpentry
;
644 if ((entry
) && ( CP_CLASS(entry
->cp_pclass
) != PROTECTION_CLASS_F
)) {
645 if ((cp
->c_cpentry
->cp_flags
& CP_KEY_FLUSHED
) == 0) {
646 cp
->c_cpentry
->cp_flags
|= CP_KEY_FLUSHED
;
647 bzero (cp
->c_cpentry
->cp_cache_key
, cp
->c_cpentry
->cp_cache_key_len
);
648 bzero (cp
->c_cpentry
->cp_cache_iv_ctx
, sizeof(aes_encrypt_ctx
));
662 * The last usecount on the vnode has gone away, so we need to tear down
663 * any remaining data still residing in the cnode. If necessary, write out
664 * remaining blocks or delete the cnode's entry in the catalog.
667 hfs_vnop_inactive(struct vnop_inactive_args
*ap
)
669 struct vnode
*vp
= ap
->a_vp
;
671 struct hfsmount
*hfsmp
= VTOHFS(vp
);
672 struct proc
*p
= vfs_context_proc(ap
->a_context
);
674 int took_trunc_lock
= 0;
677 v_type
= vnode_vtype(vp
);
680 if ((hfsmp
->hfs_flags
& HFS_READ_ONLY
) || vnode_issystem(vp
) ||
681 (hfsmp
->hfs_freezing_proc
== p
)) {
687 * For safety, do NOT call vnode_recycle from inside this function. This can cause
688 * problems in the following scenario:
690 * vnode_create -> vnode_reclaim_internal -> vclean -> VNOP_INACTIVE
692 * If we're being invoked as a result of a reclaim that was already in-flight, then we
693 * cannot call vnode_recycle again. Being in reclaim means that there are no usecounts or
694 * iocounts by definition. As a result, if we were to call vnode_recycle, it would immediately
695 * try to re-enter reclaim again and panic.
697 * Currently, there are three things that can cause us (VNOP_INACTIVE) to get called.
698 * 1) last usecount goes away on the vnode (vnode_rele)
699 * 2) last iocount goes away on a vnode that previously had usecounts but didn't have
700 * vnode_recycle called (vnode_put)
701 * 3) vclean by way of reclaim
703 * In this function we would generally want to call vnode_recycle to speed things
704 * along to ensure that we don't leak blocks due to open-unlinked files. However, by
705 * virtue of being in this function already, we can call hfs_cnode_teardown, which
706 * will release blocks held by open-unlinked files, and mark them C_NOEXISTS so that
707 * there's no entry in the catalog and no backing store anymore. If that's the case,
708 * then we really don't care all that much when the vnode actually goes through reclaim.
709 * Further, the HFS VNOPs that manipulated the namespace in order to create the open-
710 * unlinked file in the first place should have already called vnode_recycle on the vnode
711 * to guarantee that it would go through reclaim in a speedy way.
714 if (cp
->c_flag
& C_NOEXISTS
) {
716 * If the cnode has already had its cat entry removed, then
717 * just skip to the end. We don't need to do anything here.
723 if ((v_type
== VREG
|| v_type
== VLNK
)) {
724 hfs_lock_truncate(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
728 (void) hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
731 * Call cnode_teardown to push out dirty blocks to disk, release open-unlinked
732 * files' blocks from being in use, and move the cnode from C_DELETED to C_NOEXISTS.
734 error
= hfs_cnode_teardown (vp
, ap
->a_context
, 0);
/*
 * Drop the truncate lock before unlocking the cnode
 * (which can potentially perform a vnode_put and
 * recycle the vnode, which in turn might require the
 * truncate lock).
 */
742 if (took_trunc_lock
) {
743 hfs_unlock_truncate(cp
, HFS_LOCK_DEFAULT
);
755 * File clean-up (zero fill and shrink peof).
759 hfs_filedone(struct vnode
*vp
, vfs_context_t context
,
760 hfs_file_done_opts_t opts
)
764 struct hfsmount
*hfsmp
;
765 struct rl_entry
*invalid_range
;
767 u_int32_t blks
, blocksize
;
768 /* flags for zero-filling sparse ranges */
769 int cluster_flags
= IO_CLOSE
;
770 int cluster_zero_flags
= IO_HEADZEROFILL
| IO_NOZERODIRTY
| IO_NOCACHE
;
777 if ((hfsmp
->hfs_flags
& HFS_READ_ONLY
) || (fp
->ff_blocks
== 0))
780 if (!ISSET(opts
, HFS_FILE_DONE_NO_SYNC
)) {
783 * Figure out if we need to do synchronous IO.
785 * If the file represents a content-protected file, we may need
786 * to issue synchronous IO when we dispatch to the cluster layer.
787 * If we didn't, then the IO would go out to the disk asynchronously.
788 * If the vnode hits the end of inactive before getting reclaimed, the
789 * content protection keys would be wiped/bzeroed out, and we'd end up
790 * trying to issue the IO with an invalid key. This will lead to file
791 * corruption. IO_SYNC will force the cluster_push to wait until all IOs
792 * have completed (though they may be in the track cache).
794 if (cp_fs_protected(VTOVFS(vp
))) {
795 cluster_flags
|= IO_SYNC
;
796 cluster_zero_flags
|= IO_SYNC
;
801 (void) cluster_push(vp
, cluster_flags
);
802 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
806 * Explicitly zero out the areas of file
807 * that are currently marked invalid.
809 while ((invalid_range
= TAILQ_FIRST(&fp
->ff_invalidranges
))) {
810 off_t start
= invalid_range
->rl_start
;
811 off_t end
= invalid_range
->rl_end
;
/*
 * The range about to be written must be validated
 * first, so that VNOP_BLOCKMAP() will return the
 * appropriate mapping for the cluster code:
 */
817 rl_remove(start
, end
, &fp
->ff_invalidranges
);
820 (void) cluster_write(vp
, (struct uio
*) 0,
821 leof
, end
+ 1, start
, (off_t
)0, cluster_zero_flags
);
822 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
823 cp
->c_flag
|= C_MODIFIED
;
825 cp
->c_flag
&= ~C_ZFWANTSYNC
;
827 blocksize
= VTOVCB(vp
)->blockSize
;
828 blks
= leof
/ blocksize
;
829 if (((off_t
)blks
* (off_t
)blocksize
) != leof
)
/*
 * Shrink the peof to the smallest size necessary to contain the leof.
 */
834 if (blks
< fp
->ff_blocks
) {
835 (void) hfs_truncate(vp
, leof
, IO_NDELAY
, HFS_TRUNCATE_SKIPTIMES
, context
);
838 if (!ISSET(opts
, HFS_FILE_DONE_NO_SYNC
)) {
840 (void) cluster_push(vp
, cluster_flags
);
841 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
844 * If the hfs_truncate didn't happen to flush the vnode's
845 * information out to disk, force it to be updated now that
846 * all invalid ranges have been zero-filled and validated:
848 if (cp
->c_flag
& C_MODIFIED
) {
858 * Reclaim a cnode so that it can be used for other purposes.
861 hfs_vnop_reclaim(struct vnop_reclaim_args
*ap
)
863 struct vnode
*vp
= ap
->a_vp
;
865 struct filefork
*fp
= NULL
;
866 struct filefork
*altfp
= NULL
;
867 struct hfsmount
*hfsmp
= VTOHFS(vp
);
868 vfs_context_t ctx
= ap
->a_context
;
869 int reclaim_cnode
= 0;
873 v_type
= vnode_vtype(vp
);
877 * We don't take the truncate lock since by the time reclaim comes along,
878 * all dirty pages have been synced and nobody should be competing
879 * with us for this thread.
881 (void) hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
884 * Sync to disk any remaining data in the cnode/vnode. This includes
885 * a call to hfs_update if the cnode has outbound data.
887 * If C_NOEXISTS is set on the cnode, then there's nothing teardown needs to do
888 * because the catalog entry for this cnode is already gone.
890 if (!ISSET(cp
->c_flag
, C_NOEXISTS
)) {
891 err
= hfs_cnode_teardown(vp
, ctx
, 1);
895 * Keep track of an inactive hot file.
897 if (!vnode_isdir(vp
) &&
898 !vnode_issystem(vp
) &&
899 !(cp
->c_flag
& (C_DELETED
| C_NOEXISTS
)) ) {
900 (void) hfs_addhotfile(vp
);
902 vnode_removefsref(vp
);
905 * Find file fork for this vnode (if any)
906 * Also check if another fork is active
908 if (cp
->c_vp
== vp
) {
910 altfp
= cp
->c_rsrcfork
;
912 cp
->c_datafork
= NULL
;
914 } else if (cp
->c_rsrc_vp
== vp
) {
916 altfp
= cp
->c_datafork
;
918 cp
->c_rsrcfork
= NULL
;
919 cp
->c_rsrc_vp
= NULL
;
921 panic("hfs_vnop_reclaim: vp points to wrong cnode (vp=%p cp->c_vp=%p cp->c_rsrc_vp=%p)\n", vp
, cp
->c_vp
, cp
->c_rsrc_vp
);
924 * On the last fork, remove the cnode from its hash chain.
927 /* If we can't remove it then the cnode must persist! */
928 if (hfs_chashremove(hfsmp
, cp
) == 0)
931 * Remove any directory hints
933 if (vnode_isdir(vp
)) {
934 hfs_reldirhints(cp
, 0);
937 if(cp
->c_flag
& C_HARDLINK
) {
941 /* Release the file fork and related data */
943 /* Dump cached symlink data */
944 if (vnode_islnk(vp
) && (fp
->ff_symlinkptr
!= NULL
)) {
945 FREE(fp
->ff_symlinkptr
, M_TEMP
);
947 FREE_ZONE(fp
, sizeof(struct filefork
), M_HFSFORK
);
951 * If there was only one active fork then we can release the cnode.
954 hfs_chashwakeup(hfsmp
, cp
, H_ALLOC
| H_TRANSIT
);
956 hfs_reclaim_cnode(cp
);
960 * cnode in use. If it is a directory, it could have
961 * no live forks. Just release the lock.
966 vnode_clearfsnode(vp
);
/*
 * Vnode operation vectors, declared here and defined in another
 * translation unit.  Each is a pointer to a table of
 * `int (*)(void *)` entries.
 *
 * NOTE(review): the original line numbering has gaps around hfs_fifoop_p
 * and hfs_std_vnodeop_p, which suggests preprocessor guards were lost from
 * this view -- confirm against the pristine file before relying on these
 * being unconditional.
 */
extern int (**hfs_vnodeop_p)(void *);
extern int (**hfs_specop_p)(void *);
extern int (**hfs_fifoop_p)(void *);
extern int (**hfs_std_vnodeop_p)(void *);
982 * hfs_getnewvnode - get new default vnode
984 * The vnode is returned with an iocount and the cnode locked
988 struct hfsmount
*hfsmp
,
990 struct componentname
*cnp
,
991 struct cat_desc
*descp
,
993 struct cat_attr
*attrp
,
994 struct cat_fork
*forkp
,
998 struct mount
*mp
= HFSTOVFS(hfsmp
);
999 struct vnode
*vp
= NULL
;
1000 struct vnode
**cvpp
;
1001 struct vnode
*tvp
= NULLVP
;
1002 struct cnode
*cp
= NULL
;
1003 struct filefork
*fp
= NULL
;
1004 int hfs_standard
= 0;
1009 struct vnode_fsparam vfsp
;
1015 hfs_standard
= (hfsmp
->hfs_flags
& HFS_STANDARD
);
1017 if (attrp
->ca_fileid
== 0) {
1023 if (IFTOVT(attrp
->ca_mode
) == VFIFO
) {
1028 vtype
= IFTOVT(attrp
->ca_mode
);
1029 issystemfile
= (descp
->cd_flags
& CD_ISMETA
) && (vtype
== VREG
);
1030 wantrsrc
= flags
& GNV_WANTRSRC
;
1032 /* Sanity check the vtype and mode */
1033 if (vtype
== VBAD
) {
1034 /* Mark the FS as corrupt and bail out */
1035 hfs_mark_inconsistent(hfsmp
, HFS_INCONSISTENCY_DETECTED
);
1039 /* Zero out the out_flags */
1042 #ifdef HFS_CHECK_LOCK_ORDER
/*
 * The only cases where it's permissible to hold the parent cnode
 * lock are during a create operation (hfs_makenode) or when
 * we don't need the cnode lock (GNV_SKIPLOCK).
 */
1048 if ((dvp
!= NULL
) &&
1049 (flags
& (GNV_CREATE
| GNV_SKIPLOCK
)) == 0 &&
1050 VTOC(dvp
)->c_lockowner
== current_thread()) {
1051 panic("hfs_getnewvnode: unexpected hold of parent cnode %p", VTOC(dvp
));
1053 #endif /* HFS_CHECK_LOCK_ORDER */
1056 * Get a cnode (new or existing)
1058 cp
= hfs_chash_getcnode(hfsmp
, attrp
->ca_fileid
, vpp
, wantrsrc
,
1059 (flags
& GNV_SKIPLOCK
), out_flags
, &hflags
);
1062 * If the id is no longer valid for lookups we'll get back a NULL cp.
1069 * If we get a cnode/vnode pair out of hfs_chash_getcnode, then update the
1070 * descriptor in the cnode as needed if the cnode represents a hardlink.
1071 * We want the caller to get the most up-to-date copy of the descriptor
1072 * as possible. However, we only do anything here if there was a valid vnode.
1073 * If there isn't a vnode, then the cnode is brand new and needs to be initialized
1074 * as it doesn't have a descriptor or cat_attr yet.
1076 * If we are about to replace the descriptor with the user-supplied one, then validate
1077 * that the descriptor correctly acknowledges this item is a hardlink. We could be
1078 * subject to a race where the calling thread invoked cat_lookup, got a valid lookup
1079 * result but the file was not yet a hardlink. With sufficient delay between there
1080 * and here, we might accidentally copy in the raw inode ID into the descriptor in the
1081 * call below. If the descriptor's CNID is the same as the fileID then it must
1082 * not yet have been a hardlink when the lookup occurred.
1085 if (!(hfs_checkdeleted(cp
))) {
1086 if ((cp
->c_flag
& C_HARDLINK
) && descp
->cd_nameptr
&& descp
->cd_namelen
> 0) {
/* If the cnode is uninitialized, its c_attr will be zeroed out; the cnids won't match. */
1088 if ((descp
->cd_cnid
== cp
->c_attr
.ca_fileid
) &&
1089 (attrp
->ca_linkcount
!= cp
->c_attr
.ca_linkcount
)){
1090 if ((flags
& GNV_SKIPLOCK
) == 0) {
1092 * Then we took the lock. Drop it before calling
1093 * vnode_put, which may invoke hfs_vnop_inactive and need to take
1094 * the cnode lock again.
1100 * Emit ERECYCLE and GNV_CAT_ATTRCHANGED to
1101 * force a re-drive in the lookup routine.
1102 * Drop the iocount on the vnode obtained from
1103 * chash_getcnode if needed.
1111 * If we raced with VNOP_RECLAIM for this vnode, the hash code could
1112 * have observed it after the c_vp or c_rsrc_vp fields had been torn down;
1113 * the hash code peeks at those fields without holding the cnode lock because
1114 * it needs to be fast. As a result, we may have set H_ATTACH in the chash
1115 * call above. Since we're bailing out, unset whatever flags we just set, and
1116 * wake up all waiters for this cnode.
1119 hfs_chashwakeup(hfsmp
, cp
, hflags
);
1122 *out_flags
= GNV_CAT_ATTRCHANGED
;
1127 * Otherwise, CNID != fileid. Go ahead and copy in the new descriptor.
1129 * Replacing the descriptor here is fine because we looked up the item without
1130 * a vnode in hand before. If a vnode existed, its identity must be attached to this
1131 * item. We are not susceptible to the lookup fastpath issue at this point.
1133 replace_desc(cp
, descp
);
1136 * This item was a hardlink, and its name needed to be updated. By replacing the
1137 * descriptor above, we've now updated the cnode's internal representation of
1138 * its link ID/CNID, parent ID, and its name. However, VFS must now be alerted
1139 * to the fact that this vnode now has a new parent, since we cannot guarantee
1140 * that the new link lived in the same directory as the alternative name for
1143 if ((*vpp
!= NULL
) && (cnp
)) {
1144 /* we could be requesting the rsrc of a hardlink file... */
1145 vnode_update_identity (*vpp
, dvp
, cnp
->cn_nameptr
, cnp
->cn_namelen
, cnp
->cn_hash
,
1146 (VNODE_UPDATE_PARENT
| VNODE_UPDATE_NAME
));
1152 /* Check if we found a matching vnode */
1158 * If this is a new cnode then initialize it.
1160 if (ISSET(cp
->c_hflag
, H_ALLOC
)) {
1161 lck_rw_init(&cp
->c_truncatelock
, hfs_rwlock_group
, hfs_lock_attr
);
1166 /* Make sure it's still valid (i.e. exists on disk). */
1167 if (!(flags
& GNV_CREATE
)) {
1169 if (!hfs_valid_cnode (hfsmp
, dvp
, (wantrsrc
? NULL
: cnp
), cp
->c_fileid
, attrp
, &error
)) {
1170 hfs_chash_abort(hfsmp
, cp
);
1171 if ((flags
& GNV_SKIPLOCK
) == 0) {
1174 hfs_reclaim_cnode(cp
);
1177 * If we hit this case, that means that the entry was there in the catalog when
1178 * we did a cat_lookup earlier. Think hfs_lookup. However, in between the time
1179 * that we checked the catalog and the time we went to get a vnode/cnode for it,
1180 * it had been removed from the namespace and the vnode totally reclaimed. As a result,
1181 * it's not there in the catalog during the check in hfs_valid_cnode and we bubble out
1182 * an ENOENT. To indicate to the caller that they should really double-check the
1183 * entry (it could have been renamed over and gotten a new fileid), we mark a bit
1184 * in the output flags.
1186 if (error
== ENOENT
) {
1187 *out_flags
= GNV_CAT_DELETED
;
1192 * Also, we need to protect the cat_attr acquired during hfs_lookup and passed into
1193 * this function as an argument because the catalog may have changed w.r.t hardlink
1194 * link counts and the firstlink field. If that validation check fails, then let
1195 * lookup re-drive itself to get valid/consistent data with the same failure condition below.
1197 if (error
== ERECYCLE
) {
1198 *out_flags
= GNV_CAT_ATTRCHANGED
;
1203 bcopy(attrp
, &cp
->c_attr
, sizeof(struct cat_attr
));
1204 bcopy(descp
, &cp
->c_desc
, sizeof(struct cat_desc
));
1206 /* The name was inherited so clear descriptor state... */
1207 descp
->cd_namelen
= 0;
1208 descp
->cd_nameptr
= NULL
;
1209 descp
->cd_flags
&= ~CD_HASBUF
;
1212 if ((vtype
== VREG
|| vtype
== VDIR
) &&
1213 ((descp
->cd_cnid
!= attrp
->ca_fileid
) ||
1214 (attrp
->ca_recflags
& kHFSHasLinkChainMask
))) {
1215 cp
->c_flag
|= C_HARDLINK
;
1218 * Fix-up dir link counts.
1220 * Earlier versions of Leopard used ca_linkcount for posix
1221 * nlink support (effectively the sub-directory count + 2).
1222 * That is now accomplished using the ca_dircount field with
1223 * the corresponding kHFSHasFolderCountMask flag.
1225 * For directories the ca_linkcount is the true link count,
1226 * tracking the number of actual hardlinks to a directory.
1228 * We only do this if the mount has HFS_FOLDERCOUNT set;
1229 * at the moment, we only set that for HFSX volumes.
1231 if ((hfsmp
->hfs_flags
& HFS_FOLDERCOUNT
) &&
1233 !(attrp
->ca_recflags
& kHFSHasFolderCountMask
) &&
1234 (cp
->c_attr
.ca_linkcount
> 1)) {
1235 if (cp
->c_attr
.ca_entries
== 0)
1236 cp
->c_attr
.ca_dircount
= 0;
1238 cp
->c_attr
.ca_dircount
= cp
->c_attr
.ca_linkcount
- 2;
1240 cp
->c_attr
.ca_linkcount
= 1;
1241 cp
->c_attr
.ca_recflags
|= kHFSHasFolderCountMask
;
1242 if ( !(hfsmp
->hfs_flags
& HFS_READ_ONLY
) )
1243 cp
->c_flag
|= C_MODIFIED
;
1246 if (hfsmp
->hfs_flags
& HFS_QUOTAS
) {
1247 for (i
= 0; i
< MAXQUOTAS
; i
++)
1248 cp
->c_dquot
[i
] = NODQUOT
;
1251 /* Mark the output flag that we're vending a new cnode */
1252 *out_flags
|= GNV_NEW_CNODE
;
1255 if (vtype
== VDIR
) {
1256 if (cp
->c_vp
!= NULL
)
1257 panic("hfs_getnewvnode: orphaned vnode (data)");
1260 if (forkp
&& attrp
->ca_blocks
< forkp
->cf_blocks
)
1261 panic("hfs_getnewvnode: bad ca_blocks (too small)");
1263 * Allocate and initialize a file fork...
1265 MALLOC_ZONE(fp
, struct filefork
*, sizeof(struct filefork
),
1266 M_HFSFORK
, M_WAITOK
);
1269 bcopy(forkp
, &fp
->ff_data
, sizeof(struct cat_fork
));
1271 bzero(&fp
->ff_data
, sizeof(struct cat_fork
));
1272 rl_init(&fp
->ff_invalidranges
);
1273 fp
->ff_sysfileinfo
= 0;
1276 if (cp
->c_rsrcfork
!= NULL
)
1277 panic("hfs_getnewvnode: orphaned rsrc fork");
1278 if (cp
->c_rsrc_vp
!= NULL
)
1279 panic("hfs_getnewvnode: orphaned vnode (rsrc)");
1280 cp
->c_rsrcfork
= fp
;
1281 cvpp
= &cp
->c_rsrc_vp
;
1282 if ( (tvp
= cp
->c_vp
) != NULLVP
)
1283 cp
->c_flag
|= C_NEED_DVNODE_PUT
;
1285 if (cp
->c_datafork
!= NULL
)
1286 panic("hfs_getnewvnode: orphaned data fork");
1287 if (cp
->c_vp
!= NULL
)
1288 panic("hfs_getnewvnode: orphaned vnode (data)");
1289 cp
->c_datafork
= fp
;
1291 if ( (tvp
= cp
->c_rsrc_vp
) != NULLVP
)
1292 cp
->c_flag
|= C_NEED_RVNODE_PUT
;
1295 if (tvp
!= NULLVP
) {
1297 * grab an iocount on the vnode we weren't
1298 * interested in (i.e. we want the resource fork
1299 * but the cnode already has the data fork)
1300 * to prevent it from being
1301 * recycled by us when we call vnode_create
1302 * which will result in a deadlock when we
1303 * try to take the cnode lock in hfs_vnop_fsync or
1304 * hfs_vnop_reclaim... vnode_get can be called here
1305 * because we already hold the cnode lock which will
1306 * prevent the vnode from changing identity until
1307 * we drop it.. vnode_get will not block waiting for
1308 * a change of state... however, it will return an
1309 * error if the current iocount == 0 and we've already
1310 * started to terminate the vnode... we don't need/want to
1311 * grab an iocount in that case since we can't cause
1312 the filesystem to be re-entered on this thread for this vp
1314 * the matching vnode_put will happen in hfs_unlock
1315 * after we've dropped the cnode lock
1317 if ( vnode_get(tvp
) != 0)
1318 cp
->c_flag
&= ~(C_NEED_RVNODE_PUT
| C_NEED_DVNODE_PUT
);
1321 vfsp
.vnfs_vtype
= vtype
;
1322 vfsp
.vnfs_str
= "hfs";
1323 if ((cp
->c_flag
& C_HARDLINK
) && (vtype
== VDIR
)) {
1324 vfsp
.vnfs_dvp
= NULL
; /* no parent for me! */
1325 vfsp
.vnfs_cnp
= NULL
; /* no name for me! */
1327 vfsp
.vnfs_dvp
= dvp
;
1328 vfsp
.vnfs_cnp
= cnp
;
1330 vfsp
.vnfs_fsnode
= cp
;
1333 * Special Case HFS Standard VNOPs from HFS+, since
1334 * HFS standard is readonly/deprecated as of 10.6
1338 if (vtype
== VFIFO
)
1339 vfsp
.vnfs_vops
= hfs_fifoop_p
;
1342 if (vtype
== VBLK
|| vtype
== VCHR
)
1343 vfsp
.vnfs_vops
= hfs_specop_p
;
1345 else if (hfs_standard
)
1346 vfsp
.vnfs_vops
= hfs_std_vnodeop_p
;
1349 vfsp
.vnfs_vops
= hfs_vnodeop_p
;
1351 if (vtype
== VBLK
|| vtype
== VCHR
)
1352 vfsp
.vnfs_rdev
= attrp
->ca_rdev
;
1357 vfsp
.vnfs_filesize
= forkp
->cf_size
;
1359 vfsp
.vnfs_filesize
= 0;
1361 vfsp
.vnfs_flags
= VNFS_ADDFSREF
;
1362 if (dvp
== NULLVP
|| cnp
== NULL
|| !(cnp
->cn_flags
& MAKEENTRY
) || (flags
& GNV_NOCACHE
))
1363 vfsp
.vnfs_flags
|= VNFS_NOCACHE
;
1365 /* Tag system files */
1366 vfsp
.vnfs_marksystem
= issystemfile
;
1368 /* Tag root directory */
1369 if (descp
->cd_cnid
== kHFSRootFolderID
)
1370 vfsp
.vnfs_markroot
= 1;
1372 vfsp
.vnfs_markroot
= 0;
1374 if ((retval
= vnode_create(VNCREATE_FLAVOR
, VCREATESIZE
, &vfsp
, cvpp
))) {
1376 if (fp
== cp
->c_datafork
)
1377 cp
->c_datafork
= NULL
;
1379 cp
->c_rsrcfork
= NULL
;
1381 FREE_ZONE(fp
, sizeof(struct filefork
), M_HFSFORK
);
1384 * If this is a newly created cnode or a vnode reclaim
1385 * occurred during the attachment, then cleanup the cnode.
1387 if ((cp
->c_vp
== NULL
) && (cp
->c_rsrc_vp
== NULL
)) {
1388 hfs_chash_abort(hfsmp
, cp
);
1389 hfs_reclaim_cnode(cp
);
1392 hfs_chashwakeup(hfsmp
, cp
, H_ALLOC
| H_ATTACH
);
1393 if ((flags
& GNV_SKIPLOCK
) == 0){
1401 vnode_settag(vp
, VT_HFS
);
1402 if (cp
->c_flag
& C_HARDLINK
) {
1403 vnode_setmultipath(vp
);
1406 * Tag resource fork vnodes as needing a VNOP_INACTIVE
1407 * so that any deferred removes (open unlinked files)
1408 * have the chance to process the resource fork.
1410 if (VNODE_IS_RSRC(vp
)) {
1413 KERNEL_DEBUG_CONSTANT(HFSDBG_GETNEWVNODE
, VM_KERNEL_ADDRPERM(cp
->c_vp
), VM_KERNEL_ADDRPERM(cp
->c_rsrc_vp
), 0, 0, 0);
1415 /* Force VL_NEEDINACTIVE on this vnode */
1416 err
= vnode_ref(vp
);
1421 hfs_chashwakeup(hfsmp
, cp
, H_ALLOC
| H_ATTACH
);
1424 * Stop tracking an active hot file.
1426 if (!(flags
& GNV_CREATE
) && (vtype
!= VDIR
) && !issystemfile
) {
1427 (void) hfs_removehotfile(vp
);
1431 /* Initialize the cp data structures. The key should be in place now. */
1432 if (!issystemfile
&& (*out_flags
& GNV_NEW_CNODE
)) {
1433 cp_entry_init(cp
, mp
);
1443 hfs_reclaim_cnode(struct cnode
*cp
)
1448 for (i
= 0; i
< MAXQUOTAS
; i
++) {
1449 if (cp
->c_dquot
[i
] != NODQUOT
) {
1450 dqreclaim(cp
->c_dquot
[i
]);
1451 cp
->c_dquot
[i
] = NODQUOT
;
1457 * If the descriptor has a name then release it
1459 if ((cp
->c_desc
.cd_flags
& CD_HASBUF
) && (cp
->c_desc
.cd_nameptr
!= 0)) {
1460 const char *nameptr
;
1462 nameptr
= (const char *) cp
->c_desc
.cd_nameptr
;
1463 cp
->c_desc
.cd_nameptr
= 0;
1464 cp
->c_desc
.cd_flags
&= ~CD_HASBUF
;
1465 cp
->c_desc
.cd_namelen
= 0;
1466 vfs_removename(nameptr
);
1470 * We only call this function if we are in hfs_vnop_reclaim and
1471 * attempting to reclaim a cnode with only one live fork. Because the vnode
1472 * went through reclaim, any future attempts to use this item will have to
1473 * go through lookup again, which will need to create a new vnode. Thus,
1474 * destroying the locks below is safe.
1477 lck_rw_destroy(&cp
->c_rwlock
, hfs_rwlock_group
);
1478 lck_rw_destroy(&cp
->c_truncatelock
, hfs_rwlock_group
);
1481 decmpfs_cnode_destroy(cp
->c_decmp
);
1482 FREE_ZONE(cp
->c_decmp
, sizeof(*(cp
->c_decmp
)), M_DECMPFS_CNODE
);
1486 cp_entry_destroy(cp
->c_cpentry
);
1487 cp
->c_cpentry
= NULL
;
1491 bzero(cp
, sizeof(struct cnode
));
1492 FREE_ZONE(cp
, sizeof(struct cnode
), M_HFSNODE
);
1499 * This function is used to validate data that is stored in-core against what is contained
1500 * in the catalog. Common uses include validating that the parent-child relationship still exists
1501 * for a specific directory entry (guaranteeing it has not been renamed into a different spot) at
1502 * the point of the check.
1505 hfs_valid_cnode(struct hfsmount
*hfsmp
, struct vnode
*dvp
, struct componentname
*cnp
,
1506 cnid_t cnid
, struct cat_attr
*cattr
, int *error
)
1508 struct cat_attr attr
;
1509 struct cat_desc cndesc
;
1513 /* System files are always valid */
1514 if (cnid
< kHFSFirstUserCatalogNodeID
) {
1519 /* XXX optimization: check write count in dvp */
1521 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_CATALOG
, HFS_SHARED_LOCK
);
1525 struct cat_fork fork
;
1526 bzero(&cndesc
, sizeof(cndesc
));
1527 cndesc
.cd_nameptr
= (const u_int8_t
*)cnp
->cn_nameptr
;
1528 cndesc
.cd_namelen
= cnp
->cn_namelen
;
1529 cndesc
.cd_parentcnid
= VTOC(dvp
)->c_fileid
;
1530 cndesc
.cd_hint
= VTOC(dvp
)->c_childhint
;
1533 * We have to be careful when calling cat_lookup. The result argument
1534 * 'attr' may get different results based on whether or not you ask
1535 * for the filefork to be supplied as output. This is because cat_lookupbykey
1536 * will attempt to do basic validation/smoke tests against the resident
1537 * extents if there are no overflow extent records, but it needs someplace
1538 * in memory to store the on-disk fork structures.
1540 * Since hfs_lookup calls cat_lookup with a filefork argument, we should
1541 * do the same here, to verify that block count differences are not
1542 * due to calling the function with different styles. cat_lookupbykey
1543 * will request the volume be fsck'd if there is true on-disk corruption
1544 * where the number of blocks does not match the number generated by
1545 * summing the number of blocks in the resident extents.
1548 lookup
= cat_lookup (hfsmp
, &cndesc
, 0, 0, NULL
, &attr
, &fork
, NULL
);
1550 if ((lookup
== 0) && (cnid
== attr
.ca_fileid
)) {
1559 * In hfs_getnewvnode, we may encounter a time-of-check vs. time-of-vnode creation
1560 * race. Specifically, if there is no vnode/cnode pair for the directory entry
1561 * being looked up, we have to go to the catalog. But since we don't hold any locks (aside
1562 * from the dvp in 'shared' mode) there is nothing to protect us against the catalog record
1563 * changing in between the time we do the cat_lookup there and the time we re-grab the
1564 * catalog lock above to do another cat_lookup.
1566 * However, we need to check more than just the CNID and parent-child name relationships above.
1567 * Hardlinks can suffer the same race in the following scenario: Suppose we do a
1568 * cat_lookup, and find a leaf record and a raw inode for a hardlink. Now, we have
1569 * the cat_attr in hand (passed in above). But in between then and now, the vnode was
1570 * created by a competing hfs_getnewvnode call, and is manipulated and reclaimed before we get
1571 * a chance to do anything. This is possible if there are a lot of threads thrashing around
1572 * with the cnode hash. In this case, if we don't check/validate the cat_attr in-hand, we will
1573 * blindly stuff it into the cnode, which will make the in-core data inconsistent with what is
1574 * on disk. So validate the cat_attr below, if required. This race cannot happen if the cnode/vnode
1575 * already exists, as it does in the case of rename and delete.
1577 if (stillvalid
&& cattr
!= NULL
) {
1578 if (cattr
->ca_linkcount
!= attr
.ca_linkcount
) {
1584 if (cattr
->ca_union1
.cau_linkref
!= attr
.ca_union1
.cau_linkref
) {
1590 if (cattr
->ca_union3
.cau_firstlink
!= attr
.ca_union3
.cau_firstlink
) {
1596 if (cattr
->ca_union2
.cau_blocks
!= attr
.ca_union2
.cau_blocks
) {
1603 if (cat_idlookup(hfsmp
, cnid
, 0, 0, NULL
, NULL
, NULL
) == 0) {
1612 hfs_systemfile_unlock(hfsmp
, lockflags
);
1614 return (stillvalid
);
1619 * Per HI and Finder requirements, HFS should add in the
1620 * date/time that a particular directory entry was added
1621 * to the containing directory.
1622 * This is stored in the extended Finder Info for the
1625 * Note that this field is also set explicitly in the hfs_vnop_setxattr code.
1626 * We must ignore user attempts to set this part of the finderinfo, and
1627 * so we need to save a local copy of the date added, write in the user
1628 * finderinfo, then stuff the value back in.
1630 void hfs_write_dateadded (struct cat_attr
*attrp
, u_int32_t dateadded
) {
1631 u_int8_t
*finfo
= NULL
;
1633 /* overlay the FinderInfo to the correct pointer, and advance */
1634 finfo
= (u_int8_t
*)attrp
->ca_finderinfo
;
1638 * Make sure to write it out as big endian, since that's how
1639 * finder info is defined.
1641 * NOTE: This is a Unix-epoch timestamp, not a HFS/Traditional Mac timestamp.
1643 if (S_ISREG(attrp
->ca_mode
)) {
1644 struct FndrExtendedFileInfo
*extinfo
= (struct FndrExtendedFileInfo
*)finfo
;
1645 extinfo
->date_added
= OSSwapHostToBigInt32(dateadded
);
1646 attrp
->ca_recflags
|= kHFSHasDateAddedMask
;
1648 else if (S_ISDIR(attrp
->ca_mode
)) {
1649 struct FndrExtendedDirInfo
*extinfo
= (struct FndrExtendedDirInfo
*)finfo
;
1650 extinfo
->date_added
= OSSwapHostToBigInt32(dateadded
);
1651 attrp
->ca_recflags
|= kHFSHasDateAddedMask
;
1653 /* If it were neither directory/file, then we'd bail out */
1658 hfs_get_dateadded_internal(const uint8_t *finderinfo
, mode_t mode
)
1660 u_int8_t
*finfo
= NULL
;
1661 u_int32_t dateadded
= 0;
1665 /* overlay the FinderInfo to the correct pointer, and advance */
1666 finfo
= (u_int8_t
*)finderinfo
+ 16;
1669 * FinderInfo is written out in big endian... make sure to convert it to host
1670 * native before we use it.
1672 if (S_ISREG(mode
)) {
1673 struct FndrExtendedFileInfo
*extinfo
= (struct FndrExtendedFileInfo
*)finfo
;
1674 dateadded
= OSSwapBigToHostInt32 (extinfo
->date_added
);
1676 else if (S_ISDIR(mode
)) {
1677 struct FndrExtendedDirInfo
*extinfo
= (struct FndrExtendedDirInfo
*)finfo
;
1678 dateadded
= OSSwapBigToHostInt32 (extinfo
->date_added
);
1685 hfs_get_dateadded(struct cnode
*cp
)
1687 if ((cp
->c_attr
.ca_recflags
& kHFSHasDateAddedMask
) == 0) {
1688 /* Date added was never set. Return 0. */
1692 return (hfs_get_dateadded_internal((u_int8_t
*)cp
->c_finderinfo
,
1693 cp
->c_attr
.ca_mode
));
1697 hfs_get_dateadded_from_blob(const uint8_t *finderinfo
, mode_t mode
)
1699 return (hfs_get_dateadded_internal(finderinfo
, mode
));
1703 * Per HI and Finder requirements, HFS maintains a "write/generation
1704 * count" for each file that is incremented on any write & pageout.
1705 * It should start at 1 to reserve "0" as a special value. If it
1706 * should ever wrap around, it will skip using 0.
1708 * Note that finderinfo is manipulated in hfs_vnop_setxattr and care
1709 * is and should be taken to ignore user attempts to set the part of
1710 * the finderinfo that records the generation counter.
1712 * Any change to the generation counter *must* not be visible before
1713 * the change that caused it (for obvious reasons), and given the
1714 * limitations of our current architecture, the change to the
1715 * generation counter may occur some time afterwards (particularly in
1716 * the case where a file is mapped writable---more on that below).
1718 * We make no guarantees about the consistency of a file. In other
1719 * words, a reader that is operating concurrently with a writer might
1720 * see some, but not all, of the writer's changes, and the generation
1721 * counter will *not* necessarily tell you this has happened. To
1722 * enforce consistency, clients must make their own arrangements
1723 * e.g. use file locking.
1725 * We treat files that are mapped writable as a special case: when
1726 * that happens, clients requesting the generation count will be told
1727 * it has a generation count of zero and they use that knowledge as a
1728 * hint that the file is changing and it therefore might be prudent to
1729 * wait until it is no longer mapped writable. Clients should *not*
1730 * rely on this behaviour however; we might decide that it's better
1731 * for us to publish the fact that a file is mapped writable via
1732 * alternate means and return the generation counter when it is mapped
1733 * writable as it still has some, albeit limited, use. We reserve the
1734 * right to make this change.
1736 * Lastly, it's important to realise that because data and metadata
1737 * take different paths through the system, it's possible upon crash
1738 * or sudden power loss and after a restart, that a change may be
1739 * visible to the rest of the system without a corresponding change to
1740 * the generation counter. The reverse may also be true, but for all
1741 * practical applications this shouldn't be an issue.
1743 void hfs_write_gencount (struct cat_attr
*attrp
, uint32_t gencount
) {
1744 u_int8_t
*finfo
= NULL
;
1746 /* overlay the FinderInfo to the correct pointer, and advance */
1747 finfo
= (u_int8_t
*)attrp
->ca_finderinfo
;
1751 * Make sure to write it out as big endian, since that's how
1752 * finder info is defined.
1754 * Generation count is only supported for files.
1756 if (S_ISREG(attrp
->ca_mode
)) {
1757 struct FndrExtendedFileInfo
*extinfo
= (struct FndrExtendedFileInfo
*)finfo
;
1758 extinfo
->write_gen_counter
= OSSwapHostToBigInt32(gencount
);
1761 /* If it were neither directory/file, then we'd bail out */
1766 * Increase the gen count by 1; if it wraps around to 0, increment by
1767 * two. The cnode *must* be locked exclusively by the caller.
1769 * You may think holding the lock is unnecessary because we only need
1770 * to change the counter, but consider this sequence of events: thread
1771 * A calls hfs_incr_gencount and the generation counter is 2 upon
1772 * entry. A context switch occurs and thread B increments the counter
1773 * to 3, thread C now gets the generation counter (for whatever
1774 * purpose), and then another thread makes another change and the
1775 * generation counter is incremented again---it's now 4. Now thread A
1776 * continues and it sets the generation counter back to 3. So you can
1777 * see, thread C would miss the change that caused the generation
1778 * counter to increment to 4 and for this reason the cnode *must*
1779 * always be locked exclusively.
1781 uint32_t hfs_incr_gencount (struct cnode
*cp
) {
1782 u_int8_t
*finfo
= NULL
;
1783 u_int32_t gcount
= 0;
1785 /* overlay the FinderInfo to the correct pointer, and advance */
1786 finfo
= (u_int8_t
*)cp
->c_finderinfo
;
1790 * FinderInfo is written out in big endian... make sure to convert it to host
1791 * native before we use it.
1793 * NOTE: the write_gen_counter is stored in the same location in both the
1794 * FndrExtendedFileInfo and FndrExtendedDirInfo structs (it's the
1795 * last 32-bit word) so it is safe to have one code path here.
1797 if (S_ISDIR(cp
->c_attr
.ca_mode
) || S_ISREG(cp
->c_attr
.ca_mode
)) {
1798 struct FndrExtendedFileInfo
*extinfo
= (struct FndrExtendedFileInfo
*)finfo
;
1799 gcount
= OSSwapBigToHostInt32 (extinfo
->write_gen_counter
);
1801 /* Was it zero to begin with (file originated in 10.8 or earlier?) */
1809 /* Did it wrap around ? */
1813 extinfo
->write_gen_counter
= OSSwapHostToBigInt32 (gcount
);
1815 SET(cp
->c_flag
, C_MODIFIED
);
1825 * There is no need for any locks here (other than an iocount on an
1826 * associated vnode) because reading and writing an aligned 32 bit
1827 * integer should be atomic on all platforms we support.
1830 hfs_get_gencount_internal(const uint8_t *finderinfo
, mode_t mode
)
1832 u_int8_t
*finfo
= NULL
;
1833 u_int32_t gcount
= 0;
1835 /* overlay the FinderInfo to the correct pointer, and advance */
1836 finfo
= (u_int8_t
*)finderinfo
;
1840 * FinderInfo is written out in big endian... make sure to convert it to host
1841 * native before we use it.
1843 * NOTE: the write_gen_counter is stored in the same location in both the
1844 * FndrExtendedFileInfo and FndrExtendedDirInfo structs (it's the
1845 * last 32-bit word) so it is safe to have one code path here.
1847 if (S_ISDIR(mode
) || S_ISREG(mode
)) {
1848 struct FndrExtendedFileInfo
*extinfo
= (struct FndrExtendedFileInfo
*)finfo
;
1849 gcount
= OSSwapBigToHostInt32 (extinfo
->write_gen_counter
);
1852 * Is it zero? File might originate in 10.8 or earlier. We lie and bump it to 1,
1853 * since the incrementer code is able to handle this case and will double-increment
1864 /* Getter for the gen count */
1865 u_int32_t
hfs_get_gencount (struct cnode
*cp
) {
1866 return hfs_get_gencount_internal(cp
->c_finderinfo
, cp
->c_attr
.ca_mode
);
1869 /* Getter for the gen count from a buffer (currently pointer to finderinfo)*/
1870 u_int32_t
hfs_get_gencount_from_blob (const uint8_t *finfoblob
, mode_t mode
) {
1871 return hfs_get_gencount_internal(finfoblob
, mode
);
1874 void hfs_clear_might_be_dirty_flag(cnode_t
*cp
)
1877 * If we're about to touch both mtime and ctime, we can clear the
1878 * C_MIGHT_BE_DIRTY_FROM_MAPPING since we can guarantee that
1879 * subsequent page-outs can only be for data made dirty before
1882 CLR(cp
->c_flag
, C_MIGHT_BE_DIRTY_FROM_MAPPING
);
1886 * Touch cnode times based on c_touch_xxx flags
1888 * cnode must be locked exclusive
1890 * This will also update the volume modify time
1893 hfs_touchtimes(struct hfsmount
*hfsmp
, struct cnode
* cp
)
1896 /* don't modify times if volume is read-only */
1897 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1898 cp
->c_touch_acctime
= FALSE
;
1899 cp
->c_touch_chgtime
= FALSE
;
1900 cp
->c_touch_modtime
= FALSE
;
1904 else if (hfsmp
->hfs_flags
& HFS_STANDARD
) {
1905 /* HFS Standard doesn't support access times */
1906 cp
->c_touch_acctime
= FALSE
;
1910 ctx
= vfs_context_current();
1912 * Skip access time updates if:
1913 * . MNT_NOATIME is set
1914 * . a file system freeze is in progress
1915 * . a file system resize is in progress
1916 * . the vnode associated with this cnode is marked for rapid aging
1918 if (cp
->c_touch_acctime
) {
1919 if ((vfs_flags(hfsmp
->hfs_mp
) & MNT_NOATIME
) ||
1920 hfsmp
->hfs_freeze_state
!= HFS_THAWED
||
1921 (hfsmp
->hfs_flags
& HFS_RESIZE_IN_PROGRESS
) ||
1922 (cp
->c_vp
&& ((vnode_israge(cp
->c_vp
) || (vfs_ctx_skipatime(ctx
)))))) {
1924 cp
->c_touch_acctime
= FALSE
;
1927 if (cp
->c_touch_acctime
|| cp
->c_touch_chgtime
||
1928 cp
->c_touch_modtime
|| (cp
->c_flag
& C_NEEDS_DATEADDED
)) {
1932 if (cp
->c_touch_modtime
&& cp
->c_touch_chgtime
)
1933 hfs_clear_might_be_dirty_flag(cp
);
1937 if (cp
->c_touch_acctime
) {
1938 cp
->c_atime
= tv
.tv_sec
;
1940 * When the access time is the only thing changing
1941 * then make sure it's sufficiently newer before
1942 * committing it to disk.
1944 if ((((u_int32_t
)cp
->c_atime
- (u_int32_t
)(cp
)->c_attr
.ca_atimeondisk
) >
1945 ATIME_ONDISK_ACCURACY
)) {
1946 cp
->c_flag
|= C_MODIFIED
;
1948 cp
->c_touch_acctime
= FALSE
;
1950 if (cp
->c_touch_modtime
) {
1951 cp
->c_mtime
= tv
.tv_sec
;
1952 cp
->c_touch_modtime
= FALSE
;
1953 cp
->c_flag
|= C_MODIFIED
;
1957 * HFS dates that WE set must be adjusted for DST
1959 if ((hfsmp
->hfs_flags
& HFS_STANDARD
) && gTimeZone
.tz_dsttime
) {
1960 cp
->c_mtime
+= 3600;
1964 if (cp
->c_touch_chgtime
) {
1965 cp
->c_ctime
= tv
.tv_sec
;
1966 cp
->c_touch_chgtime
= FALSE
;
1967 cp
->c_flag
|= C_MODIFIED
;
1971 if (cp
->c_flag
& C_NEEDS_DATEADDED
) {
1972 hfs_write_dateadded (&(cp
->c_attr
), tv
.tv_sec
);
1973 cp
->c_flag
|= C_MODIFIED
;
1974 /* untwiddle the bit */
1975 cp
->c_flag
&= ~C_NEEDS_DATEADDED
;
1979 /* Touch the volume modtime if needed */
1981 MarkVCBDirty(hfsmp
);
1982 HFSTOVCB(hfsmp
)->vcbLsMod
= tv
.tv_sec
;
1987 // Use this if you don't want to check the return code
1988 void hfs_lock_always(cnode_t
*cp
, enum hfs_locktype locktype
)
1990 hfs_lock(cp
, locktype
, HFS_LOCK_ALWAYS
);
1995 * N.B. If you add any failure cases, *make* sure hfs_lock_always works
1998 hfs_lock(struct cnode
*cp
, enum hfs_locktype locktype
, enum hfs_lockflags flags
)
2000 thread_t thread
= current_thread();
2002 if (cp
->c_lockowner
== thread
) {
2003 /* Only the extents and bitmap files support lock recursion. */
2004 if ((cp
->c_fileid
== kHFSExtentsFileID
) ||
2005 (cp
->c_fileid
== kHFSAllocationFileID
)) {
2006 cp
->c_syslockcount
++;
2008 panic("hfs_lock: locking against myself!");
2010 } else if (locktype
== HFS_SHARED_LOCK
) {
2011 lck_rw_lock_shared(&cp
->c_rwlock
);
2012 cp
->c_lockowner
= HFS_SHARED_OWNER
;
2014 } else { /* HFS_EXCLUSIVE_LOCK */
2015 lck_rw_lock_exclusive(&cp
->c_rwlock
);
2016 cp
->c_lockowner
= thread
;
2018 /* Only the extents and bitmap files support lock recursion. */
2019 if ((cp
->c_fileid
== kHFSExtentsFileID
) ||
2020 (cp
->c_fileid
== kHFSAllocationFileID
)) {
2021 cp
->c_syslockcount
= 1;
2025 #ifdef HFS_CHECK_LOCK_ORDER
2027 * Regular cnodes (non-system files) cannot be locked
2028 * while holding the journal lock or a system file lock.
2030 if (!(cp
->c_desc
.cd_flags
& CD_ISMETA
) &&
2031 ((cp
->c_fileid
> kHFSFirstUserCatalogNodeID
) || (cp
->c_fileid
== kHFSRootFolderID
))) {
2032 vnode_t vp
= NULLVP
;
2034 /* Find corresponding vnode. */
2035 if (cp
->c_vp
!= NULLVP
&& VTOC(cp
->c_vp
) == cp
) {
2037 } else if (cp
->c_rsrc_vp
!= NULLVP
&& VTOC(cp
->c_rsrc_vp
) == cp
) {
2041 struct hfsmount
*hfsmp
= VTOHFS(vp
);
2043 if (hfsmp
->jnl
&& (journal_owner(hfsmp
->jnl
) == thread
)) {
2044 /* This will eventually be a panic here. */
2045 printf("hfs_lock: bad lock order (cnode after journal)\n");
2047 if (hfsmp
->hfs_catalog_cp
&& hfsmp
->hfs_catalog_cp
->c_lockowner
== thread
) {
2048 panic("hfs_lock: bad lock order (cnode after catalog)");
2050 if (hfsmp
->hfs_attribute_cp
&& hfsmp
->hfs_attribute_cp
->c_lockowner
== thread
) {
2051 panic("hfs_lock: bad lock order (cnode after attribute)");
2053 if (hfsmp
->hfs_extents_cp
&& hfsmp
->hfs_extents_cp
->c_lockowner
== thread
) {
2054 panic("hfs_lock: bad lock order (cnode after extents)");
2058 #endif /* HFS_CHECK_LOCK_ORDER */
2061 * Skip cnodes for regular files that no longer exist
2062 * (marked deleted, catalog entry gone).
2064 if (((flags
& HFS_LOCK_ALLOW_NOEXISTS
) == 0) &&
2065 ((cp
->c_desc
.cd_flags
& CD_ISMETA
) == 0) &&
2066 (cp
->c_flag
& C_NOEXISTS
)) {
2074 * Lock a pair of cnodes.
2077 hfs_lockpair(struct cnode
*cp1
, struct cnode
*cp2
, enum hfs_locktype locktype
)
2079 struct cnode
*first
, *last
;
2083 * If cnodes match then just lock one.
2086 return hfs_lock(cp1
, locktype
, HFS_LOCK_DEFAULT
);
2090 * Lock in cnode address order.
2100 if ( (error
= hfs_lock(first
, locktype
, HFS_LOCK_DEFAULT
))) {
2103 if ( (error
= hfs_lock(last
, locktype
, HFS_LOCK_DEFAULT
))) {
2111 * Check ordering of two cnodes. Return true if they are in-order.
2114 hfs_isordered(struct cnode
*cp1
, struct cnode
*cp2
)
2118 if (cp1
== NULL
|| cp2
== (struct cnode
*)0xffffffff)
2120 if (cp2
== NULL
|| cp1
== (struct cnode
*)0xffffffff)
2123 * Locking order is cnode address order.
2129 * Acquire 4 cnode locks.
2130 * - locked in cnode address order (lesser address first).
2131 * - all or none of the locks are taken
2132 * - only one lock taken per cnode (dup cnodes are skipped)
2133 * - some of the cnode pointers may be null
2136 hfs_lockfour(struct cnode
*cp1
, struct cnode
*cp2
, struct cnode
*cp3
,
2137 struct cnode
*cp4
, enum hfs_locktype locktype
, struct cnode
**error_cnode
)
2139 struct cnode
* a
[3];
2140 struct cnode
* b
[3];
2141 struct cnode
* list
[4];
2146 *error_cnode
= NULL
;
2149 if (hfs_isordered(cp1
, cp2
)) {
2150 a
[0] = cp1
; a
[1] = cp2
;
2152 a
[0] = cp2
; a
[1] = cp1
;
2154 if (hfs_isordered(cp3
, cp4
)) {
2155 b
[0] = cp3
; b
[1] = cp4
;
2157 b
[0] = cp4
; b
[1] = cp3
;
2159 a
[2] = (struct cnode
*)0xffffffff; /* sentinel value */
2160 b
[2] = (struct cnode
*)0xffffffff; /* sentinel value */
2163 * Build the lock list, skipping over duplicates
2165 for (i
= 0, j
= 0, k
= 0; (i
< 2 || j
< 2); ) {
2166 tmp
= hfs_isordered(a
[i
], b
[j
]) ? a
[i
++] : b
[j
++];
2167 if (k
== 0 || tmp
!= list
[k
-1])
2172 * Now we can lock using list[0 - k].
2173 * Skip over NULL entries.
2175 for (i
= 0; i
< k
; ++i
) {
2177 if ((error
= hfs_lock(list
[i
], locktype
, HFS_LOCK_DEFAULT
))) {
2178 /* Only stuff error_cnode if requested */
2180 *error_cnode
= list
[i
];
2182 /* Drop any locks we acquired. */
2185 hfs_unlock(list
[i
]);
2198 hfs_unlock(struct cnode
*cp
)
2200 vnode_t rvp
= NULLVP
;
2201 vnode_t vp
= NULLVP
;
2205 * Only the extents and bitmap files support lock recursion.
2207 if ((cp
->c_fileid
== kHFSExtentsFileID
) ||
2208 (cp
->c_fileid
== kHFSAllocationFileID
)) {
2209 if (--cp
->c_syslockcount
> 0) {
2214 const thread_t thread
= current_thread();
2216 if (cp
->c_lockowner
== thread
) {
2217 c_flag
= cp
->c_flag
;
2219 // If we have the truncate lock, we must defer the puts
2220 if (cp
->c_truncatelockowner
== thread
) {
2221 if (ISSET(c_flag
, C_NEED_DVNODE_PUT
)
2222 && !cp
->c_need_dvnode_put_after_truncate_unlock
) {
2223 CLR(c_flag
, C_NEED_DVNODE_PUT
);
2224 cp
->c_need_dvnode_put_after_truncate_unlock
= true;
2226 if (ISSET(c_flag
, C_NEED_RVNODE_PUT
)
2227 && !cp
->c_need_rvnode_put_after_truncate_unlock
) {
2228 CLR(c_flag
, C_NEED_RVNODE_PUT
);
2229 cp
->c_need_rvnode_put_after_truncate_unlock
= true;
2233 CLR(cp
->c_flag
, (C_NEED_DATA_SETSIZE
| C_NEED_RSRC_SETSIZE
2234 | C_NEED_DVNODE_PUT
| C_NEED_RVNODE_PUT
));
2236 if (c_flag
& (C_NEED_DVNODE_PUT
| C_NEED_DATA_SETSIZE
)) {
2239 if (c_flag
& (C_NEED_RVNODE_PUT
| C_NEED_RSRC_SETSIZE
)) {
2240 rvp
= cp
->c_rsrc_vp
;
2243 cp
->c_lockowner
= NULL
;
2244 lck_rw_unlock_exclusive(&cp
->c_rwlock
);
2246 lck_rw_unlock_shared(&cp
->c_rwlock
);
2249 /* Perform any vnode post processing after cnode lock is dropped. */
2251 if (c_flag
& C_NEED_DATA_SETSIZE
) {
2252 ubc_setsize(vp
, VTOF(vp
)->ff_size
);
2255 * If this is a compressed file, we need to reset the
2256 * compression state. We will have set the size to zero
2257 * above and it will get fixed up later (in exactly the
2258 * same way that new vnodes are fixed up). Note that we
2259 * should only be able to get here if the truncate lock is
2260 * held exclusively and so we do the reset when that's
2263 decmpfs_cnode
*dp
= VTOCMP(vp
);
2264 if (dp
&& decmpfs_cnode_get_vnode_state(dp
) != FILE_TYPE_UNKNOWN
)
2265 cp
->c_need_decmpfs_reset
= true;
2268 if (c_flag
& C_NEED_DVNODE_PUT
)
2272 if (c_flag
& C_NEED_RSRC_SETSIZE
)
2273 ubc_setsize(rvp
, VTOF(rvp
)->ff_size
);
2274 if (c_flag
& C_NEED_RVNODE_PUT
)
2280 * Unlock a pair of cnodes.
/*
 * hfs_unlockpair - release the locks on two cnodes.
 *
 * cp1, cp2 - the cnodes to unlock.
 *
 * NOTE(review): the function body is not visible in this listing, so the
 * handling of cp1 == cp2 cannot be confirmed from here.
 */
2283 hfs_unlockpair(struct cnode
*cp1
, struct cnode
*cp2
)
2291 * Unlock a group of cnodes.
/*
 * hfs_unlockfour - release the locks on up to four cnodes.
 *
 * cp1..cp4 - the cnodes to unlock.
 *
 * A local list[] of four cnode pointers is scanned three times, each scan
 * bounded by the running count k.
 *
 * NOTE(review): the loop bodies are not visible in this listing.
 * Presumably each scan checks whether the next cnode is already in list[]
 * so that a duplicate is unlocked only once - confirm against the full
 * source.
 */
2294 hfs_unlockfour(struct cnode
*cp1
, struct cnode
*cp2
, struct cnode
*cp3
, struct cnode
*cp4
)
/* Holds the cnodes collected so far; k is the number of valid entries. */
2296 struct cnode
* list
[4];
/* First of three scans over list[0..k). */
2304 for (i
= 0; i
< k
; ++i
) {
/* Second scan. */
2313 for (i
= 0; i
< k
; ++i
) {
/* Third scan. */
2322 for (i
= 0; i
< k
; ++i
) {
2332 * Protect a cnode against a truncation.
2334 * Used mainly by read/write since they don't hold the
2335 * cnode lock across calls to the cluster layer.
2337 * The process doing a truncation must take the lock
2338 * exclusive. The read/write processes can take it
2339 * shared. The locktype argument is the same as supplied to
/*
 * hfs_lock_truncate - take the truncate lock of a cnode.
 *
 * cp       - cnode whose c_truncatelock is taken.
 * locktype - HFS_SHARED_LOCK takes the lock shared; any other value is
 *            treated as HFS_EXCLUSIVE_LOCK.
 * flags    - HFS_LOCK_SKIP_IF_EXCLUSIVE permits a call from a thread that
 *            is already recorded as the exclusive owner.
 *
 * Ownership bookkeeping: after a shared acquisition c_truncatelockowner is
 * set to HFS_SHARED_OWNER; after an exclusive acquisition it is set to the
 * calling thread.
 *
 * Panics when the calling thread is already the recorded owner and
 * HFS_LOCK_SKIP_IF_EXCLUSIVE is not set.
 */
2343 hfs_lock_truncate(struct cnode
*cp
, enum hfs_locktype locktype
, enum hfs_lockflags flags
)
2345 thread_t thread
= current_thread();
/* Recursive case: this thread is already recorded as the exclusive owner. */
2347 if (cp
->c_truncatelockowner
== thread
) {
2349 * Ignore grabbing the lock if it the current thread already
2350 * holds exclusive lock.
2352 * This is needed on the hfs_vnop_pagein path where we need to ensure
2353 * the file does not change sizes while we are paging in. However,
2354 * we may already hold the lock exclusive due to another
2355 * VNOP from earlier in the call stack. So if we already hold
2356 * the truncate lock exclusive, allow it to proceed, but ONLY if
2357 * it's in the recursive case.
2359 if ((flags
& HFS_LOCK_SKIP_IF_EXCLUSIVE
) == 0) {
2360 panic("hfs_lock_truncate: cnode %p locked!", cp
);
2362 } else if (locktype
== HFS_SHARED_LOCK
) {
2363 lck_rw_lock_shared(&cp
->c_truncatelock
);
2364 cp
->c_truncatelockowner
= HFS_SHARED_OWNER
;
2365 } else { /* HFS_EXCLUSIVE_LOCK */
2366 lck_rw_lock_exclusive(&cp
->c_truncatelock
);
2367 cp
->c_truncatelockowner
= thread
;
2373 * Attempt to get the truncate lock. If it cannot be acquired, error out.
2374 * This function is needed in the degenerate hfs_vnop_pagein during force unmount
2375 * case. To prevent deadlocks while a VM copy object is moving pages, HFS vnop pagein will
2376 * temporarily need to disable V2 semantics.
/*
 * hfs_try_trunclock - attempt to take the truncate lock without waiting.
 *
 * cp       - cnode whose c_truncatelock is tried.
 * locktype - HFS_SHARED_LOCK tries LCK_RW_TYPE_SHARED; any other value is
 *            treated as HFS_EXCLUSIVE_LOCK and tries LCK_RW_TYPE_EXCLUSIVE.
 * flags    - HFS_LOCK_SKIP_IF_EXCLUSIVE permits a call from a thread that
 *            is already recorded as the exclusive owner.
 *
 * The result of lck_rw_try_lock() is kept in didlock. c_truncatelockowner
 * is set to HFS_SHARED_OWNER or to the calling thread, matching the lock
 * type.
 *
 * NOTE(review): the guards around the owner assignments and the return
 * statement are not visible in this listing; presumably ownership is only
 * recorded when didlock is true and didlock is the return value - confirm.
 *
 * NOTE(review): the panic text names hfs_lock_truncate rather than this
 * function, which can mislead whoever reads the panic log.
 */
2378 int hfs_try_trunclock (struct cnode
*cp
, enum hfs_locktype locktype
, enum hfs_lockflags flags
)
2380 thread_t thread
= current_thread();
/* Outcome of the try-lock call; stays false when no attempt is made. */
2381 boolean_t didlock
= false;
/* Recursive case: this thread is already recorded as the exclusive owner. */
2383 if (cp
->c_truncatelockowner
== thread
) {
2385 * Ignore grabbing the lock if the current thread already
2386 * holds exclusive lock.
2388 * This is needed on the hfs_vnop_pagein path where we need to ensure
2389 * the file does not change sizes while we are paging in. However,
2390 * we may already hold the lock exclusive due to another
2391 * VNOP from earlier in the call stack. So if we already hold
2392 * the truncate lock exclusive, allow it to proceed, but ONLY if
2393 * it's in the recursive case.
2395 if ((flags
& HFS_LOCK_SKIP_IF_EXCLUSIVE
) == 0) {
2396 panic("hfs_lock_truncate: cnode %p locked!", cp
);
2398 } else if (locktype
== HFS_SHARED_LOCK
) {
2399 didlock
= lck_rw_try_lock(&cp
->c_truncatelock
, LCK_RW_TYPE_SHARED
);
2401 cp
->c_truncatelockowner
= HFS_SHARED_OWNER
;
2403 } else { /* HFS_EXCLUSIVE_LOCK */
2404 didlock
= lck_rw_try_lock (&cp
->c_truncatelock
, LCK_RW_TYPE_EXCLUSIVE
);
2406 cp
->c_truncatelockowner
= thread
;
2415 * Unlock the truncate lock, which protects against size changes.
2417 * If HFS_LOCK_SKIP_IF_EXCLUSIVE flag was set, it means that a previous
2418 * hfs_lock_truncate() might have skipped grabbing a lock because
2419 * the current thread was already holding the lock exclusive and
2420 * we may need to return from this function without actually unlocking
2421 * the truncate lock.
2424 hfs_unlock_truncate(struct cnode
*cp
, enum hfs_lockflags flags
)
2426 thread_t thread
= current_thread();
2429 * If HFS_LOCK_SKIP_IF_EXCLUSIVE is set in the flags AND the current
2430 * lock owner of the truncate lock is our current thread, then
2431 * we must have skipped taking the lock earlier in
2432 * hfs_lock_truncate() by setting HFS_LOCK_SKIP_IF_EXCLUSIVE in the
2433 * flags (as the current thread was current lock owner).
2435 * If HFS_LOCK_SKIP_IF_EXCLUSIVE is not set (most of the time) then
2436 * we check the lockowner field to infer whether the lock was taken
2437 * exclusively or shared in order to know what underlying lock
2440 if (flags
& HFS_LOCK_SKIP_IF_EXCLUSIVE
) {
2441 if (cp
->c_truncatelockowner
== thread
) {
2446 /* HFS_LOCK_EXCLUSIVE */
2447 if (thread
== cp
->c_truncatelockowner
) {
2448 vnode_t vp
= NULL
, rvp
= NULL
;
2451 * Deal with any pending set sizes. We need to call
2452 * ubc_setsize before we drop the exclusive lock. Ideally,
2453 * hfs_unlock should be called before hfs_unlock_truncate but
2454 * that's a lot to ask people to remember :-)
2456 if (cp
->c_lockowner
== thread
2457 && ISSET(cp
->c_flag
, C_NEED_DATA_SETSIZE
| C_NEED_RSRC_SETSIZE
)) {
2458 // hfs_unlock will do the setsize calls for us
2460 hfs_lock_always(cp
, HFS_EXCLUSIVE_LOCK
);
2463 if (cp
->c_need_dvnode_put_after_truncate_unlock
) {
2465 cp
->c_need_dvnode_put_after_truncate_unlock
= false;
2467 if (cp
->c_need_rvnode_put_after_truncate_unlock
) {
2468 rvp
= cp
->c_rsrc_vp
;
2469 cp
->c_need_rvnode_put_after_truncate_unlock
= false;
2473 bool reset_decmpfs
= cp
->c_need_decmpfs_reset
;
2474 cp
->c_need_decmpfs_reset
= false;
2477 cp
->c_truncatelockowner
= NULL
;
2478 lck_rw_unlock_exclusive(&cp
->c_truncatelock
);
2481 if (reset_decmpfs
) {
2482 decmpfs_cnode
*dp
= cp
->c_decmp
;
2483 if (dp
&& decmpfs_cnode_get_vnode_state(dp
) != FILE_TYPE_UNKNOWN
)
2484 decmpfs_cnode_set_vnode_state(dp
, FILE_TYPE_UNKNOWN
, 0);
2493 } else { /* HFS_LOCK_SHARED */
2494 lck_rw_unlock_shared(&cp
->c_truncatelock
);