2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* @(#)hfs_readwrite.c 1.0
30 * (c) 1998-2001 Apple Inc. All Rights Reserved
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/fcntl.h>
43 #include <sys/kauth.h>
44 #include <sys/vnode.h>
46 #include <sys/vfs_context.h>
48 #include <sys/sysctl.h>
49 #include <sys/fsctl.h>
51 #include <sys/fsevents.h>
52 #include <uuid/uuid.h>
54 #include <libkern/OSDebug.h>
56 #include <miscfs/specfs/specdev.h>
60 #include <vm/vm_pageout.h>
61 #include <vm/vm_kern.h>
63 #include <IOKit/IOBSD.h>
65 #include <sys/kdebug.h>
68 #include "hfs_attrlist.h"
69 #include "hfs_endian.h"
70 #include "hfs_fsctl.h"
71 #include "hfs_quota.h"
72 #include "FileMgrInternal.h"
73 #include "BTreesInternal.h"
74 #include "hfs_cnode.h"
77 #if HFS_CONFIG_KEY_ROLL
78 #include "hfs_key_roll.h"
81 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
84 MAXHFSFILESIZE
= 0x7FFFFFFF /* this needs to go in the mount structure */
87 /* from bsd/hfs/hfs_vfsops.c */
88 extern int hfs_vfs_vget (struct mount
*mp
, ino64_t ino
, struct vnode
**vpp
, vfs_context_t context
);
90 /* from hfs_hotfiles.c */
91 extern int hfs_pin_overflow_extents (struct hfsmount
*hfsmp
, uint32_t fileid
,
92 uint8_t forktype
, uint32_t *pinned
);
94 static int hfs_clonefile(struct vnode
*, int, int, int);
95 static int hfs_clonesysfile(struct vnode
*, int, int, int, kauth_cred_t
, struct proc
*);
96 static int do_hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int skip
, vfs_context_t context
);
100 * Read data from a file.
103 hfs_vnop_read(struct vnop_read_args
*ap
)
106 struct vnop_read_args {
107 struct vnodeop_desc *a_desc;
111 vfs_context_t a_context;
115 uio_t uio
= ap
->a_uio
;
116 struct vnode
*vp
= ap
->a_vp
;
119 struct hfsmount
*hfsmp
;
122 off_t start_resid
= uio_resid(uio
);
123 off_t offset
= uio_offset(uio
);
125 int took_truncate_lock
= 0;
127 int throttled_count
= 0;
129 /* Preflight checks */
130 if (!vnode_isreg(vp
)) {
131 /* can only read regular files */
137 if (start_resid
== 0)
138 return (0); /* Nothing left to do */
140 return (EINVAL
); /* cant read from a negative offset */
143 if ((ap
->a_ioflag
& (IO_SKIP_ENCRYPTION
|IO_SYSCALL_DISPATCH
)) ==
144 (IO_SKIP_ENCRYPTION
|IO_SYSCALL_DISPATCH
)) {
145 /* Don't allow unencrypted io request from user space */
151 if (VNODE_IS_RSRC(vp
)) {
152 if (hfs_hides_rsrc(ap
->a_context
, VTOC(vp
), 1)) { /* 1 == don't take the cnode lock */
155 /* otherwise read the resource fork normally */
157 int compressed
= hfs_file_is_compressed(VTOC(vp
), 1); /* 1 == don't take the cnode lock */
159 retval
= decmpfs_read_compressed(ap
, &compressed
, VTOCMP(vp
));
160 if (retval
== 0 && !(ap
->a_ioflag
& IO_EVTONLY
) && vnode_isfastdevicecandidate(vp
)) {
161 (void) hfs_addhotfile(vp
);
165 /* successful read, update the access time */
166 VTOC(vp
)->c_touch_acctime
= TRUE
;
169 // compressed files are not traditional hot file candidates
170 // but they may be for CF (which ignores the ff_bytesread
173 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
) {
174 VTOF(vp
)->ff_bytesread
= 0;
179 /* otherwise the file was converted back to a regular file while we were reading it */
181 } else if ((VTOC(vp
)->c_bsdflags
& UF_COMPRESSED
)) {
184 error
= check_for_dataless_file(vp
, NAMESPACE_HANDLER_READ_OP
);
191 #endif /* HFS_COMPRESSION */
198 if ((retval
= cp_handle_vnop (vp
, CP_READ_ACCESS
, ap
->a_ioflag
)) != 0) {
202 #if HFS_CONFIG_KEY_ROLL
203 if (ISSET(ap
->a_ioflag
, IO_ENCRYPTED
)) {
204 off_rsrc_t off_rsrc
= off_rsrc_make(offset
+ start_resid
,
207 retval
= hfs_key_roll_up_to(ap
->a_context
, vp
, off_rsrc
);
211 #endif // HFS_CONFIG_KEY_ROLL
212 #endif // CONFIG_PROTECT
215 * If this read request originated from a syscall (as opposed to
216 * an in-kernel page fault or something), then set it up for
219 if (ap
->a_ioflag
& IO_SYSCALL_DISPATCH
) {
220 io_throttle
= IO_RETURN_ON_THROTTLE
;
225 /* Protect against a size change. */
226 hfs_lock_truncate(cp
, HFS_SHARED_LOCK
, HFS_LOCK_DEFAULT
);
227 took_truncate_lock
= 1;
229 filesize
= fp
->ff_size
;
230 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
233 * Check the file size. Note that per POSIX spec, we return 0 at
234 * file EOF, so attempting a read at an offset that is too big
235 * should just return 0 on HFS+. Since the return value was initialized
236 * to 0 above, we just jump to exit. HFS Standard has its own behavior.
238 if (offset
> filesize
) {
240 if ((hfsmp
->hfs_flags
& HFS_STANDARD
) &&
241 (offset
> (off_t
)MAXHFSFILESIZE
)) {
248 KERNEL_DEBUG(HFSDBG_READ
| DBG_FUNC_START
,
249 (int)uio_offset(uio
), uio_resid(uio
), (int)filesize
, (int)filebytes
, 0);
251 retval
= cluster_read(vp
, uio
, filesize
, ap
->a_ioflag
|io_throttle
);
253 cp
->c_touch_acctime
= TRUE
;
255 KERNEL_DEBUG(HFSDBG_READ
| DBG_FUNC_END
,
256 (int)uio_offset(uio
), uio_resid(uio
), (int)filesize
, (int)filebytes
, 0);
259 * Keep track blocks read
261 if (hfsmp
->hfc_stage
== HFC_RECORDING
&& retval
== 0) {
262 int took_cnode_lock
= 0;
265 bytesread
= start_resid
- uio_resid(uio
);
267 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
268 if ((fp
->ff_bytesread
+ bytesread
) > 0x00000000ffffffff) {
269 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
273 * If this file hasn't been seen since the start of
274 * the current sampling period then start over.
276 if (cp
->c_atime
< hfsmp
->hfc_timebase
) {
279 fp
->ff_bytesread
= bytesread
;
281 cp
->c_atime
= tv
.tv_sec
;
283 fp
->ff_bytesread
+= bytesread
;
286 if (!(ap
->a_ioflag
& IO_EVTONLY
) && vnode_isfastdevicecandidate(vp
)) {
288 // We don't add hotfiles for processes doing IO_EVTONLY I/O
289 // on the assumption that they're system processes such as
290 // mdworker which scan everything in the system (and thus
291 // do not represent user-initiated access to files)
293 (void) hfs_addhotfile(vp
);
299 if (took_truncate_lock
) {
300 hfs_unlock_truncate(cp
, HFS_LOCK_DEFAULT
);
302 if (retval
== EAGAIN
) {
303 throttle_lowpri_io(1);
310 throttle_info_reset_window(NULL
);
315 * Ideally, this wouldn't be necessary; the cluster code should be
316 * able to handle this on the read-side. See <rdar://20420068>.
318 static errno_t
hfs_zero_eof_page(vnode_t vp
, off_t zero_up_to
)
320 hfs_assert(VTOC(vp
)->c_lockowner
!= current_thread());
321 hfs_assert(VTOC(vp
)->c_truncatelockowner
== current_thread());
323 struct filefork
*fp
= VTOF(vp
);
325 if (!(fp
->ff_size
& PAGE_MASK_64
) || zero_up_to
<= fp
->ff_size
) {
330 zero_up_to
= MIN(zero_up_to
, (off_t
)round_page_64(fp
->ff_size
));
332 /* N.B. At present, @zero_up_to is not important because the cluster
333 code will always zero up to the end of the page anyway. */
334 return cluster_write(vp
, NULL
, fp
->ff_size
, zero_up_to
,
335 fp
->ff_size
, 0, IO_HEADZEROFILL
);
339 * Write data to a file.
342 hfs_vnop_write(struct vnop_write_args
*ap
)
344 uio_t uio
= ap
->a_uio
;
345 struct vnode
*vp
= ap
->a_vp
;
348 struct hfsmount
*hfsmp
;
349 kauth_cred_t cred
= NULL
;
352 off_t bytesToAdd
= 0;
353 off_t actualBytesAdded
;
358 int ioflag
= ap
->a_ioflag
;
361 int cnode_locked
= 0;
362 int partialwrite
= 0;
364 time_t orig_ctime
=VTOC(vp
)->c_ctime
;
365 int took_truncate_lock
= 0;
366 int io_return_on_throttle
= 0;
367 int throttled_count
= 0;
370 if ( hfs_file_is_compressed(VTOC(vp
), 1) ) { /* 1 == don't take the cnode lock */
371 int state
= decmpfs_cnode_get_vnode_state(VTOCMP(vp
));
373 case FILE_IS_COMPRESSED
:
375 case FILE_IS_CONVERTING
:
376 /* if FILE_IS_CONVERTING, we allow writes but do not
377 bother with snapshots or else we will deadlock.
382 printf("invalid state %d for compressed file\n", state
);
385 } else if ((VTOC(vp
)->c_bsdflags
& UF_COMPRESSED
)) {
388 error
= check_for_dataless_file(vp
, NAMESPACE_HANDLER_WRITE_OP
);
395 nspace_snapshot_event(vp
, orig_ctime
, NAMESPACE_HANDLER_WRITE_OP
, uio
);
401 if ((ioflag
& (IO_SKIP_ENCRYPTION
|IO_SYSCALL_DISPATCH
)) ==
402 (IO_SKIP_ENCRYPTION
|IO_SYSCALL_DISPATCH
)) {
403 /* Don't allow unencrypted io request from user space */
408 resid
= uio_resid(uio
);
409 offset
= uio_offset(uio
);
415 if (!vnode_isreg(vp
))
416 return (EPERM
); /* Can only write regular files */
423 if ((retval
= cp_handle_vnop (vp
, CP_WRITE_ACCESS
, 0)) != 0) {
428 eflags
= kEFDeferMask
; /* defer file block allocations */
431 * When the underlying device is sparse and space
432 * is low (< 8MB), stop doing delayed allocations
433 * and begin doing synchronous I/O.
435 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
436 (hfs_freeblks(hfsmp
, 0) < 2048)) {
437 eflags
&= ~kEFDeferMask
;
440 #endif /* HFS_SPARSE_DEV */
442 if ((ioflag
& (IO_SINGLE_WRITER
| IO_SYSCALL_DISPATCH
)) ==
443 (IO_SINGLE_WRITER
| IO_SYSCALL_DISPATCH
)) {
444 io_return_on_throttle
= IO_RETURN_ON_THROTTLE
;
449 * Protect against a size change.
451 * Note: If took_truncate_lock is true, then we previously got the lock shared
452 * but needed to upgrade to exclusive. So try getting it exclusive from the
455 if (ioflag
& IO_APPEND
|| took_truncate_lock
) {
456 hfs_lock_truncate(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
459 hfs_lock_truncate(cp
, HFS_SHARED_LOCK
, HFS_LOCK_DEFAULT
);
461 took_truncate_lock
= 1;
464 if (ioflag
& IO_APPEND
) {
465 uio_setoffset(uio
, fp
->ff_size
);
466 offset
= fp
->ff_size
;
468 if ((cp
->c_bsdflags
& APPEND
) && offset
!= fp
->ff_size
) {
473 cred
= vfs_context_ucred(ap
->a_context
);
474 if (cred
&& suser(cred
, NULL
) != 0)
475 eflags
|= kEFReserveMask
;
477 origFileSize
= fp
->ff_size
;
478 writelimit
= offset
+ resid
;
481 * We may need an exclusive truncate lock for several reasons, all
482 * of which are because we may be writing to a (portion of a) block
483 * for the first time, and we need to make sure no readers see the
484 * prior, uninitialized contents of the block. The cases are:
486 * 1. We have unallocated (delayed allocation) blocks. We may be
487 * allocating new blocks to the file and writing to them.
488 * (A more precise check would be whether the range we're writing
489 * to contains delayed allocation blocks.)
490 * 2. We need to extend the file. The bytes between the old EOF
491 * and the new EOF are not yet initialized. This is important
492 * even if we're not allocating new blocks to the file. If the
493 * old EOF and new EOF are in the same block, we still need to
494 * protect that range of bytes until they are written for the
497 * If we had a shared lock with the above cases, we need to try to upgrade
498 * to an exclusive lock. If the upgrade fails, we will lose the shared
499 * lock, and will need to take the truncate lock again; the took_truncate_lock
500 * flag will still be set, causing us to try for an exclusive lock next time.
502 if ((cp
->c_truncatelockowner
== HFS_SHARED_OWNER
) &&
503 ((fp
->ff_unallocblocks
!= 0) ||
504 (writelimit
> origFileSize
))) {
505 if (lck_rw_lock_shared_to_exclusive(&cp
->c_truncatelock
) == FALSE
) {
507 * Lock upgrade failed and we lost our shared lock, try again.
508 * Note: we do not set took_truncate_lock=0 here. Leaving it
509 * set to 1 will cause us to try to get the lock exclusive.
514 /* Store the owner in the c_truncatelockowner field if we successfully upgrade */
515 cp
->c_truncatelockowner
= current_thread();
519 if ( (retval
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
))) {
524 filebytes
= hfs_blk_to_bytes(fp
->ff_blocks
, hfsmp
->blockSize
);
526 if (offset
> filebytes
527 && (hfs_blk_to_bytes(hfs_freeblks(hfsmp
, ISSET(eflags
, kEFReserveMask
)),
528 hfsmp
->blockSize
) < offset
- filebytes
)) {
533 KERNEL_DEBUG(HFSDBG_WRITE
| DBG_FUNC_START
,
534 (int)offset
, uio_resid(uio
), (int)fp
->ff_size
,
537 /* Check if we do not need to extend the file */
538 if (writelimit
<= filebytes
) {
542 bytesToAdd
= writelimit
- filebytes
;
545 retval
= hfs_chkdq(cp
, (int64_t)(roundup(bytesToAdd
, hfsmp
->blockSize
)),
551 if (hfs_start_transaction(hfsmp
) != 0) {
556 while (writelimit
> filebytes
) {
557 bytesToAdd
= writelimit
- filebytes
;
559 /* Protect extents b-tree and allocation bitmap */
560 lockflags
= SFL_BITMAP
;
561 if (overflow_extents(fp
))
562 lockflags
|= SFL_EXTENTS
;
563 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
565 /* Files that are changing size are not hot file candidates. */
566 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
567 fp
->ff_bytesread
= 0;
569 retval
= MacToVFSError(ExtendFileC (hfsmp
, (FCB
*)fp
, bytesToAdd
,
570 0, eflags
, &actualBytesAdded
));
572 hfs_systemfile_unlock(hfsmp
, lockflags
);
574 if ((actualBytesAdded
== 0) && (retval
== E_NONE
))
576 if (retval
!= E_NONE
)
578 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
579 KERNEL_DEBUG(HFSDBG_WRITE
| DBG_FUNC_NONE
,
580 (int)offset
, uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
582 (void) hfs_update(vp
, 0);
583 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
584 (void) hfs_end_transaction(hfsmp
);
587 * If we didn't grow the file enough try a partial write.
588 * POSIX expects this behavior.
590 if ((retval
== ENOSPC
) && (filebytes
> offset
)) {
593 uio_setresid(uio
, (uio_resid(uio
) - bytesToAdd
));
595 writelimit
= filebytes
;
598 if (retval
== E_NONE
) {
603 if (writelimit
> fp
->ff_size
) {
604 filesize
= writelimit
;
606 rl_add(fp
->ff_size
, writelimit
- 1 , &fp
->ff_invalidranges
);
608 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
610 filesize
= fp
->ff_size
;
612 lflag
= ioflag
& ~(IO_TAILZEROFILL
| IO_HEADZEROFILL
| IO_NOZEROVALID
| IO_NOZERODIRTY
);
615 * We no longer use IO_HEADZEROFILL or IO_TAILZEROFILL (except
616 * for one case below). For the regions that lie before the
617 * beginning and after the end of this write that are in the
618 * same page, we let the cluster code handle zeroing that out
619 * if necessary. If those areas are not cached, the cluster
620 * code will try and read those areas in, and in the case
621 * where those regions have never been written to,
622 * hfs_vnop_blockmap will consult the invalid ranges and then
623 * indicate that. The cluster code will zero out those areas.
626 head_off
= trunc_page_64(offset
);
628 if (head_off
< offset
&& head_off
>= fp
->ff_size
) {
630 * The first page is beyond current EOF, so as an
631 * optimisation, we can pass IO_HEADZEROFILL.
633 lflag
|= IO_HEADZEROFILL
;
640 * We need to tell UBC the fork's new size BEFORE calling
641 * cluster_write, in case any of the new pages need to be
642 * paged out before cluster_write completes (which does happen
643 * in embedded systems due to extreme memory pressure).
644 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
645 * will be, so that it can pass that on to cluster_pageout, and
646 * allow those pageouts.
648 * We don't update ff_size yet since we don't want pageins to
649 * be able to see uninitialized data between the old and new
650 * EOF, until cluster_write has completed and initialized that
653 * The vnode pager relies on the file size last given to UBC via
654 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
655 * ff_size (whichever is larger). NOTE: ff_new_size is always
656 * zero, unless we are extending the file via write.
658 if (filesize
> fp
->ff_size
) {
659 retval
= hfs_zero_eof_page(vp
, offset
);
662 fp
->ff_new_size
= filesize
;
663 ubc_setsize(vp
, filesize
);
665 retval
= cluster_write(vp
, uio
, fp
->ff_size
, filesize
, head_off
,
666 0, lflag
| IO_NOZERODIRTY
| io_return_on_throttle
);
668 fp
->ff_new_size
= 0; /* no longer extending; use ff_size */
670 if (retval
== EAGAIN
) {
672 * EAGAIN indicates that we still have I/O to do, but
673 * that we now need to be throttled
675 if (resid
!= uio_resid(uio
)) {
677 * did manage to do some I/O before returning EAGAIN
679 resid
= uio_resid(uio
);
680 offset
= uio_offset(uio
);
682 cp
->c_touch_chgtime
= TRUE
;
683 cp
->c_touch_modtime
= TRUE
;
684 hfs_incr_gencount(cp
);
686 if (filesize
> fp
->ff_size
) {
688 * we called ubc_setsize before the call to
689 * cluster_write... since we only partially
690 * completed the I/O, we need to
691 * re-adjust our idea of the filesize based
694 ubc_setsize(vp
, offset
);
696 fp
->ff_size
= offset
;
700 if (filesize
> origFileSize
) {
701 ubc_setsize(vp
, origFileSize
);
706 if (filesize
> origFileSize
) {
707 fp
->ff_size
= filesize
;
709 /* Files that are changing size are not hot file candidates. */
710 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
711 fp
->ff_bytesread
= 0;
714 fp
->ff_new_size
= 0; /* ff_size now has the correct size */
717 uio_setresid(uio
, (uio_resid(uio
) + bytesToAdd
));
721 if (vnode_should_flush_after_write(vp
, ioflag
))
722 hfs_flush(hfsmp
, HFS_FLUSH_CACHE
);
726 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
730 if (resid
> uio_resid(uio
)) {
731 cp
->c_touch_chgtime
= TRUE
;
732 cp
->c_touch_modtime
= TRUE
;
733 hfs_incr_gencount(cp
);
736 * If we successfully wrote any data, and we are not the superuser
737 * we clear the setuid and setgid bits as a precaution against
740 if (cp
->c_mode
& (S_ISUID
| S_ISGID
)) {
741 cred
= vfs_context_ucred(ap
->a_context
);
742 if (cred
&& suser(cred
, NULL
)) {
743 cp
->c_mode
&= ~(S_ISUID
| S_ISGID
);
748 if (ioflag
& IO_UNIT
) {
749 (void)hfs_truncate(vp
, origFileSize
, ioflag
& IO_SYNC
,
751 uio_setoffset(uio
, (uio_offset(uio
) - (resid
- uio_resid(uio
))));
752 uio_setresid(uio
, resid
);
753 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
755 } else if ((ioflag
& IO_SYNC
) && (resid
> uio_resid(uio
)))
756 retval
= hfs_update(vp
, 0);
758 /* Updating vcbWrCnt doesn't need to be atomic. */
761 KERNEL_DEBUG(HFSDBG_WRITE
| DBG_FUNC_END
,
762 (int)uio_offset(uio
), uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
764 if (retval
&& took_truncate_lock
765 && cp
->c_truncatelockowner
== current_thread()) {
767 rl_remove(fp
->ff_size
, RL_INFINITY
, &fp
->ff_invalidranges
);
773 if (took_truncate_lock
) {
774 hfs_unlock_truncate(cp
, HFS_LOCK_DEFAULT
);
776 if (retval
== EAGAIN
) {
777 throttle_lowpri_io(1);
784 throttle_info_reset_window(NULL
);
788 /* support for the "bulk-access" fcntl */
790 #define CACHE_LEVELS 16
791 #define NUM_CACHE_ENTRIES (64*16)
792 #define PARENT_IDS_FLAG 0x100
794 struct access_cache
{
796 int cachehits
; /* these two for statistics gathering */
798 unsigned int *acache
;
799 unsigned char *haveaccess
;
803 uid_t uid
; /* IN: effective user id */
804 short flags
; /* IN: access requested (i.e. R_OK) */
805 short num_groups
; /* IN: number of groups user belongs to */
806 int num_files
; /* IN: number of files to process */
807 int *file_ids
; /* IN: array of file ids */
808 gid_t
*groups
; /* IN: array of groups */
809 short *access
; /* OUT: access info for each file (0 for 'has access') */
810 } __attribute__((unavailable
)); // this structure is for reference purposes only
812 struct user32_access_t
{
813 uid_t uid
; /* IN: effective user id */
814 short flags
; /* IN: access requested (i.e. R_OK) */
815 short num_groups
; /* IN: number of groups user belongs to */
816 int num_files
; /* IN: number of files to process */
817 user32_addr_t file_ids
; /* IN: array of file ids */
818 user32_addr_t groups
; /* IN: array of groups */
819 user32_addr_t access
; /* OUT: access info for each file (0 for 'has access') */
822 struct user64_access_t
{
823 uid_t uid
; /* IN: effective user id */
824 short flags
; /* IN: access requested (i.e. R_OK) */
825 short num_groups
; /* IN: number of groups user belongs to */
826 int num_files
; /* IN: number of files to process */
827 user64_addr_t file_ids
; /* IN: array of file ids */
828 user64_addr_t groups
; /* IN: array of groups */
829 user64_addr_t access
; /* OUT: access info for each file (0 for 'has access') */
833 // these are the "extended" versions of the above structures
834 // note that it is crucial that they be different sized than
835 // the regular version
836 struct ext_access_t
{
837 uint32_t flags
; /* IN: access requested (i.e. R_OK) */
838 uint32_t num_files
; /* IN: number of files to process */
839 uint32_t map_size
; /* IN: size of the bit map */
840 uint32_t *file_ids
; /* IN: Array of file ids */
841 char *bitmap
; /* OUT: hash-bitmap of interesting directory ids */
842 short *access
; /* OUT: access info for each file (0 for 'has access') */
843 uint32_t num_parents
; /* future use */
844 cnid_t
*parents
; /* future use */
845 } __attribute__((unavailable
)); // this structure is for reference purposes only
847 struct user32_ext_access_t
{
848 uint32_t flags
; /* IN: access requested (i.e. R_OK) */
849 uint32_t num_files
; /* IN: number of files to process */
850 uint32_t map_size
; /* IN: size of the bit map */
851 user32_addr_t file_ids
; /* IN: Array of file ids */
852 user32_addr_t bitmap
; /* OUT: hash-bitmap of interesting directory ids */
853 user32_addr_t access
; /* OUT: access info for each file (0 for 'has access') */
854 uint32_t num_parents
; /* future use */
855 user32_addr_t parents
; /* future use */
858 struct user64_ext_access_t
{
859 uint32_t flags
; /* IN: access requested (i.e. R_OK) */
860 uint32_t num_files
; /* IN: number of files to process */
861 uint32_t map_size
; /* IN: size of the bit map */
862 user64_addr_t file_ids
; /* IN: array of file ids */
863 user64_addr_t bitmap
; /* IN: array of groups */
864 user64_addr_t access
; /* OUT: access info for each file (0 for 'has access') */
865 uint32_t num_parents
;/* future use */
866 user64_addr_t parents
;/* future use */
871 * Perform a binary search for the given parent_id. Return value is
872 * the index if there is a match. If no_match_indexp is non-NULL it
873 * will be assigned with the index to insert the item (even if it was
876 static int cache_binSearch(cnid_t
*array
, unsigned int hi
, cnid_t parent_id
, int *no_match_indexp
)
882 unsigned int mid
= ((hi
- lo
)/2) + lo
;
883 unsigned int this_id
= array
[mid
];
885 if (parent_id
== this_id
) {
890 if (parent_id
< this_id
) {
895 if (parent_id
> this_id
) {
901 /* check if lo and hi converged on the match */
902 if (parent_id
== array
[hi
]) {
906 if (no_match_indexp
) {
907 *no_match_indexp
= hi
;
915 lookup_bucket(struct access_cache
*cache
, int *indexp
, cnid_t parent_id
)
919 int index
, no_match_index
;
921 if (cache
->numcached
== 0) {
923 return 0; // table is empty, so insert at index=0 and report no match
926 if (cache
->numcached
> NUM_CACHE_ENTRIES
) {
927 cache
->numcached
= NUM_CACHE_ENTRIES
;
930 hi
= cache
->numcached
- 1;
932 index
= cache_binSearch(cache
->acache
, hi
, parent_id
, &no_match_index
);
934 /* if no existing entry found, find index for new one */
936 index
= no_match_index
;
947 * Add a node to the access_cache at the given index (or do a lookup first
948 * to find the index if -1 is passed in). We currently do a replace rather
949 * than an insert if the cache is full.
952 add_node(struct access_cache
*cache
, int index
, cnid_t nodeID
, int access
)
954 int lookup_index
= -1;
956 /* need to do a lookup first if -1 passed for index */
958 if (lookup_bucket(cache
, &lookup_index
, nodeID
)) {
959 if (cache
->haveaccess
[lookup_index
] != access
&& cache
->haveaccess
[lookup_index
] == ESRCH
) {
960 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
961 cache
->haveaccess
[lookup_index
] = access
;
964 /* mission accomplished */
967 index
= lookup_index
;
972 /* if the cache is full, do a replace rather than an insert */
973 if (cache
->numcached
>= NUM_CACHE_ENTRIES
) {
974 cache
->numcached
= NUM_CACHE_ENTRIES
-1;
976 if (index
> cache
->numcached
) {
977 index
= cache
->numcached
;
981 if (index
< cache
->numcached
&& index
< NUM_CACHE_ENTRIES
&& nodeID
> cache
->acache
[index
]) {
985 if (index
>= 0 && index
< cache
->numcached
) {
986 /* only do bcopy if we're inserting */
987 bcopy( cache
->acache
+index
, cache
->acache
+(index
+1), (cache
->numcached
- index
)*sizeof(int) );
988 bcopy( cache
->haveaccess
+index
, cache
->haveaccess
+(index
+1), (cache
->numcached
- index
)*sizeof(unsigned char) );
991 cache
->acache
[index
] = nodeID
;
992 cache
->haveaccess
[index
] = access
;
1006 snoop_callback(const cnode_t
*cp
, void *arg
)
1008 struct cinfo
*cip
= arg
;
1010 cip
->uid
= cp
->c_uid
;
1011 cip
->gid
= cp
->c_gid
;
1012 cip
->mode
= cp
->c_mode
;
1013 cip
->parentcnid
= cp
->c_parentcnid
;
1014 cip
->recflags
= cp
->c_attr
.ca_recflags
;
1020 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
1021 * isn't incore, then go to the catalog.
1024 do_attr_lookup(struct hfsmount
*hfsmp
, struct access_cache
*cache
, cnid_t cnid
,
1025 struct cnode
*skip_cp
, CatalogKey
*keyp
, struct cat_attr
*cnattrp
)
1029 /* if this id matches the one the fsctl was called with, skip the lookup */
1030 if (cnid
== skip_cp
->c_cnid
) {
1031 cnattrp
->ca_uid
= skip_cp
->c_uid
;
1032 cnattrp
->ca_gid
= skip_cp
->c_gid
;
1033 cnattrp
->ca_mode
= skip_cp
->c_mode
;
1034 cnattrp
->ca_recflags
= skip_cp
->c_attr
.ca_recflags
;
1035 keyp
->hfsPlus
.parentID
= skip_cp
->c_parentcnid
;
1037 struct cinfo c_info
;
1039 /* otherwise, check the cnode hash incase the file/dir is incore */
1040 error
= hfs_chash_snoop(hfsmp
, cnid
, 0, snoop_callback
, &c_info
);
1042 if (error
== EACCES
) {
1045 } else if (!error
) {
1046 cnattrp
->ca_uid
= c_info
.uid
;
1047 cnattrp
->ca_gid
= c_info
.gid
;
1048 cnattrp
->ca_mode
= c_info
.mode
;
1049 cnattrp
->ca_recflags
= c_info
.recflags
;
1050 keyp
->hfsPlus
.parentID
= c_info
.parentcnid
;
1054 if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp
)))
1055 throttle_lowpri_io(1);
1057 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_CATALOG
, HFS_SHARED_LOCK
);
1059 /* lookup this cnid in the catalog */
1060 error
= cat_getkeyplusattr(hfsmp
, cnid
, keyp
, cnattrp
);
1062 hfs_systemfile_unlock(hfsmp
, lockflags
);
1073 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
1074 * up to CACHE_LEVELS as we progress towards the root.
1077 do_access_check(struct hfsmount
*hfsmp
, int *err
, struct access_cache
*cache
, HFSCatalogNodeID nodeID
,
1078 struct cnode
*skip_cp
, struct proc
*theProcPtr
, kauth_cred_t myp_ucred
,
1079 struct vfs_context
*my_context
,
1083 uint32_t num_parents
)
1087 HFSCatalogNodeID thisNodeID
;
1088 unsigned int myPerms
;
1089 struct cat_attr cnattr
;
1090 int cache_index
= -1, scope_index
= -1, scope_idx_start
= -1;
1093 int i
= 0, ids_to_cache
= 0;
1094 int parent_ids
[CACHE_LEVELS
];
1096 thisNodeID
= nodeID
;
1097 while (thisNodeID
>= kRootDirID
) {
1098 myResult
= 0; /* default to "no access" */
1100 /* check the cache before resorting to hitting the catalog */
1102 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
1103 * to look any further after hitting cached dir */
1105 if (lookup_bucket(cache
, &cache_index
, thisNodeID
)) {
1107 myErr
= cache
->haveaccess
[cache_index
];
1108 if (scope_index
!= -1) {
1109 if (myErr
== ESRCH
) {
1113 scope_index
= 0; // so we'll just use the cache result
1114 scope_idx_start
= ids_to_cache
;
1116 myResult
= (myErr
== 0) ? 1 : 0;
1117 goto ExitThisRoutine
;
1123 tmp
= cache_binSearch(parents
, num_parents
-1, thisNodeID
, NULL
);
1124 if (scope_index
== -1)
1126 if (tmp
!= -1 && scope_idx_start
== -1 && ids_to_cache
< CACHE_LEVELS
) {
1127 scope_idx_start
= ids_to_cache
;
1131 /* remember which parents we want to cache */
1132 if (ids_to_cache
< CACHE_LEVELS
) {
1133 parent_ids
[ids_to_cache
] = thisNodeID
;
1136 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
1137 if (bitmap
&& map_size
) {
1138 bitmap
[(thisNodeID
/8)%(map_size
)]|=(1<<(thisNodeID
&7));
1142 /* do the lookup (checks the cnode hash, then the catalog) */
1143 myErr
= do_attr_lookup(hfsmp
, cache
, thisNodeID
, skip_cp
, &catkey
, &cnattr
);
1145 goto ExitThisRoutine
; /* no access */
1148 /* Root always gets access. */
1149 if (suser(myp_ucred
, NULL
) == 0) {
1150 thisNodeID
= catkey
.hfsPlus
.parentID
;
1155 // if the thing has acl's, do the full permission check
1156 if ((cnattr
.ca_recflags
& kHFSHasSecurityMask
) != 0) {
1159 /* get the vnode for this cnid */
1160 myErr
= hfs_vget(hfsmp
, thisNodeID
, &vp
, 0, 0);
1163 goto ExitThisRoutine
;
1166 thisNodeID
= VTOC(vp
)->c_parentcnid
;
1168 hfs_unlock(VTOC(vp
));
1170 if (vnode_vtype(vp
) == VDIR
) {
1171 myErr
= vnode_authorize(vp
, NULL
, (KAUTH_VNODE_SEARCH
| KAUTH_VNODE_LIST_DIRECTORY
), my_context
);
1173 myErr
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
, my_context
);
1179 goto ExitThisRoutine
;
1183 int mode
= cnattr
.ca_mode
& S_IFMT
;
1184 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
, cnattr
.ca_mode
, hfsmp
->hfs_mp
,myp_ucred
, theProcPtr
);
1186 if (mode
== S_IFDIR
) {
1187 flags
= R_OK
| X_OK
;
1191 if ( (myPerms
& flags
) != flags
) {
1194 goto ExitThisRoutine
; /* no access */
1197 /* up the hierarchy we go */
1198 thisNodeID
= catkey
.hfsPlus
.parentID
;
1202 /* if here, we have access to this node */
1206 if (parents
&& myErr
== 0 && scope_index
== -1) {
1215 /* cache the parent directory(ies) */
1216 for (i
= 0; i
< ids_to_cache
; i
++) {
1217 if (myErr
== 0 && parents
&& (scope_idx_start
== -1 || i
> scope_idx_start
)) {
1218 add_node(cache
, -1, parent_ids
[i
], ESRCH
);
1220 add_node(cache
, -1, parent_ids
[i
], myErr
);
1228 do_bulk_access_check(struct hfsmount
*hfsmp
, struct vnode
*vp
,
1229 struct vnop_ioctl_args
*ap
, int arg_size
, vfs_context_t context
)
1234 * NOTE: on entry, the vnode has an io_ref. In case this vnode
1235 * happens to be in our list of file_ids, we'll note it
1236 * avoid calling hfs_chashget_nowait() on that id as that
1237 * will cause a "locking against myself" panic.
1239 Boolean check_leaf
= true;
1241 struct user64_ext_access_t
*user_access_structp
;
1242 struct user64_ext_access_t tmp_user_access
;
1243 struct access_cache cache
;
1245 int error
= 0, prev_parent_check_ok
=1;
1249 unsigned int num_files
= 0;
1251 int num_parents
= 0;
1255 cnid_t
*parents
=NULL
;
1259 cnid_t prevParent_cnid
= 0;
1260 unsigned int myPerms
;
1262 struct cat_attr cnattr
;
1264 struct cnode
*skip_cp
= VTOC(vp
);
1265 kauth_cred_t cred
= vfs_context_ucred(context
);
1266 proc_t p
= vfs_context_proc(context
);
1268 is64bit
= proc_is64bit(p
);
1270 /* initialize the local cache and buffers */
1271 cache
.numcached
= 0;
1272 cache
.cachehits
= 0;
1274 cache
.acache
= NULL
;
1275 cache
.haveaccess
= NULL
;
1277 /* struct copyin done during dispatch... need to copy file_id array separately */
1278 if (ap
->a_data
== NULL
) {
1280 goto err_exit_bulk_access
;
1284 if (arg_size
!= sizeof(struct user64_ext_access_t
)) {
1286 goto err_exit_bulk_access
;
1289 user_access_structp
= (struct user64_ext_access_t
*)ap
->a_data
;
1291 } else if (arg_size
== sizeof(struct user32_access_t
)) {
1292 struct user32_access_t
*accessp
= (struct user32_access_t
*)ap
->a_data
;
1294 // convert an old style bulk-access struct to the new style
1295 tmp_user_access
.flags
= accessp
->flags
;
1296 tmp_user_access
.num_files
= accessp
->num_files
;
1297 tmp_user_access
.map_size
= 0;
1298 tmp_user_access
.file_ids
= CAST_USER_ADDR_T(accessp
->file_ids
);
1299 tmp_user_access
.bitmap
= USER_ADDR_NULL
;
1300 tmp_user_access
.access
= CAST_USER_ADDR_T(accessp
->access
);
1301 tmp_user_access
.num_parents
= 0;
1302 user_access_structp
= &tmp_user_access
;
1304 } else if (arg_size
== sizeof(struct user32_ext_access_t
)) {
1305 struct user32_ext_access_t
*accessp
= (struct user32_ext_access_t
*)ap
->a_data
;
1307 // up-cast from a 32-bit version of the struct
1308 tmp_user_access
.flags
= accessp
->flags
;
1309 tmp_user_access
.num_files
= accessp
->num_files
;
1310 tmp_user_access
.map_size
= accessp
->map_size
;
1311 tmp_user_access
.num_parents
= accessp
->num_parents
;
1313 tmp_user_access
.file_ids
= CAST_USER_ADDR_T(accessp
->file_ids
);
1314 tmp_user_access
.bitmap
= CAST_USER_ADDR_T(accessp
->bitmap
);
1315 tmp_user_access
.access
= CAST_USER_ADDR_T(accessp
->access
);
1316 tmp_user_access
.parents
= CAST_USER_ADDR_T(accessp
->parents
);
1318 user_access_structp
= &tmp_user_access
;
1321 goto err_exit_bulk_access
;
1324 map_size
= user_access_structp
->map_size
;
1326 num_files
= user_access_structp
->num_files
;
1328 num_parents
= user_access_structp
->num_parents
;
1330 if (num_files
< 1) {
1331 goto err_exit_bulk_access
;
1333 if (num_files
> 1024) {
1335 goto err_exit_bulk_access
;
1338 if (num_parents
> 1024) {
1340 goto err_exit_bulk_access
;
1343 file_ids
= hfs_malloc(sizeof(int) * num_files
);
1344 access
= hfs_malloc(sizeof(short) * num_files
);
1346 bitmap
= hfs_mallocz(sizeof(char) * map_size
);
1350 parents
= hfs_malloc(sizeof(cnid_t
) * num_parents
);
1353 cache
.acache
= hfs_malloc(sizeof(int) * NUM_CACHE_ENTRIES
);
1354 cache
.haveaccess
= hfs_malloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES
);
1356 if ((error
= copyin(user_access_structp
->file_ids
, (caddr_t
)file_ids
,
1357 num_files
* sizeof(int)))) {
1358 goto err_exit_bulk_access
;
1362 if ((error
= copyin(user_access_structp
->parents
, (caddr_t
)parents
,
1363 num_parents
* sizeof(cnid_t
)))) {
1364 goto err_exit_bulk_access
;
1368 flags
= user_access_structp
->flags
;
1369 if ((flags
& (F_OK
| R_OK
| W_OK
| X_OK
)) == 0) {
1373 /* check if we've been passed leaf node ids or parent ids */
1374 if (flags
& PARENT_IDS_FLAG
) {
1378 /* Check access to each file_id passed in */
1379 for (i
= 0; i
< num_files
; i
++) {
1381 cnid
= (cnid_t
) file_ids
[i
];
1383 /* root always has access */
1384 if ((!parents
) && (!suser(cred
, NULL
))) {
1390 /* do the lookup (checks the cnode hash, then the catalog) */
1391 error
= do_attr_lookup(hfsmp
, &cache
, cnid
, skip_cp
, &catkey
, &cnattr
);
1393 access
[i
] = (short) error
;
1398 // Check if the leaf matches one of the parent scopes
1399 leaf_index
= cache_binSearch(parents
, num_parents
-1, cnid
, NULL
);
1400 if (leaf_index
>= 0 && parents
[leaf_index
] == cnid
)
1401 prev_parent_check_ok
= 0;
1402 else if (leaf_index
>= 0)
1403 prev_parent_check_ok
= 1;
1406 // if the thing has acl's, do the full permission check
1407 if ((cnattr
.ca_recflags
& kHFSHasSecurityMask
) != 0) {
1410 /* get the vnode for this cnid */
1411 myErr
= hfs_vget(hfsmp
, cnid
, &cvp
, 0, 0);
1417 hfs_unlock(VTOC(cvp
));
1419 if (vnode_vtype(cvp
) == VDIR
) {
1420 myErr
= vnode_authorize(cvp
, NULL
, (KAUTH_VNODE_SEARCH
| KAUTH_VNODE_LIST_DIRECTORY
), context
);
1422 myErr
= vnode_authorize(cvp
, NULL
, KAUTH_VNODE_READ_DATA
, context
);
1431 /* before calling CheckAccess(), check the target file for read access */
1432 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
,
1433 cnattr
.ca_mode
, hfsmp
->hfs_mp
, cred
, p
);
1435 /* fail fast if no access */
1436 if ((myPerms
& flags
) == 0) {
1442 /* we were passed an array of parent ids */
1443 catkey
.hfsPlus
.parentID
= cnid
;
1446 /* if the last guy had the same parent and had access, we're done */
1447 if (i
> 0 && catkey
.hfsPlus
.parentID
== prevParent_cnid
&& access
[i
-1] == 0 && prev_parent_check_ok
) {
1453 myaccess
= do_access_check(hfsmp
, &error
, &cache
, catkey
.hfsPlus
.parentID
,
1454 skip_cp
, p
, cred
, context
,bitmap
, map_size
, parents
, num_parents
);
1456 if (myaccess
|| (error
== ESRCH
&& leaf_index
!= -1)) {
1457 access
[i
] = 0; // have access.. no errors to report
1459 access
[i
] = (error
!= 0 ? (short) error
: EACCES
);
1462 prevParent_cnid
= catkey
.hfsPlus
.parentID
;
1465 /* copyout the access array */
1466 if ((error
= copyout((caddr_t
)access
, user_access_structp
->access
,
1467 num_files
* sizeof (short)))) {
1468 goto err_exit_bulk_access
;
1470 if (map_size
&& bitmap
) {
1471 if ((error
= copyout((caddr_t
)bitmap
, user_access_structp
->bitmap
,
1472 map_size
* sizeof (char)))) {
1473 goto err_exit_bulk_access
;
1478 err_exit_bulk_access
:
1480 hfs_free(file_ids
, sizeof(int) * num_files
);
1481 hfs_free(parents
, sizeof(cnid_t
) * num_parents
);
1482 hfs_free(bitmap
, sizeof(char) * map_size
);
1483 hfs_free(access
, sizeof(short) * num_files
);
1484 hfs_free(cache
.acache
, sizeof(int) * NUM_CACHE_ENTRIES
);
1485 hfs_free(cache
.haveaccess
, sizeof(unsigned char) * NUM_CACHE_ENTRIES
);
1491 /* end "bulk-access" support */
1495 * Control filesystem operating characteristics.
1498 hfs_vnop_ioctl( struct vnop_ioctl_args
/* {
1503 vfs_context_t a_context;
1506 struct vnode
* vp
= ap
->a_vp
;
1507 struct hfsmount
*hfsmp
= VTOHFS(vp
);
1508 vfs_context_t context
= ap
->a_context
;
1509 kauth_cred_t cred
= vfs_context_ucred(context
);
1510 proc_t p
= vfs_context_proc(context
);
1511 struct vfsstatfs
*vfsp
;
1513 off_t jnl_start
, jnl_size
;
1514 struct hfs_journal_info
*jip
;
1517 off_t uncompressed_size
= -1;
1518 int decmpfs_error
= 0;
1520 if (ap
->a_command
== F_RDADVISE
) {
1521 /* we need to inspect the decmpfs state of the file as early as possible */
1522 compressed
= hfs_file_is_compressed(VTOC(vp
), 0);
1524 if (VNODE_IS_RSRC(vp
)) {
1525 /* if this is the resource fork, treat it as if it were empty */
1526 uncompressed_size
= 0;
1528 decmpfs_error
= hfs_uncompressed_size_of_compressed_file(NULL
, vp
, 0, &uncompressed_size
, 0);
1529 if (decmpfs_error
!= 0) {
1530 /* failed to get the uncompressed size, we'll check for this later */
1531 uncompressed_size
= -1;
1536 #endif /* HFS_COMPRESSION */
1538 is64bit
= proc_is64bit(p
);
1541 #if HFS_CONFIG_KEY_ROLL
1542 // The HFSIOC_KEY_ROLL fsctl does its own access checks
1543 if (ap
->a_command
!= HFSIOC_KEY_ROLL
)
1547 if ((error
= cp_handle_vnop(vp
, CP_WRITE_ACCESS
, 0)) != 0) {
1551 #endif /* CONFIG_PROTECT */
1553 switch (ap
->a_command
) {
1555 case HFSIOC_GETPATH
:
1557 struct vnode
*file_vp
;
1562 #ifdef VN_GETPATH_NEW
1564 #else // VN_GETPATH_NEW
1566 #endif // VN_GETPATH_NEW
1568 /* Caller must be owner of file system. */
1569 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1570 if (suser(cred
, NULL
) &&
1571 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1574 /* Target vnode must be file system's root. */
1575 if (!vnode_isvroot(vp
)) {
1578 bufptr
= (char *)ap
->a_data
;
1579 cnid
= strtoul(bufptr
, NULL
, 10);
1580 if (ap
->a_fflag
& HFS_GETPATH_VOLUME_RELATIVE
) {
1581 flags
|= BUILDPATH_VOLUME_RELATIVE
;
1584 /* We need to call hfs_vfs_vget to leverage the code that will
1585 * fix the origin list for us if needed, as opposed to calling
1586 * hfs_vget, since we will need the parent for vn_getpath_ext call.
1589 if ((error
= hfs_vfs_vget(HFSTOVFS(hfsmp
), cnid
, &file_vp
, context
))) {
1593 outlen
= sizeof(pathname_t
);
1594 error
= vn_getpath_ext(file_vp
, NULLVP
, bufptr
, &outlen
, flags
);
1600 case HFSIOC_SET_MAX_DEFRAG_SIZE
:
1602 int error
= 0; /* Assume success */
1603 u_int32_t maxsize
= 0;
1605 if (vnode_vfsisrdonly(vp
)) {
1608 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1609 if (!kauth_cred_issuser(cred
)) {
1610 return (EACCES
); /* must be root */
1613 maxsize
= *(u_int32_t
*)ap
->a_data
;
1615 hfs_lock_mount(hfsmp
);
1616 if (maxsize
> HFS_MAX_DEFRAG_SIZE
) {
1620 hfsmp
->hfs_defrag_max
= maxsize
;
1622 hfs_unlock_mount(hfsmp
);
1627 case HFSIOC_FORCE_ENABLE_DEFRAG
:
1629 int error
= 0; /* Assume success */
1630 u_int32_t do_enable
= 0;
1632 if (vnode_vfsisrdonly(vp
)) {
1635 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1636 if (!kauth_cred_issuser(cred
)) {
1637 return (EACCES
); /* must be root */
1640 do_enable
= *(u_int32_t
*)ap
->a_data
;
1642 hfs_lock_mount(hfsmp
);
1643 if (do_enable
!= 0) {
1644 hfsmp
->hfs_defrag_nowait
= 1;
1650 hfs_unlock_mount(hfsmp
);
1656 case HFSIOC_TRANSFER_DOCUMENT_ID
:
1658 struct cnode
*cp
= NULL
;
1660 u_int32_t to_fd
= *(u_int32_t
*)ap
->a_data
;
1661 struct fileproc
*to_fp
;
1662 struct vnode
*to_vp
;
1663 struct cnode
*to_cp
;
1667 if ((error
= fp_getfvp(p
, to_fd
, &to_fp
, &to_vp
)) != 0) {
1668 //printf("could not get the vnode for fd %d (err %d)\n", to_fd, error);
1671 if ( (error
= vnode_getwithref(to_vp
)) ) {
1676 if (VTOHFS(to_vp
) != hfsmp
) {
1678 goto transfer_cleanup
;
1681 int need_unlock
= 1;
1682 to_cp
= VTOC(to_vp
);
1683 error
= hfs_lockpair(cp
, to_cp
, HFS_EXCLUSIVE_LOCK
);
1685 //printf("could not lock the pair of cnodes (error %d)\n", error);
1686 goto transfer_cleanup
;
1689 if (!(cp
->c_bsdflags
& UF_TRACKED
)) {
1691 } else if (to_cp
->c_bsdflags
& UF_TRACKED
) {
1693 // if the destination is already tracked, return an error
1694 // as otherwise it's a silent deletion of the target's
1698 } else if (S_ISDIR(cp
->c_attr
.ca_mode
) || S_ISREG(cp
->c_attr
.ca_mode
) || S_ISLNK(cp
->c_attr
.ca_mode
)) {
1700 // we can use the FndrExtendedFileInfo because the doc-id is the first
1701 // thing in both it and the ExtendedDirInfo struct which is fixed in
1702 // format and can not change layout
1704 struct FndrExtendedFileInfo
*f_extinfo
= (struct FndrExtendedFileInfo
*)((u_int8_t
*)cp
->c_finderinfo
+ 16);
1705 struct FndrExtendedFileInfo
*to_extinfo
= (struct FndrExtendedFileInfo
*)((u_int8_t
*)to_cp
->c_finderinfo
+ 16);
1707 if (f_extinfo
->document_id
== 0) {
1710 hfs_unlockpair(cp
, to_cp
); // have to unlock to be able to get a new-id
1712 if ((error
= hfs_generate_document_id(hfsmp
, &new_id
)) == 0) {
1714 // re-lock the pair now that we have the document-id
1716 hfs_lockpair(cp
, to_cp
, HFS_EXCLUSIVE_LOCK
);
1717 f_extinfo
->document_id
= new_id
;
1719 goto transfer_cleanup
;
1723 to_extinfo
->document_id
= f_extinfo
->document_id
;
1724 f_extinfo
->document_id
= 0;
1725 //printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid);
1727 // make sure the destination is also UF_TRACKED
1728 to_cp
->c_bsdflags
|= UF_TRACKED
;
1729 cp
->c_bsdflags
&= ~UF_TRACKED
;
1731 // mark the cnodes dirty
1732 cp
->c_flag
|= C_MODIFIED
;
1733 to_cp
->c_flag
|= C_MODIFIED
;
1736 if ((error
= hfs_start_transaction(hfsmp
)) == 0) {
1738 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_CATALOG
, HFS_EXCLUSIVE_LOCK
);
1740 (void) cat_update(hfsmp
, &cp
->c_desc
, &cp
->c_attr
, NULL
, NULL
);
1741 (void) cat_update(hfsmp
, &to_cp
->c_desc
, &to_cp
->c_attr
, NULL
, NULL
);
1743 hfs_systemfile_unlock (hfsmp
, lockflags
);
1744 (void) hfs_end_transaction(hfsmp
);
1747 add_fsevent(FSE_DOCID_CHANGED
, context
,
1748 FSE_ARG_DEV
, hfsmp
->hfs_raw_dev
,
1749 FSE_ARG_INO
, (ino64_t
)cp
->c_fileid
, // src inode #
1750 FSE_ARG_INO
, (ino64_t
)to_cp
->c_fileid
, // dst inode #
1751 FSE_ARG_INT32
, to_extinfo
->document_id
,
1754 hfs_unlockpair(cp
, to_cp
); // unlock this so we can send the fsevents
1757 if (need_fsevent(FSE_STAT_CHANGED
, vp
)) {
1758 add_fsevent(FSE_STAT_CHANGED
, context
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
);
1760 if (need_fsevent(FSE_STAT_CHANGED
, to_vp
)) {
1761 add_fsevent(FSE_STAT_CHANGED
, context
, FSE_ARG_VNODE
, to_vp
, FSE_ARG_DONE
);
1766 hfs_unlockpair(cp
, to_cp
);
1778 case HFSIOC_PREV_LINK
:
1779 case HFSIOC_NEXT_LINK
:
1786 /* Caller must be owner of file system. */
1787 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1788 if (suser(cred
, NULL
) &&
1789 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1792 /* Target vnode must be file system's root. */
1793 if (!vnode_isvroot(vp
)) {
1796 linkfileid
= *(cnid_t
*)ap
->a_data
;
1797 if (linkfileid
< kHFSFirstUserCatalogNodeID
) {
1800 if ((error
= hfs_lookup_siblinglinks(hfsmp
, linkfileid
, &prevlinkid
, &nextlinkid
))) {
1803 if (ap
->a_command
== HFSIOC_NEXT_LINK
) {
1804 *(cnid_t
*)ap
->a_data
= nextlinkid
;
1806 *(cnid_t
*)ap
->a_data
= prevlinkid
;
1811 case HFSIOC_RESIZE_PROGRESS
: {
1813 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1814 if (suser(cred
, NULL
) &&
1815 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1816 return (EACCES
); /* must be owner of file system */
1818 if (!vnode_isvroot(vp
)) {
1821 /* file system must not be mounted read-only */
1822 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1826 return hfs_resize_progress(hfsmp
, (u_int32_t
*)ap
->a_data
);
1829 case HFSIOC_RESIZE_VOLUME
: {
1834 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1835 if (suser(cred
, NULL
) &&
1836 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1837 return (EACCES
); /* must be owner of file system */
1839 if (!vnode_isvroot(vp
)) {
1843 /* filesystem must not be mounted read only */
1844 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1847 newsize
= *(u_int64_t
*)ap
->a_data
;
1848 cursize
= (u_int64_t
)hfsmp
->totalBlocks
* (u_int64_t
)hfsmp
->blockSize
;
1850 if (newsize
== cursize
) {
1853 IOBSDMountChange(hfsmp
->hfs_mp
, kIOMountChangeWillResize
);
1854 if (newsize
> cursize
) {
1855 ret
= hfs_extendfs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
1857 ret
= hfs_truncatefs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
1859 IOBSDMountChange(hfsmp
->hfs_mp
, kIOMountChangeDidResize
);
1862 case HFSIOC_CHANGE_NEXT_ALLOCATION
: {
1863 int error
= 0; /* Assume success */
1866 if (vnode_vfsisrdonly(vp
)) {
1869 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1870 if (suser(cred
, NULL
) &&
1871 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1872 return (EACCES
); /* must be owner of file system */
1874 if (!vnode_isvroot(vp
)) {
1877 hfs_lock_mount(hfsmp
);
1878 location
= *(u_int32_t
*)ap
->a_data
;
1879 if ((location
>= hfsmp
->allocLimit
) &&
1880 (location
!= HFS_NO_UPDATE_NEXT_ALLOCATION
)) {
1882 goto fail_change_next_allocation
;
1884 /* Return previous value. */
1885 *(u_int32_t
*)ap
->a_data
= hfsmp
->nextAllocation
;
1886 if (location
== HFS_NO_UPDATE_NEXT_ALLOCATION
) {
1887 /* On magic value for location, set nextAllocation to next block
1888 * after metadata zone and set flag in mount structure to indicate
1889 * that nextAllocation should not be updated again.
1891 if (hfsmp
->hfs_metazone_end
!= 0) {
1892 HFS_UPDATE_NEXT_ALLOCATION(hfsmp
, hfsmp
->hfs_metazone_end
+ 1);
1894 hfsmp
->hfs_flags
|= HFS_SKIP_UPDATE_NEXT_ALLOCATION
;
1896 hfsmp
->hfs_flags
&= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION
;
1897 HFS_UPDATE_NEXT_ALLOCATION(hfsmp
, location
);
1899 MarkVCBDirty(hfsmp
);
1900 fail_change_next_allocation
:
1901 hfs_unlock_mount(hfsmp
);
1906 case HFSIOC_SETBACKINGSTOREINFO
: {
1907 struct vnode
* di_vp
;
1908 struct hfs_backingstoreinfo
*bsdata
;
1911 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1914 if (hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) {
1917 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1918 if (suser(cred
, NULL
) &&
1919 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1920 return (EACCES
); /* must be owner of file system */
1922 bsdata
= (struct hfs_backingstoreinfo
*)ap
->a_data
;
1923 if (bsdata
== NULL
) {
1926 if ((error
= file_vnode(bsdata
->backingfd
, &di_vp
))) {
1929 if ((error
= vnode_getwithref(di_vp
))) {
1930 file_drop(bsdata
->backingfd
);
1934 if (vnode_mount(vp
) == vnode_mount(di_vp
)) {
1935 (void)vnode_put(di_vp
);
1936 file_drop(bsdata
->backingfd
);
1940 // Dropped in unmount
1943 hfs_lock_mount(hfsmp
);
1944 hfsmp
->hfs_backingvp
= di_vp
;
1945 hfsmp
->hfs_flags
|= HFS_HAS_SPARSE_DEVICE
;
1946 hfsmp
->hfs_sparsebandblks
= bsdata
->bandsize
/ hfsmp
->blockSize
* 4;
1947 hfs_unlock_mount(hfsmp
);
1949 /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */
1952 * If the sparse image is on a sparse image file (as opposed to a sparse
1953 * bundle), then we may need to limit the free space to the maximum size
1954 * of a file on that volume. So we query (using pathconf), and if we get
1955 * a meaningful result, we cache the number of blocks for later use in
1958 hfsmp
->hfs_backingfs_maxblocks
= 0;
1959 if (vnode_vtype(di_vp
) == VREG
) {
1962 terr
= vn_pathconf(di_vp
, _PC_FILESIZEBITS
, &hostbits
, context
);
1963 if (terr
== 0 && hostbits
!= 0 && hostbits
< 64) {
1964 u_int64_t hostfilesizemax
= ((u_int64_t
)1) << hostbits
;
1966 hfsmp
->hfs_backingfs_maxblocks
= hostfilesizemax
/ hfsmp
->blockSize
;
1970 /* The free extent cache is managed differently for sparse devices.
1971 * There is a window between which the volume is mounted and the
1972 * device is marked as sparse, so the free extent cache for this
1973 * volume is currently initialized as normal volume (sorted by block
1974 * count). Reset the cache so that it will be rebuilt again
1975 * for sparse device (sorted by start block).
1977 ResetVCBFreeExtCache(hfsmp
);
1979 (void)vnode_put(di_vp
);
1980 file_drop(bsdata
->backingfd
);
1984 case HFSIOC_CLRBACKINGSTOREINFO
: {
1985 struct vnode
* tmpvp
;
1987 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1988 if (suser(cred
, NULL
) &&
1989 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1990 return (EACCES
); /* must be owner of file system */
1992 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1996 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
1997 hfsmp
->hfs_backingvp
) {
1999 hfs_lock_mount(hfsmp
);
2000 hfsmp
->hfs_flags
&= ~HFS_HAS_SPARSE_DEVICE
;
2001 tmpvp
= hfsmp
->hfs_backingvp
;
2002 hfsmp
->hfs_backingvp
= NULLVP
;
2003 hfsmp
->hfs_sparsebandblks
= 0;
2004 hfs_unlock_mount(hfsmp
);
2010 #endif /* HFS_SPARSE_DEV */
2012 /* Change the next CNID stored in the VH */
2013 case HFSIOC_CHANGE_NEXTCNID
: {
2014 int error
= 0; /* Assume success */
2019 if (vnode_vfsisrdonly(vp
)) {
2022 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
2023 if (suser(cred
, NULL
) &&
2024 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
2025 return (EACCES
); /* must be owner of file system */
2028 fileid
= *(u_int32_t
*)ap
->a_data
;
2030 /* Must have catalog lock excl. to advance the CNID pointer */
2031 lockflags
= hfs_systemfile_lock (hfsmp
, SFL_CATALOG
, HFS_EXCLUSIVE_LOCK
);
2033 hfs_lock_mount(hfsmp
);
2035 /* If it is less than the current next CNID, force the wraparound bit to be set */
2036 if (fileid
< hfsmp
->vcbNxtCNID
) {
2040 /* Return previous value. */
2041 *(u_int32_t
*)ap
->a_data
= hfsmp
->vcbNxtCNID
;
2043 hfsmp
->vcbNxtCNID
= fileid
;
2046 hfsmp
->vcbAtrb
|= kHFSCatalogNodeIDsReusedMask
;
2049 MarkVCBDirty(hfsmp
);
2050 hfs_unlock_mount(hfsmp
);
2051 hfs_systemfile_unlock (hfsmp
, lockflags
);
2059 mp
= vnode_mount(vp
);
2060 hfsmp
= VFSTOHFS(mp
);
2065 vfsp
= vfs_statfs(mp
);
2067 if (kauth_cred_getuid(cred
) != vfsp
->f_owner
&&
2068 !kauth_cred_issuser(cred
))
2071 return hfs_freeze(hfsmp
);
2075 vfsp
= vfs_statfs(vnode_mount(vp
));
2076 if (kauth_cred_getuid(cred
) != vfsp
->f_owner
&&
2077 !kauth_cred_issuser(cred
))
2080 return hfs_thaw(hfsmp
, current_proc());
2083 case HFSIOC_EXT_BULKACCESS32
:
2084 case HFSIOC_EXT_BULKACCESS64
: {
2087 if (hfsmp
->hfs_flags
& HFS_STANDARD
) {
2093 size
= sizeof(struct user64_ext_access_t
);
2095 size
= sizeof(struct user32_ext_access_t
);
2098 return do_bulk_access_check(hfsmp
, vp
, ap
, size
, context
);
2101 case HFSIOC_SET_XATTREXTENTS_STATE
: {
2104 if (ap
->a_data
== NULL
) {
2108 state
= *(int *)ap
->a_data
;
2110 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2114 /* Super-user can enable or disable extent-based extended
2115 * attribute support on a volume
2116 * Note: Starting Mac OS X 10.7, extent-based extended attributes
2117 * are enabled by default, so any change will be transient only
2118 * till the volume is remounted.
2120 if (!kauth_cred_issuser(kauth_cred_get())) {
2123 if (state
== 0 || state
== 1)
2124 return hfs_set_volxattr(hfsmp
, HFSIOC_SET_XATTREXTENTS_STATE
, state
);
2129 case F_SETSTATICCONTENT
: {
2131 int enable_static
= 0;
2132 struct cnode
*cp
= NULL
;
2134 * lock the cnode, decorate the cnode flag, and bail out.
2135 * VFS should have already authenticated the caller for us.
2140 * Note that even though ap->a_data is of type caddr_t,
2141 * the fcntl layer at the syscall handler will pass in NULL
2142 * or 1 depending on what the argument supplied to the fcntl
2143 * was. So it is in fact correct to check the ap->a_data
2144 * argument for zero or non-zero value when deciding whether or not
2145 * to enable the static bit in the cnode.
2149 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2154 error
= hfs_lock (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2156 if (enable_static
) {
2157 cp
->c_flag
|= C_SSD_STATIC
;
2160 cp
->c_flag
&= ~C_SSD_STATIC
;
2167 case F_SET_GREEDY_MODE
: {
2169 int enable_greedy_mode
= 0;
2170 struct cnode
*cp
= NULL
;
2172 * lock the cnode, decorate the cnode flag, and bail out.
2173 * VFS should have already authenticated the caller for us.
2178 * Note that even though ap->a_data is of type caddr_t,
2179 * the fcntl layer at the syscall handler will pass in NULL
2180 * or 1 depending on what the argument supplied to the fcntl
2181 * was. So it is in fact correct to check the ap->a_data
2182 * argument for zero or non-zero value when deciding whether or not
2183 * to enable the greedy mode bit in the cnode.
2185 enable_greedy_mode
= 1;
2187 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2192 error
= hfs_lock (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2194 if (enable_greedy_mode
) {
2195 cp
->c_flag
|= C_SSD_GREEDY_MODE
;
2198 cp
->c_flag
&= ~C_SSD_GREEDY_MODE
;
2207 uint32_t iotypeflag
= 0;
2209 struct cnode
*cp
= NULL
;
2211 * lock the cnode, decorate the cnode flag, and bail out.
2212 * VFS should have already authenticated the caller for us.
2215 if (ap
->a_data
== NULL
) {
2220 * Note that even though ap->a_data is of type caddr_t, we
2221 * can only use 32 bits of flag values.
2223 iotypeflag
= (uint32_t) ap
->a_data
;
2224 switch (iotypeflag
) {
2225 case F_IOTYPE_ISOCHRONOUS
:
2232 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2237 error
= hfs_lock (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2239 switch (iotypeflag
) {
2240 case F_IOTYPE_ISOCHRONOUS
:
2241 cp
->c_flag
|= C_IO_ISOCHRONOUS
;
    case F_MAKECOMPRESSED: {
        int error = 0;
        uint32_t gen_counter;
        struct cnode *cp = NULL;
        int reset_decmp = 0;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return EROFS;
        }

        /*
         * acquire & lock the cnode.
         * VFS should have already authenticated the caller for us.
         */

        if (ap->a_data) {
            /*
             * Cast the pointer into a uint32_t so we can extract the
             * supplied generation counter.
             */
            gen_counter = *((uint32_t*)ap->a_data);
        }
        else {
            return EINVAL;
        }

        cp = VTOC(vp);

        /* Grab truncate lock first; we may truncate the file */
        hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

        error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
        if (error) {
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
            return error;
        }

        /* Are there any other usecounts/FDs? */
        if (vnode_isinuse(vp, 1)) {
            hfs_unlock(cp);
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
            return EBUSY;
        }

        /* now we have the cnode locked down; Validate arguments */
        if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
            /* EINVAL if you are trying to manipulate an IMMUTABLE file */
            hfs_unlock(cp);
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
            return EINVAL;
        }

        if ((hfs_get_gencount(cp)) == gen_counter) {
            /*
             * OK, the gen_counter matched.  Go for it:
             * Toggle state bits, truncate file, and suppress mtime update
             */
            reset_decmp = 1;
            cp->c_bsdflags |= UF_COMPRESSED;

            error = hfs_truncate(vp, 0, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES,
                                 ap->a_context);
        }
        else {
            error = ESTALE;
        }

        /* Unlock cnode before executing decmpfs ; they may need to get an EA */
        hfs_unlock(cp);

        /*
         * Reset the decmp state while still holding the truncate lock. We need to
         * serialize here against a listxattr on this node which may occur at any
         * time.
         *
         * Even if '0/skiplock' is passed in 2nd argument to hfs_file_is_compressed,
         * that will still potentially require getting the com.apple.decmpfs EA. If the
         * EA is required, then we can't hold the cnode lock, because the getxattr call is
         * generic (through VFS), and can't pass along any info telling it that we're already
         * holding it (the lock). If we don't serialize, then we risk listxattr stopping
         * and trying to fill in the hfs_file_is_compressed info during the callback
         * operation, which will result in deadlock against the b-tree node.
         *
         * So, to serialize against listxattr (which will grab buf_t meta references on
         * the b-tree blocks), we hold the truncate lock as we're manipulating the
         * decmpfs payload.
         */
        if ((reset_decmp) && (error == 0)) {
            decmpfs_cnode *dp = VTOCMP(vp);
            decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);

            /* Initialize the decmpfs node as needed */
            (void) hfs_file_is_compressed(cp, 0); /* ok to take lock */
        }

        hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
        return error;
    }
    case F_SETBACKINGSTORE: {
        int error = 0;

        /*
         * See comment in F_SETSTATICCONTENT re: using
         * a null check for a_data
         */
        if (ap->a_data) {
            error = hfs_set_backingstore(vp, 1);
        }
        else {
            error = hfs_set_backingstore(vp, 0);
        }

        return error;
    }

    case F_GETPATH_MTMINFO: {
        int error = 0;

        int *data = (int*) ap->a_data;

        /* Ask if this is a backingstore vnode */
        error = hfs_is_backingstore(vp, data);

        return error;
    }
    case F_FULLFSYNC: {
        int error;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return (EROFS);
        }
        error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
        if (error == 0) {
            error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_FULL, p);
            hfs_unlock(VTOC(vp));
        }

        return error;
    }

    case F_BARRIERFSYNC: {
        int error;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return (EROFS);
        }
        error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
        if (error == 0) {
            error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_BARRIER, p);
            hfs_unlock(VTOC(vp));
        }

        return error;
    }
    case F_CHKCLEAN: {
        register struct cnode *cp;
        int error;

        if (!vnode_isreg(vp))
            return EINVAL;

        error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
        if (error == 0) {
            cp = VTOC(vp);
            /*
             * used by regression test to determine if
             * all the dirty pages (via write) have been cleaned
             * after a call to 'fsync'.
             */
            error = is_file_clean(vp, VTOF(vp)->ff_size);
            hfs_unlock(cp);
        }
        return error;
    }
    case F_RDADVISE: {
        register struct radvisory *ra;
        struct filefork *fp;
        int error = 0;

        if (!vnode_isreg(vp))
            return EINVAL;

        ra = (struct radvisory *)(ap->a_data);
        fp = VTOF(vp);

        /* Protect against a size change. */
        hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

#if HFS_COMPRESSION
        if (compressed) {
            if (uncompressed_size == -1) {
                /* fetching the uncompressed size failed above, so return the error */
                error = decmpfs_error;
            } else if (ra->ra_offset >= uncompressed_size) {
                error = EFBIG;
            } else {
                error = advisory_read(vp, uncompressed_size, ra->ra_offset, ra->ra_count);
            }
        } else
#endif /* HFS_COMPRESSION */
        if (ra->ra_offset >= fp->ff_size) {
            error = EFBIG;
        } else {
            error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
        }

        hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
        return (error);
    }
    case HFSIOC_GET_VOL_CREATE_TIME_32: {
        *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
        return 0;
    }

    case HFSIOC_GET_VOL_CREATE_TIME_64: {
        *(user64_time_t *)(ap->a_data) = (user64_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
        return 0;
    }

    case SPOTLIGHT_IOC_GET_MOUNT_TIME:
        *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
        break;

    case SPOTLIGHT_IOC_GET_LAST_MTIME:
        *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
        break;
    case HFSIOC_GET_VERY_LOW_DISK:
        *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
        break;

    case HFSIOC_SET_VERY_LOW_DISK:
        if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
            return EINVAL;
        }

        hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
        break;

    case HFSIOC_GET_LOW_DISK:
        *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
        break;

    case HFSIOC_SET_LOW_DISK:
        if (   *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
            || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
            return EINVAL;
        }

        hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
        break;

    /* The following two fsctls were ported from apfs. */
    case APFSIOC_GET_NEAR_LOW_DISK:
        *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_nearwarninglimit;
        break;

    case APFSIOC_SET_NEAR_LOW_DISK:
        if (   *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
            || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
            return EINVAL;
        }

        hfsmp->hfs_freespace_notify_nearwarninglimit = *(uint32_t *)ap->a_data;
        break;

    case HFSIOC_GET_DESIRED_DISK:
        *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
        break;

    case HFSIOC_SET_DESIRED_DISK:
        if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
            return EINVAL;
        }

        hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
        break;

    case HFSIOC_VOLUME_STATUS:
        *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
        break;
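
    /*
     * Taken together, the checks above keep the free-space notification
     * thresholds ordered roughly as dangerlimit < warninglimit <
     * nearwarninglimit < desiredlevel.  A userspace sketch (hypothetical
     * volume path and value) passes a plain uint32_t allocation-block
     * count through fsctl() with the selector handled above:
     *
     *     uint32_t limit = 1024;
     *     fsctl("/Volumes/Example", HFSIOC_SET_LOW_DISK, &limit, 0);
     */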
    case HFS_SET_BOOT_INFO:
        if (!vnode_isvroot(vp))
            return (EINVAL);
        if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
            return (EACCES);	/* must be superuser or owner of filesystem */
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return (EROFS);
        }
        hfs_lock_mount(hfsmp);
        bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
        /* Null out the cached UUID, to be safe */
        uuid_clear(hfsmp->hfs_full_uuid);
        hfs_unlock_mount(hfsmp);
        (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
        break;

    case HFS_GET_BOOT_INFO:
        if (!vnode_isvroot(vp))
            return (EINVAL);
        hfs_lock_mount(hfsmp);
        bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
        hfs_unlock_mount(hfsmp);
        break;

    /* case HFS_MARK_BOOT_CORRUPT: _IO are the same */
    case HFSIOC_MARK_BOOT_CORRUPT:
        /* Mark the boot volume corrupt by setting
         * kHFSVolumeInconsistentBit in the volume header.  This will
         * force fsck_hfs on next mount.
         */
        if (!kauth_cred_issuser(kauth_cred_get())) {
            return EACCES;
        }

        /* Allowed only on the root vnode of the boot volume */
        if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
            !vnode_isvroot(vp)) {
            return EINVAL;
        }
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return (EROFS);
        }
        printf("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
        hfs_mark_inconsistent(hfsmp, HFS_FSCK_FORCED);
        break;
    case HFSIOC_GET_JOURNAL_INFO:
        jip = (struct hfs_journal_info*)ap->a_data;

        if (hfsmp->jnl == NULL) {
            jnl_start = 0;
            jnl_size  = 0;
        } else {
            jnl_start = hfs_blk_to_bytes(hfsmp->jnl_start, hfsmp->blockSize) + hfsmp->hfsPlusIOPosOffset;
            jnl_size  = hfsmp->jnl_size;
        }

        jip->jstart = jnl_start;
        jip->jsize = jnl_size;
        break;

    case HFSIOC_SET_ALWAYS_ZEROFILL: {
        struct cnode *cp = VTOC(vp);

        if (*(int *)ap->a_data) {
            cp->c_flag |= C_ALWAYS_ZEROFILL;
        } else {
            cp->c_flag &= ~C_ALWAYS_ZEROFILL;
        }
        break;
    }

    /* case HFS_DISABLE_METAZONE: _IO are the same */
    case HFSIOC_DISABLE_METAZONE: {
        /* Only root can disable metadata zone */
        if (!kauth_cred_issuser(kauth_cred_get())) {
            return EACCES;
        }
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return (EROFS);
        }

        /* Disable metadata zone now */
        (void) hfs_metadatazone_init(hfsmp, true);
        printf("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
        break;
    }
    case HFSIOC_FSINFO_METADATA_BLOCKS: {
        int error;
        struct hfsinfo_metadata *hinfo;

        hinfo = (struct hfsinfo_metadata *)ap->a_data;

        /* Get information about number of metadata blocks */
        error = hfs_getinfo_metadata_blocks(hfsmp, hinfo);
        if (error) {
            return error;
        }

        break;
    }

    case HFSIOC_GET_FSINFO: {
        hfs_fsinfo *fsinfo = (hfs_fsinfo *)ap->a_data;

        /* Only root is allowed to get fsinfo */
        if (!kauth_cred_issuser(kauth_cred_get())) {
            return EACCES;
        }

        /*
         * Make sure that the caller's version number matches with
         * the kernel's version number.  This will make sure that
         * if the structures being read/written into are changed
         * by the kernel, the caller will not read incorrect data.
         *
         * The first three fields --- request_type, version and
         * flags are same for all the hfs_fsinfo structures, so
         * we can access the version number by assuming any
         * structure for now.
         */
        if (fsinfo->header.version != HFS_FSINFO_VERSION) {
            return ENOTSUP;
        }

        /* Make sure that the current file system is not marked inconsistent */
        if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
            return EIO;
        }

        return hfs_get_fsinfo(hfsmp, ap->a_data);
    }
    case HFSIOC_CS_FREESPACE_TRIM: {
        int error = 0;
        int lockflags = 0;

        /* Only root allowed */
        if (!kauth_cred_issuser(kauth_cred_get())) {
            return EACCES;
        }

        /*
         * This core functionality is similar to hfs_scan_blocks().
         * The main difference is that hfs_scan_blocks() is called
         * as part of mount where we are assured that the journal is
         * empty to start with.  This fcntl() can be called on a
         * mounted volume, therefore it has to flush the content of
         * the journal as well as ensure the state of summary table.
         *
         * This fcntl scans over the entire allocation bitmap,
         * creates list of all the free blocks, and issues TRIM
         * down to the underlying device.  This can take long time
         * as it can generate up to 512MB of read I/O.
         */

        if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
            error = hfs_init_summary(hfsmp);
            if (error) {
                printf("hfs: fsctl() could not initialize summary table for %s\n", hfsmp->vcbVN);
                return error;
            }
        }

        /*
         * The journal maintains list of recently deallocated blocks to
         * issue DKIOCUNMAPs when the corresponding journal transaction is
         * flushed to the disk.  To avoid any race conditions, we only
         * want one active trim list and only one thread issuing DKIOCUNMAPs.
         * Therefore we make sure that the journal trim list is sync'ed,
         * empty, and not modifiable for the duration of our scan.
         *
         * Take the journal lock before flushing the journal to the disk.
         * We will keep on holding the journal lock till we don't get the
         * bitmap lock to make sure that no new journal transactions can
         * start.  This will make sure that the journal trim list is not
         * modified after the journal flush and before getting bitmap lock.
         * We can release the journal lock after we acquire the bitmap
         * lock as it will prevent any further block deallocations.
         */
        hfs_journal_lock(hfsmp);

        /* Flush the journal and wait for all I/Os to finish up */
        error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
        if (error) {
            hfs_journal_unlock(hfsmp);
            return error;
        }

        /* Take bitmap lock to ensure it is not being modified */
        lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

        /* Release the journal lock */
        hfs_journal_unlock(hfsmp);

        /*
         * ScanUnmapBlocks reads the bitmap in large block size
         * (up to 1MB) unlike the runtime which reads the bitmap
         * in the 4K block size.  This can cause buf_t collisions
         * and potential data corruption.  To avoid this, we
         * invalidate all the existing buffers associated with
         * the bitmap vnode before scanning it.
         *
         * Note: ScanUnmapBlock() cleans up all the buffers
         * after itself, so there won't be any large buffers left
         * for us to clean up after it returns.
         */
        error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
        if (error) {
            hfs_systemfile_unlock(hfsmp, lockflags);
            return error;
        }

        /* Traverse bitmap and issue DKIOCUNMAPs */
        error = ScanUnmapBlocks(hfsmp);
        hfs_systemfile_unlock(hfsmp, lockflags);
        if (error) {
            return error;
        }

        break;
    }
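
    /*
     * Summary of the locking dance above: take the journal lock, flush the
     * journal, take the bitmap (SFL_BITMAP) lock, and only then drop the
     * journal lock.  The bitmap lock is what keeps new block deallocations
     * (and therefore new journal trim-list entries) from appearing while
     * ScanUnmapBlocks() walks the allocation bitmap.
     */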
2772 case HFSIOC_SET_HOTFILE_STATE
: {
2774 struct cnode
*cp
= VTOC(vp
);
2775 uint32_t hf_state
= *((uint32_t*)ap
->a_data
);
2776 uint32_t num_unpinned
= 0;
2778 error
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2783 // printf("hfs: setting hotfile state %d on %s\n", hf_state, vp->v_name);
2784 if (hf_state
== HFS_MARK_FASTDEVCANDIDATE
) {
2785 vnode_setfastdevicecandidate(vp
);
2787 cp
->c_attr
.ca_recflags
|= kHFSFastDevCandidateMask
;
2788 cp
->c_attr
.ca_recflags
&= ~kHFSDoNotFastDevPinMask
;
2789 cp
->c_flag
|= C_MODIFIED
;
2790 } else if (hf_state
== HFS_UNMARK_FASTDEVCANDIDATE
|| hf_state
== HFS_NEVER_FASTDEVCANDIDATE
) {
2791 vnode_clearfastdevicecandidate(vp
);
2792 hfs_removehotfile(vp
);
2794 if (cp
->c_attr
.ca_recflags
& kHFSFastDevPinnedMask
) {
2795 hfs_pin_vnode(hfsmp
, vp
, HFS_UNPIN_IT
, &num_unpinned
);
2798 if (hf_state
== HFS_NEVER_FASTDEVCANDIDATE
) {
2799 cp
->c_attr
.ca_recflags
|= kHFSDoNotFastDevPinMask
;
2801 cp
->c_attr
.ca_recflags
&= ~(kHFSFastDevCandidateMask
|kHFSFastDevPinnedMask
);
2802 cp
->c_flag
|= C_MODIFIED
;
2808 if (num_unpinned
!= 0) {
2809 lck_mtx_lock(&hfsmp
->hfc_mutex
);
2810 hfsmp
->hfs_hotfile_freeblks
+= num_unpinned
;
2811 lck_mtx_unlock(&hfsmp
->hfc_mutex
);
2818 case HFSIOC_REPIN_HOTFILE_STATE
: {
2820 uint32_t repin_what
= *((uint32_t*)ap
->a_data
);
2822 /* Only root allowed */
2823 if (!kauth_cred_issuser(kauth_cred_get())) {
2827 if (!(hfsmp
->hfs_flags
& (HFS_CS_METADATA_PIN
| HFS_CS_HOTFILE_PIN
))) {
2828 // this system is neither regular Fusion or Cooperative Fusion
2829 // so this fsctl makes no sense.
2834 // After a converting a CoreStorage volume to be encrypted, the
2835 // extents could have moved around underneath us. This call
2836 // allows corestoraged to re-pin everything that should be
2837 // pinned (it would happen on the next reboot too but that could
2838 // be a long time away).
2840 if ((repin_what
& HFS_REPIN_METADATA
) && (hfsmp
->hfs_flags
& HFS_CS_METADATA_PIN
)) {
2841 hfs_pin_fs_metadata(hfsmp
);
2843 if ((repin_what
& HFS_REPIN_USERDATA
) && (hfsmp
->hfs_flags
& HFS_CS_HOTFILE_PIN
)) {
2844 hfs_repin_hotfiles(hfsmp
);
2846 if ((repin_what
& HFS_REPIN_USERDATA
) && (hfsmp
->hfs_flags
& HFS_CS_SWAPFILE_PIN
)) {
2847 //XXX Swapfiles (marked SWAP_PINNED) may have moved too.
2848 //XXX Do we care? They have a more transient/dynamic nature/lifetime.
2854 #if HFS_CONFIG_KEY_ROLL
2856 case HFSIOC_KEY_ROLL
: {
2857 if (!kauth_cred_issuser(kauth_cred_get()))
2860 hfs_key_roll_args_t
*args
= (hfs_key_roll_args_t
*)ap
->a_data
;
2862 return hfs_key_roll_op(ap
->a_context
, ap
->a_vp
, args
);
2865 case HFSIOC_GET_KEY_AUTO_ROLL
: {
2866 if (!kauth_cred_issuser(kauth_cred_get()))
2869 hfs_key_auto_roll_args_t
*args
= (hfs_key_auto_roll_args_t
*)ap
->a_data
;
2870 if (args
->api_version
!= HFS_KEY_AUTO_ROLL_API_VERSION_1
)
2872 args
->flags
= (ISSET(hfsmp
->cproot_flags
, CP_ROOT_AUTO_ROLL_OLD_CLASS_GENERATION
)
2873 ? HFS_KEY_AUTO_ROLL_OLD_CLASS_GENERATION
: 0);
2874 args
->min_key_os_version
= hfsmp
->hfs_auto_roll_min_key_os_version
;
2875 args
->max_key_os_version
= hfsmp
->hfs_auto_roll_max_key_os_version
;
2879 case HFSIOC_SET_KEY_AUTO_ROLL
: {
2880 if (!kauth_cred_issuser(kauth_cred_get()))
2883 hfs_key_auto_roll_args_t
*args
= (hfs_key_auto_roll_args_t
*)ap
->a_data
;
2884 if (args
->api_version
!= HFS_KEY_AUTO_ROLL_API_VERSION_1
)
2886 return cp_set_auto_roll(hfsmp
, args
);
2889 #endif // HFS_CONFIG_KEY_ROLL
2892 case F_TRANSCODEKEY
:
2894 * This API is only supported when called via kernel so
2895 * a_fflag must be set to 1 (it's not possible to get here
2896 * with it set to 1 via fsctl).
2898 if (ap
->a_fflag
!= 1)
2900 return cp_vnode_transcode(vp
, (cp_key_t
*)ap
->a_data
);
2902 case F_GETPROTECTIONLEVEL
:
2903 return cp_get_root_major_vers (vp
, (uint32_t *)ap
->a_data
);
2905 case F_GETDEFAULTPROTLEVEL
:
2906 return cp_get_default_level(vp
, (uint32_t *)ap
->a_data
);
2907 #endif // CONFIG_PROTECT
2910 return hfs_pin_vnode(hfsmp
, vp
, HFS_PIN_IT
| HFS_DATALESS_PIN
,
2913 case FSIOC_CAS_BSDFLAGS
: {
2914 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2919 struct fsioc_cas_bsdflags
*cas
= (void *)ap
->a_data
;
2920 struct cnode
*cp
= VTOC(vp
);
2921 u_int32_t document_id
= 0;
2922 int decmpfs_reset_state
= 0;
2925 /* Don't allow modification of the journal. */
2926 if (hfs_is_journal_file(hfsmp
, cp
)) {
2930 if ((error
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
))) {
2934 cas
->actual_flags
= cp
->c_bsdflags
;
2935 if (cas
->actual_flags
!= cas
->expected_flags
) {
2941 // Check if we'll need a document_id. If so, we need to drop the lock
2942 // (to avoid any possible deadlock with the root vnode which has to get
2943 // locked to get the document id), generate the document_id, re-acquire
2944 // the lock, and perform the CAS check again. We do it in this sequence
2945 // in order to avoid throwing away document_ids in the case where the
2946 // CAS check fails. Note that it can still happen, but by performing
2947 // the check first, hopefully we can reduce the ocurrence.
2949 if ((cas
->new_flags
& UF_TRACKED
) && !(VTOC(vp
)->c_bsdflags
& UF_TRACKED
)) {
2950 struct FndrExtendedDirInfo
*fip
= (struct FndrExtendedDirInfo
*)((char *)&(VTOC(vp
)->c_attr
.ca_finderinfo
) + 16);
2952 // If the document_id is not set, get a new one. It will be set
2953 // on the file down below once we hold the cnode lock.
2955 if (fip
->document_id
== 0) {
2957 // Drat, we have to generate one. Unlock the cnode, do the
2958 // deed, re-lock the cnode, and then to the CAS check again
2959 // to see if we lost the race.
2962 if (hfs_generate_document_id(hfsmp
, &document_id
) != 0) {
2965 if ((error
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
))) {
2968 cas
->actual_flags
= cp
->c_bsdflags
;
2969 if (cas
->actual_flags
!= cas
->expected_flags
) {
2976 bool setting_compression
= false;
2978 if (!(cas
->actual_flags
& UF_COMPRESSED
) && (cas
->new_flags
& UF_COMPRESSED
))
2979 setting_compression
= true;
2981 if (setting_compression
) {
2982 hfs_lock_truncate(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2983 if (VTOF(vp
)->ff_size
) {
2984 // hfs_truncate will deal with the cnode lock
2985 error
= hfs_truncate(vp
, 0, IO_NDELAY
, 0, ap
->a_context
);
2987 hfs_unlock_truncate(cp
, HFS_LOCK_DEFAULT
);
2991 error
= hfs_set_bsd_flags(hfsmp
, cp
, cas
->new_flags
,
2992 document_id
, ap
->a_context
,
2993 &decmpfs_reset_state
);
2995 error
= hfs_update(vp
, 0);
3003 if (decmpfs_reset_state
) {
3005 * we've changed the UF_COMPRESSED flag, so reset the decmpfs state for this cnode
3006 * but don't do it while holding the hfs cnode lock
3008 decmpfs_cnode
*dp
= VTOCMP(vp
);
3011 * call hfs_lazy_init_decmpfs_cnode() to make sure that the decmpfs_cnode
3012 * is filled in; we need a decmpfs_cnode to prevent decmpfs state changes
3013 * on this file if it's locked
3015 dp
= hfs_lazy_init_decmpfs_cnode(VTOC(vp
));
3017 /* failed to allocate a decmpfs_cnode */
3018 return ENOMEM
; /* what should this be? */
3021 decmpfs_cnode_set_vnode_state(dp
, FILE_TYPE_UNKNOWN
, 0);
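
    /*
     * The FSIOC_CAS_BSDFLAGS handler above is a compare-and-swap: the cnode
     * lock may have to be dropped in order to generate a document_id, so
     * after re-locking, actual_flags is re-read and compared against
     * expected_flags a second time before hfs_set_bsd_flags() is applied.
     */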
int
hfs_vnop_select(__unused struct vnop_select_args *ap)
/*
    struct vnop_select_args {
        vnode_t a_vp;
        int a_which;
        int a_fflags;
        void *a_wql;
        vfs_context_t a_context;
    };
*/
{
    /*
     * We should really check to see if I/O is possible.
     */
    return (1);
}
/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently its 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
 */
int
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
{
    struct filefork *fp = VTOF(vp);
    struct hfsmount *hfsmp = VTOHFS(vp);
    int retval = E_NONE;
    u_int32_t logBlockSize;
    size_t bytesContAvail = 0;
    off_t blockposition;
    int lockExtBtree;
    int lockflags = 0;

    /*
     * Check for underlying vnode requests and ensure that logical
     * to physical mapping is requested.
     */
    if (vpp != NULL)
        *vpp = hfsmp->hfs_devvp;
    if (bnp == NULL)
        return (0);

    logBlockSize = GetLogicalBlockSize(vp);
    blockposition = (off_t)bn * logBlockSize;

    lockExtBtree = overflow_extents(fp);

    if (lockExtBtree)
        lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);

    retval = MacToVFSError(
                MapFileBlockC(HFSTOVCB(hfsmp),
                              (FCB*)fp,
                              MAXPHYSIO,
                              blockposition,
                              bnp,
                              &bytesContAvail));

    if (lockExtBtree)
        hfs_systemfile_unlock(hfsmp, lockflags);

    if (retval == E_NONE) {
        /* Figure out how many read ahead blocks there are */
        if (runp != NULL) {
            if (can_cluster(logBlockSize)) {
                /* Make sure this result never goes negative: */
                *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
            } else {
                *runp = 0;
            }
        }
    }
    return (retval);
}
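
/*
 * Note on the *runp result computed above: it is expressed in logical
 * blocks remaining in the run *after* the requested block, hence the
 * "(bytesContAvail / logBlockSize) - 1", clamped to 0 when less than one
 * full logical block is contiguously available.
 */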
/*
 * Convert logical block number to file offset.
 */
int
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
    struct vnop_blktooff_args {
        vnode_t a_vp;
        daddr64_t a_lblkno;
        off_t *a_offset;
    };
*/
{
    if (ap->a_vp == NULL)
        return (EINVAL);

    *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);

    return (0);
}
/*
 * Convert file offset to logical block number.
 */
int
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
    struct vnop_offtoblk_args {
        vnode_t a_vp;
        off_t a_offset;
        daddr64_t *a_lblkno;
    };
*/
{
    if (ap->a_vp == NULL)
        return (EINVAL);

    *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));

    return (0);
}
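
/*
 * The two conversions above are inverses of each other.  For example, if
 * GetLogicalBlockSize() reports 4096 bytes, logical block 3 maps to byte
 * offset 3 * 4096 = 12288, and offset 12288 maps back to logical block 3.
 */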
/*
 * Map file offset to physical block number.
 *
 * If this function is called for write operation, and if the file
 * had virtual blocks allocated (delayed allocation), real blocks
 * are allocated by calling ExtendFileC().
 *
 * If this function is called for read operation, and if the file
 * had virtual blocks allocated (delayed allocation), no change
 * to the size of file is done, and if required, rangelist is
 * searched for mapping.
 *
 * System file cnodes are expected to be locked (shared or exclusive).
 *
 * -- INVALID RANGES --
 *
 * Invalid ranges are used to keep track of where we have extended a
 * file, but have not yet written that data to disk.  In the past we
 * would clear up the invalid ranges as we wrote to those areas, but
 * before data was actually flushed to disk.  The problem with that
 * approach is that the data can be left in the cache and is therefore
 * still not valid on disk.  So now we clear up the ranges here, when
 * the flags field has VNODE_WRITE set, indicating a write is about to
 * occur.  This isn't ideal (ideally we want to clear them up when we
 * know the data has been successfully written), but it's the best we
 * can do.
 *
 * For reads, we use the invalid ranges here in block map to indicate
 * to the caller that the data should be zeroed (a_bpn == -1).  We
 * have to be careful about what ranges we return to the cluster code.
 * Currently the cluster code can only handle non-rounded values for
 * the EOF; it cannot handle funny sized ranges in the middle of the
 * file (the main problem is that it sends down odd sized I/Os to the
 * disk).  Our code currently works because whilst the very first
 * offset and the last offset in the invalid ranges are not aligned,
 * gaps in the invalid ranges between the first and last, have to be
 * aligned (because we always write page sized blocks).  For example,
 * consider this arrangement:
 *
 *        +-------------+-----+-------+------+
 *        |             |XXXXX|       |XXXXXX|
 *        +-------------+-----+-------+------+
 *                      a     b       c      d
 *
 * This shows two invalid ranges <a, b> and <c, d>.  Whilst a and d
 * are not necessarily aligned, b and c *must* be.
 *
 * Zero-filling occurs in a number of ways:
 *
 *   1. When a read occurs and we return with a_bpn == -1.
 *
 *   2. When hfs_fsync or hfs_filedone calls hfs_flush_invalid_ranges
 *      which will cause us to iterate over the ranges bringing in
 *      pages that are not present in the cache and zeroing them.  Any
 *      pages that are already in the cache are left untouched.  Note
 *      that hfs_fsync does not always flush invalid ranges.
 *
 *   3. When we extend a file we zero out from the old EOF to the end
 *      of the page.  It would be nice if we didn't have to do this if
 *      the page wasn't present (and could defer it), but because of
 *      the problem described above, we have to.
 *
 * The invalid ranges are also used to restrict the size that we write
 * out on disk: see hfs_prepare_fork_for_update.
 *
 * Note that invalid ranges are ignored when neither the VNODE_READ nor
 * the VNODE_WRITE flag is specified.  This is useful for the
 * F_LOG2PHYS* fcntls which are not interested in invalid ranges: they
 * just want to know whether blocks are physically allocated or not.
 */
3228 hfs_vnop_blockmap(struct vnop_blockmap_args
*ap
)
3230 struct vnop_blockmap_args {
3238 vfs_context_t a_context;
3242 struct vnode
*vp
= ap
->a_vp
;
3244 struct filefork
*fp
;
3245 struct hfsmount
*hfsmp
;
3246 size_t bytesContAvail
= ap
->a_size
;
3247 int retval
= E_NONE
;
3250 struct rl_entry
*invalid_range
;
3251 enum rl_overlaptype overlaptype
;
3256 if (VNODE_IS_RSRC(vp
)) {
3257 /* allow blockmaps to the resource fork */
3259 if ( hfs_file_is_compressed(VTOC(vp
), 1) ) { /* 1 == don't take the cnode lock */
3260 int state
= decmpfs_cnode_get_vnode_state(VTOCMP(vp
));
3262 case FILE_IS_COMPRESSED
:
3264 case FILE_IS_CONVERTING
:
3265 /* if FILE_IS_CONVERTING, we allow blockmap */
3268 printf("invalid state %d for compressed file\n", state
);
3273 #endif /* HFS_COMPRESSION */
3275 /* Do not allow blockmap operation on a directory */
3276 if (vnode_isdir(vp
)) {
3281 * Check for underlying vnode requests and ensure that logical
3282 * to physical mapping is requested.
3284 if (ap
->a_bpn
== NULL
)
3291 if ( !vnode_issystem(vp
) && !vnode_islnk(vp
) && !vnode_isswap(vp
)) {
3292 if (cp
->c_lockowner
!= current_thread()) {
3293 hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
3297 // For reads, check the invalid ranges
3298 if (ISSET(ap
->a_flags
, VNODE_READ
)) {
3299 if (ap
->a_foffset
>= fp
->ff_size
) {
3304 overlaptype
= rl_scan(&fp
->ff_invalidranges
, ap
->a_foffset
,
3305 ap
->a_foffset
+ (off_t
)bytesContAvail
- 1,
3307 switch(overlaptype
) {
3308 case RL_MATCHINGOVERLAP
:
3309 case RL_OVERLAPCONTAINSRANGE
:
3310 case RL_OVERLAPSTARTSBEFORE
:
3311 /* There's no valid block for this byte offset */
3312 *ap
->a_bpn
= (daddr64_t
)-1;
3313 /* There's no point limiting the amount to be returned
3314 * if the invalid range that was hit extends all the way
3315 * to the EOF (i.e. there's no valid bytes between the
3316 * end of this range and the file's EOF):
3318 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
3319 ((size_t)(invalid_range
->rl_end
+ 1 - ap
->a_foffset
) < bytesContAvail
)) {
3320 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
3326 case RL_OVERLAPISCONTAINED
:
3327 case RL_OVERLAPENDSAFTER
:
3328 /* The range of interest hits an invalid block before the end: */
3329 if (invalid_range
->rl_start
== ap
->a_foffset
) {
3330 /* There's actually no valid information to be had starting here: */
3331 *ap
->a_bpn
= (daddr64_t
)-1;
3332 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
3333 ((size_t)(invalid_range
->rl_end
+ 1 - ap
->a_foffset
) < bytesContAvail
)) {
3334 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
3341 * Sadly, the lower layers don't like us to
3342 * return unaligned ranges, so we skip over
3343 * any invalid ranges here that are less than
3344 * a page: zeroing of those bits is not our
3345 * responsibility (it's dealt with elsewhere).
3348 off_t rounded_start
= round_page_64(invalid_range
->rl_start
);
3349 if ((off_t
)bytesContAvail
< rounded_start
- ap
->a_foffset
)
3351 if (rounded_start
< invalid_range
->rl_end
+ 1) {
3352 bytesContAvail
= rounded_start
- ap
->a_foffset
;
3355 } while ((invalid_range
= TAILQ_NEXT(invalid_range
,
3367 if (cp
->c_cpentry
) {
3368 const int direction
= (ISSET(ap
->a_flags
, VNODE_WRITE
)
3369 ? VNODE_WRITE
: VNODE_READ
);
3371 cp_io_params_t io_params
;
3372 cp_io_params(hfsmp
, cp
->c_cpentry
,
3373 off_rsrc_make(ap
->a_foffset
, VNODE_IS_RSRC(vp
)),
3374 direction
, &io_params
);
3376 if (io_params
.max_len
< (off_t
)bytesContAvail
)
3377 bytesContAvail
= io_params
.max_len
;
3379 if (io_params
.phys_offset
!= -1) {
3380 *ap
->a_bpn
= ((io_params
.phys_offset
+ hfsmp
->hfsPlusIOPosOffset
)
3381 / hfsmp
->hfs_logical_block_size
);
3391 /* Check virtual blocks only when performing write operation */
3392 if ((ap
->a_flags
& VNODE_WRITE
) && (fp
->ff_unallocblocks
!= 0)) {
3393 if (hfs_start_transaction(hfsmp
) != 0) {
3399 syslocks
= SFL_EXTENTS
| SFL_BITMAP
;
3401 } else if (overflow_extents(fp
)) {
3402 syslocks
= SFL_EXTENTS
;
3406 lockflags
= hfs_systemfile_lock(hfsmp
, syslocks
, HFS_EXCLUSIVE_LOCK
);
3409 * Check for any delayed allocations.
3411 if ((ap
->a_flags
& VNODE_WRITE
) && (fp
->ff_unallocblocks
!= 0)) {
3413 u_int32_t loanedBlocks
;
3416 // Make sure we have a transaction. It's possible
3417 // that we came in and fp->ff_unallocblocks was zero
3418 // but during the time we blocked acquiring the extents
3419 // btree, ff_unallocblocks became non-zero and so we
3420 // will need to start a transaction.
3422 if (started_tr
== 0) {
3424 hfs_systemfile_unlock(hfsmp
, lockflags
);
3431 * Note: ExtendFileC will Release any blocks on loan and
3432 * aquire real blocks. So we ask to extend by zero bytes
3433 * since ExtendFileC will account for the virtual blocks.
3436 loanedBlocks
= fp
->ff_unallocblocks
;
3437 retval
= ExtendFileC(hfsmp
, (FCB
*)fp
, 0, 0,
3438 kEFAllMask
| kEFNoClumpMask
, &actbytes
);
3441 fp
->ff_unallocblocks
= loanedBlocks
;
3442 cp
->c_blocks
+= loanedBlocks
;
3443 fp
->ff_blocks
+= loanedBlocks
;
3445 hfs_lock_mount (hfsmp
);
3446 hfsmp
->loanedBlocks
+= loanedBlocks
;
3447 hfs_unlock_mount (hfsmp
);
3449 hfs_systemfile_unlock(hfsmp
, lockflags
);
3450 cp
->c_flag
|= C_MODIFIED
;
3452 (void) hfs_update(vp
, 0);
3453 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3455 hfs_end_transaction(hfsmp
);
3462 retval
= MapFileBlockC(hfsmp
, (FCB
*)fp
, bytesContAvail
, ap
->a_foffset
,
3463 ap
->a_bpn
, &bytesContAvail
);
3465 hfs_systemfile_unlock(hfsmp
, lockflags
);
3470 /* On write, always return error because virtual blocks, if any,
3471 * should have been allocated in ExtendFileC(). We do not
3472 * allocate virtual blocks on read, therefore return error
3473 * only if no virtual blocks are allocated. Otherwise we search
3474 * rangelist for zero-fills
3476 if ((MacToVFSError(retval
) != ERANGE
) ||
3477 (ap
->a_flags
& VNODE_WRITE
) ||
3478 ((ap
->a_flags
& VNODE_READ
) && (fp
->ff_unallocblocks
== 0))) {
3482 /* Validate if the start offset is within logical file size */
3483 if (ap
->a_foffset
>= fp
->ff_size
) {
3488 * At this point, we have encountered a failure during
3489 * MapFileBlockC that resulted in ERANGE, and we are not
3490 * servicing a write, and there are borrowed blocks.
3492 * However, the cluster layer will not call blockmap for
3493 * blocks that are borrowed and in-cache. We have to assume
3494 * that because we observed ERANGE being emitted from
3495 * MapFileBlockC, this extent range is not valid on-disk. So
3496 * we treat this as a mapping that needs to be zero-filled
3500 if (fp
->ff_size
- ap
->a_foffset
< (off_t
)bytesContAvail
)
3501 bytesContAvail
= fp
->ff_size
- ap
->a_foffset
;
3503 *ap
->a_bpn
= (daddr64_t
) -1;
3511 if (ISSET(ap
->a_flags
, VNODE_WRITE
)) {
3512 struct rl_entry
*r
= TAILQ_FIRST(&fp
->ff_invalidranges
);
3514 // See if we might be overlapping invalid ranges...
3515 if (r
&& (ap
->a_foffset
+ (off_t
)bytesContAvail
) > r
->rl_start
) {
3517 * Mark the file as needing an update if we think the
3518 * on-disk EOF has changed.
3520 if (ap
->a_foffset
<= r
->rl_start
)
3521 SET(cp
->c_flag
, C_MODIFIED
);
3524 * This isn't the ideal place to put this. Ideally, we
3525 * should do something *after* we have successfully
3526 * written to the range, but that's difficult to do
3527 * because we cannot take locks in the callback. At
3528 * present, the cluster code will call us with VNODE_WRITE
3529 * set just before it's about to write the data so we know
3530 * that data is about to be written. If we get an I/O
3531 * error at this point then chances are the metadata
3532 * update to follow will also have an I/O error so the
3533 * risk here is small.
3535 rl_remove(ap
->a_foffset
, ap
->a_foffset
+ bytesContAvail
- 1,
3536 &fp
->ff_invalidranges
);
3538 if (!TAILQ_FIRST(&fp
->ff_invalidranges
)) {
3539 cp
->c_flag
&= ~C_ZFWANTSYNC
;
3540 cp
->c_zftimeout
= 0;
3546 *ap
->a_run
= bytesContAvail
;
3549 *(int *)ap
->a_poff
= 0;
3553 hfs_update(vp
, TRUE
);
3554 hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3555 hfs_end_transaction(hfsmp
);
3562 return (MacToVFSError(retval
));
/*
 * prepare and issue the I/O
 * buf_strategy knows how to deal
 * with requests that require
 * fragmented I/Os
 */
int
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
    buf_t bp = ap->a_bp;
    vnode_t vp = buf_vnode(bp);
    int error = 0;

    /* Mark buffer as containing static data if cnode flag set */
    if (VTOC(vp)->c_flag & C_SSD_STATIC) {
        buf_markstatic(bp);
    }

    /* Mark buffer as containing greedy mode data if cnode flag set */
    if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
        bufattr_markgreedymode(buf_attr(bp));
    }

    /* mark buffer as containing burst mode data if cnode flag set */
    if (VTOC(vp)->c_flag & C_IO_ISOCHRONOUS) {
        bufattr_markisochronous(buf_attr(bp));
    }

#if CONFIG_PROTECT
    error = cp_handle_strategy(bp);
    if (error)
        return error;
#endif

    error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);

    return error;
}
3606 do_hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int truncateflags
, vfs_context_t context
)
3608 register struct cnode
*cp
= VTOC(vp
);
3609 struct filefork
*fp
= VTOF(vp
);
3610 kauth_cred_t cred
= vfs_context_ucred(context
);
3613 off_t actualBytesAdded
;
3615 u_int32_t fileblocks
;
3617 struct hfsmount
*hfsmp
;
3619 int suppress_times
= (truncateflags
& HFS_TRUNCATE_SKIPTIMES
);
3621 blksize
= VTOVCB(vp
)->blockSize
;
3622 fileblocks
= fp
->ff_blocks
;
3623 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
3625 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_START
,
3626 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
3631 /* This should only happen with a corrupt filesystem */
3632 if ((off_t
)fp
->ff_size
< 0)
3635 if ((!ISHFSPLUS(VTOVCB(vp
))) && (length
> (off_t
)MAXHFSFILESIZE
))
3642 /* Files that are changing size are not hot file candidates. */
3643 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
3644 fp
->ff_bytesread
= 0;
3648 * We cannot just check if fp->ff_size == length (as an optimization)
3649 * since there may be extra physical blocks that also need truncation.
3652 if ((retval
= hfs_getinoquota(cp
)))
3657 * Lengthen the size of the file. We must ensure that the
3658 * last byte of the file is allocated. Since the smallest
3659 * value of ff_size is 0, length will be at least 1.
3661 if (length
> (off_t
)fp
->ff_size
) {
3663 retval
= hfs_chkdq(cp
, (int64_t)(roundup(length
- filebytes
, blksize
)),
3669 * If we don't have enough physical space then
3670 * we need to extend the physical size.
3672 if (length
> filebytes
) {
3674 u_int32_t blockHint
= 0;
3676 /* All or nothing and don't round up to clumpsize. */
3677 eflags
= kEFAllMask
| kEFNoClumpMask
;
3679 if (cred
&& (suser(cred
, NULL
) != 0)) {
3680 eflags
|= kEFReserveMask
; /* keep a reserve */
3684 * Allocate Journal and Quota files in metadata zone.
3686 if (filebytes
== 0 &&
3687 hfsmp
->hfs_flags
& HFS_METADATA_ZONE
&&
3688 hfs_virtualmetafile(cp
)) {
3689 eflags
|= kEFMetadataMask
;
3690 blockHint
= hfsmp
->hfs_metazone_start
;
3692 if (hfs_start_transaction(hfsmp
) != 0) {
3697 /* Protect extents b-tree and allocation bitmap */
3698 lockflags
= SFL_BITMAP
;
3699 if (overflow_extents(fp
))
3700 lockflags
|= SFL_EXTENTS
;
3701 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3704 * Keep growing the file as long as the current EOF is
3705 * less than the desired value.
3707 while ((length
> filebytes
) && (retval
== E_NONE
)) {
3708 bytesToAdd
= length
- filebytes
;
3709 retval
= MacToVFSError(ExtendFileC(VTOVCB(vp
),
3714 &actualBytesAdded
));
3716 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
3717 if (actualBytesAdded
== 0 && retval
== E_NONE
) {
3718 if (length
> filebytes
)
3724 hfs_systemfile_unlock(hfsmp
, lockflags
);
3728 hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3731 hfs_end_transaction(hfsmp
);
3736 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_NONE
,
3737 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
3740 if (ISSET(flags
, IO_NOZEROFILL
)) {
3741 // An optimisation for the hibernation file
3742 if (vnode_isswap(vp
))
3743 rl_remove_all(&fp
->ff_invalidranges
);
3745 if (!vnode_issystem(vp
) && retval
== E_NONE
) {
3746 if (length
> (off_t
)fp
->ff_size
) {
3749 /* Extending the file: time to fill out the current last page w. zeroes? */
3750 if (fp
->ff_size
& PAGE_MASK_64
) {
3751 /* There might be some valid data at the start of the (current) last page
3752 of the file, so zero out the remainder of that page to ensure the
3753 entire page contains valid data. */
3755 retval
= hfs_zero_eof_page(vp
, length
);
3756 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
3757 if (retval
) goto Err_Exit
;
3760 rl_add(fp
->ff_size
, length
- 1, &fp
->ff_invalidranges
);
3761 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
3764 panic("hfs_truncate: invoked on non-UBC object?!");
3767 if (suppress_times
== 0) {
3768 cp
->c_touch_modtime
= TRUE
;
3770 fp
->ff_size
= length
;
3772 } else { /* Shorten the size of the file */
3774 // An optimisation for the hibernation file
3775 if (ISSET(flags
, IO_NOZEROFILL
) && vnode_isswap(vp
)) {
3776 rl_remove_all(&fp
->ff_invalidranges
);
3777 } else if ((off_t
)fp
->ff_size
> length
) {
3778 /* Any space previously marked as invalid is now irrelevant: */
3779 rl_remove(length
, fp
->ff_size
- 1, &fp
->ff_invalidranges
);
3783 * Account for any unmapped blocks. Note that the new
3784 * file length can still end up with unmapped blocks.
3786 if (fp
->ff_unallocblocks
> 0) {
3787 u_int32_t finalblks
;
3788 u_int32_t loanedBlocks
;
3790 hfs_lock_mount(hfsmp
);
3791 loanedBlocks
= fp
->ff_unallocblocks
;
3792 cp
->c_blocks
-= loanedBlocks
;
3793 fp
->ff_blocks
-= loanedBlocks
;
3794 fp
->ff_unallocblocks
= 0;
3796 hfsmp
->loanedBlocks
-= loanedBlocks
;
3798 finalblks
= (length
+ blksize
- 1) / blksize
;
3799 if (finalblks
> fp
->ff_blocks
) {
3800 /* calculate required unmapped blocks */
3801 loanedBlocks
= finalblks
- fp
->ff_blocks
;
3802 hfsmp
->loanedBlocks
+= loanedBlocks
;
3804 fp
->ff_unallocblocks
= loanedBlocks
;
3805 cp
->c_blocks
+= loanedBlocks
;
3806 fp
->ff_blocks
+= loanedBlocks
;
3808 hfs_unlock_mount (hfsmp
);
3811 off_t savedbytes
= ((off_t
)fp
->ff_blocks
* (off_t
)blksize
);
3812 if (hfs_start_transaction(hfsmp
) != 0) {
3817 if (fp
->ff_unallocblocks
== 0) {
3818 /* Protect extents b-tree and allocation bitmap */
3819 lockflags
= SFL_BITMAP
;
3820 if (overflow_extents(fp
))
3821 lockflags
|= SFL_EXTENTS
;
3822 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3824 retval
= MacToVFSError(TruncateFileC(VTOVCB(vp
), (FCB
*)fp
, length
, 0,
3825 FORK_IS_RSRC (fp
), FTOC(fp
)->c_fileid
, false));
3827 hfs_systemfile_unlock(hfsmp
, lockflags
);
3831 fp
->ff_size
= length
;
3834 hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3836 hfs_end_transaction(hfsmp
);
3838 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
3842 /* These are bytesreleased */
3843 (void) hfs_chkdq(cp
, (int64_t)-(savedbytes
- filebytes
), NOCRED
, 0);
3847 // Unlike when growing a file, we adjust the hotfile block count here
3848 // instead of deeper down in the block allocation code because we do
3849 // not necessarily have a vnode or "fcb" at the time we're deleting
3850 // the file and so we wouldn't know if it was hotfile cached or not
3852 hfs_hotfile_adjust_blocks(vp
, (int64_t)((savedbytes
- filebytes
) / blksize
));
3856 * Only set update flag if the logical length changes & we aren't
3857 * suppressing modtime updates.
3859 if (((off_t
)fp
->ff_size
!= length
) && (suppress_times
== 0)) {
3860 cp
->c_touch_modtime
= TRUE
;
3862 fp
->ff_size
= length
;
3864 if (cp
->c_mode
& (S_ISUID
| S_ISGID
)) {
3865 if (!vfs_context_issuser(context
))
3866 cp
->c_mode
&= ~(S_ISUID
| S_ISGID
);
3868 cp
->c_flag
|= C_MODIFIED
;
3869 cp
->c_touch_chgtime
= TRUE
; /* status changed */
3870 if (suppress_times
== 0) {
3871 cp
->c_touch_modtime
= TRUE
; /* file data was modified */
3874 * If we are not suppressing the modtime update, then
3875 * update the gen count as well.
3877 if (S_ISREG(cp
->c_attr
.ca_mode
) || S_ISLNK (cp
->c_attr
.ca_mode
)) {
3878 hfs_incr_gencount(cp
);
3882 retval
= hfs_update(vp
, 0);
3884 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_NONE
,
3885 -1, -1, -1, retval
, 0);
3890 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_END
,
3891 (int)length
, (int)fp
->ff_size
, (int)filebytes
, retval
, 0);
/*
 * Preparation which must be done prior to deleting the catalog record
 * of a file or directory.  In order to make the on-disk as safe as possible,
 * we remove the catalog entry before releasing the bitmap blocks and the
 * overflow extent records.  However, some work must be done prior to deleting
 * the catalog record.
 *
 * When calling this function, the cnode must exist both in memory and on-disk.
 * If there are both resource fork and data fork vnodes, this function should
 * be called on both.
 */
int
hfs_prepare_release_storage(struct hfsmount *hfsmp, struct vnode *vp) {

    struct filefork *fp = VTOF(vp);
    struct cnode *cp = VTOC(vp);
    int retval = 0;

    /* Cannot truncate an HFS directory! */
    if (vnode_isdir(vp)) {
        return (EISDIR);
    }

    /*
     * See the comment below in hfs_truncate for why we need to call
     * setsize here.  Essentially we want to avoid pending IO if we
     * already know that the blocks are going to be released here.
     * This function is only called when totally removing all storage for a file, so
     * we can take a shortcut and immediately setsize (0);
     */
    ubc_setsize(vp, 0);

    /* This should only happen with a corrupt filesystem */
    if ((off_t)fp->ff_size < 0)
        return (EINVAL);

    /*
     * We cannot just check if fp->ff_size == length (as an optimization)
     * since there may be extra physical blocks that also need truncation.
     */
    if ((retval = hfs_getinoquota(cp))) {
        return (retval);
    }

    /* Wipe out any invalid ranges which have yet to be backed by disk */
    rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);

    /*
     * Account for any unmapped blocks.  Since we're deleting the
     * entire file, we don't have to worry about just shrinking
     * to a smaller number of borrowed blocks.
     */
    if (fp->ff_unallocblocks > 0) {
        u_int32_t loanedBlocks;

        hfs_lock_mount(hfsmp);
        loanedBlocks = fp->ff_unallocblocks;
        cp->c_blocks -= loanedBlocks;
        fp->ff_blocks -= loanedBlocks;
        fp->ff_unallocblocks = 0;

        hfsmp->loanedBlocks -= loanedBlocks;

        hfs_unlock_mount(hfsmp);
    }

    return 0;
}
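
/*
 * hfs_prepare_release_storage() only returns the loaned (delayed-allocation)
 * blocks and clears invalid ranges; the on-disk extents themselves are freed
 * by hfs_release_storage() below, after the catalog record has been removed.
 */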
/*
 * Special wrapper around calling TruncateFileC.  This function is useable
 * even when the catalog record does not exist any longer, making it ideal
 * for use when deleting a file.  The simplification here is that we know
 * that we are releasing all blocks.
 *
 * Note that this function may be called when there is no vnode backing
 * the file fork in question.  We may call this from hfs_vnop_inactive
 * to clear out resource fork data (and may not want to clear out the data
 * fork yet).  As a result, we pointer-check both sets of inputs before
 * doing anything with them.
 *
 * The caller is responsible for saving off a copy of the filefork(s)
 * embedded within the cnode prior to calling this function.  The pointers
 * supplied as arguments must be valid even if the cnode is no longer valid.
 */
3989 hfs_release_storage (struct hfsmount
*hfsmp
, struct filefork
*datafork
,
3990 struct filefork
*rsrcfork
, u_int32_t fileid
) {
3993 u_int32_t fileblocks
;
3998 blksize
= hfsmp
->blockSize
;
4002 off_t prev_filebytes
;
4004 datafork
->ff_size
= 0;
4006 fileblocks
= datafork
->ff_blocks
;
4007 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
4008 prev_filebytes
= filebytes
;
4010 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
4012 while (filebytes
> 0) {
4013 if (filebytes
> HFS_BIGFILE_SIZE
) {
4014 filebytes
-= HFS_BIGFILE_SIZE
;
4019 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
4020 if (hfs_start_transaction(hfsmp
) != 0) {
4025 if (datafork
->ff_unallocblocks
== 0) {
4026 /* Protect extents b-tree and allocation bitmap */
4027 lockflags
= SFL_BITMAP
;
4028 if (overflow_extents(datafork
))
4029 lockflags
|= SFL_EXTENTS
;
4030 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
4032 error
= MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp
), datafork
, filebytes
, 1, 0, fileid
, false));
4034 hfs_systemfile_unlock(hfsmp
, lockflags
);
4036 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
4038 struct cnode
*cp
= datafork
? FTOC(datafork
) : NULL
;
4040 vp
= cp
? CTOV(cp
, 0) : NULL
;
4041 hfs_hotfile_adjust_blocks(vp
, (int64_t)((prev_filebytes
- filebytes
) / blksize
));
4042 prev_filebytes
= filebytes
;
4044 /* Finish the transaction and start over if necessary */
4045 hfs_end_transaction(hfsmp
);
4054 if (error
== 0 && rsrcfork
) {
4055 rsrcfork
->ff_size
= 0;
4057 fileblocks
= rsrcfork
->ff_blocks
;
4058 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
4060 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
4062 while (filebytes
> 0) {
4063 if (filebytes
> HFS_BIGFILE_SIZE
) {
4064 filebytes
-= HFS_BIGFILE_SIZE
;
4069 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
4070 if (hfs_start_transaction(hfsmp
) != 0) {
4075 if (rsrcfork
->ff_unallocblocks
== 0) {
4076 /* Protect extents b-tree and allocation bitmap */
4077 lockflags
= SFL_BITMAP
;
4078 if (overflow_extents(rsrcfork
))
4079 lockflags
|= SFL_EXTENTS
;
4080 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
4082 error
= MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp
), rsrcfork
, filebytes
, 1, 1, fileid
, false));
4084 hfs_systemfile_unlock(hfsmp
, lockflags
);
4086 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
4088 /* Finish the transaction and start over if necessary */
4089 hfs_end_transaction(hfsmp
);
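
/*
 * Both loops in hfs_release_storage() above shrink the fork in
 * HFS_BIGFILE_SIZE chunks, one transaction per chunk, so that a single
 * journal transaction never has to cover the teardown of an arbitrarily
 * large file.
 */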
errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool have_cnode_lock)
{
    errno_t error;

    /*
     * Call ubc_setsize to give the VM subsystem a chance to do
     * whatever it needs to with existing pages before we delete
     * blocks.  Note that symlinks don't use the UBC so we'll
     * get back ENOENT in that case.
     */
    if (have_cnode_lock) {
        error = ubc_setsize_ex(vp, len, UBC_SETSIZE_NO_FS_REENTRY);
        if (error == EAGAIN) {
            cnode_t *cp = VTOC(vp);

            if (cp->c_truncatelockowner != current_thread())
                hfs_warn("hfs: hfs_ubc_setsize called without exclusive truncate lock!");

            hfs_unlock(cp);
            error = ubc_setsize_ex(vp, len, 0);
            hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
        }
    } else
        error = ubc_setsize_ex(vp, len, 0);

    return error == ENOENT ? 0 : error;
}
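
/*
 * Usage note for hfs_ubc_setsize(): when have_cnode_lock is true the first
 * attempt is made with UBC_SETSIZE_NO_FS_REENTRY, and only on EAGAIN is the
 * cnode lock dropped and re-taken around a blocking ubc_setsize_ex() call;
 * an ENOENT result (symlinks, which have no UBC info) is treated as success.
 */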
4129 * Truncate a cnode to at most length size, freeing (or adding) the
4133 hfs_truncate(struct vnode
*vp
, off_t length
, int flags
,
4134 int truncateflags
, vfs_context_t context
)
4136 struct filefork
*fp
= VTOF(vp
);
4138 u_int32_t fileblocks
;
4141 struct cnode
*cp
= VTOC(vp
);
4142 hfsmount_t
*hfsmp
= VTOHFS(vp
);
4144 /* Cannot truncate an HFS directory! */
4145 if (vnode_isdir(vp
)) {
4148 /* A swap file cannot change size. */
4149 if (vnode_isswap(vp
) && length
&& !ISSET(flags
, IO_NOAUTH
)) {
4153 blksize
= hfsmp
->blockSize
;
4154 fileblocks
= fp
->ff_blocks
;
4155 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
4157 bool caller_has_cnode_lock
= (cp
->c_lockowner
== current_thread());
4159 error
= hfs_ubc_setsize(vp
, length
, caller_has_cnode_lock
);
4163 if (!caller_has_cnode_lock
) {
4164 error
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
4169 if (vnode_islnk(vp
) && cp
->c_datafork
->ff_symlinkptr
) {
4170 hfs_free(cp
->c_datafork
->ff_symlinkptr
, cp
->c_datafork
->ff_size
);
4171 cp
->c_datafork
->ff_symlinkptr
= NULL
;
4174 // have to loop truncating or growing files that are
4175 // really big because otherwise transactions can get
4176 // enormous and consume too many kernel resources.
4178 if (length
< filebytes
) {
4179 while (filebytes
> length
) {
4180 if ((filebytes
- length
) > HFS_BIGFILE_SIZE
) {
4181 filebytes
-= HFS_BIGFILE_SIZE
;
4185 error
= do_hfs_truncate(vp
, filebytes
, flags
, truncateflags
, context
);
4189 } else if (length
> filebytes
) {
4190 kauth_cred_t cred
= vfs_context_ucred(context
);
4191 const bool keep_reserve
= cred
&& suser(cred
, NULL
) != 0;
4193 if (hfs_freeblks(hfsmp
, keep_reserve
)
4194 < howmany(length
- filebytes
, blksize
)) {
4197 while (filebytes
< length
) {
4198 if ((length
- filebytes
) > HFS_BIGFILE_SIZE
) {
4199 filebytes
+= HFS_BIGFILE_SIZE
;
4203 error
= do_hfs_truncate(vp
, filebytes
, flags
, truncateflags
, context
);
4208 } else /* Same logical size */ {
4210 error
= do_hfs_truncate(vp
, length
, flags
, truncateflags
, context
);
4212 /* Files that are changing size are not hot file candidates. */
4213 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
) {
4214 fp
->ff_bytesread
= 0;
4217 #if HFS_CONFIG_KEY_ROLL
4218 if (!error
&& cp
->c_truncatelockowner
== current_thread()) {
4219 hfs_key_roll_check(cp
, true);
4223 if (!caller_has_cnode_lock
)
4226 // Make sure UBC's size matches up (in case we didn't completely succeed)
4227 errno_t err2
= hfs_ubc_setsize(vp
, fp
->ff_size
, caller_has_cnode_lock
);
4236 * Preallocate file storage space.
4239 hfs_vnop_allocate(struct vnop_allocate_args
/* {
4243 off_t *a_bytesallocated;
4245 vfs_context_t a_context;
4248 struct vnode
*vp
= ap
->a_vp
;
4250 struct filefork
*fp
;
4252 off_t length
= ap
->a_length
;
4254 off_t moreBytesRequested
;
4255 off_t actualBytesAdded
;
4257 u_int32_t fileblocks
;
4258 int retval
, retval2
;
4259 u_int32_t blockHint
;
4260 u_int32_t extendFlags
; /* For call to ExtendFileC */
4261 struct hfsmount
*hfsmp
;
4262 kauth_cred_t cred
= vfs_context_ucred(ap
->a_context
);
4266 *(ap
->a_bytesallocated
) = 0;
4268 if (!vnode_isreg(vp
))
4270 if (length
< (off_t
)0)
4275 orig_ctime
= VTOC(vp
)->c_ctime
;
4277 nspace_snapshot_event(vp
, orig_ctime
, ap
->a_length
== 0 ? NAMESPACE_HANDLER_TRUNCATE_OP
|NAMESPACE_HANDLER_DELETE_OP
: NAMESPACE_HANDLER_TRUNCATE_OP
, NULL
);
4279 hfs_lock_truncate(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
4281 if ((retval
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
))) {
4289 fileblocks
= fp
->ff_blocks
;
4290 filebytes
= (off_t
)fileblocks
* (off_t
)vcb
->blockSize
;
4292 if ((ap
->a_flags
& ALLOCATEFROMVOL
) && (length
< filebytes
)) {
4297 /* Fill in the flags word for the call to Extend the file */
4299 extendFlags
= kEFNoClumpMask
;
4300 if (ap
->a_flags
& ALLOCATECONTIG
)
4301 extendFlags
|= kEFContigMask
;
4302 if (ap
->a_flags
& ALLOCATEALL
)
4303 extendFlags
|= kEFAllMask
;
4304 if (cred
&& suser(cred
, NULL
) != 0)
4305 extendFlags
|= kEFReserveMask
;
4306 if (hfs_virtualmetafile(cp
))
4307 extendFlags
|= kEFMetadataMask
;
4311 startingPEOF
= filebytes
;
4313 if (ap
->a_flags
& ALLOCATEFROMPEOF
)
4314 length
+= filebytes
;
4315 else if (ap
->a_flags
& ALLOCATEFROMVOL
)
4316 blockHint
= ap
->a_offset
/ VTOVCB(vp
)->blockSize
;
4318 /* If no changes are necesary, then we're done */
4319 if (filebytes
== length
)
4323 * Lengthen the size of the file. We must ensure that the
4324 * last byte of the file is allocated. Since the smallest
4325 * value of filebytes is 0, length will be at least 1.
4327 if (length
> filebytes
) {
4328 if (ISSET(extendFlags
, kEFAllMask
)
4329 && (hfs_freeblks(hfsmp
, ISSET(extendFlags
, kEFReserveMask
))
4330 < howmany(length
- filebytes
, hfsmp
->blockSize
))) {
4335 off_t total_bytes_added
= 0, orig_request_size
;
4337 orig_request_size
= moreBytesRequested
= length
- filebytes
;
4340 retval
= hfs_chkdq(cp
,
4341 (int64_t)(roundup(moreBytesRequested
, vcb
->blockSize
)),
4348 * Metadata zone checks.
4350 if (hfsmp
->hfs_flags
& HFS_METADATA_ZONE
) {
4352 * Allocate Journal and Quota files in metadata zone.
4354 if (hfs_virtualmetafile(cp
)) {
4355 blockHint
= hfsmp
->hfs_metazone_start
;
4356 } else if ((blockHint
>= hfsmp
->hfs_metazone_start
) &&
4357 (blockHint
<= hfsmp
->hfs_metazone_end
)) {
4359 * Move blockHint outside metadata zone.
4361 blockHint
= hfsmp
->hfs_metazone_end
+ 1;
4366 while ((length
> filebytes
) && (retval
== E_NONE
)) {
4367 off_t bytesRequested
;
4369 if (hfs_start_transaction(hfsmp
) != 0) {
4374 /* Protect extents b-tree and allocation bitmap */
4375 lockflags
= SFL_BITMAP
;
4376 if (overflow_extents(fp
))
4377 lockflags
|= SFL_EXTENTS
;
4378 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
4380 if (moreBytesRequested
>= HFS_BIGFILE_SIZE
) {
4381 bytesRequested
= HFS_BIGFILE_SIZE
;
4383 bytesRequested
= moreBytesRequested
;
4386 if (extendFlags
& kEFContigMask
) {
4387 // if we're on a sparse device, this will force it to do a
4388 // full scan to find the space needed.
4389 hfsmp
->hfs_flags
&= ~HFS_DID_CONTIG_SCAN
;
4392 retval
= MacToVFSError(ExtendFileC(vcb
,
4397 &actualBytesAdded
));
4399 if (retval
== E_NONE
) {
4400 *(ap
->a_bytesallocated
) += actualBytesAdded
;
4401 total_bytes_added
+= actualBytesAdded
;
4402 moreBytesRequested
-= actualBytesAdded
;
4403 if (blockHint
!= 0) {
4404 blockHint
+= actualBytesAdded
/ vcb
->blockSize
;
4407 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)vcb
->blockSize
;
4409 hfs_systemfile_unlock(hfsmp
, lockflags
);
4412 (void) hfs_update(vp
, 0);
4413 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
4416 hfs_end_transaction(hfsmp
);
4421 * if we get an error and no changes were made then exit
4422 * otherwise we must do the hfs_update to reflect the changes
4424 if (retval
&& (startingPEOF
== filebytes
))
4428 * Adjust actualBytesAdded to be allocation block aligned, not
4429 * clump size aligned.
4430 * NOTE: So what we are reporting does not affect reality
4431 * until the file is closed, when we truncate the file to allocation
4434 if (total_bytes_added
!= 0 && orig_request_size
< total_bytes_added
)
4435 *(ap
->a_bytesallocated
) =
4436 roundup(orig_request_size
, (off_t
)vcb
->blockSize
);
4438 } else { /* Shorten the size of the file */
4441 * N.B. At present, this code is never called. If and when we
4442 * do start using it, it looks like there might be slightly
4443 * strange semantics with the file size: it's possible for the
4444 * file size to *increase* e.g. if current file size is 5,
4445 * length is 1024 and filebytes is 4096, the file size will
4446 * end up being 1024 bytes. This isn't necessarily a problem
4447 * but it's not consistent with the code above which doesn't
4448 * change the file size.
4451 retval
= hfs_truncate(vp
, length
, 0, 0, ap
->a_context
);
4452 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)vcb
->blockSize
;
4455 * if we get an error and no changes were made then exit
4456 * otherwise we must do the hfs_update to reflect the changes
4458 if (retval
&& (startingPEOF
== filebytes
)) goto Err_Exit
;
4460 /* These are bytesreleased */
4461 (void) hfs_chkdq(cp
, (int64_t)-((startingPEOF
- filebytes
)), NOCRED
,0);
4464 if (fp
->ff_size
> filebytes
) {
4465 fp
->ff_size
= filebytes
;
4467 hfs_ubc_setsize(vp
, fp
->ff_size
, true);
4472 cp
->c_flag
|= C_MODIFIED
;
4473 cp
->c_touch_chgtime
= TRUE
;
4474 cp
->c_touch_modtime
= TRUE
;
4475 retval2
= hfs_update(vp
, 0);
4480 hfs_unlock_truncate(cp
, HFS_LOCK_DEFAULT
);
4487 * Pagein for HFS filesystem
4490 hfs_vnop_pagein(struct vnop_pagein_args
*ap
)
4492 struct vnop_pagein_args {
4495 vm_offset_t a_pl_offset,
4499 vfs_context_t a_context;
4505 struct filefork
*fp
;
4508 upl_page_info_t
*pl
;
4510 off_t page_needed_f_offset
;
4515 boolean_t truncate_lock_held
= FALSE
;
4516 boolean_t file_converted
= FALSE
;
4524 if ((error
= cp_handle_vnop(vp
, CP_READ_ACCESS
| CP_WRITE_ACCESS
, 0)) != 0) {
4526 * If we errored here, then this means that one of two things occurred:
4527 * 1. there was a problem with the decryption of the key.
4528 * 2. the device is locked and we are not allowed to access this particular file.
4530 * Either way, this means that we need to shut down this upl now. As long as
4531 * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves)
4532 * then we create a upl and immediately abort it.
4534 if (ap
->a_pl
== NULL
) {
4535 /* create the upl */
4536 ubc_create_upl (vp
, ap
->a_f_offset
, ap
->a_size
, &upl
, &pl
,
4537 UPL_UBC_PAGEIN
| UPL_RET_ONLY_ABSENT
);
4538 /* mark the range as needed so it doesn't immediately get discarded upon abort */
4539 ubc_upl_range_needed (upl
, ap
->a_pl_offset
/ PAGE_SIZE
, 1);
4541 /* Abort the range */
4542 ubc_upl_abort_range (upl
, 0, ap
->a_size
, UPL_ABORT_FREE_ON_EMPTY
| UPL_ABORT_ERROR
);
4548 #endif /* CONFIG_PROTECT */
4550 if (ap
->a_pl
!= NULL
) {
4552 * this can only happen for swap files now that
4553 * we're asking for V2 paging behavior...
4554 * so don't need to worry about decompression, or
4555 * keeping track of blocks read or taking the truncate lock
4557 error
= cluster_pagein(vp
, ap
->a_pl
, ap
->a_pl_offset
, ap
->a_f_offset
,
4558 ap
->a_size
, (off_t
)fp
->ff_size
, ap
->a_flags
);
4562 page_needed_f_offset
= ap
->a_f_offset
+ ap
->a_pl_offset
;
    /*
     * take truncate lock (shared/recursive) to guard against
     * zero-fill thru fsync interfering, but only for v2
     *
     * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
     * lock shared and we are allowed to recurse 1 level if this thread already
     * owns the lock exclusively... this can legally occur
     * if we are doing a shrinking ftruncate against a file
     * that is mapped private, and the pages being truncated
     * do not currently exist in the cache... in that case
     * we will have to page-in the missing pages in order
     * to provide them to the private mapping... we must
     * also call hfs_unlock_truncate with a positive been_recursed
     * arg to indicate that if we have recursed, there is no need to drop
     * the lock.  Allowing this simple recursion is necessary
     * in order to avoid a certain deadlock... since the ftruncate
     * already holds the truncate lock exclusively, if we try
     * to acquire it shared to protect the pagein path, we will
     * deadlock.
     *
     * NOTE: The if () block below is a workaround in order to prevent a
     * VM deadlock.  See rdar://7853471.
     *
     * If we are in a forced unmount, then launchd will still have the
     * dyld_shared_cache file mapped as it is trying to reboot.  If we
     * take the truncate lock here to service a page fault, then our
     * thread could deadlock with the forced-unmount.  The forced unmount
     * thread will try to reclaim the dyld_shared_cache vnode, but since it's
     * marked C_DELETED, it will call ubc_setsize(0).  As a result, the unmount
     * thread will think it needs to copy all of the data out of the file
     * and into a VM copy object.  If we hold the cnode lock here, then that
     * VM operation will not be able to proceed, because we'll set a busy page
     * before attempting to grab the lock.  Note that this isn't as simple as "don't
     * call ubc_setsize" because doing that would just shift the problem to the
     * ubc_msync done before the vnode is reclaimed.
     *
     * So, if a forced unmount on this volume is in flight AND the cnode is
     * marked C_DELETED, then just go ahead and do the page in without taking
     * the lock (thus suspending pagein_v2 semantics temporarily).  Since it's on a file
     * that is not going to be available on the next mount, this seems like an
     * OK solution from a correctness point of view, even though it is hacky.
     */
4607 if (vfs_isforce(vnode_mount(vp
))) {
4608 if (cp
->c_flag
& C_DELETED
) {
4609 /* If we don't get it, then just go ahead and operate without the lock */
4610 truncate_lock_held
= hfs_try_trunclock(cp
, HFS_SHARED_LOCK
, HFS_LOCK_SKIP_IF_EXCLUSIVE
);
4614 hfs_lock_truncate(cp
, HFS_SHARED_LOCK
, HFS_LOCK_SKIP_IF_EXCLUSIVE
);
4615 truncate_lock_held
= TRUE
;
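
	/*
	 * A minimal sketch of the pairing used in this function, assuming the
	 * shared/recursion-tolerant flavor described above: the shared taker
	 * and the matching unlock both pass the "skip if exclusive" hint, so a
	 * thread that already owns the lock exclusively neither blocks on
	 * itself nor drops its own exclusive hold:
	 *
	 *	hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
	 *	... create the UPL and page the range in ...
	 *	hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
	 */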
	kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);

	if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
		error = EINVAL;
		goto pagein_done;
	}
	ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);

	upl_size = isize = ap->a_size;

	/*
	 * Scan from the back to find the last page in the UPL, so that we
	 * aren't looking at a UPL that may have already been freed by the
	 * preceding aborts/completions.
	 */
	for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
		if (upl_page_present(pl, --pg_index))
			break;
		if (pg_index == 0) {
			/*
			 * no absent pages were found in the range specified
			 * just abort the UPL to get rid of it and then we're done
			 */
			ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
			goto pagein_done;
		}
	}

	/*
	 * initialize the offset variables before we touch the UPL.
	 * f_offset is the position into the file, in bytes
	 * offset is the position into the UPL, in bytes
	 * pg_index is the pg# of the UPL we're operating on
	 * isize is the offset into the UPL of the last page that is present.
	 */
	isize = ((pg_index + 1) * PAGE_SIZE);
	pg_index = 0;
	offset = 0;
	f_offset = ap->a_f_offset;

	while (isize) {
		int  xsize;
		int  num_of_pages;

		if ( !upl_page_present(pl, pg_index)) {
			/*
			 * we asked for RET_ONLY_ABSENT, so it's possible
			 * to get back empty slots in the UPL.
			 * just skip over them
			 */
			f_offset += PAGE_SIZE;
			offset   += PAGE_SIZE;
			isize    -= PAGE_SIZE;
			pg_index++;

			continue;
		}
		/*
		 * We know that we have at least one absent page.
		 * Now checking to see how many in a row we have
		 */
		num_of_pages = 1;
		xsize = isize - PAGE_SIZE;

		while (xsize) {
			if ( !upl_page_present(pl, pg_index + num_of_pages))
				break;
			num_of_pages++;
			xsize -= PAGE_SIZE;
		}
		xsize = num_of_pages * PAGE_SIZE;
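
		/*
		 * Worked example of the run detection above (numbers are
		 * illustrative, not from any particular trace): if the UPL covers
		 * eight pages and pages 2, 3 and 4 are the only absent ones, the
		 * outer loop skips pages 0 and 1, the inner loop counts
		 * num_of_pages = 3, and xsize = 3 * PAGE_SIZE, so a single
		 * cluster_pagein below services the whole contiguous run.
		 */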
		if (VNODE_IS_RSRC(vp)) {
			/* allow pageins of the resource fork */
		} else {
			int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */

			if (compressed) {
				if (truncate_lock_held) {
					/*
					 * can't hold the truncate lock when calling into the decmpfs layer
					 * since it calls back into this layer... even though we're only
					 * holding the lock in shared mode, and the re-entrant path only
					 * takes the lock shared, we can deadlock if some other thread
					 * tries to grab the lock exclusively in between.
					 */
					hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
					truncate_lock_held = FALSE;
				}
				ap->a_pl = upl;
				ap->a_pl_offset = offset;
				ap->a_f_offset = f_offset;
				ap->a_size = xsize;

				error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
				/*
				 * note that decmpfs_pagein_compressed can change the state of
				 * 'compressed'... it will set it to 0 if the file is no longer
				 * compressed once the compression lock is successfully taken
				 * i.e. we would block on that lock while the file is being inflated
				 */
				if (error == 0 && vnode_isfastdevicecandidate(vp)) {
					(void) hfs_addhotfile(vp);
				}
				if (compressed) {
					if (error == 0) {
						/* successful page-in, update the access time */
						VTOC(vp)->c_touch_acctime = TRUE;

						//
						// compressed files are not traditional hot file candidates
						// but they may be for CF (which ignores the ff_bytesread
						// field)
						//
						if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
							fp->ff_bytesread = 0;
						}
					} else if (error == EAGAIN) {
						/*
						 * EAGAIN indicates someone else already holds the compression lock...
						 * to avoid deadlocking, we'll abort this range of pages with an
						 * indication that the pagein needs to be redriven
						 */
						ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
					} else if (error == ENOSPC) {

						if (upl_size == PAGE_SIZE)
							panic("decmpfs_pagein_compressed: couldn't ubc_upl_map a single page\n");

						ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);

						ap->a_size = PAGE_SIZE;
						ap->a_pl = NULL;
						ap->a_pl_offset = 0;
						ap->a_f_offset = page_needed_f_offset;

						goto retry_pagein;
					} else {
						ubc_upl_abort(upl, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
						goto pagein_done;
					}
					goto pagein_next_range;
				} else {
					/*
					 * Set file_converted only if the file became decompressed while we were
					 * paging in.  If it were still compressed, we would re-start the loop using the goto
					 * in the above block.  This avoids overloading truncate_lock_held as our retry_pagein
					 * condition below, since we could have avoided taking the truncate lock to prevent
					 * a deadlock in the force unmount case.
					 */
					file_converted = TRUE;
				}
			}
			if (file_converted == TRUE) {
				/*
				 * the file was converted back to a regular file after we first saw it as compressed
				 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over
				 * reset a_size so that we consider what remains of the original request
				 * and null out a_upl and a_pl_offset.
				 *
				 * We should only be able to get into this block if the decmpfs_pagein_compressed
				 * successfully decompressed the range in question for this file.
				 */
				ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);

				ap->a_size = isize;
				ap->a_pl = NULL;
				ap->a_pl_offset = 0;

				/* Reset file_converted back to false so that we don't infinite-loop. */
				file_converted = FALSE;

				goto retry_pagein;
			}
		}
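
		/*
		 * Control-flow sketch for the conversion case handled above,
		 * assuming the labels used elsewhere in this function: the current
		 * UPL is aborted, the request parameters in 'ap' are reset to cover
		 * what remains, and the goto re-enters at retry_pagein (also the
		 * target of the single-page ENOSPC fallback above), which retakes
		 * the truncate lock and builds a fresh UPL before scanning again.
		 */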
		error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);

		/*
		 * Keep track of blocks read.
		 */
		if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
			int bytesread;
			int took_cnode_lock = 0;

			if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
				bytesread = fp->ff_size;
			else
				bytesread = xsize;

			/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
			if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
				hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
				took_cnode_lock = 1;
			}
			/*
			 * If this file hasn't been seen since the start of
			 * the current sampling period then start over.
			 */
			if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
				struct timeval tv;

				fp->ff_bytesread = bytesread;
				microtime(&tv);
				cp->c_atime = tv.tv_sec;
			} else {
				fp->ff_bytesread += bytesread;
			}
			cp->c_touch_acctime = TRUE;

			if (vnode_isfastdevicecandidate(vp)) {
				(void) hfs_addhotfile(vp);
			}
			if (took_cnode_lock)
				hfs_unlock(cp);
		}
pagein_next_range:
		f_offset += xsize;
		offset   += xsize;
		isize    -= xsize;
		pg_index += num_of_pages;

		error = 0;
	}

pagein_done:
	if (truncate_lock_held == TRUE) {
		/* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
		hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
	}

	return (error);
}
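
/*
 * A short note on the hot-file accounting just above (a summary of that
 * code, with illustrative numbers): while the hot-file subsystem is in its
 * HFC_RECORDING stage, every page-in adds the bytes it brought in to
 * fp->ff_bytesread; if the cnode's access time predates hfc_timebase the
 * counter restarts at the current request's size.  A file that takes three
 * 64 KB page-ins within one sampling period therefore accumulates an
 * ff_bytesread of 196608, which hfs_addhotfile can later weigh against the
 * file's size when deciding whether it is "hot".
 */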
/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
		vnode_t       a_vp,
		upl_t         a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		int           a_flags,
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	int retval = 0;
	off_t filesize;
	upl_t upl;
	upl_page_info_t* pl = NULL;
	vm_offset_t a_pl_offset;
	int a_flags;
	int is_pageoutv2 = 0;
	kern_return_t kret;

	cp = VTOC(vp);
	fp = VTOF(vp);

	a_flags = ap->a_flags;
	a_pl_offset = ap->a_pl_offset;

	/*
	 * we can tell if we're getting the new or old behavior from the UPL
	 */
	if ((upl = ap->a_pl) == NULL) {
		int request_flags;

		is_pageoutv2 = 1;
		/*
		 * we're in control of any UPL we commit
		 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
		 */
		a_flags &= ~UPL_NOCOMMIT;
		a_pl_offset = 0;

		/*
		 * For V2 semantics, we want to take the cnode truncate lock
		 * shared to guard against the file size changing via zero-filling.
		 *
		 * However, we have to be careful because we may be invoked
		 * via the ubc_msync path to write out dirty mmap'd pages
		 * in response to a lock event on a content-protected
		 * filesystem (e.g. to write out class A files).
		 * As a result, we want to take the truncate lock 'SHARED' with
		 * the mini-recursion locktype so that we don't deadlock/panic
		 * because we may be already holding the truncate lock exclusive to force any other
		 * IOs to have blocked behind us.
		 */
		hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
		if (a_flags & UPL_MSYNC) {
			request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
		} else {
			request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
		}

		kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);

		if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
			retval = EINVAL;
			goto pageout_done;
		}
	}
	/*
	 * from this point forward upl points at the UPL we're working with
	 * it was either passed in or we successfully created it
	 */

	/*
	 * Figure out where the file ends, for pageout purposes.  If
	 * ff_new_size > ff_size, then we're in the middle of extending the
	 * file via a write, so it is safe (and necessary) that we be able
	 * to pageout up to that point.
	 */
	filesize = fp->ff_size;
	if (fp->ff_new_size > filesize)
		filesize = fp->ff_new_size;

	/*
	 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
	 * UPL instead of relying on the UPL passed into us.  We go ahead and do that here,
	 * scanning for dirty ranges.  We'll issue our own N cluster_pageout calls, for
	 * N dirty ranges in the UPL.  Note that this is almost a direct copy of the
	 * logic in vnode_pageout except that we need to do it after grabbing the truncate
	 * lock in HFS so that we don't lock invert ourselves.
	 *
	 * Note that we can still get into this function on behalf of the default pager with
	 * non-V2 behavior (swapfiles).  However in that case, we did not grab locks above
	 * since fsync and other writing threads will grab the locks, then mark the
	 * relevant pages as busy.  But the pageout codepath marks the pages as busy,
	 * and THEN would attempt to grab the truncate lock, which would result in deadlock.  So
	 * we do not try to grab anything for the pre-V2 case, which should only be accessed
	 * by the paging/VM system.
	 */
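
	/*
	 * In outline, the V2 block below does the following (a restatement of
	 * the loop that follows, not additional behavior): scan the UPL from
	 * the back to find the last present page, then walk forward skipping
	 * empty slots, panic on an unexpected clean page, group consecutive
	 * dirty pages into a run of num_of_pages, and hand each run to
	 * cluster_pageout as one contiguous xsize-byte write, remembering the
	 * first error in error_ret.
	 */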
	if (is_pageoutv2) {
		off_t f_offset;
		int   offset;
		int   isize;
		int   pg_index;
		int   error;
		int   error_ret = 0;

		isize = ap->a_size;
		f_offset = ap->a_f_offset;

		/*
		 * Scan from the back to find the last page in the UPL, so that we
		 * aren't looking at a UPL that may have already been freed by the
		 * preceding aborts/completions.
		 */
		for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
			if (upl_page_present(pl, --pg_index))
				break;
			if (pg_index == 0) {
				ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
				goto pageout_done;
			}
		}

		/*
		 * initialize the offset variables before we touch the UPL.
		 * a_f_offset is the position into the file, in bytes
		 * offset is the position into the UPL, in bytes
		 * pg_index is the pg# of the UPL we're operating on.
		 * isize is the offset into the UPL of the last non-clean page.
		 */
		isize = ((pg_index + 1) * PAGE_SIZE);

		offset = 0;
		pg_index = 0;

		while (isize) {
			int  xsize;
			int  num_of_pages;

			if ( !upl_page_present(pl, pg_index)) {
				/*
				 * we asked for RET_ONLY_DIRTY, so it's possible
				 * to get back empty slots in the UPL.
				 * just skip over them
				 */
				f_offset += PAGE_SIZE;
				offset   += PAGE_SIZE;
				isize    -= PAGE_SIZE;
				pg_index++;

				continue;
			}
			if ( !upl_dirty_page(pl, pg_index)) {
				panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
			}

			/*
			 * We know that we have at least one dirty page.
			 * Now checking to see how many in a row we have
			 */
			num_of_pages = 1;
			xsize = isize - PAGE_SIZE;

			while (xsize) {
				if ( !upl_dirty_page(pl, pg_index + num_of_pages))
					break;
				num_of_pages++;
				xsize -= PAGE_SIZE;
			}
			xsize = num_of_pages * PAGE_SIZE;

			if ((error = cluster_pageout(vp, upl, offset, f_offset,
					xsize, filesize, a_flags))) {
				if (error_ret == 0)
					error_ret = error;
			}
			f_offset += xsize;
			offset   += xsize;
			isize    -= xsize;
			pg_index += num_of_pages;
		}
		/* capture errnos bubbled out of cluster_pageout if they occurred */
		if (error_ret != 0) {
			retval = error_ret;
		}
	} /* end block for v2 pageout behavior */
	else {
		/*
		 * just call cluster_pageout for old pre-v2 behavior
		 */
		retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
				ap->a_size, filesize, a_flags);
	}
	/*
	 * If data was written, update the modification time of the file
	 * but only if it's mapped writable; we will have touched the
	 * modification time for direct writes.
	 */
	if (retval == 0 && (ubc_is_mapped_writable(vp)
				|| ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING))) {
		hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);

		// Check again with lock
		bool mapped_writable = ubc_is_mapped_writable(vp);
		if (mapped_writable
			|| ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING)) {
			cp->c_touch_modtime = TRUE;
			cp->c_touch_chgtime = TRUE;

			/*
			 * We only need to increment the generation counter if
			 * it's currently mapped writable because we incremented
			 * the counter in hfs_vnop_mnomap.
			 */
			if (mapped_writable)
				hfs_incr_gencount(VTOC(vp));

			/*
			 * If setuid or setgid bits are set and this process is
			 * not the superuser then clear the setuid and setgid bits
			 * as a precaution against tampering.
			 */
			if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
				(vfs_context_suser(ap->a_context) != 0)) {
				cp->c_mode &= ~(S_ISUID | S_ISGID);
			}
		}

		hfs_unlock(cp);
	}

pageout_done:
	if (is_pageoutv2) {
		/*
		 * Release the truncate lock.  Note that because
		 * we may have taken the lock recursively by
		 * being invoked via ubc_msync due to lockdown,
		 * we should release it recursively, too.
		 */
		hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
	}

	return (retval);
}
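
/*
 * For context on the UPL_MSYNC path above (a hedged illustration of a
 * caller, not a call made by this file at this point): a lock event on a
 * content-protected volume can flush a file's dirty mmap'd pages with
 * something like
 *
 *	ubc_msync(vp, 0, ubc_getsize(vp), NULL, UBC_PUSHDIRTY | UBC_SYNC);
 *
 * which arrives here with a_pl == NULL and UPL_MSYNC set in a_flags, so the
 * V2 block takes the truncate lock with the mini-recursion locktype and
 * builds its UPL with UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY.
 */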
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {

		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always true on big endian, so we always validate
		 * before writing here.  On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
		if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
			if (retval)
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");
		}
	}

	/* This buffer shouldn't be locked anymore but if it is clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp %p\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite (ap);

	return (retval);
}
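
/*
 * Why the 0x000e test above works (background note): the last two bytes of
 * a B-tree node hold the offset of the node's first record, and that record
 * always begins immediately after the 14-byte BTNodeDescriptor, so the
 * value reads as 0x000e (14) whenever the trailing offset table is still in
 * host byte order.  Once the node has been swapped to its big-endian disk
 * form, a little-endian host would read those same bytes as 0x0e00, so the
 * check cheaply distinguishes "still native, needs hfs_swap_BTNode" from
 * "already swapped".
 */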
int
hfs_pin_block_range(struct hfsmount *hfsmp, int pin_state, uint32_t start_block, uint32_t nblocks)
{
	_dk_cs_pin_t pin;
	unsigned ioc;
	int err;

	memset(&pin, 0, sizeof(pin));
	pin.cp_extent.offset = ((uint64_t)start_block) * HFSTOVCB(hfsmp)->blockSize;
	pin.cp_extent.length = ((uint64_t)nblocks) * HFSTOVCB(hfsmp)->blockSize;
	switch (pin_state) {
	case HFS_PIN_IT:
		ioc = _DKIOCCSPINEXTENT;
		pin.cp_flags = _DKIOCCSPINTOFASTMEDIA;
		break;
	case HFS_PIN_IT | HFS_TEMP_PIN:
		ioc = _DKIOCCSPINEXTENT;
		pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSTEMPORARYPIN;
		break;
	case HFS_PIN_IT | HFS_DATALESS_PIN:
		ioc = _DKIOCCSPINEXTENT;
		pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSPINFORSWAPFILE;
		break;
	case HFS_UNPIN_IT:
		ioc = _DKIOCCSUNPINEXTENT;
		pin.cp_flags = 0;
		break;
	case HFS_UNPIN_IT | HFS_EVICT_PIN:
		ioc = _DKIOCCSPINEXTENT;
		pin.cp_flags = _DKIOCCSPINTOSLOWMEDIA;
		break;
	default:
		return EINVAL;
	}

	err = VNOP_IOCTL(hfsmp->hfs_devvp, ioc, (caddr_t)&pin, 0, vfs_context_kernel());
	return err;
}
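
/*
 * Illustrative sketch of a caller (hypothetical, not code from this file):
 * temporarily pinning the first 8 allocation blocks of the volume to the
 * fast tier of a composite (CoreStorage/Fusion) device could look like
 *
 *	uint32_t start_block = 0, nblocks = 8;
 *	int err = hfs_pin_block_range(hfsmp, HFS_PIN_IT | HFS_TEMP_PIN,
 *	                              start_block, nblocks);
 *	if (err)
 *		printf("hfs: pin of blocks %u..%u failed (%d)\n",
 *		       start_block, start_block + nblocks - 1, err);
 *
 * The extent is expressed to the ioctl in bytes, which is why the code above
 * multiplies the block numbers by the volume's allocation block size.
 */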
// The cnode lock should already be held on entry to this function
int
hfs_pin_vnode(struct hfsmount *hfsmp, struct vnode *vp, int pin_state, uint32_t *num_blocks_pinned)
{
	struct filefork *fp = VTOF(vp);
	int i, err = 0, need_put = 0;
	struct vnode *rsrc_vp = NULL;
	uint32_t npinned = 0;

	if (num_blocks_pinned) {
		*num_blocks_pinned = 0;
	}

	if (vnode_vtype(vp) != VREG) {
		/* Not allowed to pin directories or symlinks */
		printf("hfs: can't pin vnode of type %d\n", vnode_vtype(vp));
		return (EPERM);
	}

	if (fp->ff_unallocblocks) {
		printf("hfs: can't pin a vnode w/unalloced blocks (%d)\n", fp->ff_unallocblocks);
		return (EINVAL);
	}

	/*
	 * It is possible that if the caller unlocked/re-locked the cnode after checking
	 * for C_NOEXISTS|C_DELETED that the file could have been deleted while the
	 * cnode was unlocked.  So check the condition again and return ENOENT so that
	 * the caller knows why we failed to pin the vnode.
	 */
	if (VTOC(vp)->c_flag & (C_NOEXISTS|C_DELETED)) {
		// makes no sense to pin something that's pending deletion
		return ENOENT;
	}

	if (fp->ff_blocks == 0 && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
		if (!VNODE_IS_RSRC(vp) && hfs_vgetrsrc(hfsmp, vp, &rsrc_vp) == 0) {
			//printf("hfs: fileid %d resource fork nblocks: %d / size: %lld\n", VTOC(vp)->c_fileid,
			//       VTOC(rsrc_vp)->c_rsrcfork->ff_blocks,VTOC(rsrc_vp)->c_rsrcfork->ff_size);

			fp = VTOC(rsrc_vp)->c_rsrcfork;
			need_put = 1;
		}
	}
	if (fp->ff_blocks == 0) {
		if (need_put) {
			//
			// use a distinct error code for a compressed file that has no resource fork;
			// we return EALREADY to indicate that the data is already probably hot file
			// cached because it's in an EA and the attributes btree is on the ssd
			//
			err = EALREADY;
		} else {
			err = EINVAL;
		}
		goto out;
	}

	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		if (fp->ff_extents[i].startBlock == 0) {
			break;
		}

		err = hfs_pin_block_range(hfsmp, pin_state, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount);
		if (err) {
			break;
		}
		npinned += fp->ff_extents[i].blockCount;
	}

	if (err || npinned == 0) {
		goto out;
	}

	if (fp->ff_extents[kHFSPlusExtentDensity-1].startBlock) {
		uint32_t pblocks;
		uint8_t forktype = 0;

		if (fp == VTOC(vp)->c_rsrcfork) {
			forktype = 0xff;
		}
		/*
		 * The file could have overflow extents, better pin them.
		 *
		 * We assume that since we are holding the cnode lock for this cnode,
		 * the file's extents cannot be manipulated, but the tree could, so we
		 * need to ensure that it doesn't change behind our back as we iterate it.
		 */
		int lockflags = hfs_systemfile_lock (hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
		err = hfs_pin_overflow_extents(hfsmp, VTOC(vp)->c_fileid, forktype, &pblocks);
		hfs_systemfile_unlock (hfsmp, lockflags);

		if (err) {
			goto out;
		}
		npinned += pblocks;
	}

out:
	if (num_blocks_pinned) {
		*num_blocks_pinned = npinned;
	}

	if (need_put && rsrc_vp) {
		//
		// have to unlock the cnode since it's shared between the
		// resource fork vnode and the data fork vnode (and the
		// vnode_put() may need to re-acquire the cnode lock to
		// reclaim the resource fork vnode)
		//
		hfs_unlock(VTOC(vp));
		vnode_put(rsrc_vp);
		hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
	}
	return err;
}
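
/*
 * Illustrative sketch of a caller (hypothetical, not code from this file):
 * with the cnode already locked, as the comment above the function requires,
 * a caller could pin a file's data and learn how many blocks actually landed
 * on the fast tier like this:
 *
 *	uint32_t pinned = 0;
 *	hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
 *	int err = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT | HFS_TEMP_PIN, &pinned);
 *	hfs_unlock(VTOC(vp));
 *	if (err == 0)
 *		printf("hfs: pinned %u allocation blocks\n", pinned);
 */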
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------
 * |///////////////|
 * -----------------
 * 0               N (file offset)
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 * 0               N     N+1              2N
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 * 0               N     N+1              2N
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *                       0                N
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
	struct proc *p)
{
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	u_int32_t headblks;
	u_int32_t datablks;
	u_int32_t blksize;
	u_int32_t growsize;
	u_int32_t nextallocsave;
	daddr64_t sector_a, sector_b;
	int eflags;
	off_t newbytes;
	int retval;
	int lockflags = 0;
	int took_trunc_lock = 0;
	int started_tr = 0;
	enum vtype vnodetype;

	vnodetype = vnode_vtype(vp);
	if (vnodetype != VREG) {
		/* Not allowed to move symlinks. */
		return (EPERM);
	}

	hfsmp = VTOHFS(vp);
	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
		return (ENOSPC);
	}

	cp = VTOC(vp);
	fp = VTOF(vp);
	if (fp->ff_unallocblocks)
		return (EINVAL);

#if CONFIG_PROTECT
	/*
	 * <rdar://problem/9118426>
	 * Disable HFS file relocation on content-protected filesystems
	 */
	if (cp_fs_protected (hfsmp->hfs_mp)) {
		return EINVAL;
	}
#endif
	/* If it's an SSD, also disable HFS relocation */
	if (hfsmp->hfs_flags & HFS_SSD) {
		return EINVAL;
	}

	blksize = hfsmp->blockSize;
	if (blockHint == 0)
		blockHint = hfsmp->nextAllocation;

	if (fp->ff_size > 0x7fffffff) {
		return (EFBIG);
	}

	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
		hfs_unlock(cp);
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		/* Force lock since callers expect the lock to be held. */
		if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return (retval);
		}
		/* No need to continue if file was removed. */
		if (cp->c_flag & C_NOEXISTS) {
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return (ENOENT);
		}
		took_trunc_lock = 1;
	}
	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;

	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
		return (EINVAL);
	}
	started_tr = 1;
	/*
	 * Protect the extents b-tree and the allocation bitmap
	 * during MapFileBlockC and ExtendFileC operations.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
	if (retval) {
		retval = MacToVFSError(retval);
		goto out;
	}

	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
	nextallocsave = hfsmp->nextAllocation;
	retval = ExtendFileC(hfsmp, (FCB *)fp, growsize, blockHint, eflags, &newbytes);
	if (eflags & kEFMetadataMask) {
		hfs_lock_mount(hfsmp);
		HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
		MarkVCBDirty(hfsmp);
		hfs_unlock_mount(hfsmp);
	}

	retval = MacToVFSError(retval);
	if (retval == 0) {
		cp->c_flag |= C_MODIFIED;
		if (newbytes < growsize) {
			retval = ENOSPC;
			goto restore;
		} else if (fp->ff_blocks < (headblks + datablks)) {
			printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
			retval = ENOSPC;
			goto restore;
		}

		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
		if (retval) {
			retval = MacToVFSError(retval);
		} else if ((sector_a + 1) == sector_b) {
			retval = ENOSPC;
			goto restore;
		} else if ((eflags & kEFMetadataMask) &&
			   ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
			      hfsmp->hfs_metazone_end)) {
			const char * filestr;
			char emptystr = '\0';

			if (cp->c_desc.cd_nameptr != NULL) {
				filestr = (const char *)&cp->c_desc.cd_nameptr[0];
			} else if (vnode_name(vp) != NULL) {
				filestr = vnode_name(vp);
			} else {
				filestr = &emptystr;
			}
			retval = ENOSPC;
			goto restore;
		}
	}
	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	hfs_end_transaction(hfsmp);
	started_tr = 0;

	if (retval) {
		/*
		 * Check to see if failure is due to excessive fragmentation.
		 */
		if ((retval == ENOSPC) &&
		    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
		}
		goto out;
	}
	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */

	if (vnodetype == VLNK)
		retval = EPERM;
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);

	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto out;
	}
	started_tr = 1;
	if (retval)
		goto restore;

	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	if (retval)
		goto restore;
out:
	if (took_trunc_lock)
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/* Push cnode's new extent data to disk. */
	if (retval == 0) {
		hfs_update(vp, 0);
	}
	if (hfsmp->jnl) {
		if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
			(void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
		else
			(void) hfs_flushvolumeheader(hfsmp, 0);
	}
exit:
	if (started_tr)
		hfs_end_transaction(hfsmp);

	return (retval);

restore:
	if (fp->ff_blocks == headblks) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
		goto exit;
	}
	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}

	(void) TruncateFileC(hfsmp, (FCB *)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
			     FTOC(fp)->c_fileid, false);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;

	if (took_trunc_lock)
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
	goto exit;
}
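
/*
 * A worked size example for the three steps above (illustrative numbers
 * only): for a 1 MiB file on a volume with 4 KiB allocation blocks,
 * datablks = howmany(1048576, 4096) = 256 and growsize = 1048576 bytes.
 * After ExtendFileC in STEP 1 the fork temporarily owns headblks + 256
 * blocks; STEP 2 copies the original data into the newly acquired range,
 * and HeadTruncateFile in STEP 3 releases the original headblks blocks,
 * leaving the file the same logical size in its new on-disk location.
 */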
/*
 * Clone a file's data within the file.
 *
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	caddr_t  bufp;
	size_t   bufsize;
	size_t   copysize;
	size_t   iosize;
	size_t   offset;
	off_t    writebase;
	uio_t auio;
	int  error = 0;

	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 128 * 1024);
	offset = 0;

	hfs_unlock(VTOC(vp));

#if CONFIG_PROTECT
	if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
		hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
		return (error);
	}
#endif /* CONFIG_PROTECT */

	bufp = hfs_malloc(bufsize);

	auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);

	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_read(vp, auio, copysize, IO_NOCACHE);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
			error = EIO;
			break;
		}

		uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_write(vp, auio, writebase + offset,
				      writebase + offset + iosize,
				      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
			error = EIO;
			break;
		}
		offset += iosize;
	}
	uio_free(auio);

	if ((blksize & PAGE_MASK)) {
		/*
		 * since the copy may not have started on a PAGE
		 * boundary (or may not have ended on one), we
		 * may have pages left in the cache since NOCACHE
		 * will let partially written pages linger...
		 * let's just flush the entire range to make sure
		 * we don't have any pages left that are beyond
		 * (or intersect) the real LEOF of this file
		 */
		ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
	} else {
		/*
		 * No need to call ubc_msync or hfs_invalbuf
		 * since the file was copied using IO_NOCACHE and
		 * the copy was done starting and ending on a page
		 * boundary in the file.
		 */
	}
	hfs_free(bufp, bufsize);

	hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
	return (error);
}
/*
 * Clone a system (metadata) file.
 *
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
		 kauth_cred_t cred, struct proc *p)
{
	caddr_t  bufp;
	char * offset;
	size_t  bufsize;
	size_t  iosize;
	struct buf *bp = NULL;
	daddr64_t  blkno;
	daddr64_t  blk;
	daddr64_t  start_blk;
	daddr64_t  last_blk;
	int  breadcnt;
	int  i;
	int  error = 0;

	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;

	bufp = hfs_malloc(bufsize);

	start_blk = ((daddr64_t)blkstart * blksize) / iosize;
	last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;
	blkno = 0;

	while (blkno < last_blk) {
		/*
		 * Read up to a megabyte
		 */
		offset = bufp;
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
			error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
			if (error) {
				printf("hfs_clonesysfile: meta_bread error %d\n", error);
				goto out;
			}
			if (buf_count(bp) != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
				goto out;
			}
			bcopy((char *)buf_dataptr(bp), offset, iosize);

			buf_markinvalid(bp);
			buf_brelse(bp);
			bp = NULL;

			offset += iosize;
		}

		/*
		 * Write up to a megabyte
		 */
		offset = bufp;
		for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
			bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
			if (bp == NULL) {
				printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
				error = EIO;
				goto out;
			}
			bcopy(offset, (char *)buf_dataptr(bp), iosize);
			error = (int)buf_bwrite(bp);
			bp = NULL;
			if (error)
				goto out;
			offset += iosize;
		}
	}
out:
	if (bp) {
		buf_brelse(bp);
	}

	hfs_free(bufp, bufsize);

	error = hfs_fsync(vp, MNT_WAIT, 0, p);

	return (error);
}
errno_t hfs_flush_invalid_ranges(vnode_t vp)
{
	cnode_t *cp = VTOC(vp);

	hfs_assert(cp->c_lockowner == current_thread());
	hfs_assert(cp->c_truncatelockowner == current_thread());

	if (!ISSET(cp->c_flag, C_ZFWANTSYNC) && !cp->c_zftimeout)
		return 0;

	filefork_t *fp = VTOF(vp);

	/*
	 * We can't hold the cnode lock whilst we call cluster_write so we
	 * need to copy the extents into a local buffer.
	 */
	int max_exts = 16;
	struct ext {
		off_t start, end;
	} exts_buf[max_exts];		// 256 bytes
	struct ext *exts = exts_buf;
	int ext_count = 0;
	errno_t ret;

	struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);

	while (r) {
		/* If we have more than can fit in our stack buffer, switch
		   to a heap buffer. */
		if (exts == exts_buf && ext_count == max_exts) {
			max_exts = 256;
			exts = hfs_malloc(sizeof(struct ext) * max_exts);
			memcpy(exts, exts_buf, ext_count * sizeof(struct ext));
		}

		struct rl_entry *next = TAILQ_NEXT(r, rl_link);

		exts[ext_count++] = (struct ext){ r->rl_start, r->rl_end };

		if (!next || (ext_count == max_exts && exts != exts_buf)) {
			hfs_unlock(cp);

			for (int i = 0; i < ext_count; ++i) {
				ret = cluster_write(vp, NULL, fp->ff_size, exts[i].end + 1,
						    exts[i].start, 0,
						    IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE);
				if (ret) {
					hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
					goto exit;
				}
			}

			if (!next) {
				hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
				break;
			}

			/* Push any existing clusters which should clean up our invalid
			   ranges as they go through hfs_vnop_blockmap. */
			cluster_push(vp, 0);

			hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);

			/*
			 * Get back to where we were (given we dropped the lock).
			 * This shouldn't be many because we pushed above.
			 */
			TAILQ_FOREACH(r, &fp->ff_invalidranges, rl_link) {
				if (r->rl_end > exts[ext_count - 1].end)
					break;
			}

			ext_count = 0;
		} else
			r = next;
	}

	ret = 0;

exit:

	if (exts != exts_buf)
		hfs_free(exts, sizeof(struct ext) * max_exts);

	return ret;
}