/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*	@(#)hfs_readwrite.c	1.0
 *
 *	(c) 1998-2001 Apple Inc. All Rights Reserved
 *
 *	hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vfs_context.h>
#include <sys/sysctl.h>
#include <sys/fsctl.h>
#include <sys/fsevents.h>
#include <uuid/uuid.h>

#include <libkern/OSDebug.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

#include <IOKit/IOBSD.h>

#include <sys/kdebug.h>

#include "hfs_attrlist.h"
#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "FileMgrInternal.h"
#include "BTreesInternal.h"
#include "hfs_cnode.h"

#if HFS_CONFIG_KEY_ROLL
#include "hfs_key_roll.h"
#endif
#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))

enum {
    MAXHFSFILESIZE = 0x7FFFFFFF   /* this needs to go in the mount structure */
};
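
/*
 * Illustrative note (not part of the original source): with a 128 KiB
 * MAXPHYSIO, can_cluster(65536) evaluates true (a 4096-byte multiple no
 * larger than MAXPHYSIO/2), while can_cluster(65537) evaluates false
 * because the size is not a multiple of 4096.
 */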
/* from bsd/hfs/hfs_vfsops.c */
extern int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

/* from hfs_hotfiles.c */
extern int hfs_pin_overflow_extents(struct hfsmount *hfsmp, uint32_t fileid,
                                    uint8_t forktype, uint32_t *pinned);

static int  hfs_clonefile(struct vnode *, int, int, int);
static int  hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
static int  do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
/*
 * Read data from a file.
 */
int
hfs_vnop_read(struct vnop_read_args *ap)
{
    /*
       struct vnop_read_args {
           struct vnodeop_desc *a_desc;
           vfs_context_t a_context;
       };
     */
    uio_t uio = ap->a_uio;
    struct vnode *vp = ap->a_vp;
    struct hfsmount *hfsmp;
    off_t start_resid = uio_resid(uio);
    off_t offset = uio_offset(uio);
    int took_truncate_lock = 0;
    int throttled_count = 0;

    /* Preflight checks */
    if (!vnode_isreg(vp)) {
        /* can only read regular files */
    }
    if (start_resid == 0)
        return (0);             /* Nothing left to do */
    if (offset < 0)
        return (EINVAL);        /* can't read from a negative offset */

    if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
        (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
        /* Don't allow unencrypted io request from user space */
    }

#if HFS_COMPRESSION
    if (VNODE_IS_RSRC(vp)) {
        if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
        }
        /* otherwise read the resource fork normally */
    } else {
        int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */

        retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
        if (retval == 0 && !(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
            (void) hfs_addhotfile(vp);
        }

        /* successful read, update the access time */
        VTOC(vp)->c_touch_acctime = TRUE;

        // compressed files are not traditional hot file candidates
        // but they may be for CF (which ignores the ff_bytesread field)
        if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
            VTOF(vp)->ff_bytesread = 0;
        }
        /* otherwise the file was converted back to a regular file while we were reading it */
    } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
        error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
    }
#endif /* HFS_COMPRESSION */
    if ((retval = cp_handle_vnop(vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
    }
#if HFS_CONFIG_KEY_ROLL
    if (ISSET(ap->a_ioflag, IO_ENCRYPTED)) {
        off_rsrc_t off_rsrc = off_rsrc_make(offset + start_resid,
        retval = hfs_key_roll_up_to(ap->a_context, vp, off_rsrc);
    }
#endif // HFS_CONFIG_KEY_ROLL
#endif // CONFIG_PROTECT

    /*
     * If this read request originated from a syscall (as opposed to
     * an in-kernel page fault or something), then set it up for
     */
    if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
        io_throttle = IO_RETURN_ON_THROTTLE;
    }

    /* Protect against a size change. */
    hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
    took_truncate_lock = 1;
    filesize = fp->ff_size;
    filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

    /*
     * Check the file size. Note that per POSIX spec, we return 0 at
     * file EOF, so attempting a read at an offset that is too big
     * should just return 0 on HFS+. Since the return value was initialized
     * to 0 above, we just jump to exit.  HFS Standard has its own behavior.
     */
    if (offset > filesize) {
        if ((hfsmp->hfs_flags & HFS_STANDARD) &&
            (offset > (off_t)MAXHFSFILESIZE)) {
        }
    }

    KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_START,
        (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
    retval = cluster_read(vp, uio, filesize, ap->a_ioflag | io_throttle);

    cp->c_touch_acctime = TRUE;

    KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_END,
        (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
    /*
     * Keep track of blocks read.
     */
    if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
        int took_cnode_lock = 0;

        bytesread = start_resid - uio_resid(uio);

        /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
        if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
            hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
        }
        /*
         * If this file hasn't been seen since the start of
         * the current sampling period then start over.
         */
        if (cp->c_atime < hfsmp->hfc_timebase) {
            fp->ff_bytesread = bytesread;
            cp->c_atime = tv.tv_sec;
        } else {
            fp->ff_bytesread += bytesread;
        }

        if (!(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
            //
            // We don't add hotfiles for processes doing IO_EVTONLY I/O
            // on the assumption that they're system processes such as
            // mdworker which scan everything in the system (and thus
            // do not represent user-initiated access to files)
            //
            (void) hfs_addhotfile(vp);
        }
    }

    if (took_truncate_lock) {
        hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
    }
    if (retval == EAGAIN) {
        throttle_lowpri_io(1);
    }
    throttle_info_reset_window(NULL);
/*
 * Ideally, this wouldn't be necessary; the cluster code should be
 * able to handle this on the read-side.  See <rdar://20420068>.
 */
static errno_t
hfs_zero_eof_page(vnode_t vp, off_t zero_up_to)
{
    hfs_assert(VTOC(vp)->c_lockowner != current_thread());
    hfs_assert(VTOC(vp)->c_truncatelockowner == current_thread());

    struct filefork *fp = VTOF(vp);

    if (!(fp->ff_size & PAGE_MASK_64) || zero_up_to <= fp->ff_size) {
        return 0;
    }

    zero_up_to = MIN(zero_up_to, (off_t)round_page_64(fp->ff_size));

    /* N.B. At present, @zero_up_to is not important because the cluster
       code will always zero up to the end of the page anyway. */
    return cluster_write(vp, NULL, fp->ff_size, zero_up_to,
                         fp->ff_size, 0, IO_HEADZEROFILL);
}
/*
 * Write data to a file.
 */
int
hfs_vnop_write(struct vnop_write_args *ap)
{
    uio_t uio = ap->a_uio;
    struct vnode *vp = ap->a_vp;
    struct hfsmount *hfsmp;
    kauth_cred_t cred = NULL;
    off_t bytesToAdd = 0;
    off_t actualBytesAdded;
    int ioflag = ap->a_ioflag;
    int cnode_locked = 0;
    int partialwrite = 0;
    time_t orig_ctime = VTOC(vp)->c_ctime;
    int took_truncate_lock = 0;
    int io_return_on_throttle = 0;
    int throttled_count = 0;
    if (hfs_file_is_compressed(VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
        int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
        switch (state) {
        case FILE_IS_COMPRESSED:
        case FILE_IS_CONVERTING:
            /* if FILE_IS_CONVERTING, we allow writes but do not
               bother with snapshots or else we will deadlock. */
            printf("invalid state %d for compressed file\n", state);
        }
    } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
        error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
    }

    nspace_snapshot_event(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);

    if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
        (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
        /* Don't allow unencrypted io request from user space */
    resid = uio_resid(uio);
    offset = uio_offset(uio);

    if (!vnode_isreg(vp))
        return (EPERM);         /* Can only write regular files */

    if ((retval = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
    }

    eflags = kEFDeferMask;      /* defer file block allocations */
#if HFS_SPARSE_DEV
    /*
     * When the underlying device is sparse and space
     * is low (< 8MB), stop doing delayed allocations
     * and begin doing synchronous I/O.
     */
    if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
        (hfs_freeblks(hfsmp, 0) < 2048)) {
        eflags &= ~kEFDeferMask;
    }
#endif /* HFS_SPARSE_DEV */

    if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) ==
        (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) {
        io_return_on_throttle = IO_RETURN_ON_THROTTLE;
    }
    /*
     * Protect against a size change.
     *
     * Note: If took_truncate_lock is true, then we previously got the lock shared
     * but needed to upgrade to exclusive.  So try getting it exclusive from the
     * start.
     */
    if (ioflag & IO_APPEND || took_truncate_lock) {
        hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
    } else {
        hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
    }
    took_truncate_lock = 1;

    if (ioflag & IO_APPEND) {
        uio_setoffset(uio, fp->ff_size);
        offset = fp->ff_size;
    }
    if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
    }

    cred = vfs_context_ucred(ap->a_context);
    if (cred && suser(cred, NULL) != 0)
        eflags |= kEFReserveMask;

    origFileSize = fp->ff_size;
    writelimit = offset + resid;
    /*
     * We may need an exclusive truncate lock for several reasons, all
     * of which are because we may be writing to a (portion of a) block
     * for the first time, and we need to make sure no readers see the
     * prior, uninitialized contents of the block.  The cases are:
     *
     * 1. We have unallocated (delayed allocation) blocks.  We may be
     *    allocating new blocks to the file and writing to them.
     *    (A more precise check would be whether the range we're writing
     *    to contains delayed allocation blocks.)
     * 2. We need to extend the file.  The bytes between the old EOF
     *    and the new EOF are not yet initialized.  This is important
     *    even if we're not allocating new blocks to the file.  If the
     *    old EOF and new EOF are in the same block, we still need to
     *    protect that range of bytes until they are written for the
     *    first time.
     *
     * If we had a shared lock with the above cases, we need to try to upgrade
     * to an exclusive lock.  If the upgrade fails, we will lose the shared
     * lock, and will need to take the truncate lock again; the took_truncate_lock
     * flag will still be set, causing us to try for an exclusive lock next time.
     */
    if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
        ((fp->ff_unallocblocks != 0) ||
         (writelimit > origFileSize))) {
        if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
            /*
             * Lock upgrade failed and we lost our shared lock, try again.
             * Note: we do not set took_truncate_lock=0 here.  Leaving it
             * set to 1 will cause us to try to get the lock exclusive.
             */
        }
        /* Store the owner in the c_truncatelockowner field if we successfully upgrade */
        cp->c_truncatelockowner = current_thread();
    }
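
    /*
     * Illustrative sketch (not part of the original source): the upgrade path
     * above relies on lck_rw_lock_shared_to_exclusive() dropping the shared
     * lock entirely when the upgrade fails, so callers retry in a loop, e.g.:
     *
     *	for (;;) {
     *		hfs_lock_truncate(cp, took_truncate_lock ? HFS_EXCLUSIVE_LOCK
     *		                                         : HFS_SHARED_LOCK,
     *		                  HFS_LOCK_DEFAULT);
     *		took_truncate_lock = 1;
     *		if (!need_exclusive)                 // shared lock is enough
     *			break;
     *		if (cp->c_truncatelockowner != HFS_SHARED_OWNER ||
     *		    lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock)) {
     *			cp->c_truncatelockowner = current_thread();
     *			break;                       // upgraded (or already exclusive)
     *		}
     *		// upgrade failed: the shared lock is gone; go around again.
     *		// took_truncate_lock stays set so the retry asks for exclusive.
     *	}
     *
     * need_exclusive is a hypothetical stand-in for the delayed-allocation /
     * file-extension checks described in the comment above.
     */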
    if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
    }

    filebytes = hfs_blk_to_bytes(fp->ff_blocks, hfsmp->blockSize);

    if (offset > filebytes
        && (hfs_blk_to_bytes(hfs_freeblks(hfsmp, ISSET(eflags, kEFReserveMask)),
                             hfsmp->blockSize) < offset - filebytes)) {
    }

    KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START,
        (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
    /* Check if we do not need to extend the file */
    if (writelimit <= filebytes) {
    }

    bytesToAdd = writelimit - filebytes;

    retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),

    if (hfs_start_transaction(hfsmp) != 0) {
    }

    while (writelimit > filebytes) {
        bytesToAdd = writelimit - filebytes;

        /* Protect extents b-tree and allocation bitmap */
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

        /* Files that are changing size are not hot file candidates. */
        if (hfsmp->hfc_stage == HFC_RECORDING) {
            fp->ff_bytesread = 0;
        }
        retval = MacToVFSError(ExtendFileC(hfsmp, (FCB *)fp, bytesToAdd,
                                           0, eflags, &actualBytesAdded));

        hfs_systemfile_unlock(hfsmp, lockflags);

        if ((actualBytesAdded == 0) && (retval == E_NONE))
        if (retval != E_NONE)
        filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
        KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE,
            (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
    }
    (void) hfs_update(vp, 0);
    (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
    (void) hfs_end_transaction(hfsmp);
    /*
     * If we didn't grow the file enough try a partial write.
     * POSIX expects this behavior.
     */
    if ((retval == ENOSPC) && (filebytes > offset)) {
        uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
        writelimit = filebytes;
    }

    if (retval == E_NONE) {
        if (writelimit > fp->ff_size) {
            filesize = writelimit;

            rl_add(fp->ff_size, writelimit - 1, &fp->ff_invalidranges);
            cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
        } else {
            filesize = fp->ff_size;
        }

        lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
        /*
         * We no longer use IO_HEADZEROFILL or IO_TAILZEROFILL (except
         * for one case below).  For the regions that lie before the
         * beginning and after the end of this write that are in the
         * same page, we let the cluster code handle zeroing that out
         * if necessary.  If those areas are not cached, the cluster
         * code will try and read those areas in, and in the case
         * where those regions have never been written to,
         * hfs_vnop_blockmap will consult the invalid ranges and then
         * indicate that.  The cluster code will zero out those areas.
         */

        head_off = trunc_page_64(offset);

        if (head_off < offset && head_off >= fp->ff_size) {
            /*
             * The first page is beyond current EOF, so as an
             * optimisation, we can pass IO_HEADZEROFILL.
             */
            lflag |= IO_HEADZEROFILL;
        }
        /*
         * We need to tell UBC the fork's new size BEFORE calling
         * cluster_write, in case any of the new pages need to be
         * paged out before cluster_write completes (which does happen
         * in embedded systems due to extreme memory pressure).
         * Similarly, we need to tell hfs_vnop_pageout what the new EOF
         * will be, so that it can pass that on to cluster_pageout, and
         * allow those pageouts.
         *
         * We don't update ff_size yet since we don't want pageins to
         * be able to see uninitialized data between the old and new
         * EOF, until cluster_write has completed and initialized that
         * part of the file.
         *
         * The vnode pager relies on the file size last given to UBC via
         * ubc_setsize.  hfs_vnop_pageout relies on fp->ff_new_size or
         * ff_size (whichever is larger).  NOTE: ff_new_size is always
         * zero, unless we are extending the file via write.
         */
        if (filesize > fp->ff_size) {
            retval = hfs_zero_eof_page(vp, offset);

            fp->ff_new_size = filesize;
            ubc_setsize(vp, filesize);
        }
        retval = cluster_write(vp, uio, fp->ff_size, filesize, head_off,
                               0, lflag | IO_NOZERODIRTY | io_return_on_throttle);

        fp->ff_new_size = 0;	/* no longer extending; use ff_size */
        if (retval == EAGAIN) {
            /*
             * EAGAIN indicates that we still have I/O to do, but
             * that we now need to be throttled
             */
            if (resid != uio_resid(uio)) {
                /*
                 * did manage to do some I/O before returning EAGAIN
                 */
                resid = uio_resid(uio);
                offset = uio_offset(uio);

                cp->c_touch_chgtime = TRUE;
                cp->c_touch_modtime = TRUE;
                hfs_incr_gencount(cp);
            }
            if (filesize > fp->ff_size) {
                /*
                 * we called ubc_setsize before the call to
                 * cluster_write... since we only partially
                 * completed the I/O, we need to
                 * re-adjust our idea of the filesize based
                 */
                ubc_setsize(vp, offset);

                fp->ff_size = offset;
            }
        }
        if (filesize > origFileSize) {
            ubc_setsize(vp, origFileSize);
        }

        if (filesize > origFileSize) {
            fp->ff_size = filesize;

            /* Files that are changing size are not hot file candidates. */
            if (hfsmp->hfc_stage == HFC_RECORDING) {
                fp->ff_bytesread = 0;
            }
        }
        fp->ff_new_size = 0;	/* ff_size now has the correct size */

        uio_setresid(uio, (uio_resid(uio) + bytesToAdd));

    if (vnode_should_flush_after_write(vp, ioflag))
        hfs_flush(hfsmp, HFS_FLUSH_CACHE);
    hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);

    if (resid > uio_resid(uio)) {
        cp->c_touch_chgtime = TRUE;
        cp->c_touch_modtime = TRUE;
        hfs_incr_gencount(cp);

        /*
         * If we successfully wrote any data, and we are not the superuser
         * we clear the setuid and setgid bits as a precaution against
         * tampering.
         */
        if (cp->c_mode & (S_ISUID | S_ISGID)) {
            cred = vfs_context_ucred(ap->a_context);
            if (cred && suser(cred, NULL)) {
                cp->c_mode &= ~(S_ISUID | S_ISGID);
            }
        }
    }
    if (retval) {
        if (ioflag & IO_UNIT) {
            (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
            uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
            uio_setresid(uio, resid);
            filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
        }
    } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio)))
        retval = hfs_update(vp, 0);
    /* Updating vcbWrCnt doesn't need to be atomic. */

    KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END,
        (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);

    if (retval && took_truncate_lock
        && cp->c_truncatelockowner == current_thread()) {
        rl_remove(fp->ff_size, RL_INFINITY, &fp->ff_invalidranges);
    }

    if (took_truncate_lock) {
        hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
    }
    if (retval == EAGAIN) {
        throttle_lowpri_io(1);
    }
    throttle_info_reset_window(NULL);
/* support for the "bulk-access" fcntl */

#define CACHE_LEVELS 16
#define NUM_CACHE_ENTRIES (64*16)
#define PARENT_IDS_FLAG 0x100

struct access_cache {
    int numcached;
    int cachehits;              /* these two for statistics gathering */
    unsigned int *acache;
    unsigned char *haveaccess;
};

struct access_t {
    uid_t   uid;                /* IN: effective user id */
    short   flags;              /* IN: access requested (i.e. R_OK) */
    short   num_groups;         /* IN: number of groups user belongs to */
    int     num_files;          /* IN: number of files to process */
    int     *file_ids;          /* IN: array of file ids */
    gid_t   *groups;            /* IN: array of groups */
    short   *access;            /* OUT: access info for each file (0 for 'has access') */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_access_t {
    uid_t   uid;                /* IN: effective user id */
    short   flags;              /* IN: access requested (i.e. R_OK) */
    short   num_groups;         /* IN: number of groups user belongs to */
    int     num_files;          /* IN: number of files to process */
    user32_addr_t file_ids;     /* IN: array of file ids */
    user32_addr_t groups;       /* IN: array of groups */
    user32_addr_t access;       /* OUT: access info for each file (0 for 'has access') */
};

struct user64_access_t {
    uid_t   uid;                /* IN: effective user id */
    short   flags;              /* IN: access requested (i.e. R_OK) */
    short   num_groups;         /* IN: number of groups user belongs to */
    int     num_files;          /* IN: number of files to process */
    user64_addr_t file_ids;     /* IN: array of file ids */
    user64_addr_t groups;       /* IN: array of groups */
    user64_addr_t access;       /* OUT: access info for each file (0 for 'has access') */
};


// these are the "extended" versions of the above structures
// note that it is crucial that they be different sized than
// the regular version
struct ext_access_t {
    uint32_t   flags;           /* IN: access requested (i.e. R_OK) */
    uint32_t   num_files;       /* IN: number of files to process */
    uint32_t   map_size;        /* IN: size of the bit map */
    uint32_t   *file_ids;       /* IN: Array of file ids */
    char       *bitmap;         /* OUT: hash-bitmap of interesting directory ids */
    short      *access;         /* OUT: access info for each file (0 for 'has access') */
    uint32_t   num_parents;     /* future use */
    cnid_t     *parents;        /* future use */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_ext_access_t {
    uint32_t   flags;           /* IN: access requested (i.e. R_OK) */
    uint32_t   num_files;       /* IN: number of files to process */
    uint32_t   map_size;        /* IN: size of the bit map */
    user32_addr_t file_ids;     /* IN: Array of file ids */
    user32_addr_t bitmap;       /* OUT: hash-bitmap of interesting directory ids */
    user32_addr_t access;       /* OUT: access info for each file (0 for 'has access') */
    uint32_t   num_parents;     /* future use */
    user32_addr_t parents;      /* future use */
};
struct user64_ext_access_t {
    uint32_t   flags;           /* IN: access requested (i.e. R_OK) */
    uint32_t   num_files;       /* IN: number of files to process */
    uint32_t   map_size;        /* IN: size of the bit map */
    user64_addr_t file_ids;     /* IN: array of file ids */
    user64_addr_t bitmap;       /* OUT: hash-bitmap of interesting directory ids */
    user64_addr_t access;       /* OUT: access info for each file (0 for 'has access') */
    uint32_t   num_parents;     /* future use */
    user64_addr_t parents;      /* future use */
};
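
/*
 * Illustrative sketch (not part of the original source): a user-space caller
 * fills in the extended bulk-access structure and hands it to the file system
 * through fsctl(2).  The command constant (shown here as
 * HFS_EXT_BULKACCESS_FSCTL) and the user-visible struct name come from
 * <hfs/hfs_fsctl.h> and are assumptions; the field layout mirrors
 * ext_access_t above.
 *
 *	uint32_t ids[2] = { 16, 17 };        // cnids to test
 *	short    result[2];
 *	struct ext_access_t args = {
 *		.flags     = R_OK,           // access being requested
 *		.num_files = 2,
 *		.map_size  = 0,              // no directory bitmap wanted
 *		.file_ids  = ids,
 *		.bitmap    = NULL,
 *		.access    = result,         // one short per file id, 0 == has access
 *	};
 *	if (fsctl("/Volumes/MyHFS", HFS_EXT_BULKACCESS_FSCTL, &args, 0) == 0) {
 *		// result[i] is 0 or an errno-style value for ids[i]
 *	}
 */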
/*
 * Perform a binary search for the given parent_id.  Return value is
 * the index if there is a match.  If no_match_indexp is non-NULL it
 * will be assigned with the index to insert the item (even if it was
 * not found).
 */
static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
{
    unsigned int mid = ((hi - lo)/2) + lo;
    unsigned int this_id = array[mid];

    if (parent_id == this_id) {
    }
    if (parent_id < this_id) {
    }
    if (parent_id > this_id) {
    }

    /* check if lo and hi converged on the match */
    if (parent_id == array[hi]) {
    }
    if (no_match_indexp) {
        *no_match_indexp = hi;
    }
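
/*
 * Illustrative example (not part of the original source): given the sorted
 * array {10, 20, 30} with hi == 2, cache_binSearch(array, 2, 20, &idx) is
 * intended to return the index of the match (1).  On a miss, the callers
 * below rely on the insertion point written through no_match_indexp -- see
 * lookup_bucket(), which uses it to decide where a new entry should go.
 */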
static int
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
    int index, no_match_index;

    if (cache->numcached == 0) {
        return 0; // table is empty, so insert at index=0 and report no match
    }

    if (cache->numcached > NUM_CACHE_ENTRIES) {
        cache->numcached = NUM_CACHE_ENTRIES;
    }

    hi = cache->numcached - 1;

    index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);

    /* if no existing entry found, find index for new one */
    index = no_match_index;
/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in).  We currently do a replace rather
 * than an insert if the cache is full.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
    int lookup_index = -1;

    /* need to do a lookup first if -1 passed for index */
    if (lookup_bucket(cache, &lookup_index, nodeID)) {
        if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
            // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
            cache->haveaccess[lookup_index] = access;
        }

        /* mission accomplished */
        index = lookup_index;
    }

    /* if the cache is full, do a replace rather than an insert */
    if (cache->numcached >= NUM_CACHE_ENTRIES) {
        cache->numcached = NUM_CACHE_ENTRIES - 1;

        if (index > cache->numcached) {
            index = cache->numcached;
        }
    }

    if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
    }

    if (index >= 0 && index < cache->numcached) {
        /* only do bcopy if we're inserting */
        bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
        bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
    }

    cache->acache[index] = nodeID;
    cache->haveaccess[index] = access;
static int
snoop_callback(const cnode_t *cp, void *arg)
{
    struct cinfo *cip = arg;

    cip->uid = cp->c_uid;
    cip->gid = cp->c_gid;
    cip->mode = cp->c_mode;
    cip->parentcnid = cp->c_parentcnid;
    cip->recflags = cp->c_attr.ca_recflags;
/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id.  If the item
 * isn't incore, then go to the catalog.
 */
static int
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
    struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
{
    /* if this id matches the one the fsctl was called with, skip the lookup */
    if (cnid == skip_cp->c_cnid) {
        cnattrp->ca_uid = skip_cp->c_uid;
        cnattrp->ca_gid = skip_cp->c_gid;
        cnattrp->ca_mode = skip_cp->c_mode;
        cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
        keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
    } else {
        struct cinfo c_info;

        /* otherwise, check the cnode hash in case the file/dir is incore */
        error = hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info);

        if (error == EACCES) {
        } else if (!error) {
            cnattrp->ca_uid = c_info.uid;
            cnattrp->ca_gid = c_info.gid;
            cnattrp->ca_mode = c_info.mode;
            cnattrp->ca_recflags = c_info.recflags;
            keyp->hfsPlus.parentID = c_info.parentcnid;
        } else {
            if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
                throttle_lowpri_io(1);

            lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

            /* lookup this cnid in the catalog */
            error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);

            hfs_systemfile_unlock(hfsmp, lockflags);
        }
    }
/*
 * Compute whether we have access to the given directory (nodeID) and all its parents.  Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
static int
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
    struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
    struct vfs_context *my_context,
    char *bitmap,
    uint32_t map_size,
    cnid_t *parents,
    uint32_t num_parents)
{
    HFSCatalogNodeID thisNodeID;
    unsigned int myPerms;
    struct cat_attr cnattr;
    int cache_index = -1, scope_index = -1, scope_idx_start = -1;
    int i = 0, ids_to_cache = 0;
    int parent_ids[CACHE_LEVELS];

    thisNodeID = nodeID;
    while (thisNodeID >= kRootDirID) {
        myResult = 0;   /* default to "no access" */

        /* check the cache before resorting to hitting the catalog */

        /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
         * to look any further after hitting cached dir */

        if (lookup_bucket(cache, &cache_index, thisNodeID)) {
            myErr = cache->haveaccess[cache_index];
            if (scope_index != -1) {
                if (myErr == ESRCH) {
                }
            } else {
                scope_index = 0;   // so we'll just use the cache result
                scope_idx_start = ids_to_cache;
            }
            myResult = (myErr == 0) ? 1 : 0;
            goto ExitThisRoutine;
        }

        tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
        if (scope_index == -1)
        if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
            scope_idx_start = ids_to_cache;
        }

        /* remember which parents we want to cache */
        if (ids_to_cache < CACHE_LEVELS) {
            parent_ids[ids_to_cache] = thisNodeID;
        }
        // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
        if (bitmap && map_size) {
            bitmap[(thisNodeID/8)%(map_size)] |= (1 << (thisNodeID&7));
        }
        /* do the lookup (checks the cnode hash, then the catalog) */
        myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
        if (myErr) {
            goto ExitThisRoutine;   /* no access */
        }

        /* Root always gets access. */
        if (suser(myp_ucred, NULL) == 0) {
            thisNodeID = catkey.hfsPlus.parentID;
        }

        // if the thing has acl's, do the full permission check
        if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
            /* get the vnode for this cnid */
            myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
            if (myErr) {
                goto ExitThisRoutine;
            }

            thisNodeID = VTOC(vp)->c_parentcnid;

            hfs_unlock(VTOC(vp));

            if (vnode_vtype(vp) == VDIR) {
                myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
            } else {
                myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
            }
            if (myErr) {
                goto ExitThisRoutine;
            }
        } else {
            int mode = cnattr.ca_mode & S_IFMT;

            myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp, myp_ucred, theProcPtr);

            if (mode == S_IFDIR) {
                flags = R_OK | X_OK;
            }
            if ((myPerms & flags) != flags) {
                goto ExitThisRoutine;   /* no access */
            }

            /* up the hierarchy we go */
            thisNodeID = catkey.hfsPlus.parentID;
        }
    }

    /* if here, we have access to this node */

ExitThisRoutine:
    if (parents && myErr == 0 && scope_index == -1) {
    }

    /* cache the parent directory(ies) */
    for (i = 0; i < ids_to_cache; i++) {
        if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
            add_node(cache, -1, parent_ids[i], ESRCH);
        } else {
            add_node(cache, -1, parent_ids[i], myErr);
        }
    }
static int
do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
    struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
{
    /*
     * NOTE: on entry, the vnode has an io_ref.  In case this vnode
     * happens to be in our list of file_ids, we'll note it so we can
     * avoid calling hfs_chashget_nowait() on that id as that
     * will cause a "locking against myself" panic.
     */
    Boolean check_leaf = true;

    struct user64_ext_access_t *user_access_structp;
    struct user64_ext_access_t tmp_user_access;
    struct access_cache cache;

    int error = 0, prev_parent_check_ok = 1;
    unsigned int num_files = 0;
    int num_parents = 0;
    cnid_t *parents = NULL;
    cnid_t prevParent_cnid = 0;
    unsigned int myPerms;
    struct cat_attr cnattr;
    struct cnode *skip_cp = VTOC(vp);
    kauth_cred_t cred = vfs_context_ucred(context);
    proc_t p = vfs_context_proc(context);

    is64bit = proc_is64bit(p);

    /* initialize the local cache and buffers */
    cache.numcached = 0;
    cache.cachehits = 0;
    cache.acache = NULL;
    cache.haveaccess = NULL;
    /* struct copyin done during dispatch... need to copy file_id array separately */
    if (ap->a_data == NULL) {
        goto err_exit_bulk_access;
    }

    if (arg_size != sizeof(struct user64_ext_access_t)) {
        goto err_exit_bulk_access;
    }

    user_access_structp = (struct user64_ext_access_t *)ap->a_data;

    } else if (arg_size == sizeof(struct user32_access_t)) {
        struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;

        // convert an old style bulk-access struct to the new style
        tmp_user_access.flags = accessp->flags;
        tmp_user_access.num_files = accessp->num_files;
        tmp_user_access.map_size = 0;
        tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
        tmp_user_access.bitmap = USER_ADDR_NULL;
        tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
        tmp_user_access.num_parents = 0;
        user_access_structp = &tmp_user_access;

    } else if (arg_size == sizeof(struct user32_ext_access_t)) {
        struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;

        // up-cast from a 32-bit version of the struct
        tmp_user_access.flags = accessp->flags;
        tmp_user_access.num_files = accessp->num_files;
        tmp_user_access.map_size = accessp->map_size;
        tmp_user_access.num_parents = accessp->num_parents;

        tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
        tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
        tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
        tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);

        user_access_structp = &tmp_user_access;

        goto err_exit_bulk_access;
    map_size = user_access_structp->map_size;

    num_files = user_access_structp->num_files;

    num_parents = user_access_structp->num_parents;

    if (num_files < 1) {
        goto err_exit_bulk_access;
    }
    if (num_files > 1024) {
        goto err_exit_bulk_access;
    }
    if (num_parents > 1024) {
        goto err_exit_bulk_access;
    }

    file_ids = hfs_malloc(sizeof(int) * num_files);
    access = hfs_malloc(sizeof(short) * num_files);
    bitmap = hfs_mallocz(sizeof(char) * map_size);
    parents = hfs_malloc(sizeof(cnid_t) * num_parents);

    cache.acache = hfs_malloc(sizeof(int) * NUM_CACHE_ENTRIES);
    cache.haveaccess = hfs_malloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);

    if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
                        num_files * sizeof(int)))) {
        goto err_exit_bulk_access;
    }

    if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
                        num_parents * sizeof(cnid_t)))) {
        goto err_exit_bulk_access;
    }

    flags = user_access_structp->flags;
    if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
    /* check if we've been passed leaf node ids or parent ids */
    if (flags & PARENT_IDS_FLAG) {
    }

    /* Check access to each file_id passed in */
    for (i = 0; i < num_files; i++) {
        cnid = (cnid_t) file_ids[i];

        /* root always has access */
        if ((!parents) && (!suser(cred, NULL))) {
        }

        /* do the lookup (checks the cnode hash, then the catalog) */
        error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
        if (error) {
            access[i] = (short) error;
        }

        // Check if the leaf matches one of the parent scopes
        leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
        if (leaf_index >= 0 && parents[leaf_index] == cnid)
            prev_parent_check_ok = 0;
        else if (leaf_index >= 0)
            prev_parent_check_ok = 1;
        // if the thing has acl's, do the full permission check
        if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
            /* get the vnode for this cnid */
            myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);

            hfs_unlock(VTOC(cvp));

            if (vnode_vtype(cvp) == VDIR) {
                myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
            } else {
                myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
            }
        } else {
            /* before calling CheckAccess(), check the target file for read access */
            myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
                cnattr.ca_mode, hfsmp->hfs_mp, cred, p);

            /* fail fast if no access */
            if ((myPerms & flags) == 0) {
            }
        }

        /* we were passed an array of parent ids */
        catkey.hfsPlus.parentID = cnid;

        /* if the last guy had the same parent and had access, we're done */
        if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
        }

        myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
            skip_cp, p, cred, context, bitmap, map_size, parents, num_parents);

        if (myaccess || (error == ESRCH && leaf_index != -1)) {
            access[i] = 0;   // have access.. no errors to report
        } else {
            access[i] = (error != 0 ? (short) error : EACCES);
        }

        prevParent_cnid = catkey.hfsPlus.parentID;
    /* copyout the access array */
    if ((error = copyout((caddr_t)access, user_access_structp->access,
                         num_files * sizeof(short)))) {
        goto err_exit_bulk_access;
    }
    if (map_size && bitmap) {
        if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
                             map_size * sizeof(char)))) {
            goto err_exit_bulk_access;
        }
    }

err_exit_bulk_access:

    hfs_free(file_ids, sizeof(int) * num_files);
    hfs_free(parents, sizeof(cnid_t) * num_parents);
    hfs_free(bitmap, sizeof(char) * map_size);
    hfs_free(access, sizeof(short) * num_files);
    hfs_free(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
    hfs_free(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);

    return (error);
}

/* end "bulk-access" support */
/*
 * Control filesystem operating characteristics.
 */
int
hfs_vnop_ioctl( struct vnop_ioctl_args /* {
        vfs_context_t a_context;
    } */ *ap)
{
    struct vnode *vp = ap->a_vp;
    struct hfsmount *hfsmp = VTOHFS(vp);
    vfs_context_t context = ap->a_context;
    kauth_cred_t cred = vfs_context_ucred(context);
    proc_t p = vfs_context_proc(context);
    struct vfsstatfs *vfsp;
    off_t jnl_start, jnl_size;
    struct hfs_journal_info *jip;
#if HFS_COMPRESSION
    off_t uncompressed_size = -1;
    int decmpfs_error = 0;

    if (ap->a_command == F_RDADVISE) {
        /* we need to inspect the decmpfs state of the file as early as possible */
        compressed = hfs_file_is_compressed(VTOC(vp), 0);
        if (VNODE_IS_RSRC(vp)) {
            /* if this is the resource fork, treat it as if it were empty */
            uncompressed_size = 0;
        } else {
            decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
            if (decmpfs_error != 0) {
                /* failed to get the uncompressed size, we'll check for this later */
                uncompressed_size = -1;
            }
        }
    }
#endif /* HFS_COMPRESSION */
    is64bit = proc_is64bit(p);

#if CONFIG_PROTECT
#if HFS_CONFIG_KEY_ROLL
    // The HFSIOC_KEY_ROLL fsctl does its own access checks
    if (ap->a_command != HFSIOC_KEY_ROLL)
#endif // HFS_CONFIG_KEY_ROLL
    if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
    }
#endif /* CONFIG_PROTECT */

    switch (ap->a_command) {
    case HFSIOC_GETPATH:
    {
        struct vnode *file_vp;

#ifdef VN_GETPATH_NEW
#else // VN_GETPATH_NEW
#endif // VN_GETPATH_NEW

        /* Caller must be owner of file system. */
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
        }
        /* Target vnode must be file system's root. */
        if (!vnode_isvroot(vp)) {
        }
        bufptr = (char *)ap->a_data;
        cnid = strtoul(bufptr, NULL, 10);
        if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) {
            flags |= BUILDPATH_VOLUME_RELATIVE;
        }

        /* We need to call hfs_vfs_vget to leverage the code that will
         * fix the origin list for us if needed, as opposed to calling
         * hfs_vget, since we will need the parent for vn_getpath_ext call.
         */
        if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
        }

        outlen = sizeof(pathname_t);
        error = vn_getpath_ext(file_vp, NULLVP, bufptr, &outlen, flags);
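
        /*
         * Illustrative sketch (not part of the original source): from user
         * space this operation is reached through fsctl(2).  The command
         * name HFS_GETPATH (the user-level wrapper for HFSIOC_GETPATH in
         * <hfs/hfs_fsctl.h>) is an assumption here.  The caller writes the
         * decimal CNID into the buffer and gets the path back in place:
         *
         *	char buf[MAXPATHLEN];
         *	snprintf(buf, sizeof(buf), "%u", cnid);     // file ID to resolve
         *	if (fsctl("/Volumes/MyHFS", HFS_GETPATH, buf, 0) == 0)
         *		printf("cnid %u -> %s\n", cnid, buf);   // buf now holds the path
         */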
    case HFSIOC_SET_MAX_DEFRAG_SIZE:
    {
        int error = 0;          /* Assume success */
        u_int32_t maxsize = 0;

        if (vnode_vfsisrdonly(vp)) {
        }
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (!kauth_cred_issuser(cred)) {
            return (EACCES);    /* must be root */
        }

        maxsize = *(u_int32_t *)ap->a_data;

        hfs_lock_mount(hfsmp);
        if (maxsize > HFS_MAX_DEFRAG_SIZE) {
        }
        hfsmp->hfs_defrag_max = maxsize;
        hfs_unlock_mount(hfsmp);
    }

    case HFSIOC_FORCE_ENABLE_DEFRAG:
    {
        int error = 0;          /* Assume success */
        u_int32_t do_enable = 0;

        if (vnode_vfsisrdonly(vp)) {
        }
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (!kauth_cred_issuser(cred)) {
            return (EACCES);    /* must be root */
        }

        do_enable = *(u_int32_t *)ap->a_data;

        hfs_lock_mount(hfsmp);
        if (do_enable != 0) {
            hfsmp->hfs_defrag_nowait = 1;
        }
        hfs_unlock_mount(hfsmp);
    }
    case HFSIOC_TRANSFER_DOCUMENT_ID:
    {
        struct cnode *cp = NULL;
        u_int32_t to_fd = *(u_int32_t *)ap->a_data;
        struct fileproc *to_fp;
        struct vnode *to_vp;
        struct cnode *to_cp;

        if ((error = fp_getfvp(p, to_fd, &to_fp, &to_vp)) != 0) {
            //printf("could not get the vnode for fd %d (err %d)\n", to_fd, error);
        }
        if ((error = vnode_getwithref(to_vp))) {
        }

        if (VTOHFS(to_vp) != hfsmp) {
            goto transfer_cleanup;
        }

        int need_unlock = 1;
        to_cp = VTOC(to_vp);
        error = hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
        if (error != 0) {
            //printf("could not lock the pair of cnodes (error %d)\n", error);
            goto transfer_cleanup;
        }

        if (!(cp->c_bsdflags & UF_TRACKED)) {
        } else if (to_cp->c_bsdflags & UF_TRACKED) {
            //
            // if the destination is already tracked, return an error
            // as otherwise it's a silent deletion of the target's
            //
        } else if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
            //
            // we can use the FndrExtendedFileInfo because the doc-id is the first
            // thing in both it and the ExtendedDirInfo struct which is fixed in
            // format and can not change layout
            //
            struct FndrExtendedFileInfo *f_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t *)cp->c_finderinfo + 16);
            struct FndrExtendedFileInfo *to_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t *)to_cp->c_finderinfo + 16);

            if (f_extinfo->document_id == 0) {
                hfs_unlockpair(cp, to_cp);  // have to unlock to be able to get a new-id

                if ((error = hfs_generate_document_id(hfsmp, &new_id)) == 0) {
                    //
                    // re-lock the pair now that we have the document-id
                    //
                    hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
                    f_extinfo->document_id = new_id;
                } else {
                    goto transfer_cleanup;
                }
            }

            to_extinfo->document_id = f_extinfo->document_id;
            f_extinfo->document_id = 0;
            //printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid);

            // make sure the destination is also UF_TRACKED
            to_cp->c_bsdflags |= UF_TRACKED;
            cp->c_bsdflags &= ~UF_TRACKED;

            // mark the cnodes dirty
            cp->c_flag |= C_MODIFIED;
            to_cp->c_flag |= C_MODIFIED;

            if ((error = hfs_start_transaction(hfsmp)) == 0) {
                lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

                (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
                (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, NULL, NULL);

                hfs_systemfile_unlock(hfsmp, lockflags);
                (void) hfs_end_transaction(hfsmp);
            }

            add_fsevent(FSE_DOCID_CHANGED, context,
                FSE_ARG_DEV,   hfsmp->hfs_raw_dev,
                FSE_ARG_INO,   (ino64_t)cp->c_fileid,       // src inode #
                FSE_ARG_INO,   (ino64_t)to_cp->c_fileid,    // dst inode #
                FSE_ARG_INT32, to_extinfo->document_id,

            hfs_unlockpair(cp, to_cp);  // unlock this so we can send the fsevents

            if (need_fsevent(FSE_STAT_CHANGED, vp)) {
                add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
            }
            if (need_fsevent(FSE_STAT_CHANGED, to_vp)) {
                add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, to_vp, FSE_ARG_DONE);
            }
        }

        hfs_unlockpair(cp, to_cp);
    case HFSIOC_PREV_LINK:
    case HFSIOC_NEXT_LINK:
    {
        /* Caller must be owner of file system. */
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
        }
        /* Target vnode must be file system's root. */
        if (!vnode_isvroot(vp)) {
        }
        linkfileid = *(cnid_t *)ap->a_data;
        if (linkfileid < kHFSFirstUserCatalogNodeID) {
        }
        if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
        }
        if (ap->a_command == HFSIOC_NEXT_LINK) {
            *(cnid_t *)ap->a_data = nextlinkid;
        } else {
            *(cnid_t *)ap->a_data = prevlinkid;
        }
    }
    case HFSIOC_RESIZE_PROGRESS: {
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);    /* must be owner of file system */
        }
        if (!vnode_isvroot(vp)) {
        }
        /* file system must not be mounted read-only */
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }

        return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
    }

    case HFSIOC_RESIZE_VOLUME: {
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);    /* must be owner of file system */
        }
        if (!vnode_isvroot(vp)) {
        }

        /* filesystem must not be mounted read only */
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }
        newsize = *(u_int64_t *)ap->a_data;
        cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

        if (newsize == cursize) {
        }
        IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeWillResize);
        if (newsize > cursize) {
            ret = hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
        } else {
            ret = hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
        }
        IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeDidResize);
    }
    case HFSIOC_CHANGE_NEXT_ALLOCATION: {
        int error = 0;          /* Assume success */

        if (vnode_vfsisrdonly(vp)) {
        }
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);    /* must be owner of file system */
        }
        if (!vnode_isvroot(vp)) {
        }
        hfs_lock_mount(hfsmp);
        location = *(u_int32_t *)ap->a_data;
        if ((location >= hfsmp->allocLimit) &&
            (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
            goto fail_change_next_allocation;
        }
        /* Return previous value. */
        *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
        if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
            /* On magic value for location, set nextAllocation to next block
             * after metadata zone and set flag in mount structure to indicate
             * that nextAllocation should not be updated again.
             */
            if (hfsmp->hfs_metazone_end != 0) {
                HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
            }
            hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
        } else {
            hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
            HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
        }
        MarkVCBDirty(hfsmp);
fail_change_next_allocation:
        hfs_unlock_mount(hfsmp);
    }
#if HFS_SPARSE_DEV
    case HFSIOC_SETBACKINGSTOREINFO: {
        struct vnode *di_vp;
        struct hfs_backingstoreinfo *bsdata;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }
        if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
        }
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);    /* must be owner of file system */
        }
        bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
        if (bsdata == NULL) {
        }
        if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
        }
        if ((error = vnode_getwithref(di_vp))) {
            file_drop(bsdata->backingfd);
        }

        if (vnode_mount(vp) == vnode_mount(di_vp)) {
            (void)vnode_put(di_vp);
            file_drop(bsdata->backingfd);
        }

        // Dropped in unmount
        hfs_lock_mount(hfsmp);
        hfsmp->hfs_backingvp = di_vp;
        hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
        hfsmp->hfs_sparsebandblks = bsdata->bandsize / hfsmp->blockSize * 4;
        hfs_unlock_mount(hfsmp);

        /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */

        /*
         * If the sparse image is on a sparse image file (as opposed to a sparse
         * bundle), then we may need to limit the free space to the maximum size
         * of a file on that volume.  So we query (using pathconf), and if we get
         * a meaningful result, we cache the number of blocks for later use in
         */
        hfsmp->hfs_backingfs_maxblocks = 0;
        if (vnode_vtype(di_vp) == VREG) {
            terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
            if (terr == 0 && hostbits != 0 && hostbits < 64) {
                u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;

                hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
            }
        }

        /* The free extent cache is managed differently for sparse devices.
         * There is a window between which the volume is mounted and the
         * device is marked as sparse, so the free extent cache for this
         * volume is currently initialized as normal volume (sorted by block
         * count).  Reset the cache so that it will be rebuilt again
         * for sparse device (sorted by start block).
         */
        ResetVCBFreeExtCache(hfsmp);

        (void)vnode_put(di_vp);
        file_drop(bsdata->backingfd);
    }
    case HFSIOC_CLRBACKINGSTOREINFO: {
        struct vnode *tmpvp;

        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);    /* must be owner of file system */
        }
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }

        if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
            hfsmp->hfs_backingvp) {
            hfs_lock_mount(hfsmp);
            hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
            tmpvp = hfsmp->hfs_backingvp;
            hfsmp->hfs_backingvp = NULLVP;
            hfsmp->hfs_sparsebandblks = 0;
            hfs_unlock_mount(hfsmp);
        }
    }
#endif /* HFS_SPARSE_DEV */
    /* Change the next CNID stored in the VH */
    case HFSIOC_CHANGE_NEXTCNID: {
        int error = 0;          /* Assume success */

        if (vnode_vfsisrdonly(vp)) {
        }
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);    /* must be owner of file system */
        }

        fileid = *(u_int32_t *)ap->a_data;

        /* Must have catalog lock excl. to advance the CNID pointer */
        lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

        hfs_lock_mount(hfsmp);

        /* If it is less than the current next CNID, force the wraparound bit to be set */
        if (fileid < hfsmp->vcbNxtCNID) {
        }

        /* Return previous value. */
        *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID;

        hfsmp->vcbNxtCNID = fileid;

        hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask;

        MarkVCBDirty(hfsmp);
        hfs_unlock_mount(hfsmp);
        hfs_systemfile_unlock(hfsmp, lockflags);
    }
    case F_FREEZE_FS: {
        mp = vnode_mount(vp);
        hfsmp = VFSTOHFS(mp);

        vfsp = vfs_statfs(mp);

        if (kauth_cred_getuid(cred) != vfsp->f_owner &&
            !kauth_cred_issuser(cred))
            return (EACCES);

        return hfs_freeze(hfsmp);
    }

    case F_THAW_FS: {
        vfsp = vfs_statfs(vnode_mount(vp));
        if (kauth_cred_getuid(cred) != vfsp->f_owner &&
            !kauth_cred_issuser(cred))
            return (EACCES);

        return hfs_thaw(hfsmp, current_proc());
    }
    case HFSIOC_EXT_BULKACCESS32:
    case HFSIOC_EXT_BULKACCESS64: {
        if (hfsmp->hfs_flags & HFS_STANDARD) {
        }

        if (is64bit) {
            size = sizeof(struct user64_ext_access_t);
        } else {
            size = sizeof(struct user32_ext_access_t);
        }

        return do_bulk_access_check(hfsmp, vp, ap, size, context);
    }
    case HFSIOC_SET_XATTREXTENTS_STATE: {
        if (ap->a_data == NULL) {
        }

        state = *(int *)ap->a_data;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }

        /* Super-user can enable or disable extent-based extended
         * attribute support on a volume
         * Note: Starting Mac OS X 10.7, extent-based extended attributes
         * are enabled by default, so any change will be transient only
         * till the volume is remounted.
         */
        if (!kauth_cred_issuser(kauth_cred_get())) {
        }
        if (state == 0 || state == 1)
            return hfs_set_volxattr(hfsmp, HFSIOC_SET_XATTREXTENTS_STATE, state);
    }
    case F_SETSTATICCONTENT: {
        int enable_static = 0;
        struct cnode *cp = NULL;
        /*
         * lock the cnode, decorate the cnode flag, and bail out.
         * VFS should have already authenticated the caller for us.
         */

        /*
         * Note that even though ap->a_data is of type caddr_t,
         * the fcntl layer at the syscall handler will pass in NULL
         * or 1 depending on what the argument supplied to the fcntl
         * was.  So it is in fact correct to check the ap->a_data
         * argument for zero or non-zero value when deciding whether or not
         * to enable the static bit in the cnode.
         */

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }

        error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
        if (enable_static) {
            cp->c_flag |= C_SSD_STATIC;
        } else {
            cp->c_flag &= ~C_SSD_STATIC;
        }
    }
    case F_SET_GREEDY_MODE: {
        int enable_greedy_mode = 0;
        struct cnode *cp = NULL;
        /*
         * lock the cnode, decorate the cnode flag, and bail out.
         * VFS should have already authenticated the caller for us.
         */

        /*
         * Note that even though ap->a_data is of type caddr_t,
         * the fcntl layer at the syscall handler will pass in NULL
         * or 1 depending on what the argument supplied to the fcntl
         * was.  So it is in fact correct to check the ap->a_data
         * argument for zero or non-zero value when deciding whether or not
         * to enable the greedy mode bit in the cnode.
         */
        enable_greedy_mode = 1;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }

        error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
        if (enable_greedy_mode) {
            cp->c_flag |= C_SSD_GREEDY_MODE;
        } else {
            cp->c_flag &= ~C_SSD_GREEDY_MODE;
        }
    }
        uint32_t iotypeflag = 0;

        struct cnode *cp = NULL;
        /*
         * lock the cnode, decorate the cnode flag, and bail out.
         * VFS should have already authenticated the caller for us.
         */

        if (ap->a_data == NULL) {
        }

        /*
         * Note that even though ap->a_data is of type caddr_t, we
         * can only use 32 bits of flag values.
         */
        iotypeflag = (uint32_t) ap->a_data;
        switch (iotypeflag) {
        case F_IOTYPE_ISOCHRONOUS:
        }

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }

        error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
        switch (iotypeflag) {
        case F_IOTYPE_ISOCHRONOUS:
            cp->c_flag |= C_IO_ISOCHRONOUS;
        }
    case F_MAKECOMPRESSED: {
        uint32_t gen_counter;
        struct cnode *cp = NULL;
        int reset_decmp = 0;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }

        /*
         * acquire & lock the cnode.
         * VFS should have already authenticated the caller for us.
         */

        /*
         * Cast the pointer into a uint32_t so we can extract the
         * supplied generation counter.
         */
        gen_counter = *((uint32_t *)ap->a_data);

        /* Grab truncate lock first; we may truncate the file */
        hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

        error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
        if (error) {
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
        }

        /* Are there any other usecounts/FDs? */
        if (vnode_isinuse(vp, 1)) {
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
        }

        /* now we have the cnode locked down; Validate arguments */
        if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
            /* EINVAL if you are trying to manipulate an IMMUTABLE file */
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
        }

        if ((hfs_get_gencount(cp)) == gen_counter) {
            /*
             * OK, the gen_counter matched.  Go for it:
             * Toggle state bits, truncate file, and suppress mtime update
             */
            cp->c_bsdflags |= UF_COMPRESSED;

            error = hfs_truncate(vp, 0, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES,
        }

        /* Unlock cnode before executing decmpfs; they may need to get an EA */

        /*
         * Reset the decmp state while still holding the truncate lock.  We need to
         * serialize here against a listxattr on this node which may occur at any
         * time.
         *
         * Even if '0/skiplock' is passed in 2nd argument to hfs_file_is_compressed,
         * that will still potentially require getting the com.apple.decmpfs EA.  If the
         * EA is required, then we can't hold the cnode lock, because the getxattr call is
         * generic (through VFS), and can't pass along any info telling it that we're already
         * holding it (the lock).  If we don't serialize, then we risk listxattr stopping
         * and trying to fill in the hfs_file_is_compressed info during the callback
         * operation, which will result in deadlock against the b-tree node.
         *
         * So, to serialize against listxattr (which will grab buf_t meta references on
         * the b-tree blocks), we hold the truncate lock as we're manipulating the
         */
        if ((reset_decmp) && (error == 0)) {
            decmpfs_cnode *dp = VTOCMP(vp);
            decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);

            /* Initialize the decmpfs node as needed */
            (void) hfs_file_is_compressed(cp, 0);   /* ok to take lock */
        }

        hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
    }
	case F_SETBACKINGSTORE: {
		int error = 0;

		/*
		 * See comment in F_SETSTATICCONTENT re: using
		 * a null check for a_data
		 */
		if (ap->a_data) {
			error = hfs_set_backingstore (vp, 1);
		}
		else {
			error = hfs_set_backingstore (vp, 0);
		}
		return error;
	}

	case F_GETPATH_MTMINFO: {
		int error = 0;

		int *data = (int*) ap->a_data;

		/* Ask if this is a backingstore vnode */
		error = hfs_is_backingstore (vp, data);

		return error;
	}
	case F_FULLFSYNC: {
		int error;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error == 0) {
			error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_FULL, p);
			hfs_unlock(VTOC(vp));
		}

		return error;
	}

	case F_BARRIERFSYNC: {
		int error;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error == 0) {
			error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_BARRIER, p);
			hfs_unlock(VTOC(vp));
		}

		return error;
	}
	case F_CHKCLEAN: {
		register struct cnode *cp;
		int error;

		if (!vnode_isreg(vp))
			return EINVAL;

		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error == 0) {
			cp = VTOC(vp);
			/*
			 * used by regression test to determine if
			 * all the dirty pages (via write) have been cleaned
			 * after a call to 'fsync'.
			 */
			error = is_file_clean(vp, VTOF(vp)->ff_size);
			hfs_unlock(cp);
		}
		return (error);
	}
	case F_RDADVISE: {
		register struct radvisory *ra;
		struct filefork *fp;
		int error = 0;

		if (!vnode_isreg(vp))
			return EINVAL;

		ra = (struct radvisory *)(ap->a_data);
		fp = VTOF(vp);

		/* Protect against a size change. */
		hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

#if HFS_COMPRESSION
		if (compressed) {
			if (uncompressed_size == -1) {
				/* fetching the uncompressed size failed above, so return the error */
				error = decmpfs_error;
			} else if (ra->ra_offset >= uncompressed_size) {
				error = EFBIG;
			} else {
				error = advisory_read(vp, uncompressed_size, ra->ra_offset, ra->ra_count);
			}
		} else
#endif /* HFS_COMPRESSION */
		if (ra->ra_offset >= fp->ff_size) {
			error = EFBIG;
		} else {
			error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
		}

		hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
		return (error);
	}
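	/*
	 * The selectors below report volume timestamps and report or adjust
	 * the free-space notification thresholds kept on the mount
	 * (danger < warning < near-warning < desired); the setters reject
	 * values that would invert that ordering.
	 *
	 * Illustrative userspace sketch (not part of this file): assuming the
	 * HFSIOC_* selectors from hfs_fsctl.h are visible to the caller and
	 * an HFS volume is mounted at the given path, the warning threshold
	 * could be read with fsctl(2) roughly like so:
	 *
	 *     uint32_t limit;
	 *     if (fsctl("/Volumes/MyHFS", HFSIOC_GET_LOW_DISK, &limit, 0) == 0)
	 *         printf("low-disk warning threshold: %u\n", limit);
	 */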
	case HFSIOC_GET_VOL_CREATE_TIME_32: {
		*(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
		break;
	}

	case HFSIOC_GET_VOL_CREATE_TIME_64: {
		*(user64_time_t *)(ap->a_data) = (user64_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
		break;
	}

	case SPOTLIGHT_IOC_GET_MOUNT_TIME:
		*(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
		break;

	case SPOTLIGHT_IOC_GET_LAST_MTIME:
		*(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
		break;

	case HFSIOC_GET_VERY_LOW_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
		break;

	case HFSIOC_SET_VERY_LOW_DISK:
		if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
			return EINVAL;
		}

		hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
		break;

	case HFSIOC_GET_LOW_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
		break;

	case HFSIOC_SET_LOW_DISK:
		if (   *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
			|| *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {

			return EINVAL;
		}

		hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
		break;

	/* The following two fsctls were ported from apfs. */
	case APFSIOC_GET_NEAR_LOW_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_nearwarninglimit;
		break;

	case APFSIOC_SET_NEAR_LOW_DISK:
		if (   *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
			|| *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
			return EINVAL;
		}

		hfsmp->hfs_freespace_notify_nearwarninglimit = *(uint32_t *)ap->a_data;
		break;

	case HFSIOC_GET_DESIRED_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
		break;

	case HFSIOC_SET_DESIRED_DISK:
		if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
			return EINVAL;
		}

		hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
		break;

	case HFSIOC_VOLUME_STATUS:
		*(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
		break;
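	/*
	 * HFS_SET_BOOT_INFO / HFS_GET_BOOT_INFO copy the Finder info words
	 * (vcbFndrInfo) into or out of the in-memory volume header.  The
	 * setter requires the superuser or the owner of the filesystem,
	 * clears the cached volume UUID, and flushes the volume header.
	 */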
	case HFS_SET_BOOT_INFO:
		if (!vnode_isvroot(vp))
			return(EINVAL);
		if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
			return(EACCES);	/* must be superuser or owner of filesystem */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		hfs_lock_mount (hfsmp);
		bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
		/* Null out the cached UUID, to be safe */
		uuid_clear (hfsmp->hfs_full_uuid);
		hfs_unlock_mount (hfsmp);
		(void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
		break;

	case HFS_GET_BOOT_INFO:
		if (!vnode_isvroot(vp))
			return(EINVAL);
		hfs_lock_mount (hfsmp);
		bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
		hfs_unlock_mount(hfsmp);
		break;

	/* case HFS_MARK_BOOT_CORRUPT: _IO are the same */
	case HFSIOC_MARK_BOOT_CORRUPT:
		/* Mark the boot volume corrupt by setting
		 * kHFSVolumeInconsistentBit in the volume header.  This will
		 * force fsck_hfs on next mount.
		 */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}

		/* Allowed only on the root vnode of the boot volume */
		if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
		    !vnode_isvroot(vp)) {
			return EINVAL;
		}
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
		hfs_mark_inconsistent(hfsmp, HFS_FSCK_FORCED);
		break;
	case HFSIOC_GET_JOURNAL_INFO:
		jip = (struct hfs_journal_info*)ap->a_data;

		if (hfsmp->jnl == NULL) {
			jnl_start = 0;
			jnl_size  = 0;
		} else {
			jnl_start = hfs_blk_to_bytes(hfsmp->jnl_start, hfsmp->blockSize) + hfsmp->hfsPlusIOPosOffset;
			jnl_size  = hfsmp->jnl_size;
		}

		jip->jstart = jnl_start;
		jip->jsize = jnl_size;
		break;

	case HFSIOC_SET_ALWAYS_ZEROFILL: {
		struct cnode *cp = VTOC(vp);

		if (*(int *)ap->a_data) {
			cp->c_flag |= C_ALWAYS_ZEROFILL;
		} else {
			cp->c_flag &= ~C_ALWAYS_ZEROFILL;
		}
		break;
	}

	/* case HFS_DISABLE_METAZONE: _IO are the same */
	case HFSIOC_DISABLE_METAZONE: {
		/* Only root can disable metadata zone */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}

		/* Disable metadata zone now */
		(void) hfs_metadatazone_init(hfsmp, true);
		printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
		break;
	}
	case HFSIOC_FSINFO_METADATA_BLOCKS: {
		int error;
		struct hfsinfo_metadata *hinfo;

		hinfo = (struct hfsinfo_metadata *)ap->a_data;

		/* Get information about number of metadata blocks */
		error = hfs_getinfo_metadata_blocks(hfsmp, hinfo);
		if (error) {
			return error;
		}

		break;
	}

	case HFSIOC_GET_FSINFO: {
		hfs_fsinfo *fsinfo = (hfs_fsinfo *)ap->a_data;

		/* Only root is allowed to get fsinfo */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}

		/*
		 * Make sure that the caller's version number matches with
		 * the kernel's version number.  This will make sure that
		 * if the structures being read/written into are changed
		 * by the kernel, the caller will not read incorrect data.
		 *
		 * The first three fields --- request_type, version and
		 * flags are same for all the hfs_fsinfo structures, so
		 * we can access the version number by assuming any
		 * structure for now.
		 */
		if (fsinfo->header.version != HFS_FSINFO_VERSION) {
			return ENOTSUP;
		}

		/* Make sure that the current file system is not marked inconsistent */
		if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
			return EIO;
		}

		return hfs_get_fsinfo(hfsmp, ap->a_data);
	}
	case HFSIOC_CS_FREESPACE_TRIM: {
		int error = 0;
		int lockflags = 0;

		/* Only root allowed */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}

		/*
		 * This core functionality is similar to hfs_scan_blocks().
		 * The main difference is that hfs_scan_blocks() is called
		 * as part of mount where we are assured that the journal is
		 * empty to start with.  This fcntl() can be called on a
		 * mounted volume, therefore it has to flush the content of
		 * the journal as well as ensure the state of summary table.
		 *
		 * This fcntl scans over the entire allocation bitmap,
		 * creates list of all the free blocks, and issues TRIM
		 * down to the underlying device.  This can take long time
		 * as it can generate up to 512MB of read I/O.
		 */

		if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
			error = hfs_init_summary(hfsmp);
			if (error) {
				printf("hfs: fsctl() could not initialize summary table for %s\n", hfsmp->vcbVN);
				return error;
			}
		}

		/*
		 * The journal maintains list of recently deallocated blocks to
		 * issue DKIOCUNMAPs when the corresponding journal transaction is
		 * flushed to the disk.  To avoid any race conditions, we only
		 * want one active trim list and only one thread issuing DKIOCUNMAPs.
		 * Therefore we make sure that the journal trim list is sync'ed,
		 * empty, and not modifiable for the duration of our scan.
		 *
		 * Take the journal lock before flushing the journal to the disk.
		 * We will keep on holding the journal lock till we don't get the
		 * bitmap lock to make sure that no new journal transactions can
		 * start.  This will make sure that the journal trim list is not
		 * modified after the journal flush and before getting bitmap lock.
		 * We can release the journal lock after we acquire the bitmap
		 * lock as it will prevent any further block deallocations.
		 */
		hfs_journal_lock(hfsmp);

		/* Flush the journal and wait for all I/Os to finish up */
		error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
		if (error) {
			hfs_journal_unlock(hfsmp);
			return error;
		}

		/* Take bitmap lock to ensure it is not being modified */
		lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

		/* Release the journal lock */
		hfs_journal_unlock(hfsmp);

		/*
		 * ScanUnmapBlocks reads the bitmap in large block size
		 * (up to 1MB) unlike the runtime which reads the bitmap
		 * in the 4K block size.  This can cause buf_t collisions
		 * and potential data corruption.  To avoid this, we
		 * invalidate all the existing buffers associated with
		 * the bitmap vnode before scanning it.
		 *
		 * Note: ScanUnmapBlock() cleans up all the buffers
		 * after itself, so there won't be any large buffers left
		 * for us to clean up after it returns.
		 */
		error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
		if (error) {
			hfs_systemfile_unlock(hfsmp, lockflags);
			return error;
		}

		/* Traverse bitmap and issue DKIOCUNMAPs */
		error = ScanUnmapBlocks(hfsmp);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			return error;
		}

		break;
	}
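	/*
	 * HFSIOC_SET_HOTFILE_STATE marks or unmarks the vnode as a hotfile
	 * (fast-device) candidate.  HFS_NEVER_FASTDEVCANDIDATE additionally
	 * sets the do-not-pin record flag, and any blocks unpinned as a side
	 * effect are returned to the hotfile free-block count.
	 */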
	case HFSIOC_SET_HOTFILE_STATE: {
		int error;
		struct cnode *cp = VTOC(vp);
		uint32_t hf_state = *((uint32_t*)ap->a_data);
		uint32_t num_unpinned = 0;

		error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error) {
			return error;
		}

		// printf("hfs: setting hotfile state %d on %s\n", hf_state, vp->v_name);
		if (hf_state == HFS_MARK_FASTDEVCANDIDATE) {
			vnode_setfastdevicecandidate(vp);

			cp->c_attr.ca_recflags |= kHFSFastDevCandidateMask;
			cp->c_attr.ca_recflags &= ~kHFSDoNotFastDevPinMask;
			cp->c_flag |= C_MODIFIED;

		} else if (hf_state == HFS_UNMARK_FASTDEVCANDIDATE || hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
			vnode_clearfastdevicecandidate(vp);
			hfs_removehotfile(vp);

			if (cp->c_attr.ca_recflags & kHFSFastDevPinnedMask) {
				hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, &num_unpinned);
			}

			if (hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
				cp->c_attr.ca_recflags |= kHFSDoNotFastDevPinMask;
			}
			cp->c_attr.ca_recflags &= ~(kHFSFastDevCandidateMask|kHFSFastDevPinnedMask);
			cp->c_flag |= C_MODIFIED;

		} else {
			error = EINVAL;
		}

		if (num_unpinned != 0) {
			lck_mtx_lock(&hfsmp->hfc_mutex);
			hfsmp->hfs_hotfile_freeblks += num_unpinned;
			lck_mtx_unlock(&hfsmp->hfc_mutex);
		}

		hfs_unlock(cp);
		return error;
	}
	case HFSIOC_REPIN_HOTFILE_STATE: {
		int error = 0;
		uint32_t repin_what = *((uint32_t*)ap->a_data);

		/* Only root allowed */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}

		if (!(hfsmp->hfs_flags & (HFS_CS_METADATA_PIN | HFS_CS_HOTFILE_PIN))) {
			// this system is neither regular Fusion or Cooperative Fusion
			// so this fsctl makes no sense.
			return EINVAL;
		}

		//
		// After converting a CoreStorage volume to be encrypted, the
		// extents could have moved around underneath us.  This call
		// allows corestoraged to re-pin everything that should be
		// pinned (it would happen on the next reboot too but that could
		// be a long time away).
		//
		if ((repin_what & HFS_REPIN_METADATA) && (hfsmp->hfs_flags & HFS_CS_METADATA_PIN)) {
			hfs_pin_fs_metadata(hfsmp);
		}
		if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) {
			hfs_repin_hotfiles(hfsmp);
		}
		if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_SWAPFILE_PIN)) {
			//XXX Swapfiles (marked SWAP_PINNED) may have moved too.
			//XXX Do we care?  They have a more transient/dynamic nature/lifetime.
		}

		return error;
	}
#if HFS_CONFIG_KEY_ROLL

	case HFSIOC_KEY_ROLL: {
		if (!kauth_cred_issuser(kauth_cred_get()))
			return EACCES;

		hfs_key_roll_args_t *args = (hfs_key_roll_args_t *)ap->a_data;

		return hfs_key_roll_op(ap->a_context, ap->a_vp, args);
	}

	case HFSIOC_GET_KEY_AUTO_ROLL: {
		if (!kauth_cred_issuser(kauth_cred_get()))
			return EACCES;

		hfs_key_auto_roll_args_t *args = (hfs_key_auto_roll_args_t *)ap->a_data;
		if (args->api_version != HFS_KEY_AUTO_ROLL_API_VERSION_1)
			return ENOTSUP;
		args->flags = (ISSET(hfsmp->cproot_flags, CP_ROOT_AUTO_ROLL_OLD_CLASS_GENERATION)
					   ? HFS_KEY_AUTO_ROLL_OLD_CLASS_GENERATION : 0);
		args->min_key_os_version = hfsmp->hfs_auto_roll_min_key_os_version;
		args->max_key_os_version = hfsmp->hfs_auto_roll_max_key_os_version;
		break;
	}

	case HFSIOC_SET_KEY_AUTO_ROLL: {
		if (!kauth_cred_issuser(kauth_cred_get()))
			return EACCES;

		hfs_key_auto_roll_args_t *args = (hfs_key_auto_roll_args_t *)ap->a_data;
		if (args->api_version != HFS_KEY_AUTO_ROLL_API_VERSION_1)
			return ENOTSUP;
		return cp_set_auto_roll(hfsmp, args);
	}

#endif // HFS_CONFIG_KEY_ROLL
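	/*
	 * The next few selectors are content-protection entry points
	 * (transcode the file key, query the root and default protection
	 * class versions); they are compiled in only under CONFIG_PROTECT.
	 */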
#if CONFIG_PROTECT
	case F_TRANSCODEKEY:
		/*
		 * This API is only supported when called via kernel so
		 * a_fflag must be set to 1 (it's not possible to get here
		 * with it set to 1 via fsctl).
		 */
		if (ap->a_fflag != 1)
			return ENOTTY;

		return cp_vnode_transcode(vp, (cp_key_t *)ap->a_data);

	case F_GETPROTECTIONLEVEL:
		return cp_get_root_major_vers (vp, (uint32_t *)ap->a_data);

	case F_GETDEFAULTPROTLEVEL:
		return cp_get_default_level(vp, (uint32_t *)ap->a_data);
#endif // CONFIG_PROTECT
		return hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT | HFS_DATALESS_PIN,
		                     NULL);
	case FSIOC_CAS_BSDFLAGS: {
		struct fsioc_cas_bsdflags *cas = (void *)ap->a_data;
		struct cnode *cp = VTOC(vp);
		u_int32_t document_id = 0;
		bool need_truncate = false;
		int decmpfs_reset_state = 0;
		int error = 0;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}

		/* Don't allow modification of the journal. */
		if (hfs_is_journal_file(hfsmp, cp)) {
			return (EPERM);
		}

		// Check if we need to set UF_COMPRESSED.
		// If so, ask decmpfs if we're allowed to (and if so, if we need to truncate
		// the data fork to 0).
		if (!(cas->expected_flags & UF_COMPRESSED) && (cas->new_flags & UF_COMPRESSED)) {
			struct vnode_attr vap;
			VATTR_INIT(&vap);
			VATTR_SET(&vap, va_flags, cas->new_flags);

			error = decmpfs_update_attributes(vp, &vap);
			if (error) {
				return (error);
			}

			// Similar to hfs_vnop_setattr(), we call decmpfs_update_attributes()
			// as it is the ultimate arbiter of whether or not UF_COMPRESSED can be set.
			// (If the decmpfs xattr is not present or invalid, for example,
			// UF_COMPRESSED should *not* be set.)
			// It will also tell us if we need to truncate the data fork to 0.
			if (!(vap.va_flags & UF_COMPRESSED)) {
				// The request to update UF_COMPRESSED is denied.
				// (Note that decmpfs_update_attributes() won't touch va_active
				// in this case.)  Error out.
				return (EPERM);
			}

			if (VATTR_IS_ACTIVE(&vap, va_data_size) && (vap.va_data_size == 0)) {
				// We must also truncate this file's data fork to 0.
				need_truncate = true;
			}
		}

		if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
			return (error);
		}

		cas->actual_flags = cp->c_bsdflags;
		if (cas->actual_flags != cas->expected_flags) {
			hfs_unlock(cp);
			break;
		}

		//
		// Check if we'll need a document_id.  If so, we need to drop the lock
		// (to avoid any possible deadlock with the root vnode which has to get
		// locked to get the document id), generate the document_id, re-acquire
		// the lock, and perform the CAS check again.  We do it in this sequence
		// in order to avoid throwing away document_ids in the case where the
		// CAS check fails.  Note that it can still happen, but by performing
		// the check first, hopefully we can reduce the occurrence.
		//
		if ((cas->new_flags & UF_TRACKED) && !(VTOC(vp)->c_bsdflags & UF_TRACKED)) {
			struct FndrExtendedDirInfo *fip = (struct FndrExtendedDirInfo *)((char *)&(VTOC(vp)->c_attr.ca_finderinfo) + 16);

			//
			// If the document_id is not set, get a new one.  It will be set
			// on the file down below once we hold the cnode lock.
			//
			if (fip->document_id == 0) {
				//
				// Drat, we have to generate one.  Unlock the cnode, do the
				// deed, re-lock the cnode, and then to the CAS check again
				// to see if we lost the race.
				//
				hfs_unlock(cp);
				if (hfs_generate_document_id(hfsmp, &document_id) != 0) {
					document_id = 0;
				}
				if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
					return (error);
				}
				cas->actual_flags = cp->c_bsdflags;
				if (cas->actual_flags != cas->expected_flags) {
					hfs_unlock(cp);
					break;
				}
			}
		}

		// Attempt to truncate our data fork to 0 length, if necessary.
		if (need_truncate && (VTOF(vp)->ff_size)) {
			hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
			// hfs_truncate will deal with the cnode lock
			error = hfs_truncate(vp, 0, IO_NDELAY, 0, ap->a_context);
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);

			if (error) {
				hfs_unlock(cp);
				return (error);
			}
		}

		error = hfs_set_bsd_flags(hfsmp, cp, cas->new_flags,
		                          document_id, ap->a_context,
		                          &decmpfs_reset_state);
		if (error == 0) {
			error = hfs_update(vp, 0);
		}

		hfs_unlock(cp);
		if (error) {
			return (error);
		}

#if HFS_COMPRESSION
		if (decmpfs_reset_state) {
			/*
			 * we've changed the UF_COMPRESSED flag, so reset the decmpfs state for this cnode
			 * but don't do it while holding the hfs cnode lock
			 */
			decmpfs_cnode *dp = VTOCMP(vp);
			if (!dp) {
				/*
				 * call hfs_lazy_init_decmpfs_cnode() to make sure that the decmpfs_cnode
				 * is filled in; we need a decmpfs_cnode to prevent decmpfs state changes
				 * on this file if it's locked
				 */
				dp = hfs_lazy_init_decmpfs_cnode(VTOC(vp));
				if (!dp) {
					/* failed to allocate a decmpfs_cnode */
					return ENOMEM; /* what should this be? */
				}
			}
			decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
		}
#endif /* HFS_COMPRESSION */

		break; // return 0 below
	}
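/*
 * hfs_vnop_select
 *
 * HFS does not track per-vnode readiness, so select support here is trivial.
 */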
int
hfs_vnop_select(__unused struct vnop_select_args *ap)
/*
	struct vnop_select_args {
		vnode_t a_vp;
		int a_which;
		int a_fflags;
		void *a_wql;
		vfs_context_t a_context;
	};
*/
{
	/*
	 * We should really check to see if I/O is possible.
	 */
	return (1);
}
/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run.  The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently it's 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks.
 */
int
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
{
	struct filefork *fp = VTOF(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	int retval = E_NONE;
	u_int32_t logBlockSize;
	size_t bytesContAvail = 0;
	off_t blockposition;
	int lockExtBtree;
	int lockflags;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (vpp != NULL)
		*vpp = hfsmp->hfs_devvp;
	if (bnp == NULL)
		return (0);

	logBlockSize = GetLogicalBlockSize(vp);
	blockposition = (off_t)bn * logBlockSize;

	lockExtBtree = overflow_extents(fp);

	if (lockExtBtree)
		lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);

	retval = MacToVFSError(
			MapFileBlockC (HFSTOVCB(hfsmp),
					(FCB*)fp,
					MAXPHYSIO,
					blockposition,
					bnp,
					&bytesContAvail));

	if (lockExtBtree)
		hfs_systemfile_unlock(hfsmp, lockflags);

	if (retval == E_NONE) {
		/* Figure out how many read ahead blocks there are */
		if (runp != NULL) {
			if (can_cluster(logBlockSize)) {
				/* Make sure this result never goes negative: */
				*runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
			} else {
				*runp = 0;
			}
		}
	}
	return (retval);
}
/*
 * Convert logical block number to file offset.
 */
int
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
	struct vnop_blktooff_args {
		vnode_t a_vp;
		daddr64_t a_lblkno;
		off_t *a_offset;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);

	*ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);

	return(0);
}

/*
 * Convert file offset to logical block number.
 */
int
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
	struct vnop_offtoblk_args {
		vnode_t a_vp;
		off_t a_offset;
		daddr64_t *a_lblkno;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);

	*ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));

	return(0);
}
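/*
 * Worked example (for illustration only): with a logical block size of
 * 4096 bytes, hfs_vnop_blktooff maps logical block 3 to file offset
 * 3 * 4096 = 12288, and hfs_vnop_offtoblk maps any offset in
 * [12288, 16383] back to logical block 12288 / 4096 = 3 (integer division).
 */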
/*
 * Map file offset to physical block number.
 *
 * If this function is called for write operation, and if the file
 * had virtual blocks allocated (delayed allocation), real blocks
 * are allocated by calling ExtendFileC().
 *
 * If this function is called for read operation, and if the file
 * had virtual blocks allocated (delayed allocation), no change
 * to the size of file is done, and if required, rangelist is
 * searched for mapping.
 *
 * System file cnodes are expected to be locked (shared or exclusive).
 *
 * -- INVALID RANGES --
 *
 * Invalid ranges are used to keep track of where we have extended a
 * file, but have not yet written that data to disk.  In the past we
 * would clear up the invalid ranges as we wrote to those areas, but
 * before data was actually flushed to disk.  The problem with that
 * approach is that the data can be left in the cache and is therefore
 * still not valid on disk.  So now we clear up the ranges here, when
 * the flags field has VNODE_WRITE set, indicating a write is about to
 * occur.  This isn't ideal (ideally we want to clear them up when we
 * know the data has been successfully written), but it's the best we
 * can do.
 *
 * For reads, we use the invalid ranges here in block map to indicate
 * to the caller that the data should be zeroed (a_bpn == -1).  We
 * have to be careful about what ranges we return to the cluster code.
 * Currently the cluster code can only handle non-rounded values for
 * the EOF; it cannot handle funny sized ranges in the middle of the
 * file (the main problem is that it sends down odd sized I/Os to the
 * disk).  Our code currently works because whilst the very first
 * offset and the last offset in the invalid ranges are not aligned,
 * gaps in the invalid ranges between the first and last, have to be
 * aligned (because we always write page sized blocks).  For example,
 * consider this arrangement:
 *
 *         +-------------+-----+-------+------+
 *         |             |XXXXX|       |XXXXXX|
 *         +-------------+-----+-------+------+
 *                       a     b       c      d
 *
 * This shows two invalid ranges <a, b> and <c, d>.  Whilst a and d
 * are not necessarily aligned, b and c *must* be.
 *
 * Zero-filling occurs in a number of ways:
 *
 *   1. When a read occurs and we return with a_bpn == -1.
 *
 *   2. When hfs_fsync or hfs_filedone calls hfs_flush_invalid_ranges
 *      which will cause us to iterate over the ranges bringing in
 *      pages that are not present in the cache and zeroing them.  Any
 *      pages that are already in the cache are left untouched.  Note
 *      that hfs_fsync does not always flush invalid ranges.
 *
 *   3. When we extend a file we zero out from the old EOF to the end
 *      of the page.  It would be nice if we didn't have to do this if
 *      the page wasn't present (and could defer it), but because of
 *      the problem described above, we have to.
 *
 * The invalid ranges are also used to restrict the size that we write
 * out on disk: see hfs_prepare_fork_for_update.
 *
 * Note that invalid ranges are ignored when neither the VNODE_READ or
 * the VNODE_WRITE flag is specified.  This is useful for the
 * F_LOG2PHYS* fcntls which are not interested in invalid ranges: they
 * just want to know whether blocks are physically allocated or not.
 */
3251 hfs_vnop_blockmap(struct vnop_blockmap_args
*ap
)
3253 struct vnop_blockmap_args {
3261 vfs_context_t a_context;
3265 struct vnode
*vp
= ap
->a_vp
;
3267 struct filefork
*fp
;
3268 struct hfsmount
*hfsmp
;
3269 size_t bytesContAvail
= ap
->a_size
;
3270 int retval
= E_NONE
;
3273 struct rl_entry
*invalid_range
;
3274 enum rl_overlaptype overlaptype
;
3279 if (VNODE_IS_RSRC(vp
)) {
3280 /* allow blockmaps to the resource fork */
3282 if ( hfs_file_is_compressed(VTOC(vp
), 1) ) { /* 1 == don't take the cnode lock */
3283 int state
= decmpfs_cnode_get_vnode_state(VTOCMP(vp
));
3285 case FILE_IS_COMPRESSED
:
3287 case FILE_IS_CONVERTING
:
3288 /* if FILE_IS_CONVERTING, we allow blockmap */
3291 printf("invalid state %d for compressed file\n", state
);
3296 #endif /* HFS_COMPRESSION */
3298 /* Do not allow blockmap operation on a directory */
3299 if (vnode_isdir(vp
)) {
3304 * Check for underlying vnode requests and ensure that logical
3305 * to physical mapping is requested.
3307 if (ap
->a_bpn
== NULL
)
3314 if ( !vnode_issystem(vp
) && !vnode_islnk(vp
) && !vnode_isswap(vp
)) {
3315 if (cp
->c_lockowner
!= current_thread()) {
3316 hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
3320 // For reads, check the invalid ranges
3321 if (ISSET(ap
->a_flags
, VNODE_READ
)) {
3322 if (ap
->a_foffset
>= fp
->ff_size
) {
3327 overlaptype
= rl_scan(&fp
->ff_invalidranges
, ap
->a_foffset
,
3328 ap
->a_foffset
+ (off_t
)bytesContAvail
- 1,
3330 switch(overlaptype
) {
3331 case RL_MATCHINGOVERLAP
:
3332 case RL_OVERLAPCONTAINSRANGE
:
3333 case RL_OVERLAPSTARTSBEFORE
:
3334 /* There's no valid block for this byte offset */
3335 *ap
->a_bpn
= (daddr64_t
)-1;
3336 /* There's no point limiting the amount to be returned
3337 * if the invalid range that was hit extends all the way
3338 * to the EOF (i.e. there's no valid bytes between the
3339 * end of this range and the file's EOF):
3341 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
3342 ((size_t)(invalid_range
->rl_end
+ 1 - ap
->a_foffset
) < bytesContAvail
)) {
3343 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
3349 case RL_OVERLAPISCONTAINED
:
3350 case RL_OVERLAPENDSAFTER
:
3351 /* The range of interest hits an invalid block before the end: */
3352 if (invalid_range
->rl_start
== ap
->a_foffset
) {
3353 /* There's actually no valid information to be had starting here: */
3354 *ap
->a_bpn
= (daddr64_t
)-1;
3355 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
3356 ((size_t)(invalid_range
->rl_end
+ 1 - ap
->a_foffset
) < bytesContAvail
)) {
3357 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
3364 * Sadly, the lower layers don't like us to
3365 * return unaligned ranges, so we skip over
3366 * any invalid ranges here that are less than
3367 * a page: zeroing of those bits is not our
3368 * responsibility (it's dealt with elsewhere).
3371 off_t rounded_start
= round_page_64(invalid_range
->rl_start
);
3372 if ((off_t
)bytesContAvail
< rounded_start
- ap
->a_foffset
)
3374 if (rounded_start
< invalid_range
->rl_end
+ 1) {
3375 bytesContAvail
= rounded_start
- ap
->a_foffset
;
3378 } while ((invalid_range
= TAILQ_NEXT(invalid_range
,
3390 if (cp
->c_cpentry
) {
3391 const int direction
= (ISSET(ap
->a_flags
, VNODE_WRITE
)
3392 ? VNODE_WRITE
: VNODE_READ
);
3394 cp_io_params_t io_params
;
3395 cp_io_params(hfsmp
, cp
->c_cpentry
,
3396 off_rsrc_make(ap
->a_foffset
, VNODE_IS_RSRC(vp
)),
3397 direction
, &io_params
);
3399 if (io_params
.max_len
< (off_t
)bytesContAvail
)
3400 bytesContAvail
= io_params
.max_len
;
3402 if (io_params
.phys_offset
!= -1) {
3403 *ap
->a_bpn
= ((io_params
.phys_offset
+ hfsmp
->hfsPlusIOPosOffset
)
3404 / hfsmp
->hfs_logical_block_size
);
3414 /* Check virtual blocks only when performing write operation */
3415 if ((ap
->a_flags
& VNODE_WRITE
) && (fp
->ff_unallocblocks
!= 0)) {
3416 if (hfs_start_transaction(hfsmp
) != 0) {
3422 syslocks
= SFL_EXTENTS
| SFL_BITMAP
;
3424 } else if (overflow_extents(fp
)) {
3425 syslocks
= SFL_EXTENTS
;
3429 lockflags
= hfs_systemfile_lock(hfsmp
, syslocks
, HFS_EXCLUSIVE_LOCK
);
3432 * Check for any delayed allocations.
3434 if ((ap
->a_flags
& VNODE_WRITE
) && (fp
->ff_unallocblocks
!= 0)) {
3436 u_int32_t loanedBlocks
;
3439 // Make sure we have a transaction. It's possible
3440 // that we came in and fp->ff_unallocblocks was zero
3441 // but during the time we blocked acquiring the extents
3442 // btree, ff_unallocblocks became non-zero and so we
3443 // will need to start a transaction.
3445 if (started_tr
== 0) {
3447 hfs_systemfile_unlock(hfsmp
, lockflags
);
3454 * Note: ExtendFileC will Release any blocks on loan and
3455 * aquire real blocks. So we ask to extend by zero bytes
3456 * since ExtendFileC will account for the virtual blocks.
3459 loanedBlocks
= fp
->ff_unallocblocks
;
3460 retval
= ExtendFileC(hfsmp
, (FCB
*)fp
, 0, 0,
3461 kEFAllMask
| kEFNoClumpMask
, &actbytes
);
3464 fp
->ff_unallocblocks
= loanedBlocks
;
3465 cp
->c_blocks
+= loanedBlocks
;
3466 fp
->ff_blocks
+= loanedBlocks
;
3468 hfs_lock_mount (hfsmp
);
3469 hfsmp
->loanedBlocks
+= loanedBlocks
;
3470 hfs_unlock_mount (hfsmp
);
3472 hfs_systemfile_unlock(hfsmp
, lockflags
);
3473 cp
->c_flag
|= C_MODIFIED
;
3475 (void) hfs_update(vp
, 0);
3476 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3478 hfs_end_transaction(hfsmp
);
3486 // check for the alternate xattr vnode
3487 if (vp
== hfsmp
->hfs_attrdata_vp
) {
3488 HFSPlusExtentDescriptor real_fext
;
3489 size_t availableBytes
;
3490 u_int32_t sectorsPerBlock
; // Number of sectors per allocation block
3491 u_int32_t sectorSize
;
3494 if (!hfs_xattr_fext_find(&hfsmp
->hfs_xattr_io
, hfsmp
->blockSize
,
3495 ap
->a_foffset
, &real_fext
, &f_offset
)) {
3496 panic("cannot find xattr fext for %llu", f_offset
);
3499 sectorSize
= hfsmp
->hfs_logical_block_size
;
3500 // Compute the number of sectors in an allocation block
3501 sectorsPerBlock
= hfsmp
->blockSize
/ sectorSize
;
3503 *ap
->a_bpn
= (f_offset
/ hfsmp
->blockSize
) * sectorsPerBlock
;
3504 availableBytes
= real_fext
.blockCount
* hfsmp
->blockSize
- (f_offset
- (real_fext
.startBlock
* hfsmp
->blockSize
));
3505 if (availableBytes
< bytesContAvail
) {
3506 bytesContAvail
= availableBytes
;
3513 retval
= MapFileBlockC(hfsmp
, (FCB
*)fp
, bytesContAvail
, ap
->a_foffset
,
3514 ap
->a_bpn
, &bytesContAvail
);
3521 hfs_systemfile_unlock(hfsmp
, lockflags
);
3526 /* On write, always return error because virtual blocks, if any,
3527 * should have been allocated in ExtendFileC(). We do not
3528 * allocate virtual blocks on read, therefore return error
3529 * only if no virtual blocks are allocated. Otherwise we search
3530 * rangelist for zero-fills
3532 if ((MacToVFSError(retval
) != ERANGE
) ||
3533 (ap
->a_flags
& VNODE_WRITE
) ||
3534 ((ap
->a_flags
& VNODE_READ
) && (fp
->ff_unallocblocks
== 0))) {
3538 /* Validate if the start offset is within logical file size */
3539 if (ap
->a_foffset
>= fp
->ff_size
) {
3544 * At this point, we have encountered a failure during
3545 * MapFileBlockC that resulted in ERANGE, and we are not
3546 * servicing a write, and there are borrowed blocks.
3548 * However, the cluster layer will not call blockmap for
3549 * blocks that are borrowed and in-cache. We have to assume
3550 * that because we observed ERANGE being emitted from
3551 * MapFileBlockC, this extent range is not valid on-disk. So
3552 * we treat this as a mapping that needs to be zero-filled
3556 if (fp
->ff_size
- ap
->a_foffset
< (off_t
)bytesContAvail
)
3557 bytesContAvail
= fp
->ff_size
- ap
->a_foffset
;
3559 *ap
->a_bpn
= (daddr64_t
) -1;
3567 if (ISSET(ap
->a_flags
, VNODE_WRITE
)) {
3568 struct rl_entry
*r
= TAILQ_FIRST(&fp
->ff_invalidranges
);
3570 // See if we might be overlapping invalid ranges...
3571 if (r
&& (ap
->a_foffset
+ (off_t
)bytesContAvail
) > r
->rl_start
) {
3573 * Mark the file as needing an update if we think the
3574 * on-disk EOF has changed.
3576 if (ap
->a_foffset
<= r
->rl_start
)
3577 SET(cp
->c_flag
, C_MODIFIED
);
3580 * This isn't the ideal place to put this. Ideally, we
3581 * should do something *after* we have successfully
3582 * written to the range, but that's difficult to do
3583 * because we cannot take locks in the callback. At
3584 * present, the cluster code will call us with VNODE_WRITE
3585 * set just before it's about to write the data so we know
3586 * that data is about to be written. If we get an I/O
3587 * error at this point then chances are the metadata
3588 * update to follow will also have an I/O error so the
3589 * risk here is small.
3591 rl_remove(ap
->a_foffset
, ap
->a_foffset
+ bytesContAvail
- 1,
3592 &fp
->ff_invalidranges
);
3594 if (!TAILQ_FIRST(&fp
->ff_invalidranges
)) {
3595 cp
->c_flag
&= ~C_ZFWANTSYNC
;
3596 cp
->c_zftimeout
= 0;
3602 *ap
->a_run
= bytesContAvail
;
3605 *(int *)ap
->a_poff
= 0;
3609 hfs_update(vp
, TRUE
);
3610 hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3611 hfs_end_transaction(hfsmp
);
3618 return (MacToVFSError(retval
));
/*
 * prepare and issue the I/O
 * buf_strategy knows how to deal
 * with requests that require
 * fragmented I/Os
 */
int
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
	buf_t	bp = ap->a_bp;
	vnode_t	vp = buf_vnode(bp);
	int error = 0;

	/* Mark buffer as containing static data if cnode flag set */
	if (VTOC(vp)->c_flag & C_SSD_STATIC) {
		bufattr_markstatic(buf_attr(bp));
	}

	/* Mark buffer for greedy-mode writes if cnode flag set */
	if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
		bufattr_markgreedymode(buf_attr(bp));
	}

	/* mark buffer as containing burst mode data if cnode flag set */
	if (VTOC(vp)->c_flag & C_IO_ISOCHRONOUS) {
		bufattr_markisochronous(buf_attr(bp));
	}

#if CONFIG_PROTECT
	error = cp_handle_strategy(bp);

	if (error)
		return error;
#endif

	error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);

	return error;
}
3662 do_hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int truncateflags
, vfs_context_t context
)
3664 register struct cnode
*cp
= VTOC(vp
);
3665 struct filefork
*fp
= VTOF(vp
);
3666 kauth_cred_t cred
= vfs_context_ucred(context
);
3669 off_t actualBytesAdded
;
3671 u_int32_t fileblocks
;
3673 struct hfsmount
*hfsmp
;
3675 int suppress_times
= (truncateflags
& HFS_TRUNCATE_SKIPTIMES
);
3677 blksize
= VTOVCB(vp
)->blockSize
;
3678 fileblocks
= fp
->ff_blocks
;
3679 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
3681 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_START
,
3682 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
3687 /* This should only happen with a corrupt filesystem */
3688 if ((off_t
)fp
->ff_size
< 0)
3691 if ((!ISHFSPLUS(VTOVCB(vp
))) && (length
> (off_t
)MAXHFSFILESIZE
))
3698 /* Files that are changing size are not hot file candidates. */
3699 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
3700 fp
->ff_bytesread
= 0;
3704 * We cannot just check if fp->ff_size == length (as an optimization)
3705 * since there may be extra physical blocks that also need truncation.
3708 if ((retval
= hfs_getinoquota(cp
)))
3713 * Lengthen the size of the file. We must ensure that the
3714 * last byte of the file is allocated. Since the smallest
3715 * value of ff_size is 0, length will be at least 1.
3717 if (length
> (off_t
)fp
->ff_size
) {
3719 retval
= hfs_chkdq(cp
, (int64_t)(roundup(length
- filebytes
, blksize
)),
3725 * If we don't have enough physical space then
3726 * we need to extend the physical size.
3728 if (length
> filebytes
) {
3730 u_int32_t blockHint
= 0;
3732 /* All or nothing and don't round up to clumpsize. */
3733 eflags
= kEFAllMask
| kEFNoClumpMask
;
3735 if (cred
&& (suser(cred
, NULL
) != 0)) {
3736 eflags
|= kEFReserveMask
; /* keep a reserve */
3740 * Allocate Journal and Quota files in metadata zone.
3742 if (filebytes
== 0 &&
3743 hfsmp
->hfs_flags
& HFS_METADATA_ZONE
&&
3744 hfs_virtualmetafile(cp
)) {
3745 eflags
|= kEFMetadataMask
;
3746 blockHint
= hfsmp
->hfs_metazone_start
;
3748 if (hfs_start_transaction(hfsmp
) != 0) {
3753 /* Protect extents b-tree and allocation bitmap */
3754 lockflags
= SFL_BITMAP
;
3755 if (overflow_extents(fp
))
3756 lockflags
|= SFL_EXTENTS
;
3757 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3760 * Keep growing the file as long as the current EOF is
3761 * less than the desired value.
3763 while ((length
> filebytes
) && (retval
== E_NONE
)) {
3764 bytesToAdd
= length
- filebytes
;
3765 retval
= MacToVFSError(ExtendFileC(VTOVCB(vp
),
3770 &actualBytesAdded
));
3772 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
3773 if (actualBytesAdded
== 0 && retval
== E_NONE
) {
3774 if (length
> filebytes
)
3780 hfs_systemfile_unlock(hfsmp
, lockflags
);
3784 hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3787 hfs_end_transaction(hfsmp
);
3792 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_NONE
,
3793 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
3796 if (ISSET(flags
, IO_NOZEROFILL
)) {
3797 // An optimisation for the hibernation file
3798 if (vnode_isswap(vp
))
3799 rl_remove_all(&fp
->ff_invalidranges
);
3801 if (!vnode_issystem(vp
) && retval
== E_NONE
) {
3802 if (length
> (off_t
)fp
->ff_size
) {
3805 /* Extending the file: time to fill out the current last page w. zeroes? */
3806 if (fp
->ff_size
& PAGE_MASK_64
) {
3807 /* There might be some valid data at the start of the (current) last page
3808 of the file, so zero out the remainder of that page to ensure the
3809 entire page contains valid data. */
3811 retval
= hfs_zero_eof_page(vp
, length
);
3812 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
3813 if (retval
) goto Err_Exit
;
3816 rl_add(fp
->ff_size
, length
- 1, &fp
->ff_invalidranges
);
3817 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
3820 panic("hfs_truncate: invoked on non-UBC object?!");
3823 if (suppress_times
== 0) {
3824 cp
->c_touch_modtime
= TRUE
;
3826 fp
->ff_size
= length
;
3828 } else { /* Shorten the size of the file */
3830 // An optimisation for the hibernation file
3831 if (ISSET(flags
, IO_NOZEROFILL
) && vnode_isswap(vp
)) {
3832 rl_remove_all(&fp
->ff_invalidranges
);
3833 } else if ((off_t
)fp
->ff_size
> length
) {
3834 /* Any space previously marked as invalid is now irrelevant: */
3835 rl_remove(length
, fp
->ff_size
- 1, &fp
->ff_invalidranges
);
3839 * Account for any unmapped blocks. Note that the new
3840 * file length can still end up with unmapped blocks.
3842 if (fp
->ff_unallocblocks
> 0) {
3843 u_int32_t finalblks
;
3844 u_int32_t loanedBlocks
;
3846 hfs_lock_mount(hfsmp
);
3847 loanedBlocks
= fp
->ff_unallocblocks
;
3848 cp
->c_blocks
-= loanedBlocks
;
3849 fp
->ff_blocks
-= loanedBlocks
;
3850 fp
->ff_unallocblocks
= 0;
3852 hfsmp
->loanedBlocks
-= loanedBlocks
;
3854 finalblks
= (length
+ blksize
- 1) / blksize
;
3855 if (finalblks
> fp
->ff_blocks
) {
3856 /* calculate required unmapped blocks */
3857 loanedBlocks
= finalblks
- fp
->ff_blocks
;
3858 hfsmp
->loanedBlocks
+= loanedBlocks
;
3860 fp
->ff_unallocblocks
= loanedBlocks
;
3861 cp
->c_blocks
+= loanedBlocks
;
3862 fp
->ff_blocks
+= loanedBlocks
;
3864 hfs_unlock_mount (hfsmp
);
3867 off_t savedbytes
= ((off_t
)fp
->ff_blocks
* (off_t
)blksize
);
3868 if (hfs_start_transaction(hfsmp
) != 0) {
3873 if (fp
->ff_unallocblocks
== 0) {
3874 /* Protect extents b-tree and allocation bitmap */
3875 lockflags
= SFL_BITMAP
;
3876 if (overflow_extents(fp
))
3877 lockflags
|= SFL_EXTENTS
;
3878 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3880 retval
= MacToVFSError(TruncateFileC(VTOVCB(vp
), (FCB
*)fp
, length
, 0,
3881 FORK_IS_RSRC (fp
), FTOC(fp
)->c_fileid
, false));
3883 hfs_systemfile_unlock(hfsmp
, lockflags
);
3887 fp
->ff_size
= length
;
3890 hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3892 hfs_end_transaction(hfsmp
);
3894 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
3898 /* These are bytesreleased */
3899 (void) hfs_chkdq(cp
, (int64_t)-(savedbytes
- filebytes
), NOCRED
, 0);
3903 // Unlike when growing a file, we adjust the hotfile block count here
3904 // instead of deeper down in the block allocation code because we do
3905 // not necessarily have a vnode or "fcb" at the time we're deleting
3906 // the file and so we wouldn't know if it was hotfile cached or not
3908 hfs_hotfile_adjust_blocks(vp
, (int64_t)((savedbytes
- filebytes
) / blksize
));
3912 * Only set update flag if the logical length changes & we aren't
3913 * suppressing modtime updates.
3915 if (((off_t
)fp
->ff_size
!= length
) && (suppress_times
== 0)) {
3916 cp
->c_touch_modtime
= TRUE
;
3918 fp
->ff_size
= length
;
3920 if (cp
->c_mode
& (S_ISUID
| S_ISGID
)) {
3921 if (!vfs_context_issuser(context
))
3922 cp
->c_mode
&= ~(S_ISUID
| S_ISGID
);
3924 cp
->c_flag
|= C_MODIFIED
;
3925 cp
->c_touch_chgtime
= TRUE
; /* status changed */
3926 if (suppress_times
== 0) {
3927 cp
->c_touch_modtime
= TRUE
; /* file data was modified */
3930 * If we are not suppressing the modtime update, then
3931 * update the gen count as well.
3933 if (S_ISREG(cp
->c_attr
.ca_mode
) || S_ISLNK (cp
->c_attr
.ca_mode
)) {
3934 hfs_incr_gencount(cp
);
3938 retval
= hfs_update(vp
, 0);
3940 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_NONE
,
3941 -1, -1, -1, retval
, 0);
3946 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_END
,
3947 (int)length
, (int)fp
->ff_size
, (int)filebytes
, retval
, 0);
/*
 * Preparation which must be done prior to deleting the catalog record
 * of a file or directory.  In order to make the on-disk as safe as possible,
 * we remove the catalog entry before releasing the bitmap blocks and the
 * overflow extent records.  However, some work must be done prior to deleting
 * the catalog record.
 *
 * When calling this function, the cnode must exist both in memory and on-disk.
 * If there are both resource fork and data fork vnodes, this function should
 * be called on both.
 */
int
hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {

	struct filefork *fp = VTOF(vp);
	struct cnode *cp = VTOC(vp);
#if QUOTA
	int retval = 0;
#endif /* QUOTA */

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {
		return (EISDIR);
	}

	/*
	 * See the comment below in hfs_truncate for why we need to call
	 * setsize here.  Essentially we want to avoid pending IO if we
	 * already know that the blocks are going to be released here.
	 * This function is only called when totally removing all storage for a file, so
	 * we can take a shortcut and immediately setsize (0);
	 */
	hfs_ubc_setsize(vp, 0, false);

	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)
		return (EINVAL);

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
#if QUOTA
	if ((retval = hfs_getinoquota(cp))) {
		return (retval);
	}
#endif /* QUOTA */

	/* Wipe out any invalid ranges which have yet to be backed by disk */
	rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);

	/*
	 * Account for any unmapped blocks. Since we're deleting the
	 * entire file, we don't have to worry about just shrinking
	 * to a smaller number of borrowed blocks.
	 */
	if (fp->ff_unallocblocks > 0) {
		u_int32_t loanedBlocks;

		hfs_lock_mount (hfsmp);
		loanedBlocks = fp->ff_unallocblocks;
		cp->c_blocks -= loanedBlocks;
		fp->ff_blocks -= loanedBlocks;
		fp->ff_unallocblocks = 0;

		hfsmp->loanedBlocks -= loanedBlocks;

		hfs_unlock_mount (hfsmp);
	}

	return 0;
}
4028 * Special wrapper around calling TruncateFileC. This function is useable
4029 * even when the catalog record does not exist any longer, making it ideal
4030 * for use when deleting a file. The simplification here is that we know
4031 * that we are releasing all blocks.
4033 * Note that this function may be called when there is no vnode backing
4034 * the file fork in question. We may call this from hfs_vnop_inactive
4035 * to clear out resource fork data (and may not want to clear out the data
4036 * fork yet). As a result, we pointer-check both sets of inputs before
4037 * doing anything with them.
4039 * The caller is responsible for saving off a copy of the filefork(s)
4040 * embedded within the cnode prior to calling this function. The pointers
4041 * supplied as arguments must be valid even if the cnode is no longer valid.
4045 hfs_release_storage (struct hfsmount
*hfsmp
, struct filefork
*datafork
,
4046 struct filefork
*rsrcfork
, u_int32_t fileid
) {
4049 u_int32_t fileblocks
;
4054 blksize
= hfsmp
->blockSize
;
4058 off_t prev_filebytes
;
4060 datafork
->ff_size
= 0;
4062 fileblocks
= datafork
->ff_blocks
;
4063 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
4064 prev_filebytes
= filebytes
;
4066 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
4068 while (filebytes
> 0) {
4069 if (filebytes
> HFS_BIGFILE_SIZE
) {
4070 filebytes
-= HFS_BIGFILE_SIZE
;
4075 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
4076 if (hfs_start_transaction(hfsmp
) != 0) {
4081 if (datafork
->ff_unallocblocks
== 0) {
4082 /* Protect extents b-tree and allocation bitmap */
4083 lockflags
= SFL_BITMAP
;
4084 if (overflow_extents(datafork
))
4085 lockflags
|= SFL_EXTENTS
;
4086 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
4088 error
= MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp
), datafork
, filebytes
, 1, 0, fileid
, false));
4090 hfs_systemfile_unlock(hfsmp
, lockflags
);
4092 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
4094 struct cnode
*cp
= datafork
? FTOC(datafork
) : NULL
;
4096 vp
= cp
? CTOV(cp
, 0) : NULL
;
4097 hfs_hotfile_adjust_blocks(vp
, (int64_t)((prev_filebytes
- filebytes
) / blksize
));
4098 prev_filebytes
= filebytes
;
4100 /* Finish the transaction and start over if necessary */
4101 hfs_end_transaction(hfsmp
);
4110 if (error
== 0 && rsrcfork
) {
4111 rsrcfork
->ff_size
= 0;
4113 fileblocks
= rsrcfork
->ff_blocks
;
4114 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
4116 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
4118 while (filebytes
> 0) {
4119 if (filebytes
> HFS_BIGFILE_SIZE
) {
4120 filebytes
-= HFS_BIGFILE_SIZE
;
4125 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
4126 if (hfs_start_transaction(hfsmp
) != 0) {
4131 if (rsrcfork
->ff_unallocblocks
== 0) {
4132 /* Protect extents b-tree and allocation bitmap */
4133 lockflags
= SFL_BITMAP
;
4134 if (overflow_extents(rsrcfork
))
4135 lockflags
|= SFL_EXTENTS
;
4136 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
4138 error
= MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp
), rsrcfork
, filebytes
, 1, 1, fileid
, false));
4140 hfs_systemfile_unlock(hfsmp
, lockflags
);
4142 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
4144 /* Finish the transaction and start over if necessary */
4145 hfs_end_transaction(hfsmp
);
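/*
 * hfs_ubc_setsize
 *
 * Thin wrapper around ubc_setsize_ex().  When the caller already holds the
 * cnode lock, the first attempt is made with UBC_SETSIZE_NO_FS_REENTRY; if
 * that would require re-entering the filesystem (EAGAIN), the cnode lock is
 * dropped around a plain ubc_setsize_ex() call and then re-taken.  ENOENT
 * from the UBC (e.g. symlinks, which carry no UBC info) is treated as
 * success.
 */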
errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool have_cnode_lock)
{
	errno_t error;

	/*
	 * Call ubc_setsize to give the VM subsystem a chance to do
	 * whatever it needs to with existing pages before we delete
	 * blocks.  Note that symlinks don't use the UBC so we'll
	 * get back ENOENT in that case.
	 */
	if (have_cnode_lock) {
		error = ubc_setsize_ex(vp, len, UBC_SETSIZE_NO_FS_REENTRY);
		if (error == EAGAIN) {
			cnode_t *cp = VTOC(vp);

			if (cp->c_truncatelockowner != current_thread())
				hfs_warn("hfs: hfs_ubc_setsize called without exclusive truncate lock!");

			hfs_unlock(cp);
			error = ubc_setsize_ex(vp, len, 0);
			hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
		}
	} else {
		error = ubc_setsize_ex(vp, len, 0);
	}

	return error == ENOENT ? 0 : error;
}
4185 * Truncate a cnode to at most length size, freeing (or adding) the
4189 hfs_truncate(struct vnode
*vp
, off_t length
, int flags
,
4190 int truncateflags
, vfs_context_t context
)
4192 struct filefork
*fp
= VTOF(vp
);
4194 u_int32_t fileblocks
;
4197 struct cnode
*cp
= VTOC(vp
);
4198 hfsmount_t
*hfsmp
= VTOHFS(vp
);
4200 /* Cannot truncate an HFS directory! */
4201 if (vnode_isdir(vp
)) {
4204 /* A swap file cannot change size. */
4205 if (vnode_isswap(vp
) && length
&& !ISSET(flags
, IO_NOAUTH
)) {
4209 blksize
= hfsmp
->blockSize
;
4210 fileblocks
= fp
->ff_blocks
;
4211 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
4213 bool caller_has_cnode_lock
= (cp
->c_lockowner
== current_thread());
4215 error
= hfs_ubc_setsize(vp
, length
, caller_has_cnode_lock
);
4219 if (!caller_has_cnode_lock
) {
4220 error
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
4225 if (vnode_islnk(vp
) && cp
->c_datafork
->ff_symlinkptr
) {
4226 hfs_free(cp
->c_datafork
->ff_symlinkptr
, cp
->c_datafork
->ff_size
);
4227 cp
->c_datafork
->ff_symlinkptr
= NULL
;
4230 // have to loop truncating or growing files that are
4231 // really big because otherwise transactions can get
4232 // enormous and consume too many kernel resources.
4234 if (length
< filebytes
) {
4235 while (filebytes
> length
) {
4236 if ((filebytes
- length
) > HFS_BIGFILE_SIZE
) {
4237 filebytes
-= HFS_BIGFILE_SIZE
;
4241 error
= do_hfs_truncate(vp
, filebytes
, flags
, truncateflags
, context
);
4245 } else if (length
> filebytes
) {
4246 kauth_cred_t cred
= vfs_context_ucred(context
);
4247 const bool keep_reserve
= cred
&& suser(cred
, NULL
) != 0;
4249 if (hfs_freeblks(hfsmp
, keep_reserve
)
4250 < howmany(length
- filebytes
, blksize
)) {
4253 while (filebytes
< length
) {
4254 if ((length
- filebytes
) > HFS_BIGFILE_SIZE
) {
4255 filebytes
+= HFS_BIGFILE_SIZE
;
4259 error
= do_hfs_truncate(vp
, filebytes
, flags
, truncateflags
, context
);
4264 } else /* Same logical size */ {
4266 error
= do_hfs_truncate(vp
, length
, flags
, truncateflags
, context
);
4268 /* Files that are changing size are not hot file candidates. */
4269 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
) {
4270 fp
->ff_bytesread
= 0;
4273 #if HFS_CONFIG_KEY_ROLL
4274 if (!error
&& cp
->c_truncatelockowner
== current_thread()) {
4275 hfs_key_roll_check(cp
, true);
4279 if (!caller_has_cnode_lock
)
4282 // Make sure UBC's size matches up (in case we didn't completely succeed)
4283 errno_t err2
= hfs_ubc_setsize(vp
, fp
->ff_size
, caller_has_cnode_lock
);
4292 * Preallocate file storage space.
4295 hfs_vnop_allocate(struct vnop_allocate_args
/* {
4299 off_t *a_bytesallocated;
4301 vfs_context_t a_context;
4304 struct vnode
*vp
= ap
->a_vp
;
4306 struct filefork
*fp
;
4308 off_t length
= ap
->a_length
;
4310 off_t moreBytesRequested
;
4311 off_t actualBytesAdded
;
4313 u_int32_t fileblocks
;
4314 int retval
, retval2
;
4315 u_int32_t blockHint
;
4316 u_int32_t extendFlags
; /* For call to ExtendFileC */
4317 struct hfsmount
*hfsmp
;
4318 kauth_cred_t cred
= vfs_context_ucred(ap
->a_context
);
4322 *(ap
->a_bytesallocated
) = 0;
4324 if (!vnode_isreg(vp
))
4326 if (length
< (off_t
)0)
4331 orig_ctime
= VTOC(vp
)->c_ctime
;
4333 nspace_snapshot_event(vp
, orig_ctime
, ap
->a_length
== 0 ? NAMESPACE_HANDLER_TRUNCATE_OP
|NAMESPACE_HANDLER_DELETE_OP
: NAMESPACE_HANDLER_TRUNCATE_OP
, NULL
);
4335 hfs_lock_truncate(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
4337 if ((retval
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
))) {
4338 hfs_unlock_truncate(cp
, HFS_LOCK_DEFAULT
);
4346 fileblocks
= fp
->ff_blocks
;
4347 filebytes
= (off_t
)fileblocks
* (off_t
)vcb
->blockSize
;
4349 if ((ap
->a_flags
& ALLOCATEFROMVOL
) && (length
< filebytes
)) {
4354 /* Fill in the flags word for the call to Extend the file */
4356 extendFlags
= kEFNoClumpMask
;
4357 if (ap
->a_flags
& ALLOCATECONTIG
)
4358 extendFlags
|= kEFContigMask
;
4359 if (ap
->a_flags
& ALLOCATEALL
)
4360 extendFlags
|= kEFAllMask
;
4361 if (cred
&& suser(cred
, NULL
) != 0)
4362 extendFlags
|= kEFReserveMask
;
4363 if (hfs_virtualmetafile(cp
))
4364 extendFlags
|= kEFMetadataMask
;
4368 startingPEOF
= filebytes
;
4370 if (ap
->a_flags
& ALLOCATEFROMPEOF
)
4371 length
+= filebytes
;
4372 else if (ap
->a_flags
& ALLOCATEFROMVOL
)
4373 blockHint
= ap
->a_offset
/ VTOVCB(vp
)->blockSize
;
4375 /* If no changes are necesary, then we're done */
4376 if (filebytes
== length
)
4380 * Lengthen the size of the file. We must ensure that the
4381 * last byte of the file is allocated. Since the smallest
4382 * value of filebytes is 0, length will be at least 1.
4384 if (length
> filebytes
) {
4385 if (ISSET(extendFlags
, kEFAllMask
)
4386 && (hfs_freeblks(hfsmp
, ISSET(extendFlags
, kEFReserveMask
))
4387 < howmany(length
- filebytes
, hfsmp
->blockSize
))) {
4392 off_t total_bytes_added
= 0, orig_request_size
;
4394 orig_request_size
= moreBytesRequested
= length
- filebytes
;
4397 retval
= hfs_chkdq(cp
,
4398 (int64_t)(roundup(moreBytesRequested
, vcb
->blockSize
)),
4405 * Metadata zone checks.
4407 if (hfsmp
->hfs_flags
& HFS_METADATA_ZONE
) {
4409 * Allocate Journal and Quota files in metadata zone.
4411 if (hfs_virtualmetafile(cp
)) {
4412 blockHint
= hfsmp
->hfs_metazone_start
;
4413 } else if ((blockHint
>= hfsmp
->hfs_metazone_start
) &&
4414 (blockHint
<= hfsmp
->hfs_metazone_end
)) {
4416 * Move blockHint outside metadata zone.
4418 blockHint
= hfsmp
->hfs_metazone_end
+ 1;
4423 while ((length
> filebytes
) && (retval
== E_NONE
)) {
4424 off_t bytesRequested
;
4426 if (hfs_start_transaction(hfsmp
) != 0) {
4431 /* Protect extents b-tree and allocation bitmap */
4432 lockflags
= SFL_BITMAP
;
4433 if (overflow_extents(fp
))
4434 lockflags
|= SFL_EXTENTS
;
4435 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
4437 if (moreBytesRequested
>= HFS_BIGFILE_SIZE
) {
4438 bytesRequested
= HFS_BIGFILE_SIZE
;
4440 bytesRequested
= moreBytesRequested
;
4443 if (extendFlags
& kEFContigMask
) {
4444 // if we're on a sparse device, this will force it to do a
4445 // full scan to find the space needed.
4446 hfsmp
->hfs_flags
&= ~HFS_DID_CONTIG_SCAN
;
4449 retval
= MacToVFSError(ExtendFileC(vcb
,
4454 &actualBytesAdded
));
4456 if (retval
== E_NONE
) {
4457 *(ap
->a_bytesallocated
) += actualBytesAdded
;
4458 total_bytes_added
+= actualBytesAdded
;
4459 moreBytesRequested
-= actualBytesAdded
;
4460 if (blockHint
!= 0) {
4461 blockHint
+= actualBytesAdded
/ vcb
->blockSize
;
4464 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)vcb
->blockSize
;
4466 hfs_systemfile_unlock(hfsmp
, lockflags
);
4469 (void) hfs_update(vp
, 0);
4470 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
4473 hfs_end_transaction(hfsmp
);
4478 * if we get an error and no changes were made then exit
4479 * otherwise we must do the hfs_update to reflect the changes
4481 if (retval
&& (startingPEOF
== filebytes
))
4485 * Adjust actualBytesAdded to be allocation block aligned, not
4486 * clump size aligned.
4487 * NOTE: So what we are reporting does not affect reality
4488 * until the file is closed, when we truncate the file to allocation
4491 if (total_bytes_added
!= 0 && orig_request_size
< total_bytes_added
)
4492 *(ap
->a_bytesallocated
) =
4493 roundup(orig_request_size
, (off_t
)vcb
->blockSize
);
4495 } else { /* Shorten the size of the file */
4498 * N.B. At present, this code is never called. If and when we
4499 * do start using it, it looks like there might be slightly
4500 * strange semantics with the file size: it's possible for the
4501 * file size to *increase* e.g. if current file size is 5,
4502 * length is 1024 and filebytes is 4096, the file size will
4503 * end up being 1024 bytes. This isn't necessarily a problem
4504 * but it's not consistent with the code above which doesn't
4505 * change the file size.
4508 retval
= hfs_truncate(vp
, length
, 0, 0, ap
->a_context
);
4509 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)vcb
->blockSize
;
4512 * if we get an error and no changes were made then exit
4513 * otherwise we must do the hfs_update to reflect the changes
4515 if (retval
&& (startingPEOF
== filebytes
)) goto Err_Exit
;
4517 /* These are bytesreleased */
4518 (void) hfs_chkdq(cp
, (int64_t)-((startingPEOF
- filebytes
)), NOCRED
,0);
4521 if (fp
->ff_size
> filebytes
) {
4522 fp
->ff_size
= filebytes
;
4524 hfs_ubc_setsize(vp
, fp
->ff_size
, true);
4529 cp
->c_flag
|= C_MODIFIED
;
4530 cp
->c_touch_chgtime
= TRUE
;
4531 cp
->c_touch_modtime
= TRUE
;
4532 retval2
= hfs_update(vp
, 0);
4537 hfs_unlock_truncate(cp
, HFS_LOCK_DEFAULT
);
/*
 * Pagein for HFS filesystem
 */
int
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
	struct vnop_pagein_args {
		vnode_t       a_vp,
		upl_t         a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		int           a_flags,
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp;
	struct cnode *cp;
	struct filefork *fp;
	int error = 0;
	upl_t upl;
	upl_page_info_t *pl;
	off_t f_offset;
	off_t page_needed_f_offset;
	int pg_index;
	int offset;
	int isize;
	int upl_size;
	boolean_t truncate_lock_held = FALSE;
	boolean_t file_converted = FALSE;
	kern_return_t kret;

	vp = ap->a_vp;
	cp = VTOC(vp);
	fp = VTOF(vp);

#if CONFIG_PROTECT
	if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) {
		/*
		 * If we errored here, then this means that one of two things occurred:
		 * 1. there was a problem with the decryption of the key.
		 * 2. the device is locked and we are not allowed to access this particular file.
		 *
		 * Either way, this means that we need to shut down this upl now.  As long as
		 * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves)
		 * then we create a upl and immediately abort it.
		 */
		if (ap->a_pl == NULL) {
			/* create the upl */
			ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl,
			               UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
			/* mark the range as needed so it doesn't immediately get discarded upon abort */
			ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);

			/* Abort the range */
			ubc_upl_abort_range(upl, 0, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
		}
		return error;
	}
#endif /* CONFIG_PROTECT */

	if (ap->a_pl != NULL) {
		/*
		 * this can only happen for swap files now that
		 * we're asking for V2 paging behavior...
		 * so don't need to worry about decompression, or
		 * keeping track of blocks read or taking the truncate lock
		 */
		error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
		                       ap->a_size, (off_t)fp->ff_size, ap->a_flags);
		goto pagein_done;
	}

	page_needed_f_offset = ap->a_f_offset + ap->a_pl_offset;

retry_pagein:
	/*
	 * take truncate lock (shared/recursive) to guard against
	 * zero-fill thru fsync interfering, but only for v2
	 *
	 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
	 * lock shared and we are allowed to recurse 1 level if this thread already
	 * owns the lock exclusively... this can legally occur
	 * if we are doing a shrinking ftruncate against a file
	 * that is mapped private, and the pages being truncated
	 * do not currently exist in the cache... in that case
	 * we will have to page-in the missing pages in order
	 * to provide them to the private mapping... we must
	 * also call hfs_unlock_truncate with a positive been_recursed
	 * arg to indicate that if we have recursed, there is no need to drop
	 * the lock. Allowing this simple recursion is necessary
	 * in order to avoid a certain deadlock... since the ftruncate
	 * already holds the truncate lock exclusively, if we try
	 * to acquire it shared to protect the pagein path, we will
	 * deadlock.
	 *
	 * NOTE: The if () block below is a workaround in order to prevent a
	 * VM deadlock. See rdar://7853471.
	 *
	 * If we are in a forced unmount, then launchd will still have the
	 * dyld_shared_cache file mapped as it is trying to reboot.  If we
	 * take the truncate lock here to service a page fault, then our
	 * thread could deadlock with the forced-unmount.  The forced unmount
	 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
	 * marked C_DELETED, it will call ubc_setsize(0).  As a result, the unmount
	 * thread will think it needs to copy all of the data out of the file
	 * and into a VM copy object.  If we hold the cnode lock here, then that
	 * VM operation will not be able to proceed, because we'll set a busy page
	 * before attempting to grab the lock.  Note that this isn't as simple as "don't
	 * call ubc_setsize" because doing that would just shift the problem to the
	 * ubc_msync done before the vnode is reclaimed.
	 *
	 * So, if a forced unmount on this volume is in flight AND the cnode is
	 * marked C_DELETED, then just go ahead and do the page in without taking
	 * the lock (thus suspending pagein_v2 semantics temporarily).  Since it's on a file
	 * that is not going to be available on the next mount, this seems like an
	 * OK solution from a correctness point of view, even though it is hacky.
	 */
	if (vfs_isforce(vnode_mount(vp))) {
		if (cp->c_flag & C_DELETED) {
			/* If we don't get it, then just go ahead and operate without the lock */
			truncate_lock_held = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
		}
	} else {
		hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
		truncate_lock_held = TRUE;
	}

	kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);

	if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
		error = EINVAL;
		goto pagein_done;
	}
	ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);

	upl_size = isize = ap->a_size;

	/*
	 * Scan from the back to find the last page in the UPL, so that we
	 * aren't looking at a UPL that may have already been freed by the
	 * preceding aborts/completions.
	 */
	for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
		if (upl_page_present(pl, --pg_index))
			break;
		if (pg_index == 0) {
			/*
			 * no absent pages were found in the range specified
			 * just abort the UPL to get rid of it and then we're done
			 */
			ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
			goto pagein_done;
		}
	}

	/*
	 * initialize the offset variables before we touch the UPL.
	 * f_offset is the position into the file, in bytes
	 * offset is the position into the UPL, in bytes
	 * pg_index is the pg# of the UPL we're operating on
	 * isize is the offset into the UPL of the last page that is present.
	 */
	isize = ((pg_index + 1) * PAGE_SIZE);
	pg_index = 0;
	offset = 0;
	f_offset = ap->a_f_offset;

	while (isize) {
		int xsize;
		int num_of_pages;

		if ( !upl_page_present(pl, pg_index)) {
			/*
			 * we asked for RET_ONLY_ABSENT, so it's possible
			 * to get back empty slots in the UPL.
			 * just skip over them
			 */
			f_offset += PAGE_SIZE;
			offset   += PAGE_SIZE;
			isize    -= PAGE_SIZE;
			pg_index++;

			continue;
		}
		/*
		 * We know that we have at least one absent page.
		 * Now checking to see how many in a row we have
		 */
		num_of_pages = 1;
		xsize = isize - PAGE_SIZE;

		while (xsize) {
			if ( !upl_page_present(pl, pg_index + num_of_pages))
				break;
			num_of_pages++;
			xsize -= PAGE_SIZE;
		}
		xsize = num_of_pages * PAGE_SIZE;

		if (VNODE_IS_RSRC(vp)) {
			/* allow pageins of the resource fork */
		} else {
			int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */

			if (compressed) {
				if (truncate_lock_held) {
					/*
					 * can't hold the truncate lock when calling into the decmpfs layer
					 * since it calls back into this layer... even though we're only
					 * holding the lock in shared mode, and the re-entrant path only
					 * takes the lock shared, we can deadlock if some other thread
					 * tries to grab the lock exclusively in between.
					 */
					hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
					truncate_lock_held = FALSE;
				}
				ap->a_pl = upl;
				ap->a_pl_offset = offset;
				ap->a_f_offset = f_offset;
				ap->a_size = xsize;

				error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
				/*
				 * note that decmpfs_pagein_compressed can change the state of
				 * 'compressed'... it will set it to 0 if the file is no longer
				 * compressed once the compression lock is successfully taken
				 * i.e. we would block on that lock while the file is being inflated
				 */
				if (error == 0 && vnode_isfastdevicecandidate(vp)) {
					(void) hfs_addhotfile(vp);
				}
				if (compressed) {
					if (error == 0) {
						/* successful page-in, update the access time */
						VTOC(vp)->c_touch_acctime = TRUE;

						//
						// compressed files are not traditional hot file candidates
						// but they may be for CF (which ignores the ff_bytesread
						// field)
						//
						if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
							fp->ff_bytesread = 0;
						}
					} else if (error == EAGAIN) {
						/*
						 * EAGAIN indicates someone else already holds the compression lock...
						 * to avoid deadlocking, we'll abort this range of pages with an
						 * indication that the pagein needs to be redriven
						 */
						ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
					} else if (error == ENOSPC) {

						if (upl_size == PAGE_SIZE)
							panic("decmpfs_pagein_compressed: couldn't ubc_upl_map a single page\n");

						ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);

						ap->a_size = PAGE_SIZE;
						ap->a_pl = NULL;
						ap->a_pl_offset = 0;
						ap->a_f_offset = page_needed_f_offset;

						goto retry_pagein;
					} else {
						ubc_upl_abort(upl, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
						goto pagein_done;
					}
					goto pagein_next_range;
				} else {
					/*
					 * Set file_converted only if the file became decompressed while we were
					 * paging in.  If it were still compressed, we would re-start the loop using the goto
					 * in the above block.  This avoids overloading truncate_lock_held as our retry_pagein
					 * condition below, since we could have avoided taking the truncate lock to prevent
					 * a deadlock in the force unmount case.
					 */
					file_converted = TRUE;
				}
			}
			if (file_converted == TRUE) {
				/*
				 * the file was converted back to a regular file after we first saw it as compressed
				 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over
				 * reset a_size so that we consider what remains of the original request
				 * and null out a_upl and a_pl_offset.
				 *
				 * We should only be able to get into this block if the decmpfs_pagein_compressed
				 * successfully decompressed the range in question for this file.
				 */
				ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);

				ap->a_size = isize;
				ap->a_pl = NULL;
				ap->a_pl_offset = 0;

				/* Reset file_converted back to false so that we don't infinite-loop. */
				file_converted = FALSE;
				goto retry_pagein;
			}
		}

		error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);

		/*
		 * Keep track of blocks read.
		 */
		if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
			int bytesread;
			int took_cnode_lock = 0;

			if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
				bytesread = fp->ff_size;
			else
				bytesread = xsize;

			/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
			if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
				hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
				took_cnode_lock = 1;
			}
			/*
			 * If this file hasn't been seen since the start of
			 * the current sampling period then start over.
			 */
			if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
				struct timeval tv;

				fp->ff_bytesread = bytesread;
				microtime(&tv);
				cp->c_atime = tv.tv_sec;
			} else {
				fp->ff_bytesread += bytesread;
			}
			cp->c_touch_acctime = TRUE;

			if (vnode_isfastdevicecandidate(vp)) {
				(void) hfs_addhotfile(vp);
			}
			if (took_cnode_lock)
				hfs_unlock(cp);
		}
pagein_next_range:
		f_offset += xsize;
		offset   += xsize;
		isize    -= xsize;
		pg_index += num_of_pages;

		error = 0;
	}

pagein_done:
	if (truncate_lock_held == TRUE) {
		/* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
		hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
	}

	return (error);
}
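
/*
 * Illustrative sketch (never compiled) restating the lock-avoidance pattern
 * used by hfs_vnop_pagein() above for the forced-unmount case: only a
 * try-lock is attempted when the cnode is already marked for deletion, so a
 * page-in never blocks behind the unmount.  The helper name below is
 * hypothetical; it is a simplified restatement, not an alternate
 * implementation.
 */
#if 0
static boolean_t
example_pagein_trunclock(struct cnode *cp, vnode_t vp)
{
	boolean_t have_lock = FALSE;

	if (vfs_isforce(vnode_mount(vp)) && (cp->c_flag & C_DELETED)) {
		/* best effort only: never block behind a forced unmount */
		have_lock = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
	} else {
		/* normal V2 page-in path: shared truncate lock, recursion tolerated */
		hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
		have_lock = TRUE;
	}
	return have_lock;
}
#endif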
/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
		vnode_t       a_vp,
		upl_t         a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		int           a_flags,
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	int retval = 0;
	off_t filesize;
	upl_t upl;
	upl_page_info_t *pl = NULL;
	vm_offset_t a_pl_offset;
	int a_flags;
	int is_pageoutv2 = 0;
	kern_return_t kret;

	cp = VTOC(vp);
	fp = VTOF(vp);

	a_flags = ap->a_flags;
	a_pl_offset = ap->a_pl_offset;

	/*
	 * we can tell if we're getting the new or old behavior from the UPL
	 */
	if ((upl = ap->a_pl) == NULL) {
		int request_flags;

		is_pageoutv2 = 1;
		/*
		 * we're in control of any UPL we commit
		 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
		 */
		a_flags &= ~UPL_NOCOMMIT;
		a_pl_offset = 0;

		/*
		 * For V2 semantics, we want to take the cnode truncate lock
		 * shared to guard against the file size changing via zero-filling.
		 *
		 * However, we have to be careful because we may be invoked
		 * via the ubc_msync path to write out dirty mmap'd pages
		 * in response to a lock event on a content-protected
		 * filesystem (e.g. to write out class A files).
		 * As a result, we want to take the truncate lock 'SHARED' with
		 * the mini-recursion locktype so that we don't deadlock/panic
		 * because we may already be holding the truncate lock exclusive to force any other
		 * IOs to have blocked behind us.
		 */
		hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);

		if (a_flags & UPL_MSYNC) {
			request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
		} else {
			request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
		}

		kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);

		if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
			retval = EINVAL;
			goto pageout_done;
		}
	}
	/*
	 * from this point forward upl points at the UPL we're working with
	 * it was either passed in or we successfully created it
	 */

	/*
	 * Figure out where the file ends, for pageout purposes.  If
	 * ff_new_size > ff_size, then we're in the middle of extending the
	 * file via a write, so it is safe (and necessary) that we be able
	 * to pageout up to that point.
	 */
	filesize = fp->ff_size;
	if (fp->ff_new_size > filesize)
		filesize = fp->ff_new_size;

	/*
	 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
	 * UPL instead of relying on the UPL passed into us.  We go ahead and do that here,
	 * scanning for dirty ranges.  We'll issue our own N cluster_pageout calls, for
	 * N dirty ranges in the UPL.  Note that this is almost a direct copy of the
	 * logic in vnode_pageout except that we need to do it after grabbing the truncate
	 * lock in HFS so that we don't lock invert ourselves.
	 *
	 * Note that we can still get into this function on behalf of the default pager with
	 * non-V2 behavior (swapfiles).  However in that case, we did not grab locks above
	 * since fsync and other writing threads will grab the locks, then mark the
	 * relevant pages as busy.  But the pageout codepath marks the pages as busy,
	 * and THEN would attempt to grab the truncate lock, which would result in deadlock.  So
	 * we do not try to grab anything for the pre-V2 case, which should only be accessed
	 * by the paging/VM system.
	 */
	if (is_pageoutv2) {
		off_t f_offset;
		int offset;
		int isize;
		int pg_index;
		int error;
		int error_ret = 0;

		isize = ap->a_size;
		f_offset = ap->a_f_offset;

		/*
		 * Scan from the back to find the last page in the UPL, so that we
		 * aren't looking at a UPL that may have already been freed by the
		 * preceding aborts/completions.
		 */
		for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
			if (upl_page_present(pl, --pg_index))
				break;
			if (pg_index == 0) {
				ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
				goto pageout_done;
			}
		}

		/*
		 * initialize the offset variables before we touch the UPL.
		 * a_f_offset is the position into the file, in bytes
		 * offset is the position into the UPL, in bytes
		 * pg_index is the pg# of the UPL we're operating on.
		 * isize is the offset into the UPL of the last non-clean page.
		 */
		isize = ((pg_index + 1) * PAGE_SIZE);

		offset = 0;
		pg_index = 0;

		while (isize) {
			int xsize;
			int num_of_pages;

			if ( !upl_page_present(pl, pg_index)) {
				/*
				 * we asked for RET_ONLY_DIRTY, so it's possible
				 * to get back empty slots in the UPL.
				 * just skip over them
				 */
				f_offset += PAGE_SIZE;
				offset   += PAGE_SIZE;
				isize    -= PAGE_SIZE;
				pg_index++;

				continue;
			}
			if ( !upl_dirty_page(pl, pg_index)) {
				panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
			}

			/*
			 * We know that we have at least one dirty page.
			 * Now checking to see how many in a row we have
			 */
			num_of_pages = 1;
			xsize = isize - PAGE_SIZE;

			while (xsize) {
				if ( !upl_dirty_page(pl, pg_index + num_of_pages))
					break;
				num_of_pages++;
				xsize -= PAGE_SIZE;
			}
			xsize = num_of_pages * PAGE_SIZE;

			if ((error = cluster_pageout(vp, upl, offset, f_offset,
			                             xsize, filesize, a_flags))) {
				if (error_ret == 0)
					error_ret = error;
			}
			f_offset += xsize;
			offset   += xsize;
			isize    -= xsize;
			pg_index += num_of_pages;
		}
		/* capture errnos bubbled out of cluster_pageout if they occurred */
		if (error_ret != 0) {
			retval = error_ret;
		}
	} /* end block for v2 pageout behavior */
	else {
		/*
		 * just call cluster_pageout for old pre-v2 behavior
		 */
		retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
		                         ap->a_size, filesize, a_flags);
	}

	/*
	 * If data was written, update the modification time of the file
	 * but only if it's mapped writable; we will have touched the
	 * modification time for direct writes.
	 */
	if (retval == 0 && (ubc_is_mapped_writable(vp)
	                    || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING))) {
		hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);

		// Check again with lock
		bool mapped_writable = ubc_is_mapped_writable(vp);
		if (mapped_writable
		    || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING)) {
			cp->c_touch_modtime = TRUE;
			cp->c_touch_chgtime = TRUE;

			/*
			 * We only need to increment the generation counter if
			 * it's currently mapped writable because we incremented
			 * the counter in hfs_vnop_mnomap.
			 */
			if (mapped_writable)
				hfs_incr_gencount(VTOC(vp));

			/*
			 * If setuid or setgid bits are set and this process is
			 * not the superuser then clear the setuid and setgid bits
			 * as a precaution against tampering.
			 */
			if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
			    (vfs_context_suser(ap->a_context) != 0)) {
				cp->c_mode &= ~(S_ISUID | S_ISGID);
			}
		}

		hfs_unlock(cp);
	}

pageout_done:
	if (is_pageoutv2) {
		/*
		 * Release the truncate lock.  Note that because
		 * we may have taken the lock recursively by
		 * being invoked via ubc_msync due to lockdown,
		 * we should release it recursively, too.
		 */
		hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
	}
	return (retval);
}
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {

		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always true on big endian, so we always validate
		 * before writing here.  On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
		if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
			if (retval)
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");
		}
	}

	/* This buffer shouldn't be locked anymore but if it is clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp %p\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite (ap);

	return (retval);
}
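
/*
 * Note on the 0x000e test above (an interpretation offered for readability,
 * not taken from other documentation): the last u_int16_t of a B-tree node is
 * the offset-table entry for record 0, and record 0 always begins immediately
 * after the 14-byte BTNodeDescriptor.  Reading 14 (0x000e) in host byte order
 * therefore indicates the node is still in native order and must be swapped
 * to big endian before it reaches disk.
 */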
int
hfs_pin_block_range(struct hfsmount *hfsmp, int pin_state, uint32_t start_block, uint32_t nblocks)
{
	_dk_cs_pin_t pin;
	unsigned ioc;
	int err;

	memset(&pin, 0, sizeof(pin));
	pin.cp_extent.offset = ((uint64_t)start_block) * HFSTOVCB(hfsmp)->blockSize;
	pin.cp_extent.length = ((uint64_t)nblocks) * HFSTOVCB(hfsmp)->blockSize;
	switch (pin_state) {
	case HFS_PIN_IT:
		ioc = _DKIOCCSPINEXTENT;
		pin.cp_flags = _DKIOCCSPINTOFASTMEDIA;
		break;
	case HFS_PIN_IT | HFS_TEMP_PIN:
		ioc = _DKIOCCSPINEXTENT;
		pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSTEMPORARYPIN;
		break;
	case HFS_PIN_IT | HFS_DATALESS_PIN:
		ioc = _DKIOCCSPINEXTENT;
		pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSPINFORSWAPFILE;
		break;
	case HFS_UNPIN_IT:
		ioc = _DKIOCCSUNPINEXTENT;
		pin.cp_flags = 0;
		break;
	case HFS_UNPIN_IT | HFS_EVICT_PIN:
		ioc = _DKIOCCSPINEXTENT;
		pin.cp_flags = _DKIOCCSPINTOSLOWMEDIA;
		break;
	default:
		return EINVAL;
	}

	err = VNOP_IOCTL(hfsmp->hfs_devvp, ioc, (caddr_t)&pin, 0, vfs_context_kernel());
	return err;
}
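
/*
 * Illustrative sketch (hypothetical extent, never compiled) of pinning a
 * single run of allocation blocks to the fast tier with the helper above.
 */
#if 0
static void
example_pin_one_extent(struct hfsmount *hfsmp)
{
	uint32_t start_block = 1024;	/* hypothetical allocation block number */
	uint32_t nblocks = 8;		/* hypothetical run length, in blocks */
	int err;

	err = hfs_pin_block_range(hfsmp, HFS_PIN_IT | HFS_TEMP_PIN, start_block, nblocks);
	if (err) {
		printf("hfs: example pin failed (%d)\n", err);
	}
}
#endif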
//
// The cnode lock should already be held on entry to this function
//
int
hfs_pin_vnode(struct hfsmount *hfsmp, struct vnode *vp, int pin_state, uint32_t *num_blocks_pinned)
{
	struct filefork *fp = VTOF(vp);
	int i, err=0, need_put=0;
	struct vnode *rsrc_vp=NULL;
	uint32_t npinned = 0;

	if (num_blocks_pinned) {
		*num_blocks_pinned = 0;
	}

	if (vnode_vtype(vp) != VREG) {
		/* Not allowed to pin directories or symlinks */
		printf("hfs: can't pin vnode of type %d\n", vnode_vtype(vp));
		return (EPERM);
	}

	if (fp->ff_unallocblocks) {
		printf("hfs: can't pin a vnode w/unalloced blocks (%d)\n", fp->ff_unallocblocks);
		return (EINVAL);
	}

	/*
	 * It is possible that if the caller unlocked/re-locked the cnode after checking
	 * for C_NOEXISTS|C_DELETED that the file could have been deleted while the
	 * cnode was unlocked.  So check the condition again and return ENOENT so that
	 * the caller knows why we failed to pin the vnode.
	 */
	if (VTOC(vp)->c_flag & (C_NOEXISTS|C_DELETED)) {
		// makes no sense to pin something that's pending deletion
		return ENOENT;
	}

	if (fp->ff_blocks == 0 && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
		if (!VNODE_IS_RSRC(vp) && hfs_vgetrsrc(hfsmp, vp, &rsrc_vp) == 0) {
			//printf("hfs: fileid %d resource fork nblocks: %d / size: %lld\n", VTOC(vp)->c_fileid,
			//       VTOC(rsrc_vp)->c_rsrcfork->ff_blocks,VTOC(rsrc_vp)->c_rsrcfork->ff_size);

			fp = VTOC(rsrc_vp)->c_rsrcfork;
			need_put = 1;
		}
	}
	if (fp->ff_blocks == 0) {
		if (need_put) {
			//
			// use a distinct error code for a compressed file that has no resource fork;
			// we return EALREADY to indicate that the data is already probably hot file
			// cached because it's in an EA and the attributes btree is on the ssd
			//
			err = EALREADY;
		} else {
			err = EINVAL;
		}
		goto out;
	}

	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		if (fp->ff_extents[i].startBlock == 0) {
			break;
		}

		err = hfs_pin_block_range(hfsmp, pin_state, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount);
		if (err) {
			break;
		}
		npinned += fp->ff_extents[i].blockCount;
	}

	if (err || npinned == 0) {
		goto out;
	}

	if (fp->ff_extents[kHFSPlusExtentDensity-1].startBlock) {
		uint32_t pblocks;
		uint8_t forktype = 0;

		if (fp == VTOC(vp)->c_rsrcfork) {
			forktype = 0xff;
		}
		/*
		 * The file could have overflow extents, better pin them.
		 *
		 * We assume that since we are holding the cnode lock for this cnode,
		 * the file's extents cannot be manipulated, but the tree could, so we
		 * need to ensure that it doesn't change behind our back as we iterate it.
		 */
		int lockflags = hfs_systemfile_lock (hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
		err = hfs_pin_overflow_extents(hfsmp, VTOC(vp)->c_fileid, forktype, &pblocks);
		hfs_systemfile_unlock (hfsmp, lockflags);

		if (err) {
			goto out;
		}
		npinned += pblocks;
	}

out:
	if (num_blocks_pinned) {
		*num_blocks_pinned = npinned;
	}

	if (need_put && rsrc_vp) {
		//
		// have to unlock the cnode since it's shared between the
		// resource fork vnode and the data fork vnode (and the
		// vnode_put() may need to re-acquire the cnode lock to
		// reclaim the resource fork vnode)
		//
		hfs_unlock(VTOC(vp));
		vnode_put(rsrc_vp);
		hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
	}
	return err;
}
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 *  -----------------     -----------------
 *  |///////////////|     |               |     STEP 1 (acquire new blocks)
 *  -----------------     -----------------
 *
 *  -----------------     -----------------
 *  |///////////////|     |///////////////|     STEP 2 (clone data)
 *  -----------------     -----------------
 *
 *                        -----------------
 *                        |///////////////|     STEP 3 (head truncate blocks)
 *                        -----------------
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
	struct proc *p)
{
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	u_int32_t headblks;
	u_int32_t datablks;
	u_int32_t blksize;
	u_int32_t growsize;
	u_int32_t nextallocsave;
	daddr64_t sector_a, sector_b;
	int eflags;
	off_t newbytes;
	int retval;
	int lockflags = 0;
	int took_trunc_lock = 0;
	int started_tr = 0;
	enum vtype vnodetype;

	vnodetype = vnode_vtype(vp);
	if (vnodetype != VREG) {
		/* Not allowed to move symlinks. */
		return (EPERM);
	}

	hfsmp = VTOHFS(vp);
	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
		return (ENOSPC);
	}

	cp = VTOC(vp);
	fp = VTOF(vp);
	if (fp->ff_unallocblocks)
		return (EINVAL);

	/*
	 * <rdar://problem/9118426>
	 * Disable HFS file relocation on content-protected filesystems
	 */
	if (cp_fs_protected (hfsmp->hfs_mp)) {
		return EINVAL;
	}

	/* If it's an SSD, also disable HFS relocation */
	if (hfsmp->hfs_flags & HFS_SSD) {
		return EINVAL;
	}

	blksize = hfsmp->blockSize;
	if (blockHint == 0)
		blockHint = hfsmp->nextAllocation;

	if (fp->ff_size > 0x7fffffff) {
		return (EFBIG);
	}

	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
		hfs_unlock(cp);
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		/* Force lock since caller expects lock to be held. */
		if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return (retval);
		}
		/* No need to continue if file was removed. */
		if (cp->c_flag & C_NOEXISTS) {
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return (ENOENT);
		}
		took_trunc_lock = 1;
	}
	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;

	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
		return (EINVAL);
	}
	started_tr = 1;
	/*
	 * Protect the extents b-tree and the allocation bitmap
	 * during MapFileBlockC and ExtendFileC operations.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
	if (retval) {
		retval = MacToVFSError(retval);
		goto out;
	}

	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
	nextallocsave = hfsmp->nextAllocation;
	retval = ExtendFileC(hfsmp, (FCB *)fp, growsize, blockHint, eflags, &newbytes);
	if (eflags & kEFMetadataMask) {
		hfs_lock_mount(hfsmp);
		HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
		MarkVCBDirty(hfsmp);
		hfs_unlock_mount(hfsmp);
	}

	retval = MacToVFSError(retval);
	if (retval == 0) {
		cp->c_flag |= C_MODIFIED;
		if (newbytes < growsize) {
			retval = ENOSPC;
			goto restore;
		} else if (fp->ff_blocks < (headblks + datablks)) {
			printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
			retval = ENOSPC;
			goto restore;
		}

		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
		if (retval) {
			retval = MacToVFSError(retval);
		} else if ((sector_a + 1) == sector_b) {
			retval = ENOSPC;
			goto restore;
		} else if ((eflags & kEFMetadataMask) &&
		           ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
		              hfsmp->hfs_metazone_end)) {
			const char * filestr;
			char emptystr = '\0';

			if (cp->c_desc.cd_nameptr != NULL) {
				filestr = (const char *)&cp->c_desc.cd_nameptr[0];
			} else if (vnode_name(vp) != NULL) {
				filestr = vnode_name(vp);
			} else {
				filestr = &emptystr;
			}
			retval = ENOSPC;
			goto restore;
		}
	}
	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	hfs_end_transaction(hfsmp);
	started_tr = 0;

	if (retval) {
		/*
		 * Check to see if failure is due to excessive fragmentation.
		 */
		if ((retval == ENOSPC) &&
		    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
		}
		goto out;
	}
	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */

	if (vnodetype == VLNK)
		retval = EPERM;
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);

	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto out;
	}
	started_tr = 1;
	if (retval)
		goto restore;

	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	if (retval)
		goto restore;
out:
	if (took_trunc_lock)
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/* Push cnode's new extent data to disk. */
	if (retval == 0) {
		hfs_update(vp, 0);
	}
	if (hfsmp->jnl) {
		if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
			(void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
		else
			(void) hfs_flushvolumeheader(hfsmp, 0);
	}
exit:
	if (started_tr)
		hfs_end_transaction(hfsmp);

	return (retval);

restore:
	if (fp->ff_blocks == headblks) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
		goto exit;
	}
	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}

	(void) TruncateFileC(hfsmp, (FCB *)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
	                     FTOC(fp)->c_fileid, false);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;

	if (took_trunc_lock)
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
	goto exit;
}
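
/*
 * Illustrative caller sketch (hypothetical helper, never compiled):
 * hfs_relocate() expects the cnode locked exclusive on entry and leaves it
 * locked on return; a caller might drive it like this.
 */
#if 0
static int
example_relocate_out_of_metazone(struct vnode *vp, vfs_context_t ctx)
{
	struct hfsmount *hfsmp = VTOHFS(vp);
	int error;

	hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
	error = hfs_relocate(vp, hfsmp->hfs_metazone_end + 1,
	                     vfs_context_ucred(ctx), vfs_context_proc(ctx));
	hfs_unlock(VTOC(vp));
	return error;
}
#endif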
/*
 * Clone a file's data within the file.
 *
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	caddr_t  bufp;
	size_t bufsize;
	size_t copysize;
	size_t iosize;
	size_t offset;
	off_t writebase;
	uio_t auio;
	int error = 0;

	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 128 * 1024);
	offset = 0;

	hfs_unlock(VTOC(vp));

#if CONFIG_PROTECT
	if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
		hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
		return (error);
	}
#endif /* CONFIG_PROTECT */

	bufp = hfs_malloc(bufsize);

	auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);

	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_read(vp, auio, copysize, IO_NOCACHE);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
			error = EIO;
			break;
		}

		uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_write(vp, auio, writebase + offset,
		                      writebase + offset + iosize,
		                      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
			error = EIO;
			break;
		}
		offset += iosize;
	}
	uio_free(auio);

	if ((blksize & PAGE_MASK)) {
		/*
		 * since the copy may not have started on a PAGE
		 * boundary (or may not have ended on one), we
		 * may have pages left in the cache since NOCACHE
		 * will let partially written pages linger...
		 * lets just flush the entire range to make sure
		 * we don't have any pages left that are beyond
		 * (or intersect) the real LEOF of this file
		 */
		ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
	} else {
		/*
		 * No need to call ubc_msync or hfs_invalbuf
		 * since the file was copied using IO_NOCACHE and
		 * the copy was done starting and ending on a page
		 * boundary in the file.
		 */
	}
	hfs_free(bufp, bufsize);

	hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
	return (error);
}
/*
 * Clone a system (metadata) file.
 *
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
	caddr_t  bufp;
	char * offset;
	size_t bufsize;
	size_t iosize;
	struct buf *bp = NULL;
	daddr64_t blkno;
	daddr64_t blk;
	daddr64_t start_blk;
	daddr64_t last_blk;
	int breadcnt;
	int i;
	int error = 0;

	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;

	bufp = hfs_malloc(bufsize);

	start_blk = ((daddr64_t)blkstart * blksize) / iosize;
	last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;
	blkno = 0;

	while (blkno < last_blk) {
		/*
		 * Read up to a megabyte
		 */
		offset = bufp;
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
			error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
			if (error) {
				printf("hfs_clonesysfile: meta_bread error %d\n", error);
				break;
			}
			if (buf_count(bp) != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
				break;
			}
			bcopy((char *)buf_dataptr(bp), offset, iosize);

			buf_markinvalid(bp);
			buf_brelse(bp);
			bp = NULL;

			offset += iosize;
		}
		if (error)
			break;

		/*
		 * Write up to a megabyte
		 */
		offset = bufp;
		for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
			bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
			if (bp == NULL) {
				printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
				error = EIO;
				break;
			}
			bcopy(offset, (char *)buf_dataptr(bp), iosize);
			error = (int)buf_bwrite(bp);
			bp = NULL;
			if (error)
				break;
			offset += iosize;
		}
		if (error)
			break;
	}
	if (bp) {
		buf_brelse(bp);
	}

	hfs_free(bufp, bufsize);

	error = hfs_fsync(vp, MNT_WAIT, 0, p);

	return (error);
}
errno_t hfs_flush_invalid_ranges(vnode_t vp)
{
	cnode_t *cp = VTOC(vp);

	hfs_assert(cp->c_lockowner == current_thread());
	hfs_assert(cp->c_truncatelockowner == current_thread());

	if (!ISSET(cp->c_flag, C_ZFWANTSYNC) && !cp->c_zftimeout)
		return 0;

	filefork_t *fp = VTOF(vp);

	/*
	 * We can't hold the cnode lock whilst we call cluster_write so we
	 * need to copy the extents into a local buffer.
	 */
	int max_exts = 16;
	struct ext {
		off_t start, end;
	} exts_buf[max_exts];		// 256 bytes
	struct ext *exts = exts_buf;
	int ext_count = 0;
	errno_t ret = 0;

	struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);

	while (r) {
		/* If we have more than can fit in our stack buffer, switch
		   to a heap buffer. */
		if (exts == exts_buf && ext_count == max_exts) {
			max_exts = 256;
			exts = hfs_malloc(sizeof(struct ext) * max_exts);
			memcpy(exts, exts_buf, ext_count * sizeof(struct ext));
		}

		struct rl_entry *next = TAILQ_NEXT(r, rl_link);

		exts[ext_count++] = (struct ext){ r->rl_start, r->rl_end };

		if (!next || (ext_count == max_exts && exts != exts_buf)) {
			hfs_unlock(cp);
			for (int i = 0; i < ext_count; ++i) {
				ret = cluster_write(vp, NULL, fp->ff_size, exts[i].end + 1,
				                    exts[i].start, 0,
				                    IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE);
				if (ret) {
					hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
					goto exit;
				}
			}

			if (!next) {
				hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
				break;
			}

			/* Push any existing clusters which should clean up our invalid
			   ranges as they go through hfs_vnop_blockmap. */
			cluster_push(vp, 0);

			hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);

			/*
			 * Get back to where we were (given we dropped the lock).
			 * This shouldn't be many because we pushed above.
			 */
			TAILQ_FOREACH(r, &fp->ff_invalidranges, rl_link) {
				if (r->rl_end > exts[ext_count - 1].end)
					break;
			}

			ext_count = 0;
		} else
			r = next;
	}

exit:

	if (exts != exts_buf)
		hfs_free(exts, sizeof(struct ext) * max_exts);

	return ret;
}