/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* @(#)hfs_readwrite.c 1.0
 *
 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
 *
 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/buf_internal.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <sys/vfs_context.h>
#include <sys/fsevents.h>
#include <kern/kalloc.h>
#include <sys/sysctl.h>
#include <sys/fsctl.h>
#include <sys/mount_internal.h>
#include <sys/file_internal.h>
#include <libkern/OSDebug.h>
#include <miscfs/specfs/specdev.h>
#include <sys/ubc_internal.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <IOKit/IOBSD.h>
#include <sys/kdebug.h>

#include "hfs_attrlist.h"
#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
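/*
 * Worked example (illustrative, not from the original source): assuming a
 * 4 KB page size and a MAXPHYSIO of, say, 128 KB, a 64 KB transfer is both
 * page-aligned and no larger than MAXPHYSIO/2, so it can cluster; a
 * 5000-byte transfer fails the alignment test and a 1 MB transfer exceeds
 * MAXPHYSIO/2, so neither can cluster.
 */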
enum {
    MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
};

/* from bsd/hfs/hfs_vfsops.c */
extern int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

/* from hfs_hotfiles.c */
extern int hfs_pin_overflow_extents(struct hfsmount *hfsmp, uint32_t fileid,
                                    uint8_t forktype, uint32_t *pinned);

static int hfs_clonefile(struct vnode *, int, int, int);
static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);

/* from bsd/hfs/hfs_vnops.c */
extern decmpfs_cnode *hfs_lazy_init_decmpfs_cnode(struct cnode *cp);

int flush_cache_on_write = 0;
SYSCTL_INT(_kern, OID_AUTO, flush_cache_on_write,
           CTLFLAG_RW | CTLFLAG_LOCKED,
           &flush_cache_on_write, 0,
           "always flush the drive cache on writes to uncached files");
/*
 * Read data from a file.
 */
int
hfs_vnop_read(struct vnop_read_args *ap)
{
    /*
       struct vnop_read_args {
            struct vnodeop_desc *a_desc;
            ...
            vfs_context_t a_context;
       };
     */
    uio_t uio = ap->a_uio;
    struct vnode *vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    struct hfsmount *hfsmp;
    off_t filesize;
    off_t filebytes;
    off_t start_resid = uio_resid(uio);
    off_t offset = uio_offset(uio);
    int retval = 0;
    int took_truncate_lock = 0;
    int io_throttle = 0;
    int throttled_count = 0;

    /* Preflight checks */
    if (!vnode_isreg(vp)) {
        /* can only read regular files */
        return (EPERM);
    }
    if (start_resid == 0)
        return (0);             /* Nothing left to do */
    if (offset < 0)
        return (EINVAL);        /* can't read from a negative offset */

    if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
        (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
        /* Don't allow unencrypted io request from user space */
        return EPERM;
    }
#if HFS_COMPRESSION
    if (VNODE_IS_RSRC(vp)) {
        if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
            return 0;
        }
        /* otherwise read the resource fork normally */
    } else {
        int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
        if (compressed) {
            retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
            if (retval == 0 && !(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
                (void) hfs_addhotfile(vp);
            }
            if (compressed) {
                if (retval == 0) {
                    /* successful read, update the access time */
                    VTOC(vp)->c_touch_acctime = TRUE;

                    //
                    // compressed files are not traditional hot file candidates
                    // but they may be for CF (which ignores the ff_bytesread)
                    //
                    if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
                        VTOF(vp)->ff_bytesread = 0;
                    }
                }
                return retval;
            }
            /* otherwise the file was converted back to a regular file while we were reading it */
            retval = 0;
        } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
            int error;

            error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
            if (error) {
                return error;
            }
        }
    }
#endif /* HFS_COMPRESSION */

#if CONFIG_PROTECT
    if ((retval = cp_handle_vnop(vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
        goto exit;
    }
#endif // CONFIG_PROTECT
    /*
     * If this read request originated from a syscall (as opposed to
     * an in-kernel page fault or something), then set it up for
     * throttle checks.
     */
    if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
        io_throttle = IO_RETURN_ON_THROTTLE;
    }

    cp = VTOC(vp);
    fp = VTOF(vp);
    hfsmp = VTOHFS(vp);

    /* Protect against a size change. */
    hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
    took_truncate_lock = 1;

    filesize = fp->ff_size;
    filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

    /*
     * Check the file size. Note that per POSIX spec, we return 0 at
     * file EOF, so attempting a read at an offset that is too big
     * should just return 0 on HFS+. Since the return value was initialized
     * to 0 above, we just jump to exit.  HFS Standard has its own behavior.
     */
    if (offset > filesize) {
        if ((hfsmp->hfs_flags & HFS_STANDARD) &&
            (offset > (off_t)MAXHFSFILESIZE)) {
            /* ... */
        }
        goto exit;
    }
    KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_START,
        (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

    retval = cluster_read(vp, uio, filesize, ap->a_ioflag | io_throttle);

    cp->c_touch_acctime = TRUE;

    KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_END,
        (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
    /*
     * Keep track of blocks read.
     */
    if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
        int took_cnode_lock = 0;
        off_t bytesread;

        bytesread = start_resid - uio_resid(uio);

        /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
        if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
            hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
            took_cnode_lock = 1;
        }
        /*
         * If this file hasn't been seen since the start of
         * the current sampling period then start over.
         */
        if (cp->c_atime < hfsmp->hfc_timebase) {
            struct timeval tv;

            fp->ff_bytesread = bytesread;
            microtime(&tv);
            cp->c_atime = tv.tv_sec;
        } else {
            fp->ff_bytesread += bytesread;
        }

        if (!(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
            //
            // We don't add hotfiles for processes doing IO_EVTONLY I/O
            // on the assumption that they're system processes such as
            // mdworker which scan everything in the system (and thus
            // do not represent user-initiated access to files)
            //
            (void) hfs_addhotfile(vp);
        }
        if (took_cnode_lock)
            hfs_unlock(cp);
    }
exit:
    if (took_truncate_lock) {
        hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
    }
    if (retval == EAGAIN) {
        throttle_lowpri_io(1);
        throttled_count++;
    }
    if (throttled_count) {
        throttle_info_reset_window((uthread_t)get_bsdthread_info(current_thread()));
    }
    return (retval);
}
/*
 * Ideally, this wouldn't be necessary; the cluster code should be
 * able to handle this on the read-side.  See <rdar://20420068>.
 */
static errno_t hfs_zero_eof_page(vnode_t vp, off_t zero_up_to)
{
    assert(VTOC(vp)->c_lockowner != current_thread());
    assert(VTOC(vp)->c_truncatelockowner == current_thread());

    struct filefork *fp = VTOF(vp);

    if (!(fp->ff_size & PAGE_MASK_64) || zero_up_to <= fp->ff_size) {
        /* Nothing to do */
        return 0;
    }

    zero_up_to = MIN(zero_up_to, (off_t)round_page_64(fp->ff_size));

    /* N.B. At present, @zero_up_to is not important because the cluster
       code will always zero up to the end of the page anyway. */
    return cluster_write(vp, NULL, fp->ff_size, zero_up_to,
                         fp->ff_size, 0, IO_HEADZEROFILL);
}
/*
 * Write data to a file.
 */
int
hfs_vnop_write(struct vnop_write_args *ap)
{
    uio_t uio = ap->a_uio;
    struct vnode *vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    struct hfsmount *hfsmp;
    kauth_cred_t cred = NULL;
    off_t origFileSize;
    off_t writelimit;
    off_t bytesToAdd = 0;
    off_t actualBytesAdded;
    off_t filebytes;
    off_t offset;
    ssize_t resid;
    int eflags;
    int ioflag = ap->a_ioflag;
    int retval = 0;
    int lockflags;
    int cnode_locked = 0;
    int partialwrite = 0;
    time_t orig_ctime = VTOC(vp)->c_ctime;
    int took_truncate_lock = 0;
    int io_return_on_throttle = 0;
    int throttled_count = 0;
#if HFS_COMPRESSION
    if (hfs_file_is_compressed(VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
        int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
        switch (state) {
        case FILE_IS_COMPRESSED:
            return EACCES;
        case FILE_IS_CONVERTING:
            /* if FILE_IS_CONVERTING, we allow writes but do not
               bother with snapshots or else we will deadlock. */
            break;
        default:
            printf("invalid state %d for compressed file\n", state);
            /* fall through */
        }
    } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
        int error;

        error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
        if (error != 0) {
            return error;
        }
    }

    check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
#endif

    if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
        (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
        /* Don't allow unencrypted io request from user space */
        return EPERM;
    }
    resid = uio_resid(uio);
    offset = uio_offset(uio);

    if (!vnode_isreg(vp))
        return (EPERM); /* Can only write regular files */

    cp = VTOC(vp);
    fp = VTOF(vp);
    hfsmp = VTOHFS(vp);

#if CONFIG_PROTECT
    if ((retval = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
        goto exit;
    }
#endif

    eflags = kEFDeferMask;      /* defer file block allocations */
#if HFS_SPARSE_DEV
    /*
     * When the underlying device is sparse and space
     * is low (< 8MB), stop doing delayed allocations
     * and begin doing synchronous I/O.
     */
    if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
        (hfs_freeblks(hfsmp, 0) < 2048)) {
        eflags &= ~kEFDeferMask;
    }
#endif /* HFS_SPARSE_DEV */
    if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) ==
        (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) {
        io_return_on_throttle = IO_RETURN_ON_THROTTLE;
    }

again:
    /*
     * Protect against a size change.
     *
     * Note: If took_truncate_lock is true, then we previously got the lock shared
     * but needed to upgrade to exclusive.  So try getting it exclusive from the
     * start.
     */
    if (ioflag & IO_APPEND || took_truncate_lock) {
        hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
    } else {
        hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
    }
    took_truncate_lock = 1;
    if (ioflag & IO_APPEND) {
        uio_setoffset(uio, fp->ff_size);
        offset = fp->ff_size;
    }
    if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
        retval = EPERM;
        goto exit;
    }

    cred = vfs_context_ucred(ap->a_context);
    if (cred && suser(cred, NULL) != 0)
        eflags |= kEFReserveMask;

    origFileSize = fp->ff_size;
    writelimit = offset + resid;
    /*
     * We may need an exclusive truncate lock for several reasons, all
     * of which are because we may be writing to a (portion of a) block
     * for the first time, and we need to make sure no readers see the
     * prior, uninitialized contents of the block.  The cases are:
     *
     * 1. We have unallocated (delayed allocation) blocks.  We may be
     *    allocating new blocks to the file and writing to them.
     *    (A more precise check would be whether the range we're writing
     *    to contains delayed allocation blocks.)
     * 2. We need to extend the file.  The bytes between the old EOF
     *    and the new EOF are not yet initialized.  This is important
     *    even if we're not allocating new blocks to the file.  If the
     *    old EOF and new EOF are in the same block, we still need to
     *    protect that range of bytes until they are written for the
     *    first time.
     *
     * If we had a shared lock with the above cases, we need to try to upgrade
     * to an exclusive lock.  If the upgrade fails, we will lose the shared
     * lock, and will need to take the truncate lock again; the took_truncate_lock
     * flag will still be set, causing us to try for an exclusive lock next time.
     */
    if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
        ((fp->ff_unallocblocks != 0) ||
         (writelimit > origFileSize))) {
        if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
            /*
             * Lock upgrade failed and we lost our shared lock, try again.
             * Note: we do not set took_truncate_lock=0 here.  Leaving it
             * set to 1 will cause us to try to get the lock exclusive.
             */
            goto again;
        } else {
            /* Store the owner in the c_truncatelockowner field if we successfully upgrade */
            cp->c_truncatelockowner = current_thread();
        }
    }
    if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
        goto exit;
    }
    cnode_locked = 1;

    filebytes = hfs_blk_to_bytes(fp->ff_blocks, hfsmp->blockSize);

    if (offset > filebytes
        && (hfs_blk_to_bytes(hfs_freeblks(hfsmp, ISSET(eflags, kEFReserveMask)),
                             hfsmp->blockSize) < offset - filebytes)) {
        retval = ENOSPC;
        goto exit;
    }

    KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START,
        (int)offset, uio_resid(uio), (int)fp->ff_size,
        (int)filebytes, 0);

    /* Check if we do not need to extend the file */
    if (writelimit <= filebytes) {
        goto sizeok;
    }

    bytesToAdd = writelimit - filebytes;
    retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
                       cred, 0);
    if (retval)
        goto exit;

    if (hfs_start_transaction(hfsmp) != 0) {
        retval = EINVAL;
        goto exit;
    }

    while (writelimit > filebytes) {
        bytesToAdd = writelimit - filebytes;

        /* Protect extents b-tree and allocation bitmap */
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

        /* Files that are changing size are not hot file candidates. */
        if (hfsmp->hfc_stage == HFC_RECORDING) {
            fp->ff_bytesread = 0;
        }
        retval = MacToVFSError(ExtendFileC(hfsmp, (FCB*)fp, bytesToAdd,
                                           0, eflags, &actualBytesAdded));

        hfs_systemfile_unlock(hfsmp, lockflags);

        if ((actualBytesAdded == 0) && (retval == E_NONE))
            retval = ENOSPC;
        if (retval != E_NONE)
            break;
        filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
        KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE,
            (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
    }
    (void) hfs_update(vp, 0);
    (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
    (void) hfs_end_transaction(hfsmp);
    /*
     * If we didn't grow the file enough try a partial write.
     * POSIX expects this behavior.
     */
    if ((retval == ENOSPC) && (filebytes > offset)) {
        retval = 0;
        partialwrite = 1;
        uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
        resid -= bytesToAdd;
        writelimit = filebytes;
    }
sizeok:
    if (retval == E_NONE) {
        off_t filesize;
        off_t head_off;
        int lflag;

        if (writelimit > fp->ff_size) {
            filesize = writelimit;
            struct timeval tv;
            rl_add(fp->ff_size, writelimit - 1, &fp->ff_invalidranges);
            microuptime(&tv);
            cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
        } else {
            filesize = fp->ff_size;
        }
        lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);

        /*
         * We no longer use IO_HEADZEROFILL or IO_TAILZEROFILL (except
         * for one case below).  For the regions that lie before the
         * beginning and after the end of this write that are in the
         * same page, we let the cluster code handle zeroing that out
         * if necessary.  If those areas are not cached, the cluster
         * code will try and read those areas in, and in the case
         * where those regions have never been written to,
         * hfs_vnop_blockmap will consult the invalid ranges and then
         * indicate that.  The cluster code will zero out those areas.
         */

        head_off = trunc_page_64(offset);

        if (head_off < offset && head_off >= fp->ff_size) {
            /*
             * The first page is beyond current EOF, so as an
             * optimisation, we can pass IO_HEADZEROFILL.
             */
            lflag |= IO_HEADZEROFILL;
        }
        /*
         * We need to tell UBC the fork's new size BEFORE calling
         * cluster_write, in case any of the new pages need to be
         * paged out before cluster_write completes (which does happen
         * in embedded systems due to extreme memory pressure).
         * Similarly, we need to tell hfs_vnop_pageout what the new EOF
         * will be, so that it can pass that on to cluster_pageout, and
         * allow those pageouts.
         *
         * We don't update ff_size yet since we don't want pageins to
         * be able to see uninitialized data between the old and new
         * EOF, until cluster_write has completed and initialized that
         * part of the file.
         *
         * The vnode pager relies on the file size last given to UBC via
         * ubc_setsize.  hfs_vnop_pageout relies on fp->ff_new_size or
         * ff_size (whichever is larger).  NOTE: ff_new_size is always
         * zero, unless we are extending the file via write.
         */
        if (filesize > fp->ff_size) {
            retval = hfs_zero_eof_page(vp, offset);
            if (retval)
                goto exit;
            fp->ff_new_size = filesize;
            ubc_setsize(vp, filesize);
        }
        retval = cluster_write(vp, uio, fp->ff_size, filesize, head_off,
                               0, lflag | IO_NOZERODIRTY | io_return_on_throttle);

        fp->ff_new_size = 0;    /* no longer extending; use ff_size */
        if (retval == EAGAIN) {
            /*
             * EAGAIN indicates that we still have I/O to do, but
             * that we now need to be throttled
             */
            if (resid != uio_resid(uio)) {
                /*
                 * did manage to do some I/O before returning EAGAIN
                 */
                resid = uio_resid(uio);
                offset = uio_offset(uio);

                cp->c_touch_chgtime = TRUE;
                cp->c_touch_modtime = TRUE;
                hfs_incr_gencount(cp);
            }
            if (filesize > fp->ff_size) {
                /*
                 * we called ubc_setsize before the call to
                 * cluster_write... since we only partially
                 * completed the I/O, we need to
                 * re-adjust our idea of the filesize based
                 * on what is actually on disk
                 */
                ubc_setsize(vp, offset);

                fp->ff_size = offset;
            }
            goto exit;
        }
        if (retval) {
            if (filesize > origFileSize) {
                ubc_setsize(vp, origFileSize);
            }
            goto exit;
        }

        if (filesize > origFileSize) {
            fp->ff_size = filesize;

            /* Files that are changing size are not hot file candidates. */
            if (hfsmp->hfc_stage == HFC_RECORDING) {
                fp->ff_bytesread = 0;
            }
        }
        fp->ff_new_size = 0;    /* ff_size now has the correct size */
    }
    if (partialwrite) {
        uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
        resid += bytesToAdd;
    }

    // XXXdbg - see radar 4871353 for more info
    if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
        hfs_flush(hfsmp, HFS_FLUSH_CACHE);
    }
exit:
    if (cnode_locked == 0) {
        hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
        cnode_locked = 1;
    }

    if (resid > uio_resid(uio)) {
        cp->c_touch_chgtime = TRUE;
        cp->c_touch_modtime = TRUE;
        hfs_incr_gencount(cp);

        /*
         * If we successfully wrote any data, and we are not the superuser
         * we clear the setuid and setgid bits as a precaution against
         * tampering.
         */
        if (cp->c_mode & (S_ISUID | S_ISGID)) {
            cred = vfs_context_ucred(ap->a_context);
            if (cred && suser(cred, NULL)) {
                cp->c_mode &= ~(S_ISUID | S_ISGID);
            }
        }
    }
    if (retval) {
        if (ioflag & IO_UNIT) {
            (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
                               0, ap->a_context);
            uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
            uio_setresid(uio, resid);
            filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
        }
    } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio)))
        retval = hfs_update(vp, 0);

    /* Updating vcbWrCnt doesn't need to be atomic. */
    hfsmp->vcbWrCnt++;

    KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END,
        (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
    if (retval && took_truncate_lock
        && cp->c_truncatelockowner == current_thread()) {
        fp->ff_new_size = 0;
        rl_remove(fp->ff_size, RL_INFINITY, &fp->ff_invalidranges);
    }

    if (cnode_locked) {
        hfs_unlock(cp);
    }

    if (took_truncate_lock) {
        hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
    }
    if (retval == EAGAIN) {
        throttle_lowpri_io(1);
        throttled_count++;

        retval = 0;
        goto again;
    }
    if (throttled_count) {
        throttle_info_reset_window((uthread_t)get_bsdthread_info(current_thread()));
    }
    return (retval);
}
/* support for the "bulk-access" fcntl */

#define CACHE_LEVELS 16
#define NUM_CACHE_ENTRIES (64*16)
#define PARENT_IDS_FLAG 0x100
struct access_cache {
    int numcached;
    int cachehits; /* these two for statistics gathering */
    int lookups;
    unsigned int *acache;
    unsigned char *haveaccess;
};

struct access_t {
    uid_t   uid;            /* IN: effective user id */
    short   flags;          /* IN: access requested (i.e. R_OK) */
    short   num_groups;     /* IN: number of groups user belongs to */
    int     num_files;      /* IN: number of files to process */
    int     *file_ids;      /* IN: array of file ids */
    gid_t   *groups;        /* IN: array of groups */
    short   *access;        /* OUT: access info for each file (0 for 'has access') */
} __attribute__((unavailable)); // this structure is for reference purposes only
struct user32_access_t {
    uid_t           uid;        /* IN: effective user id */
    short           flags;      /* IN: access requested (i.e. R_OK) */
    short           num_groups; /* IN: number of groups user belongs to */
    int             num_files;  /* IN: number of files to process */
    user32_addr_t   file_ids;   /* IN: array of file ids */
    user32_addr_t   groups;     /* IN: array of groups */
    user32_addr_t   access;     /* OUT: access info for each file (0 for 'has access') */
};

struct user64_access_t {
    uid_t           uid;        /* IN: effective user id */
    short           flags;      /* IN: access requested (i.e. R_OK) */
    short           num_groups; /* IN: number of groups user belongs to */
    int             num_files;  /* IN: number of files to process */
    user64_addr_t   file_ids;   /* IN: array of file ids */
    user64_addr_t   groups;     /* IN: array of groups */
    user64_addr_t   access;     /* OUT: access info for each file (0 for 'has access') */
};
// these are the "extended" versions of the above structures
// note that it is crucial that they be different sized than
// the regular version
struct ext_access_t {
    uint32_t    flags;          /* IN: access requested (i.e. R_OK) */
    uint32_t    num_files;      /* IN: number of files to process */
    uint32_t    map_size;       /* IN: size of the bit map */
    uint32_t    *file_ids;      /* IN: Array of file ids */
    char        *bitmap;        /* OUT: hash-bitmap of interesting directory ids */
    short       *access;        /* OUT: access info for each file (0 for 'has access') */
    uint32_t    num_parents;    /* future use */
    cnid_t      *parents;       /* future use */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_ext_access_t {
    uint32_t        flags;          /* IN: access requested (i.e. R_OK) */
    uint32_t        num_files;      /* IN: number of files to process */
    uint32_t        map_size;       /* IN: size of the bit map */
    user32_addr_t   file_ids;       /* IN: Array of file ids */
    user32_addr_t   bitmap;         /* OUT: hash-bitmap of interesting directory ids */
    user32_addr_t   access;         /* OUT: access info for each file (0 for 'has access') */
    uint32_t        num_parents;    /* future use */
    user32_addr_t   parents;        /* future use */
};
struct user64_ext_access_t {
    uint32_t        flags;          /* IN: access requested (i.e. R_OK) */
    uint32_t        num_files;      /* IN: number of files to process */
    uint32_t        map_size;       /* IN: size of the bit map */
    user64_addr_t   file_ids;       /* IN: array of file ids */
    user64_addr_t   bitmap;         /* OUT: hash-bitmap of interesting directory ids */
    user64_addr_t   access;         /* OUT: access info for each file (0 for 'has access') */
    uint32_t        num_parents;    /* future use */
    user64_addr_t   parents;        /* future use */
};
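/*
 * Illustrative sketch (not part of the original source) of how a user-space
 * caller might drive the extended bulk-access check handled below.  The
 * fsctl(2) selector name (HFSIOC_EXT_BULKACCESS) is an assumption based on
 * the kernel-side HFS_EXT_BULKACCESS_FSCTL handler; only the argument
 * layout mirrors ext_access_t above.
 *
 *      #include <sys/fsctl.h>
 *      #include <unistd.h>
 *
 *      static int check_ids(const char *volpath, uint32_t *ids, uint32_t n, short *out)
 *      {
 *          struct ext_access_t args = {
 *              .flags       = R_OK,   // access being asked about
 *              .num_files   = n,      // how many catalog node IDs follow
 *              .map_size    = 0,      // no directory bitmap wanted
 *              .file_ids    = ids,    // catalog node IDs to test
 *              .bitmap      = NULL,
 *              .access      = out,    // one short per file, 0 == has access
 *              .num_parents = 0,
 *              .parents     = NULL,
 *          };
 *          // selector name is an assumption; see hfs_fsctl.h for the real one
 *          return fsctl(volpath, HFSIOC_EXT_BULKACCESS, &args, 0);
 *      }
 */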
/*
 * Perform a binary search for the given parent_id. Return value is
 * the index if there is a match.  If no_match_indexp is non-NULL it
 * will be assigned with the index to insert the item (even if it was
 * not found).
 */
static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
{
    unsigned int lo = 0;

    do {
        unsigned int mid = ((hi - lo)/2) + lo;
        unsigned int this_id = array[mid];

        if (parent_id == this_id) {
            hi = mid;
            break;
        }

        if (parent_id < this_id) {
            hi = mid;
            continue;
        }

        if (parent_id > this_id) {
            lo = mid + 1;
            continue;
        }
    } while (lo < hi);

    /* check if lo and hi converged on the match */
    if (parent_id == array[hi]) {
        if (no_match_indexp) {
            *no_match_indexp = hi;
        }
        return hi;
    }

    if (no_match_indexp) {
        *no_match_indexp = hi;
    }
    return -1;
}
static int
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
    unsigned int hi;
    int index, no_match_index;

    if (cache->numcached == 0) {
        *indexp = 0;
        return 0; // table is empty, so insert at index=0 and report no match
    }

    if (cache->numcached > NUM_CACHE_ENTRIES) {
        cache->numcached = NUM_CACHE_ENTRIES;
    }

    hi = cache->numcached - 1;

    index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);

    /* if no existing entry found, find index for new one */
    if (index == -1) {
        index = no_match_index;
        *indexp = index;
        return 0;
    }

    *indexp = index;
    return 1;
}
/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in).  We currently do a replace rather
 * than an insert if the cache is full.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
    int lookup_index = -1;

    /* need to do a lookup first if -1 passed for index */
    if (index == -1) {
        if (lookup_bucket(cache, &lookup_index, nodeID)) {
            if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
                // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
                cache->haveaccess[lookup_index] = access;
            }

            /* mission accomplished */
            return;
        } else {
            index = lookup_index;
        }
    }

    /* if the cache is full, do a replace rather than an insert */
    if (cache->numcached >= NUM_CACHE_ENTRIES) {
        cache->numcached = NUM_CACHE_ENTRIES-1;

        if (index > cache->numcached) {
            index = cache->numcached;
        }
    }

    if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
        index++;
    }

    if (index >= 0 && index < cache->numcached) {
        /* only do bcopy if we're inserting */
        bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
        bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
    }

    cache->acache[index] = nodeID;
    cache->haveaccess[index] = access;
    cache->numcached++;
}
static int
snoop_callback(const cnode_t *cp, void *arg)
{
    struct cinfo *cip = arg;

    cip->uid = cp->c_uid;
    cip->gid = cp->c_gid;
    cip->mode = cp->c_mode;
    cip->parentcnid = cp->c_parentcnid;
    cip->recflags = cp->c_attr.ca_recflags;

    return (0);
}
/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id.  If the item
 * isn't incore, then go to the catalog.
 */
static int
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
               struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
{
    int error = 0;

    /* if this id matches the one the fsctl was called with, skip the lookup */
    if (cnid == skip_cp->c_cnid) {
        cnattrp->ca_uid = skip_cp->c_uid;
        cnattrp->ca_gid = skip_cp->c_gid;
        cnattrp->ca_mode = skip_cp->c_mode;
        cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
        keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
    } else {
        struct cinfo c_info;
        /* otherwise, check the cnode hash in case the file/dir is incore */
        error = hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info);

        if (error == EACCES) {
            /* ... */
        } else if (!error) {
            cnattrp->ca_uid = c_info.uid;
            cnattrp->ca_gid = c_info.gid;
            cnattrp->ca_mode = c_info.mode;
            cnattrp->ca_recflags = c_info.recflags;
            keyp->hfsPlus.parentID = c_info.parentcnid;
        } else {
            int lockflags;

            if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
                throttle_lowpri_io(1);

            lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

            /* lookup this cnid in the catalog */
            error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);

            hfs_systemfile_unlock(hfsmp, lockflags);
        }
    }

    return (error);
}
/*
 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
static int
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
                struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
                struct vfs_context *my_context,
                char *bitmap,
                uint32_t map_size,
                cnid_t *parents,
                uint32_t num_parents)
{
    int myErr = 0;
    int myResult;
    HFSCatalogNodeID thisNodeID;
    unsigned int myPerms;
    struct cat_attr cnattr;
    CatalogKey catkey;
    int cache_index = -1, scope_index = -1, scope_idx_start = -1;
    int i = 0, ids_to_cache = 0;
    int parent_ids[CACHE_LEVELS];
    thisNodeID = nodeID;
    while (thisNodeID >= kRootDirID) {
        myResult = 0;   /* default to "no access" */

        /* check the cache before resorting to hitting the catalog */

        /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
         * to look any further after hitting cached dir */

        if (lookup_bucket(cache, &cache_index, thisNodeID)) {
            cache->cachehits++;
            myErr = cache->haveaccess[cache_index];
            if (scope_index != -1) {
                if (myErr == ESRCH) {
                    myErr = 0;
                }
            } else {
                scope_index = 0;   // so we'll just use the cache result
                scope_idx_start = ids_to_cache;
            }
            myResult = (myErr == 0) ? 1 : 0;
            goto ExitThisRoutine;
        }

        if (parents) {
            int tmp;

            tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
            if (scope_index == -1)
                scope_index = tmp;
            if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
                scope_idx_start = ids_to_cache;
            }
        }

        /* remember which parents we want to cache */
        if (ids_to_cache < CACHE_LEVELS) {
            parent_ids[ids_to_cache] = thisNodeID;
            ids_to_cache++;
        }
        // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
        if (bitmap && map_size) {
            bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
        }
        /* do the lookup (checks the cnode hash, then the catalog) */
        myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
        if (myErr) {
            goto ExitThisRoutine; /* no access */
        }

        /* Root always gets access. */
        if (suser(myp_ucred, NULL) == 0) {
            thisNodeID = catkey.hfsPlus.parentID;
            myResult = 1;
            continue;
        }

        // if the thing has acl's, do the full permission check
        if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
            struct vnode *vp;

            /* get the vnode for this cnid */
            myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
            if (myErr) {
                myResult = 0;
                goto ExitThisRoutine;
            }

            thisNodeID = VTOC(vp)->c_parentcnid;

            hfs_unlock(VTOC(vp));

            if (vnode_vtype(vp) == VDIR) {
                myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
            } else {
                myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
            }

            vnode_put(vp);
            if (myErr) {
                myResult = 0;
                goto ExitThisRoutine;
            }
        } else {
            unsigned int flags;
            int mode = cnattr.ca_mode & S_IFMT;

            myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp, myp_ucred, theProcPtr);

            if (mode == S_IFDIR) {
                flags = R_OK | X_OK;
            } else {
                flags = R_OK;
            }
            if ( (myPerms & flags) != flags) {
                myResult = 0;
                myErr = EACCES;
                goto ExitThisRoutine; /* no access */
            }

            /* up the hierarchy we go */
            thisNodeID = catkey.hfsPlus.parentID;
        }
    }
    /* if here, we have access to this node */
    myResult = 1;

ExitThisRoutine:
    if (parents && myErr == 0 && scope_index == -1) {
        myErr = ESRCH;
    }

    if (myErr) {
        myResult = 0;
    }
    *err = myErr;

    /* cache the parent directory(ies) */
    for (i = 0; i < ids_to_cache; i++) {
        if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
            add_node(cache, -1, parent_ids[i], ESRCH);
        } else {
            add_node(cache, -1, parent_ids[i], myErr);
        }
    }

    return (myResult);
}
static int
do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
                     struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
{
    /*
     * NOTE: on entry, the vnode has an io_ref. In case this vnode
     * happens to be in our list of file_ids, we'll note it and
     * avoid calling hfs_chashget_nowait() on that id as that
     * will cause a "locking against myself" panic.
     */
    Boolean check_leaf = true;

    struct user64_ext_access_t *user_access_structp;
    struct user64_ext_access_t tmp_user_access;
    struct access_cache cache;

    int error = 0, prev_parent_check_ok = 1;
    unsigned int num_files = 0;
    int map_size = 0;
    int num_parents = 0;
    int *file_ids = NULL;
    short *access = NULL;
    char *bitmap = NULL;
    cnid_t *parents = NULL;
    int leaf_index;
    int is64bit;
    int myaccess;

    cnid_t cnid;
    cnid_t prevParent_cnid = 0;
    unsigned int myPerms;
    short flags;
    unsigned int i;
    struct cat_attr cnattr;
    CatalogKey catkey;

    struct cnode *skip_cp = VTOC(vp);
    kauth_cred_t cred = vfs_context_ucred(context);
    proc_t p = vfs_context_proc(context);

    is64bit = proc_is64bit(p);

    /* initialize the local cache and buffers */
    cache.numcached = 0;
    cache.cachehits = 0;
    cache.lookups = 0;
    cache.acache = NULL;
    cache.haveaccess = NULL;

    /* struct copyin done during dispatch... need to copy file_id array separately */
    if (ap->a_data == NULL) {
        error = EINVAL;
        goto err_exit_bulk_access;
    }
    if (is64bit) {
        if (arg_size != sizeof(struct user64_ext_access_t)) {
            error = EINVAL;
            goto err_exit_bulk_access;
        }

        user_access_structp = (struct user64_ext_access_t *)ap->a_data;

    } else if (arg_size == sizeof(struct user32_access_t)) {
        struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;

        // convert an old style bulk-access struct to the new style
        tmp_user_access.flags = accessp->flags;
        tmp_user_access.num_files = accessp->num_files;
        tmp_user_access.map_size = 0;
        tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
        tmp_user_access.bitmap = USER_ADDR_NULL;
        tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
        tmp_user_access.num_parents = 0;
        user_access_structp = &tmp_user_access;

    } else if (arg_size == sizeof(struct user32_ext_access_t)) {
        struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;

        // up-cast from a 32-bit version of the struct
        tmp_user_access.flags = accessp->flags;
        tmp_user_access.num_files = accessp->num_files;
        tmp_user_access.map_size = accessp->map_size;
        tmp_user_access.num_parents = accessp->num_parents;

        tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
        tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
        tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
        tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);

        user_access_structp = &tmp_user_access;
    } else {
        error = EINVAL;
        goto err_exit_bulk_access;
    }
= user_access_structp
->map_size
;
1330 num_files
= user_access_structp
->num_files
;
1332 num_parents
= user_access_structp
->num_parents
;
1334 if (num_files
< 1) {
1335 goto err_exit_bulk_access
;
1337 if (num_files
> 1024) {
1339 goto err_exit_bulk_access
;
1342 if (num_parents
> 1024) {
1344 goto err_exit_bulk_access
;
    file_ids = (int *) kalloc(sizeof(int) * num_files);
    access = (short *) kalloc(sizeof(short) * num_files);
    if (map_size) {
        bitmap = (char *) kalloc(sizeof(char) * map_size);
    }
    if (num_parents) {
        parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
    }

    cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
    cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);

    if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
        if (file_ids) {
            kfree(file_ids, sizeof(int) * num_files);
        }
        if (bitmap) {
            kfree(bitmap, sizeof(char) * map_size);
        }
        if (access) {
            kfree(access, sizeof(short) * num_files);
        }
        if (cache.acache) {
            kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
        }
        if (cache.haveaccess) {
            kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
        }
        if (parents) {
            kfree(parents, sizeof(cnid_t) * num_parents);
        }
        return ENOMEM;
    }
    // make sure the bitmap is zero'ed out...
    if (bitmap) {
        bzero(bitmap, (sizeof(char) * map_size));
    }

    if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
                        num_files * sizeof(int)))) {
        goto err_exit_bulk_access;
    }

    if (num_parents) {
        if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
                            num_parents * sizeof(cnid_t)))) {
            goto err_exit_bulk_access;
        }
    }

    flags = user_access_structp->flags;
    if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
        flags = R_OK;
    }

    /* check if we've been passed leaf node ids or parent ids */
    if (flags & PARENT_IDS_FLAG) {
        check_leaf = false;
    }
    /* Check access to each file_id passed in */
    for (i = 0; i < num_files; i++) {
        leaf_index = -1;
        cnid = (cnid_t) file_ids[i];

        /* root always has access */
        if ((!parents) && (!suser(cred, NULL))) {
            access[i] = 0;
            continue;
        }

        if (check_leaf) {
            /* do the lookup (checks the cnode hash, then the catalog) */
            error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
            if (error) {
                access[i] = (short) error;
                continue;
            }

            if (parents) {
                // Check if the leaf matches one of the parent scopes
                leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
                if (leaf_index >= 0 && parents[leaf_index] == cnid)
                    prev_parent_check_ok = 0;
                else if (leaf_index >= 0)
                    prev_parent_check_ok = 1;
            }
            // if the thing has acl's, do the full permission check
            if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
                struct vnode *cvp;
                int myErr = 0;

                /* get the vnode for this cnid */
                myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
                if (myErr) {
                    access[i] = (short) myErr;
                    continue;
                }

                hfs_unlock(VTOC(cvp));

                if (vnode_vtype(cvp) == VDIR) {
                    myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
                } else {
                    myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
                }

                vnode_put(cvp);
                if (myErr) {
                    access[i] = (short) myErr;
                    continue;
                }
            } else {
                /* before calling CheckAccess(), check the target file for read access */
                myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
                                                  cnattr.ca_mode, hfsmp->hfs_mp, cred, p);

                /* fail fast if no access */
                if ((myPerms & flags) == 0) {
                    access[i] = EACCES;
                    continue;
                }
            }
        } else {
            /* we were passed an array of parent ids */
            catkey.hfsPlus.parentID = cnid;
        }
        /* if the last guy had the same parent and had access, we're done */
        if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
            cache.cachehits++;
            access[i] = 0;
            continue;
        }

        myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
                                   skip_cp, p, cred, context, bitmap, map_size, parents, num_parents);

        if (myaccess || (error == ESRCH && leaf_index != -1)) {
            access[i] = 0; // have access.. no errors to report
        } else {
            access[i] = (error != 0 ? (short) error : EACCES);
        }

        prevParent_cnid = catkey.hfsPlus.parentID;
    }
    /* copyout the access array */
    if ((error = copyout((caddr_t)access, user_access_structp->access,
                         num_files * sizeof (short)))) {
        goto err_exit_bulk_access;
    }
    if (map_size && bitmap) {
        if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
                             map_size * sizeof (char)))) {
            goto err_exit_bulk_access;
        }
    }
:
1512 kfree(file_ids
, sizeof(int) * num_files
);
1514 kfree(parents
, sizeof(cnid_t
) * num_parents
);
1516 kfree(bitmap
, sizeof(char) * map_size
);
1518 kfree(access
, sizeof(short) * num_files
);
1520 kfree(cache
.acache
, sizeof(int) * NUM_CACHE_ENTRIES
);
1521 if (cache
.haveaccess
)
1522 kfree(cache
.haveaccess
, sizeof(unsigned char) * NUM_CACHE_ENTRIES
);
1528 /* end "bulk-access" support */
/*
 * Control filesystem operating characteristics.
 */
int
hfs_vnop_ioctl( struct vnop_ioctl_args /* {
        vnode_t a_vp;
        ...
        vfs_context_t a_context;
    } */ *ap)
{
    struct vnode * vp = ap->a_vp;
    struct hfsmount *hfsmp = VTOHFS(vp);
    vfs_context_t context = ap->a_context;
    kauth_cred_t cred = vfs_context_ucred(context);
    proc_t p = vfs_context_proc(context);
    struct vfsstatfs *vfsp;
    int is64bit;
    off_t jnl_start, jnl_size;
    struct hfs_journal_info *jip;
#if HFS_COMPRESSION
    int compressed = 0;
    off_t uncompressed_size = -1;
    int decmpfs_error = 0;
    if (ap->a_command == F_RDADVISE) {
        /* we need to inspect the decmpfs state of the file as early as possible */
        compressed = hfs_file_is_compressed(VTOC(vp), 0);
        if (compressed) {
            if (VNODE_IS_RSRC(vp)) {
                /* if this is the resource fork, treat it as if it were empty */
                uncompressed_size = 0;
            } else {
                decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
                if (decmpfs_error != 0) {
                    /* failed to get the uncompressed size, we'll check for this later */
                    uncompressed_size = -1;
                }
            }
        }
    }
#endif /* HFS_COMPRESSION */

    is64bit = proc_is64bit(p);

#if CONFIG_PROTECT
    {
        int error = 0;
        if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
            return (error);
        }
    }
#endif /* CONFIG_PROTECT */
    switch (ap->a_command) {

    case HFS_GETPATH:
    {
        struct vnode *file_vp;
        cnid_t cnid;
        int outlen;
        char *bufptr;
        int error;
        int flags = 0;

        /* Caller must be owner of file system. */
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);
        }
        /* Target vnode must be file system's root. */
        if (!vnode_isvroot(vp)) {
            return (EINVAL);
        }
        bufptr = (char *)ap->a_data;
        cnid = strtoul(bufptr, NULL, 10);
        if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) {
            flags |= BUILDPATH_VOLUME_RELATIVE;
        }

        /* We need to call hfs_vfs_vget to leverage the code that will
         * fix the origin list for us if needed, as opposed to calling
         * hfs_vget, since we will need the parent for the build_path call.
         */
        if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
            return (error);
        }

        error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, flags, context);
        vnode_put(file_vp);

        return (error);
    }
:
1629 struct cnode
*cp
= NULL
;
1631 u_int32_t to_fd
= *(u_int32_t
*)ap
->a_data
;
1632 struct fileproc
*to_fp
;
1633 struct vnode
*to_vp
;
1634 struct cnode
*to_cp
;
1638 if ((error
= fp_getfvp(p
, to_fd
, &to_fp
, &to_vp
)) != 0) {
1639 //printf("could not get the vnode for fd %d (err %d)\n", to_fd, error);
1642 if ( (error
= vnode_getwithref(to_vp
)) ) {
1647 if (VTOHFS(to_vp
) != hfsmp
) {
1649 goto transfer_cleanup
;
1652 int need_unlock
= 1;
1653 to_cp
= VTOC(to_vp
);
1654 error
= hfs_lockpair(cp
, to_cp
, HFS_EXCLUSIVE_LOCK
);
1656 //printf("could not lock the pair of cnodes (error %d)\n", error);
1657 goto transfer_cleanup
;
        if (!(cp->c_bsdflags & UF_TRACKED)) {
            error = EINVAL;
        } else if (to_cp->c_bsdflags & UF_TRACKED) {
            //
            // if the destination is already tracked, return an error
            // as otherwise it's a silent deletion of the target's
            // document-id
            //
            error = EEXIST;
        } else if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
            //
            // we can use the FndrExtendedFileInfo because the doc-id is the first
            // thing in both it and the ExtendedDirInfo struct which is fixed in
            // format and can not change layout
            //
            struct FndrExtendedFileInfo *f_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16);
            struct FndrExtendedFileInfo *to_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)to_cp->c_finderinfo + 16);

            if (f_extinfo->document_id == 0) {
                uint32_t new_id;

                hfs_unlockpair(cp, to_cp);  // have to unlock to be able to get a new-id

                if ((error = hfs_generate_document_id(hfsmp, &new_id)) == 0) {
                    //
                    // re-lock the pair now that we have the document-id
                    //
                    hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
                    f_extinfo->document_id = new_id;
                } else {
                    goto transfer_cleanup;
                }
            }
->document_id
= f_extinfo
->document_id
;
1695 f_extinfo
->document_id
= 0;
1696 //printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid);
1698 // make sure the destination is also UF_TRACKED
1699 to_cp
->c_bsdflags
|= UF_TRACKED
;
1700 cp
->c_bsdflags
&= ~UF_TRACKED
;
1702 // mark the cnodes dirty
1703 cp
->c_flag
|= C_MODIFIED
;
1704 to_cp
->c_flag
|= C_MODIFIED
;
1707 if ((error
= hfs_start_transaction(hfsmp
)) == 0) {
1709 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_CATALOG
, HFS_EXCLUSIVE_LOCK
);
1711 (void) cat_update(hfsmp
, &cp
->c_desc
, &cp
->c_attr
, NULL
, NULL
);
1712 (void) cat_update(hfsmp
, &to_cp
->c_desc
, &to_cp
->c_attr
, NULL
, NULL
);
1714 hfs_systemfile_unlock (hfsmp
, lockflags
);
1715 (void) hfs_end_transaction(hfsmp
);
            if (error == 0) {
                add_fsevent(FSE_DOCID_CHANGED, context,
                            FSE_ARG_DEV,   hfsmp->hfs_raw_dev,
                            FSE_ARG_INO,   (ino64_t)cp->c_fileid,       // src inode #
                            FSE_ARG_INO,   (ino64_t)to_cp->c_fileid,    // dst inode #
                            FSE_ARG_INT32, to_extinfo->document_id,
                            FSE_ARG_DONE);

                hfs_unlockpair(cp, to_cp);    // unlock this so we can send the fsevents
                need_unlock = 0;

                if (need_fsevent(FSE_STAT_CHANGED, vp)) {
                    add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
                }
                if (need_fsevent(FSE_STAT_CHANGED, to_vp)) {
                    add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, to_vp, FSE_ARG_DONE);
                }
            } else {
                hfs_unlockpair(cp, to_cp);    // unlock this so we can send the fsevents
                need_unlock = 0;
            }
        }

        if (need_unlock) {
            hfs_unlockpair(cp, to_cp);
        }

transfer_cleanup:
        vnode_put(to_vp);
        file_drop(to_fd);

        return error;
    }
    case HFS_PREV_LINK:
    case HFS_NEXT_LINK:
    {
        cnid_t linkfileid;
        cnid_t nextlinkid;
        cnid_t prevlinkid;
        int error;

        /* Caller must be owner of file system. */
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);
        }
        /* Target vnode must be file system's root. */
        if (!vnode_isvroot(vp)) {
            return (EINVAL);
        }
        linkfileid = *(cnid_t *)ap->a_data;
        if (linkfileid < kHFSFirstUserCatalogNodeID) {
            return (EINVAL);
        }
        if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
            return (error);
        }
        if (ap->a_command == HFS_NEXT_LINK) {
            *(cnid_t *)ap->a_data = nextlinkid;
        } else {
            *(cnid_t *)ap->a_data = prevlinkid;
        }
        return (0);
    }
: {
1789 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1790 if (suser(cred
, NULL
) &&
1791 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1792 return (EACCES
); /* must be owner of file system */
1794 if (!vnode_isvroot(vp
)) {
1797 /* file system must not be mounted read-only */
1798 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1802 return hfs_resize_progress(hfsmp
, (u_int32_t
*)ap
->a_data
);
    case HFS_RESIZE_VOLUME: {
        u_int64_t newsize;
        u_int64_t cursize;
        int ret;

        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        }
        if (!vnode_isvroot(vp)) {
            return (EINVAL);
        }

        /* filesystem must not be mounted read only */
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return (EROFS);
        }
        newsize = *(u_int64_t *)ap->a_data;
        cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

        if (newsize == cursize) {
            return (0);
        }
        IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeWillResize);
        if (newsize > cursize) {
            ret = hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
        } else {
            ret = hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
        }
        IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeDidResize);
        return (ret);
    }
    case HFS_CHANGE_NEXT_ALLOCATION: {
        int error = 0;      /* Assume success */
        u_int32_t location;

        if (vnode_vfsisrdonly(vp)) {
            return (EROFS);
        }
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        }
        if (!vnode_isvroot(vp)) {
            return (EINVAL);
        }
        hfs_lock_mount(hfsmp);
        location = *(u_int32_t *)ap->a_data;
        if ((location >= hfsmp->allocLimit) &&
            (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
            error = EINVAL;
            goto fail_change_next_allocation;
        }
        /* Return previous value. */
        *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
        if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
            /* On magic value for location, set nextAllocation to next block
             * after metadata zone and set flag in mount structure to indicate
             * that nextAllocation should not be updated again.
             */
            if (hfsmp->hfs_metazone_end != 0) {
                HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
            }
            hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
        } else {
            hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
            HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
        }
        MarkVCBDirty(hfsmp);
fail_change_next_allocation:
        hfs_unlock_mount(hfsmp);
        return (error);
    }
#if HFS_SPARSE_DEV
    case HFS_SETBACKINGSTOREINFO: {
        struct vnode * bsfs_rootvp;
        struct vnode * di_vp;
        struct hfs_backingstoreinfo *bsdata;
        int error = 0;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return (EROFS);
        }
        if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
            return (EALREADY);
        }
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        }
        bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
        if (bsdata == NULL) {
            return (EINVAL);
        }
        if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
            return (error);
        }
        if ((error = vnode_getwithref(di_vp))) {
            file_drop(bsdata->backingfd);
            return (error);
        }

        if (vnode_mount(vp) == vnode_mount(di_vp)) {
            (void)vnode_put(di_vp);
            file_drop(bsdata->backingfd);
            return (EINVAL);
        }

        /*
         * Obtain the backing fs root vnode and keep a reference
         * on it.  This reference will be dropped in hfs_unmount.
         */
        error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
        if (error) {
            (void)vnode_put(di_vp);
            file_drop(bsdata->backingfd);
            return (error);
        }
        vnode_ref(bsfs_rootvp);
        vnode_put(bsfs_rootvp);

        hfs_lock_mount(hfsmp);
        hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
        hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
        hfsmp->hfs_sparsebandblks = bsdata->bandsize / hfsmp->blockSize * 4;
        hfs_unlock_mount(hfsmp);

        /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */

        /*
         * If the sparse image is on a sparse image file (as opposed to a sparse
         * bundle), then we may need to limit the free space to the maximum size
         * of a file on that volume.  So we query (using pathconf), and if we get
         * a meaningful result, we cache the number of blocks for later use in
         * hfs_freeblks().
         */
        hfsmp->hfs_backingfs_maxblocks = 0;
        if (vnode_vtype(di_vp) == VREG) {
            int terr;
            int hostbits;

            terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
            if (terr == 0 && hostbits != 0 && hostbits < 64) {
                u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;

                hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
            }
        }

        /* The free extent cache is managed differently for sparse devices.
         * There is a window between which the volume is mounted and the
         * device is marked as sparse, so the free extent cache for this
         * volume is currently initialized as normal volume (sorted by block
         * count). Reset the cache so that it will be rebuilt again
         * for sparse device (sorted by start block).
         */
        ResetVCBFreeExtCache(hfsmp);

        (void)vnode_put(di_vp);
        file_drop(bsdata->backingfd);
        return (0);
    }
: {
1971 struct vnode
* tmpvp
;
1973 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1974 if (suser(cred
, NULL
) &&
1975 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1976 return (EACCES
); /* must be owner of file system */
1978 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1982 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
1983 hfsmp
->hfs_backingfs_rootvp
) {
1985 hfs_lock_mount(hfsmp
);
1986 hfsmp
->hfs_flags
&= ~HFS_HAS_SPARSE_DEVICE
;
1987 tmpvp
= hfsmp
->hfs_backingfs_rootvp
;
1988 hfsmp
->hfs_backingfs_rootvp
= NULLVP
;
1989 hfsmp
->hfs_sparsebandblks
= 0;
1990 hfs_unlock_mount(hfsmp
);
1996 #endif /* HFS_SPARSE_DEV */
    /* Change the next CNID stored in the VH */
    case HFS_CHANGE_NEXTCNID: {
        int error = 0;      /* Assume success */
        u_int32_t fileid;
        int wraparound = 0;
        int lockflags = 0;

        if (vnode_vfsisrdonly(vp)) {
            return (EROFS);
        }
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        }

        fileid = *(u_int32_t *)ap->a_data;

        /* Must have catalog lock excl. to advance the CNID pointer */
        lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

        hfs_lock_mount(hfsmp);

        /* If it is less than the current next CNID, force the wraparound bit to be set */
        if (fileid < hfsmp->vcbNxtCNID) {
            wraparound = 1;
        }

        /* Return previous value. */
        *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID;

        hfsmp->vcbNxtCNID = fileid;

        if (wraparound) {
            hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask;
        }

        MarkVCBDirty(hfsmp);
        hfs_unlock_mount(hfsmp);
        hfs_systemfile_unlock(hfsmp, lockflags);

        return (error);
    }
= vnode_mount(vp
);
2046 hfsmp
= VFSTOHFS(mp
);
2051 vfsp
= vfs_statfs(mp
);
2053 if (kauth_cred_getuid(cred
) != vfsp
->f_owner
&&
2054 !kauth_cred_issuser(cred
))
2057 return hfs_freeze(hfsmp
);
2061 vfsp
= vfs_statfs(vnode_mount(vp
));
2062 if (kauth_cred_getuid(cred
) != vfsp
->f_owner
&&
2063 !kauth_cred_issuser(cred
))
2066 return hfs_thaw(hfsmp
, current_proc());
    case HFS_EXT_BULKACCESS_FSCTL: {
        int size;

        if (hfsmp->hfs_flags & HFS_STANDARD) {
            return EINVAL;
        }

        if (is64bit) {
            size = sizeof(struct user64_ext_access_t);
        } else {
            size = sizeof(struct user32_ext_access_t);
        }

        return do_bulk_access_check(hfsmp, vp, ap, size, context);
    }

    case HFS_SET_XATTREXTENTS_STATE: {
        int state;

        if (ap->a_data == NULL) {
            return (EINVAL);
        }

        state = *(int *)ap->a_data;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return (EROFS);
        }

        /* Super-user can enable or disable extent-based extended
         * attribute support on a volume
         * Note: Starting Mac OS X 10.7, extent-based extended attributes
         * are enabled by default, so any change will be transient only
         * till the volume is remounted.
         */
        if (!kauth_cred_issuser(kauth_cred_get())) {
            return (EPERM);
        }
        if (state == 0 || state == 1)
            return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
        else
            return (EINVAL);
    }
    case F_SETSTATICCONTENT: {
        int error;
        int enable_static = 0;
        struct cnode *cp = NULL;
        /*
         * lock the cnode, decorate the cnode flag, and bail out.
         * VFS should have already authenticated the caller for us.
         */

        if (ap->a_data) {
            /*
             * Note that even though ap->a_data is of type caddr_t,
             * the fcntl layer at the syscall handler will pass in NULL
             * or 1 depending on what the argument supplied to the fcntl
             * was.  So it is in fact correct to check the ap->a_data
             * argument for zero or non-zero value when deciding whether or not
             * to enable the static bit in the cnode.
             */
            enable_static = 1;
        }
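        /*
         * Usage sketch from user space (illustrative, not part of the
         * original source; F_SETSTATICCONTENT is a private fcntl, so the
         * exact header that exposes it is an assumption):
         *
         *      fcntl(fd, F_SETSTATICCONTENT, 1);   // mark the file's blocks as static content
         *      fcntl(fd, F_SETSTATICCONTENT, 0);   // clear the hint again
         */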
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return EROFS;
        }
        cp = VTOC(vp);

        error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
        if (error == 0) {
            if (enable_static) {
                cp->c_flag |= C_SSD_STATIC;
            } else {
                cp->c_flag &= ~C_SSD_STATIC;
            }
            hfs_unlock(cp);
        }
        return error;
    }
: {
2153 int enable_greedy_mode
= 0;
2154 struct cnode
*cp
= NULL
;
2156 * lock the cnode, decorate the cnode flag, and bail out.
2157 * VFS should have already authenticated the caller for us.
2162 * Note that even though ap->a_data is of type caddr_t,
2163 * the fcntl layer at the syscall handler will pass in NULL
2164 * or 1 depending on what the argument supplied to the fcntl
2165 * was. So it is in fact correct to check the ap->a_data
2166 * argument for zero or non-zero value when deciding whether or not
2167 * to enable the greedy mode bit in the cnode.
2169 enable_greedy_mode
= 1;
2171 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2176 error
= hfs_lock (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2178 if (enable_greedy_mode
) {
2179 cp
->c_flag
|= C_SSD_GREEDY_MODE
;
2182 cp
->c_flag
&= ~C_SSD_GREEDY_MODE
;
2191 uint32_t iotypeflag
= 0;
2193 struct cnode
*cp
= NULL
;
2195 * lock the cnode, decorate the cnode flag, and bail out.
2196 * VFS should have already authenticated the caller for us.
2199 if (ap
->a_data
== NULL
) {
2204 * Note that even though ap->a_data is of type caddr_t, we
2205 * can only use 32 bits of flag values.
2207 iotypeflag
= (uint32_t) ap
->a_data
;
2208 switch (iotypeflag
) {
2209 case F_IOTYPE_ISOCHRONOUS
:
2216 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2221 error
= hfs_lock (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2223 switch (iotypeflag
) {
2224 case F_IOTYPE_ISOCHRONOUS
:
2225 cp
->c_flag
|= C_IO_ISOCHRONOUS
;
2235 case F_MAKECOMPRESSED
        int error = 0;
        uint32_t gen_counter;
        struct cnode *cp = NULL;
        int reset_decmp = 0;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return EROFS;
        }

        /*
         * acquire & lock the cnode.
         * VFS should have already authenticated the caller for us.
         */

        if (ap->a_data) {
            /*
             * Cast the pointer into a uint32_t so we can extract the
             * supplied generation counter.
             */
            gen_counter = *((uint32_t*)ap->a_data);
        } else {
            return EINVAL;
        }

        cp = VTOC(vp);

        /* Grab truncate lock first; we may truncate the file */
        hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

        error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
        if (error) {
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
            return error;
        }

        /* Are there any other usecounts/FDs? */
        if (vnode_isinuse(vp, 1)) {
            hfs_unlock(cp);
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
            return EBUSY;
        }
        /* now we have the cnode locked down; Validate arguments */
        if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
            /* EINVAL if you are trying to manipulate an IMMUTABLE file */
            hfs_unlock(cp);
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
            return EINVAL;
        }

        if ((hfs_get_gencount(cp)) == gen_counter) {
            /*
             * OK, the gen_counter matched.  Go for it:
             * Toggle state bits, truncate file, and suppress mtime update
             */
            reset_decmp = 1;
            cp->c_bsdflags |= UF_COMPRESSED;

            error = hfs_truncate(vp, 0, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES,
                                 ap->a_context);
        } else {
            error = ESTALE;
        }
2302 /* Unlock cnode before executing decmpfs ; they may need to get an EA */
2306 * Reset the decmp state while still holding the truncate lock. We need to
2307 * serialize here against a listxattr on this node which may occur at any
2310 * Even if '0/skiplock' is passed in 2nd argument to hfs_file_is_compressed,
2311 * that will still potentially require getting the com.apple.decmpfs EA. If the
2312 * EA is required, then we can't hold the cnode lock, because the getxattr call is
2313 * generic(through VFS), and can't pass along any info telling it that we're already
2314 * holding it (the lock). If we don't serialize, then we risk listxattr stopping
2315 * and trying to fill in the hfs_file_is_compressed info during the callback
2316 * operation, which will result in deadlock against the b-tree node.
2318 * So, to serialize against listxattr (which will grab buf_t meta references on
2319 * the b-tree blocks), we hold the truncate lock as we're manipulating the
2322 if ((reset_decmp
) && (error
== 0)) {
2323 decmpfs_cnode
*dp
= VTOCMP (vp
);
2325 decmpfs_cnode_set_vnode_state(dp
, FILE_TYPE_UNKNOWN
, 0);
2328 /* Initialize the decmpfs node as needed */
2329 (void) hfs_file_is_compressed (cp
, 0); /* ok to take lock */
2332 hfs_unlock_truncate (cp
, HFS_LOCK_DEFAULT
);
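	/*
	 * A minimal sketch of the generation-counter handshake used above, with a
	 * hypothetical object and accessor names (not HFS code): the caller
	 * captures a counter, prepares its data, and the state bit is only flipped
	 * if nothing bumped the counter in the meantime; otherwise the caller must
	 * refresh and retry, exactly as the ESTALE path above implies.
	 */
#if 0
#include <errno.h>
#include <stdint.h>

struct obj { uint32_t gen; uint32_t flags; };	/* hypothetical */
#define FLAG_COMPRESSED	0x1

/* Returns 0 on success, ESTALE if the object changed since expected_gen. */
static int
mark_compressed_if_unchanged(struct obj *o, uint32_t expected_gen)
{
	if (o->gen != expected_gen)
		return ESTALE;		/* a writer got there first; caller retries */
	o->flags |= FLAG_COMPRESSED;	/* safe: state still matches what caller saw */
	return 0;
}
#endif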
	case F_SETBACKINGSTORE: {
		int error = 0;

		/*
		 * See comment in F_SETSTATICCONTENT re: using
		 * a null check for a_data
		 */
		if (ap->a_data) {
			error = hfs_set_backingstore(vp, 1);
		} else {
			error = hfs_set_backingstore(vp, 0);
		}
		return error;
	}

	case F_GETPATH_MTMINFO: {
		int error = 0;
		int *data = (int*) ap->a_data;

		/* Ask if this is a backingstore vnode */
		error = hfs_is_backingstore(vp, data);
		if (error == 0) {
			/* pass the return code along */
			if (*data) {
				*data = 1;
			} else {
				*data = 0;
			}
		}
		return error;
	}
	case F_FULLFSYNC: {
		int error;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error == 0) {
			error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_FULL, p);
			hfs_unlock(VTOC(vp));
		}

		return error;
	}

	case F_BARRIERFSYNC: {
		int error;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error == 0) {
			error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_BARRIER, p);
			hfs_unlock(VTOC(vp));
		}

		return error;
	}
	case F_CHKCLEAN: {
		register struct cnode *cp;
		int error;

		if (!vnode_isreg(vp))
			return EINVAL;

		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error == 0) {
			cp = VTOC(vp);
			/*
			 * used by regression test to determine if
			 * all the dirty pages (via write) have been cleaned
			 * after a call to 'fsync'.
			 */
			error = is_file_clean(vp, VTOF(vp)->ff_size);
			hfs_unlock(cp);
		}
		return (error);
	}
	case F_RDADVISE: {
		register struct radvisory *ra;
		struct filefork *fp;
		int error;

		if (!vnode_isreg(vp))
			return EINVAL;

		ra = (struct radvisory *)(ap->a_data);
		fp = VTOF(vp);

		/* Protect against a size change. */
		hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

#if HFS_COMPRESSION
		if (compressed && (uncompressed_size == -1)) {
			/* fetching the uncompressed size failed above, so return the error */
			error = decmpfs_error;
		} else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
		           (!compressed && (ra->ra_offset >= fp->ff_size))) {
			error = EFBIG;
		}
#else /* HFS_COMPRESSION */
		if (ra->ra_offset >= fp->ff_size) {
			error = EFBIG;
		}
#endif /* HFS_COMPRESSION */
		else {
			error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
		}

		hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
		return (error);
	}
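	/*
	 * Illustrative userspace sketch (not compiled into this file): F_RDADVISE
	 * takes a struct radvisory naming an offset/length to read ahead, which is
	 * what the advisory_read() call above ultimately services. The path below
	 * is hypothetical and error handling is minimal.
	 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct radvisory ra;
	int fd = open("/Volumes/HFSVol/big.dat", O_RDONLY);	/* hypothetical path */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	ra.ra_offset = 0;		/* start of the region to pre-read */
	ra.ra_count = 1024 * 1024;	/* one megabyte of read-ahead */
	if (fcntl(fd, F_RDADVISE, &ra) == -1)
		perror("fcntl(F_RDADVISE)");
	(void) close(fd);
	return 0;
}
#endif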
	case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
	{
		if (is64bit) {
			*(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
		} else {
			*(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
		}
		return 0;
	}
	case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
		*(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
		break;

	case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
		*(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
		break;

	case HFS_FSCTL_GET_VERY_LOW_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
		break;

	case HFS_FSCTL_SET_VERY_LOW_DISK:
		if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
			return EINVAL;
		}

		hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
		break;

	case HFS_FSCTL_GET_LOW_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
		break;

	case HFS_FSCTL_SET_LOW_DISK:
		if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
		    || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
			return EINVAL;
		}

		hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
		break;

	case HFS_FSCTL_GET_DESIRED_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
		break;

	case HFS_FSCTL_SET_DESIRED_DISK:
		if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
			return EINVAL;
		}

		hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
		break;

	case HFS_VOLUME_STATUS:
		*(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
		break;
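	/*
	 * Illustrative userspace sketch (not compiled into this file): the three
	 * free-space notification levels must stay ordered danger < warning <
	 * desired, which is exactly what the EINVAL checks above enforce. The
	 * mount point and limit values are hypothetical, the units are assumed to
	 * be allocation blocks, and the exact selector spelling exposed to
	 * userspace (HFS_FSCTL_* vs. HFSIOC_* variants) is an assumption.
	 */
#if 0
#include <sys/fsctl.h>
#include <hfs/hfs_fsctl.h>
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint32_t danger = 100, warning = 500, desired = 1000;	/* allocation blocks (assumption) */
	const char *mp = "/Volumes/HFSVol";			/* hypothetical mount point */

	/* Apply from the loosest limit down so each value is checked against ones
	 * already in place (assuming the new levels sit above the current defaults). */
	if (fsctl(mp, HFS_FSCTL_SET_DESIRED_DISK, &desired, 0) == -1)
		perror("HFS_FSCTL_SET_DESIRED_DISK");
	if (fsctl(mp, HFS_FSCTL_SET_LOW_DISK, &warning, 0) == -1)
		perror("HFS_FSCTL_SET_LOW_DISK");
	if (fsctl(mp, HFS_FSCTL_SET_VERY_LOW_DISK, &danger, 0) == -1)
		perror("HFS_FSCTL_SET_VERY_LOW_DISK");
	return 0;
}
#endif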
	case HFS_SET_BOOT_INFO:
		if (!vnode_isvroot(vp))
			return (EINVAL);
		if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
			return (EACCES);	/* must be superuser or owner of filesystem */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		hfs_lock_mount(hfsmp);
		bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
		hfs_unlock_mount(hfsmp);
		(void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
		break;

	case HFS_GET_BOOT_INFO:
		if (!vnode_isvroot(vp))
			return (EINVAL);
		hfs_lock_mount(hfsmp);
		bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
		hfs_unlock_mount(hfsmp);
		break;

	case HFS_MARK_BOOT_CORRUPT:
		/* Mark the boot volume corrupt by setting
		 * kHFSVolumeInconsistentBit in the volume header. This will
		 * force fsck_hfs on next mount.
		 */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}

		/* Allowed only on the root vnode of the boot volume */
		if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
		    !vnode_isvroot(vp)) {
			return EINVAL;
		}
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		printf("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
		hfs_mark_inconsistent(hfsmp, HFS_FSCK_FORCED);
		break;
	case HFS_FSCTL_GET_JOURNAL_INFO:
		jip = (struct hfs_journal_info*)ap->a_data;

		if (vp == NULLVP)
			return EINVAL;

		if (hfsmp->jnl == NULL) {
			jnl_start = 0;
			jnl_size  = 0;
		} else {
			jnl_start = hfs_blk_to_bytes(hfsmp->jnl_start, hfsmp->blockSize) + hfsmp->hfsPlusIOPosOffset;
			jnl_size  = hfsmp->jnl_size;
		}

		jip->jstart = jnl_start;
		jip->jsize  = jnl_size;
		break;

	case HFS_SET_ALWAYS_ZEROFILL: {
		struct cnode *cp = VTOC(vp);

		if (*(int *)ap->a_data) {
			cp->c_flag |= C_ALWAYS_ZEROFILL;
		} else {
			cp->c_flag &= ~C_ALWAYS_ZEROFILL;
		}
		break;
	}
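	/*
	 * Illustrative userspace sketch (not compiled into this file): the
	 * HFS_FSCTL_GET_JOURNAL_INFO case above fills a struct hfs_journal_info
	 * with the journal's byte offset and size, derived from jnl_start/jnl_size.
	 * The mount point is hypothetical, and the selector spelling exposed to
	 * userspace is an assumption.
	 */
#if 0
#include <sys/fsctl.h>
#include <hfs/hfs_fsctl.h>
#include <stdio.h>

int
main(void)
{
	struct hfs_journal_info jinfo;

	if (fsctl("/Volumes/HFSVol", HFS_FSCTL_GET_JOURNAL_INFO, &jinfo, 0) == -1) {
		perror("HFS_FSCTL_GET_JOURNAL_INFO");
		return 1;
	}
	printf("journal starts at byte %lld, size %lld\n",
	       (long long)jinfo.jstart, (long long)jinfo.jsize);
	return 0;
}
#endif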
	case HFS_DISABLE_METAZONE: {
		/* Only root can disable metadata zone */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}

		/* Disable metadata zone now */
		(void) hfs_metadatazone_init(hfsmp, true);
		printf("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
		break;
	}

	case HFS_FSINFO_METADATA_BLOCKS: {
		int error;
		struct hfsinfo_metadata *hinfo;

		hinfo = (struct hfsinfo_metadata *)ap->a_data;

		/* Get information about number of metadata blocks */
		error = hfs_getinfo_metadata_blocks(hfsmp, hinfo);
		if (error) {
			return error;
		}

		break;
	}
	case HFS_GET_FSINFO: {
		hfs_fsinfo *fsinfo = (hfs_fsinfo *)ap->a_data;

		/* Only root is allowed to get fsinfo */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}

		/*
		 * Make sure that the caller's version number matches with
		 * the kernel's version number. This will make sure that
		 * if the structures being read/written into are changed
		 * by the kernel, the caller will not read incorrect data.
		 *
		 * The first three fields --- request_type, version and
		 * flags --- are the same for all the hfs_fsinfo structures, so
		 * we can access the version number by assuming any
		 * structure for now.
		 */
		if (fsinfo->header.version != HFS_FSINFO_VERSION) {
			return ENOTSUP;
		}

		/* Make sure that the current file system is not marked inconsistent */
		if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
			return EIO;
		}

		return hfs_get_fsinfo(hfsmp, ap->a_data);
	}
	case HFS_CS_FREESPACE_TRIM: {
		int error = 0;
		int lockflags = 0;

		/* Only root allowed */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}

		/*
		 * This core functionality is similar to hfs_scan_blocks().
		 * The main difference is that hfs_scan_blocks() is called
		 * as part of mount where we are assured that the journal is
		 * empty to start with. This fcntl() can be called on a
		 * mounted volume, therefore it has to flush the content of
		 * the journal as well as ensure the state of the summary table.
		 *
		 * This fcntl scans over the entire allocation bitmap,
		 * creates a list of all the free blocks, and issues TRIM
		 * down to the underlying device. This can take a long time
		 * as it can generate up to 512MB of read I/O.
		 */

		if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
			error = hfs_init_summary(hfsmp);
			if (error) {
				printf("hfs: fsctl() could not initialize summary table for %s\n", hfsmp->vcbVN);
				return error;
			}
		}

		/*
		 * The journal maintains a list of recently deallocated blocks to
		 * issue DKIOCUNMAPs when the corresponding journal transaction is
		 * flushed to the disk. To avoid any race conditions, we only
		 * want one active trim list and only one thread issuing DKIOCUNMAPs.
		 * Therefore we make sure that the journal trim list is sync'ed,
		 * empty, and not modifiable for the duration of our scan.
		 *
		 * Take the journal lock before flushing the journal to the disk.
		 * We will keep holding the journal lock until we get the
		 * bitmap lock, to make sure that no new journal transactions can
		 * start. This will make sure that the journal trim list is not
		 * modified after the journal flush and before getting the bitmap lock.
		 * We can release the journal lock after we acquire the bitmap
		 * lock as it will prevent any further block deallocations.
		 */
		hfs_journal_lock(hfsmp);

		/* Flush the journal and wait for all I/Os to finish up */
		error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
		if (error) {
			hfs_journal_unlock(hfsmp);
			return error;
		}

		/* Take bitmap lock to ensure it is not being modified */
		lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

		/* Release the journal lock */
		hfs_journal_unlock(hfsmp);

		/*
		 * ScanUnmapBlocks reads the bitmap in large block size
		 * (up to 1MB) unlike the runtime which reads the bitmap
		 * in the 4K block size. This can cause buf_t collisions
		 * and potential data corruption. To avoid this, we
		 * invalidate all the existing buffers associated with
		 * the bitmap vnode before scanning it.
		 *
		 * Note: ScanUnmapBlock() cleans up all the buffers
		 * after itself, so there won't be any large buffers left
		 * for us to clean up after it returns.
		 */
		error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
		if (error) {
			hfs_systemfile_unlock(hfsmp, lockflags);
			return error;
		}

		/* Traverse bitmap and issue DKIOCUNMAPs */
		error = ScanUnmapBlocks(hfsmp);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			return error;
		}

		break;
	}
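	/*
	 * A minimal sketch of the lock hand-off described above, using hypothetical
	 * lock names and userspace mutexes rather than the kernel primitives: hold
	 * lock A (journal) while acquiring lock B (bitmap), and only then drop A,
	 * so nothing can slip in between the flush and the scan.
	 */
#if 0
#include <pthread.h>

static pthread_mutex_t journal_lk = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t bitmap_lk = PTHREAD_MUTEX_INITIALIZER;

static void
scan_with_handoff(void (*flush)(void), void (*scan)(void))
{
	pthread_mutex_lock(&journal_lk);	/* block new transactions */
	flush();				/* journal is now stable */
	pthread_mutex_lock(&bitmap_lk);		/* freeze the allocation bitmap */
	pthread_mutex_unlock(&journal_lk);	/* safe: bitmap lock blocks deallocations */
	scan();					/* issue the TRIMs */
	pthread_mutex_unlock(&bitmap_lk);
}
#endif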
	case HFS_SET_HOTFILE_STATE: {
		int error;
		struct cnode *cp = VTOC(vp);
		uint32_t hf_state = *((uint32_t*)ap->a_data);
		uint32_t num_unpinned = 0;

		error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error) {
			return error;
		}

		// printf("hfs: setting hotfile state %d on %s\n", hf_state, vp->v_name);
		if (hf_state == HFS_MARK_FASTDEVCANDIDATE) {
			vnode_setfastdevicecandidate(vp);

			cp->c_attr.ca_recflags |= kHFSFastDevCandidateMask;
			cp->c_attr.ca_recflags &= ~kHFSDoNotFastDevPinMask;
			cp->c_flag |= C_MODIFIED;

		} else if (hf_state == HFS_UNMARK_FASTDEVCANDIDATE || hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
			vnode_clearfastdevicecandidate(vp);
			hfs_removehotfile(vp);

			if (cp->c_attr.ca_recflags & kHFSFastDevPinnedMask) {
				hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, &num_unpinned, ap->a_context);
			}

			if (hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
				cp->c_attr.ca_recflags |= kHFSDoNotFastDevPinMask;
			}
			cp->c_attr.ca_recflags &= ~(kHFSFastDevCandidateMask|kHFSFastDevPinnedMask);
			cp->c_flag |= C_MODIFIED;

		} else {
			error = EINVAL;
		}

		if (num_unpinned != 0) {
			lck_mtx_lock(&hfsmp->hfc_mutex);
			hfsmp->hfs_hotfile_freeblks += num_unpinned;
			lck_mtx_unlock(&hfsmp->hfc_mutex);
		}

		hfs_unlock(cp);
		return error;
	}
	case HFS_REPIN_HOTFILE_STATE: {
		int error = 0;
		uint32_t repin_what = *((uint32_t*)ap->a_data);

		/* Only root allowed */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}

		if (!(hfsmp->hfs_flags & (HFS_CS_METADATA_PIN | HFS_CS_HOTFILE_PIN))) {
			// this system is neither regular Fusion nor Cooperative Fusion
			// so this fsctl makes no sense.
			return EINVAL;
		}

		//
		// After converting a CoreStorage volume to be encrypted, the
		// extents could have moved around underneath us. This call
		// allows corestoraged to re-pin everything that should be
		// pinned (it would happen on the next reboot too but that could
		// be a long time away).
		//
		if ((repin_what & HFS_REPIN_METADATA) && (hfsmp->hfs_flags & HFS_CS_METADATA_PIN)) {
			hfs_pin_fs_metadata(hfsmp);
		}
		if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) {
			hfs_repin_hotfiles(hfsmp);
		}
		if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_SWAPFILE_PIN)) {
			//XXX Swapfiles (marked SWAP_PINNED) may have moved too.
			//XXX Do we care? They have a more transient/dynamic nature/lifetime.
		}

		return error;
	}
	default:
		return (ENOTTY);
	}

	return 0;
}

int
hfs_vnop_select(__unused struct vnop_select_args *ap)
/*
	struct vnop_select_args {
		vnode_t a_vp;
		int a_which;
		int a_fflags;
		void *a_wql;
		vfs_context_t a_context;
	};
*/
{
	/*
	 * We should really check to see if I/O is possible.
	 */
	return (1);
}
/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently it is 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks.
 */
int
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
{
	struct filefork *fp = VTOF(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	int retval = E_NONE;
	u_int32_t logBlockSize;
	size_t bytesContAvail = 0;
	off_t blockposition;
	int lockExtBtree;
	int lockflags = 0;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (vpp != NULL)
		*vpp = hfsmp->hfs_devvp;
	if (bnp == NULL)
		return (0);

	logBlockSize = GetLogicalBlockSize(vp);
	blockposition = (off_t)bn * logBlockSize;

	lockExtBtree = overflow_extents(fp);

	if (lockExtBtree)
		lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);

	retval = MacToVFSError(
			MapFileBlockC(HFSTOVCB(hfsmp), (FCB*)fp, MAXPHYSIO,
			              blockposition, bnp, &bytesContAvail));

	if (lockExtBtree)
		hfs_systemfile_unlock(hfsmp, lockflags);

	if (retval == E_NONE) {
		/* Figure out how many read ahead blocks there are */
		if (runp != NULL) {
			if (can_cluster(logBlockSize)) {
				/* Make sure this result never goes negative: */
				*runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
			} else {
				*runp = 0;
			}
		}
	}
	return (retval);
}
/*
 * Convert logical block number to file offset.
 */
int
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
	struct vnop_blktooff_args {
		vnode_t a_vp;
		daddr64_t a_lblkno;
		off_t *a_offset;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);

	*ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);

	return (0);
}

/*
 * Convert file offset to logical block number.
 */
int
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
	struct vnop_offtoblk_args {
		vnode_t a_vp;
		off_t a_offset;
		daddr64_t *a_lblkno;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);

	*ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));

	return (0);
}
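/*
 * A minimal sketch of the two conversions above, assuming a 4096-byte logical
 * block size as a stand-in for GetLogicalBlockSize(): offsets and block
 * numbers round-trip when the offset is block aligned; otherwise offtoblk
 * truncates toward zero.
 */
#if 0
#include <assert.h>
#include <stdint.h>

int
main(void)
{
	const int64_t blksz = 4096;		/* hypothetical logical block size */
	int64_t lblkno = 37;
	int64_t offset = lblkno * blksz;	/* what hfs_vnop_blktooff computes */

	assert(offset / blksz == lblkno);	/* what hfs_vnop_offtoblk computes */
	assert((offset + 100) / blksz == lblkno);	/* unaligned offsets truncate */
	return 0;
}
#endif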
/*
 * Map file offset to physical block number.
 *
 * If this function is called for write operation, and if the file
 * had virtual blocks allocated (delayed allocation), real blocks
 * are allocated by calling ExtendFileC().
 *
 * If this function is called for read operation, and if the file
 * had virtual blocks allocated (delayed allocation), no change
 * to the size of file is done, and if required, rangelist is
 * searched for mapping.
 *
 * System file cnodes are expected to be locked (shared or exclusive).
 *
 * -- INVALID RANGES --
 *
 * Invalid ranges are used to keep track of where we have extended a
 * file, but have not yet written that data to disk. In the past we
 * would clear up the invalid ranges as we wrote to those areas, but
 * before data was actually flushed to disk. The problem with that
 * approach is that the data can be left in the cache and is therefore
 * still not valid on disk. So now we clear up the ranges here, when
 * the flags field has VNODE_WRITE set, indicating a write is about to
 * occur. This isn't ideal (ideally we want to clear them up when we
 * know the data has been successfully written), but it's the best we
 * can do.
 *
 * For reads, we use the invalid ranges here in block map to indicate
 * to the caller that the data should be zeroed (a_bpn == -1). We
 * have to be careful about what ranges we return to the cluster code.
 * Currently the cluster code can only handle non-rounded values for
 * the EOF; it cannot handle funny sized ranges in the middle of the
 * file (the main problem is that it sends down odd sized I/Os to the
 * disk). Our code currently works because whilst the very first
 * offset and the last offset in the invalid ranges are not aligned,
 * gaps in the invalid ranges between the first and last have to be
 * aligned (because we always write page sized blocks). For example,
 * consider this arrangement:
 *
 *         +-------------+-----+-------+------+
 *         |             |XXXXX|       |XXXXXX|
 *         +-------------+-----+-------+------+
 *                       a     b       c      d
 *
 * This shows two invalid ranges <a, b> and <c, d>. Whilst a and d
 * are not necessarily aligned, b and c *must* be.
 *
 * Zero-filling occurs in a number of ways:
 *
 *   1. When a read occurs and we return with a_bpn == -1.
 *
 *   2. When hfs_fsync or hfs_filedone calls hfs_flush_invalid_ranges
 *      which will cause us to iterate over the ranges bringing in
 *      pages that are not present in the cache and zeroing them. Any
 *      pages that are already in the cache are left untouched. Note
 *      that hfs_fsync does not always flush invalid ranges.
 *
 *   3. When we extend a file we zero out from the old EOF to the end
 *      of the page. It would be nice if we didn't have to do this if
 *      the page wasn't present (and could defer it), but because of
 *      the problem described above, we have to.
 *
 * The invalid ranges are also used to restrict the size that we write
 * out on disk: see hfs_prepare_fork_for_update.
 *
 * Note that invalid ranges are ignored when neither the VNODE_READ nor
 * the VNODE_WRITE flag is specified. This is useful for the
 * F_LOG2PHYS* fcntls which are not interested in invalid ranges: they
 * just want to know whether blocks are physically allocated or not.
 */
int
hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
/*
	struct vnop_blockmap_args {
		vnode_t a_vp;
		off_t a_foffset;
		size_t a_size;
		daddr64_t *a_bpn;
		size_t *a_run;
		void *a_poff;
		int a_flags;
		vfs_context_t a_context;
	};
*/
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	size_t bytesContAvail = ap->a_size;
	int retval = E_NONE;
	int syslocks = 0;
	int lockflags = 0;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;
	int started_tr = 0;
	int tooklock = 0;

#if HFS_COMPRESSION
	if (VNODE_IS_RSRC(vp)) {
		/* allow blockmaps to the resource fork */
	} else {
		if (hfs_file_is_compressed(VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
			int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
			switch (state) {
			case FILE_IS_COMPRESSED:
				return ENOTSUP;
			case FILE_IS_CONVERTING:
				/* if FILE_IS_CONVERTING, we allow blockmap */
				break;
			default:
				printf("invalid state %d for compressed file\n", state);
				/* fall through */
			}
		}
	}
#endif /* HFS_COMPRESSION */

	/* Do not allow blockmap operation on a directory */
	if (vnode_isdir(vp)) {
		return (ENOTSUP);
	}

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_bpn == NULL)
		return (0);

	hfsmp = VTOHFS(vp);
	cp = VTOC(vp);
	fp = VTOF(vp);

	if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
		if (cp->c_lockowner != current_thread()) {
			hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
			tooklock = 1;
		}
3091 // For reads, check the invalid ranges
3092 if (ISSET(ap
->a_flags
, VNODE_READ
)) {
3093 if (ap
->a_foffset
>= fp
->ff_size
) {
3098 overlaptype
= rl_scan(&fp
->ff_invalidranges
, ap
->a_foffset
,
3099 ap
->a_foffset
+ (off_t
)bytesContAvail
- 1,
3101 switch(overlaptype
) {
3102 case RL_MATCHINGOVERLAP
:
3103 case RL_OVERLAPCONTAINSRANGE
:
3104 case RL_OVERLAPSTARTSBEFORE
:
3105 /* There's no valid block for this byte offset */
3106 *ap
->a_bpn
= (daddr64_t
)-1;
3107 /* There's no point limiting the amount to be returned
3108 * if the invalid range that was hit extends all the way
3109 * to the EOF (i.e. there's no valid bytes between the
3110 * end of this range and the file's EOF):
3112 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
3113 ((size_t)(invalid_range
->rl_end
+ 1 - ap
->a_foffset
) < bytesContAvail
)) {
3114 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
3120 case RL_OVERLAPISCONTAINED
:
3121 case RL_OVERLAPENDSAFTER
:
3122 /* The range of interest hits an invalid block before the end: */
3123 if (invalid_range
->rl_start
== ap
->a_foffset
) {
3124 /* There's actually no valid information to be had starting here: */
3125 *ap
->a_bpn
= (daddr64_t
)-1;
3126 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
3127 ((size_t)(invalid_range
->rl_end
+ 1 - ap
->a_foffset
) < bytesContAvail
)) {
3128 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
3135 * Sadly, the lower layers don't like us to
3136 * return unaligned ranges, so we skip over
3137 * any invalid ranges here that are less than
3138 * a page: zeroing of those bits is not our
3139 * responsibility (it's dealt with elsewhere).
3142 off_t rounded_start
= round_page_64(invalid_range
->rl_start
);
3143 if ((off_t
)bytesContAvail
< rounded_start
- ap
->a_foffset
)
3145 if (rounded_start
< invalid_range
->rl_end
+ 1) {
3146 bytesContAvail
= rounded_start
- ap
->a_foffset
;
3149 } while ((invalid_range
= TAILQ_NEXT(invalid_range
,
3161 if (cp
->c_cpentry
) {
3162 const int direction
= (ISSET(ap
->a_flags
, VNODE_WRITE
)
3163 ? VNODE_WRITE
: VNODE_READ
);
3165 cp_io_params_t io_params
;
3166 cp_io_params(hfsmp
, cp
->c_cpentry
,
3167 off_rsrc_make(ap
->a_foffset
, VNODE_IS_RSRC(vp
)),
3168 direction
, &io_params
);
3170 if (io_params
.max_len
< (off_t
)bytesContAvail
)
3171 bytesContAvail
= io_params
.max_len
;
3173 if (io_params
.phys_offset
!= -1) {
3174 *ap
->a_bpn
= ((io_params
.phys_offset
+ hfsmp
->hfsPlusIOPosOffset
)
3175 / hfsmp
->hfs_logical_block_size
);
3185 /* Check virtual blocks only when performing write operation */
3186 if ((ap
->a_flags
& VNODE_WRITE
) && (fp
->ff_unallocblocks
!= 0)) {
3187 if (hfs_start_transaction(hfsmp
) != 0) {
3193 syslocks
= SFL_EXTENTS
| SFL_BITMAP
;
3195 } else if (overflow_extents(fp
)) {
3196 syslocks
= SFL_EXTENTS
;
3200 lockflags
= hfs_systemfile_lock(hfsmp
, syslocks
, HFS_EXCLUSIVE_LOCK
);
3203 * Check for any delayed allocations.
3205 if ((ap
->a_flags
& VNODE_WRITE
) && (fp
->ff_unallocblocks
!= 0)) {
3207 u_int32_t loanedBlocks
;
3210 // Make sure we have a transaction. It's possible
3211 // that we came in and fp->ff_unallocblocks was zero
3212 // but during the time we blocked acquiring the extents
3213 // btree, ff_unallocblocks became non-zero and so we
3214 // will need to start a transaction.
3216 if (started_tr
== 0) {
3218 hfs_systemfile_unlock(hfsmp
, lockflags
);
3225 * Note: ExtendFileC will Release any blocks on loan and
3226 * aquire real blocks. So we ask to extend by zero bytes
3227 * since ExtendFileC will account for the virtual blocks.
3230 loanedBlocks
= fp
->ff_unallocblocks
;
3231 retval
= ExtendFileC(hfsmp
, (FCB
*)fp
, 0, 0,
3232 kEFAllMask
| kEFNoClumpMask
, &actbytes
);
3235 fp
->ff_unallocblocks
= loanedBlocks
;
3236 cp
->c_blocks
+= loanedBlocks
;
3237 fp
->ff_blocks
+= loanedBlocks
;
3239 hfs_lock_mount (hfsmp
);
3240 hfsmp
->loanedBlocks
+= loanedBlocks
;
3241 hfs_unlock_mount (hfsmp
);
3243 hfs_systemfile_unlock(hfsmp
, lockflags
);
3244 cp
->c_flag
|= C_MODIFIED
;
3246 (void) hfs_update(vp
, 0);
3247 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3249 hfs_end_transaction(hfsmp
);
3256 retval
= MapFileBlockC(hfsmp
, (FCB
*)fp
, bytesContAvail
, ap
->a_foffset
,
3257 ap
->a_bpn
, &bytesContAvail
);
3259 hfs_systemfile_unlock(hfsmp
, lockflags
);
3264 /* On write, always return error because virtual blocks, if any,
3265 * should have been allocated in ExtendFileC(). We do not
3266 * allocate virtual blocks on read, therefore return error
3267 * only if no virtual blocks are allocated. Otherwise we search
3268 * rangelist for zero-fills
3270 if ((MacToVFSError(retval
) != ERANGE
) ||
3271 (ap
->a_flags
& VNODE_WRITE
) ||
3272 ((ap
->a_flags
& VNODE_READ
) && (fp
->ff_unallocblocks
== 0))) {
3276 /* Validate if the start offset is within logical file size */
3277 if (ap
->a_foffset
>= fp
->ff_size
) {
3282 * At this point, we have encountered a failure during
3283 * MapFileBlockC that resulted in ERANGE, and we are not
3284 * servicing a write, and there are borrowed blocks.
3286 * However, the cluster layer will not call blockmap for
3287 * blocks that are borrowed and in-cache. We have to assume
3288 * that because we observed ERANGE being emitted from
3289 * MapFileBlockC, this extent range is not valid on-disk. So
3290 * we treat this as a mapping that needs to be zero-filled
3294 if (fp
->ff_size
- ap
->a_foffset
< (off_t
)bytesContAvail
)
3295 bytesContAvail
= fp
->ff_size
- ap
->a_foffset
;
3297 *ap
->a_bpn
= (daddr64_t
) -1;
3305 if (ISSET(ap
->a_flags
, VNODE_WRITE
)) {
3306 struct rl_entry
*r
= TAILQ_FIRST(&fp
->ff_invalidranges
);
3308 // See if we might be overlapping invalid ranges...
3309 if (r
&& (ap
->a_foffset
+ (off_t
)bytesContAvail
) > r
->rl_start
) {
3311 * Mark the file as needing an update if we think the
3312 * on-disk EOF has changed.
3314 if (ap
->a_foffset
<= r
->rl_start
)
3315 SET(cp
->c_flag
, C_MODIFIED
);
3318 * This isn't the ideal place to put this. Ideally, we
3319 * should do something *after* we have successfully
3320 * written to the range, but that's difficult to do
3321 * because we cannot take locks in the callback. At
3322 * present, the cluster code will call us with VNODE_WRITE
3323 * set just before it's about to write the data so we know
3324 * that data is about to be written. If we get an I/O
3325 * error at this point then chances are the metadata
3326 * update to follow will also have an I/O error so the
3327 * risk here is small.
3329 rl_remove(ap
->a_foffset
, ap
->a_foffset
+ bytesContAvail
- 1,
3330 &fp
->ff_invalidranges
);
3332 if (!TAILQ_FIRST(&fp
->ff_invalidranges
)) {
3333 cp
->c_flag
&= ~C_ZFWANTSYNC
;
3334 cp
->c_zftimeout
= 0;
3340 *ap
->a_run
= bytesContAvail
;
3343 *(int *)ap
->a_poff
= 0;
3347 hfs_update(vp
, TRUE
);
3348 hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3349 hfs_end_transaction(hfsmp
);
3356 return (MacToVFSError(retval
));
/*
 * prepare and issue the I/O
 * buf_strategy knows how to deal
 * with requests that require
 * fragmented I/Os
 */
int
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
	buf_t bp = ap->a_bp;
	vnode_t vp = buf_vnode(bp);
	int error = 0;

	/* Mark buffer as containing static data if cnode flag set */
	if (VTOC(vp)->c_flag & C_SSD_STATIC) {
		buf_markstatic(bp);
	}

	/* Mark buffer as containing greedy mode data if cnode flag set */
	if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
		bufattr_markgreedymode(&bp->b_attr);
	}

	/* mark buffer as containing burst mode data if cnode flag set */
	if (VTOC(vp)->c_flag & C_IO_ISOCHRONOUS) {
		bufattr_markisochronous(&bp->b_attr);
	}

#if CONFIG_PROTECT
	error = cp_handle_strategy(bp);
	if (error)
		return error;
#endif

	error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);

	return error;
}
3400 do_hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int truncateflags
, vfs_context_t context
)
3402 register struct cnode
*cp
= VTOC(vp
);
3403 struct filefork
*fp
= VTOF(vp
);
3404 kauth_cred_t cred
= vfs_context_ucred(context
);
3407 off_t actualBytesAdded
;
3409 u_int32_t fileblocks
;
3411 struct hfsmount
*hfsmp
;
3413 int suppress_times
= (truncateflags
& HFS_TRUNCATE_SKIPTIMES
);
3415 blksize
= VTOVCB(vp
)->blockSize
;
3416 fileblocks
= fp
->ff_blocks
;
3417 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
3419 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_START
,
3420 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
3425 /* This should only happen with a corrupt filesystem */
3426 if ((off_t
)fp
->ff_size
< 0)
3429 if ((!ISHFSPLUS(VTOVCB(vp
))) && (length
> (off_t
)MAXHFSFILESIZE
))
3436 /* Files that are changing size are not hot file candidates. */
3437 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
3438 fp
->ff_bytesread
= 0;
3442 * We cannot just check if fp->ff_size == length (as an optimization)
3443 * since there may be extra physical blocks that also need truncation.
3446 if ((retval
= hfs_getinoquota(cp
)))
3451 * Lengthen the size of the file. We must ensure that the
3452 * last byte of the file is allocated. Since the smallest
3453 * value of ff_size is 0, length will be at least 1.
3455 if (length
> (off_t
)fp
->ff_size
) {
3457 retval
= hfs_chkdq(cp
, (int64_t)(roundup(length
- filebytes
, blksize
)),
3463 * If we don't have enough physical space then
3464 * we need to extend the physical size.
3466 if (length
> filebytes
) {
3468 u_int32_t blockHint
= 0;
3470 /* All or nothing and don't round up to clumpsize. */
3471 eflags
= kEFAllMask
| kEFNoClumpMask
;
3473 if (cred
&& (suser(cred
, NULL
) != 0)) {
3474 eflags
|= kEFReserveMask
; /* keep a reserve */
3478 * Allocate Journal and Quota files in metadata zone.
3480 if (filebytes
== 0 &&
3481 hfsmp
->hfs_flags
& HFS_METADATA_ZONE
&&
3482 hfs_virtualmetafile(cp
)) {
3483 eflags
|= kEFMetadataMask
;
3484 blockHint
= hfsmp
->hfs_metazone_start
;
3486 if (hfs_start_transaction(hfsmp
) != 0) {
3491 /* Protect extents b-tree and allocation bitmap */
3492 lockflags
= SFL_BITMAP
;
3493 if (overflow_extents(fp
))
3494 lockflags
|= SFL_EXTENTS
;
3495 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3498 * Keep growing the file as long as the current EOF is
3499 * less than the desired value.
3501 while ((length
> filebytes
) && (retval
== E_NONE
)) {
3502 bytesToAdd
= length
- filebytes
;
3503 retval
= MacToVFSError(ExtendFileC(VTOVCB(vp
),
3508 &actualBytesAdded
));
3510 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
3511 if (actualBytesAdded
== 0 && retval
== E_NONE
) {
3512 if (length
> filebytes
)
3518 hfs_systemfile_unlock(hfsmp
, lockflags
);
3522 hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3525 hfs_end_transaction(hfsmp
);
3530 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_NONE
,
3531 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
3534 if (ISSET(flags
, IO_NOZEROFILL
)) {
3535 // An optimisation for the hibernation file
3536 if (vnode_isswap(vp
))
3537 rl_remove_all(&fp
->ff_invalidranges
);
3539 if (UBCINFOEXISTS(vp
) && (vnode_issystem(vp
) == 0) && retval
== E_NONE
) {
3540 if (length
> (off_t
)fp
->ff_size
) {
3543 /* Extending the file: time to fill out the current last page w. zeroes? */
3544 if (fp
->ff_size
& PAGE_MASK_64
) {
3545 /* There might be some valid data at the start of the (current) last page
3546 of the file, so zero out the remainder of that page to ensure the
3547 entire page contains valid data. */
3549 retval
= hfs_zero_eof_page(vp
, length
);
3550 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
3551 if (retval
) goto Err_Exit
;
3554 rl_add(fp
->ff_size
, length
- 1, &fp
->ff_invalidranges
);
3555 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
3558 panic("hfs_truncate: invoked on non-UBC object?!");
3561 if (suppress_times
== 0) {
3562 cp
->c_touch_modtime
= TRUE
;
3564 fp
->ff_size
= length
;
3566 } else { /* Shorten the size of the file */
3568 // An optimisation for the hibernation file
3569 if (ISSET(flags
, IO_NOZEROFILL
) && vnode_isswap(vp
)) {
3570 rl_remove_all(&fp
->ff_invalidranges
);
3571 } else if ((off_t
)fp
->ff_size
> length
) {
3572 /* Any space previously marked as invalid is now irrelevant: */
3573 rl_remove(length
, fp
->ff_size
- 1, &fp
->ff_invalidranges
);
3577 * Account for any unmapped blocks. Note that the new
3578 * file length can still end up with unmapped blocks.
3580 if (fp
->ff_unallocblocks
> 0) {
3581 u_int32_t finalblks
;
3582 u_int32_t loanedBlocks
;
3584 hfs_lock_mount(hfsmp
);
3585 loanedBlocks
= fp
->ff_unallocblocks
;
3586 cp
->c_blocks
-= loanedBlocks
;
3587 fp
->ff_blocks
-= loanedBlocks
;
3588 fp
->ff_unallocblocks
= 0;
3590 hfsmp
->loanedBlocks
-= loanedBlocks
;
3592 finalblks
= (length
+ blksize
- 1) / blksize
;
3593 if (finalblks
> fp
->ff_blocks
) {
3594 /* calculate required unmapped blocks */
3595 loanedBlocks
= finalblks
- fp
->ff_blocks
;
3596 hfsmp
->loanedBlocks
+= loanedBlocks
;
3598 fp
->ff_unallocblocks
= loanedBlocks
;
3599 cp
->c_blocks
+= loanedBlocks
;
3600 fp
->ff_blocks
+= loanedBlocks
;
3602 hfs_unlock_mount (hfsmp
);
3605 off_t savedbytes
= ((off_t
)fp
->ff_blocks
* (off_t
)blksize
);
3606 if (hfs_start_transaction(hfsmp
) != 0) {
3611 if (fp
->ff_unallocblocks
== 0) {
3612 /* Protect extents b-tree and allocation bitmap */
3613 lockflags
= SFL_BITMAP
;
3614 if (overflow_extents(fp
))
3615 lockflags
|= SFL_EXTENTS
;
3616 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3618 retval
= MacToVFSError(TruncateFileC(VTOVCB(vp
), (FCB
*)fp
, length
, 0,
3619 FORK_IS_RSRC (fp
), FTOC(fp
)->c_fileid
, false));
3621 hfs_systemfile_unlock(hfsmp
, lockflags
);
3625 fp
->ff_size
= length
;
3628 hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3630 hfs_end_transaction(hfsmp
);
3632 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
3636 /* These are bytesreleased */
3637 (void) hfs_chkdq(cp
, (int64_t)-(savedbytes
- filebytes
), NOCRED
, 0);
3641 // Unlike when growing a file, we adjust the hotfile block count here
3642 // instead of deeper down in the block allocation code because we do
3643 // not necessarily have a vnode or "fcb" at the time we're deleting
3644 // the file and so we wouldn't know if it was hotfile cached or not
3646 hfs_hotfile_adjust_blocks(vp
, (int64_t)((savedbytes
- filebytes
) / blksize
));
3650 * Only set update flag if the logical length changes & we aren't
3651 * suppressing modtime updates.
3653 if (((off_t
)fp
->ff_size
!= length
) && (suppress_times
== 0)) {
3654 cp
->c_touch_modtime
= TRUE
;
3656 fp
->ff_size
= length
;
3658 if (cp
->c_mode
& (S_ISUID
| S_ISGID
)) {
3659 if (!vfs_context_issuser(context
))
3660 cp
->c_mode
&= ~(S_ISUID
| S_ISGID
);
3662 cp
->c_flag
|= C_MODIFIED
;
3663 cp
->c_touch_chgtime
= TRUE
; /* status changed */
3664 if (suppress_times
== 0) {
3665 cp
->c_touch_modtime
= TRUE
; /* file data was modified */
3668 * If we are not suppressing the modtime update, then
3669 * update the gen count as well.
3671 if (S_ISREG(cp
->c_attr
.ca_mode
) || S_ISLNK (cp
->c_attr
.ca_mode
)) {
3672 hfs_incr_gencount(cp
);
3676 retval
= hfs_update(vp
, 0);
3678 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_NONE
,
3679 -1, -1, -1, retval
, 0);
3684 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_END
,
3685 (int)length
, (int)fp
->ff_size
, (int)filebytes
, retval
, 0);
/*
 * Preparation which must be done prior to deleting the catalog record
 * of a file or directory. In order to make the on-disk state as safe as
 * possible, we remove the catalog entry before releasing the bitmap blocks
 * and the overflow extent records. However, some work must be done prior
 * to deleting the catalog record.
 *
 * When calling this function, the cnode must exist both in memory and on-disk.
 * If there are both resource fork and data fork vnodes, this function should
 * be called on both.
 */
int
hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {

	struct filefork *fp = VTOF(vp);
	struct cnode *cp = VTOC(vp);
	int retval = 0;

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {
		return (EISDIR);
	}

	/*
	 * See the comment below in hfs_truncate for why we need to call
	 * setsize here. Essentially we want to avoid pending IO if we
	 * already know that the blocks are going to be released here.
	 * This function is only called when totally removing all storage for a
	 * file, so we can take a shortcut and immediately setsize (0);
	 */
	ubc_setsize(vp, 0);

	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)
		return (EINVAL);

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
#if QUOTA
	if ((retval = hfs_getinoquota(cp))) {
		return (retval);
	}
#endif /* QUOTA */

	/* Wipe out any invalid ranges which have yet to be backed by disk */
	rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);

	/*
	 * Account for any unmapped blocks. Since we're deleting the
	 * entire file, we don't have to worry about just shrinking
	 * to a smaller number of borrowed blocks.
	 */
	if (fp->ff_unallocblocks > 0) {
		u_int32_t loanedBlocks;

		hfs_lock_mount(hfsmp);
		loanedBlocks = fp->ff_unallocblocks;
		cp->c_blocks -= loanedBlocks;
		fp->ff_blocks -= loanedBlocks;
		fp->ff_unallocblocks = 0;

		hfsmp->loanedBlocks -= loanedBlocks;

		hfs_unlock_mount(hfsmp);
	}

	return 0;
}
/*
 * Special wrapper around calling TruncateFileC. This function is usable
 * even when the catalog record does not exist any longer, making it ideal
 * for use when deleting a file. The simplification here is that we know
 * that we are releasing all blocks.
 *
 * Note that this function may be called when there is no vnode backing
 * the file fork in question. We may call this from hfs_vnop_inactive
 * to clear out resource fork data (and may not want to clear out the data
 * fork yet). As a result, we pointer-check both sets of inputs before
 * doing anything with them.
 *
 * The caller is responsible for saving off a copy of the filefork(s)
 * embedded within the cnode prior to calling this function. The pointers
 * supplied as arguments must be valid even if the cnode is no longer valid.
 */
int
hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
		struct filefork *rsrcfork, u_int32_t fileid) {

	off_t filebytes;
	u_int32_t fileblocks;
	int blksize = 0;
	int error = 0;
	int lockflags;

	blksize = hfsmp->blockSize;

	/* Data Fork */
	if (datafork) {
3796 off_t prev_filebytes
;
3797 datafork
->ff_size
= 0;
3799 fileblocks
= datafork
->ff_blocks
;
3800 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
3801 prev_filebytes
= filebytes
;
3803 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3805 while (filebytes
> 0) {
3806 if (filebytes
> HFS_BIGFILE_SIZE
) {
3807 filebytes
-= HFS_BIGFILE_SIZE
;
3812 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3813 if (hfs_start_transaction(hfsmp
) != 0) {
3818 if (datafork
->ff_unallocblocks
== 0) {
3819 /* Protect extents b-tree and allocation bitmap */
3820 lockflags
= SFL_BITMAP
;
3821 if (overflow_extents(datafork
))
3822 lockflags
|= SFL_EXTENTS
;
3823 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3825 error
= MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp
), datafork
, filebytes
, 1, 0, fileid
, false));
3827 hfs_systemfile_unlock(hfsmp
, lockflags
);
3829 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3831 struct cnode
*cp
= datafork
? FTOC(datafork
) : NULL
;
3833 vp
= cp
? CTOV(cp
, 0) : NULL
;
3834 hfs_hotfile_adjust_blocks(vp
, (int64_t)((prev_filebytes
- filebytes
) / blksize
));
3835 prev_filebytes
= filebytes
;
3837 /* Finish the transaction and start over if necessary */
3838 hfs_end_transaction(hfsmp
);
3847 if (error
== 0 && rsrcfork
) {
3848 rsrcfork
->ff_size
= 0;
3850 fileblocks
= rsrcfork
->ff_blocks
;
3851 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
3853 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3855 while (filebytes
> 0) {
3856 if (filebytes
> HFS_BIGFILE_SIZE
) {
3857 filebytes
-= HFS_BIGFILE_SIZE
;
3862 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3863 if (hfs_start_transaction(hfsmp
) != 0) {
3868 if (rsrcfork
->ff_unallocblocks
== 0) {
3869 /* Protect extents b-tree and allocation bitmap */
3870 lockflags
= SFL_BITMAP
;
3871 if (overflow_extents(rsrcfork
))
3872 lockflags
|= SFL_EXTENTS
;
3873 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3875 error
= MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp
), rsrcfork
, filebytes
, 1, 1, fileid
, false));
3877 hfs_systemfile_unlock(hfsmp
, lockflags
);
3879 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3881 /* Finish the transaction and start over if necessary */
3882 hfs_end_transaction(hfsmp
);
errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool have_cnode_lock)
{
	errno_t error;

	/*
	 * Call ubc_setsize to give the VM subsystem a chance to do
	 * whatever it needs to with existing pages before we delete
	 * blocks. Note that symlinks don't use the UBC so we'll
	 * get back ENOENT in that case.
	 */
	if (have_cnode_lock) {
		error = ubc_setsize_ex(vp, len, UBC_SETSIZE_NO_FS_REENTRY);
		if (error == EAGAIN) {
			cnode_t *cp = VTOC(vp);

			if (cp->c_truncatelockowner != current_thread()) {
#if DEVELOPMENT || DEBUG
				panic("hfs: hfs_ubc_setsize called without exclusive truncate lock!");
#else
				printf("hfs: hfs_ubc_setsize called without exclusive truncate lock!\n");
#endif
			}

			hfs_unlock(cp);
			error = ubc_setsize_ex(vp, len, 0);
			hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
		}
	} else {
		error = ubc_setsize_ex(vp, len, 0);
	}

	return error == ENOENT ? 0 : error;
}
/*
 * Truncate a cnode to at most length size, freeing (or adding) the
 * disk blocks.
 */
int
hfs_truncate(struct vnode *vp, off_t length, int flags,
		int truncateflags, vfs_context_t context)
{
	struct filefork *fp = VTOF(vp);
	off_t filebytes;
	u_int32_t fileblocks;
	int blksize;
	int error = 0;
	struct cnode *cp = VTOC(vp);
	hfsmount_t *hfsmp = VTOHFS(vp);

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {
		return (EISDIR);
	}
	/* A swap file cannot change size. */
	if (vnode_isswap(vp) && length && !ISSET(flags, IO_NOAUTH)) {
		return (EPERM);
	}

	blksize = hfsmp->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	bool caller_has_cnode_lock = (cp->c_lockowner == current_thread());

	error = hfs_ubc_setsize(vp, length, caller_has_cnode_lock);
	if (error)
		return error;

	if (!caller_has_cnode_lock) {
		error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error)
			return error;
	}

	// have to loop truncating or growing files that are
	// really big because otherwise transactions can get
	// enormous and consume too many kernel resources.

	if (length < filebytes) {
		while (filebytes > length) {
			if ((filebytes - length) > HFS_BIGFILE_SIZE) {
				filebytes -= HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
			if (error)
				break;
		}
	} else if (length > filebytes) {
		kauth_cred_t cred = vfs_context_ucred(context);
		const bool keep_reserve = cred && suser(cred, NULL) != 0;

		if (hfs_freeblks(hfsmp, keep_reserve)
		    < howmany(length - filebytes, blksize)) {
			error = ENOSPC;
		} else {
			while (filebytes < length) {
				if ((length - filebytes) > HFS_BIGFILE_SIZE) {
					filebytes += HFS_BIGFILE_SIZE;
				} else {
					filebytes = length;
				}
				error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
				if (error)
					break;
			}
		}
	} else /* Same logical size */ {

		error = do_hfs_truncate(vp, length, flags, truncateflags, context);
	}
	/* Files that are changing size are not hot file candidates. */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}

	if (!caller_has_cnode_lock)
		hfs_unlock(cp);

	// Make sure UBC's size matches up (in case we didn't completely succeed)
	errno_t err2 = hfs_ubc_setsize(vp, fp->ff_size, caller_has_cnode_lock);
	if (!error)
		error = err2;

	return error;
}
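/*
 * A minimal sketch of the chunking loop above, with a hypothetical step size:
 * a large shrink walks the physical EOF down one HFS_BIGFILE_SIZE-sized step
 * per do_hfs_truncate() call so that no single transaction grows unbounded.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define STEP	(5LL * 1024 * 1024 * 1024)	/* stand-in for HFS_BIGFILE_SIZE */

int
main(void)
{
	int64_t filebytes = 17LL * 1024 * 1024 * 1024;	/* current physical size */
	int64_t length = 0;				/* target size */

	while (filebytes > length) {
		if ((filebytes - length) > STEP)
			filebytes -= STEP;
		else
			filebytes = length;
		printf("truncate to %lld\n", (long long)filebytes);	/* one transaction each */
	}
	return 0;
}
#endif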
4024 * Preallocate file storage space.
4027 hfs_vnop_allocate(struct vnop_allocate_args
/* {
4031 off_t *a_bytesallocated;
4033 vfs_context_t a_context;
4036 struct vnode
*vp
= ap
->a_vp
;
4038 struct filefork
*fp
;
4040 off_t length
= ap
->a_length
;
4042 off_t moreBytesRequested
;
4043 off_t actualBytesAdded
;
4045 u_int32_t fileblocks
;
4046 int retval
, retval2
;
4047 u_int32_t blockHint
;
4048 u_int32_t extendFlags
; /* For call to ExtendFileC */
4049 struct hfsmount
*hfsmp
;
4050 kauth_cred_t cred
= vfs_context_ucred(ap
->a_context
);
4054 *(ap
->a_bytesallocated
) = 0;
4056 if (!vnode_isreg(vp
))
4058 if (length
< (off_t
)0)
4063 orig_ctime
= VTOC(vp
)->c_ctime
;
4065 check_for_tracked_file(vp
, orig_ctime
, ap
->a_length
== 0 ? NAMESPACE_HANDLER_TRUNCATE_OP
|NAMESPACE_HANDLER_DELETE_OP
: NAMESPACE_HANDLER_TRUNCATE_OP
, NULL
);
4067 hfs_lock_truncate(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
4069 if ((retval
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
))) {
4077 fileblocks
= fp
->ff_blocks
;
4078 filebytes
= (off_t
)fileblocks
* (off_t
)vcb
->blockSize
;
4080 if ((ap
->a_flags
& ALLOCATEFROMVOL
) && (length
< filebytes
)) {
4085 /* Fill in the flags word for the call to Extend the file */
4087 extendFlags
= kEFNoClumpMask
;
4088 if (ap
->a_flags
& ALLOCATECONTIG
)
4089 extendFlags
|= kEFContigMask
;
4090 if (ap
->a_flags
& ALLOCATEALL
)
4091 extendFlags
|= kEFAllMask
;
4092 if (cred
&& suser(cred
, NULL
) != 0)
4093 extendFlags
|= kEFReserveMask
;
4094 if (hfs_virtualmetafile(cp
))
4095 extendFlags
|= kEFMetadataMask
;
4099 startingPEOF
= filebytes
;
4101 if (ap
->a_flags
& ALLOCATEFROMPEOF
)
4102 length
+= filebytes
;
4103 else if (ap
->a_flags
& ALLOCATEFROMVOL
)
4104 blockHint
= ap
->a_offset
/ VTOVCB(vp
)->blockSize
;
4106 /* If no changes are necesary, then we're done */
4107 if (filebytes
== length
)
4111 * Lengthen the size of the file. We must ensure that the
4112 * last byte of the file is allocated. Since the smallest
4113 * value of filebytes is 0, length will be at least 1.
4115 if (length
> filebytes
) {
4116 if (ISSET(extendFlags
, kEFAllMask
)
4117 && (hfs_freeblks(hfsmp
, ISSET(extendFlags
, kEFReserveMask
))
4118 < howmany(length
- filebytes
, hfsmp
->blockSize
))) {
4123 off_t total_bytes_added
= 0, orig_request_size
;
4125 orig_request_size
= moreBytesRequested
= length
- filebytes
;
4128 retval
= hfs_chkdq(cp
,
4129 (int64_t)(roundup(moreBytesRequested
, vcb
->blockSize
)),
4136 * Metadata zone checks.
4138 if (hfsmp
->hfs_flags
& HFS_METADATA_ZONE
) {
4140 * Allocate Journal and Quota files in metadata zone.
4142 if (hfs_virtualmetafile(cp
)) {
4143 blockHint
= hfsmp
->hfs_metazone_start
;
4144 } else if ((blockHint
>= hfsmp
->hfs_metazone_start
) &&
4145 (blockHint
<= hfsmp
->hfs_metazone_end
)) {
4147 * Move blockHint outside metadata zone.
4149 blockHint
= hfsmp
->hfs_metazone_end
+ 1;
4154 while ((length
> filebytes
) && (retval
== E_NONE
)) {
4155 off_t bytesRequested
;
4157 if (hfs_start_transaction(hfsmp
) != 0) {
4162 /* Protect extents b-tree and allocation bitmap */
4163 lockflags
= SFL_BITMAP
;
4164 if (overflow_extents(fp
))
4165 lockflags
|= SFL_EXTENTS
;
4166 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
4168 if (moreBytesRequested
>= HFS_BIGFILE_SIZE
) {
4169 bytesRequested
= HFS_BIGFILE_SIZE
;
4171 bytesRequested
= moreBytesRequested
;
4174 if (extendFlags
& kEFContigMask
) {
4175 // if we're on a sparse device, this will force it to do a
4176 // full scan to find the space needed.
4177 hfsmp
->hfs_flags
&= ~HFS_DID_CONTIG_SCAN
;
4180 retval
= MacToVFSError(ExtendFileC(vcb
,
4185 &actualBytesAdded
));
4187 if (retval
== E_NONE
) {
4188 *(ap
->a_bytesallocated
) += actualBytesAdded
;
4189 total_bytes_added
+= actualBytesAdded
;
4190 moreBytesRequested
-= actualBytesAdded
;
4191 if (blockHint
!= 0) {
4192 blockHint
+= actualBytesAdded
/ vcb
->blockSize
;
4195 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)vcb
->blockSize
;
4197 hfs_systemfile_unlock(hfsmp
, lockflags
);
4200 (void) hfs_update(vp
, 0);
4201 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
4204 hfs_end_transaction(hfsmp
);
4209 * if we get an error and no changes were made then exit
4210 * otherwise we must do the hfs_update to reflect the changes
4212 if (retval
&& (startingPEOF
== filebytes
))
4216 * Adjust actualBytesAdded to be allocation block aligned, not
4217 * clump size aligned.
4218 * NOTE: So what we are reporting does not affect reality
4219 * until the file is closed, when we truncate the file to allocation
4222 if (total_bytes_added
!= 0 && orig_request_size
< total_bytes_added
)
4223 *(ap
->a_bytesallocated
) =
4224 roundup(orig_request_size
, (off_t
)vcb
->blockSize
);
4226 } else { /* Shorten the size of the file */
4229 * N.B. At present, this code is never called. If and when we
4230 * do start using it, it looks like there might be slightly
4231 * strange semantics with the file size: it's possible for the
4232 * file size to *increase* e.g. if current file size is 5,
4233 * length is 1024 and filebytes is 4096, the file size will
4234 * end up being 1024 bytes. This isn't necessarily a problem
4235 * but it's not consistent with the code above which doesn't
4236 * change the file size.
4239 retval
= hfs_truncate(vp
, length
, 0, 0, ap
->a_context
);
4240 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)vcb
->blockSize
;
4243 * if we get an error and no changes were made then exit
4244 * otherwise we must do the hfs_update to reflect the changes
4246 if (retval
&& (startingPEOF
== filebytes
)) goto Err_Exit
;
4248 /* These are bytesreleased */
4249 (void) hfs_chkdq(cp
, (int64_t)-((startingPEOF
- filebytes
)), NOCRED
,0);
4252 if (fp
->ff_size
> filebytes
) {
4253 fp
->ff_size
= filebytes
;
4255 hfs_ubc_setsize(vp
, fp
->ff_size
, true);
4260 cp
->c_flag
|= C_MODIFIED
;
4261 cp
->c_touch_chgtime
= TRUE
;
4262 cp
->c_touch_modtime
= TRUE
;
4263 retval2
= hfs_update(vp
, 0);
4268 hfs_unlock_truncate(cp
, HFS_LOCK_DEFAULT
);
4275 * Pagein for HFS filesystem
4278 hfs_vnop_pagein(struct vnop_pagein_args
*ap
)
4280 struct vnop_pagein_args {
4283 vm_offset_t a_pl_offset,
4287 vfs_context_t a_context;
4293 struct filefork
*fp
;
4296 upl_page_info_t
*pl
;
4298 off_t page_needed_f_offset
;
4303 boolean_t truncate_lock_held
= FALSE
;
4304 boolean_t file_converted
= FALSE
;
4312 if ((error
= cp_handle_vnop(vp
, CP_READ_ACCESS
| CP_WRITE_ACCESS
, 0)) != 0) {
4314 * If we errored here, then this means that one of two things occurred:
4315 * 1. there was a problem with the decryption of the key.
4316 * 2. the device is locked and we are not allowed to access this particular file.
4318 * Either way, this means that we need to shut down this upl now. As long as
4319 * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves)
4320 * then we create a upl and immediately abort it.
4322 if (ap
->a_pl
== NULL
) {
4323 /* create the upl */
4324 ubc_create_upl (vp
, ap
->a_f_offset
, ap
->a_size
, &upl
, &pl
,
4325 UPL_UBC_PAGEIN
| UPL_RET_ONLY_ABSENT
);
4326 /* mark the range as needed so it doesn't immediately get discarded upon abort */
4327 ubc_upl_range_needed (upl
, ap
->a_pl_offset
/ PAGE_SIZE
, 1);
4329 /* Abort the range */
4330 ubc_upl_abort_range (upl
, 0, ap
->a_size
, UPL_ABORT_FREE_ON_EMPTY
| UPL_ABORT_ERROR
);
4336 #endif /* CONFIG_PROTECT */
4338 if (ap
->a_pl
!= NULL
) {
4340 * this can only happen for swap files now that
4341 * we're asking for V2 paging behavior...
4342 * so don't need to worry about decompression, or
4343 * keeping track of blocks read or taking the truncate lock
4345 error
= cluster_pagein(vp
, ap
->a_pl
, ap
->a_pl_offset
, ap
->a_f_offset
,
4346 ap
->a_size
, (off_t
)fp
->ff_size
, ap
->a_flags
);
4350 page_needed_f_offset
= ap
->a_f_offset
+ ap
->a_pl_offset
;
4354 * take truncate lock (shared/recursive) to guard against
4355 * zero-fill thru fsync interfering, but only for v2
4357 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
4358 * lock shared and we are allowed to recurse 1 level if this thread already
4359 * owns the lock exclusively... this can legally occur
4360 * if we are doing a shrinking ftruncate against a file
4361 * that is mapped private, and the pages being truncated
4362 * do not currently exist in the cache... in that case
4363 * we will have to page-in the missing pages in order
4364 * to provide them to the private mapping... we must
4365 * also call hfs_unlock_truncate with a postive been_recursed
4366 * arg to indicate that if we have recursed, there is no need to drop
4367 * the lock. Allowing this simple recursion is necessary
4368 * in order to avoid a certain deadlock... since the ftruncate
4369 * already holds the truncate lock exclusively, if we try
4370 * to acquire it shared to protect the pagein path, we will
4373 * NOTE: The if () block below is a workaround in order to prevent a
4374 * VM deadlock. See rdar://7853471.
4376 * If we are in a forced unmount, then launchd will still have the
4377 * dyld_shared_cache file mapped as it is trying to reboot. If we
4378 * take the truncate lock here to service a page fault, then our
4379 * thread could deadlock with the forced-unmount. The forced unmount
4380 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
4381 * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
4382 * thread will think it needs to copy all of the data out of the file
4383 * and into a VM copy object. If we hold the cnode lock here, then that
4384 * VM operation will not be able to proceed, because we'll set a busy page
4385 * before attempting to grab the lock. Note that this isn't as simple as "don't
4386 * call ubc_setsize" because doing that would just shift the problem to the
4387 * ubc_msync done before the vnode is reclaimed.
4389 * So, if a forced unmount on this volume is in flight AND the cnode is
4390 * marked C_DELETED, then just go ahead and do the page in without taking
     * the lock (thus suspending pagein_v2 semantics temporarily).  Since it's on a file
     * that is not going to be available on the next mount, this seems like an
     * OK solution from a correctness point of view, even though it is hacky.
     */
    if (vfs_isforce(vp->v_mount)) {
        if (cp->c_flag & C_DELETED) {
            /* If we don't get it, then just go ahead and operate without the lock */
            truncate_lock_held = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
        }
    }
    else {
        hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
        truncate_lock_held = TRUE;
    }

    kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);

    if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
        error = EINVAL;
        goto pagein_done;
    }
    ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);

    upl_size = isize = ap->a_size;

    /*
     * Scan from the back to find the last page in the UPL, so that we
     * aren't looking at a UPL that may have already been freed by the
     * preceding aborts/completions.
     */
    for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
        if (upl_page_present(pl, --pg_index))
            break;
        if (pg_index == 0) {
            /*
             * no absent pages were found in the range specified
             * just abort the UPL to get rid of it and then we're done
             */
            ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
            goto pagein_done;
        }
    }

    /*
     * initialize the offset variables before we touch the UPL.
     * f_offset is the position into the file, in bytes
     * offset is the position into the UPL, in bytes
     * pg_index is the pg# of the UPL we're operating on
     * isize is the offset into the UPL of the last page that is present.
     */
    isize = ((pg_index + 1) * PAGE_SIZE);
    pg_index = 0;
    offset = 0;
    f_offset = ap->a_f_offset;

    while (isize) {
        int  xsize;
        int  num_of_pages;

        if ( !upl_page_present(pl, pg_index)) {
            /*
             * we asked for RET_ONLY_ABSENT, so it's possible
             * to get back empty slots in the UPL.
             * just skip over them
             */
            f_offset += PAGE_SIZE;
            offset   += PAGE_SIZE;
            isize    -= PAGE_SIZE;
            pg_index++;

            continue;
        }
        /*
         * We know that we have at least one absent page.
         * Now checking to see how many in a row we have
         */
        num_of_pages = 1;
        xsize = isize - PAGE_SIZE;

        while (xsize) {
            if ( !upl_page_present(pl, pg_index + num_of_pages))
                break;
            num_of_pages++;
            xsize -= PAGE_SIZE;
        }
        xsize = num_of_pages * PAGE_SIZE;

#if HFS_COMPRESSION
        if (VNODE_IS_RSRC(vp)) {
            /* allow pageins of the resource fork */
        } else {
            int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */

            if (compressed) {

                if (truncate_lock_held) {
                    /*
                     * can't hold the truncate lock when calling into the decmpfs layer
                     * since it calls back into this layer... even though we're only
                     * holding the lock in shared mode, and the re-entrant path only
                     * takes the lock shared, we can deadlock if some other thread
                     * tries to grab the lock exclusively in between.
                     */
                    hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
                    truncate_lock_held = FALSE;
                }
                ap->a_pl = upl;
                ap->a_pl_offset = offset;
                ap->a_f_offset = f_offset;
                ap->a_size = xsize;

                error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
                /*
                 * note that decmpfs_pagein_compressed can change the state of
                 * 'compressed'... it will set it to 0 if the file is no longer
                 * compressed once the compression lock is successfully taken
                 * i.e. we would block on that lock while the file is being inflated
                 */
                if (error == 0 && vnode_isfastdevicecandidate(vp)) {
                    (void) hfs_addhotfile(vp);
                }
                if (compressed) {
                    if (error == 0) {
                        /* successful page-in, update the access time */
                        VTOC(vp)->c_touch_acctime = TRUE;

                        //
                        // compressed files are not traditional hot file candidates
                        // but they may be for CF (which ignores the ff_bytesread
                        // field)
                        //
                        if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
                            fp->ff_bytesread = 0;
                        }
                    } else if (error == EAGAIN) {
                        /*
                         * EAGAIN indicates someone else already holds the compression lock...
                         * to avoid deadlocking, we'll abort this range of pages with an
                         * indication that the pagein needs to be redriven
                         */
                        ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
                    } else if (error == ENOSPC) {

                        if (upl_size == PAGE_SIZE)
                            panic("decmpfs_pagein_compressed: couldn't ubc_upl_map a single page\n");

                        ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);

                        ap->a_size = PAGE_SIZE;
                        ap->a_pl = NULL;
                        ap->a_pl_offset = 0;
                        ap->a_f_offset = page_needed_f_offset;

                        goto retry_pagein;
                    }
                    goto pagein_next_range;
                }
                else {
                    /*
                     * Set file_converted only if the file became decompressed while we were
                     * paging in.  If it were still compressed, we would re-start the loop using the goto
                     * in the above block.  This avoids overloading truncate_lock_held as our retry_pagein
                     * condition below, since we could have avoided taking the truncate lock to prevent
                     * a deadlock in the force unmount case.
                     */
                    file_converted = TRUE;
                }
            }
            if (file_converted == TRUE) {
                /*
                 * the file was converted back to a regular file after we first saw it as compressed
                 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over
                 * reset a_size so that we consider what remains of the original request
                 * and null out a_upl and a_pl_offset.
                 *
                 * We should only be able to get into this block if decmpfs_pagein_compressed
                 * successfully decompressed the range in question for this file.
                 */
                ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);

                ap->a_size = isize;
                ap->a_pl = NULL;
                ap->a_pl_offset = 0;

                /* Reset file_converted back to false so that we don't infinite-loop. */
                file_converted = FALSE;

                goto retry_pagein;
            }
        }
#endif
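        /*
         * Page this run of absent pages in from the file's own extents
         * (the non-compressed path).
         */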
        error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);

        /*
         * Keep track of blocks read.
         */
        if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
            int bytesread;
            int took_cnode_lock = 0;

            if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
                bytesread = fp->ff_size;
            else
                bytesread = xsize;

            /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
            if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
                hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
                took_cnode_lock = 1;
            }
            /*
             * If this file hasn't been seen since the start of
             * the current sampling period then start over.
             */
            if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
                struct timeval tv;

                fp->ff_bytesread = bytesread;
                microtime(&tv);
                cp->c_atime = tv.tv_sec;
            } else {
                fp->ff_bytesread += bytesread;
            }
            cp->c_touch_acctime = TRUE;

            if (vnode_isfastdevicecandidate(vp)) {
                (void) hfs_addhotfile(vp);
            }
            if (took_cnode_lock)
                hfs_unlock(cp);
        }
pagein_next_range:
        f_offset += xsize;
        offset   += xsize;
        isize    -= xsize;
        pg_index += num_of_pages;

        error = 0;
    }

pagein_done:
    if (truncate_lock_held == TRUE) {
        /* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
        hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
    }

    return (error);
}
/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
	   vnode_t       a_vp,
	   upl_t         a_pl,
	   vm_offset_t   a_pl_offset,
	   off_t         a_f_offset,
	   size_t        a_size,
	   int           a_flags
	   vfs_context_t a_context;
	};
*/
{
    vnode_t vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    int retval = 0;
    off_t filesize;
    upl_t upl;
    upl_page_info_t * pl;
    vm_offset_t a_pl_offset;
    int a_flags;
    int is_pageoutv2 = 0;
    kern_return_t kret;

    cp = VTOC(vp);
    fp = VTOF(vp);

    a_flags = ap->a_flags;
    a_pl_offset = ap->a_pl_offset;

    /*
     * we can tell if we're getting the new or old behavior from the UPL
     */
    if ((upl = ap->a_pl) == NULL) {
        int request_flags;

        is_pageoutv2 = 1;
        /*
         * we're in control of any UPL we commit
         * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
         */
        a_flags &= ~UPL_NOCOMMIT;
        a_pl_offset = 0;

        /*
         * For V2 semantics, we want to take the cnode truncate lock
         * shared to guard against the file size changing via zero-filling.
         *
         * However, we have to be careful because we may be invoked
         * via the ubc_msync path to write out dirty mmap'd pages
         * in response to a lock event on a content-protected
         * filesystem (e.g. to write out class A files).
         * As a result, we want to take the truncate lock 'SHARED' with
         * the mini-recursion locktype so that we don't deadlock/panic
         * because we may be already holding the truncate lock exclusive to force any other
         * IOs to have blocked behind us.
         */
        hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);

        if (a_flags & UPL_MSYNC) {
            request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
        }
        else {
            request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
        }

        kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);

        if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
            retval = EINVAL;
            goto pageout_done;
        }
    }
    /*
     * from this point forward upl points at the UPL we're working with
     * it was either passed in or we successfully created it
     */

    /*
     * Figure out where the file ends, for pageout purposes.  If
     * ff_new_size > ff_size, then we're in the middle of extending the
     * file via a write, so it is safe (and necessary) that we be able
     * to pageout up to that point.
     */
    filesize = fp->ff_size;
    if (fp->ff_new_size > filesize)
        filesize = fp->ff_new_size;

    /*
     * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
     * UPL instead of relying on the UPL passed into us.  We go ahead and do that here,
     * scanning for dirty ranges.  We'll issue our own N cluster_pageout calls, for
     * N dirty ranges in the UPL.  Note that this is almost a direct copy of the
     * logic in vnode_pageout except that we need to do it after grabbing the truncate
     * lock in HFS so that we don't lock invert ourselves.
     *
     * Note that we can still get into this function on behalf of the default pager with
     * non-V2 behavior (swapfiles).  However in that case, we did not grab locks above
     * since fsync and other writing threads will grab the locks, then mark the
     * relevant pages as busy.  But the pageout codepath marks the pages as busy,
     * and THEN would attempt to grab the truncate lock, which would result in deadlock.  So
     * we do not try to grab anything for the pre-V2 case, which should only be accessed
     * by the paging/VM system.
     */
    if (is_pageoutv2) {
        off_t f_offset;
        int offset;
        int isize;
        int pg_index;
        int error;
        int error_ret = 0;

        isize = ap->a_size;
        f_offset = ap->a_f_offset;

        /*
         * Scan from the back to find the last page in the UPL, so that we
         * aren't looking at a UPL that may have already been freed by the
         * preceding aborts/completions.
         */
        for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
            if (upl_page_present(pl, --pg_index))
                break;
            if (pg_index == 0) {
                ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
                goto pageout_done;
            }
        }

        /*
         * initialize the offset variables before we touch the UPL.
         * a_f_offset is the position into the file, in bytes
         * offset is the position into the UPL, in bytes
         * pg_index is the pg# of the UPL we're operating on.
         * isize is the offset into the UPL of the last non-clean page.
         */
        isize = ((pg_index + 1) * PAGE_SIZE);

        offset = 0;
        pg_index = 0;

        while (isize) {
            int  xsize;
            int  num_of_pages;

            if ( !upl_page_present(pl, pg_index)) {
                /*
                 * we asked for RET_ONLY_DIRTY, so it's possible
                 * to get back empty slots in the UPL.
                 * just skip over them
                 */
                f_offset += PAGE_SIZE;
                offset   += PAGE_SIZE;
                isize    -= PAGE_SIZE;
                pg_index++;

                continue;
            }
            if ( !upl_dirty_page(pl, pg_index)) {
                panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
            }

            /*
             * We know that we have at least one dirty page.
             * Now checking to see how many in a row we have
             */
            num_of_pages = 1;
            xsize = isize - PAGE_SIZE;

            while (xsize) {
                if ( !upl_dirty_page(pl, pg_index + num_of_pages))
                    break;
                num_of_pages++;
                xsize -= PAGE_SIZE;
            }
            xsize = num_of_pages * PAGE_SIZE;

            if ((error = cluster_pageout(vp, upl, offset, f_offset,
                                         xsize, filesize, a_flags))) {
                if (error_ret == 0)
                    error_ret = error;
            }
            f_offset += xsize;
            offset   += xsize;
            isize    -= xsize;
            pg_index += num_of_pages;
        }
        /* capture errnos bubbled out of cluster_pageout if they occurred */
        if (error_ret != 0) {
            retval = error_ret;
        }
    } /* end block for v2 pageout behavior */
    else {
        /*
         * just call cluster_pageout for old pre-v2 behavior
         */
        retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
                                 ap->a_size, filesize, a_flags);
    }

    /*
     * If data was written, update the modification time of the file
     * but only if it's mapped writable; we will have touched the
     * modification time for direct writes.
     */
    if (retval == 0 && (ubc_is_mapped_writable(vp)
                        || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING))) {
        hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);

        // Check again with lock
        bool mapped_writable = ubc_is_mapped_writable(vp);
        if (mapped_writable
            || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING)) {
            cp->c_touch_modtime = TRUE;
            cp->c_touch_chgtime = TRUE;

            /*
             * We only need to increment the generation counter if
             * it's currently mapped writable because we incremented
             * the counter in hfs_vnop_mnomap.
             */
            if (mapped_writable)
                hfs_incr_gencount(VTOC(vp));

            /*
             * If setuid or setgid bits are set and this process is
             * not the superuser then clear the setuid and setgid bits
             * as a precaution against tampering.
             */
            if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
                (vfs_context_suser(ap->a_context) != 0)) {
                cp->c_mode &= ~(S_ISUID | S_ISGID);
            }
        }

        hfs_unlock(cp);
    }

pageout_done:
    if (is_pageoutv2) {
        /*
         * Release the truncate lock.  Note that because
         * we may have taken the lock recursively by
         * being invoked via ubc_msync due to lockdown,
         * we should release it recursively, too.
         */
        hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
    }
    return (retval);
}
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
    int retval = 0;
    register struct buf *bp = ap->a_bp;
    register struct vnode *vp = buf_vnode(bp);
    BlockDescriptor block;

    /* Trap B-Tree writes */
    if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
        (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
        (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
        (vp == VTOHFS(vp)->hfc_filevp)) {

        /*
         * Swap and validate the node if it is in native byte order.
         * This is always true on big endian, so we always validate
         * before writing here.  On little endian, the node typically has
         * been swapped and validated when it was written to the journal,
         * so we won't do anything here.
         */
        if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
            /* Prepare the block pointer */
            block.blockHeader = bp;
            block.buffer = (char *)buf_dataptr(bp);
            block.blockNum = buf_lblkno(bp);
            /* not found in cache ==> came from disk */
            block.blockReadFromDisk = (buf_fromcache(bp) == 0);
            block.blockSize = buf_count(bp);

            /* Endian un-swap B-Tree node */
            retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
            if (retval)
                panic("hfs_vnop_bwrite: about to write corrupt node!\n");
        }
    }

    /* This buffer shouldn't be locked anymore but if it is clear it */
    if ((buf_flags(bp) & B_LOCKED)) {
        if (VTOHFS(vp)->jnl) {
            panic("hfs: CLEARING the lock bit on bp %p\n", bp);
        }
        buf_clearflags(bp, B_LOCKED);
    }
    retval = vn_bwrite (ap);

    return (retval);
}
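/*
 * Pin or unpin a contiguous run of allocation blocks on the underlying
 * device.  The extent is converted to a byte offset/length pair and handed
 * to the device through a pin/unpin ioctl (_DKIOCCSPINEXTENT or
 * _DKIOCCSUNPINEXTENT), selected according to 'pin_state'.
 */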
int
hfs_pin_block_range(struct hfsmount *hfsmp, int pin_state, uint32_t start_block, uint32_t nblocks, vfs_context_t ctx)
{
    _dk_cs_pin_t pin;
    unsigned ioc;
    int err;

    memset(&pin, 0, sizeof(pin));
    pin.cp_extent.offset = ((uint64_t)start_block) * HFSTOVCB(hfsmp)->blockSize;
    pin.cp_extent.length = ((uint64_t)nblocks) * HFSTOVCB(hfsmp)->blockSize;
    switch (pin_state) {
    case HFS_PIN_IT:
        ioc = _DKIOCCSPINEXTENT;
        pin.cp_flags = _DKIOCCSPINTOFASTMEDIA;
        break;
    case HFS_PIN_IT | HFS_TEMP_PIN:
        ioc = _DKIOCCSPINEXTENT;
        pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSTEMPORARYPIN;
        break;
    case HFS_PIN_IT | HFS_DATALESS_PIN:
        ioc = _DKIOCCSPINEXTENT;
        pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSPINFORSWAPFILE;
        break;
    case HFS_UNPIN_IT:
        ioc = _DKIOCCSUNPINEXTENT;
        pin.cp_flags = 0;
        break;
    case HFS_UNPIN_IT | HFS_EVICT_PIN:
        ioc = _DKIOCCSPINEXTENT;
        pin.cp_flags = _DKIOCCSPINTOSLOWMEDIA;
        break;
    default:
        return EINVAL;
    }

    err = VNOP_IOCTL(hfsmp->hfs_devvp, ioc, (caddr_t)&pin, 0, ctx);
    return err;
}
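//
// hfs_pin_vnode
//
// Pin (or unpin) all of the allocation blocks backing a regular file by
// walking the fork's resident extents (kHFSPlusExtentDensity of them) and,
// if the fork also has overflow extents, pinning those as well.  On return,
// *num_blocks_pinned (when the pointer is non-NULL) holds the number of
// allocation blocks that were pinned.
//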
// The cnode lock should already be held on entry to this function
//
int
hfs_pin_vnode(struct hfsmount *hfsmp, struct vnode *vp, int pin_state, uint32_t *num_blocks_pinned, vfs_context_t ctx)
{
    struct filefork *fp = VTOF(vp);
    int i, err=0, need_put=0;
    struct vnode *rsrc_vp=NULL;
    uint32_t npinned = 0;

    if (num_blocks_pinned) {
        *num_blocks_pinned = 0;
    }

    if (vnode_vtype(vp) != VREG) {
        /* Not allowed to pin directories or symlinks */
        printf("hfs: can't pin vnode of type %d\n", vnode_vtype(vp));
        return (EPERM);
    }

    if (fp->ff_unallocblocks) {
        printf("hfs: can't pin a vnode w/unalloced blocks (%d)\n", fp->ff_unallocblocks);
        return (EINVAL);
    }

    /*
     * It is possible that if the caller unlocked/re-locked the cnode after checking
     * for C_NOEXISTS|C_DELETED that the file could have been deleted while the
     * cnode was unlocked.  So check the condition again and return ENOENT so that
     * the caller knows why we failed to pin the vnode.
     */
    if (VTOC(vp)->c_flag & (C_NOEXISTS|C_DELETED)) {
        // makes no sense to pin something that's pending deletion
        return ENOENT;
    }

    if (fp->ff_blocks == 0 && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
        if (!VNODE_IS_RSRC(vp) && hfs_vgetrsrc(hfsmp, vp, &rsrc_vp) == 0) {
            //printf("hfs: fileid %d resource fork nblocks: %d / size: %lld\n", VTOC(vp)->c_fileid,
            //       VTOC(rsrc_vp)->c_rsrcfork->ff_blocks,VTOC(rsrc_vp)->c_rsrcfork->ff_size);

            fp = VTOC(rsrc_vp)->c_rsrcfork;
            need_put = 1;
        }
    }
    if (fp->ff_blocks == 0) {
        if (need_put) {
            //
            // use a distinct error code for a compressed file that has no resource fork;
            // we return EALREADY to indicate that the data is already probably hot file
            // cached because it's in an EA and the attributes btree is on the ssd
            //
            err = EALREADY;
        } else {
            err = EINVAL;
        }
        goto out;
    }

    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        if (fp->ff_extents[i].startBlock == 0) {
            break;
        }

        err = hfs_pin_block_range(hfsmp, pin_state, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, ctx);
        if (err) {
            break;
        } else {
            npinned += fp->ff_extents[i].blockCount;
        }
    }

    if (err || npinned == 0) {
        goto out;
    }

    if (fp->ff_extents[kHFSPlusExtentDensity-1].startBlock) {
        uint32_t pblocks;
        uint8_t forktype = 0;

        if (fp == VTOC(vp)->c_rsrcfork) {
            forktype = 0xff;
        }
        /*
         * The file could have overflow extents, better pin them.
         *
         * We assume that since we are holding the cnode lock for this cnode,
         * the file's extents cannot be manipulated, but the tree could, so we
         * need to ensure that it doesn't change behind our back as we iterate it.
         */
        int lockflags = hfs_systemfile_lock (hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
        err = hfs_pin_overflow_extents(hfsmp, VTOC(vp)->c_fileid, forktype, &pblocks);
        hfs_systemfile_unlock (hfsmp, lockflags);

        if (err) {
            goto out;
        }
        npinned += pblocks;
    }

out:
    if (num_blocks_pinned) {
        *num_blocks_pinned = npinned;
    }

    if (need_put && rsrc_vp) {
        //
        // have to unlock the cnode since it's shared between the
        // resource fork vnode and the data fork vnode (and the
        // vnode_put() may need to re-acquire the cnode lock to
        // reclaim the resource fork vnode)
        //
        hfs_unlock(VTOC(vp));
        vnode_put(rsrc_vp);
        hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
    }
    return err;
}
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------
 * |///////////////|
 * -----------------
 * 0               N (file offset)
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 * 0               N     N+1              2N
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 * 0               N     N+1              2N
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *                       0               N (file offset)
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
             struct proc *p)
{
    struct cnode *cp;
    struct filefork *fp;
    struct hfsmount *hfsmp;
    u_int32_t headblks;
    u_int32_t datablks;
    u_int32_t blksize;
    u_int32_t growsize;
    u_int32_t nextallocsave;
    daddr64_t sector_a, sector_b;
    int eflags;
    off_t newbytes;
    int retval;
    int lockflags = 0;
    int took_trunc_lock = 0;
    int started_tr = 0;
    enum vtype vnodetype;

    vnodetype = vnode_vtype(vp);
    if (vnodetype != VREG) {
        /* Not allowed to move symlinks. */
        return (EPERM);
    }

    hfsmp = VTOHFS(vp);
    if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
        return (ENOSPC);
    }

    cp = VTOC(vp);
    fp = VTOF(vp);
    if (fp->ff_unallocblocks)
        return (EINVAL);

#if CONFIG_PROTECT
    /*
     * <rdar://problem/9118426>
     * Disable HFS file relocation on content-protected filesystems
     */
    if (cp_fs_protected (hfsmp->hfs_mp)) {
        return EINVAL;
    }
#endif
    /* If it's an SSD, also disable HFS relocation */
    if (hfsmp->hfs_flags & HFS_SSD) {
        return EINVAL;
    }

    blksize = hfsmp->blockSize;
    if (blockHint == 0)
        blockHint = hfsmp->nextAllocation;

    if (fp->ff_size > 0x7fffffff) {
        return (EFBIG);
    }

    //
    // We do not believe that this call to hfs_fsync() is
    // necessary and it causes a journal transaction
    // deadlock so we are removing it.
    //
    //if (vnodetype == VREG && !vnode_issystem(vp)) {
    //	retval = hfs_fsync(vp, MNT_WAIT, 0, p);
    //	if (retval)
    //		return (retval);
    //}

    if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
        hfs_unlock(cp);
        hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
        /* Force lock since caller expects the lock to be held. */
        if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
            return (retval);
        }
        /* No need to continue if file was removed. */
        if (cp->c_flag & C_NOEXISTS) {
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
            return (ENOENT);
        }
        took_trunc_lock = 1;
    }
    headblks = fp->ff_blocks;
    datablks = howmany(fp->ff_size, blksize);
    growsize = datablks * blksize;
    eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
    if (blockHint >= hfsmp->hfs_metazone_start &&
        blockHint <= hfsmp->hfs_metazone_end)
        eflags |= kEFMetadataMask;

    if (hfs_start_transaction(hfsmp) != 0) {
        if (took_trunc_lock)
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
        return (EINVAL);
    }
    started_tr = 1;
    /*
     * Protect the extents b-tree and the allocation bitmap
     * during MapFileBlockC and ExtendFileC operations.
     */
    lockflags = SFL_BITMAP;
    if (overflow_extents(fp))
        lockflags |= SFL_EXTENTS;
    lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

    retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
    if (retval) {
        retval = MacToVFSError(retval);
        goto out;
    }

    /*
     * STEP 1 - acquire new allocation blocks.
     */
    nextallocsave = hfsmp->nextAllocation;
    retval = ExtendFileC(hfsmp, (FCB *)fp, growsize, blockHint, eflags, &newbytes);
    if (eflags & kEFMetadataMask) {
        hfs_lock_mount(hfsmp);
        HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
        MarkVCBDirty(hfsmp);
        hfs_unlock_mount(hfsmp);
    }

    retval = MacToVFSError(retval);
    if (retval == 0) {
        cp->c_flag |= C_MODIFIED;
        if (newbytes < growsize) {
            retval = ENOSPC;
            goto restore;
        } else if (fp->ff_blocks < (headblks + datablks)) {
            printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
            retval = ENOSPC;
            goto restore;
        }

        retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
        if (retval) {
            retval = MacToVFSError(retval);
        } else if ((sector_a + 1) == sector_b) {
            retval = ENOSPC;
            goto restore;
        } else if ((eflags & kEFMetadataMask) &&
                   ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
                      hfsmp->hfs_metazone_end)) {
            const char * filestr;
            char emptystr = '\0';

            if (cp->c_desc.cd_nameptr != NULL) {
                filestr = (const char *)&cp->c_desc.cd_nameptr[0];
            } else if (vnode_name(vp) != NULL) {
                filestr = vnode_name(vp);
            } else {
                filestr = &emptystr;
            }
            retval = ENOSPC;
            goto restore;
        }
    }
    /* Done with system locks and journal for now. */
    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;
    hfs_end_transaction(hfsmp);
    started_tr = 0;

    if (retval) {
        /*
         * Check to see if failure is due to excessive fragmentation.
         */
        if ((retval == ENOSPC) &&
            (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
            hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
        }
        goto out;
    }
    /*
     * STEP 2 - clone file data into the new allocation blocks.
     */

    if (vnodetype == VLNK)
        retval = EPERM;
    else if (vnode_issystem(vp))
        retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
    else
        retval = hfs_clonefile(vp, headblks, datablks, blksize);

    /* Start transaction for step 3 or for a restore. */
    if (hfs_start_transaction(hfsmp) != 0) {
        retval = EINVAL;
        goto out;
    }
    started_tr = 1;
    if (retval)
        goto restore;

    /*
     * STEP 3 - switch to cloned data and remove old blocks.
     */
    lockflags = SFL_BITMAP;
    if (overflow_extents(fp))
        lockflags |= SFL_EXTENTS;
    lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

    retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;
    if (retval)
        goto restore;
out:
    if (took_trunc_lock)
        hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);

    if (lockflags) {
        hfs_systemfile_unlock(hfsmp, lockflags);
        lockflags = 0;
    }

    /* Push cnode's new extent data to disk. */
    if (retval == 0) {
        hfs_update(vp, 0);
    }
    if (hfsmp->jnl) {
        if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
            (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
        else
            (void) hfs_flushvolumeheader(hfsmp, 0);
    }
exit:
    if (started_tr)
        hfs_end_transaction(hfsmp);

    return (retval);

restore:
    if (fp->ff_blocks == headblks) {
        if (took_trunc_lock)
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
        goto exit;
    }
    /*
     * Give back any newly allocated space.
     */
    if (lockflags == 0) {
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
    }

    (void) TruncateFileC(hfsmp, (FCB *)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
                         FTOC(fp)->c_fileid, false);

    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;

    if (took_trunc_lock)
        hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
    goto exit;
}
/*
 * Clone a file's data within the file.
 *
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
    caddr_t bufp;
    size_t bufsize;
    size_t copysize;
    size_t iosize;
    size_t offset;
    off_t writebase;
    uio_t auio;
    int error = 0;

    writebase = blkstart * blksize;
    copysize = blkcnt * blksize;
    iosize = bufsize = MIN(copysize, 128 * 1024);
    offset = 0;

    hfs_unlock(VTOC(vp));

#if CONFIG_PROTECT
    if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
        hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
        return (error);
    }
#endif /* CONFIG_PROTECT */

    if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize, VM_KERN_MEMORY_FILE)) {
        hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
        return (ENOMEM);
    }

    auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);

    while (offset < copysize) {
        iosize = MIN(copysize - offset, iosize);

        uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
        uio_addiov(auio, (uintptr_t)bufp, iosize);

        error = cluster_read(vp, auio, copysize, IO_NOCACHE);
        if (error) {
            printf("hfs_clonefile: cluster_read failed - %d\n", error);
            break;
        }
        if (uio_resid(auio) != 0) {
            printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
            error = EIO;
            break;
        }

        uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
        uio_addiov(auio, (uintptr_t)bufp, iosize);

        error = cluster_write(vp, auio, writebase + offset,
                              writebase + offset + iosize,
                              uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
        if (error) {
            printf("hfs_clonefile: cluster_write failed - %d\n", error);
            break;
        }
        if (uio_resid(auio) != 0) {
            printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
            error = EIO;
            break;
        }
        offset += iosize;
    }
    uio_free(auio);

    if ((blksize & PAGE_MASK)) {
        /*
         * since the copy may not have started on a PAGE
         * boundary (or may not have ended on one), we
         * may have pages left in the cache since NOCACHE
         * will let partially written pages linger...
         * lets just flush the entire range to make sure
         * we don't have any pages left that are beyond
         * (or intersect) the real LEOF of this file
         */
        ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
    } else {
        /*
         * No need to call ubc_msync or hfs_invalbuf
         * since the file was copied using IO_NOCACHE and
         * the copy was done starting and ending on a page
         * boundary in the file.
         */
    }
    kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

    hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
    return (error);
}
/*
 * Clone a system (metadata) file.
 *
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
    caddr_t bufp;
    char * offset;
    size_t bufsize;
    size_t iosize;
    struct buf *bp = NULL;
    daddr64_t blkno;
    daddr64_t blk;
    daddr64_t start_blk;
    daddr64_t last_blk;
    int breadcnt;
    int i;
    int error = 0;

    iosize = GetLogicalBlockSize(vp);
    bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
    breadcnt = bufsize / iosize;

    if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize, VM_KERN_MEMORY_FILE)) {
        return (ENOMEM);
    }
    start_blk = ((daddr64_t)blkstart * blksize) / iosize;
    last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;
    blkno = 0;

    while (blkno < last_blk) {
        /*
         * Read up to a megabyte
         */
        offset = bufp;
        for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
            error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
            if (error) {
                printf("hfs_clonesysfile: meta_bread error %d\n", error);
                goto out;
            }
            if (buf_count(bp) != iosize) {
                printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
                goto out;
            }
            bcopy((char *)buf_dataptr(bp), offset, iosize);

            buf_markinvalid(bp);
            buf_brelse(bp);
            bp = NULL;

            offset += iosize;
        }

        /*
         * Write up to a megabyte
         */
        offset = bufp;
        for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
            bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
            if (bp == NULL) {
                printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
                error = EIO;
                goto out;
            }
            bcopy(offset, (char *)buf_dataptr(bp), iosize);
            error = (int)buf_bwrite(bp);
            bp = NULL;
            if (error)
                goto out;
            offset += iosize;
        }
    }
out:
    if (bp) {
        buf_brelse(bp);
    }

    kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

    error = hfs_fsync(vp, MNT_WAIT, 0, p);

    return (error);
}
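/*
 * Write out any pending invalid (not-yet-zeroed) ranges of a file.
 *
 * The cnode and truncate locks must be held by the caller (asserted below).
 * The invalid ranges are first copied into a local buffer, since the cnode
 * lock cannot be held across cluster_write; each range is then zero-filled
 * via cluster_write with IO_HEADZEROFILL.
 */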
errno_t hfs_flush_invalid_ranges(vnode_t vp)
{
    cnode_t *cp = VTOC(vp);

    assert(cp->c_lockowner == current_thread());
    assert(cp->c_truncatelockowner == current_thread());

    if (!ISSET(cp->c_flag, C_ZFWANTSYNC) && !cp->c_zftimeout)
        return 0;

    filefork_t *fp = VTOF(vp);

    /*
     * We can't hold the cnode lock whilst we call cluster_write so we
     * need to copy the extents into a local buffer.
     */
    int max_exts = 16;
    struct ext {
        off_t start, end;
    } exts_buf[max_exts];		// 256 bytes
    struct ext *exts = exts_buf;
    int ext_count = 0;
    errno_t ret;

    struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);

    while (r) {
        /* If we have more than can fit in our stack buffer, switch
           to a heap buffer. */
        if (exts == exts_buf && ext_count == max_exts) {
            max_exts = 256;
            MALLOC(exts, struct ext *, sizeof(struct ext) * max_exts,
                   M_TEMP, M_WAITOK);
            memcpy(exts, exts_buf, ext_count * sizeof(struct ext));
        }

        struct rl_entry *next = TAILQ_NEXT(r, rl_link);

        exts[ext_count++] = (struct ext){ r->rl_start, r->rl_end };

        if (!next || (ext_count == max_exts && exts != exts_buf)) {
            hfs_unlock(cp);
            for (int i = 0; i < ext_count; ++i) {
                ret = cluster_write(vp, NULL, fp->ff_size, exts[i].end + 1,
                                    exts[i].start, 0,
                                    IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE);
                if (ret) {
                    hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
                    goto exit;
                }
            }

            if (!next) {
                hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
                break;
            }

            /* Push any existing clusters which should clean up our invalid
               ranges as they go through hfs_vnop_blockmap. */
            cluster_push(vp, 0);

            hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);

            /*
             * Get back to where we were (given we dropped the lock).
             * This shouldn't be many because we pushed above.
             */
            TAILQ_FOREACH(r, &fp->ff_invalidranges, rl_link) {
                if (r->rl_end > exts[ext_count - 1].end)
                    break;
            }

            ext_count = 0;
        } else
            r = next;
    }

    ret = 0;

exit:

    if (exts != exts_buf)