/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*	@(#)hfs_readwrite.c	1.0
 *
 *	(c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
 *
 *	hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/buf_internal.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <sys/vfs_context.h>
#include <sys/fsevents.h>
#include <kern/kalloc.h>
#include <sys/sysctl.h>
#include <sys/fsctl.h>
#include <sys/mount_internal.h>

#include <miscfs/specfs/specdev.h>

#include <sys/ubc_internal.h>

#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

#include <sys/kdebug.h>

#include "hfs_attrlist.h"
#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"

#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))

enum {
	MAXHFSFILESIZE = 0x7FFFFFFF	/* this needs to go in the mount structure */
};

/* from bsd/hfs/hfs_vfsops.c */
extern int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
static int  hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
static int  hfs_clonefile(struct vnode *, int, int, int);
static int  hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
static int  hfs_minorupdate(struct vnode *vp);
static int  do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
int flush_cache_on_write = 0;
SYSCTL_INT(_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");

/*
 * Read data from a file.
 */
int
hfs_vnop_read(struct vnop_read_args *ap)
{
	/*
	   struct vnop_read_args {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		struct uio *a_uio;
		int a_ioflag;
		vfs_context_t a_context;
	   };
	 */
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	off_t filesize;
	off_t filebytes;
	off_t start_resid = uio_resid(uio);
	off_t offset = uio_offset(uio);
	int retval = 0;
	int took_truncate_lock = 0;
	int io_throttle = 0;

	/* Preflight checks */
	if (!vnode_isreg(vp)) {
		/* can only read regular files */
		if (vnode_isdir(vp))
			return (EISDIR);
		else
			return (EPERM);
	}
	if (start_resid == 0)
		return (0);		/* Nothing left to do */
	if (offset < 0)
		return (EINVAL);	/* can't read from a negative offset */

#if HFS_COMPRESSION
	if (VNODE_IS_RSRC(vp)) {
		if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
			return 0;
		}
		/* otherwise read the resource fork normally */
	} else {
		int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
		if (compressed) {
			retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
			if (compressed) {
				if (retval == 0) {
					/* successful read, update the access time */
					VTOC(vp)->c_touch_acctime = TRUE;

					/* compressed files are not hot file candidates */
					if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
						VTOF(vp)->ff_bytesread = 0;
					}
				}
				return retval;
			}
			/* otherwise the file was converted back to a regular file while we were reading it */
			retval = 0;
		} else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
			int error;

			error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
			if (error) {
				return error;
			}
		}
	}
#endif /* HFS_COMPRESSION */

#if CONFIG_PROTECT
	if ((retval = cp_handle_vnop(vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
		goto exit;
	}
#endif

	/*
	 * If this read request originated from a syscall (as opposed to
	 * an in-kernel page fault or something), then set it up for
	 * throttle checks.  For example, large EAs may cause a VNOP_READ
	 * to occur, and we wouldn't want to throttle I/O while holding the
	 * EA B-Tree lock.
	 */
	if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
		io_throttle = IO_RETURN_ON_THROTTLE;
	}

	cp = VTOC(vp);
	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);

read_again:

	/* Protect against a size change. */
	hfs_lock_truncate(cp, HFS_SHARED_LOCK);
	took_truncate_lock = 1;

	filesize = fp->ff_size;
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
	if (offset > filesize) {
		if ((hfsmp->hfs_flags & HFS_STANDARD) &&
		    (offset > (off_t)MAXHFSFILESIZE)) {
			retval = EFBIG;
		}
		goto exit;
	}

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	retval = cluster_read(vp, uio, filesize, ap->a_ioflag | (io_throttle));

	cp->c_touch_acctime = TRUE;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	/*
	 * Keep track of blocks read.
	 */
	if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
		int took_cnode_lock = 0;
		off_t bytesread;

		bytesread = start_resid - uio_resid(uio);

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			took_cnode_lock = 1;
		}
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < hfsmp->hfc_timebase) {
			struct timeval tv;

			fp->ff_bytesread = bytesread;
			microtime(&tv);
			cp->c_atime = tv.tv_sec;
		} else {
			fp->ff_bytesread += bytesread;
		}
		if (took_cnode_lock)
			hfs_unlock(cp);
	}
exit:
	if (took_truncate_lock) {
		hfs_unlock_truncate(cp, 0);
	}
	if (retval == EAGAIN) {
		throttle_lowpri_io(1);

		retval = 0;
		goto read_again;
	}
	return (retval);
}
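/*
 * Added note (not in the original source): when cluster_read() is invoked
 * with IO_RETURN_ON_THROTTLE and the I/O would be throttled, it returns
 * EAGAIN instead of blocking.  The pattern above drops the shared truncate
 * lock, sleeps via throttle_lowpri_io(), and then retries the read, so the
 * truncate lock is never held across a throttle delay.
 */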

/*
 * Write data to a file.
 */
int
hfs_vnop_write(struct vnop_write_args *ap)
{
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	kauth_cred_t cred = NULL;
	off_t origFileSize;
	off_t writelimit;
	off_t bytesToAdd = 0;
	off_t actualBytesAdded;
	off_t filebytes;
	off_t offset;
	ssize_t resid;
	int eflags;
	int ioflag = ap->a_ioflag;
	int retval = 0;
	int lockflags;
	int cnode_locked = 0;
	int partialwrite = 0;
	int do_snapshot = 1;
	time_t orig_ctime = VTOC(vp)->c_ctime;
	int took_truncate_lock = 0;
	int io_return_on_throttle = 0;
	struct rl_entry *invalid_range;

#if HFS_COMPRESSION
	if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
		int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
		switch (state) {
			case FILE_IS_COMPRESSED:
				return EACCES;
			case FILE_IS_CONVERTING:
				/* if FILE_IS_CONVERTING, we allow writes but do not
				   bother with snapshots or else we will deadlock.
				 */
				do_snapshot = 0;
				break;
			default:
				printf("invalid state %d for compressed file\n", state);
				/* fall through */
		}
	} else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
		int error;

		error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
		if (error != 0) {
			return error;
		}
	}

	if (do_snapshot) {
		check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
	}
#endif

	// LP64todo - fix this! uio_resid may be 64-bit value
	resid = uio_resid(uio);
	offset = uio_offset(uio);

	if (offset < 0)
		return (EINVAL);
	if (resid == 0)
		return (E_NONE);
	if (!vnode_isreg(vp))
		return (EPERM);		/* Can only write regular files */

	cp = VTOC(vp);
	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);

#if CONFIG_PROTECT
	if ((retval = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
		goto exit;
	}
#endif

	eflags = kEFDeferMask;	/* defer file block allocations */
#if HFS_SPARSE_DEV
	/*
	 * When the underlying device is sparse and space
	 * is low (< 8MB), stop doing delayed allocations
	 * and begin doing synchronous I/O.
	 */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
	    (hfs_freeblks(hfsmp, 0) < 2048)) {
		eflags &= ~kEFDeferMask;
		ioflag |= IO_SYNC;
	}
#endif /* HFS_SPARSE_DEV */

	if ((ioflag & (IO_SINGLE_WRITER | IO_RETURN_ON_THROTTLE)) ==
			(IO_SINGLE_WRITER | IO_RETURN_ON_THROTTLE)) {
		io_return_on_throttle = IO_RETURN_ON_THROTTLE;
	}

again:
	/*
	 * Protect against a size change.
	 *
	 * Note: If took_truncate_lock is true, then we previously got the lock shared
	 * but needed to upgrade to exclusive.  So try getting it exclusive from the
	 * start.
	 */
	if (ioflag & IO_APPEND || took_truncate_lock) {
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
	}
	else {
		hfs_lock_truncate(cp, HFS_SHARED_LOCK);
	}
	took_truncate_lock = 1;

	/* Update UIO */
	if (ioflag & IO_APPEND) {
		uio_setoffset(uio, fp->ff_size);
		offset = fp->ff_size;
	}
	if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
		retval = EPERM;
		goto exit;
	}

	origFileSize = fp->ff_size;
	writelimit = offset + resid;
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

	/*
	 * We may need an exclusive truncate lock for several reasons, all
	 * of which are because we may be writing to a (portion of a) block
	 * for the first time, and we need to make sure no readers see the
	 * prior, uninitialized contents of the block.  The cases are:
	 *
	 * 1. We have unallocated (delayed allocation) blocks.  We may be
	 *    allocating new blocks to the file and writing to them.
	 *    (A more precise check would be whether the range we're writing
	 *    to contains delayed allocation blocks.)
	 * 2. We need to extend the file.  The bytes between the old EOF
	 *    and the new EOF are not yet initialized.  This is important
	 *    even if we're not allocating new blocks to the file.  If the
	 *    old EOF and new EOF are in the same block, we still need to
	 *    protect that range of bytes until they are written for the
	 *    first time.
	 * 3. The write overlaps some invalid ranges (delayed zero fill; that
	 *    part of the file has been allocated, but not yet written).
	 *
	 * If we had a shared lock with the above cases, we need to try to upgrade
	 * to an exclusive lock.  If the upgrade fails, we will lose the shared
	 * lock, and will need to take the truncate lock again; the took_truncate_lock
	 * flag will still be set, causing us to try for an exclusive lock next time.
	 *
	 * NOTE: Testing for #3 (delayed zero fill) needs to be done while the cnode
	 * lock is held, since it protects the range lists.
	 */
	if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
	    ((fp->ff_unallocblocks != 0) ||
	     (writelimit > origFileSize))) {
		if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
			/*
			 * Lock upgrade failed and we lost our shared lock, try again.
			 * Note: we do not set took_truncate_lock=0 here.  Leaving it
			 * set to 1 will cause us to try to get the lock exclusive.
			 */
			goto again;
		} else {
			/* Store the owner in the c_truncatelockowner field if we successfully upgrade */
			cp->c_truncatelockowner = current_thread();
		}
	}

	if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
		goto exit;
	}
	cnode_locked = 1;

	/*
	 * Now that we have the cnode lock, see if there are delayed zero fill ranges
	 * overlapping our write.  If so, we need the truncate lock exclusive (see above).
	 */
	if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
	    (rl_scan(&fp->ff_invalidranges, offset, writelimit-1, &invalid_range) != RL_NOOVERLAP)) {
		/*
		 * When testing, it appeared that calling lck_rw_lock_shared_to_exclusive() causes
		 * a deadlock, rather than simply returning failure.  (That is, it apparently does
		 * not behave like a "try_lock").  Since this condition is rare, just drop the
		 * cnode lock and try again.  Since took_truncate_lock is set, we will
		 * automatically take the truncate lock exclusive.
		 */
		hfs_unlock(cp);
		cnode_locked = 0;
		hfs_unlock_truncate(cp, 0);
		goto again;
	}

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
		     (int)offset, uio_resid(uio), (int)fp->ff_size,
		     (int)filebytes, 0);

	/* Check if we do not need to extend the file */
	if (writelimit <= filebytes) {
		goto sizeok;
	}

	cred = vfs_context_ucred(ap->a_context);
	bytesToAdd = writelimit - filebytes;

#if QUOTA
	retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
			   cred, 0);
	if (retval)
		goto exit;
#endif /* QUOTA */

	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto exit;
	}

	while (writelimit > filebytes) {
		bytesToAdd = writelimit - filebytes;
		if (cred && suser(cred, NULL) != 0)
			eflags |= kEFReserveMask;

		/* Protect extents b-tree and allocation bitmap */
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

		/* Files that are changing size are not hot file candidates. */
		if (hfsmp->hfc_stage == HFC_RECORDING) {
			fp->ff_bytesread = 0;
		}
		retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
				0, eflags, &actualBytesAdded));

		hfs_systemfile_unlock(hfsmp, lockflags);

		if ((actualBytesAdded == 0) && (retval == E_NONE))
			retval = ENOSPC;
		if (retval != E_NONE)
			break;
		filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
			(int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
	}
	(void) hfs_update(vp, TRUE);
	(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
	(void) hfs_end_transaction(hfsmp);

	/*
	 * If we didn't grow the file enough try a partial write.
	 * POSIX expects this behavior.
	 */
	if ((retval == ENOSPC) && (filebytes > offset)) {
		retval = 0;
		partialwrite = 1;
		uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
		resid -= bytesToAdd;
		writelimit = filebytes;
	}
sizeok:
	if (retval == E_NONE) {
		off_t filesize;
		off_t zero_off;
		off_t tail_off;
		off_t inval_start;
		off_t inval_end;
		off_t io_start;
		int lflag;

		if (writelimit > fp->ff_size)
			filesize = writelimit;
		else
			filesize = fp->ff_size;

		lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);

		if (offset <= fp->ff_size) {
			zero_off = offset & ~PAGE_MASK_64;

			/* Check whether the area between zero_off and the start
			   of the transfer is invalid and should be zero-filled
			   as part of the transfer:
			 */
			if (offset > zero_off) {
				if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
					lflag |= IO_HEADZEROFILL;
			}
		} else {
			off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

			/* The bytes between fp->ff_size and uio->uio_offset must never be
			   read without being zeroed.  The current last block is filled with zeroes
			   if it holds valid data but in all cases merely do a little bookkeeping
			   to track the area from the end of the current last page to the start of
			   the area actually written.  For the same reason only the bytes up to the
			   start of the page where this write will start is invalidated; any remainder
			   before uio->uio_offset is explicitly zeroed as part of the cluster_write.

			   Note that inval_start, the start of the page after the current EOF,
			   may be past the start of the write, in which case the zeroing
			   will be handled by the cluster_write of the actual data.
			 */
			inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
			inval_end = offset & ~PAGE_MASK_64;
			zero_off = fp->ff_size;

			if ((fp->ff_size & PAGE_MASK_64) &&
			    (rl_scan(&fp->ff_invalidranges,
			             eof_page_base,
			             fp->ff_size - 1,
			             &invalid_range) != RL_NOOVERLAP)) {
				/* The page containing the EOF is not valid, so the
				   entire page must be made inaccessible now.  If the write
				   starts on a page beyond the page containing the eof
				   (inval_end > eof_page_base), add the
				   whole page to the range to be invalidated.  Otherwise
				   (i.e. if the write starts on the same page), zero-fill
				   the entire page explicitly now:
				 */
				if (inval_end > eof_page_base) {
					inval_start = eof_page_base;
				} else {
					zero_off = eof_page_base;
				}
			}

			if (inval_start < inval_end) {
				struct timeval tv;
				/* There's some range of data that's going to be marked invalid */

				if (zero_off < inval_start) {
					/* The pages between inval_start and inval_end are going to be invalidated,
					   and the actual write will start on a page past inval_end.  Now's the last
					   chance to zero-fill the page containing the EOF:
					 */
					hfs_unlock(cp);
					retval = cluster_write(vp, (uio_t) 0,
							fp->ff_size, inval_start,
							zero_off, (off_t)0,
							lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
					hfs_lock(cp, HFS_FORCE_LOCK);
					if (retval) goto ioerr_exit;
					offset = uio_offset(uio);
				}

				/* Mark the remaining area of the newly allocated space as invalid: */
				rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
				microuptime(&tv);
				cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
				zero_off = fp->ff_size = inval_end;
			}

			if (offset > zero_off) lflag |= IO_HEADZEROFILL;
		}

		/* Check whether the area between the end of the write and the end of
		   the page it falls in is invalid and should be zero-filled as part of the transfer:
		 */
		tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
		if (tail_off > filesize) tail_off = filesize;
		if (tail_off > writelimit) {
			if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
				lflag |= IO_TAILZEROFILL;
			}
		}

		/*
		 * if the write starts beyond the current EOF (possibly advanced in the
		 * zeroing of the last block, above), then we'll zero fill from the current EOF
		 * to where the write begins:
		 *
		 * NOTE: If (and ONLY if) the portion of the file about to be written is
		 * before the current EOF it might be marked as invalid now and must be
		 * made readable (removed from the invalid ranges) before cluster_write
		 * tries to write it:
		 */
		io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
		if (io_start < fp->ff_size) {
			off_t io_end;

			io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
			rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
		}

		/*
		 * We need to tell UBC the fork's new size BEFORE calling
		 * cluster_write, in case any of the new pages need to be
		 * paged out before cluster_write completes (which does happen
		 * in embedded systems due to extreme memory pressure).
		 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
		 * will be, so that it can pass that on to cluster_pageout, and
		 * allow those pageouts.
		 *
		 * We don't update ff_size yet since we don't want pageins to
		 * be able to see uninitialized data between the old and new
		 * EOF, until cluster_write has completed and initialized that
		 * part of the file.
		 *
		 * The vnode pager relies on the file size last given to UBC via
		 * ubc_setsize.  hfs_vnop_pageout relies on fp->ff_new_size or
		 * ff_size (whichever is larger).  NOTE: ff_new_size is always
		 * zero, unless we are extending the file via write.
		 */
		if (filesize > fp->ff_size) {
			fp->ff_new_size = filesize;
			ubc_setsize(vp, filesize);
		}
		retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
				tail_off, lflag | IO_NOZERODIRTY | io_return_on_throttle);
		if (retval) {
			fp->ff_new_size = 0;	/* no longer extending; use ff_size */

			if (retval == EAGAIN) {
				/*
				 * EAGAIN indicates that we still have I/O to do, but
				 * that we now need to be throttled
				 */
				if (resid != uio_resid(uio)) {
					/*
					 * did manage to do some I/O before returning EAGAIN
					 */
					resid = uio_resid(uio);
					offset = uio_offset(uio);

					cp->c_touch_chgtime = TRUE;
					cp->c_touch_modtime = TRUE;
				}
				if (filesize > fp->ff_size) {
					/*
					 * we called ubc_setsize before the call to
					 * cluster_write... since we only partially
					 * completed the I/O, we need to
					 * re-adjust our idea of the filesize based
					 * on our interim EOF
					 */
					ubc_setsize(vp, offset);

					fp->ff_size = offset;
				}
				goto exit;
			}
			if (filesize > origFileSize) {
				ubc_setsize(vp, origFileSize);
			}
			goto ioerr_exit;
		}

		if (filesize > origFileSize) {
			fp->ff_size = filesize;

			/* Files that are changing size are not hot file candidates. */
			if (hfsmp->hfc_stage == HFC_RECORDING) {
				fp->ff_bytesread = 0;
			}
		}
		fp->ff_new_size = 0;	/* ff_size now has the correct size */

		/* If we wrote some bytes, then touch the change and mod times */
		if (resid > uio_resid(uio)) {
			cp->c_touch_chgtime = TRUE;
			cp->c_touch_modtime = TRUE;
		}
	}
	if (partialwrite) {
		uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
		resid += bytesToAdd;
	}

	// XXXdbg - see radar 4871353 for more info
	if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
		VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
	}

ioerr_exit:
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (cp->c_mode & (S_ISUID | S_ISGID)) {
		cred = vfs_context_ucred(ap->a_context);
		if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
			if (!cnode_locked) {
				hfs_lock(cp, HFS_FORCE_LOCK);
				cnode_locked = 1;
			}
			cp->c_mode &= ~(S_ISUID | S_ISGID);
		}
	}
	if (retval) {
		if (ioflag & IO_UNIT) {
			if (!cnode_locked) {
				hfs_lock(cp, HFS_FORCE_LOCK);
				cnode_locked = 1;
			}
			(void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
					   0, 0, ap->a_context);
			// LP64todo - fix this!  resid needs to be user_ssize_t
			uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
			uio_setresid(uio, resid);
			filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
		}
	} else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
		if (!cnode_locked) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			cnode_locked = 1;
		}
		retval = hfs_update(vp, TRUE);
	}
	/* Updating vcbWrCnt doesn't need to be atomic. */
	hfsmp->vcbWrCnt++;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
exit:
	if (cnode_locked)
		hfs_unlock(cp);

	if (took_truncate_lock) {
		hfs_unlock_truncate(cp, 0);
	}
	if (retval == EAGAIN) {
		throttle_lowpri_io(1);

		retval = 0;
		goto again;
	}
	return (retval);
}

/* support for the "bulk-access" fcntl */

#define CACHE_LEVELS		16
#define NUM_CACHE_ENTRIES	(64*16)
#define PARENT_IDS_FLAG		0x100
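
/*
 * Added note (not in the original source): with CACHE_LEVELS = 16 and
 * NUM_CACHE_ENTRIES = 64*16 = 1024, the access cache can remember results
 * for up to 1024 distinct directory cnids in total, while any single
 * bulk-access walk records at most CACHE_LEVELS parent directories on its
 * way up to the root (see do_access_check below).
 */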

struct access_cache {
	int numcached;
	int cachehits; /* these two for statistics gathering */
	int lookups;
	unsigned int *acache;
	unsigned char *haveaccess;
};

struct access_t {
	uid_t     uid;         /* IN: effective user id */
	short     flags;       /* IN: access requested (i.e. R_OK) */
	short     num_groups;  /* IN: number of groups user belongs to */
	int       num_files;   /* IN: number of files to process */
	int       *file_ids;   /* IN: array of file ids */
	gid_t     *groups;     /* IN: array of groups */
	short     *access;     /* OUT: access info for each file (0 for 'has access') */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_access_t {
	uid_t          uid;        /* IN: effective user id */
	short          flags;      /* IN: access requested (i.e. R_OK) */
	short          num_groups; /* IN: number of groups user belongs to */
	int            num_files;  /* IN: number of files to process */
	user32_addr_t  file_ids;   /* IN: array of file ids */
	user32_addr_t  groups;     /* IN: array of groups */
	user32_addr_t  access;     /* OUT: access info for each file (0 for 'has access') */
};

struct user64_access_t {
	uid_t          uid;        /* IN: effective user id */
	short          flags;      /* IN: access requested (i.e. R_OK) */
	short          num_groups; /* IN: number of groups user belongs to */
	int            num_files;  /* IN: number of files to process */
	user64_addr_t  file_ids;   /* IN: array of file ids */
	user64_addr_t  groups;     /* IN: array of groups */
	user64_addr_t  access;     /* OUT: access info for each file (0 for 'has access') */
};

// these are the "extended" versions of the above structures
// note that it is crucial that they be a different size than
// the regular version
struct ext_access_t {
	uint32_t   flags;        /* IN: access requested (i.e. R_OK) */
	uint32_t   num_files;    /* IN: number of files to process */
	uint32_t   map_size;     /* IN: size of the bit map */
	uint32_t  *file_ids;     /* IN: array of file ids */
	char      *bitmap;       /* OUT: hash-bitmap of interesting directory ids */
	short     *access;       /* OUT: access info for each file (0 for 'has access') */
	uint32_t   num_parents;  /* future use */
	cnid_t    *parents;      /* future use */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_ext_access_t {
	uint32_t       flags;        /* IN: access requested (i.e. R_OK) */
	uint32_t       num_files;    /* IN: number of files to process */
	uint32_t       map_size;     /* IN: size of the bit map */
	user32_addr_t  file_ids;     /* IN: array of file ids */
	user32_addr_t  bitmap;       /* OUT: hash-bitmap of interesting directory ids */
	user32_addr_t  access;       /* OUT: access info for each file (0 for 'has access') */
	uint32_t       num_parents;  /* future use */
	user32_addr_t  parents;      /* future use */
};

struct user64_ext_access_t {
	uint32_t       flags;        /* IN: access requested (i.e. R_OK) */
	uint32_t       num_files;    /* IN: number of files to process */
	uint32_t       map_size;     /* IN: size of the bit map */
	user64_addr_t  file_ids;     /* IN: array of file ids */
	user64_addr_t  bitmap;       /* OUT: hash-bitmap of interesting directory ids */
	user64_addr_t  access;       /* OUT: access info for each file (0 for 'has access') */
	uint32_t       num_parents;  /* future use */
	user64_addr_t  parents;      /* future use */
};
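
/*
 * Illustrative sketch (added; not part of the original source) of how a
 * 32-bit userspace caller might fill in the extended bulk-access request.
 * The exact fsctl/ioctl spelling lives in hfs_fsctl.h; the layout below is
 * what the kernel-side handler expects for a 32-bit caller:
 *
 *	struct user32_ext_access_t args;
 *	uint32_t ids[2]  = { cnid_a, cnid_b };   // hypothetical file ids
 *	short    result[2];
 *
 *	memset(&args, 0, sizeof(args));
 *	args.flags     = R_OK;                   // access being asked about
 *	args.num_files = 2;
 *	args.file_ids  = (user32_addr_t)(uintptr_t)ids;
 *	args.access    = (user32_addr_t)(uintptr_t)result;
 *	// issue the HFS bulk-access fsctl on a file opened on the volume;
 *	// on return, result[i] is 0 for "has access" or an errno value.
 */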

/*
 * Perform a binary search for the given parent_id. Return value is
 * the index if there is a match.  If no_match_indexp is non-NULL it
 * will be assigned with the index to insert the item (even if it was
 * not found).
 */
static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
{
	unsigned int lo = 0;

	do {
		unsigned int mid = ((hi - lo)/2) + lo;
		unsigned int this_id = array[mid];

		if (parent_id == this_id) {
			hi = mid;
			break;
		}

		if (parent_id < this_id) {
			hi = mid;
			continue;
		}

		if (parent_id > this_id) {
			lo = mid + 1;
			continue;
		}
	} while (lo < hi);

	/* check if lo and hi converged on the match */
	if (parent_id == array[hi]) {
		return hi;
	}

	if (no_match_indexp) {
		*no_match_indexp = hi;
	}

	return -1;
}
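
/*
 * Worked example (added commentary): with array = {5, 9, 17, 42} and hi = 3,
 * searching for 17 converges on index 2 and that index is returned;
 * searching for 12 finds no match, so -1 is returned and, when
 * no_match_indexp is non-NULL, the slot where 12 would be inserted to keep
 * the array sorted (index 2) is stored through it.
 */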

static int
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
	unsigned int hi;
	int index, no_match_index;

	if (cache->numcached == 0) {
		*indexp = 0;
		return 0; // table is empty, so insert at index=0 and report no match
	}

	if (cache->numcached > NUM_CACHE_ENTRIES) {
		/*printf("hfs: EGAD! numcached is %d... cut our losses and trim to %d\n",
		  cache->numcached, NUM_CACHE_ENTRIES);*/
		cache->numcached = NUM_CACHE_ENTRIES;
	}

	hi = cache->numcached - 1;

	index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);

	/* if no existing entry found, find index for new one */
	if (index == -1) {
		index = no_match_index;
		*indexp = index;
		return 0;
	}

	*indexp = index;
	return 1;
}

/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in). We currently do a replace rather
 * than an insert if the cache is full.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
	int lookup_index = -1;

	/* need to do a lookup first if -1 passed for index */
	if (index == -1) {
		if (lookup_bucket(cache, &lookup_index, nodeID)) {
			if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
				// only update an entry if the previous access was ESRCH (i.e. a scope checking error)
				cache->haveaccess[lookup_index] = access;
			}

			/* mission accomplished */
			return;
		} else {
			index = lookup_index;
		}
	}

	/* if the cache is full, do a replace rather than an insert */
	if (cache->numcached >= NUM_CACHE_ENTRIES) {
		//printf("hfs: cache is full (%d). replace at index %d\n", cache->numcached, index);
		cache->numcached = NUM_CACHE_ENTRIES-1;

		if (index > cache->numcached) {
			// printf("hfs: index %d pinned to %d\n", index, cache->numcached);
			index = cache->numcached;
		}
	}

	if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
		index++;
	}

	if (index >= 0 && index < cache->numcached) {
		/* only do bcopy if we're inserting */
		bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
		bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
	}

	cache->acache[index] = nodeID;
	cache->haveaccess[index] = access;
	cache->numcached++;
}

struct cinfo {
	uid_t      uid;
	gid_t      gid;
	mode_t     mode;
	cnid_t     parentcnid;
	u_int16_t  recflags;
};

static int
snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
{
	struct cinfo *cip = (struct cinfo *)arg;

	cip->uid = attrp->ca_uid;
	cip->gid = attrp->ca_gid;
	cip->mode = attrp->ca_mode;
	cip->parentcnid = descp->cd_parentcnid;
	cip->recflags = attrp->ca_recflags;

	return (0);
}

/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
 * isn't incore, then go to the catalog.
 */
static int
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
    struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
{
	int error = 0;

	/* if this id matches the one the fsctl was called with, skip the lookup */
	if (cnid == skip_cp->c_cnid) {
		cnattrp->ca_uid = skip_cp->c_uid;
		cnattrp->ca_gid = skip_cp->c_gid;
		cnattrp->ca_mode = skip_cp->c_mode;
		cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
		keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
	} else {
		struct cinfo c_info;

		/* otherwise, check the cnode hash in case the file/dir is incore */
		if (hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info) == 0) {
			cnattrp->ca_uid = c_info.uid;
			cnattrp->ca_gid = c_info.gid;
			cnattrp->ca_mode = c_info.mode;
			cnattrp->ca_recflags = c_info.recflags;
			keyp->hfsPlus.parentID = c_info.parentcnid;
		} else {
			int lockflags;

			if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
				throttle_lowpri_io(1);

			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

			/* lookup this cnid in the catalog */
			error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);

			hfs_systemfile_unlock(hfsmp, lockflags);

			cache->lookups++;
		}
	}

	return (error);
}

/*
 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
static int
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
    struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
    struct vfs_context *my_context,
    char *bitmap,
    uint32_t map_size,
    cnid_t *parents,
    uint32_t num_parents)
{
	int myErr = 0;
	int myResult;
	HFSCatalogNodeID thisNodeID;
	unsigned int myPerms;
	struct cat_attr cnattr;
	int cache_index = -1, scope_index = -1, scope_idx_start = -1;
	CatalogKey catkey;

	int i = 0, ids_to_cache = 0;
	int parent_ids[CACHE_LEVELS];
= nodeID
;
1118 while (thisNodeID
>= kRootDirID
) {
1119 myResult
= 0; /* default to "no access" */
1121 /* check the cache before resorting to hitting the catalog */
1123 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
1124 * to look any further after hitting cached dir */
1126 if (lookup_bucket(cache
, &cache_index
, thisNodeID
)) {
1128 myErr
= cache
->haveaccess
[cache_index
];
1129 if (scope_index
!= -1) {
1130 if (myErr
== ESRCH
) {
1134 scope_index
= 0; // so we'll just use the cache result
1135 scope_idx_start
= ids_to_cache
;
1137 myResult
= (myErr
== 0) ? 1 : 0;
1138 goto ExitThisRoutine
;

		if (parents) {
			int tmp;

			tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
			if (scope_index == -1)
				scope_index = tmp;
			if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
				scope_idx_start = ids_to_cache;
			}
		}

		/* remember which parents we want to cache */
		if (ids_to_cache < CACHE_LEVELS) {
			parent_ids[ids_to_cache] = thisNodeID;
			ids_to_cache++;
		}
		// Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
		if (bitmap && map_size) {
			bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
		}

		/* do the lookup (checks the cnode hash, then the catalog) */
		myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
		if (myErr) {
			goto ExitThisRoutine; /* no access */
		}

		/* Root always gets access. */
		if (suser(myp_ucred, NULL) == 0) {
			thisNodeID = catkey.hfsPlus.parentID;
			myResult = 1;
			continue;
		}

		// if the thing has acl's, do the full permission check
		if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
			struct vnode *vp;

			/* get the vnode for this cnid */
			myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
			if ( myErr ) {
				myResult = 0;
				goto ExitThisRoutine;
			}

			thisNodeID = VTOC(vp)->c_parentcnid;

			hfs_unlock(VTOC(vp));

			if (vnode_vtype(vp) == VDIR) {
				myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
			} else {
				myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
			}

			vnode_put(vp);
			if (myErr) {
				myResult = 0;
				goto ExitThisRoutine;
			}
		} else {
			unsigned int flags;
			int mode = cnattr.ca_mode & S_IFMT;

			myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp, myp_ucred, theProcPtr);

			if (mode == S_IFDIR) {
				flags = R_OK | X_OK;
			} else {
				flags = R_OK;
			}
			if ( (myPerms & flags) != flags) {
				myResult = 0;
				myErr = EACCES;
				goto ExitThisRoutine;   /* no access */
			}

			/* up the hierarchy we go */
			thisNodeID = catkey.hfsPlus.parentID;
		}
	}

	/* if here, we have access to this node */
	myResult = 1;

 ExitThisRoutine:
	if (parents && myErr == 0 && scope_index == -1) {
		myErr = ESRCH;
	}

	if (myErr) {
		myResult = 0;
	}
	*err = myErr;

	/* cache the parent directory(ies) */
	for (i = 0; i < ids_to_cache; i++) {
		if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
			add_node(cache, -1, parent_ids[i], ESRCH);
		} else {
			add_node(cache, -1, parent_ids[i], myErr);
		}
	}

	return (myResult);
}

static int
do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
    struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
{
	/*
	 * NOTE: on entry, the vnode has an io_ref. In case this vnode
	 * happens to be in our list of file_ids, we'll note it and
	 * avoid calling hfs_chashget_nowait() on that id as that
	 * will cause a "locking against myself" panic.
	 */
	Boolean check_leaf = true;

	struct user64_ext_access_t *user_access_structp;
	struct user64_ext_access_t tmp_user_access;
	struct access_cache cache;

	int error = 0, prev_parent_check_ok=1;
	unsigned int i;

	short flags;
	unsigned int num_files = 0;
	int map_size = 0;
	int num_parents = 0;
	int *file_ids=NULL;
	short *access=NULL;
	char *bitmap=NULL;
	cnid_t *parents=NULL;
	int leaf_index;

	cnid_t cnid;
	cnid_t prevParent_cnid = 0;
	unsigned int myPerms;
	short myaccess = 0;
	struct cat_attr cnattr;
	CatalogKey catkey;
	struct cnode *skip_cp = VTOC(vp);
	kauth_cred_t cred = vfs_context_ucred(context);
	proc_t p = vfs_context_proc(context);
	int is64bit;

	is64bit = proc_is64bit(p);

	/* initialize the local cache and buffers */
	cache.numcached = 0;
	cache.cachehits = 0;
	cache.lookups = 0;
	cache.acache = NULL;
	cache.haveaccess = NULL;

	/* struct copyin done during dispatch... need to copy file_id array separately */
	if (ap->a_data == NULL) {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	if (is64bit) {
		if (arg_size != sizeof(struct user64_ext_access_t)) {
			error = EINVAL;
			goto err_exit_bulk_access;
		}

		user_access_structp = (struct user64_ext_access_t *)ap->a_data;

	} else if (arg_size == sizeof(struct user32_access_t)) {
		struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;

		// convert an old style bulk-access struct to the new style
		tmp_user_access.flags = accessp->flags;
		tmp_user_access.num_files = accessp->num_files;
		tmp_user_access.map_size = 0;
		tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
		tmp_user_access.bitmap = USER_ADDR_NULL;
		tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
		tmp_user_access.num_parents = 0;
		user_access_structp = &tmp_user_access;

	} else if (arg_size == sizeof(struct user32_ext_access_t)) {
		struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;

		// up-cast from a 32-bit version of the struct
		tmp_user_access.flags = accessp->flags;
		tmp_user_access.num_files = accessp->num_files;
		tmp_user_access.map_size = accessp->map_size;
		tmp_user_access.num_parents = accessp->num_parents;

		tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
		tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
		tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
		tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);

		user_access_structp = &tmp_user_access;
	} else {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	map_size = user_access_structp->map_size;

	num_files = user_access_structp->num_files;

	num_parents = user_access_structp->num_parents;

	if (num_files < 1) {
		goto err_exit_bulk_access;
	}
	if (num_files > 1024) {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	if (num_parents > 1024) {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	file_ids = (int *) kalloc(sizeof(int) * num_files);
	access = (short *) kalloc(sizeof(short) * num_files);
	if (map_size) {
		bitmap = (char *) kalloc(sizeof(char) * map_size);
	}

	if (num_parents) {
		parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
	}

	cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
	cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);

	if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
		if (file_ids) {
			kfree(file_ids, sizeof(int) * num_files);
		}
		if (bitmap) {
			kfree(bitmap, sizeof(char) * map_size);
		}
		if (access) {
			kfree(access, sizeof(short) * num_files);
		}
		if (cache.acache) {
			kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
		}
		if (cache.haveaccess) {
			kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
		}
		if (parents) {
			kfree(parents, sizeof(cnid_t) * num_parents);
		}
		return ENOMEM;
	}

	// make sure the bitmap is zero'ed out...
	if (bitmap) {
		bzero(bitmap, (sizeof(char) * map_size));
	}

	if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
	     num_files * sizeof(int)))) {
		goto err_exit_bulk_access;
	}

	if (num_parents) {
		if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
		     num_parents * sizeof(cnid_t)))) {
			goto err_exit_bulk_access;
		}
	}

	flags = user_access_structp->flags;
	if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
		flags = R_OK;
	}

	/* check if we've been passed leaf node ids or parent ids */
	if (flags & PARENT_IDS_FLAG) {
		check_leaf = false;
	}

	/* Check access to each file_id passed in */
	for (i = 0; i < num_files; i++) {
		leaf_index = -1;
		cnid = (cnid_t) file_ids[i];

		/* root always has access */
		if ((!parents) && (!suser(cred, NULL))) {
			access[i] = 0;
			continue;
		}

		if (check_leaf) {
			/* do the lookup (checks the cnode hash, then the catalog) */
			error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
			if (error) {
				access[i] = (short) error;
				continue;
			}

			if (parents) {
				// Check if the leaf matches one of the parent scopes
				leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
				if (leaf_index >= 0 && parents[leaf_index] == cnid)
					prev_parent_check_ok = 0;
				else if (leaf_index >= 0)
					prev_parent_check_ok = 1;
			}

			// if the thing has acl's, do the full permission check
			if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
				struct vnode *cvp;
				int myErr = 0;

				/* get the vnode for this cnid */
				myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
				if ( myErr ) {
					access[i] = myErr;
					continue;
				}

				hfs_unlock(VTOC(cvp));

				if (vnode_vtype(cvp) == VDIR) {
					myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
				} else {
					myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
				}

				vnode_put(cvp);

				access[i] = myErr;
				continue;
			}

			/* before calling CheckAccess(), check the target file for read access */
			myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
				cnattr.ca_mode, hfsmp->hfs_mp, cred, p);

			/* fail fast if no access */
			if ((myPerms & flags) == 0) {
				access[i] = EACCES;
				continue;
			}
		} else {
			/* we were passed an array of parent ids */
			catkey.hfsPlus.parentID = cnid;
		}

		/* if the last guy had the same parent and had access, we're done */
		if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
			cache.cachehits++;
			access[i] = 0;
			continue;
		}

		myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
			skip_cp, p, cred, context, bitmap, map_size, parents, num_parents);

		if (myaccess || (error == ESRCH && leaf_index != -1)) {
			access[i] = 0; // have access.. no errors to report
		} else {
			access[i] = (error != 0 ? (short) error : EACCES);
		}

		prevParent_cnid = catkey.hfsPlus.parentID;
	}

	/* copyout the access array */
	if ((error = copyout((caddr_t)access, user_access_structp->access,
	     num_files * sizeof (short)))) {
		goto err_exit_bulk_access;
	}
	if (map_size && bitmap) {
		if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
		     map_size * sizeof (char)))) {
			goto err_exit_bulk_access;
		}
	}

err_exit_bulk_access:

	//printf("hfs: on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);

	if (file_ids)
		kfree(file_ids, sizeof(int) * num_files);
	if (parents)
		kfree(parents, sizeof(cnid_t) * num_parents);
	if (bitmap)
		kfree(bitmap, sizeof(char) * map_size);
	if (access)
		kfree(access, sizeof(short) * num_files);
	if (cache.acache)
		kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
	if (cache.haveaccess)
		kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);

	return (error);
}

/* end "bulk-access" support */
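
/*
 * Added summary (not in the original source): the bulk-access path above
 * copies the caller's array of file IDs into the kernel, walks each ID's
 * parent chain with do_access_check() while memoizing per-directory results
 * in the small access_cache, and finally copies one short per file back out
 * (0 meaning "has access", otherwise an errno value).
 */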

/*
 * Callback for use with freeze ioctl.
 */
static int
hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
{
	vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");

	return 0;
}

/*
 * Control filesystem operating characteristics.
 */
int
hfs_vnop_ioctl( struct vnop_ioctl_args /* {
		vnode_t a_vp;
		long  a_command;
		caddr_t  a_data;
		int  a_fflag;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode * vp = ap->a_vp;
	struct hfsmount *hfsmp = VTOHFS(vp);
	vfs_context_t context = ap->a_context;
	kauth_cred_t cred = vfs_context_ucred(context);
	proc_t p = vfs_context_proc(context);
	struct vfsstatfs *vfsp;
	int is64bit;
	off_t jnl_start, jnl_size;
	struct hfs_journal_info *jip;
#if HFS_COMPRESSION
	int compressed = 0;
	off_t uncompressed_size = -1;
	int decmpfs_error = 0;

	if (ap->a_command == F_RDADVISE) {
		/* we need to inspect the decmpfs state of the file as early as possible */
		compressed = hfs_file_is_compressed(VTOC(vp), 0);
		if (compressed) {
			if (VNODE_IS_RSRC(vp)) {
				/* if this is the resource fork, treat it as if it were empty */
				uncompressed_size = 0;
			} else {
				decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
				if (decmpfs_error != 0) {
					/* failed to get the uncompressed size, we'll check for this later */
					uncompressed_size = -1;
				}
			}
		}
	}
#endif /* HFS_COMPRESSION */

	is64bit = proc_is64bit(p);

#if CONFIG_PROTECT
	{
		int error = 0;
		if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
			return error;
		}
	}
#endif /* CONFIG_PROTECT */

	switch (ap->a_command) {

	case HFS_GETPATH:
	{
		struct vnode *file_vp;
		cnid_t  cnid;
		int  outlen;
		char *bufptr;
		int error;

		/* Caller must be owner of file system. */
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);
		}
		/* Target vnode must be file system's root. */
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		bufptr = (char *)ap->a_data;
		cnid = strtoul(bufptr, NULL, 10);

		/* We need to call hfs_vfs_vget to leverage the code that will
		 * fix the origin list for us if needed, as opposed to calling
		 * hfs_vget, since we will need the parent for the build_path call.
		 */
		if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
			return (error);
		}
		error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);
		vnode_put(file_vp);

		return (error);
	}

	case HFS_PREV_LINK:
	case HFS_NEXT_LINK:
	{
		cnid_t linkfileid;
		cnid_t nextlinkid;
		cnid_t prevlinkid;
		int error;

		/* Caller must be owner of file system. */
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);
		}
		/* Target vnode must be file system's root. */
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		linkfileid = *(cnid_t *)ap->a_data;
		if (linkfileid < kHFSFirstUserCatalogNodeID) {
			return (EINVAL);
		}
		if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
			return (error);
		}
		if (ap->a_command == HFS_NEXT_LINK) {
			*(cnid_t *)ap->a_data = nextlinkid;
		} else {
			*(cnid_t *)ap->a_data = prevlinkid;
		}
		return (0);
	}

	case HFS_RESIZE_PROGRESS: {

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		/* file system must not be mounted read-only */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}

		return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
	}

	case HFS_RESIZE_VOLUME: {
		u_int64_t newsize;
		u_int64_t cursize;

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}

		/* filesystem must not be mounted read only */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		newsize = *(u_int64_t *)ap->a_data;
		cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

		if (newsize > cursize) {
			return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
		} else if (newsize < cursize) {
			return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
		} else {
			return (0);
		}
	}

	case HFS_CHANGE_NEXT_ALLOCATION: {
		int error = 0;		/* Assume success */
		u_int32_t location;

		if (vnode_vfsisrdonly(vp)) {
			return (EROFS);
		}
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		location = *(u_int32_t *)ap->a_data;
		if ((location >= hfsmp->allocLimit) &&
			(location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
			error = EINVAL;
			goto fail_change_next_allocation;
		}
		/* Return previous value. */
		*(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
		if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
			/* On magic value for location, set nextAllocation to next block
			 * after metadata zone and set flag in mount structure to indicate
			 * that nextAllocation should not be updated again.
			 */
			if (hfsmp->hfs_metazone_end != 0) {
				HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
			}
			hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
		} else {
			hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
			HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
		}
		MarkVCBDirty(hfsmp);
fail_change_next_allocation:
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		return (error);
	}

#if HFS_SPARSE_DEV
	case HFS_SETBACKINGSTOREINFO: {
		struct vnode * bsfs_rootvp;
		struct vnode * di_vp;
		struct hfs_backingstoreinfo *bsdata;
		int error = 0;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			return (EALREADY);
		}
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
		if (bsdata == NULL) {
			return (EINVAL);
		}
		if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
			return (error);
		}
		if ((error = vnode_getwithref(di_vp))) {
			file_drop(bsdata->backingfd);
			return (error);
		}

		if (vnode_mount(vp) == vnode_mount(di_vp)) {
			(void)vnode_put(di_vp);
			file_drop(bsdata->backingfd);
			return (EINVAL);
		}

		/*
		 * Obtain the backing fs root vnode and keep a reference
		 * on it.  This reference will be dropped in hfs_unmount.
		 */
		error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
		if (error) {
			(void)vnode_put(di_vp);
			file_drop(bsdata->backingfd);
			return (error);
		}
		vnode_ref(bsfs_rootvp);
		vnode_put(bsfs_rootvp);

		hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;

		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
		/* The free extent cache is managed differently for sparse devices.
		 * There is a window between which the volume is mounted and the
		 * device is marked as sparse, so the free extent cache for this
		 * volume is currently initialized as normal volume (sorted by block
		 * count).  Reset the cache so that it will be rebuilt again
		 * for sparse device (sorted by start block).
		 */
		ResetVCBFreeExtCache(hfsmp);

		hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
		hfsmp->hfs_sparsebandblks *= 4;

		vfs_markdependency(hfsmp->hfs_mp);

		/*
		 * If the sparse image is on a sparse image file (as opposed to a sparse
		 * bundle), then we may need to limit the free space to the maximum size
		 * of a file on that volume.  So we query (using pathconf), and if we get
		 * a meaningful result, we cache the number of blocks for later use in
		 * hfs_freeblks().
		 */
		hfsmp->hfs_backingfs_maxblocks = 0;
		if (vnode_vtype(di_vp) == VREG) {
			int terr;
			int hostbits;

			terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
			if (terr == 0 && hostbits != 0 && hostbits < 64) {
				u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;

				hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
			}
		}

		(void)vnode_put(di_vp);
		file_drop(bsdata->backingfd);
		return (0);
	}

	case HFS_CLRBACKINGSTOREINFO: {
		struct vnode * tmpvp;

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}

		if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
		    hfsmp->hfs_backingfs_rootvp) {

			hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
			tmpvp = hfsmp->hfs_backingfs_rootvp;
			hfsmp->hfs_backingfs_rootvp = NULLVP;
			hfsmp->hfs_sparsebandblks = 0;
			vnode_rele(tmpvp);
		}
		return (0);
	}
#endif /* HFS_SPARSE_DEV */

	/* Change the next CNID stored in the VH */
	case HFS_CHANGE_NEXTCNID: {
		int error = 0;		/* Assume success */
		u_int32_t fileid;
		int wraparound = 0;
		int lockflags = 0;

		if (vnode_vfsisrdonly(vp)) {
			return (EROFS);
		}
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}

		fileid = *(u_int32_t *)ap->a_data;

		/* Must have catalog lock excl. to advance the CNID pointer */
		lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

		HFS_MOUNT_LOCK(hfsmp, TRUE);

		/* If it is less than the current next CNID, force the wraparound bit to be set */
		if (fileid < hfsmp->vcbNxtCNID) {
			wraparound = 1;
		}

		/* Return previous value. */
		*(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID;

		hfsmp->vcbNxtCNID = fileid;

		if (wraparound) {
			hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask;
		}

		MarkVCBDirty(hfsmp);
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		hfs_systemfile_unlock (hfsmp, lockflags);

		return (error);
	}

	case F_FREEZE_FS: {
		struct mount *mp;

		mp = vnode_mount(vp);
		hfsmp = VFSTOHFS(mp);

		vfsp = vfs_statfs(mp);

		if (kauth_cred_getuid(cred) != vfsp->f_owner &&
			!kauth_cred_issuser(cred))
			return (EACCES);

		lck_rw_lock_exclusive(&hfsmp->hfs_insync);

		// flush things before we get started to try and prevent
		// dirty data from being paged out while we're frozen.
		// note: can't do this after taking the lock as it will
		// deadlock against ourselves.
		vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
		hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

		// DO NOT call hfs_journal_flush() because that takes a
		// shared lock on the global exclusive lock!
		journal_flush(hfsmp->jnl, TRUE);

		// don't need to iterate on all vnodes, we just need to
		// wait for writes to the system files and the device vnode
		//
		// Now that journal flush waits for all metadata blocks to
		// be written out, waiting for btree writes is probably no
		// longer required.
		if (HFSTOVCB(hfsmp)->extentsRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
		if (HFSTOVCB(hfsmp)->catalogRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
		if (HFSTOVCB(hfsmp)->allocationsRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
		if (hfsmp->hfs_attribute_vp)
			vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
		vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");

		hfsmp->hfs_freezing_proc = current_proc();

		return (0);
	}
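
	/*
	 * Added note (not in the original source): the freeze path above is
	 * normally reached via fcntl(F_FREEZE_FS) on the volume root by the
	 * volume owner or the superuser.  Writes are quiesced and the journal
	 * flushed, and the volume stays frozen until the same process issues
	 * F_THAW_FS (handled just below) or the "thaw" path in
	 * hfs_vnop_close() runs.
	 */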

	case F_THAW_FS: {
		vfsp = vfs_statfs(vnode_mount(vp));
		if (kauth_cred_getuid(cred) != vfsp->f_owner &&
			!kauth_cred_issuser(cred))
			return (EACCES);

		// if we're not the one who froze the fs then we
		// can't thaw it.
		if (hfsmp->hfs_freezing_proc != current_proc()) {
			return EPERM;
		}

		// NOTE: if you add code here, also go check the
		//       code that "thaws" the fs in hfs_vnop_close()
		//
		hfsmp->hfs_freezing_proc = NULL;
		hfs_unlock_global (hfsmp);
		lck_rw_unlock_exclusive(&hfsmp->hfs_insync);

		return (0);
	}

	case HFS_BULKACCESS_FSCTL: {
		int size;

		if (hfsmp->hfs_flags & HFS_STANDARD) {
			return EINVAL;
		}

		if (is64bit) {
			size = sizeof(struct user64_access_t);
		} else {
			size = sizeof(struct user32_access_t);
		}

		return do_bulk_access_check(hfsmp, vp, ap, size, context);
	}

	case HFS_EXT_BULKACCESS_FSCTL: {
		int size;

		if (hfsmp->hfs_flags & HFS_STANDARD) {
			return EINVAL;
		}

		if (is64bit) {
			size = sizeof(struct user64_ext_access_t);
		} else {
			size = sizeof(struct user32_ext_access_t);
		}

		return do_bulk_access_check(hfsmp, vp, ap, size, context);
	}
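
	/*
	 * Added note (not in the original source): only the size of the
	 * caller's structure is passed down here; do_bulk_access_check()
	 * uses that size, together with the caller's 32/64-bit-ness, to
	 * decide whether it was handed a legacy access_t or an ext_access_t
	 * layout and up-converts the legacy layout internally.
	 */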

	case HFS_SET_XATTREXTENTS_STATE: {
		int state;

		if (ap->a_data == NULL) {
			return (EINVAL);
		}

		state = *(int *)ap->a_data;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}

		/* Super-user can enable or disable extent-based extended
		 * attribute support on a volume
		 * Note: Starting Mac OS X 10.7, extent-based extended attributes
		 * are enabled by default, so any change will be transient only
		 * till the volume is remounted.
		 */
		if (state == 0 || state == 1)
			return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
		else
			return (EINVAL);
	}

	case F_SETSTATICCONTENT: {
		int error;
		int enable_static = 0;
		struct cnode *cp = NULL;
		/*
		 * lock the cnode, decorate the cnode flag, and bail out.
		 * VFS should have already authenticated the caller for us.
		 */

		if (ap->a_data) {
			/*
			 * Note that even though ap->a_data is of type caddr_t,
			 * the fcntl layer at the syscall handler will pass in NULL
			 * or 1 depending on what the argument supplied to the fcntl
			 * was.  So it is in fact correct to check the ap->a_data
			 * argument for zero or non-zero value when deciding whether or not
			 * to enable the static bit in the cnode.
			 */
			enable_static = 1;
		}
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return EROFS;
		}
		cp = VTOC(vp);

		error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK);
		if (error == 0) {
			if (enable_static) {
				cp->c_flag |= C_SSD_STATIC;
			} else {
				cp->c_flag &= ~C_SSD_STATIC;
			}
			hfs_unlock (cp);
		}
		return error;
	}

	case F_SETBACKINGSTORE: {
		int error = 0;

		/*
		 * See comment in F_SETSTATICCONTENT re: using
		 * a null check for a_data
		 */
		if (ap->a_data) {
			error = hfs_set_backingstore (vp, 1);
		} else {
			error = hfs_set_backingstore (vp, 0);
		}
		return error;
	}

	case F_GETPATH_MTMINFO: {
		int error = 0;

		int *data = (int*) ap->a_data;

		/* Ask if this is a backingstore vnode */
		error = hfs_is_backingstore (vp, data);

		return error;
	}

	case F_FULLFSYNC: {
		int error;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
		if (error == 0) {
			error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
			hfs_unlock(VTOC(vp));
		}

		return (error);
	}

	case F_CHKCLEAN: {
		register struct cnode *cp;
		int error;

		if (!vnode_isreg(vp))
			return EINVAL;

		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
		if (error == 0) {
			cp = VTOC(vp);
			/*
			 * used by regression test to determine if
			 * all the dirty pages (via write) have been cleaned
			 * after a call to 'fsync'.
			 */
			error = is_file_clean(vp, VTOF(vp)->ff_size);
			hfs_unlock(cp);
		}
		return (error);
	}

	case F_RDADVISE: {
		register struct radvisory *ra;
		struct filefork *fp;
		int error;

		if (!vnode_isreg(vp))
			return EINVAL;

		ra = (struct radvisory *)(ap->a_data);
		fp = VTOF(vp);

		/* Protect against a size change. */
		hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK);

#if HFS_COMPRESSION
		if (compressed && (uncompressed_size == -1)) {
			/* fetching the uncompressed size failed above, so return the error */
			error = decmpfs_error;
		} else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
				(!compressed && (ra->ra_offset >= fp->ff_size))) {
			error = EFBIG;
		}
#else /* HFS_COMPRESSION */
		if (ra->ra_offset >= fp->ff_size) {
			error = EFBIG;
		}
#endif /* HFS_COMPRESSION */
		else {
			error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
		}

		hfs_unlock_truncate(VTOC(vp), 0);
		return (error);
	}
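
	/*
	 * Illustrative usage (added; not part of the original source): a
	 * userspace reader can ask for this read-ahead with the standard
	 * F_RDADVISE fcntl, e.g.
	 *
	 *	struct radvisory ra = { .ra_offset = 0, .ra_count = 128 * 1024 };
	 *	fcntl(fd, F_RDADVISE, &ra);   // hint: prefetch the first 128 KiB
	 *
	 * which reaches the case above via VNOP_IOCTL on the open file.
	 */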

	case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
	{
		if (is64bit) {
			*(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
		}
		else {
			*(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
		}
		return 0;
	}

	case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
		*(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
		break;

	case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
		*(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
		break;

	case HFS_FSCTL_GET_VERY_LOW_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
		break;

	case HFS_FSCTL_SET_VERY_LOW_DISK:
		if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
			return EINVAL;
		}

		hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
		break;

	case HFS_FSCTL_GET_LOW_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
		break;

	case HFS_FSCTL_SET_LOW_DISK:
		if (   *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
			|| *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {

			return EINVAL;
		}

		hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
		break;

	case HFS_FSCTL_GET_DESIRED_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
		break;

	case HFS_FSCTL_SET_DESIRED_DISK:
		if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
			return EINVAL;
		}

		hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
		break;
2259 case HFS_VOLUME_STATUS
:
2260 *(uint32_t *)ap
->a_data
= hfsmp
->hfs_notification_conditions
;
2263 case HFS_SET_BOOT_INFO
:
2264 if (!vnode_isvroot(vp
))
2266 if (!kauth_cred_issuser(cred
) && (kauth_cred_getuid(cred
) != vfs_statfs(HFSTOVFS(hfsmp
))->f_owner
))
2267 return(EACCES
); /* must be superuser or owner of filesystem */
2268 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2271 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
2272 bcopy(ap
->a_data
, &hfsmp
->vcbFndrInfo
, sizeof(hfsmp
->vcbFndrInfo
));
2273 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
2274 (void) hfs_flushvolumeheader(hfsmp
, MNT_WAIT
, 0);
2277 case HFS_GET_BOOT_INFO
:
2278 if (!vnode_isvroot(vp
))
2280 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
2281 bcopy(&hfsmp
->vcbFndrInfo
, ap
->a_data
, sizeof(hfsmp
->vcbFndrInfo
));
2282 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
2285 case HFS_MARK_BOOT_CORRUPT
:
2286 /* Mark the boot volume corrupt by setting
2287 * kHFSVolumeInconsistentBit in the volume header. This will
2288 * force fsck_hfs on next mount.
2294 /* Allowed only on the root vnode of the boot volume */
2295 if (!(vfs_flags(HFSTOVFS(hfsmp
)) & MNT_ROOTFS
) ||
2296 !vnode_isvroot(vp
)) {
2299 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2302 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
2303 hfs_mark_volume_inconsistent(hfsmp
);
2306 case HFS_FSCTL_GET_JOURNAL_INFO
:
2307 jip
= (struct hfs_journal_info
*)ap
->a_data
;
2312 if (hfsmp
->jnl
== NULL
) {
2316 jnl_start
= (off_t
)(hfsmp
->jnl_start
* HFSTOVCB(hfsmp
)->blockSize
) + (off_t
)HFSTOVCB(hfsmp
)->hfsPlusIOPosOffset
;
2317 jnl_size
= (off_t
)hfsmp
->jnl_size
;
2320 jip
->jstart
= jnl_start
;
2321 jip
->jsize
= jnl_size
;
2324 case HFS_SET_ALWAYS_ZEROFILL
: {
2325 struct cnode
*cp
= VTOC(vp
);
2327 if (*(int *)ap
->a_data
) {
2328 cp
->c_flag
|= C_ALWAYS_ZEROFILL
;
2330 cp
->c_flag
&= ~C_ALWAYS_ZEROFILL
;
2335 case HFS_DISABLE_METAZONE
: {
2336 /* Only root can disable metadata zone */
2340 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2344 /* Disable metadata zone now */
2345 (void) hfs_metadatazone_init(hfsmp
, true);
2346 printf ("hfs: Disabling metadata zone on %s\n", hfsmp
->vcbVN
);
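	/*
	 * Note on the free-space fsctls above (illustrative values only, not
	 * from the original source): the three thresholds are kept strictly
	 * ordered, dangerlimit < warninglimit < desiredlevel, all counted in
	 * allocation blocks.  Each setter checks the neighboring threshold
	 * before assigning, so a hypothetical configuration of 100 / 500 /
	 * 1000 blocks is accepted while one that inverts the ordering is not.
	 */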
hfs_vnop_select(__unused struct vnop_select_args *ap)
/*
	struct vnop_select_args {
		vfs_context_t a_context;
	};
*/
{
	/*
	 * We should really check to see if I/O is possible.
	 */
/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently it's 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
 */
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
{
	struct filefork *fp = VTOF(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	int retval = E_NONE;
	u_int32_t logBlockSize;
	size_t bytesContAvail = 0;
	off_t blockposition;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	*vpp = hfsmp->hfs_devvp;

	logBlockSize = GetLogicalBlockSize(vp);
	blockposition = (off_t)bn * logBlockSize;

	lockExtBtree = overflow_extents(fp);

	lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);

	retval = MacToVFSError(
			MapFileBlockC (HFSTOVCB(hfsmp),

	hfs_systemfile_unlock(hfsmp, lockflags);

	if (retval == E_NONE) {
		/* Figure out how many read ahead blocks there are */
		if (can_cluster(logBlockSize)) {
			/* Make sure this result never goes negative: */
			*runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
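	/*
	 * Worked example of the read-ahead ("run") math above, for
	 * illustration only: with a 4096-byte logical block size and
	 * bytesContAvail of 32768 bytes, *runp = (32768 / 4096) - 1 = 7
	 * remaining contiguous logical blocks beyond the one being mapped.
	 */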
/*
 * Convert logical block number to file offset.
 */
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
	struct vnop_blktooff_args {
	};
*/
{
	if (ap->a_vp == NULL)

	*ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
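	/*
	 * Illustration (hypothetical numbers): with a 4096-byte logical block
	 * size, logical block 3 maps to file offset 3 * 4096 = 12288.
	 */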
/*
 * Convert file offset to logical block number.
 */
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
	struct vnop_offtoblk_args {
		daddr64_t *a_lblkno;
	};
*/
{
	if (ap->a_vp == NULL)

	*ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
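	/*
	 * Illustration (hypothetical numbers): with a 4096-byte logical block
	 * size, file offset 12500 maps to logical block 12500 / 4096 = 3
	 * (integer division).
	 */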
/*
 * Map file offset to physical block number.
 *
 * If this function is called for write operation, and if the file
 * had virtual blocks allocated (delayed allocation), real blocks
 * are allocated by calling ExtendFileC().
 *
 * If this function is called for read operation, and if the file
 * had virtual blocks allocated (delayed allocation), no change
 * to the size of file is done, and if required, rangelist is
 * searched for mapping.
 *
 * System file cnodes are expected to be locked (shared or exclusive).
 */
hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
/*
	struct vnop_blockmap_args {
		vfs_context_t a_context;
	};
*/
{
	struct vnode *vp = ap->a_vp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	size_t bytesContAvail = 0;
	int retval = E_NONE;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;

	if (VNODE_IS_RSRC(vp)) {
		/* allow blockmaps to the resource fork */

	if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
		int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));

			case FILE_IS_COMPRESSED:

			case FILE_IS_CONVERTING:
				/* if FILE_IS_CONVERTING, we allow blockmap */

				printf("invalid state %d for compressed file\n", state);

#endif /* HFS_COMPRESSION */

	/* Do not allow blockmap operation on a directory */
	if (vnode_isdir(vp)) {

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_bpn == NULL)

	if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
		if (VTOC(vp)->c_lockowner != current_thread()) {
			hfs_lock(VTOC(vp), HFS_FORCE_LOCK);

	/* Check virtual blocks only when performing write operation */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		if (hfs_start_transaction(hfsmp) != 0) {

		syslocks = SFL_EXTENTS | SFL_BITMAP;

	} else if (overflow_extents(fp)) {
		syslocks = SFL_EXTENTS;

	lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);

	/*
	 * Check for any delayed allocations.
	 */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		u_int32_t loanedBlocks;

		// Make sure we have a transaction.  It's possible
		// that we came in and fp->ff_unallocblocks was zero
		// but during the time we blocked acquiring the extents
		// btree, ff_unallocblocks became non-zero and so we
		// will need to start a transaction.
		if (started_tr == 0) {

			hfs_systemfile_unlock(hfsmp, lockflags);

		/*
		 * Note: ExtendFileC will Release any blocks on loan and
		 * acquire real blocks.  So we ask to extend by zero bytes
		 * since ExtendFileC will account for the virtual blocks.
		 */
		loanedBlocks = fp->ff_unallocblocks;
		retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
				     kEFAllMask | kEFNoClumpMask, &actbytes);

			fp->ff_unallocblocks = loanedBlocks;
			cp->c_blocks += loanedBlocks;
			fp->ff_blocks += loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);
			hfsmp->loanedBlocks += loanedBlocks;
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);

			hfs_systemfile_unlock(hfsmp, lockflags);
			cp->c_flag |= C_MODIFIED;

				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

				hfs_end_transaction(hfsmp);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
			       ap->a_bpn, &bytesContAvail);

	hfs_systemfile_unlock(hfsmp, lockflags);

		(void) hfs_update(vp, TRUE);
		(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
		hfs_end_transaction(hfsmp);

	/* On write, always return error because virtual blocks, if any,
	 * should have been allocated in ExtendFileC().  We do not
	 * allocate virtual blocks on read, therefore return error
	 * only if no virtual blocks are allocated.  Otherwise we search
	 * rangelist for zero-fills
	 */
	if ((MacToVFSError(retval) != ERANGE) ||
	    (ap->a_flags & VNODE_WRITE) ||
	    ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {

	/* Validate if the start offset is within logical file size */
	if (ap->a_foffset >= fp->ff_size) {

	/*
	 * At this point, we have encountered a failure during
	 * MapFileBlockC that resulted in ERANGE, and we are not servicing
	 * a write, and there are borrowed blocks.
	 *
	 * However, the cluster layer will not call blockmap for
	 * blocks that are borrowed and in-cache.  We have to assume that
	 * because we observed ERANGE being emitted from MapFileBlockC, this
	 * extent range is not valid on-disk.  So we treat this as a
	 * mapping that needs to be zero-filled prior to reading.
	 *
	 * Note that under certain circumstances (such as non-contiguous
	 * userland VM mappings in the calling process), cluster_io
	 * may be forced to split a large I/O driven by hfs_vnop_write
	 * into multiple sub-I/Os that necessitate a RMW cycle.  If this is
	 * the case here, then we have already removed the invalid range list
	 * mapping prior to getting to this blockmap call, so we should not
	 * search the invalid rangelist for this byte range.
	 */

	bytesContAvail = fp->ff_size - ap->a_foffset;
	/*
	 * Clip the contiguous available bytes to, at most, the allowable
	 * maximum or the amount requested.
	 */
	if (bytesContAvail > ap->a_size) {
		bytesContAvail = ap->a_size;

	*ap->a_bpn = (daddr64_t) -1;

	/* MapFileC() found a valid extent in the filefork.  Search the
	 * mapping information further for invalid file ranges
	 */
	overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
			      ap->a_foffset + (off_t)bytesContAvail - 1,
	if (overlaptype != RL_NOOVERLAP) {
		switch(overlaptype) {
		case RL_MATCHINGOVERLAP:
		case RL_OVERLAPCONTAINSRANGE:
		case RL_OVERLAPSTARTSBEFORE:
			/* There's no valid block for this byte offset */
			*ap->a_bpn = (daddr64_t)-1;
			/* There's no point limiting the amount to be returned
			 * if the invalid range that was hit extends all the way
			 * to the EOF (i.e. there are no valid bytes between the
			 * end of this range and the file's EOF):
			 */
			if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
			    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
				bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;

		case RL_OVERLAPISCONTAINED:
		case RL_OVERLAPENDSAFTER:
			/* The range of interest hits an invalid block before the end: */
			if (invalid_range->rl_start == ap->a_foffset) {
				/* There's actually no valid information to be had starting here: */
				*ap->a_bpn = (daddr64_t)-1;
				if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
				    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
					bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;

				bytesContAvail = invalid_range->rl_start - ap->a_foffset;

	if (bytesContAvail > ap->a_size)
		bytesContAvail = ap->a_size;

	*ap->a_run = bytesContAvail;

	*(int *)ap->a_poff = 0;

	return (MacToVFSError(retval));
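/*
 * Summary of the invalid-range handling above: when the requested range has
 * no valid on-disk blocks (either MapFileBlockC returned ERANGE for a read of
 * borrowed blocks, or the invalid-range list covers the start of the range),
 * *ap->a_bpn is set to -1 so the cluster layer zero-fills instead of issuing
 * a device read, and bytesContAvail is clipped to the portion of the request
 * that shares that disposition.
 */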
/*
 * prepare and issue the I/O
 * buf_strategy knows how to deal
 * with requests that require
 */
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
	buf_t bp = ap->a_bp;
	vnode_t vp = buf_vnode(bp);

	/* Mark buffer as containing static data if cnode flag set */
	if (VTOC(vp)->c_flag & C_SSD_STATIC) {

	if ((cp = cp_get_protected_cnode(vp)) != NULL) {
		/*
		 * We rely upon the truncate lock to protect the
		 * CP cache key from getting tossed prior to our IO finishing here.
		 * Nearly all cluster io calls to manipulate file payload from HFS
		 * take the truncate lock before calling into the cluster
		 * layer to ensure the file size does not change, or that they
		 * have exclusive right to change the EOF of the file.
		 * That same guarantee protects us here since the code that
		 * deals with CP lock events must now take the truncate lock
		 * before doing anything.
		 *
		 * There is one exception here:
		 * 1) VM swapfile IO: HFS will funnel the VNOP_PAGEOUT directly
		 * into a cluster_pageout call for the swapfile code only, without
		 * holding the truncate lock.  This is because individual swapfiles
		 * are maintained at fixed-length sizes by the VM code.
		 * In non-swapfile IO we use PAGEOUT_V2 semantics which allow us to
		 * create our own UPL and thus take the truncate lock before calling
		 * into the cluster layer.  In that case, however, we are not concerned
		 * with the CP blob being wiped out in the middle of the IO
		 * because there isn't anything to toss; the VM swapfile key stays
		 * in-core as long as the file is open.
		 *
		 * For filesystem resize, we may not have access to the underlying
		 * file's cache key for whatever reason (device may be locked).  However,
		 * we do not need it since we are going to use the temporary HFS-wide resize key
		 * which is generated once we start relocating file content.  If this file's I/O
		 * should be done using the resize key, it will have been supplied already, so
		 * do not attach the file's cp blob to the buffer.
		 */
		if ((cp->c_cpentry->cp_flags & CP_RELOCATION_INFLIGHT) == 0) {
			buf_setcpaddr(bp, cp->c_cpentry);

#endif /* CONFIG_PROTECT */

	error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
hfs_minorupdate(struct vnode *vp) {
	struct cnode *cp = VTOC(vp);
	cp->c_flag &= ~C_MODIFIED;
	cp->c_touch_acctime = 0;
	cp->c_touch_chgtime = 0;
	cp->c_touch_modtime = 0;
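/*
 * hfs_minorupdate clears C_MODIFIED and the touch_* flags so that a later
 * update does not dirty the catalog record for this change; do_hfs_truncate
 * below uses it on the skipupdate path in place of the usual
 * hfs_update/hfs_volupdate pair.
 */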
do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context)
{
	register struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	struct proc *p = vfs_context_proc(context);
	kauth_cred_t cred = vfs_context_ucred(context);
	off_t actualBytesAdded;
	u_int32_t fileblocks;
	struct hfsmount *hfsmp;

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
		 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)

	if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))

	/* Files that are changing size are not hot file candidates. */
	if (hfsmp->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
	if ((retval = hfs_getinoquota(cp)))

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of ff_size is 0, length will be at least 1.
	 */
	if (length > (off_t)fp->ff_size) {
		retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),

		/*
		 * If we don't have enough physical space then
		 * we need to extend the physical size.
		 */
		if (length > filebytes) {
			u_int32_t blockHint = 0;

			/* All or nothing and don't round up to clumpsize. */
			eflags = kEFAllMask | kEFNoClumpMask;

			if (cred && suser(cred, NULL) != 0)
				eflags |= kEFReserveMask;  /* keep a reserve */

			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (filebytes == 0 &&
			    hfsmp->hfs_flags & HFS_METADATA_ZONE &&
			    hfs_virtualmetafile(cp)) {
				eflags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;

			if (hfs_start_transaction(hfsmp) != 0) {

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			while ((length > filebytes) && (retval == E_NONE)) {
				bytesToAdd = length - filebytes;
				retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
								   &actualBytesAdded));

				filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
				if (actualBytesAdded == 0 && retval == E_NONE) {
					if (length > filebytes)

			hfs_systemfile_unlock(hfsmp, lockflags);

				(void) hfs_minorupdate(vp);

				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);

		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
			(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

		if (!(flags & IO_NOZEROFILL)) {
			if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
				struct rl_entry *invalid_range;

				zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
				if (length < zero_limit) zero_limit = length;

				if (length > (off_t)fp->ff_size) {
					/* Extending the file: time to fill out the current last page w. zeroes? */
					if ((fp->ff_size & PAGE_MASK_64) &&
					    (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
					    fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {

						/* There's some valid data at the start of the (current) last page
						   of the file, so zero out the remainder of that page to ensure the
						   entire page contains valid data. Since there is no invalid range
						   possible past the (current) eof, there's no need to remove anything
						   from the invalid range list before calling cluster_write(): */

						retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
								fp->ff_size, (off_t)0,
								(flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
						hfs_lock(cp, HFS_FORCE_LOCK);
						if (retval) goto Err_Exit;

						/* Merely invalidate the remaining area, if necessary: */
						if (length > zero_limit) {
							rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
							cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;

					/* The page containing the (current) eof is invalid: just add the
					   remainder of the page to the invalid list, along with the area
					   being newly allocated:
					 */
					rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
					cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;

				panic("hfs_truncate: invoked on non-UBC object?!");

		cp->c_touch_modtime = TRUE;
		fp->ff_size = length;

	} else { /* Shorten the size of the file */

		if ((off_t)fp->ff_size > length) {
			/* Any space previously marked as invalid is now irrelevant: */
			rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);

		/*
		 * Account for any unmapped blocks. Note that the new
		 * file length can still end up with unmapped blocks.
		 */
		if (fp->ff_unallocblocks > 0) {
			u_int32_t finalblks;
			u_int32_t loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);

			loanedBlocks = fp->ff_unallocblocks;
			cp->c_blocks -= loanedBlocks;
			fp->ff_blocks -= loanedBlocks;
			fp->ff_unallocblocks = 0;

			hfsmp->loanedBlocks -= loanedBlocks;

			finalblks = (length + blksize - 1) / blksize;
			if (finalblks > fp->ff_blocks) {
				/* calculate required unmapped blocks */
				loanedBlocks = finalblks - fp->ff_blocks;
				hfsmp->loanedBlocks += loanedBlocks;

				fp->ff_unallocblocks = loanedBlocks;
				cp->c_blocks += loanedBlocks;
				fp->ff_blocks += loanedBlocks;

			HFS_MOUNT_UNLOCK(hfsmp, TRUE);

		/*
		 * For a TBE process the deallocation of the file blocks is
		 * delayed until the file is closed. And hfs_close calls
		 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
		 * isn't set, we make sure this isn't a TBE process.
		 */
		if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
			off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);

			if (hfs_start_transaction(hfsmp) != 0) {

			if (fp->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(fp))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
								     FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));

				hfs_systemfile_unlock(hfsmp, lockflags);

			fp->ff_size = length;

				(void) hfs_minorupdate(vp);

				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);

			filebytes = (off_t)fp->ff_blocks * (off_t)blksize;

			/* These are bytesreleased */
			(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);

		/* Only set update flag if the logical length changes */
		if ((off_t)fp->ff_size != length)
			cp->c_touch_modtime = TRUE;
		fp->ff_size = length;

	if (cp->c_mode & (S_ISUID | S_ISGID)) {
		if (!vfs_context_issuser(context)) {
			cp->c_mode &= ~(S_ISUID | S_ISGID);

		retval = hfs_minorupdate(vp);

		cp->c_touch_chgtime = TRUE;	/* status changed */
		cp->c_touch_modtime = TRUE;	/* file data was modified */
		retval = hfs_update(vp, MNT_WAIT);

			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
				-1, -1, -1, retval, 0);

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
		 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
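/*
 * Worked example of the loaned-block accounting above (illustrative numbers
 * only): shrinking to length = 10000 bytes with a 4096-byte allocation block
 * gives finalblks = (10000 + 4095) / 4096 = 3.  If the fork currently holds
 * only 2 real blocks, loanedBlocks = 3 - 2 = 1 block is re-borrowed, keeping
 * ff_blocks, c_blocks and the mount-wide loanedBlocks count consistent with
 * the new logical length.
 */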
/*
 * Preparation which must be done prior to deleting the catalog record
 * of a file or directory.  In order to make the on-disk state as safe as
 * possible, we remove the catalog entry before releasing the bitmap blocks
 * and the overflow extent records.  However, some work must be done prior
 * to deleting the catalog record.
 *
 * When calling this function, the cnode must exist both in memory and on-disk.
 * If there are both resource fork and data fork vnodes, this function should
 * be called on both.
 */
hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {

	struct filefork *fp = VTOF(vp);
	struct cnode *cp = VTOC(vp);

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {

	/*
	 * See the comment below in hfs_truncate for why we need to call
	 * setsize here.  Essentially we want to avoid pending IO if we
	 * already know that the blocks are going to be released here.
	 * This function is only called when totally removing all storage for a file, so
	 * we can take a shortcut and immediately setsize (0);
	 */

	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
	if ((retval = hfs_getinoquota(cp))) {

	/* Wipe out any invalid ranges which have yet to be backed by disk */
	rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);

	/*
	 * Account for any unmapped blocks. Since we're deleting the
	 * entire file, we don't have to worry about just shrinking
	 * to a smaller number of borrowed blocks.
	 */
	if (fp->ff_unallocblocks > 0) {
		u_int32_t loanedBlocks;

		HFS_MOUNT_LOCK(hfsmp, TRUE);

		loanedBlocks = fp->ff_unallocblocks;
		cp->c_blocks -= loanedBlocks;
		fp->ff_blocks -= loanedBlocks;
		fp->ff_unallocblocks = 0;

		hfsmp->loanedBlocks -= loanedBlocks;

		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
/*
 * Special wrapper around calling TruncateFileC.  This function is usable
 * even when the catalog record does not exist any longer, making it ideal
 * for use when deleting a file.  The simplification here is that we know
 * that we are releasing all blocks.
 *
 * Note that this function may be called when there is no vnode backing
 * the file fork in question.  We may call this from hfs_vnop_inactive
 * to clear out resource fork data (and may not want to clear out the data
 * fork yet).  As a result, we pointer-check both sets of inputs before
 * doing anything with them.
 *
 * The caller is responsible for saving off a copy of the filefork(s)
 * embedded within the cnode prior to calling this function.  The pointers
 * supplied as arguments must be valid even if the cnode is no longer valid.
 */
hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
		     struct filefork *rsrcfork, u_int32_t fileid) {

	u_int32_t fileblocks;

	blksize = hfsmp->blockSize;

	if ((datafork != NULL) && (datafork->ff_blocks > 0)) {
		fileblocks = datafork->ff_blocks;
		filebytes = (off_t)fileblocks * (off_t)blksize;

		/* We killed invalid ranges and loaned blocks before we removed the catalog entry */

		while (filebytes > 0) {
			if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(datafork)) {
				filebytes -= HFS_BIGFILE_SIZE;

			/* Start a transaction, and wipe out as many blocks as we can in this iteration */
			if (hfs_start_transaction(hfsmp) != 0) {

			if (datafork->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(datafork))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false));

				hfs_systemfile_unlock(hfsmp, lockflags);

			datafork->ff_size = filebytes;

			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			/* Finish the transaction and start over if necessary */
			hfs_end_transaction(hfsmp);

	if (error == 0 && (rsrcfork != NULL) && rsrcfork->ff_blocks > 0) {
		fileblocks = rsrcfork->ff_blocks;
		filebytes = (off_t)fileblocks * (off_t)blksize;

		/* We killed invalid ranges and loaned blocks before we removed the catalog entry */

		while (filebytes > 0) {
			if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(rsrcfork)) {
				filebytes -= HFS_BIGFILE_SIZE;

			/* Start a transaction, and wipe out as many blocks as we can in this iteration */
			if (hfs_start_transaction(hfsmp) != 0) {

			if (rsrcfork->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(rsrcfork))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false));

				hfs_systemfile_unlock(hfsmp, lockflags);

			rsrcfork->ff_size = filebytes;

			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			/* Finish the transaction and start over if necessary */
			hfs_end_transaction(hfsmp);
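/*
 * The HFS_BIGFILE_SIZE stepping above bounds how much work lands in a single
 * journal transaction: forks with overflow extents are trimmed in
 * HFS_BIGFILE_SIZE chunks, with the transaction ended between iterations,
 * so deleting a very large fragmented file does not have to fit in one
 * transaction.
 */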
/*
 * Truncate a cnode to at most length size, freeing (or adding) the
 * disk blocks.
 */
hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
	     int skipupdate, vfs_context_t context)
{
	struct filefork *fp = VTOF(vp);
	u_int32_t fileblocks;
	int blksize, error = 0;
	struct cnode *cp = VTOC(vp);

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {

	/* A swap file cannot change size. */
	if (vnode_isswap(vp) && (length != 0)) {

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	// Have to do this here so that we don't wind up with
	// i/o pending for blocks that are about to be released
	// if we truncate the file.

	// If skipsetsize is set, then the caller is responsible
	// for the ubc_setsize.

	// Even if skipsetsize is set, if the length is zero we
	// want to call ubc_setsize() because as of SnowLeopard
	// it will no longer cause any page-ins and it will drop
	// any dirty pages so that we don't do any i/o that we
	// don't have to.  This also prevents a race where i/o
	// for truncated blocks may overwrite later data if the
	// blocks get reallocated to a different file.

	if (!skipsetsize || length == 0)
		ubc_setsize(vp, length);

	// have to loop truncating or growing files that are
	// really big because otherwise transactions can get
	// enormous and consume too many kernel resources.

	if (length < filebytes) {
		while (filebytes > length) {
			if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes -= HFS_BIGFILE_SIZE;

			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);

	} else if (length > filebytes) {
		while (filebytes < length) {
			if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes += HFS_BIGFILE_SIZE;

			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);

	} else /* Same logical size */ {

		error = do_hfs_truncate(vp, length, flags, skipupdate, context);

	/* Files that are changing size are not hot file candidates. */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
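/*
 * Illustration of the loop above (hypothetical sizes): shrinking a fork with
 * overflow extents from 3 * HFS_BIGFILE_SIZE bytes to 0 walks filebytes down
 * in steps of at most HFS_BIGFILE_SIZE (3x -> 2x -> 1x -> 0), calling
 * do_hfs_truncate once per step so that each step runs in its own
 * transaction.
 */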
/*
 * Preallocate file storage space.
 */
hfs_vnop_allocate(struct vnop_allocate_args /* {
		off_t *a_bytesallocated;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode *vp = ap->a_vp;
	struct filefork *fp;
	off_t length = ap->a_length;
	off_t moreBytesRequested;
	off_t actualBytesAdded;
	u_int32_t fileblocks;
	int retval, retval2;
	u_int32_t blockHint;
	u_int32_t extendFlags;   /* For call to ExtendFileC */
	struct hfsmount *hfsmp;
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);

	*(ap->a_bytesallocated) = 0;

	if (!vnode_isreg(vp))

	if (length < (off_t)0)

	orig_ctime = VTOC(vp)->c_ctime;

	check_for_tracked_file(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);

	hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);

	if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {

	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

	if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {

	/* Fill in the flags word for the call to Extend the file */

	extendFlags = kEFNoClumpMask;
	if (ap->a_flags & ALLOCATECONTIG)
		extendFlags |= kEFContigMask;
	if (ap->a_flags & ALLOCATEALL)
		extendFlags |= kEFAllMask;
	if (cred && suser(cred, NULL) != 0)
		extendFlags |= kEFReserveMask;
	if (hfs_virtualmetafile(cp))
		extendFlags |= kEFMetadataMask;

	startingPEOF = filebytes;

	if (ap->a_flags & ALLOCATEFROMPEOF)
		length += filebytes;
	else if (ap->a_flags & ALLOCATEFROMVOL)
		blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

	/* If no changes are necessary, then we're done */
	if (filebytes == length)

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of filebytes is 0, length will be at least 1.
	 */
	if (length > filebytes) {
		off_t total_bytes_added = 0, orig_request_size;

		orig_request_size = moreBytesRequested = length - filebytes;

		retval = hfs_chkdq(cp,
				(int64_t)(roundup(moreBytesRequested, vcb->blockSize)),

		/*
		 * Metadata zone checks.
		 */
		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (hfs_virtualmetafile(cp)) {
				blockHint = hfsmp->hfs_metazone_start;
			} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
				   (blockHint <= hfsmp->hfs_metazone_end)) {
				/*
				 * Move blockHint outside metadata zone.
				 */
				blockHint = hfsmp->hfs_metazone_end + 1;

		while ((length > filebytes) && (retval == E_NONE)) {
			off_t bytesRequested;

			if (hfs_start_transaction(hfsmp) != 0) {

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
				bytesRequested = HFS_BIGFILE_SIZE;

				bytesRequested = moreBytesRequested;

			if (extendFlags & kEFContigMask) {
				// if we're on a sparse device, this will force it to do a
				// full scan to find the space needed.
				hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;

			retval = MacToVFSError(ExtendFileC(vcb,
							   &actualBytesAdded));

			if (retval == E_NONE) {
				*(ap->a_bytesallocated) += actualBytesAdded;
				total_bytes_added += actualBytesAdded;
				moreBytesRequested -= actualBytesAdded;
				if (blockHint != 0) {
					blockHint += actualBytesAdded / vcb->blockSize;

			filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

			hfs_systemfile_unlock(hfsmp, lockflags);

				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes))

		/*
		 * Adjust actualBytesAdded to be allocation block aligned, not
		 * clump size aligned.
		 * NOTE: So what we are reporting does not affect reality
		 * until the file is closed, when we truncate the file to allocation
		 */
		if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
			*(ap->a_bytesallocated) =
				roundup(orig_request_size, (off_t)vcb->blockSize);

	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).
			 */

		retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes)) goto Err_Exit;

		/* These are bytesreleased */
		(void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);

		if (fp->ff_size > filebytes) {
			fp->ff_size = filebytes;

			ubc_setsize(vp, fp->ff_size);
			hfs_lock(cp, HFS_FORCE_LOCK);

	cp->c_touch_chgtime = TRUE;
	cp->c_touch_modtime = TRUE;
	retval2 = hfs_update(vp, MNT_WAIT);

	hfs_unlock_truncate(cp, 0);
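/*
 * Note on the rounding above (hypothetical numbers): ExtendFileC may hand
 * back clump-aligned allocations, so when more was added than asked for,
 * the value reported through *a_bytesallocated is rounded only to
 * allocation-block granularity of the original request -- a 10000-byte
 * request on a 4096-byte-block volume reports 12288 -- and the surplus is
 * trimmed back when the file is closed.
 */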
/*
 * Pagein for HFS filesystem
 */
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
	struct vnop_pagein_args {
		vm_offset_t a_pl_offset,
		vfs_context_t a_context;
	};
*/
{
	struct filefork *fp;
	upl_page_info_t *pl;
	boolean_t truncate_lock_held = FALSE;
	boolean_t file_converted = FALSE;

	if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) {

#endif /* CONFIG_PROTECT */

	if (ap->a_pl != NULL) {
		/*
		 * this can only happen for swap files now that
		 * we're asking for V2 paging behavior...
		 * so don't need to worry about decompression, or
		 * keeping track of blocks read or taking the truncate lock
		 */
		error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
				       ap->a_size, (off_t)fp->ff_size, ap->a_flags);

	/*
	 * take truncate lock (shared/recursive) to guard against
	 * zero-fill thru fsync interfering, but only for v2
	 *
	 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
	 * lock shared and we are allowed to recurse 1 level if this thread already
	 * owns the lock exclusively... this can legally occur
	 * if we are doing a shrinking ftruncate against a file
	 * that is mapped private, and the pages being truncated
	 * do not currently exist in the cache... in that case
	 * we will have to page-in the missing pages in order
	 * to provide them to the private mapping... we must
	 * also call hfs_unlock_truncate with a positive been_recursed
	 * arg to indicate that if we have recursed, there is no need to drop
	 * the lock. Allowing this simple recursion is necessary
	 * in order to avoid a certain deadlock... since the ftruncate
	 * already holds the truncate lock exclusively, if we try
	 * to acquire it shared to protect the pagein path, we will
	 *
	 * NOTE: The if () block below is a workaround in order to prevent a
	 * VM deadlock. See rdar://7853471.
	 *
	 * If we are in a forced unmount, then launchd will still have the
	 * dyld_shared_cache file mapped as it is trying to reboot.  If we
	 * take the truncate lock here to service a page fault, then our
	 * thread could deadlock with the forced-unmount.  The forced unmount
	 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
	 * marked C_DELETED, it will call ubc_setsize(0).  As a result, the unmount
	 * thread will think it needs to copy all of the data out of the file
	 * and into a VM copy object.  If we hold the cnode lock here, then that
	 * VM operation will not be able to proceed, because we'll set a busy page
	 * before attempting to grab the lock.  Note that this isn't as simple as "don't
	 * call ubc_setsize" because doing that would just shift the problem to the
	 * ubc_msync done before the vnode is reclaimed.
	 *
	 * So, if a forced unmount on this volume is in flight AND the cnode is
	 * marked C_DELETED, then just go ahead and do the page in without taking
	 * the lock (thus suspending pagein_v2 semantics temporarily).  Since it's on a file
	 * that is not going to be available on the next mount, this seems like an
	 * OK solution from a correctness point of view, even though it is hacky.
	 */
	if (vfs_isforce(vp->v_mount)) {
		if (cp->c_flag & C_DELETED) {
			/* If we don't get it, then just go ahead and operate without the lock */
			truncate_lock_held = hfs_try_trunclock(cp, HFS_RECURSE_TRUNCLOCK);

		hfs_lock_truncate(cp, HFS_RECURSE_TRUNCLOCK);
		truncate_lock_held = TRUE;

	kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);

	if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {

	ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);

	/*
	 * Scan from the back to find the last page in the UPL, so that we
	 * aren't looking at a UPL that may have already been freed by the
	 * preceding aborts/completions.
	 */
	for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
		if (upl_page_present(pl, --pg_index))

	if (pg_index == 0) {
		/*
		 * no absent pages were found in the range specified
		 * just abort the UPL to get rid of it and then we're done
		 */
		ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);

	/*
	 * initialize the offset variables before we touch the UPL.
	 * f_offset is the position into the file, in bytes
	 * offset is the position into the UPL, in bytes
	 * pg_index is the pg# of the UPL we're operating on
	 * isize is the offset into the UPL of the last page that is present.
	 */
	isize = ((pg_index + 1) * PAGE_SIZE);

	f_offset = ap->a_f_offset;

		if ( !upl_page_present(pl, pg_index)) {
			/*
			 * we asked for RET_ONLY_ABSENT, so it's possible
			 * to get back empty slots in the UPL.
			 * just skip over them
			 */
			f_offset += PAGE_SIZE;
			offset   += PAGE_SIZE;

		/*
		 * We know that we have at least one absent page.
		 * Now checking to see how many in a row we have
		 */
		xsize = isize - PAGE_SIZE;

			if ( !upl_page_present(pl, pg_index + num_of_pages))

		xsize = num_of_pages * PAGE_SIZE;

		if (VNODE_IS_RSRC(vp)) {
			/* allow pageins of the resource fork */

			int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */

				if (truncate_lock_held) {
					/*
					 * can't hold the truncate lock when calling into the decmpfs layer
					 * since it calls back into this layer... even though we're only
					 * holding the lock in shared mode, and the re-entrant path only
					 * takes the lock shared, we can deadlock if some other thread
					 * tries to grab the lock exclusively in between.
					 */
					hfs_unlock_truncate(cp, 1);
					truncate_lock_held = FALSE;

				ap->a_pl_offset = offset;
				ap->a_f_offset = f_offset;

				error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
				/*
				 * note that decmpfs_pagein_compressed can change the state of
				 * 'compressed'... it will set it to 0 if the file is no longer
				 * compressed once the compression lock is successfully taken
				 * i.e. we would block on that lock while the file is being inflated
				 */
					/* successful page-in, update the access time */
					VTOC(vp)->c_touch_acctime = TRUE;

					/* compressed files are not hot file candidates */
					if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
						fp->ff_bytesread = 0;

				} else if (error == EAGAIN) {
					/*
					 * EAGAIN indicates someone else already holds the compression lock...
					 * to avoid deadlocking, we'll abort this range of pages with an
					 * indication that the pagein needs to be redriven
					 */
					ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);

					goto pagein_next_range;

				/*
				 * Set file_converted only if the file became decompressed while we were
				 * paging in.  If it were still compressed, we would re-start the loop using the goto
				 * in the above block.  This avoids overloading truncate_lock_held as our retry_pagein
				 * condition below, since we could have avoided taking the truncate lock to prevent
				 * a deadlock in the force unmount case.
				 */
				file_converted = TRUE;

			if (file_converted == TRUE) {
				/*
				 * the file was converted back to a regular file after we first saw it as compressed
				 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over
				 * reset a_size so that we consider what remains of the original request
				 * and null out a_upl and a_pl_offset.
				 *
				 * We should only be able to get into this block if the decmpfs_pagein_compressed
				 * successfully decompressed the range in question for this file.
				 */
				ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);

				ap->a_pl_offset = 0;

				/* Reset file_converted back to false so that we don't infinite-loop. */
				file_converted = FALSE;

		error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);

		/*
		 * Keep track of blocks read.
		 */
		if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
			int took_cnode_lock = 0;

			if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
				bytesread = fp->ff_size;

			/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
			if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
				hfs_lock(cp, HFS_FORCE_LOCK);
				took_cnode_lock = 1;

			/*
			 * If this file hasn't been seen since the start of
			 * the current sampling period then start over.
			 */
			if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {

				fp->ff_bytesread = bytesread;

				cp->c_atime = tv.tv_sec;

				fp->ff_bytesread += bytesread;

			cp->c_touch_acctime = TRUE;
			if (took_cnode_lock)

		pg_index += num_of_pages;

	if (truncate_lock_held == TRUE) {
		/* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
		hfs_unlock_truncate(cp, 1);
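/*
 * Illustration of the UPL walk above (hypothetical layout): for an 8-page
 * request where only pages 2-4 were absent from the cache (and therefore are
 * the only slots populated in the RET_ONLY_ABSENT UPL), the backward scan
 * trims isize to 5 pages, the forward walk skips the two empty slots, and a
 * single cluster_pagein is issued for the 3-page run (xsize = 3 * PAGE_SIZE).
 */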
/*
 * Pageout for HFS filesystem.
 */
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
		vm_offset_t a_pl_offset,
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	struct filefork *fp;
	upl_page_info_t* pl;
	vm_offset_t a_pl_offset;
	int is_pageoutv2 = 0;

	/*
	 * Figure out where the file ends, for pageout purposes.  If
	 * ff_new_size > ff_size, then we're in the middle of extending the
	 * file via a write, so it is safe (and necessary) that we be able
	 * to pageout up to that point.
	 */
	filesize = fp->ff_size;
	if (fp->ff_new_size > filesize)
		filesize = fp->ff_new_size;

	a_flags = ap->a_flags;
	a_pl_offset = ap->a_pl_offset;

	/*
	 * we can tell if we're getting the new or old behavior from the UPL
	 */
	if ((upl = ap->a_pl) == NULL) {
		/*
		 * we're in control of any UPL we commit
		 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
		 */
		a_flags &= ~UPL_NOCOMMIT;

		/*
		 * For V2 semantics, we want to take the cnode truncate lock
		 * shared to guard against the file size changing via zero-filling.
		 *
		 * However, we have to be careful because we may be invoked
		 * via the ubc_msync path to write out dirty mmap'd pages
		 * in response to a lock event on a content-protected
		 * filesystem (e.g. to write out class A files).
		 * As a result, we want to take the truncate lock 'SHARED' with
		 * the mini-recursion locktype so that we don't deadlock/panic
		 * because we may be already holding the truncate lock exclusive to force any other
		 * IOs to have blocked behind us.
		 */
		hfs_lock_truncate(cp, HFS_RECURSE_TRUNCLOCK);

		if (a_flags & UPL_MSYNC) {
			request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;

			request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;

		kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);

		if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {

	/*
	 * from this point forward upl points at the UPL we're working with
	 * it was either passed in or we successfully created it
	 */

	/*
	 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
	 * UPL instead of relying on the UPL passed into us.  We go ahead and do that here,
	 * scanning for dirty ranges.  We'll issue our own N cluster_pageout calls, for
	 * N dirty ranges in the UPL.  Note that this is almost a direct copy of the
	 * logic in vnode_pageout except that we need to do it after grabbing the truncate
	 * lock in HFS so that we don't lock invert ourselves.
	 *
	 * Note that we can still get into this function on behalf of the default pager with
	 * non-V2 behavior (swapfiles).  However in that case, we did not grab locks above
	 * since fsync and other writing threads will grab the locks, then mark the
	 * relevant pages as busy.  But the pageout codepath marks the pages as busy,
	 * and THEN would attempt to grab the truncate lock, which would result in deadlock.  So
	 * we do not try to grab anything for the pre-V2 case, which should only be accessed
	 * by the paging/VM system.
	 */

		f_offset = ap->a_f_offset;

		/*
		 * Scan from the back to find the last page in the UPL, so that we
		 * aren't looking at a UPL that may have already been freed by the
		 * preceding aborts/completions.
		 */
		for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
			if (upl_page_present(pl, --pg_index))

		if (pg_index == 0) {
			ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);

		/*
		 * initialize the offset variables before we touch the UPL.
		 * a_f_offset is the position into the file, in bytes
		 * offset is the position into the UPL, in bytes
		 * pg_index is the pg# of the UPL we're operating on.
		 * isize is the offset into the UPL of the last non-clean page.
		 */
		isize = ((pg_index + 1) * PAGE_SIZE);

			if ( !upl_page_present(pl, pg_index)) {
				/*
				 * we asked for RET_ONLY_DIRTY, so it's possible
				 * to get back empty slots in the UPL.
				 * just skip over them
				 */
				f_offset += PAGE_SIZE;
				offset   += PAGE_SIZE;

			if ( !upl_dirty_page(pl, pg_index)) {
				panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);

			/*
			 * We know that we have at least one dirty page.
			 * Now checking to see how many in a row we have
			 */
			xsize = isize - PAGE_SIZE;

				if ( !upl_dirty_page(pl, pg_index + num_of_pages))

			xsize = num_of_pages * PAGE_SIZE;

			if (!vnode_isswap(vp)) {

				if (cp->c_lockowner != current_thread()) {
					if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
						/*
						 * we're in the v2 path, so we are the
						 * owner of the UPL... we may have already
						 * processed some of the UPL, so abort it
						 * from the current working offset to the
						 */
						ubc_upl_abort_range(upl,
						                    ap->a_size - offset,
						                    UPL_ABORT_FREE_ON_EMPTY);

				end_of_range = f_offset + xsize - 1;

				if (end_of_range >= filesize) {
					end_of_range = (off_t)(filesize - 1);

				if (f_offset < filesize) {
					rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
					cp->c_flag |= C_MODIFIED;  /* leof is dirty */

			if ((error = cluster_pageout(vp, upl, offset, f_offset,
						     xsize, filesize, a_flags))) {

			pg_index += num_of_pages;

		/* capture errnos bubbled out of cluster_pageout if they occurred */
		if (error_ret != 0) {

	} /* end block for v2 pageout behavior */

		if (!vnode_isswap(vp)) {

			if (cp->c_lockowner != current_thread()) {
				if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
					if (!(a_flags & UPL_NOCOMMIT)) {
						ubc_upl_abort_range(upl,
						                    UPL_ABORT_FREE_ON_EMPTY);

			end_of_range = ap->a_f_offset + ap->a_size - 1;

			if (end_of_range >= filesize) {
				end_of_range = (off_t)(filesize - 1);

			if (ap->a_f_offset < filesize) {
				rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
				cp->c_flag |= C_MODIFIED;  /* leof is dirty */

		/*
		 * just call cluster_pageout for old pre-v2 behavior
		 */
		retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
					 ap->a_size, filesize, a_flags);

	/*
	 * If data was written, update the modification time of the file.
	 * If setuid or setgid bits are set and this process is not the
	 * superuser then clear the setuid and setgid bits as a precaution
	 * against tampering.
	 */
		cp->c_touch_modtime = TRUE;
		cp->c_touch_chgtime = TRUE;
		if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
		    (vfs_context_suser(ap->a_context) != 0)) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			cp->c_mode &= ~(S_ISUID | S_ISGID);

	/*
	 * Release the truncate lock.  Note that because
	 * we may have taken the lock recursively by
	 * being invoked via ubc_msync due to lockdown,
	 * we should release it recursively, too.
	 */
	hfs_unlock_truncate(cp, 1);
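/*
 * Recap of the two paths above: in the V2 (PAGEOUTV2) case HFS receives a
 * NULL UPL, takes the truncate lock shared with the recursive locktype,
 * builds its own UPL and pages out each dirty run itself; in the legacy
 * case (currently the swapfile path) a UPL is handed in and a single
 * cluster_pageout covers the whole request without taking the truncate
 * lock first.
 */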
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {
		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always true on big endian, so we always validate
		 * before writing here.  On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
		if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);

				panic("hfs_vnop_bwrite: about to write corrupt node!\n");

	/* This buffer shouldn't be locked anymore but if it is clear it */
	if ((buf_flags(bp) & B_LOCKED)) {

		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp %p\n", bp);

		buf_clearflags(bp, B_LOCKED);

	retval = vn_bwrite (ap);
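/*
 * Note on the 0x000e test above: the last two bytes of a B-tree node hold
 * the offset of the first record, which is sizeof(BTNodeDescriptor) == 14
 * (0x000e) in a valid node.  Reading them as a host-order u_int16_t only
 * yields 0x000e while the node is still in host byte order, so the check
 * selects nodes that still need the host-to-big swap before hitting disk.
 */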
4339 * Relocate a file to a new location on disk
4340 * cnode must be locked on entry
4342 * Relocation occurs by cloning the file's data from its
4343 * current set of blocks to a new set of blocks. During
4344 * the relocation all of the blocks (old and new) are
4345 * owned by the file.
4352 * ----------------- -----------------
4353 * |///////////////| | | STEP 1 (acquire new blocks)
4354 * ----------------- -----------------
4357 * ----------------- -----------------
4358 * |///////////////| |///////////////| STEP 2 (clone data)
4359 * ----------------- -----------------
4363 * |///////////////| STEP 3 (head truncate blocks)
4367 * During steps 2 and 3 page-outs to file offsets less
4368 * than or equal to N are suspended.
4370 * During step 3 page-ins to the file get suspended.
4373 hfs_relocate(struct vnode
*vp
, u_int32_t blockHint
, kauth_cred_t cred
,
4377 struct filefork
*fp
;
4378 struct hfsmount
*hfsmp
;
4383 u_int32_t nextallocsave
;
4384 daddr64_t sector_a
, sector_b
;
4389 int took_trunc_lock
= 0;
4391 enum vtype vnodetype
;
4393 vnodetype
= vnode_vtype(vp
);
4394 if (vnodetype
!= VREG
&& vnodetype
!= VLNK
) {
4399 if (hfsmp
->hfs_flags
& HFS_FRAGMENTED_FREESPACE
) {
4405 if (fp
->ff_unallocblocks
)
4410 * <rdar://problem/9118426>
4411 * Disable HFS file relocation on content-protected filesystems
4413 if (cp_fs_protected (hfsmp
->hfs_mp
)) {
4417 /* If it's an SSD, also disable HFS relocation */
4418 if (hfsmp
->hfs_flags
& HFS_SSD
) {
4423 blksize
= hfsmp
->blockSize
;
4425 blockHint
= hfsmp
->nextAllocation
;
4427 if ((fp
->ff_size
> 0x7fffffff) ||
4428 ((fp
->ff_size
> blksize
) && vnodetype
== VLNK
)) {
4433 // We do not believe that this call to hfs_fsync() is
4434 // necessary and it causes a journal transaction
4435 // deadlock so we are removing it.
4437 //if (vnodetype == VREG && !vnode_issystem(vp)) {
4438 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
4443 if (!vnode_issystem(vp
) && (vnodetype
!= VLNK
)) {
4445 hfs_lock_truncate(cp
, HFS_EXCLUSIVE_LOCK
);
4446 /* Force lock since callers expects lock to be held. */
4447 if ((retval
= hfs_lock(cp
, HFS_FORCE_LOCK
))) {
4448 hfs_unlock_truncate(cp
, 0);
4451 /* No need to continue if file was removed. */
4452 if (cp
->c_flag
& C_NOEXISTS
) {
4453 hfs_unlock_truncate(cp
, 0);
4456 took_trunc_lock
= 1;
4458 headblks
= fp
->ff_blocks
;
4459 datablks
= howmany(fp
->ff_size
, blksize
);
4460 growsize
= datablks
* blksize
;
4461 eflags
= kEFContigMask
| kEFAllMask
| kEFNoClumpMask
;
4462 if (blockHint
>= hfsmp
->hfs_metazone_start
&&
4463 blockHint
<= hfsmp
->hfs_metazone_end
)
4464 eflags
|= kEFMetadataMask
;
4466 if (hfs_start_transaction(hfsmp
) != 0) {
4467 if (took_trunc_lock
)
4468 hfs_unlock_truncate(cp
, 0);
4473 * Protect the extents b-tree and the allocation bitmap
4474 * during MapFileBlockC and ExtendFileC operations.
4476 lockflags
= SFL_BITMAP
;
4477 if (overflow_extents(fp
))
4478 lockflags
|= SFL_EXTENTS
;
4479 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
4481 retval
= MapFileBlockC(hfsmp
, (FCB
*)fp
, 1, growsize
- 1, §or_a
, NULL
);
4483 retval
= MacToVFSError(retval
);
4488 * STEP 1 - acquire new allocation blocks.
4490 nextallocsave
= hfsmp
->nextAllocation
;
4491 retval
= ExtendFileC(hfsmp
, (FCB
*)fp
, growsize
, blockHint
, eflags
, &newbytes
);
4492 if (eflags
& kEFMetadataMask
) {
4493 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
4494 HFS_UPDATE_NEXT_ALLOCATION(hfsmp
, nextallocsave
);
4495 MarkVCBDirty(hfsmp
);
4496 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
4499 retval
= MacToVFSError(retval
);
	if (retval == 0) {
		cp->c_flag |= C_MODIFIED;
		if (newbytes < growsize) {
			retval = ENOSPC;
			goto restore;
		} else if (fp->ff_blocks < (headblks + datablks)) {
			printf("hfs_relocate: allocation failed");
			retval = ENOSPC;
			goto restore;
		}

		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
		if (retval) {
			retval = MacToVFSError(retval);
		} else if ((sector_a + 1) == sector_b) {
			retval = ENOSPC;
			goto restore;
		} else if ((eflags & kEFMetadataMask) &&
		           ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
		              hfsmp->hfs_metazone_end)) {
			const char * filestr;
			char emptystr = '\0';

			if (cp->c_desc.cd_nameptr != NULL) {
				filestr = (const char *)&cp->c_desc.cd_nameptr[0];
			} else if (vnode_name(vp) != NULL) {
				filestr = vnode_name(vp);
			} else {
				filestr = &emptystr;
			}
			retval = ENOSPC;
			goto restore;
		}
	}
	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	hfs_end_transaction(hfsmp);
	started_tr = 0;

	if (retval) {
		/*
		 * Check to see if failure is due to excessive fragmentation.
		 */
		if ((retval == ENOSPC) &&
		    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
		}
		goto out;
	}
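	/*
	 * HFS_FRAGMENTED_FREESPACE is checked at the top of this function,
	 * so marking it here prevents repeated relocation attempts once a
	 * contiguous allocation has failed despite ample free space.
	 */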
	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */
	if (vnodetype == VLNK)
		retval = hfs_clonelink(vp, blksize, cred, p);
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);
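	/*
	 * The dispatch above selects one of three clone paths: symlinks
	 * copy their single block through the buffer cache
	 * (hfs_clonelink), system files copy via metadata buffers
	 * (hfs_clonesysfile), and regular files copy via cluster I/O
	 * (hfs_clonefile).
	 */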
	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto out;
	}
	started_tr = 1;
	if (retval)
		goto restore;
	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	if (retval)
		goto restore;
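	/*
	 * HeadTruncateFile() above released the original headblks blocks
	 * from the front of the fork, leaving the cloned copy as the
	 * file's data; on failure the restore path below releases the
	 * newly allocated blocks instead.
	 */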
out:
	if (took_trunc_lock)
		hfs_unlock_truncate(cp, 0);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/* Push cnode's new extent data to disk. */
	if (retval == 0) {
		(void) hfs_update(vp, MNT_WAIT);
	}
	if (hfsmp->jnl) {
		if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
			(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
		else
			(void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
	}
exit:
	if (started_tr)
		hfs_end_transaction(hfsmp);

	return (retval);
restore:
	if (fp->ff_blocks == headblks) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, 0);
		goto exit;
	}
	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}

	(void) TruncateFileC(hfsmp, (FCB *)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
	                     FTOC(fp)->c_fileid, false);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;

	if (took_trunc_lock)
		hfs_unlock_truncate(cp, 0);
	goto exit;
}
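/*
 * Minimal usage sketch (hypothetical caller, not part of this file):
 * hfs_relocate() expects the cnode to be locked on entry, so a caller
 * would look roughly like the following.
 */
#if 0
static int
example_relocate(struct vnode *vp, u_int32_t blockhint, vfs_context_t ctx)
{
	int error;

	/* Take the cnode lock; hfs_relocate() is entered with it held. */
	if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)) == 0) {
		error = hfs_relocate(vp, blockhint, vfs_context_ucred(ctx),
		                     vfs_context_proc(ctx));
		hfs_unlock(VTOC(vp));
	}
	return (error);
}
#endif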
/*
 * Clone a symlink.
 *
 */
static int
hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
{
	struct buf *head_bp = NULL;
	struct buf *tail_bp = NULL;
	int error;

	error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
	if (error)
		goto out;

	tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
	if (tail_bp == NULL) {
		error = EIO;
		goto out;
	}
	bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
	error = (int)buf_bwrite(tail_bp);
out:
	if (head_bp) {
		buf_markinvalid(head_bp);
		buf_brelse(head_bp);
	}
	(void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);

	return (error);
}
/*
 * Clone a file's data within the file.
 *
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	caddr_t bufp;
	size_t bufsize;
	size_t copysize;
	size_t iosize;
	size_t offset;
	off_t writebase;
	uio_t auio;
	int error = 0;

	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 128 * 1024);
	offset = 0;
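	/*
	 * writebase is the byte offset of the first newly allocated block,
	 * so the data is rewritten there and lands back at offset 0 once
	 * hfs_relocate() head-truncates the original blocks.  The copy is
	 * staged through a buffer of at most 128 KB per pass.
	 */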
	hfs_unlock(VTOC(vp));

#if CONFIG_PROTECT
	if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
		hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
		return (error);
	}
#endif /* CONFIG_PROTECT */

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
		return (ENOMEM);
	}

	auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_read(vp, auio, copysize, IO_NOCACHE);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
			error = EIO;
			break;
		}

		uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_write(vp, auio, writebase + offset,
		                      writebase + offset + iosize,
		                      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
			error = EIO;
			break;
		}
		offset += iosize;
	}
	uio_free(auio);
	if ((blksize & PAGE_MASK)) {
		/*
		 * Since the copy may not have started on a PAGE
		 * boundary (or may not have ended on one), we
		 * may have pages left in the cache since NOCACHE
		 * will let partially written pages linger...
		 * let's just flush the entire range to make sure
		 * we don't have any pages left that are beyond
		 * (or intersect) the real LEOF of this file.
		 */
		ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
	} else {
		/*
		 * No need to call ubc_sync_range or hfs_invalbuf
		 * since the file was copied using IO_NOCACHE and
		 * the copy was done starting and ending on a page
		 * boundary in the file.
		 */
	}
	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
	return (error);
}
/*
 * Clone a system (metadata) file.
 *
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
	caddr_t bufp;
	char * offset;
	size_t bufsize;
	size_t iosize;
	struct buf *bp = NULL;
	daddr64_t blkno;
	daddr64_t blk;
	daddr64_t start_blk;
	daddr64_t last_blk;
	int breadcnt;
	int i;
	int error = 0;

	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;
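	/*
	 * Stage the copy through a buffer of at most 1 MB, rounded down to
	 * a multiple of the device's logical block size; breadcnt is the
	 * number of device blocks that fit in one pass.
	 */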
	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	start_blk = ((daddr64_t)blkstart * blksize) / iosize;
	last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;
	blkno = 0;

	while (blkno < last_blk) {
		/*
		 * Read up to a megabyte
		 */
		offset = bufp;
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
			error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
			if (error) {
				printf("hfs_clonesysfile: meta_bread error %d\n", error);
				goto out;
			}
			if (buf_count(bp) != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
				goto out;
			}
			bcopy((char *)buf_dataptr(bp), offset, iosize);

			buf_markinvalid(bp);
			buf_brelse(bp);
			bp = NULL;

			offset += iosize;
		}
		/*
		 * Write up to a megabyte
		 */
		offset = bufp;
		for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
			bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
			if (bp == NULL) {
				printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
				error = EIO;
				goto out;
			}
			bcopy(offset, (char *)buf_dataptr(bp), iosize);
			error = (int)buf_bwrite(bp);
			bp = NULL;
			if (error)
				goto out;
			offset += iosize;
		}
	}
out:
	if (bp) {
		buf_brelse(bp);
	}

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	error = hfs_fsync(vp, MNT_WAIT, 0, p);

	return (error);
}