/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* @(#)hfs_readwrite.c 1.0
 *
 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
 *
 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 *
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <sys/vfs_context.h>
#include <sys/fsevents.h>
#include <kern/kalloc.h>
#include <sys/sysctl.h>
#include <sys/fsctl.h>
#include <miscfs/specfs/specdev.h>
#include <sys/ubc_internal.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <sys/kdebug.h>

#include "hfs_attrlist.h"
#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
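/*
 * Illustrative note: assuming MAXPHYSIO is the usual 128 KiB (that value is
 * an assumption here, defined elsewhere in the kernel headers),
 * can_cluster(4096) and can_cluster(65536) are true, while can_cluster(6000)
 * is false because 6000 is not a multiple of 4096.
 */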
enum {
	MAXHFSFILESIZE = 0x7FFFFFFF	/* this needs to go in the mount structure */
};

/* from bsd/hfs/hfs_vfsops.c */
extern int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
static int hfs_clonefile(struct vnode *, int, int, int);
static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
static int hfs_minorupdate(struct vnode *vp);
static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);

int flush_cache_on_write = 0;
SYSCTL_INT(_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
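/*
 * Note: because this uses SYSCTL_INT under _kern with OID_AUTO, the knob
 * surfaces to user space as "kern.flush_cache_on_write"; it can typically be
 * toggled with something like `sysctl -w kern.flush_cache_on_write=1`.
 * (The exact sysctl(8) invocation is an assumption, not taken from this file.)
 */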
/*
 * Read data from a file.
 */
hfs_vnop_read(struct vnop_read_args *ap)
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct hfsmount *hfsmp;
	off_t start_resid = uio_resid(uio);
	off_t offset = uio_offset(uio);
	int took_truncate_lock = 0;

	/* Preflight checks */
	if (!vnode_isreg(vp)) {
		/* can only read regular files */
	if (start_resid == 0)
		return (0);		/* Nothing left to do */
		return (EINVAL);	/* can't read from a negative offset */

	if (VNODE_IS_RSRC(vp)) {
		if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
		/* otherwise read the resource fork normally */
		int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
		retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
			/* successful read, update the access time */
			VTOC(vp)->c_touch_acctime = TRUE;

			/* compressed files are not hot file candidates */
			if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
				VTOF(vp)->ff_bytesread = 0;
		/* otherwise the file was converted back to a regular file while we were reading it */
	} else if ((VTOC(vp)->c_flags & UF_COMPRESSED)) {
		error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
#endif /* HFS_COMPRESSION */

	if ((retval = cp_handle_vnop (cp, CP_READ_ACCESS)) != 0) {

	/* Protect against a size change. */
	hfs_lock_truncate(cp, HFS_SHARED_LOCK);
	took_truncate_lock = 1;

	filesize = fp->ff_size;
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
	if (offset > filesize) {
		if ((hfsmp->hfs_flags & HFS_STANDARD) &&
		    (offset > (off_t)MAXHFSFILESIZE)) {

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	retval = cluster_read(vp, uio, filesize, ap->a_ioflag);

	cp->c_touch_acctime = TRUE;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	/*
	 * Keep track of blocks read.
	 */
	if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
		int took_cnode_lock = 0;

		bytesread = start_resid - uio_resid(uio);

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
			hfs_lock(cp, HFS_FORCE_LOCK);

		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < hfsmp->hfc_timebase) {
			fp->ff_bytesread = bytesread;
			cp->c_atime = tv.tv_sec;
			fp->ff_bytesread += bytesread;

	if (took_truncate_lock) {
		hfs_unlock_truncate(cp, 0);
/*
 * Write data to a file.
 */
hfs_vnop_write(struct vnop_write_args *ap)
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct hfsmount *hfsmp;
	kauth_cred_t cred = NULL;
	off_t bytesToAdd = 0;
	off_t actualBytesAdded;
	int ioflag = ap->a_ioflag;
	int cnode_locked = 0;
	int partialwrite = 0;
	time_t orig_ctime = VTOC(vp)->c_ctime;
	int took_truncate_lock = 0;

	if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
		int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
			case FILE_IS_COMPRESSED:
			case FILE_IS_CONVERTING:
				/* if FILE_IS_CONVERTING, we allow writes but do not
				   bother with snapshots or else we will deadlock.
				 */
				printf("invalid state %d for compressed file\n", state);
	} else if ((VTOC(vp)->c_flags & UF_COMPRESSED)) {
		error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);

	check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);

	// LP64todo - fix this! uio_resid may be 64-bit value
	resid = uio_resid(uio);
	offset = uio_offset(uio);

	if (!vnode_isreg(vp))
		return (EPERM);		/* Can only write regular files */

	if ((retval = cp_handle_vnop (cp, CP_WRITE_ACCESS)) != 0) {

	eflags = kEFDeferMask;	/* defer file block allocations */
	/*
	 * When the underlying device is sparse and space
	 * is low (< 8MB), stop doing delayed allocations
	 * and begin doing synchronous I/O.
	 */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
	    (hfs_freeblks(hfsmp, 0) < 2048)) {
		eflags &= ~kEFDeferMask;
#endif /* HFS_SPARSE_DEV */

	/* Protect against a size change. */
	if (ioflag & IO_APPEND) {
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
		hfs_lock_truncate(cp, HFS_SHARED_LOCK);
	took_truncate_lock = 1;

	if (ioflag & IO_APPEND) {
		uio_setoffset(uio, fp->ff_size);
		offset = fp->ff_size;
	if ((cp->c_flags & APPEND) && offset != fp->ff_size) {

	origFileSize = fp->ff_size;
	writelimit = offset + resid;
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

	/* If the truncate lock is shared, and if we either have virtual
	 * blocks or will need to extend the file, upgrade the truncate
	 * to exclusive lock.  If upgrade fails, we lose the lock and
	 * have to get exclusive lock again.  Note that we want to
	 * grab the truncate lock exclusive even if we're not allocating new blocks
	 * because we could still be growing past the LEOF.
	 */
	if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
	    ((fp->ff_unallocblocks != 0) || (writelimit > origFileSize))) {
		/* Lock upgrade failed and we lost our shared lock, try again */
		if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
			/* Store the owner in the c_truncatelockowner field if we successfully upgrade */
			cp->c_truncatelockowner = current_thread();

	if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {

	if (cp->c_truncatelockowner == HFS_SHARED_OWNER) {
	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
		(int)offset, uio_resid(uio), (int)fp->ff_size,

	/* Check if we do not need to extend the file */
	if (writelimit <= filebytes) {

	cred = vfs_context_ucred(ap->a_context);
	bytesToAdd = writelimit - filebytes;
	retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),

	if (hfs_start_transaction(hfsmp) != 0) {

	while (writelimit > filebytes) {
		bytesToAdd = writelimit - filebytes;
		if (cred && suser(cred, NULL) != 0)
			eflags |= kEFReserveMask;

		/* Protect extents b-tree and allocation bitmap */
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

		/* Files that are changing size are not hot file candidates. */
		if (hfsmp->hfc_stage == HFC_RECORDING) {
			fp->ff_bytesread = 0;
		retval = MacToVFSError(ExtendFileC (hfsmp, (FCB *)fp, bytesToAdd,
				0, eflags, &actualBytesAdded));

		hfs_systemfile_unlock(hfsmp, lockflags);

		if ((actualBytesAdded == 0) && (retval == E_NONE))
		if (retval != E_NONE)
		filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
			(int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);

	(void) hfs_update(vp, TRUE);
	(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
	(void) hfs_end_transaction(hfsmp);

	/*
	 * If we didn't grow the file enough try a partial write.
	 * POSIX expects this behavior.
	 */
	if ((retval == ENOSPC) && (filebytes > offset)) {
		uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
		writelimit = filebytes;

	if (retval == E_NONE) {
		struct rl_entry *invalid_range;

		if (writelimit > fp->ff_size)
			filesize = writelimit;
			filesize = fp->ff_size;

		lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);

		if (offset <= fp->ff_size) {
			zero_off = offset & ~PAGE_MASK_64;

			/* Check whether the area between zero_off and the start
			   of the transfer is invalid and should be zero-filled
			   as part of the transfer:
			 */
			if (offset > zero_off) {
				if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
					lflag |= IO_HEADZEROFILL;
			off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

			/* The bytes between fp->ff_size and uio->uio_offset must never be
			   read without being zeroed.  The current last block is filled with zeroes
			   if it holds valid data but in all cases merely do a little bookkeeping
			   to track the area from the end of the current last page to the start of
			   the area actually written.  For the same reason only the bytes up to the
			   start of the page where this write will start is invalidated; any remainder
			   before uio->uio_offset is explicitly zeroed as part of the cluster_write.

			   Note that inval_start, the start of the page after the current EOF,
			   may be past the start of the write, in which case the zeroing
			   will be handled by the cluster_write of the actual data.
			 */
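			/*
			 * Worked example (illustrative, assuming 4 KiB pages): with
			 * fp->ff_size at 0x1234 and a write starting at offset 0x5000,
			 * inval_start rounds the EOF up to the next page boundary (0x2000)
			 * and inval_end rounds the write offset down to a page boundary
			 * (0x5000), so the pages in [0x2000, 0x5000) are the candidates for
			 * the invalid-range bookkeeping described above.
			 */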
			inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
			inval_end = offset & ~PAGE_MASK_64;
			zero_off = fp->ff_size;

			if ((fp->ff_size & PAGE_MASK_64) &&
			    (rl_scan(&fp->ff_invalidranges,
			             &invalid_range) != RL_NOOVERLAP)) {
				/* The page containing the EOF is not valid, so the
				   entire page must be made inaccessible now.  If the write
				   starts on a page beyond the page containing the eof
				   (inval_end > eof_page_base), add the
				   whole page to the range to be invalidated.  Otherwise
				   (i.e. if the write starts on the same page), zero-fill
				   the entire page explicitly now:
				 */
				if (inval_end > eof_page_base) {
					inval_start = eof_page_base;
					zero_off = eof_page_base;

			if (inval_start < inval_end) {
				/* There's some range of data that's going to be marked invalid */

				if (zero_off < inval_start) {
					/* The pages between inval_start and inval_end are going to be invalidated,
					   and the actual write will start on a page past inval_end.  Now's the last
					   chance to zero-fill the page containing the EOF:
					 */
					retval = cluster_write(vp, (uio_t) 0,
							fp->ff_size, inval_start,
							lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
					hfs_lock(cp, HFS_FORCE_LOCK);
					if (retval) goto ioerr_exit;
					offset = uio_offset(uio);

				/* Mark the remaining area of the newly allocated space as invalid: */
				rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
				cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
				zero_off = fp->ff_size = inval_end;

			if (offset > zero_off) lflag |= IO_HEADZEROFILL;

		/* Check to see whether the area between the end of the write and the end of
		   the page it falls in is invalid and should be zero-filled as part of the transfer:
		 */
		tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
		if (tail_off > filesize) tail_off = filesize;
		if (tail_off > writelimit) {
			if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
				lflag |= IO_TAILZEROFILL;

		/*
		 * if the write starts beyond the current EOF (possibly advanced in the
		 * zeroing of the last block, above), then we'll zero fill from the current EOF
		 * to where the write begins:
		 *
		 * NOTE: If (and ONLY if) the portion of the file about to be written is
		 * before the current EOF it might be marked as invalid now and must be
		 * made readable (removed from the invalid ranges) before cluster_write.
		 */
		io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
		if (io_start < fp->ff_size) {
			io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
			rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);

		/*
		 * We need to tell UBC the fork's new size BEFORE calling
		 * cluster_write, in case any of the new pages need to be
		 * paged out before cluster_write completes (which does happen
		 * in embedded systems due to extreme memory pressure).
		 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
		 * will be, so that it can pass that on to cluster_pageout, and
		 * allow those pageouts.
		 *
		 * We don't update ff_size yet since we don't want pageins to
		 * be able to see uninitialized data between the old and new
		 * EOF, until cluster_write has completed and initialized that
		 * part of the file.
		 *
		 * The vnode pager relies on the file size last given to UBC via
		 * ubc_setsize.  hfs_vnop_pageout relies on fp->ff_new_size or
		 * ff_size (whichever is larger).  NOTE: ff_new_size is always
		 * zero, unless we are extending the file via write.
		 */
		if (filesize > fp->ff_size) {
			fp->ff_new_size = filesize;
			ubc_setsize(vp, filesize);

		retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
				tail_off, lflag | IO_NOZERODIRTY);
			fp->ff_new_size = 0;	/* no longer extending; use ff_size */
			if (filesize > origFileSize) {
				ubc_setsize(vp, origFileSize);

		if (filesize > origFileSize) {
			fp->ff_size = filesize;

			/* Files that are changing size are not hot file candidates. */
			if (hfsmp->hfc_stage == HFC_RECORDING) {
				fp->ff_bytesread = 0;
		fp->ff_new_size = 0;	/* ff_size now has the correct size */

		/* If we wrote some bytes, then touch the change and mod times */
		if (resid > uio_resid(uio)) {
			cp->c_touch_chgtime = TRUE;
			cp->c_touch_modtime = TRUE;

		uio_setresid(uio, (uio_resid(uio) + bytesToAdd));

	// XXXdbg - see radar 4871353 for more info
	if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
		VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);

	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (cp->c_mode & (S_ISUID | S_ISGID)) {
		cred = vfs_context_ucred(ap->a_context);
		if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			cp->c_mode &= ~(S_ISUID | S_ISGID);

	if (ioflag & IO_UNIT) {
		hfs_lock(cp, HFS_FORCE_LOCK);
		(void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
				0, 0, ap->a_context);
		// LP64todo - fix this! resid needs to be user_ssize_t
		uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
		uio_setresid(uio, resid);
		filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
	} else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
		hfs_lock(cp, HFS_FORCE_LOCK);
		retval = hfs_update(vp, TRUE);

	/* Updating vcbWrCnt doesn't need to be atomic. */

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);

	if (took_truncate_lock) {
		hfs_unlock_truncate(cp, 0);
/* support for the "bulk-access" fcntl */

#define CACHE_LEVELS 16
#define NUM_CACHE_ENTRIES (64*16)
#define PARENT_IDS_FLAG 0x100

struct access_cache {
	int cachehits;	/* these two for statistics gathering */
	unsigned int *acache;
	unsigned char *haveaccess;
};

struct access_t {
	uid_t		uid;		/* IN: effective user id */
	short		flags;		/* IN: access requested (i.e. R_OK) */
	short		num_groups;	/* IN: number of groups user belongs to */
	int		num_files;	/* IN: number of files to process */
	int		*file_ids;	/* IN: array of file ids */
	gid_t		*groups;	/* IN: array of groups */
	short		*access;	/* OUT: access info for each file (0 for 'has access') */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_access_t {
	uid_t		uid;		/* IN: effective user id */
	short		flags;		/* IN: access requested (i.e. R_OK) */
	short		num_groups;	/* IN: number of groups user belongs to */
	int		num_files;	/* IN: number of files to process */
	user32_addr_t	file_ids;	/* IN: array of file ids */
	user32_addr_t	groups;		/* IN: array of groups */
	user32_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
};

struct user64_access_t {
	uid_t		uid;		/* IN: effective user id */
	short		flags;		/* IN: access requested (i.e. R_OK) */
	short		num_groups;	/* IN: number of groups user belongs to */
	int		num_files;	/* IN: number of files to process */
	user64_addr_t	file_ids;	/* IN: array of file ids */
	user64_addr_t	groups;		/* IN: array of groups */
	user64_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
};

// these are the "extended" versions of the above structures
// note that it is crucial that they be differently sized than
// the regular versions
struct ext_access_t {
	uint32_t	flags;		/* IN: access requested (i.e. R_OK) */
	uint32_t	num_files;	/* IN: number of files to process */
	uint32_t	map_size;	/* IN: size of the bit map */
	uint32_t	*file_ids;	/* IN: Array of file ids */
	char		*bitmap;	/* OUT: hash-bitmap of interesting directory ids */
	short		*access;	/* OUT: access info for each file (0 for 'has access') */
	uint32_t	num_parents;	/* future use */
	cnid_t		*parents;	/* future use */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_ext_access_t {
	uint32_t	flags;		/* IN: access requested (i.e. R_OK) */
	uint32_t	num_files;	/* IN: number of files to process */
	uint32_t	map_size;	/* IN: size of the bit map */
	user32_addr_t	file_ids;	/* IN: Array of file ids */
	user32_addr_t	bitmap;		/* OUT: hash-bitmap of interesting directory ids */
	user32_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
	uint32_t	num_parents;	/* future use */
	user32_addr_t	parents;	/* future use */
};

struct user64_ext_access_t {
	uint32_t	flags;		/* IN: access requested (i.e. R_OK) */
	uint32_t	num_files;	/* IN: number of files to process */
	uint32_t	map_size;	/* IN: size of the bit map */
	user64_addr_t	file_ids;	/* IN: array of file ids */
	user64_addr_t	bitmap;		/* OUT: hash-bitmap of interesting directory ids */
	user64_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
	uint32_t	num_parents;	/* future use */
	user64_addr_t	parents;	/* future use */
};
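/*
 * Illustrative sketch (assumptions, not part of this file): a 64-bit caller
 * would fill in the extended request and hand it to the bulk-access fsctl
 * roughly as follows; the user-space wrapper and selector spelling are
 * assumptions -- see hfs_fsctl.h for the real definitions.
 *
 *	struct user64_ext_access_t req = {
 *		.flags       = R_OK,
 *		.num_files   = nfiles,
 *		.map_size    = 0,
 *		.file_ids    = (user64_addr_t)(uintptr_t)ids,     // file IDs to test
 *		.bitmap      = 0,
 *		.access      = (user64_addr_t)(uintptr_t)results, // one short per file
 *		.num_parents = 0,
 *		.parents     = 0,
 *	};
 *	// fsctl(path, HFS_EXT_BULKACCESS_FSCTL, &req, 0);    // hypothetical invocation
 *
 * On return, results[i] is 0 when the caller has the requested access to
 * file_ids[i], or an errno-style value otherwise.
 */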
/*
 * Perform a binary search for the given parent_id.  Return value is
 * the index if there is a match.  If no_match_indexp is non-NULL it
 * will be assigned with the index to insert the item (even if it was
 * not found).
 */
static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
	unsigned int mid = ((hi - lo)/2) + lo;
	unsigned int this_id = array[mid];

	if (parent_id == this_id) {
	if (parent_id < this_id) {
	if (parent_id > this_id) {

	/* check if lo and hi converged on the match */
	if (parent_id == array[hi]) {
	if (no_match_indexp) {
		*no_match_indexp = hi;
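/*
 * Example (illustrative): searching an acache holding {10, 20, 40} for 20
 * returns its index; searching for 30 finds no match and, when
 * no_match_indexp is supplied, stores the slot where 30 would be inserted
 * (between 20 and 40).
 */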
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
	int index, no_match_index;

	if (cache->numcached == 0) {
		return 0; // table is empty, so insert at index=0 and report no match

	if (cache->numcached > NUM_CACHE_ENTRIES) {
		/*printf("hfs: EGAD! numcached is %d... cut our losses and trim to %d\n",
		         cache->numcached, NUM_CACHE_ENTRIES);*/
		cache->numcached = NUM_CACHE_ENTRIES;

	hi = cache->numcached - 1;

	index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);

	/* if no existing entry found, find index for new one */
		index = no_match_index;
/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in).  We currently do a replace rather
 * than an insert if the cache is full.
 */
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
	int lookup_index = -1;

	/* need to do a lookup first if -1 passed for index */
	if (lookup_bucket(cache, &lookup_index, nodeID)) {
		if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
			// only update an entry if the previous access was ESRCH (i.e. a scope checking error)
			cache->haveaccess[lookup_index] = access;

		/* mission accomplished */
		index = lookup_index;

	/* if the cache is full, do a replace rather than an insert */
	if (cache->numcached >= NUM_CACHE_ENTRIES) {
		//printf("hfs: cache is full (%d). replace at index %d\n", cache->numcached, index);
		cache->numcached = NUM_CACHE_ENTRIES-1;

		if (index > cache->numcached) {
			// printf("hfs: index %d pinned to %d\n", index, cache->numcached);
			index = cache->numcached;

	if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {

	if (index >= 0 && index < cache->numcached) {
		/* only do bcopy if we're inserting */
		bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
		bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );

	cache->acache[index] = nodeID;
	cache->haveaccess[index] = access;
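/*
 * Example (illustrative): once numcached reaches NUM_CACHE_ENTRIES, adding a
 * new nodeID no longer shifts entries with bcopy; numcached is clamped and the
 * entry at the computed index is simply overwritten (replace instead of
 * insert).
 */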
snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
	struct cinfo *cip = (struct cinfo *)arg;

	cip->uid = attrp->ca_uid;
	cip->gid = attrp->ca_gid;
	cip->mode = attrp->ca_mode;
	cip->parentcnid = descp->cd_parentcnid;
	cip->recflags = attrp->ca_recflags;
/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id.  If the item
 * isn't incore, then go to the catalog.
 */
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
    struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)

	/* if this id matches the one the fsctl was called with, skip the lookup */
	if (cnid == skip_cp->c_cnid) {
		cnattrp->ca_uid = skip_cp->c_uid;
		cnattrp->ca_gid = skip_cp->c_gid;
		cnattrp->ca_mode = skip_cp->c_mode;
		cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
		keyp->hfsPlus.parentID = skip_cp->c_parentcnid;

	/* otherwise, check the cnode hash in case the file/dir is incore */
	if (hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info) == 0) {
		cnattrp->ca_uid = c_info.uid;
		cnattrp->ca_gid = c_info.gid;
		cnattrp->ca_mode = c_info.mode;
		cnattrp->ca_recflags = c_info.recflags;
		keyp->hfsPlus.parentID = c_info.parentcnid;

	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

	/* lookup this cnid in the catalog */
	error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);

	hfs_systemfile_unlock(hfsmp, lockflags);
/*
 * Compute whether we have access to the given directory (nodeID) and all its parents.  Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
    struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
    struct vfs_context *my_context,
    uint32_t num_parents)

	HFSCatalogNodeID thisNodeID;
	unsigned int myPerms;
	struct cat_attr cnattr;
	int cache_index = -1, scope_index = -1, scope_idx_start = -1;
	int i = 0, ids_to_cache = 0;
	int parent_ids[CACHE_LEVELS];

	while (thisNodeID >= kRootDirID) {
		myResult = 0;	/* default to "no access" */

		/* check the cache before resorting to hitting the catalog */

		/* ASSUMPTION: access info of cached entries is "final"... i.e. no need
		 * to look any further after hitting cached dir */
		if (lookup_bucket(cache, &cache_index, thisNodeID)) {
			myErr = cache->haveaccess[cache_index];
			if (scope_index != -1) {
				if (myErr == ESRCH) {
				scope_index = 0;	// so we'll just use the cache result
				scope_idx_start = ids_to_cache;
			myResult = (myErr == 0) ? 1 : 0;
			goto ExitThisRoutine;

			tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
			if (scope_index == -1)
			if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
				scope_idx_start = ids_to_cache;

		/* remember which parents we want to cache */
		if (ids_to_cache < CACHE_LEVELS) {
			parent_ids[ids_to_cache] = thisNodeID;
		// Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
		if (bitmap && map_size) {
			bitmap[(thisNodeID/8)%(map_size)] |= (1 << (thisNodeID & 7));

		/* do the lookup (checks the cnode hash, then the catalog) */
		myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
			goto ExitThisRoutine;	/* no access */

		/* Root always gets access. */
		if (suser(myp_ucred, NULL) == 0) {
			thisNodeID = catkey.hfsPlus.parentID;

		// if the thing has acl's, do the full permission check
		if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
			/* get the vnode for this cnid */
			myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
				goto ExitThisRoutine;

			thisNodeID = VTOC(vp)->c_parentcnid;

			hfs_unlock(VTOC(vp));

			if (vnode_vtype(vp) == VDIR) {
				myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
				myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);

				goto ExitThisRoutine;

			int mode = cnattr.ca_mode & S_IFMT;
			myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp, myp_ucred, theProcPtr);

			if (mode == S_IFDIR) {
				flags = R_OK | X_OK;
			if ( (myPerms & flags) != flags) {
				goto ExitThisRoutine;	/* no access */

			/* up the hierarchy we go */
			thisNodeID = catkey.hfsPlus.parentID;

	/* if here, we have access to this node */

	if (parents && myErr == 0 && scope_index == -1) {

	/* cache the parent directory(ies) */
	for (i = 0; i < ids_to_cache; i++) {
		if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
			add_node(cache, -1, parent_ids[i], ESRCH);
			add_node(cache, -1, parent_ids[i], myErr);
do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
    struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)

	/*
	 * NOTE: on entry, the vnode is locked.  In case this vnode
	 * happens to be in our list of file_ids, we'll note it and
	 * avoid calling hfs_chashget_nowait() on that id as that
	 * will cause a "locking against myself" panic.
	 */
	Boolean check_leaf = true;

	struct user64_ext_access_t *user_access_structp;
	struct user64_ext_access_t tmp_user_access;
	struct access_cache cache;

	int error = 0, prev_parent_check_ok = 1;
	unsigned int num_files = 0;
	int num_parents = 0;
	cnid_t *parents = NULL;
	cnid_t prevParent_cnid = 0;
	unsigned int myPerms;
	struct cat_attr cnattr;
	struct cnode *skip_cp = VTOC(vp);
	kauth_cred_t cred = vfs_context_ucred(context);
	proc_t p = vfs_context_proc(context);

	is64bit = proc_is64bit(p);

	/* initialize the local cache and buffers */
	cache.numcached = 0;
	cache.cachehits = 0;
	cache.acache = NULL;
	cache.haveaccess = NULL;

	/* struct copyin done during dispatch... need to copy file_id array separately */
	if (ap->a_data == NULL) {
		goto err_exit_bulk_access;

		if (arg_size != sizeof(struct user64_ext_access_t)) {
			goto err_exit_bulk_access;
		user_access_structp = (struct user64_ext_access_t *)ap->a_data;

	} else if (arg_size == sizeof(struct user32_access_t)) {
		struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;

		// convert an old style bulk-access struct to the new style
		tmp_user_access.flags = accessp->flags;
		tmp_user_access.num_files = accessp->num_files;
		tmp_user_access.map_size = 0;
		tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
		tmp_user_access.bitmap = USER_ADDR_NULL;
		tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
		tmp_user_access.num_parents = 0;
		user_access_structp = &tmp_user_access;

	} else if (arg_size == sizeof(struct user32_ext_access_t)) {
		struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;

		// up-cast from a 32-bit version of the struct
		tmp_user_access.flags = accessp->flags;
		tmp_user_access.num_files = accessp->num_files;
		tmp_user_access.map_size = accessp->map_size;
		tmp_user_access.num_parents = accessp->num_parents;
		tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
		tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
		tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
		tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
		user_access_structp = &tmp_user_access;

		goto err_exit_bulk_access;

	map_size = user_access_structp->map_size;
	num_files = user_access_structp->num_files;
	num_parents = user_access_structp->num_parents;

	if (num_files < 1) {
		goto err_exit_bulk_access;
	if (num_files > 1024) {
		goto err_exit_bulk_access;
	if (num_parents > 1024) {
		goto err_exit_bulk_access;

	file_ids = (int *) kalloc(sizeof(int) * num_files);
	access = (short *) kalloc(sizeof(short) * num_files);
		bitmap = (char *) kalloc(sizeof(char) * map_size);
		parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
	cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
	cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);

	if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
			kfree(file_ids, sizeof(int) * num_files);
			kfree(bitmap, sizeof(char) * map_size);
			kfree(access, sizeof(short) * num_files);
			kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
		if (cache.haveaccess) {
			kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
			kfree(parents, sizeof(cnid_t) * num_parents);

	// make sure the bitmap is zero'ed out...
		bzero(bitmap, (sizeof(char) * map_size));

	if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
	                    num_files * sizeof(int)))) {
		goto err_exit_bulk_access;

	if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
	                    num_parents * sizeof(cnid_t)))) {
		goto err_exit_bulk_access;

	flags = user_access_structp->flags;
	if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {

	/* check if we've been passed leaf node ids or parent ids */
	if (flags & PARENT_IDS_FLAG) {

	/* Check access to each file_id passed in */
	for (i = 0; i < num_files; i++) {
		cnid = (cnid_t) file_ids[i];

		/* root always has access */
		if ((!parents) && (!suser(cred, NULL))) {

		/* do the lookup (checks the cnode hash, then the catalog) */
		error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
			access[i] = (short) error;

			// Check if the leaf matches one of the parent scopes
			leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
			if (leaf_index >= 0 && parents[leaf_index] == cnid)
				prev_parent_check_ok = 0;
			else if (leaf_index >= 0)
				prev_parent_check_ok = 1;

		// if the thing has acl's, do the full permission check
		if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
			/* get the vnode for this cnid */
			myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);

			hfs_unlock(VTOC(cvp));

			if (vnode_vtype(cvp) == VDIR) {
				myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
				myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);

			/* before calling CheckAccess(), check the target file for read access */
			myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
			                                  cnattr.ca_mode, hfsmp->hfs_mp, cred, p);

			/* fail fast if no access */
			if ((myPerms & flags) == 0) {

			/* we were passed an array of parent ids */
			catkey.hfsPlus.parentID = cnid;

		/* if the last guy had the same parent and had access, we're done */
		if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {

		myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
		                           skip_cp, p, cred, context, bitmap, map_size, parents, num_parents);

		if (myaccess || (error == ESRCH && leaf_index != -1)) {
			access[i] = 0;	// have access.. no errors to report
			access[i] = (error != 0 ? (short) error : EACCES);

		prevParent_cnid = catkey.hfsPlus.parentID;

	/* copyout the access array */
	if ((error = copyout((caddr_t)access, user_access_structp->access,
	                     num_files * sizeof (short)))) {
		goto err_exit_bulk_access;
	if (map_size && bitmap) {
		if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
		                     map_size * sizeof (char)))) {
			goto err_exit_bulk_access;

  err_exit_bulk_access:

	//printf("hfs: on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);

		kfree(file_ids, sizeof(int) * num_files);
		kfree(parents, sizeof(cnid_t) * num_parents);
		kfree(bitmap, sizeof(char) * map_size);
		kfree(access, sizeof(short) * num_files);
		kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
	if (cache.haveaccess)
		kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);

/* end "bulk-access" support */
/*
 * Callback for use with freeze ioctl.
 */
hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
	vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
/*
 * Control filesystem operating characteristics.
 */
hfs_vnop_ioctl( struct vnop_ioctl_args /* {
		vfs_context_t a_context;
	} */ *ap)
	struct vnode * vp = ap->a_vp;
	struct hfsmount *hfsmp = VTOHFS(vp);
	vfs_context_t context = ap->a_context;
	kauth_cred_t cred = vfs_context_ucred(context);
	proc_t p = vfs_context_proc(context);
	struct vfsstatfs *vfsp;
	off_t jnl_start, jnl_size;
	struct hfs_journal_info *jip;
	off_t uncompressed_size = -1;
	int decmpfs_error = 0;

	if (ap->a_command == F_RDADVISE) {
		/* we need to inspect the decmpfs state of the file as early as possible */
		compressed = hfs_file_is_compressed(VTOC(vp), 0);
			if (VNODE_IS_RSRC(vp)) {
				/* if this is the resource fork, treat it as if it were empty */
				uncompressed_size = 0;
				decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
				if (decmpfs_error != 0) {
					/* failed to get the uncompressed size, we'll check for this later */
					uncompressed_size = -1;
#endif /* HFS_COMPRESSION */

	is64bit = proc_is64bit(p);

	if ((error = cp_handle_vnop(VTOC(vp), CP_WRITE_ACCESS)) != 0) {
#endif /* CONFIG_PROTECT */

	switch (ap->a_command) {

		struct vnode *file_vp;

		/* Caller must be owner of file system. */
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
		    kauth_cred_getuid(cred) != vfsp->f_owner) {
		/* Target vnode must be file system's root. */
		if (!vnode_isvroot(vp)) {
		bufptr = (char *)ap->a_data;
		cnid = strtoul(bufptr, NULL, 10);

		/* We need to call hfs_vfs_vget to leverage the code that will
		 * fix the origin list for us if needed, as opposed to calling
		 * hfs_vget, since we will need the parent for build_path call.
		 */
		if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {

		error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);

		/* Caller must be owner of file system. */
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
		    kauth_cred_getuid(cred) != vfsp->f_owner) {
		/* Target vnode must be file system's root. */
		if (!vnode_isvroot(vp)) {
		linkfileid = *(cnid_t *)ap->a_data;
		if (linkfileid < kHFSFirstUserCatalogNodeID) {
		if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
		if (ap->a_command == HFS_NEXT_LINK) {
			*(cnid_t *)ap->a_data = nextlinkid;
			*(cnid_t *)ap->a_data = prevlinkid;

	case HFS_RESIZE_PROGRESS: {

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
		    kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);	/* must be owner of file system */
		if (!vnode_isvroot(vp)) {
		/* file system must not be mounted read-only */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {

		return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);

	case HFS_RESIZE_VOLUME: {

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
		    kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);	/* must be owner of file system */
		if (!vnode_isvroot(vp)) {

		/* filesystem must not be mounted read only */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
		newsize = *(u_int64_t *)ap->a_data;
		cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

		if (newsize > cursize) {
			return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
		} else if (newsize < cursize) {
			return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);

	case HFS_CHANGE_NEXT_ALLOCATION: {
		int error = 0;	/* Assume success */

		if (vnode_vfsisrdonly(vp)) {
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
		    kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);	/* must be owner of file system */
		if (!vnode_isvroot(vp)) {
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		location = *(u_int32_t *)ap->a_data;
		if ((location >= hfsmp->allocLimit) &&
		    (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
			goto fail_change_next_allocation;
		/* Return previous value. */
		*(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
		if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
			/* On magic value for location, set nextAllocation to next block
			 * after metadata zone and set flag in mount structure to indicate
			 * that nextAllocation should not be updated again.
			 */
			if (hfsmp->hfs_metazone_end != 0) {
				HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
			hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
			hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
			HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
		MarkVCBDirty(hfsmp);
fail_change_next_allocation:
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	case HFS_SETBACKINGSTOREINFO: {
		struct vnode * bsfs_rootvp;
		struct vnode * di_vp;
		struct hfs_backingstoreinfo *bsdata;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
		    kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);	/* must be owner of file system */
		bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
		if (bsdata == NULL) {
		if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
		if ((error = vnode_getwithref(di_vp))) {
			file_drop(bsdata->backingfd);

		if (vnode_mount(vp) == vnode_mount(di_vp)) {
			(void)vnode_put(di_vp);
			file_drop(bsdata->backingfd);

		/*
		 * Obtain the backing fs root vnode and keep a reference
		 * on it.  This reference will be dropped in hfs_unmount.
		 */
		error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL);	/* XXX use context! */
			(void)vnode_put(di_vp);
			file_drop(bsdata->backingfd);
		vnode_ref(bsfs_rootvp);
		vnode_put(bsfs_rootvp);

		hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;

		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
		/* The free extent cache is managed differently for sparse devices.
		 * There is a window between when the volume is mounted and when the
		 * device is marked as sparse, so the free extent cache for this
		 * volume is currently initialized as normal volume (sorted by block
		 * count).  Reset the cache so that it will be rebuilt again
		 * for sparse device (sorted by start block).
		 */
		ResetVCBFreeExtCache(hfsmp);

		hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
		hfsmp->hfs_sparsebandblks *= 4;

		vfs_markdependency(hfsmp->hfs_mp);

		/*
		 * If the sparse image is on a sparse image file (as opposed to a sparse
		 * bundle), then we may need to limit the free space to the maximum size
		 * of a file on that volume.  So we query (using pathconf), and if we get
		 * a meaningful result, we cache the number of blocks for later use in
		 * hfs_freeblks().
		 */
		hfsmp->hfs_backingfs_maxblocks = 0;
		if (vnode_vtype(di_vp) == VREG) {
			terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
			if (terr == 0 && hostbits != 0 && hostbits < 64) {
				u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;

				hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;

		(void)vnode_put(di_vp);
		file_drop(bsdata->backingfd);

	case HFS_CLRBACKINGSTOREINFO: {
		struct vnode * tmpvp;

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
		    kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);	/* must be owner of file system */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {

		if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
		    hfsmp->hfs_backingfs_rootvp) {

			hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
			tmpvp = hfsmp->hfs_backingfs_rootvp;
			hfsmp->hfs_backingfs_rootvp = NULLVP;
			hfsmp->hfs_sparsebandblks = 0;
#endif /* HFS_SPARSE_DEV */

		mp = vnode_mount(vp);
		hfsmp = VFSTOHFS(mp);
		vfsp = vfs_statfs(mp);

		if (kauth_cred_getuid(cred) != vfsp->f_owner &&
		    !kauth_cred_issuser(cred))

		lck_rw_lock_exclusive(&hfsmp->hfs_insync);

		// flush things before we get started to try and prevent
		// dirty data from being paged out while we're frozen.
		// note: can't do this after taking the lock as it will
		// deadlock against ourselves.
		vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
		hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

		// DO NOT call hfs_journal_flush() because that takes a
		// shared lock on the global exclusive lock!
		journal_flush(hfsmp->jnl, TRUE);

		// don't need to iterate on all vnodes, we just need to
		// wait for writes to the system files and the device vnode
		//
		// Now that journal flush waits for all metadata blocks to
		// be written out, waiting for btree writes is probably no
		// longer required.
		if (HFSTOVCB(hfsmp)->extentsRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
		if (HFSTOVCB(hfsmp)->catalogRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
		if (HFSTOVCB(hfsmp)->allocationsRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
		if (hfsmp->hfs_attribute_vp)
			vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
		vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");

		hfsmp->hfs_freezing_proc = current_proc();

		vfsp = vfs_statfs(vnode_mount(vp));
		if (kauth_cred_getuid(cred) != vfsp->f_owner &&
		    !kauth_cred_issuser(cred))

		// if we're not the one who froze the fs then we
		// can't thaw it.
		if (hfsmp->hfs_freezing_proc != current_proc()) {

		// NOTE: if you add code here, also go check the
		// code that "thaws" the fs in hfs_vnop_close()
		hfsmp->hfs_freezing_proc = NULL;
		hfs_unlock_global (hfsmp);
		lck_rw_unlock_exclusive(&hfsmp->hfs_insync);

	case HFS_BULKACCESS_FSCTL: {

		if (hfsmp->hfs_flags & HFS_STANDARD) {
			size = sizeof(struct user64_access_t);
			size = sizeof(struct user32_access_t);

		return do_bulk_access_check(hfsmp, vp, ap, size, context);

	case HFS_EXT_BULKACCESS_FSCTL: {

		if (hfsmp->hfs_flags & HFS_STANDARD) {
			size = sizeof(struct user64_ext_access_t);
			size = sizeof(struct user32_ext_access_t);

		return do_bulk_access_check(hfsmp, vp, ap, size, context);

	case HFS_SET_XATTREXTENTS_STATE: {

		if (ap->a_data == NULL) {
		state = *(int *)ap->a_data;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
		/* Super-user can enable or disable extent-based extended
		 * attribute support on a volume
		 * Note: Starting Mac OS X 10.7, extent-based extended attributes
		 * are enabled by default, so any change will be transient only
		 * till the volume is remounted.
		 */
		if (state == 0 || state == 1)
			return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
			error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
			hfs_unlock(VTOC(vp));

		register struct cnode *cp;

		if (!vnode_isreg(vp))
		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
			/*
			 * used by regression test to determine if
			 * all the dirty pages (via write) have been cleaned
			 * after a call to 'fsync'.
			 */
			error = is_file_clean(vp, VTOF(vp)->ff_size);

		register struct radvisory *ra;
		struct filefork *fp;

		if (!vnode_isreg(vp))
		ra = (struct radvisory *)(ap->a_data);

		/* Protect against a size change. */
		hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK);
		if (compressed && (uncompressed_size == -1)) {
			/* fetching the uncompressed size failed above, so return the error */
			error = decmpfs_error;
		} else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
		           (!compressed && (ra->ra_offset >= fp->ff_size))) {
#else /* HFS_COMPRESSION */
		if (ra->ra_offset >= fp->ff_size) {
#endif /* HFS_COMPRESSION */

			error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
		hfs_unlock_truncate(VTOC(vp), 0);

	case F_READBOOTSTRAP:
	case F_WRITEBOOTSTRAP:
		struct vnode *devvp = NULL;
		user_fbootstraptransfer_t *user_bootstrapp;
		daddr64_t blockNumber;
		u_int32_t blockOffset;
		user_fbootstraptransfer_t user_bootstrap;

		if (!vnode_isvroot(vp))
		/* LP64 - when caller is a 64 bit process then we are passed a pointer
		 * to a user_fbootstraptransfer_t else we get a pointer to a
		 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
		 */
		if ((hfsmp->hfs_flags & HFS_READ_ONLY)
		    && (ap->a_command == F_WRITEBOOTSTRAP)) {
			user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
			user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data;
			user_bootstrapp = &user_bootstrap;
			user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
			user_bootstrap.fbt_length = bootstrapp->fbt_length;
			user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);

		if ((user_bootstrapp->fbt_offset < 0) || (user_bootstrapp->fbt_offset > 1024) ||
		    (user_bootstrapp->fbt_length > 1024)) {
		if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)

		devvp = VTOHFS(vp)->hfs_devvp;
		auio = uio_create(1, user_bootstrapp->fbt_offset,
				is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
				(ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
		uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);

		devBlockSize = vfs_devblocksize(vnode_mount(vp));

		while (uio_resid(auio) > 0) {
			blockNumber = uio_offset(auio) / devBlockSize;
			error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
				if (bp) buf_brelse(bp);

			blockOffset = uio_offset(auio) % devBlockSize;
			xfersize = devBlockSize - blockOffset;
			error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);

			if (uio_rw(auio) == UIO_WRITE) {
				error = VNOP_BWRITE(bp);

	case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
			*(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
			*(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));

	case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
		*(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;

	case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
		*(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;

	case HFS_FSCTL_SET_VERY_LOW_DISK:
		if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
		hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;

	case HFS_FSCTL_SET_LOW_DISK:
		if (   *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
		    || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
		hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;

	case HFS_FSCTL_SET_DESIRED_DISK:
		if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
		hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;

	case HFS_VOLUME_STATUS:
		*(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;

	case HFS_SET_BOOT_INFO:
		if (!vnode_isvroot(vp))
		if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
			return(EACCES);	/* must be superuser or owner of filesystem */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);

	case HFS_GET_BOOT_INFO:
		if (!vnode_isvroot(vp))
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	case HFS_MARK_BOOT_CORRUPT:
		/* Mark the boot volume corrupt by setting
		 * kHFSVolumeInconsistentBit in the volume header.  This will
		 * force fsck_hfs on next mount.
		 */
		/* Allowed only on the root vnode of the boot volume */
		if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
		    !vnode_isvroot(vp)) {
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
		printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
		hfs_mark_volume_inconsistent(hfsmp);

	case HFS_FSCTL_GET_JOURNAL_INFO:
		jip = (struct hfs_journal_info *)ap->a_data;

		if (hfsmp->jnl == NULL) {
			jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
			jnl_size = (off_t)hfsmp->jnl_size;

		jip->jstart = jnl_start;
		jip->jsize = jnl_size;

	case HFS_SET_ALWAYS_ZEROFILL: {
		struct cnode *cp = VTOC(vp);

		if (*(int *)ap->a_data) {
			cp->c_flag |= C_ALWAYS_ZEROFILL;
			cp->c_flag &= ~C_ALWAYS_ZEROFILL;

	case HFS_DISABLE_METAZONE: {
		/* Only root can disable metadata zone */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {

		/* Disable metadata zone now */
		(void) hfs_metadatazone_init(hfsmp, true);
		printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
hfs_vnop_select(__unused struct vnop_select_args *ap)
/*
	struct vnop_select_args {
		vfs_context_t a_context;
	};
*/
	/*
	 * We should really check to see if I/O is possible.
	 */
/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run.  The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently it is 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks.
 */
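/*
 * Worked example (illustrative): with GetLogicalBlockSize() returning 4096,
 * logical block 10 corresponds to byte offset 10 * 4096 = 40960, which is
 * device block 40960 / 512 = 80 on a 512-byte-sector device; the same
 * arithmetic underlies hfs_vnop_blktooff() and hfs_vnop_offtoblk() below.
 */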
2220 hfs_bmap(struct vnode
*vp
, daddr_t bn
, struct vnode
**vpp
, daddr64_t
*bnp
, unsigned int *runp
)
2222 struct filefork
*fp
= VTOF(vp
);
2223 struct hfsmount
*hfsmp
= VTOHFS(vp
);
2224 int retval
= E_NONE
;
2225 u_int32_t logBlockSize
;
2226 size_t bytesContAvail
= 0;
2227 off_t blockposition
;
2232 * Check for underlying vnode requests and ensure that logical
2233 * to physical mapping is requested.
2236 *vpp
= hfsmp
->hfs_devvp
;
2240 logBlockSize
= GetLogicalBlockSize(vp
);
2241 blockposition
= (off_t
)bn
* logBlockSize
;
2243 lockExtBtree
= overflow_extents(fp
);
2246 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_EXTENTS
, HFS_EXCLUSIVE_LOCK
);
2248 retval
= MacToVFSError(
2249 MapFileBlockC (HFSTOVCB(hfsmp
),
2257 hfs_systemfile_unlock(hfsmp
, lockflags
);
2259 if (retval
== E_NONE
) {
2260 /* Figure out how many read ahead blocks there are */
2262 if (can_cluster(logBlockSize
)) {
2263 /* Make sure this result never goes negative: */
2264 *runp
= (bytesContAvail
< logBlockSize
) ? 0 : (bytesContAvail
/ logBlockSize
) - 1;
/*
 * Convert logical block number to file offset.
 */
int
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
	struct vnop_blktooff_args {
		vnode_t a_vp;
		daddr64_t a_lblkno;
		off_t *a_offset;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);

	*ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);

	return (0);
}
/*
 * Convert file offset to logical block number.
 */
int
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
	struct vnop_offtoblk_args {
		vnode_t a_vp;
		off_t a_offset;
		daddr64_t *a_lblkno;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);

	*ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));

	return (0);
}
/*
 * Map file offset to physical block number.
 *
 * If this function is called for write operation, and if the file
 * had virtual blocks allocated (delayed allocation), real blocks
 * are allocated by calling ExtendFileC().
 *
 * If this function is called for read operation, and if the file
 * had virtual blocks allocated (delayed allocation), no change
 * to the size of file is done, and if required, rangelist is
 * searched for mapping.
 *
 * System file cnodes are expected to be locked (shared or exclusive).
 */
int
hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
/*
	struct vnop_blockmap_args {
		vnode_t a_vp;
		off_t a_foffset;
		size_t a_size;
		daddr64_t *a_bpn;
		size_t *a_run;
		void *a_poff;
		int a_flags;
		vfs_context_t a_context;
	};
*/
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	size_t bytesContAvail = 0;
	int retval = E_NONE;
	int syslocks = 0;
	int lockflags = 0;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;
	int started_tr = 0;
	int tooklock = 0;

#if HFS_COMPRESSION
	if (VNODE_IS_RSRC(vp)) {
		/* allow blockmaps to the resource fork */
	} else {
		if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
			int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
			switch(state) {
				case FILE_IS_COMPRESSED:
					return ENOTSUP;
				case FILE_IS_CONVERTING:
					/* if FILE_IS_CONVERTING, we allow blockmap */
					break;
				default:
					printf("invalid state %d for compressed file\n", state);
					return ENOTSUP;
			}
		}
	}
#endif /* HFS_COMPRESSION */

	/* Do not allow blockmap operation on a directory */
	if (vnode_isdir(vp)) {
		return (ENOTSUP);
	}

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_bpn == NULL)
		return (0);

	if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
		if (VTOC(vp)->c_lockowner != current_thread()) {
			hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
			tooklock = 1;
		}
	}
	hfsmp = VTOHFS(vp);
	cp = VTOC(vp);
	fp = VTOF(vp);

retry:
	/* Check virtual blocks only when performing write operation */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		if (hfs_start_transaction(hfsmp) != 0) {
			retval = EINVAL;
			goto exit;
		} else {
			started_tr = 1;
		}
		syslocks = SFL_EXTENTS | SFL_BITMAP;

	} else if (overflow_extents(fp)) {
		syslocks = SFL_EXTENTS;
	}

	if (syslocks)
		lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);

	/*
	 * Check for any delayed allocations.
	 */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		int64_t actbytes;
		u_int32_t loanedBlocks;

		//
		// Make sure we have a transaction.  It's possible
		// that we came in and fp->ff_unallocblocks was zero
		// but during the time we blocked acquiring the extents
		// btree, ff_unallocblocks became non-zero and so we
		// will need to start a transaction.
		//
		if (started_tr == 0) {
			if (syslocks) {
				hfs_systemfile_unlock(hfsmp, lockflags);
				syslocks = 0;
			}
			goto retry;
		}

		/*
		 * Note: ExtendFileC will Release any blocks on loan and
		 * acquire real blocks.  So we ask to extend by zero bytes
		 * since ExtendFileC will account for the virtual blocks.
		 */
		loanedBlocks = fp->ff_unallocblocks;
		retval = ExtendFileC(hfsmp, (FCB *)fp, 0, 0,
				     kEFAllMask | kEFNoClumpMask, &actbytes);
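		/*
		 * If converting the loaned blocks to real allocations failed,
		 * reinstate the loan below so that the in-memory block accounting
		 * (filefork, cnode, and mount-wide counters) stays consistent.
		 */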
		if (retval) {
			fp->ff_unallocblocks = loanedBlocks;
			cp->c_blocks += loanedBlocks;
			fp->ff_blocks += loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);
			hfsmp->loanedBlocks += loanedBlocks;
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);

			hfs_systemfile_unlock(hfsmp, lockflags);
			cp->c_flag |= C_MODIFIED;
			if (started_tr) {
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

				hfs_end_transaction(hfsmp);
				started_tr = 0;
			}
			goto exit;
		}
	}
	retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
	                       ap->a_bpn, &bytesContAvail);
	if (syslocks) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		syslocks = 0;
	}

	if (started_tr) {
		(void) hfs_update(vp, TRUE);
		(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
		hfs_end_transaction(hfsmp);
		started_tr = 0;
	}
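	/*
	 * Note on the read-side handling below: when the mapping fails with
	 * ERANGE for a read and the file still has loaned (delayed-allocation)
	 * blocks, the invalid-range list is consulted and *ap->a_bpn is set to
	 * -1, which tells the caller the range is a hole to be zero-filled.
	 */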
	if (retval) {
		/* On write, always return error because virtual blocks, if any,
		 * should have been allocated in ExtendFileC().  We do not
		 * allocate virtual blocks on read, therefore return error
		 * only if no virtual blocks are allocated.  Otherwise we search
		 * rangelist for zero-fills.
		 */
		if ((MacToVFSError(retval) != ERANGE) ||
		    (ap->a_flags & VNODE_WRITE) ||
		    ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
			goto exit;
		}

		/* Validate if the start offset is within logical file size */
		if (ap->a_foffset > fp->ff_size) {
			goto exit;
		}

		/* Searching file extents has failed for read operation, therefore
		 * search rangelist for any uncommitted holes in the file.
		 */
		overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
		                      ap->a_foffset + (off_t)(ap->a_size - 1),
		                      &invalid_range);
		switch(overlaptype) {
		case RL_OVERLAPISCONTAINED:
			/* start_offset <= rl_start, end_offset >= rl_end */
			if (ap->a_foffset != invalid_range->rl_start) {
				break;
			}
		case RL_MATCHINGOVERLAP:
			/* start_offset = rl_start, end_offset = rl_end */
		case RL_OVERLAPCONTAINSRANGE:
			/* start_offset >= rl_start, end_offset <= rl_end */
		case RL_OVERLAPSTARTSBEFORE:
			/* start_offset > rl_start, end_offset >= rl_start */
			if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
				bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
			} else {
				bytesContAvail = fp->ff_size - ap->a_foffset;
			}
			if (bytesContAvail > ap->a_size) {
				bytesContAvail = ap->a_size;
			}
			*ap->a_bpn = (daddr64_t)-1;
			retval = 0;
			break;
		case RL_OVERLAPENDSAFTER:
			/* start_offset < rl_start, end_offset < rl_end */
		case RL_NOOVERLAP:
			break;
		}
		goto exit;
	}

	/* MapFileC() found a valid extent in the filefork.  Search the
	 * mapping information further for invalid file ranges.
	 */
	overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
	                      ap->a_foffset + (off_t)bytesContAvail - 1,
	                      &invalid_range);
	if (overlaptype != RL_NOOVERLAP) {
		switch(overlaptype) {
		case RL_MATCHINGOVERLAP:
		case RL_OVERLAPCONTAINSRANGE:
		case RL_OVERLAPSTARTSBEFORE:
			/* There's no valid block for this byte offset */
			*ap->a_bpn = (daddr64_t)-1;
			/* There's no point limiting the amount to be returned
			 * if the invalid range that was hit extends all the way
			 * to the EOF (i.e. there's no valid bytes between the
			 * end of this range and the file's EOF):
			 */
			if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
			    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
				bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
			}
			break;

		case RL_OVERLAPISCONTAINED:
		case RL_OVERLAPENDSAFTER:
			/* The range of interest hits an invalid block before the end: */
			if (invalid_range->rl_start == ap->a_foffset) {
				/* There's actually no valid information to be had starting here: */
				*ap->a_bpn = (daddr64_t)-1;
				if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
				    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
					bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
				}
			} else {
				bytesContAvail = invalid_range->rl_start - ap->a_foffset;
			}
			break;

		case RL_NOOVERLAP:
			break;
		} /* end switch */
		if (bytesContAvail > ap->a_size)
			bytesContAvail = ap->a_size;
	}

exit:
	if (retval == 0) {
		if (ap->a_run)
			*ap->a_run = bytesContAvail;

		if (ap->a_poff)
			*(int *)ap->a_poff = 0;
	}

	if (tooklock)
		hfs_unlock(cp);

	return (MacToVFSError(retval));
}
/*
 * prepare and issue the I/O
 * buf_strategy knows how to deal
 * with requests that require
 * fragmented I/Os
 */
int
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
	buf_t bp = ap->a_bp;
	vnode_t vp = buf_vnode(bp);
	int error = 0;

#if CONFIG_PROTECT
	cnode_t *cp = NULL;

	if ((cp = cp_get_protected_cnode(vp)) != NULL) {
		/*
		 * Some paths to hfs_vnop_strategy will take the cnode lock,
		 * and some won't. But since content protection is only enabled
		 * for files that (a) aren't system files and (b) are regular
		 * files, any valid cnode here will be unlocked.
		 */
		hfs_lock(cp, HFS_SHARED_LOCK);
		buf_setcpaddr(bp, cp->c_cpentry);
	}
#endif /* CONFIG_PROTECT */

	error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);

#if CONFIG_PROTECT
	if (cp) {
		hfs_unlock(cp);
	}
#endif

	return error;
}
static int
hfs_minorupdate(struct vnode *vp) {
	struct cnode *cp = VTOC(vp);
	cp->c_flag &= ~C_MODIFIED;
	cp->c_touch_acctime = 0;
	cp->c_touch_chgtime = 0;
	cp->c_touch_modtime = 0;

	return 0;
}
static int
do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context)
{
	register struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	struct proc *p = vfs_context_proc(context);
	kauth_cred_t cred = vfs_context_ucred(context);
	int retval;
	off_t bytesToAdd;
	off_t actualBytesAdded;
	off_t filebytes;
	u_int32_t fileblocks;
	int blksize;
	struct hfsmount *hfsmp;
	int lockflags;

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
		 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

	if (length < 0)
		return (EINVAL);

	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)
		return (EINVAL);

	if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
		return (EFBIG);

	hfsmp = VTOHFS(vp);

	retval = E_NONE;

	/* Files that are changing size are not hot file candidates. */
	if (hfsmp->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
#if QUOTA
	if ((retval = hfs_getinoquota(cp)))
		return (retval);
#endif /* QUOTA */

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of ff_size is 0, length will be at least 1.
	 */
	if (length > (off_t)fp->ff_size) {
#if QUOTA
		retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
				   cred, 0);
		if (retval)
			goto Err_Exit;
#endif /* QUOTA */
		/*
		 * If we don't have enough physical space then
		 * we need to extend the physical size.
		 */
		if (length > filebytes) {
			int eflags;
			u_int32_t blockHint = 0;

			/* All or nothing and don't round up to clumpsize. */
			eflags = kEFAllMask | kEFNoClumpMask;

			if (cred && suser(cred, NULL) != 0)
				eflags |= kEFReserveMask;  /* keep a reserve */

			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (filebytes == 0 &&
			    hfsmp->hfs_flags & HFS_METADATA_ZONE &&
			    hfs_virtualmetafile(cp)) {
				eflags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			}
			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			while ((length > filebytes) && (retval == E_NONE)) {
				bytesToAdd = length - filebytes;
				retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
						(FCB *)fp,
						bytesToAdd,
						blockHint,
						eflags,
						&actualBytesAdded));

				filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
				if (actualBytesAdded == 0 && retval == E_NONE) {
					if (length > filebytes)
						length = filebytes;
					break;
				}
			} /* endwhile */

			hfs_systemfile_unlock(hfsmp, lockflags);

			if (hfsmp->jnl) {
				if (skipupdate) {
					(void) hfs_minorupdate(vp);
				} else {
					(void) hfs_update(vp, TRUE);
					(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
				}
			}

			hfs_end_transaction(hfsmp);

			if (retval)
				goto Err_Exit;

			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
				(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
		}

		if (!(flags & IO_NOZEROFILL)) {
			if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
				struct rl_entry *invalid_range;
				off_t zero_limit;

				zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
				if (length < zero_limit) zero_limit = length;
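				/*
				 * zero_limit is ff_size rounded up to the next page
				 * boundary, capped at the new length: only the tail of
				 * the current last page needs explicit zeroing here;
				 * anything beyond it is tracked in the invalid-range
				 * list instead.
				 */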
				if (length > (off_t)fp->ff_size) {
					struct timeval tv;

					/* Extending the file: time to fill out the current last page w. zeroes? */
					if ((fp->ff_size & PAGE_MASK_64) &&
					    (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
					    fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {

						/* There's some valid data at the start of the (current) last page
						   of the file, so zero out the remainder of that page to ensure the
						   entire page contains valid data.  Since there is no invalid range
						   possible past the (current) eof, there's no need to remove anything
						   from the invalid range list before calling cluster_write(): */
						hfs_unlock(cp);
						retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
								fp->ff_size, (off_t)0,
								(flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
						hfs_lock(cp, HFS_FORCE_LOCK);
						if (retval) goto Err_Exit;

						/* Merely invalidate the remaining area, if necessary: */
						if (length > zero_limit) {
							microuptime(&tv);
							rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
							cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
						}
					} else {
						/* The page containing the (current) eof is invalid: just add the
						   remainder of the page to the invalid list, along with the area
						   being newly allocated:
						 */
						microuptime(&tv);
						rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
						cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
					}
				}
			} else {
				panic("hfs_truncate: invoked on non-UBC object?!");
			}
		}
		cp->c_touch_modtime = TRUE;
		fp->ff_size = length;

	} else { /* Shorten the size of the file */
		if ((off_t)fp->ff_size > length) {
			/* Any space previously marked as invalid is now irrelevant: */
			rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
		}

		/*
		 * Account for any unmapped blocks. Note that the new
		 * file length can still end up with unmapped blocks.
		 */
		if (fp->ff_unallocblocks > 0) {
			u_int32_t finalblks;
			u_int32_t loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);

			loanedBlocks = fp->ff_unallocblocks;
			cp->c_blocks -= loanedBlocks;
			fp->ff_blocks -= loanedBlocks;
			fp->ff_unallocblocks = 0;

			hfsmp->loanedBlocks -= loanedBlocks;

			finalblks = (length + blksize - 1) / blksize;
			if (finalblks > fp->ff_blocks) {
				/* calculate required unmapped blocks */
				loanedBlocks = finalblks - fp->ff_blocks;
				hfsmp->loanedBlocks += loanedBlocks;

				fp->ff_unallocblocks = loanedBlocks;
				cp->c_blocks += loanedBlocks;
				fp->ff_blocks += loanedBlocks;
			}
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		}
		/*
		 * For a TBE process the deallocation of the file blocks is
		 * delayed until the file is closed.  And hfs_close calls
		 * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
		 * isn't set, we make sure this isn't a TBE process.
		 */
		if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
#if QUOTA
			off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
#endif /* QUOTA */
			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			if (fp->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(fp))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB *)fp, length, 0,
						FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));

				hfs_systemfile_unlock(hfsmp, lockflags);
			}
			if (hfsmp->jnl) {
				if (retval == 0) {
					fp->ff_size = length;
				}
				if (skipupdate) {
					(void) hfs_minorupdate(vp);
				} else {
					(void) hfs_update(vp, TRUE);
					(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
				}
			}
			hfs_end_transaction(hfsmp);

			filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
			if (retval)
				goto Err_Exit;
#if QUOTA
			/* These are bytesreleased */
			(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
#endif /* QUOTA */
		}
		/* Only set update flag if the logical length changes */
		if ((off_t)fp->ff_size != length)
			cp->c_touch_modtime = TRUE;
		fp->ff_size = length;
	}
	if (cp->c_mode & (S_ISUID | S_ISGID)) {
		if (!vfs_context_issuser(context)) {
			cp->c_mode &= ~(S_ISUID | S_ISGID);
			skipupdate = 0;
		}
	}
	if (skipupdate) {
		retval = hfs_minorupdate(vp);
	} else {
		cp->c_touch_chgtime = TRUE;	/* status changed */
		cp->c_touch_modtime = TRUE;	/* file data was modified */
		retval = hfs_update(vp, MNT_WAIT);
	}
	if (retval) {
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
			-1, -1, -1, retval, 0);
	}

Err_Exit:

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
		(int)length, (int)fp->ff_size, (int)filebytes, retval, 0);

	return (retval);
}
/*
 * Preparation which must be done prior to deleting the catalog record
 * of a file or directory.  In order to make the on-disk state as safe as
 * possible, we remove the catalog entry before releasing the bitmap blocks
 * and the overflow extent records.  However, some work must be done prior
 * to deleting the catalog record.
 *
 * When calling this function, the cnode must exist both in memory and on-disk.
 * If there are both resource fork and data fork vnodes, this function should
 * be called on both.
 */
int
hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {

	struct filefork *fp = VTOF(vp);
	struct cnode *cp = VTOC(vp);
#if QUOTA
	int retval = 0;
#endif /* QUOTA */

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {
		return (EISDIR);
	}

	/*
	 * See the comment below in hfs_truncate for why we need to call
	 * setsize here.  Essentially we want to avoid pending IO if we
	 * already know that the blocks are going to be released here.
	 * This function is only called when totally removing all storage for a file, so
	 * we can take a shortcut and immediately setsize (0);
	 */
	ubc_setsize(vp, 0);

	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)
		return (EINVAL);

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
#if QUOTA
	if ((retval = hfs_getinoquota(cp))) {
		return (retval);
	}
#endif /* QUOTA */

	/* Wipe out any invalid ranges which have yet to be backed by disk */
	rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);

	/*
	 * Account for any unmapped blocks. Since we're deleting the
	 * entire file, we don't have to worry about just shrinking
	 * to a smaller number of borrowed blocks.
	 */
	if (fp->ff_unallocblocks > 0) {
		u_int32_t loanedBlocks;

		HFS_MOUNT_LOCK(hfsmp, TRUE);

		loanedBlocks = fp->ff_unallocblocks;
		cp->c_blocks -= loanedBlocks;
		fp->ff_blocks -= loanedBlocks;
		fp->ff_unallocblocks = 0;

		hfsmp->loanedBlocks -= loanedBlocks;

		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	}

	return 0;
}
/*
 * Special wrapper around calling TruncateFileC.  This function is usable
 * even when the catalog record does not exist any longer, making it ideal
 * for use when deleting a file.  The simplification here is that we know
 * that we are releasing all blocks.
 *
 * The caller is responsible for saving off a copy of the filefork(s)
 * embedded within the cnode prior to calling this function.  The pointers
 * supplied as arguments must be valid even if the cnode is no longer valid.
 */
int
hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
		     struct filefork *rsrcfork, u_int32_t fileid) {

	off_t filebytes;
	u_int32_t fileblocks;
	int blksize = 0;
	int error = 0;
	int lockflags;

	blksize = hfsmp->blockSize;

	/* Data fork */
	if (datafork->ff_blocks > 0) {
		fileblocks = datafork->ff_blocks;
		filebytes = (off_t)fileblocks * (off_t)blksize;

		/* We killed invalid ranges and loaned blocks before we removed the catalog entry */

		while (filebytes > 0) {
			if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(datafork)) {
				filebytes -= HFS_BIGFILE_SIZE;
			} else {
				filebytes = 0;
			}

			/* Start a transaction, and wipe out as many blocks as we can in this iteration */
			if (hfs_start_transaction(hfsmp) != 0) {
				error = EINVAL;
				break;
			}

			if (datafork->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(datafork))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false));

				hfs_systemfile_unlock(hfsmp, lockflags);
			}
			if (error == 0) {
				datafork->ff_size = filebytes;
			}
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			/* Finish the transaction and start over if necessary */
			hfs_end_transaction(hfsmp);

			if (error) {
				break;
			}
		}
	}

	/* Resource fork */
	if (error == 0 && (rsrcfork != NULL) && rsrcfork->ff_blocks > 0) {
		fileblocks = rsrcfork->ff_blocks;
		filebytes = (off_t)fileblocks * (off_t)blksize;

		/* We killed invalid ranges and loaned blocks before we removed the catalog entry */

		while (filebytes > 0) {
			if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(rsrcfork)) {
				filebytes -= HFS_BIGFILE_SIZE;
			} else {
				filebytes = 0;
			}

			/* Start a transaction, and wipe out as many blocks as we can in this iteration */
			if (hfs_start_transaction(hfsmp) != 0) {
				error = EINVAL;
				break;
			}

			if (rsrcfork->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(rsrcfork))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false));

				hfs_systemfile_unlock(hfsmp, lockflags);
			}
			if (error == 0) {
				rsrcfork->ff_size = filebytes;
			}
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			/* Finish the transaction and start over if necessary */
			hfs_end_transaction(hfsmp);

			if (error) {
				break;
			}
		}
	}

	return error;
}
/*
 * Truncate a cnode to at most length size, freeing (or adding) the
 * disk blocks.
 */
int
hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
	     int skipupdate, vfs_context_t context)
{
	struct filefork *fp = VTOF(vp);
	off_t filebytes;
	u_int32_t fileblocks;
	int blksize, error = 0;
	struct cnode *cp = VTOC(vp);

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {
		return (EISDIR);
	}
	/* A swap file cannot change size. */
	if (vnode_isswap(vp) && (length != 0)) {
		return (EPERM);
	}

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	//
	// Have to do this here so that we don't wind up with
	// i/o pending for blocks that are about to be released
	// if we truncate the file.
	//
	// If skipsetsize is set, then the caller is responsible
	// for the ubc_setsize.
	//
	// Even if skipsetsize is set, if the length is zero we
	// want to call ubc_setsize() because as of SnowLeopard
	// it will no longer cause any page-ins and it will drop
	// any dirty pages so that we don't do any i/o that we
	// don't have to.  This also prevents a race where i/o
	// for truncated blocks may overwrite later data if the
	// blocks get reallocated to a different file.
	//
	if (!skipsetsize || length == 0)
		ubc_setsize(vp, length);

	// have to loop truncating or growing files that are
	// really big because otherwise transactions can get
	// enormous and consume too many kernel resources.
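	//
	// Each pass below moves to an intermediate size at most
	// HFS_BIGFILE_SIZE away from the current physical size (when the fork
	// has overflow extents), so a single do_hfs_truncate() call never has
	// to cover an unbounded number of extents in one journal transaction.
	//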
	if (length < filebytes) {
		while (filebytes > length) {
			if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes -= HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
			if (error)
				break;
		}
	} else if (length > filebytes) {
		while (filebytes < length) {
			if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes += HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
			if (error)
				break;
		}
	} else /* Same logical size */ {

		error = do_hfs_truncate(vp, length, flags, skipupdate, context);
	}
	/* Files that are changing size are not hot file candidates. */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}

	return (error);
}
/*
 * Preallocate file storage space.
 */
int
hfs_vnop_allocate(struct vnop_allocate_args /* {
		vnode_t a_vp;
		off_t a_length;
		u_int32_t a_flags;
		off_t *a_bytesallocated;
		off_t a_offset;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	ExtendedVCB *vcb;
	off_t length = ap->a_length;
	off_t startingPEOF;
	off_t moreBytesRequested;
	off_t actualBytesAdded;
	off_t filebytes;
	u_int32_t fileblocks;
	int retval, retval2;
	u_int32_t blockHint;
	u_int32_t extendFlags;	/* For call to ExtendFileC */
	struct hfsmount *hfsmp;
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);
	int lockflags;
	time_t orig_ctime;

	*(ap->a_bytesallocated) = 0;

	if (!vnode_isreg(vp))
		return (EISDIR);
	if (length < (off_t)0)
		return (EINVAL);

	cp = VTOC(vp);

	orig_ctime = VTOC(vp)->c_ctime;

	check_for_tracked_file(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);

	hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);

	if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
		goto Err_Exit;
	}

	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);
	vcb = VTOVCB(vp);

	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

	if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
		retval = EINVAL;
		goto Err_Exit;
	}

	/* Fill in the flags word for the call to Extend the file */

	extendFlags = kEFNoClumpMask;
	if (ap->a_flags & ALLOCATECONTIG)
		extendFlags |= kEFContigMask;
	if (ap->a_flags & ALLOCATEALL)
		extendFlags |= kEFAllMask;
	if (cred && suser(cred, NULL) != 0)
		extendFlags |= kEFReserveMask;
	if (hfs_virtualmetafile(cp))
		extendFlags |= kEFMetadataMask;

	retval = E_NONE;
	blockHint = 0;
	startingPEOF = filebytes;

	if (ap->a_flags & ALLOCATEFROMPEOF)
		length += filebytes;
	else if (ap->a_flags & ALLOCATEFROMVOL)
		blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

	/* If no changes are necessary, then we're done */
	if (filebytes == length)
		goto Std_Exit;

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of filebytes is 0, length will be at least 1.
	 */
	if (length > filebytes) {
		off_t total_bytes_added = 0, orig_request_size;

		orig_request_size = moreBytesRequested = length - filebytes;

#if QUOTA
		retval = hfs_chkdq(cp,
				(int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
				cred, 0);
		if (retval)
			goto Err_Exit;
#endif /* QUOTA */
		/*
		 * Metadata zone checks.
		 */
		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (hfs_virtualmetafile(cp)) {
				blockHint = hfsmp->hfs_metazone_start;
			} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
				   (blockHint <= hfsmp->hfs_metazone_end)) {
				/*
				 * Move blockHint outside metadata zone.
				 */
				blockHint = hfsmp->hfs_metazone_end + 1;
			}
		}

		while ((length > filebytes) && (retval == E_NONE)) {
			off_t bytesRequested;

			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
				bytesRequested = HFS_BIGFILE_SIZE;
			} else {
				bytesRequested = moreBytesRequested;
			}

			if (extendFlags & kEFContigMask) {
				// if we're on a sparse device, this will force it to do a
				// full scan to find the space needed.
				hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
			}

			retval = MacToVFSError(ExtendFileC(vcb,
					(FCB *)fp,
					bytesRequested,
					blockHint,
					extendFlags,
					&actualBytesAdded));

			if (retval == E_NONE) {
				*(ap->a_bytesallocated) += actualBytesAdded;
				total_bytes_added += actualBytesAdded;
				moreBytesRequested -= actualBytesAdded;
				if (blockHint != 0) {
					blockHint += actualBytesAdded / vcb->blockSize;
				}
			}
			filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

			hfs_systemfile_unlock(hfsmp, lockflags);

			if (hfsmp->jnl) {
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
			}

			hfs_end_transaction(hfsmp);
		}

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes))
			goto Err_Exit;

		/*
		 * Adjust actualBytesAdded to be allocation block aligned, not
		 * clump size aligned.
		 * NOTE: So what we are reporting does not affect reality
		 * until the file is closed, when we truncate the file to allocation
		 * block size.
		 */
		if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
			*(ap->a_bytesallocated) =
				roundup(orig_request_size, (off_t)vcb->blockSize);

	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).
			 */
		}

		retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes)) goto Err_Exit;
#if QUOTA
		/* These are bytesreleased */
		(void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);
#endif /* QUOTA */

		if (fp->ff_size > filebytes) {
			fp->ff_size = filebytes;

			hfs_unlock(cp);
			ubc_setsize(vp, fp->ff_size);
			hfs_lock(cp, HFS_FORCE_LOCK);
		}
	}

Std_Exit:
	cp->c_touch_chgtime = TRUE;
	cp->c_touch_modtime = TRUE;
	retval2 = hfs_update(vp, MNT_WAIT);

	if (retval == 0)
		retval = retval2;
Err_Exit:
	hfs_unlock_truncate(cp, 0);
	hfs_unlock(cp);
	return (retval);
}
/*
 * Pagein for HFS filesystem
 */
int
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
	struct vnop_pagein_args {
		vnode_t       a_vp,
		upl_t         a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		int           a_flags
		vfs_context_t a_context;
	};
*/
{
	vnode_t 	vp;
	struct cnode	*cp;
	struct filefork *fp;
	int		error = 0;
	upl_t 		upl;
	upl_page_info_t	*pl;
	off_t		f_offset;
	int		offset;
	int		isize;
	int		pg_index;
	boolean_t	truncate_lock_held = FALSE;
	boolean_t 	file_converted = FALSE;
	kern_return_t	kret;

	vp = ap->a_vp;
	cp = VTOC(vp);
	fp = VTOF(vp);

#if CONFIG_PROTECT
	if ((error = cp_handle_vnop(cp, CP_READ_ACCESS | CP_WRITE_ACCESS)) != 0) {
		return error;
	}
#endif /* CONFIG_PROTECT */

	if (ap->a_pl != NULL) {
		/*
		 * this can only happen for swap files now that
		 * we're asking for V2 paging behavior...
		 * so don't need to worry about decompression, or
		 * keeping track of blocks read or taking the truncate lock
		 */
		error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
				       ap->a_size, (off_t)fp->ff_size, ap->a_flags);
		goto pagein_done;
	}

retry_pagein:
	/*
	 * take truncate lock (shared/recursive) to guard against
	 * zero-fill thru fsync interfering, but only for v2
	 *
	 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
	 * lock shared and we are allowed to recurse 1 level if this thread already
	 * owns the lock exclusively... this can legally occur
	 * if we are doing a shrinking ftruncate against a file
	 * that is mapped private, and the pages being truncated
	 * do not currently exist in the cache... in that case
	 * we will have to page-in the missing pages in order
	 * to provide them to the private mapping... we must
	 * also call hfs_unlock_truncate with a positive been_recursed
	 * arg to indicate that if we have recursed, there is no need to drop
	 * the lock. Allowing this simple recursion is necessary
	 * in order to avoid a certain deadlock... since the ftruncate
	 * already holds the truncate lock exclusively, if we try
	 * to acquire it shared to protect the pagein path, we will
	 * deadlock.
	 *
	 * NOTE: The if () block below is a workaround in order to prevent a
	 * VM deadlock. See rdar://7853471.
	 *
	 * If we are in a forced unmount, then launchd will still have the
	 * dyld_shared_cache file mapped as it is trying to reboot.  If we
	 * take the truncate lock here to service a page fault, then our
	 * thread could deadlock with the forced-unmount.  The forced unmount
	 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
	 * marked C_DELETED, it will call ubc_setsize(0).  As a result, the unmount
	 * thread will think it needs to copy all of the data out of the file
	 * and into a VM copy object.  If we hold the cnode lock here, then that
	 * VM operation will not be able to proceed, because we'll set a busy page
	 * before attempting to grab the lock.  Note that this isn't as simple as "don't
	 * call ubc_setsize" because doing that would just shift the problem to the
	 * ubc_msync done before the vnode is reclaimed.
	 *
	 * So, if a forced unmount on this volume is in flight AND the cnode is
	 * marked C_DELETED, then just go ahead and do the page in without taking
	 * the lock (thus suspending pagein_v2 semantics temporarily).  Since it's on a file
	 * that is not going to be available on the next mount, this seems like an
	 * OK solution from a correctness point of view, even though it is hacky.
	 */
	if (vfs_isforce(vp->v_mount)) {
		if (cp->c_flag & C_DELETED) {
			/* If we don't get it, then just go ahead and operate without the lock */
			truncate_lock_held = hfs_try_trunclock(cp, HFS_RECURSE_TRUNCLOCK);
		}
	} else {
		hfs_lock_truncate(cp, HFS_RECURSE_TRUNCLOCK);
		truncate_lock_held = TRUE;
	}

	kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);

	if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
		error = EINVAL;
		goto pagein_done;
	}
	isize = ap->a_size;

	/*
	 * Scan from the back to find the last page in the UPL, so that we
	 * aren't looking at a UPL that may have already been freed by the
	 * preceding aborts/completions.
	 */
	for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
		if (upl_page_present(pl, --pg_index))
			break;
		if (pg_index == 0) {
			/*
			 * no absent pages were found in the range specified
			 * just abort the UPL to get rid of it and then we're done
			 */
			ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
			goto pagein_done;
		}
	}
	/*
	 * initialize the offset variables before we touch the UPL.
	 * f_offset is the position into the file, in bytes
	 * offset is the position into the UPL, in bytes
	 * pg_index is the pg# of the UPL we're operating on
	 * isize is the offset into the UPL of the last page that is present.
	 */
	isize = ((pg_index + 1) * PAGE_SIZE);
	pg_index = 0;
	offset = 0;
	f_offset = ap->a_f_offset;

	while (isize) {
		int  xsize;
		int  num_of_pages;

		if ( !upl_page_present(pl, pg_index)) {
			/*
			 * we asked for RET_ONLY_ABSENT, so it's possible
			 * to get back empty slots in the UPL.
			 * just skip over them
			 */
			f_offset += PAGE_SIZE;
			offset   += PAGE_SIZE;
			isize    -= PAGE_SIZE;
			pg_index++;

			continue;
		}
		/*
		 * We know that we have at least one absent page.
		 * Now checking to see how many in a row we have
		 */
		num_of_pages = 1;
		xsize = isize - PAGE_SIZE;

		while (xsize) {
			if ( !upl_page_present(pl, pg_index + num_of_pages))
				break;
			num_of_pages++;
			xsize -= PAGE_SIZE;
		}
		xsize = num_of_pages * PAGE_SIZE;
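		/*
		 * xsize now spans the contiguous run of absent pages starting at
		 * pg_index; the whole run is handed to decmpfs or cluster_pagein
		 * as a single request below.
		 */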
#if HFS_COMPRESSION
		if (VNODE_IS_RSRC(vp)) {
			/* allow pageins of the resource fork */
		} else {
			int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */

			if (compressed) {
				if (truncate_lock_held) {
					/*
					 * can't hold the truncate lock when calling into the decmpfs layer
					 * since it calls back into this layer... even though we're only
					 * holding the lock in shared mode, and the re-entrant path only
					 * takes the lock shared, we can deadlock if some other thread
					 * tries to grab the lock exclusively in between.
					 */
					hfs_unlock_truncate(cp, 1);
					truncate_lock_held = FALSE;
				}
				ap->a_pl = upl;
				ap->a_pl_offset = offset;
				ap->a_f_offset = f_offset;
				ap->a_size = xsize;

				error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
				/*
				 * note that decmpfs_pagein_compressed can change the state of
				 * 'compressed'... it will set it to 0 if the file is no longer
				 * compressed once the compression lock is successfully taken
				 * i.e. we would block on that lock while the file is being inflated
				 */
				if (compressed) {
					if (error == 0) {
						/* successful page-in, update the access time */
						VTOC(vp)->c_touch_acctime = TRUE;

						/* compressed files are not hot file candidates */
						if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
							fp->ff_bytesread = 0;
						}
					} else if (error == EAGAIN) {
						/*
						 * EAGAIN indicates someone else already holds the compression lock...
						 * to avoid deadlocking, we'll abort this range of pages with an
						 * indication that the pagein needs to be redriven
						 */
						ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
					}
					goto pagein_next_range;
				} else {
					/*
					 * Set file_converted only if the file became decompressed while we were
					 * paging in.  If it were still compressed, we would re-start the loop using the goto
					 * in the above block.  This avoids overloading truncate_lock_held as our retry_pagein
					 * condition below, since we could have avoided taking the truncate lock to prevent
					 * a deadlock in the force unmount case.
					 */
					file_converted = TRUE;
				}
			}
			if (file_converted == TRUE) {
				/*
				 * the file was converted back to a regular file after we first saw it as compressed
				 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over
				 * reset a_size so that we consider what remains of the original request
				 * and null out a_upl and a_pl_offset.
				 *
				 * We should only be able to get into this block if the decmpfs_pagein_compressed
				 * successfully decompressed the range in question for this file.
				 */
				ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);

				ap->a_size = isize;
				ap->a_pl = NULL;
				ap->a_pl_offset = 0;

				/* Reset file_converted back to false so that we don't infinite-loop. */
				file_converted = FALSE;
				goto retry_pagein;
			}
		}
#endif /* HFS_COMPRESSION */

		error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);

		/*
		 * Keep track of blocks read.
		 */
		if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
			int bytesread;
			int took_cnode_lock = 0;

			if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
				bytesread = fp->ff_size;
			else
				bytesread = xsize;

			/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
			if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
				hfs_lock(cp, HFS_FORCE_LOCK);
				took_cnode_lock = 1;
			}
			/*
			 * If this file hasn't been seen since the start of
			 * the current sampling period then start over.
			 */
			if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
				struct timeval tv;

				fp->ff_bytesread = bytesread;
				microtime(&tv);
				cp->c_atime = tv.tv_sec;
			} else {
				fp->ff_bytesread += bytesread;
			}
			cp->c_touch_acctime = TRUE;
			if (took_cnode_lock)
				hfs_unlock(cp);
		}
pagein_next_range:
		f_offset += xsize;
		offset   += xsize;
		isize    -= xsize;
		pg_index += num_of_pages;

		error = 0;
	}

pagein_done:
	if (truncate_lock_held == TRUE) {
		/* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
		hfs_unlock_truncate(cp, 1);
	}

	return (error);
}
/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
		vnode_t       a_vp,
		upl_t         a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		int           a_flags
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	int retval = 0;
	off_t filesize;
	upl_t upl;
	upl_page_info_t *pl;
	vm_offset_t a_pl_offset;
	int a_flags;
	int is_pageoutv2 = 0;
	kern_return_t kret;

	cp = VTOC(vp);
	fp = VTOF(vp);

	/*
	 * Figure out where the file ends, for pageout purposes.  If
	 * ff_new_size > ff_size, then we're in the middle of extending the
	 * file via a write, so it is safe (and necessary) that we be able
	 * to pageout up to that point.
	 */
	filesize = fp->ff_size;
	if (fp->ff_new_size > filesize)
		filesize = fp->ff_new_size;

	a_flags = ap->a_flags;
	a_pl_offset = ap->a_pl_offset;

	/*
	 * we can tell if we're getting the new or old behavior from the UPL
	 */
	if ((upl = ap->a_pl) == NULL) {
		int request_flags;

		is_pageoutv2 = 1;
		/*
		 * we're in control of any UPL we commit
		 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
		 */
		a_flags &= ~UPL_NOCOMMIT;
		a_pl_offset = 0;

		/*
		 * take truncate lock (shared) to guard against
		 * zero-fill thru fsync interfering, but only for v2
		 */
		hfs_lock_truncate(cp, HFS_SHARED_LOCK);

		if (a_flags & UPL_MSYNC) {
			request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
		} else {
			request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
		}

		kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);

		if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
			retval = EINVAL;
			goto pageout_done;
		}
	}
	/*
	 * from this point forward upl points at the UPL we're working with
	 * it was either passed in or we successfully created it
	 */

	/*
	 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
	 * UPL instead of relying on the UPL passed into us.  We go ahead and do that here,
	 * scanning for dirty ranges.  We'll issue our own N cluster_pageout calls, for
	 * N dirty ranges in the UPL. Note that this is almost a direct copy of the
	 * logic in vnode_pageout except that we need to do it after grabbing the truncate
	 * lock in HFS so that we don't lock invert ourselves.
	 *
	 * Note that we can still get into this function on behalf of the default pager with
	 * non-V2 behavior (swapfiles).  However in that case, we did not grab locks above
	 * since fsync and other writing threads will grab the locks, then mark the
	 * relevant pages as busy.  But the pageout codepath marks the pages as busy,
	 * and THEN would attempt to grab the truncate lock, which would result in deadlock.  So
	 * we do not try to grab anything for the pre-V2 case, which should only be accessed
	 * by the paging/VM system.
	 */

	if (is_pageoutv2) {
		off_t f_offset;
		int offset;
		int isize;
		int pg_index;
		int error;
		int error_ret = 0;

		isize = ap->a_size;
		f_offset = ap->a_f_offset;

		/*
		 * Scan from the back to find the last page in the UPL, so that we
		 * aren't looking at a UPL that may have already been freed by the
		 * preceding aborts/completions.
		 */
		for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
			if (upl_page_present(pl, --pg_index))
				break;
			if (pg_index == 0) {
				ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
				goto pageout_done;
			}
		}

		/*
		 * initialize the offset variables before we touch the UPL.
		 * a_f_offset is the position into the file, in bytes
		 * offset is the position into the UPL, in bytes
		 * pg_index is the pg# of the UPL we're operating on.
		 * isize is the offset into the UPL of the last non-clean page.
		 */
		isize = ((pg_index + 1) * PAGE_SIZE);

		offset = 0;
		pg_index = 0;

		while (isize) {
			int  xsize;
			int  num_of_pages;

			if ( !upl_page_present(pl, pg_index)) {
				/*
				 * we asked for RET_ONLY_DIRTY, so it's possible
				 * to get back empty slots in the UPL.
				 * just skip over them
				 */
				f_offset += PAGE_SIZE;
				offset   += PAGE_SIZE;
				isize    -= PAGE_SIZE;
				pg_index++;

				continue;
			}
			if ( !upl_dirty_page(pl, pg_index)) {
				panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
			}

			/*
			 * We know that we have at least one dirty page.
			 * Now checking to see how many in a row we have
			 */
			num_of_pages = 1;
			xsize = isize - PAGE_SIZE;

			while (xsize) {
				if ( !upl_dirty_page(pl, pg_index + num_of_pages))
					break;
				num_of_pages++;
				xsize -= PAGE_SIZE;
			}
			xsize = num_of_pages * PAGE_SIZE;

			if (!vnode_isswap(vp)) {
				off_t end_of_range;
				int tooklock = 0;

				if (cp->c_lockowner != current_thread()) {
					if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
						/*
						 * we're in the v2 path, so we are the
						 * owner of the UPL... we may have already
						 * processed some of the UPL, so abort it
						 * from the current working offset to the
						 * end of the UPL
						 */
						ubc_upl_abort_range(upl,
								    offset,
								    ap->a_size - offset,
								    UPL_ABORT_FREE_ON_EMPTY);
						goto pageout_done;
					}
					tooklock = 1;
				}
				end_of_range = f_offset + xsize - 1;

				if (end_of_range >= filesize) {
					end_of_range = (off_t)(filesize - 1);
				}
				if (f_offset < filesize) {
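					/*
					 * The pages in this range are about to be written to
					 * disk, so any invalid (not-yet-zero-filled) ranges they
					 * cover can be dropped from the cnode's invalid-range list.
					 */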
					rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
					cp->c_flag |= C_MODIFIED;  /* leof is dirty */
				}
				if (tooklock) {
					hfs_unlock(cp);
				}
			}
			if ((error = cluster_pageout(vp, upl, offset, f_offset,
							xsize, filesize, a_flags))) {
				if (error_ret == 0)
					error_ret = error;
			}
			f_offset += xsize;
			offset   += xsize;
			isize    -= xsize;
			pg_index += num_of_pages;
		}
		/* capture errnos bubbled out of cluster_pageout if they occurred */
		if (error_ret != 0) {
			retval = error_ret;
		}
	} /* end block for v2 pageout behavior */
	else {
		if (!vnode_isswap(vp)) {
			off_t end_of_range;
			int tooklock = 0;

			if (cp->c_lockowner != current_thread()) {
				if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
					if (!(a_flags & UPL_NOCOMMIT)) {
						ubc_upl_abort_range(upl,
								    a_pl_offset,
								    ap->a_size,
								    UPL_ABORT_FREE_ON_EMPTY);
					}
					goto pageout_done;
				}
				tooklock = 1;
			}
			end_of_range = ap->a_f_offset + ap->a_size - 1;

			if (end_of_range >= filesize) {
				end_of_range = (off_t)(filesize - 1);
			}
			if (ap->a_f_offset < filesize) {
				rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
				cp->c_flag |= C_MODIFIED;  /* leof is dirty */
			}

			if (tooklock) {
				hfs_unlock(cp);
			}
		}
		/*
		 * just call cluster_pageout for old pre-v2 behavior
		 */
		retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
					 ap->a_size, filesize, a_flags);
	}

	/*
	 * If data was written, update the modification time of the file.
	 * If setuid or setgid bits are set and this process is not the
	 * superuser then clear the setuid and setgid bits as a precaution
	 * against tampering.
	 */
	if (retval == 0) {
		cp->c_touch_modtime = TRUE;
		cp->c_touch_chgtime = TRUE;
		if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
		    (vfs_context_suser(ap->a_context) != 0)) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			cp->c_mode &= ~(S_ISUID | S_ISGID);
			hfs_unlock(cp);
		}
	}

pageout_done:
	if (is_pageoutv2) {
		/* release truncate lock (shared) */
		hfs_unlock_truncate(cp, 0);
	}
	return (retval);
}
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {

		/*
		 * Swap and validate the node if it is in native byte order.
		 * This will always be true on big endian, so we always validate
		 * before writing here.  On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
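		/*
		 * The check below looks at the last two bytes of the node, which
		 * hold the offset of the node's first record.  A value of 0x000e
		 * (14, the size of the BTNodeDescriptor) read in host byte order
		 * means the node is still in native order and must be swapped to
		 * big endian before it goes to disk.
		 */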
		if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
			if (retval)
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");
		}
	}

	/* This buffer shouldn't be locked anymore but if it is clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp %p\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite (ap);

	return (retval);
}
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
	struct proc *p)
{
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	u_int32_t headblks;
	u_int32_t datablks;
	u_int32_t blksize;
	u_int32_t growsize;
	u_int32_t nextallocsave;
	daddr64_t sector_a, sector_b;
	int eflags;
	off_t newbytes;
	int retval;
	int lockflags = 0;
	int took_trunc_lock = 0;
	int started_tr = 0;
	enum vtype vnodetype;

	vnodetype = vnode_vtype(vp);
	if (vnodetype != VREG && vnodetype != VLNK) {
		return (EPERM);
	}

	hfsmp = VTOHFS(vp);
	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
		return (ENOSPC);
	}

	cp = VTOC(vp);
	fp = VTOF(vp);
	if (fp->ff_unallocblocks)
		return (EINVAL);

#if CONFIG_PROTECT
	/*
	 * <rdar://problem/9118426>
	 * Disable HFS file relocation on content-protected filesystems
	 */
	if (cp_fs_protected (hfsmp->hfs_mp)) {
		return EINVAL;
	}
#endif
	/* If it's an SSD, also disable HFS relocation */
	if (hfsmp->hfs_flags & HFS_SSD) {
		return EINVAL;
	}

	blksize = hfsmp->blockSize;
	if (blockHint == 0)
		blockHint = hfsmp->nextAllocation;

	if ((fp->ff_size > 0x7fffffff) ||
	    ((fp->ff_size > blksize) && vnodetype == VLNK)) {
		return (EFBIG);
	}

	//
	// We do not believe that this call to hfs_fsync() is
	// necessary and it causes a journal transaction
	// deadlock so we are removing it.
	//
	//if (vnodetype == VREG && !vnode_issystem(vp)) {
	//	retval = hfs_fsync(vp, MNT_WAIT, 0, p);
	//	if (retval)
	//		return (retval);
	//}

	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
		hfs_unlock(cp);
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
		/* Force lock since callers expects lock to be held. */
		if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
			hfs_unlock_truncate(cp, 0);
			return (retval);
		}
		/* No need to continue if file was removed. */
		if (cp->c_flag & C_NOEXISTS) {
			hfs_unlock_truncate(cp, 0);
			return (ENOENT);
		}
		took_trunc_lock = 1;
	}
	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;

	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, 0);
		return (EINVAL);
	}
	started_tr = 1;
	/*
	 * Protect the extents b-tree and the allocation bitmap
	 * during MapFileBlockC and ExtendFileC operations.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
	if (retval) {
		retval = MacToVFSError(retval);
		goto out;
	}

	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
	nextallocsave = hfsmp->nextAllocation;
	retval = ExtendFileC(hfsmp, (FCB *)fp, growsize, blockHint, eflags, &newbytes);
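	/*
	 * If this was a metadata-zone allocation, restore the volume's rolling
	 * nextAllocation pointer afterwards so that ordinary allocations are
	 * not steered into the metadata zone by this relocation.
	 */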
	if (eflags & kEFMetadataMask) {
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
		MarkVCBDirty(hfsmp);
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	}

	retval = MacToVFSError(retval);
	if (retval == 0) {
		cp->c_flag |= C_MODIFIED;
		if (newbytes < growsize) {
			retval = ENOSPC;
			goto restore;
		} else if (fp->ff_blocks < (headblks + datablks)) {
			printf("hfs_relocate: allocation failed");
			retval = ENOSPC;
			goto restore;
		}

		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
		if (retval) {
			retval = MacToVFSError(retval);
		} else if ((sector_a + 1) == sector_b) {
			retval = ENOSPC;
			goto restore;
		} else if ((eflags & kEFMetadataMask) &&
		           ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
		              hfsmp->hfs_metazone_end)) {
			const char * filestr;
			char emptystr = '\0';

			if (cp->c_desc.cd_nameptr != NULL) {
				filestr = (const char *)&cp->c_desc.cd_nameptr[0];
			} else if (vnode_name(vp) != NULL) {
				filestr = vnode_name(vp);
			} else {
				filestr = &emptystr;
			}
			retval = ENOSPC;
			goto restore;
		}
	}
	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	hfs_end_transaction(hfsmp);
	started_tr = 0;

	if (retval) {
		/*
		 * Check to see if failure is due to excessive fragmentation.
		 */
		if ((retval == ENOSPC) &&
		    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
		}
		goto out;
	}
	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */

	if (vnodetype == VLNK)
		retval = hfs_clonelink(vp, blksize, cred, p);
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);

	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto out;
	}
	started_tr = 1;
	if (retval)
		goto restore;

	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	if (retval)
		goto restore;
out:
	if (took_trunc_lock)
		hfs_unlock_truncate(cp, 0);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/* Push cnode's new extent data to disk. */
	if (retval == 0) {
		(void) hfs_update(vp, MNT_WAIT);
	}
	if (hfsmp->jnl) {
		if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
			(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
		else
			(void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
	}
exit:
	if (started_tr)
		hfs_end_transaction(hfsmp);

	return (retval);

restore:
	if (fp->ff_blocks == headblks) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, 0);
		goto exit;
	}
	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}

	(void) TruncateFileC(hfsmp, (FCB *)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
			     FTOC(fp)->c_fileid, false);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;

	if (took_trunc_lock)
		hfs_unlock_truncate(cp, 0);
	goto exit;
}
static int
hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
{
	struct buf *head_bp = NULL;
	struct buf *tail_bp = NULL;
	int error;

	error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
	if (error)
		goto out;

	tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
	if (tail_bp == NULL) {
		error = EIO;
		goto out;
	}
	bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
	error = (int)buf_bwrite(tail_bp);
out:
	if (head_bp) {
		buf_markinvalid(head_bp);
		buf_brelse(head_bp);
	}
	(void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);

	return (error);
}
/*
 * Clone a file's data within the file.
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	caddr_t  bufp;
	size_t   bufsize;
	size_t   copysize;
	size_t   iosize;
	size_t   offset;
	off_t    writebase;
	uio_t    auio;
	int      error = 0;

	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 128 * 1024);
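	/*
	 * The clone is driven through the cluster layer in chunks of at most
	 * 128 KB, staged through a temporary kernel buffer; IO_NOCACHE keeps
	 * the copy from polluting the UBC with pages for the old blocks.
	 */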
	offset = 0;

	hfs_unlock(VTOC(vp));

#if CONFIG_PROTECT
	if ((error = cp_handle_vnop(VTOC(vp), CP_WRITE_ACCESS)) != 0) {
		hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
		return (error);
	}
#endif /* CONFIG_PROTECT */

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
		return (ENOMEM);
	}

	auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);

	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_read(vp, auio, copysize, IO_NOCACHE);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
			error = EIO;
			break;
		}

		uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_write(vp, auio, writebase + offset,
		                      writebase + offset + iosize,
		                      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
			error = EIO;
			break;
		}
		offset += iosize;
	}
	uio_free(auio);

	if ((blksize & PAGE_MASK)) {
		/*
		 * since the copy may not have started on a PAGE
		 * boundary (or may not have ended on one), we
		 * may have pages left in the cache since NOCACHE
		 * will let partially written pages linger...
		 * lets just flush the entire range to make sure
		 * we don't have any pages left that are beyond
		 * (or intersect) the real LEOF of this file
		 */
		ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
	} else {
		/*
		 * No need to call ubc_sync_range or hfs_invalbuf
		 * since the file was copied using IO_NOCACHE and
		 * the copy was done starting and ending on a page
		 * boundary in the file.
		 */
	}
	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
	return (error);
}
/*
 * Clone a system (metadata) file.
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
	caddr_t  bufp;
	char    *offset;
	size_t   bufsize;
	size_t   iosize;
	struct buf *bp = NULL;
	daddr64_t  blkno;
	daddr64_t  blk;
	daddr64_t  start_blk;
	daddr64_t  last_blk;
	int  breadcnt;
	int  i;
	int  error = 0;

	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	start_blk = ((daddr64_t)blkstart * blksize) / iosize;
	last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;
	blkno = 0;

	while (blkno < last_blk) {
		/*
		 * Read up to a megabyte
		 */
		offset = bufp;
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
			error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
			if (error) {
				printf("hfs_clonesysfile: meta_bread error %d\n", error);
				goto out;
			}
			if (buf_count(bp) != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
				goto out;
			}
			bcopy((char *)buf_dataptr(bp), offset, iosize);

			buf_markinvalid(bp);
			buf_brelse(bp);
			bp = NULL;

			offset += iosize;
		}

		/*
		 * Write up to a megabyte
		 */
		offset = bufp;
		for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
			bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
			if (bp == NULL) {
				printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
				error = EIO;
				goto out;
			}
			bcopy(offset, (char *)buf_dataptr(bp), iosize);
			error = (int)buf_bwrite(bp);
			bp = NULL;
			if (error)
				goto out;
			offset += iosize;
		}
	}
out:
	if (bp) {
		buf_brelse(bp);
	}

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	error = hfs_fsync(vp, MNT_WAIT, 0, p);

	return (error);
}