2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* @(#)hfs_readwrite.c 1.0
30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/resourcevar.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/filedesc.h>
45 #include <sys/kauth.h>
46 #include <sys/vnode.h>
47 #include <sys/vnode_internal.h>
49 #include <sys/vfs_context.h>
50 #include <sys/fsevents.h>
51 #include <kern/kalloc.h>
53 #include <sys/sysctl.h>
55 #include <miscfs/specfs/specdev.h>
58 #include <sys/ubc_internal.h>
60 #include <vm/vm_pageout.h>
61 #include <vm/vm_kern.h>
63 #include <sys/kdebug.h>
66 #include "hfs_attrlist.h"
67 #include "hfs_endian.h"
68 #include "hfs_fsctl.h"
69 #include "hfs_quota.h"
70 #include "hfscommon/headers/FileMgrInternal.h"
71 #include "hfscommon/headers/BTreesInternal.h"
72 #include "hfs_cnode.h"
75 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
78 MAXHFSFILESIZE
= 0x7FFFFFFF /* this needs to go in the mount structure */
81 /* from bsd/vfs/vfs_cluster.c */
82 extern int is_file_clean(vnode_t vp
, off_t filesize
);
83 /* from bsd/hfs/hfs_vfsops.c */
84 extern int hfs_vfs_vget(struct mount
*mp
, ino64_t ino
, struct vnode
**vpp
, vfs_context_t context
);
86 static int hfs_clonelink(struct vnode
*, int, kauth_cred_t
, struct proc
*);
87 static int hfs_clonefile(struct vnode
*, int, int, int);
88 static int hfs_clonesysfile(struct vnode
*, int, int, int, kauth_cred_t
, struct proc
*);
90 int flush_cache_on_write
= 0;
91 SYSCTL_INT (_kern
, OID_AUTO
, flush_cache_on_write
, CTLFLAG_RW
, &flush_cache_on_write
, 0, "always flush the drive cache on writes to uncached files");
95 * Read data from a file.
98 hfs_vnop_read(struct vnop_read_args
*ap
)
100 uio_t uio
= ap
->a_uio
;
101 struct vnode
*vp
= ap
->a_vp
;
104 struct hfsmount
*hfsmp
;
107 off_t start_resid
= uio_resid(uio
);
108 off_t offset
= uio_offset(uio
);
112 /* Preflight checks */
113 if (!vnode_isreg(vp
)) {
114 /* can only read regular files */
120 if (start_resid
== 0)
121 return (0); /* Nothing left to do */
123 return (EINVAL
); /* cant read from a negative offset */
129 /* Protect against a size change. */
130 hfs_lock_truncate(cp
, 0);
132 filesize
= fp
->ff_size
;
133 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
134 if (offset
> filesize
) {
135 if ((hfsmp
->hfs_flags
& HFS_STANDARD
) &&
136 (offset
> (off_t
)MAXHFSFILESIZE
)) {
142 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 12)) | DBG_FUNC_START
,
143 (int)uio_offset(uio
), uio_resid(uio
), (int)filesize
, (int)filebytes
, 0);
145 retval
= cluster_read(vp
, uio
, filesize
, ap
->a_ioflag
);
147 cp
->c_touch_acctime
= TRUE
;
149 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 12)) | DBG_FUNC_END
,
150 (int)uio_offset(uio
), uio_resid(uio
), (int)filesize
, (int)filebytes
, 0);
153 * Keep track blocks read
155 if (hfsmp
->hfc_stage
== HFC_RECORDING
&& retval
== 0) {
156 int took_cnode_lock
= 0;
159 bytesread
= start_resid
- uio_resid(uio
);
161 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
162 if ((fp
->ff_bytesread
+ bytesread
) > 0x00000000ffffffff) {
163 hfs_lock(cp
, HFS_FORCE_LOCK
);
167 * If this file hasn't been seen since the start of
168 * the current sampling period then start over.
170 if (cp
->c_atime
< hfsmp
->hfc_timebase
) {
173 fp
->ff_bytesread
= bytesread
;
175 cp
->c_atime
= tv
.tv_sec
;
177 fp
->ff_bytesread
+= bytesread
;
183 hfs_unlock_truncate(cp
, 0);
188 * Write data to a file.
191 hfs_vnop_write(struct vnop_write_args
*ap
)
193 uio_t uio
= ap
->a_uio
;
194 struct vnode
*vp
= ap
->a_vp
;
197 struct hfsmount
*hfsmp
;
198 kauth_cred_t cred
= NULL
;
201 off_t bytesToAdd
= 0;
202 off_t actualBytesAdded
;
207 int ioflag
= ap
->a_ioflag
;
210 int cnode_locked
= 0;
211 int partialwrite
= 0;
212 int exclusive_lock
= 0;
214 // LP64todo - fix this! uio_resid may be 64-bit value
215 resid
= uio_resid(uio
);
216 offset
= uio_offset(uio
);
218 if (ioflag
& IO_APPEND
) {
226 if (!vnode_isreg(vp
))
227 return (EPERM
); /* Can only write regular files */
233 eflags
= kEFDeferMask
; /* defer file block allocations */
234 #ifdef HFS_SPARSE_DEV
236 * When the underlying device is sparse and space
237 * is low (< 8MB), stop doing delayed allocations
238 * and begin doing synchronous I/O.
240 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
241 (hfs_freeblks(hfsmp
, 0) < 2048)) {
242 eflags
&= ~kEFDeferMask
;
245 #endif /* HFS_SPARSE_DEV */
248 /* Protect against a size change. */
249 hfs_lock_truncate(cp
, exclusive_lock
);
251 if (ioflag
& IO_APPEND
) {
252 uio_setoffset(uio
, fp
->ff_size
);
253 offset
= fp
->ff_size
;
255 if ((cp
->c_flags
& APPEND
) && offset
!= fp
->ff_size
) {
260 origFileSize
= fp
->ff_size
;
261 writelimit
= offset
+ resid
;
262 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
264 /* If the truncate lock is shared, and if we either have virtual
265 * blocks or will need to extend the file, upgrade the truncate
266 * to exclusive lock. If upgrade fails, we lose the lock and
267 * have to get exclusive lock again
269 if ((exclusive_lock
== 0) &&
270 ((fp
->ff_unallocblocks
!= 0) || (writelimit
> filebytes
))) {
272 /* Lock upgrade failed and we lost our shared lock, try again */
273 if (lck_rw_lock_shared_to_exclusive(&cp
->c_truncatelock
) == FALSE
) {
278 if ( (retval
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
))) {
283 if (!exclusive_lock
) {
284 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_START
,
285 (int)offset
, uio_resid(uio
), (int)fp
->ff_size
,
289 /* Check if we do not need to extend the file */
290 if (writelimit
<= filebytes
) {
294 cred
= vfs_context_ucred(ap
->a_context
);
295 bytesToAdd
= writelimit
- filebytes
;
298 retval
= hfs_chkdq(cp
, (int64_t)(roundup(bytesToAdd
, hfsmp
->blockSize
)),
304 if (hfs_start_transaction(hfsmp
) != 0) {
309 while (writelimit
> filebytes
) {
310 bytesToAdd
= writelimit
- filebytes
;
311 if (cred
&& suser(cred
, NULL
) != 0)
312 eflags
|= kEFReserveMask
;
314 /* Protect extents b-tree and allocation bitmap */
315 lockflags
= SFL_BITMAP
;
316 if (overflow_extents(fp
))
317 lockflags
|= SFL_EXTENTS
;
318 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
320 /* Files that are changing size are not hot file candidates. */
321 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
322 fp
->ff_bytesread
= 0;
324 retval
= MacToVFSError(ExtendFileC (hfsmp
, (FCB
*)fp
, bytesToAdd
,
325 0, eflags
, &actualBytesAdded
));
327 hfs_systemfile_unlock(hfsmp
, lockflags
);
329 if ((actualBytesAdded
== 0) && (retval
== E_NONE
))
331 if (retval
!= E_NONE
)
333 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
334 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_NONE
,
335 (int)offset
, uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
337 (void) hfs_update(vp
, TRUE
);
338 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
339 (void) hfs_end_transaction(hfsmp
);
342 * If we didn't grow the file enough try a partial write.
343 * POSIX expects this behavior.
345 if ((retval
== ENOSPC
) && (filebytes
> offset
)) {
348 uio_setresid(uio
, (uio_resid(uio
) - bytesToAdd
));
350 writelimit
= filebytes
;
353 if (retval
== E_NONE
) {
361 struct rl_entry
*invalid_range
;
363 if (writelimit
> fp
->ff_size
)
364 filesize
= writelimit
;
366 filesize
= fp
->ff_size
;
368 lflag
= ioflag
& ~(IO_TAILZEROFILL
| IO_HEADZEROFILL
| IO_NOZEROVALID
| IO_NOZERODIRTY
);
370 if (offset
<= fp
->ff_size
) {
371 zero_off
= offset
& ~PAGE_MASK_64
;
373 /* Check to see whether the area between the zero_offset and the start
374 of the transfer to see whether is invalid and should be zero-filled
375 as part of the transfer:
377 if (offset
> zero_off
) {
378 if (rl_scan(&fp
->ff_invalidranges
, zero_off
, offset
- 1, &invalid_range
) != RL_NOOVERLAP
)
379 lflag
|= IO_HEADZEROFILL
;
382 off_t eof_page_base
= fp
->ff_size
& ~PAGE_MASK_64
;
384 /* The bytes between fp->ff_size and uio->uio_offset must never be
385 read without being zeroed. The current last block is filled with zeroes
386 if it holds valid data but in all cases merely do a little bookkeeping
387 to track the area from the end of the current last page to the start of
388 the area actually written. For the same reason only the bytes up to the
389 start of the page where this write will start is invalidated; any remainder
390 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
392 Note that inval_start, the start of the page after the current EOF,
393 may be past the start of the write, in which case the zeroing
394 will be handled by the cluser_write of the actual data.
396 inval_start
= (fp
->ff_size
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
397 inval_end
= offset
& ~PAGE_MASK_64
;
398 zero_off
= fp
->ff_size
;
400 if ((fp
->ff_size
& PAGE_MASK_64
) &&
401 (rl_scan(&fp
->ff_invalidranges
,
404 &invalid_range
) != RL_NOOVERLAP
)) {
405 /* The page containing the EOF is not valid, so the
406 entire page must be made inaccessible now. If the write
407 starts on a page beyond the page containing the eof
408 (inval_end > eof_page_base), add the
409 whole page to the range to be invalidated. Otherwise
410 (i.e. if the write starts on the same page), zero-fill
411 the entire page explicitly now:
413 if (inval_end
> eof_page_base
) {
414 inval_start
= eof_page_base
;
416 zero_off
= eof_page_base
;
420 if (inval_start
< inval_end
) {
422 /* There's some range of data that's going to be marked invalid */
424 if (zero_off
< inval_start
) {
425 /* The pages between inval_start and inval_end are going to be invalidated,
426 and the actual write will start on a page past inval_end. Now's the last
427 chance to zero-fill the page containing the EOF:
431 retval
= cluster_write(vp
, (uio_t
) 0,
432 fp
->ff_size
, inval_start
,
434 lflag
| IO_HEADZEROFILL
| IO_NOZERODIRTY
);
435 hfs_lock(cp
, HFS_FORCE_LOCK
);
437 if (retval
) goto ioerr_exit
;
438 offset
= uio_offset(uio
);
441 /* Mark the remaining area of the newly allocated space as invalid: */
442 rl_add(inval_start
, inval_end
- 1 , &fp
->ff_invalidranges
);
444 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
445 zero_off
= fp
->ff_size
= inval_end
;
448 if (offset
> zero_off
) lflag
|= IO_HEADZEROFILL
;
451 /* Check to see whether the area between the end of the write and the end of
452 the page it falls in is invalid and should be zero-filled as part of the transfer:
454 tail_off
= (writelimit
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
455 if (tail_off
> filesize
) tail_off
= filesize
;
456 if (tail_off
> writelimit
) {
457 if (rl_scan(&fp
->ff_invalidranges
, writelimit
, tail_off
- 1, &invalid_range
) != RL_NOOVERLAP
) {
458 lflag
|= IO_TAILZEROFILL
;
463 * if the write starts beyond the current EOF (possibly advanced in the
464 * zeroing of the last block, above), then we'll zero fill from the current EOF
465 * to where the write begins:
467 * NOTE: If (and ONLY if) the portion of the file about to be written is
468 * before the current EOF it might be marked as invalid now and must be
469 * made readable (removed from the invalid ranges) before cluster_write
472 io_start
= (lflag
& IO_HEADZEROFILL
) ? zero_off
: offset
;
473 if (io_start
< fp
->ff_size
) {
476 io_end
= (lflag
& IO_TAILZEROFILL
) ? tail_off
: writelimit
;
477 rl_remove(io_start
, io_end
- 1, &fp
->ff_invalidranges
);
484 * We need to tell UBC the fork's new size BEFORE calling
485 * cluster_write, in case any of the new pages need to be
486 * paged out before cluster_write completes (which does happen
487 * in embedded systems due to extreme memory pressure).
488 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
489 * will be, so that it can pass that on to cluster_pageout, and
490 * allow those pageouts.
492 * We don't update ff_size yet since we don't want pageins to
493 * be able to see uninitialized data between the old and new
494 * EOF, until cluster_write has completed and initialized that
497 * The vnode pager relies on the file size last given to UBC via
498 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
499 * ff_size (whichever is larger). NOTE: ff_new_size is always
500 * zero, unless we are extending the file via write.
502 if (filesize
> fp
->ff_size
) {
503 fp
->ff_new_size
= filesize
;
504 ubc_setsize(vp
, filesize
);
506 retval
= cluster_write(vp
, uio
, fp
->ff_size
, filesize
, zero_off
,
507 tail_off
, lflag
| IO_NOZERODIRTY
);
509 fp
->ff_new_size
= 0; /* no longer extending; use ff_size */
510 if (filesize
> origFileSize
) {
511 ubc_setsize(vp
, origFileSize
);
516 if (filesize
> origFileSize
) {
517 fp
->ff_size
= filesize
;
519 /* Files that are changing size are not hot file candidates. */
520 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
521 fp
->ff_bytesread
= 0;
524 fp
->ff_new_size
= 0; /* ff_size now has the correct size */
526 /* If we wrote some bytes, then touch the change and mod times */
527 if (resid
> uio_resid(uio
)) {
528 cp
->c_touch_chgtime
= TRUE
;
529 cp
->c_touch_modtime
= TRUE
;
533 uio_setresid(uio
, (uio_resid(uio
) + bytesToAdd
));
537 // XXXdbg - see radar 4871353 for more info
539 if (flush_cache_on_write
&& ((ioflag
& IO_NOCACHE
) || vnode_isnocache(vp
))) {
540 VNOP_IOCTL(hfsmp
->hfs_devvp
, DKIOCSYNCHRONIZECACHE
, NULL
, FWRITE
, NULL
);
543 HFS_KNOTE(vp
, NOTE_WRITE
);
547 * If we successfully wrote any data, and we are not the superuser
548 * we clear the setuid and setgid bits as a precaution against
551 if (cp
->c_mode
& (S_ISUID
| S_ISGID
)) {
552 cred
= vfs_context_ucred(ap
->a_context
);
553 if (resid
> uio_resid(uio
) && cred
&& suser(cred
, NULL
)) {
555 hfs_lock(cp
, HFS_FORCE_LOCK
);
558 cp
->c_mode
&= ~(S_ISUID
| S_ISGID
);
562 if (ioflag
& IO_UNIT
) {
564 hfs_lock(cp
, HFS_FORCE_LOCK
);
567 (void)hfs_truncate(vp
, origFileSize
, ioflag
& IO_SYNC
,
569 // LP64todo - fix this! resid needs to by user_ssize_t
570 uio_setoffset(uio
, (uio_offset(uio
) - (resid
- uio_resid(uio
))));
571 uio_setresid(uio
, resid
);
572 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
574 } else if ((ioflag
& IO_SYNC
) && (resid
> uio_resid(uio
))) {
576 hfs_lock(cp
, HFS_FORCE_LOCK
);
579 retval
= hfs_update(vp
, TRUE
);
581 /* Updating vcbWrCnt doesn't need to be atomic. */
584 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_END
,
585 (int)uio_offset(uio
), uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
589 hfs_unlock_truncate(cp
, exclusive_lock
);
593 /* support for the "bulk-access" fcntl */
595 #define CACHE_LEVELS 16
596 #define NUM_CACHE_ENTRIES (64*16)
597 #define PARENT_IDS_FLAG 0x100
599 struct access_cache
{
601 int cachehits
; /* these two for statistics gathering */
603 unsigned int *acache
;
604 unsigned char *haveaccess
;
608 uid_t uid
; /* IN: effective user id */
609 short flags
; /* IN: access requested (i.e. R_OK) */
610 short num_groups
; /* IN: number of groups user belongs to */
611 int num_files
; /* IN: number of files to process */
612 int *file_ids
; /* IN: array of file ids */
613 gid_t
*groups
; /* IN: array of groups */
614 short *access
; /* OUT: access info for each file (0 for 'has access') */
617 struct user_access_t
{
618 uid_t uid
; /* IN: effective user id */
619 short flags
; /* IN: access requested (i.e. R_OK) */
620 short num_groups
; /* IN: number of groups user belongs to */
621 int num_files
; /* IN: number of files to process */
622 user_addr_t file_ids
; /* IN: array of file ids */
623 user_addr_t groups
; /* IN: array of groups */
624 user_addr_t access
; /* OUT: access info for each file (0 for 'has access') */
628 // these are the "extended" versions of the above structures
629 // note that it is crucial that they be different sized than
630 // the regular version
631 struct ext_access_t
{
632 uint32_t flags
; /* IN: access requested (i.e. R_OK) */
633 uint32_t num_files
; /* IN: number of files to process */
634 uint32_t map_size
; /* IN: size of the bit map */
635 uint32_t *file_ids
; /* IN: Array of file ids */
636 char *bitmap
; /* OUT: hash-bitmap of interesting directory ids */
637 short *access
; /* OUT: access info for each file (0 for 'has access') */
638 uint32_t num_parents
; /* future use */
639 cnid_t
*parents
; /* future use */
642 struct ext_user_access_t
{
643 uint32_t flags
; /* IN: access requested (i.e. R_OK) */
644 uint32_t num_files
; /* IN: number of files to process */
645 uint32_t map_size
; /* IN: size of the bit map */
646 user_addr_t file_ids
; /* IN: array of file ids */
647 user_addr_t bitmap
; /* IN: array of groups */
648 user_addr_t access
; /* OUT: access info for each file (0 for 'has access') */
649 uint32_t num_parents
;/* future use */
650 user_addr_t parents
;/* future use */
655 * Perform a binary search for the given parent_id. Return value is
656 * the index if there is a match. If no_match_indexp is non-NULL it
657 * will be assigned with the index to insert the item (even if it was
660 static int cache_binSearch(cnid_t
*array
, unsigned int hi
, cnid_t parent_id
, int *no_match_indexp
)
666 unsigned int mid
= ((hi
- lo
)/2) + lo
;
667 unsigned int this_id
= array
[mid
];
669 if (parent_id
== this_id
) {
674 if (parent_id
< this_id
) {
679 if (parent_id
> this_id
) {
685 /* check if lo and hi converged on the match */
686 if (parent_id
== array
[hi
]) {
690 if (no_match_indexp
) {
691 *no_match_indexp
= hi
;
699 lookup_bucket(struct access_cache
*cache
, int *indexp
, cnid_t parent_id
)
703 int index
, no_match_index
;
705 if (cache
->numcached
== 0) {
707 return 0; // table is empty, so insert at index=0 and report no match
710 if (cache
->numcached
> NUM_CACHE_ENTRIES
) {
711 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
712 cache->numcached, NUM_CACHE_ENTRIES);*/
713 cache
->numcached
= NUM_CACHE_ENTRIES
;
716 hi
= cache
->numcached
- 1;
718 index
= cache_binSearch(cache
->acache
, hi
, parent_id
, &no_match_index
);
720 /* if no existing entry found, find index for new one */
722 index
= no_match_index
;
733 * Add a node to the access_cache at the given index (or do a lookup first
734 * to find the index if -1 is passed in). We currently do a replace rather
735 * than an insert if the cache is full.
738 add_node(struct access_cache
*cache
, int index
, cnid_t nodeID
, int access
)
740 int lookup_index
= -1;
742 /* need to do a lookup first if -1 passed for index */
744 if (lookup_bucket(cache
, &lookup_index
, nodeID
)) {
745 if (cache
->haveaccess
[lookup_index
] != access
&& cache
->haveaccess
[lookup_index
] == ESRCH
) {
746 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
747 cache
->haveaccess
[lookup_index
] = access
;
750 /* mission accomplished */
753 index
= lookup_index
;
758 /* if the cache is full, do a replace rather than an insert */
759 if (cache
->numcached
>= NUM_CACHE_ENTRIES
) {
760 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
761 cache
->numcached
= NUM_CACHE_ENTRIES
-1;
763 if (index
> cache
->numcached
) {
764 // printf("index %d pinned to %d\n", index, cache->numcached);
765 index
= cache
->numcached
;
769 if (index
< cache
->numcached
&& index
< NUM_CACHE_ENTRIES
&& nodeID
> cache
->acache
[index
]) {
773 if (index
>= 0 && index
< cache
->numcached
) {
774 /* only do bcopy if we're inserting */
775 bcopy( cache
->acache
+index
, cache
->acache
+(index
+1), (cache
->numcached
- index
)*sizeof(int) );
776 bcopy( cache
->haveaccess
+index
, cache
->haveaccess
+(index
+1), (cache
->numcached
- index
)*sizeof(unsigned char) );
779 cache
->acache
[index
] = nodeID
;
780 cache
->haveaccess
[index
] = access
;
794 snoop_callback(const struct cat_desc
*descp
, const struct cat_attr
*attrp
, void * arg
)
796 struct cinfo
*cip
= (struct cinfo
*)arg
;
798 cip
->uid
= attrp
->ca_uid
;
799 cip
->gid
= attrp
->ca_gid
;
800 cip
->mode
= attrp
->ca_mode
;
801 cip
->parentcnid
= descp
->cd_parentcnid
;
802 cip
->recflags
= attrp
->ca_recflags
;
808 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
809 * isn't incore, then go to the catalog.
812 do_attr_lookup(struct hfsmount
*hfsmp
, struct access_cache
*cache
, dev_t dev
, cnid_t cnid
,
813 struct cnode
*skip_cp
, CatalogKey
*keyp
, struct cat_attr
*cnattrp
)
817 /* if this id matches the one the fsctl was called with, skip the lookup */
818 if (cnid
== skip_cp
->c_cnid
) {
819 cnattrp
->ca_uid
= skip_cp
->c_uid
;
820 cnattrp
->ca_gid
= skip_cp
->c_gid
;
821 cnattrp
->ca_mode
= skip_cp
->c_mode
;
822 keyp
->hfsPlus
.parentID
= skip_cp
->c_parentcnid
;
826 /* otherwise, check the cnode hash incase the file/dir is incore */
827 if (hfs_chash_snoop(dev
, cnid
, snoop_callback
, &c_info
) == 0) {
828 cnattrp
->ca_uid
= c_info
.uid
;
829 cnattrp
->ca_gid
= c_info
.gid
;
830 cnattrp
->ca_mode
= c_info
.mode
;
831 cnattrp
->ca_recflags
= c_info
.recflags
;
832 keyp
->hfsPlus
.parentID
= c_info
.parentcnid
;
836 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_CATALOG
, HFS_SHARED_LOCK
);
838 /* lookup this cnid in the catalog */
839 error
= cat_getkeyplusattr(hfsmp
, cnid
, keyp
, cnattrp
);
841 hfs_systemfile_unlock(hfsmp
, lockflags
);
852 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
853 * up to CACHE_LEVELS as we progress towards the root.
856 do_access_check(struct hfsmount
*hfsmp
, int *err
, struct access_cache
*cache
, HFSCatalogNodeID nodeID
,
857 struct cnode
*skip_cp
, struct proc
*theProcPtr
, kauth_cred_t myp_ucred
, dev_t dev
,
858 struct vfs_context
*my_context
,
862 uint32_t num_parents
)
866 HFSCatalogNodeID thisNodeID
;
867 unsigned int myPerms
;
868 struct cat_attr cnattr
;
869 int cache_index
= -1, scope_index
= -1, scope_idx_start
= -1;
872 int i
= 0, ids_to_cache
= 0;
873 int parent_ids
[CACHE_LEVELS
];
876 while (thisNodeID
>= kRootDirID
) {
877 myResult
= 0; /* default to "no access" */
879 /* check the cache before resorting to hitting the catalog */
881 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
882 * to look any further after hitting cached dir */
884 if (lookup_bucket(cache
, &cache_index
, thisNodeID
)) {
886 myErr
= cache
->haveaccess
[cache_index
];
887 if (scope_index
!= -1) {
888 if (myErr
== ESRCH
) {
892 scope_index
= 0; // so we'll just use the cache result
893 scope_idx_start
= ids_to_cache
;
895 myResult
= (myErr
== 0) ? 1 : 0;
896 goto ExitThisRoutine
;
902 tmp
= cache_binSearch(parents
, num_parents
-1, thisNodeID
, NULL
);
903 if (scope_index
== -1)
905 if (tmp
!= -1 && scope_idx_start
== -1 && ids_to_cache
< CACHE_LEVELS
) {
906 scope_idx_start
= ids_to_cache
;
910 /* remember which parents we want to cache */
911 if (ids_to_cache
< CACHE_LEVELS
) {
912 parent_ids
[ids_to_cache
] = thisNodeID
;
915 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
916 if (bitmap
&& map_size
) {
917 bitmap
[(thisNodeID
/8)%(map_size
)]|=(1<<(thisNodeID
&7));
921 /* do the lookup (checks the cnode hash, then the catalog) */
922 myErr
= do_attr_lookup(hfsmp
, cache
, dev
, thisNodeID
, skip_cp
, &catkey
, &cnattr
);
924 goto ExitThisRoutine
; /* no access */
927 /* Root always gets access. */
928 if (suser(myp_ucred
, NULL
) == 0) {
929 thisNodeID
= catkey
.hfsPlus
.parentID
;
934 // if the thing has acl's, do the full permission check
935 if ((cnattr
.ca_recflags
& kHFSHasSecurityMask
) != 0) {
938 /* get the vnode for this cnid */
939 myErr
= hfs_vget(hfsmp
, thisNodeID
, &vp
, 0);
942 goto ExitThisRoutine
;
945 thisNodeID
= VTOC(vp
)->c_parentcnid
;
947 hfs_unlock(VTOC(vp
));
949 if (vnode_vtype(vp
) == VDIR
) {
950 myErr
= vnode_authorize(vp
, NULL
, (KAUTH_VNODE_SEARCH
| KAUTH_VNODE_LIST_DIRECTORY
), my_context
);
952 myErr
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
, my_context
);
958 goto ExitThisRoutine
;
963 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
,
964 cnattr
.ca_mode
, hfsmp
->hfs_mp
,
965 myp_ucred
, theProcPtr
);
967 if (cnattr
.ca_mode
& S_IFDIR
) {
972 if ( (myPerms
& flags
) != flags
) {
975 goto ExitThisRoutine
; /* no access */
978 /* up the hierarchy we go */
979 thisNodeID
= catkey
.hfsPlus
.parentID
;
983 /* if here, we have access to this node */
987 if (parents
&& myErr
== 0 && scope_index
== -1) {
996 /* cache the parent directory(ies) */
997 for (i
= 0; i
< ids_to_cache
; i
++) {
998 if (myErr
== 0 && parents
&& (scope_idx_start
== -1 || i
> scope_idx_start
)) {
999 add_node(cache
, -1, parent_ids
[i
], ESRCH
);
1001 add_node(cache
, -1, parent_ids
[i
], myErr
);
1009 do_bulk_access_check(struct hfsmount
*hfsmp
, struct vnode
*vp
,
1010 struct vnop_ioctl_args
*ap
, int arg_size
, vfs_context_t context
)
1015 * NOTE: on entry, the vnode is locked. Incase this vnode
1016 * happens to be in our list of file_ids, we'll note it
1017 * avoid calling hfs_chashget_nowait() on that id as that
1018 * will cause a "locking against myself" panic.
1020 Boolean check_leaf
= true;
1022 struct ext_user_access_t
*user_access_structp
;
1023 struct ext_user_access_t tmp_user_access
;
1024 struct access_cache cache
;
1029 dev_t dev
= VTOC(vp
)->c_dev
;
1032 unsigned int num_files
= 0;
1034 int num_parents
= 0;
1038 cnid_t
*parents
=NULL
;
1042 cnid_t prevParent_cnid
= 0;
1043 unsigned int myPerms
;
1045 struct cat_attr cnattr
;
1047 struct cnode
*skip_cp
= VTOC(vp
);
1048 kauth_cred_t cred
= vfs_context_ucred(context
);
1049 proc_t p
= vfs_context_proc(context
);
1051 is64bit
= proc_is64bit(p
);
1053 /* initialize the local cache and buffers */
1054 cache
.numcached
= 0;
1055 cache
.cachehits
= 0;
1057 cache
.acache
= NULL
;
1058 cache
.haveaccess
= NULL
;
1060 /* struct copyin done during dispatch... need to copy file_id array separately */
1061 if (ap
->a_data
== NULL
) {
1063 goto err_exit_bulk_access
;
1067 if (arg_size
!= sizeof(struct ext_user_access_t
)) {
1069 goto err_exit_bulk_access
;
1072 user_access_structp
= (struct ext_user_access_t
*)ap
->a_data
;
1074 } else if (arg_size
== sizeof(struct access_t
)) {
1075 struct access_t
*accessp
= (struct access_t
*)ap
->a_data
;
1077 // convert an old style bulk-access struct to the new style
1078 tmp_user_access
.flags
= accessp
->flags
;
1079 tmp_user_access
.num_files
= accessp
->num_files
;
1080 tmp_user_access
.map_size
= 0;
1081 tmp_user_access
.file_ids
= CAST_USER_ADDR_T(accessp
->file_ids
);
1082 tmp_user_access
.bitmap
= USER_ADDR_NULL
;
1083 tmp_user_access
.access
= CAST_USER_ADDR_T(accessp
->access
);
1084 tmp_user_access
.num_parents
= 0;
1085 user_access_structp
= &tmp_user_access
;
1087 } else if (arg_size
== sizeof(struct ext_access_t
)) {
1088 struct ext_access_t
*accessp
= (struct ext_access_t
*)ap
->a_data
;
1090 // up-cast from a 32-bit version of the struct
1091 tmp_user_access
.flags
= accessp
->flags
;
1092 tmp_user_access
.num_files
= accessp
->num_files
;
1093 tmp_user_access
.map_size
= accessp
->map_size
;
1094 tmp_user_access
.num_parents
= accessp
->num_parents
;
1096 tmp_user_access
.file_ids
= CAST_USER_ADDR_T(accessp
->file_ids
);
1097 tmp_user_access
.bitmap
= CAST_USER_ADDR_T(accessp
->bitmap
);
1098 tmp_user_access
.access
= CAST_USER_ADDR_T(accessp
->access
);
1099 tmp_user_access
.parents
= CAST_USER_ADDR_T(accessp
->parents
);
1101 user_access_structp
= &tmp_user_access
;
1104 goto err_exit_bulk_access
;
1107 map_size
= user_access_structp
->map_size
;
1109 num_files
= user_access_structp
->num_files
;
1111 num_parents
= user_access_structp
->num_parents
;
1113 if (num_files
< 1) {
1114 goto err_exit_bulk_access
;
1116 if (num_files
> 1024) {
1118 goto err_exit_bulk_access
;
1121 if (num_parents
> 1024) {
1123 goto err_exit_bulk_access
;
1126 file_ids
= (int *) kalloc(sizeof(int) * num_files
);
1127 access
= (short *) kalloc(sizeof(short) * num_files
);
1129 bitmap
= (char *) kalloc(sizeof(char) * map_size
);
1133 parents
= (cnid_t
*) kalloc(sizeof(cnid_t
) * num_parents
);
1136 cache
.acache
= (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES
);
1137 cache
.haveaccess
= (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES
);
1139 if (file_ids
== NULL
|| access
== NULL
|| (map_size
!= 0 && bitmap
== NULL
) || cache
.acache
== NULL
|| cache
.haveaccess
== NULL
) {
1141 kfree(file_ids
, sizeof(int) * num_files
);
1144 kfree(bitmap
, sizeof(char) * map_size
);
1147 kfree(access
, sizeof(short) * num_files
);
1150 kfree(cache
.acache
, sizeof(int) * NUM_CACHE_ENTRIES
);
1152 if (cache
.haveaccess
) {
1153 kfree(cache
.haveaccess
, sizeof(unsigned char) * NUM_CACHE_ENTRIES
);
1156 kfree(parents
, sizeof(cnid_t
) * num_parents
);
1161 // make sure the bitmap is zero'ed out...
1163 bzero(bitmap
, (sizeof(char) * map_size
));
1166 if ((error
= copyin(user_access_structp
->file_ids
, (caddr_t
)file_ids
,
1167 num_files
* sizeof(int)))) {
1168 goto err_exit_bulk_access
;
1172 if ((error
= copyin(user_access_structp
->parents
, (caddr_t
)parents
,
1173 num_parents
* sizeof(cnid_t
)))) {
1174 goto err_exit_bulk_access
;
1178 flags
= user_access_structp
->flags
;
1179 if ((flags
& (F_OK
| R_OK
| W_OK
| X_OK
)) == 0) {
1183 /* check if we've been passed leaf node ids or parent ids */
1184 if (flags
& PARENT_IDS_FLAG
) {
1188 /* Check access to each file_id passed in */
1189 for (i
= 0; i
< num_files
; i
++) {
1191 cnid
= (cnid_t
) file_ids
[i
];
1193 /* root always has access */
1194 if ((!parents
) && (!suser(cred
, NULL
))) {
1200 /* do the lookup (checks the cnode hash, then the catalog) */
1201 error
= do_attr_lookup(hfsmp
, &cache
, dev
, cnid
, skip_cp
, &catkey
, &cnattr
);
1203 access
[i
] = (short) error
;
1208 // Check if the leaf matches one of the parent scopes
1209 leaf_index
= cache_binSearch(parents
, num_parents
-1, cnid
, NULL
);
1212 // if the thing has acl's, do the full permission check
1213 if ((cnattr
.ca_recflags
& kHFSHasSecurityMask
) != 0) {
1216 /* get the vnode for this cnid */
1217 myErr
= hfs_vget(hfsmp
, cnid
, &cvp
, 0);
1223 hfs_unlock(VTOC(cvp
));
1225 if (vnode_vtype(cvp
) == VDIR
) {
1226 myErr
= vnode_authorize(cvp
, NULL
, (KAUTH_VNODE_SEARCH
| KAUTH_VNODE_LIST_DIRECTORY
), context
);
1228 myErr
= vnode_authorize(cvp
, NULL
, KAUTH_VNODE_READ_DATA
, context
);
1237 /* before calling CheckAccess(), check the target file for read access */
1238 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
,
1239 cnattr
.ca_mode
, hfsmp
->hfs_mp
, cred
, p
);
1241 /* fail fast if no access */
1242 if ((myPerms
& flags
) == 0) {
1248 /* we were passed an array of parent ids */
1249 catkey
.hfsPlus
.parentID
= cnid
;
1252 /* if the last guy had the same parent and had access, we're done */
1253 if (i
> 0 && catkey
.hfsPlus
.parentID
== prevParent_cnid
&& access
[i
-1] == 0) {
1259 myaccess
= do_access_check(hfsmp
, &error
, &cache
, catkey
.hfsPlus
.parentID
,
1260 skip_cp
, p
, cred
, dev
, context
,bitmap
, map_size
, parents
, num_parents
);
1262 if (myaccess
|| (error
== ESRCH
&& leaf_index
!= -1)) {
1263 access
[i
] = 0; // have access.. no errors to report
1265 access
[i
] = (error
!= 0 ? (short) error
: EACCES
);
1268 prevParent_cnid
= catkey
.hfsPlus
.parentID
;
1271 /* copyout the access array */
1272 if ((error
= copyout((caddr_t
)access
, user_access_structp
->access
,
1273 num_files
* sizeof (short)))) {
1274 goto err_exit_bulk_access
;
1276 if (map_size
&& bitmap
) {
1277 if ((error
= copyout((caddr_t
)bitmap
, user_access_structp
->bitmap
,
1278 map_size
* sizeof (char)))) {
1279 goto err_exit_bulk_access
;
1284 err_exit_bulk_access
:
1286 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1289 kfree(file_ids
, sizeof(int) * num_files
);
1291 kfree(parents
, sizeof(cnid_t
) * num_parents
);
1293 kfree(bitmap
, sizeof(char) * map_size
);
1295 kfree(access
, sizeof(short) * num_files
);
1297 kfree(cache
.acache
, sizeof(int) * NUM_CACHE_ENTRIES
);
1298 if (cache
.haveaccess
)
1299 kfree(cache
.haveaccess
, sizeof(unsigned char) * NUM_CACHE_ENTRIES
);
1305 /* end "bulk-access" support */
1309 * Callback for use with freeze ioctl.
1312 hfs_freezewrite_callback(struct vnode
*vp
, __unused
void *cargs
)
1314 vnode_waitforwrites(vp
, 0, 0, 0, "hfs freeze");
1320 * Control filesystem operating characteristics.
1323 hfs_vnop_ioctl( struct vnop_ioctl_args
/* {
1328 vfs_context_t a_context;
1331 struct vnode
* vp
= ap
->a_vp
;
1332 struct hfsmount
*hfsmp
= VTOHFS(vp
);
1333 vfs_context_t context
= ap
->a_context
;
1334 kauth_cred_t cred
= vfs_context_ucred(context
);
1335 proc_t p
= vfs_context_proc(context
);
1336 struct vfsstatfs
*vfsp
;
1339 is64bit
= proc_is64bit(p
);
1341 switch (ap
->a_command
) {
1345 struct vnode
*file_vp
;
1351 /* Caller must be owner of file system. */
1352 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1353 if (suser(cred
, NULL
) &&
1354 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1357 /* Target vnode must be file system's root. */
1358 if (!vnode_isvroot(vp
)) {
1361 bufptr
= (char *)ap
->a_data
;
1362 cnid
= strtoul(bufptr
, NULL
, 10);
1364 /* We need to call hfs_vfs_vget to leverage the code that will fix the
1365 * origin list for us if needed, as opposed to calling hfs_vget, since
1366 * we will need it for the subsequent build_path call.
1368 if ((error
= hfs_vfs_vget(HFSTOVFS(hfsmp
), cnid
, &file_vp
, context
))) {
1371 error
= build_path(file_vp
, bufptr
, sizeof(pathname_t
), &outlen
, 0, context
);
1385 /* Caller must be owner of file system. */
1386 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1387 if (suser(cred
, NULL
) &&
1388 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1391 /* Target vnode must be file system's root. */
1392 if (!vnode_isvroot(vp
)) {
1395 linkfileid
= *(cnid_t
*)ap
->a_data
;
1396 if (linkfileid
< kHFSFirstUserCatalogNodeID
) {
1399 if ((error
= hfs_lookuplink(hfsmp
, linkfileid
, &prevlinkid
, &nextlinkid
))) {
1402 if (ap
->a_command
== HFS_NEXT_LINK
) {
1403 *(cnid_t
*)ap
->a_data
= nextlinkid
;
1405 *(cnid_t
*)ap
->a_data
= prevlinkid
;
1410 case HFS_RESIZE_PROGRESS
: {
1412 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1413 if (suser(cred
, NULL
) &&
1414 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1415 return (EACCES
); /* must be owner of file system */
1417 if (!vnode_isvroot(vp
)) {
1420 return hfs_resize_progress(hfsmp
, (u_int32_t
*)ap
->a_data
);
1423 case HFS_RESIZE_VOLUME
: {
1427 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1428 if (suser(cred
, NULL
) &&
1429 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1430 return (EACCES
); /* must be owner of file system */
1432 if (!vnode_isvroot(vp
)) {
1435 newsize
= *(u_int64_t
*)ap
->a_data
;
1436 cursize
= (u_int64_t
)hfsmp
->totalBlocks
* (u_int64_t
)hfsmp
->blockSize
;
1438 if (newsize
> cursize
) {
1439 return hfs_extendfs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
1440 } else if (newsize
< cursize
) {
1441 return hfs_truncatefs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
1446 case HFS_CHANGE_NEXT_ALLOCATION
: {
1447 int error
= 0; /* Assume success */
1450 if (vnode_vfsisrdonly(vp
)) {
1453 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1454 if (suser(cred
, NULL
) &&
1455 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1456 return (EACCES
); /* must be owner of file system */
1458 if (!vnode_isvroot(vp
)) {
1461 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1462 location
= *(u_int32_t
*)ap
->a_data
;
1463 if ((location
>= hfsmp
->allocLimit
) &&
1464 (location
!= HFS_NO_UPDATE_NEXT_ALLOCATION
)) {
1466 goto fail_change_next_allocation
;
1468 /* Return previous value. */
1469 *(u_int32_t
*)ap
->a_data
= hfsmp
->nextAllocation
;
1470 if (location
== HFS_NO_UPDATE_NEXT_ALLOCATION
) {
1471 /* On magic value for location, set nextAllocation to next block
1472 * after metadata zone and set flag in mount structure to indicate
1473 * that nextAllocation should not be updated again.
1475 HFS_UPDATE_NEXT_ALLOCATION(hfsmp
, hfsmp
->hfs_metazone_end
+ 1);
1476 hfsmp
->hfs_flags
|= HFS_SKIP_UPDATE_NEXT_ALLOCATION
;
1478 hfsmp
->hfs_flags
&= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION
;
1479 HFS_UPDATE_NEXT_ALLOCATION(hfsmp
, location
);
1481 MarkVCBDirty(hfsmp
);
1482 fail_change_next_allocation
:
1483 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1487 #ifdef HFS_SPARSE_DEV
1488 case HFS_SETBACKINGSTOREINFO
: {
1489 struct vnode
* bsfs_rootvp
;
1490 struct vnode
* di_vp
;
1491 struct hfs_backingstoreinfo
*bsdata
;
1494 if (hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) {
1497 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1498 if (suser(cred
, NULL
) &&
1499 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1500 return (EACCES
); /* must be owner of file system */
1502 bsdata
= (struct hfs_backingstoreinfo
*)ap
->a_data
;
1503 if (bsdata
== NULL
) {
1506 if ((error
= file_vnode(bsdata
->backingfd
, &di_vp
))) {
1509 if ((error
= vnode_getwithref(di_vp
))) {
1510 file_drop(bsdata
->backingfd
);
1514 if (vnode_mount(vp
) == vnode_mount(di_vp
)) {
1515 (void)vnode_put(di_vp
);
1516 file_drop(bsdata
->backingfd
);
1521 * Obtain the backing fs root vnode and keep a reference
1522 * on it. This reference will be dropped in hfs_unmount.
1524 error
= VFS_ROOT(vnode_mount(di_vp
), &bsfs_rootvp
, NULL
); /* XXX use context! */
1526 (void)vnode_put(di_vp
);
1527 file_drop(bsdata
->backingfd
);
1530 vnode_ref(bsfs_rootvp
);
1531 vnode_put(bsfs_rootvp
);
1533 hfsmp
->hfs_backingfs_rootvp
= bsfs_rootvp
;
1534 hfsmp
->hfs_flags
|= HFS_HAS_SPARSE_DEVICE
;
1535 hfsmp
->hfs_sparsebandblks
= bsdata
->bandsize
/ HFSTOVCB(hfsmp
)->blockSize
;
1536 hfsmp
->hfs_sparsebandblks
*= 4;
1538 vfs_markdependency(hfsmp
->hfs_mp
);
1540 (void)vnode_put(di_vp
);
1541 file_drop(bsdata
->backingfd
);
1544 case HFS_CLRBACKINGSTOREINFO
: {
1545 struct vnode
* tmpvp
;
1547 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1548 if (suser(cred
, NULL
) &&
1549 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1550 return (EACCES
); /* must be owner of file system */
1552 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
1553 hfsmp
->hfs_backingfs_rootvp
) {
1555 hfsmp
->hfs_flags
&= ~HFS_HAS_SPARSE_DEVICE
;
1556 tmpvp
= hfsmp
->hfs_backingfs_rootvp
;
1557 hfsmp
->hfs_backingfs_rootvp
= NULLVP
;
1558 hfsmp
->hfs_sparsebandblks
= 0;
1563 #endif /* HFS_SPARSE_DEV */
1571 mp
= vnode_mount(vp
);
1572 hfsmp
= VFSTOHFS(mp
);
1577 lck_rw_lock_exclusive(&hfsmp
->hfs_insync
);
1579 // flush things before we get started to try and prevent
1580 // dirty data from being paged out while we're frozen.
1581 // note: can't do this after taking the lock as it will
1582 // deadlock against ourselves.
1583 vnode_iterate(mp
, 0, hfs_freezewrite_callback
, NULL
);
1584 hfs_global_exclusive_lock_acquire(hfsmp
);
1585 journal_flush(hfsmp
->jnl
);
1587 // don't need to iterate on all vnodes, we just need to
1588 // wait for writes to the system files and the device vnode
1589 if (HFSTOVCB(hfsmp
)->extentsRefNum
)
1590 vnode_waitforwrites(HFSTOVCB(hfsmp
)->extentsRefNum
, 0, 0, 0, "hfs freeze");
1591 if (HFSTOVCB(hfsmp
)->catalogRefNum
)
1592 vnode_waitforwrites(HFSTOVCB(hfsmp
)->catalogRefNum
, 0, 0, 0, "hfs freeze");
1593 if (HFSTOVCB(hfsmp
)->allocationsRefNum
)
1594 vnode_waitforwrites(HFSTOVCB(hfsmp
)->allocationsRefNum
, 0, 0, 0, "hfs freeze");
1595 if (hfsmp
->hfs_attribute_vp
)
1596 vnode_waitforwrites(hfsmp
->hfs_attribute_vp
, 0, 0, 0, "hfs freeze");
1597 vnode_waitforwrites(hfsmp
->hfs_devvp
, 0, 0, 0, "hfs freeze");
1599 hfsmp
->hfs_freezing_proc
= current_proc();
1608 // if we're not the one who froze the fs then we
1610 if (hfsmp
->hfs_freezing_proc
!= current_proc()) {
1614 // NOTE: if you add code here, also go check the
1615 // code that "thaws" the fs in hfs_vnop_close()
1617 hfsmp
->hfs_freezing_proc
= NULL
;
1618 hfs_global_exclusive_lock_release(hfsmp
);
1619 lck_rw_unlock_exclusive(&hfsmp
->hfs_insync
);
1624 case HFS_BULKACCESS_FSCTL
: {
1627 if (hfsmp
->hfs_flags
& HFS_STANDARD
) {
1632 size
= sizeof(struct user_access_t
);
1634 size
= sizeof(struct access_t
);
1637 return do_bulk_access_check(hfsmp
, vp
, ap
, size
, context
);
1640 case HFS_EXT_BULKACCESS_FSCTL
: {
1643 if (hfsmp
->hfs_flags
& HFS_STANDARD
) {
1648 size
= sizeof(struct ext_user_access_t
);
1650 size
= sizeof(struct ext_access_t
);
1653 return do_bulk_access_check(hfsmp
, vp
, ap
, size
, context
);
1656 case HFS_SETACLSTATE
: {
1659 if (ap
->a_data
== NULL
) {
1663 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1664 state
= *(int *)ap
->a_data
;
1666 // super-user can enable or disable acl's on a volume.
1667 // the volume owner can only enable acl's
1668 if (!is_suser() && (state
== 0 || kauth_cred_getuid(cred
) != vfsp
->f_owner
)) {
1671 if (state
== 0 || state
== 1)
1672 return hfs_set_volxattr(hfsmp
, HFS_SETACLSTATE
, state
);
1677 case HFS_SET_XATTREXTENTS_STATE
: {
1680 if (ap
->a_data
== NULL
) {
1684 state
= *(int *)ap
->a_data
;
1686 /* Super-user can enable or disable extent-based extended
1687 * attribute support on a volume
1692 if (state
== 0 || state
== 1)
1693 return hfs_set_volxattr(hfsmp
, HFS_SET_XATTREXTENTS_STATE
, state
);
1701 error
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
);
1703 error
= hfs_fsync(vp
, MNT_WAIT
, TRUE
, p
);
1704 hfs_unlock(VTOC(vp
));
1711 register struct cnode
*cp
;
1714 if (!vnode_isreg(vp
))
1717 error
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
);
1721 * used by regression test to determine if
1722 * all the dirty pages (via write) have been cleaned
1723 * after a call to 'fsysnc'.
1725 error
= is_file_clean(vp
, VTOF(vp
)->ff_size
);
1732 register struct radvisory
*ra
;
1733 struct filefork
*fp
;
1736 if (!vnode_isreg(vp
))
1739 ra
= (struct radvisory
*)(ap
->a_data
);
1742 /* Protect against a size change. */
1743 hfs_lock_truncate(VTOC(vp
), TRUE
);
1745 if (ra
->ra_offset
>= fp
->ff_size
) {
1748 error
= advisory_read(vp
, fp
->ff_size
, ra
->ra_offset
, ra
->ra_count
);
1751 hfs_unlock_truncate(VTOC(vp
), TRUE
);
1755 case F_READBOOTSTRAP
:
1756 case F_WRITEBOOTSTRAP
:
1758 struct vnode
*devvp
= NULL
;
1759 user_fbootstraptransfer_t
*user_bootstrapp
;
1763 daddr64_t blockNumber
;
1767 user_fbootstraptransfer_t user_bootstrap
;
1769 if (!vnode_isvroot(vp
))
1771 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1772 * to a user_fbootstraptransfer_t else we get a pointer to a
1773 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1776 user_bootstrapp
= (user_fbootstraptransfer_t
*)ap
->a_data
;
1779 fbootstraptransfer_t
*bootstrapp
= (fbootstraptransfer_t
*)ap
->a_data
;
1780 user_bootstrapp
= &user_bootstrap
;
1781 user_bootstrap
.fbt_offset
= bootstrapp
->fbt_offset
;
1782 user_bootstrap
.fbt_length
= bootstrapp
->fbt_length
;
1783 user_bootstrap
.fbt_buffer
= CAST_USER_ADDR_T(bootstrapp
->fbt_buffer
);
1785 if (user_bootstrapp
->fbt_offset
+ user_bootstrapp
->fbt_length
> 1024)
1788 devvp
= VTOHFS(vp
)->hfs_devvp
;
1789 auio
= uio_create(1, user_bootstrapp
->fbt_offset
,
1790 is64bit
? UIO_USERSPACE64
: UIO_USERSPACE32
,
1791 (ap
->a_command
== F_WRITEBOOTSTRAP
) ? UIO_WRITE
: UIO_READ
);
1792 uio_addiov(auio
, user_bootstrapp
->fbt_buffer
, user_bootstrapp
->fbt_length
);
1794 devBlockSize
= vfs_devblocksize(vnode_mount(vp
));
1796 while (uio_resid(auio
) > 0) {
1797 blockNumber
= uio_offset(auio
) / devBlockSize
;
1798 error
= (int)buf_bread(devvp
, blockNumber
, devBlockSize
, cred
, &bp
);
1800 if (bp
) buf_brelse(bp
);
1805 blockOffset
= uio_offset(auio
) % devBlockSize
;
1806 xfersize
= devBlockSize
- blockOffset
;
1807 error
= uiomove((caddr_t
)buf_dataptr(bp
) + blockOffset
, (int)xfersize
, auio
);
1813 if (uio_rw(auio
) == UIO_WRITE
) {
1814 error
= VNOP_BWRITE(bp
);
1827 case _IOC(IOC_OUT
,'h', 4, 0): /* Create date in local time */
1830 *(user_time_t
*)(ap
->a_data
) = (user_time_t
) (to_bsd_time(VTOVCB(vp
)->localCreateDate
));
1833 *(time_t *)(ap
->a_data
) = to_bsd_time(VTOVCB(vp
)->localCreateDate
);
1838 case HFS_GET_MOUNT_TIME
:
1839 return copyout(&hfsmp
->hfs_mount_time
, CAST_USER_ADDR_T(ap
->a_data
), sizeof(hfsmp
->hfs_mount_time
));
1842 case HFS_GET_LAST_MTIME
:
1843 return copyout(&hfsmp
->hfs_last_mounted_mtime
, CAST_USER_ADDR_T(ap
->a_data
), sizeof(hfsmp
->hfs_last_mounted_mtime
));
1846 case HFS_SET_BOOT_INFO
:
1847 if (!vnode_isvroot(vp
))
1849 if (!kauth_cred_issuser(cred
) && (kauth_cred_getuid(cred
) != vfs_statfs(HFSTOVFS(hfsmp
))->f_owner
))
1850 return(EACCES
); /* must be superuser or owner of filesystem */
1851 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1852 bcopy(ap
->a_data
, &hfsmp
->vcbFndrInfo
, sizeof(hfsmp
->vcbFndrInfo
));
1853 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1854 (void) hfs_flushvolumeheader(hfsmp
, MNT_WAIT
, 0);
1857 case HFS_GET_BOOT_INFO
:
1858 if (!vnode_isvroot(vp
))
1860 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1861 bcopy(&hfsmp
->vcbFndrInfo
, ap
->a_data
, sizeof(hfsmp
->vcbFndrInfo
));
1862 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1865 case HFS_MARK_BOOT_CORRUPT
:
1866 /* Mark the boot volume corrupt by setting
1867 * kHFSVolumeInconsistentBit in the volume header. This will
1868 * force fsck_hfs on next mount.
1874 /* Allowed only on the root vnode of the boot volume */
1875 if (!(vfs_flags(HFSTOVFS(hfsmp
)) & MNT_ROOTFS
) ||
1876 !vnode_isvroot(vp
)) {
1880 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
1881 hfs_mark_volume_inconsistent(hfsmp
);
1888 /* Should never get here */
1896 hfs_vnop_select(__unused
struct vnop_select_args
*ap
)
1898 struct vnop_select_args {
1903 vfs_context_t a_context;
1908 * We should really check to see if I/O is possible.
1914 * Converts a logical block number to a physical block, and optionally returns
1915 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1916 * The physical block number is based on the device block size, currently its 512.
1917 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
1920 hfs_bmap(struct vnode
*vp
, daddr_t bn
, struct vnode
**vpp
, daddr64_t
*bnp
, unsigned int *runp
)
1922 struct filefork
*fp
= VTOF(vp
);
1923 struct hfsmount
*hfsmp
= VTOHFS(vp
);
1924 int retval
= E_NONE
;
1925 u_int32_t logBlockSize
;
1926 size_t bytesContAvail
= 0;
1927 off_t blockposition
;
1932 * Check for underlying vnode requests and ensure that logical
1933 * to physical mapping is requested.
1936 *vpp
= hfsmp
->hfs_devvp
;
1940 logBlockSize
= GetLogicalBlockSize(vp
);
1941 blockposition
= (off_t
)bn
* logBlockSize
;
1943 lockExtBtree
= overflow_extents(fp
);
1946 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_EXTENTS
, HFS_EXCLUSIVE_LOCK
);
1948 retval
= MacToVFSError(
1949 MapFileBlockC (HFSTOVCB(hfsmp
),
1957 hfs_systemfile_unlock(hfsmp
, lockflags
);
1959 if (retval
== E_NONE
) {
1960 /* Figure out how many read ahead blocks there are */
1962 if (can_cluster(logBlockSize
)) {
1963 /* Make sure this result never goes negative: */
1964 *runp
= (bytesContAvail
< logBlockSize
) ? 0 : (bytesContAvail
/ logBlockSize
) - 1;
1974 * Convert logical block number to file offset.
1977 hfs_vnop_blktooff(struct vnop_blktooff_args
*ap
)
1979 struct vnop_blktooff_args {
1986 if (ap
->a_vp
== NULL
)
1988 *ap
->a_offset
= (off_t
)ap
->a_lblkno
* (off_t
)GetLogicalBlockSize(ap
->a_vp
);
1994 * Convert file offset to logical block number.
1997 hfs_vnop_offtoblk(struct vnop_offtoblk_args
*ap
)
1999 struct vnop_offtoblk_args {
2002 daddr64_t *a_lblkno;
2006 if (ap
->a_vp
== NULL
)
2008 *ap
->a_lblkno
= (daddr64_t
)(ap
->a_offset
/ (off_t
)GetLogicalBlockSize(ap
->a_vp
));
2014 * Map file offset to physical block number.
2016 * If this function is called for write operation, and if the file
2017 * had virtual blocks allocated (delayed allocation), real blocks
2018 * are allocated by calling ExtendFileC().
2020 * If this function is called for read operation, and if the file
2021 * had virtual blocks allocated (delayed allocation), no change
2022 * to the size of file is done, and if required, rangelist is
2023 * searched for mapping.
2025 * System file cnodes are expected to be locked (shared or exclusive).
2028 hfs_vnop_blockmap(struct vnop_blockmap_args
*ap
)
2030 struct vnop_blockmap_args {
2038 vfs_context_t a_context;
2042 struct vnode
*vp
= ap
->a_vp
;
2044 struct filefork
*fp
;
2045 struct hfsmount
*hfsmp
;
2046 size_t bytesContAvail
= 0;
2047 int retval
= E_NONE
;
2050 struct rl_entry
*invalid_range
;
2051 enum rl_overlaptype overlaptype
;
2055 /* Do not allow blockmap operation on a directory */
2056 if (vnode_isdir(vp
)) {
2061 * Check for underlying vnode requests and ensure that logical
2062 * to physical mapping is requested.
2064 if (ap
->a_bpn
== NULL
)
2067 if ( !vnode_issystem(vp
) && !vnode_islnk(vp
) && !vnode_isswap(vp
)) {
2068 if (VTOC(vp
)->c_lockowner
!= current_thread()) {
2069 hfs_lock(VTOC(vp
), HFS_FORCE_LOCK
);
2078 /* Check virtual blocks only when performing write operation */
2079 if ((ap
->a_flags
& VNODE_WRITE
) && (fp
->ff_unallocblocks
!= 0)) {
2080 if (hfs_start_transaction(hfsmp
) != 0) {
2086 syslocks
= SFL_EXTENTS
| SFL_BITMAP
;
2088 } else if (overflow_extents(fp
)) {
2089 syslocks
= SFL_EXTENTS
;
2093 lockflags
= hfs_systemfile_lock(hfsmp
, syslocks
, HFS_EXCLUSIVE_LOCK
);
2096 * Check for any delayed allocations.
2098 if ((ap
->a_flags
& VNODE_WRITE
) && (fp
->ff_unallocblocks
!= 0)) {
2100 u_int32_t loanedBlocks
;
2103 // Make sure we have a transaction. It's possible
2104 // that we came in and fp->ff_unallocblocks was zero
2105 // but during the time we blocked acquiring the extents
2106 // btree, ff_unallocblocks became non-zero and so we
2107 // will need to start a transaction.
2109 if (started_tr
== 0) {
2111 hfs_systemfile_unlock(hfsmp
, lockflags
);
2118 * Note: ExtendFileC will Release any blocks on loan and
2119 * aquire real blocks. So we ask to extend by zero bytes
2120 * since ExtendFileC will account for the virtual blocks.
2123 loanedBlocks
= fp
->ff_unallocblocks
;
2124 retval
= ExtendFileC(hfsmp
, (FCB
*)fp
, 0, 0,
2125 kEFAllMask
| kEFNoClumpMask
, &actbytes
);
2128 fp
->ff_unallocblocks
= loanedBlocks
;
2129 cp
->c_blocks
+= loanedBlocks
;
2130 fp
->ff_blocks
+= loanedBlocks
;
2132 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
2133 hfsmp
->loanedBlocks
+= loanedBlocks
;
2134 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
2136 hfs_systemfile_unlock(hfsmp
, lockflags
);
2137 cp
->c_flag
|= C_MODIFIED
;
2139 (void) hfs_update(vp
, TRUE
);
2140 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2142 hfs_end_transaction(hfsmp
);
2149 retval
= MapFileBlockC(hfsmp
, (FCB
*)fp
, ap
->a_size
, ap
->a_foffset
,
2150 ap
->a_bpn
, &bytesContAvail
);
2152 hfs_systemfile_unlock(hfsmp
, lockflags
);
2157 (void) hfs_update(vp
, TRUE
);
2158 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2159 hfs_end_transaction(hfsmp
);
2163 /* On write, always return error because virtual blocks, if any,
2164 * should have been allocated in ExtendFileC(). We do not
2165 * allocate virtual blocks on read, therefore return error
2166 * only if no virtual blocks are allocated. Otherwise we search
2167 * rangelist for zero-fills
2169 if ((MacToVFSError(retval
) != ERANGE
) ||
2170 (ap
->a_flags
& VNODE_WRITE
) ||
2171 ((ap
->a_flags
& VNODE_READ
) && (fp
->ff_unallocblocks
== 0))) {
2175 /* Validate if the start offset is within logical file size */
2176 if (ap
->a_foffset
> fp
->ff_size
) {
2180 /* Searching file extents has failed for read operation, therefore
2181 * search rangelist for any uncommitted holes in the file.
2183 overlaptype
= rl_scan(&fp
->ff_invalidranges
, ap
->a_foffset
,
2184 ap
->a_foffset
+ (off_t
)(ap
->a_size
- 1),
2186 switch(overlaptype
) {
2187 case RL_OVERLAPISCONTAINED
:
2188 /* start_offset <= rl_start, end_offset >= rl_end */
2189 if (ap
->a_foffset
!= invalid_range
->rl_start
) {
2192 case RL_MATCHINGOVERLAP
:
2193 /* start_offset = rl_start, end_offset = rl_end */
2194 case RL_OVERLAPCONTAINSRANGE
:
2195 /* start_offset >= rl_start, end_offset <= rl_end */
2196 case RL_OVERLAPSTARTSBEFORE
:
2197 /* start_offset > rl_start, end_offset >= rl_start */
2198 if ((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) {
2199 bytesContAvail
= (invalid_range
->rl_end
+ 1) - ap
->a_foffset
;
2201 bytesContAvail
= fp
->ff_size
- ap
->a_foffset
;
2203 if (bytesContAvail
> ap
->a_size
) {
2204 bytesContAvail
= ap
->a_size
;
2206 *ap
->a_bpn
= (daddr64_t
)-1;
2209 case RL_OVERLAPENDSAFTER
:
2210 /* start_offset < rl_start, end_offset < rl_end */
2217 /* MapFileC() found a valid extent in the filefork. Search the
2218 * mapping information further for invalid file ranges
2220 overlaptype
= rl_scan(&fp
->ff_invalidranges
, ap
->a_foffset
,
2221 ap
->a_foffset
+ (off_t
)bytesContAvail
- 1,
2223 if (overlaptype
!= RL_NOOVERLAP
) {
2224 switch(overlaptype
) {
2225 case RL_MATCHINGOVERLAP
:
2226 case RL_OVERLAPCONTAINSRANGE
:
2227 case RL_OVERLAPSTARTSBEFORE
:
2228 /* There's no valid block for this byte offset */
2229 *ap
->a_bpn
= (daddr64_t
)-1;
2230 /* There's no point limiting the amount to be returned
2231 * if the invalid range that was hit extends all the way
2232 * to the EOF (i.e. there's no valid bytes between the
2233 * end of this range and the file's EOF):
2235 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
2236 (invalid_range
->rl_end
+ 1 - ap
->a_foffset
< bytesContAvail
)) {
2237 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
2241 case RL_OVERLAPISCONTAINED
:
2242 case RL_OVERLAPENDSAFTER
:
2243 /* The range of interest hits an invalid block before the end: */
2244 if (invalid_range
->rl_start
== ap
->a_foffset
) {
2245 /* There's actually no valid information to be had starting here: */
2246 *ap
->a_bpn
= (daddr64_t
)-1;
2247 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
2248 (invalid_range
->rl_end
+ 1 - ap
->a_foffset
< bytesContAvail
)) {
2249 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
2252 bytesContAvail
= invalid_range
->rl_start
- ap
->a_foffset
;
2259 if (bytesContAvail
> ap
->a_size
)
2260 bytesContAvail
= ap
->a_size
;
2266 *ap
->a_run
= bytesContAvail
;
2269 *(int *)ap
->a_poff
= 0;
2275 return (MacToVFSError(retval
));
2280 * prepare and issue the I/O
2281 * buf_strategy knows how to deal
2282 * with requests that require
2286 hfs_vnop_strategy(struct vnop_strategy_args
*ap
)
2288 buf_t bp
= ap
->a_bp
;
2289 vnode_t vp
= buf_vnode(bp
);
2291 return (buf_strategy(VTOHFS(vp
)->hfs_devvp
, ap
));
/*
 * Truncate a cnode to at most length size, freeing (or adding) the
 * disk blocks.
 */
static int
do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context)
{
	register struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	struct proc *p = vfs_context_proc(context);
	kauth_cred_t cred = vfs_context_ucred(context);
	int retval;
	off_t bytesToAdd;
	off_t actualBytesAdded;
	off_t filebytes;
	u_long fileblocks;
	int blksize;
	struct hfsmount *hfsmp;
	int lockflags;

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
		 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

	if (length < 0)
		return (EINVAL);

	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)
		return (EINVAL);

	if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
		return (EFBIG);

	hfsmp = VTOHFS(vp);

	retval = E_NONE;

	/* Files that are changing size are not hot file candidates. */
	if (hfsmp->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
#if QUOTA
	if ((retval = hfs_getinoquota(cp)))
		return (retval);
#endif /* QUOTA */

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of ff_size is 0, length will be at least 1.
	 */
	if (length > (off_t)fp->ff_size) {
#if QUOTA
		retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
				   cred, 0);
		if (retval)
			goto Err_Exit;
#endif /* QUOTA */
		/*
		 * If we don't have enough physical space then
		 * we need to extend the physical size.
		 */
		if (length > filebytes) {
			int eflags;
			u_long blockHint = 0;

			/* All or nothing and don't round up to clumpsize. */
			eflags = kEFAllMask | kEFNoClumpMask;

			if (cred && suser(cred, NULL) != 0)
				eflags |= kEFReserveMask;  /* keep a reserve */

			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (filebytes == 0 &&
			    hfsmp->hfs_flags & HFS_METADATA_ZONE &&
			    hfs_virtualmetafile(cp)) {
				eflags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			}
			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			while ((length > filebytes) && (retval == E_NONE)) {
				bytesToAdd = length - filebytes;
				retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
								   (FCB*)fp,
								   bytesToAdd,
								   blockHint,
								   eflags,
								   &actualBytesAdded));

				filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
				if (actualBytesAdded == 0 && retval == E_NONE) {
					if (length > filebytes)
						length = filebytes;
					break;
				}
			} /* endwhile */

			hfs_systemfile_unlock(hfsmp, lockflags);

			if (hfsmp->jnl) {
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
			}

			hfs_end_transaction(hfsmp);

			if (retval)
				goto Err_Exit;

			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
				(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
		}
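		/*
		 * Growing the file leaves a gap with no on-disk contents between
		 * the old and the new EOF.  The block below either zero-fills the
		 * tail of the current last page right away (cluster_write with
		 * IO_HEADZEROFILL) or records the region in ff_invalidranges so it
		 * is zeroed lazily before it can ever be read back.
		 */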
		if (!(flags & IO_NOZEROFILL)) {
			if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
				struct rl_entry *invalid_range;
				off_t zero_limit;

				zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
				if (length < zero_limit) zero_limit = length;

				if (length > (off_t)fp->ff_size) {
					struct timeval tv;

					/* Extending the file: time to fill out the current last page w. zeroes? */
					if ((fp->ff_size & PAGE_MASK_64) &&
					    (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
					    fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {

						/* There's some valid data at the start of the (current) last page
						   of the file, so zero out the remainder of that page to ensure the
						   entire page contains valid data.  Since there is no invalid range
						   possible past the (current) eof, there's no need to remove anything
						   from the invalid range list before calling cluster_write(): */
						hfs_unlock(cp);
						retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
								fp->ff_size, (off_t)0,
								(flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
						hfs_lock(cp, HFS_FORCE_LOCK);
						if (retval) goto Err_Exit;

						/* Merely invalidate the remaining area, if necessary: */
						if (length > zero_limit) {
							microuptime(&tv);
							rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
							cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
						}
					} else {
						/* The page containing the (current) eof is invalid: just add the
						   remainder of the page to the invalid list, along with the area
						   being newly allocated:
						 */
						microuptime(&tv);
						rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
						cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
					}
				}
			} else {
				panic("hfs_truncate: invoked on non-UBC object?!");
			}
		}
		cp->c_touch_modtime = TRUE;
		fp->ff_size = length;
	} else { /* Shorten the size of the file */

		if ((off_t)fp->ff_size > length) {
			/* Any space previously marked as invalid is now irrelevant: */
			rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
		}
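		/*
		 * Note: ff_unallocblocks counts "loaned" blocks, blocks charged to
		 * the file (and to hfsmp->loanedBlocks) for delayed allocation but
		 * not yet mapped to on-disk extents.  Shrinking the fork returns
		 * the loan and, if the new length still needs them, borrows back
		 * just enough blocks to cover it.
		 */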
		/*
		 * Account for any unmapped blocks. Note that the new
		 * file length can still end up with unmapped blocks.
		 */
		if (fp->ff_unallocblocks > 0) {
			u_int32_t finalblks;
			u_int32_t loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);

			loanedBlocks = fp->ff_unallocblocks;
			cp->c_blocks -= loanedBlocks;
			fp->ff_blocks -= loanedBlocks;
			fp->ff_unallocblocks = 0;

			hfsmp->loanedBlocks -= loanedBlocks;

			finalblks = (length + blksize - 1) / blksize;
			if (finalblks > fp->ff_blocks) {
				/* calculate required unmapped blocks */
				loanedBlocks = finalblks - fp->ff_blocks;
				hfsmp->loanedBlocks += loanedBlocks;

				fp->ff_unallocblocks = loanedBlocks;
				cp->c_blocks += loanedBlocks;
				fp->ff_blocks += loanedBlocks;
			}
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		}

		/*
		 * For a TBE process the deallocation of the file blocks is
		 * delayed until the file is closed. And hfs_close calls
		 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
		 * isn't set, we make sure this isn't a TBE process.
		 */
		if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
#if QUOTA
			off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
#endif /* QUOTA */
			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			if (fp->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(fp))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
						(FCB*)fp, length, false));

				hfs_systemfile_unlock(hfsmp, lockflags);
			}
			if (hfsmp->jnl) {
				if (retval == 0) {
					fp->ff_size = length;
				}
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
			}

			hfs_end_transaction(hfsmp);

			filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
			if (retval)
				goto Err_Exit;
#if QUOTA
			/* These are bytesreleased */
			(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
#endif /* QUOTA */
		}
		/* Only set update flag if the logical length changes */
		if ((off_t)fp->ff_size != length)
			cp->c_touch_modtime = TRUE;
		fp->ff_size = length;
	}
	cp->c_touch_chgtime = TRUE;	/* status changed */
	cp->c_touch_modtime = TRUE;	/* file data was modified */
	retval = hfs_update(vp, MNT_WAIT);
	if (retval) {
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
			-1, -1, -1, retval, 0);
	}

Err_Exit:

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
		 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);

	return (retval);
}
/*
 * Truncate a cnode to at most length size, freeing (or adding) the
 * disk blocks.
 */
__private_extern__
int
hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
             vfs_context_t context)
{
	struct filefork *fp = VTOF(vp);
	off_t filebytes;
	u_long fileblocks;
	int blksize, error = 0;
	struct cnode *cp = VTOC(vp);

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {
		return (EISDIR);
	}
	/* A swap file cannot change size. */
	if (vnode_isswap(vp) && (length != 0)) {
		return (EPERM);
	}

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	//
	// Have to do this here so that we don't wind up with
	// i/o pending for blocks that are about to be released
	// if we truncate the file.
	//
	// If skipsetsize is set, then the caller is responsible
	// for the ubc_setsize.
	//
	if (!skipsetsize)
		ubc_setsize(vp, length);

	// have to loop truncating or growing files that are
	// really big because otherwise transactions can get
	// enormous and consume too many kernel resources.

	if (length < filebytes) {
		while (filebytes > length) {
			if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes -= HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, context);
			if (error)
				break;
		}
	} else if (length > filebytes) {
		while (filebytes < length) {
			if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes += HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, context);
			if (error)
				break;
		}
	} else /* Same logical size */ {

		error = do_hfs_truncate(vp, length, flags, context);
	}
	/* Files that are changing size are not hot file candidates. */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}

	return (error);
}
/*
 * Preallocate file storage space.
 */
int
hfs_vnop_allocate(struct vnop_allocate_args /* {
		vnode_t a_vp;
		off_t a_length;
		u_int32_t  a_flags;
		off_t *a_bytesallocated;
		off_t a_offset;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	ExtendedVCB *vcb;
	off_t length = ap->a_length;
	off_t startingPEOF;
	off_t moreBytesRequested;
	off_t actualBytesAdded;
	off_t filebytes;
	u_long fileblocks;
	int retval, retval2;
	u_int32_t blockHint;
	u_int32_t extendFlags;   /* For call to ExtendFileC */
	struct hfsmount *hfsmp;
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);
	int lockflags;

	*(ap->a_bytesallocated) = 0;

	if (!vnode_isreg(vp))
		return (EISDIR);
	if (length < (off_t)0)
		return (EINVAL);

	cp = VTOC(vp);

	hfs_lock_truncate(cp, TRUE);

	if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
		goto Err_Exit;
	}

	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);
	vcb = VTOVCB(vp);

	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

	if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
		retval = EINVAL;
		goto Err_Exit;
	}
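	/*
	 * The a_flags bits come from the VNOP_ALLOCATE interface:
	 * ALLOCATECONTIG asks for contiguous space, ALLOCATEALL makes the
	 * request all-or-nothing, ALLOCATEFROMPEOF treats a_length as an
	 * amount beyond the current physical EOF, and ALLOCATEFROMVOL uses
	 * a_offset as a volume-relative allocation hint.
	 */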
	/* Fill in the flags word for the call to Extend the file */

	extendFlags = kEFNoClumpMask;
	if (ap->a_flags & ALLOCATECONTIG)
		extendFlags |= kEFContigMask;
	if (ap->a_flags & ALLOCATEALL)
		extendFlags |= kEFAllMask;
	if (cred && suser(cred, NULL) != 0)
		extendFlags |= kEFReserveMask;

	retval = E_NONE;
	blockHint = 0;
	startingPEOF = filebytes;

	if (ap->a_flags & ALLOCATEFROMPEOF)
		length += filebytes;
	else if (ap->a_flags & ALLOCATEFROMVOL)
		blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

	/* If no changes are necessary, then we're done */
	if (filebytes == length)
		goto Std_Exit;

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of filebytes is 0, length will be at least 1.
	 */
	if (length > filebytes) {
		off_t total_bytes_added = 0, orig_request_size;

		orig_request_size = moreBytesRequested = length - filebytes;

#if QUOTA
		retval = hfs_chkdq(cp,
				(int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
				cred, 0);
		if (retval)
			goto Err_Exit;

#endif /* QUOTA */
		/*
		 * Metadata zone checks.
		 */
		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (hfs_virtualmetafile(cp)) {
				extendFlags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
				   (blockHint <= hfsmp->hfs_metazone_end)) {
				/*
				 * Move blockHint outside metadata zone.
				 */
				blockHint = hfsmp->hfs_metazone_end + 1;
			}
		}

		while ((length > filebytes) && (retval == E_NONE)) {
			off_t bytesRequested;

			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
				bytesRequested = HFS_BIGFILE_SIZE;
			} else {
				bytesRequested = moreBytesRequested;
			}

			retval = MacToVFSError(ExtendFileC(vcb,
							   (FCB*)fp,
							   bytesRequested,
							   blockHint,
							   extendFlags,
							   &actualBytesAdded));

			if (retval == E_NONE) {
				*(ap->a_bytesallocated) += actualBytesAdded;
				total_bytes_added += actualBytesAdded;
				moreBytesRequested -= actualBytesAdded;
				if (blockHint != 0) {
					blockHint += actualBytesAdded / vcb->blockSize;
				}
			}
			filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

			hfs_systemfile_unlock(hfsmp, lockflags);

			if (hfsmp->jnl) {
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
			}

			hfs_end_transaction(hfsmp);
		}

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes))
			goto Err_Exit;

		/*
		 * Adjust actualBytesAdded to be allocation block aligned, not
		 * clump size aligned.
		 * NOTE: So what we are reporting does not affect reality
		 * until the file is closed, when we truncate the file to allocation
		 * block size.
		 */
		if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
			*(ap->a_bytesallocated) =
				roundup(orig_request_size, (off_t)vcb->blockSize);

	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).
			 */
		}

		retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes)) goto Err_Exit;
#if QUOTA
		/* These are bytesreleased */
		(void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);
#endif /* QUOTA */

		if (fp->ff_size > filebytes) {
			fp->ff_size = filebytes;

			hfs_unlock(cp);
			ubc_setsize(vp, fp->ff_size);
			hfs_lock(cp, HFS_FORCE_LOCK);
		}
	}

Std_Exit:
	cp->c_touch_chgtime = TRUE;
	cp->c_touch_modtime = TRUE;
	retval2 = hfs_update(vp, MNT_WAIT);

	if (retval == 0)
		retval = retval2;
Err_Exit:
	hfs_unlock_truncate(cp, TRUE);
	hfs_unlock(cp);
	return (MacToVFSError(retval));
}
/*
 * Pagein for HFS filesystem
 */
int
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
	struct vnop_pagein_args {
		vnode_t       a_vp,
		upl_t         a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		int           a_flags
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	int error;

	error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
	                       ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
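	/*
	 * While the hot-file recording stage is active, the bytes brought in by
	 * each pagein are accumulated in ff_bytesread; hot file clustering later
	 * uses these per-file totals when deciding which files to treat as hot.
	 */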
	/*
	 * Keep track of blocks read.
	 */
	if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
		struct cnode *cp;
		struct filefork *fp;
		int bytesread;
		int took_cnode_lock = 0;

		cp = VTOC(vp);
		fp = VTOF(vp);

		if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
			bytesread = fp->ff_size;
		else
			bytesread = ap->a_size;

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			took_cnode_lock = 1;
		}
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
			struct timeval tv;

			fp->ff_bytesread = bytesread;
			microtime(&tv);
			cp->c_atime = tv.tv_sec;
		} else {
			fp->ff_bytesread += bytesread;
		}
		cp->c_touch_acctime = TRUE;
		if (took_cnode_lock)
			hfs_unlock(cp);
	}
	return (error);
}
/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
		vnode_t       a_vp,
		upl_t         a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		int           a_flags
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	int retval = 0;
	off_t filesize;

	cp = VTOC(vp);
	fp = VTOF(vp);

	/*
	 * Figure out where the file ends, for pageout purposes. If
	 * ff_new_size > ff_size, then we're in the middle of extending the
	 * file via a write, so it is safe (and necessary) that we be able
	 * to pageout up to that point.
	 */
	filesize = fp->ff_size;
	if (fp->ff_new_size > filesize)
		filesize = fp->ff_new_size;

	if (!vnode_isswap(vp)) {
		off_t end_of_range;
		int tooklock = 0;

		if (cp->c_lockowner != current_thread()) {
			if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
				if (!(ap->a_flags & UPL_NOCOMMIT)) {
					ubc_upl_abort_range(ap->a_pl,
					                    ap->a_pl_offset,
					                    ap->a_size,
					                    UPL_ABORT_FREE_ON_EMPTY);
				}
				return (retval);
			}
			tooklock = 1;
		}
		end_of_range = ap->a_f_offset + ap->a_size - 1;

		if (end_of_range >= filesize) {
			end_of_range = (off_t)(filesize - 1);
		}
		if (ap->a_f_offset < filesize) {
			rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
			cp->c_flag |= C_MODIFIED;  /* leof is dirty */
		}

		if (tooklock) {
			hfs_unlock(cp);
		}
	}

	retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
	                         ap->a_size, filesize, ap->a_flags);

	/*
	 * If data was written, and setuid or setgid bits are set and
	 * this process is not the superuser then clear the setuid and
	 * setgid bits as a precaution against tampering.
	 */
	if ((retval == 0) &&
	    (cp->c_mode & (S_ISUID | S_ISGID)) &&
	    (vfs_context_suser(ap->a_context) != 0)) {
		hfs_lock(cp, HFS_FORCE_LOCK);
		cp->c_mode &= ~(S_ISUID | S_ISGID);
		cp->c_touch_chgtime = TRUE;
		hfs_unlock(cp);
	}
	return (retval);
}
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {

		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always true on big endian, so we always validate
		 * before writing here. On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
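		/*
		 * The last two bytes of a B-tree node hold the offset of the node's
		 * first record, which is always 14 (0x000e) since records start
		 * right after the node descriptor.  Finding that value in host byte
		 * order identifies a node that has not been swapped yet.
		 */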
		if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
			if (retval)
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");
		}
	}

	/* This buffer shouldn't be locked anymore but if it is clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
		// XXXdbg
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp %p\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite (ap);

	return (retval);
}
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------
 * |///////////////|
 * -----------------
 * 0               N (file offset)
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 * 0               N     N+1              2N
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 * 0               N     N+1              2N
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *                       0               N (new file offset)
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
__private_extern__
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
             struct proc *p)
{
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	u_int32_t headblks;
	u_int32_t datablks;
	u_int32_t blksize;
	u_int32_t growsize;
	u_int32_t nextallocsave;
	daddr64_t sector_a, sector_b;
	int eflags;
	off_t newbytes;
	int retval;
	int lockflags = 0;
	int took_trunc_lock = 0;
	int started_tr = 0;
	enum vtype vnodetype;

	vnodetype = vnode_vtype(vp);
	if (vnodetype != VREG && vnodetype != VLNK) {
		return (EPERM);
	}

	hfsmp = VTOHFS(vp);
	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
		return (ENOSPC);
	}

	cp = VTOC(vp);
	fp = VTOF(vp);
	if (fp->ff_unallocblocks)
		return (EINVAL);
	blksize = hfsmp->blockSize;
	if (blockHint == 0)
		blockHint = hfsmp->nextAllocation;

	if ((fp->ff_size > 0x7fffffff) ||
	    ((fp->ff_size > blksize) && vnodetype == VLNK)) {
		return (EFBIG);
	}

	//
	// We do not believe that this call to hfs_fsync() is
	// necessary and it causes a journal transaction
	// deadlock so we are removing it.
	//
	//if (vnodetype == VREG && !vnode_issystem(vp)) {
	//	retval = hfs_fsync(vp, MNT_WAIT, 0, p);
	//	if (retval)
	//		return (retval);
	//}

	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
		hfs_unlock(cp);
		hfs_lock_truncate(cp, TRUE);
		/* Force lock since caller expects lock to be held. */
		if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
			hfs_unlock_truncate(cp, TRUE);
			return (retval);
		}
		/* No need to continue if file was removed. */
		if (cp->c_flag & C_NOEXISTS) {
			hfs_unlock_truncate(cp, TRUE);
			return (ENOENT);
		}
		took_trunc_lock = 1;
	}
	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;

	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, TRUE);
		return (EINVAL);
	}

	/*
	 * Protect the extents b-tree and the allocation bitmap
	 * during MapFileBlockC and ExtendFileC operations.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
	if (retval) {
		retval = MacToVFSError(retval);
		goto out;
	}

	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
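	/*
	 * The volume's rolling allocation pointer is saved here; for a
	 * metadata-zone allocation it is put back below so that ordinary
	 * allocations do not resume scanning from inside the metadata zone.
	 */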
	nextallocsave = hfsmp->nextAllocation;
	retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
	if (eflags & kEFMetadataMask) {
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
		MarkVCBDirty(hfsmp);
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	}

	retval = MacToVFSError(retval);
	if (retval == 0) {
		cp->c_flag |= C_MODIFIED;
		if (newbytes < growsize) {
			retval = ENOSPC;
			goto restore;
		} else if (fp->ff_blocks < (headblks + datablks)) {
			printf("hfs_relocate: allocation failed");
			retval = ENOSPC;
			goto restore;
		}

		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
		if (retval) {
			retval = MacToVFSError(retval);
		} else if ((sector_a + 1) == sector_b) {
			retval = ENOSPC;
			goto restore;
		} else if ((eflags & kEFMetadataMask) &&
		           ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
		              hfsmp->hfs_metazone_end)) {
			const char * filestr;
			char emptystr = '\0';

			if (cp->c_desc.cd_nameptr != NULL) {
				filestr = (const char *)&cp->c_desc.cd_nameptr[0];
			} else if (vnode_name(vp) != NULL) {
				filestr = vnode_name(vp);
			} else {
				filestr = &emptystr;
			}
			printf("hfs_relocate: %s didn't move into MDZ (%d blks)\n", filestr, fp->ff_blocks);
			retval = ENOSPC;
			goto restore;
		}
	}
	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	hfs_end_transaction(hfsmp);
	started_tr = 0;

	if (retval) {
		/*
		 * Check to see if failure is due to excessive fragmentation.
		 */
		if ((retval == ENOSPC) &&
		    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
		}
		goto out;
	}
	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */

	if (vnodetype == VLNK)
		retval = hfs_clonelink(vp, blksize, cred, p);
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);

	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto out;
	}
	started_tr = 1;

	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	if (retval)
		goto restore;
out:
	if (took_trunc_lock)
		hfs_unlock_truncate(cp, TRUE);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/* Push cnode's new extent data to disk. */
	if (retval == 0) {
		(void) hfs_update(vp, MNT_WAIT);
	}
	if (hfsmp->jnl) {
		if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
			(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
		else
			(void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
	}
exit:
	if (started_tr)
		hfs_end_transaction(hfsmp);

	return (retval);

restore:
	if (fp->ff_blocks == headblks) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, TRUE);
		goto exit;
	}
	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}

	(void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;

	if (took_trunc_lock)
		hfs_unlock_truncate(cp, TRUE);
	goto exit;
}
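/*
 * Clone a symlink's data.
 *
 * hfs_relocate() only relocates symlinks whose data fits in one allocation
 * block, so the clone is simply: read block 0 (the old copy), write the same
 * data into block 1 (the block just added by ExtendFileC), then invalidate
 * the stale buffers.
 */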
static int
hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
{
	struct buf *head_bp = NULL;
	struct buf *tail_bp = NULL;
	int error;

	error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
	if (error)
		goto out;

	tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
	if (tail_bp == NULL) {
		error = EIO;
		goto out;
	}
	bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
	error = (int)buf_bwrite(tail_bp);
out:
	if (head_bp) {
		buf_markinvalid(head_bp);
		buf_brelse(head_bp);
	}
	(void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);

	return (error);
}
/*
 * Clone a file's data within the file.
 *
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	caddr_t bufp;
	size_t bufsize;
	size_t copysize;
	size_t iosize;
	size_t offset;
	off_t writebase;
	off_t filesize;
	uio_t auio;
	int error = 0;

	filesize = VTOF(vp)->ff_blocks * blksize;  /* virtual file size */
	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 128 * 1024);
	offset = 0;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	hfs_unlock(VTOC(vp));

	auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);

	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_read(vp, auio, copysize, IO_NOCACHE);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
			error = EIO;
			break;
		}

		uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_write(vp, auio, filesize + offset,
		                      filesize + offset + iosize,
		                      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
			error = EIO;
			break;
		}
		offset += iosize;
	}
	uio_free(auio);

	/*
	 * No need to call ubc_sync_range or hfs_invalbuf
	 * since the file was copied using IO_NOCACHE.
	 */

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
	return (error);
}
/*
 * Clone a system (metadata) file.
 *
 */
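/*
 * Unlike hfs_clonefile(), which copies through the cluster layer, this copy
 * uses buffer-cache reads and writes (buf_meta_bread/buf_bwrite), matching
 * how HFS accesses its metadata files.
 */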
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
	caddr_t bufp;
	char *offset;
	size_t bufsize;
	size_t iosize;
	struct buf *bp = NULL;
	daddr64_t blkno;
	daddr64_t blk;
	daddr64_t start_blk;
	daddr64_t last_blk;
	int breadcnt;
	int i;
	int error = 0;

	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	start_blk = ((daddr64_t)blkstart * blksize) / iosize;
	last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;
	blkno = 0;

	while (blkno < last_blk) {
		/*
		 * Read up to a megabyte
		 */
		offset = bufp;
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
			error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
			if (error) {
				printf("hfs_clonesysfile: meta_bread error %d\n", error);
				goto out;
			}
			if (buf_count(bp) != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
				goto out;
			}
			bcopy((char *)buf_dataptr(bp), offset, iosize);

			buf_markinvalid(bp);
			buf_brelse(bp);
			bp = NULL;

			offset += iosize;
		}

		/*
		 * Write up to a megabyte
		 */
		offset = bufp;
		for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
			bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
			if (bp == NULL) {
				printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
				error = EIO;
				goto out;
			}
			bcopy(offset, (char *)buf_dataptr(bp), iosize);
			error = (int)buf_bwrite(bp);
			bp = NULL;
			if (error)
				goto out;
			offset += iosize;
		}
	}
out:
	if (bp) {
		buf_brelse(bp);
	}

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	error = hfs_fsync(vp, MNT_WAIT, 0, p);

	return (error);
}